blob: 744fc838f842d1eade777db6dead57a542786807 [file] [log] [blame]
Owen Taylor3473f882001-02-23 17:55:21 +00001/*
2 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
3 * implemented on top of the SAX interfaces
4 *
5 * References:
6 * The XML specification:
7 * http://www.w3.org/TR/REC-xml
8 * Original 1.0 version:
9 * http://www.w3.org/TR/1998/REC-xml-19980210
10 * XML second edition working draft
11 * http://www.w3.org/TR/2000/WD-xml-2e-20000814
12 *
13 * Okay this is a big file, the parser core is around 7000 lines, then it
14 * is followed by the progressive parser top routines, then the various
Daniel Veillardcbaf3992001-12-31 16:16:02 +000015 * high level APIs to call the parser and a few miscellaneous functions.
Owen Taylor3473f882001-02-23 17:55:21 +000016 * A number of helper functions and deprecated ones have been moved to
17 * parserInternals.c to reduce this file size.
18 * As much as possible the functions are associated with their relative
19 * production in the XML specification. A few productions defining the
20 * different ranges of character are actually implemented either in
21 * parserInternals.h or parserInternals.c
22 * The DOM tree build is realized from the default SAX callbacks in
23 * the module SAX.c.
24 * The routines doing the validation checks are in valid.c and called either
Daniel Veillardcbaf3992001-12-31 16:16:02 +000025 * from the SAX callbacks or as standalone functions using a preparsed
Owen Taylor3473f882001-02-23 17:55:21 +000026 * document.
27 *
28 * See Copyright for the status of this software.
29 *
Daniel Veillardc5d64342001-06-24 12:13:24 +000030 * daniel@veillard.com
Owen Taylor3473f882001-02-23 17:55:21 +000031 */
32
Daniel Veillard34ce8be2002-03-18 19:37:11 +000033#define IN_LIBXML
Bjorn Reese70a9da52001-04-21 16:57:29 +000034#include "libxml.h"
35
Daniel Veillard3c5ed912002-01-08 10:36:16 +000036#if defined(WIN32) && !defined (__CYGWIN__)
Owen Taylor3473f882001-02-23 17:55:21 +000037#define XML_DIR_SEP '\\'
38#else
Owen Taylor3473f882001-02-23 17:55:21 +000039#define XML_DIR_SEP '/'
40#endif
41
Owen Taylor3473f882001-02-23 17:55:21 +000042#include <stdlib.h>
43#include <string.h>
Aleksey Sanine7acf432003-10-02 20:05:27 +000044#include <stdarg.h>
Owen Taylor3473f882001-02-23 17:55:21 +000045#include <libxml/xmlmemory.h>
Daniel Veillardd0463562001-10-13 09:15:48 +000046#include <libxml/threads.h>
47#include <libxml/globals.h>
Owen Taylor3473f882001-02-23 17:55:21 +000048#include <libxml/tree.h>
49#include <libxml/parser.h>
50#include <libxml/parserInternals.h>
51#include <libxml/valid.h>
52#include <libxml/entities.h>
53#include <libxml/xmlerror.h>
54#include <libxml/encoding.h>
55#include <libxml/xmlIO.h>
56#include <libxml/uri.h>
Daniel Veillard5d90b6c2001-08-22 14:29:45 +000057#ifdef LIBXML_CATALOG_ENABLED
58#include <libxml/catalog.h>
59#endif
William M. Brack1d8c9b22004-12-25 10:14:57 +000060#ifdef LIBXML_SCHEMAS_ENABLED
61#include <libxml/xmlschemastypes.h>
62#include <libxml/relaxng.h>
63#endif
Owen Taylor3473f882001-02-23 17:55:21 +000064#ifdef HAVE_CTYPE_H
65#include <ctype.h>
66#endif
67#ifdef HAVE_STDLIB_H
68#include <stdlib.h>
69#endif
70#ifdef HAVE_SYS_STAT_H
71#include <sys/stat.h>
72#endif
73#ifdef HAVE_FCNTL_H
74#include <fcntl.h>
75#endif
76#ifdef HAVE_UNISTD_H
77#include <unistd.h>
78#endif
79#ifdef HAVE_ZLIB_H
80#include <zlib.h>
81#endif
82
/**
 * xmlParserMaxDepth:
 *
 * Upper bound on the depth of the XML documents that we accept to
 * process. The value is arbitrary: it is a safety boundary, not an
 * intrinsic limitation of the parser.
 */
unsigned int xmlParserMaxDepth = 1024;
Owen Taylor3473f882001-02-23 17:55:21 +000091
Daniel Veillard0fb18932003-09-07 09:14:37 +000092#define SAX2 1
93
Daniel Veillard21a0f912001-02-25 19:54:14 +000094#define XML_PARSER_BIG_BUFFER_SIZE 300
Owen Taylor3473f882001-02-23 17:55:21 +000095#define XML_PARSER_BUFFER_SIZE 100
96
Daniel Veillard5997aca2002-03-18 18:36:20 +000097#define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"
98
/*
 * Processing-instruction targets carrying the "xml" prefix that the W3C
 * specifications allow. The table is terminated by a NULL entry.
 */
static const char *xmlW3CPIs[] = { "xml-stylesheet", NULL };
107
Daniel Veillarda07050d2003-10-19 14:46:32 +0000108
Owen Taylor3473f882001-02-23 17:55:21 +0000109/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
Owen Taylor3473f882001-02-23 17:55:21 +0000110xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
111 const xmlChar **str);
112
Daniel Veillard7d515752003-09-26 19:12:37 +0000113static xmlParserErrors
Daniel Veillarda97a19b2001-05-20 13:19:52 +0000114xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
115 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +0000116 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +0000117 const xmlChar *ID, xmlNodePtr *list);
Owen Taylor3473f882001-02-23 17:55:21 +0000118
Daniel Veillard81273902003-09-30 00:43:48 +0000119#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +0000120static void
121xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
122 xmlNodePtr lastNode);
Daniel Veillard81273902003-09-30 00:43:48 +0000123#endif /* LIBXML_LEGACY_ENABLED */
Daniel Veillard8107a222002-01-13 14:10:10 +0000124
Daniel Veillard7d515752003-09-26 19:12:37 +0000125static xmlParserErrors
Daniel Veillard328f48c2002-11-15 15:24:34 +0000126xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
127 const xmlChar *string, void *user_data, xmlNodePtr *lst);
Daniel Veillarde57ec792003-09-10 10:50:59 +0000128
129/************************************************************************
130 * *
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000131 * Some factorized error routines *
132 * *
133 ************************************************************************/
134
135/**
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000136 * xmlErrAttributeDup:
137 * @ctxt: an XML parser context
138 * @prefix: the attribute prefix
139 * @localname: the attribute localname
140 *
141 * Handle a redefinition of attribute error
142 */
143static void
144xmlErrAttributeDup(xmlParserCtxtPtr ctxt, const xmlChar * prefix,
145 const xmlChar * localname)
146{
Daniel Veillard157fee02003-10-31 10:36:03 +0000147 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
148 (ctxt->instate == XML_PARSER_EOF))
149 return;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000150 ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
Daniel Veillard2b8c4a12003-10-02 22:28:19 +0000151 if (prefix == NULL)
Daniel Veillard659e71e2003-10-10 14:10:40 +0000152 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000153 ctxt->errNo, XML_ERR_FATAL, NULL, 0,
154 (const char *) localname, NULL, NULL, 0, 0,
155 "Attribute %s redefined\n", localname);
Daniel Veillard2b8c4a12003-10-02 22:28:19 +0000156 else
Daniel Veillard659e71e2003-10-10 14:10:40 +0000157 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000158 ctxt->errNo, XML_ERR_FATAL, NULL, 0,
159 (const char *) prefix, (const char *) localname,
160 NULL, 0, 0, "Attribute %s:%s redefined\n", prefix,
161 localname);
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000162 ctxt->wellFormed = 0;
163 if (ctxt->recovery == 0)
164 ctxt->disableSAX = 1;
165}
166
167/**
168 * xmlFatalErr:
169 * @ctxt: an XML parser context
170 * @error: the error number
171 * @extra: extra information string
172 *
173 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
174 */
175static void
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000176xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info)
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000177{
178 const char *errmsg;
179
Daniel Veillard157fee02003-10-31 10:36:03 +0000180 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
181 (ctxt->instate == XML_PARSER_EOF))
182 return;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000183 switch (error) {
184 case XML_ERR_INVALID_HEX_CHARREF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000185 errmsg = "CharRef: invalid hexadecimal value\n";
186 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000187 case XML_ERR_INVALID_DEC_CHARREF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000188 errmsg = "CharRef: invalid decimal value\n";
189 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000190 case XML_ERR_INVALID_CHARREF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000191 errmsg = "CharRef: invalid value\n";
192 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000193 case XML_ERR_INTERNAL_ERROR:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000194 errmsg = "internal error";
195 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000196 case XML_ERR_PEREF_AT_EOF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000197 errmsg = "PEReference at end of document\n";
198 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000199 case XML_ERR_PEREF_IN_PROLOG:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000200 errmsg = "PEReference in prolog\n";
201 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000202 case XML_ERR_PEREF_IN_EPILOG:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000203 errmsg = "PEReference in epilog\n";
204 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000205 case XML_ERR_PEREF_NO_NAME:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000206 errmsg = "PEReference: no name\n";
207 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000208 case XML_ERR_PEREF_SEMICOL_MISSING:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000209 errmsg = "PEReference: expecting ';'\n";
210 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000211 case XML_ERR_ENTITY_LOOP:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000212 errmsg = "Detected an entity reference loop\n";
213 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000214 case XML_ERR_ENTITY_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000215 errmsg = "EntityValue: \" or ' expected\n";
216 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000217 case XML_ERR_ENTITY_PE_INTERNAL:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000218 errmsg = "PEReferences forbidden in internal subset\n";
219 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000220 case XML_ERR_ENTITY_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000221 errmsg = "EntityValue: \" or ' expected\n";
222 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000223 case XML_ERR_ATTRIBUTE_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000224 errmsg = "AttValue: \" or ' expected\n";
225 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000226 case XML_ERR_LT_IN_ATTRIBUTE:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000227 errmsg = "Unescaped '<' not allowed in attributes values\n";
228 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000229 case XML_ERR_LITERAL_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000230 errmsg = "SystemLiteral \" or ' expected\n";
231 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000232 case XML_ERR_LITERAL_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000233 errmsg = "Unfinished System or Public ID \" or ' expected\n";
234 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000235 case XML_ERR_MISPLACED_CDATA_END:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000236 errmsg = "Sequence ']]>' not allowed in content\n";
237 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000238 case XML_ERR_URI_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000239 errmsg = "SYSTEM or PUBLIC, the URI is missing\n";
240 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000241 case XML_ERR_PUBID_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000242 errmsg = "PUBLIC, the Public Identifier is missing\n";
243 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000244 case XML_ERR_HYPHEN_IN_COMMENT:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000245 errmsg = "Comment must not contain '--' (double-hyphen)\n";
246 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000247 case XML_ERR_PI_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000248 errmsg = "xmlParsePI : no target name\n";
249 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000250 case XML_ERR_RESERVED_XML_NAME:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000251 errmsg = "Invalid PI name\n";
252 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000253 case XML_ERR_NOTATION_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000254 errmsg = "NOTATION: Name expected here\n";
255 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000256 case XML_ERR_NOTATION_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000257 errmsg = "'>' required to close NOTATION declaration\n";
258 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000259 case XML_ERR_VALUE_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000260 errmsg = "Entity value required\n";
261 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000262 case XML_ERR_URI_FRAGMENT:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000263 errmsg = "Fragment not allowed";
264 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000265 case XML_ERR_ATTLIST_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000266 errmsg = "'(' required to start ATTLIST enumeration\n";
267 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000268 case XML_ERR_NMTOKEN_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000269 errmsg = "NmToken expected in ATTLIST enumeration\n";
270 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000271 case XML_ERR_ATTLIST_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000272 errmsg = "')' required to finish ATTLIST enumeration\n";
273 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000274 case XML_ERR_MIXED_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000275 errmsg = "MixedContentDecl : '|' or ')*' expected\n";
276 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000277 case XML_ERR_PCDATA_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000278 errmsg = "MixedContentDecl : '#PCDATA' expected\n";
279 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000280 case XML_ERR_ELEMCONTENT_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000281 errmsg = "ContentDecl : Name or '(' expected\n";
282 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000283 case XML_ERR_ELEMCONTENT_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000284 errmsg = "ContentDecl : ',' '|' or ')' expected\n";
285 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000286 case XML_ERR_PEREF_IN_INT_SUBSET:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000287 errmsg =
288 "PEReference: forbidden within markup decl in internal subset\n";
289 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000290 case XML_ERR_GT_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000291 errmsg = "expected '>'\n";
292 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000293 case XML_ERR_CONDSEC_INVALID:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000294 errmsg = "XML conditional section '[' expected\n";
295 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000296 case XML_ERR_EXT_SUBSET_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000297 errmsg = "Content error in the external subset\n";
298 break;
299 case XML_ERR_CONDSEC_INVALID_KEYWORD:
300 errmsg =
301 "conditional section INCLUDE or IGNORE keyword expected\n";
302 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000303 case XML_ERR_CONDSEC_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000304 errmsg = "XML conditional section not closed\n";
305 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000306 case XML_ERR_XMLDECL_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000307 errmsg = "Text declaration '<?xml' required\n";
308 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000309 case XML_ERR_XMLDECL_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000310 errmsg = "parsing XML declaration: '?>' expected\n";
311 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000312 case XML_ERR_EXT_ENTITY_STANDALONE:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000313 errmsg = "external parsed entities cannot be standalone\n";
314 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000315 case XML_ERR_ENTITYREF_SEMICOL_MISSING:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000316 errmsg = "EntityRef: expecting ';'\n";
317 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000318 case XML_ERR_DOCTYPE_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000319 errmsg = "DOCTYPE improperly terminated\n";
320 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000321 case XML_ERR_LTSLASH_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000322 errmsg = "EndTag: '</' not found\n";
323 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000324 case XML_ERR_EQUAL_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000325 errmsg = "expected '='\n";
326 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000327 case XML_ERR_STRING_NOT_CLOSED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000328 errmsg = "String not closed expecting \" or '\n";
329 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000330 case XML_ERR_STRING_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000331 errmsg = "String not started expecting ' or \"\n";
332 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000333 case XML_ERR_ENCODING_NAME:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000334 errmsg = "Invalid XML encoding name\n";
335 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000336 case XML_ERR_STANDALONE_VALUE:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000337 errmsg = "standalone accepts only 'yes' or 'no'\n";
338 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000339 case XML_ERR_DOCUMENT_EMPTY:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000340 errmsg = "Document is empty\n";
341 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000342 case XML_ERR_DOCUMENT_END:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000343 errmsg = "Extra content at the end of the document\n";
344 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000345 case XML_ERR_NOT_WELL_BALANCED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000346 errmsg = "chunk is not well balanced\n";
347 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000348 case XML_ERR_EXTRA_CONTENT:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000349 errmsg = "extra content at the end of well balanced chunk\n";
350 break;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000351 case XML_ERR_VERSION_MISSING:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000352 errmsg = "Malformed declaration expecting version\n";
353 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000354#if 0
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000355 case:
356 errmsg = "\n";
357 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000358#endif
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000359 default:
360 errmsg = "Unregistered error message\n";
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000361 }
362 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000363 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000364 XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, errmsg,
365 info);
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000366 ctxt->wellFormed = 0;
367 if (ctxt->recovery == 0)
368 ctxt->disableSAX = 1;
369}
370
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000371/**
372 * xmlFatalErrMsg:
373 * @ctxt: an XML parser context
374 * @error: the error number
375 * @msg: the error message
376 *
377 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
378 */
379static void
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000380xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
381 const char *msg)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000382{
Daniel Veillard157fee02003-10-31 10:36:03 +0000383 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
384 (ctxt->instate == XML_PARSER_EOF))
385 return;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000386 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000387 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000388 XML_ERR_FATAL, NULL, 0, NULL, NULL, NULL, 0, 0, msg);
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000389 ctxt->wellFormed = 0;
390 if (ctxt->recovery == 0)
391 ctxt->disableSAX = 1;
392}
393
394/**
Daniel Veillard24eb9782003-10-04 21:08:09 +0000395 * xmlWarningMsg:
396 * @ctxt: an XML parser context
397 * @error: the error number
398 * @msg: the error message
399 * @str1: extra data
400 * @str2: extra data
401 *
402 * Handle a warning.
403 */
404static void
405xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
406 const char *msg, const xmlChar *str1, const xmlChar *str2)
407{
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000408 xmlStructuredErrorFunc schannel = NULL;
Daniel Veillardc790bf42003-10-11 10:50:10 +0000409
Daniel Veillard157fee02003-10-31 10:36:03 +0000410 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
411 (ctxt->instate == XML_PARSER_EOF))
412 return;
Daniel Veillardc790bf42003-10-11 10:50:10 +0000413 if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC))
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000414 schannel = ctxt->sax->serror;
415 __xmlRaiseError(schannel,
Daniel Veillard659e71e2003-10-10 14:10:40 +0000416 (ctxt->sax) ? ctxt->sax->warning : NULL,
417 ctxt->userData,
Daniel Veillard24eb9782003-10-04 21:08:09 +0000418 ctxt, NULL, XML_FROM_PARSER, error,
419 XML_ERR_WARNING, NULL, 0,
420 (const char *) str1, (const char *) str2, NULL, 0, 0,
421 msg, (const char *) str1, (const char *) str2);
422}
423
424/**
425 * xmlValidityError:
426 * @ctxt: an XML parser context
427 * @error: the error number
428 * @msg: the error message
429 * @str1: extra data
430 *
Daniel Veillardf88d8cf2003-12-08 10:25:02 +0000431 * Handle a validity error.
Daniel Veillard24eb9782003-10-04 21:08:09 +0000432 */
433static void
434xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error,
435 const char *msg, const xmlChar *str1)
436{
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000437 xmlStructuredErrorFunc schannel = NULL;
Daniel Veillard157fee02003-10-31 10:36:03 +0000438
439 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
440 (ctxt->instate == XML_PARSER_EOF))
441 return;
Daniel Veillard24eb9782003-10-04 21:08:09 +0000442 ctxt->errNo = error;
Daniel Veillardc790bf42003-10-11 10:50:10 +0000443 if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC))
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000444 schannel = ctxt->sax->serror;
Daniel Veillardc790bf42003-10-11 10:50:10 +0000445 __xmlRaiseError(schannel,
Daniel Veillard659e71e2003-10-10 14:10:40 +0000446 ctxt->vctxt.error, ctxt->vctxt.userData,
Daniel Veillard24eb9782003-10-04 21:08:09 +0000447 ctxt, NULL, XML_FROM_DTD, error,
448 XML_ERR_ERROR, NULL, 0, (const char *) str1,
449 NULL, NULL, 0, 0,
450 msg, (const char *) str1);
451 ctxt->valid = 0;
452}
453
454/**
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000455 * xmlFatalErrMsgInt:
456 * @ctxt: an XML parser context
457 * @error: the error number
458 * @msg: the error message
459 * @val: an integer value
460 *
461 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
462 */
463static void
464xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000465 const char *msg, int val)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000466{
Daniel Veillard157fee02003-10-31 10:36:03 +0000467 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
468 (ctxt->instate == XML_PARSER_EOF))
469 return;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000470 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000471 __xmlRaiseError(NULL, NULL, NULL,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000472 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
473 NULL, 0, NULL, NULL, NULL, val, 0, msg, val);
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000474 ctxt->wellFormed = 0;
475 if (ctxt->recovery == 0)
476 ctxt->disableSAX = 1;
477}
478
479/**
Daniel Veillardf403d292003-10-05 13:51:35 +0000480 * xmlFatalErrMsgStrIntStr:
481 * @ctxt: an XML parser context
482 * @error: the error number
483 * @msg: the error message
484 * @str1: an string info
485 * @val: an integer value
486 * @str2: an string info
487 *
488 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
489 */
490static void
491xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
492 const char *msg, const xmlChar *str1, int val,
493 const xmlChar *str2)
494{
Daniel Veillard157fee02003-10-31 10:36:03 +0000495 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
496 (ctxt->instate == XML_PARSER_EOF))
497 return;
Daniel Veillardf403d292003-10-05 13:51:35 +0000498 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000499 __xmlRaiseError(NULL, NULL, NULL,
Daniel Veillardf403d292003-10-05 13:51:35 +0000500 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
501 NULL, 0, (const char *) str1, (const char *) str2,
502 NULL, val, 0, msg, str1, val, str2);
503 ctxt->wellFormed = 0;
504 if (ctxt->recovery == 0)
505 ctxt->disableSAX = 1;
506}
507
508/**
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000509 * xmlFatalErrMsgStr:
510 * @ctxt: an XML parser context
511 * @error: the error number
512 * @msg: the error message
513 * @val: a string value
514 *
515 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
516 */
517static void
518xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000519 const char *msg, const xmlChar * val)
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000520{
Daniel Veillard157fee02003-10-31 10:36:03 +0000521 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
522 (ctxt->instate == XML_PARSER_EOF))
523 return;
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000524 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000525 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000526 XML_FROM_PARSER, error, XML_ERR_FATAL,
527 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
528 val);
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000529 ctxt->wellFormed = 0;
530 if (ctxt->recovery == 0)
531 ctxt->disableSAX = 1;
532}
533
534/**
Daniel Veillardf403d292003-10-05 13:51:35 +0000535 * xmlErrMsgStr:
536 * @ctxt: an XML parser context
537 * @error: the error number
538 * @msg: the error message
539 * @val: a string value
540 *
541 * Handle a non fatal parser error
542 */
543static void
544xmlErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
545 const char *msg, const xmlChar * val)
546{
Daniel Veillard157fee02003-10-31 10:36:03 +0000547 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
548 (ctxt->instate == XML_PARSER_EOF))
549 return;
Daniel Veillardf403d292003-10-05 13:51:35 +0000550 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000551 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
Daniel Veillardf403d292003-10-05 13:51:35 +0000552 XML_FROM_PARSER, error, XML_ERR_ERROR,
553 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
554 val);
555}
556
557/**
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000558 * xmlNsErr:
559 * @ctxt: an XML parser context
560 * @error: the error number
561 * @msg: the message
562 * @info1: extra information string
563 * @info2: extra information string
564 *
565 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
566 */
567static void
568xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
569 const char *msg,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000570 const xmlChar * info1, const xmlChar * info2,
571 const xmlChar * info3)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000572{
Daniel Veillard157fee02003-10-31 10:36:03 +0000573 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
574 (ctxt->instate == XML_PARSER_EOF))
575 return;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000576 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000577 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000578 XML_ERR_ERROR, NULL, 0, (const char *) info1,
579 (const char *) info2, (const char *) info3, 0, 0, msg,
580 info1, info2, info3);
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000581 ctxt->nsWellFormed = 0;
582}
583
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000584/************************************************************************
585 * *
Daniel Veillarde57ec792003-09-10 10:50:59 +0000586 * SAX2 defaulted attributes handling *
587 * *
588 ************************************************************************/
589
590/**
591 * xmlDetectSAX2:
592 * @ctxt: an XML parser context
593 *
594 * Do the SAX2 detection and specific intialization
595 */
596static void
597xmlDetectSAX2(xmlParserCtxtPtr ctxt) {
598 if (ctxt == NULL) return;
Daniel Veillard81273902003-09-30 00:43:48 +0000599#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +0000600 if ((ctxt->sax) && (ctxt->sax->initialized == XML_SAX2_MAGIC) &&
601 ((ctxt->sax->startElementNs != NULL) ||
602 (ctxt->sax->endElementNs != NULL))) ctxt->sax2 = 1;
Daniel Veillard81273902003-09-30 00:43:48 +0000603#else
604 ctxt->sax2 = 1;
605#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +0000606
607 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
608 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
609 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
William M. Brack9f797ab2004-07-28 07:40:12 +0000610 if ((ctxt->str_xml==NULL) || (ctxt->str_xmlns==NULL) ||
611 (ctxt->str_xml_ns == NULL)) {
612 xmlErrMemory(ctxt, NULL);
613 }
Daniel Veillarde57ec792003-09-10 10:50:59 +0000614}
615
Daniel Veillarde57ec792003-09-10 10:50:59 +0000616typedef struct _xmlDefAttrs xmlDefAttrs;
617typedef xmlDefAttrs *xmlDefAttrsPtr;
618struct _xmlDefAttrs {
619 int nbAttrs; /* number of defaulted attributes on that element */
620 int maxAttrs; /* the size of the array */
621 const xmlChar *values[4]; /* array of localname/prefix/values */
622};
Daniel Veillarde57ec792003-09-10 10:50:59 +0000623
624/**
625 * xmlAddDefAttrs:
626 * @ctxt: an XML parser context
627 * @fullname: the element fullname
628 * @fullattr: the attribute fullname
629 * @value: the attribute value
630 *
631 * Add a defaulted attribute for an element
632 */
633static void
634xmlAddDefAttrs(xmlParserCtxtPtr ctxt,
635 const xmlChar *fullname,
636 const xmlChar *fullattr,
637 const xmlChar *value) {
638 xmlDefAttrsPtr defaults;
639 int len;
640 const xmlChar *name;
641 const xmlChar *prefix;
642
643 if (ctxt->attsDefault == NULL) {
Daniel Veillard316a5c32005-01-23 22:56:39 +0000644 ctxt->attsDefault = xmlHashCreateDict(10, ctxt->dict);
Daniel Veillarde57ec792003-09-10 10:50:59 +0000645 if (ctxt->attsDefault == NULL)
646 goto mem_error;
647 }
648
649 /*
Daniel Veillard079f6a72004-09-23 13:15:03 +0000650 * split the element name into prefix:localname , the string found
651 * are within the DTD and then not associated to namespace names.
Daniel Veillarde57ec792003-09-10 10:50:59 +0000652 */
653 name = xmlSplitQName3(fullname, &len);
654 if (name == NULL) {
655 name = xmlDictLookup(ctxt->dict, fullname, -1);
656 prefix = NULL;
657 } else {
658 name = xmlDictLookup(ctxt->dict, name, -1);
659 prefix = xmlDictLookup(ctxt->dict, fullname, len);
660 }
661
662 /*
663 * make sure there is some storage
664 */
665 defaults = xmlHashLookup2(ctxt->attsDefault, name, prefix);
666 if (defaults == NULL) {
667 defaults = (xmlDefAttrsPtr) xmlMalloc(sizeof(xmlDefAttrs) +
Daniel Veillard079f6a72004-09-23 13:15:03 +0000668 (4 * 4) * sizeof(const xmlChar *));
Daniel Veillarde57ec792003-09-10 10:50:59 +0000669 if (defaults == NULL)
670 goto mem_error;
Daniel Veillarde57ec792003-09-10 10:50:59 +0000671 defaults->nbAttrs = 0;
Daniel Veillard079f6a72004-09-23 13:15:03 +0000672 defaults->maxAttrs = 4;
Daniel Veillarde57ec792003-09-10 10:50:59 +0000673 xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix, defaults, NULL);
674 } else if (defaults->nbAttrs >= defaults->maxAttrs) {
Daniel Veillard079f6a72004-09-23 13:15:03 +0000675 xmlDefAttrsPtr temp;
676
677 temp = (xmlDefAttrsPtr) xmlRealloc(defaults, sizeof(xmlDefAttrs) +
Daniel Veillarde57ec792003-09-10 10:50:59 +0000678 (2 * defaults->maxAttrs * 4) * sizeof(const xmlChar *));
Daniel Veillard079f6a72004-09-23 13:15:03 +0000679 if (temp == NULL)
Daniel Veillarde57ec792003-09-10 10:50:59 +0000680 goto mem_error;
Daniel Veillard079f6a72004-09-23 13:15:03 +0000681 defaults = temp;
Daniel Veillarde57ec792003-09-10 10:50:59 +0000682 defaults->maxAttrs *= 2;
683 xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix, defaults, NULL);
684 }
685
686 /*
687 * plit the element name into prefix:localname , the string found
688 * are within the DTD and hen not associated to namespace names.
689 */
690 name = xmlSplitQName3(fullattr, &len);
691 if (name == NULL) {
692 name = xmlDictLookup(ctxt->dict, fullattr, -1);
693 prefix = NULL;
694 } else {
695 name = xmlDictLookup(ctxt->dict, name, -1);
696 prefix = xmlDictLookup(ctxt->dict, fullattr, len);
697 }
698
699 defaults->values[4 * defaults->nbAttrs] = name;
700 defaults->values[4 * defaults->nbAttrs + 1] = prefix;
701 /* intern the string and precompute the end */
702 len = xmlStrlen(value);
703 value = xmlDictLookup(ctxt->dict, value, len);
704 defaults->values[4 * defaults->nbAttrs + 2] = value;
705 defaults->values[4 * defaults->nbAttrs + 3] = value + len;
706 defaults->nbAttrs++;
707
708 return;
709
710mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000711 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +0000712 return;
713}
714
Daniel Veillard8e36e6a2003-09-10 10:50:59 +0000715/**
716 * xmlAddSpecialAttr:
717 * @ctxt: an XML parser context
718 * @fullname: the element fullname
719 * @fullattr: the attribute fullname
720 * @type: the attribute type
721 *
722 * Register that this attribute is not CDATA
723 */
724static void
725xmlAddSpecialAttr(xmlParserCtxtPtr ctxt,
726 const xmlChar *fullname,
727 const xmlChar *fullattr,
728 int type)
729{
730 if (ctxt->attsSpecial == NULL) {
Daniel Veillard316a5c32005-01-23 22:56:39 +0000731 ctxt->attsSpecial = xmlHashCreateDict(10, ctxt->dict);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +0000732 if (ctxt->attsSpecial == NULL)
733 goto mem_error;
734 }
735
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +0000736 xmlHashAddEntry2(ctxt->attsSpecial, fullname, fullattr,
737 (void *) (long) type);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +0000738 return;
739
740mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000741 xmlErrMemory(ctxt, NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +0000742 return;
743}
744
Daniel Veillard4432df22003-09-28 18:58:27 +0000745/**
746 * xmlCheckLanguageID:
747 * @lang: pointer to the string value
748 *
749 * Checks that the value conforms to the LanguageID production:
750 *
751 * NOTE: this is somewhat deprecated, those productions were removed from
752 * the XML Second edition.
753 *
754 * [33] LanguageID ::= Langcode ('-' Subcode)*
755 * [34] Langcode ::= ISO639Code | IanaCode | UserCode
756 * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
757 * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
758 * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
759 * [38] Subcode ::= ([a-z] | [A-Z])+
760 *
761 * Returns 1 if correct 0 otherwise
762 **/
763int
764xmlCheckLanguageID(const xmlChar * lang)
765{
766 const xmlChar *cur = lang;
767
768 if (cur == NULL)
769 return (0);
770 if (((cur[0] == 'i') && (cur[1] == '-')) ||
771 ((cur[0] == 'I') && (cur[1] == '-'))) {
772 /*
773 * IANA code
774 */
775 cur += 2;
776 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
777 ((cur[0] >= 'a') && (cur[0] <= 'z')))
778 cur++;
779 } else if (((cur[0] == 'x') && (cur[1] == '-')) ||
780 ((cur[0] == 'X') && (cur[1] == '-'))) {
781 /*
782 * User code
783 */
784 cur += 2;
785 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
786 ((cur[0] >= 'a') && (cur[0] <= 'z')))
787 cur++;
788 } else if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
789 ((cur[0] >= 'a') && (cur[0] <= 'z'))) {
790 /*
791 * ISO639
792 */
793 cur++;
794 if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
795 ((cur[0] >= 'a') && (cur[0] <= 'z')))
796 cur++;
797 else
798 return (0);
799 } else
800 return (0);
801 while (cur[0] != 0) { /* non input consuming */
802 if (cur[0] != '-')
803 return (0);
804 cur++;
805 if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
806 ((cur[0] >= 'a') && (cur[0] <= 'z')))
807 cur++;
808 else
809 return (0);
810 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
811 ((cur[0] >= 'a') && (cur[0] <= 'z')))
812 cur++;
813 }
814 return (1);
815}
816
Owen Taylor3473f882001-02-23 17:55:21 +0000817/************************************************************************
818 * *
819 * Parser stacks related functions and macros *
820 * *
821 ************************************************************************/
822
823xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
824 const xmlChar ** str);
825
Daniel Veillard0fb18932003-09-07 09:14:37 +0000826#ifdef SAX2
827/**
828 * nsPush:
829 * @ctxt: an XML parser context
830 * @prefix: the namespace prefix or NULL
831 * @URL: the namespace name
832 *
833 * Pushes a new parser namespace on top of the ns stack
834 *
William M. Brack7b9154b2003-09-27 19:23:50 +0000835 * Returns -1 in case of error, -2 if the namespace should be discarded
836 * and the index in the stack otherwise.
Daniel Veillard0fb18932003-09-07 09:14:37 +0000837 */
838static int
839nsPush(xmlParserCtxtPtr ctxt, const xmlChar *prefix, const xmlChar *URL)
840{
Daniel Veillarddca8cc72003-09-26 13:53:14 +0000841 if (ctxt->options & XML_PARSE_NSCLEAN) {
842 int i;
843 for (i = 0;i < ctxt->nsNr;i += 2) {
844 if (ctxt->nsTab[i] == prefix) {
845 /* in scope */
846 if (ctxt->nsTab[i + 1] == URL)
847 return(-2);
848 /* out of scope keep it */
849 break;
850 }
851 }
852 }
Daniel Veillard0fb18932003-09-07 09:14:37 +0000853 if ((ctxt->nsMax == 0) || (ctxt->nsTab == NULL)) {
854 ctxt->nsMax = 10;
855 ctxt->nsNr = 0;
856 ctxt->nsTab = (const xmlChar **)
857 xmlMalloc(ctxt->nsMax * sizeof(xmlChar *));
858 if (ctxt->nsTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000859 xmlErrMemory(ctxt, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +0000860 ctxt->nsMax = 0;
861 return (-1);
862 }
863 } else if (ctxt->nsNr >= ctxt->nsMax) {
864 ctxt->nsMax *= 2;
865 ctxt->nsTab = (const xmlChar **)
Daniel Veillard5bb9ccd2004-02-09 12:39:02 +0000866 xmlRealloc((char *) ctxt->nsTab,
Daniel Veillard0fb18932003-09-07 09:14:37 +0000867 ctxt->nsMax * sizeof(ctxt->nsTab[0]));
868 if (ctxt->nsTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000869 xmlErrMemory(ctxt, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +0000870 ctxt->nsMax /= 2;
871 return (-1);
872 }
873 }
874 ctxt->nsTab[ctxt->nsNr++] = prefix;
875 ctxt->nsTab[ctxt->nsNr++] = URL;
876 return (ctxt->nsNr);
877}
878/**
879 * nsPop:
880 * @ctxt: an XML parser context
881 * @nr: the number to pop
882 *
883 * Pops the top @nr parser prefix/namespace from the ns stack
884 *
885 * Returns the number of namespaces removed
886 */
887static int
888nsPop(xmlParserCtxtPtr ctxt, int nr)
889{
890 int i;
891
892 if (ctxt->nsTab == NULL) return(0);
893 if (ctxt->nsNr < nr) {
894 xmlGenericError(xmlGenericErrorContext, "Pbm popping %d NS\n", nr);
895 nr = ctxt->nsNr;
896 }
897 if (ctxt->nsNr <= 0)
898 return (0);
899
900 for (i = 0;i < nr;i++) {
901 ctxt->nsNr--;
902 ctxt->nsTab[ctxt->nsNr] = NULL;
903 }
904 return(nr);
905}
906#endif
907
908static int
909xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt, int nr) {
910 const xmlChar **atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +0000911 int *attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +0000912 int maxatts;
913
914 if (ctxt->atts == NULL) {
Daniel Veillarde57ec792003-09-10 10:50:59 +0000915 maxatts = 55; /* allow for 10 attrs by default */
Daniel Veillard0fb18932003-09-07 09:14:37 +0000916 atts = (const xmlChar **)
917 xmlMalloc(maxatts * sizeof(xmlChar *));
Daniel Veillarde57ec792003-09-10 10:50:59 +0000918 if (atts == NULL) goto mem_error;
Daniel Veillard0fb18932003-09-07 09:14:37 +0000919 ctxt->atts = atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +0000920 attallocs = (int *) xmlMalloc((maxatts / 5) * sizeof(int));
921 if (attallocs == NULL) goto mem_error;
922 ctxt->attallocs = attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +0000923 ctxt->maxatts = maxatts;
Daniel Veillarde57ec792003-09-10 10:50:59 +0000924 } else if (nr + 5 > ctxt->maxatts) {
925 maxatts = (nr + 5) * 2;
Daniel Veillard0fb18932003-09-07 09:14:37 +0000926 atts = (const xmlChar **) xmlRealloc((void *) ctxt->atts,
927 maxatts * sizeof(const xmlChar *));
Daniel Veillarde57ec792003-09-10 10:50:59 +0000928 if (atts == NULL) goto mem_error;
Daniel Veillard0fb18932003-09-07 09:14:37 +0000929 ctxt->atts = atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +0000930 attallocs = (int *) xmlRealloc((void *) ctxt->attallocs,
931 (maxatts / 5) * sizeof(int));
932 if (attallocs == NULL) goto mem_error;
933 ctxt->attallocs = attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +0000934 ctxt->maxatts = maxatts;
935 }
936 return(ctxt->maxatts);
Daniel Veillarde57ec792003-09-10 10:50:59 +0000937mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000938 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +0000939 return(-1);
Daniel Veillard0fb18932003-09-07 09:14:37 +0000940}
941
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000942/**
943 * inputPush:
944 * @ctxt: an XML parser context
Daniel Veillard9d06d302002-01-22 18:15:52 +0000945 * @value: the parser input
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000946 *
947 * Pushes a new parser input on top of the input stack
Daniel Veillard9d06d302002-01-22 18:15:52 +0000948 *
949 * Returns 0 in case of error, the index in the stack otherwise
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000950 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +0000951int
Daniel Veillard1c732d22002-11-30 11:22:59 +0000952inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value)
953{
Daniel Veillard36e5cd52004-11-02 14:52:23 +0000954 if ((ctxt == NULL) || (value == NULL))
955 return(0);
Daniel Veillard1c732d22002-11-30 11:22:59 +0000956 if (ctxt->inputNr >= ctxt->inputMax) {
957 ctxt->inputMax *= 2;
958 ctxt->inputTab =
959 (xmlParserInputPtr *) xmlRealloc(ctxt->inputTab,
960 ctxt->inputMax *
961 sizeof(ctxt->inputTab[0]));
962 if (ctxt->inputTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000963 xmlErrMemory(ctxt, NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +0000964 return (0);
965 }
966 }
967 ctxt->inputTab[ctxt->inputNr] = value;
968 ctxt->input = value;
969 return (ctxt->inputNr++);
970}
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000971/**
Daniel Veillard1c732d22002-11-30 11:22:59 +0000972 * inputPop:
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000973 * @ctxt: an XML parser context
974 *
Daniel Veillard1c732d22002-11-30 11:22:59 +0000975 * Pops the top parser input from the input stack
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000976 *
Daniel Veillard1c732d22002-11-30 11:22:59 +0000977 * Returns the input just removed
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000978 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +0000979xmlParserInputPtr
Daniel Veillard1c732d22002-11-30 11:22:59 +0000980inputPop(xmlParserCtxtPtr ctxt)
981{
982 xmlParserInputPtr ret;
983
Daniel Veillard36e5cd52004-11-02 14:52:23 +0000984 if (ctxt == NULL)
985 return(NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +0000986 if (ctxt->inputNr <= 0)
987 return (0);
988 ctxt->inputNr--;
989 if (ctxt->inputNr > 0)
990 ctxt->input = ctxt->inputTab[ctxt->inputNr - 1];
991 else
992 ctxt->input = NULL;
993 ret = ctxt->inputTab[ctxt->inputNr];
994 ctxt->inputTab[ctxt->inputNr] = 0;
995 return (ret);
996}
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000997/**
998 * nodePush:
999 * @ctxt: an XML parser context
Daniel Veillard9d06d302002-01-22 18:15:52 +00001000 * @value: the element node
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001001 *
1002 * Pushes a new element node on top of the node stack
Daniel Veillard9d06d302002-01-22 18:15:52 +00001003 *
1004 * Returns 0 in case of error, the index in the stack otherwise
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001005 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001006int
Daniel Veillard1c732d22002-11-30 11:22:59 +00001007nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value)
1008{
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001009 if (ctxt == NULL) return(0);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001010 if (ctxt->nodeNr >= ctxt->nodeMax) {
Daniel Veillardd0cf7f62004-11-09 16:17:02 +00001011 xmlNodePtr *tmp;
1012
1013 tmp = (xmlNodePtr *) xmlRealloc(ctxt->nodeTab,
1014 ctxt->nodeMax * 2 *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001015 sizeof(ctxt->nodeTab[0]));
Daniel Veillardd0cf7f62004-11-09 16:17:02 +00001016 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001017 xmlErrMemory(ctxt, NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001018 return (0);
1019 }
Daniel Veillardd0cf7f62004-11-09 16:17:02 +00001020 ctxt->nodeTab = tmp;
1021 ctxt->nodeMax *= 2;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001022 }
Daniel Veillard4aede2e2003-10-17 12:43:59 +00001023 if (((unsigned int) ctxt->nodeNr) > xmlParserMaxDepth) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00001024 xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
Daniel Veillard4aede2e2003-10-17 12:43:59 +00001025 "Excessive depth in document: change xmlParserMaxDepth = %d\n",
1026 xmlParserMaxDepth);
Daniel Veillard3b2e4e12003-02-03 08:52:58 +00001027 ctxt->instate = XML_PARSER_EOF;
Daniel Veillard3b2e4e12003-02-03 08:52:58 +00001028 return(0);
1029 }
Daniel Veillard1c732d22002-11-30 11:22:59 +00001030 ctxt->nodeTab[ctxt->nodeNr] = value;
1031 ctxt->node = value;
1032 return (ctxt->nodeNr++);
1033}
1034/**
1035 * nodePop:
1036 * @ctxt: an XML parser context
1037 *
1038 * Pops the top element node from the node stack
1039 *
1040 * Returns the node just removed
Owen Taylor3473f882001-02-23 17:55:21 +00001041 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001042xmlNodePtr
Daniel Veillard1c732d22002-11-30 11:22:59 +00001043nodePop(xmlParserCtxtPtr ctxt)
1044{
1045 xmlNodePtr ret;
1046
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001047 if (ctxt == NULL) return(NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001048 if (ctxt->nodeNr <= 0)
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001049 return (NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001050 ctxt->nodeNr--;
1051 if (ctxt->nodeNr > 0)
1052 ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1];
1053 else
1054 ctxt->node = NULL;
1055 ret = ctxt->nodeTab[ctxt->nodeNr];
1056 ctxt->nodeTab[ctxt->nodeNr] = 0;
1057 return (ret);
1058}
Daniel Veillarda2351322004-06-27 12:08:10 +00001059
1060#ifdef LIBXML_PUSH_ENABLED
Daniel Veillard1c732d22002-11-30 11:22:59 +00001061/**
Daniel Veillarde57ec792003-09-10 10:50:59 +00001062 * nameNsPush:
1063 * @ctxt: an XML parser context
1064 * @value: the element name
1065 * @prefix: the element prefix
1066 * @URI: the element namespace name
1067 *
1068 * Pushes a new element name/prefix/URL on top of the name stack
1069 *
1070 * Returns -1 in case of error, the index in the stack otherwise
1071 */
1072static int
1073nameNsPush(xmlParserCtxtPtr ctxt, const xmlChar * value,
1074 const xmlChar *prefix, const xmlChar *URI, int nsNr)
1075{
1076 if (ctxt->nameNr >= ctxt->nameMax) {
1077 const xmlChar * *tmp;
1078 void **tmp2;
1079 ctxt->nameMax *= 2;
1080 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1081 ctxt->nameMax *
1082 sizeof(ctxt->nameTab[0]));
1083 if (tmp == NULL) {
1084 ctxt->nameMax /= 2;
1085 goto mem_error;
1086 }
1087 ctxt->nameTab = tmp;
1088 tmp2 = (void **) xmlRealloc((void * *)ctxt->pushTab,
1089 ctxt->nameMax * 3 *
1090 sizeof(ctxt->pushTab[0]));
1091 if (tmp2 == NULL) {
1092 ctxt->nameMax /= 2;
1093 goto mem_error;
1094 }
1095 ctxt->pushTab = tmp2;
1096 }
1097 ctxt->nameTab[ctxt->nameNr] = value;
1098 ctxt->name = value;
1099 ctxt->pushTab[ctxt->nameNr * 3] = (void *) prefix;
1100 ctxt->pushTab[ctxt->nameNr * 3 + 1] = (void *) URI;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +00001101 ctxt->pushTab[ctxt->nameNr * 3 + 2] = (void *) (long) nsNr;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001102 return (ctxt->nameNr++);
1103mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001104 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001105 return (-1);
1106}
1107/**
1108 * nameNsPop:
1109 * @ctxt: an XML parser context
1110 *
1111 * Pops the top element/prefix/URI name from the name stack
1112 *
1113 * Returns the name just removed
1114 */
1115static const xmlChar *
1116nameNsPop(xmlParserCtxtPtr ctxt)
1117{
1118 const xmlChar *ret;
1119
1120 if (ctxt->nameNr <= 0)
1121 return (0);
1122 ctxt->nameNr--;
1123 if (ctxt->nameNr > 0)
1124 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1125 else
1126 ctxt->name = NULL;
1127 ret = ctxt->nameTab[ctxt->nameNr];
1128 ctxt->nameTab[ctxt->nameNr] = NULL;
1129 return (ret);
1130}
Daniel Veillarda2351322004-06-27 12:08:10 +00001131#endif /* LIBXML_PUSH_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +00001132
1133/**
Daniel Veillard1c732d22002-11-30 11:22:59 +00001134 * namePush:
1135 * @ctxt: an XML parser context
1136 * @value: the element name
1137 *
1138 * Pushes a new element name on top of the name stack
1139 *
Daniel Veillarde57ec792003-09-10 10:50:59 +00001140 * Returns -1 in case of error, the index in the stack otherwise
Daniel Veillard1c732d22002-11-30 11:22:59 +00001141 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001142int
Daniel Veillard2fdbd322003-08-18 12:15:38 +00001143namePush(xmlParserCtxtPtr ctxt, const xmlChar * value)
Daniel Veillard1c732d22002-11-30 11:22:59 +00001144{
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001145 if (ctxt == NULL) return (-1);
1146
Daniel Veillard1c732d22002-11-30 11:22:59 +00001147 if (ctxt->nameNr >= ctxt->nameMax) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00001148 const xmlChar * *tmp;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001149 ctxt->nameMax *= 2;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001150 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
Daniel Veillard1c732d22002-11-30 11:22:59 +00001151 ctxt->nameMax *
1152 sizeof(ctxt->nameTab[0]));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001153 if (tmp == NULL) {
1154 ctxt->nameMax /= 2;
1155 goto mem_error;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001156 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00001157 ctxt->nameTab = tmp;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001158 }
1159 ctxt->nameTab[ctxt->nameNr] = value;
1160 ctxt->name = value;
1161 return (ctxt->nameNr++);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001162mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001163 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001164 return (-1);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001165}
1166/**
1167 * namePop:
1168 * @ctxt: an XML parser context
1169 *
1170 * Pops the top element name from the name stack
1171 *
1172 * Returns the name just removed
1173 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001174const xmlChar *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001175namePop(xmlParserCtxtPtr ctxt)
1176{
Daniel Veillard2fdbd322003-08-18 12:15:38 +00001177 const xmlChar *ret;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001178
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001179 if ((ctxt == NULL) || (ctxt->nameNr <= 0))
1180 return (NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001181 ctxt->nameNr--;
1182 if (ctxt->nameNr > 0)
1183 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1184 else
1185 ctxt->name = NULL;
1186 ret = ctxt->nameTab[ctxt->nameNr];
1187 ctxt->nameTab[ctxt->nameNr] = 0;
1188 return (ret);
1189}
Owen Taylor3473f882001-02-23 17:55:21 +00001190
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001191static int spacePush(xmlParserCtxtPtr ctxt, int val) {
Owen Taylor3473f882001-02-23 17:55:21 +00001192 if (ctxt->spaceNr >= ctxt->spaceMax) {
1193 ctxt->spaceMax *= 2;
1194 ctxt->spaceTab = (int *) xmlRealloc(ctxt->spaceTab,
1195 ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
1196 if (ctxt->spaceTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001197 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001198 return(0);
1199 }
1200 }
1201 ctxt->spaceTab[ctxt->spaceNr] = val;
1202 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr];
1203 return(ctxt->spaceNr++);
1204}
1205
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001206static int spacePop(xmlParserCtxtPtr ctxt) {
Owen Taylor3473f882001-02-23 17:55:21 +00001207 int ret;
1208 if (ctxt->spaceNr <= 0) return(0);
1209 ctxt->spaceNr--;
1210 if (ctxt->spaceNr > 0)
1211 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1];
1212 else
1213 ctxt->space = NULL;
1214 ret = ctxt->spaceTab[ctxt->spaceNr];
1215 ctxt->spaceTab[ctxt->spaceNr] = -1;
1216 return(ret);
1217}
1218
/*
 * Macros for accessing the content. Those should be used only by the parser,
 * and not exported.
 *
 * Dirty macros, i.e. one often needs to make assumptions on the context to
 * use them
 *
 *   CUR_PTR return the current pointer to the xmlChar to be parsed.
 *           To be used with extreme caution since operations consuming
 *           characters may move the input buffer to a different location !
 *   CUR     returns the current xmlChar value, i.e. an 8 bit value.
 *           This should be used internally by the parser
 *           only to compare to ASCII values otherwise it would break when
 *           running with UTF-8 encoding.
 *   RAW     same as CUR but in the input buffer, bypass any token
 *           extraction that may have been done
 *   NXT(n)  returns the n'th next xmlChar. Same as CUR, it should be used
 *           only to compare on ASCII based substrings.
 *   SKIP(n) Skip n xmlChars, and must also be used only to skip ASCII defined
 *           strings without newlines within the parser.
 *   NEXT1   Skip 1 xmlChar, and must also be used only to skip 1 non-newline
 *           ASCII defined char within the parser.
 * Clean macros, not dependent on an ASCII context, expect UTF-8 encoding
 *
 *   NEXT    Skip to the next character, this does the proper decoding
 *           in UTF-8 mode. It also pops up unfinished entities on the fly.
 *   NEXTL(l) Skip the current unicode character of l xmlChars long.
 *   CUR_CHAR(l) returns the current unicode character (int), set l
 *           to the number of xmlChars used for the encoding [0-5].
 *   CUR_SCHAR  same but operate on a string instead of the context
 *   COPY_BUF  copy the current unicode char to the target buffer, increment
 *            the index
 *   GROW, SHRINK  handling of input buffers
 */
1253
/* byte under the input cursor; RAW and CUR are the same expression */
#define RAW		(*ctxt->input->cur)
#define CUR		(*ctxt->input->cur)
/* byte located (val) positions after the input cursor */
#define NXT(val)	(ctxt->input->cur[(val)])
/* the input cursor itself, usable as an lvalue */
#define CUR_PTR		(ctxt->input->cur)
1258
/*
 * CMPn(s, c1, ..., cn): true when the first n bytes at s are c1 .. cn.
 * The bytes are compared as unsigned char and the comparison stops at the
 * first mismatch. Every argument is parenthesized in the expansion so that
 * an expression can be passed safely; s is evaluated once per byte compared.
 */
#define CMP4( s, c1, c2, c3, c4 ) \
  ( ((const unsigned char *) (s))[ 0 ] == (c1) && \
    ((const unsigned char *) (s))[ 1 ] == (c2) && \
    ((const unsigned char *) (s))[ 2 ] == (c3) && \
    ((const unsigned char *) (s))[ 3 ] == (c4) )
#define CMP5( s, c1, c2, c3, c4, c5 ) \
  ( CMP4( s, c1, c2, c3, c4 ) && \
    ((const unsigned char *) (s))[ 4 ] == (c5) )
#define CMP6( s, c1, c2, c3, c4, c5, c6 ) \
  ( CMP5( s, c1, c2, c3, c4, c5 ) && \
    ((const unsigned char *) (s))[ 5 ] == (c6) )
#define CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) \
  ( CMP6( s, c1, c2, c3, c4, c5, c6 ) && \
    ((const unsigned char *) (s))[ 6 ] == (c7) )
#define CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) \
  ( CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) && \
    ((const unsigned char *) (s))[ 7 ] == (c8) )
#define CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) \
  ( CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) && \
    ((const unsigned char *) (s))[ 8 ] == (c9) )
#define CMP10( s, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 ) \
  ( CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) && \
    ((const unsigned char *) (s))[ 9 ] == (c10) )
1276
/*
 * SKIP(val): move the input cursor, the column and the character count
 * forward by (val), then handle a '%' found at the new position and, when
 * the input is exhausted and cannot be grown, pop it. The line number is
 * not updated.
 */
#define SKIP(val) do {							\
    ctxt->nbChars += (val);						\
    ctxt->input->cur += (val);						\
    ctxt->input->col += (val);						\
    if (*ctxt->input->cur == '%')					\
        xmlParserHandlePEReference(ctxt);				\
    if ((*ctxt->input->cur == 0) &&					\
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))		\
        xmlPopInput(ctxt);						\
  } while (0)
1284
/*
 * SKIPL(val): like SKIP but walks the (val) bytes one at a time so that
 * line and column information stay correct when newlines are skipped.
 * (val) is parenthesized so that an expression can be passed; it is
 * evaluated on every iteration.
 */
#define SKIPL(val) do {							\
    int skipl;								\
    for(skipl=0; skipl<(val); skipl++) {				\
        if (*(ctxt->input->cur) == '\n') {				\
        ctxt->input->line++; ctxt->input->col = 1;			\
        } else ctxt->input->col++;					\
        ctxt->nbChars++;						\
        ctxt->input->cur++;						\
    }									\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
    if ((*ctxt->input->cur == 0) &&					\
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))		\
            xmlPopInput(ctxt);						\
  } while (0)
1299
Daniel Veillarda880b122003-04-21 21:36:41 +00001300#define SHRINK if ((ctxt->progressive == 0) && \
Daniel Veillard6155d8a2003-08-19 15:01:28 +00001301 (ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \
1302 (ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \
Daniel Veillard46de64e2002-05-29 08:21:33 +00001303 xmlSHRINK (ctxt);
1304
1305static void xmlSHRINK (xmlParserCtxtPtr ctxt) {
1306 xmlParserInputShrink(ctxt->input);
1307 if ((*ctxt->input->cur == 0) &&
1308 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1309 xmlPopInput(ctxt);
Daniel Veillard48b2f892001-02-25 16:11:03 +00001310 }
Owen Taylor3473f882001-02-23 17:55:21 +00001311
Daniel Veillarda880b122003-04-21 21:36:41 +00001312#define GROW if ((ctxt->progressive == 0) && \
1313 (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK)) \
Daniel Veillard46de64e2002-05-29 08:21:33 +00001314 xmlGROW (ctxt);
1315
1316static void xmlGROW (xmlParserCtxtPtr ctxt) {
1317 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1318 if ((*ctxt->input->cur == 0) &&
1319 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1320 xmlPopInput(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +00001321}
Owen Taylor3473f882001-02-23 17:55:21 +00001322
#define SKIP_BLANKS xmlSkipBlankChars(ctxt)

#define NEXT xmlNextChar(ctxt)

/*
 * NEXT1: step over one byte, growing the input when its end is reached.
 * Kept as a plain brace block (not do/while) so that existing uses written
 * with or without a trailing semicolon keep compiling.
 */
#define NEXT1 {								\
        ctxt->input->col++;						\
        ctxt->input->cur++;						\
        ctxt->nbChars++;						\
        if (*ctxt->input->cur == 0)					\
            xmlParserInputGrow(ctxt->input, INPUT_CHUNK);		\
    }

/*
 * NEXTL(l): step over the current character, which is (l) bytes long,
 * updating line/column, then handle a '%' found at the new position.
 */
#define NEXTL(l) do {							\
    if (*(ctxt->input->cur) == '\n') {					\
        ctxt->input->line++; ctxt->input->col = 1;			\
    } else ctxt->input->col++;						\
    ctxt->input->cur += (l);						\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
  } while (0)

#define CUR_CHAR(l) xmlCurrentChar(ctxt, &(l))
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, (s), &(l))

/*
 * COPY_BUF(l,b,i,v): append character v, whose encoding is l bytes long,
 * to buffer b at index i and advance i. Wrapped in do/while so that it
 * behaves as a single statement, e.g. in front of an else.
 */
#define COPY_BUF(l,b,i,v) do {						\
    if ((l) == 1) (b)[(i)++] = (xmlChar) (v);				\
    else (i) += xmlCopyCharMultiByte(&(b)[(i)],(v));			\
  } while (0)
Owen Taylor3473f882001-02-23 17:55:21 +00001349
1350/**
1351 * xmlSkipBlankChars:
1352 * @ctxt: the XML parser context
1353 *
1354 * skip all blanks character found at that point in the input streams.
1355 * It pops up finished entities in the process if allowable at that point.
1356 *
1357 * Returns the number of space chars skipped
1358 */
1359
1360int
1361xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
Daniel Veillard02141ea2001-04-30 11:46:40 +00001362 int res = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001363
1364 /*
1365 * It's Okay to use CUR/NEXT here since all the blanks are on
1366 * the ASCII range.
1367 */
Daniel Veillard02141ea2001-04-30 11:46:40 +00001368 if ((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) {
1369 const xmlChar *cur;
Owen Taylor3473f882001-02-23 17:55:21 +00001370 /*
Daniel Veillard02141ea2001-04-30 11:46:40 +00001371 * if we are in the document content, go really fast
Owen Taylor3473f882001-02-23 17:55:21 +00001372 */
Daniel Veillard02141ea2001-04-30 11:46:40 +00001373 cur = ctxt->input->cur;
William M. Brack76e95df2003-10-18 16:20:14 +00001374 while (IS_BLANK_CH(*cur)) {
Daniel Veillard02141ea2001-04-30 11:46:40 +00001375 if (*cur == '\n') {
1376 ctxt->input->line++; ctxt->input->col = 1;
1377 }
1378 cur++;
1379 res++;
1380 if (*cur == 0) {
1381 ctxt->input->cur = cur;
1382 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1383 cur = ctxt->input->cur;
1384 }
1385 }
1386 ctxt->input->cur = cur;
1387 } else {
1388 int cur;
1389 do {
1390 cur = CUR;
Daniel Veillard7da92702005-01-23 20:15:53 +00001391 while (IS_BLANK_CH(cur)) { /* CHECKED tstblanks.xml */
Daniel Veillard02141ea2001-04-30 11:46:40 +00001392 NEXT;
1393 cur = CUR;
1394 res++;
1395 }
1396 while ((cur == 0) && (ctxt->inputNr > 1) &&
1397 (ctxt->instate != XML_PARSER_COMMENT)) {
1398 xmlPopInput(ctxt);
1399 cur = CUR;
1400 }
1401 /*
1402 * Need to handle support of entities branching here
1403 */
1404 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);
1405 } while (IS_BLANK(cur)); /* CHECKED tstblanks.xml */
1406 }
Owen Taylor3473f882001-02-23 17:55:21 +00001407 return(res);
1408}
1409
1410/************************************************************************
1411 * *
1412 * Commodity functions to handle entities *
1413 * *
1414 ************************************************************************/
1415
1416/**
1417 * xmlPopInput:
1418 * @ctxt: an XML parser context
1419 *
1420 * xmlPopInput: the current input pointed by ctxt->input came to an end
1421 * pop it and return the next char.
1422 *
1423 * Returns the current xmlChar in the parser context
1424 */
1425xmlChar
1426xmlPopInput(xmlParserCtxtPtr ctxt) {
Daniel Veillarda82b1822004-11-08 16:24:57 +00001427 if ((ctxt == NULL) || (ctxt->inputNr <= 1)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00001428 if (xmlParserDebugEntities)
1429 xmlGenericError(xmlGenericErrorContext,
1430 "Popping input %d\n", ctxt->inputNr);
1431 xmlFreeInputStream(inputPop(ctxt));
Daniel Veillard561b7f82002-03-20 21:55:57 +00001432 if ((*ctxt->input->cur == 0) &&
Owen Taylor3473f882001-02-23 17:55:21 +00001433 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1434 return(xmlPopInput(ctxt));
1435 return(CUR);
1436}
1437
1438/**
1439 * xmlPushInput:
1440 * @ctxt: an XML parser context
1441 * @input: an XML parser input fragment (entity, XML fragment ...).
1442 *
1443 * xmlPushInput: switch to a new input stream which is stacked on top
1444 * of the previous one(s).
1445 */
1446void
1447xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
1448 if (input == NULL) return;
1449
1450 if (xmlParserDebugEntities) {
1451 if ((ctxt->input != NULL) && (ctxt->input->filename))
1452 xmlGenericError(xmlGenericErrorContext,
1453 "%s(%d): ", ctxt->input->filename,
1454 ctxt->input->line);
1455 xmlGenericError(xmlGenericErrorContext,
1456 "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
1457 }
1458 inputPush(ctxt, input);
1459 GROW;
1460}
1461
1462/**
1463 * xmlParseCharRef:
1464 * @ctxt: an XML parser context
1465 *
1466 * parse Reference declarations
1467 *
1468 * [66] CharRef ::= '&#' [0-9]+ ';' |
1469 * '&#x' [0-9a-fA-F]+ ';'
1470 *
1471 * [ WFC: Legal Character ]
1472 * Characters referred to using character references must match the
1473 * production for Char.
1474 *
1475 * Returns the value parsed (as an int), 0 in case of error
1476 */
1477int
1478xmlParseCharRef(xmlParserCtxtPtr ctxt) {
Daniel Veillard50582112001-03-26 22:52:16 +00001479 unsigned int val = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001480 int count = 0;
Daniel Veillard37fd3072004-06-03 11:22:31 +00001481 unsigned int outofrange = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001482
Owen Taylor3473f882001-02-23 17:55:21 +00001483 /*
1484 * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
1485 */
Daniel Veillard561b7f82002-03-20 21:55:57 +00001486 if ((RAW == '&') && (NXT(1) == '#') &&
Owen Taylor3473f882001-02-23 17:55:21 +00001487 (NXT(2) == 'x')) {
1488 SKIP(3);
1489 GROW;
1490 while (RAW != ';') { /* loop blocked by count */
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00001491 if (count++ > 20) {
1492 count = 0;
1493 GROW;
1494 }
1495 if ((RAW >= '0') && (RAW <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +00001496 val = val * 16 + (CUR - '0');
1497 else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
1498 val = val * 16 + (CUR - 'a') + 10;
1499 else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
1500 val = val * 16 + (CUR - 'A') + 10;
1501 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001502 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001503 val = 0;
1504 break;
1505 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00001506 if (val > 0x10FFFF)
1507 outofrange = val;
1508
Owen Taylor3473f882001-02-23 17:55:21 +00001509 NEXT;
1510 count++;
1511 }
1512 if (RAW == ';') {
1513 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
Daniel Veillard77a90a72003-03-22 00:04:05 +00001514 ctxt->input->col++;
Owen Taylor3473f882001-02-23 17:55:21 +00001515 ctxt->nbChars ++;
1516 ctxt->input->cur++;
1517 }
Daniel Veillard561b7f82002-03-20 21:55:57 +00001518 } else if ((RAW == '&') && (NXT(1) == '#')) {
Owen Taylor3473f882001-02-23 17:55:21 +00001519 SKIP(2);
1520 GROW;
1521 while (RAW != ';') { /* loop blocked by count */
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00001522 if (count++ > 20) {
1523 count = 0;
1524 GROW;
1525 }
1526 if ((RAW >= '0') && (RAW <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +00001527 val = val * 10 + (CUR - '0');
1528 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001529 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001530 val = 0;
1531 break;
1532 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00001533 if (val > 0x10FFFF)
1534 outofrange = val;
1535
Owen Taylor3473f882001-02-23 17:55:21 +00001536 NEXT;
1537 count++;
1538 }
1539 if (RAW == ';') {
1540 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
Daniel Veillard77a90a72003-03-22 00:04:05 +00001541 ctxt->input->col++;
Owen Taylor3473f882001-02-23 17:55:21 +00001542 ctxt->nbChars ++;
1543 ctxt->input->cur++;
1544 }
1545 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001546 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001547 }
1548
1549 /*
1550 * [ WFC: Legal Character ]
1551 * Characters referred to using character references must match the
1552 * production for Char.
1553 */
Daniel Veillard37fd3072004-06-03 11:22:31 +00001554 if ((IS_CHAR(val) && (outofrange == 0))) {
Owen Taylor3473f882001-02-23 17:55:21 +00001555 return(val);
1556 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00001557 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
1558 "xmlParseCharRef: invalid xmlChar value %d\n",
1559 val);
Owen Taylor3473f882001-02-23 17:55:21 +00001560 }
1561 return(0);
1562}
1563
1564/**
1565 * xmlParseStringCharRef:
1566 * @ctxt: an XML parser context
1567 * @str: a pointer to an index in the string
1568 *
1569 * parse Reference declarations, variant parsing from a string rather
1570 * than an an input flow.
1571 *
1572 * [66] CharRef ::= '&#' [0-9]+ ';' |
1573 * '&#x' [0-9a-fA-F]+ ';'
1574 *
1575 * [ WFC: Legal Character ]
1576 * Characters referred to using character references must match the
1577 * production for Char.
1578 *
1579 * Returns the value parsed (as an int), 0 in case of error, str will be
1580 * updated to the current value of the index
1581 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001582static int
Owen Taylor3473f882001-02-23 17:55:21 +00001583xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
1584 const xmlChar *ptr;
1585 xmlChar cur;
Daniel Veillard37fd3072004-06-03 11:22:31 +00001586 unsigned int val = 0;
1587 unsigned int outofrange = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001588
1589 if ((str == NULL) || (*str == NULL)) return(0);
1590 ptr = *str;
1591 cur = *ptr;
1592 if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
1593 ptr += 3;
1594 cur = *ptr;
1595 while (cur != ';') { /* Non input consuming loop */
1596 if ((cur >= '0') && (cur <= '9'))
1597 val = val * 16 + (cur - '0');
1598 else if ((cur >= 'a') && (cur <= 'f'))
1599 val = val * 16 + (cur - 'a') + 10;
1600 else if ((cur >= 'A') && (cur <= 'F'))
1601 val = val * 16 + (cur - 'A') + 10;
1602 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001603 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001604 val = 0;
1605 break;
1606 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00001607 if (val > 0x10FFFF)
1608 outofrange = val;
1609
Owen Taylor3473f882001-02-23 17:55:21 +00001610 ptr++;
1611 cur = *ptr;
1612 }
1613 if (cur == ';')
1614 ptr++;
1615 } else if ((cur == '&') && (ptr[1] == '#')){
1616 ptr += 2;
1617 cur = *ptr;
1618 while (cur != ';') { /* Non input consuming loops */
1619 if ((cur >= '0') && (cur <= '9'))
1620 val = val * 10 + (cur - '0');
1621 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001622 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001623 val = 0;
1624 break;
1625 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00001626 if (val > 0x10FFFF)
1627 outofrange = val;
1628
Owen Taylor3473f882001-02-23 17:55:21 +00001629 ptr++;
1630 cur = *ptr;
1631 }
1632 if (cur == ';')
1633 ptr++;
1634 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001635 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001636 return(0);
1637 }
1638 *str = ptr;
1639
1640 /*
1641 * [ WFC: Legal Character ]
1642 * Characters referred to using character references must match the
1643 * production for Char.
1644 */
Daniel Veillard37fd3072004-06-03 11:22:31 +00001645 if ((IS_CHAR(val) && (outofrange == 0))) {
Owen Taylor3473f882001-02-23 17:55:21 +00001646 return(val);
1647 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00001648 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
1649 "xmlParseStringCharRef: invalid xmlChar value %d\n",
1650 val);
Owen Taylor3473f882001-02-23 17:55:21 +00001651 }
1652 return(0);
1653}
1654
1655/**
Daniel Veillardf5582f12002-06-11 10:08:16 +00001656 * xmlNewBlanksWrapperInputStream:
1657 * @ctxt: an XML parser context
1658 * @entity: an Entity pointer
1659 *
1660 * Create a new input stream for wrapping
1661 * blanks around a PEReference
1662 *
1663 * Returns the new input stream or NULL
1664 */
1665
1666static void deallocblankswrapper (xmlChar *str) {xmlFree(str);}
1667
Daniel Veillardf4862f02002-09-10 11:13:43 +00001668static xmlParserInputPtr
Daniel Veillardf5582f12002-06-11 10:08:16 +00001669xmlNewBlanksWrapperInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
1670 xmlParserInputPtr input;
1671 xmlChar *buffer;
1672 size_t length;
1673 if (entity == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001674 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
1675 "xmlNewBlanksWrapperInputStream entity\n");
Daniel Veillardf5582f12002-06-11 10:08:16 +00001676 return(NULL);
1677 }
1678 if (xmlParserDebugEntities)
1679 xmlGenericError(xmlGenericErrorContext,
1680 "new blanks wrapper for entity: %s\n", entity->name);
1681 input = xmlNewInputStream(ctxt);
1682 if (input == NULL) {
1683 return(NULL);
1684 }
1685 length = xmlStrlen(entity->name) + 5;
Daniel Veillard3c908dc2003-04-19 00:07:51 +00001686 buffer = xmlMallocAtomic(length);
Daniel Veillardf5582f12002-06-11 10:08:16 +00001687 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001688 xmlErrMemory(ctxt, NULL);
Daniel Veillardf5582f12002-06-11 10:08:16 +00001689 return(NULL);
1690 }
1691 buffer [0] = ' ';
1692 buffer [1] = '%';
1693 buffer [length-3] = ';';
1694 buffer [length-2] = ' ';
1695 buffer [length-1] = 0;
1696 memcpy(buffer + 2, entity->name, length - 5);
1697 input->free = deallocblankswrapper;
1698 input->base = buffer;
1699 input->cur = buffer;
1700 input->length = length;
1701 input->end = &buffer[length];
1702 return(input);
1703}
1704
1705/**
Owen Taylor3473f882001-02-23 17:55:21 +00001706 * xmlParserHandlePEReference:
1707 * @ctxt: the parser context
1708 *
1709 * [69] PEReference ::= '%' Name ';'
1710 *
1711 * [ WFC: No Recursion ]
1712 * A parsed entity must not contain a recursive
1713 * reference to itself, either directly or indirectly.
1714 *
1715 * [ WFC: Entity Declared ]
1716 * In a document without any DTD, a document with only an internal DTD
1717 * subset which contains no parameter entity references, or a document
1718 * with "standalone='yes'", ... ... The declaration of a parameter
1719 * entity must precede any reference to it...
1720 *
1721 * [ VC: Entity Declared ]
1722 * In a document with an external subset or external parameter entities
1723 * with "standalone='no'", ... ... The declaration of a parameter entity
1724 * must precede any reference to it...
1725 *
1726 * [ WFC: In DTD ]
1727 * Parameter-entity references may only appear in the DTD.
1728 * NOTE: misleading but this is handled.
1729 *
1730 * A PEReference may have been detected in the current input stream
1731 * the handling is done accordingly to
1732 * http://www.w3.org/TR/REC-xml#entproc
1733 * i.e.
1734 * - Included in literal in entity values
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001735 * - Included as Parameter Entity reference within DTDs
Owen Taylor3473f882001-02-23 17:55:21 +00001736 */
1737void
1738xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00001739 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00001740 xmlEntityPtr entity = NULL;
1741 xmlParserInputPtr input;
1742
Owen Taylor3473f882001-02-23 17:55:21 +00001743 if (RAW != '%') return;
1744 switch(ctxt->instate) {
1745 case XML_PARSER_CDATA_SECTION:
1746 return;
1747 case XML_PARSER_COMMENT:
1748 return;
1749 case XML_PARSER_START_TAG:
1750 return;
1751 case XML_PARSER_END_TAG:
1752 return;
1753 case XML_PARSER_EOF:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001754 xmlFatalErr(ctxt, XML_ERR_PEREF_AT_EOF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001755 return;
1756 case XML_PARSER_PROLOG:
1757 case XML_PARSER_START:
1758 case XML_PARSER_MISC:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001759 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_PROLOG, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001760 return;
1761 case XML_PARSER_ENTITY_DECL:
1762 case XML_PARSER_CONTENT:
1763 case XML_PARSER_ATTRIBUTE_VALUE:
1764 case XML_PARSER_PI:
1765 case XML_PARSER_SYSTEM_LITERAL:
Daniel Veillard4a7ae502002-02-18 19:18:17 +00001766 case XML_PARSER_PUBLIC_LITERAL:
Owen Taylor3473f882001-02-23 17:55:21 +00001767 /* we just ignore it there */
1768 return;
1769 case XML_PARSER_EPILOG:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001770 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_EPILOG, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001771 return;
1772 case XML_PARSER_ENTITY_VALUE:
1773 /*
1774 * NOTE: in the case of entity values, we don't do the
1775 * substitution here since we need the literal
1776 * entity value to be able to save the internal
1777 * subset of the document.
1778 * This will be handled by xmlStringDecodeEntities
1779 */
1780 return;
1781 case XML_PARSER_DTD:
1782 /*
1783 * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
1784 * In the internal DTD subset, parameter-entity references
1785 * can occur only where markup declarations can occur, not
1786 * within markup declarations.
1787 * In that case this is handled in xmlParseMarkupDecl
1788 */
1789 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
1790 return;
William M. Brack76e95df2003-10-18 16:20:14 +00001791 if (IS_BLANK_CH(NXT(1)) || NXT(1) == 0)
Daniel Veillardf5582f12002-06-11 10:08:16 +00001792 return;
Owen Taylor3473f882001-02-23 17:55:21 +00001793 break;
1794 case XML_PARSER_IGNORE:
1795 return;
1796 }
1797
1798 NEXT;
1799 name = xmlParseName(ctxt);
1800 if (xmlParserDebugEntities)
1801 xmlGenericError(xmlGenericErrorContext,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001802 "PEReference: %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00001803 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001804 xmlFatalErr(ctxt, XML_ERR_PEREF_NO_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001805 } else {
1806 if (RAW == ';') {
1807 NEXT;
1808 if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL))
1809 entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
1810 if (entity == NULL) {
1811
1812 /*
1813 * [ WFC: Entity Declared ]
1814 * In a document without any DTD, a document with only an
1815 * internal DTD subset which contains no parameter entity
1816 * references, or a document with "standalone='yes'", ...
1817 * ... The declaration of a parameter entity must precede
1818 * any reference to it...
1819 */
1820 if ((ctxt->standalone == 1) ||
1821 ((ctxt->hasExternalSubset == 0) &&
1822 (ctxt->hasPErefs == 0))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00001823 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00001824 "PEReference: %%%s; not found\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00001825 } else {
1826 /*
1827 * [ VC: Entity Declared ]
1828 * In a document with an external subset or external
1829 * parameter entities with "standalone='no'", ...
1830 * ... The declaration of a parameter entity must precede
1831 * any reference to it...
1832 */
Daniel Veillard24eb9782003-10-04 21:08:09 +00001833 if ((ctxt->validate) && (ctxt->vctxt.error != NULL)) {
1834 xmlValidityError(ctxt, XML_WAR_UNDECLARED_ENTITY,
1835 "PEReference: %%%s; not found\n",
1836 name);
1837 } else
1838 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
1839 "PEReference: %%%s; not found\n",
1840 name, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001841 ctxt->valid = 0;
1842 }
Daniel Veillardf5582f12002-06-11 10:08:16 +00001843 } else if (ctxt->input->free != deallocblankswrapper) {
1844 input = xmlNewBlanksWrapperInputStream(ctxt, entity);
1845 xmlPushInput(ctxt, input);
Owen Taylor3473f882001-02-23 17:55:21 +00001846 } else {
1847 if ((entity->etype == XML_INTERNAL_PARAMETER_ENTITY) ||
1848 (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)) {
Daniel Veillard87a764e2001-06-20 17:41:10 +00001849 xmlChar start[4];
1850 xmlCharEncoding enc;
1851
Owen Taylor3473f882001-02-23 17:55:21 +00001852 /*
1853 * handle the extra spaces added before and after
1854 * c.f. http://www.w3.org/TR/REC-xml#as-PE
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001855 * this is done independently.
Owen Taylor3473f882001-02-23 17:55:21 +00001856 */
1857 input = xmlNewEntityInputStream(ctxt, entity);
1858 xmlPushInput(ctxt, input);
Daniel Veillard87a764e2001-06-20 17:41:10 +00001859
1860 /*
1861 * Get the 4 first bytes and decode the charset
1862 * if enc != XML_CHAR_ENCODING_NONE
1863 * plug some encoding conversion routines.
William M. Bracka0c48ad2004-04-16 15:58:29 +00001864 * Note that, since we may have some non-UTF8
1865 * encoding (like UTF16, bug 135229), the 'length'
1866 * is not known, but we can calculate based upon
1867 * the amount of data in the buffer.
Daniel Veillard87a764e2001-06-20 17:41:10 +00001868 */
1869 GROW
William M. Bracka0c48ad2004-04-16 15:58:29 +00001870 if ((ctxt->input->end - ctxt->input->cur)>=4) {
Daniel Veillarde059b892002-06-13 15:32:10 +00001871 start[0] = RAW;
1872 start[1] = NXT(1);
1873 start[2] = NXT(2);
1874 start[3] = NXT(3);
1875 enc = xmlDetectCharEncoding(start, 4);
1876 if (enc != XML_CHAR_ENCODING_NONE) {
1877 xmlSwitchEncoding(ctxt, enc);
1878 }
Daniel Veillard87a764e2001-06-20 17:41:10 +00001879 }
1880
Owen Taylor3473f882001-02-23 17:55:21 +00001881 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
Daniel Veillarda07050d2003-10-19 14:46:32 +00001882 (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l' )) &&
1883 (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +00001884 xmlParseTextDecl(ctxt);
1885 }
Owen Taylor3473f882001-02-23 17:55:21 +00001886 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00001887 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
1888 "PEReference: %s is not a parameter entity\n",
1889 name);
Owen Taylor3473f882001-02-23 17:55:21 +00001890 }
1891 }
1892 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001893 xmlFatalErr(ctxt, XML_ERR_PEREF_SEMICOL_MISSING, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001894 }
Owen Taylor3473f882001-02-23 17:55:21 +00001895 }
1896}
1897
/*
 * Macro used to grow the current buffer.
 *
 * Doubles the companion variable <buffer>_size and reallocates @buffer
 * to that many xmlChar. @buffer is only reassigned when the reallocation
 * succeeds; on failure control jumps to the mem_error label, which the
 * enclosing function must provide.
 */
#define growBuffer(buffer) {						\
    xmlChar *tmp;							\
    buffer##_size = buffer##_size * 2;					\
    tmp = (xmlChar *) xmlRealloc(buffer,				\
				 buffer##_size * sizeof(xmlChar));	\
    if (tmp != NULL)							\
	buffer = tmp;							\
    else								\
	goto mem_error;							\
}
1909
1910/**
Daniel Veillard7a02cfe2003-09-25 12:18:34 +00001911 * xmlStringLenDecodeEntities:
Owen Taylor3473f882001-02-23 17:55:21 +00001912 * @ctxt: the parser context
1913 * @str: the input string
Daniel Veillarde57ec792003-09-10 10:50:59 +00001914 * @len: the string length
Owen Taylor3473f882001-02-23 17:55:21 +00001915 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
1916 * @end: an end marker xmlChar, 0 if none
1917 * @end2: an end marker xmlChar, 0 if none
1918 * @end3: an end marker xmlChar, 0 if none
1919 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001920 * Takes a entity string content and process to do the adequate substitutions.
Owen Taylor3473f882001-02-23 17:55:21 +00001921 *
1922 * [67] Reference ::= EntityRef | CharRef
1923 *
1924 * [69] PEReference ::= '%' Name ';'
1925 *
1926 * Returns A newly allocated string with the substitution done. The caller
1927 * must deallocate it !
1928 */
1929xmlChar *
Daniel Veillarde57ec792003-09-10 10:50:59 +00001930xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
1931 int what, xmlChar end, xmlChar end2, xmlChar end3) {
Owen Taylor3473f882001-02-23 17:55:21 +00001932 xmlChar *buffer = NULL;
1933 int buffer_size = 0;
1934
1935 xmlChar *current = NULL;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001936 const xmlChar *last;
Owen Taylor3473f882001-02-23 17:55:21 +00001937 xmlEntityPtr ent;
1938 int c,l;
1939 int nbchars = 0;
1940
Daniel Veillarda82b1822004-11-08 16:24:57 +00001941 if ((ctxt == NULL) || (str == NULL) || (len < 0))
Owen Taylor3473f882001-02-23 17:55:21 +00001942 return(NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001943 last = str + len;
Owen Taylor3473f882001-02-23 17:55:21 +00001944
1945 if (ctxt->depth > 40) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001946 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001947 return(NULL);
1948 }
1949
1950 /*
1951 * allocate a translation buffer.
1952 */
1953 buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
Daniel Veillard3c908dc2003-04-19 00:07:51 +00001954 buffer = (xmlChar *) xmlMallocAtomic(buffer_size * sizeof(xmlChar));
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001955 if (buffer == NULL) goto mem_error;
Owen Taylor3473f882001-02-23 17:55:21 +00001956
1957 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001958 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +00001959 * we are operating on already parsed values.
1960 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00001961 if (str < last)
1962 c = CUR_SCHAR(str, l);
1963 else
1964 c = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001965 while ((c != 0) && (c != end) && /* non input consuming loop */
1966 (c != end2) && (c != end3)) {
1967
1968 if (c == 0) break;
1969 if ((c == '&') && (str[1] == '#')) {
1970 int val = xmlParseStringCharRef(ctxt, &str);
1971 if (val != 0) {
1972 COPY_BUF(0,buffer,nbchars,val);
1973 }
1974 } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
1975 if (xmlParserDebugEntities)
1976 xmlGenericError(xmlGenericErrorContext,
1977 "String decoding Entity Reference: %.30s\n",
1978 str);
1979 ent = xmlParseStringEntityRef(ctxt, &str);
1980 if ((ent != NULL) &&
1981 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
1982 if (ent->content != NULL) {
1983 COPY_BUF(0,buffer,nbchars,ent->content[0]);
1984 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00001985 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
1986 "predefined entity has no content\n");
Owen Taylor3473f882001-02-23 17:55:21 +00001987 }
1988 } else if ((ent != NULL) && (ent->content != NULL)) {
1989 xmlChar *rep;
1990
1991 ctxt->depth++;
1992 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
1993 0, 0, 0);
1994 ctxt->depth--;
1995 if (rep != NULL) {
1996 current = rep;
1997 while (*current != 0) { /* non input consuming loop */
1998 buffer[nbchars++] = *current++;
1999 if (nbchars >
2000 buffer_size - XML_PARSER_BUFFER_SIZE) {
2001 growBuffer(buffer);
2002 }
2003 }
2004 xmlFree(rep);
2005 }
2006 } else if (ent != NULL) {
2007 int i = xmlStrlen(ent->name);
2008 const xmlChar *cur = ent->name;
2009
2010 buffer[nbchars++] = '&';
2011 if (nbchars > buffer_size - i - XML_PARSER_BUFFER_SIZE) {
2012 growBuffer(buffer);
2013 }
2014 for (;i > 0;i--)
2015 buffer[nbchars++] = *cur++;
2016 buffer[nbchars++] = ';';
2017 }
2018 } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
2019 if (xmlParserDebugEntities)
2020 xmlGenericError(xmlGenericErrorContext,
2021 "String decoding PE Reference: %.30s\n", str);
2022 ent = xmlParseStringPEReference(ctxt, &str);
2023 if (ent != NULL) {
2024 xmlChar *rep;
2025
2026 ctxt->depth++;
2027 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2028 0, 0, 0);
2029 ctxt->depth--;
2030 if (rep != NULL) {
2031 current = rep;
2032 while (*current != 0) { /* non input consuming loop */
2033 buffer[nbchars++] = *current++;
2034 if (nbchars >
2035 buffer_size - XML_PARSER_BUFFER_SIZE) {
2036 growBuffer(buffer);
2037 }
2038 }
2039 xmlFree(rep);
2040 }
2041 }
2042 } else {
2043 COPY_BUF(l,buffer,nbchars,c);
2044 str += l;
2045 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
2046 growBuffer(buffer);
2047 }
2048 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00002049 if (str < last)
2050 c = CUR_SCHAR(str, l);
2051 else
2052 c = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002053 }
2054 buffer[nbchars++] = 0;
2055 return(buffer);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002056
2057mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002058 xmlErrMemory(ctxt, NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002059 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002060}
2061
Daniel Veillarde57ec792003-09-10 10:50:59 +00002062/**
2063 * xmlStringDecodeEntities:
2064 * @ctxt: the parser context
2065 * @str: the input string
2066 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2067 * @end: an end marker xmlChar, 0 if none
2068 * @end2: an end marker xmlChar, 0 if none
2069 * @end3: an end marker xmlChar, 0 if none
2070 *
2071 * Takes a entity string content and process to do the adequate substitutions.
2072 *
2073 * [67] Reference ::= EntityRef | CharRef
2074 *
2075 * [69] PEReference ::= '%' Name ';'
2076 *
2077 * Returns A newly allocated string with the substitution done. The caller
2078 * must deallocate it !
2079 */
2080xmlChar *
2081xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
2082 xmlChar end, xmlChar end2, xmlChar end3) {
Daniel Veillarda82b1822004-11-08 16:24:57 +00002083 if ((ctxt == NULL) || (str == NULL)) return(NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00002084 return(xmlStringLenDecodeEntities(ctxt, str, xmlStrlen(str), what,
2085 end, end2, end3));
2086}
Owen Taylor3473f882001-02-23 17:55:21 +00002087
2088/************************************************************************
2089 * *
Owen Taylor3473f882001-02-23 17:55:21 +00002090 * Commodity functions, cleanup needed ? *
2091 * *
2092 ************************************************************************/
2093
2094/**
2095 * areBlanks:
2096 * @ctxt: an XML parser context
2097 * @str: a xmlChar *
2098 * @len: the size of @str
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002099 * @blank_chars: we know the chars are blanks
Owen Taylor3473f882001-02-23 17:55:21 +00002100 *
2101 * Is this a sequence of blank chars that one can ignore ?
2102 *
2103 * Returns 1 if ignorable 0 otherwise.
2104 */
2105
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002106static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2107 int blank_chars) {
Owen Taylor3473f882001-02-23 17:55:21 +00002108 int i, ret;
2109 xmlNodePtr lastChild;
2110
Daniel Veillard05c13a22001-09-09 08:38:09 +00002111 /*
2112 * Don't spend time trying to differentiate them, the same callback is
2113 * used !
2114 */
2115 if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters)
Daniel Veillard2f362242001-03-02 17:36:21 +00002116 return(0);
2117
Owen Taylor3473f882001-02-23 17:55:21 +00002118 /*
2119 * Check for xml:space value.
2120 */
2121 if (*(ctxt->space) == 1)
2122 return(0);
2123
2124 /*
2125 * Check that the string is made of blanks
2126 */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002127 if (blank_chars == 0) {
2128 for (i = 0;i < len;i++)
2129 if (!(IS_BLANK_CH(str[i]))) return(0);
2130 }
Owen Taylor3473f882001-02-23 17:55:21 +00002131
2132 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002133 * Look if the element is mixed content in the DTD if available
Owen Taylor3473f882001-02-23 17:55:21 +00002134 */
Daniel Veillard6dd398f2001-07-25 22:41:03 +00002135 if (ctxt->node == NULL) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002136 if (ctxt->myDoc != NULL) {
2137 ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
2138 if (ret == 0) return(1);
2139 if (ret == 1) return(0);
2140 }
2141
2142 /*
2143 * Otherwise, heuristic :-\
2144 */
Daniel Veillardabac41e2005-07-06 15:17:38 +00002145 if ((RAW != '<') && (RAW != 0xD)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002146 if ((ctxt->node->children == NULL) &&
2147 (RAW == '<') && (NXT(1) == '/')) return(0);
2148
2149 lastChild = xmlGetLastChild(ctxt->node);
2150 if (lastChild == NULL) {
Daniel Veillard7db37732001-07-12 01:20:08 +00002151 if ((ctxt->node->type != XML_ELEMENT_NODE) &&
2152 (ctxt->node->content != NULL)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002153 } else if (xmlNodeIsText(lastChild))
2154 return(0);
2155 else if ((ctxt->node->children != NULL) &&
2156 (xmlNodeIsText(ctxt->node->children)))
2157 return(0);
2158 return(1);
2159}
2160
Owen Taylor3473f882001-02-23 17:55:21 +00002161/************************************************************************
2162 * *
2163 * Extra stuff for namespace support *
2164 * Relates to http://www.w3.org/TR/WD-xml-names *
2165 * *
2166 ************************************************************************/
2167
2168/**
2169 * xmlSplitQName:
2170 * @ctxt: an XML parser context
2171 * @name: the UTF8 encoded qualified name string to split
2172 * @prefix: a xmlChar **
2173 *
2174 * parse an UTF8 encoded XML qualified name string
2175 *
2176 * [NS 5] QName ::= (Prefix ':')? LocalPart
2177 *
2178 * [NS 6] Prefix ::= NCName
2179 *
2180 * [NS 7] LocalPart ::= NCName
2181 *
2182 * Returns the local part, and prefix is updated
2183 * to get the Prefix if any.
2184 */
2185
2186xmlChar *
2187xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
2188 xmlChar buf[XML_MAX_NAMELEN + 5];
2189 xmlChar *buffer = NULL;
2190 int len = 0;
2191 int max = XML_MAX_NAMELEN;
2192 xmlChar *ret = NULL;
2193 const xmlChar *cur = name;
2194 int c;
2195
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00002196 if (prefix == NULL) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002197 *prefix = NULL;
2198
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00002199 if (cur == NULL) return(NULL);
2200
Owen Taylor3473f882001-02-23 17:55:21 +00002201#ifndef XML_XML_NAMESPACE
2202 /* xml: prefix is not really a namespace */
2203 if ((cur[0] == 'x') && (cur[1] == 'm') &&
2204 (cur[2] == 'l') && (cur[3] == ':'))
2205 return(xmlStrdup(name));
2206#endif
2207
Daniel Veillard597bc482003-07-24 16:08:28 +00002208 /* nasty but well=formed */
Owen Taylor3473f882001-02-23 17:55:21 +00002209 if (cur[0] == ':')
2210 return(xmlStrdup(name));
2211
2212 c = *cur++;
2213 while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
2214 buf[len++] = c;
2215 c = *cur++;
2216 }
2217 if (len >= max) {
2218 /*
2219 * Okay someone managed to make a huge name, so he's ready to pay
2220 * for the processing speed.
2221 */
2222 max = len * 2;
2223
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002224 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002225 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002226 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002227 return(NULL);
2228 }
2229 memcpy(buffer, buf, len);
2230 while ((c != 0) && (c != ':')) { /* tested bigname.xml */
2231 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00002232 xmlChar *tmp;
2233
Owen Taylor3473f882001-02-23 17:55:21 +00002234 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00002235 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00002236 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00002237 if (tmp == NULL) {
2238 xmlFree(tmp);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002239 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002240 return(NULL);
2241 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00002242 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00002243 }
2244 buffer[len++] = c;
2245 c = *cur++;
2246 }
2247 buffer[len] = 0;
2248 }
2249
Daniel Veillard597bc482003-07-24 16:08:28 +00002250 /* nasty but well=formed
2251 if ((c == ':') && (*cur == 0)) {
2252 return(xmlStrdup(name));
2253 } */
2254
Owen Taylor3473f882001-02-23 17:55:21 +00002255 if (buffer == NULL)
2256 ret = xmlStrndup(buf, len);
2257 else {
2258 ret = buffer;
2259 buffer = NULL;
2260 max = XML_MAX_NAMELEN;
2261 }
2262
2263
2264 if (c == ':') {
Daniel Veillardbb284f42002-10-16 18:02:47 +00002265 c = *cur;
Owen Taylor3473f882001-02-23 17:55:21 +00002266 *prefix = ret;
Daniel Veillard597bc482003-07-24 16:08:28 +00002267 if (c == 0) {
Daniel Veillard8d73bcb2003-08-04 01:06:15 +00002268 return(xmlStrndup(BAD_CAST "", 0));
Daniel Veillard597bc482003-07-24 16:08:28 +00002269 }
Owen Taylor3473f882001-02-23 17:55:21 +00002270 len = 0;
2271
Daniel Veillardbb284f42002-10-16 18:02:47 +00002272 /*
2273 * Check that the first character is proper to start
2274 * a new name
2275 */
2276 if (!(((c >= 0x61) && (c <= 0x7A)) ||
2277 ((c >= 0x41) && (c <= 0x5A)) ||
2278 (c == '_') || (c == ':'))) {
2279 int l;
2280 int first = CUR_SCHAR(cur, l);
2281
2282 if (!IS_LETTER(first) && (first != '_')) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00002283 xmlFatalErrMsgStr(ctxt, XML_NS_ERR_QNAME,
Daniel Veillardbb284f42002-10-16 18:02:47 +00002284 "Name %s is not XML Namespace compliant\n",
Daniel Veillardbc92eca2003-09-15 09:48:06 +00002285 name);
Daniel Veillardbb284f42002-10-16 18:02:47 +00002286 }
2287 }
2288 cur++;
2289
Owen Taylor3473f882001-02-23 17:55:21 +00002290 while ((c != 0) && (len < max)) { /* tested bigname2.xml */
2291 buf[len++] = c;
2292 c = *cur++;
2293 }
2294 if (len >= max) {
2295 /*
2296 * Okay someone managed to make a huge name, so he's ready to pay
2297 * for the processing speed.
2298 */
2299 max = len * 2;
2300
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002301 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002302 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002303 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002304 return(NULL);
2305 }
2306 memcpy(buffer, buf, len);
2307 while (c != 0) { /* tested bigname2.xml */
2308 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00002309 xmlChar *tmp;
2310
Owen Taylor3473f882001-02-23 17:55:21 +00002311 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00002312 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00002313 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00002314 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002315 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00002316 xmlFree(buffer);
Owen Taylor3473f882001-02-23 17:55:21 +00002317 return(NULL);
2318 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00002319 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00002320 }
2321 buffer[len++] = c;
2322 c = *cur++;
2323 }
2324 buffer[len] = 0;
2325 }
2326
2327 if (buffer == NULL)
2328 ret = xmlStrndup(buf, len);
2329 else {
2330 ret = buffer;
2331 }
2332 }
2333
2334 return(ret);
2335}
2336
2337/************************************************************************
2338 * *
2339 * The parser itself *
2340 * Relates to http://www.w3.org/TR/REC-xml *
2341 * *
2342 ************************************************************************/
2343
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002344static const xmlChar * xmlParseNameComplex(xmlParserCtxtPtr ctxt);
Daniel Veillarde57ec792003-09-10 10:50:59 +00002345static xmlChar * xmlParseAttValueInternal(xmlParserCtxtPtr ctxt,
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002346 int *len, int *alloc, int normalize);
Daniel Veillard0fb18932003-09-07 09:14:37 +00002347
Owen Taylor3473f882001-02-23 17:55:21 +00002348/**
2349 * xmlParseName:
2350 * @ctxt: an XML parser context
2351 *
2352 * parse an XML name.
2353 *
2354 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
2355 * CombiningChar | Extender
2356 *
2357 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
2358 *
Daniel Veillard807b4de2004-09-26 14:42:56 +00002359 * [6] Names ::= Name (#x20 Name)*
Owen Taylor3473f882001-02-23 17:55:21 +00002360 *
2361 * Returns the Name parsed or NULL
2362 */
2363
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002364const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00002365xmlParseName(xmlParserCtxtPtr ctxt) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00002366 const xmlChar *in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002367 const xmlChar *ret;
Owen Taylor3473f882001-02-23 17:55:21 +00002368 int count = 0;
2369
2370 GROW;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002371
2372 /*
2373 * Accelerator for simple ASCII names
2374 */
2375 in = ctxt->input->cur;
2376 if (((*in >= 0x61) && (*in <= 0x7A)) ||
2377 ((*in >= 0x41) && (*in <= 0x5A)) ||
2378 (*in == '_') || (*in == ':')) {
2379 in++;
2380 while (((*in >= 0x61) && (*in <= 0x7A)) ||
2381 ((*in >= 0x41) && (*in <= 0x5A)) ||
2382 ((*in >= 0x30) && (*in <= 0x39)) ||
Daniel Veillard76d66f42001-05-16 21:05:17 +00002383 (*in == '_') || (*in == '-') ||
2384 (*in == ':') || (*in == '.'))
Daniel Veillard48b2f892001-02-25 16:11:03 +00002385 in++;
Daniel Veillard76d66f42001-05-16 21:05:17 +00002386 if ((*in > 0) && (*in < 0x80)) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00002387 count = in - ctxt->input->cur;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002388 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
Daniel Veillard48b2f892001-02-25 16:11:03 +00002389 ctxt->input->cur = in;
Daniel Veillard77a90a72003-03-22 00:04:05 +00002390 ctxt->nbChars += count;
2391 ctxt->input->col += count;
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002392 if (ret == NULL)
2393 xmlErrMemory(ctxt, NULL);
Daniel Veillard48b2f892001-02-25 16:11:03 +00002394 return(ret);
2395 }
2396 }
Daniel Veillard2f362242001-03-02 17:36:21 +00002397 return(xmlParseNameComplex(ctxt));
Daniel Veillard21a0f912001-02-25 19:54:14 +00002398}
Daniel Veillard48b2f892001-02-25 16:11:03 +00002399
Daniel Veillard46de64e2002-05-29 08:21:33 +00002400/**
2401 * xmlParseNameAndCompare:
2402 * @ctxt: an XML parser context
2403 *
2404 * parse an XML name and compares for match
2405 * (specialized for endtag parsing)
2406 *
Daniel Veillard46de64e2002-05-29 08:21:33 +00002407 * Returns NULL for an illegal name, (xmlChar*) 1 for success
2408 * and the name for mismatch
2409 */
2410
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002411static const xmlChar *
Daniel Veillard46de64e2002-05-29 08:21:33 +00002412xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002413 register const xmlChar *cmp = other;
2414 register const xmlChar *in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002415 const xmlChar *ret;
Daniel Veillard46de64e2002-05-29 08:21:33 +00002416
2417 GROW;
2418
2419 in = ctxt->input->cur;
2420 while (*in != 0 && *in == *cmp) {
2421 ++in;
2422 ++cmp;
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00002423 ctxt->input->col++;
Daniel Veillard46de64e2002-05-29 08:21:33 +00002424 }
William M. Brack76e95df2003-10-18 16:20:14 +00002425 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
Daniel Veillard46de64e2002-05-29 08:21:33 +00002426 /* success */
2427 ctxt->input->cur = in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002428 return (const xmlChar*) 1;
Daniel Veillard46de64e2002-05-29 08:21:33 +00002429 }
2430 /* failure (or end of input buffer), check with full function */
2431 ret = xmlParseName (ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00002432 /* strings coming from the dictionnary direct compare possible */
2433 if (ret == other) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002434 return (const xmlChar*) 1;
Daniel Veillard46de64e2002-05-29 08:21:33 +00002435 }
2436 return ret;
2437}
2438
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002439static const xmlChar *
Daniel Veillard21a0f912001-02-25 19:54:14 +00002440xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
Daniel Veillard21a0f912001-02-25 19:54:14 +00002441 int len = 0, l;
2442 int c;
2443 int count = 0;
2444
2445 /*
2446 * Handler for more complex cases
2447 */
2448 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00002449 c = CUR_CHAR(l);
2450 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
2451 (!IS_LETTER(c) && (c != '_') &&
2452 (c != ':'))) {
2453 return(NULL);
2454 }
2455
2456 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
William M. Brack871611b2003-10-18 04:53:14 +00002457 ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
Owen Taylor3473f882001-02-23 17:55:21 +00002458 (c == '.') || (c == '-') ||
2459 (c == '_') || (c == ':') ||
William M. Brack871611b2003-10-18 04:53:14 +00002460 (IS_COMBINING(c)) ||
2461 (IS_EXTENDER(c)))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002462 if (count++ > 100) {
2463 count = 0;
2464 GROW;
2465 }
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002466 len += l;
Owen Taylor3473f882001-02-23 17:55:21 +00002467 NEXTL(l);
2468 c = CUR_CHAR(l);
Owen Taylor3473f882001-02-23 17:55:21 +00002469 }
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002470 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
Owen Taylor3473f882001-02-23 17:55:21 +00002471}
2472
2473/**
2474 * xmlParseStringName:
2475 * @ctxt: an XML parser context
2476 * @str: a pointer to the string pointer (IN/OUT)
2477 *
2478 * parse an XML name.
2479 *
2480 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
2481 * CombiningChar | Extender
2482 *
2483 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
2484 *
Daniel Veillard807b4de2004-09-26 14:42:56 +00002485 * [6] Names ::= Name (#x20 Name)*
Owen Taylor3473f882001-02-23 17:55:21 +00002486 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002487 * Returns the Name parsed or NULL. The @str pointer
Owen Taylor3473f882001-02-23 17:55:21 +00002488 * is updated to the current location in the string.
2489 */
2490
Daniel Veillard56a4cb82001-03-24 17:00:36 +00002491static xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00002492xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
2493 xmlChar buf[XML_MAX_NAMELEN + 5];
2494 const xmlChar *cur = *str;
2495 int len = 0, l;
2496 int c;
2497
2498 c = CUR_SCHAR(cur, l);
William M. Brack871611b2003-10-18 04:53:14 +00002499 if (!IS_LETTER(c) && (c != '_') &&
Owen Taylor3473f882001-02-23 17:55:21 +00002500 (c != ':')) {
2501 return(NULL);
2502 }
2503
William M. Brack871611b2003-10-18 04:53:14 +00002504 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigentname.xml */
Owen Taylor3473f882001-02-23 17:55:21 +00002505 (c == '.') || (c == '-') ||
2506 (c == '_') || (c == ':') ||
William M. Brack871611b2003-10-18 04:53:14 +00002507 (IS_COMBINING(c)) ||
2508 (IS_EXTENDER(c))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002509 COPY_BUF(l,buf,len,c);
2510 cur += l;
2511 c = CUR_SCHAR(cur, l);
2512 if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
2513 /*
2514 * Okay someone managed to make a huge name, so he's ready to pay
2515 * for the processing speed.
2516 */
2517 xmlChar *buffer;
2518 int max = len * 2;
2519
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002520 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002521 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002522 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002523 return(NULL);
2524 }
2525 memcpy(buffer, buf, len);
William M. Brack871611b2003-10-18 04:53:14 +00002526 while ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
Daniel Veillard73b013f2003-09-30 12:36:01 +00002527 /* test bigentname.xml */
Owen Taylor3473f882001-02-23 17:55:21 +00002528 (c == '.') || (c == '-') ||
2529 (c == '_') || (c == ':') ||
William M. Brack871611b2003-10-18 04:53:14 +00002530 (IS_COMBINING(c)) ||
2531 (IS_EXTENDER(c))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002532 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00002533 xmlChar *tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00002534 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00002535 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00002536 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00002537 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002538 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00002539 xmlFree(buffer);
Owen Taylor3473f882001-02-23 17:55:21 +00002540 return(NULL);
2541 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00002542 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00002543 }
2544 COPY_BUF(l,buffer,len,c);
2545 cur += l;
2546 c = CUR_SCHAR(cur, l);
2547 }
2548 buffer[len] = 0;
2549 *str = cur;
2550 return(buffer);
2551 }
2552 }
2553 *str = cur;
2554 return(xmlStrndup(buf, len));
2555}
2556
2557/**
2558 * xmlParseNmtoken:
2559 * @ctxt: an XML parser context
2560 *
2561 * parse an XML Nmtoken.
2562 *
2563 * [7] Nmtoken ::= (NameChar)+
2564 *
Daniel Veillard807b4de2004-09-26 14:42:56 +00002565 * [8] Nmtokens ::= Nmtoken (#x20 Nmtoken)*
Owen Taylor3473f882001-02-23 17:55:21 +00002566 *
2567 * Returns the Nmtoken parsed or NULL
2568 */
2569
2570xmlChar *
2571xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
2572 xmlChar buf[XML_MAX_NAMELEN + 5];
2573 int len = 0, l;
2574 int c;
2575 int count = 0;
2576
2577 GROW;
2578 c = CUR_CHAR(l);
2579
William M. Brack871611b2003-10-18 04:53:14 +00002580 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */
Owen Taylor3473f882001-02-23 17:55:21 +00002581 (c == '.') || (c == '-') ||
2582 (c == '_') || (c == ':') ||
William M. Brack871611b2003-10-18 04:53:14 +00002583 (IS_COMBINING(c)) ||
2584 (IS_EXTENDER(c))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002585 if (count++ > 100) {
2586 count = 0;
2587 GROW;
2588 }
2589 COPY_BUF(l,buf,len,c);
2590 NEXTL(l);
2591 c = CUR_CHAR(l);
2592 if (len >= XML_MAX_NAMELEN) {
2593 /*
2594 * Okay someone managed to make a huge token, so he's ready to pay
2595 * for the processing speed.
2596 */
2597 xmlChar *buffer;
2598 int max = len * 2;
2599
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002600 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002601 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002602 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002603 return(NULL);
2604 }
2605 memcpy(buffer, buf, len);
William M. Brack871611b2003-10-18 04:53:14 +00002606 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */
Owen Taylor3473f882001-02-23 17:55:21 +00002607 (c == '.') || (c == '-') ||
2608 (c == '_') || (c == ':') ||
William M. Brack871611b2003-10-18 04:53:14 +00002609 (IS_COMBINING(c)) ||
2610 (IS_EXTENDER(c))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002611 if (count++ > 100) {
2612 count = 0;
2613 GROW;
2614 }
2615 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00002616 xmlChar *tmp;
2617
Owen Taylor3473f882001-02-23 17:55:21 +00002618 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00002619 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00002620 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00002621 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002622 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00002623 xmlFree(buffer);
Owen Taylor3473f882001-02-23 17:55:21 +00002624 return(NULL);
2625 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00002626 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00002627 }
2628 COPY_BUF(l,buffer,len,c);
2629 NEXTL(l);
2630 c = CUR_CHAR(l);
2631 }
2632 buffer[len] = 0;
2633 return(buffer);
2634 }
2635 }
2636 if (len == 0)
2637 return(NULL);
2638 return(xmlStrndup(buf, len));
2639}
2640
2641/**
2642 * xmlParseEntityValue:
2643 * @ctxt: an XML parser context
2644 * @orig: if non-NULL store a copy of the original entity value
2645 *
2646 * parse a value for ENTITY declarations
2647 *
2648 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
2649 * "'" ([^%&'] | PEReference | Reference)* "'"
2650 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002651 * Returns the EntityValue parsed with reference substituted or NULL
Owen Taylor3473f882001-02-23 17:55:21 +00002652 */
2653
2654xmlChar *
2655xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
2656 xmlChar *buf = NULL;
2657 int len = 0;
2658 int size = XML_PARSER_BUFFER_SIZE;
2659 int c, l;
2660 xmlChar stop;
2661 xmlChar *ret = NULL;
2662 const xmlChar *cur = NULL;
2663 xmlParserInputPtr input;
2664
2665 if (RAW == '"') stop = '"';
2666 else if (RAW == '\'') stop = '\'';
2667 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002668 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002669 return(NULL);
2670 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002671 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002672 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002673 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002674 return(NULL);
2675 }
2676
2677 /*
2678 * The content of the entity definition is copied in a buffer.
2679 */
2680
2681 ctxt->instate = XML_PARSER_ENTITY_VALUE;
2682 input = ctxt->input;
2683 GROW;
2684 NEXT;
2685 c = CUR_CHAR(l);
2686 /*
2687 * NOTE: 4.4.5 Included in Literal
2688 * When a parameter entity reference appears in a literal entity
2689 * value, ... a single or double quote character in the replacement
2690 * text is always treated as a normal data character and will not
2691 * terminate the literal.
2692 * In practice it means we stop the loop only when back at parsing
2693 * the initial entity and the quote is found
2694 */
William M. Brack871611b2003-10-18 04:53:14 +00002695 while ((IS_CHAR(c)) && ((c != stop) || /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00002696 (ctxt->input != input))) {
2697 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00002698 xmlChar *tmp;
2699
Owen Taylor3473f882001-02-23 17:55:21 +00002700 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00002701 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2702 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002703 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00002704 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00002705 return(NULL);
2706 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00002707 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00002708 }
2709 COPY_BUF(l,buf,len,c);
2710 NEXTL(l);
2711 /*
2712 * Pop-up of finished entities.
2713 */
2714 while ((RAW == 0) && (ctxt->inputNr > 1)) /* non input consuming */
2715 xmlPopInput(ctxt);
2716
2717 GROW;
2718 c = CUR_CHAR(l);
2719 if (c == 0) {
2720 GROW;
2721 c = CUR_CHAR(l);
2722 }
2723 }
2724 buf[len] = 0;
2725
2726 /*
2727 * Raise problem w.r.t. '&' and '%' being used in non-entities
2728 * reference constructs. Note Charref will be handled in
2729 * xmlStringDecodeEntities()
2730 */
2731 cur = buf;
2732 while (*cur != 0) { /* non input consuming */
2733 if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
2734 xmlChar *name;
2735 xmlChar tmp = *cur;
2736
2737 cur++;
2738 name = xmlParseStringName(ctxt, &cur);
2739 if ((name == NULL) || (*cur != ';')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00002740 xmlFatalErrMsgInt(ctxt, XML_ERR_ENTITY_CHAR_ERROR,
Owen Taylor3473f882001-02-23 17:55:21 +00002741 "EntityValue: '%c' forbidden except for entities references\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00002742 tmp);
Owen Taylor3473f882001-02-23 17:55:21 +00002743 }
Daniel Veillard5151c062001-10-23 13:10:19 +00002744 if ((tmp == '%') && (ctxt->inSubset == 1) &&
2745 (ctxt->inputNr == 1)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002746 xmlFatalErr(ctxt, XML_ERR_ENTITY_PE_INTERNAL, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002747 }
2748 if (name != NULL)
2749 xmlFree(name);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00002750 if (*cur == 0)
2751 break;
Owen Taylor3473f882001-02-23 17:55:21 +00002752 }
2753 cur++;
2754 }
2755
2756 /*
2757 * Then PEReference entities are substituted.
2758 */
2759 if (c != stop) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002760 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002761 xmlFree(buf);
2762 } else {
2763 NEXT;
2764 /*
2765 * NOTE: 4.4.7 Bypassed
2766 * When a general entity reference appears in the EntityValue in
2767 * an entity declaration, it is bypassed and left as is.
2768 * so XML_SUBSTITUTE_REF is not set here.
2769 */
2770 ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF,
2771 0, 0, 0);
2772 if (orig != NULL)
2773 *orig = buf;
2774 else
2775 xmlFree(buf);
2776 }
2777
2778 return(ret);
2779}
2780
2781/**
Daniel Veillard01c13b52002-12-10 15:19:08 +00002782 * xmlParseAttValueComplex:
2783 * @ctxt: an XML parser context
Daniel Veillard0fb18932003-09-07 09:14:37 +00002784 * @len: the resulting attribute len
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002785 * @normalize: wether to apply the inner normalization
Daniel Veillard01c13b52002-12-10 15:19:08 +00002786 *
2787 * parse a value for an attribute, this is the fallback function
2788 * of xmlParseAttValue() when the attribute parsing requires handling
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002789 * of non-ASCII characters, or normalization compaction.
Daniel Veillard01c13b52002-12-10 15:19:08 +00002790 *
2791 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
2792 */
Daniel Veillard0fb18932003-09-07 09:14:37 +00002793static xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002794xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) {
Daniel Veillarde72c7562002-05-31 09:47:30 +00002795 xmlChar limit = 0;
2796 xmlChar *buf = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00002797 int len = 0;
2798 int buf_size = 0;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002799 int c, l, in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002800 xmlChar *current = NULL;
2801 xmlEntityPtr ent;
2802
Owen Taylor3473f882001-02-23 17:55:21 +00002803 if (NXT(0) == '"') {
2804 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
2805 limit = '"';
2806 NEXT;
2807 } else if (NXT(0) == '\'') {
2808 limit = '\'';
2809 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
2810 NEXT;
2811 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002812 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002813 return(NULL);
2814 }
2815
2816 /*
2817 * allocate a translation buffer.
2818 */
2819 buf_size = XML_PARSER_BUFFER_SIZE;
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002820 buf = (xmlChar *) xmlMallocAtomic(buf_size * sizeof(xmlChar));
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002821 if (buf == NULL) goto mem_error;
Owen Taylor3473f882001-02-23 17:55:21 +00002822
2823 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002824 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +00002825 */
2826 c = CUR_CHAR(l);
Daniel Veillardfdc91562002-07-01 21:52:03 +00002827 while ((NXT(0) != limit) && /* checked */
2828 (c != '<')) {
Owen Taylor3473f882001-02-23 17:55:21 +00002829 if (c == 0) break;
Daniel Veillardfdc91562002-07-01 21:52:03 +00002830 if (c == '&') {
Daniel Veillard62998c02003-09-15 12:56:36 +00002831 in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002832 if (NXT(1) == '#') {
2833 int val = xmlParseCharRef(ctxt);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002834
Owen Taylor3473f882001-02-23 17:55:21 +00002835 if (val == '&') {
Daniel Veillard319a7422001-09-11 09:27:09 +00002836 if (ctxt->replaceEntities) {
2837 if (len > buf_size - 10) {
2838 growBuffer(buf);
2839 }
2840 buf[len++] = '&';
2841 } else {
2842 /*
2843 * The reparsing will be done in xmlStringGetNodeList()
2844 * called by the attribute() function in SAX.c
2845 */
Daniel Veillard319a7422001-09-11 09:27:09 +00002846 if (len > buf_size - 10) {
2847 growBuffer(buf);
2848 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002849 buf[len++] = '&';
2850 buf[len++] = '#';
2851 buf[len++] = '3';
2852 buf[len++] = '8';
2853 buf[len++] = ';';
Owen Taylor3473f882001-02-23 17:55:21 +00002854 }
2855 } else {
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00002856 if (len > buf_size - 10) {
2857 growBuffer(buf);
2858 }
Owen Taylor3473f882001-02-23 17:55:21 +00002859 len += xmlCopyChar(0, &buf[len], val);
2860 }
2861 } else {
2862 ent = xmlParseEntityRef(ctxt);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002863 if ((ent != NULL) &&
2864 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
2865 if (len > buf_size - 10) {
2866 growBuffer(buf);
2867 }
2868 if ((ctxt->replaceEntities == 0) &&
2869 (ent->content[0] == '&')) {
2870 buf[len++] = '&';
2871 buf[len++] = '#';
2872 buf[len++] = '3';
2873 buf[len++] = '8';
2874 buf[len++] = ';';
2875 } else {
2876 buf[len++] = ent->content[0];
2877 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002878 } else if ((ent != NULL) &&
2879 (ctxt->replaceEntities != 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00002880 xmlChar *rep;
2881
2882 if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
2883 rep = xmlStringDecodeEntities(ctxt, ent->content,
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002884 XML_SUBSTITUTE_REF,
2885 0, 0, 0);
Owen Taylor3473f882001-02-23 17:55:21 +00002886 if (rep != NULL) {
2887 current = rep;
2888 while (*current != 0) { /* non input consuming */
2889 buf[len++] = *current++;
2890 if (len > buf_size - 10) {
2891 growBuffer(buf);
2892 }
2893 }
2894 xmlFree(rep);
2895 }
2896 } else {
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00002897 if (len > buf_size - 10) {
2898 growBuffer(buf);
2899 }
Owen Taylor3473f882001-02-23 17:55:21 +00002900 if (ent->content != NULL)
2901 buf[len++] = ent->content[0];
2902 }
2903 } else if (ent != NULL) {
2904 int i = xmlStrlen(ent->name);
2905 const xmlChar *cur = ent->name;
2906
2907 /*
2908 * This may look absurd but is needed to detect
2909 * entities problems
2910 */
2911 if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
2912 (ent->content != NULL)) {
2913 xmlChar *rep;
2914 rep = xmlStringDecodeEntities(ctxt, ent->content,
2915 XML_SUBSTITUTE_REF, 0, 0, 0);
2916 if (rep != NULL)
2917 xmlFree(rep);
2918 }
2919
2920 /*
2921 * Just output the reference
2922 */
2923 buf[len++] = '&';
2924 if (len > buf_size - i - 10) {
2925 growBuffer(buf);
2926 }
2927 for (;i > 0;i--)
2928 buf[len++] = *cur++;
2929 buf[len++] = ';';
2930 }
2931 }
2932 } else {
2933 if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002934 if ((len != 0) || (!normalize)) {
2935 if ((!normalize) || (!in_space)) {
2936 COPY_BUF(l,buf,len,0x20);
2937 if (len > buf_size - 10) {
2938 growBuffer(buf);
2939 }
2940 }
2941 in_space = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002942 }
2943 } else {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002944 in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002945 COPY_BUF(l,buf,len,c);
2946 if (len > buf_size - 10) {
2947 growBuffer(buf);
2948 }
2949 }
2950 NEXTL(l);
2951 }
2952 GROW;
2953 c = CUR_CHAR(l);
2954 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002955 if ((in_space) && (normalize)) {
2956 while (buf[len - 1] == 0x20) len--;
2957 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00002958 buf[len] = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002959 if (RAW == '<') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002960 xmlFatalErr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002961 } else if (RAW != limit) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00002962 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
2963 "AttValue: ' expected\n");
Owen Taylor3473f882001-02-23 17:55:21 +00002964 } else
2965 NEXT;
Daniel Veillard0fb18932003-09-07 09:14:37 +00002966 if (attlen != NULL) *attlen = len;
Owen Taylor3473f882001-02-23 17:55:21 +00002967 return(buf);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002968
2969mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002970 xmlErrMemory(ctxt, NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002971 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002972}
2973
2974/**
Daniel Veillard0fb18932003-09-07 09:14:37 +00002975 * xmlParseAttValue:
2976 * @ctxt: an XML parser context
2977 *
2978 * parse a value for an attribute
2979 * Note: the parser won't do substitution of entities here, this
2980 * will be handled later in xmlStringGetNodeList
2981 *
2982 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
2983 * "'" ([^<&'] | Reference)* "'"
2984 *
2985 * 3.3.3 Attribute-Value Normalization:
2986 * Before the value of an attribute is passed to the application or
2987 * checked for validity, the XML processor must normalize it as follows:
2988 * - a character reference is processed by appending the referenced
2989 * character to the attribute value
2990 * - an entity reference is processed by recursively processing the
2991 * replacement text of the entity
2992 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
2993 * appending #x20 to the normalized value, except that only a single
2994 * #x20 is appended for a "#xD#xA" sequence that is part of an external
2995 * parsed entity or the literal entity value of an internal parsed entity
2996 * - other characters are processed by appending them to the normalized value
2997 * If the declared value is not CDATA, then the XML processor must further
2998 * process the normalized attribute value by discarding any leading and
2999 * trailing space (#x20) characters, and by replacing sequences of space
3000 * (#x20) characters by a single space (#x20) character.
3001 * All attributes for which no declaration has been read should be treated
3002 * by a non-validating parser as if declared CDATA.
3003 *
3004 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
3005 */
3006
3007
3008xmlChar *
3009xmlParseAttValue(xmlParserCtxtPtr ctxt) {
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00003010 if ((ctxt == NULL) || (ctxt->input == NULL)) return(NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003011 return(xmlParseAttValueInternal(ctxt, NULL, NULL, 0));
Daniel Veillard0fb18932003-09-07 09:14:37 +00003012}
3013
3014/**
Owen Taylor3473f882001-02-23 17:55:21 +00003015 * xmlParseSystemLiteral:
3016 * @ctxt: an XML parser context
3017 *
3018 * parse an XML Literal
3019 *
3020 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
3021 *
3022 * Returns the SystemLiteral parsed or NULL
3023 */
3024
3025xmlChar *
3026xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
3027 xmlChar *buf = NULL;
3028 int len = 0;
3029 int size = XML_PARSER_BUFFER_SIZE;
3030 int cur, l;
3031 xmlChar stop;
3032 int state = ctxt->instate;
3033 int count = 0;
3034
3035 SHRINK;
3036 if (RAW == '"') {
3037 NEXT;
3038 stop = '"';
3039 } else if (RAW == '\'') {
3040 NEXT;
3041 stop = '\'';
3042 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003043 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003044 return(NULL);
3045 }
3046
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003047 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003048 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003049 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003050 return(NULL);
3051 }
3052 ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
3053 cur = CUR_CHAR(l);
William M. Brack871611b2003-10-18 04:53:14 +00003054 while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00003055 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003056 xmlChar *tmp;
3057
Owen Taylor3473f882001-02-23 17:55:21 +00003058 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003059 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3060 if (tmp == NULL) {
3061 xmlFree(buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003062 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003063 ctxt->instate = (xmlParserInputState) state;
3064 return(NULL);
3065 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003066 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003067 }
3068 count++;
3069 if (count > 50) {
3070 GROW;
3071 count = 0;
3072 }
3073 COPY_BUF(l,buf,len,cur);
3074 NEXTL(l);
3075 cur = CUR_CHAR(l);
3076 if (cur == 0) {
3077 GROW;
3078 SHRINK;
3079 cur = CUR_CHAR(l);
3080 }
3081 }
3082 buf[len] = 0;
3083 ctxt->instate = (xmlParserInputState) state;
William M. Brack871611b2003-10-18 04:53:14 +00003084 if (!IS_CHAR(cur)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003085 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003086 } else {
3087 NEXT;
3088 }
3089 return(buf);
3090}
3091
3092/**
3093 * xmlParsePubidLiteral:
3094 * @ctxt: an XML parser context
3095 *
3096 * parse an XML public literal
3097 *
3098 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
3099 *
3100 * Returns the PubidLiteral parsed or NULL.
3101 */
3102
3103xmlChar *
3104xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
3105 xmlChar *buf = NULL;
3106 int len = 0;
3107 int size = XML_PARSER_BUFFER_SIZE;
3108 xmlChar cur;
3109 xmlChar stop;
3110 int count = 0;
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003111 xmlParserInputState oldstate = ctxt->instate;
Owen Taylor3473f882001-02-23 17:55:21 +00003112
3113 SHRINK;
3114 if (RAW == '"') {
3115 NEXT;
3116 stop = '"';
3117 } else if (RAW == '\'') {
3118 NEXT;
3119 stop = '\'';
3120 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003121 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003122 return(NULL);
3123 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003124 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003125 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003126 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003127 return(NULL);
3128 }
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003129 ctxt->instate = XML_PARSER_PUBLIC_LITERAL;
Owen Taylor3473f882001-02-23 17:55:21 +00003130 cur = CUR;
William M. Brack76e95df2003-10-18 16:20:14 +00003131 while ((IS_PUBIDCHAR_CH(cur)) && (cur != stop)) { /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00003132 if (len + 1 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003133 xmlChar *tmp;
3134
Owen Taylor3473f882001-02-23 17:55:21 +00003135 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003136 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3137 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003138 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00003139 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00003140 return(NULL);
3141 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003142 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003143 }
3144 buf[len++] = cur;
3145 count++;
3146 if (count > 50) {
3147 GROW;
3148 count = 0;
3149 }
3150 NEXT;
3151 cur = CUR;
3152 if (cur == 0) {
3153 GROW;
3154 SHRINK;
3155 cur = CUR;
3156 }
3157 }
3158 buf[len] = 0;
3159 if (cur != stop) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003160 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003161 } else {
3162 NEXT;
3163 }
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003164 ctxt->instate = oldstate;
Owen Taylor3473f882001-02-23 17:55:21 +00003165 return(buf);
3166}
3167
Daniel Veillard48b2f892001-02-25 16:11:03 +00003168void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata);
Owen Taylor3473f882001-02-23 17:55:21 +00003169/**
3170 * xmlParseCharData:
3171 * @ctxt: an XML parser context
3172 * @cdata: int indicating whether we are within a CDATA section
3173 *
3174 * parse a CharData section.
3175 * if we are within a CDATA section ']]>' marks an end of section.
3176 *
3177 * The right angle bracket (>) may be represented using the string "&gt;",
3178 * and must, for compatibility, be escaped using "&gt;" or a character
3179 * reference when it appears in the string "]]>" in content, when that
3180 * string is not marking the end of a CDATA section.
3181 *
3182 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
3183 */
3184
3185void
3186xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00003187 const xmlChar *in;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003188 int nbchar = 0;
Daniel Veillard50582112001-03-26 22:52:16 +00003189 int line = ctxt->input->line;
3190 int col = ctxt->input->col;
Daniel Veillard0714c5b2005-01-23 00:01:01 +00003191 int ccol;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003192
3193 SHRINK;
3194 GROW;
3195 /*
3196 * Accelerated common case where input don't need to be
3197 * modified before passing it to the handler.
3198 */
Daniel Veillardfdc91562002-07-01 21:52:03 +00003199 if (!cdata) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00003200 in = ctxt->input->cur;
3201 do {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003202get_more_space:
3203 while (*in == 0x20) in++;
3204 if (*in == 0xA) {
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00003205 ctxt->input->line++; ctxt->input->col = 1;
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003206 in++;
3207 while (*in == 0xA) {
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00003208 ctxt->input->line++; ctxt->input->col = 1;
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003209 in++;
3210 }
3211 goto get_more_space;
3212 }
3213 if (*in == '<') {
3214 nbchar = in - ctxt->input->cur;
3215 if (nbchar > 0) {
3216 const xmlChar *tmp = ctxt->input->cur;
3217 ctxt->input->cur = in;
3218
Daniel Veillard34099b42004-11-04 17:34:35 +00003219 if ((ctxt->sax != NULL) &&
3220 (ctxt->sax->ignorableWhitespace !=
3221 ctxt->sax->characters)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003222 if (areBlanks(ctxt, tmp, nbchar, 1)) {
Daniel Veillard32acf0c2005-03-31 14:12:37 +00003223 if (ctxt->sax->ignorableWhitespace != NULL)
3224 ctxt->sax->ignorableWhitespace(ctxt->userData,
3225 tmp, nbchar);
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003226 } else if (ctxt->sax->characters != NULL)
3227 ctxt->sax->characters(ctxt->userData,
3228 tmp, nbchar);
Daniel Veillard34099b42004-11-04 17:34:35 +00003229 } else if ((ctxt->sax != NULL) &&
3230 (ctxt->sax->characters != NULL)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003231 ctxt->sax->characters(ctxt->userData,
3232 tmp, nbchar);
3233 }
3234 }
3235 return;
3236 }
Daniel Veillard0714c5b2005-01-23 00:01:01 +00003237
Daniel Veillard3ed155f2001-04-29 19:56:59 +00003238get_more:
Daniel Veillard0714c5b2005-01-23 00:01:01 +00003239 ccol = ctxt->input->col;
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003240 while (((*in > ']') && (*in <= 0x7F)) ||
3241 ((*in > '&') && (*in < '<')) ||
3242 ((*in > '<') && (*in < ']')) ||
3243 ((*in >= 0x20) && (*in < '&')) ||
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00003244 (*in == 0x09)) {
3245 in++;
Daniel Veillard0714c5b2005-01-23 00:01:01 +00003246 ccol++;
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00003247 }
Daniel Veillard0714c5b2005-01-23 00:01:01 +00003248 ctxt->input->col = ccol;
Daniel Veillard561b7f82002-03-20 21:55:57 +00003249 if (*in == 0xA) {
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00003250 ctxt->input->line++; ctxt->input->col = 1;
Daniel Veillard3ed155f2001-04-29 19:56:59 +00003251 in++;
Daniel Veillard561b7f82002-03-20 21:55:57 +00003252 while (*in == 0xA) {
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00003253 ctxt->input->line++; ctxt->input->col = 1;
Daniel Veillard3ed155f2001-04-29 19:56:59 +00003254 in++;
3255 }
3256 goto get_more;
Daniel Veillard561b7f82002-03-20 21:55:57 +00003257 }
3258 if (*in == ']') {
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00003259 if ((in[1] == ']') && (in[2] == '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003260 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00003261 ctxt->input->cur = in;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00003262 return;
3263 }
3264 in++;
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00003265 ctxt->input->col++;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00003266 goto get_more;
3267 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00003268 nbchar = in - ctxt->input->cur;
Daniel Veillard80f32572001-03-07 19:45:40 +00003269 if (nbchar > 0) {
Daniel Veillard34099b42004-11-04 17:34:35 +00003270 if ((ctxt->sax != NULL) &&
3271 (ctxt->sax->ignorableWhitespace !=
Daniel Veillard40412cd2003-09-03 13:28:32 +00003272 ctxt->sax->characters) &&
William M. Brack76e95df2003-10-18 16:20:14 +00003273 (IS_BLANK_CH(*ctxt->input->cur))) {
Daniel Veillarda7374592001-05-10 14:17:55 +00003274 const xmlChar *tmp = ctxt->input->cur;
3275 ctxt->input->cur = in;
Daniel Veillard40412cd2003-09-03 13:28:32 +00003276
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003277 if (areBlanks(ctxt, tmp, nbchar, 0)) {
Daniel Veillard32acf0c2005-03-31 14:12:37 +00003278 if (ctxt->sax->ignorableWhitespace != NULL)
3279 ctxt->sax->ignorableWhitespace(ctxt->userData,
3280 tmp, nbchar);
Daniel Veillard40412cd2003-09-03 13:28:32 +00003281 } else if (ctxt->sax->characters != NULL)
3282 ctxt->sax->characters(ctxt->userData,
3283 tmp, nbchar);
Daniel Veillard3ed27bd2001-06-17 17:58:17 +00003284 line = ctxt->input->line;
3285 col = ctxt->input->col;
Daniel Veillard34099b42004-11-04 17:34:35 +00003286 } else if (ctxt->sax != NULL) {
Daniel Veillard80f32572001-03-07 19:45:40 +00003287 if (ctxt->sax->characters != NULL)
3288 ctxt->sax->characters(ctxt->userData,
3289 ctxt->input->cur, nbchar);
Daniel Veillard3ed27bd2001-06-17 17:58:17 +00003290 line = ctxt->input->line;
3291 col = ctxt->input->col;
Daniel Veillard80f32572001-03-07 19:45:40 +00003292 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00003293 }
3294 ctxt->input->cur = in;
Daniel Veillard561b7f82002-03-20 21:55:57 +00003295 if (*in == 0xD) {
3296 in++;
3297 if (*in == 0xA) {
3298 ctxt->input->cur = in;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003299 in++;
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00003300 ctxt->input->line++; ctxt->input->col = 1;
Daniel Veillard561b7f82002-03-20 21:55:57 +00003301 continue; /* while */
Daniel Veillard48b2f892001-02-25 16:11:03 +00003302 }
Daniel Veillard561b7f82002-03-20 21:55:57 +00003303 in--;
3304 }
3305 if (*in == '<') {
3306 return;
3307 }
3308 if (*in == '&') {
3309 return;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003310 }
3311 SHRINK;
3312 GROW;
3313 in = ctxt->input->cur;
William M. Brackc07329e2003-09-08 01:57:30 +00003314 } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
Daniel Veillard48b2f892001-02-25 16:11:03 +00003315 nbchar = 0;
3316 }
Daniel Veillard50582112001-03-26 22:52:16 +00003317 ctxt->input->line = line;
3318 ctxt->input->col = col;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003319 xmlParseCharDataComplex(ctxt, cdata);
3320}
3321
Daniel Veillard01c13b52002-12-10 15:19:08 +00003322/**
3323 * xmlParseCharDataComplex:
3324 * @ctxt: an XML parser context
3325 * @cdata: int indicating whether we are within a CDATA section
3326 *
3327 * parse a CharData section.this is the fallback function
3328 * of xmlParseCharData() when the parsing requires handling
3329 * of non-ASCII characters.
3330 */
Daniel Veillard48b2f892001-02-25 16:11:03 +00003331void
3332xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) {
Owen Taylor3473f882001-02-23 17:55:21 +00003333 xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
3334 int nbchar = 0;
3335 int cur, l;
3336 int count = 0;
3337
3338 SHRINK;
3339 GROW;
3340 cur = CUR_CHAR(l);
Daniel Veillardfdc91562002-07-01 21:52:03 +00003341 while ((cur != '<') && /* checked */
3342 (cur != '&') &&
William M. Brack871611b2003-10-18 04:53:14 +00003343 (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
Owen Taylor3473f882001-02-23 17:55:21 +00003344 if ((cur == ']') && (NXT(1) == ']') &&
3345 (NXT(2) == '>')) {
3346 if (cdata) break;
3347 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003348 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003349 }
3350 }
3351 COPY_BUF(l,buf,nbchar,cur);
3352 if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
Daniel Veillard092643b2003-09-25 14:29:29 +00003353 buf[nbchar] = 0;
3354
Owen Taylor3473f882001-02-23 17:55:21 +00003355 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003356 * OK the segment is to be consumed as chars.
Owen Taylor3473f882001-02-23 17:55:21 +00003357 */
3358 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003359 if (areBlanks(ctxt, buf, nbchar, 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003360 if (ctxt->sax->ignorableWhitespace != NULL)
3361 ctxt->sax->ignorableWhitespace(ctxt->userData,
3362 buf, nbchar);
3363 } else {
3364 if (ctxt->sax->characters != NULL)
3365 ctxt->sax->characters(ctxt->userData, buf, nbchar);
3366 }
3367 }
3368 nbchar = 0;
3369 }
3370 count++;
3371 if (count > 50) {
3372 GROW;
3373 count = 0;
3374 }
3375 NEXTL(l);
3376 cur = CUR_CHAR(l);
3377 }
3378 if (nbchar != 0) {
Daniel Veillard092643b2003-09-25 14:29:29 +00003379 buf[nbchar] = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003380 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003381 * OK the segment is to be consumed as chars.
Owen Taylor3473f882001-02-23 17:55:21 +00003382 */
3383 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003384 if (areBlanks(ctxt, buf, nbchar, 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003385 if (ctxt->sax->ignorableWhitespace != NULL)
3386 ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
3387 } else {
3388 if (ctxt->sax->characters != NULL)
3389 ctxt->sax->characters(ctxt->userData, buf, nbchar);
3390 }
3391 }
3392 }
3393}
3394
3395/**
3396 * xmlParseExternalID:
3397 * @ctxt: an XML parser context
3398 * @publicID: a xmlChar** receiving PubidLiteral
3399 * @strict: indicate whether we should restrict parsing to only
3400 * production [75], see NOTE below
3401 *
3402 * Parse an External ID or a Public ID
3403 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003404 * NOTE: Productions [75] and [83] interact badly since [75] can generate
Owen Taylor3473f882001-02-23 17:55:21 +00003405 * 'PUBLIC' S PubidLiteral S SystemLiteral
3406 *
3407 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
3408 * | 'PUBLIC' S PubidLiteral S SystemLiteral
3409 *
3410 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
3411 *
3412 * Returns the function returns SystemLiteral and in the second
3413 * case publicID receives PubidLiteral, is strict is off
3414 * it is possible to return NULL and have publicID set.
3415 */
3416
3417xmlChar *
3418xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
3419 xmlChar *URI = NULL;
3420
3421 SHRINK;
Daniel Veillard146c9122001-03-22 15:22:27 +00003422
3423 *publicID = NULL;
Daniel Veillarda07050d2003-10-19 14:46:32 +00003424 if (CMP6(CUR_PTR, 'S', 'Y', 'S', 'T', 'E', 'M')) {
Owen Taylor3473f882001-02-23 17:55:21 +00003425 SKIP(6);
William M. Brack76e95df2003-10-18 16:20:14 +00003426 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003427 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
3428 "Space required after 'SYSTEM'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003429 }
3430 SKIP_BLANKS;
3431 URI = xmlParseSystemLiteral(ctxt);
3432 if (URI == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003433 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003434 }
Daniel Veillarda07050d2003-10-19 14:46:32 +00003435 } else if (CMP6(CUR_PTR, 'P', 'U', 'B', 'L', 'I', 'C')) {
Owen Taylor3473f882001-02-23 17:55:21 +00003436 SKIP(6);
William M. Brack76e95df2003-10-18 16:20:14 +00003437 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003438 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00003439 "Space required after 'PUBLIC'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003440 }
3441 SKIP_BLANKS;
3442 *publicID = xmlParsePubidLiteral(ctxt);
3443 if (*publicID == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003444 xmlFatalErr(ctxt, XML_ERR_PUBID_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003445 }
3446 if (strict) {
3447 /*
3448 * We don't handle [83] so "S SystemLiteral" is required.
3449 */
William M. Brack76e95df2003-10-18 16:20:14 +00003450 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003451 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00003452 "Space required after the Public Identifier\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003453 }
3454 } else {
3455 /*
3456 * We handle [83] so we return immediately, if
3457 * "S SystemLiteral" is not detected. From a purely parsing
3458 * point of view that's a nice mess.
3459 */
3460 const xmlChar *ptr;
3461 GROW;
3462
3463 ptr = CUR_PTR;
William M. Brack76e95df2003-10-18 16:20:14 +00003464 if (!IS_BLANK_CH(*ptr)) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003465
William M. Brack76e95df2003-10-18 16:20:14 +00003466 while (IS_BLANK_CH(*ptr)) ptr++; /* TODO: dangerous, fix ! */
Owen Taylor3473f882001-02-23 17:55:21 +00003467 if ((*ptr != '\'') && (*ptr != '"')) return(NULL);
3468 }
3469 SKIP_BLANKS;
3470 URI = xmlParseSystemLiteral(ctxt);
3471 if (URI == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003472 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003473 }
3474 }
3475 return(URI);
3476}
3477
3478/**
Daniel Veillard4c778d82005-01-23 17:37:44 +00003479 * xmlParseCommentComplex:
Owen Taylor3473f882001-02-23 17:55:21 +00003480 * @ctxt: an XML parser context
Daniel Veillard4c778d82005-01-23 17:37:44 +00003481 * @buf: the already parsed part of the buffer
3482 * @len: number of bytes filles in the buffer
3483 * @size: allocated size of the buffer
Owen Taylor3473f882001-02-23 17:55:21 +00003484 *
3485 * Skip an XML (SGML) comment <!-- .... -->
3486 * The spec says that "For compatibility, the string "--" (double-hyphen)
3487 * must not occur within comments. "
Daniel Veillard4c778d82005-01-23 17:37:44 +00003488 * This is the slow routine in case the accelerator for ascii didn't work
Owen Taylor3473f882001-02-23 17:55:21 +00003489 *
3490 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
3491 */
Daniel Veillard4c778d82005-01-23 17:37:44 +00003492static void
3493xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar *buf, int len, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +00003494 int q, ql;
3495 int r, rl;
3496 int cur, l;
Owen Taylor3473f882001-02-23 17:55:21 +00003497 xmlParserInputPtr input = ctxt->input;
3498 int count = 0;
3499
Owen Taylor3473f882001-02-23 17:55:21 +00003500 if (buf == NULL) {
Daniel Veillard4c778d82005-01-23 17:37:44 +00003501 len = 0;
3502 size = XML_PARSER_BUFFER_SIZE;
3503 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
3504 if (buf == NULL) {
3505 xmlErrMemory(ctxt, NULL);
3506 return;
3507 }
Owen Taylor3473f882001-02-23 17:55:21 +00003508 }
3509 q = CUR_CHAR(ql);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00003510 if (q == 0)
3511 goto not_terminated;
Owen Taylor3473f882001-02-23 17:55:21 +00003512 NEXTL(ql);
3513 r = CUR_CHAR(rl);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00003514 if (r == 0)
3515 goto not_terminated;
Owen Taylor3473f882001-02-23 17:55:21 +00003516 NEXTL(rl);
3517 cur = CUR_CHAR(l);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00003518 if (cur == 0)
3519 goto not_terminated;
William M. Brack871611b2003-10-18 04:53:14 +00003520 while (IS_CHAR(cur) && /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00003521 ((cur != '>') ||
3522 (r != '-') || (q != '-'))) {
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00003523 if ((r == '-') && (q == '-')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003524 xmlFatalErr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003525 }
3526 if (len + 5 >= size) {
William M. Bracka3215c72004-07-31 16:24:01 +00003527 xmlChar *new_buf;
Owen Taylor3473f882001-02-23 17:55:21 +00003528 size *= 2;
William M. Bracka3215c72004-07-31 16:24:01 +00003529 new_buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3530 if (new_buf == NULL) {
3531 xmlFree (buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003532 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003533 return;
3534 }
William M. Bracka3215c72004-07-31 16:24:01 +00003535 buf = new_buf;
Owen Taylor3473f882001-02-23 17:55:21 +00003536 }
3537 COPY_BUF(ql,buf,len,q);
3538 q = r;
3539 ql = rl;
3540 r = cur;
3541 rl = l;
3542
3543 count++;
3544 if (count > 50) {
3545 GROW;
3546 count = 0;
3547 }
3548 NEXTL(l);
3549 cur = CUR_CHAR(l);
3550 if (cur == 0) {
3551 SHRINK;
3552 GROW;
3553 cur = CUR_CHAR(l);
3554 }
3555 }
3556 buf[len] = 0;
William M. Brack871611b2003-10-18 04:53:14 +00003557 if (!IS_CHAR(cur)) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00003558 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00003559 "Comment not terminated \n<!--%.50s\n", buf);
Owen Taylor3473f882001-02-23 17:55:21 +00003560 xmlFree(buf);
3561 } else {
3562 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003563 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
3564 "Comment doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003565 }
3566 NEXT;
3567 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
3568 (!ctxt->disableSAX))
3569 ctxt->sax->comment(ctxt->userData, buf);
3570 xmlFree(buf);
3571 }
Daniel Veillard4aede2e2003-10-17 12:43:59 +00003572 return;
3573not_terminated:
3574 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
3575 "Comment not terminated\n", NULL);
3576 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00003577}
Daniel Veillard4c778d82005-01-23 17:37:44 +00003578/**
3579 * xmlParseComment:
3580 * @ctxt: an XML parser context
3581 *
3582 * Skip an XML (SGML) comment <!-- .... -->
3583 * The spec says that "For compatibility, the string "--" (double-hyphen)
3584 * must not occur within comments. "
3585 *
3586 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
3587 */
3588void
3589xmlParseComment(xmlParserCtxtPtr ctxt) {
3590 xmlChar *buf = NULL;
3591 int size = XML_PARSER_BUFFER_SIZE;
Daniel Veillard316a5c32005-01-23 22:56:39 +00003592 int len = 0;
Daniel Veillard4c778d82005-01-23 17:37:44 +00003593 xmlParserInputState state;
3594 const xmlChar *in;
3595 int nbchar = 0, ccol;
3596
3597 /*
3598 * Check that there is a comment right here.
3599 */
3600 if ((RAW != '<') || (NXT(1) != '!') ||
3601 (NXT(2) != '-') || (NXT(3) != '-')) return;
3602
3603 state = ctxt->instate;
3604 ctxt->instate = XML_PARSER_COMMENT;
3605 SKIP(4);
3606 SHRINK;
3607 GROW;
3608
3609 /*
3610 * Accelerated common case where input don't need to be
3611 * modified before passing it to the handler.
3612 */
3613 in = ctxt->input->cur;
3614 do {
3615 if (*in == 0xA) {
3616 ctxt->input->line++; ctxt->input->col = 1;
3617 in++;
3618 while (*in == 0xA) {
3619 ctxt->input->line++; ctxt->input->col = 1;
3620 in++;
3621 }
3622 }
3623get_more:
3624 ccol = ctxt->input->col;
3625 while (((*in > '-') && (*in <= 0x7F)) ||
3626 ((*in >= 0x20) && (*in < '-')) ||
3627 (*in == 0x09)) {
3628 in++;
3629 ccol++;
3630 }
3631 ctxt->input->col = ccol;
3632 if (*in == 0xA) {
3633 ctxt->input->line++; ctxt->input->col = 1;
3634 in++;
3635 while (*in == 0xA) {
3636 ctxt->input->line++; ctxt->input->col = 1;
3637 in++;
3638 }
3639 goto get_more;
3640 }
3641 nbchar = in - ctxt->input->cur;
3642 /*
3643 * save current set of data
3644 */
3645 if (nbchar > 0) {
3646 if ((ctxt->sax != NULL) &&
3647 (ctxt->sax->comment != NULL)) {
3648 if (buf == NULL) {
3649 if ((*in == '-') && (in[1] == '-'))
3650 size = nbchar + 1;
3651 else
3652 size = XML_PARSER_BUFFER_SIZE + nbchar;
3653 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
3654 if (buf == NULL) {
3655 xmlErrMemory(ctxt, NULL);
3656 ctxt->instate = state;
3657 return;
3658 }
3659 len = 0;
3660 } else if (len + nbchar + 1 >= size) {
3661 xmlChar *new_buf;
3662 size += len + nbchar + XML_PARSER_BUFFER_SIZE;
3663 new_buf = (xmlChar *) xmlRealloc(buf,
3664 size * sizeof(xmlChar));
3665 if (new_buf == NULL) {
3666 xmlFree (buf);
3667 xmlErrMemory(ctxt, NULL);
3668 ctxt->instate = state;
3669 return;
3670 }
3671 buf = new_buf;
3672 }
3673 memcpy(&buf[len], ctxt->input->cur, nbchar);
3674 len += nbchar;
3675 buf[len] = 0;
3676 }
3677 }
3678 ctxt->input->cur = in;
3679 if (*in == 0xA)
3680
3681 if (*in == 0xD) {
3682 in++;
3683 if (*in == 0xA) {
3684 ctxt->input->cur = in;
3685 in++;
3686 ctxt->input->line++; ctxt->input->col = 1;
3687 continue; /* while */
3688 }
3689 in--;
3690 }
3691 SHRINK;
3692 GROW;
3693 in = ctxt->input->cur;
3694 if (*in == '-') {
3695 if (in[1] == '-') {
3696 if (in[2] == '>') {
3697 SKIP(3);
3698 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
3699 (!ctxt->disableSAX)) {
3700 if (buf != NULL)
3701 ctxt->sax->comment(ctxt->userData, buf);
3702 else
3703 ctxt->sax->comment(ctxt->userData, BAD_CAST "");
3704 }
3705 if (buf != NULL)
3706 xmlFree(buf);
3707 ctxt->instate = state;
3708 return;
3709 }
3710 if (buf != NULL)
3711 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
3712 "Comment not terminated \n<!--%.50s\n",
3713 buf);
3714 else
3715 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
3716 "Comment not terminated \n", NULL);
3717 in++;
3718 ctxt->input->col++;
3719 }
3720 in++;
3721 ctxt->input->col++;
3722 goto get_more;
3723 }
3724 } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
3725 xmlParseCommentComplex(ctxt, buf, len, size);
3726 ctxt->instate = state;
3727 return;
3728}
3729
Owen Taylor3473f882001-02-23 17:55:21 +00003730
3731/**
3732 * xmlParsePITarget:
3733 * @ctxt: an XML parser context
3734 *
3735 * parse the name of a PI
3736 *
3737 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
3738 *
3739 * Returns the PITarget name or NULL
3740 */
3741
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003742const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00003743xmlParsePITarget(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003744 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00003745
3746 name = xmlParseName(ctxt);
3747 if ((name != NULL) &&
3748 ((name[0] == 'x') || (name[0] == 'X')) &&
3749 ((name[1] == 'm') || (name[1] == 'M')) &&
3750 ((name[2] == 'l') || (name[2] == 'L'))) {
3751 int i;
3752 if ((name[0] == 'x') && (name[1] == 'm') &&
3753 (name[2] == 'l') && (name[3] == 0)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003754 xmlFatalErrMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
Owen Taylor3473f882001-02-23 17:55:21 +00003755 "XML declaration allowed only at the start of the document\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003756 return(name);
3757 } else if (name[3] == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003758 xmlFatalErr(ctxt, XML_ERR_RESERVED_XML_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003759 return(name);
3760 }
3761 for (i = 0;;i++) {
3762 if (xmlW3CPIs[i] == NULL) break;
3763 if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
3764 return(name);
3765 }
Daniel Veillard24eb9782003-10-04 21:08:09 +00003766 xmlWarningMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
3767 "xmlParsePITarget: invalid name prefix 'xml'\n",
3768 NULL, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003769 }
3770 return(name);
3771}
3772
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00003773#ifdef LIBXML_CATALOG_ENABLED
3774/**
3775 * xmlParseCatalogPI:
3776 * @ctxt: an XML parser context
3777 * @catalog: the PI value string
3778 *
3779 * parse an XML Catalog Processing Instruction.
3780 *
3781 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
3782 *
3783 * Occurs only if allowed by the user and if happening in the Misc
3784 * part of the document before any doctype informations
3785 * This will add the given catalog to the parsing context in order
3786 * to be used if there is a resolution need further down in the document
3787 */
3788
3789static void
3790xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
3791 xmlChar *URL = NULL;
3792 const xmlChar *tmp, *base;
3793 xmlChar marker;
3794
3795 tmp = catalog;
William M. Brack76e95df2003-10-18 16:20:14 +00003796 while (IS_BLANK_CH(*tmp)) tmp++;
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00003797 if (xmlStrncmp(tmp, BAD_CAST"catalog", 7))
3798 goto error;
3799 tmp += 7;
William M. Brack76e95df2003-10-18 16:20:14 +00003800 while (IS_BLANK_CH(*tmp)) tmp++;
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00003801 if (*tmp != '=') {
3802 return;
3803 }
3804 tmp++;
William M. Brack76e95df2003-10-18 16:20:14 +00003805 while (IS_BLANK_CH(*tmp)) tmp++;
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00003806 marker = *tmp;
3807 if ((marker != '\'') && (marker != '"'))
3808 goto error;
3809 tmp++;
3810 base = tmp;
3811 while ((*tmp != 0) && (*tmp != marker)) tmp++;
3812 if (*tmp == 0)
3813 goto error;
3814 URL = xmlStrndup(base, tmp - base);
3815 tmp++;
William M. Brack76e95df2003-10-18 16:20:14 +00003816 while (IS_BLANK_CH(*tmp)) tmp++;
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00003817 if (*tmp != 0)
3818 goto error;
3819
3820 if (URL != NULL) {
3821 ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, URL);
3822 xmlFree(URL);
3823 }
3824 return;
3825
3826error:
Daniel Veillard24eb9782003-10-04 21:08:09 +00003827 xmlWarningMsg(ctxt, XML_WAR_CATALOG_PI,
3828 "Catalog PI syntax error: %s\n",
3829 catalog, NULL);
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00003830 if (URL != NULL)
3831 xmlFree(URL);
3832}
3833#endif
3834
Owen Taylor3473f882001-02-23 17:55:21 +00003835/**
3836 * xmlParsePI:
3837 * @ctxt: an XML parser context
3838 *
3839 * parse an XML Processing Instruction.
3840 *
3841 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
3842 *
3843 * The processing is transfered to SAX once parsed.
3844 */
3845
3846void
3847xmlParsePI(xmlParserCtxtPtr ctxt) {
3848 xmlChar *buf = NULL;
3849 int len = 0;
3850 int size = XML_PARSER_BUFFER_SIZE;
3851 int cur, l;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003852 const xmlChar *target;
Owen Taylor3473f882001-02-23 17:55:21 +00003853 xmlParserInputState state;
3854 int count = 0;
3855
3856 if ((RAW == '<') && (NXT(1) == '?')) {
3857 xmlParserInputPtr input = ctxt->input;
3858 state = ctxt->instate;
3859 ctxt->instate = XML_PARSER_PI;
3860 /*
3861 * this is a Processing Instruction.
3862 */
3863 SKIP(2);
3864 SHRINK;
3865
3866 /*
3867 * Parse the target name and check for special support like
3868 * namespace.
3869 */
3870 target = xmlParsePITarget(ctxt);
3871 if (target != NULL) {
3872 if ((RAW == '?') && (NXT(1) == '>')) {
3873 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003874 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
3875 "PI declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003876 }
3877 SKIP(2);
3878
3879 /*
3880 * SAX: PI detected.
3881 */
3882 if ((ctxt->sax) && (!ctxt->disableSAX) &&
3883 (ctxt->sax->processingInstruction != NULL))
3884 ctxt->sax->processingInstruction(ctxt->userData,
3885 target, NULL);
3886 ctxt->instate = state;
Owen Taylor3473f882001-02-23 17:55:21 +00003887 return;
3888 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003889 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003890 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003891 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003892 ctxt->instate = state;
3893 return;
3894 }
3895 cur = CUR;
3896 if (!IS_BLANK(cur)) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00003897 xmlFatalErrMsgStr(ctxt, XML_ERR_SPACE_REQUIRED,
3898 "ParsePI: PI %s space expected\n", target);
Owen Taylor3473f882001-02-23 17:55:21 +00003899 }
3900 SKIP_BLANKS;
3901 cur = CUR_CHAR(l);
William M. Brack871611b2003-10-18 04:53:14 +00003902 while (IS_CHAR(cur) && /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00003903 ((cur != '?') || (NXT(1) != '>'))) {
3904 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003905 xmlChar *tmp;
3906
Owen Taylor3473f882001-02-23 17:55:21 +00003907 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003908 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3909 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003910 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00003911 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00003912 ctxt->instate = state;
3913 return;
3914 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003915 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003916 }
3917 count++;
3918 if (count > 50) {
3919 GROW;
3920 count = 0;
3921 }
3922 COPY_BUF(l,buf,len,cur);
3923 NEXTL(l);
3924 cur = CUR_CHAR(l);
3925 if (cur == 0) {
3926 SHRINK;
3927 GROW;
3928 cur = CUR_CHAR(l);
3929 }
3930 }
3931 buf[len] = 0;
3932 if (cur != '?') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00003933 xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
3934 "ParsePI: PI %s never end ...\n", target);
Owen Taylor3473f882001-02-23 17:55:21 +00003935 } else {
3936 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003937 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
3938 "PI declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003939 }
3940 SKIP(2);
3941
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00003942#ifdef LIBXML_CATALOG_ENABLED
3943 if (((state == XML_PARSER_MISC) ||
3944 (state == XML_PARSER_START)) &&
3945 (xmlStrEqual(target, XML_CATALOG_PI))) {
3946 xmlCatalogAllow allow = xmlCatalogGetDefaults();
3947 if ((allow == XML_CATA_ALLOW_DOCUMENT) ||
3948 (allow == XML_CATA_ALLOW_ALL))
3949 xmlParseCatalogPI(ctxt, buf);
3950 }
3951#endif
3952
3953
Owen Taylor3473f882001-02-23 17:55:21 +00003954 /*
3955 * SAX: PI detected.
3956 */
3957 if ((ctxt->sax) && (!ctxt->disableSAX) &&
3958 (ctxt->sax->processingInstruction != NULL))
3959 ctxt->sax->processingInstruction(ctxt->userData,
3960 target, buf);
3961 }
3962 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00003963 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003964 xmlFatalErr(ctxt, XML_ERR_PI_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003965 }
3966 ctxt->instate = state;
3967 }
3968}
3969
3970/**
3971 * xmlParseNotationDecl:
3972 * @ctxt: an XML parser context
3973 *
3974 * parse a notation declaration
3975 *
3976 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
3977 *
3978 * Hence there is actually 3 choices:
3979 * 'PUBLIC' S PubidLiteral
3980 * 'PUBLIC' S PubidLiteral S SystemLiteral
3981 * and 'SYSTEM' S SystemLiteral
3982 *
3983 * See the NOTE on xmlParseExternalID().
3984 */
3985
3986void
3987xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003988 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00003989 xmlChar *Pubid;
3990 xmlChar *Systemid;
3991
Daniel Veillarda07050d2003-10-19 14:46:32 +00003992 if (CMP10(CUR_PTR, '<', '!', 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
Owen Taylor3473f882001-02-23 17:55:21 +00003993 xmlParserInputPtr input = ctxt->input;
3994 SHRINK;
3995 SKIP(10);
William M. Brack76e95df2003-10-18 16:20:14 +00003996 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003997 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
3998 "Space required after '<!NOTATION'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003999 return;
4000 }
4001 SKIP_BLANKS;
4002
Daniel Veillard76d66f42001-05-16 21:05:17 +00004003 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004004 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004005 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004006 return;
4007 }
William M. Brack76e95df2003-10-18 16:20:14 +00004008 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004009 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004010 "Space required after the NOTATION name'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004011 return;
4012 }
4013 SKIP_BLANKS;
4014
4015 /*
4016 * Parse the IDs.
4017 */
4018 Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
4019 SKIP_BLANKS;
4020
4021 if (RAW == '>') {
4022 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004023 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4024 "Notation declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004025 }
4026 NEXT;
4027 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4028 (ctxt->sax->notationDecl != NULL))
4029 ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
4030 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004031 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004032 }
Owen Taylor3473f882001-02-23 17:55:21 +00004033 if (Systemid != NULL) xmlFree(Systemid);
4034 if (Pubid != NULL) xmlFree(Pubid);
4035 }
4036}
4037
4038/**
4039 * xmlParseEntityDecl:
4040 * @ctxt: an XML parser context
4041 *
4042 * parse <!ENTITY declarations
4043 *
4044 * [70] EntityDecl ::= GEDecl | PEDecl
4045 *
4046 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
4047 *
4048 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
4049 *
4050 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
4051 *
4052 * [74] PEDef ::= EntityValue | ExternalID
4053 *
4054 * [76] NDataDecl ::= S 'NDATA' S Name
4055 *
4056 * [ VC: Notation Declared ]
4057 * The Name must match the declared name of a notation.
4058 */
4059
4060void
4061xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004062 const xmlChar *name = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00004063 xmlChar *value = NULL;
4064 xmlChar *URI = NULL, *literal = NULL;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004065 const xmlChar *ndata = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00004066 int isParameter = 0;
4067 xmlChar *orig = NULL;
Daniel Veillardf5582f12002-06-11 10:08:16 +00004068 int skipped;
Owen Taylor3473f882001-02-23 17:55:21 +00004069
Daniel Veillard4c778d82005-01-23 17:37:44 +00004070 /* GROW; done in the caller */
Daniel Veillarda07050d2003-10-19 14:46:32 +00004071 if (CMP8(CUR_PTR, '<', '!', 'E', 'N', 'T', 'I', 'T', 'Y')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004072 xmlParserInputPtr input = ctxt->input;
Owen Taylor3473f882001-02-23 17:55:21 +00004073 SHRINK;
4074 SKIP(8);
Daniel Veillardf5582f12002-06-11 10:08:16 +00004075 skipped = SKIP_BLANKS;
4076 if (skipped == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004077 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4078 "Space required after '<!ENTITY'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004079 }
Owen Taylor3473f882001-02-23 17:55:21 +00004080
4081 if (RAW == '%') {
4082 NEXT;
Daniel Veillardf5582f12002-06-11 10:08:16 +00004083 skipped = SKIP_BLANKS;
4084 if (skipped == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004085 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4086 "Space required after '%'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004087 }
Owen Taylor3473f882001-02-23 17:55:21 +00004088 isParameter = 1;
4089 }
4090
Daniel Veillard76d66f42001-05-16 21:05:17 +00004091 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004092 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004093 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
4094 "xmlParseEntityDecl: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004095 return;
4096 }
Daniel Veillardf5582f12002-06-11 10:08:16 +00004097 skipped = SKIP_BLANKS;
4098 if (skipped == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004099 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4100 "Space required after the entity name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004101 }
Owen Taylor3473f882001-02-23 17:55:21 +00004102
Daniel Veillardf5582f12002-06-11 10:08:16 +00004103 ctxt->instate = XML_PARSER_ENTITY_DECL;
Owen Taylor3473f882001-02-23 17:55:21 +00004104 /*
4105 * handle the various case of definitions...
4106 */
4107 if (isParameter) {
4108 if ((RAW == '"') || (RAW == '\'')) {
4109 value = xmlParseEntityValue(ctxt, &orig);
4110 if (value) {
4111 if ((ctxt->sax != NULL) &&
4112 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
4113 ctxt->sax->entityDecl(ctxt->userData, name,
4114 XML_INTERNAL_PARAMETER_ENTITY,
4115 NULL, NULL, value);
4116 }
4117 } else {
4118 URI = xmlParseExternalID(ctxt, &literal, 1);
4119 if ((URI == NULL) && (literal == NULL)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004120 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004121 }
4122 if (URI) {
4123 xmlURIPtr uri;
4124
4125 uri = xmlParseURI((const char *) URI);
4126 if (uri == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00004127 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
4128 "Invalid URI: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00004129 /*
4130 * This really ought to be a well formedness error
4131 * but the XML Core WG decided otherwise c.f. issue
4132 * E26 of the XML erratas.
4133 */
Owen Taylor3473f882001-02-23 17:55:21 +00004134 } else {
4135 if (uri->fragment != NULL) {
Daniel Veillard4a7ae502002-02-18 19:18:17 +00004136 /*
4137 * Okay this is foolish to block those but not
4138 * invalid URIs.
4139 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004140 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004141 } else {
4142 if ((ctxt->sax != NULL) &&
4143 (!ctxt->disableSAX) &&
4144 (ctxt->sax->entityDecl != NULL))
4145 ctxt->sax->entityDecl(ctxt->userData, name,
4146 XML_EXTERNAL_PARAMETER_ENTITY,
4147 literal, URI, NULL);
4148 }
4149 xmlFreeURI(uri);
4150 }
4151 }
4152 }
4153 } else {
4154 if ((RAW == '"') || (RAW == '\'')) {
4155 value = xmlParseEntityValue(ctxt, &orig);
4156 if ((ctxt->sax != NULL) &&
4157 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
4158 ctxt->sax->entityDecl(ctxt->userData, name,
4159 XML_INTERNAL_GENERAL_ENTITY,
4160 NULL, NULL, value);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004161 /*
4162 * For expat compatibility in SAX mode.
4163 */
4164 if ((ctxt->myDoc == NULL) ||
4165 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
4166 if (ctxt->myDoc == NULL) {
4167 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
4168 }
4169 if (ctxt->myDoc->intSubset == NULL)
4170 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
4171 BAD_CAST "fake", NULL, NULL);
4172
Daniel Veillard1af9a412003-08-20 22:54:39 +00004173 xmlSAX2EntityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY,
4174 NULL, NULL, value);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004175 }
Owen Taylor3473f882001-02-23 17:55:21 +00004176 } else {
4177 URI = xmlParseExternalID(ctxt, &literal, 1);
4178 if ((URI == NULL) && (literal == NULL)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004179 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004180 }
4181 if (URI) {
4182 xmlURIPtr uri;
4183
4184 uri = xmlParseURI((const char *)URI);
4185 if (uri == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00004186 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
4187 "Invalid URI: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00004188 /*
4189 * This really ought to be a well formedness error
4190 * but the XML Core WG decided otherwise c.f. issue
4191 * E26 of the XML erratas.
4192 */
Owen Taylor3473f882001-02-23 17:55:21 +00004193 } else {
4194 if (uri->fragment != NULL) {
Daniel Veillard4a7ae502002-02-18 19:18:17 +00004195 /*
4196 * Okay this is foolish to block those but not
4197 * invalid URIs.
4198 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004199 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004200 }
4201 xmlFreeURI(uri);
4202 }
4203 }
William M. Brack76e95df2003-10-18 16:20:14 +00004204 if ((RAW != '>') && (!IS_BLANK_CH(CUR))) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004205 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4206 "Space required before 'NDATA'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004207 }
4208 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00004209 if (CMP5(CUR_PTR, 'N', 'D', 'A', 'T', 'A')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004210 SKIP(5);
William M. Brack76e95df2003-10-18 16:20:14 +00004211 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004212 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4213 "Space required after 'NDATA'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004214 }
4215 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004216 ndata = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004217 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4218 (ctxt->sax->unparsedEntityDecl != NULL))
4219 ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
4220 literal, URI, ndata);
4221 } else {
4222 if ((ctxt->sax != NULL) &&
4223 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
4224 ctxt->sax->entityDecl(ctxt->userData, name,
4225 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
4226 literal, URI, NULL);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004227 /*
4228 * For expat compatibility in SAX mode.
4229 * assuming the entity repalcement was asked for
4230 */
4231 if ((ctxt->replaceEntities != 0) &&
4232 ((ctxt->myDoc == NULL) ||
4233 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) {
4234 if (ctxt->myDoc == NULL) {
4235 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
4236 }
4237
4238 if (ctxt->myDoc->intSubset == NULL)
4239 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
4240 BAD_CAST "fake", NULL, NULL);
Daniel Veillard1af9a412003-08-20 22:54:39 +00004241 xmlSAX2EntityDecl(ctxt, name,
4242 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
4243 literal, URI, NULL);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004244 }
Owen Taylor3473f882001-02-23 17:55:21 +00004245 }
4246 }
4247 }
4248 SKIP_BLANKS;
4249 if (RAW != '>') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00004250 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00004251 "xmlParseEntityDecl: entity %s not terminated\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00004252 } else {
4253 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004254 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4255 "Entity declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004256 }
4257 NEXT;
4258 }
4259 if (orig != NULL) {
4260 /*
4261 * Ugly mechanism to save the raw entity value.
4262 */
4263 xmlEntityPtr cur = NULL;
4264
4265 if (isParameter) {
4266 if ((ctxt->sax != NULL) &&
4267 (ctxt->sax->getParameterEntity != NULL))
4268 cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
4269 } else {
4270 if ((ctxt->sax != NULL) &&
4271 (ctxt->sax->getEntity != NULL))
4272 cur = ctxt->sax->getEntity(ctxt->userData, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004273 if ((cur == NULL) && (ctxt->userData==ctxt)) {
Daniel Veillard1af9a412003-08-20 22:54:39 +00004274 cur = xmlSAX2GetEntity(ctxt, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004275 }
Owen Taylor3473f882001-02-23 17:55:21 +00004276 }
4277 if (cur != NULL) {
4278 if (cur->orig != NULL)
4279 xmlFree(orig);
4280 else
4281 cur->orig = orig;
4282 } else
4283 xmlFree(orig);
4284 }
Owen Taylor3473f882001-02-23 17:55:21 +00004285 if (value != NULL) xmlFree(value);
4286 if (URI != NULL) xmlFree(URI);
4287 if (literal != NULL) xmlFree(literal);
Owen Taylor3473f882001-02-23 17:55:21 +00004288 }
4289}
4290
4291/**
4292 * xmlParseDefaultDecl:
4293 * @ctxt: an XML parser context
4294 * @value: Receive a possible fixed default value for the attribute
4295 *
4296 * Parse an attribute default declaration
4297 *
4298 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
4299 *
4300 * [ VC: Required Attribute ]
4301 * if the default declaration is the keyword #REQUIRED, then the
4302 * attribute must be specified for all elements of the type in the
4303 * attribute-list declaration.
4304 *
4305 * [ VC: Attribute Default Legal ]
4306 * The declared default value must meet the lexical constraints of
4307 * the declared attribute type c.f. xmlValidateAttributeDecl()
4308 *
4309 * [ VC: Fixed Attribute Default ]
4310 * if an attribute has a default value declared with the #FIXED
4311 * keyword, instances of that attribute must match the default value.
4312 *
4313 * [ WFC: No < in Attribute Values ]
4314 * handled in xmlParseAttValue()
4315 *
4316 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
4317 * or XML_ATTRIBUTE_FIXED.
4318 */
4319
4320int
4321xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
4322 int val;
4323 xmlChar *ret;
4324
4325 *value = NULL;
Daniel Veillarda07050d2003-10-19 14:46:32 +00004326 if (CMP9(CUR_PTR, '#', 'R', 'E', 'Q', 'U', 'I', 'R', 'E', 'D')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004327 SKIP(9);
4328 return(XML_ATTRIBUTE_REQUIRED);
4329 }
Daniel Veillarda07050d2003-10-19 14:46:32 +00004330 if (CMP8(CUR_PTR, '#', 'I', 'M', 'P', 'L', 'I', 'E', 'D')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004331 SKIP(8);
4332 return(XML_ATTRIBUTE_IMPLIED);
4333 }
4334 val = XML_ATTRIBUTE_NONE;
Daniel Veillarda07050d2003-10-19 14:46:32 +00004335 if (CMP6(CUR_PTR, '#', 'F', 'I', 'X', 'E', 'D')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004336 SKIP(6);
4337 val = XML_ATTRIBUTE_FIXED;
William M. Brack76e95df2003-10-18 16:20:14 +00004338 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004339 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4340 "Space required after '#FIXED'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004341 }
4342 SKIP_BLANKS;
4343 }
4344 ret = xmlParseAttValue(ctxt);
4345 ctxt->instate = XML_PARSER_DTD;
4346 if (ret == NULL) {
William M. Brack7b9154b2003-09-27 19:23:50 +00004347 xmlFatalErrMsg(ctxt, (xmlParserErrors)ctxt->errNo,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004348 "Attribute default value declaration error\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004349 } else
4350 *value = ret;
4351 return(val);
4352}
4353
4354/**
4355 * xmlParseNotationType:
4356 * @ctxt: an XML parser context
4357 *
4358 * parse an Notation attribute type.
4359 *
4360 * Note: the leading 'NOTATION' S part has already being parsed...
4361 *
4362 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
4363 *
4364 * [ VC: Notation Attributes ]
4365 * Values of this type must match one of the notation names included
4366 * in the declaration; all notation names in the declaration must be declared.
4367 *
4368 * Returns: the notation attribute tree built while parsing
4369 */
4370
4371xmlEnumerationPtr
4372xmlParseNotationType(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004373 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00004374 xmlEnumerationPtr ret = NULL, last = NULL, cur;
4375
4376 if (RAW != '(') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004377 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004378 return(NULL);
4379 }
4380 SHRINK;
4381 do {
4382 NEXT;
4383 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004384 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004385 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004386 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
4387 "Name expected in NOTATION declaration\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004388 return(ret);
4389 }
4390 cur = xmlCreateEnumeration(name);
Owen Taylor3473f882001-02-23 17:55:21 +00004391 if (cur == NULL) return(ret);
4392 if (last == NULL) ret = last = cur;
4393 else {
4394 last->next = cur;
4395 last = cur;
4396 }
4397 SKIP_BLANKS;
4398 } while (RAW == '|');
4399 if (RAW != ')') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004400 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004401 if ((last != NULL) && (last != ret))
4402 xmlFreeEnumeration(last);
4403 return(ret);
4404 }
4405 NEXT;
4406 return(ret);
4407}
4408
4409/**
4410 * xmlParseEnumerationType:
4411 * @ctxt: an XML parser context
4412 *
4413 * parse an Enumeration attribute type.
4414 *
4415 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
4416 *
4417 * [ VC: Enumeration ]
4418 * Values of this type must match one of the Nmtoken tokens in
4419 * the declaration
4420 *
4421 * Returns: the enumeration attribute tree built while parsing
4422 */
4423
4424xmlEnumerationPtr
4425xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
4426 xmlChar *name;
4427 xmlEnumerationPtr ret = NULL, last = NULL, cur;
4428
4429 if (RAW != '(') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004430 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004431 return(NULL);
4432 }
4433 SHRINK;
4434 do {
4435 NEXT;
4436 SKIP_BLANKS;
4437 name = xmlParseNmtoken(ctxt);
4438 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004439 xmlFatalErr(ctxt, XML_ERR_NMTOKEN_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004440 return(ret);
4441 }
4442 cur = xmlCreateEnumeration(name);
4443 xmlFree(name);
4444 if (cur == NULL) return(ret);
4445 if (last == NULL) ret = last = cur;
4446 else {
4447 last->next = cur;
4448 last = cur;
4449 }
4450 SKIP_BLANKS;
4451 } while (RAW == '|');
4452 if (RAW != ')') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004453 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004454 return(ret);
4455 }
4456 NEXT;
4457 return(ret);
4458}
4459
4460/**
4461 * xmlParseEnumeratedType:
4462 * @ctxt: an XML parser context
4463 * @tree: the enumeration tree built while parsing
4464 *
4465 * parse an Enumerated attribute type.
4466 *
4467 * [57] EnumeratedType ::= NotationType | Enumeration
4468 *
4469 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
4470 *
4471 *
4472 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
4473 */
4474
4475int
4476xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
Daniel Veillarda07050d2003-10-19 14:46:32 +00004477 if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004478 SKIP(8);
William M. Brack76e95df2003-10-18 16:20:14 +00004479 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004480 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4481 "Space required after 'NOTATION'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004482 return(0);
4483 }
4484 SKIP_BLANKS;
4485 *tree = xmlParseNotationType(ctxt);
4486 if (*tree == NULL) return(0);
4487 return(XML_ATTRIBUTE_NOTATION);
4488 }
4489 *tree = xmlParseEnumerationType(ctxt);
4490 if (*tree == NULL) return(0);
4491 return(XML_ATTRIBUTE_ENUMERATION);
4492}
4493
4494/**
4495 * xmlParseAttributeType:
4496 * @ctxt: an XML parser context
4497 * @tree: the enumeration tree built while parsing
4498 *
4499 * parse the Attribute list def for an element
4500 *
4501 * [54] AttType ::= StringType | TokenizedType | EnumeratedType
4502 *
4503 * [55] StringType ::= 'CDATA'
4504 *
4505 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
4506 * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
4507 *
4508 * Validity constraints for attribute values syntax are checked in
4509 * xmlValidateAttributeValue()
4510 *
4511 * [ VC: ID ]
4512 * Values of type ID must match the Name production. A name must not
4513 * appear more than once in an XML document as a value of this type;
4514 * i.e., ID values must uniquely identify the elements which bear them.
4515 *
4516 * [ VC: One ID per Element Type ]
4517 * No element type may have more than one ID attribute specified.
4518 *
4519 * [ VC: ID Attribute Default ]
4520 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
4521 *
4522 * [ VC: IDREF ]
4523 * Values of type IDREF must match the Name production, and values
4524 * of type IDREFS must match Names; each IDREF Name must match the value
4525 * of an ID attribute on some element in the XML document; i.e. IDREF
4526 * values must match the value of some ID attribute.
4527 *
4528 * [ VC: Entity Name ]
4529 * Values of type ENTITY must match the Name production, values
4530 * of type ENTITIES must match Names; each Entity Name must match the
4531 * name of an unparsed entity declared in the DTD.
4532 *
4533 * [ VC: Name Token ]
4534 * Values of type NMTOKEN must match the Nmtoken production; values
4535 * of type NMTOKENS must match Nmtokens.
4536 *
4537 * Returns the attribute type
4538 */
4539int
4540xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
4541 SHRINK;
Daniel Veillarda07050d2003-10-19 14:46:32 +00004542 if (CMP5(CUR_PTR, 'C', 'D', 'A', 'T', 'A')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004543 SKIP(5);
4544 return(XML_ATTRIBUTE_CDATA);
Daniel Veillarda07050d2003-10-19 14:46:32 +00004545 } else if (CMP6(CUR_PTR, 'I', 'D', 'R', 'E', 'F', 'S')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004546 SKIP(6);
4547 return(XML_ATTRIBUTE_IDREFS);
Daniel Veillarda07050d2003-10-19 14:46:32 +00004548 } else if (CMP5(CUR_PTR, 'I', 'D', 'R', 'E', 'F')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004549 SKIP(5);
4550 return(XML_ATTRIBUTE_IDREF);
4551 } else if ((RAW == 'I') && (NXT(1) == 'D')) {
4552 SKIP(2);
4553 return(XML_ATTRIBUTE_ID);
Daniel Veillarda07050d2003-10-19 14:46:32 +00004554 } else if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004555 SKIP(6);
4556 return(XML_ATTRIBUTE_ENTITY);
Daniel Veillarda07050d2003-10-19 14:46:32 +00004557 } else if (CMP8(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'I', 'E', 'S')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004558 SKIP(8);
4559 return(XML_ATTRIBUTE_ENTITIES);
Daniel Veillarda07050d2003-10-19 14:46:32 +00004560 } else if (CMP8(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N', 'S')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004561 SKIP(8);
4562 return(XML_ATTRIBUTE_NMTOKENS);
Daniel Veillarda07050d2003-10-19 14:46:32 +00004563 } else if (CMP7(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004564 SKIP(7);
4565 return(XML_ATTRIBUTE_NMTOKEN);
4566 }
4567 return(xmlParseEnumeratedType(ctxt, tree));
4568}
4569
4570/**
4571 * xmlParseAttributeListDecl:
4572 * @ctxt: an XML parser context
4573 *
4574 * : parse the Attribute list def for an element
4575 *
4576 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
4577 *
4578 * [53] AttDef ::= S Name S AttType S DefaultDecl
4579 *
4580 */
4581void
4582xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004583 const xmlChar *elemName;
4584 const xmlChar *attrName;
Owen Taylor3473f882001-02-23 17:55:21 +00004585 xmlEnumerationPtr tree;
4586
Daniel Veillarda07050d2003-10-19 14:46:32 +00004587 if (CMP9(CUR_PTR, '<', '!', 'A', 'T', 'T', 'L', 'I', 'S', 'T')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004588 xmlParserInputPtr input = ctxt->input;
4589
4590 SKIP(9);
William M. Brack76e95df2003-10-18 16:20:14 +00004591 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004592 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004593 "Space required after '<!ATTLIST'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004594 }
4595 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004596 elemName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004597 if (elemName == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004598 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
4599 "ATTLIST: no name for Element\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004600 return;
4601 }
4602 SKIP_BLANKS;
4603 GROW;
4604 while (RAW != '>') {
4605 const xmlChar *check = CUR_PTR;
4606 int type;
4607 int def;
4608 xmlChar *defaultValue = NULL;
4609
4610 GROW;
4611 tree = NULL;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004612 attrName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004613 if (attrName == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004614 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
4615 "ATTLIST: no name for Attribute\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004616 break;
4617 }
4618 GROW;
William M. Brack76e95df2003-10-18 16:20:14 +00004619 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004620 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004621 "Space required after the attribute name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004622 if (defaultValue != NULL)
4623 xmlFree(defaultValue);
4624 break;
4625 }
4626 SKIP_BLANKS;
4627
4628 type = xmlParseAttributeType(ctxt, &tree);
4629 if (type <= 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00004630 if (defaultValue != NULL)
4631 xmlFree(defaultValue);
4632 break;
4633 }
4634
4635 GROW;
William M. Brack76e95df2003-10-18 16:20:14 +00004636 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004637 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4638 "Space required after the attribute type\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004639 if (defaultValue != NULL)
4640 xmlFree(defaultValue);
4641 if (tree != NULL)
4642 xmlFreeEnumeration(tree);
4643 break;
4644 }
4645 SKIP_BLANKS;
4646
4647 def = xmlParseDefaultDecl(ctxt, &defaultValue);
4648 if (def <= 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00004649 if (defaultValue != NULL)
4650 xmlFree(defaultValue);
4651 if (tree != NULL)
4652 xmlFreeEnumeration(tree);
4653 break;
4654 }
4655
4656 GROW;
4657 if (RAW != '>') {
William M. Brack76e95df2003-10-18 16:20:14 +00004658 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004659 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004660 "Space required after the attribute default value\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004661 if (defaultValue != NULL)
4662 xmlFree(defaultValue);
4663 if (tree != NULL)
4664 xmlFreeEnumeration(tree);
4665 break;
4666 }
4667 SKIP_BLANKS;
4668 }
4669 if (check == CUR_PTR) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004670 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
4671 "in xmlParseAttributeListDecl\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004672 if (defaultValue != NULL)
4673 xmlFree(defaultValue);
4674 if (tree != NULL)
4675 xmlFreeEnumeration(tree);
4676 break;
4677 }
4678 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4679 (ctxt->sax->attributeDecl != NULL))
4680 ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
4681 type, def, defaultValue, tree);
Daniel Veillarde57ec792003-09-10 10:50:59 +00004682 else if (tree != NULL)
4683 xmlFreeEnumeration(tree);
4684
4685 if ((ctxt->sax2) && (defaultValue != NULL) &&
4686 (def != XML_ATTRIBUTE_IMPLIED) &&
4687 (def != XML_ATTRIBUTE_REQUIRED)) {
4688 xmlAddDefAttrs(ctxt, elemName, attrName, defaultValue);
4689 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004690 if ((ctxt->sax2) && (type != XML_ATTRIBUTE_CDATA)) {
4691 xmlAddSpecialAttr(ctxt, elemName, attrName, type);
4692 }
Owen Taylor3473f882001-02-23 17:55:21 +00004693 if (defaultValue != NULL)
4694 xmlFree(defaultValue);
4695 GROW;
4696 }
4697 if (RAW == '>') {
4698 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004699 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4700 "Attribute list declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004701 }
4702 NEXT;
4703 }
Owen Taylor3473f882001-02-23 17:55:21 +00004704 }
4705}
4706
4707/**
4708 * xmlParseElementMixedContentDecl:
4709 * @ctxt: an XML parser context
Daniel Veillarda9b66d02002-12-11 14:23:49 +00004710 * @inputchk: the input used for the current entity, needed for boundary checks
Owen Taylor3473f882001-02-23 17:55:21 +00004711 *
4712 * parse the declaration for a Mixed Element content
4713 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
4714 *
4715 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
4716 * '(' S? '#PCDATA' S? ')'
4717 *
4718 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
4719 *
4720 * [ VC: No Duplicate Types ]
4721 * The same name must not appear more than once in a single
4722 * mixed-content declaration.
4723 *
4724 * returns: the list of the xmlElementContentPtr describing the element choices
4725 */
4726xmlElementContentPtr
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004727xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
Owen Taylor3473f882001-02-23 17:55:21 +00004728 xmlElementContentPtr ret = NULL, cur = NULL, n;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004729 const xmlChar *elem = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00004730
4731 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00004732 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004733 SKIP(7);
4734 SKIP_BLANKS;
4735 SHRINK;
4736 if (RAW == ')') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004737 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00004738 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
4739"Element content declaration doesn't start and stop in the same entity\n",
4740 NULL);
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004741 }
Owen Taylor3473f882001-02-23 17:55:21 +00004742 NEXT;
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00004743 ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
Owen Taylor3473f882001-02-23 17:55:21 +00004744 if (RAW == '*') {
4745 ret->ocur = XML_ELEMENT_CONTENT_MULT;
4746 NEXT;
4747 }
4748 return(ret);
4749 }
4750 if ((RAW == '(') || (RAW == '|')) {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00004751 ret = cur = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
Owen Taylor3473f882001-02-23 17:55:21 +00004752 if (ret == NULL) return(NULL);
4753 }
4754 while (RAW == '|') {
4755 NEXT;
4756 if (elem == NULL) {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00004757 ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
Owen Taylor3473f882001-02-23 17:55:21 +00004758 if (ret == NULL) return(NULL);
4759 ret->c1 = cur;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004760 if (cur != NULL)
4761 cur->parent = ret;
Owen Taylor3473f882001-02-23 17:55:21 +00004762 cur = ret;
4763 } else {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00004764 n = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
Owen Taylor3473f882001-02-23 17:55:21 +00004765 if (n == NULL) return(NULL);
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00004766 n->c1 = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004767 if (n->c1 != NULL)
4768 n->c1->parent = n;
Owen Taylor3473f882001-02-23 17:55:21 +00004769 cur->c2 = n;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004770 if (n != NULL)
4771 n->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004772 cur = n;
Owen Taylor3473f882001-02-23 17:55:21 +00004773 }
4774 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004775 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004776 if (elem == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004777 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004778 "xmlParseElementMixedContentDecl : Name expected\n");
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00004779 xmlFreeDocElementContent(ctxt->myDoc, cur);
Owen Taylor3473f882001-02-23 17:55:21 +00004780 return(NULL);
4781 }
4782 SKIP_BLANKS;
4783 GROW;
4784 }
4785 if ((RAW == ')') && (NXT(1) == '*')) {
4786 if (elem != NULL) {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00004787 cur->c2 = xmlNewDocElementContent(ctxt->myDoc, elem,
Owen Taylor3473f882001-02-23 17:55:21 +00004788 XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004789 if (cur->c2 != NULL)
4790 cur->c2->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004791 }
4792 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004793 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00004794 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
4795"Element content declaration doesn't start and stop in the same entity\n",
4796 NULL);
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004797 }
Owen Taylor3473f882001-02-23 17:55:21 +00004798 SKIP(2);
4799 } else {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00004800 xmlFreeDocElementContent(ctxt->myDoc, ret);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004801 xmlFatalErr(ctxt, XML_ERR_MIXED_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004802 return(NULL);
4803 }
4804
4805 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004806 xmlFatalErr(ctxt, XML_ERR_PCDATA_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004807 }
4808 return(ret);
4809}
4810
4811/**
4812 * xmlParseElementChildrenContentDecl:
4813 * @ctxt: an XML parser context
Daniel Veillarda9b66d02002-12-11 14:23:49 +00004814 * @inputchk: the input used for the current entity, needed for boundary checks
Owen Taylor3473f882001-02-23 17:55:21 +00004815 *
4816 * parse the declaration for a Mixed Element content
4817 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
4818 *
4819 *
4820 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
4821 *
4822 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
4823 *
4824 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
4825 *
4826 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
4827 *
4828 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
4829 * TODO Parameter-entity replacement text must be properly nested
Daniel Veillardcbaf3992001-12-31 16:16:02 +00004830 * with parenthesized groups. That is to say, if either of the
Owen Taylor3473f882001-02-23 17:55:21 +00004831 * opening or closing parentheses in a choice, seq, or Mixed
4832 * construct is contained in the replacement text for a parameter
4833 * entity, both must be contained in the same replacement text. For
4834 * interoperability, if a parameter-entity reference appears in a
4835 * choice, seq, or Mixed construct, its replacement text should not
4836 * be empty, and neither the first nor last non-blank character of
4837 * the replacement text should be a connector (| or ,).
4838 *
Daniel Veillard5e2dace2001-07-18 19:30:27 +00004839 * Returns the tree of xmlElementContentPtr describing the element
Owen Taylor3473f882001-02-23 17:55:21 +00004840 * hierarchy.
4841 */
4842xmlElementContentPtr
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004843xmlParseElementChildrenContentDecl (xmlParserCtxtPtr ctxt, int inputchk) {
Owen Taylor3473f882001-02-23 17:55:21 +00004844 xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004845 const xmlChar *elem;
Owen Taylor3473f882001-02-23 17:55:21 +00004846 xmlChar type = 0;
4847
4848 SKIP_BLANKS;
4849 GROW;
4850 if (RAW == '(') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004851 int inputid = ctxt->input->id;
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004852
Owen Taylor3473f882001-02-23 17:55:21 +00004853 /* Recurse on first child */
4854 NEXT;
4855 SKIP_BLANKS;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004856 cur = ret = xmlParseElementChildrenContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00004857 SKIP_BLANKS;
4858 GROW;
4859 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00004860 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004861 if (elem == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004862 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004863 return(NULL);
4864 }
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00004865 cur = ret = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00004866 if (cur == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004867 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00004868 return(NULL);
4869 }
Owen Taylor3473f882001-02-23 17:55:21 +00004870 GROW;
4871 if (RAW == '?') {
4872 cur->ocur = XML_ELEMENT_CONTENT_OPT;
4873 NEXT;
4874 } else if (RAW == '*') {
4875 cur->ocur = XML_ELEMENT_CONTENT_MULT;
4876 NEXT;
4877 } else if (RAW == '+') {
4878 cur->ocur = XML_ELEMENT_CONTENT_PLUS;
4879 NEXT;
4880 } else {
4881 cur->ocur = XML_ELEMENT_CONTENT_ONCE;
4882 }
Owen Taylor3473f882001-02-23 17:55:21 +00004883 GROW;
4884 }
4885 SKIP_BLANKS;
4886 SHRINK;
4887 while (RAW != ')') {
4888 /*
4889 * Each loop we parse one separator and one element.
4890 */
4891 if (RAW == ',') {
4892 if (type == 0) type = CUR;
4893
4894 /*
4895 * Detect "Name | Name , Name" error
4896 */
4897 else if (type != CUR) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004898 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004899 "xmlParseElementChildrenContentDecl : '%c' expected\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004900 type);
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00004901 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00004902 xmlFreeDocElementContent(ctxt->myDoc, last);
Owen Taylor3473f882001-02-23 17:55:21 +00004903 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00004904 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00004905 return(NULL);
4906 }
4907 NEXT;
4908
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00004909 op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_SEQ);
Owen Taylor3473f882001-02-23 17:55:21 +00004910 if (op == NULL) {
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00004911 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00004912 xmlFreeDocElementContent(ctxt->myDoc, last);
4913 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00004914 return(NULL);
4915 }
4916 if (last == NULL) {
4917 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004918 if (ret != NULL)
4919 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004920 ret = cur = op;
4921 } else {
4922 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004923 if (op != NULL)
4924 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004925 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004926 if (last != NULL)
4927 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004928 cur =op;
4929 last = NULL;
4930 }
4931 } else if (RAW == '|') {
4932 if (type == 0) type = CUR;
4933
4934 /*
4935 * Detect "Name , Name | Name" error
4936 */
4937 else if (type != CUR) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004938 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004939 "xmlParseElementChildrenContentDecl : '%c' expected\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004940 type);
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00004941 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00004942 xmlFreeDocElementContent(ctxt->myDoc, last);
Owen Taylor3473f882001-02-23 17:55:21 +00004943 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00004944 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00004945 return(NULL);
4946 }
4947 NEXT;
4948
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00004949 op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
Owen Taylor3473f882001-02-23 17:55:21 +00004950 if (op == NULL) {
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00004951 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00004952 xmlFreeDocElementContent(ctxt->myDoc, last);
Owen Taylor3473f882001-02-23 17:55:21 +00004953 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00004954 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00004955 return(NULL);
4956 }
4957 if (last == NULL) {
4958 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004959 if (ret != NULL)
4960 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004961 ret = cur = op;
4962 } else {
4963 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004964 if (op != NULL)
4965 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004966 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004967 if (last != NULL)
4968 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004969 cur =op;
4970 last = NULL;
4971 }
4972 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004973 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004974 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00004975 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00004976 return(NULL);
4977 }
4978 GROW;
4979 SKIP_BLANKS;
4980 GROW;
4981 if (RAW == '(') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004982 int inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00004983 /* Recurse on second child */
4984 NEXT;
4985 SKIP_BLANKS;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004986 last = xmlParseElementChildrenContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00004987 SKIP_BLANKS;
4988 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00004989 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004990 if (elem == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004991 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004992 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00004993 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00004994 return(NULL);
4995 }
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00004996 last = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
Owen Taylor3473f882001-02-23 17:55:21 +00004997 if (RAW == '?') {
4998 last->ocur = XML_ELEMENT_CONTENT_OPT;
4999 NEXT;
5000 } else if (RAW == '*') {
5001 last->ocur = XML_ELEMENT_CONTENT_MULT;
5002 NEXT;
5003 } else if (RAW == '+') {
5004 last->ocur = XML_ELEMENT_CONTENT_PLUS;
5005 NEXT;
5006 } else {
5007 last->ocur = XML_ELEMENT_CONTENT_ONCE;
5008 }
5009 }
5010 SKIP_BLANKS;
5011 GROW;
5012 }
5013 if ((cur != NULL) && (last != NULL)) {
5014 cur->c2 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005015 if (last != NULL)
5016 last->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00005017 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005018 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00005019 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
5020"Element content declaration doesn't start and stop in the same entity\n",
5021 NULL);
Daniel Veillard8dc16a62002-02-19 21:08:48 +00005022 }
Owen Taylor3473f882001-02-23 17:55:21 +00005023 NEXT;
5024 if (RAW == '?') {
William M. Brackf8f2e8f2004-05-14 04:37:41 +00005025 if (ret != NULL) {
5026 if ((ret->ocur == XML_ELEMENT_CONTENT_PLUS) ||
5027 (ret->ocur == XML_ELEMENT_CONTENT_MULT))
5028 ret->ocur = XML_ELEMENT_CONTENT_MULT;
5029 else
5030 ret->ocur = XML_ELEMENT_CONTENT_OPT;
5031 }
Owen Taylor3473f882001-02-23 17:55:21 +00005032 NEXT;
5033 } else if (RAW == '*') {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00005034 if (ret != NULL) {
Daniel Veillarde470df72001-04-18 21:41:07 +00005035 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Daniel Veillardce2c2f02001-10-18 14:57:24 +00005036 cur = ret;
5037 /*
5038 * Some normalization:
5039 * (a | b* | c?)* == (a | b | c)*
5040 */
5041 while (cur->type == XML_ELEMENT_CONTENT_OR) {
5042 if ((cur->c1 != NULL) &&
5043 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
5044 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT)))
5045 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
5046 if ((cur->c2 != NULL) &&
5047 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
5048 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT)))
5049 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
5050 cur = cur->c2;
5051 }
5052 }
Owen Taylor3473f882001-02-23 17:55:21 +00005053 NEXT;
5054 } else if (RAW == '+') {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00005055 if (ret != NULL) {
5056 int found = 0;
5057
William M. Brackf8f2e8f2004-05-14 04:37:41 +00005058 if ((ret->ocur == XML_ELEMENT_CONTENT_OPT) ||
5059 (ret->ocur == XML_ELEMENT_CONTENT_MULT))
5060 ret->ocur = XML_ELEMENT_CONTENT_MULT;
William M. Brackeb8509c2004-05-14 03:48:02 +00005061 else
5062 ret->ocur = XML_ELEMENT_CONTENT_PLUS;
Daniel Veillardce2c2f02001-10-18 14:57:24 +00005063 /*
5064 * Some normalization:
5065 * (a | b*)+ == (a | b)*
5066 * (a | b?)+ == (a | b)*
5067 */
5068 while (cur->type == XML_ELEMENT_CONTENT_OR) {
5069 if ((cur->c1 != NULL) &&
5070 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
5071 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) {
5072 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
5073 found = 1;
5074 }
5075 if ((cur->c2 != NULL) &&
5076 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
5077 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) {
5078 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
5079 found = 1;
5080 }
5081 cur = cur->c2;
5082 }
5083 if (found)
5084 ret->ocur = XML_ELEMENT_CONTENT_MULT;
5085 }
Owen Taylor3473f882001-02-23 17:55:21 +00005086 NEXT;
5087 }
5088 return(ret);
5089}
5090
5091/**
5092 * xmlParseElementContentDecl:
5093 * @ctxt: an XML parser context
5094 * @name: the name of the element being defined.
5095 * @result: the Element Content pointer will be stored here if any
5096 *
5097 * parse the declaration for an Element content either Mixed or Children,
5098 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
5099 *
5100 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
5101 *
5102 * returns: the type of element content XML_ELEMENT_TYPE_xxx
5103 */
5104
5105int
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005106xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, const xmlChar *name,
Owen Taylor3473f882001-02-23 17:55:21 +00005107 xmlElementContentPtr *result) {
5108
5109 xmlElementContentPtr tree = NULL;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005110 int inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00005111 int res;
5112
5113 *result = NULL;
5114
5115 if (RAW != '(') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00005116 xmlFatalErrMsgStr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005117 "xmlParseElementContentDecl : %s '(' expected\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00005118 return(-1);
5119 }
5120 NEXT;
5121 GROW;
5122 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005123 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005124 tree = xmlParseElementMixedContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00005125 res = XML_ELEMENT_TYPE_MIXED;
5126 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005127 tree = xmlParseElementChildrenContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00005128 res = XML_ELEMENT_TYPE_ELEMENT;
5129 }
Owen Taylor3473f882001-02-23 17:55:21 +00005130 SKIP_BLANKS;
5131 *result = tree;
5132 return(res);
5133}
5134
5135/**
5136 * xmlParseElementDecl:
5137 * @ctxt: an XML parser context
5138 *
5139 * parse an Element declaration.
5140 *
5141 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
5142 *
5143 * [ VC: Unique Element Type Declaration ]
5144 * No element type may be declared more than once
5145 *
5146 * Returns the type of the element, or -1 in case of error
5147 */
5148int
5149xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005150 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00005151 int ret = -1;
5152 xmlElementContentPtr content = NULL;
5153
Daniel Veillard4c778d82005-01-23 17:37:44 +00005154 /* GROW; done in the caller */
Daniel Veillarda07050d2003-10-19 14:46:32 +00005155 if (CMP9(CUR_PTR, '<', '!', 'E', 'L', 'E', 'M', 'E', 'N', 'T')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005156 xmlParserInputPtr input = ctxt->input;
5157
5158 SKIP(9);
William M. Brack76e95df2003-10-18 16:20:14 +00005159 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005160 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5161 "Space required after 'ELEMENT'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005162 }
5163 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00005164 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005165 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005166 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5167 "xmlParseElementDecl: no name for Element\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005168 return(-1);
5169 }
5170 while ((RAW == 0) && (ctxt->inputNr > 1))
5171 xmlPopInput(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00005172 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005173 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5174 "Space required after the element name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005175 }
5176 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005177 if (CMP5(CUR_PTR, 'E', 'M', 'P', 'T', 'Y')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005178 SKIP(5);
5179 /*
5180 * Element must always be empty.
5181 */
5182 ret = XML_ELEMENT_TYPE_EMPTY;
5183 } else if ((RAW == 'A') && (NXT(1) == 'N') &&
5184 (NXT(2) == 'Y')) {
5185 SKIP(3);
5186 /*
5187 * Element is a generic container.
5188 */
5189 ret = XML_ELEMENT_TYPE_ANY;
5190 } else if (RAW == '(') {
5191 ret = xmlParseElementContentDecl(ctxt, name, &content);
5192 } else {
5193 /*
5194 * [ WFC: PEs in Internal Subset ] error handling.
5195 */
5196 if ((RAW == '%') && (ctxt->external == 0) &&
5197 (ctxt->inputNr == 1)) {
Daniel Veillardf403d292003-10-05 13:51:35 +00005198 xmlFatalErrMsg(ctxt, XML_ERR_PEREF_IN_INT_SUBSET,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005199 "PEReference: forbidden within markup decl in internal subset\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005200 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00005201 xmlFatalErrMsg(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
Owen Taylor3473f882001-02-23 17:55:21 +00005202 "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
5203 }
Owen Taylor3473f882001-02-23 17:55:21 +00005204 return(-1);
5205 }
5206
5207 SKIP_BLANKS;
5208 /*
5209 * Pop-up of finished entities.
5210 */
5211 while ((RAW == 0) && (ctxt->inputNr > 1))
5212 xmlPopInput(ctxt);
5213 SKIP_BLANKS;
5214
5215 if (RAW != '>') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005216 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005217 if (content != NULL) {
5218 xmlFreeDocElementContent(ctxt->myDoc, content);
5219 }
Owen Taylor3473f882001-02-23 17:55:21 +00005220 } else {
5221 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005222 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5223 "Element declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005224 }
5225
5226 NEXT;
5227 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005228 (ctxt->sax->elementDecl != NULL)) {
5229 if (content != NULL)
5230 content->parent = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005231 ctxt->sax->elementDecl(ctxt->userData, name, ret,
5232 content);
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005233 if ((content != NULL) && (content->parent == NULL)) {
5234 /*
5235 * this is a trick: if xmlAddElementDecl is called,
5236 * instead of copying the full tree it is plugged directly
5237 * if called from the parser. Avoid duplicating the
5238 * interfaces or change the API/ABI
5239 */
5240 xmlFreeDocElementContent(ctxt->myDoc, content);
5241 }
5242 } else if (content != NULL) {
5243 xmlFreeDocElementContent(ctxt->myDoc, content);
5244 }
Owen Taylor3473f882001-02-23 17:55:21 +00005245 }
Owen Taylor3473f882001-02-23 17:55:21 +00005246 }
5247 return(ret);
5248}
5249
5250/**
Owen Taylor3473f882001-02-23 17:55:21 +00005251 * xmlParseConditionalSections
5252 * @ctxt: an XML parser context
5253 *
5254 * [61] conditionalSect ::= includeSect | ignoreSect
5255 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
5256 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
5257 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
5258 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
5259 */
5260
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005261static void
Owen Taylor3473f882001-02-23 17:55:21 +00005262xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
5263 SKIP(3);
5264 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005265 if (CMP7(CUR_PTR, 'I', 'N', 'C', 'L', 'U', 'D', 'E')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005266 SKIP(7);
5267 SKIP_BLANKS;
5268 if (RAW != '[') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005269 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005270 } else {
5271 NEXT;
5272 }
5273 if (xmlParserDebugEntities) {
5274 if ((ctxt->input != NULL) && (ctxt->input->filename))
5275 xmlGenericError(xmlGenericErrorContext,
5276 "%s(%d): ", ctxt->input->filename,
5277 ctxt->input->line);
5278 xmlGenericError(xmlGenericErrorContext,
5279 "Entering INCLUDE Conditional Section\n");
5280 }
5281
5282 while ((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') ||
5283 (NXT(2) != '>'))) {
5284 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00005285 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00005286
5287 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5288 xmlParseConditionalSections(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00005289 } else if (IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005290 NEXT;
5291 } else if (RAW == '%') {
5292 xmlParsePEReference(ctxt);
5293 } else
5294 xmlParseMarkupDecl(ctxt);
5295
5296 /*
5297 * Pop-up of finished entities.
5298 */
5299 while ((RAW == 0) && (ctxt->inputNr > 1))
5300 xmlPopInput(ctxt);
5301
Daniel Veillardfdc91562002-07-01 21:52:03 +00005302 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005303 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005304 break;
5305 }
5306 }
5307 if (xmlParserDebugEntities) {
5308 if ((ctxt->input != NULL) && (ctxt->input->filename))
5309 xmlGenericError(xmlGenericErrorContext,
5310 "%s(%d): ", ctxt->input->filename,
5311 ctxt->input->line);
5312 xmlGenericError(xmlGenericErrorContext,
5313 "Leaving INCLUDE Conditional Section\n");
5314 }
5315
Daniel Veillarda07050d2003-10-19 14:46:32 +00005316 } else if (CMP6(CUR_PTR, 'I', 'G', 'N', 'O', 'R', 'E')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005317 int state;
William M. Brack78637da2003-07-31 14:47:38 +00005318 xmlParserInputState instate;
Owen Taylor3473f882001-02-23 17:55:21 +00005319 int depth = 0;
5320
5321 SKIP(6);
5322 SKIP_BLANKS;
5323 if (RAW != '[') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005324 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005325 } else {
5326 NEXT;
5327 }
5328 if (xmlParserDebugEntities) {
5329 if ((ctxt->input != NULL) && (ctxt->input->filename))
5330 xmlGenericError(xmlGenericErrorContext,
5331 "%s(%d): ", ctxt->input->filename,
5332 ctxt->input->line);
5333 xmlGenericError(xmlGenericErrorContext,
5334 "Entering IGNORE Conditional Section\n");
5335 }
5336
5337 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005338 * Parse up to the end of the conditional section
Owen Taylor3473f882001-02-23 17:55:21 +00005339 * But disable SAX event generating DTD building in the meantime
5340 */
5341 state = ctxt->disableSAX;
5342 instate = ctxt->instate;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005343 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005344 ctxt->instate = XML_PARSER_IGNORE;
5345
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00005346 while ((depth >= 0) && (RAW != 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005347 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5348 depth++;
5349 SKIP(3);
5350 continue;
5351 }
5352 if ((RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
5353 if (--depth >= 0) SKIP(3);
5354 continue;
5355 }
5356 NEXT;
5357 continue;
5358 }
5359
5360 ctxt->disableSAX = state;
5361 ctxt->instate = instate;
5362
5363 if (xmlParserDebugEntities) {
5364 if ((ctxt->input != NULL) && (ctxt->input->filename))
5365 xmlGenericError(xmlGenericErrorContext,
5366 "%s(%d): ", ctxt->input->filename,
5367 ctxt->input->line);
5368 xmlGenericError(xmlGenericErrorContext,
5369 "Leaving IGNORE Conditional Section\n");
5370 }
5371
5372 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005373 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID_KEYWORD, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005374 }
5375
5376 if (RAW == 0)
5377 SHRINK;
5378
5379 if (RAW == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005380 xmlFatalErr(ctxt, XML_ERR_CONDSEC_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005381 } else {
5382 SKIP(3);
5383 }
5384}
5385
5386/**
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005387 * xmlParseMarkupDecl:
5388 * @ctxt: an XML parser context
5389 *
5390 * parse Markup declarations
5391 *
5392 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
5393 * NotationDecl | PI | Comment
5394 *
5395 * [ VC: Proper Declaration/PE Nesting ]
5396 * Parameter-entity replacement text must be properly nested with
5397 * markup declarations. That is to say, if either the first character
5398 * or the last character of a markup declaration (markupdecl above) is
5399 * contained in the replacement text for a parameter-entity reference,
5400 * both must be contained in the same replacement text.
5401 *
5402 * [ WFC: PEs in Internal Subset ]
5403 * In the internal DTD subset, parameter-entity references can occur
5404 * only where markup declarations can occur, not within markup declarations.
5405 * (This does not apply to references that occur in external parameter
5406 * entities or to the external subset.)
5407 */
5408void
5409xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
5410 GROW;
Daniel Veillard4c778d82005-01-23 17:37:44 +00005411 if (CUR == '<') {
5412 if (NXT(1) == '!') {
5413 switch (NXT(2)) {
5414 case 'E':
5415 if (NXT(3) == 'L')
5416 xmlParseElementDecl(ctxt);
5417 else if (NXT(3) == 'N')
5418 xmlParseEntityDecl(ctxt);
5419 break;
5420 case 'A':
5421 xmlParseAttributeListDecl(ctxt);
5422 break;
5423 case 'N':
5424 xmlParseNotationDecl(ctxt);
5425 break;
5426 case '-':
5427 xmlParseComment(ctxt);
5428 break;
5429 default:
5430 /* there is an error but it will be detected later */
5431 break;
5432 }
5433 } else if (NXT(1) == '?') {
5434 xmlParsePI(ctxt);
5435 }
5436 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005437 /*
5438 * This is only for internal subset. On external entities,
5439 * the replacement is done before parsing stage
5440 */
5441 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
5442 xmlParsePEReference(ctxt);
5443
5444 /*
5445 * Conditional sections are allowed from entities included
5446 * by PE References in the internal subset.
5447 */
5448 if ((ctxt->external == 0) && (ctxt->inputNr > 1)) {
5449 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5450 xmlParseConditionalSections(ctxt);
5451 }
5452 }
5453
5454 ctxt->instate = XML_PARSER_DTD;
5455}
5456
5457/**
5458 * xmlParseTextDecl:
5459 * @ctxt: an XML parser context
5460 *
5461 * parse an XML declaration header for external entities
5462 *
5463 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
5464 *
5465 * Question: Seems that EncodingDecl is mandatory ? Is that a typo ?
5466 */
5467
5468void
5469xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
5470 xmlChar *version;
Daniel Veillardf5cb3cd2003-10-28 13:58:13 +00005471 const xmlChar *encoding;
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005472
5473 /*
5474 * We know that '<?xml' is here.
5475 */
Daniel Veillarda07050d2003-10-19 14:46:32 +00005476 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005477 SKIP(5);
5478 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005479 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_STARTED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005480 return;
5481 }
5482
William M. Brack76e95df2003-10-18 16:20:14 +00005483 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005484 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5485 "Space needed after '<?xml'\n");
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005486 }
5487 SKIP_BLANKS;
5488
5489 /*
5490 * We may have the VersionInfo here.
5491 */
5492 version = xmlParseVersionInfo(ctxt);
5493 if (version == NULL)
5494 version = xmlCharStrdup(XML_DEFAULT_VERSION);
Daniel Veillard401c2112002-01-07 16:54:10 +00005495 else {
William M. Brack76e95df2003-10-18 16:20:14 +00005496 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005497 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5498 "Space needed here\n");
Daniel Veillard401c2112002-01-07 16:54:10 +00005499 }
5500 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005501 ctxt->input->version = version;
5502
5503 /*
5504 * We must have the encoding declaration
5505 */
Daniel Veillardf5cb3cd2003-10-28 13:58:13 +00005506 encoding = xmlParseEncodingDecl(ctxt);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005507 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5508 /*
5509 * The XML REC instructs us to stop parsing right here
5510 */
5511 return;
5512 }
Daniel Veillardf5cb3cd2003-10-28 13:58:13 +00005513 if ((encoding == NULL) && (ctxt->errNo == XML_ERR_OK)) {
5514 xmlFatalErrMsg(ctxt, XML_ERR_MISSING_ENCODING,
5515 "Missing encoding in text declaration\n");
5516 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005517
5518 SKIP_BLANKS;
5519 if ((RAW == '?') && (NXT(1) == '>')) {
5520 SKIP(2);
5521 } else if (RAW == '>') {
5522 /* Deprecated old WD ... */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005523 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005524 NEXT;
5525 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005526 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005527 MOVETO_ENDTAG(CUR_PTR);
5528 NEXT;
5529 }
5530}
5531
5532/**
Owen Taylor3473f882001-02-23 17:55:21 +00005533 * xmlParseExternalSubset:
5534 * @ctxt: an XML parser context
5535 * @ExternalID: the external identifier
5536 * @SystemID: the system identifier (or URL)
5537 *
5538 * parse Markup declarations from an external subset
5539 *
5540 * [30] extSubset ::= textDecl? extSubsetDecl
5541 *
5542 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
5543 */
5544void
5545xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
5546 const xmlChar *SystemID) {
Daniel Veillard309f81d2003-09-23 09:02:53 +00005547 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005548 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005549 if (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005550 xmlParseTextDecl(ctxt);
5551 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5552 /*
5553 * The XML REC instructs us to stop parsing right here
5554 */
5555 ctxt->instate = XML_PARSER_EOF;
5556 return;
5557 }
5558 }
5559 if (ctxt->myDoc == NULL) {
5560 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
5561 }
5562 if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
5563 xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
5564
5565 ctxt->instate = XML_PARSER_DTD;
5566 ctxt->external = 1;
5567 while (((RAW == '<') && (NXT(1) == '?')) ||
5568 ((RAW == '<') && (NXT(1) == '!')) ||
William M. Brack76e95df2003-10-18 16:20:14 +00005569 (RAW == '%') || IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005570 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00005571 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00005572
5573 GROW;
5574 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5575 xmlParseConditionalSections(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00005576 } else if (IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005577 NEXT;
5578 } else if (RAW == '%') {
5579 xmlParsePEReference(ctxt);
5580 } else
5581 xmlParseMarkupDecl(ctxt);
5582
5583 /*
5584 * Pop-up of finished entities.
5585 */
5586 while ((RAW == 0) && (ctxt->inputNr > 1))
5587 xmlPopInput(ctxt);
5588
Daniel Veillardfdc91562002-07-01 21:52:03 +00005589 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005590 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005591 break;
5592 }
5593 }
5594
5595 if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005596 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005597 }
5598
5599}
5600
5601/**
5602 * xmlParseReference:
5603 * @ctxt: an XML parser context
5604 *
5605 * parse and handle entity references in content, depending on the SAX
5606 * interface, this may end-up in a call to character() if this is a
5607 * CharRef, a predefined entity, if there is no reference() callback.
5608 * or if the parser was asked to switch to that mode.
5609 *
5610 * [67] Reference ::= EntityRef | CharRef
5611 */
5612void
5613xmlParseReference(xmlParserCtxtPtr ctxt) {
5614 xmlEntityPtr ent;
5615 xmlChar *val;
5616 if (RAW != '&') return;
5617
5618 if (NXT(1) == '#') {
5619 int i = 0;
5620 xmlChar out[10];
5621 int hex = NXT(2);
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005622 int value = xmlParseCharRef(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005623
5624 if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
5625 /*
5626 * So we are using non-UTF-8 buffers
5627 * Check that the char fit on 8bits, if not
5628 * generate a CharRef.
5629 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005630 if (value <= 0xFF) {
5631 out[0] = value;
Owen Taylor3473f882001-02-23 17:55:21 +00005632 out[1] = 0;
5633 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5634 (!ctxt->disableSAX))
5635 ctxt->sax->characters(ctxt->userData, out, 1);
5636 } else {
5637 if ((hex == 'x') || (hex == 'X'))
Aleksey Sanin49cc9752002-06-14 17:07:10 +00005638 snprintf((char *)out, sizeof(out), "#x%X", value);
Owen Taylor3473f882001-02-23 17:55:21 +00005639 else
Aleksey Sanin49cc9752002-06-14 17:07:10 +00005640 snprintf((char *)out, sizeof(out), "#%d", value);
Owen Taylor3473f882001-02-23 17:55:21 +00005641 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
5642 (!ctxt->disableSAX))
5643 ctxt->sax->reference(ctxt->userData, out);
5644 }
5645 } else {
5646 /*
5647 * Just encode the value in UTF-8
5648 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005649 COPY_BUF(0 ,out, i, value);
Owen Taylor3473f882001-02-23 17:55:21 +00005650 out[i] = 0;
5651 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5652 (!ctxt->disableSAX))
5653 ctxt->sax->characters(ctxt->userData, out, i);
5654 }
5655 } else {
5656 ent = xmlParseEntityRef(ctxt);
5657 if (ent == NULL) return;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00005658 if (!ctxt->wellFormed)
5659 return;
Owen Taylor3473f882001-02-23 17:55:21 +00005660 if ((ent->name != NULL) &&
5661 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
5662 xmlNodePtr list = NULL;
Daniel Veillard7d515752003-09-26 19:12:37 +00005663 xmlParserErrors ret = XML_ERR_OK;
Owen Taylor3473f882001-02-23 17:55:21 +00005664
5665
5666 /*
5667 * The first reference to the entity trigger a parsing phase
5668 * where the ent->children is filled with the result from
5669 * the parsing.
5670 */
5671 if (ent->children == NULL) {
5672 xmlChar *value;
5673 value = ent->content;
5674
5675 /*
5676 * Check that this entity is well formed
5677 */
Daniel Veillard4aede2e2003-10-17 12:43:59 +00005678 if ((value != NULL) && (value[0] != 0) &&
Owen Taylor3473f882001-02-23 17:55:21 +00005679 (value[1] == 0) && (value[0] == '<') &&
5680 (xmlStrEqual(ent->name, BAD_CAST "lt"))) {
5681 /*
5682 * DONE: get definite answer on this !!!
5683 * Lots of entity decls are used to declare a single
5684 * char
5685 * <!ENTITY lt "<">
5686 * Which seems to be valid since
5687 * 2.4: The ampersand character (&) and the left angle
5688 * bracket (<) may appear in their literal form only
5689 * when used ... They are also legal within the literal
5690 * entity value of an internal entity declaration;i
5691 * see "4.3.2 Well-Formed Parsed Entities".
5692 * IMHO 2.4 and 4.3.2 are directly in contradiction.
5693 * Looking at the OASIS test suite and James Clark
5694 * tests, this is broken. However the XML REC uses
5695 * it. Is the XML REC not well-formed ????
5696 * This is a hack to avoid this problem
5697 *
5698 * ANSWER: since lt gt amp .. are already defined,
5699 * this is a redefinition and hence the fact that the
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005700 * content is not well balanced is not a Wf error, this
Owen Taylor3473f882001-02-23 17:55:21 +00005701 * is lousy but acceptable.
5702 */
5703 list = xmlNewDocText(ctxt->myDoc, value);
5704 if (list != NULL) {
5705 if ((ent->etype == XML_INTERNAL_GENERAL_ENTITY) &&
5706 (ent->children == NULL)) {
5707 ent->children = list;
5708 ent->last = list;
Daniel Veillard2d84a892002-12-30 00:01:08 +00005709 ent->owner = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005710 list->parent = (xmlNodePtr) ent;
5711 } else {
5712 xmlFreeNodeList(list);
5713 }
5714 } else if (list != NULL) {
5715 xmlFreeNodeList(list);
5716 }
5717 } else {
5718 /*
5719 * 4.3.2: An internal general parsed entity is well-formed
5720 * if its replacement text matches the production labeled
5721 * content.
5722 */
Daniel Veillardb5a60ec2002-03-18 11:45:56 +00005723
5724 void *user_data;
5725 /*
5726 * This is a bit hackish but this seems the best
5727 * way to make sure both SAX and DOM entity support
5728 * behaves okay.
5729 */
5730 if (ctxt->userData == ctxt)
5731 user_data = NULL;
5732 else
5733 user_data = ctxt->userData;
5734
Owen Taylor3473f882001-02-23 17:55:21 +00005735 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
5736 ctxt->depth++;
Daniel Veillard328f48c2002-11-15 15:24:34 +00005737 ret = xmlParseBalancedChunkMemoryInternal(ctxt,
5738 value, user_data, &list);
Owen Taylor3473f882001-02-23 17:55:21 +00005739 ctxt->depth--;
5740 } else if (ent->etype ==
5741 XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
5742 ctxt->depth++;
Daniel Veillarda97a19b2001-05-20 13:19:52 +00005743 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
Daniel Veillardb5a60ec2002-03-18 11:45:56 +00005744 ctxt->sax, user_data, ctxt->depth,
Daniel Veillarda97a19b2001-05-20 13:19:52 +00005745 ent->URI, ent->ExternalID, &list);
Owen Taylor3473f882001-02-23 17:55:21 +00005746 ctxt->depth--;
5747 } else {
Daniel Veillard7d515752003-09-26 19:12:37 +00005748 ret = XML_ERR_ENTITY_PE_INTERNAL;
Daniel Veillardf403d292003-10-05 13:51:35 +00005749 xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
5750 "invalid entity type found\n", NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005751 }
5752 if (ret == XML_ERR_ENTITY_LOOP) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005753 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00005754 return;
Daniel Veillard7d515752003-09-26 19:12:37 +00005755 } else if ((ret == XML_ERR_OK) && (list != NULL)) {
Daniel Veillard76d66f42001-05-16 21:05:17 +00005756 if (((ent->etype == XML_INTERNAL_GENERAL_ENTITY) ||
5757 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY))&&
Owen Taylor3473f882001-02-23 17:55:21 +00005758 (ent->children == NULL)) {
5759 ent->children = list;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005760 if (ctxt->replaceEntities) {
5761 /*
5762 * Prune it directly in the generated document
5763 * except for single text nodes.
5764 */
Daniel Veillard0df3bc32004-06-08 12:03:41 +00005765 if (((list->type == XML_TEXT_NODE) &&
5766 (list->next == NULL)) ||
5767 (ctxt->parseMode == XML_PARSE_READER)) {
Daniel Veillard62f313b2001-07-04 19:49:14 +00005768 list->parent = (xmlNodePtr) ent;
5769 list = NULL;
Daniel Veillard2d84a892002-12-30 00:01:08 +00005770 ent->owner = 1;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005771 } else {
Daniel Veillard2d84a892002-12-30 00:01:08 +00005772 ent->owner = 0;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005773 while (list != NULL) {
5774 list->parent = (xmlNodePtr) ctxt->node;
Daniel Veillard68e9e742002-11-16 15:35:11 +00005775 list->doc = ctxt->myDoc;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005776 if (list->next == NULL)
5777 ent->last = list;
5778 list = list->next;
Daniel Veillard8107a222002-01-13 14:10:10 +00005779 }
Daniel Veillard62f313b2001-07-04 19:49:14 +00005780 list = ent->children;
Daniel Veillard81273902003-09-30 00:43:48 +00005781#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +00005782 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
5783 xmlAddEntityReference(ent, list, NULL);
Daniel Veillard81273902003-09-30 00:43:48 +00005784#endif /* LIBXML_LEGACY_ENABLED */
Daniel Veillard62f313b2001-07-04 19:49:14 +00005785 }
5786 } else {
Daniel Veillard2d84a892002-12-30 00:01:08 +00005787 ent->owner = 1;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005788 while (list != NULL) {
5789 list->parent = (xmlNodePtr) ent;
5790 if (list->next == NULL)
5791 ent->last = list;
5792 list = list->next;
5793 }
Owen Taylor3473f882001-02-23 17:55:21 +00005794 }
5795 } else {
5796 xmlFreeNodeList(list);
Daniel Veillard62f313b2001-07-04 19:49:14 +00005797 list = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005798 }
William M. Brackb670e2e2003-09-27 01:05:55 +00005799 } else if ((ret != XML_ERR_OK) &&
5800 (ret != XML_WAR_UNDECLARED_ENTITY)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005801 xmlFatalErr(ctxt, ret, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005802 } else if (list != NULL) {
5803 xmlFreeNodeList(list);
Daniel Veillard62f313b2001-07-04 19:49:14 +00005804 list = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005805 }
5806 }
5807 }
5808 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
5809 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
5810 /*
5811 * Create a node.
5812 */
5813 ctxt->sax->reference(ctxt->userData, ent->name);
5814 return;
5815 } else if (ctxt->replaceEntities) {
William M. Brack1227fb32004-10-25 23:17:53 +00005816 /*
5817 * There is a problem on the handling of _private for entities
5818 * (bug 155816): Should we copy the content of the field from
5819 * the entity (possibly overwriting some value set by the user
5820 * when a copy is created), should we leave it alone, or should
5821 * we try to take care of different situations? The problem
5822 * is exacerbated by the usage of this field by the xmlReader.
5823 * To fix this bug, we look at _private on the created node
5824 * and, if it's NULL, we copy in whatever was in the entity.
5825 * If it's not NULL we leave it alone. This is somewhat of a
5826 * hack - maybe we should have further tests to determine
5827 * what to do.
5828 */
Owen Taylor3473f882001-02-23 17:55:21 +00005829 if ((ctxt->node != NULL) && (ent->children != NULL)) {
5830 /*
5831 * Seems we are generating the DOM content, do
Daniel Veillard62f313b2001-07-04 19:49:14 +00005832 * a simple tree copy for all references except the first
Daniel Veillard0df3bc32004-06-08 12:03:41 +00005833 * In the first occurrence list contains the replacement.
5834 * progressive == 2 means we are operating on the Reader
5835 * and since nodes are discarded we must copy all the time.
Owen Taylor3473f882001-02-23 17:55:21 +00005836 */
Daniel Veillard0df3bc32004-06-08 12:03:41 +00005837 if (((list == NULL) && (ent->owner == 0)) ||
5838 (ctxt->parseMode == XML_PARSE_READER)) {
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00005839 xmlNodePtr nw = NULL, cur, firstChild = NULL;
Daniel Veillard0df3bc32004-06-08 12:03:41 +00005840
5841 /*
5842 * when operating on a reader, the entities definitions
5843 * are always owning the entities subtree.
5844 if (ctxt->parseMode == XML_PARSE_READER)
5845 ent->owner = 1;
5846 */
5847
Daniel Veillard62f313b2001-07-04 19:49:14 +00005848 cur = ent->children;
5849 while (cur != NULL) {
Daniel Veillard03a53c32004-10-26 16:06:51 +00005850 nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00005851 if (nw != NULL) {
William M. Brack1227fb32004-10-25 23:17:53 +00005852 if (nw->_private == NULL)
5853 nw->_private = cur->_private;
Daniel Veillard8f872442003-01-09 23:19:02 +00005854 if (firstChild == NULL){
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00005855 firstChild = nw;
Daniel Veillard8f872442003-01-09 23:19:02 +00005856 }
Daniel Veillard0df3bc32004-06-08 12:03:41 +00005857 nw = xmlAddChild(ctxt->node, nw);
Daniel Veillard8107a222002-01-13 14:10:10 +00005858 }
Daniel Veillard0df3bc32004-06-08 12:03:41 +00005859 if (cur == ent->last) {
5860 /*
5861 * needed to detect some strange empty
5862 * node cases in the reader tests
5863 */
5864 if ((ctxt->parseMode == XML_PARSE_READER) &&
5865 (nw->type == XML_ELEMENT_NODE) &&
5866 (nw->children == NULL))
5867 nw->extra = 1;
5868
Daniel Veillard62f313b2001-07-04 19:49:14 +00005869 break;
Daniel Veillard0df3bc32004-06-08 12:03:41 +00005870 }
Daniel Veillard62f313b2001-07-04 19:49:14 +00005871 cur = cur->next;
5872 }
Daniel Veillard81273902003-09-30 00:43:48 +00005873#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +00005874 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00005875 xmlAddEntityReference(ent, firstChild, nw);
Daniel Veillard81273902003-09-30 00:43:48 +00005876#endif /* LIBXML_LEGACY_ENABLED */
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00005877 } else if (list == NULL) {
5878 xmlNodePtr nw = NULL, cur, next, last,
5879 firstChild = NULL;
5880 /*
5881 * Copy the entity child list and make it the new
5882 * entity child list. The goal is to make sure any
5883 * ID or REF referenced will be the one from the
5884 * document content and not the entity copy.
5885 */
5886 cur = ent->children;
5887 ent->children = NULL;
5888 last = ent->last;
5889 ent->last = NULL;
5890 while (cur != NULL) {
5891 next = cur->next;
5892 cur->next = NULL;
5893 cur->parent = NULL;
Daniel Veillard03a53c32004-10-26 16:06:51 +00005894 nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00005895 if (nw != NULL) {
William M. Brack1227fb32004-10-25 23:17:53 +00005896 if (nw->_private == NULL)
5897 nw->_private = cur->_private;
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00005898 if (firstChild == NULL){
5899 firstChild = cur;
5900 }
5901 xmlAddChild((xmlNodePtr) ent, nw);
5902 xmlAddChild(ctxt->node, cur);
5903 }
5904 if (cur == last)
5905 break;
5906 cur = next;
5907 }
5908 ent->owner = 1;
Daniel Veillard81273902003-09-30 00:43:48 +00005909#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00005910 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
5911 xmlAddEntityReference(ent, firstChild, nw);
Daniel Veillard81273902003-09-30 00:43:48 +00005912#endif /* LIBXML_LEGACY_ENABLED */
Daniel Veillard62f313b2001-07-04 19:49:14 +00005913 } else {
Daniel Veillard370ba3d2004-10-25 16:23:56 +00005914 const xmlChar *nbktext;
5915
Daniel Veillard62f313b2001-07-04 19:49:14 +00005916 /*
5917 * the name change is to avoid coalescing of the
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005918 * node with a possible previous text one which
5919 * would make ent->children a dangling pointer
Daniel Veillard62f313b2001-07-04 19:49:14 +00005920 */
Daniel Veillard370ba3d2004-10-25 16:23:56 +00005921 nbktext = xmlDictLookup(ctxt->dict, BAD_CAST "nbktext",
5922 -1);
Daniel Veillard62f313b2001-07-04 19:49:14 +00005923 if (ent->children->type == XML_TEXT_NODE)
Daniel Veillard370ba3d2004-10-25 16:23:56 +00005924 ent->children->name = nbktext;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005925 if ((ent->last != ent->children) &&
5926 (ent->last->type == XML_TEXT_NODE))
Daniel Veillard370ba3d2004-10-25 16:23:56 +00005927 ent->last->name = nbktext;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005928 xmlAddChildList(ctxt->node, ent->children);
5929 }
5930
Owen Taylor3473f882001-02-23 17:55:21 +00005931 /*
5932 * This is to avoid a nasty side effect, see
5933 * characters() in SAX.c
5934 */
5935 ctxt->nodemem = 0;
5936 ctxt->nodelen = 0;
5937 return;
5938 } else {
5939 /*
5940 * Probably running in SAX mode
5941 */
5942 xmlParserInputPtr input;
5943
5944 input = xmlNewEntityInputStream(ctxt, ent);
5945 xmlPushInput(ctxt, input);
5946 if ((ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) &&
Daniel Veillarda07050d2003-10-19 14:46:32 +00005947 (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
5948 (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +00005949 xmlParseTextDecl(ctxt);
5950 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5951 /*
5952 * The XML REC instructs us to stop parsing right here
5953 */
5954 ctxt->instate = XML_PARSER_EOF;
5955 return;
5956 }
5957 if (input->standalone == 1) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005958 xmlFatalErr(ctxt, XML_ERR_EXT_ENTITY_STANDALONE,
5959 NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005960 }
5961 }
5962 return;
5963 }
5964 }
5965 } else {
5966 val = ent->content;
5967 if (val == NULL) return;
5968 /*
5969 * inline the entity.
5970 */
5971 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5972 (!ctxt->disableSAX))
5973 ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
5974 }
5975 }
5976}
5977
5978/**
5979 * xmlParseEntityRef:
5980 * @ctxt: an XML parser context
5981 *
5982 * parse ENTITY references declarations
5983 *
5984 * [68] EntityRef ::= '&' Name ';'
5985 *
5986 * [ WFC: Entity Declared ]
5987 * In a document without any DTD, a document with only an internal DTD
5988 * subset which contains no parameter entity references, or a document
5989 * with "standalone='yes'", the Name given in the entity reference
5990 * must match that in an entity declaration, except that well-formed
5991 * documents need not declare any of the following entities: amp, lt,
5992 * gt, apos, quot. The declaration of a parameter entity must precede
5993 * any reference to it. Similarly, the declaration of a general entity
5994 * must precede any reference to it which appears in a default value in an
5995 * attribute-list declaration. Note that if entities are declared in the
5996 * external subset or in external parameter entities, a non-validating
5997 * processor is not obligated to read and process their declarations;
5998 * for such documents, the rule that an entity must be declared is a
5999 * well-formedness constraint only if standalone='yes'.
6000 *
6001 * [ WFC: Parsed Entity ]
6002 * An entity reference must not contain the name of an unparsed entity
6003 *
6004 * Returns the xmlEntityPtr if found, or NULL otherwise.
6005 */
6006xmlEntityPtr
6007xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006008 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00006009 xmlEntityPtr ent = NULL;
6010
6011 GROW;
6012
6013 if (RAW == '&') {
6014 NEXT;
6015 name = xmlParseName(ctxt);
6016 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006017 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6018 "xmlParseEntityRef: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006019 } else {
6020 if (RAW == ';') {
6021 NEXT;
6022 /*
6023 * Ask first SAX for entity resolution, otherwise try the
6024 * predefined set.
6025 */
6026 if (ctxt->sax != NULL) {
6027 if (ctxt->sax->getEntity != NULL)
6028 ent = ctxt->sax->getEntity(ctxt->userData, name);
Daniel Veillard39eb88b2003-03-11 11:21:28 +00006029 if ((ctxt->wellFormed == 1 ) && (ent == NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00006030 ent = xmlGetPredefinedEntity(name);
Daniel Veillard39eb88b2003-03-11 11:21:28 +00006031 if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
6032 (ctxt->userData==ctxt)) {
Daniel Veillard1af9a412003-08-20 22:54:39 +00006033 ent = xmlSAX2GetEntity(ctxt, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00006034 }
Owen Taylor3473f882001-02-23 17:55:21 +00006035 }
6036 /*
6037 * [ WFC: Entity Declared ]
6038 * In a document without any DTD, a document with only an
6039 * internal DTD subset which contains no parameter entity
6040 * references, or a document with "standalone='yes'", the
6041 * Name given in the entity reference must match that in an
6042 * entity declaration, except that well-formed documents
6043 * need not declare any of the following entities: amp, lt,
6044 * gt, apos, quot.
6045 * The declaration of a parameter entity must precede any
6046 * reference to it.
6047 * Similarly, the declaration of a general entity must
6048 * precede any reference to it which appears in a default
6049 * value in an attribute-list declaration. Note that if
6050 * entities are declared in the external subset or in
6051 * external parameter entities, a non-validating processor
6052 * is not obligated to read and process their declarations;
6053 * for such documents, the rule that an entity must be
6054 * declared is a well-formedness constraint only if
6055 * standalone='yes'.
6056 */
6057 if (ent == NULL) {
6058 if ((ctxt->standalone == 1) ||
6059 ((ctxt->hasExternalSubset == 0) &&
6060 (ctxt->hasPErefs == 0))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006061 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00006062 "Entity '%s' not defined\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006063 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00006064 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00006065 "Entity '%s' not defined\n", name);
6066 }
Daniel Veillardf403d292003-10-05 13:51:35 +00006067 ctxt->valid = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00006068 }
6069
6070 /*
6071 * [ WFC: Parsed Entity ]
6072 * An entity reference must not contain the name of an
6073 * unparsed entity
6074 */
6075 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006076 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00006077 "Entity reference to unparsed entity %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006078 }
6079
6080 /*
6081 * [ WFC: No External Entity References ]
6082 * Attribute values cannot contain direct or indirect
6083 * entity references to external entities.
6084 */
6085 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
6086 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006087 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
6088 "Attribute references external entity '%s'\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006089 }
6090 /*
6091 * [ WFC: No < in Attribute Values ]
6092 * The replacement text of any entity referred to directly or
6093 * indirectly in an attribute value (other than "&lt;") must
6094 * not contain a <.
6095 */
6096 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
6097 (ent != NULL) &&
6098 (!xmlStrEqual(ent->name, BAD_CAST "lt")) &&
6099 (ent->content != NULL) &&
6100 (xmlStrchr(ent->content, '<'))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006101 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
Owen Taylor3473f882001-02-23 17:55:21 +00006102 "'<' in entity '%s' is not allowed in attributes values\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006103 }
6104
6105 /*
6106 * Internal check, no parameter entities here ...
6107 */
6108 else {
6109 switch (ent->etype) {
6110 case XML_INTERNAL_PARAMETER_ENTITY:
6111 case XML_EXTERNAL_PARAMETER_ENTITY:
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006112 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
6113 "Attempt to reference the parameter entity '%s'\n",
6114 name);
Owen Taylor3473f882001-02-23 17:55:21 +00006115 break;
6116 default:
6117 break;
6118 }
6119 }
6120
6121 /*
6122 * [ WFC: No Recursion ]
6123 * A parsed entity must not contain a recursive reference
6124 * to itself, either directly or indirectly.
6125 * Done somewhere else
6126 */
6127
6128 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006129 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006130 }
Owen Taylor3473f882001-02-23 17:55:21 +00006131 }
6132 }
6133 return(ent);
6134}
6135
6136/**
6137 * xmlParseStringEntityRef:
6138 * @ctxt: an XML parser context
6139 * @str: a pointer to an index in the string
6140 *
6141 * parse ENTITY references declarations, but this version parses it from
6142 * a string value.
6143 *
6144 * [68] EntityRef ::= '&' Name ';'
6145 *
6146 * [ WFC: Entity Declared ]
6147 * In a document without any DTD, a document with only an internal DTD
6148 * subset which contains no parameter entity references, or a document
6149 * with "standalone='yes'", the Name given in the entity reference
6150 * must match that in an entity declaration, except that well-formed
6151 * documents need not declare any of the following entities: amp, lt,
6152 * gt, apos, quot. The declaration of a parameter entity must precede
6153 * any reference to it. Similarly, the declaration of a general entity
6154 * must precede any reference to it which appears in a default value in an
6155 * attribute-list declaration. Note that if entities are declared in the
6156 * external subset or in external parameter entities, a non-validating
6157 * processor is not obligated to read and process their declarations;
6158 * for such documents, the rule that an entity must be declared is a
6159 * well-formedness constraint only if standalone='yes'.
6160 *
6161 * [ WFC: Parsed Entity ]
6162 * An entity reference must not contain the name of an unparsed entity
6163 *
6164 * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
6165 * is updated to the current location in the string.
6166 */
6167xmlEntityPtr
6168xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
6169 xmlChar *name;
6170 const xmlChar *ptr;
6171 xmlChar cur;
6172 xmlEntityPtr ent = NULL;
6173
6174 if ((str == NULL) || (*str == NULL))
6175 return(NULL);
6176 ptr = *str;
6177 cur = *ptr;
6178 if (cur == '&') {
6179 ptr++;
6180 cur = *ptr;
6181 name = xmlParseStringName(ctxt, &ptr);
6182 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006183 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6184 "xmlParseStringEntityRef: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006185 } else {
6186 if (*ptr == ';') {
6187 ptr++;
6188 /*
6189 * Ask first SAX for entity resolution, otherwise try the
6190 * predefined set.
6191 */
6192 if (ctxt->sax != NULL) {
6193 if (ctxt->sax->getEntity != NULL)
6194 ent = ctxt->sax->getEntity(ctxt->userData, name);
6195 if (ent == NULL)
6196 ent = xmlGetPredefinedEntity(name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00006197 if ((ent == NULL) && (ctxt->userData==ctxt)) {
Daniel Veillard1af9a412003-08-20 22:54:39 +00006198 ent = xmlSAX2GetEntity(ctxt, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00006199 }
Owen Taylor3473f882001-02-23 17:55:21 +00006200 }
6201 /*
6202 * [ WFC: Entity Declared ]
6203 * In a document without any DTD, a document with only an
6204 * internal DTD subset which contains no parameter entity
6205 * references, or a document with "standalone='yes'", the
6206 * Name given in the entity reference must match that in an
6207 * entity declaration, except that well-formed documents
6208 * need not declare any of the following entities: amp, lt,
6209 * gt, apos, quot.
6210 * The declaration of a parameter entity must precede any
6211 * reference to it.
6212 * Similarly, the declaration of a general entity must
6213 * precede any reference to it which appears in a default
6214 * value in an attribute-list declaration. Note that if
6215 * entities are declared in the external subset or in
6216 * external parameter entities, a non-validating processor
6217 * is not obligated to read and process their declarations;
6218 * for such documents, the rule that an entity must be
6219 * declared is a well-formedness constraint only if
6220 * standalone='yes'.
6221 */
6222 if (ent == NULL) {
6223 if ((ctxt->standalone == 1) ||
6224 ((ctxt->hasExternalSubset == 0) &&
6225 (ctxt->hasPErefs == 0))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006226 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00006227 "Entity '%s' not defined\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006228 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00006229 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
Daniel Veillard24eb9782003-10-04 21:08:09 +00006230 "Entity '%s' not defined\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00006231 name);
Owen Taylor3473f882001-02-23 17:55:21 +00006232 }
Daniel Veillard24eb9782003-10-04 21:08:09 +00006233 /* TODO ? check regressions ctxt->valid = 0; */
Owen Taylor3473f882001-02-23 17:55:21 +00006234 }
6235
6236 /*
6237 * [ WFC: Parsed Entity ]
6238 * An entity reference must not contain the name of an
6239 * unparsed entity
6240 */
6241 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
Daniel Veillardf403d292003-10-05 13:51:35 +00006242 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00006243 "Entity reference to unparsed entity %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006244 }
6245
6246 /*
6247 * [ WFC: No External Entity References ]
6248 * Attribute values cannot contain direct or indirect
6249 * entity references to external entities.
6250 */
6251 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
6252 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
Daniel Veillardf403d292003-10-05 13:51:35 +00006253 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
Owen Taylor3473f882001-02-23 17:55:21 +00006254 "Attribute references external entity '%s'\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006255 }
6256 /*
6257 * [ WFC: No < in Attribute Values ]
6258 * The replacement text of any entity referred to directly or
6259 * indirectly in an attribute value (other than "&lt;") must
6260 * not contain a <.
6261 */
6262 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
6263 (ent != NULL) &&
6264 (!xmlStrEqual(ent->name, BAD_CAST "lt")) &&
6265 (ent->content != NULL) &&
6266 (xmlStrchr(ent->content, '<'))) {
Daniel Veillardf403d292003-10-05 13:51:35 +00006267 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
6268 "'<' in entity '%s' is not allowed in attributes values\n",
6269 name);
Owen Taylor3473f882001-02-23 17:55:21 +00006270 }
6271
6272 /*
6273 * Internal check, no parameter entities here ...
6274 */
6275 else {
6276 switch (ent->etype) {
6277 case XML_INTERNAL_PARAMETER_ENTITY:
6278 case XML_EXTERNAL_PARAMETER_ENTITY:
Daniel Veillardf403d292003-10-05 13:51:35 +00006279 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
6280 "Attempt to reference the parameter entity '%s'\n",
6281 name);
Owen Taylor3473f882001-02-23 17:55:21 +00006282 break;
6283 default:
6284 break;
6285 }
6286 }
6287
6288 /*
6289 * [ WFC: No Recursion ]
6290 * A parsed entity must not contain a recursive reference
6291 * to itself, either directly or indirectly.
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006292 * Done somewhere else
Owen Taylor3473f882001-02-23 17:55:21 +00006293 */
6294
6295 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006296 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006297 }
6298 xmlFree(name);
6299 }
6300 }
6301 *str = ptr;
6302 return(ent);
6303}
6304
6305/**
6306 * xmlParsePEReference:
6307 * @ctxt: an XML parser context
6308 *
6309 * parse PEReference declarations
6310 * The entity content is handled directly by pushing it's content as
6311 * a new input stream.
6312 *
6313 * [69] PEReference ::= '%' Name ';'
6314 *
6315 * [ WFC: No Recursion ]
6316 * A parsed entity must not contain a recursive
6317 * reference to itself, either directly or indirectly.
6318 *
6319 * [ WFC: Entity Declared ]
6320 * In a document without any DTD, a document with only an internal DTD
6321 * subset which contains no parameter entity references, or a document
6322 * with "standalone='yes'", ... ... The declaration of a parameter
6323 * entity must precede any reference to it...
6324 *
6325 * [ VC: Entity Declared ]
6326 * In a document with an external subset or external parameter entities
6327 * with "standalone='no'", ... ... The declaration of a parameter entity
6328 * must precede any reference to it...
6329 *
6330 * [ WFC: In DTD ]
6331 * Parameter-entity references may only appear in the DTD.
6332 * NOTE: misleading but this is handled.
6333 */
6334void
Daniel Veillard8f597c32003-10-06 08:19:27 +00006335xmlParsePEReference(xmlParserCtxtPtr ctxt)
6336{
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006337 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00006338 xmlEntityPtr entity = NULL;
6339 xmlParserInputPtr input;
6340
6341 if (RAW == '%') {
6342 NEXT;
Daniel Veillard76d66f42001-05-16 21:05:17 +00006343 name = xmlParseName(ctxt);
Daniel Veillard8f597c32003-10-06 08:19:27 +00006344 if (name == NULL) {
6345 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6346 "xmlParsePEReference: no name\n");
6347 } else {
6348 if (RAW == ';') {
6349 NEXT;
6350 if ((ctxt->sax != NULL) &&
6351 (ctxt->sax->getParameterEntity != NULL))
6352 entity = ctxt->sax->getParameterEntity(ctxt->userData,
6353 name);
6354 if (entity == NULL) {
6355 /*
6356 * [ WFC: Entity Declared ]
6357 * In a document without any DTD, a document with only an
6358 * internal DTD subset which contains no parameter entity
6359 * references, or a document with "standalone='yes'", ...
6360 * ... The declaration of a parameter entity must precede
6361 * any reference to it...
6362 */
6363 if ((ctxt->standalone == 1) ||
6364 ((ctxt->hasExternalSubset == 0) &&
6365 (ctxt->hasPErefs == 0))) {
6366 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
6367 "PEReference: %%%s; not found\n",
6368 name);
6369 } else {
6370 /*
6371 * [ VC: Entity Declared ]
6372 * In a document with an external subset or external
6373 * parameter entities with "standalone='no'", ...
6374 * ... The declaration of a parameter entity must
6375 * precede any reference to it...
6376 */
6377 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
6378 "PEReference: %%%s; not found\n",
6379 name, NULL);
6380 ctxt->valid = 0;
6381 }
6382 } else {
6383 /*
6384 * Internal checking in case the entity quest barfed
6385 */
6386 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
6387 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
6388 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
6389 "Internal: %%%s; is not a parameter entity\n",
6390 name, NULL);
6391 } else if (ctxt->input->free != deallocblankswrapper) {
6392 input =
6393 xmlNewBlanksWrapperInputStream(ctxt, entity);
6394 xmlPushInput(ctxt, input);
6395 } else {
6396 /*
6397 * TODO !!!
6398 * handle the extra spaces added before and after
6399 * c.f. http://www.w3.org/TR/REC-xml#as-PE
6400 */
6401 input = xmlNewEntityInputStream(ctxt, entity);
6402 xmlPushInput(ctxt, input);
6403 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
Daniel Veillarda07050d2003-10-19 14:46:32 +00006404 (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
William M. Brack76e95df2003-10-18 16:20:14 +00006405 (IS_BLANK_CH(NXT(5)))) {
Daniel Veillard8f597c32003-10-06 08:19:27 +00006406 xmlParseTextDecl(ctxt);
6407 if (ctxt->errNo ==
6408 XML_ERR_UNSUPPORTED_ENCODING) {
6409 /*
6410 * The XML REC instructs us to stop parsing
6411 * right here
6412 */
6413 ctxt->instate = XML_PARSER_EOF;
6414 return;
6415 }
6416 }
6417 }
6418 }
6419 ctxt->hasPErefs = 1;
6420 } else {
6421 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
6422 }
6423 }
Owen Taylor3473f882001-02-23 17:55:21 +00006424 }
6425}
6426
6427/**
6428 * xmlParseStringPEReference:
6429 * @ctxt: an XML parser context
6430 * @str: a pointer to an index in the string
6431 *
6432 * parse PEReference declarations
6433 *
6434 * [69] PEReference ::= '%' Name ';'
6435 *
6436 * [ WFC: No Recursion ]
6437 * A parsed entity must not contain a recursive
6438 * reference to itself, either directly or indirectly.
6439 *
6440 * [ WFC: Entity Declared ]
6441 * In a document without any DTD, a document with only an internal DTD
6442 * subset which contains no parameter entity references, or a document
6443 * with "standalone='yes'", ... ... The declaration of a parameter
6444 * entity must precede any reference to it...
6445 *
6446 * [ VC: Entity Declared ]
6447 * In a document with an external subset or external parameter entities
6448 * with "standalone='no'", ... ... The declaration of a parameter entity
6449 * must precede any reference to it...
6450 *
6451 * [ WFC: In DTD ]
6452 * Parameter-entity references may only appear in the DTD.
6453 * NOTE: misleading but this is handled.
6454 *
6455 * Returns the string of the entity content.
6456 * str is updated to the current value of the index
6457 */
6458xmlEntityPtr
6459xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
6460 const xmlChar *ptr;
6461 xmlChar cur;
6462 xmlChar *name;
6463 xmlEntityPtr entity = NULL;
6464
6465 if ((str == NULL) || (*str == NULL)) return(NULL);
6466 ptr = *str;
6467 cur = *ptr;
6468 if (cur == '%') {
6469 ptr++;
6470 cur = *ptr;
6471 name = xmlParseStringName(ctxt, &ptr);
6472 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006473 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6474 "xmlParseStringPEReference: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006475 } else {
6476 cur = *ptr;
6477 if (cur == ';') {
6478 ptr++;
6479 cur = *ptr;
6480 if ((ctxt->sax != NULL) &&
6481 (ctxt->sax->getParameterEntity != NULL))
6482 entity = ctxt->sax->getParameterEntity(ctxt->userData,
6483 name);
6484 if (entity == NULL) {
6485 /*
6486 * [ WFC: Entity Declared ]
6487 * In a document without any DTD, a document with only an
6488 * internal DTD subset which contains no parameter entity
6489 * references, or a document with "standalone='yes'", ...
6490 * ... The declaration of a parameter entity must precede
6491 * any reference to it...
6492 */
6493 if ((ctxt->standalone == 1) ||
6494 ((ctxt->hasExternalSubset == 0) &&
6495 (ctxt->hasPErefs == 0))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006496 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00006497 "PEReference: %%%s; not found\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006498 } else {
6499 /*
6500 * [ VC: Entity Declared ]
6501 * In a document with an external subset or external
6502 * parameter entities with "standalone='no'", ...
6503 * ... The declaration of a parameter entity must
6504 * precede any reference to it...
6505 */
Daniel Veillard24eb9782003-10-04 21:08:09 +00006506 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
6507 "PEReference: %%%s; not found\n",
6508 name, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006509 ctxt->valid = 0;
6510 }
6511 } else {
6512 /*
6513 * Internal checking in case the entity quest barfed
6514 */
6515 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
6516 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00006517 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
6518 "%%%s; is not a parameter entity\n",
6519 name, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006520 }
6521 }
6522 ctxt->hasPErefs = 1;
6523 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006524 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006525 }
6526 xmlFree(name);
6527 }
6528 }
6529 *str = ptr;
6530 return(entity);
6531}
6532
6533/**
6534 * xmlParseDocTypeDecl:
6535 * @ctxt: an XML parser context
6536 *
6537 * parse a DOCTYPE declaration
6538 *
6539 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
6540 * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
6541 *
6542 * [ VC: Root Element Type ]
6543 * The Name in the document type declaration must match the element
6544 * type of the root element.
6545 */
6546
6547void
6548xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006549 const xmlChar *name = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00006550 xmlChar *ExternalID = NULL;
6551 xmlChar *URI = NULL;
6552
6553 /*
6554 * We know that '<!DOCTYPE' has been detected.
6555 */
6556 SKIP(9);
6557
6558 SKIP_BLANKS;
6559
6560 /*
6561 * Parse the DOCTYPE name.
6562 */
6563 name = xmlParseName(ctxt);
6564 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006565 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6566 "xmlParseDocTypeDecl : no DOCTYPE name !\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006567 }
6568 ctxt->intSubName = name;
6569
6570 SKIP_BLANKS;
6571
6572 /*
6573 * Check for SystemID and ExternalID
6574 */
6575 URI = xmlParseExternalID(ctxt, &ExternalID, 1);
6576
6577 if ((URI != NULL) || (ExternalID != NULL)) {
6578 ctxt->hasExternalSubset = 1;
6579 }
6580 ctxt->extSubURI = URI;
6581 ctxt->extSubSystem = ExternalID;
6582
6583 SKIP_BLANKS;
6584
6585 /*
6586 * Create and update the internal subset.
6587 */
6588 if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
6589 (!ctxt->disableSAX))
6590 ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
6591
6592 /*
6593 * Is there any internal subset declarations ?
6594 * they are handled separately in xmlParseInternalSubset()
6595 */
6596 if (RAW == '[')
6597 return;
6598
6599 /*
6600 * We should be at the end of the DOCTYPE declaration.
6601 */
6602 if (RAW != '>') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006603 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006604 }
6605 NEXT;
6606}
6607
6608/**
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006609 * xmlParseInternalSubset:
Owen Taylor3473f882001-02-23 17:55:21 +00006610 * @ctxt: an XML parser context
6611 *
6612 * parse the internal subset declaration
6613 *
6614 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
6615 */
6616
Daniel Veillard56a4cb82001-03-24 17:00:36 +00006617static void
Owen Taylor3473f882001-02-23 17:55:21 +00006618xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
6619 /*
6620 * Is there any DTD definition ?
6621 */
6622 if (RAW == '[') {
6623 ctxt->instate = XML_PARSER_DTD;
6624 NEXT;
6625 /*
6626 * Parse the succession of Markup declarations and
6627 * PEReferences.
6628 * Subsequence (markupdecl | PEReference | S)*
6629 */
6630 while (RAW != ']') {
6631 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00006632 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00006633
6634 SKIP_BLANKS;
6635 xmlParseMarkupDecl(ctxt);
6636 xmlParsePEReference(ctxt);
6637
6638 /*
6639 * Pop-up of finished entities.
6640 */
6641 while ((RAW == 0) && (ctxt->inputNr > 1))
6642 xmlPopInput(ctxt);
6643
6644 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006645 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
Owen Taylor3473f882001-02-23 17:55:21 +00006646 "xmlParseInternalSubset: error detected in Markup declaration\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006647 break;
6648 }
6649 }
6650 if (RAW == ']') {
6651 NEXT;
6652 SKIP_BLANKS;
6653 }
6654 }
6655
6656 /*
6657 * We should be at the end of the DOCTYPE declaration.
6658 */
6659 if (RAW != '>') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006660 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006661 }
6662 NEXT;
6663}
6664
Daniel Veillard81273902003-09-30 00:43:48 +00006665#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +00006666/**
6667 * xmlParseAttribute:
6668 * @ctxt: an XML parser context
6669 * @value: a xmlChar ** used to store the value of the attribute
6670 *
6671 * parse an attribute
6672 *
6673 * [41] Attribute ::= Name Eq AttValue
6674 *
6675 * [ WFC: No External Entity References ]
6676 * Attribute values cannot contain direct or indirect entity references
6677 * to external entities.
6678 *
6679 * [ WFC: No < in Attribute Values ]
6680 * The replacement text of any entity referred to directly or indirectly in
6681 * an attribute value (other than "&lt;") must not contain a <.
6682 *
6683 * [ VC: Attribute Value Type ]
6684 * The attribute must have been declared; the value must be of the type
6685 * declared for it.
6686 *
6687 * [25] Eq ::= S? '=' S?
6688 *
6689 * With namespace:
6690 *
6691 * [NS 11] Attribute ::= QName Eq AttValue
6692 *
6693 * Also the case QName == xmlns:??? is handled independently as a namespace
6694 * definition.
6695 *
6696 * Returns the attribute name, and the value in *value.
6697 */
6698
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006699const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00006700xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006701 const xmlChar *name;
6702 xmlChar *val;
Owen Taylor3473f882001-02-23 17:55:21 +00006703
6704 *value = NULL;
Daniel Veillard878eab02002-02-19 13:46:09 +00006705 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00006706 name = xmlParseName(ctxt);
6707 if (name == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00006708 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006709 "error parsing attribute name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006710 return(NULL);
6711 }
6712
6713 /*
6714 * read the value
6715 */
6716 SKIP_BLANKS;
6717 if (RAW == '=') {
6718 NEXT;
6719 SKIP_BLANKS;
6720 val = xmlParseAttValue(ctxt);
6721 ctxt->instate = XML_PARSER_CONTENT;
6722 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006723 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
Owen Taylor3473f882001-02-23 17:55:21 +00006724 "Specification mandate value for attribute %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006725 return(NULL);
6726 }
6727
6728 /*
6729 * Check that xml:lang conforms to the specification
6730 * No more registered as an error, just generate a warning now
6731 * since this was deprecated in XML second edition
6732 */
6733 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
6734 if (!xmlCheckLanguageID(val)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00006735 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
6736 "Malformed value for xml:lang : %s\n",
6737 val, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006738 }
6739 }
6740
6741 /*
6742 * Check that xml:space conforms to the specification
6743 */
6744 if (xmlStrEqual(name, BAD_CAST "xml:space")) {
6745 if (xmlStrEqual(val, BAD_CAST "default"))
6746 *(ctxt->space) = 0;
6747 else if (xmlStrEqual(val, BAD_CAST "preserve"))
6748 *(ctxt->space) = 1;
6749 else {
Daniel Veillardd8925572005-06-08 22:34:55 +00006750 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
Daniel Veillard642104e2003-03-26 16:32:05 +00006751"Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
Daniel Veillardd8925572005-06-08 22:34:55 +00006752 val, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006753 }
6754 }
6755
6756 *value = val;
6757 return(name);
6758}
6759
6760/**
6761 * xmlParseStartTag:
6762 * @ctxt: an XML parser context
6763 *
6764 * parse a start of tag either for rule element or
6765 * EmptyElement. In both case we don't parse the tag closing chars.
6766 *
6767 * [40] STag ::= '<' Name (S Attribute)* S? '>'
6768 *
6769 * [ WFC: Unique Att Spec ]
6770 * No attribute name may appear more than once in the same start-tag or
6771 * empty-element tag.
6772 *
6773 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
6774 *
6775 * [ WFC: Unique Att Spec ]
6776 * No attribute name may appear more than once in the same start-tag or
6777 * empty-element tag.
6778 *
6779 * With namespace:
6780 *
6781 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
6782 *
6783 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
6784 *
6785 * Returns the element name parsed
6786 */
6787
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006788const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00006789xmlParseStartTag(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006790 const xmlChar *name;
6791 const xmlChar *attname;
Owen Taylor3473f882001-02-23 17:55:21 +00006792 xmlChar *attvalue;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00006793 const xmlChar **atts = ctxt->atts;
Owen Taylor3473f882001-02-23 17:55:21 +00006794 int nbatts = 0;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00006795 int maxatts = ctxt->maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00006796 int i;
6797
6798 if (RAW != '<') return(NULL);
Daniel Veillard21a0f912001-02-25 19:54:14 +00006799 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00006800
6801 name = xmlParseName(ctxt);
6802 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006803 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00006804 "xmlParseStartTag: invalid element name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006805 return(NULL);
6806 }
6807
6808 /*
6809 * Now parse the attributes, it ends up with the ending
6810 *
6811 * (S Attribute)* S?
6812 */
6813 SKIP_BLANKS;
6814 GROW;
6815
Daniel Veillard21a0f912001-02-25 19:54:14 +00006816 while ((RAW != '>') &&
6817 ((RAW != '/') || (NXT(1) != '>')) &&
Daniel Veillard73b013f2003-09-30 12:36:01 +00006818 (IS_BYTE_CHAR(RAW))) {
Owen Taylor3473f882001-02-23 17:55:21 +00006819 const xmlChar *q = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00006820 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00006821
6822 attname = xmlParseAttribute(ctxt, &attvalue);
6823 if ((attname != NULL) && (attvalue != NULL)) {
6824 /*
6825 * [ WFC: Unique Att Spec ]
6826 * No attribute name may appear more than once in the same
6827 * start-tag or empty-element tag.
6828 */
6829 for (i = 0; i < nbatts;i += 2) {
6830 if (xmlStrEqual(atts[i], attname)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006831 xmlErrAttributeDup(ctxt, NULL, attname);
Owen Taylor3473f882001-02-23 17:55:21 +00006832 xmlFree(attvalue);
6833 goto failed;
6834 }
6835 }
Owen Taylor3473f882001-02-23 17:55:21 +00006836 /*
6837 * Add the pair to atts
6838 */
6839 if (atts == NULL) {
Daniel Veillard6155d8a2003-08-19 15:01:28 +00006840 maxatts = 22; /* allow for 10 attrs by default */
6841 atts = (const xmlChar **)
6842 xmlMalloc(maxatts * sizeof(xmlChar *));
Owen Taylor3473f882001-02-23 17:55:21 +00006843 if (atts == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006844 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006845 if (attvalue != NULL)
6846 xmlFree(attvalue);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006847 goto failed;
Owen Taylor3473f882001-02-23 17:55:21 +00006848 }
Daniel Veillard6155d8a2003-08-19 15:01:28 +00006849 ctxt->atts = atts;
6850 ctxt->maxatts = maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00006851 } else if (nbatts + 4 > maxatts) {
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006852 const xmlChar **n;
6853
Owen Taylor3473f882001-02-23 17:55:21 +00006854 maxatts *= 2;
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006855 n = (const xmlChar **) xmlRealloc((void *) atts,
Daniel Veillard6155d8a2003-08-19 15:01:28 +00006856 maxatts * sizeof(const xmlChar *));
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006857 if (n == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006858 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006859 if (attvalue != NULL)
6860 xmlFree(attvalue);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006861 goto failed;
Owen Taylor3473f882001-02-23 17:55:21 +00006862 }
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006863 atts = n;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00006864 ctxt->atts = atts;
6865 ctxt->maxatts = maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00006866 }
6867 atts[nbatts++] = attname;
6868 atts[nbatts++] = attvalue;
6869 atts[nbatts] = NULL;
6870 atts[nbatts + 1] = NULL;
6871 } else {
Owen Taylor3473f882001-02-23 17:55:21 +00006872 if (attvalue != NULL)
6873 xmlFree(attvalue);
6874 }
6875
6876failed:
6877
Daniel Veillard3772de32002-12-17 10:31:45 +00006878 GROW
Owen Taylor3473f882001-02-23 17:55:21 +00006879 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
6880 break;
William M. Brack76e95df2003-10-18 16:20:14 +00006881 if (!IS_BLANK_CH(RAW)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006882 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6883 "attributes construct error\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006884 }
6885 SKIP_BLANKS;
Daniel Veillard02111c12003-02-24 19:14:52 +00006886 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
6887 (attname == NULL) && (attvalue == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006888 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
6889 "xmlParseStartTag: problem parsing attributes\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006890 break;
6891 }
Daniel Veillard6155d8a2003-08-19 15:01:28 +00006892 SHRINK;
Owen Taylor3473f882001-02-23 17:55:21 +00006893 GROW;
6894 }
6895
6896 /*
6897 * SAX: Start of Element !
6898 */
6899 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
Daniel Veillard6155d8a2003-08-19 15:01:28 +00006900 (!ctxt->disableSAX)) {
6901 if (nbatts > 0)
6902 ctxt->sax->startElement(ctxt->userData, name, atts);
6903 else
6904 ctxt->sax->startElement(ctxt->userData, name, NULL);
6905 }
Owen Taylor3473f882001-02-23 17:55:21 +00006906
6907 if (atts != NULL) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006908 /* Free only the content strings */
6909 for (i = 1;i < nbatts;i+=2)
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006910 if (atts[i] != NULL)
6911 xmlFree((xmlChar *) atts[i]);
Owen Taylor3473f882001-02-23 17:55:21 +00006912 }
6913 return(name);
6914}
6915
6916/**
Daniel Veillard0fb18932003-09-07 09:14:37 +00006917 * xmlParseEndTag1:
Owen Taylor3473f882001-02-23 17:55:21 +00006918 * @ctxt: an XML parser context
Daniel Veillard0fb18932003-09-07 09:14:37 +00006919 * @line: line of the start tag
6920 * @nsNr: number of namespaces on the start tag
Owen Taylor3473f882001-02-23 17:55:21 +00006921 *
6922 * parse an end of tag
6923 *
6924 * [42] ETag ::= '</' Name S? '>'
6925 *
6926 * With namespace
6927 *
6928 * [NS 9] ETag ::= '</' QName S? '>'
6929 */
6930
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00006931static void
Daniel Veillard0fb18932003-09-07 09:14:37 +00006932xmlParseEndTag1(xmlParserCtxtPtr ctxt, int line) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006933 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00006934
6935 GROW;
6936 if ((RAW != '<') || (NXT(1) != '/')) {
Daniel Veillardf403d292003-10-05 13:51:35 +00006937 xmlFatalErrMsg(ctxt, XML_ERR_LTSLASH_REQUIRED,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006938 "xmlParseEndTag: '</' not found\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006939 return;
6940 }
6941 SKIP(2);
6942
Daniel Veillard46de64e2002-05-29 08:21:33 +00006943 name = xmlParseNameAndCompare(ctxt,ctxt->name);
Owen Taylor3473f882001-02-23 17:55:21 +00006944
6945 /*
6946 * We should definitely be at the ending "S? '>'" part
6947 */
6948 GROW;
6949 SKIP_BLANKS;
Daniel Veillard73b013f2003-09-30 12:36:01 +00006950 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006951 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006952 } else
Daniel Veillard21a0f912001-02-25 19:54:14 +00006953 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00006954
6955 /*
6956 * [ WFC: Element Type Match ]
6957 * The Name in an element's end-tag must match the element type in the
6958 * start-tag.
6959 *
6960 */
Daniel Veillard46de64e2002-05-29 08:21:33 +00006961 if (name != (xmlChar*)1) {
Daniel Veillardf403d292003-10-05 13:51:35 +00006962 if (name == NULL) name = BAD_CAST "unparseable";
6963 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00006964 "Opening and ending tag mismatch: %s line %d and %s\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00006965 ctxt->name, line, name);
Owen Taylor3473f882001-02-23 17:55:21 +00006966 }
6967
6968 /*
6969 * SAX: End of Tag
6970 */
6971 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
6972 (!ctxt->disableSAX))
Daniel Veillard46de64e2002-05-29 08:21:33 +00006973 ctxt->sax->endElement(ctxt->userData, ctxt->name);
Owen Taylor3473f882001-02-23 17:55:21 +00006974
Daniel Veillarde57ec792003-09-10 10:50:59 +00006975 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006976 spacePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006977 return;
6978}
6979
6980/**
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00006981 * xmlParseEndTag:
6982 * @ctxt: an XML parser context
6983 *
6984 * parse an end of tag
6985 *
6986 * [42] ETag ::= '</' Name S? '>'
6987 *
6988 * With namespace
6989 *
6990 * [NS 9] ETag ::= '</' QName S? '>'
6991 */
6992
6993void
6994xmlParseEndTag(xmlParserCtxtPtr ctxt) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00006995 xmlParseEndTag1(ctxt, 0);
6996}
Daniel Veillard81273902003-09-30 00:43:48 +00006997#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillard0fb18932003-09-07 09:14:37 +00006998
6999/************************************************************************
7000 * *
7001 * SAX 2 specific operations *
7002 * *
7003 ************************************************************************/
7004
7005static const xmlChar *
7006xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) {
7007 int len = 0, l;
7008 int c;
7009 int count = 0;
7010
7011 /*
7012 * Handler for more complex cases
7013 */
7014 GROW;
7015 c = CUR_CHAR(l);
7016 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007017 (!IS_LETTER(c) && (c != '_'))) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007018 return(NULL);
7019 }
7020
7021 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
William M. Brack871611b2003-10-18 04:53:14 +00007022 ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007023 (c == '.') || (c == '-') || (c == '_') ||
William M. Brack871611b2003-10-18 04:53:14 +00007024 (IS_COMBINING(c)) ||
7025 (IS_EXTENDER(c)))) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007026 if (count++ > 100) {
7027 count = 0;
7028 GROW;
7029 }
7030 len += l;
7031 NEXTL(l);
7032 c = CUR_CHAR(l);
7033 }
7034 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
7035}
7036
7037/*
7038 * xmlGetNamespace:
7039 * @ctxt: an XML parser context
7040 * @prefix: the prefix to lookup
7041 *
7042 * Lookup the namespace name for the @prefix (which ca be NULL)
7043 * The prefix must come from the @ctxt->dict dictionnary
7044 *
7045 * Returns the namespace name or NULL if not bound
7046 */
7047static const xmlChar *
7048xmlGetNamespace(xmlParserCtxtPtr ctxt, const xmlChar *prefix) {
7049 int i;
7050
Daniel Veillarde57ec792003-09-10 10:50:59 +00007051 if (prefix == ctxt->str_xml) return(ctxt->str_xml_ns);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007052 for (i = ctxt->nsNr - 2;i >= 0;i-=2)
Daniel Veillarde57ec792003-09-10 10:50:59 +00007053 if (ctxt->nsTab[i] == prefix) {
7054 if ((prefix == NULL) && (*ctxt->nsTab[i + 1] == 0))
7055 return(NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007056 return(ctxt->nsTab[i + 1]);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007057 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007058 return(NULL);
7059}
7060
7061/**
7062 * xmlParseNCName:
7063 * @ctxt: an XML parser context
Daniel Veillardc82c57e2004-01-12 16:24:34 +00007064 * @len: lenght of the string parsed
Daniel Veillard0fb18932003-09-07 09:14:37 +00007065 *
7066 * parse an XML name.
7067 *
7068 * [4NS] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
7069 * CombiningChar | Extender
7070 *
7071 * [5NS] NCName ::= (Letter | '_') (NCNameChar)*
7072 *
7073 * Returns the Name parsed or NULL
7074 */
7075
7076static const xmlChar *
7077xmlParseNCName(xmlParserCtxtPtr ctxt) {
7078 const xmlChar *in;
7079 const xmlChar *ret;
7080 int count = 0;
7081
7082 /*
7083 * Accelerator for simple ASCII names
7084 */
7085 in = ctxt->input->cur;
7086 if (((*in >= 0x61) && (*in <= 0x7A)) ||
7087 ((*in >= 0x41) && (*in <= 0x5A)) ||
7088 (*in == '_')) {
7089 in++;
7090 while (((*in >= 0x61) && (*in <= 0x7A)) ||
7091 ((*in >= 0x41) && (*in <= 0x5A)) ||
7092 ((*in >= 0x30) && (*in <= 0x39)) ||
7093 (*in == '_') || (*in == '-') ||
7094 (*in == '.'))
7095 in++;
7096 if ((*in > 0) && (*in < 0x80)) {
7097 count = in - ctxt->input->cur;
7098 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
7099 ctxt->input->cur = in;
7100 ctxt->nbChars += count;
7101 ctxt->input->col += count;
7102 if (ret == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007103 xmlErrMemory(ctxt, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007104 }
7105 return(ret);
7106 }
7107 }
7108 return(xmlParseNCNameComplex(ctxt));
7109}
7110
7111/**
7112 * xmlParseQName:
7113 * @ctxt: an XML parser context
7114 * @prefix: pointer to store the prefix part
7115 *
7116 * parse an XML Namespace QName
7117 *
7118 * [6] QName ::= (Prefix ':')? LocalPart
7119 * [7] Prefix ::= NCName
7120 * [8] LocalPart ::= NCName
7121 *
7122 * Returns the Name parsed or NULL
7123 */
7124
7125static const xmlChar *
7126xmlParseQName(xmlParserCtxtPtr ctxt, const xmlChar **prefix) {
7127 const xmlChar *l, *p;
7128
7129 GROW;
7130
7131 l = xmlParseNCName(ctxt);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007132 if (l == NULL) {
7133 if (CUR == ':') {
7134 l = xmlParseName(ctxt);
7135 if (l != NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007136 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
7137 "Failed to parse QName '%s'\n", l, NULL, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007138 *prefix = NULL;
7139 return(l);
7140 }
7141 }
7142 return(NULL);
7143 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007144 if (CUR == ':') {
7145 NEXT;
7146 p = l;
7147 l = xmlParseNCName(ctxt);
7148 if (l == NULL) {
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007149 xmlChar *tmp;
7150
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007151 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
7152 "Failed to parse QName '%s:'\n", p, NULL, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007153 tmp = xmlBuildQName(BAD_CAST "", p, NULL, 0);
7154 p = xmlDictLookup(ctxt->dict, tmp, -1);
7155 if (tmp != NULL) xmlFree(tmp);
7156 *prefix = NULL;
7157 return(p);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007158 }
7159 if (CUR == ':') {
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007160 xmlChar *tmp;
7161
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007162 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
7163 "Failed to parse QName '%s:%s:'\n", p, l, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007164 NEXT;
7165 tmp = (xmlChar *) xmlParseName(ctxt);
7166 if (tmp != NULL) {
7167 tmp = xmlBuildQName(tmp, l, NULL, 0);
7168 l = xmlDictLookup(ctxt->dict, tmp, -1);
7169 if (tmp != NULL) xmlFree(tmp);
7170 *prefix = p;
7171 return(l);
7172 }
7173 tmp = xmlBuildQName(BAD_CAST "", l, NULL, 0);
7174 l = xmlDictLookup(ctxt->dict, tmp, -1);
7175 if (tmp != NULL) xmlFree(tmp);
7176 *prefix = p;
7177 return(l);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007178 }
7179 *prefix = p;
7180 } else
7181 *prefix = NULL;
7182 return(l);
7183}
7184
7185/**
7186 * xmlParseQNameAndCompare:
7187 * @ctxt: an XML parser context
7188 * @name: the localname
7189 * @prefix: the prefix, if any.
7190 *
7191 * parse an XML name and compares for match
7192 * (specialized for endtag parsing)
7193 *
7194 * Returns NULL for an illegal name, (xmlChar*) 1 for success
7195 * and the name for mismatch
7196 */
7197
7198static const xmlChar *
7199xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *name,
7200 xmlChar const *prefix) {
7201 const xmlChar *cmp = name;
7202 const xmlChar *in;
7203 const xmlChar *ret;
7204 const xmlChar *prefix2;
7205
7206 if (prefix == NULL) return(xmlParseNameAndCompare(ctxt, name));
7207
7208 GROW;
7209 in = ctxt->input->cur;
7210
7211 cmp = prefix;
7212 while (*in != 0 && *in == *cmp) {
7213 ++in;
7214 ++cmp;
7215 }
7216 if ((*cmp == 0) && (*in == ':')) {
7217 in++;
7218 cmp = name;
7219 while (*in != 0 && *in == *cmp) {
7220 ++in;
7221 ++cmp;
7222 }
William M. Brack76e95df2003-10-18 16:20:14 +00007223 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007224 /* success */
7225 ctxt->input->cur = in;
7226 return((const xmlChar*) 1);
7227 }
7228 }
7229 /*
7230 * all strings coms from the dictionary, equality can be done directly
7231 */
7232 ret = xmlParseQName (ctxt, &prefix2);
7233 if ((ret == name) && (prefix == prefix2))
7234 return((const xmlChar*) 1);
7235 return ret;
7236}
7237
7238/**
7239 * xmlParseAttValueInternal:
7240 * @ctxt: an XML parser context
7241 * @len: attribute len result
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007242 * @alloc: whether the attribute was reallocated as a new string
7243 * @normalize: if 1 then further non-CDATA normalization must be done
Daniel Veillard0fb18932003-09-07 09:14:37 +00007244 *
7245 * parse a value for an attribute.
7246 * NOTE: if no normalization is needed, the routine will return pointers
7247 * directly from the data buffer.
7248 *
7249 * 3.3.3 Attribute-Value Normalization:
7250 * Before the value of an attribute is passed to the application or
7251 * checked for validity, the XML processor must normalize it as follows:
7252 * - a character reference is processed by appending the referenced
7253 * character to the attribute value
7254 * - an entity reference is processed by recursively processing the
7255 * replacement text of the entity
7256 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
7257 * appending #x20 to the normalized value, except that only a single
7258 * #x20 is appended for a "#xD#xA" sequence that is part of an external
7259 * parsed entity or the literal entity value of an internal parsed entity
7260 * - other characters are processed by appending them to the normalized value
7261 * If the declared value is not CDATA, then the XML processor must further
7262 * process the normalized attribute value by discarding any leading and
7263 * trailing space (#x20) characters, and by replacing sequences of space
7264 * (#x20) characters by a single space (#x20) character.
7265 * All attributes for which no declaration has been read should be treated
7266 * by a non-validating parser as if declared CDATA.
7267 *
7268 * Returns the AttValue parsed or NULL. The value has to be freed by the
7269 * caller if it was copied, this can be detected by val[*len] == 0.
7270 */
7271
7272static xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007273xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *len, int *alloc,
7274 int normalize)
Daniel Veillarde57ec792003-09-10 10:50:59 +00007275{
Daniel Veillard0fb18932003-09-07 09:14:37 +00007276 xmlChar limit = 0;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007277 const xmlChar *in = NULL, *start, *end, *last;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007278 xmlChar *ret = NULL;
7279
7280 GROW;
7281 in = (xmlChar *) CUR_PTR;
7282 if (*in != '"' && *in != '\'') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007283 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007284 return (NULL);
7285 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007286 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007287
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007288 /*
7289 * try to handle in this routine the most common case where no
7290 * allocation of a new string is required and where content is
7291 * pure ASCII.
7292 */
7293 limit = *in++;
7294 end = ctxt->input->end;
7295 start = in;
7296 if (in >= end) {
7297 const xmlChar *oldbase = ctxt->input->base;
7298 GROW;
7299 if (oldbase != ctxt->input->base) {
7300 long delta = ctxt->input->base - oldbase;
7301 start = start + delta;
7302 in = in + delta;
7303 }
7304 end = ctxt->input->end;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007305 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007306 if (normalize) {
7307 /*
7308 * Skip any leading spaces
7309 */
7310 while ((in < end) && (*in != limit) &&
7311 ((*in == 0x20) || (*in == 0x9) ||
7312 (*in == 0xA) || (*in == 0xD))) {
7313 in++;
7314 start = in;
7315 if (in >= end) {
7316 const xmlChar *oldbase = ctxt->input->base;
7317 GROW;
7318 if (oldbase != ctxt->input->base) {
7319 long delta = ctxt->input->base - oldbase;
7320 start = start + delta;
7321 in = in + delta;
7322 }
7323 end = ctxt->input->end;
7324 }
7325 }
7326 while ((in < end) && (*in != limit) && (*in >= 0x20) &&
7327 (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
7328 if ((*in++ == 0x20) && (*in == 0x20)) break;
7329 if (in >= end) {
7330 const xmlChar *oldbase = ctxt->input->base;
7331 GROW;
7332 if (oldbase != ctxt->input->base) {
7333 long delta = ctxt->input->base - oldbase;
7334 start = start + delta;
7335 in = in + delta;
7336 }
7337 end = ctxt->input->end;
7338 }
7339 }
7340 last = in;
7341 /*
7342 * skip the trailing blanks
7343 */
Daniel Veillardc6e20e42003-09-11 16:30:26 +00007344 while ((last[-1] == 0x20) && (last > start)) last--;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007345 while ((in < end) && (*in != limit) &&
7346 ((*in == 0x20) || (*in == 0x9) ||
7347 (*in == 0xA) || (*in == 0xD))) {
7348 in++;
7349 if (in >= end) {
7350 const xmlChar *oldbase = ctxt->input->base;
7351 GROW;
7352 if (oldbase != ctxt->input->base) {
7353 long delta = ctxt->input->base - oldbase;
7354 start = start + delta;
7355 in = in + delta;
7356 last = last + delta;
7357 }
7358 end = ctxt->input->end;
7359 }
7360 }
7361 if (*in != limit) goto need_complex;
7362 } else {
7363 while ((in < end) && (*in != limit) && (*in >= 0x20) &&
7364 (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
7365 in++;
7366 if (in >= end) {
7367 const xmlChar *oldbase = ctxt->input->base;
7368 GROW;
7369 if (oldbase != ctxt->input->base) {
7370 long delta = ctxt->input->base - oldbase;
7371 start = start + delta;
7372 in = in + delta;
7373 }
7374 end = ctxt->input->end;
7375 }
7376 }
7377 last = in;
7378 if (*in != limit) goto need_complex;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007379 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007380 in++;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007381 if (len != NULL) {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007382 *len = last - start;
7383 ret = (xmlChar *) start;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007384 } else {
Daniel Veillarde57ec792003-09-10 10:50:59 +00007385 if (alloc) *alloc = 1;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007386 ret = xmlStrndup(start, last - start);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007387 }
7388 CUR_PTR = in;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007389 if (alloc) *alloc = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007390 return ret;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007391need_complex:
7392 if (alloc) *alloc = 1;
7393 return xmlParseAttValueComplex(ctxt, len, normalize);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007394}
7395
7396/**
7397 * xmlParseAttribute2:
7398 * @ctxt: an XML parser context
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007399 * @pref: the element prefix
7400 * @elem: the element name
7401 * @prefix: a xmlChar ** used to store the value of the attribute prefix
Daniel Veillard0fb18932003-09-07 09:14:37 +00007402 * @value: a xmlChar ** used to store the value of the attribute
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007403 * @len: an int * to save the length of the attribute
7404 * @alloc: an int * to indicate if the attribute was allocated
Daniel Veillard0fb18932003-09-07 09:14:37 +00007405 *
7406 * parse an attribute in the new SAX2 framework.
7407 *
7408 * Returns the attribute name, and the value in *value, .
7409 */
7410
7411static const xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007412xmlParseAttribute2(xmlParserCtxtPtr ctxt,
7413 const xmlChar *pref, const xmlChar *elem,
7414 const xmlChar **prefix, xmlChar **value,
7415 int *len, int *alloc) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007416 const xmlChar *name;
Daniel Veillardd8925572005-06-08 22:34:55 +00007417 xmlChar *val, *internal_val = NULL;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007418 int normalize = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007419
7420 *value = NULL;
7421 GROW;
7422 name = xmlParseQName(ctxt, prefix);
7423 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007424 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7425 "error parsing attribute name\n");
Daniel Veillard0fb18932003-09-07 09:14:37 +00007426 return(NULL);
7427 }
7428
7429 /*
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007430 * get the type if needed
7431 */
7432 if (ctxt->attsSpecial != NULL) {
7433 int type;
7434
7435 type = (int) (long) xmlHashQLookup2(ctxt->attsSpecial,
7436 pref, elem, *prefix, name);
7437 if (type != 0) normalize = 1;
7438 }
7439
7440 /*
Daniel Veillard0fb18932003-09-07 09:14:37 +00007441 * read the value
7442 */
7443 SKIP_BLANKS;
7444 if (RAW == '=') {
7445 NEXT;
7446 SKIP_BLANKS;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007447 val = xmlParseAttValueInternal(ctxt, len, alloc, normalize);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007448 ctxt->instate = XML_PARSER_CONTENT;
7449 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00007450 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
Daniel Veillard0fb18932003-09-07 09:14:37 +00007451 "Specification mandate value for attribute %s\n", name);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007452 return(NULL);
7453 }
7454
Daniel Veillardd8925572005-06-08 22:34:55 +00007455 if (*prefix == ctxt->str_xml) {
7456 /*
7457 * Check that xml:lang conforms to the specification
7458 * No more registered as an error, just generate a warning now
7459 * since this was deprecated in XML second edition
7460 */
7461 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "lang"))) {
7462 internal_val = xmlStrndup(val, *len);
7463 if (!xmlCheckLanguageID(internal_val)) {
7464 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
7465 "Malformed value for xml:lang : %s\n",
7466 internal_val, NULL);
7467 }
7468 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007469
Daniel Veillardd8925572005-06-08 22:34:55 +00007470 /*
7471 * Check that xml:space conforms to the specification
7472 */
7473 if (xmlStrEqual(name, BAD_CAST "space")) {
7474 internal_val = xmlStrndup(val, *len);
7475 if (xmlStrEqual(internal_val, BAD_CAST "default"))
7476 *(ctxt->space) = 0;
7477 else if (xmlStrEqual(internal_val, BAD_CAST "preserve"))
7478 *(ctxt->space) = 1;
7479 else {
7480 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
Daniel Veillard0fb18932003-09-07 09:14:37 +00007481"Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
Daniel Veillardd8925572005-06-08 22:34:55 +00007482 internal_val, NULL);
7483 }
7484 }
7485 if (internal_val) {
7486 xmlFree(internal_val);
7487 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007488 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007489
7490 *value = val;
7491 return(name);
7492}
7493
7494/**
7495 * xmlParseStartTag2:
7496 * @ctxt: an XML parser context
7497 *
7498 * parse a start of tag either for rule element or
7499 * EmptyElement. In both case we don't parse the tag closing chars.
7500 * This routine is called when running SAX2 parsing
7501 *
7502 * [40] STag ::= '<' Name (S Attribute)* S? '>'
7503 *
7504 * [ WFC: Unique Att Spec ]
7505 * No attribute name may appear more than once in the same start-tag or
7506 * empty-element tag.
7507 *
7508 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
7509 *
7510 * [ WFC: Unique Att Spec ]
7511 * No attribute name may appear more than once in the same start-tag or
7512 * empty-element tag.
7513 *
7514 * With namespace:
7515 *
7516 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
7517 *
7518 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
7519 *
7520 * Returns the element name parsed
7521 */
7522
7523static const xmlChar *
7524xmlParseStartTag2(xmlParserCtxtPtr ctxt, const xmlChar **pref,
Daniel Veillardc82c57e2004-01-12 16:24:34 +00007525 const xmlChar **URI, int *tlen) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007526 const xmlChar *localname;
7527 const xmlChar *prefix;
7528 const xmlChar *attname;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007529 const xmlChar *aprefix;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007530 const xmlChar *nsname;
7531 xmlChar *attvalue;
7532 const xmlChar **atts = ctxt->atts;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007533 int maxatts = ctxt->maxatts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007534 int nratts, nbatts, nbdef;
7535 int i, j, nbNs, attval;
7536 const xmlChar *base;
7537 unsigned long cur;
Daniel Veillard365cf672005-06-09 08:18:24 +00007538 int nsNr = ctxt->nsNr;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007539
7540 if (RAW != '<') return(NULL);
7541 NEXT1;
7542
7543 /*
7544 * NOTE: it is crucial with the SAX2 API to never call SHRINK beyond that
7545 * point since the attribute values may be stored as pointers to
7546 * the buffer and calling SHRINK would destroy them !
7547 * The Shrinking is only possible once the full set of attribute
7548 * callbacks have been done.
7549 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00007550reparse:
Daniel Veillard0fb18932003-09-07 09:14:37 +00007551 SHRINK;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007552 base = ctxt->input->base;
7553 cur = ctxt->input->cur - ctxt->input->base;
7554 nbatts = 0;
7555 nratts = 0;
7556 nbdef = 0;
7557 nbNs = 0;
7558 attval = 0;
Daniel Veillard365cf672005-06-09 08:18:24 +00007559 /* Forget any namespaces added during an earlier parse of this element. */
7560 ctxt->nsNr = nsNr;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007561
7562 localname = xmlParseQName(ctxt, &prefix);
7563 if (localname == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007564 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7565 "StartTag: invalid element name\n");
Daniel Veillard0fb18932003-09-07 09:14:37 +00007566 return(NULL);
7567 }
Daniel Veillardc82c57e2004-01-12 16:24:34 +00007568 *tlen = ctxt->input->cur - ctxt->input->base - cur;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007569
7570 /*
7571 * Now parse the attributes, it ends up with the ending
7572 *
7573 * (S Attribute)* S?
7574 */
7575 SKIP_BLANKS;
7576 GROW;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007577 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007578
7579 while ((RAW != '>') &&
7580 ((RAW != '/') || (NXT(1) != '>')) &&
Daniel Veillard73b013f2003-09-30 12:36:01 +00007581 (IS_BYTE_CHAR(RAW))) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007582 const xmlChar *q = CUR_PTR;
7583 unsigned int cons = ctxt->input->consumed;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007584 int len = -1, alloc = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007585
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007586 attname = xmlParseAttribute2(ctxt, prefix, localname,
7587 &aprefix, &attvalue, &len, &alloc);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007588 if ((attname != NULL) && (attvalue != NULL)) {
7589 if (len < 0) len = xmlStrlen(attvalue);
7590 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00007591 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
7592 xmlURIPtr uri;
7593
7594 if (*URL != 0) {
7595 uri = xmlParseURI((const char *) URL);
7596 if (uri == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00007597 xmlWarningMsg(ctxt, XML_WAR_NS_URI,
7598 "xmlns: %s not a valid URI\n",
7599 URL, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007600 } else {
7601 if (uri->scheme == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00007602 xmlWarningMsg(ctxt, XML_WAR_NS_URI_RELATIVE,
7603 "xmlns: URI %s is not absolute\n",
7604 URL, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007605 }
7606 xmlFreeURI(uri);
7607 }
7608 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007609 /*
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007610 * check that it's not a defined namespace
Daniel Veillard0fb18932003-09-07 09:14:37 +00007611 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007612 for (j = 1;j <= nbNs;j++)
7613 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
7614 break;
7615 if (j <= nbNs)
7616 xmlErrAttributeDup(ctxt, NULL, attname);
7617 else
7618 if (nsPush(ctxt, NULL, URL) > 0) nbNs++;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007619 if (alloc != 0) xmlFree(attvalue);
7620 SKIP_BLANKS;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007621 continue;
7622 }
7623 if (aprefix == ctxt->str_xmlns) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00007624 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
7625 xmlURIPtr uri;
7626
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007627 if (attname == ctxt->str_xml) {
7628 if (URL != ctxt->str_xml_ns) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007629 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
7630 "xml namespace prefix mapped to wrong URI\n",
7631 NULL, NULL, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007632 }
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007633 /*
7634 * Do not keep a namespace definition node
7635 */
7636 if (alloc != 0) xmlFree(attvalue);
7637 SKIP_BLANKS;
7638 continue;
7639 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00007640 uri = xmlParseURI((const char *) URL);
7641 if (uri == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00007642 xmlWarningMsg(ctxt, XML_WAR_NS_URI,
7643 "xmlns:%s: '%s' is not a valid URI\n",
7644 attname, URL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007645 } else {
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007646 if ((ctxt->pedantic) && (uri->scheme == NULL)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00007647 xmlWarningMsg(ctxt, XML_WAR_NS_URI_RELATIVE,
7648 "xmlns:%s: URI %s is not absolute\n",
7649 attname, URL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007650 }
7651 xmlFreeURI(uri);
7652 }
7653
Daniel Veillard0fb18932003-09-07 09:14:37 +00007654 /*
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007655 * check that it's not a defined namespace
Daniel Veillard0fb18932003-09-07 09:14:37 +00007656 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007657 for (j = 1;j <= nbNs;j++)
7658 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
7659 break;
7660 if (j <= nbNs)
7661 xmlErrAttributeDup(ctxt, aprefix, attname);
7662 else
7663 if (nsPush(ctxt, attname, URL) > 0) nbNs++;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007664 if (alloc != 0) xmlFree(attvalue);
7665 SKIP_BLANKS;
Daniel Veillardd3999c72004-03-10 16:27:03 +00007666 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007667 continue;
7668 }
7669
7670 /*
7671 * Add the pair to atts
7672 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00007673 if ((atts == NULL) || (nbatts + 5 > maxatts)) {
7674 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007675 if (attvalue[len] == 0)
7676 xmlFree(attvalue);
7677 goto failed;
7678 }
7679 maxatts = ctxt->maxatts;
7680 atts = ctxt->atts;
7681 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00007682 ctxt->attallocs[nratts++] = alloc;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007683 atts[nbatts++] = attname;
7684 atts[nbatts++] = aprefix;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007685 atts[nbatts++] = NULL; /* the URI will be fetched later */
Daniel Veillard0fb18932003-09-07 09:14:37 +00007686 atts[nbatts++] = attvalue;
7687 attvalue += len;
7688 atts[nbatts++] = attvalue;
7689 /*
7690 * tag if some deallocation is needed
7691 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00007692 if (alloc != 0) attval = 1;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007693 } else {
7694 if ((attvalue != NULL) && (attvalue[len] == 0))
7695 xmlFree(attvalue);
7696 }
7697
7698failed:
7699
7700 GROW
Daniel Veillarde57ec792003-09-10 10:50:59 +00007701 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007702 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
7703 break;
William M. Brack76e95df2003-10-18 16:20:14 +00007704 if (!IS_BLANK_CH(RAW)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007705 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
7706 "attributes construct error\n");
William M. Brack13dfa872004-09-18 04:52:08 +00007707 break;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007708 }
7709 SKIP_BLANKS;
7710 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
7711 (attname == NULL) && (attvalue == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007712 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
Daniel Veillard0fb18932003-09-07 09:14:37 +00007713 "xmlParseStartTag: problem parsing attributes\n");
Daniel Veillard0fb18932003-09-07 09:14:37 +00007714 break;
7715 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007716 GROW;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007717 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007718 }
7719
Daniel Veillard0fb18932003-09-07 09:14:37 +00007720 /*
Daniel Veillarde57ec792003-09-10 10:50:59 +00007721 * The attributes defaulting
7722 */
7723 if (ctxt->attsDefault != NULL) {
7724 xmlDefAttrsPtr defaults;
7725
7726 defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix);
7727 if (defaults != NULL) {
7728 for (i = 0;i < defaults->nbAttrs;i++) {
7729 attname = defaults->values[4 * i];
7730 aprefix = defaults->values[4 * i + 1];
7731
7732 /*
7733 * special work for namespaces defaulted defs
7734 */
7735 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
7736 /*
7737 * check that it's not a defined namespace
7738 */
7739 for (j = 1;j <= nbNs;j++)
7740 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
7741 break;
7742 if (j <= nbNs) continue;
7743
7744 nsname = xmlGetNamespace(ctxt, NULL);
7745 if (nsname != defaults->values[4 * i + 2]) {
7746 if (nsPush(ctxt, NULL,
7747 defaults->values[4 * i + 2]) > 0)
7748 nbNs++;
7749 }
7750 } else if (aprefix == ctxt->str_xmlns) {
7751 /*
7752 * check that it's not a defined namespace
7753 */
7754 for (j = 1;j <= nbNs;j++)
7755 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
7756 break;
7757 if (j <= nbNs) continue;
7758
7759 nsname = xmlGetNamespace(ctxt, attname);
7760 if (nsname != defaults->values[2]) {
7761 if (nsPush(ctxt, attname,
7762 defaults->values[4 * i + 2]) > 0)
7763 nbNs++;
7764 }
7765 } else {
7766 /*
7767 * check that it's not a defined attribute
7768 */
7769 for (j = 0;j < nbatts;j+=5) {
7770 if ((attname == atts[j]) && (aprefix == atts[j+1]))
7771 break;
7772 }
7773 if (j < nbatts) continue;
7774
7775 if ((atts == NULL) || (nbatts + 5 > maxatts)) {
7776 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
Daniel Veillard9ee35f32003-09-28 00:19:54 +00007777 return(NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007778 }
7779 maxatts = ctxt->maxatts;
7780 atts = ctxt->atts;
7781 }
7782 atts[nbatts++] = attname;
7783 atts[nbatts++] = aprefix;
7784 if (aprefix == NULL)
7785 atts[nbatts++] = NULL;
7786 else
7787 atts[nbatts++] = xmlGetNamespace(ctxt, aprefix);
7788 atts[nbatts++] = defaults->values[4 * i + 2];
7789 atts[nbatts++] = defaults->values[4 * i + 3];
7790 nbdef++;
7791 }
7792 }
7793 }
7794 }
7795
Daniel Veillarde70c8772003-11-25 07:21:18 +00007796 /*
7797 * The attributes checkings
7798 */
7799 for (i = 0; i < nbatts;i += 5) {
Kasimier T. Buchcik455472f2005-04-29 10:04:43 +00007800 /*
7801 * The default namespace does not apply to attribute names.
7802 */
7803 if (atts[i + 1] != NULL) {
7804 nsname = xmlGetNamespace(ctxt, atts[i + 1]);
7805 if (nsname == NULL) {
7806 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
7807 "Namespace prefix %s for %s on %s is not defined\n",
7808 atts[i + 1], atts[i], localname);
7809 }
7810 atts[i + 2] = nsname;
7811 } else
7812 nsname = NULL;
Daniel Veillarde70c8772003-11-25 07:21:18 +00007813 /*
7814 * [ WFC: Unique Att Spec ]
7815 * No attribute name may appear more than once in the same
7816 * start-tag or empty-element tag.
7817 * As extended by the Namespace in XML REC.
7818 */
7819 for (j = 0; j < i;j += 5) {
7820 if (atts[i] == atts[j]) {
7821 if (atts[i+1] == atts[j+1]) {
7822 xmlErrAttributeDup(ctxt, atts[i+1], atts[i]);
7823 break;
7824 }
7825 if ((nsname != NULL) && (atts[j + 2] == nsname)) {
7826 xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED,
7827 "Namespaced Attribute %s in '%s' redefined\n",
7828 atts[i], nsname, NULL);
7829 break;
7830 }
7831 }
7832 }
7833 }
7834
Daniel Veillarde57ec792003-09-10 10:50:59 +00007835 nsname = xmlGetNamespace(ctxt, prefix);
7836 if ((prefix != NULL) && (nsname == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007837 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
7838 "Namespace prefix %s on %s is not defined\n",
7839 prefix, localname, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007840 }
7841 *pref = prefix;
7842 *URI = nsname;
7843
7844 /*
7845 * SAX: Start of Element !
7846 */
7847 if ((ctxt->sax != NULL) && (ctxt->sax->startElementNs != NULL) &&
7848 (!ctxt->disableSAX)) {
7849 if (nbNs > 0)
7850 ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
7851 nsname, nbNs, &ctxt->nsTab[ctxt->nsNr - 2 * nbNs],
7852 nbatts / 5, nbdef, atts);
7853 else
7854 ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
7855 nsname, 0, NULL, nbatts / 5, nbdef, atts);
7856 }
7857
7858 /*
7859 * Free up attribute allocated strings if needed
7860 */
7861 if (attval != 0) {
7862 for (i = 3,j = 0; j < nratts;i += 5,j++)
7863 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
7864 xmlFree((xmlChar *) atts[i]);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007865 }
7866
7867 return(localname);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007868
7869base_changed:
7870 /*
7871 * the attribute strings are valid iif the base didn't changed
7872 */
7873 if (attval != 0) {
7874 for (i = 3,j = 0; j < nratts;i += 5,j++)
7875 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
7876 xmlFree((xmlChar *) atts[i]);
7877 }
7878 ctxt->input->cur = ctxt->input->base + cur;
7879 if (ctxt->wellFormed == 1) {
7880 goto reparse;
7881 }
7882 return(NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007883}
7884
7885/**
7886 * xmlParseEndTag2:
7887 * @ctxt: an XML parser context
7888 * @line: line of the start tag
7889 * @nsNr: number of namespaces on the start tag
7890 *
7891 * parse an end of tag
7892 *
7893 * [42] ETag ::= '</' Name S? '>'
7894 *
7895 * With namespace
7896 *
7897 * [NS 9] ETag ::= '</' QName S? '>'
7898 */
7899
7900static void
7901xmlParseEndTag2(xmlParserCtxtPtr ctxt, const xmlChar *prefix,
Daniel Veillardc82c57e2004-01-12 16:24:34 +00007902 const xmlChar *URI, int line, int nsNr, int tlen) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007903 const xmlChar *name;
7904
7905 GROW;
7906 if ((RAW != '<') || (NXT(1) != '/')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007907 xmlFatalErr(ctxt, XML_ERR_LTSLASH_REQUIRED, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007908 return;
7909 }
7910 SKIP(2);
7911
William M. Brack13dfa872004-09-18 04:52:08 +00007912 if ((tlen > 0) && (xmlStrncmp(ctxt->input->cur, ctxt->name, tlen) == 0)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00007913 if (ctxt->input->cur[tlen] == '>') {
7914 ctxt->input->cur += tlen + 1;
7915 goto done;
7916 }
7917 ctxt->input->cur += tlen;
7918 name = (xmlChar*)1;
7919 } else {
7920 if (prefix == NULL)
7921 name = xmlParseNameAndCompare(ctxt, ctxt->name);
7922 else
7923 name = xmlParseQNameAndCompare(ctxt, ctxt->name, prefix);
7924 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007925
7926 /*
7927 * We should definitely be at the ending "S? '>'" part
7928 */
7929 GROW;
7930 SKIP_BLANKS;
Daniel Veillard73b013f2003-09-30 12:36:01 +00007931 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007932 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007933 } else
7934 NEXT1;
7935
7936 /*
7937 * [ WFC: Element Type Match ]
7938 * The Name in an element's end-tag must match the element type in the
7939 * start-tag.
7940 *
7941 */
7942 if (name != (xmlChar*)1) {
Daniel Veillardf403d292003-10-05 13:51:35 +00007943 if (name == NULL) name = BAD_CAST "unparseable";
7944 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
Daniel Veillard0fb18932003-09-07 09:14:37 +00007945 "Opening and ending tag mismatch: %s line %d and %s\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00007946 ctxt->name, line, name);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007947 }
7948
7949 /*
7950 * SAX: End of Tag
7951 */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00007952done:
Daniel Veillard0fb18932003-09-07 09:14:37 +00007953 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
7954 (!ctxt->disableSAX))
7955 ctxt->sax->endElementNs(ctxt->userData, ctxt->name, prefix, URI);
7956
Daniel Veillard0fb18932003-09-07 09:14:37 +00007957 spacePop(ctxt);
7958 if (nsNr != 0)
7959 nsPop(ctxt, nsNr);
7960 return;
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00007961}
7962
7963/**
Owen Taylor3473f882001-02-23 17:55:21 +00007964 * xmlParseCDSect:
7965 * @ctxt: an XML parser context
7966 *
7967 * Parse escaped pure raw content.
7968 *
7969 * [18] CDSect ::= CDStart CData CDEnd
7970 *
7971 * [19] CDStart ::= '<![CDATA['
7972 *
7973 * [20] Data ::= (Char* - (Char* ']]>' Char*))
7974 *
7975 * [21] CDEnd ::= ']]>'
7976 */
7977void
7978xmlParseCDSect(xmlParserCtxtPtr ctxt) {
7979 xmlChar *buf = NULL;
7980 int len = 0;
7981 int size = XML_PARSER_BUFFER_SIZE;
7982 int r, rl;
7983 int s, sl;
7984 int cur, l;
7985 int count = 0;
7986
Daniel Veillard8f597c32003-10-06 08:19:27 +00007987 /* Check 2.6.0 was NXT(0) not RAW */
Daniel Veillarda07050d2003-10-19 14:46:32 +00007988 if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
Owen Taylor3473f882001-02-23 17:55:21 +00007989 SKIP(9);
7990 } else
7991 return;
7992
7993 ctxt->instate = XML_PARSER_CDATA_SECTION;
7994 r = CUR_CHAR(rl);
William M. Brack871611b2003-10-18 04:53:14 +00007995 if (!IS_CHAR(r)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007996 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007997 ctxt->instate = XML_PARSER_CONTENT;
7998 return;
7999 }
8000 NEXTL(rl);
8001 s = CUR_CHAR(sl);
William M. Brack871611b2003-10-18 04:53:14 +00008002 if (!IS_CHAR(s)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008003 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008004 ctxt->instate = XML_PARSER_CONTENT;
8005 return;
8006 }
8007 NEXTL(sl);
8008 cur = CUR_CHAR(l);
Daniel Veillard3c908dc2003-04-19 00:07:51 +00008009 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00008010 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008011 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008012 return;
8013 }
William M. Brack871611b2003-10-18 04:53:14 +00008014 while (IS_CHAR(cur) &&
Owen Taylor3473f882001-02-23 17:55:21 +00008015 ((r != ']') || (s != ']') || (cur != '>'))) {
8016 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00008017 xmlChar *tmp;
8018
Owen Taylor3473f882001-02-23 17:55:21 +00008019 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00008020 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
8021 if (tmp == NULL) {
8022 xmlFree(buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008023 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008024 return;
8025 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00008026 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00008027 }
8028 COPY_BUF(rl,buf,len,r);
8029 r = s;
8030 rl = sl;
8031 s = cur;
8032 sl = l;
8033 count++;
8034 if (count > 50) {
8035 GROW;
8036 count = 0;
8037 }
8038 NEXTL(l);
8039 cur = CUR_CHAR(l);
8040 }
8041 buf[len] = 0;
8042 ctxt->instate = XML_PARSER_CONTENT;
8043 if (cur != '>') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00008044 xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00008045 "CData section not finished\n%.50s\n", buf);
Owen Taylor3473f882001-02-23 17:55:21 +00008046 xmlFree(buf);
8047 return;
8048 }
8049 NEXTL(l);
8050
8051 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00008052 * OK the buffer is to be consumed as cdata.
Owen Taylor3473f882001-02-23 17:55:21 +00008053 */
8054 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
8055 if (ctxt->sax->cdataBlock != NULL)
8056 ctxt->sax->cdataBlock(ctxt->userData, buf, len);
Daniel Veillard7583a592001-07-08 13:15:55 +00008057 else if (ctxt->sax->characters != NULL)
8058 ctxt->sax->characters(ctxt->userData, buf, len);
Owen Taylor3473f882001-02-23 17:55:21 +00008059 }
8060 xmlFree(buf);
8061}
8062
8063/**
8064 * xmlParseContent:
8065 * @ctxt: an XML parser context
8066 *
8067 * Parse a content:
8068 *
8069 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
8070 */
8071
8072void
8073xmlParseContent(xmlParserCtxtPtr ctxt) {
8074 GROW;
Daniel Veillardfdc91562002-07-01 21:52:03 +00008075 while ((RAW != 0) &&
Owen Taylor3473f882001-02-23 17:55:21 +00008076 ((RAW != '<') || (NXT(1) != '/'))) {
8077 const xmlChar *test = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00008078 unsigned int cons = ctxt->input->consumed;
Daniel Veillard21a0f912001-02-25 19:54:14 +00008079 const xmlChar *cur = ctxt->input->cur;
Owen Taylor3473f882001-02-23 17:55:21 +00008080
8081 /*
Owen Taylor3473f882001-02-23 17:55:21 +00008082 * First case : a Processing Instruction.
8083 */
Daniel Veillardfdc91562002-07-01 21:52:03 +00008084 if ((*cur == '<') && (cur[1] == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008085 xmlParsePI(ctxt);
8086 }
8087
8088 /*
8089 * Second case : a CDSection
8090 */
Daniel Veillard8f597c32003-10-06 08:19:27 +00008091 /* 2.6.0 test was *cur not RAW */
Daniel Veillarda07050d2003-10-19 14:46:32 +00008092 else if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008093 xmlParseCDSect(ctxt);
8094 }
8095
8096 /*
8097 * Third case : a comment
8098 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00008099 else if ((*cur == '<') && (NXT(1) == '!') &&
Owen Taylor3473f882001-02-23 17:55:21 +00008100 (NXT(2) == '-') && (NXT(3) == '-')) {
8101 xmlParseComment(ctxt);
8102 ctxt->instate = XML_PARSER_CONTENT;
8103 }
8104
8105 /*
8106 * Fourth case : a sub-element.
8107 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00008108 else if (*cur == '<') {
Owen Taylor3473f882001-02-23 17:55:21 +00008109 xmlParseElement(ctxt);
8110 }
8111
8112 /*
8113 * Fifth case : a reference. If if has not been resolved,
8114 * parsing returns it's Name, create the node
8115 */
8116
Daniel Veillard21a0f912001-02-25 19:54:14 +00008117 else if (*cur == '&') {
Owen Taylor3473f882001-02-23 17:55:21 +00008118 xmlParseReference(ctxt);
8119 }
8120
8121 /*
8122 * Last case, text. Note that References are handled directly.
8123 */
8124 else {
8125 xmlParseCharData(ctxt, 0);
8126 }
8127
8128 GROW;
8129 /*
8130 * Pop-up of finished entities.
8131 */
Daniel Veillard561b7f82002-03-20 21:55:57 +00008132 while ((RAW == 0) && (ctxt->inputNr > 1))
Owen Taylor3473f882001-02-23 17:55:21 +00008133 xmlPopInput(ctxt);
8134 SHRINK;
8135
Daniel Veillardfdc91562002-07-01 21:52:03 +00008136 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008137 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8138 "detected an error in element content\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008139 ctxt->instate = XML_PARSER_EOF;
8140 break;
8141 }
8142 }
8143}
8144
8145/**
8146 * xmlParseElement:
8147 * @ctxt: an XML parser context
8148 *
8149 * parse an XML element, this is highly recursive
8150 *
8151 * [39] element ::= EmptyElemTag | STag content ETag
8152 *
8153 * [ WFC: Element Type Match ]
8154 * The Name in an element's end-tag must match the element type in the
8155 * start-tag.
8156 *
Owen Taylor3473f882001-02-23 17:55:21 +00008157 */
8158
8159void
8160xmlParseElement(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008161 const xmlChar *name;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008162 const xmlChar *prefix;
8163 const xmlChar *URI;
Owen Taylor3473f882001-02-23 17:55:21 +00008164 xmlParserNodeInfo node_info;
Daniel Veillardc82c57e2004-01-12 16:24:34 +00008165 int line, tlen;
Owen Taylor3473f882001-02-23 17:55:21 +00008166 xmlNodePtr ret;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008167 int nsNr = ctxt->nsNr;
Owen Taylor3473f882001-02-23 17:55:21 +00008168
8169 /* Capture start position */
8170 if (ctxt->record_info) {
8171 node_info.begin_pos = ctxt->input->consumed +
8172 (CUR_PTR - ctxt->input->base);
8173 node_info.begin_line = ctxt->input->line;
8174 }
8175
8176 if (ctxt->spaceNr == 0)
8177 spacePush(ctxt, -1);
8178 else
8179 spacePush(ctxt, *ctxt->space);
8180
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00008181 line = ctxt->input->line;
Daniel Veillard81273902003-09-30 00:43:48 +00008182#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +00008183 if (ctxt->sax2)
Daniel Veillard81273902003-09-30 00:43:48 +00008184#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00008185 name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
Daniel Veillard81273902003-09-30 00:43:48 +00008186#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +00008187 else
8188 name = xmlParseStartTag(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +00008189#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00008190 if (name == NULL) {
8191 spacePop(ctxt);
8192 return;
8193 }
8194 namePush(ctxt, name);
8195 ret = ctxt->node;
8196
Daniel Veillard4432df22003-09-28 18:58:27 +00008197#ifdef LIBXML_VALID_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +00008198 /*
8199 * [ VC: Root Element Type ]
8200 * The Name in the document type declaration must match the element
8201 * type of the root element.
8202 */
8203 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
8204 ctxt->node && (ctxt->node == ctxt->myDoc->children))
8205 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
Daniel Veillard4432df22003-09-28 18:58:27 +00008206#endif /* LIBXML_VALID_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00008207
8208 /*
8209 * Check for an Empty Element.
8210 */
8211 if ((RAW == '/') && (NXT(1) == '>')) {
8212 SKIP(2);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008213 if (ctxt->sax2) {
8214 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
8215 (!ctxt->disableSAX))
8216 ctxt->sax->endElementNs(ctxt->userData, name, prefix, URI);
Daniel Veillard81273902003-09-30 00:43:48 +00008217#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +00008218 } else {
8219 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
8220 (!ctxt->disableSAX))
8221 ctxt->sax->endElement(ctxt->userData, name);
Daniel Veillard81273902003-09-30 00:43:48 +00008222#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00008223 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008224 namePop(ctxt);
8225 spacePop(ctxt);
8226 if (nsNr != ctxt->nsNr)
8227 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +00008228 if ( ret != NULL && ctxt->record_info ) {
8229 node_info.end_pos = ctxt->input->consumed +
8230 (CUR_PTR - ctxt->input->base);
8231 node_info.end_line = ctxt->input->line;
8232 node_info.node = ret;
8233 xmlParserAddNodeInfo(ctxt, &node_info);
8234 }
8235 return;
8236 }
8237 if (RAW == '>') {
Daniel Veillard21a0f912001-02-25 19:54:14 +00008238 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00008239 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00008240 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_GT_REQUIRED,
8241 "Couldn't find end of Start Tag %s line %d\n",
8242 name, line, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008243
8244 /*
8245 * end of parsing of this node.
8246 */
8247 nodePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008248 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00008249 spacePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008250 if (nsNr != ctxt->nsNr)
8251 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +00008252
8253 /*
8254 * Capture end position and add node
8255 */
8256 if ( ret != NULL && ctxt->record_info ) {
8257 node_info.end_pos = ctxt->input->consumed +
8258 (CUR_PTR - ctxt->input->base);
8259 node_info.end_line = ctxt->input->line;
8260 node_info.node = ret;
8261 xmlParserAddNodeInfo(ctxt, &node_info);
8262 }
8263 return;
8264 }
8265
8266 /*
8267 * Parse the content of the element:
8268 */
8269 xmlParseContent(ctxt);
Daniel Veillard73b013f2003-09-30 12:36:01 +00008270 if (!IS_BYTE_CHAR(RAW)) {
Daniel Veillardf403d292003-10-05 13:51:35 +00008271 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
Daniel Veillard2b0f8792003-10-10 19:36:36 +00008272 "Premature end of data in tag %s line %d\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00008273 name, line, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008274
8275 /*
8276 * end of parsing of this node.
8277 */
8278 nodePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008279 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00008280 spacePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008281 if (nsNr != ctxt->nsNr)
8282 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +00008283 return;
8284 }
8285
8286 /*
8287 * parse the end of tag: '</' should be here.
8288 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00008289 if (ctxt->sax2) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00008290 xmlParseEndTag2(ctxt, prefix, URI, line, ctxt->nsNr - nsNr, tlen);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008291 namePop(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +00008292 }
8293#ifdef LIBXML_SAX1_ENABLED
8294 else
Daniel Veillard0fb18932003-09-07 09:14:37 +00008295 xmlParseEndTag1(ctxt, line);
Daniel Veillard81273902003-09-30 00:43:48 +00008296#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00008297
8298 /*
8299 * Capture end position and add node
8300 */
8301 if ( ret != NULL && ctxt->record_info ) {
8302 node_info.end_pos = ctxt->input->consumed +
8303 (CUR_PTR - ctxt->input->base);
8304 node_info.end_line = ctxt->input->line;
8305 node_info.node = ret;
8306 xmlParserAddNodeInfo(ctxt, &node_info);
8307 }
8308}
8309
8310/**
8311 * xmlParseVersionNum:
8312 * @ctxt: an XML parser context
8313 *
8314 * parse the XML version value.
8315 *
8316 * [26] VersionNum ::= ([a-zA-Z0-9_.:] | '-')+
8317 *
8318 * Returns the string giving the XML version number, or NULL
8319 */
8320xmlChar *
8321xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
8322 xmlChar *buf = NULL;
8323 int len = 0;
8324 int size = 10;
8325 xmlChar cur;
8326
Daniel Veillard3c908dc2003-04-19 00:07:51 +00008327 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00008328 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008329 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008330 return(NULL);
8331 }
8332 cur = CUR;
8333 while (((cur >= 'a') && (cur <= 'z')) ||
8334 ((cur >= 'A') && (cur <= 'Z')) ||
8335 ((cur >= '0') && (cur <= '9')) ||
8336 (cur == '_') || (cur == '.') ||
8337 (cur == ':') || (cur == '-')) {
8338 if (len + 1 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00008339 xmlChar *tmp;
8340
Owen Taylor3473f882001-02-23 17:55:21 +00008341 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00008342 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
8343 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008344 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008345 return(NULL);
8346 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00008347 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00008348 }
8349 buf[len++] = cur;
8350 NEXT;
8351 cur=CUR;
8352 }
8353 buf[len] = 0;
8354 return(buf);
8355}
8356
8357/**
8358 * xmlParseVersionInfo:
8359 * @ctxt: an XML parser context
8360 *
8361 * parse the XML version.
8362 *
8363 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
8364 *
8365 * [25] Eq ::= S? '=' S?
8366 *
8367 * Returns the version string, e.g. "1.0"
8368 */
8369
8370xmlChar *
8371xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
8372 xmlChar *version = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00008373
Daniel Veillarda07050d2003-10-19 14:46:32 +00008374 if (CMP7(CUR_PTR, 'v', 'e', 'r', 's', 'i', 'o', 'n')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008375 SKIP(7);
8376 SKIP_BLANKS;
8377 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008378 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008379 return(NULL);
8380 }
8381 NEXT;
8382 SKIP_BLANKS;
8383 if (RAW == '"') {
8384 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00008385 version = xmlParseVersionNum(ctxt);
8386 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008387 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008388 } else
8389 NEXT;
8390 } else if (RAW == '\''){
8391 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00008392 version = xmlParseVersionNum(ctxt);
8393 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008394 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008395 } else
8396 NEXT;
8397 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008398 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008399 }
8400 }
8401 return(version);
8402}
8403
8404/**
8405 * xmlParseEncName:
8406 * @ctxt: an XML parser context
8407 *
8408 * parse the XML encoding name
8409 *
8410 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
8411 *
8412 * Returns the encoding name value or NULL
8413 */
8414xmlChar *
8415xmlParseEncName(xmlParserCtxtPtr ctxt) {
8416 xmlChar *buf = NULL;
8417 int len = 0;
8418 int size = 10;
8419 xmlChar cur;
8420
8421 cur = CUR;
8422 if (((cur >= 'a') && (cur <= 'z')) ||
8423 ((cur >= 'A') && (cur <= 'Z'))) {
Daniel Veillard3c908dc2003-04-19 00:07:51 +00008424 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00008425 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008426 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008427 return(NULL);
8428 }
8429
8430 buf[len++] = cur;
8431 NEXT;
8432 cur = CUR;
8433 while (((cur >= 'a') && (cur <= 'z')) ||
8434 ((cur >= 'A') && (cur <= 'Z')) ||
8435 ((cur >= '0') && (cur <= '9')) ||
8436 (cur == '.') || (cur == '_') ||
8437 (cur == '-')) {
8438 if (len + 1 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00008439 xmlChar *tmp;
8440
Owen Taylor3473f882001-02-23 17:55:21 +00008441 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00008442 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
8443 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008444 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00008445 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00008446 return(NULL);
8447 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00008448 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00008449 }
8450 buf[len++] = cur;
8451 NEXT;
8452 cur = CUR;
8453 if (cur == 0) {
8454 SHRINK;
8455 GROW;
8456 cur = CUR;
8457 }
8458 }
8459 buf[len] = 0;
8460 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008461 xmlFatalErr(ctxt, XML_ERR_ENCODING_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008462 }
8463 return(buf);
8464}
8465
8466/**
8467 * xmlParseEncodingDecl:
8468 * @ctxt: an XML parser context
8469 *
8470 * parse the XML encoding declaration
8471 *
8472 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
8473 *
8474 * this setups the conversion filters.
8475 *
8476 * Returns the encoding value or NULL
8477 */
8478
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00008479const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00008480xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
8481 xmlChar *encoding = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00008482
8483 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00008484 if (CMP8(CUR_PTR, 'e', 'n', 'c', 'o', 'd', 'i', 'n', 'g')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008485 SKIP(8);
8486 SKIP_BLANKS;
8487 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008488 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008489 return(NULL);
8490 }
8491 NEXT;
8492 SKIP_BLANKS;
8493 if (RAW == '"') {
8494 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00008495 encoding = xmlParseEncName(ctxt);
8496 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008497 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008498 } else
8499 NEXT;
8500 } else if (RAW == '\''){
8501 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00008502 encoding = xmlParseEncName(ctxt);
8503 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008504 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008505 } else
8506 NEXT;
Daniel Veillard82ac6b02002-02-17 23:18:55 +00008507 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008508 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008509 }
Daniel Veillard6b621b82003-08-11 15:03:34 +00008510 /*
8511 * UTF-16 encoding stwich has already taken place at this stage,
8512 * more over the little-endian/big-endian selection is already done
8513 */
8514 if ((encoding != NULL) &&
8515 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-16")) ||
8516 (!xmlStrcasecmp(encoding, BAD_CAST "UTF16")))) {
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00008517 if (ctxt->encoding != NULL)
8518 xmlFree((xmlChar *) ctxt->encoding);
8519 ctxt->encoding = encoding;
Daniel Veillardb19ba832003-08-14 00:33:46 +00008520 }
8521 /*
8522 * UTF-8 encoding is handled natively
8523 */
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00008524 else if ((encoding != NULL) &&
Daniel Veillardb19ba832003-08-14 00:33:46 +00008525 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-8")) ||
8526 (!xmlStrcasecmp(encoding, BAD_CAST "UTF8")))) {
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00008527 if (ctxt->encoding != NULL)
8528 xmlFree((xmlChar *) ctxt->encoding);
8529 ctxt->encoding = encoding;
Daniel Veillard6b621b82003-08-11 15:03:34 +00008530 }
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00008531 else if (encoding != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +00008532 xmlCharEncodingHandlerPtr handler;
8533
8534 if (ctxt->input->encoding != NULL)
8535 xmlFree((xmlChar *) ctxt->input->encoding);
8536 ctxt->input->encoding = encoding;
8537
Daniel Veillarda6874ca2003-07-29 16:47:24 +00008538 handler = xmlFindCharEncodingHandler((const char *) encoding);
8539 if (handler != NULL) {
8540 xmlSwitchToEncoding(ctxt, handler);
Owen Taylor3473f882001-02-23 17:55:21 +00008541 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00008542 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
Daniel Veillarda6874ca2003-07-29 16:47:24 +00008543 "Unsupported encoding %s\n", encoding);
8544 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008545 }
8546 }
8547 }
8548 return(encoding);
8549}
8550
8551/**
8552 * xmlParseSDDecl:
8553 * @ctxt: an XML parser context
8554 *
8555 * parse the XML standalone declaration
8556 *
8557 * [32] SDDecl ::= S 'standalone' Eq
8558 * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
8559 *
8560 * [ VC: Standalone Document Declaration ]
8561 * TODO The standalone document declaration must have the value "no"
8562 * if any external markup declarations contain declarations of:
8563 * - attributes with default values, if elements to which these
8564 * attributes apply appear in the document without specifications
8565 * of values for these attributes, or
8566 * - entities (other than amp, lt, gt, apos, quot), if references
8567 * to those entities appear in the document, or
8568 * - attributes with values subject to normalization, where the
8569 * attribute appears in the document with a value which will change
8570 * as a result of normalization, or
8571 * - element types with element content, if white space occurs directly
8572 * within any instance of those types.
8573 *
8574 * Returns 1 if standalone, 0 otherwise
8575 */
8576
8577int
8578xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
8579 int standalone = -1;
8580
8581 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00008582 if (CMP10(CUR_PTR, 's', 't', 'a', 'n', 'd', 'a', 'l', 'o', 'n', 'e')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008583 SKIP(10);
8584 SKIP_BLANKS;
8585 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008586 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008587 return(standalone);
8588 }
8589 NEXT;
8590 SKIP_BLANKS;
8591 if (RAW == '\''){
8592 NEXT;
8593 if ((RAW == 'n') && (NXT(1) == 'o')) {
8594 standalone = 0;
8595 SKIP(2);
8596 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
8597 (NXT(2) == 's')) {
8598 standalone = 1;
8599 SKIP(3);
8600 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008601 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008602 }
8603 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008604 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008605 } else
8606 NEXT;
8607 } else if (RAW == '"'){
8608 NEXT;
8609 if ((RAW == 'n') && (NXT(1) == 'o')) {
8610 standalone = 0;
8611 SKIP(2);
8612 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
8613 (NXT(2) == 's')) {
8614 standalone = 1;
8615 SKIP(3);
8616 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008617 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008618 }
8619 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008620 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008621 } else
8622 NEXT;
8623 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008624 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008625 }
8626 }
8627 return(standalone);
8628}
8629
8630/**
8631 * xmlParseXMLDecl:
8632 * @ctxt: an XML parser context
8633 *
8634 * parse an XML declaration header
8635 *
8636 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
8637 */
8638
8639void
8640xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
8641 xmlChar *version;
8642
8643 /*
8644 * We know that '<?xml' is here.
8645 */
8646 SKIP(5);
8647
William M. Brack76e95df2003-10-18 16:20:14 +00008648 if (!IS_BLANK_CH(RAW)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008649 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
8650 "Blank needed after '<?xml'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008651 }
8652 SKIP_BLANKS;
8653
8654 /*
Daniel Veillard19840942001-11-29 16:11:38 +00008655 * We must have the VersionInfo here.
Owen Taylor3473f882001-02-23 17:55:21 +00008656 */
8657 version = xmlParseVersionInfo(ctxt);
Daniel Veillard19840942001-11-29 16:11:38 +00008658 if (version == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008659 xmlFatalErr(ctxt, XML_ERR_VERSION_MISSING, NULL);
Daniel Veillard19840942001-11-29 16:11:38 +00008660 } else {
8661 if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) {
8662 /*
8663 * TODO: Blueberry should be detected here
8664 */
Daniel Veillard24eb9782003-10-04 21:08:09 +00008665 xmlWarningMsg(ctxt, XML_WAR_UNKNOWN_VERSION,
8666 "Unsupported version '%s'\n",
8667 version, NULL);
Daniel Veillard19840942001-11-29 16:11:38 +00008668 }
8669 if (ctxt->version != NULL)
Daniel Veillardd3b08822001-12-05 12:03:33 +00008670 xmlFree((void *) ctxt->version);
Daniel Veillard19840942001-11-29 16:11:38 +00008671 ctxt->version = version;
Daniel Veillarda050d232001-09-05 15:51:05 +00008672 }
Owen Taylor3473f882001-02-23 17:55:21 +00008673
8674 /*
8675 * We may have the encoding declaration
8676 */
William M. Brack76e95df2003-10-18 16:20:14 +00008677 if (!IS_BLANK_CH(RAW)) {
Owen Taylor3473f882001-02-23 17:55:21 +00008678 if ((RAW == '?') && (NXT(1) == '>')) {
8679 SKIP(2);
8680 return;
8681 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008682 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008683 }
8684 xmlParseEncodingDecl(ctxt);
8685 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
8686 /*
8687 * The XML REC instructs us to stop parsing right here
8688 */
8689 return;
8690 }
8691
8692 /*
8693 * We may have the standalone status.
8694 */
William M. Brack76e95df2003-10-18 16:20:14 +00008695 if ((ctxt->input->encoding != NULL) && (!IS_BLANK_CH(RAW))) {
Owen Taylor3473f882001-02-23 17:55:21 +00008696 if ((RAW == '?') && (NXT(1) == '>')) {
8697 SKIP(2);
8698 return;
8699 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008700 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008701 }
8702 SKIP_BLANKS;
8703 ctxt->input->standalone = xmlParseSDDecl(ctxt);
8704
8705 SKIP_BLANKS;
8706 if ((RAW == '?') && (NXT(1) == '>')) {
8707 SKIP(2);
8708 } else if (RAW == '>') {
8709 /* Deprecated old WD ... */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008710 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008711 NEXT;
8712 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008713 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008714 MOVETO_ENDTAG(CUR_PTR);
8715 NEXT;
8716 }
8717}
8718
8719/**
8720 * xmlParseMisc:
8721 * @ctxt: an XML parser context
8722 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00008723 * parse an XML Misc* optional field.
Owen Taylor3473f882001-02-23 17:55:21 +00008724 *
8725 * [27] Misc ::= Comment | PI | S
8726 */
8727
8728void
8729xmlParseMisc(xmlParserCtxtPtr ctxt) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00008730 while (((RAW == '<') && (NXT(1) == '?')) ||
Daniel Veillarda07050d2003-10-19 14:46:32 +00008731 (CMP4(CUR_PTR, '<', '!', '-', '-')) ||
William M. Brack76e95df2003-10-18 16:20:14 +00008732 IS_BLANK_CH(CUR)) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00008733 if ((RAW == '<') && (NXT(1) == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008734 xmlParsePI(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00008735 } else if (IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00008736 NEXT;
8737 } else
8738 xmlParseComment(ctxt);
8739 }
8740}
8741
8742/**
8743 * xmlParseDocument:
8744 * @ctxt: an XML parser context
8745 *
8746 * parse an XML document (and build a tree if using the standard SAX
8747 * interface).
8748 *
8749 * [1] document ::= prolog element Misc*
8750 *
8751 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
8752 *
8753 * Returns 0, -1 in case of error. the parser context is augmented
8754 * as a result of the parsing.
8755 */
8756
8757int
8758xmlParseDocument(xmlParserCtxtPtr ctxt) {
8759 xmlChar start[4];
8760 xmlCharEncoding enc;
8761
8762 xmlInitParser();
8763
Daniel Veillard36e5cd52004-11-02 14:52:23 +00008764 if ((ctxt == NULL) || (ctxt->input == NULL))
8765 return(-1);
8766
Owen Taylor3473f882001-02-23 17:55:21 +00008767 GROW;
8768
8769 /*
Daniel Veillard0fb18932003-09-07 09:14:37 +00008770 * SAX: detecting the level.
8771 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00008772 xmlDetectSAX2(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008773
8774 /*
Owen Taylor3473f882001-02-23 17:55:21 +00008775 * SAX: beginning of the document processing.
8776 */
8777 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
8778 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
8779
Daniel Veillard4aede2e2003-10-17 12:43:59 +00008780 if ((ctxt->encoding == (const xmlChar *)XML_CHAR_ENCODING_NONE) &&
8781 ((ctxt->input->end - ctxt->input->cur) >= 4)) {
Daniel Veillard4aafa792001-07-28 17:21:12 +00008782 /*
8783 * Get the 4 first bytes and decode the charset
8784 * if enc != XML_CHAR_ENCODING_NONE
8785 * plug some encoding conversion routines.
8786 */
8787 start[0] = RAW;
8788 start[1] = NXT(1);
8789 start[2] = NXT(2);
8790 start[3] = NXT(3);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00008791 enc = xmlDetectCharEncoding(&start[0], 4);
Daniel Veillard4aafa792001-07-28 17:21:12 +00008792 if (enc != XML_CHAR_ENCODING_NONE) {
8793 xmlSwitchEncoding(ctxt, enc);
8794 }
Owen Taylor3473f882001-02-23 17:55:21 +00008795 }
8796
8797
8798 if (CUR == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008799 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008800 }
8801
8802 /*
8803 * Check for the XMLDecl in the Prolog.
8804 */
8805 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00008806 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +00008807
8808 /*
8809 * Note that we will switch encoding on the fly.
8810 */
8811 xmlParseXMLDecl(ctxt);
8812 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
8813 /*
8814 * The XML REC instructs us to stop parsing right here
8815 */
8816 return(-1);
8817 }
8818 ctxt->standalone = ctxt->input->standalone;
8819 SKIP_BLANKS;
8820 } else {
8821 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
8822 }
8823 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
8824 ctxt->sax->startDocument(ctxt->userData);
8825
8826 /*
8827 * The Misc part of the Prolog
8828 */
8829 GROW;
8830 xmlParseMisc(ctxt);
8831
8832 /*
8833 * Then possibly doc type declaration(s) and more Misc
8834 * (doctypedecl Misc*)?
8835 */
8836 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00008837 if (CMP9(CUR_PTR, '<', '!', 'D', 'O', 'C', 'T', 'Y', 'P', 'E')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008838
8839 ctxt->inSubset = 1;
8840 xmlParseDocTypeDecl(ctxt);
8841 if (RAW == '[') {
8842 ctxt->instate = XML_PARSER_DTD;
8843 xmlParseInternalSubset(ctxt);
8844 }
8845
8846 /*
8847 * Create and update the external subset.
8848 */
8849 ctxt->inSubset = 2;
8850 if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
8851 (!ctxt->disableSAX))
8852 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
8853 ctxt->extSubSystem, ctxt->extSubURI);
8854 ctxt->inSubset = 0;
8855
8856
8857 ctxt->instate = XML_PARSER_PROLOG;
8858 xmlParseMisc(ctxt);
8859 }
8860
8861 /*
8862 * Time to start parsing the tree itself
8863 */
8864 GROW;
8865 if (RAW != '<') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008866 xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
8867 "Start tag expected, '<' not found\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008868 } else {
8869 ctxt->instate = XML_PARSER_CONTENT;
8870 xmlParseElement(ctxt);
8871 ctxt->instate = XML_PARSER_EPILOG;
8872
8873
8874 /*
8875 * The Misc part at the end
8876 */
8877 xmlParseMisc(ctxt);
8878
Daniel Veillard561b7f82002-03-20 21:55:57 +00008879 if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008880 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008881 }
8882 ctxt->instate = XML_PARSER_EOF;
8883 }
8884
8885 /*
8886 * SAX: end of the document processing.
8887 */
Daniel Veillard8d24cc12002-03-05 15:41:29 +00008888 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00008889 ctxt->sax->endDocument(ctxt->userData);
8890
Daniel Veillard5997aca2002-03-18 18:36:20 +00008891 /*
8892 * Remove locally kept entity definitions if the tree was not built
8893 */
8894 if ((ctxt->myDoc != NULL) &&
8895 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
8896 xmlFreeDoc(ctxt->myDoc);
8897 ctxt->myDoc = NULL;
8898 }
8899
Daniel Veillardc7612992002-02-17 22:47:37 +00008900 if (! ctxt->wellFormed) {
8901 ctxt->valid = 0;
8902 return(-1);
8903 }
Owen Taylor3473f882001-02-23 17:55:21 +00008904 return(0);
8905}
8906
8907/**
8908 * xmlParseExtParsedEnt:
8909 * @ctxt: an XML parser context
8910 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00008911 * parse a general parsed entity
Owen Taylor3473f882001-02-23 17:55:21 +00008912 * An external general parsed entity is well-formed if it matches the
8913 * production labeled extParsedEnt.
8914 *
8915 * [78] extParsedEnt ::= TextDecl? content
8916 *
8917 * Returns 0, -1 in case of error. the parser context is augmented
8918 * as a result of the parsing.
8919 */
8920
8921int
8922xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
8923 xmlChar start[4];
8924 xmlCharEncoding enc;
8925
Daniel Veillard36e5cd52004-11-02 14:52:23 +00008926 if ((ctxt == NULL) || (ctxt->input == NULL))
8927 return(-1);
8928
Owen Taylor3473f882001-02-23 17:55:21 +00008929 xmlDefaultSAXHandlerInit();
8930
Daniel Veillard309f81d2003-09-23 09:02:53 +00008931 xmlDetectSAX2(ctxt);
8932
Owen Taylor3473f882001-02-23 17:55:21 +00008933 GROW;
8934
8935 /*
8936 * SAX: beginning of the document processing.
8937 */
8938 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
8939 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
8940
8941 /*
8942 * Get the 4 first bytes and decode the charset
8943 * if enc != XML_CHAR_ENCODING_NONE
8944 * plug some encoding conversion routines.
8945 */
Daniel Veillard4aede2e2003-10-17 12:43:59 +00008946 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
8947 start[0] = RAW;
8948 start[1] = NXT(1);
8949 start[2] = NXT(2);
8950 start[3] = NXT(3);
8951 enc = xmlDetectCharEncoding(start, 4);
8952 if (enc != XML_CHAR_ENCODING_NONE) {
8953 xmlSwitchEncoding(ctxt, enc);
8954 }
Owen Taylor3473f882001-02-23 17:55:21 +00008955 }
8956
8957
8958 if (CUR == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008959 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008960 }
8961
8962 /*
8963 * Check for the XMLDecl in the Prolog.
8964 */
8965 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00008966 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +00008967
8968 /*
8969 * Note that we will switch encoding on the fly.
8970 */
8971 xmlParseXMLDecl(ctxt);
8972 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
8973 /*
8974 * The XML REC instructs us to stop parsing right here
8975 */
8976 return(-1);
8977 }
8978 SKIP_BLANKS;
8979 } else {
8980 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
8981 }
8982 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
8983 ctxt->sax->startDocument(ctxt->userData);
8984
8985 /*
8986 * Doing validity checking on chunk doesn't make sense
8987 */
8988 ctxt->instate = XML_PARSER_CONTENT;
8989 ctxt->validate = 0;
8990 ctxt->loadsubset = 0;
8991 ctxt->depth = 0;
8992
8993 xmlParseContent(ctxt);
8994
8995 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008996 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008997 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008998 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008999 }
9000
9001 /*
9002 * SAX: end of the document processing.
9003 */
Daniel Veillard8d24cc12002-03-05 15:41:29 +00009004 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00009005 ctxt->sax->endDocument(ctxt->userData);
9006
9007 if (! ctxt->wellFormed) return(-1);
9008 return(0);
9009}
9010
Daniel Veillard73b013f2003-09-30 12:36:01 +00009011#ifdef LIBXML_PUSH_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +00009012/************************************************************************
9013 * *
9014 * Progressive parsing interfaces *
9015 * *
9016 ************************************************************************/
9017
9018/**
9019 * xmlParseLookupSequence:
9020 * @ctxt: an XML parser context
9021 * @first: the first char to lookup
9022 * @next: the next char to lookup or zero
9023 * @third: the next char to lookup or zero
9024 *
9025 * Try to find if a sequence (first, next, third) or just (first next) or
9026 * (first) is available in the input stream.
9027 * This function has a side effect of (possibly) incrementing ctxt->checkIndex
9028 * to avoid rescanning sequences of bytes, it DOES change the state of the
9029 * parser, do not use liberally.
9030 *
9031 * Returns the index to the current parsing point if the full sequence
9032 * is available, -1 otherwise.
9033 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00009034static int
Owen Taylor3473f882001-02-23 17:55:21 +00009035xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
9036 xmlChar next, xmlChar third) {
9037 int base, len;
9038 xmlParserInputPtr in;
9039 const xmlChar *buf;
9040
9041 in = ctxt->input;
9042 if (in == NULL) return(-1);
9043 base = in->cur - in->base;
9044 if (base < 0) return(-1);
9045 if (ctxt->checkIndex > base)
9046 base = ctxt->checkIndex;
9047 if (in->buf == NULL) {
9048 buf = in->base;
9049 len = in->length;
9050 } else {
9051 buf = in->buf->buffer->content;
9052 len = in->buf->buffer->use;
9053 }
9054 /* take into account the sequence length */
9055 if (third) len -= 2;
9056 else if (next) len --;
9057 for (;base < len;base++) {
9058 if (buf[base] == first) {
9059 if (third != 0) {
9060 if ((buf[base + 1] != next) ||
9061 (buf[base + 2] != third)) continue;
9062 } else if (next != 0) {
9063 if (buf[base + 1] != next) continue;
9064 }
9065 ctxt->checkIndex = 0;
9066#ifdef DEBUG_PUSH
9067 if (next == 0)
9068 xmlGenericError(xmlGenericErrorContext,
9069 "PP: lookup '%c' found at %d\n",
9070 first, base);
9071 else if (third == 0)
9072 xmlGenericError(xmlGenericErrorContext,
9073 "PP: lookup '%c%c' found at %d\n",
9074 first, next, base);
9075 else
9076 xmlGenericError(xmlGenericErrorContext,
9077 "PP: lookup '%c%c%c' found at %d\n",
9078 first, next, third, base);
9079#endif
9080 return(base - (in->cur - in->base));
9081 }
9082 }
9083 ctxt->checkIndex = base;
9084#ifdef DEBUG_PUSH
9085 if (next == 0)
9086 xmlGenericError(xmlGenericErrorContext,
9087 "PP: lookup '%c' failed\n", first);
9088 else if (third == 0)
9089 xmlGenericError(xmlGenericErrorContext,
9090 "PP: lookup '%c%c' failed\n", first, next);
9091 else
9092 xmlGenericError(xmlGenericErrorContext,
9093 "PP: lookup '%c%c%c' failed\n", first, next, third);
9094#endif
9095 return(-1);
9096}
9097
9098/**
Daniel Veillarda880b122003-04-21 21:36:41 +00009099 * xmlParseGetLasts:
9100 * @ctxt: an XML parser context
9101 * @lastlt: pointer to store the last '<' from the input
9102 * @lastgt: pointer to store the last '>' from the input
9103 *
9104 * Lookup the last < and > in the current chunk
9105 */
9106static void
9107xmlParseGetLasts(xmlParserCtxtPtr ctxt, const xmlChar **lastlt,
9108 const xmlChar **lastgt) {
9109 const xmlChar *tmp;
9110
9111 if ((ctxt == NULL) || (lastlt == NULL) || (lastgt == NULL)) {
9112 xmlGenericError(xmlGenericErrorContext,
9113 "Internal error: xmlParseGetLasts\n");
9114 return;
9115 }
Daniel Veillard0df3bc32004-06-08 12:03:41 +00009116 if ((ctxt->progressive != 0) && (ctxt->inputNr == 1)) {
Daniel Veillarda880b122003-04-21 21:36:41 +00009117 tmp = ctxt->input->end;
9118 tmp--;
Daniel Veillardeb70f932004-07-05 16:46:09 +00009119 while ((tmp >= ctxt->input->base) && (*tmp != '<')) tmp--;
Daniel Veillarda880b122003-04-21 21:36:41 +00009120 if (tmp < ctxt->input->base) {
9121 *lastlt = NULL;
9122 *lastgt = NULL;
Daniel Veillarda880b122003-04-21 21:36:41 +00009123 } else {
Daniel Veillardeb70f932004-07-05 16:46:09 +00009124 *lastlt = tmp;
9125 tmp++;
9126 while ((tmp < ctxt->input->end) && (*tmp != '>')) {
9127 if (*tmp == '\'') {
9128 tmp++;
9129 while ((tmp < ctxt->input->end) && (*tmp != '\'')) tmp++;
9130 if (tmp < ctxt->input->end) tmp++;
9131 } else if (*tmp == '"') {
9132 tmp++;
9133 while ((tmp < ctxt->input->end) && (*tmp != '"')) tmp++;
9134 if (tmp < ctxt->input->end) tmp++;
9135 } else
9136 tmp++;
9137 }
9138 if (tmp < ctxt->input->end)
9139 *lastgt = tmp;
9140 else {
9141 tmp = *lastlt;
9142 tmp--;
9143 while ((tmp >= ctxt->input->base) && (*tmp != '>')) tmp--;
9144 if (tmp >= ctxt->input->base)
9145 *lastgt = tmp;
9146 else
9147 *lastgt = NULL;
9148 }
Daniel Veillarda880b122003-04-21 21:36:41 +00009149 }
Daniel Veillarda880b122003-04-21 21:36:41 +00009150 } else {
9151 *lastlt = NULL;
9152 *lastgt = NULL;
9153 }
9154}
9155/**
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +00009156 * xmlCheckCdataPush:
9157 * @cur: pointer to the bock of characters
9158 * @len: length of the block in bytes
9159 *
9160 * Check that the block of characters is okay as SCdata content [20]
9161 *
9162 * Returns the number of bytes to pass if okay, a negative index where an
9163 * UTF-8 error occured otherwise
9164 */
9165static int
9166xmlCheckCdataPush(const xmlChar *utf, int len) {
9167 int ix;
9168 unsigned char c;
9169 int codepoint;
9170
9171 if ((utf == NULL) || (len <= 0))
9172 return(0);
9173
9174 for (ix = 0; ix < len;) { /* string is 0-terminated */
9175 c = utf[ix];
9176 if ((c & 0x80) == 0x00) { /* 1-byte code, starts with 10 */
9177 if (c >= 0x20)
9178 ix++;
9179 else if ((c == 0xA) || (c == 0xD) || (c == 0x9))
9180 ix++;
9181 else
9182 return(-ix);
9183 } else if ((c & 0xe0) == 0xc0) {/* 2-byte code, starts with 110 */
9184 if (ix + 2 > len) return(ix);
9185 if ((utf[ix+1] & 0xc0 ) != 0x80)
9186 return(-ix);
9187 codepoint = (utf[0] & 0x1f) << 6;
9188 codepoint |= utf[1] & 0x3f;
9189 if (!xmlIsCharQ(codepoint))
9190 return(-ix);
9191 ix += 2;
9192 } else if ((c & 0xf0) == 0xe0) {/* 3-byte code, starts with 1110 */
9193 if (ix + 3 > len) return(ix);
9194 if (((utf[ix+1] & 0xc0) != 0x80) ||
9195 ((utf[ix+2] & 0xc0) != 0x80))
9196 return(-ix);
9197 codepoint = (utf[0] & 0xf) << 12;
9198 codepoint |= (utf[1] & 0x3f) << 6;
9199 codepoint |= utf[2] & 0x3f;
9200 if (!xmlIsCharQ(codepoint))
9201 return(-ix);
9202 ix += 3;
9203 } else if ((c & 0xf8) == 0xf0) {/* 4-byte code, starts with 11110 */
9204 if (ix + 4 > len) return(ix);
9205 if (((utf[ix+1] & 0xc0) != 0x80) ||
9206 ((utf[ix+2] & 0xc0) != 0x80) ||
9207 ((utf[ix+3] & 0xc0) != 0x80))
9208 return(-ix);
9209 codepoint = (utf[0] & 0x7) << 18;
9210 codepoint |= (utf[1] & 0x3f) << 12;
9211 codepoint |= (utf[2] & 0x3f) << 6;
9212 codepoint |= utf[3] & 0x3f;
9213 if (!xmlIsCharQ(codepoint))
9214 return(-ix);
9215 ix += 4;
9216 } else /* unknown encoding */
9217 return(-ix);
9218 }
9219 return(ix);
9220}
9221
9222/**
Owen Taylor3473f882001-02-23 17:55:21 +00009223 * xmlParseTryOrFinish:
9224 * @ctxt: an XML parser context
9225 * @terminate: last chunk indicator
9226 *
9227 * Try to progress on parsing
9228 *
9229 * Returns zero if no parsing was possible
9230 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00009231static int
Owen Taylor3473f882001-02-23 17:55:21 +00009232xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
9233 int ret = 0;
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009234 int avail, tlen;
Owen Taylor3473f882001-02-23 17:55:21 +00009235 xmlChar cur, next;
Daniel Veillarda880b122003-04-21 21:36:41 +00009236 const xmlChar *lastlt, *lastgt;
Owen Taylor3473f882001-02-23 17:55:21 +00009237
Daniel Veillard36e5cd52004-11-02 14:52:23 +00009238 if (ctxt->input == NULL)
9239 return(0);
9240
Owen Taylor3473f882001-02-23 17:55:21 +00009241#ifdef DEBUG_PUSH
9242 switch (ctxt->instate) {
9243 case XML_PARSER_EOF:
9244 xmlGenericError(xmlGenericErrorContext,
9245 "PP: try EOF\n"); break;
9246 case XML_PARSER_START:
9247 xmlGenericError(xmlGenericErrorContext,
9248 "PP: try START\n"); break;
9249 case XML_PARSER_MISC:
9250 xmlGenericError(xmlGenericErrorContext,
9251 "PP: try MISC\n");break;
9252 case XML_PARSER_COMMENT:
9253 xmlGenericError(xmlGenericErrorContext,
9254 "PP: try COMMENT\n");break;
9255 case XML_PARSER_PROLOG:
9256 xmlGenericError(xmlGenericErrorContext,
9257 "PP: try PROLOG\n");break;
9258 case XML_PARSER_START_TAG:
9259 xmlGenericError(xmlGenericErrorContext,
9260 "PP: try START_TAG\n");break;
9261 case XML_PARSER_CONTENT:
9262 xmlGenericError(xmlGenericErrorContext,
9263 "PP: try CONTENT\n");break;
9264 case XML_PARSER_CDATA_SECTION:
9265 xmlGenericError(xmlGenericErrorContext,
9266 "PP: try CDATA_SECTION\n");break;
9267 case XML_PARSER_END_TAG:
9268 xmlGenericError(xmlGenericErrorContext,
9269 "PP: try END_TAG\n");break;
9270 case XML_PARSER_ENTITY_DECL:
9271 xmlGenericError(xmlGenericErrorContext,
9272 "PP: try ENTITY_DECL\n");break;
9273 case XML_PARSER_ENTITY_VALUE:
9274 xmlGenericError(xmlGenericErrorContext,
9275 "PP: try ENTITY_VALUE\n");break;
9276 case XML_PARSER_ATTRIBUTE_VALUE:
9277 xmlGenericError(xmlGenericErrorContext,
9278 "PP: try ATTRIBUTE_VALUE\n");break;
9279 case XML_PARSER_DTD:
9280 xmlGenericError(xmlGenericErrorContext,
9281 "PP: try DTD\n");break;
9282 case XML_PARSER_EPILOG:
9283 xmlGenericError(xmlGenericErrorContext,
9284 "PP: try EPILOG\n");break;
9285 case XML_PARSER_PI:
9286 xmlGenericError(xmlGenericErrorContext,
9287 "PP: try PI\n");break;
9288 case XML_PARSER_IGNORE:
9289 xmlGenericError(xmlGenericErrorContext,
9290 "PP: try IGNORE\n");break;
9291 }
9292#endif
9293
Daniel Veillard198c1bf2003-10-20 17:07:41 +00009294 if ((ctxt->input != NULL) &&
9295 (ctxt->input->cur - ctxt->input->base > 4096)) {
Daniel Veillarda880b122003-04-21 21:36:41 +00009296 xmlSHRINK(ctxt);
9297 ctxt->checkIndex = 0;
9298 }
9299 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Aleksey Sanine48a3182002-05-09 18:20:01 +00009300
Daniel Veillarda880b122003-04-21 21:36:41 +00009301 while (1) {
Daniel Veillard14412512005-01-21 23:53:26 +00009302 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00009303 return(0);
9304
9305
Owen Taylor3473f882001-02-23 17:55:21 +00009306 /*
9307 * Pop-up of finished entities.
9308 */
9309 while ((RAW == 0) && (ctxt->inputNr > 1))
9310 xmlPopInput(ctxt);
9311
Daniel Veillard198c1bf2003-10-20 17:07:41 +00009312 if (ctxt->input == NULL) break;
Owen Taylor3473f882001-02-23 17:55:21 +00009313 if (ctxt->input->buf == NULL)
Daniel Veillarda880b122003-04-21 21:36:41 +00009314 avail = ctxt->input->length -
9315 (ctxt->input->cur - ctxt->input->base);
Daniel Veillard158a4d22002-02-20 22:17:58 +00009316 else {
9317 /*
9318 * If we are operating on converted input, try to flush
9319 * remainng chars to avoid them stalling in the non-converted
9320 * buffer.
9321 */
9322 if ((ctxt->input->buf->raw != NULL) &&
9323 (ctxt->input->buf->raw->use > 0)) {
9324 int base = ctxt->input->base -
9325 ctxt->input->buf->buffer->content;
9326 int current = ctxt->input->cur - ctxt->input->base;
9327
9328 xmlParserInputBufferPush(ctxt->input->buf, 0, "");
9329 ctxt->input->base = ctxt->input->buf->buffer->content + base;
9330 ctxt->input->cur = ctxt->input->base + current;
9331 ctxt->input->end =
9332 &ctxt->input->buf->buffer->content[
9333 ctxt->input->buf->buffer->use];
9334 }
9335 avail = ctxt->input->buf->buffer->use -
9336 (ctxt->input->cur - ctxt->input->base);
9337 }
Owen Taylor3473f882001-02-23 17:55:21 +00009338 if (avail < 1)
9339 goto done;
9340 switch (ctxt->instate) {
9341 case XML_PARSER_EOF:
9342 /*
9343 * Document parsing is done !
9344 */
9345 goto done;
9346 case XML_PARSER_START:
Daniel Veillard0e4cd172001-06-28 12:13:56 +00009347 if (ctxt->charset == XML_CHAR_ENCODING_NONE) {
9348 xmlChar start[4];
9349 xmlCharEncoding enc;
9350
9351 /*
9352 * Very first chars read from the document flow.
9353 */
9354 if (avail < 4)
9355 goto done;
9356
9357 /*
9358 * Get the 4 first bytes and decode the charset
9359 * if enc != XML_CHAR_ENCODING_NONE
William M. Brack3a1cd212005-02-11 14:35:54 +00009360 * plug some encoding conversion routines,
9361 * else xmlSwitchEncoding will set to (default)
9362 * UTF8.
Daniel Veillard0e4cd172001-06-28 12:13:56 +00009363 */
9364 start[0] = RAW;
9365 start[1] = NXT(1);
9366 start[2] = NXT(2);
9367 start[3] = NXT(3);
9368 enc = xmlDetectCharEncoding(start, 4);
William M. Brack3a1cd212005-02-11 14:35:54 +00009369 xmlSwitchEncoding(ctxt, enc);
Daniel Veillard0e4cd172001-06-28 12:13:56 +00009370 break;
9371 }
Owen Taylor3473f882001-02-23 17:55:21 +00009372
Daniel Veillard2b8c4a12003-10-02 22:28:19 +00009373 if (avail < 2)
9374 goto done;
Owen Taylor3473f882001-02-23 17:55:21 +00009375 cur = ctxt->input->cur[0];
9376 next = ctxt->input->cur[1];
9377 if (cur == 0) {
9378 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
9379 ctxt->sax->setDocumentLocator(ctxt->userData,
9380 &xmlDefaultSAXLocator);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009381 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009382 ctxt->instate = XML_PARSER_EOF;
9383#ifdef DEBUG_PUSH
9384 xmlGenericError(xmlGenericErrorContext,
9385 "PP: entering EOF\n");
9386#endif
9387 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
9388 ctxt->sax->endDocument(ctxt->userData);
9389 goto done;
9390 }
9391 if ((cur == '<') && (next == '?')) {
9392 /* PI or XML decl */
9393 if (avail < 5) return(ret);
9394 if ((!terminate) &&
9395 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
9396 return(ret);
9397 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
9398 ctxt->sax->setDocumentLocator(ctxt->userData,
9399 &xmlDefaultSAXLocator);
9400 if ((ctxt->input->cur[2] == 'x') &&
9401 (ctxt->input->cur[3] == 'm') &&
9402 (ctxt->input->cur[4] == 'l') &&
William M. Brack76e95df2003-10-18 16:20:14 +00009403 (IS_BLANK_CH(ctxt->input->cur[5]))) {
Owen Taylor3473f882001-02-23 17:55:21 +00009404 ret += 5;
9405#ifdef DEBUG_PUSH
9406 xmlGenericError(xmlGenericErrorContext,
9407 "PP: Parsing XML Decl\n");
9408#endif
9409 xmlParseXMLDecl(ctxt);
9410 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
9411 /*
9412 * The XML REC instructs us to stop parsing right
9413 * here
9414 */
9415 ctxt->instate = XML_PARSER_EOF;
9416 return(0);
9417 }
9418 ctxt->standalone = ctxt->input->standalone;
9419 if ((ctxt->encoding == NULL) &&
9420 (ctxt->input->encoding != NULL))
9421 ctxt->encoding = xmlStrdup(ctxt->input->encoding);
9422 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
9423 (!ctxt->disableSAX))
9424 ctxt->sax->startDocument(ctxt->userData);
9425 ctxt->instate = XML_PARSER_MISC;
9426#ifdef DEBUG_PUSH
9427 xmlGenericError(xmlGenericErrorContext,
9428 "PP: entering MISC\n");
9429#endif
9430 } else {
9431 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
9432 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
9433 (!ctxt->disableSAX))
9434 ctxt->sax->startDocument(ctxt->userData);
9435 ctxt->instate = XML_PARSER_MISC;
9436#ifdef DEBUG_PUSH
9437 xmlGenericError(xmlGenericErrorContext,
9438 "PP: entering MISC\n");
9439#endif
9440 }
9441 } else {
9442 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
9443 ctxt->sax->setDocumentLocator(ctxt->userData,
9444 &xmlDefaultSAXLocator);
9445 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
William M. Bracka3215c72004-07-31 16:24:01 +00009446 if (ctxt->version == NULL) {
9447 xmlErrMemory(ctxt, NULL);
9448 break;
9449 }
Owen Taylor3473f882001-02-23 17:55:21 +00009450 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
9451 (!ctxt->disableSAX))
9452 ctxt->sax->startDocument(ctxt->userData);
9453 ctxt->instate = XML_PARSER_MISC;
9454#ifdef DEBUG_PUSH
9455 xmlGenericError(xmlGenericErrorContext,
9456 "PP: entering MISC\n");
9457#endif
9458 }
9459 break;
Daniel Veillarda880b122003-04-21 21:36:41 +00009460 case XML_PARSER_START_TAG: {
Daniel Veillarde57ec792003-09-10 10:50:59 +00009461 const xmlChar *name;
9462 const xmlChar *prefix;
9463 const xmlChar *URI;
9464 int nsNr = ctxt->nsNr;
Daniel Veillarda880b122003-04-21 21:36:41 +00009465
9466 if ((avail < 2) && (ctxt->inputNr == 1))
9467 goto done;
9468 cur = ctxt->input->cur[0];
9469 if (cur != '<') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009470 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Daniel Veillarda880b122003-04-21 21:36:41 +00009471 ctxt->instate = XML_PARSER_EOF;
Daniel Veillarda880b122003-04-21 21:36:41 +00009472 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
9473 ctxt->sax->endDocument(ctxt->userData);
9474 goto done;
9475 }
9476 if (!terminate) {
9477 if (ctxt->progressive) {
Daniel Veillardb3744002004-02-18 14:28:22 +00009478 /* > can be found unescaped in attribute values */
Daniel Veillardeb70f932004-07-05 16:46:09 +00009479 if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
Daniel Veillarda880b122003-04-21 21:36:41 +00009480 goto done;
9481 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
9482 goto done;
9483 }
9484 }
9485 if (ctxt->spaceNr == 0)
9486 spacePush(ctxt, -1);
9487 else
9488 spacePush(ctxt, *ctxt->space);
Daniel Veillard81273902003-09-30 00:43:48 +00009489#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +00009490 if (ctxt->sax2)
Daniel Veillard81273902003-09-30 00:43:48 +00009491#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009492 name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
Daniel Veillard81273902003-09-30 00:43:48 +00009493#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +00009494 else
9495 name = xmlParseStartTag(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +00009496#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarda880b122003-04-21 21:36:41 +00009497 if (name == NULL) {
9498 spacePop(ctxt);
9499 ctxt->instate = XML_PARSER_EOF;
Daniel Veillarda880b122003-04-21 21:36:41 +00009500 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
9501 ctxt->sax->endDocument(ctxt->userData);
9502 goto done;
9503 }
Daniel Veillard4432df22003-09-28 18:58:27 +00009504#ifdef LIBXML_VALID_ENABLED
Daniel Veillarda880b122003-04-21 21:36:41 +00009505 /*
9506 * [ VC: Root Element Type ]
9507 * The Name in the document type declaration must match
9508 * the element type of the root element.
9509 */
9510 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
9511 ctxt->node && (ctxt->node == ctxt->myDoc->children))
9512 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
Daniel Veillard4432df22003-09-28 18:58:27 +00009513#endif /* LIBXML_VALID_ENABLED */
Daniel Veillarda880b122003-04-21 21:36:41 +00009514
9515 /*
9516 * Check for an Empty Element.
9517 */
9518 if ((RAW == '/') && (NXT(1) == '>')) {
9519 SKIP(2);
Daniel Veillarde57ec792003-09-10 10:50:59 +00009520
9521 if (ctxt->sax2) {
9522 if ((ctxt->sax != NULL) &&
9523 (ctxt->sax->endElementNs != NULL) &&
9524 (!ctxt->disableSAX))
9525 ctxt->sax->endElementNs(ctxt->userData, name,
9526 prefix, URI);
Daniel Veillard48df9612005-01-04 21:50:05 +00009527 if (ctxt->nsNr - nsNr > 0)
9528 nsPop(ctxt, ctxt->nsNr - nsNr);
Daniel Veillard81273902003-09-30 00:43:48 +00009529#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +00009530 } else {
9531 if ((ctxt->sax != NULL) &&
9532 (ctxt->sax->endElement != NULL) &&
9533 (!ctxt->disableSAX))
9534 ctxt->sax->endElement(ctxt->userData, name);
Daniel Veillard81273902003-09-30 00:43:48 +00009535#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarda880b122003-04-21 21:36:41 +00009536 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00009537 spacePop(ctxt);
9538 if (ctxt->nameNr == 0) {
Daniel Veillarda880b122003-04-21 21:36:41 +00009539 ctxt->instate = XML_PARSER_EPILOG;
Daniel Veillarda880b122003-04-21 21:36:41 +00009540 } else {
9541 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillarda880b122003-04-21 21:36:41 +00009542 }
9543 break;
9544 }
9545 if (RAW == '>') {
9546 NEXT;
9547 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00009548 xmlFatalErrMsgStr(ctxt, XML_ERR_GT_REQUIRED,
Daniel Veillarda880b122003-04-21 21:36:41 +00009549 "Couldn't find end of Start Tag %s\n",
9550 name);
Daniel Veillarda880b122003-04-21 21:36:41 +00009551 nodePop(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +00009552 spacePop(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +00009553 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00009554 if (ctxt->sax2)
9555 nameNsPush(ctxt, name, prefix, URI, ctxt->nsNr - nsNr);
Daniel Veillard81273902003-09-30 00:43:48 +00009556#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +00009557 else
9558 namePush(ctxt, name);
Daniel Veillard81273902003-09-30 00:43:48 +00009559#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +00009560
Daniel Veillarda880b122003-04-21 21:36:41 +00009561 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillarda880b122003-04-21 21:36:41 +00009562 break;
9563 }
9564 case XML_PARSER_CONTENT: {
9565 const xmlChar *test;
9566 unsigned int cons;
9567 if ((avail < 2) && (ctxt->inputNr == 1))
9568 goto done;
9569 cur = ctxt->input->cur[0];
9570 next = ctxt->input->cur[1];
9571
9572 test = CUR_PTR;
9573 cons = ctxt->input->consumed;
9574 if ((cur == '<') && (next == '/')) {
9575 ctxt->instate = XML_PARSER_END_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +00009576 break;
9577 } else if ((cur == '<') && (next == '?')) {
9578 if ((!terminate) &&
9579 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
9580 goto done;
Daniel Veillarda880b122003-04-21 21:36:41 +00009581 xmlParsePI(ctxt);
9582 } else if ((cur == '<') && (next != '!')) {
9583 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +00009584 break;
9585 } else if ((cur == '<') && (next == '!') &&
9586 (ctxt->input->cur[2] == '-') &&
9587 (ctxt->input->cur[3] == '-')) {
9588 if ((!terminate) &&
9589 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
9590 goto done;
Daniel Veillarda880b122003-04-21 21:36:41 +00009591 xmlParseComment(ctxt);
9592 ctxt->instate = XML_PARSER_CONTENT;
9593 } else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
9594 (ctxt->input->cur[2] == '[') &&
9595 (ctxt->input->cur[3] == 'C') &&
9596 (ctxt->input->cur[4] == 'D') &&
9597 (ctxt->input->cur[5] == 'A') &&
9598 (ctxt->input->cur[6] == 'T') &&
9599 (ctxt->input->cur[7] == 'A') &&
9600 (ctxt->input->cur[8] == '[')) {
9601 SKIP(9);
9602 ctxt->instate = XML_PARSER_CDATA_SECTION;
Daniel Veillarda880b122003-04-21 21:36:41 +00009603 break;
9604 } else if ((cur == '<') && (next == '!') &&
9605 (avail < 9)) {
9606 goto done;
9607 } else if (cur == '&') {
9608 if ((!terminate) &&
9609 (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0))
9610 goto done;
Daniel Veillarda880b122003-04-21 21:36:41 +00009611 xmlParseReference(ctxt);
9612 } else {
9613 /* TODO Avoid the extra copy, handle directly !!! */
9614 /*
9615 * Goal of the following test is:
9616 * - minimize calls to the SAX 'character' callback
9617 * when they are mergeable
9618 * - handle an problem for isBlank when we only parse
9619 * a sequence of blank chars and the next one is
9620 * not available to check against '<' presence.
9621 * - tries to homogenize the differences in SAX
9622 * callbacks between the push and pull versions
9623 * of the parser.
9624 */
9625 if ((ctxt->inputNr == 1) &&
9626 (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
9627 if (!terminate) {
9628 if (ctxt->progressive) {
9629 if ((lastlt == NULL) ||
9630 (ctxt->input->cur > lastlt))
9631 goto done;
9632 } else if (xmlParseLookupSequence(ctxt,
9633 '<', 0, 0) < 0) {
9634 goto done;
9635 }
9636 }
9637 }
9638 ctxt->checkIndex = 0;
Daniel Veillarda880b122003-04-21 21:36:41 +00009639 xmlParseCharData(ctxt, 0);
9640 }
9641 /*
9642 * Pop-up of finished entities.
9643 */
9644 while ((RAW == 0) && (ctxt->inputNr > 1))
9645 xmlPopInput(ctxt);
9646 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009647 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
9648 "detected an error in element content\n");
Daniel Veillarda880b122003-04-21 21:36:41 +00009649 ctxt->instate = XML_PARSER_EOF;
9650 break;
9651 }
9652 break;
9653 }
9654 case XML_PARSER_END_TAG:
9655 if (avail < 2)
9656 goto done;
9657 if (!terminate) {
9658 if (ctxt->progressive) {
Daniel Veillardeb70f932004-07-05 16:46:09 +00009659 /* > can be found unescaped in attribute values */
9660 if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
Daniel Veillarda880b122003-04-21 21:36:41 +00009661 goto done;
9662 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
9663 goto done;
9664 }
9665 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00009666 if (ctxt->sax2) {
9667 xmlParseEndTag2(ctxt,
9668 (void *) ctxt->pushTab[ctxt->nameNr * 3 - 3],
9669 (void *) ctxt->pushTab[ctxt->nameNr * 3 - 2], 0,
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009670 (int) (long) ctxt->pushTab[ctxt->nameNr * 3 - 1], 0);
Daniel Veillarde57ec792003-09-10 10:50:59 +00009671 nameNsPop(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +00009672 }
9673#ifdef LIBXML_SAX1_ENABLED
9674 else
Daniel Veillarde57ec792003-09-10 10:50:59 +00009675 xmlParseEndTag1(ctxt, 0);
Daniel Veillard81273902003-09-30 00:43:48 +00009676#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +00009677 if (ctxt->nameNr == 0) {
Daniel Veillarda880b122003-04-21 21:36:41 +00009678 ctxt->instate = XML_PARSER_EPILOG;
Daniel Veillarda880b122003-04-21 21:36:41 +00009679 } else {
9680 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillarda880b122003-04-21 21:36:41 +00009681 }
9682 break;
9683 case XML_PARSER_CDATA_SECTION: {
9684 /*
9685 * The Push mode need to have the SAX callback for
9686 * cdataBlock merge back contiguous callbacks.
9687 */
9688 int base;
9689
9690 base = xmlParseLookupSequence(ctxt, ']', ']', '>');
9691 if (base < 0) {
9692 if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) {
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +00009693 int tmp;
9694
9695 tmp = xmlCheckCdataPush(ctxt->input->cur,
9696 XML_PARSER_BIG_BUFFER_SIZE);
9697 if (tmp < 0) {
9698 tmp = -tmp;
9699 ctxt->input->cur += tmp;
9700 goto encoding_error;
9701 }
Daniel Veillarda880b122003-04-21 21:36:41 +00009702 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
9703 if (ctxt->sax->cdataBlock != NULL)
Daniel Veillardd9d32ae2003-07-05 20:32:43 +00009704 ctxt->sax->cdataBlock(ctxt->userData,
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +00009705 ctxt->input->cur, tmp);
Daniel Veillardd9d32ae2003-07-05 20:32:43 +00009706 else if (ctxt->sax->characters != NULL)
9707 ctxt->sax->characters(ctxt->userData,
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +00009708 ctxt->input->cur, tmp);
Daniel Veillarda880b122003-04-21 21:36:41 +00009709 }
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +00009710 SKIPL(tmp);
Daniel Veillarda880b122003-04-21 21:36:41 +00009711 ctxt->checkIndex = 0;
9712 }
9713 goto done;
9714 } else {
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +00009715 int tmp;
9716
9717 tmp = xmlCheckCdataPush(ctxt->input->cur, base);
9718 if ((tmp < 0) || (tmp != base)) {
9719 tmp = -tmp;
9720 ctxt->input->cur += tmp;
9721 goto encoding_error;
9722 }
Daniel Veillarda880b122003-04-21 21:36:41 +00009723 if ((ctxt->sax != NULL) && (base > 0) &&
9724 (!ctxt->disableSAX)) {
9725 if (ctxt->sax->cdataBlock != NULL)
9726 ctxt->sax->cdataBlock(ctxt->userData,
9727 ctxt->input->cur, base);
Daniel Veillardd9d32ae2003-07-05 20:32:43 +00009728 else if (ctxt->sax->characters != NULL)
9729 ctxt->sax->characters(ctxt->userData,
9730 ctxt->input->cur, base);
Daniel Veillarda880b122003-04-21 21:36:41 +00009731 }
Daniel Veillard0b787f32004-03-26 17:29:53 +00009732 SKIPL(base + 3);
Daniel Veillarda880b122003-04-21 21:36:41 +00009733 ctxt->checkIndex = 0;
9734 ctxt->instate = XML_PARSER_CONTENT;
9735#ifdef DEBUG_PUSH
9736 xmlGenericError(xmlGenericErrorContext,
9737 "PP: entering CONTENT\n");
9738#endif
9739 }
9740 break;
9741 }
Owen Taylor3473f882001-02-23 17:55:21 +00009742 case XML_PARSER_MISC:
9743 SKIP_BLANKS;
9744 if (ctxt->input->buf == NULL)
Daniel Veillarda880b122003-04-21 21:36:41 +00009745 avail = ctxt->input->length -
9746 (ctxt->input->cur - ctxt->input->base);
Owen Taylor3473f882001-02-23 17:55:21 +00009747 else
Daniel Veillarda880b122003-04-21 21:36:41 +00009748 avail = ctxt->input->buf->buffer->use -
9749 (ctxt->input->cur - ctxt->input->base);
Owen Taylor3473f882001-02-23 17:55:21 +00009750 if (avail < 2)
9751 goto done;
9752 cur = ctxt->input->cur[0];
9753 next = ctxt->input->cur[1];
9754 if ((cur == '<') && (next == '?')) {
9755 if ((!terminate) &&
9756 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
9757 goto done;
9758#ifdef DEBUG_PUSH
9759 xmlGenericError(xmlGenericErrorContext,
9760 "PP: Parsing PI\n");
9761#endif
9762 xmlParsePI(ctxt);
9763 } else if ((cur == '<') && (next == '!') &&
Daniel Veillarda880b122003-04-21 21:36:41 +00009764 (ctxt->input->cur[2] == '-') &&
9765 (ctxt->input->cur[3] == '-')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009766 if ((!terminate) &&
9767 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
9768 goto done;
9769#ifdef DEBUG_PUSH
9770 xmlGenericError(xmlGenericErrorContext,
9771 "PP: Parsing Comment\n");
9772#endif
9773 xmlParseComment(ctxt);
9774 ctxt->instate = XML_PARSER_MISC;
9775 } else if ((cur == '<') && (next == '!') &&
Daniel Veillarda880b122003-04-21 21:36:41 +00009776 (ctxt->input->cur[2] == 'D') &&
9777 (ctxt->input->cur[3] == 'O') &&
9778 (ctxt->input->cur[4] == 'C') &&
9779 (ctxt->input->cur[5] == 'T') &&
9780 (ctxt->input->cur[6] == 'Y') &&
9781 (ctxt->input->cur[7] == 'P') &&
Owen Taylor3473f882001-02-23 17:55:21 +00009782 (ctxt->input->cur[8] == 'E')) {
9783 if ((!terminate) &&
9784 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
9785 goto done;
9786#ifdef DEBUG_PUSH
9787 xmlGenericError(xmlGenericErrorContext,
9788 "PP: Parsing internal subset\n");
9789#endif
9790 ctxt->inSubset = 1;
9791 xmlParseDocTypeDecl(ctxt);
9792 if (RAW == '[') {
9793 ctxt->instate = XML_PARSER_DTD;
9794#ifdef DEBUG_PUSH
9795 xmlGenericError(xmlGenericErrorContext,
9796 "PP: entering DTD\n");
9797#endif
9798 } else {
9799 /*
9800 * Create and update the external subset.
9801 */
9802 ctxt->inSubset = 2;
9803 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
9804 (ctxt->sax->externalSubset != NULL))
9805 ctxt->sax->externalSubset(ctxt->userData,
9806 ctxt->intSubName, ctxt->extSubSystem,
9807 ctxt->extSubURI);
9808 ctxt->inSubset = 0;
9809 ctxt->instate = XML_PARSER_PROLOG;
9810#ifdef DEBUG_PUSH
9811 xmlGenericError(xmlGenericErrorContext,
9812 "PP: entering PROLOG\n");
9813#endif
9814 }
9815 } else if ((cur == '<') && (next == '!') &&
9816 (avail < 9)) {
9817 goto done;
9818 } else {
9819 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +00009820 ctxt->progressive = 1;
9821 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Owen Taylor3473f882001-02-23 17:55:21 +00009822#ifdef DEBUG_PUSH
9823 xmlGenericError(xmlGenericErrorContext,
9824 "PP: entering START_TAG\n");
9825#endif
9826 }
9827 break;
Owen Taylor3473f882001-02-23 17:55:21 +00009828 case XML_PARSER_PROLOG:
9829 SKIP_BLANKS;
9830 if (ctxt->input->buf == NULL)
9831 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
9832 else
9833 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
9834 if (avail < 2)
9835 goto done;
9836 cur = ctxt->input->cur[0];
9837 next = ctxt->input->cur[1];
9838 if ((cur == '<') && (next == '?')) {
9839 if ((!terminate) &&
9840 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
9841 goto done;
9842#ifdef DEBUG_PUSH
9843 xmlGenericError(xmlGenericErrorContext,
9844 "PP: Parsing PI\n");
9845#endif
9846 xmlParsePI(ctxt);
9847 } else if ((cur == '<') && (next == '!') &&
9848 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
9849 if ((!terminate) &&
9850 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
9851 goto done;
9852#ifdef DEBUG_PUSH
9853 xmlGenericError(xmlGenericErrorContext,
9854 "PP: Parsing Comment\n");
9855#endif
9856 xmlParseComment(ctxt);
9857 ctxt->instate = XML_PARSER_PROLOG;
9858 } else if ((cur == '<') && (next == '!') &&
9859 (avail < 4)) {
9860 goto done;
9861 } else {
9862 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillard0df3bc32004-06-08 12:03:41 +00009863 if (ctxt->progressive == 0)
9864 ctxt->progressive = 1;
Daniel Veillarda880b122003-04-21 21:36:41 +00009865 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Owen Taylor3473f882001-02-23 17:55:21 +00009866#ifdef DEBUG_PUSH
9867 xmlGenericError(xmlGenericErrorContext,
9868 "PP: entering START_TAG\n");
9869#endif
9870 }
9871 break;
9872 case XML_PARSER_EPILOG:
9873 SKIP_BLANKS;
9874 if (ctxt->input->buf == NULL)
9875 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
9876 else
9877 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
9878 if (avail < 2)
9879 goto done;
9880 cur = ctxt->input->cur[0];
9881 next = ctxt->input->cur[1];
9882 if ((cur == '<') && (next == '?')) {
9883 if ((!terminate) &&
9884 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
9885 goto done;
9886#ifdef DEBUG_PUSH
9887 xmlGenericError(xmlGenericErrorContext,
9888 "PP: Parsing PI\n");
9889#endif
9890 xmlParsePI(ctxt);
9891 ctxt->instate = XML_PARSER_EPILOG;
9892 } else if ((cur == '<') && (next == '!') &&
9893 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
9894 if ((!terminate) &&
9895 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
9896 goto done;
9897#ifdef DEBUG_PUSH
9898 xmlGenericError(xmlGenericErrorContext,
9899 "PP: Parsing Comment\n");
9900#endif
9901 xmlParseComment(ctxt);
9902 ctxt->instate = XML_PARSER_EPILOG;
9903 } else if ((cur == '<') && (next == '!') &&
9904 (avail < 4)) {
9905 goto done;
9906 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009907 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009908 ctxt->instate = XML_PARSER_EOF;
9909#ifdef DEBUG_PUSH
9910 xmlGenericError(xmlGenericErrorContext,
9911 "PP: entering EOF\n");
9912#endif
Daniel Veillard8d24cc12002-03-05 15:41:29 +00009913 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00009914 ctxt->sax->endDocument(ctxt->userData);
9915 goto done;
9916 }
9917 break;
Owen Taylor3473f882001-02-23 17:55:21 +00009918 case XML_PARSER_DTD: {
9919 /*
9920 * Sorry but progressive parsing of the internal subset
9921 * is not expected to be supported. We first check that
9922 * the full content of the internal subset is available and
9923 * the parsing is launched only at that point.
9924 * Internal subset ends up with "']' S? '>'" in an unescaped
9925 * section and not in a ']]>' sequence which are conditional
9926 * sections (whoever argued to keep that crap in XML deserve
9927 * a place in hell !).
9928 */
9929 int base, i;
9930 xmlChar *buf;
9931 xmlChar quote = 0;
9932
9933 base = ctxt->input->cur - ctxt->input->base;
9934 if (base < 0) return(0);
9935 if (ctxt->checkIndex > base)
9936 base = ctxt->checkIndex;
9937 buf = ctxt->input->buf->buffer->content;
9938 for (;(unsigned int) base < ctxt->input->buf->buffer->use;
9939 base++) {
9940 if (quote != 0) {
9941 if (buf[base] == quote)
9942 quote = 0;
9943 continue;
9944 }
Daniel Veillard036143b2004-02-12 11:57:52 +00009945 if ((quote == 0) && (buf[base] == '<')) {
9946 int found = 0;
9947 /* special handling of comments */
9948 if (((unsigned int) base + 4 <
9949 ctxt->input->buf->buffer->use) &&
9950 (buf[base + 1] == '!') &&
9951 (buf[base + 2] == '-') &&
9952 (buf[base + 3] == '-')) {
9953 for (;(unsigned int) base + 3 <
9954 ctxt->input->buf->buffer->use; base++) {
9955 if ((buf[base] == '-') &&
9956 (buf[base + 1] == '-') &&
9957 (buf[base + 2] == '>')) {
9958 found = 1;
9959 base += 2;
9960 break;
9961 }
9962 }
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +00009963 if (!found) {
9964#if 0
9965 fprintf(stderr, "unfinished comment\n");
9966#endif
9967 break; /* for */
9968 }
Daniel Veillard036143b2004-02-12 11:57:52 +00009969 continue;
9970 }
9971 }
Owen Taylor3473f882001-02-23 17:55:21 +00009972 if (buf[base] == '"') {
9973 quote = '"';
9974 continue;
9975 }
9976 if (buf[base] == '\'') {
9977 quote = '\'';
9978 continue;
9979 }
9980 if (buf[base] == ']') {
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +00009981#if 0
9982 fprintf(stderr, "%c%c%c%c: ", buf[base],
9983 buf[base + 1], buf[base + 2], buf[base + 3]);
9984#endif
Owen Taylor3473f882001-02-23 17:55:21 +00009985 if ((unsigned int) base +1 >=
9986 ctxt->input->buf->buffer->use)
9987 break;
9988 if (buf[base + 1] == ']') {
9989 /* conditional crap, skip both ']' ! */
9990 base++;
9991 continue;
9992 }
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +00009993 for (i = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00009994 (unsigned int) base + i < ctxt->input->buf->buffer->use;
9995 i++) {
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +00009996 if (buf[base + i] == '>') {
9997#if 0
9998 fprintf(stderr, "found\n");
9999#endif
Owen Taylor3473f882001-02-23 17:55:21 +000010000 goto found_end_int_subset;
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000010001 }
10002 if (!IS_BLANK_CH(buf[base + i])) {
10003#if 0
10004 fprintf(stderr, "not found\n");
10005#endif
10006 goto not_end_of_int_subset;
10007 }
Owen Taylor3473f882001-02-23 17:55:21 +000010008 }
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000010009#if 0
10010 fprintf(stderr, "end of stream\n");
10011#endif
Owen Taylor3473f882001-02-23 17:55:21 +000010012 break;
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000010013
Owen Taylor3473f882001-02-23 17:55:21 +000010014 }
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000010015not_end_of_int_subset:
10016 continue; /* for */
Owen Taylor3473f882001-02-23 17:55:21 +000010017 }
10018 /*
10019 * We didn't found the end of the Internal subset
10020 */
Owen Taylor3473f882001-02-23 17:55:21 +000010021#ifdef DEBUG_PUSH
10022 if (next == 0)
10023 xmlGenericError(xmlGenericErrorContext,
10024 "PP: lookup of int subset end filed\n");
10025#endif
10026 goto done;
10027
10028found_end_int_subset:
10029 xmlParseInternalSubset(ctxt);
10030 ctxt->inSubset = 2;
10031 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
10032 (ctxt->sax->externalSubset != NULL))
10033 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
10034 ctxt->extSubSystem, ctxt->extSubURI);
10035 ctxt->inSubset = 0;
10036 ctxt->instate = XML_PARSER_PROLOG;
10037 ctxt->checkIndex = 0;
10038#ifdef DEBUG_PUSH
10039 xmlGenericError(xmlGenericErrorContext,
10040 "PP: entering PROLOG\n");
10041#endif
10042 break;
10043 }
10044 case XML_PARSER_COMMENT:
10045 xmlGenericError(xmlGenericErrorContext,
10046 "PP: internal error, state == COMMENT\n");
10047 ctxt->instate = XML_PARSER_CONTENT;
10048#ifdef DEBUG_PUSH
10049 xmlGenericError(xmlGenericErrorContext,
10050 "PP: entering CONTENT\n");
10051#endif
10052 break;
Daniel Veillarda880b122003-04-21 21:36:41 +000010053 case XML_PARSER_IGNORE:
10054 xmlGenericError(xmlGenericErrorContext,
10055 "PP: internal error, state == IGNORE");
10056 ctxt->instate = XML_PARSER_DTD;
10057#ifdef DEBUG_PUSH
10058 xmlGenericError(xmlGenericErrorContext,
10059 "PP: entering DTD\n");
10060#endif
10061 break;
Owen Taylor3473f882001-02-23 17:55:21 +000010062 case XML_PARSER_PI:
10063 xmlGenericError(xmlGenericErrorContext,
10064 "PP: internal error, state == PI\n");
10065 ctxt->instate = XML_PARSER_CONTENT;
10066#ifdef DEBUG_PUSH
10067 xmlGenericError(xmlGenericErrorContext,
10068 "PP: entering CONTENT\n");
10069#endif
10070 break;
10071 case XML_PARSER_ENTITY_DECL:
10072 xmlGenericError(xmlGenericErrorContext,
10073 "PP: internal error, state == ENTITY_DECL\n");
10074 ctxt->instate = XML_PARSER_DTD;
10075#ifdef DEBUG_PUSH
10076 xmlGenericError(xmlGenericErrorContext,
10077 "PP: entering DTD\n");
10078#endif
10079 break;
10080 case XML_PARSER_ENTITY_VALUE:
10081 xmlGenericError(xmlGenericErrorContext,
10082 "PP: internal error, state == ENTITY_VALUE\n");
10083 ctxt->instate = XML_PARSER_CONTENT;
10084#ifdef DEBUG_PUSH
10085 xmlGenericError(xmlGenericErrorContext,
10086 "PP: entering DTD\n");
10087#endif
10088 break;
10089 case XML_PARSER_ATTRIBUTE_VALUE:
10090 xmlGenericError(xmlGenericErrorContext,
10091 "PP: internal error, state == ATTRIBUTE_VALUE\n");
10092 ctxt->instate = XML_PARSER_START_TAG;
10093#ifdef DEBUG_PUSH
10094 xmlGenericError(xmlGenericErrorContext,
10095 "PP: entering START_TAG\n");
10096#endif
10097 break;
10098 case XML_PARSER_SYSTEM_LITERAL:
10099 xmlGenericError(xmlGenericErrorContext,
10100 "PP: internal error, state == SYSTEM_LITERAL\n");
10101 ctxt->instate = XML_PARSER_START_TAG;
10102#ifdef DEBUG_PUSH
10103 xmlGenericError(xmlGenericErrorContext,
10104 "PP: entering START_TAG\n");
10105#endif
10106 break;
Daniel Veillard4a7ae502002-02-18 19:18:17 +000010107 case XML_PARSER_PUBLIC_LITERAL:
10108 xmlGenericError(xmlGenericErrorContext,
10109 "PP: internal error, state == PUBLIC_LITERAL\n");
10110 ctxt->instate = XML_PARSER_START_TAG;
10111#ifdef DEBUG_PUSH
10112 xmlGenericError(xmlGenericErrorContext,
10113 "PP: entering START_TAG\n");
10114#endif
10115 break;
Owen Taylor3473f882001-02-23 17:55:21 +000010116 }
10117 }
10118done:
10119#ifdef DEBUG_PUSH
10120 xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret);
10121#endif
10122 return(ret);
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000010123encoding_error:
10124 {
10125 char buffer[150];
10126
10127 snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
10128 ctxt->input->cur[0], ctxt->input->cur[1],
10129 ctxt->input->cur[2], ctxt->input->cur[3]);
10130 __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
10131 "Input is not proper UTF-8, indicate encoding !\n%s",
10132 BAD_CAST buffer, NULL);
10133 }
10134 return(0);
Owen Taylor3473f882001-02-23 17:55:21 +000010135}
10136
10137/**
Owen Taylor3473f882001-02-23 17:55:21 +000010138 * xmlParseChunk:
10139 * @ctxt: an XML parser context
10140 * @chunk: an char array
10141 * @size: the size in byte of the chunk
10142 * @terminate: last chunk indicator
10143 *
10144 * Parse a Chunk of memory
10145 *
10146 * Returns zero if no error, the xmlParserErrors otherwise.
10147 */
10148int
10149xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
10150 int terminate) {
Daniel Veillard36e5cd52004-11-02 14:52:23 +000010151 if (ctxt == NULL)
10152 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard14412512005-01-21 23:53:26 +000010153 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000010154 return(ctxt->errNo);
Daniel Veillard309f81d2003-09-23 09:02:53 +000010155 if (ctxt->instate == XML_PARSER_START)
10156 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010157 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
10158 (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) {
10159 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
10160 int cur = ctxt->input->cur - ctxt->input->base;
William M. Bracka3215c72004-07-31 16:24:01 +000010161 int res;
Owen Taylor3473f882001-02-23 17:55:21 +000010162
William M. Bracka3215c72004-07-31 16:24:01 +000010163 res =xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
10164 if (res < 0) {
10165 ctxt->errNo = XML_PARSER_EOF;
10166 ctxt->disableSAX = 1;
10167 return (XML_PARSER_EOF);
10168 }
Owen Taylor3473f882001-02-23 17:55:21 +000010169 ctxt->input->base = ctxt->input->buf->buffer->content + base;
10170 ctxt->input->cur = ctxt->input->base + cur;
Daniel Veillard48b2f892001-02-25 16:11:03 +000010171 ctxt->input->end =
10172 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000010173#ifdef DEBUG_PUSH
10174 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
10175#endif
10176
Owen Taylor3473f882001-02-23 17:55:21 +000010177 } else if (ctxt->instate != XML_PARSER_EOF) {
10178 if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
10179 xmlParserInputBufferPtr in = ctxt->input->buf;
10180 if ((in->encoder != NULL) && (in->buffer != NULL) &&
10181 (in->raw != NULL)) {
10182 int nbchars;
10183
10184 nbchars = xmlCharEncInFunc(in->encoder, in->buffer, in->raw);
10185 if (nbchars < 0) {
Daniel Veillard24eb9782003-10-04 21:08:09 +000010186 /* TODO 2.6.0 */
Owen Taylor3473f882001-02-23 17:55:21 +000010187 xmlGenericError(xmlGenericErrorContext,
10188 "xmlParseChunk: encoder error\n");
10189 return(XML_ERR_INVALID_ENCODING);
10190 }
10191 }
10192 }
10193 }
10194 xmlParseTryOrFinish(ctxt, terminate);
Daniel Veillard14412512005-01-21 23:53:26 +000010195 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000010196 return(ctxt->errNo);
Owen Taylor3473f882001-02-23 17:55:21 +000010197 if (terminate) {
10198 /*
10199 * Check for termination
10200 */
Daniel Veillard36e5cd52004-11-02 14:52:23 +000010201 int avail = 0;
10202
10203 if (ctxt->input != NULL) {
Daniel Veillard819d5cb2002-10-14 11:15:18 +000010204 if (ctxt->input->buf == NULL)
Daniel Veillard36e5cd52004-11-02 14:52:23 +000010205 avail = ctxt->input->length -
10206 (ctxt->input->cur - ctxt->input->base);
10207 else
10208 avail = ctxt->input->buf->buffer->use -
10209 (ctxt->input->cur - ctxt->input->base);
10210 }
Daniel Veillard819d5cb2002-10-14 11:15:18 +000010211
Owen Taylor3473f882001-02-23 17:55:21 +000010212 if ((ctxt->instate != XML_PARSER_EOF) &&
10213 (ctxt->instate != XML_PARSER_EPILOG)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010214 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010215 }
Daniel Veillard819d5cb2002-10-14 11:15:18 +000010216 if ((ctxt->instate == XML_PARSER_EPILOG) && (avail > 0)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010217 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Daniel Veillard819d5cb2002-10-14 11:15:18 +000010218 }
Owen Taylor3473f882001-02-23 17:55:21 +000010219 if (ctxt->instate != XML_PARSER_EOF) {
Daniel Veillard8d24cc12002-03-05 15:41:29 +000010220 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +000010221 ctxt->sax->endDocument(ctxt->userData);
10222 }
10223 ctxt->instate = XML_PARSER_EOF;
10224 }
10225 return((xmlParserErrors) ctxt->errNo);
10226}
10227
10228/************************************************************************
10229 * *
10230 * I/O front end functions to the parser *
10231 * *
10232 ************************************************************************/
10233
10234/**
Owen Taylor3473f882001-02-23 17:55:21 +000010235 * xmlCreatePushParserCtxt:
10236 * @sax: a SAX handler
10237 * @user_data: The user data returned on SAX callbacks
10238 * @chunk: a pointer to an array of chars
10239 * @size: number of chars in the array
10240 * @filename: an optional file name or URI
10241 *
Daniel Veillard176d99f2002-07-06 19:22:28 +000010242 * Create a parser context for using the XML parser in push mode.
10243 * If @buffer and @size are non-NULL, the data is used to detect
10244 * the encoding. The remaining characters will be parsed so they
10245 * don't need to be fed in again through xmlParseChunk.
Owen Taylor3473f882001-02-23 17:55:21 +000010246 * To allow content encoding detection, @size should be >= 4
10247 * The value of @filename is used for fetching external entities
10248 * and error/warning reports.
10249 *
10250 * Returns the new parser context or NULL
10251 */
Daniel Veillard176d99f2002-07-06 19:22:28 +000010252
Owen Taylor3473f882001-02-23 17:55:21 +000010253xmlParserCtxtPtr
10254xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
10255 const char *chunk, int size, const char *filename) {
10256 xmlParserCtxtPtr ctxt;
10257 xmlParserInputPtr inputStream;
10258 xmlParserInputBufferPtr buf;
10259 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
10260
10261 /*
10262 * plug some encoding conversion routines
10263 */
10264 if ((chunk != NULL) && (size >= 4))
10265 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
10266
10267 buf = xmlAllocParserInputBuffer(enc);
10268 if (buf == NULL) return(NULL);
10269
10270 ctxt = xmlNewParserCtxt();
10271 if (ctxt == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +000010272 xmlErrMemory(NULL, "creating parser: out of memory\n");
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000010273 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000010274 return(NULL);
10275 }
Daniel Veillard03a53c32004-10-26 16:06:51 +000010276 ctxt->dictNames = 1;
Daniel Veillarde57ec792003-09-10 10:50:59 +000010277 ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 * sizeof(xmlChar *));
10278 if (ctxt->pushTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010279 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +000010280 xmlFreeParserInputBuffer(buf);
10281 xmlFreeParserCtxt(ctxt);
10282 return(NULL);
10283 }
Owen Taylor3473f882001-02-23 17:55:21 +000010284 if (sax != NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +000010285#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard092643b2003-09-25 14:29:29 +000010286 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Daniel Veillard81273902003-09-30 00:43:48 +000010287#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000010288 xmlFree(ctxt->sax);
10289 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
10290 if (ctxt->sax == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010291 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000010292 xmlFreeParserInputBuffer(buf);
10293 xmlFreeParserCtxt(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010294 return(NULL);
10295 }
Daniel Veillard5ea30d72004-11-08 11:54:28 +000010296 memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
10297 if (sax->initialized == XML_SAX2_MAGIC)
10298 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
10299 else
10300 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
Owen Taylor3473f882001-02-23 17:55:21 +000010301 if (user_data != NULL)
10302 ctxt->userData = user_data;
10303 }
10304 if (filename == NULL) {
10305 ctxt->directory = NULL;
10306 } else {
10307 ctxt->directory = xmlParserGetDirectory(filename);
10308 }
10309
10310 inputStream = xmlNewInputStream(ctxt);
10311 if (inputStream == NULL) {
10312 xmlFreeParserCtxt(ctxt);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000010313 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000010314 return(NULL);
10315 }
10316
10317 if (filename == NULL)
10318 inputStream->filename = NULL;
William M. Bracka3215c72004-07-31 16:24:01 +000010319 else {
Daniel Veillardf4862f02002-09-10 11:13:43 +000010320 inputStream->filename = (char *)
Igor Zlatkovic5f9fada2003-02-19 14:51:00 +000010321 xmlCanonicPath((const xmlChar *) filename);
William M. Bracka3215c72004-07-31 16:24:01 +000010322 if (inputStream->filename == NULL) {
10323 xmlFreeParserCtxt(ctxt);
10324 xmlFreeParserInputBuffer(buf);
10325 return(NULL);
10326 }
10327 }
Owen Taylor3473f882001-02-23 17:55:21 +000010328 inputStream->buf = buf;
10329 inputStream->base = inputStream->buf->buffer->content;
10330 inputStream->cur = inputStream->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +000010331 inputStream->end =
10332 &inputStream->buf->buffer->content[inputStream->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000010333
10334 inputPush(ctxt, inputStream);
10335
William M. Brack3a1cd212005-02-11 14:35:54 +000010336 /*
10337 * If the caller didn't provide an initial 'chunk' for determining
10338 * the encoding, we set the context to XML_CHAR_ENCODING_NONE so
10339 * that it can be automatically determined later
10340 */
10341 if ((size == 0) || (chunk == NULL)) {
10342 ctxt->charset = XML_CHAR_ENCODING_NONE;
10343 } else if ((ctxt->input != NULL) && (ctxt->input->buf != NULL)) {
Daniel Veillardaa39a0f2002-01-06 12:47:22 +000010344 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
10345 int cur = ctxt->input->cur - ctxt->input->base;
10346
Owen Taylor3473f882001-02-23 17:55:21 +000010347 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
Daniel Veillardaa39a0f2002-01-06 12:47:22 +000010348
10349 ctxt->input->base = ctxt->input->buf->buffer->content + base;
10350 ctxt->input->cur = ctxt->input->base + cur;
10351 ctxt->input->end =
10352 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000010353#ifdef DEBUG_PUSH
10354 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
10355#endif
10356 }
10357
Daniel Veillard0e4cd172001-06-28 12:13:56 +000010358 if (enc != XML_CHAR_ENCODING_NONE) {
10359 xmlSwitchEncoding(ctxt, enc);
10360 }
10361
Owen Taylor3473f882001-02-23 17:55:21 +000010362 return(ctxt);
10363}
Daniel Veillard73b013f2003-09-30 12:36:01 +000010364#endif /* LIBXML_PUSH_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000010365
10366/**
Daniel Veillard39e5c892005-07-03 22:48:50 +000010367 * xmlStopParser:
10368 * @ctxt: an XML parser context
10369 *
10370 * Blocks further parser processing
10371 */
10372void
10373xmlStopParser(xmlParserCtxtPtr ctxt) {
10374 if (ctxt == NULL)
10375 return;
10376 ctxt->instate = XML_PARSER_EOF;
10377 ctxt->disableSAX = 1;
10378 if (ctxt->input != NULL) {
10379 ctxt->input->cur = BAD_CAST"";
10380 ctxt->input->base = ctxt->input->cur;
10381 }
10382}
10383
10384/**
Owen Taylor3473f882001-02-23 17:55:21 +000010385 * xmlCreateIOParserCtxt:
10386 * @sax: a SAX handler
10387 * @user_data: The user data returned on SAX callbacks
10388 * @ioread: an I/O read function
10389 * @ioclose: an I/O close function
10390 * @ioctx: an I/O handler
10391 * @enc: the charset encoding if known
10392 *
10393 * Create a parser context for using the XML parser with an existing
10394 * I/O stream
10395 *
10396 * Returns the new parser context or NULL
10397 */
10398xmlParserCtxtPtr
10399xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
10400 xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
10401 void *ioctx, xmlCharEncoding enc) {
10402 xmlParserCtxtPtr ctxt;
10403 xmlParserInputPtr inputStream;
10404 xmlParserInputBufferPtr buf;
Daniel Veillard42595322004-11-08 10:52:06 +000010405
10406 if (ioread == NULL) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010407
10408 buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
10409 if (buf == NULL) return(NULL);
10410
10411 ctxt = xmlNewParserCtxt();
10412 if (ctxt == NULL) {
10413 xmlFree(buf);
10414 return(NULL);
10415 }
10416 if (sax != NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +000010417#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard092643b2003-09-25 14:29:29 +000010418 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Daniel Veillard81273902003-09-30 00:43:48 +000010419#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000010420 xmlFree(ctxt->sax);
10421 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
10422 if (ctxt->sax == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010423 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010424 xmlFree(ctxt);
10425 return(NULL);
10426 }
Daniel Veillard5ea30d72004-11-08 11:54:28 +000010427 memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
10428 if (sax->initialized == XML_SAX2_MAGIC)
10429 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
10430 else
10431 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
Owen Taylor3473f882001-02-23 17:55:21 +000010432 if (user_data != NULL)
10433 ctxt->userData = user_data;
10434 }
10435
10436 inputStream = xmlNewIOInputStream(ctxt, buf, enc);
10437 if (inputStream == NULL) {
10438 xmlFreeParserCtxt(ctxt);
10439 return(NULL);
10440 }
10441 inputPush(ctxt, inputStream);
10442
10443 return(ctxt);
10444}
10445
Daniel Veillard4432df22003-09-28 18:58:27 +000010446#ifdef LIBXML_VALID_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000010447/************************************************************************
10448 * *
Daniel Veillardcbaf3992001-12-31 16:16:02 +000010449 * Front ends when parsing a DTD *
Owen Taylor3473f882001-02-23 17:55:21 +000010450 * *
10451 ************************************************************************/
10452
10453/**
10454 * xmlIOParseDTD:
10455 * @sax: the SAX handler block or NULL
10456 * @input: an Input Buffer
10457 * @enc: the charset encoding if known
10458 *
10459 * Load and parse a DTD
10460 *
10461 * Returns the resulting xmlDtdPtr or NULL in case of error.
10462 * @input will be freed at parsing end.
10463 */
10464
10465xmlDtdPtr
10466xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
10467 xmlCharEncoding enc) {
10468 xmlDtdPtr ret = NULL;
10469 xmlParserCtxtPtr ctxt;
10470 xmlParserInputPtr pinput = NULL;
Daniel Veillard87a764e2001-06-20 17:41:10 +000010471 xmlChar start[4];
Owen Taylor3473f882001-02-23 17:55:21 +000010472
10473 if (input == NULL)
10474 return(NULL);
10475
10476 ctxt = xmlNewParserCtxt();
10477 if (ctxt == NULL) {
10478 return(NULL);
10479 }
10480
10481 /*
10482 * Set-up the SAX context
10483 */
10484 if (sax != NULL) {
10485 if (ctxt->sax != NULL)
10486 xmlFree(ctxt->sax);
10487 ctxt->sax = sax;
Daniel Veillard500a1de2004-03-22 15:22:58 +000010488 ctxt->userData = ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000010489 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000010490 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010491
10492 /*
10493 * generate a parser input from the I/O handler
10494 */
10495
Daniel Veillard43caefb2003-12-07 19:32:22 +000010496 pinput = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
Owen Taylor3473f882001-02-23 17:55:21 +000010497 if (pinput == NULL) {
10498 if (sax != NULL) ctxt->sax = NULL;
10499 xmlFreeParserCtxt(ctxt);
10500 return(NULL);
10501 }
10502
10503 /*
10504 * plug some encoding conversion routines here.
10505 */
10506 xmlPushInput(ctxt, pinput);
Daniel Veillard43caefb2003-12-07 19:32:22 +000010507 if (enc != XML_CHAR_ENCODING_NONE) {
10508 xmlSwitchEncoding(ctxt, enc);
10509 }
Owen Taylor3473f882001-02-23 17:55:21 +000010510
10511 pinput->filename = NULL;
10512 pinput->line = 1;
10513 pinput->col = 1;
10514 pinput->base = ctxt->input->cur;
10515 pinput->cur = ctxt->input->cur;
10516 pinput->free = NULL;
10517
10518 /*
10519 * let's parse that entity knowing it's an external subset.
10520 */
10521 ctxt->inSubset = 2;
10522 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
10523 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
10524 BAD_CAST "none", BAD_CAST "none");
Daniel Veillard87a764e2001-06-20 17:41:10 +000010525
Daniel Veillard4aede2e2003-10-17 12:43:59 +000010526 if ((enc == XML_CHAR_ENCODING_NONE) &&
10527 ((ctxt->input->end - ctxt->input->cur) >= 4)) {
Daniel Veillard87a764e2001-06-20 17:41:10 +000010528 /*
10529 * Get the 4 first bytes and decode the charset
10530 * if enc != XML_CHAR_ENCODING_NONE
10531 * plug some encoding conversion routines.
10532 */
10533 start[0] = RAW;
10534 start[1] = NXT(1);
10535 start[2] = NXT(2);
10536 start[3] = NXT(3);
10537 enc = xmlDetectCharEncoding(start, 4);
10538 if (enc != XML_CHAR_ENCODING_NONE) {
10539 xmlSwitchEncoding(ctxt, enc);
10540 }
10541 }
10542
Owen Taylor3473f882001-02-23 17:55:21 +000010543 xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
10544
10545 if (ctxt->myDoc != NULL) {
10546 if (ctxt->wellFormed) {
10547 ret = ctxt->myDoc->extSubset;
10548 ctxt->myDoc->extSubset = NULL;
Daniel Veillard329456a2003-04-26 21:21:00 +000010549 if (ret != NULL) {
10550 xmlNodePtr tmp;
10551
10552 ret->doc = NULL;
10553 tmp = ret->children;
10554 while (tmp != NULL) {
10555 tmp->doc = NULL;
10556 tmp = tmp->next;
10557 }
10558 }
Owen Taylor3473f882001-02-23 17:55:21 +000010559 } else {
10560 ret = NULL;
10561 }
10562 xmlFreeDoc(ctxt->myDoc);
10563 ctxt->myDoc = NULL;
10564 }
10565 if (sax != NULL) ctxt->sax = NULL;
10566 xmlFreeParserCtxt(ctxt);
10567
10568 return(ret);
10569}
10570
10571/**
10572 * xmlSAXParseDTD:
10573 * @sax: the SAX handler block
10574 * @ExternalID: a NAME* containing the External ID of the DTD
10575 * @SystemID: a NAME* containing the URL to the DTD
10576 *
10577 * Load and parse an external subset.
10578 *
10579 * Returns the resulting xmlDtdPtr or NULL in case of error.
10580 */
10581
10582xmlDtdPtr
10583xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
10584 const xmlChar *SystemID) {
10585 xmlDtdPtr ret = NULL;
10586 xmlParserCtxtPtr ctxt;
10587 xmlParserInputPtr input = NULL;
10588 xmlCharEncoding enc;
Igor Zlatkovic07d59762004-08-24 19:12:51 +000010589 xmlChar* systemIdCanonic;
Owen Taylor3473f882001-02-23 17:55:21 +000010590
10591 if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
10592
10593 ctxt = xmlNewParserCtxt();
10594 if (ctxt == NULL) {
10595 return(NULL);
10596 }
10597
10598 /*
10599 * Set-up the SAX context
10600 */
10601 if (sax != NULL) {
10602 if (ctxt->sax != NULL)
10603 xmlFree(ctxt->sax);
10604 ctxt->sax = sax;
Daniel Veillardbf1e3d82003-08-14 23:57:26 +000010605 ctxt->userData = ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000010606 }
Igor Zlatkovic07d59762004-08-24 19:12:51 +000010607
10608 /*
10609 * Canonicalise the system ID
10610 */
10611 systemIdCanonic = xmlCanonicPath(SystemID);
Daniel Veillardc93a19f2004-10-04 11:53:20 +000010612 if ((SystemID != NULL) && (systemIdCanonic == NULL)) {
Igor Zlatkovic07d59762004-08-24 19:12:51 +000010613 xmlFreeParserCtxt(ctxt);
10614 return(NULL);
10615 }
Owen Taylor3473f882001-02-23 17:55:21 +000010616
10617 /*
10618 * Ask the Entity resolver to load the damn thing
10619 */
10620
10621 if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
Igor Zlatkovic07d59762004-08-24 19:12:51 +000010622 input = ctxt->sax->resolveEntity(ctxt, ExternalID, systemIdCanonic);
Owen Taylor3473f882001-02-23 17:55:21 +000010623 if (input == NULL) {
10624 if (sax != NULL) ctxt->sax = NULL;
10625 xmlFreeParserCtxt(ctxt);
Daniel Veillard34099b42004-11-04 17:34:35 +000010626 if (systemIdCanonic != NULL)
10627 xmlFree(systemIdCanonic);
Owen Taylor3473f882001-02-23 17:55:21 +000010628 return(NULL);
10629 }
10630
10631 /*
10632 * plug some encoding conversion routines here.
10633 */
10634 xmlPushInput(ctxt, input);
Daniel Veillard4aede2e2003-10-17 12:43:59 +000010635 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
10636 enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
10637 xmlSwitchEncoding(ctxt, enc);
10638 }
Owen Taylor3473f882001-02-23 17:55:21 +000010639
10640 if (input->filename == NULL)
Igor Zlatkovic07d59762004-08-24 19:12:51 +000010641 input->filename = (char *) systemIdCanonic;
10642 else
10643 xmlFree(systemIdCanonic);
Owen Taylor3473f882001-02-23 17:55:21 +000010644 input->line = 1;
10645 input->col = 1;
10646 input->base = ctxt->input->cur;
10647 input->cur = ctxt->input->cur;
10648 input->free = NULL;
10649
10650 /*
10651 * let's parse that entity knowing it's an external subset.
10652 */
10653 ctxt->inSubset = 2;
10654 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
10655 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
10656 ExternalID, SystemID);
10657 xmlParseExternalSubset(ctxt, ExternalID, SystemID);
10658
10659 if (ctxt->myDoc != NULL) {
10660 if (ctxt->wellFormed) {
10661 ret = ctxt->myDoc->extSubset;
10662 ctxt->myDoc->extSubset = NULL;
Daniel Veillardc5573462003-04-25 16:43:49 +000010663 if (ret != NULL) {
10664 xmlNodePtr tmp;
10665
10666 ret->doc = NULL;
10667 tmp = ret->children;
10668 while (tmp != NULL) {
10669 tmp->doc = NULL;
10670 tmp = tmp->next;
10671 }
10672 }
Owen Taylor3473f882001-02-23 17:55:21 +000010673 } else {
10674 ret = NULL;
10675 }
10676 xmlFreeDoc(ctxt->myDoc);
10677 ctxt->myDoc = NULL;
10678 }
10679 if (sax != NULL) ctxt->sax = NULL;
10680 xmlFreeParserCtxt(ctxt);
10681
10682 return(ret);
10683}
10684
Daniel Veillard4432df22003-09-28 18:58:27 +000010685
Owen Taylor3473f882001-02-23 17:55:21 +000010686/**
10687 * xmlParseDTD:
10688 * @ExternalID: a NAME* containing the External ID of the DTD
10689 * @SystemID: a NAME* containing the URL to the DTD
10690 *
10691 * Load and parse an external subset.
10692 *
10693 * Returns the resulting xmlDtdPtr or NULL in case of error.
10694 */
10695
10696xmlDtdPtr
10697xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
10698 return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
10699}
Daniel Veillard4432df22003-09-28 18:58:27 +000010700#endif /* LIBXML_VALID_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000010701
10702/************************************************************************
10703 * *
10704 * Front ends when parsing an Entity *
10705 * *
10706 ************************************************************************/
10707
10708/**
Owen Taylor3473f882001-02-23 17:55:21 +000010709 * xmlParseCtxtExternalEntity:
10710 * @ctx: the existing parsing context
10711 * @URL: the URL for the entity to load
10712 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +000010713 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +000010714 *
10715 * Parse an external general entity within an existing parsing context
10716 * An external general parsed entity is well-formed if it matches the
10717 * production labeled extParsedEnt.
10718 *
10719 * [78] extParsedEnt ::= TextDecl? content
10720 *
10721 * Returns 0 if the entity is well formed, -1 in case of args problem and
10722 * the parser error code otherwise
10723 */
10724
10725int
10726xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
Daniel Veillardcda96922001-08-21 10:56:31 +000010727 const xmlChar *ID, xmlNodePtr *lst) {
Owen Taylor3473f882001-02-23 17:55:21 +000010728 xmlParserCtxtPtr ctxt;
10729 xmlDocPtr newDoc;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000010730 xmlNodePtr newRoot;
Owen Taylor3473f882001-02-23 17:55:21 +000010731 xmlSAXHandlerPtr oldsax = NULL;
10732 int ret = 0;
Daniel Veillard87a764e2001-06-20 17:41:10 +000010733 xmlChar start[4];
10734 xmlCharEncoding enc;
Owen Taylor3473f882001-02-23 17:55:21 +000010735
Daniel Veillardce682bc2004-11-05 17:22:25 +000010736 if (ctx == NULL) return(-1);
10737
Owen Taylor3473f882001-02-23 17:55:21 +000010738 if (ctx->depth > 40) {
10739 return(XML_ERR_ENTITY_LOOP);
10740 }
10741
Daniel Veillardcda96922001-08-21 10:56:31 +000010742 if (lst != NULL)
10743 *lst = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000010744 if ((URL == NULL) && (ID == NULL))
10745 return(-1);
10746 if (ctx->myDoc == NULL) /* @@ relax but check for dereferences */
10747 return(-1);
10748
10749
10750 ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL);
10751 if (ctxt == NULL) return(-1);
10752 ctxt->userData = ctxt;
Daniel Veillarde2830f12003-01-08 17:47:49 +000010753 ctxt->_private = ctx->_private;
Owen Taylor3473f882001-02-23 17:55:21 +000010754 oldsax = ctxt->sax;
10755 ctxt->sax = ctx->sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000010756 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010757 newDoc = xmlNewDoc(BAD_CAST "1.0");
10758 if (newDoc == NULL) {
10759 xmlFreeParserCtxt(ctxt);
10760 return(-1);
10761 }
Daniel Veillard03a53c32004-10-26 16:06:51 +000010762 if (ctx->myDoc->dict) {
10763 newDoc->dict = ctx->myDoc->dict;
10764 xmlDictReference(newDoc->dict);
10765 }
Owen Taylor3473f882001-02-23 17:55:21 +000010766 if (ctx->myDoc != NULL) {
10767 newDoc->intSubset = ctx->myDoc->intSubset;
10768 newDoc->extSubset = ctx->myDoc->extSubset;
10769 }
10770 if (ctx->myDoc->URL != NULL) {
10771 newDoc->URL = xmlStrdup(ctx->myDoc->URL);
10772 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000010773 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
10774 if (newRoot == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000010775 ctxt->sax = oldsax;
10776 xmlFreeParserCtxt(ctxt);
10777 newDoc->intSubset = NULL;
10778 newDoc->extSubset = NULL;
10779 xmlFreeDoc(newDoc);
10780 return(-1);
10781 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000010782 xmlAddChild((xmlNodePtr) newDoc, newRoot);
Owen Taylor3473f882001-02-23 17:55:21 +000010783 nodePush(ctxt, newDoc->children);
10784 if (ctx->myDoc == NULL) {
10785 ctxt->myDoc = newDoc;
10786 } else {
10787 ctxt->myDoc = ctx->myDoc;
10788 newDoc->children->doc = ctx->myDoc;
10789 }
10790
Daniel Veillard87a764e2001-06-20 17:41:10 +000010791 /*
10792 * Get the 4 first bytes and decode the charset
10793 * if enc != XML_CHAR_ENCODING_NONE
10794 * plug some encoding conversion routines.
10795 */
10796 GROW
Daniel Veillard4aede2e2003-10-17 12:43:59 +000010797 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
10798 start[0] = RAW;
10799 start[1] = NXT(1);
10800 start[2] = NXT(2);
10801 start[3] = NXT(3);
10802 enc = xmlDetectCharEncoding(start, 4);
10803 if (enc != XML_CHAR_ENCODING_NONE) {
10804 xmlSwitchEncoding(ctxt, enc);
10805 }
Daniel Veillard87a764e2001-06-20 17:41:10 +000010806 }
10807
Owen Taylor3473f882001-02-23 17:55:21 +000010808 /*
10809 * Parse a possible text declaration first
10810 */
Daniel Veillarda07050d2003-10-19 14:46:32 +000010811 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +000010812 xmlParseTextDecl(ctxt);
10813 }
10814
10815 /*
10816 * Doing validity checking on chunk doesn't make sense
10817 */
10818 ctxt->instate = XML_PARSER_CONTENT;
10819 ctxt->validate = ctx->validate;
Daniel Veillard5f8d1a32003-03-23 21:02:00 +000010820 ctxt->valid = ctx->valid;
Owen Taylor3473f882001-02-23 17:55:21 +000010821 ctxt->loadsubset = ctx->loadsubset;
10822 ctxt->depth = ctx->depth + 1;
10823 ctxt->replaceEntities = ctx->replaceEntities;
10824 if (ctxt->validate) {
10825 ctxt->vctxt.error = ctx->vctxt.error;
10826 ctxt->vctxt.warning = ctx->vctxt.warning;
Owen Taylor3473f882001-02-23 17:55:21 +000010827 } else {
10828 ctxt->vctxt.error = NULL;
10829 ctxt->vctxt.warning = NULL;
10830 }
Daniel Veillarda9142e72001-06-19 11:07:54 +000010831 ctxt->vctxt.nodeTab = NULL;
10832 ctxt->vctxt.nodeNr = 0;
10833 ctxt->vctxt.nodeMax = 0;
10834 ctxt->vctxt.node = NULL;
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000010835 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
10836 ctxt->dict = ctx->dict;
Daniel Veillardfd343dc2003-10-31 10:55:22 +000010837 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
10838 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
10839 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000010840 ctxt->dictNames = ctx->dictNames;
10841 ctxt->attsDefault = ctx->attsDefault;
10842 ctxt->attsSpecial = ctx->attsSpecial;
William M. Brack503b6102004-08-19 02:17:27 +000010843 ctxt->linenumbers = ctx->linenumbers;
Owen Taylor3473f882001-02-23 17:55:21 +000010844
10845 xmlParseContent(ctxt);
10846
Daniel Veillard5f8d1a32003-03-23 21:02:00 +000010847 ctx->validate = ctxt->validate;
10848 ctx->valid = ctxt->valid;
Owen Taylor3473f882001-02-23 17:55:21 +000010849 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010850 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010851 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010852 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010853 }
10854 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010855 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010856 }
10857
10858 if (!ctxt->wellFormed) {
10859 if (ctxt->errNo == 0)
10860 ret = 1;
10861 else
10862 ret = ctxt->errNo;
10863 } else {
Daniel Veillardcda96922001-08-21 10:56:31 +000010864 if (lst != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000010865 xmlNodePtr cur;
10866
10867 /*
10868 * Return the newly created nodeset after unlinking it from
10869 * they pseudo parent.
10870 */
10871 cur = newDoc->children->children;
Daniel Veillardcda96922001-08-21 10:56:31 +000010872 *lst = cur;
Owen Taylor3473f882001-02-23 17:55:21 +000010873 while (cur != NULL) {
10874 cur->parent = NULL;
10875 cur = cur->next;
10876 }
10877 newDoc->children->children = NULL;
10878 }
10879 ret = 0;
10880 }
10881 ctxt->sax = oldsax;
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000010882 ctxt->dict = NULL;
10883 ctxt->attsDefault = NULL;
10884 ctxt->attsSpecial = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000010885 xmlFreeParserCtxt(ctxt);
10886 newDoc->intSubset = NULL;
10887 newDoc->extSubset = NULL;
10888 xmlFreeDoc(newDoc);
10889
10890 return(ret);
10891}
10892
10893/**
Daniel Veillard257d9102001-05-08 10:41:44 +000010894 * xmlParseExternalEntityPrivate:
Owen Taylor3473f882001-02-23 17:55:21 +000010895 * @doc: the document the chunk pertains to
Daniel Veillarda97a19b2001-05-20 13:19:52 +000010896 * @oldctxt: the previous parser context if available
Owen Taylor3473f882001-02-23 17:55:21 +000010897 * @sax: the SAX handler bloc (possibly NULL)
10898 * @user_data: The user data returned on SAX callbacks (possibly NULL)
10899 * @depth: Used for loop detection, use 0
10900 * @URL: the URL for the entity to load
10901 * @ID: the System ID for the entity to load
10902 * @list: the return value for the set of parsed nodes
10903 *
Daniel Veillard257d9102001-05-08 10:41:44 +000010904 * Private version of xmlParseExternalEntity()
Owen Taylor3473f882001-02-23 17:55:21 +000010905 *
10906 * Returns 0 if the entity is well formed, -1 in case of args problem and
10907 * the parser error code otherwise
10908 */
10909
Daniel Veillard7d515752003-09-26 19:12:37 +000010910static xmlParserErrors
Daniel Veillarda97a19b2001-05-20 13:19:52 +000010911xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
10912 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +000010913 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +000010914 const xmlChar *ID, xmlNodePtr *list) {
Owen Taylor3473f882001-02-23 17:55:21 +000010915 xmlParserCtxtPtr ctxt;
10916 xmlDocPtr newDoc;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000010917 xmlNodePtr newRoot;
Owen Taylor3473f882001-02-23 17:55:21 +000010918 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillard7d515752003-09-26 19:12:37 +000010919 xmlParserErrors ret = XML_ERR_OK;
Daniel Veillard87a764e2001-06-20 17:41:10 +000010920 xmlChar start[4];
10921 xmlCharEncoding enc;
Owen Taylor3473f882001-02-23 17:55:21 +000010922
10923 if (depth > 40) {
10924 return(XML_ERR_ENTITY_LOOP);
10925 }
10926
10927
10928
10929 if (list != NULL)
10930 *list = NULL;
10931 if ((URL == NULL) && (ID == NULL))
Daniel Veillard7d515752003-09-26 19:12:37 +000010932 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000010933 if (doc == NULL) /* @@ relax but check for dereferences */
Daniel Veillard7d515752003-09-26 19:12:37 +000010934 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000010935
10936
10937 ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL);
William M. Brackb670e2e2003-09-27 01:05:55 +000010938 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
Owen Taylor3473f882001-02-23 17:55:21 +000010939 ctxt->userData = ctxt;
Daniel Veillarda97a19b2001-05-20 13:19:52 +000010940 if (oldctxt != NULL) {
10941 ctxt->_private = oldctxt->_private;
10942 ctxt->loadsubset = oldctxt->loadsubset;
10943 ctxt->validate = oldctxt->validate;
10944 ctxt->external = oldctxt->external;
Daniel Veillard44e1dd02003-02-21 23:23:28 +000010945 ctxt->record_info = oldctxt->record_info;
10946 ctxt->node_seq.maximum = oldctxt->node_seq.maximum;
10947 ctxt->node_seq.length = oldctxt->node_seq.length;
10948 ctxt->node_seq.buffer = oldctxt->node_seq.buffer;
Daniel Veillarda97a19b2001-05-20 13:19:52 +000010949 } else {
10950 /*
10951 * Doing validity checking on chunk without context
10952 * doesn't make sense
10953 */
10954 ctxt->_private = NULL;
10955 ctxt->validate = 0;
10956 ctxt->external = 2;
10957 ctxt->loadsubset = 0;
10958 }
Owen Taylor3473f882001-02-23 17:55:21 +000010959 if (sax != NULL) {
10960 oldsax = ctxt->sax;
10961 ctxt->sax = sax;
10962 if (user_data != NULL)
10963 ctxt->userData = user_data;
10964 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000010965 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010966 newDoc = xmlNewDoc(BAD_CAST "1.0");
10967 if (newDoc == NULL) {
Daniel Veillard44e1dd02003-02-21 23:23:28 +000010968 ctxt->node_seq.maximum = 0;
10969 ctxt->node_seq.length = 0;
10970 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000010971 xmlFreeParserCtxt(ctxt);
Daniel Veillard7d515752003-09-26 19:12:37 +000010972 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000010973 }
10974 if (doc != NULL) {
10975 newDoc->intSubset = doc->intSubset;
10976 newDoc->extSubset = doc->extSubset;
Daniel Veillard03a53c32004-10-26 16:06:51 +000010977 newDoc->dict = doc->dict;
10978 } else if (oldctxt != NULL) {
10979 newDoc->dict = oldctxt->dict;
Owen Taylor3473f882001-02-23 17:55:21 +000010980 }
Daniel Veillard03a53c32004-10-26 16:06:51 +000010981 xmlDictReference(newDoc->dict);
10982
Owen Taylor3473f882001-02-23 17:55:21 +000010983 if (doc->URL != NULL) {
10984 newDoc->URL = xmlStrdup(doc->URL);
10985 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000010986 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
10987 if (newRoot == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000010988 if (sax != NULL)
10989 ctxt->sax = oldsax;
Daniel Veillard44e1dd02003-02-21 23:23:28 +000010990 ctxt->node_seq.maximum = 0;
10991 ctxt->node_seq.length = 0;
10992 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000010993 xmlFreeParserCtxt(ctxt);
10994 newDoc->intSubset = NULL;
10995 newDoc->extSubset = NULL;
10996 xmlFreeDoc(newDoc);
Daniel Veillard7d515752003-09-26 19:12:37 +000010997 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000010998 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000010999 xmlAddChild((xmlNodePtr) newDoc, newRoot);
Owen Taylor3473f882001-02-23 17:55:21 +000011000 nodePush(ctxt, newDoc->children);
11001 if (doc == NULL) {
11002 ctxt->myDoc = newDoc;
11003 } else {
11004 ctxt->myDoc = doc;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011005 newRoot->doc = doc;
Owen Taylor3473f882001-02-23 17:55:21 +000011006 }
11007
Daniel Veillard87a764e2001-06-20 17:41:10 +000011008 /*
11009 * Get the 4 first bytes and decode the charset
11010 * if enc != XML_CHAR_ENCODING_NONE
11011 * plug some encoding conversion routines.
11012 */
11013 GROW;
Daniel Veillard4aede2e2003-10-17 12:43:59 +000011014 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
11015 start[0] = RAW;
11016 start[1] = NXT(1);
11017 start[2] = NXT(2);
11018 start[3] = NXT(3);
11019 enc = xmlDetectCharEncoding(start, 4);
11020 if (enc != XML_CHAR_ENCODING_NONE) {
11021 xmlSwitchEncoding(ctxt, enc);
11022 }
Daniel Veillard87a764e2001-06-20 17:41:10 +000011023 }
11024
Owen Taylor3473f882001-02-23 17:55:21 +000011025 /*
11026 * Parse a possible text declaration first
11027 */
Daniel Veillarda07050d2003-10-19 14:46:32 +000011028 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +000011029 xmlParseTextDecl(ctxt);
11030 }
11031
Owen Taylor3473f882001-02-23 17:55:21 +000011032 ctxt->instate = XML_PARSER_CONTENT;
Owen Taylor3473f882001-02-23 17:55:21 +000011033 ctxt->depth = depth;
11034
11035 xmlParseContent(ctxt);
11036
Daniel Veillard561b7f82002-03-20 21:55:57 +000011037 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011038 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard561b7f82002-03-20 21:55:57 +000011039 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011040 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011041 }
11042 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011043 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011044 }
11045
11046 if (!ctxt->wellFormed) {
11047 if (ctxt->errNo == 0)
Daniel Veillard7d515752003-09-26 19:12:37 +000011048 ret = XML_ERR_INTERNAL_ERROR;
Owen Taylor3473f882001-02-23 17:55:21 +000011049 else
William M. Brack7b9154b2003-09-27 19:23:50 +000011050 ret = (xmlParserErrors)ctxt->errNo;
Owen Taylor3473f882001-02-23 17:55:21 +000011051 } else {
11052 if (list != NULL) {
11053 xmlNodePtr cur;
11054
11055 /*
11056 * Return the newly created nodeset after unlinking it from
11057 * they pseudo parent.
11058 */
11059 cur = newDoc->children->children;
11060 *list = cur;
11061 while (cur != NULL) {
11062 cur->parent = NULL;
11063 cur = cur->next;
11064 }
11065 newDoc->children->children = NULL;
11066 }
Daniel Veillard7d515752003-09-26 19:12:37 +000011067 ret = XML_ERR_OK;
Owen Taylor3473f882001-02-23 17:55:21 +000011068 }
11069 if (sax != NULL)
11070 ctxt->sax = oldsax;
Daniel Veillard0046c0f2003-02-23 13:52:30 +000011071 oldctxt->node_seq.maximum = ctxt->node_seq.maximum;
11072 oldctxt->node_seq.length = ctxt->node_seq.length;
11073 oldctxt->node_seq.buffer = ctxt->node_seq.buffer;
Daniel Veillard44e1dd02003-02-21 23:23:28 +000011074 ctxt->node_seq.maximum = 0;
11075 ctxt->node_seq.length = 0;
11076 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000011077 xmlFreeParserCtxt(ctxt);
11078 newDoc->intSubset = NULL;
11079 newDoc->extSubset = NULL;
11080 xmlFreeDoc(newDoc);
11081
11082 return(ret);
11083}
11084
Daniel Veillard81273902003-09-30 00:43:48 +000011085#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000011086/**
Daniel Veillard257d9102001-05-08 10:41:44 +000011087 * xmlParseExternalEntity:
11088 * @doc: the document the chunk pertains to
11089 * @sax: the SAX handler bloc (possibly NULL)
11090 * @user_data: The user data returned on SAX callbacks (possibly NULL)
11091 * @depth: Used for loop detection, use 0
11092 * @URL: the URL for the entity to load
11093 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +000011094 * @lst: the return value for the set of parsed nodes
Daniel Veillard257d9102001-05-08 10:41:44 +000011095 *
11096 * Parse an external general entity
11097 * An external general parsed entity is well-formed if it matches the
11098 * production labeled extParsedEnt.
11099 *
11100 * [78] extParsedEnt ::= TextDecl? content
11101 *
11102 * Returns 0 if the entity is well formed, -1 in case of args problem and
11103 * the parser error code otherwise
11104 */
11105
11106int
11107xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardcda96922001-08-21 10:56:31 +000011108 int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *lst) {
Daniel Veillarda97a19b2001-05-20 13:19:52 +000011109 return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL,
Daniel Veillardcda96922001-08-21 10:56:31 +000011110 ID, lst));
Daniel Veillard257d9102001-05-08 10:41:44 +000011111}
11112
11113/**
Daniel Veillarde020c3a2001-03-21 18:06:15 +000011114 * xmlParseBalancedChunkMemory:
Owen Taylor3473f882001-02-23 17:55:21 +000011115 * @doc: the document the chunk pertains to
11116 * @sax: the SAX handler bloc (possibly NULL)
11117 * @user_data: The user data returned on SAX callbacks (possibly NULL)
11118 * @depth: Used for loop detection, use 0
11119 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
Daniel Veillardcda96922001-08-21 10:56:31 +000011120 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +000011121 *
11122 * Parse a well-balanced chunk of an XML document
11123 * called by the parser
11124 * The allowed sequence for the Well Balanced Chunk is the one defined by
11125 * the content production in the XML grammar:
11126 *
11127 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
11128 *
11129 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
11130 * the parser error code otherwise
11131 */
11132
11133int
11134xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
Daniel Veillardcda96922001-08-21 10:56:31 +000011135 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) {
Daniel Veillard58e44c92002-08-02 22:19:49 +000011136 return xmlParseBalancedChunkMemoryRecover( doc, sax, user_data,
11137 depth, string, lst, 0 );
11138}
Daniel Veillard81273902003-09-30 00:43:48 +000011139#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillard58e44c92002-08-02 22:19:49 +000011140
11141/**
Daniel Veillard328f48c2002-11-15 15:24:34 +000011142 * xmlParseBalancedChunkMemoryInternal:
11143 * @oldctxt: the existing parsing context
11144 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
11145 * @user_data: the user data field for the parser context
11146 * @lst: the return value for the set of parsed nodes
11147 *
11148 *
11149 * Parse a well-balanced chunk of an XML document
11150 * called by the parser
11151 * The allowed sequence for the Well Balanced Chunk is the one defined by
11152 * the content production in the XML grammar:
11153 *
11154 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
11155 *
Daniel Veillard7d515752003-09-26 19:12:37 +000011156 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
11157 * error code otherwise
Daniel Veillard328f48c2002-11-15 15:24:34 +000011158 *
11159 * In case recover is set to 1, the nodelist will not be empty even if
11160 * the parsed chunk is not well balanced.
11161 */
Daniel Veillard7d515752003-09-26 19:12:37 +000011162static xmlParserErrors
Daniel Veillard328f48c2002-11-15 15:24:34 +000011163xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
11164 const xmlChar *string, void *user_data, xmlNodePtr *lst) {
11165 xmlParserCtxtPtr ctxt;
Daniel Veillard68e9e742002-11-16 15:35:11 +000011166 xmlDocPtr newDoc = NULL;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011167 xmlNodePtr newRoot;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011168 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000011169 xmlNodePtr content = NULL;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011170 xmlNodePtr last = NULL;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011171 int size;
Daniel Veillard7d515752003-09-26 19:12:37 +000011172 xmlParserErrors ret = XML_ERR_OK;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011173
11174 if (oldctxt->depth > 40) {
11175 return(XML_ERR_ENTITY_LOOP);
11176 }
11177
11178
11179 if (lst != NULL)
11180 *lst = NULL;
11181 if (string == NULL)
William M. Brack7b9154b2003-09-27 19:23:50 +000011182 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011183
11184 size = xmlStrlen(string);
11185
11186 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
William M. Brack7b9154b2003-09-27 19:23:50 +000011187 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011188 if (user_data != NULL)
11189 ctxt->userData = user_data;
11190 else
11191 ctxt->userData = ctxt;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000011192 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
11193 ctxt->dict = oldctxt->dict;
Daniel Veillardfd343dc2003-10-31 10:55:22 +000011194 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
11195 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
11196 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011197
11198 oldsax = ctxt->sax;
11199 ctxt->sax = oldctxt->sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000011200 xmlDetectSAX2(ctxt);
Daniel Veillard87ab1c12003-12-21 13:01:56 +000011201 ctxt->replaceEntities = oldctxt->replaceEntities;
11202 ctxt->options = oldctxt->options;
Daniel Veillarde57ec792003-09-10 10:50:59 +000011203
Daniel Veillarde1ca5032002-12-09 14:13:43 +000011204 ctxt->_private = oldctxt->_private;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011205 if (oldctxt->myDoc == NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000011206 newDoc = xmlNewDoc(BAD_CAST "1.0");
11207 if (newDoc == NULL) {
11208 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000011209 ctxt->dict = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000011210 xmlFreeParserCtxt(ctxt);
William M. Brack7b9154b2003-09-27 19:23:50 +000011211 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard68e9e742002-11-16 15:35:11 +000011212 }
Daniel Veillard03a53c32004-10-26 16:06:51 +000011213 newDoc->dict = ctxt->dict;
11214 xmlDictReference(newDoc->dict);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011215 ctxt->myDoc = newDoc;
Daniel Veillard68e9e742002-11-16 15:35:11 +000011216 } else {
11217 ctxt->myDoc = oldctxt->myDoc;
11218 content = ctxt->myDoc->children;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011219 last = ctxt->myDoc->last;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011220 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011221 newRoot = xmlNewDocNode(ctxt->myDoc, NULL, BAD_CAST "pseudoroot", NULL);
11222 if (newRoot == NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000011223 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000011224 ctxt->dict = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000011225 xmlFreeParserCtxt(ctxt);
Daniel Veillard03a53c32004-10-26 16:06:51 +000011226 if (newDoc != NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000011227 xmlFreeDoc(newDoc);
Daniel Veillard03a53c32004-10-26 16:06:51 +000011228 }
William M. Brack7b9154b2003-09-27 19:23:50 +000011229 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard68e9e742002-11-16 15:35:11 +000011230 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011231 ctxt->myDoc->children = NULL;
11232 ctxt->myDoc->last = NULL;
11233 xmlAddChild((xmlNodePtr) ctxt->myDoc, newRoot);
Daniel Veillard68e9e742002-11-16 15:35:11 +000011234 nodePush(ctxt, ctxt->myDoc->children);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011235 ctxt->instate = XML_PARSER_CONTENT;
11236 ctxt->depth = oldctxt->depth + 1;
11237
Daniel Veillard328f48c2002-11-15 15:24:34 +000011238 ctxt->validate = 0;
11239 ctxt->loadsubset = oldctxt->loadsubset;
Daniel Veillardef8dd7b2003-03-23 12:02:56 +000011240 if ((oldctxt->validate) || (oldctxt->replaceEntities != 0)) {
11241 /*
11242 * ID/IDREF registration will be done in xmlValidateElement below
11243 */
11244 ctxt->loadsubset |= XML_SKIP_IDS;
11245 }
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000011246 ctxt->dictNames = oldctxt->dictNames;
Daniel Veillard95d2d5b2003-10-27 14:54:49 +000011247 ctxt->attsDefault = oldctxt->attsDefault;
11248 ctxt->attsSpecial = oldctxt->attsSpecial;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011249
Daniel Veillard68e9e742002-11-16 15:35:11 +000011250 xmlParseContent(ctxt);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011251 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011252 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011253 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011254 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011255 }
Daniel Veillard68e9e742002-11-16 15:35:11 +000011256 if (ctxt->node != ctxt->myDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011257 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011258 }
11259
11260 if (!ctxt->wellFormed) {
11261 if (ctxt->errNo == 0)
Daniel Veillard7d515752003-09-26 19:12:37 +000011262 ret = XML_ERR_INTERNAL_ERROR;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011263 else
William M. Brack7b9154b2003-09-27 19:23:50 +000011264 ret = (xmlParserErrors)ctxt->errNo;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011265 } else {
William M. Brack7b9154b2003-09-27 19:23:50 +000011266 ret = XML_ERR_OK;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011267 }
11268
William M. Brack7b9154b2003-09-27 19:23:50 +000011269 if ((lst != NULL) && (ret == XML_ERR_OK)) {
Daniel Veillard328f48c2002-11-15 15:24:34 +000011270 xmlNodePtr cur;
11271
11272 /*
11273 * Return the newly created nodeset after unlinking it from
11274 * they pseudo parent.
11275 */
Daniel Veillard68e9e742002-11-16 15:35:11 +000011276 cur = ctxt->myDoc->children->children;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011277 *lst = cur;
11278 while (cur != NULL) {
Daniel Veillard4432df22003-09-28 18:58:27 +000011279#ifdef LIBXML_VALID_ENABLED
Daniel Veillard8d589042003-02-04 15:07:21 +000011280 if (oldctxt->validate && oldctxt->wellFormed &&
11281 oldctxt->myDoc && oldctxt->myDoc->intSubset) {
11282 oldctxt->valid &= xmlValidateElement(&oldctxt->vctxt,
11283 oldctxt->myDoc, cur);
11284 }
Daniel Veillard4432df22003-09-28 18:58:27 +000011285#endif /* LIBXML_VALID_ENABLED */
Daniel Veillard328f48c2002-11-15 15:24:34 +000011286 cur->parent = NULL;
11287 cur = cur->next;
11288 }
Daniel Veillard68e9e742002-11-16 15:35:11 +000011289 ctxt->myDoc->children->children = NULL;
11290 }
11291 if (ctxt->myDoc != NULL) {
11292 xmlFreeNode(ctxt->myDoc->children);
11293 ctxt->myDoc->children = content;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011294 ctxt->myDoc->last = last;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011295 }
11296
11297 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000011298 ctxt->dict = NULL;
Daniel Veillard95d2d5b2003-10-27 14:54:49 +000011299 ctxt->attsDefault = NULL;
11300 ctxt->attsSpecial = NULL;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011301 xmlFreeParserCtxt(ctxt);
Daniel Veillard03a53c32004-10-26 16:06:51 +000011302 if (newDoc != NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000011303 xmlFreeDoc(newDoc);
Daniel Veillard03a53c32004-10-26 16:06:51 +000011304 }
Daniel Veillard328f48c2002-11-15 15:24:34 +000011305
11306 return(ret);
11307}
11308
Daniel Veillard29b17482004-08-16 00:39:03 +000011309/**
11310 * xmlParseInNodeContext:
11311 * @node: the context node
11312 * @data: the input string
11313 * @datalen: the input string length in bytes
11314 * @options: a combination of xmlParserOption
11315 * @lst: the return value for the set of parsed nodes
11316 *
11317 * Parse a well-balanced chunk of an XML document
11318 * within the context (DTD, namespaces, etc ...) of the given node.
11319 *
11320 * The allowed sequence for the data is a Well Balanced Chunk defined by
11321 * the content production in the XML grammar:
11322 *
11323 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
11324 *
11325 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
11326 * error code otherwise
11327 */
11328xmlParserErrors
11329xmlParseInNodeContext(xmlNodePtr node, const char *data, int datalen,
11330 int options, xmlNodePtr *lst) {
11331#ifdef SAX2
11332 xmlParserCtxtPtr ctxt;
11333 xmlDocPtr doc = NULL;
11334 xmlNodePtr fake, cur;
11335 int nsnr = 0;
11336
11337 xmlParserErrors ret = XML_ERR_OK;
11338
11339 /*
11340 * check all input parameters, grab the document
11341 */
11342 if ((lst == NULL) || (node == NULL) || (data == NULL) || (datalen < 0))
11343 return(XML_ERR_INTERNAL_ERROR);
11344 switch (node->type) {
11345 case XML_ELEMENT_NODE:
11346 case XML_ATTRIBUTE_NODE:
11347 case XML_TEXT_NODE:
11348 case XML_CDATA_SECTION_NODE:
11349 case XML_ENTITY_REF_NODE:
11350 case XML_PI_NODE:
11351 case XML_COMMENT_NODE:
11352 case XML_DOCUMENT_NODE:
11353 case XML_HTML_DOCUMENT_NODE:
11354 break;
11355 default:
11356 return(XML_ERR_INTERNAL_ERROR);
11357
11358 }
11359 while ((node != NULL) && (node->type != XML_ELEMENT_NODE) &&
11360 (node->type != XML_DOCUMENT_NODE) &&
11361 (node->type != XML_HTML_DOCUMENT_NODE))
11362 node = node->parent;
11363 if (node == NULL)
11364 return(XML_ERR_INTERNAL_ERROR);
11365 if (node->type == XML_ELEMENT_NODE)
11366 doc = node->doc;
11367 else
11368 doc = (xmlDocPtr) node;
11369 if (doc == NULL)
11370 return(XML_ERR_INTERNAL_ERROR);
11371
11372 /*
11373 * allocate a context and set-up everything not related to the
11374 * node position in the tree
11375 */
11376 if (doc->type == XML_DOCUMENT_NODE)
11377 ctxt = xmlCreateMemoryParserCtxt((char *) data, datalen);
11378#ifdef LIBXML_HTML_ENABLED
11379 else if (doc->type == XML_HTML_DOCUMENT_NODE)
11380 ctxt = htmlCreateMemoryParserCtxt((char *) data, datalen);
11381#endif
11382 else
11383 return(XML_ERR_INTERNAL_ERROR);
11384
11385 if (ctxt == NULL)
11386 return(XML_ERR_NO_MEMORY);
11387 fake = xmlNewComment(NULL);
11388 if (fake == NULL) {
11389 xmlFreeParserCtxt(ctxt);
11390 return(XML_ERR_NO_MEMORY);
11391 }
11392 xmlAddChild(node, fake);
William M. Brackc3f81342004-10-03 01:22:44 +000011393
11394 /*
11395 * Use input doc's dict if present, else assure XML_PARSE_NODICT is set.
11396 * We need a dictionary for xmlDetectSAX2, so if there's no doc dict
11397 * we must wait until the last moment to free the original one.
11398 */
Daniel Veillard29b17482004-08-16 00:39:03 +000011399 if (doc->dict != NULL) {
William M. Brackc3f81342004-10-03 01:22:44 +000011400 if (ctxt->dict != NULL)
Daniel Veillard29b17482004-08-16 00:39:03 +000011401 xmlDictFree(ctxt->dict);
11402 ctxt->dict = doc->dict;
William M. Brackc3f81342004-10-03 01:22:44 +000011403 } else
11404 options |= XML_PARSE_NODICT;
11405
11406 xmlCtxtUseOptions(ctxt, options);
Daniel Veillard29b17482004-08-16 00:39:03 +000011407 xmlDetectSAX2(ctxt);
11408 ctxt->myDoc = doc;
11409
11410 if (node->type == XML_ELEMENT_NODE) {
11411 nodePush(ctxt, node);
11412 /*
11413 * initialize the SAX2 namespaces stack
11414 */
11415 cur = node;
11416 while ((cur != NULL) && (cur->type == XML_ELEMENT_NODE)) {
11417 xmlNsPtr ns = cur->nsDef;
11418 const xmlChar *iprefix, *ihref;
11419
11420 while (ns != NULL) {
11421 if (ctxt->dict) {
11422 iprefix = xmlDictLookup(ctxt->dict, ns->prefix, -1);
11423 ihref = xmlDictLookup(ctxt->dict, ns->href, -1);
11424 } else {
11425 iprefix = ns->prefix;
11426 ihref = ns->href;
11427 }
11428
11429 if (xmlGetNamespace(ctxt, iprefix) == NULL) {
11430 nsPush(ctxt, iprefix, ihref);
11431 nsnr++;
11432 }
11433 ns = ns->next;
11434 }
11435 cur = cur->parent;
11436 }
11437 ctxt->instate = XML_PARSER_CONTENT;
11438 }
11439
11440 if ((ctxt->validate) || (ctxt->replaceEntities != 0)) {
11441 /*
11442 * ID/IDREF registration will be done in xmlValidateElement below
11443 */
11444 ctxt->loadsubset |= XML_SKIP_IDS;
11445 }
11446
11447 xmlParseContent(ctxt);
11448 nsPop(ctxt, nsnr);
11449 if ((RAW == '<') && (NXT(1) == '/')) {
11450 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
11451 } else if (RAW != 0) {
11452 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
11453 }
11454 if ((ctxt->node != NULL) && (ctxt->node != node)) {
11455 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
11456 ctxt->wellFormed = 0;
11457 }
11458
11459 if (!ctxt->wellFormed) {
11460 if (ctxt->errNo == 0)
11461 ret = XML_ERR_INTERNAL_ERROR;
11462 else
11463 ret = (xmlParserErrors)ctxt->errNo;
11464 } else {
11465 ret = XML_ERR_OK;
11466 }
11467
11468 /*
11469 * Return the newly created nodeset after unlinking it from
11470 * the pseudo sibling.
11471 */
11472
11473 cur = fake->next;
11474 fake->next = NULL;
11475 node->last = fake;
11476
11477 if (cur != NULL) {
11478 cur->prev = NULL;
11479 }
11480
11481 *lst = cur;
11482
11483 while (cur != NULL) {
11484 cur->parent = NULL;
11485 cur = cur->next;
11486 }
11487
11488 xmlUnlinkNode(fake);
11489 xmlFreeNode(fake);
11490
11491
11492 if (ret != XML_ERR_OK) {
11493 xmlFreeNodeList(*lst);
11494 *lst = NULL;
11495 }
William M. Brackc3f81342004-10-03 01:22:44 +000011496
William M. Brackb7b54de2004-10-06 16:38:01 +000011497 if (doc->dict != NULL)
11498 ctxt->dict = NULL;
Daniel Veillard29b17482004-08-16 00:39:03 +000011499 xmlFreeParserCtxt(ctxt);
11500
11501 return(ret);
11502#else /* !SAX2 */
11503 return(XML_ERR_INTERNAL_ERROR);
11504#endif
11505}
11506
Daniel Veillard81273902003-09-30 00:43:48 +000011507#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard328f48c2002-11-15 15:24:34 +000011508/**
Daniel Veillard58e44c92002-08-02 22:19:49 +000011509 * xmlParseBalancedChunkMemoryRecover:
11510 * @doc: the document the chunk pertains to
11511 * @sax: the SAX handler bloc (possibly NULL)
11512 * @user_data: The user data returned on SAX callbacks (possibly NULL)
11513 * @depth: Used for loop detection, use 0
11514 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
11515 * @lst: the return value for the set of parsed nodes
11516 * @recover: return nodes even if the data is broken (use 0)
11517 *
11518 *
11519 * Parse a well-balanced chunk of an XML document
11520 * called by the parser
11521 * The allowed sequence for the Well Balanced Chunk is the one defined by
11522 * the content production in the XML grammar:
11523 *
11524 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
11525 *
11526 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
11527 * the parser error code otherwise
11528 *
11529 * In case recover is set to 1, the nodelist will not be empty even if
11530 * the parsed chunk is not well balanced.
11531 */
11532int
11533xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax,
11534 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst,
11535 int recover) {
Owen Taylor3473f882001-02-23 17:55:21 +000011536 xmlParserCtxtPtr ctxt;
11537 xmlDocPtr newDoc;
11538 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000011539 xmlNodePtr content, newRoot;
Owen Taylor3473f882001-02-23 17:55:21 +000011540 int size;
11541 int ret = 0;
11542
11543 if (depth > 40) {
11544 return(XML_ERR_ENTITY_LOOP);
11545 }
11546
11547
Daniel Veillardcda96922001-08-21 10:56:31 +000011548 if (lst != NULL)
11549 *lst = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000011550 if (string == NULL)
11551 return(-1);
11552
11553 size = xmlStrlen(string);
11554
11555 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
11556 if (ctxt == NULL) return(-1);
11557 ctxt->userData = ctxt;
11558 if (sax != NULL) {
11559 oldsax = ctxt->sax;
11560 ctxt->sax = sax;
11561 if (user_data != NULL)
11562 ctxt->userData = user_data;
11563 }
11564 newDoc = xmlNewDoc(BAD_CAST "1.0");
11565 if (newDoc == NULL) {
11566 xmlFreeParserCtxt(ctxt);
11567 return(-1);
11568 }
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000011569 if ((doc != NULL) && (doc->dict != NULL)) {
11570 xmlDictFree(ctxt->dict);
11571 ctxt->dict = doc->dict;
11572 xmlDictReference(ctxt->dict);
11573 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
11574 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
11575 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
11576 ctxt->dictNames = 1;
11577 } else {
11578 xmlCtxtUseOptions(ctxt, XML_PARSE_NODICT);
11579 }
Owen Taylor3473f882001-02-23 17:55:21 +000011580 if (doc != NULL) {
11581 newDoc->intSubset = doc->intSubset;
11582 newDoc->extSubset = doc->extSubset;
11583 }
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000011584 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
11585 if (newRoot == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000011586 if (sax != NULL)
11587 ctxt->sax = oldsax;
11588 xmlFreeParserCtxt(ctxt);
11589 newDoc->intSubset = NULL;
11590 newDoc->extSubset = NULL;
11591 xmlFreeDoc(newDoc);
11592 return(-1);
11593 }
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000011594 xmlAddChild((xmlNodePtr) newDoc, newRoot);
11595 nodePush(ctxt, newRoot);
Owen Taylor3473f882001-02-23 17:55:21 +000011596 if (doc == NULL) {
11597 ctxt->myDoc = newDoc;
11598 } else {
Daniel Veillard42766c02002-08-22 20:52:17 +000011599 ctxt->myDoc = newDoc;
Owen Taylor3473f882001-02-23 17:55:21 +000011600 newDoc->children->doc = doc;
11601 }
11602 ctxt->instate = XML_PARSER_CONTENT;
11603 ctxt->depth = depth;
11604
11605 /*
11606 * Doing validity checking on chunk doesn't make sense
11607 */
11608 ctxt->validate = 0;
11609 ctxt->loadsubset = 0;
Daniel Veillarde57ec792003-09-10 10:50:59 +000011610 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011611
Daniel Veillardb39bc392002-10-26 19:29:51 +000011612 if ( doc != NULL ){
11613 content = doc->children;
11614 doc->children = NULL;
11615 xmlParseContent(ctxt);
11616 doc->children = content;
11617 }
11618 else {
11619 xmlParseContent(ctxt);
11620 }
Owen Taylor3473f882001-02-23 17:55:21 +000011621 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011622 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011623 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011624 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011625 }
11626 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011627 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011628 }
11629
11630 if (!ctxt->wellFormed) {
11631 if (ctxt->errNo == 0)
11632 ret = 1;
11633 else
11634 ret = ctxt->errNo;
11635 } else {
Daniel Veillard58e44c92002-08-02 22:19:49 +000011636 ret = 0;
11637 }
11638
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000011639 if ((lst != NULL) && ((ret == 0) || (recover == 1))) {
11640 xmlNodePtr cur;
Owen Taylor3473f882001-02-23 17:55:21 +000011641
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000011642 /*
11643 * Return the newly created nodeset after unlinking it from
11644 * they pseudo parent.
11645 */
11646 cur = newDoc->children->children;
11647 *lst = cur;
11648 while (cur != NULL) {
11649 xmlSetTreeDoc(cur, doc);
11650 cur->parent = NULL;
11651 cur = cur->next;
Owen Taylor3473f882001-02-23 17:55:21 +000011652 }
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000011653 newDoc->children->children = NULL;
11654 }
Daniel Veillard58e44c92002-08-02 22:19:49 +000011655
Owen Taylor3473f882001-02-23 17:55:21 +000011656 if (sax != NULL)
11657 ctxt->sax = oldsax;
11658 xmlFreeParserCtxt(ctxt);
11659 newDoc->intSubset = NULL;
11660 newDoc->extSubset = NULL;
11661 xmlFreeDoc(newDoc);
11662
11663 return(ret);
11664}
11665
11666/**
11667 * xmlSAXParseEntity:
11668 * @sax: the SAX handler block
11669 * @filename: the filename
11670 *
11671 * parse an XML external entity out of context and build a tree.
11672 * It use the given SAX function block to handle the parsing callback.
11673 * If sax is NULL, fallback to the default DOM tree building routines.
11674 *
11675 * [78] extParsedEnt ::= TextDecl? content
11676 *
11677 * This correspond to a "Well Balanced" chunk
11678 *
11679 * Returns the resulting document tree
11680 */
11681
11682xmlDocPtr
11683xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
11684 xmlDocPtr ret;
11685 xmlParserCtxtPtr ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000011686
11687 ctxt = xmlCreateFileParserCtxt(filename);
11688 if (ctxt == NULL) {
11689 return(NULL);
11690 }
11691 if (sax != NULL) {
11692 if (ctxt->sax != NULL)
11693 xmlFree(ctxt->sax);
11694 ctxt->sax = sax;
11695 ctxt->userData = NULL;
11696 }
11697
Owen Taylor3473f882001-02-23 17:55:21 +000011698 xmlParseExtParsedEnt(ctxt);
11699
11700 if (ctxt->wellFormed)
11701 ret = ctxt->myDoc;
11702 else {
11703 ret = NULL;
11704 xmlFreeDoc(ctxt->myDoc);
11705 ctxt->myDoc = NULL;
11706 }
11707 if (sax != NULL)
11708 ctxt->sax = NULL;
11709 xmlFreeParserCtxt(ctxt);
11710
11711 return(ret);
11712}
11713
11714/**
11715 * xmlParseEntity:
11716 * @filename: the filename
11717 *
11718 * parse an XML external entity out of context and build a tree.
11719 *
11720 * [78] extParsedEnt ::= TextDecl? content
11721 *
11722 * This correspond to a "Well Balanced" chunk
11723 *
11724 * Returns the resulting document tree
11725 */
11726
11727xmlDocPtr
11728xmlParseEntity(const char *filename) {
11729 return(xmlSAXParseEntity(NULL, filename));
11730}
Daniel Veillard81273902003-09-30 00:43:48 +000011731#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000011732
11733/**
11734 * xmlCreateEntityParserCtxt:
11735 * @URL: the entity URL
11736 * @ID: the entity PUBLIC ID
Daniel Veillardcbaf3992001-12-31 16:16:02 +000011737 * @base: a possible base for the target URI
Owen Taylor3473f882001-02-23 17:55:21 +000011738 *
11739 * Create a parser context for an external entity
11740 * Automatic support for ZLIB/Compress compressed document is provided
11741 * by default if found at compile-time.
11742 *
11743 * Returns the new parser context or NULL
11744 */
11745xmlParserCtxtPtr
11746xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
11747 const xmlChar *base) {
11748 xmlParserCtxtPtr ctxt;
11749 xmlParserInputPtr inputStream;
11750 char *directory = NULL;
11751 xmlChar *uri;
11752
11753 ctxt = xmlNewParserCtxt();
11754 if (ctxt == NULL) {
11755 return(NULL);
11756 }
11757
11758 uri = xmlBuildURI(URL, base);
11759
11760 if (uri == NULL) {
11761 inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
11762 if (inputStream == NULL) {
11763 xmlFreeParserCtxt(ctxt);
11764 return(NULL);
11765 }
11766
11767 inputPush(ctxt, inputStream);
11768
11769 if ((ctxt->directory == NULL) && (directory == NULL))
11770 directory = xmlParserGetDirectory((char *)URL);
11771 if ((ctxt->directory == NULL) && (directory != NULL))
11772 ctxt->directory = directory;
11773 } else {
11774 inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt);
11775 if (inputStream == NULL) {
11776 xmlFree(uri);
11777 xmlFreeParserCtxt(ctxt);
11778 return(NULL);
11779 }
11780
11781 inputPush(ctxt, inputStream);
11782
11783 if ((ctxt->directory == NULL) && (directory == NULL))
11784 directory = xmlParserGetDirectory((char *)uri);
11785 if ((ctxt->directory == NULL) && (directory != NULL))
11786 ctxt->directory = directory;
11787 xmlFree(uri);
11788 }
Owen Taylor3473f882001-02-23 17:55:21 +000011789 return(ctxt);
11790}
11791
11792/************************************************************************
11793 * *
11794 * Front ends when parsing from a file *
11795 * *
11796 ************************************************************************/
11797
11798/**
Daniel Veillard61b93382003-11-03 14:28:31 +000011799 * xmlCreateURLParserCtxt:
11800 * @filename: the filename or URL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000011801 * @options: a combination of xmlParserOption
Owen Taylor3473f882001-02-23 17:55:21 +000011802 *
Daniel Veillard61b93382003-11-03 14:28:31 +000011803 * Create a parser context for a file or URL content.
Owen Taylor3473f882001-02-23 17:55:21 +000011804 * Automatic support for ZLIB/Compress compressed document is provided
Daniel Veillard61b93382003-11-03 14:28:31 +000011805 * by default if found at compile-time and for file accesses
Owen Taylor3473f882001-02-23 17:55:21 +000011806 *
11807 * Returns the new parser context or NULL
11808 */
11809xmlParserCtxtPtr
Daniel Veillard61b93382003-11-03 14:28:31 +000011810xmlCreateURLParserCtxt(const char *filename, int options)
Owen Taylor3473f882001-02-23 17:55:21 +000011811{
11812 xmlParserCtxtPtr ctxt;
11813 xmlParserInputPtr inputStream;
Owen Taylor3473f882001-02-23 17:55:21 +000011814 char *directory = NULL;
11815
Owen Taylor3473f882001-02-23 17:55:21 +000011816 ctxt = xmlNewParserCtxt();
11817 if (ctxt == NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +000011818 xmlErrMemory(NULL, "cannot allocate parser context");
Owen Taylor3473f882001-02-23 17:55:21 +000011819 return(NULL);
11820 }
11821
Daniel Veillarddf292f72005-01-16 19:00:15 +000011822 if (options)
11823 xmlCtxtUseOptions(ctxt, options);
11824 ctxt->linenumbers = 1;
Igor Zlatkovicce076162003-02-23 13:39:39 +000011825
Daniel Veillard4e9b1bc2003-06-09 10:30:33 +000011826 inputStream = xmlLoadExternalEntity(filename, NULL, ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011827 if (inputStream == NULL) {
11828 xmlFreeParserCtxt(ctxt);
11829 return(NULL);
11830 }
11831
Owen Taylor3473f882001-02-23 17:55:21 +000011832 inputPush(ctxt, inputStream);
11833 if ((ctxt->directory == NULL) && (directory == NULL))
Igor Zlatkovic5f9fada2003-02-19 14:51:00 +000011834 directory = xmlParserGetDirectory(filename);
Owen Taylor3473f882001-02-23 17:55:21 +000011835 if ((ctxt->directory == NULL) && (directory != NULL))
11836 ctxt->directory = directory;
11837
11838 return(ctxt);
11839}
11840
Daniel Veillard61b93382003-11-03 14:28:31 +000011841/**
11842 * xmlCreateFileParserCtxt:
11843 * @filename: the filename
11844 *
11845 * Create a parser context for a file content.
11846 * Automatic support for ZLIB/Compress compressed document is provided
11847 * by default if found at compile-time.
11848 *
11849 * Returns the new parser context or NULL
11850 */
11851xmlParserCtxtPtr
11852xmlCreateFileParserCtxt(const char *filename)
11853{
11854 return(xmlCreateURLParserCtxt(filename, 0));
11855}
11856
Daniel Veillard81273902003-09-30 00:43:48 +000011857#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000011858/**
Daniel Veillarda293c322001-10-02 13:54:14 +000011859 * xmlSAXParseFileWithData:
Owen Taylor3473f882001-02-23 17:55:21 +000011860 * @sax: the SAX handler block
11861 * @filename: the filename
11862 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
11863 * documents
Daniel Veillarda293c322001-10-02 13:54:14 +000011864 * @data: the userdata
Owen Taylor3473f882001-02-23 17:55:21 +000011865 *
11866 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
11867 * compressed document is provided by default if found at compile-time.
11868 * It use the given SAX function block to handle the parsing callback.
11869 * If sax is NULL, fallback to the default DOM tree building routines.
11870 *
Daniel Veillarde19fc232002-04-22 16:01:24 +000011871 * User data (void *) is stored within the parser context in the
11872 * context's _private member, so it is available nearly everywhere in libxml
Daniel Veillarda293c322001-10-02 13:54:14 +000011873 *
Owen Taylor3473f882001-02-23 17:55:21 +000011874 * Returns the resulting document tree
11875 */
11876
11877xmlDocPtr
Daniel Veillarda293c322001-10-02 13:54:14 +000011878xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename,
11879 int recovery, void *data) {
Owen Taylor3473f882001-02-23 17:55:21 +000011880 xmlDocPtr ret;
11881 xmlParserCtxtPtr ctxt;
11882 char *directory = NULL;
11883
Daniel Veillard635ef722001-10-29 11:48:19 +000011884 xmlInitParser();
11885
Owen Taylor3473f882001-02-23 17:55:21 +000011886 ctxt = xmlCreateFileParserCtxt(filename);
11887 if (ctxt == NULL) {
11888 return(NULL);
11889 }
11890 if (sax != NULL) {
11891 if (ctxt->sax != NULL)
11892 xmlFree(ctxt->sax);
11893 ctxt->sax = sax;
Owen Taylor3473f882001-02-23 17:55:21 +000011894 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000011895 xmlDetectSAX2(ctxt);
Daniel Veillarda293c322001-10-02 13:54:14 +000011896 if (data!=NULL) {
Daniel Veillardc790bf42003-10-11 10:50:10 +000011897 ctxt->_private = data;
Daniel Veillarda293c322001-10-02 13:54:14 +000011898 }
Owen Taylor3473f882001-02-23 17:55:21 +000011899
11900 if ((ctxt->directory == NULL) && (directory == NULL))
11901 directory = xmlParserGetDirectory(filename);
11902 if ((ctxt->directory == NULL) && (directory != NULL))
11903 ctxt->directory = (char *) xmlStrdup((xmlChar *) directory);
11904
Daniel Veillarddad3f682002-11-17 16:47:27 +000011905 ctxt->recovery = recovery;
11906
Owen Taylor3473f882001-02-23 17:55:21 +000011907 xmlParseDocument(ctxt);
11908
William M. Brackc07329e2003-09-08 01:57:30 +000011909 if ((ctxt->wellFormed) || recovery) {
11910 ret = ctxt->myDoc;
Daniel Veillardb65e12e2003-10-08 21:33:28 +000011911 if (ret != NULL) {
11912 if (ctxt->input->buf->compressed > 0)
11913 ret->compression = 9;
11914 else
11915 ret->compression = ctxt->input->buf->compressed;
11916 }
William M. Brackc07329e2003-09-08 01:57:30 +000011917 }
Owen Taylor3473f882001-02-23 17:55:21 +000011918 else {
11919 ret = NULL;
11920 xmlFreeDoc(ctxt->myDoc);
11921 ctxt->myDoc = NULL;
11922 }
11923 if (sax != NULL)
11924 ctxt->sax = NULL;
11925 xmlFreeParserCtxt(ctxt);
11926
11927 return(ret);
11928}
11929
11930/**
Daniel Veillarda293c322001-10-02 13:54:14 +000011931 * xmlSAXParseFile:
11932 * @sax: the SAX handler block
11933 * @filename: the filename
11934 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
11935 * documents
11936 *
11937 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
11938 * compressed document is provided by default if found at compile-time.
11939 * It use the given SAX function block to handle the parsing callback.
11940 * If sax is NULL, fallback to the default DOM tree building routines.
11941 *
11942 * Returns the resulting document tree
11943 */
11944
11945xmlDocPtr
11946xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
11947 int recovery) {
11948 return(xmlSAXParseFileWithData(sax,filename,recovery,NULL));
11949}
11950
11951/**
Owen Taylor3473f882001-02-23 17:55:21 +000011952 * xmlRecoverDoc:
11953 * @cur: a pointer to an array of xmlChar
11954 *
11955 * parse an XML in-memory document and build a tree.
11956 * In the case the document is not Well Formed, a tree is built anyway
11957 *
11958 * Returns the resulting document tree
11959 */
11960
11961xmlDocPtr
11962xmlRecoverDoc(xmlChar *cur) {
11963 return(xmlSAXParseDoc(NULL, cur, 1));
11964}
11965
11966/**
11967 * xmlParseFile:
11968 * @filename: the filename
11969 *
11970 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
11971 * compressed document is provided by default if found at compile-time.
11972 *
Daniel Veillard5d96fff2001-08-31 14:55:30 +000011973 * Returns the resulting document tree if the file was wellformed,
11974 * NULL otherwise.
Owen Taylor3473f882001-02-23 17:55:21 +000011975 */
11976
11977xmlDocPtr
11978xmlParseFile(const char *filename) {
11979 return(xmlSAXParseFile(NULL, filename, 0));
11980}
11981
11982/**
11983 * xmlRecoverFile:
11984 * @filename: the filename
11985 *
11986 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
11987 * compressed document is provided by default if found at compile-time.
11988 * In the case the document is not Well Formed, a tree is built anyway
11989 *
11990 * Returns the resulting document tree
11991 */
11992
11993xmlDocPtr
11994xmlRecoverFile(const char *filename) {
11995 return(xmlSAXParseFile(NULL, filename, 1));
11996}
11997
11998
11999/**
12000 * xmlSetupParserForBuffer:
12001 * @ctxt: an XML parser context
12002 * @buffer: a xmlChar * buffer
12003 * @filename: a file name
12004 *
12005 * Setup the parser context to parse a new buffer; Clears any prior
12006 * contents from the parser context. The buffer parameter must not be
12007 * NULL, but the filename parameter can be
12008 */
12009void
12010xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
12011 const char* filename)
12012{
12013 xmlParserInputPtr input;
12014
Daniel Veillard36e5cd52004-11-02 14:52:23 +000012015 if ((ctxt == NULL) || (buffer == NULL))
12016 return;
12017
Owen Taylor3473f882001-02-23 17:55:21 +000012018 input = xmlNewInputStream(ctxt);
12019 if (input == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +000012020 xmlErrMemory(NULL, "parsing new buffer: out of memory\n");
Daniel Veillard36e5cd52004-11-02 14:52:23 +000012021 xmlClearParserCtxt(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000012022 return;
12023 }
12024
12025 xmlClearParserCtxt(ctxt);
12026 if (filename != NULL)
Daniel Veillardc3ca5ba2003-05-09 22:26:28 +000012027 input->filename = (char *) xmlCanonicPath((const xmlChar *)filename);
Owen Taylor3473f882001-02-23 17:55:21 +000012028 input->base = buffer;
12029 input->cur = buffer;
Daniel Veillard48b2f892001-02-25 16:11:03 +000012030 input->end = &buffer[xmlStrlen(buffer)];
Owen Taylor3473f882001-02-23 17:55:21 +000012031 inputPush(ctxt, input);
12032}
12033
12034/**
12035 * xmlSAXUserParseFile:
12036 * @sax: a SAX handler
12037 * @user_data: The user data returned on SAX callbacks
12038 * @filename: a file name
12039 *
12040 * parse an XML file and call the given SAX handler routines.
12041 * Automatic support for ZLIB/Compress compressed document is provided
12042 *
12043 * Returns 0 in case of success or a error number otherwise
12044 */
12045int
12046xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
12047 const char *filename) {
12048 int ret = 0;
12049 xmlParserCtxtPtr ctxt;
12050
12051 ctxt = xmlCreateFileParserCtxt(filename);
12052 if (ctxt == NULL) return -1;
Daniel Veillard81273902003-09-30 00:43:48 +000012053#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard092643b2003-09-25 14:29:29 +000012054 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Daniel Veillard81273902003-09-30 00:43:48 +000012055#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012056 xmlFree(ctxt->sax);
12057 ctxt->sax = sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000012058 xmlDetectSAX2(ctxt);
12059
Owen Taylor3473f882001-02-23 17:55:21 +000012060 if (user_data != NULL)
12061 ctxt->userData = user_data;
12062
12063 xmlParseDocument(ctxt);
12064
12065 if (ctxt->wellFormed)
12066 ret = 0;
12067 else {
12068 if (ctxt->errNo != 0)
12069 ret = ctxt->errNo;
12070 else
12071 ret = -1;
12072 }
12073 if (sax != NULL)
12074 ctxt->sax = NULL;
Daniel Veillard34099b42004-11-04 17:34:35 +000012075 if (ctxt->myDoc != NULL) {
12076 xmlFreeDoc(ctxt->myDoc);
12077 ctxt->myDoc = NULL;
12078 }
Owen Taylor3473f882001-02-23 17:55:21 +000012079 xmlFreeParserCtxt(ctxt);
12080
12081 return ret;
12082}
Daniel Veillard81273902003-09-30 00:43:48 +000012083#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012084
12085/************************************************************************
12086 * *
12087 * Front ends when parsing from memory *
12088 * *
12089 ************************************************************************/
12090
12091/**
12092 * xmlCreateMemoryParserCtxt:
12093 * @buffer: a pointer to a char array
12094 * @size: the size of the array
12095 *
12096 * Create a parser context for an XML in-memory document.
12097 *
12098 * Returns the new parser context or NULL
12099 */
12100xmlParserCtxtPtr
Daniel Veillardfd7ddca2001-05-16 10:57:35 +000012101xmlCreateMemoryParserCtxt(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000012102 xmlParserCtxtPtr ctxt;
12103 xmlParserInputPtr input;
12104 xmlParserInputBufferPtr buf;
12105
12106 if (buffer == NULL)
12107 return(NULL);
12108 if (size <= 0)
12109 return(NULL);
12110
12111 ctxt = xmlNewParserCtxt();
12112 if (ctxt == NULL)
12113 return(NULL);
12114
Daniel Veillard53350552003-09-18 13:35:51 +000012115 /* TODO: xmlParserInputBufferCreateStatic, requires some serious changes */
Owen Taylor3473f882001-02-23 17:55:21 +000012116 buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
Daniel Veillarda7e05b42002-11-19 08:11:14 +000012117 if (buf == NULL) {
12118 xmlFreeParserCtxt(ctxt);
12119 return(NULL);
12120 }
Owen Taylor3473f882001-02-23 17:55:21 +000012121
12122 input = xmlNewInputStream(ctxt);
12123 if (input == NULL) {
Daniel Veillarda7e05b42002-11-19 08:11:14 +000012124 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000012125 xmlFreeParserCtxt(ctxt);
12126 return(NULL);
12127 }
12128
12129 input->filename = NULL;
12130 input->buf = buf;
12131 input->base = input->buf->buffer->content;
12132 input->cur = input->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +000012133 input->end = &input->buf->buffer->content[input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000012134
12135 inputPush(ctxt, input);
12136 return(ctxt);
12137}
12138
Daniel Veillard81273902003-09-30 00:43:48 +000012139#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000012140/**
Daniel Veillard8606bbb2002-11-12 12:36:52 +000012141 * xmlSAXParseMemoryWithData:
12142 * @sax: the SAX handler block
12143 * @buffer: an pointer to a char array
12144 * @size: the size of the array
12145 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
12146 * documents
12147 * @data: the userdata
12148 *
12149 * parse an XML in-memory block and use the given SAX function block
12150 * to handle the parsing callback. If sax is NULL, fallback to the default
12151 * DOM tree building routines.
12152 *
12153 * User data (void *) is stored within the parser context in the
12154 * context's _private member, so it is available nearly everywhere in libxml
12155 *
12156 * Returns the resulting document tree
12157 */
12158
12159xmlDocPtr
12160xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer,
12161 int size, int recovery, void *data) {
12162 xmlDocPtr ret;
12163 xmlParserCtxtPtr ctxt;
12164
12165 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
12166 if (ctxt == NULL) return(NULL);
12167 if (sax != NULL) {
12168 if (ctxt->sax != NULL)
12169 xmlFree(ctxt->sax);
12170 ctxt->sax = sax;
12171 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000012172 xmlDetectSAX2(ctxt);
Daniel Veillard8606bbb2002-11-12 12:36:52 +000012173 if (data!=NULL) {
12174 ctxt->_private=data;
12175 }
12176
Daniel Veillardadba5f12003-04-04 16:09:01 +000012177 ctxt->recovery = recovery;
12178
Daniel Veillard8606bbb2002-11-12 12:36:52 +000012179 xmlParseDocument(ctxt);
12180
12181 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
12182 else {
12183 ret = NULL;
12184 xmlFreeDoc(ctxt->myDoc);
12185 ctxt->myDoc = NULL;
12186 }
12187 if (sax != NULL)
12188 ctxt->sax = NULL;
12189 xmlFreeParserCtxt(ctxt);
12190
12191 return(ret);
12192}
12193
12194/**
Owen Taylor3473f882001-02-23 17:55:21 +000012195 * xmlSAXParseMemory:
12196 * @sax: the SAX handler block
12197 * @buffer: an pointer to a char array
12198 * @size: the size of the array
12199 * @recovery: work in recovery mode, i.e. tries to read not Well Formed
12200 * documents
12201 *
12202 * parse an XML in-memory block and use the given SAX function block
12203 * to handle the parsing callback. If sax is NULL, fallback to the default
12204 * DOM tree building routines.
12205 *
12206 * Returns the resulting document tree
12207 */
12208xmlDocPtr
Daniel Veillard50822cb2001-07-26 20:05:51 +000012209xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer,
12210 int size, int recovery) {
Daniel Veillard8606bbb2002-11-12 12:36:52 +000012211 return xmlSAXParseMemoryWithData(sax, buffer, size, recovery, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000012212}
12213
12214/**
12215 * xmlParseMemory:
12216 * @buffer: an pointer to a char array
12217 * @size: the size of the array
12218 *
12219 * parse an XML in-memory block and build a tree.
12220 *
12221 * Returns the resulting document tree
12222 */
12223
Daniel Veillard50822cb2001-07-26 20:05:51 +000012224xmlDocPtr xmlParseMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000012225 return(xmlSAXParseMemory(NULL, buffer, size, 0));
12226}
12227
12228/**
12229 * xmlRecoverMemory:
12230 * @buffer: an pointer to a char array
12231 * @size: the size of the array
12232 *
12233 * parse an XML in-memory block and build a tree.
12234 * In the case the document is not Well Formed, a tree is built anyway
12235 *
12236 * Returns the resulting document tree
12237 */
12238
Daniel Veillard50822cb2001-07-26 20:05:51 +000012239xmlDocPtr xmlRecoverMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000012240 return(xmlSAXParseMemory(NULL, buffer, size, 1));
12241}
12242
12243/**
12244 * xmlSAXUserParseMemory:
12245 * @sax: a SAX handler
12246 * @user_data: The user data returned on SAX callbacks
12247 * @buffer: an in-memory XML document input
12248 * @size: the length of the XML document in bytes
12249 *
12250 * A better SAX parsing routine.
12251 * parse an XML in-memory buffer and call the given SAX handler routines.
12252 *
12253 * Returns 0 in case of success or a error number otherwise
12254 */
12255int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardfd7ddca2001-05-16 10:57:35 +000012256 const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000012257 int ret = 0;
12258 xmlParserCtxtPtr ctxt;
12259 xmlSAXHandlerPtr oldsax = NULL;
12260
Daniel Veillard9e923512002-08-14 08:48:52 +000012261 if (sax == NULL) return -1;
Owen Taylor3473f882001-02-23 17:55:21 +000012262 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
12263 if (ctxt == NULL) return -1;
Daniel Veillard9e923512002-08-14 08:48:52 +000012264 oldsax = ctxt->sax;
12265 ctxt->sax = sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000012266 xmlDetectSAX2(ctxt);
Daniel Veillard30211a02001-04-26 09:33:18 +000012267 if (user_data != NULL)
12268 ctxt->userData = user_data;
Owen Taylor3473f882001-02-23 17:55:21 +000012269
12270 xmlParseDocument(ctxt);
12271
12272 if (ctxt->wellFormed)
12273 ret = 0;
12274 else {
12275 if (ctxt->errNo != 0)
12276 ret = ctxt->errNo;
12277 else
12278 ret = -1;
12279 }
Daniel Veillard9e923512002-08-14 08:48:52 +000012280 ctxt->sax = oldsax;
Daniel Veillard34099b42004-11-04 17:34:35 +000012281 if (ctxt->myDoc != NULL) {
12282 xmlFreeDoc(ctxt->myDoc);
12283 ctxt->myDoc = NULL;
12284 }
Owen Taylor3473f882001-02-23 17:55:21 +000012285 xmlFreeParserCtxt(ctxt);
12286
12287 return ret;
12288}
Daniel Veillard81273902003-09-30 00:43:48 +000012289#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012290
12291/**
12292 * xmlCreateDocParserCtxt:
12293 * @cur: a pointer to an array of xmlChar
12294 *
12295 * Creates a parser context for an XML in-memory document.
12296 *
12297 * Returns the new parser context or NULL
12298 */
12299xmlParserCtxtPtr
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012300xmlCreateDocParserCtxt(const xmlChar *cur) {
Owen Taylor3473f882001-02-23 17:55:21 +000012301 int len;
12302
12303 if (cur == NULL)
12304 return(NULL);
12305 len = xmlStrlen(cur);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012306 return(xmlCreateMemoryParserCtxt((const char *)cur, len));
Owen Taylor3473f882001-02-23 17:55:21 +000012307}
12308
Daniel Veillard81273902003-09-30 00:43:48 +000012309#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000012310/**
12311 * xmlSAXParseDoc:
12312 * @sax: the SAX handler block
12313 * @cur: a pointer to an array of xmlChar
12314 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
12315 * documents
12316 *
12317 * parse an XML in-memory document and build a tree.
12318 * It use the given SAX function block to handle the parsing callback.
12319 * If sax is NULL, fallback to the default DOM tree building routines.
12320 *
12321 * Returns the resulting document tree
12322 */
12323
12324xmlDocPtr
Daniel Veillard7331e5c2005-03-31 14:59:00 +000012325xmlSAXParseDoc(xmlSAXHandlerPtr sax, const xmlChar *cur, int recovery) {
Owen Taylor3473f882001-02-23 17:55:21 +000012326 xmlDocPtr ret;
12327 xmlParserCtxtPtr ctxt;
Daniel Veillard38936062004-11-04 17:45:11 +000012328 xmlSAXHandlerPtr oldsax = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000012329
Daniel Veillard38936062004-11-04 17:45:11 +000012330 if (cur == NULL) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000012331
12332
12333 ctxt = xmlCreateDocParserCtxt(cur);
12334 if (ctxt == NULL) return(NULL);
12335 if (sax != NULL) {
Daniel Veillard38936062004-11-04 17:45:11 +000012336 oldsax = ctxt->sax;
Owen Taylor3473f882001-02-23 17:55:21 +000012337 ctxt->sax = sax;
12338 ctxt->userData = NULL;
12339 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000012340 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000012341
12342 xmlParseDocument(ctxt);
12343 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
12344 else {
12345 ret = NULL;
12346 xmlFreeDoc(ctxt->myDoc);
12347 ctxt->myDoc = NULL;
12348 }
Daniel Veillard34099b42004-11-04 17:34:35 +000012349 if (sax != NULL)
Daniel Veillard38936062004-11-04 17:45:11 +000012350 ctxt->sax = oldsax;
Owen Taylor3473f882001-02-23 17:55:21 +000012351 xmlFreeParserCtxt(ctxt);
12352
12353 return(ret);
12354}
12355
12356/**
12357 * xmlParseDoc:
12358 * @cur: a pointer to an array of xmlChar
12359 *
12360 * parse an XML in-memory document and build a tree.
12361 *
12362 * Returns the resulting document tree
12363 */
12364
12365xmlDocPtr
Daniel Veillard7331e5c2005-03-31 14:59:00 +000012366xmlParseDoc(const xmlChar *cur) {
Owen Taylor3473f882001-02-23 17:55:21 +000012367 return(xmlSAXParseDoc(NULL, cur, 0));
12368}
Daniel Veillard81273902003-09-30 00:43:48 +000012369#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012370
Daniel Veillard81273902003-09-30 00:43:48 +000012371#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +000012372/************************************************************************
12373 * *
12374 * Specific function to keep track of entities references *
12375 * and used by the XSLT debugger *
12376 * *
12377 ************************************************************************/
12378
12379static xmlEntityReferenceFunc xmlEntityRefFunc = NULL;
12380
12381/**
12382 * xmlAddEntityReference:
12383 * @ent : A valid entity
12384 * @firstNode : A valid first node for children of entity
12385 * @lastNode : A valid last node of children entity
12386 *
12387 * Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY
12388 */
12389static void
12390xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
12391 xmlNodePtr lastNode)
12392{
12393 if (xmlEntityRefFunc != NULL) {
12394 (*xmlEntityRefFunc) (ent, firstNode, lastNode);
12395 }
12396}
12397
12398
12399/**
12400 * xmlSetEntityReferenceFunc:
Daniel Veillard01c13b52002-12-10 15:19:08 +000012401 * @func: A valid function
Daniel Veillard8107a222002-01-13 14:10:10 +000012402 *
12403 * Set the function to call call back when a xml reference has been made
12404 */
12405void
12406xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func)
12407{
12408 xmlEntityRefFunc = func;
12409}
Daniel Veillard81273902003-09-30 00:43:48 +000012410#endif /* LIBXML_LEGACY_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012411
12412/************************************************************************
12413 * *
12414 * Miscellaneous *
12415 * *
12416 ************************************************************************/
12417
12418#ifdef LIBXML_XPATH_ENABLED
12419#include <libxml/xpath.h>
12420#endif
12421
Daniel Veillarddb5850a2002-01-18 11:49:26 +000012422extern void xmlGenericErrorDefaultFunc(void *ctx, const char *msg, ...);
Owen Taylor3473f882001-02-23 17:55:21 +000012423static int xmlParserInitialized = 0;
12424
12425/**
12426 * xmlInitParser:
12427 *
12428 * Initialization function for the XML parser.
12429 * This is not reentrant. Call once before processing in case of
12430 * use in multithreaded programs.
12431 */
12432
12433void
12434xmlInitParser(void) {
Daniel Veillard3c01b1d2001-10-17 15:58:35 +000012435 if (xmlParserInitialized != 0)
12436 return;
Owen Taylor3473f882001-02-23 17:55:21 +000012437
Daniel Veillarddb5850a2002-01-18 11:49:26 +000012438 if ((xmlGenericError == xmlGenericErrorDefaultFunc) ||
12439 (xmlGenericError == NULL))
12440 initGenericErrorDefaultFunc(NULL);
Daniel Veillard781ac8b2003-05-15 22:11:36 +000012441 xmlInitGlobals();
Daniel Veillardd0463562001-10-13 09:15:48 +000012442 xmlInitThreads();
Daniel Veillard6f350292001-10-14 09:56:15 +000012443 xmlInitMemory();
Owen Taylor3473f882001-02-23 17:55:21 +000012444 xmlInitCharEncodingHandlers();
Owen Taylor3473f882001-02-23 17:55:21 +000012445 xmlDefaultSAXHandlerInit();
12446 xmlRegisterDefaultInputCallbacks();
Daniel Veillarda9cce9c2003-09-29 13:20:24 +000012447#ifdef LIBXML_OUTPUT_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000012448 xmlRegisterDefaultOutputCallbacks();
Daniel Veillarda9cce9c2003-09-29 13:20:24 +000012449#endif /* LIBXML_OUTPUT_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012450#ifdef LIBXML_HTML_ENABLED
12451 htmlInitAutoClose();
12452 htmlDefaultSAXHandlerInit();
12453#endif
12454#ifdef LIBXML_XPATH_ENABLED
12455 xmlXPathInit();
12456#endif
12457 xmlParserInitialized = 1;
12458}
12459
12460/**
12461 * xmlCleanupParser:
12462 *
Daniel Veillardd8cf9062003-11-11 21:12:36 +000012463 * Cleanup function for the XML library. It tries to reclaim all
12464 * parsing related global memory allocated for the library processing.
Owen Taylor3473f882001-02-23 17:55:21 +000012465 * It doesn't deallocate any document related memory. Calling this
Daniel Veillardd8cf9062003-11-11 21:12:36 +000012466 * function should not prevent reusing the library but one should
12467 * call xmlCleanupParser() only when the process has
Daniel Veillard7424eb62003-01-24 14:14:52 +000012468 * finished using the library or XML document built with it.
Owen Taylor3473f882001-02-23 17:55:21 +000012469 */
12470
12471void
12472xmlCleanupParser(void) {
Daniel Veillard7fb801f2003-08-17 21:07:26 +000012473 if (!xmlParserInitialized)
12474 return;
12475
Owen Taylor3473f882001-02-23 17:55:21 +000012476 xmlCleanupCharEncodingHandlers();
Daniel Veillarde2940dd2001-08-22 00:06:49 +000012477#ifdef LIBXML_CATALOG_ENABLED
12478 xmlCatalogCleanup();
12479#endif
Daniel Veillard14412512005-01-21 23:53:26 +000012480 xmlDictCleanup();
Daniel Veillard04054be2003-10-15 10:48:54 +000012481 xmlCleanupInputCallbacks();
12482#ifdef LIBXML_OUTPUT_ENABLED
12483 xmlCleanupOutputCallbacks();
12484#endif
Daniel Veillard36e5cd52004-11-02 14:52:23 +000012485#ifdef LIBXML_SCHEMAS_ENABLED
12486 xmlSchemaCleanupTypes();
Daniel Veillarddd6d3002004-11-03 14:20:29 +000012487 xmlRelaxNGCleanupTypes();
Daniel Veillard36e5cd52004-11-02 14:52:23 +000012488#endif
Daniel Veillard781ac8b2003-05-15 22:11:36 +000012489 xmlCleanupGlobals();
Daniel Veillard2b8c4a12003-10-02 22:28:19 +000012490 xmlResetLastError();
Daniel Veillard74c0e592003-11-25 07:01:38 +000012491 xmlCleanupThreads(); /* must be last if called not from the main thread */
William M. Brack72ee48d2003-12-30 08:30:19 +000012492 xmlCleanupMemory();
Daniel Veillardd0463562001-10-13 09:15:48 +000012493 xmlParserInitialized = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000012494}
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012495
12496/************************************************************************
12497 * *
12498 * New set (2.6.0) of simpler and more flexible APIs *
12499 * *
12500 ************************************************************************/
12501
12502/**
Daniel Veillarde96a2a42003-09-24 21:23:56 +000012503 * DICT_FREE:
12504 * @str: a string
12505 *
12506 * Free a string if it is not owned by the "dict" dictionnary in the
12507 * current scope
12508 */
12509#define DICT_FREE(str) \
12510 if ((str) && ((!dict) || \
12511 (xmlDictOwns(dict, (const xmlChar *)(str)) == 0))) \
12512 xmlFree((char *)(str));
12513
12514/**
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012515 * xmlCtxtReset:
12516 * @ctxt: an XML parser context
12517 *
12518 * Reset a parser context
12519 */
12520void
12521xmlCtxtReset(xmlParserCtxtPtr ctxt)
12522{
12523 xmlParserInputPtr input;
Daniel Veillard36e5cd52004-11-02 14:52:23 +000012524 xmlDictPtr dict;
12525
12526 if (ctxt == NULL)
12527 return;
12528
12529 dict = ctxt->dict;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012530
12531 while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
12532 xmlFreeInputStream(input);
12533 }
12534 ctxt->inputNr = 0;
12535 ctxt->input = NULL;
12536
12537 ctxt->spaceNr = 0;
12538 ctxt->spaceTab[0] = -1;
12539 ctxt->space = &ctxt->spaceTab[0];
12540
12541
12542 ctxt->nodeNr = 0;
12543 ctxt->node = NULL;
12544
12545 ctxt->nameNr = 0;
12546 ctxt->name = NULL;
12547
Daniel Veillarde96a2a42003-09-24 21:23:56 +000012548 DICT_FREE(ctxt->version);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012549 ctxt->version = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000012550 DICT_FREE(ctxt->encoding);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012551 ctxt->encoding = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000012552 DICT_FREE(ctxt->directory);
12553 ctxt->directory = NULL;
12554 DICT_FREE(ctxt->extSubURI);
12555 ctxt->extSubURI = NULL;
12556 DICT_FREE(ctxt->extSubSystem);
12557 ctxt->extSubSystem = NULL;
12558 if (ctxt->myDoc != NULL)
12559 xmlFreeDoc(ctxt->myDoc);
12560 ctxt->myDoc = NULL;
12561
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012562 ctxt->standalone = -1;
12563 ctxt->hasExternalSubset = 0;
12564 ctxt->hasPErefs = 0;
12565 ctxt->html = 0;
12566 ctxt->external = 0;
12567 ctxt->instate = XML_PARSER_START;
12568 ctxt->token = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012569
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012570 ctxt->wellFormed = 1;
12571 ctxt->nsWellFormed = 1;
Daniel Veillardae289182004-01-21 16:00:43 +000012572 ctxt->disableSAX = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012573 ctxt->valid = 1;
Daniel Veillard766c4f92004-03-26 10:48:29 +000012574#if 0
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012575 ctxt->vctxt.userData = ctxt;
12576 ctxt->vctxt.error = xmlParserValidityError;
12577 ctxt->vctxt.warning = xmlParserValidityWarning;
Daniel Veillard766c4f92004-03-26 10:48:29 +000012578#endif
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012579 ctxt->record_info = 0;
12580 ctxt->nbChars = 0;
12581 ctxt->checkIndex = 0;
12582 ctxt->inSubset = 0;
12583 ctxt->errNo = XML_ERR_OK;
12584 ctxt->depth = 0;
12585 ctxt->charset = XML_CHAR_ENCODING_UTF8;
12586 ctxt->catalogs = NULL;
12587 xmlInitNodeInfoSeq(&ctxt->node_seq);
12588
12589 if (ctxt->attsDefault != NULL) {
12590 xmlHashFree(ctxt->attsDefault, (xmlHashDeallocator) xmlFree);
12591 ctxt->attsDefault = NULL;
12592 }
12593 if (ctxt->attsSpecial != NULL) {
12594 xmlHashFree(ctxt->attsSpecial, NULL);
12595 ctxt->attsSpecial = NULL;
12596 }
12597
Daniel Veillard4432df22003-09-28 18:58:27 +000012598#ifdef LIBXML_CATALOG_ENABLED
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012599 if (ctxt->catalogs != NULL)
12600 xmlCatalogFreeLocal(ctxt->catalogs);
Daniel Veillard4432df22003-09-28 18:58:27 +000012601#endif
Daniel Veillardcc199e02003-10-24 21:11:48 +000012602 if (ctxt->lastError.code != XML_ERR_OK)
12603 xmlResetError(&ctxt->lastError);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012604}
12605
12606/**
Daniel Veillard9ba8e382003-10-28 21:31:45 +000012607 * xmlCtxtResetPush:
12608 * @ctxt: an XML parser context
12609 * @chunk: a pointer to an array of chars
12610 * @size: number of chars in the array
12611 * @filename: an optional file name or URI
12612 * @encoding: the document encoding, or NULL
12613 *
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000012614 * Reset a push parser context
12615 *
12616 * Returns 0 in case of success and 1 in case of error
Daniel Veillard9ba8e382003-10-28 21:31:45 +000012617 */
12618int
12619xmlCtxtResetPush(xmlParserCtxtPtr ctxt, const char *chunk,
12620 int size, const char *filename, const char *encoding)
12621{
12622 xmlParserInputPtr inputStream;
12623 xmlParserInputBufferPtr buf;
12624 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
12625
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000012626 if (ctxt == NULL)
12627 return(1);
12628
Daniel Veillard9ba8e382003-10-28 21:31:45 +000012629 if ((encoding == NULL) && (chunk != NULL) && (size >= 4))
12630 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
12631
12632 buf = xmlAllocParserInputBuffer(enc);
12633 if (buf == NULL)
12634 return(1);
12635
12636 if (ctxt == NULL) {
12637 xmlFreeParserInputBuffer(buf);
12638 return(1);
12639 }
12640
12641 xmlCtxtReset(ctxt);
12642
12643 if (ctxt->pushTab == NULL) {
12644 ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 *
12645 sizeof(xmlChar *));
12646 if (ctxt->pushTab == NULL) {
12647 xmlErrMemory(ctxt, NULL);
12648 xmlFreeParserInputBuffer(buf);
12649 return(1);
12650 }
12651 }
12652
12653 if (filename == NULL) {
12654 ctxt->directory = NULL;
12655 } else {
12656 ctxt->directory = xmlParserGetDirectory(filename);
12657 }
12658
12659 inputStream = xmlNewInputStream(ctxt);
12660 if (inputStream == NULL) {
12661 xmlFreeParserInputBuffer(buf);
12662 return(1);
12663 }
12664
12665 if (filename == NULL)
12666 inputStream->filename = NULL;
12667 else
12668 inputStream->filename = (char *)
12669 xmlCanonicPath((const xmlChar *) filename);
12670 inputStream->buf = buf;
12671 inputStream->base = inputStream->buf->buffer->content;
12672 inputStream->cur = inputStream->buf->buffer->content;
12673 inputStream->end =
12674 &inputStream->buf->buffer->content[inputStream->buf->buffer->use];
12675
12676 inputPush(ctxt, inputStream);
12677
12678 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
12679 (ctxt->input->buf != NULL)) {
12680 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
12681 int cur = ctxt->input->cur - ctxt->input->base;
12682
12683 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
12684
12685 ctxt->input->base = ctxt->input->buf->buffer->content + base;
12686 ctxt->input->cur = ctxt->input->base + cur;
12687 ctxt->input->end =
12688 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->
12689 use];
12690#ifdef DEBUG_PUSH
12691 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
12692#endif
12693 }
12694
12695 if (encoding != NULL) {
12696 xmlCharEncodingHandlerPtr hdlr;
12697
12698 hdlr = xmlFindCharEncodingHandler(encoding);
12699 if (hdlr != NULL) {
12700 xmlSwitchToEncoding(ctxt, hdlr);
12701 } else {
12702 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
12703 "Unsupported encoding %s\n", BAD_CAST encoding);
12704 }
12705 } else if (enc != XML_CHAR_ENCODING_NONE) {
12706 xmlSwitchEncoding(ctxt, enc);
12707 }
12708
12709 return(0);
12710}
12711
12712/**
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012713 * xmlCtxtUseOptions:
12714 * @ctxt: an XML parser context
Daniel Veillard87ab1c12003-12-21 13:01:56 +000012715 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012716 *
12717 * Applies the options to the parser context
12718 *
12719 * Returns 0 in case of success, the set of unknown or unimplemented options
12720 * in case of error.
12721 */
12722int
12723xmlCtxtUseOptions(xmlParserCtxtPtr ctxt, int options)
12724{
Daniel Veillard36e5cd52004-11-02 14:52:23 +000012725 if (ctxt == NULL)
12726 return(-1);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012727 if (options & XML_PARSE_RECOVER) {
12728 ctxt->recovery = 1;
12729 options -= XML_PARSE_RECOVER;
12730 } else
12731 ctxt->recovery = 0;
12732 if (options & XML_PARSE_DTDLOAD) {
12733 ctxt->loadsubset = XML_DETECT_IDS;
12734 options -= XML_PARSE_DTDLOAD;
12735 } else
12736 ctxt->loadsubset = 0;
12737 if (options & XML_PARSE_DTDATTR) {
12738 ctxt->loadsubset |= XML_COMPLETE_ATTRS;
12739 options -= XML_PARSE_DTDATTR;
12740 }
12741 if (options & XML_PARSE_NOENT) {
12742 ctxt->replaceEntities = 1;
12743 /* ctxt->loadsubset |= XML_DETECT_IDS; */
12744 options -= XML_PARSE_NOENT;
12745 } else
12746 ctxt->replaceEntities = 0;
12747 if (options & XML_PARSE_NOWARNING) {
12748 ctxt->sax->warning = NULL;
12749 options -= XML_PARSE_NOWARNING;
12750 }
12751 if (options & XML_PARSE_NOERROR) {
12752 ctxt->sax->error = NULL;
12753 ctxt->sax->fatalError = NULL;
12754 options -= XML_PARSE_NOERROR;
12755 }
12756 if (options & XML_PARSE_PEDANTIC) {
12757 ctxt->pedantic = 1;
12758 options -= XML_PARSE_PEDANTIC;
12759 } else
12760 ctxt->pedantic = 0;
12761 if (options & XML_PARSE_NOBLANKS) {
12762 ctxt->keepBlanks = 0;
12763 ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace;
12764 options -= XML_PARSE_NOBLANKS;
12765 } else
12766 ctxt->keepBlanks = 1;
12767 if (options & XML_PARSE_DTDVALID) {
12768 ctxt->validate = 1;
12769 if (options & XML_PARSE_NOWARNING)
12770 ctxt->vctxt.warning = NULL;
12771 if (options & XML_PARSE_NOERROR)
12772 ctxt->vctxt.error = NULL;
12773 options -= XML_PARSE_DTDVALID;
12774 } else
12775 ctxt->validate = 0;
Daniel Veillard81273902003-09-30 00:43:48 +000012776#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012777 if (options & XML_PARSE_SAX1) {
12778 ctxt->sax->startElement = xmlSAX2StartElement;
12779 ctxt->sax->endElement = xmlSAX2EndElement;
12780 ctxt->sax->startElementNs = NULL;
12781 ctxt->sax->endElementNs = NULL;
12782 ctxt->sax->initialized = 1;
12783 options -= XML_PARSE_SAX1;
12784 }
Daniel Veillard81273902003-09-30 00:43:48 +000012785#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde96a2a42003-09-24 21:23:56 +000012786 if (options & XML_PARSE_NODICT) {
12787 ctxt->dictNames = 0;
12788 options -= XML_PARSE_NODICT;
12789 } else {
12790 ctxt->dictNames = 1;
12791 }
Daniel Veillarddca8cc72003-09-26 13:53:14 +000012792 if (options & XML_PARSE_NOCDATA) {
12793 ctxt->sax->cdataBlock = NULL;
12794 options -= XML_PARSE_NOCDATA;
12795 }
12796 if (options & XML_PARSE_NSCLEAN) {
12797 ctxt->options |= XML_PARSE_NSCLEAN;
12798 options -= XML_PARSE_NSCLEAN;
12799 }
Daniel Veillard61b93382003-11-03 14:28:31 +000012800 if (options & XML_PARSE_NONET) {
12801 ctxt->options |= XML_PARSE_NONET;
12802 options -= XML_PARSE_NONET;
12803 }
Daniel Veillard7ec29972003-10-31 14:36:36 +000012804 ctxt->linenumbers = 1;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012805 return (options);
12806}
12807
12808/**
12809 * xmlDoRead:
12810 * @ctxt: an XML parser context
Daniel Veillard60942de2003-09-25 21:05:58 +000012811 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012812 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000012813 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012814 * @reuse: keep the context for reuse
12815 *
12816 * Common front-end for the xmlRead functions
12817 *
12818 * Returns the resulting document tree or NULL
12819 */
12820static xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000012821xmlDoRead(xmlParserCtxtPtr ctxt, const char *URL, const char *encoding,
12822 int options, int reuse)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012823{
12824 xmlDocPtr ret;
12825
12826 xmlCtxtUseOptions(ctxt, options);
12827 if (encoding != NULL) {
12828 xmlCharEncodingHandlerPtr hdlr;
12829
12830 hdlr = xmlFindCharEncodingHandler(encoding);
12831 if (hdlr != NULL)
12832 xmlSwitchToEncoding(ctxt, hdlr);
12833 }
Daniel Veillard60942de2003-09-25 21:05:58 +000012834 if ((URL != NULL) && (ctxt->input != NULL) &&
12835 (ctxt->input->filename == NULL))
12836 ctxt->input->filename = (char *) xmlStrdup((const xmlChar *) URL);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012837 xmlParseDocument(ctxt);
12838 if ((ctxt->wellFormed) || ctxt->recovery)
12839 ret = ctxt->myDoc;
12840 else {
12841 ret = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000012842 if (ctxt->myDoc != NULL) {
Daniel Veillarde96a2a42003-09-24 21:23:56 +000012843 xmlFreeDoc(ctxt->myDoc);
12844 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012845 }
Daniel Veillarde96a2a42003-09-24 21:23:56 +000012846 ctxt->myDoc = NULL;
12847 if (!reuse) {
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012848 xmlFreeParserCtxt(ctxt);
Daniel Veillarde96a2a42003-09-24 21:23:56 +000012849 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012850
12851 return (ret);
12852}
12853
12854/**
12855 * xmlReadDoc:
12856 * @cur: a pointer to a zero terminated string
Daniel Veillard60942de2003-09-25 21:05:58 +000012857 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012858 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000012859 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012860 *
12861 * parse an XML in-memory document and build a tree.
12862 *
12863 * Returns the resulting document tree
12864 */
12865xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000012866xmlReadDoc(const xmlChar * cur, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012867{
12868 xmlParserCtxtPtr ctxt;
12869
12870 if (cur == NULL)
12871 return (NULL);
12872
12873 ctxt = xmlCreateDocParserCtxt(cur);
12874 if (ctxt == NULL)
12875 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000012876 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012877}
12878
12879/**
12880 * xmlReadFile:
12881 * @filename: a file or URL
12882 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000012883 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012884 *
12885 * parse an XML file from the filesystem or the network.
12886 *
12887 * Returns the resulting document tree
12888 */
12889xmlDocPtr
12890xmlReadFile(const char *filename, const char *encoding, int options)
12891{
12892 xmlParserCtxtPtr ctxt;
12893
Daniel Veillard61b93382003-11-03 14:28:31 +000012894 ctxt = xmlCreateURLParserCtxt(filename, options);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012895 if (ctxt == NULL)
12896 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000012897 return (xmlDoRead(ctxt, NULL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012898}
12899
12900/**
12901 * xmlReadMemory:
12902 * @buffer: a pointer to a char array
12903 * @size: the size of the array
Daniel Veillard60942de2003-09-25 21:05:58 +000012904 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012905 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000012906 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012907 *
12908 * parse an XML in-memory document and build a tree.
12909 *
12910 * Returns the resulting document tree
12911 */
12912xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000012913xmlReadMemory(const char *buffer, int size, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012914{
12915 xmlParserCtxtPtr ctxt;
12916
12917 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
12918 if (ctxt == NULL)
12919 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000012920 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012921}
12922
12923/**
12924 * xmlReadFd:
12925 * @fd: an open file descriptor
Daniel Veillard60942de2003-09-25 21:05:58 +000012926 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012927 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000012928 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012929 *
12930 * parse an XML from a file descriptor and build a tree.
Daniel Veillard4bc5f432003-12-22 18:13:12 +000012931 * NOTE that the file descriptor will not be closed when the
12932 * reader is closed or reset.
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012933 *
12934 * Returns the resulting document tree
12935 */
12936xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000012937xmlReadFd(int fd, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012938{
12939 xmlParserCtxtPtr ctxt;
12940 xmlParserInputBufferPtr input;
12941 xmlParserInputPtr stream;
12942
12943 if (fd < 0)
12944 return (NULL);
12945
12946 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
12947 if (input == NULL)
12948 return (NULL);
Daniel Veillard4bc5f432003-12-22 18:13:12 +000012949 input->closecallback = NULL;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012950 ctxt = xmlNewParserCtxt();
12951 if (ctxt == NULL) {
12952 xmlFreeParserInputBuffer(input);
12953 return (NULL);
12954 }
12955 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
12956 if (stream == NULL) {
12957 xmlFreeParserInputBuffer(input);
12958 xmlFreeParserCtxt(ctxt);
12959 return (NULL);
12960 }
12961 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000012962 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012963}
12964
12965/**
12966 * xmlReadIO:
12967 * @ioread: an I/O read function
12968 * @ioclose: an I/O close function
12969 * @ioctx: an I/O handler
Daniel Veillard60942de2003-09-25 21:05:58 +000012970 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012971 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000012972 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012973 *
12974 * parse an XML document from I/O functions and source and build a tree.
12975 *
12976 * Returns the resulting document tree
12977 */
12978xmlDocPtr
12979xmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
Daniel Veillard60942de2003-09-25 21:05:58 +000012980 void *ioctx, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012981{
12982 xmlParserCtxtPtr ctxt;
12983 xmlParserInputBufferPtr input;
12984 xmlParserInputPtr stream;
12985
12986 if (ioread == NULL)
12987 return (NULL);
12988
12989 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
12990 XML_CHAR_ENCODING_NONE);
12991 if (input == NULL)
12992 return (NULL);
12993 ctxt = xmlNewParserCtxt();
12994 if (ctxt == NULL) {
12995 xmlFreeParserInputBuffer(input);
12996 return (NULL);
12997 }
12998 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
12999 if (stream == NULL) {
13000 xmlFreeParserInputBuffer(input);
13001 xmlFreeParserCtxt(ctxt);
13002 return (NULL);
13003 }
13004 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000013005 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013006}
13007
13008/**
13009 * xmlCtxtReadDoc:
13010 * @ctxt: an XML parser context
13011 * @cur: a pointer to a zero terminated string
Daniel Veillard60942de2003-09-25 21:05:58 +000013012 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013013 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013014 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013015 *
13016 * parse an XML in-memory document and build a tree.
13017 * This reuses the existing @ctxt parser context
13018 *
13019 * Returns the resulting document tree
13020 */
13021xmlDocPtr
13022xmlCtxtReadDoc(xmlParserCtxtPtr ctxt, const xmlChar * cur,
Daniel Veillard60942de2003-09-25 21:05:58 +000013023 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013024{
13025 xmlParserInputPtr stream;
13026
13027 if (cur == NULL)
13028 return (NULL);
13029 if (ctxt == NULL)
13030 return (NULL);
13031
13032 xmlCtxtReset(ctxt);
13033
13034 stream = xmlNewStringInputStream(ctxt, cur);
13035 if (stream == NULL) {
13036 return (NULL);
13037 }
13038 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000013039 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013040}
13041
13042/**
13043 * xmlCtxtReadFile:
13044 * @ctxt: an XML parser context
13045 * @filename: a file or URL
13046 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013047 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013048 *
13049 * parse an XML file from the filesystem or the network.
13050 * This reuses the existing @ctxt parser context
13051 *
13052 * Returns the resulting document tree
13053 */
13054xmlDocPtr
13055xmlCtxtReadFile(xmlParserCtxtPtr ctxt, const char *filename,
13056 const char *encoding, int options)
13057{
13058 xmlParserInputPtr stream;
13059
13060 if (filename == NULL)
13061 return (NULL);
13062 if (ctxt == NULL)
13063 return (NULL);
13064
13065 xmlCtxtReset(ctxt);
13066
Daniel Veillard29614c72004-11-26 10:47:26 +000013067 stream = xmlLoadExternalEntity(filename, NULL, ctxt);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013068 if (stream == NULL) {
13069 return (NULL);
13070 }
13071 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000013072 return (xmlDoRead(ctxt, NULL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013073}
13074
13075/**
13076 * xmlCtxtReadMemory:
13077 * @ctxt: an XML parser context
13078 * @buffer: a pointer to a char array
13079 * @size: the size of the array
Daniel Veillard60942de2003-09-25 21:05:58 +000013080 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013081 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013082 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013083 *
13084 * parse an XML in-memory document and build a tree.
13085 * This reuses the existing @ctxt parser context
13086 *
13087 * Returns the resulting document tree
13088 */
13089xmlDocPtr
13090xmlCtxtReadMemory(xmlParserCtxtPtr ctxt, const char *buffer, int size,
Daniel Veillard60942de2003-09-25 21:05:58 +000013091 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013092{
13093 xmlParserInputBufferPtr input;
13094 xmlParserInputPtr stream;
13095
13096 if (ctxt == NULL)
13097 return (NULL);
13098 if (buffer == NULL)
13099 return (NULL);
13100
13101 xmlCtxtReset(ctxt);
13102
13103 input = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
13104 if (input == NULL) {
13105 return(NULL);
13106 }
13107
13108 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
13109 if (stream == NULL) {
13110 xmlFreeParserInputBuffer(input);
13111 return(NULL);
13112 }
13113
13114 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000013115 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013116}
13117
13118/**
13119 * xmlCtxtReadFd:
13120 * @ctxt: an XML parser context
13121 * @fd: an open file descriptor
Daniel Veillard60942de2003-09-25 21:05:58 +000013122 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013123 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013124 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013125 *
13126 * parse an XML from a file descriptor and build a tree.
13127 * This reuses the existing @ctxt parser context
Daniel Veillard4bc5f432003-12-22 18:13:12 +000013128 * NOTE that the file descriptor will not be closed when the
13129 * reader is closed or reset.
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013130 *
13131 * Returns the resulting document tree
13132 */
13133xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000013134xmlCtxtReadFd(xmlParserCtxtPtr ctxt, int fd,
13135 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013136{
13137 xmlParserInputBufferPtr input;
13138 xmlParserInputPtr stream;
13139
13140 if (fd < 0)
13141 return (NULL);
13142 if (ctxt == NULL)
13143 return (NULL);
13144
13145 xmlCtxtReset(ctxt);
13146
13147
13148 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
13149 if (input == NULL)
13150 return (NULL);
Daniel Veillard4bc5f432003-12-22 18:13:12 +000013151 input->closecallback = NULL;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013152 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
13153 if (stream == NULL) {
13154 xmlFreeParserInputBuffer(input);
13155 return (NULL);
13156 }
13157 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000013158 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013159}
13160
13161/**
13162 * xmlCtxtReadIO:
13163 * @ctxt: an XML parser context
13164 * @ioread: an I/O read function
13165 * @ioclose: an I/O close function
13166 * @ioctx: an I/O handler
Daniel Veillard60942de2003-09-25 21:05:58 +000013167 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013168 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013169 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013170 *
13171 * parse an XML document from I/O functions and source and build a tree.
13172 * This reuses the existing @ctxt parser context
13173 *
13174 * Returns the resulting document tree
13175 */
13176xmlDocPtr
13177xmlCtxtReadIO(xmlParserCtxtPtr ctxt, xmlInputReadCallback ioread,
13178 xmlInputCloseCallback ioclose, void *ioctx,
Daniel Veillard60942de2003-09-25 21:05:58 +000013179 const char *URL,
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013180 const char *encoding, int options)
13181{
13182 xmlParserInputBufferPtr input;
13183 xmlParserInputPtr stream;
13184
13185 if (ioread == NULL)
13186 return (NULL);
13187 if (ctxt == NULL)
13188 return (NULL);
13189
13190 xmlCtxtReset(ctxt);
13191
13192 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
13193 XML_CHAR_ENCODING_NONE);
13194 if (input == NULL)
13195 return (NULL);
13196 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
13197 if (stream == NULL) {
13198 xmlFreeParserInputBuffer(input);
13199 return (NULL);
13200 }
13201 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000013202 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013203}
Daniel Veillard5d4644e2005-04-01 13:11:58 +000013204
13205#define bottom_parser
13206#include "elfgcchack.h"