blob: 3d810f034ab1558054e0052144628a76e3e0b19d [file] [log] [blame]
Owen Taylor3473f882001-02-23 17:55:21 +00001/*
2 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
3 * implemented on top of the SAX interfaces
4 *
5 * References:
6 * The XML specification:
7 * http://www.w3.org/TR/REC-xml
8 * Original 1.0 version:
9 * http://www.w3.org/TR/1998/REC-xml-19980210
10 * XML second edition working draft
11 * http://www.w3.org/TR/2000/WD-xml-2e-20000814
12 *
13 * Okay this is a big file, the parser core is around 7000 lines, then it
14 * is followed by the progressive parser top routines, then the various
Daniel Veillardcbaf3992001-12-31 16:16:02 +000015 * high level APIs to call the parser and a few miscellaneous functions.
Owen Taylor3473f882001-02-23 17:55:21 +000016 * A number of helper functions and deprecated ones have been moved to
17 * parserInternals.c to reduce this file size.
18 * As much as possible the functions are associated with their relative
19 * production in the XML specification. A few productions defining the
 * different ranges of character are actually implemented either in
21 * parserInternals.h or parserInternals.c
22 * The DOM tree build is realized from the default SAX callbacks in
23 * the module SAX.c.
24 * The routines doing the validation checks are in valid.c and called either
Daniel Veillardcbaf3992001-12-31 16:16:02 +000025 * from the SAX callbacks or as standalone functions using a preparsed
Owen Taylor3473f882001-02-23 17:55:21 +000026 * document.
27 *
28 * See Copyright for the status of this software.
29 *
Daniel Veillardc5d64342001-06-24 12:13:24 +000030 * daniel@veillard.com
Owen Taylor3473f882001-02-23 17:55:21 +000031 */
32
Daniel Veillard34ce8be2002-03-18 19:37:11 +000033#define IN_LIBXML
Bjorn Reese70a9da52001-04-21 16:57:29 +000034#include "libxml.h"
35
Daniel Veillard3c5ed912002-01-08 10:36:16 +000036#if defined(WIN32) && !defined (__CYGWIN__)
Owen Taylor3473f882001-02-23 17:55:21 +000037#define XML_DIR_SEP '\\'
38#else
Owen Taylor3473f882001-02-23 17:55:21 +000039#define XML_DIR_SEP '/'
40#endif
41
Owen Taylor3473f882001-02-23 17:55:21 +000042#include <stdlib.h>
43#include <string.h>
Aleksey Sanine7acf432003-10-02 20:05:27 +000044#include <stdarg.h>
Owen Taylor3473f882001-02-23 17:55:21 +000045#include <libxml/xmlmemory.h>
Daniel Veillardd0463562001-10-13 09:15:48 +000046#include <libxml/threads.h>
47#include <libxml/globals.h>
Owen Taylor3473f882001-02-23 17:55:21 +000048#include <libxml/tree.h>
49#include <libxml/parser.h>
50#include <libxml/parserInternals.h>
51#include <libxml/valid.h>
52#include <libxml/entities.h>
53#include <libxml/xmlerror.h>
54#include <libxml/encoding.h>
55#include <libxml/xmlIO.h>
56#include <libxml/uri.h>
Daniel Veillard5d90b6c2001-08-22 14:29:45 +000057#ifdef LIBXML_CATALOG_ENABLED
58#include <libxml/catalog.h>
59#endif
William M. Brack1d8c9b22004-12-25 10:14:57 +000060#ifdef LIBXML_SCHEMAS_ENABLED
61#include <libxml/xmlschemastypes.h>
62#include <libxml/relaxng.h>
63#endif
Owen Taylor3473f882001-02-23 17:55:21 +000064#ifdef HAVE_CTYPE_H
65#include <ctype.h>
66#endif
67#ifdef HAVE_STDLIB_H
68#include <stdlib.h>
69#endif
70#ifdef HAVE_SYS_STAT_H
71#include <sys/stat.h>
72#endif
73#ifdef HAVE_FCNTL_H
74#include <fcntl.h>
75#endif
76#ifdef HAVE_UNISTD_H
77#include <unistd.h>
78#endif
79#ifdef HAVE_ZLIB_H
80#include <zlib.h>
81#endif
82
/**
 * xmlParserMaxDepth:
 *
 * Arbitrary upper bound on the depth of the XML documents the parser
 * agrees to process.  It is a safety boundary, not an intrinsic
 * limitation of the parser.
 */
unsigned int xmlParserMaxDepth = 1024;
Owen Taylor3473f882001-02-23 17:55:21 +000091
/* Flag macro: this file is built with the SAX2 code paths. */
#define SAX2 1

/* Sizes (in bytes) used for the parser's temporary buffers. */
#define XML_PARSER_BIG_BUFFER_SIZE 300
#define XML_PARSER_BUFFER_SIZE 100

/* Marker string identifying a document used in SAX compatibility mode. */
#define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"
98
/*
 * NULL-terminated list of the XML-prefixed processing instruction
 * targets that are allowed by W3C specifications.
 */
static const char *xmlW3CPIs[] = {
    "xml-stylesheet",
    NULL
};
107
Daniel Veillarda07050d2003-10-19 14:46:32 +0000108
Owen Taylor3473f882001-02-23 17:55:21 +0000109/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
Owen Taylor3473f882001-02-23 17:55:21 +0000110xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
111 const xmlChar **str);
112
Daniel Veillard7d515752003-09-26 19:12:37 +0000113static xmlParserErrors
Daniel Veillarda97a19b2001-05-20 13:19:52 +0000114xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
115 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +0000116 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +0000117 const xmlChar *ID, xmlNodePtr *list);
Owen Taylor3473f882001-02-23 17:55:21 +0000118
Daniel Veillard81273902003-09-30 00:43:48 +0000119#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +0000120static void
121xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
122 xmlNodePtr lastNode);
Daniel Veillard81273902003-09-30 00:43:48 +0000123#endif /* LIBXML_LEGACY_ENABLED */
Daniel Veillard8107a222002-01-13 14:10:10 +0000124
Daniel Veillard7d515752003-09-26 19:12:37 +0000125static xmlParserErrors
Daniel Veillard328f48c2002-11-15 15:24:34 +0000126xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
127 const xmlChar *string, void *user_data, xmlNodePtr *lst);
Daniel Veillarde57ec792003-09-10 10:50:59 +0000128
129/************************************************************************
130 * *
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000131 * Some factorized error routines *
132 * *
133 ************************************************************************/
134
135/**
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000136 * xmlErrAttributeDup:
137 * @ctxt: an XML parser context
138 * @prefix: the attribute prefix
139 * @localname: the attribute localname
140 *
141 * Handle a redefinition of attribute error
142 */
143static void
144xmlErrAttributeDup(xmlParserCtxtPtr ctxt, const xmlChar * prefix,
145 const xmlChar * localname)
146{
Daniel Veillard157fee02003-10-31 10:36:03 +0000147 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
148 (ctxt->instate == XML_PARSER_EOF))
149 return;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000150 ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
Daniel Veillard2b8c4a12003-10-02 22:28:19 +0000151 if (prefix == NULL)
Daniel Veillard659e71e2003-10-10 14:10:40 +0000152 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000153 ctxt->errNo, XML_ERR_FATAL, NULL, 0,
154 (const char *) localname, NULL, NULL, 0, 0,
155 "Attribute %s redefined\n", localname);
Daniel Veillard2b8c4a12003-10-02 22:28:19 +0000156 else
Daniel Veillard659e71e2003-10-10 14:10:40 +0000157 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000158 ctxt->errNo, XML_ERR_FATAL, NULL, 0,
159 (const char *) prefix, (const char *) localname,
160 NULL, 0, 0, "Attribute %s:%s redefined\n", prefix,
161 localname);
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000162 ctxt->wellFormed = 0;
163 if (ctxt->recovery == 0)
164 ctxt->disableSAX = 1;
165}
166
167/**
168 * xmlFatalErr:
169 * @ctxt: an XML parser context
170 * @error: the error number
171 * @extra: extra information string
172 *
173 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
174 */
175static void
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000176xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info)
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000177{
178 const char *errmsg;
179
Daniel Veillard157fee02003-10-31 10:36:03 +0000180 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
181 (ctxt->instate == XML_PARSER_EOF))
182 return;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000183 switch (error) {
184 case XML_ERR_INVALID_HEX_CHARREF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000185 errmsg = "CharRef: invalid hexadecimal value\n";
186 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000187 case XML_ERR_INVALID_DEC_CHARREF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000188 errmsg = "CharRef: invalid decimal value\n";
189 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000190 case XML_ERR_INVALID_CHARREF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000191 errmsg = "CharRef: invalid value\n";
192 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000193 case XML_ERR_INTERNAL_ERROR:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000194 errmsg = "internal error";
195 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000196 case XML_ERR_PEREF_AT_EOF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000197 errmsg = "PEReference at end of document\n";
198 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000199 case XML_ERR_PEREF_IN_PROLOG:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000200 errmsg = "PEReference in prolog\n";
201 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000202 case XML_ERR_PEREF_IN_EPILOG:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000203 errmsg = "PEReference in epilog\n";
204 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000205 case XML_ERR_PEREF_NO_NAME:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000206 errmsg = "PEReference: no name\n";
207 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000208 case XML_ERR_PEREF_SEMICOL_MISSING:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000209 errmsg = "PEReference: expecting ';'\n";
210 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000211 case XML_ERR_ENTITY_LOOP:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000212 errmsg = "Detected an entity reference loop\n";
213 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000214 case XML_ERR_ENTITY_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000215 errmsg = "EntityValue: \" or ' expected\n";
216 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000217 case XML_ERR_ENTITY_PE_INTERNAL:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000218 errmsg = "PEReferences forbidden in internal subset\n";
219 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000220 case XML_ERR_ENTITY_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000221 errmsg = "EntityValue: \" or ' expected\n";
222 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000223 case XML_ERR_ATTRIBUTE_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000224 errmsg = "AttValue: \" or ' expected\n";
225 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000226 case XML_ERR_LT_IN_ATTRIBUTE:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000227 errmsg = "Unescaped '<' not allowed in attributes values\n";
228 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000229 case XML_ERR_LITERAL_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000230 errmsg = "SystemLiteral \" or ' expected\n";
231 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000232 case XML_ERR_LITERAL_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000233 errmsg = "Unfinished System or Public ID \" or ' expected\n";
234 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000235 case XML_ERR_MISPLACED_CDATA_END:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000236 errmsg = "Sequence ']]>' not allowed in content\n";
237 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000238 case XML_ERR_URI_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000239 errmsg = "SYSTEM or PUBLIC, the URI is missing\n";
240 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000241 case XML_ERR_PUBID_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000242 errmsg = "PUBLIC, the Public Identifier is missing\n";
243 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000244 case XML_ERR_HYPHEN_IN_COMMENT:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000245 errmsg = "Comment must not contain '--' (double-hyphen)\n";
246 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000247 case XML_ERR_PI_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000248 errmsg = "xmlParsePI : no target name\n";
249 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000250 case XML_ERR_RESERVED_XML_NAME:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000251 errmsg = "Invalid PI name\n";
252 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000253 case XML_ERR_NOTATION_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000254 errmsg = "NOTATION: Name expected here\n";
255 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000256 case XML_ERR_NOTATION_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000257 errmsg = "'>' required to close NOTATION declaration\n";
258 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000259 case XML_ERR_VALUE_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000260 errmsg = "Entity value required\n";
261 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000262 case XML_ERR_URI_FRAGMENT:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000263 errmsg = "Fragment not allowed";
264 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000265 case XML_ERR_ATTLIST_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000266 errmsg = "'(' required to start ATTLIST enumeration\n";
267 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000268 case XML_ERR_NMTOKEN_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000269 errmsg = "NmToken expected in ATTLIST enumeration\n";
270 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000271 case XML_ERR_ATTLIST_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000272 errmsg = "')' required to finish ATTLIST enumeration\n";
273 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000274 case XML_ERR_MIXED_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000275 errmsg = "MixedContentDecl : '|' or ')*' expected\n";
276 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000277 case XML_ERR_PCDATA_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000278 errmsg = "MixedContentDecl : '#PCDATA' expected\n";
279 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000280 case XML_ERR_ELEMCONTENT_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000281 errmsg = "ContentDecl : Name or '(' expected\n";
282 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000283 case XML_ERR_ELEMCONTENT_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000284 errmsg = "ContentDecl : ',' '|' or ')' expected\n";
285 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000286 case XML_ERR_PEREF_IN_INT_SUBSET:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000287 errmsg =
288 "PEReference: forbidden within markup decl in internal subset\n";
289 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000290 case XML_ERR_GT_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000291 errmsg = "expected '>'\n";
292 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000293 case XML_ERR_CONDSEC_INVALID:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000294 errmsg = "XML conditional section '[' expected\n";
295 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000296 case XML_ERR_EXT_SUBSET_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000297 errmsg = "Content error in the external subset\n";
298 break;
299 case XML_ERR_CONDSEC_INVALID_KEYWORD:
300 errmsg =
301 "conditional section INCLUDE or IGNORE keyword expected\n";
302 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000303 case XML_ERR_CONDSEC_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000304 errmsg = "XML conditional section not closed\n";
305 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000306 case XML_ERR_XMLDECL_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000307 errmsg = "Text declaration '<?xml' required\n";
308 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000309 case XML_ERR_XMLDECL_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000310 errmsg = "parsing XML declaration: '?>' expected\n";
311 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000312 case XML_ERR_EXT_ENTITY_STANDALONE:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000313 errmsg = "external parsed entities cannot be standalone\n";
314 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000315 case XML_ERR_ENTITYREF_SEMICOL_MISSING:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000316 errmsg = "EntityRef: expecting ';'\n";
317 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000318 case XML_ERR_DOCTYPE_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000319 errmsg = "DOCTYPE improperly terminated\n";
320 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000321 case XML_ERR_LTSLASH_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000322 errmsg = "EndTag: '</' not found\n";
323 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000324 case XML_ERR_EQUAL_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000325 errmsg = "expected '='\n";
326 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000327 case XML_ERR_STRING_NOT_CLOSED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000328 errmsg = "String not closed expecting \" or '\n";
329 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000330 case XML_ERR_STRING_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000331 errmsg = "String not started expecting ' or \"\n";
332 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000333 case XML_ERR_ENCODING_NAME:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000334 errmsg = "Invalid XML encoding name\n";
335 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000336 case XML_ERR_STANDALONE_VALUE:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000337 errmsg = "standalone accepts only 'yes' or 'no'\n";
338 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000339 case XML_ERR_DOCUMENT_EMPTY:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000340 errmsg = "Document is empty\n";
341 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000342 case XML_ERR_DOCUMENT_END:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000343 errmsg = "Extra content at the end of the document\n";
344 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000345 case XML_ERR_NOT_WELL_BALANCED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000346 errmsg = "chunk is not well balanced\n";
347 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000348 case XML_ERR_EXTRA_CONTENT:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000349 errmsg = "extra content at the end of well balanced chunk\n";
350 break;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000351 case XML_ERR_VERSION_MISSING:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000352 errmsg = "Malformed declaration expecting version\n";
353 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000354#if 0
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000355 case:
356 errmsg = "\n";
357 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000358#endif
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000359 default:
360 errmsg = "Unregistered error message\n";
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000361 }
362 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000363 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000364 XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, errmsg,
365 info);
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000366 ctxt->wellFormed = 0;
367 if (ctxt->recovery == 0)
368 ctxt->disableSAX = 1;
369}
370
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000371/**
372 * xmlFatalErrMsg:
373 * @ctxt: an XML parser context
374 * @error: the error number
375 * @msg: the error message
376 *
377 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
378 */
379static void
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000380xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
381 const char *msg)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000382{
Daniel Veillard157fee02003-10-31 10:36:03 +0000383 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
384 (ctxt->instate == XML_PARSER_EOF))
385 return;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000386 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000387 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000388 XML_ERR_FATAL, NULL, 0, NULL, NULL, NULL, 0, 0, msg);
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000389 ctxt->wellFormed = 0;
390 if (ctxt->recovery == 0)
391 ctxt->disableSAX = 1;
392}
393
394/**
Daniel Veillard24eb9782003-10-04 21:08:09 +0000395 * xmlWarningMsg:
396 * @ctxt: an XML parser context
397 * @error: the error number
398 * @msg: the error message
399 * @str1: extra data
400 * @str2: extra data
401 *
402 * Handle a warning.
403 */
404static void
405xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
406 const char *msg, const xmlChar *str1, const xmlChar *str2)
407{
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000408 xmlStructuredErrorFunc schannel = NULL;
Daniel Veillardc790bf42003-10-11 10:50:10 +0000409
Daniel Veillard157fee02003-10-31 10:36:03 +0000410 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
411 (ctxt->instate == XML_PARSER_EOF))
412 return;
Daniel Veillardc790bf42003-10-11 10:50:10 +0000413 if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC))
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000414 schannel = ctxt->sax->serror;
415 __xmlRaiseError(schannel,
Daniel Veillard659e71e2003-10-10 14:10:40 +0000416 (ctxt->sax) ? ctxt->sax->warning : NULL,
417 ctxt->userData,
Daniel Veillard24eb9782003-10-04 21:08:09 +0000418 ctxt, NULL, XML_FROM_PARSER, error,
419 XML_ERR_WARNING, NULL, 0,
420 (const char *) str1, (const char *) str2, NULL, 0, 0,
421 msg, (const char *) str1, (const char *) str2);
422}
423
424/**
425 * xmlValidityError:
426 * @ctxt: an XML parser context
427 * @error: the error number
428 * @msg: the error message
429 * @str1: extra data
430 *
Daniel Veillardf88d8cf2003-12-08 10:25:02 +0000431 * Handle a validity error.
Daniel Veillard24eb9782003-10-04 21:08:09 +0000432 */
433static void
434xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error,
435 const char *msg, const xmlChar *str1)
436{
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000437 xmlStructuredErrorFunc schannel = NULL;
Daniel Veillard157fee02003-10-31 10:36:03 +0000438
439 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
440 (ctxt->instate == XML_PARSER_EOF))
441 return;
Daniel Veillard24eb9782003-10-04 21:08:09 +0000442 ctxt->errNo = error;
Daniel Veillardc790bf42003-10-11 10:50:10 +0000443 if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC))
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000444 schannel = ctxt->sax->serror;
Daniel Veillardc790bf42003-10-11 10:50:10 +0000445 __xmlRaiseError(schannel,
Daniel Veillard659e71e2003-10-10 14:10:40 +0000446 ctxt->vctxt.error, ctxt->vctxt.userData,
Daniel Veillard24eb9782003-10-04 21:08:09 +0000447 ctxt, NULL, XML_FROM_DTD, error,
448 XML_ERR_ERROR, NULL, 0, (const char *) str1,
449 NULL, NULL, 0, 0,
450 msg, (const char *) str1);
451 ctxt->valid = 0;
452}
453
454/**
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000455 * xmlFatalErrMsgInt:
456 * @ctxt: an XML parser context
457 * @error: the error number
458 * @msg: the error message
459 * @val: an integer value
460 *
461 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
462 */
463static void
464xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000465 const char *msg, int val)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000466{
Daniel Veillard157fee02003-10-31 10:36:03 +0000467 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
468 (ctxt->instate == XML_PARSER_EOF))
469 return;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000470 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000471 __xmlRaiseError(NULL, NULL, NULL,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000472 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
473 NULL, 0, NULL, NULL, NULL, val, 0, msg, val);
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000474 ctxt->wellFormed = 0;
475 if (ctxt->recovery == 0)
476 ctxt->disableSAX = 1;
477}
478
479/**
Daniel Veillardf403d292003-10-05 13:51:35 +0000480 * xmlFatalErrMsgStrIntStr:
481 * @ctxt: an XML parser context
482 * @error: the error number
483 * @msg: the error message
484 * @str1: an string info
485 * @val: an integer value
486 * @str2: an string info
487 *
488 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
489 */
490static void
491xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
492 const char *msg, const xmlChar *str1, int val,
493 const xmlChar *str2)
494{
Daniel Veillard157fee02003-10-31 10:36:03 +0000495 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
496 (ctxt->instate == XML_PARSER_EOF))
497 return;
Daniel Veillardf403d292003-10-05 13:51:35 +0000498 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000499 __xmlRaiseError(NULL, NULL, NULL,
Daniel Veillardf403d292003-10-05 13:51:35 +0000500 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
501 NULL, 0, (const char *) str1, (const char *) str2,
502 NULL, val, 0, msg, str1, val, str2);
503 ctxt->wellFormed = 0;
504 if (ctxt->recovery == 0)
505 ctxt->disableSAX = 1;
506}
507
508/**
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000509 * xmlFatalErrMsgStr:
510 * @ctxt: an XML parser context
511 * @error: the error number
512 * @msg: the error message
513 * @val: a string value
514 *
515 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
516 */
517static void
518xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000519 const char *msg, const xmlChar * val)
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000520{
Daniel Veillard157fee02003-10-31 10:36:03 +0000521 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
522 (ctxt->instate == XML_PARSER_EOF))
523 return;
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000524 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000525 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000526 XML_FROM_PARSER, error, XML_ERR_FATAL,
527 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
528 val);
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000529 ctxt->wellFormed = 0;
530 if (ctxt->recovery == 0)
531 ctxt->disableSAX = 1;
532}
533
534/**
Daniel Veillardf403d292003-10-05 13:51:35 +0000535 * xmlErrMsgStr:
536 * @ctxt: an XML parser context
537 * @error: the error number
538 * @msg: the error message
539 * @val: a string value
540 *
541 * Handle a non fatal parser error
542 */
543static void
544xmlErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
545 const char *msg, const xmlChar * val)
546{
Daniel Veillard157fee02003-10-31 10:36:03 +0000547 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
548 (ctxt->instate == XML_PARSER_EOF))
549 return;
Daniel Veillardf403d292003-10-05 13:51:35 +0000550 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000551 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
Daniel Veillardf403d292003-10-05 13:51:35 +0000552 XML_FROM_PARSER, error, XML_ERR_ERROR,
553 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
554 val);
555}
556
557/**
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000558 * xmlNsErr:
559 * @ctxt: an XML parser context
560 * @error: the error number
561 * @msg: the message
562 * @info1: extra information string
563 * @info2: extra information string
564 *
565 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
566 */
567static void
568xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
569 const char *msg,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000570 const xmlChar * info1, const xmlChar * info2,
571 const xmlChar * info3)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000572{
Daniel Veillard157fee02003-10-31 10:36:03 +0000573 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
574 (ctxt->instate == XML_PARSER_EOF))
575 return;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000576 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000577 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000578 XML_ERR_ERROR, NULL, 0, (const char *) info1,
579 (const char *) info2, (const char *) info3, 0, 0, msg,
580 info1, info2, info3);
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000581 ctxt->nsWellFormed = 0;
582}
583
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000584/************************************************************************
585 * *
Daniel Veillarde57ec792003-09-10 10:50:59 +0000586 * SAX2 defaulted attributes handling *
587 * *
588 ************************************************************************/
589
590/**
591 * xmlDetectSAX2:
592 * @ctxt: an XML parser context
593 *
594 * Do the SAX2 detection and specific intialization
595 */
596static void
597xmlDetectSAX2(xmlParserCtxtPtr ctxt) {
598 if (ctxt == NULL) return;
Daniel Veillard81273902003-09-30 00:43:48 +0000599#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +0000600 if ((ctxt->sax) && (ctxt->sax->initialized == XML_SAX2_MAGIC) &&
601 ((ctxt->sax->startElementNs != NULL) ||
602 (ctxt->sax->endElementNs != NULL))) ctxt->sax2 = 1;
Daniel Veillard81273902003-09-30 00:43:48 +0000603#else
604 ctxt->sax2 = 1;
605#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +0000606
607 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
608 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
609 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
William M. Brack9f797ab2004-07-28 07:40:12 +0000610 if ((ctxt->str_xml==NULL) || (ctxt->str_xmlns==NULL) ||
611 (ctxt->str_xml_ns == NULL)) {
612 xmlErrMemory(ctxt, NULL);
613 }
Daniel Veillarde57ec792003-09-10 10:50:59 +0000614}
615
Daniel Veillarde57ec792003-09-10 10:50:59 +0000616typedef struct _xmlDefAttrs xmlDefAttrs;
617typedef xmlDefAttrs *xmlDefAttrsPtr;
618struct _xmlDefAttrs {
619 int nbAttrs; /* number of defaulted attributes on that element */
620 int maxAttrs; /* the size of the array */
621 const xmlChar *values[4]; /* array of localname/prefix/values */
622};
Daniel Veillarde57ec792003-09-10 10:50:59 +0000623
624/**
625 * xmlAddDefAttrs:
626 * @ctxt: an XML parser context
627 * @fullname: the element fullname
628 * @fullattr: the attribute fullname
629 * @value: the attribute value
630 *
631 * Add a defaulted attribute for an element
632 */
633static void
634xmlAddDefAttrs(xmlParserCtxtPtr ctxt,
635 const xmlChar *fullname,
636 const xmlChar *fullattr,
637 const xmlChar *value) {
638 xmlDefAttrsPtr defaults;
639 int len;
640 const xmlChar *name;
641 const xmlChar *prefix;
642
643 if (ctxt->attsDefault == NULL) {
Daniel Veillard316a5c32005-01-23 22:56:39 +0000644 ctxt->attsDefault = xmlHashCreateDict(10, ctxt->dict);
Daniel Veillarde57ec792003-09-10 10:50:59 +0000645 if (ctxt->attsDefault == NULL)
646 goto mem_error;
647 }
648
649 /*
Daniel Veillard079f6a72004-09-23 13:15:03 +0000650 * split the element name into prefix:localname , the string found
651 * are within the DTD and then not associated to namespace names.
Daniel Veillarde57ec792003-09-10 10:50:59 +0000652 */
653 name = xmlSplitQName3(fullname, &len);
654 if (name == NULL) {
655 name = xmlDictLookup(ctxt->dict, fullname, -1);
656 prefix = NULL;
657 } else {
658 name = xmlDictLookup(ctxt->dict, name, -1);
659 prefix = xmlDictLookup(ctxt->dict, fullname, len);
660 }
661
662 /*
663 * make sure there is some storage
664 */
665 defaults = xmlHashLookup2(ctxt->attsDefault, name, prefix);
666 if (defaults == NULL) {
667 defaults = (xmlDefAttrsPtr) xmlMalloc(sizeof(xmlDefAttrs) +
Daniel Veillard079f6a72004-09-23 13:15:03 +0000668 (4 * 4) * sizeof(const xmlChar *));
Daniel Veillarde57ec792003-09-10 10:50:59 +0000669 if (defaults == NULL)
670 goto mem_error;
Daniel Veillarde57ec792003-09-10 10:50:59 +0000671 defaults->nbAttrs = 0;
Daniel Veillard079f6a72004-09-23 13:15:03 +0000672 defaults->maxAttrs = 4;
Daniel Veillarde57ec792003-09-10 10:50:59 +0000673 xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix, defaults, NULL);
674 } else if (defaults->nbAttrs >= defaults->maxAttrs) {
Daniel Veillard079f6a72004-09-23 13:15:03 +0000675 xmlDefAttrsPtr temp;
676
677 temp = (xmlDefAttrsPtr) xmlRealloc(defaults, sizeof(xmlDefAttrs) +
Daniel Veillarde57ec792003-09-10 10:50:59 +0000678 (2 * defaults->maxAttrs * 4) * sizeof(const xmlChar *));
Daniel Veillard079f6a72004-09-23 13:15:03 +0000679 if (temp == NULL)
Daniel Veillarde57ec792003-09-10 10:50:59 +0000680 goto mem_error;
Daniel Veillard079f6a72004-09-23 13:15:03 +0000681 defaults = temp;
Daniel Veillarde57ec792003-09-10 10:50:59 +0000682 defaults->maxAttrs *= 2;
683 xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix, defaults, NULL);
684 }
685
686 /*
687 * plit the element name into prefix:localname , the string found
688 * are within the DTD and hen not associated to namespace names.
689 */
690 name = xmlSplitQName3(fullattr, &len);
691 if (name == NULL) {
692 name = xmlDictLookup(ctxt->dict, fullattr, -1);
693 prefix = NULL;
694 } else {
695 name = xmlDictLookup(ctxt->dict, name, -1);
696 prefix = xmlDictLookup(ctxt->dict, fullattr, len);
697 }
698
699 defaults->values[4 * defaults->nbAttrs] = name;
700 defaults->values[4 * defaults->nbAttrs + 1] = prefix;
701 /* intern the string and precompute the end */
702 len = xmlStrlen(value);
703 value = xmlDictLookup(ctxt->dict, value, len);
704 defaults->values[4 * defaults->nbAttrs + 2] = value;
705 defaults->values[4 * defaults->nbAttrs + 3] = value + len;
706 defaults->nbAttrs++;
707
708 return;
709
710mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000711 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +0000712 return;
713}
714
Daniel Veillard8e36e6a2003-09-10 10:50:59 +0000715/**
716 * xmlAddSpecialAttr:
717 * @ctxt: an XML parser context
718 * @fullname: the element fullname
719 * @fullattr: the attribute fullname
720 * @type: the attribute type
721 *
722 * Register that this attribute is not CDATA
723 */
724static void
725xmlAddSpecialAttr(xmlParserCtxtPtr ctxt,
726 const xmlChar *fullname,
727 const xmlChar *fullattr,
728 int type)
729{
730 if (ctxt->attsSpecial == NULL) {
Daniel Veillard316a5c32005-01-23 22:56:39 +0000731 ctxt->attsSpecial = xmlHashCreateDict(10, ctxt->dict);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +0000732 if (ctxt->attsSpecial == NULL)
733 goto mem_error;
734 }
735
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +0000736 xmlHashAddEntry2(ctxt->attsSpecial, fullname, fullattr,
737 (void *) (long) type);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +0000738 return;
739
740mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000741 xmlErrMemory(ctxt, NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +0000742 return;
743}
744
Daniel Veillard4432df22003-09-28 18:58:27 +0000745/**
746 * xmlCheckLanguageID:
747 * @lang: pointer to the string value
748 *
749 * Checks that the value conforms to the LanguageID production:
750 *
751 * NOTE: this is somewhat deprecated, those productions were removed from
752 * the XML Second edition.
753 *
754 * [33] LanguageID ::= Langcode ('-' Subcode)*
755 * [34] Langcode ::= ISO639Code | IanaCode | UserCode
756 * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
757 * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
758 * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
759 * [38] Subcode ::= ([a-z] | [A-Z])+
760 *
761 * Returns 1 if correct 0 otherwise
762 **/
763int
764xmlCheckLanguageID(const xmlChar * lang)
765{
766 const xmlChar *cur = lang;
767
768 if (cur == NULL)
769 return (0);
770 if (((cur[0] == 'i') && (cur[1] == '-')) ||
771 ((cur[0] == 'I') && (cur[1] == '-'))) {
772 /*
773 * IANA code
774 */
775 cur += 2;
776 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
777 ((cur[0] >= 'a') && (cur[0] <= 'z')))
778 cur++;
779 } else if (((cur[0] == 'x') && (cur[1] == '-')) ||
780 ((cur[0] == 'X') && (cur[1] == '-'))) {
781 /*
782 * User code
783 */
784 cur += 2;
785 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
786 ((cur[0] >= 'a') && (cur[0] <= 'z')))
787 cur++;
788 } else if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
789 ((cur[0] >= 'a') && (cur[0] <= 'z'))) {
790 /*
791 * ISO639
792 */
793 cur++;
794 if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
795 ((cur[0] >= 'a') && (cur[0] <= 'z')))
796 cur++;
797 else
798 return (0);
799 } else
800 return (0);
801 while (cur[0] != 0) { /* non input consuming */
802 if (cur[0] != '-')
803 return (0);
804 cur++;
805 if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
806 ((cur[0] >= 'a') && (cur[0] <= 'z')))
807 cur++;
808 else
809 return (0);
810 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
811 ((cur[0] >= 'a') && (cur[0] <= 'z')))
812 cur++;
813 }
814 return (1);
815}
816
Owen Taylor3473f882001-02-23 17:55:21 +0000817/************************************************************************
818 * *
819 * Parser stacks related functions and macros *
820 * *
821 ************************************************************************/
822
823xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
824 const xmlChar ** str);
825
Daniel Veillard0fb18932003-09-07 09:14:37 +0000826#ifdef SAX2
827/**
828 * nsPush:
829 * @ctxt: an XML parser context
830 * @prefix: the namespace prefix or NULL
831 * @URL: the namespace name
832 *
833 * Pushes a new parser namespace on top of the ns stack
834 *
William M. Brack7b9154b2003-09-27 19:23:50 +0000835 * Returns -1 in case of error, -2 if the namespace should be discarded
836 * and the index in the stack otherwise.
Daniel Veillard0fb18932003-09-07 09:14:37 +0000837 */
838static int
839nsPush(xmlParserCtxtPtr ctxt, const xmlChar *prefix, const xmlChar *URL)
840{
Daniel Veillarddca8cc72003-09-26 13:53:14 +0000841 if (ctxt->options & XML_PARSE_NSCLEAN) {
842 int i;
843 for (i = 0;i < ctxt->nsNr;i += 2) {
844 if (ctxt->nsTab[i] == prefix) {
845 /* in scope */
846 if (ctxt->nsTab[i + 1] == URL)
847 return(-2);
848 /* out of scope keep it */
849 break;
850 }
851 }
852 }
Daniel Veillard0fb18932003-09-07 09:14:37 +0000853 if ((ctxt->nsMax == 0) || (ctxt->nsTab == NULL)) {
854 ctxt->nsMax = 10;
855 ctxt->nsNr = 0;
856 ctxt->nsTab = (const xmlChar **)
857 xmlMalloc(ctxt->nsMax * sizeof(xmlChar *));
858 if (ctxt->nsTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000859 xmlErrMemory(ctxt, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +0000860 ctxt->nsMax = 0;
861 return (-1);
862 }
863 } else if (ctxt->nsNr >= ctxt->nsMax) {
864 ctxt->nsMax *= 2;
865 ctxt->nsTab = (const xmlChar **)
Daniel Veillard5bb9ccd2004-02-09 12:39:02 +0000866 xmlRealloc((char *) ctxt->nsTab,
Daniel Veillard0fb18932003-09-07 09:14:37 +0000867 ctxt->nsMax * sizeof(ctxt->nsTab[0]));
868 if (ctxt->nsTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000869 xmlErrMemory(ctxt, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +0000870 ctxt->nsMax /= 2;
871 return (-1);
872 }
873 }
874 ctxt->nsTab[ctxt->nsNr++] = prefix;
875 ctxt->nsTab[ctxt->nsNr++] = URL;
876 return (ctxt->nsNr);
877}
878/**
879 * nsPop:
880 * @ctxt: an XML parser context
881 * @nr: the number to pop
882 *
883 * Pops the top @nr parser prefix/namespace from the ns stack
884 *
885 * Returns the number of namespaces removed
886 */
887static int
888nsPop(xmlParserCtxtPtr ctxt, int nr)
889{
890 int i;
891
892 if (ctxt->nsTab == NULL) return(0);
893 if (ctxt->nsNr < nr) {
894 xmlGenericError(xmlGenericErrorContext, "Pbm popping %d NS\n", nr);
895 nr = ctxt->nsNr;
896 }
897 if (ctxt->nsNr <= 0)
898 return (0);
899
900 for (i = 0;i < nr;i++) {
901 ctxt->nsNr--;
902 ctxt->nsTab[ctxt->nsNr] = NULL;
903 }
904 return(nr);
905}
906#endif
907
908static int
909xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt, int nr) {
910 const xmlChar **atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +0000911 int *attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +0000912 int maxatts;
913
914 if (ctxt->atts == NULL) {
Daniel Veillarde57ec792003-09-10 10:50:59 +0000915 maxatts = 55; /* allow for 10 attrs by default */
Daniel Veillard0fb18932003-09-07 09:14:37 +0000916 atts = (const xmlChar **)
917 xmlMalloc(maxatts * sizeof(xmlChar *));
Daniel Veillarde57ec792003-09-10 10:50:59 +0000918 if (atts == NULL) goto mem_error;
Daniel Veillard0fb18932003-09-07 09:14:37 +0000919 ctxt->atts = atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +0000920 attallocs = (int *) xmlMalloc((maxatts / 5) * sizeof(int));
921 if (attallocs == NULL) goto mem_error;
922 ctxt->attallocs = attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +0000923 ctxt->maxatts = maxatts;
Daniel Veillarde57ec792003-09-10 10:50:59 +0000924 } else if (nr + 5 > ctxt->maxatts) {
925 maxatts = (nr + 5) * 2;
Daniel Veillard0fb18932003-09-07 09:14:37 +0000926 atts = (const xmlChar **) xmlRealloc((void *) ctxt->atts,
927 maxatts * sizeof(const xmlChar *));
Daniel Veillarde57ec792003-09-10 10:50:59 +0000928 if (atts == NULL) goto mem_error;
Daniel Veillard0fb18932003-09-07 09:14:37 +0000929 ctxt->atts = atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +0000930 attallocs = (int *) xmlRealloc((void *) ctxt->attallocs,
931 (maxatts / 5) * sizeof(int));
932 if (attallocs == NULL) goto mem_error;
933 ctxt->attallocs = attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +0000934 ctxt->maxatts = maxatts;
935 }
936 return(ctxt->maxatts);
Daniel Veillarde57ec792003-09-10 10:50:59 +0000937mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000938 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +0000939 return(-1);
Daniel Veillard0fb18932003-09-07 09:14:37 +0000940}
941
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000942/**
943 * inputPush:
944 * @ctxt: an XML parser context
Daniel Veillard9d06d302002-01-22 18:15:52 +0000945 * @value: the parser input
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000946 *
947 * Pushes a new parser input on top of the input stack
Daniel Veillard9d06d302002-01-22 18:15:52 +0000948 *
949 * Returns 0 in case of error, the index in the stack otherwise
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000950 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +0000951int
Daniel Veillard1c732d22002-11-30 11:22:59 +0000952inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value)
953{
Daniel Veillard36e5cd52004-11-02 14:52:23 +0000954 if ((ctxt == NULL) || (value == NULL))
955 return(0);
Daniel Veillard1c732d22002-11-30 11:22:59 +0000956 if (ctxt->inputNr >= ctxt->inputMax) {
957 ctxt->inputMax *= 2;
958 ctxt->inputTab =
959 (xmlParserInputPtr *) xmlRealloc(ctxt->inputTab,
960 ctxt->inputMax *
961 sizeof(ctxt->inputTab[0]));
962 if (ctxt->inputTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000963 xmlErrMemory(ctxt, NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +0000964 return (0);
965 }
966 }
967 ctxt->inputTab[ctxt->inputNr] = value;
968 ctxt->input = value;
969 return (ctxt->inputNr++);
970}
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000971/**
Daniel Veillard1c732d22002-11-30 11:22:59 +0000972 * inputPop:
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000973 * @ctxt: an XML parser context
974 *
Daniel Veillard1c732d22002-11-30 11:22:59 +0000975 * Pops the top parser input from the input stack
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000976 *
Daniel Veillard1c732d22002-11-30 11:22:59 +0000977 * Returns the input just removed
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000978 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +0000979xmlParserInputPtr
Daniel Veillard1c732d22002-11-30 11:22:59 +0000980inputPop(xmlParserCtxtPtr ctxt)
981{
982 xmlParserInputPtr ret;
983
Daniel Veillard36e5cd52004-11-02 14:52:23 +0000984 if (ctxt == NULL)
985 return(NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +0000986 if (ctxt->inputNr <= 0)
Daniel Veillard24505b02005-07-28 23:49:35 +0000987 return (NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +0000988 ctxt->inputNr--;
989 if (ctxt->inputNr > 0)
990 ctxt->input = ctxt->inputTab[ctxt->inputNr - 1];
991 else
992 ctxt->input = NULL;
993 ret = ctxt->inputTab[ctxt->inputNr];
Daniel Veillard24505b02005-07-28 23:49:35 +0000994 ctxt->inputTab[ctxt->inputNr] = NULL;
Daniel Veillard1c732d22002-11-30 11:22:59 +0000995 return (ret);
996}
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000997/**
998 * nodePush:
999 * @ctxt: an XML parser context
Daniel Veillard9d06d302002-01-22 18:15:52 +00001000 * @value: the element node
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001001 *
1002 * Pushes a new element node on top of the node stack
Daniel Veillard9d06d302002-01-22 18:15:52 +00001003 *
1004 * Returns 0 in case of error, the index in the stack otherwise
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001005 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001006int
Daniel Veillard1c732d22002-11-30 11:22:59 +00001007nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value)
1008{
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001009 if (ctxt == NULL) return(0);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001010 if (ctxt->nodeNr >= ctxt->nodeMax) {
Daniel Veillardd0cf7f62004-11-09 16:17:02 +00001011 xmlNodePtr *tmp;
1012
1013 tmp = (xmlNodePtr *) xmlRealloc(ctxt->nodeTab,
1014 ctxt->nodeMax * 2 *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001015 sizeof(ctxt->nodeTab[0]));
Daniel Veillardd0cf7f62004-11-09 16:17:02 +00001016 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001017 xmlErrMemory(ctxt, NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001018 return (0);
1019 }
Daniel Veillardd0cf7f62004-11-09 16:17:02 +00001020 ctxt->nodeTab = tmp;
1021 ctxt->nodeMax *= 2;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001022 }
Daniel Veillard4aede2e2003-10-17 12:43:59 +00001023 if (((unsigned int) ctxt->nodeNr) > xmlParserMaxDepth) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00001024 xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
Daniel Veillard4aede2e2003-10-17 12:43:59 +00001025 "Excessive depth in document: change xmlParserMaxDepth = %d\n",
1026 xmlParserMaxDepth);
Daniel Veillard3b2e4e12003-02-03 08:52:58 +00001027 ctxt->instate = XML_PARSER_EOF;
Daniel Veillard3b2e4e12003-02-03 08:52:58 +00001028 return(0);
1029 }
Daniel Veillard1c732d22002-11-30 11:22:59 +00001030 ctxt->nodeTab[ctxt->nodeNr] = value;
1031 ctxt->node = value;
1032 return (ctxt->nodeNr++);
1033}
1034/**
1035 * nodePop:
1036 * @ctxt: an XML parser context
1037 *
1038 * Pops the top element node from the node stack
1039 *
1040 * Returns the node just removed
Owen Taylor3473f882001-02-23 17:55:21 +00001041 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001042xmlNodePtr
Daniel Veillard1c732d22002-11-30 11:22:59 +00001043nodePop(xmlParserCtxtPtr ctxt)
1044{
1045 xmlNodePtr ret;
1046
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001047 if (ctxt == NULL) return(NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001048 if (ctxt->nodeNr <= 0)
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001049 return (NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001050 ctxt->nodeNr--;
1051 if (ctxt->nodeNr > 0)
1052 ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1];
1053 else
1054 ctxt->node = NULL;
1055 ret = ctxt->nodeTab[ctxt->nodeNr];
Daniel Veillard24505b02005-07-28 23:49:35 +00001056 ctxt->nodeTab[ctxt->nodeNr] = NULL;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001057 return (ret);
1058}
Daniel Veillarda2351322004-06-27 12:08:10 +00001059
1060#ifdef LIBXML_PUSH_ENABLED
Daniel Veillard1c732d22002-11-30 11:22:59 +00001061/**
Daniel Veillarde57ec792003-09-10 10:50:59 +00001062 * nameNsPush:
1063 * @ctxt: an XML parser context
1064 * @value: the element name
1065 * @prefix: the element prefix
1066 * @URI: the element namespace name
1067 *
1068 * Pushes a new element name/prefix/URL on top of the name stack
1069 *
1070 * Returns -1 in case of error, the index in the stack otherwise
1071 */
1072static int
1073nameNsPush(xmlParserCtxtPtr ctxt, const xmlChar * value,
1074 const xmlChar *prefix, const xmlChar *URI, int nsNr)
1075{
1076 if (ctxt->nameNr >= ctxt->nameMax) {
1077 const xmlChar * *tmp;
1078 void **tmp2;
1079 ctxt->nameMax *= 2;
1080 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1081 ctxt->nameMax *
1082 sizeof(ctxt->nameTab[0]));
1083 if (tmp == NULL) {
1084 ctxt->nameMax /= 2;
1085 goto mem_error;
1086 }
1087 ctxt->nameTab = tmp;
1088 tmp2 = (void **) xmlRealloc((void * *)ctxt->pushTab,
1089 ctxt->nameMax * 3 *
1090 sizeof(ctxt->pushTab[0]));
1091 if (tmp2 == NULL) {
1092 ctxt->nameMax /= 2;
1093 goto mem_error;
1094 }
1095 ctxt->pushTab = tmp2;
1096 }
1097 ctxt->nameTab[ctxt->nameNr] = value;
1098 ctxt->name = value;
1099 ctxt->pushTab[ctxt->nameNr * 3] = (void *) prefix;
1100 ctxt->pushTab[ctxt->nameNr * 3 + 1] = (void *) URI;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +00001101 ctxt->pushTab[ctxt->nameNr * 3 + 2] = (void *) (long) nsNr;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001102 return (ctxt->nameNr++);
1103mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001104 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001105 return (-1);
1106}
1107/**
1108 * nameNsPop:
1109 * @ctxt: an XML parser context
1110 *
1111 * Pops the top element/prefix/URI name from the name stack
1112 *
1113 * Returns the name just removed
1114 */
1115static const xmlChar *
1116nameNsPop(xmlParserCtxtPtr ctxt)
1117{
1118 const xmlChar *ret;
1119
1120 if (ctxt->nameNr <= 0)
Daniel Veillard24505b02005-07-28 23:49:35 +00001121 return (NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001122 ctxt->nameNr--;
1123 if (ctxt->nameNr > 0)
1124 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1125 else
1126 ctxt->name = NULL;
1127 ret = ctxt->nameTab[ctxt->nameNr];
1128 ctxt->nameTab[ctxt->nameNr] = NULL;
1129 return (ret);
1130}
Daniel Veillarda2351322004-06-27 12:08:10 +00001131#endif /* LIBXML_PUSH_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +00001132
1133/**
Daniel Veillard1c732d22002-11-30 11:22:59 +00001134 * namePush:
1135 * @ctxt: an XML parser context
1136 * @value: the element name
1137 *
1138 * Pushes a new element name on top of the name stack
1139 *
Daniel Veillarde57ec792003-09-10 10:50:59 +00001140 * Returns -1 in case of error, the index in the stack otherwise
Daniel Veillard1c732d22002-11-30 11:22:59 +00001141 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001142int
Daniel Veillard2fdbd322003-08-18 12:15:38 +00001143namePush(xmlParserCtxtPtr ctxt, const xmlChar * value)
Daniel Veillard1c732d22002-11-30 11:22:59 +00001144{
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001145 if (ctxt == NULL) return (-1);
1146
Daniel Veillard1c732d22002-11-30 11:22:59 +00001147 if (ctxt->nameNr >= ctxt->nameMax) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00001148 const xmlChar * *tmp;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001149 ctxt->nameMax *= 2;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001150 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
Daniel Veillard1c732d22002-11-30 11:22:59 +00001151 ctxt->nameMax *
1152 sizeof(ctxt->nameTab[0]));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001153 if (tmp == NULL) {
1154 ctxt->nameMax /= 2;
1155 goto mem_error;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001156 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00001157 ctxt->nameTab = tmp;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001158 }
1159 ctxt->nameTab[ctxt->nameNr] = value;
1160 ctxt->name = value;
1161 return (ctxt->nameNr++);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001162mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001163 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001164 return (-1);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001165}
1166/**
1167 * namePop:
1168 * @ctxt: an XML parser context
1169 *
1170 * Pops the top element name from the name stack
1171 *
1172 * Returns the name just removed
1173 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001174const xmlChar *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001175namePop(xmlParserCtxtPtr ctxt)
1176{
Daniel Veillard2fdbd322003-08-18 12:15:38 +00001177 const xmlChar *ret;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001178
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001179 if ((ctxt == NULL) || (ctxt->nameNr <= 0))
1180 return (NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001181 ctxt->nameNr--;
1182 if (ctxt->nameNr > 0)
1183 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1184 else
1185 ctxt->name = NULL;
1186 ret = ctxt->nameTab[ctxt->nameNr];
Daniel Veillard24505b02005-07-28 23:49:35 +00001187 ctxt->nameTab[ctxt->nameNr] = NULL;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001188 return (ret);
1189}
Owen Taylor3473f882001-02-23 17:55:21 +00001190
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001191static int spacePush(xmlParserCtxtPtr ctxt, int val) {
Owen Taylor3473f882001-02-23 17:55:21 +00001192 if (ctxt->spaceNr >= ctxt->spaceMax) {
1193 ctxt->spaceMax *= 2;
1194 ctxt->spaceTab = (int *) xmlRealloc(ctxt->spaceTab,
1195 ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
1196 if (ctxt->spaceTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001197 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001198 return(0);
1199 }
1200 }
1201 ctxt->spaceTab[ctxt->spaceNr] = val;
1202 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr];
1203 return(ctxt->spaceNr++);
1204}
1205
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001206static int spacePop(xmlParserCtxtPtr ctxt) {
Owen Taylor3473f882001-02-23 17:55:21 +00001207 int ret;
1208 if (ctxt->spaceNr <= 0) return(0);
1209 ctxt->spaceNr--;
1210 if (ctxt->spaceNr > 0)
1211 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1];
1212 else
1213 ctxt->space = NULL;
1214 ret = ctxt->spaceTab[ctxt->spaceNr];
1215 ctxt->spaceTab[ctxt->spaceNr] = -1;
1216 return(ret);
1217}
1218
/*
 * Macros for accessing the content. Those should be used only by the parser,
 * and not exported.
 *
 * Dirty macros, i.e. one often needs to make assumptions about the context
 * to use them
 *
 *   CUR_PTR return the current pointer to the xmlChar to be parsed.
 *           To be used with extreme caution since operations consuming
 *           characters may move the input buffer to a different location !
 *   CUR     returns the current xmlChar value, i.e. an 8 bit value.
 *           This should be used internally by the parser
 *           only to compare to ASCII values otherwise it would break when
 *           running with UTF-8 encoding.
 *   RAW     same as CUR but in the input buffer, bypass any token
 *           extraction that may have been done
 *   NXT(n)  returns the n'th next xmlChar. Same as CUR, it should be used
 *           only to compare on ASCII based substring.
 *   SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
 *           strings without newlines within the parser.
 *   NEXT1   Skip 1 xmlChar, and must also be used only to skip 1 non-newline
 *           ASCII defined char within the parser.
 * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
 *
 *   NEXT    Skip to the next character, this does the proper decoding
 *           in UTF-8 mode. It also pops up unfinished entities on the fly.
 *   NEXTL(l) Skip the current unicode character of l xmlChars long.
 *   CUR_CHAR(l) returns the current unicode character (int), set l
 *           to the number of xmlChars used for the encoding [0-5].
 *   CUR_SCHAR  same but operate on a string instead of the context
 *   COPY_BUF  copy the current unicode char to the target buffer, increment
 *            the index
 *   GROW, SHRINK  handling of input buffers
 */
1253
/*
 * Raw accessors to the current input; all of them expect a variable
 * named ctxt to be in scope.
 */
/* the xmlChar under the cursor */
#define RAW (*ctxt->input->cur)
/* defined exactly like RAW */
#define CUR (*ctxt->input->cur)
/* the xmlChar found val positions after the cursor */
#define NXT(val) ctxt->input->cur[(val)]
/* the cursor itself */
#define CUR_PTR ctxt->input->cur
1258
/*
 * CMPn(s, c1, ..., cn): true when the first n bytes found at @s are
 * c1 ... cn, compared as unsigned char. The bytes are tested in order and
 * the test stops at the first mismatch. @s is expanded once per compared
 * byte, so it must be free of side effects.
 */
#define CMP4( s, c1, c2, c3, c4 ) \
  ( ((const unsigned char *) (s))[ 0 ] == (c1) && \
    ((const unsigned char *) (s))[ 1 ] == (c2) && \
    ((const unsigned char *) (s))[ 2 ] == (c3) && \
    ((const unsigned char *) (s))[ 3 ] == (c4) )
#define CMP5( s, c1, c2, c3, c4, c5 ) \
  ( CMP4( s, c1, c2, c3, c4 ) && \
    ((const unsigned char *) (s))[ 4 ] == (c5) )
#define CMP6( s, c1, c2, c3, c4, c5, c6 ) \
  ( CMP5( s, c1, c2, c3, c4, c5 ) && \
    ((const unsigned char *) (s))[ 5 ] == (c6) )
#define CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) \
  ( CMP6( s, c1, c2, c3, c4, c5, c6 ) && \
    ((const unsigned char *) (s))[ 6 ] == (c7) )
#define CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) \
  ( CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) && \
    ((const unsigned char *) (s))[ 7 ] == (c8) )
#define CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) \
  ( CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) && \
    ((const unsigned char *) (s))[ 8 ] == (c9) )
#define CMP10( s, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 ) \
  ( CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) && \
    ((const unsigned char *) (s))[ 9 ] == (c10) )
1276
/*
 * SKIP(val): advance the cursor, the column and ctxt->nbChars by @val in
 * one step (the line counter is not touched), then handle a '%' found
 * under the cursor and pop the input if it is exhausted and cannot grow.
 */
#define SKIP(val) do {							\
    ctxt->nbChars += (val);						\
    ctxt->input->cur += (val);						\
    ctxt->input->col += (val);						\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
    if ((*ctxt->input->cur == 0) &&					\
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))		\
	    xmlPopInput(ctxt);						\
  } while (0)

/*
 * SKIPL(val): same as SKIP but advance one xmlChar at a time so that the
 * line and column counters are updated when a newline is crossed.
 * @val is evaluated at each iteration of the loop.
 */
#define SKIPL(val) do {							\
    int skipl;								\
    for (skipl = 0; skipl < (val); skipl++) {				\
	if (*(ctxt->input->cur) == '\n') {				\
	    ctxt->input->line++; ctxt->input->col = 1;			\
	} else ctxt->input->col++;					\
	ctxt->nbChars++;						\
	ctxt->input->cur++;						\
    }									\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
    if ((*ctxt->input->cur == 0) &&					\
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))		\
	    xmlPopInput(ctxt);						\
  } while (0)
1299
Daniel Veillarda880b122003-04-21 21:36:41 +00001300#define SHRINK if ((ctxt->progressive == 0) && \
Daniel Veillard6155d8a2003-08-19 15:01:28 +00001301 (ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \
1302 (ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \
Daniel Veillard46de64e2002-05-29 08:21:33 +00001303 xmlSHRINK (ctxt);
1304
1305static void xmlSHRINK (xmlParserCtxtPtr ctxt) {
1306 xmlParserInputShrink(ctxt->input);
1307 if ((*ctxt->input->cur == 0) &&
1308 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1309 xmlPopInput(ctxt);
Daniel Veillard48b2f892001-02-25 16:11:03 +00001310 }
Owen Taylor3473f882001-02-23 17:55:21 +00001311
Daniel Veillarda880b122003-04-21 21:36:41 +00001312#define GROW if ((ctxt->progressive == 0) && \
1313 (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK)) \
Daniel Veillard46de64e2002-05-29 08:21:33 +00001314 xmlGROW (ctxt);
1315
1316static void xmlGROW (xmlParserCtxtPtr ctxt) {
1317 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1318 if ((*ctxt->input->cur == 0) &&
1319 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1320 xmlPopInput(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +00001321}
Owen Taylor3473f882001-02-23 17:55:21 +00001322
/* skip the blanks found at the cursor */
#define SKIP_BLANKS xmlSkipBlankChars(ctxt)

/* move to the next character */
#define NEXT xmlNextChar(ctxt)

/*
 * NEXT1: advance the cursor, the column and ctxt->nbChars by one xmlChar
 * and grow the input if a 0 is reached. The line counter is not updated.
 * The expansion is a brace block, not a do { } while (0) statement.
 */
#define NEXT1 {								\
	ctxt->input->col++;						\
	ctxt->input->cur++;						\
	ctxt->nbChars++;						\
	if (*ctxt->input->cur == 0)					\
	    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);		\
    }

/*
 * NEXTL(l): skip the character under the cursor, @l xmlChars long,
 * updating the line/column counters, then handle a '%' found under the
 * new cursor position.
 */
#define NEXTL(l) do {							\
    if (*(ctxt->input->cur) == '\n') {					\
	ctxt->input->line++; ctxt->input->col = 1;			\
    } else ctxt->input->col++;						\
    ctxt->input->cur += (l);						\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
  } while (0)

/* @l must be an lvalue: its address is passed to receive the length */
#define CUR_CHAR(l) xmlCurrentChar(ctxt, &l)
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, s, &l)

/*
 * COPY_BUF(l,b,i,v): store the character @v, whose encoding is @l xmlChars
 * long, at index @i of buffer @b and advance @i past it.
 * The expansion is a bare if/else without a trailing ';'.
 */
#define COPY_BUF(l,b,i,v)						\
    if ((l) == 1) (b)[(i)++] = (xmlChar) (v);				\
    else (i) += xmlCopyCharMultiByte(&(b)[(i)],(v))
Owen Taylor3473f882001-02-23 17:55:21 +00001349
1350/**
1351 * xmlSkipBlankChars:
1352 * @ctxt: the XML parser context
1353 *
1354 * skip all blanks character found at that point in the input streams.
1355 * It pops up finished entities in the process if allowable at that point.
1356 *
1357 * Returns the number of space chars skipped
1358 */
1359
1360int
1361xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
Daniel Veillard02141ea2001-04-30 11:46:40 +00001362 int res = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001363
1364 /*
1365 * It's Okay to use CUR/NEXT here since all the blanks are on
1366 * the ASCII range.
1367 */
Daniel Veillard02141ea2001-04-30 11:46:40 +00001368 if ((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) {
1369 const xmlChar *cur;
Owen Taylor3473f882001-02-23 17:55:21 +00001370 /*
Daniel Veillard02141ea2001-04-30 11:46:40 +00001371 * if we are in the document content, go really fast
Owen Taylor3473f882001-02-23 17:55:21 +00001372 */
Daniel Veillard02141ea2001-04-30 11:46:40 +00001373 cur = ctxt->input->cur;
William M. Brack76e95df2003-10-18 16:20:14 +00001374 while (IS_BLANK_CH(*cur)) {
Daniel Veillard02141ea2001-04-30 11:46:40 +00001375 if (*cur == '\n') {
1376 ctxt->input->line++; ctxt->input->col = 1;
1377 }
1378 cur++;
1379 res++;
1380 if (*cur == 0) {
1381 ctxt->input->cur = cur;
1382 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1383 cur = ctxt->input->cur;
1384 }
1385 }
1386 ctxt->input->cur = cur;
1387 } else {
1388 int cur;
1389 do {
1390 cur = CUR;
Daniel Veillard7da92702005-01-23 20:15:53 +00001391 while (IS_BLANK_CH(cur)) { /* CHECKED tstblanks.xml */
Daniel Veillard02141ea2001-04-30 11:46:40 +00001392 NEXT;
1393 cur = CUR;
1394 res++;
1395 }
1396 while ((cur == 0) && (ctxt->inputNr > 1) &&
1397 (ctxt->instate != XML_PARSER_COMMENT)) {
1398 xmlPopInput(ctxt);
1399 cur = CUR;
1400 }
1401 /*
1402 * Need to handle support of entities branching here
1403 */
1404 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);
1405 } while (IS_BLANK(cur)); /* CHECKED tstblanks.xml */
1406 }
Owen Taylor3473f882001-02-23 17:55:21 +00001407 return(res);
1408}
1409
1410/************************************************************************
1411 * *
1412 * Commodity functions to handle entities *
1413 * *
1414 ************************************************************************/
1415
1416/**
1417 * xmlPopInput:
1418 * @ctxt: an XML parser context
1419 *
1420 * xmlPopInput: the current input pointed by ctxt->input came to an end
1421 * pop it and return the next char.
1422 *
1423 * Returns the current xmlChar in the parser context
1424 */
1425xmlChar
1426xmlPopInput(xmlParserCtxtPtr ctxt) {
Daniel Veillarda82b1822004-11-08 16:24:57 +00001427 if ((ctxt == NULL) || (ctxt->inputNr <= 1)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00001428 if (xmlParserDebugEntities)
1429 xmlGenericError(xmlGenericErrorContext,
1430 "Popping input %d\n", ctxt->inputNr);
1431 xmlFreeInputStream(inputPop(ctxt));
Daniel Veillard561b7f82002-03-20 21:55:57 +00001432 if ((*ctxt->input->cur == 0) &&
Owen Taylor3473f882001-02-23 17:55:21 +00001433 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1434 return(xmlPopInput(ctxt));
1435 return(CUR);
1436}
1437
1438/**
1439 * xmlPushInput:
1440 * @ctxt: an XML parser context
1441 * @input: an XML parser input fragment (entity, XML fragment ...).
1442 *
1443 * xmlPushInput: switch to a new input stream which is stacked on top
1444 * of the previous one(s).
1445 */
1446void
1447xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
1448 if (input == NULL) return;
1449
1450 if (xmlParserDebugEntities) {
1451 if ((ctxt->input != NULL) && (ctxt->input->filename))
1452 xmlGenericError(xmlGenericErrorContext,
1453 "%s(%d): ", ctxt->input->filename,
1454 ctxt->input->line);
1455 xmlGenericError(xmlGenericErrorContext,
1456 "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
1457 }
1458 inputPush(ctxt, input);
1459 GROW;
1460}
1461
1462/**
1463 * xmlParseCharRef:
1464 * @ctxt: an XML parser context
1465 *
1466 * parse Reference declarations
1467 *
1468 * [66] CharRef ::= '&#' [0-9]+ ';' |
1469 * '&#x' [0-9a-fA-F]+ ';'
1470 *
1471 * [ WFC: Legal Character ]
1472 * Characters referred to using character references must match the
1473 * production for Char.
1474 *
1475 * Returns the value parsed (as an int), 0 in case of error
1476 */
1477int
1478xmlParseCharRef(xmlParserCtxtPtr ctxt) {
Daniel Veillard50582112001-03-26 22:52:16 +00001479 unsigned int val = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001480 int count = 0;
Daniel Veillard37fd3072004-06-03 11:22:31 +00001481 unsigned int outofrange = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001482
Owen Taylor3473f882001-02-23 17:55:21 +00001483 /*
1484 * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
1485 */
Daniel Veillard561b7f82002-03-20 21:55:57 +00001486 if ((RAW == '&') && (NXT(1) == '#') &&
Owen Taylor3473f882001-02-23 17:55:21 +00001487 (NXT(2) == 'x')) {
1488 SKIP(3);
1489 GROW;
1490 while (RAW != ';') { /* loop blocked by count */
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00001491 if (count++ > 20) {
1492 count = 0;
1493 GROW;
1494 }
1495 if ((RAW >= '0') && (RAW <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +00001496 val = val * 16 + (CUR - '0');
1497 else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
1498 val = val * 16 + (CUR - 'a') + 10;
1499 else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
1500 val = val * 16 + (CUR - 'A') + 10;
1501 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001502 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001503 val = 0;
1504 break;
1505 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00001506 if (val > 0x10FFFF)
1507 outofrange = val;
1508
Owen Taylor3473f882001-02-23 17:55:21 +00001509 NEXT;
1510 count++;
1511 }
1512 if (RAW == ';') {
1513 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
Daniel Veillard77a90a72003-03-22 00:04:05 +00001514 ctxt->input->col++;
Owen Taylor3473f882001-02-23 17:55:21 +00001515 ctxt->nbChars ++;
1516 ctxt->input->cur++;
1517 }
Daniel Veillard561b7f82002-03-20 21:55:57 +00001518 } else if ((RAW == '&') && (NXT(1) == '#')) {
Owen Taylor3473f882001-02-23 17:55:21 +00001519 SKIP(2);
1520 GROW;
1521 while (RAW != ';') { /* loop blocked by count */
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00001522 if (count++ > 20) {
1523 count = 0;
1524 GROW;
1525 }
1526 if ((RAW >= '0') && (RAW <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +00001527 val = val * 10 + (CUR - '0');
1528 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001529 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001530 val = 0;
1531 break;
1532 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00001533 if (val > 0x10FFFF)
1534 outofrange = val;
1535
Owen Taylor3473f882001-02-23 17:55:21 +00001536 NEXT;
1537 count++;
1538 }
1539 if (RAW == ';') {
1540 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
Daniel Veillard77a90a72003-03-22 00:04:05 +00001541 ctxt->input->col++;
Owen Taylor3473f882001-02-23 17:55:21 +00001542 ctxt->nbChars ++;
1543 ctxt->input->cur++;
1544 }
1545 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001546 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001547 }
1548
1549 /*
1550 * [ WFC: Legal Character ]
1551 * Characters referred to using character references must match the
1552 * production for Char.
1553 */
Daniel Veillard37fd3072004-06-03 11:22:31 +00001554 if ((IS_CHAR(val) && (outofrange == 0))) {
Owen Taylor3473f882001-02-23 17:55:21 +00001555 return(val);
1556 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00001557 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
1558 "xmlParseCharRef: invalid xmlChar value %d\n",
1559 val);
Owen Taylor3473f882001-02-23 17:55:21 +00001560 }
1561 return(0);
1562}
1563
1564/**
1565 * xmlParseStringCharRef:
1566 * @ctxt: an XML parser context
1567 * @str: a pointer to an index in the string
1568 *
1569 * parse Reference declarations, variant parsing from a string rather
1570 * than an an input flow.
1571 *
1572 * [66] CharRef ::= '&#' [0-9]+ ';' |
1573 * '&#x' [0-9a-fA-F]+ ';'
1574 *
1575 * [ WFC: Legal Character ]
1576 * Characters referred to using character references must match the
1577 * production for Char.
1578 *
1579 * Returns the value parsed (as an int), 0 in case of error, str will be
1580 * updated to the current value of the index
1581 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001582static int
Owen Taylor3473f882001-02-23 17:55:21 +00001583xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
1584 const xmlChar *ptr;
1585 xmlChar cur;
Daniel Veillard37fd3072004-06-03 11:22:31 +00001586 unsigned int val = 0;
1587 unsigned int outofrange = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001588
1589 if ((str == NULL) || (*str == NULL)) return(0);
1590 ptr = *str;
1591 cur = *ptr;
1592 if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
1593 ptr += 3;
1594 cur = *ptr;
1595 while (cur != ';') { /* Non input consuming loop */
1596 if ((cur >= '0') && (cur <= '9'))
1597 val = val * 16 + (cur - '0');
1598 else if ((cur >= 'a') && (cur <= 'f'))
1599 val = val * 16 + (cur - 'a') + 10;
1600 else if ((cur >= 'A') && (cur <= 'F'))
1601 val = val * 16 + (cur - 'A') + 10;
1602 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001603 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001604 val = 0;
1605 break;
1606 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00001607 if (val > 0x10FFFF)
1608 outofrange = val;
1609
Owen Taylor3473f882001-02-23 17:55:21 +00001610 ptr++;
1611 cur = *ptr;
1612 }
1613 if (cur == ';')
1614 ptr++;
1615 } else if ((cur == '&') && (ptr[1] == '#')){
1616 ptr += 2;
1617 cur = *ptr;
1618 while (cur != ';') { /* Non input consuming loops */
1619 if ((cur >= '0') && (cur <= '9'))
1620 val = val * 10 + (cur - '0');
1621 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001622 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001623 val = 0;
1624 break;
1625 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00001626 if (val > 0x10FFFF)
1627 outofrange = val;
1628
Owen Taylor3473f882001-02-23 17:55:21 +00001629 ptr++;
1630 cur = *ptr;
1631 }
1632 if (cur == ';')
1633 ptr++;
1634 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001635 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001636 return(0);
1637 }
1638 *str = ptr;
1639
1640 /*
1641 * [ WFC: Legal Character ]
1642 * Characters referred to using character references must match the
1643 * production for Char.
1644 */
Daniel Veillard37fd3072004-06-03 11:22:31 +00001645 if ((IS_CHAR(val) && (outofrange == 0))) {
Owen Taylor3473f882001-02-23 17:55:21 +00001646 return(val);
1647 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00001648 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
1649 "xmlParseStringCharRef: invalid xmlChar value %d\n",
1650 val);
Owen Taylor3473f882001-02-23 17:55:21 +00001651 }
1652 return(0);
1653}
1654
1655/**
Daniel Veillardf5582f12002-06-11 10:08:16 +00001656 * xmlNewBlanksWrapperInputStream:
1657 * @ctxt: an XML parser context
1658 * @entity: an Entity pointer
1659 *
1660 * Create a new input stream for wrapping
1661 * blanks around a PEReference
1662 *
1663 * Returns the new input stream or NULL
1664 */
1665
1666static void deallocblankswrapper (xmlChar *str) {xmlFree(str);}
1667
Daniel Veillardf4862f02002-09-10 11:13:43 +00001668static xmlParserInputPtr
Daniel Veillardf5582f12002-06-11 10:08:16 +00001669xmlNewBlanksWrapperInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
1670 xmlParserInputPtr input;
1671 xmlChar *buffer;
1672 size_t length;
1673 if (entity == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001674 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
1675 "xmlNewBlanksWrapperInputStream entity\n");
Daniel Veillardf5582f12002-06-11 10:08:16 +00001676 return(NULL);
1677 }
1678 if (xmlParserDebugEntities)
1679 xmlGenericError(xmlGenericErrorContext,
1680 "new blanks wrapper for entity: %s\n", entity->name);
1681 input = xmlNewInputStream(ctxt);
1682 if (input == NULL) {
1683 return(NULL);
1684 }
1685 length = xmlStrlen(entity->name) + 5;
Daniel Veillard3c908dc2003-04-19 00:07:51 +00001686 buffer = xmlMallocAtomic(length);
Daniel Veillardf5582f12002-06-11 10:08:16 +00001687 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001688 xmlErrMemory(ctxt, NULL);
Daniel Veillardf5582f12002-06-11 10:08:16 +00001689 return(NULL);
1690 }
1691 buffer [0] = ' ';
1692 buffer [1] = '%';
1693 buffer [length-3] = ';';
1694 buffer [length-2] = ' ';
1695 buffer [length-1] = 0;
1696 memcpy(buffer + 2, entity->name, length - 5);
1697 input->free = deallocblankswrapper;
1698 input->base = buffer;
1699 input->cur = buffer;
1700 input->length = length;
1701 input->end = &buffer[length];
1702 return(input);
1703}
1704
1705/**
Owen Taylor3473f882001-02-23 17:55:21 +00001706 * xmlParserHandlePEReference:
1707 * @ctxt: the parser context
1708 *
1709 * [69] PEReference ::= '%' Name ';'
1710 *
1711 * [ WFC: No Recursion ]
1712 * A parsed entity must not contain a recursive
1713 * reference to itself, either directly or indirectly.
1714 *
1715 * [ WFC: Entity Declared ]
1716 * In a document without any DTD, a document with only an internal DTD
1717 * subset which contains no parameter entity references, or a document
1718 * with "standalone='yes'", ... ... The declaration of a parameter
1719 * entity must precede any reference to it...
1720 *
1721 * [ VC: Entity Declared ]
1722 * In a document with an external subset or external parameter entities
1723 * with "standalone='no'", ... ... The declaration of a parameter entity
1724 * must precede any reference to it...
1725 *
1726 * [ WFC: In DTD ]
1727 * Parameter-entity references may only appear in the DTD.
1728 * NOTE: misleading but this is handled.
1729 *
1730 * A PEReference may have been detected in the current input stream
1731 * the handling is done accordingly to
1732 * http://www.w3.org/TR/REC-xml#entproc
1733 * i.e.
1734 * - Included in literal in entity values
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001735 * - Included as Parameter Entity reference within DTDs
Owen Taylor3473f882001-02-23 17:55:21 +00001736 */
1737void
1738xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00001739 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00001740 xmlEntityPtr entity = NULL;
1741 xmlParserInputPtr input;
1742
Owen Taylor3473f882001-02-23 17:55:21 +00001743 if (RAW != '%') return;
1744 switch(ctxt->instate) {
1745 case XML_PARSER_CDATA_SECTION:
1746 return;
1747 case XML_PARSER_COMMENT:
1748 return;
1749 case XML_PARSER_START_TAG:
1750 return;
1751 case XML_PARSER_END_TAG:
1752 return;
1753 case XML_PARSER_EOF:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001754 xmlFatalErr(ctxt, XML_ERR_PEREF_AT_EOF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001755 return;
1756 case XML_PARSER_PROLOG:
1757 case XML_PARSER_START:
1758 case XML_PARSER_MISC:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001759 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_PROLOG, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001760 return;
1761 case XML_PARSER_ENTITY_DECL:
1762 case XML_PARSER_CONTENT:
1763 case XML_PARSER_ATTRIBUTE_VALUE:
1764 case XML_PARSER_PI:
1765 case XML_PARSER_SYSTEM_LITERAL:
Daniel Veillard4a7ae502002-02-18 19:18:17 +00001766 case XML_PARSER_PUBLIC_LITERAL:
Owen Taylor3473f882001-02-23 17:55:21 +00001767 /* we just ignore it there */
1768 return;
1769 case XML_PARSER_EPILOG:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001770 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_EPILOG, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001771 return;
1772 case XML_PARSER_ENTITY_VALUE:
1773 /*
1774 * NOTE: in the case of entity values, we don't do the
1775 * substitution here since we need the literal
1776 * entity value to be able to save the internal
1777 * subset of the document.
1778 * This will be handled by xmlStringDecodeEntities
1779 */
1780 return;
1781 case XML_PARSER_DTD:
1782 /*
1783 * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
1784 * In the internal DTD subset, parameter-entity references
1785 * can occur only where markup declarations can occur, not
1786 * within markup declarations.
1787 * In that case this is handled in xmlParseMarkupDecl
1788 */
1789 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
1790 return;
William M. Brack76e95df2003-10-18 16:20:14 +00001791 if (IS_BLANK_CH(NXT(1)) || NXT(1) == 0)
Daniel Veillardf5582f12002-06-11 10:08:16 +00001792 return;
Owen Taylor3473f882001-02-23 17:55:21 +00001793 break;
1794 case XML_PARSER_IGNORE:
1795 return;
1796 }
1797
1798 NEXT;
1799 name = xmlParseName(ctxt);
1800 if (xmlParserDebugEntities)
1801 xmlGenericError(xmlGenericErrorContext,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001802 "PEReference: %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00001803 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001804 xmlFatalErr(ctxt, XML_ERR_PEREF_NO_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001805 } else {
1806 if (RAW == ';') {
1807 NEXT;
1808 if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL))
1809 entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
1810 if (entity == NULL) {
1811
1812 /*
1813 * [ WFC: Entity Declared ]
1814 * In a document without any DTD, a document with only an
1815 * internal DTD subset which contains no parameter entity
1816 * references, or a document with "standalone='yes'", ...
1817 * ... The declaration of a parameter entity must precede
1818 * any reference to it...
1819 */
1820 if ((ctxt->standalone == 1) ||
1821 ((ctxt->hasExternalSubset == 0) &&
1822 (ctxt->hasPErefs == 0))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00001823 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00001824 "PEReference: %%%s; not found\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00001825 } else {
1826 /*
1827 * [ VC: Entity Declared ]
1828 * In a document with an external subset or external
1829 * parameter entities with "standalone='no'", ...
1830 * ... The declaration of a parameter entity must precede
1831 * any reference to it...
1832 */
Daniel Veillard24eb9782003-10-04 21:08:09 +00001833 if ((ctxt->validate) && (ctxt->vctxt.error != NULL)) {
1834 xmlValidityError(ctxt, XML_WAR_UNDECLARED_ENTITY,
1835 "PEReference: %%%s; not found\n",
1836 name);
1837 } else
1838 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
1839 "PEReference: %%%s; not found\n",
1840 name, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001841 ctxt->valid = 0;
1842 }
Daniel Veillardf5582f12002-06-11 10:08:16 +00001843 } else if (ctxt->input->free != deallocblankswrapper) {
1844 input = xmlNewBlanksWrapperInputStream(ctxt, entity);
1845 xmlPushInput(ctxt, input);
Owen Taylor3473f882001-02-23 17:55:21 +00001846 } else {
1847 if ((entity->etype == XML_INTERNAL_PARAMETER_ENTITY) ||
1848 (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)) {
Daniel Veillard87a764e2001-06-20 17:41:10 +00001849 xmlChar start[4];
1850 xmlCharEncoding enc;
1851
Owen Taylor3473f882001-02-23 17:55:21 +00001852 /*
1853 * handle the extra spaces added before and after
1854 * c.f. http://www.w3.org/TR/REC-xml#as-PE
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001855 * this is done independently.
Owen Taylor3473f882001-02-23 17:55:21 +00001856 */
1857 input = xmlNewEntityInputStream(ctxt, entity);
1858 xmlPushInput(ctxt, input);
Daniel Veillard87a764e2001-06-20 17:41:10 +00001859
1860 /*
1861 * Get the 4 first bytes and decode the charset
1862 * if enc != XML_CHAR_ENCODING_NONE
1863 * plug some encoding conversion routines.
William M. Bracka0c48ad2004-04-16 15:58:29 +00001864 * Note that, since we may have some non-UTF8
1865 * encoding (like UTF16, bug 135229), the 'length'
1866 * is not known, but we can calculate based upon
1867 * the amount of data in the buffer.
Daniel Veillard87a764e2001-06-20 17:41:10 +00001868 */
1869 GROW
William M. Bracka0c48ad2004-04-16 15:58:29 +00001870 if ((ctxt->input->end - ctxt->input->cur)>=4) {
Daniel Veillarde059b892002-06-13 15:32:10 +00001871 start[0] = RAW;
1872 start[1] = NXT(1);
1873 start[2] = NXT(2);
1874 start[3] = NXT(3);
1875 enc = xmlDetectCharEncoding(start, 4);
1876 if (enc != XML_CHAR_ENCODING_NONE) {
1877 xmlSwitchEncoding(ctxt, enc);
1878 }
Daniel Veillard87a764e2001-06-20 17:41:10 +00001879 }
1880
Owen Taylor3473f882001-02-23 17:55:21 +00001881 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
Daniel Veillarda07050d2003-10-19 14:46:32 +00001882 (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l' )) &&
1883 (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +00001884 xmlParseTextDecl(ctxt);
1885 }
Owen Taylor3473f882001-02-23 17:55:21 +00001886 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00001887 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
1888 "PEReference: %s is not a parameter entity\n",
1889 name);
Owen Taylor3473f882001-02-23 17:55:21 +00001890 }
1891 }
1892 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001893 xmlFatalErr(ctxt, XML_ERR_PEREF_SEMICOL_MISSING, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001894 }
Owen Taylor3473f882001-02-23 17:55:21 +00001895 }
1896}
1897
/*
 * Macro used to grow the current buffer.
 *
 * Doubles the companion variable <buffer>_size and reallocates @buffer to
 * that many xmlChar. If the reallocation fails, control jumps to the
 * mem_error label, which the enclosing function must define; @buffer is
 * left pointing to the old block in that case, while <buffer>_size has
 * already been doubled.
 * The expansion is a braced block declaring a local named tmp.
 */
#define growBuffer(buffer) {                                            \
    xmlChar *tmp;                                                       \
    buffer##_size *= 2;                                                 \
    tmp = (xmlChar *)                                                   \
        xmlRealloc(buffer, buffer##_size * sizeof(xmlChar));            \
    if (tmp == NULL) goto mem_error;                                    \
    buffer = tmp;                                                       \
}
1909
1910/**
Daniel Veillard7a02cfe2003-09-25 12:18:34 +00001911 * xmlStringLenDecodeEntities:
Owen Taylor3473f882001-02-23 17:55:21 +00001912 * @ctxt: the parser context
1913 * @str: the input string
Daniel Veillarde57ec792003-09-10 10:50:59 +00001914 * @len: the string length
Owen Taylor3473f882001-02-23 17:55:21 +00001915 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
1916 * @end: an end marker xmlChar, 0 if none
1917 * @end2: an end marker xmlChar, 0 if none
1918 * @end3: an end marker xmlChar, 0 if none
1919 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001920 * Takes a entity string content and process to do the adequate substitutions.
Owen Taylor3473f882001-02-23 17:55:21 +00001921 *
1922 * [67] Reference ::= EntityRef | CharRef
1923 *
1924 * [69] PEReference ::= '%' Name ';'
1925 *
1926 * Returns A newly allocated string with the substitution done. The caller
1927 * must deallocate it !
1928 */
1929xmlChar *
Daniel Veillarde57ec792003-09-10 10:50:59 +00001930xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
1931 int what, xmlChar end, xmlChar end2, xmlChar end3) {
Owen Taylor3473f882001-02-23 17:55:21 +00001932 xmlChar *buffer = NULL;
1933 int buffer_size = 0;
1934
1935 xmlChar *current = NULL;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001936 const xmlChar *last;
Owen Taylor3473f882001-02-23 17:55:21 +00001937 xmlEntityPtr ent;
1938 int c,l;
1939 int nbchars = 0;
1940
Daniel Veillarda82b1822004-11-08 16:24:57 +00001941 if ((ctxt == NULL) || (str == NULL) || (len < 0))
Owen Taylor3473f882001-02-23 17:55:21 +00001942 return(NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001943 last = str + len;
Owen Taylor3473f882001-02-23 17:55:21 +00001944
1945 if (ctxt->depth > 40) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001946 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001947 return(NULL);
1948 }
1949
1950 /*
1951 * allocate a translation buffer.
1952 */
1953 buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
Daniel Veillard3c908dc2003-04-19 00:07:51 +00001954 buffer = (xmlChar *) xmlMallocAtomic(buffer_size * sizeof(xmlChar));
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001955 if (buffer == NULL) goto mem_error;
Owen Taylor3473f882001-02-23 17:55:21 +00001956
1957 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001958 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +00001959 * we are operating on already parsed values.
1960 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00001961 if (str < last)
1962 c = CUR_SCHAR(str, l);
1963 else
1964 c = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001965 while ((c != 0) && (c != end) && /* non input consuming loop */
1966 (c != end2) && (c != end3)) {
1967
1968 if (c == 0) break;
1969 if ((c == '&') && (str[1] == '#')) {
1970 int val = xmlParseStringCharRef(ctxt, &str);
1971 if (val != 0) {
1972 COPY_BUF(0,buffer,nbchars,val);
1973 }
1974 } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
1975 if (xmlParserDebugEntities)
1976 xmlGenericError(xmlGenericErrorContext,
1977 "String decoding Entity Reference: %.30s\n",
1978 str);
1979 ent = xmlParseStringEntityRef(ctxt, &str);
1980 if ((ent != NULL) &&
1981 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
1982 if (ent->content != NULL) {
1983 COPY_BUF(0,buffer,nbchars,ent->content[0]);
1984 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00001985 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
1986 "predefined entity has no content\n");
Owen Taylor3473f882001-02-23 17:55:21 +00001987 }
1988 } else if ((ent != NULL) && (ent->content != NULL)) {
1989 xmlChar *rep;
1990
1991 ctxt->depth++;
1992 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
1993 0, 0, 0);
1994 ctxt->depth--;
1995 if (rep != NULL) {
1996 current = rep;
1997 while (*current != 0) { /* non input consuming loop */
1998 buffer[nbchars++] = *current++;
1999 if (nbchars >
2000 buffer_size - XML_PARSER_BUFFER_SIZE) {
2001 growBuffer(buffer);
2002 }
2003 }
2004 xmlFree(rep);
2005 }
2006 } else if (ent != NULL) {
2007 int i = xmlStrlen(ent->name);
2008 const xmlChar *cur = ent->name;
2009
2010 buffer[nbchars++] = '&';
2011 if (nbchars > buffer_size - i - XML_PARSER_BUFFER_SIZE) {
2012 growBuffer(buffer);
2013 }
2014 for (;i > 0;i--)
2015 buffer[nbchars++] = *cur++;
2016 buffer[nbchars++] = ';';
2017 }
2018 } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
2019 if (xmlParserDebugEntities)
2020 xmlGenericError(xmlGenericErrorContext,
2021 "String decoding PE Reference: %.30s\n", str);
2022 ent = xmlParseStringPEReference(ctxt, &str);
2023 if (ent != NULL) {
2024 xmlChar *rep;
2025
2026 ctxt->depth++;
2027 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2028 0, 0, 0);
2029 ctxt->depth--;
2030 if (rep != NULL) {
2031 current = rep;
2032 while (*current != 0) { /* non input consuming loop */
2033 buffer[nbchars++] = *current++;
2034 if (nbchars >
2035 buffer_size - XML_PARSER_BUFFER_SIZE) {
2036 growBuffer(buffer);
2037 }
2038 }
2039 xmlFree(rep);
2040 }
2041 }
2042 } else {
2043 COPY_BUF(l,buffer,nbchars,c);
2044 str += l;
2045 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
2046 growBuffer(buffer);
2047 }
2048 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00002049 if (str < last)
2050 c = CUR_SCHAR(str, l);
2051 else
2052 c = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002053 }
2054 buffer[nbchars++] = 0;
2055 return(buffer);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002056
2057mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002058 xmlErrMemory(ctxt, NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002059 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002060}
2061
Daniel Veillarde57ec792003-09-10 10:50:59 +00002062/**
2063 * xmlStringDecodeEntities:
2064 * @ctxt: the parser context
2065 * @str: the input string
2066 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2067 * @end: an end marker xmlChar, 0 if none
2068 * @end2: an end marker xmlChar, 0 if none
2069 * @end3: an end marker xmlChar, 0 if none
2070 *
2071 * Takes a entity string content and process to do the adequate substitutions.
2072 *
2073 * [67] Reference ::= EntityRef | CharRef
2074 *
2075 * [69] PEReference ::= '%' Name ';'
2076 *
2077 * Returns A newly allocated string with the substitution done. The caller
2078 * must deallocate it !
2079 */
2080xmlChar *
2081xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
2082 xmlChar end, xmlChar end2, xmlChar end3) {
Daniel Veillarda82b1822004-11-08 16:24:57 +00002083 if ((ctxt == NULL) || (str == NULL)) return(NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00002084 return(xmlStringLenDecodeEntities(ctxt, str, xmlStrlen(str), what,
2085 end, end2, end3));
2086}
Owen Taylor3473f882001-02-23 17:55:21 +00002087
2088/************************************************************************
2089 * *
Owen Taylor3473f882001-02-23 17:55:21 +00002090 * Commodity functions, cleanup needed ? *
2091 * *
2092 ************************************************************************/
2093
2094/**
2095 * areBlanks:
2096 * @ctxt: an XML parser context
2097 * @str: a xmlChar *
2098 * @len: the size of @str
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002099 * @blank_chars: we know the chars are blanks
Owen Taylor3473f882001-02-23 17:55:21 +00002100 *
2101 * Is this a sequence of blank chars that one can ignore ?
2102 *
2103 * Returns 1 if ignorable 0 otherwise.
2104 */
2105
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002106static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2107 int blank_chars) {
Owen Taylor3473f882001-02-23 17:55:21 +00002108 int i, ret;
2109 xmlNodePtr lastChild;
2110
Daniel Veillard05c13a22001-09-09 08:38:09 +00002111 /*
2112 * Don't spend time trying to differentiate them, the same callback is
2113 * used !
2114 */
2115 if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters)
Daniel Veillard2f362242001-03-02 17:36:21 +00002116 return(0);
2117
Owen Taylor3473f882001-02-23 17:55:21 +00002118 /*
2119 * Check for xml:space value.
2120 */
2121 if (*(ctxt->space) == 1)
2122 return(0);
2123
2124 /*
2125 * Check that the string is made of blanks
2126 */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002127 if (blank_chars == 0) {
2128 for (i = 0;i < len;i++)
2129 if (!(IS_BLANK_CH(str[i]))) return(0);
2130 }
Owen Taylor3473f882001-02-23 17:55:21 +00002131
2132 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002133 * Look if the element is mixed content in the DTD if available
Owen Taylor3473f882001-02-23 17:55:21 +00002134 */
Daniel Veillard6dd398f2001-07-25 22:41:03 +00002135 if (ctxt->node == NULL) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002136 if (ctxt->myDoc != NULL) {
2137 ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
2138 if (ret == 0) return(1);
2139 if (ret == 1) return(0);
2140 }
2141
2142 /*
2143 * Otherwise, heuristic :-\
2144 */
Daniel Veillardabac41e2005-07-06 15:17:38 +00002145 if ((RAW != '<') && (RAW != 0xD)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002146 if ((ctxt->node->children == NULL) &&
2147 (RAW == '<') && (NXT(1) == '/')) return(0);
2148
2149 lastChild = xmlGetLastChild(ctxt->node);
2150 if (lastChild == NULL) {
Daniel Veillard7db37732001-07-12 01:20:08 +00002151 if ((ctxt->node->type != XML_ELEMENT_NODE) &&
2152 (ctxt->node->content != NULL)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002153 } else if (xmlNodeIsText(lastChild))
2154 return(0);
2155 else if ((ctxt->node->children != NULL) &&
2156 (xmlNodeIsText(ctxt->node->children)))
2157 return(0);
2158 return(1);
2159}
2160
Owen Taylor3473f882001-02-23 17:55:21 +00002161/************************************************************************
2162 * *
2163 * Extra stuff for namespace support *
2164 * Relates to http://www.w3.org/TR/WD-xml-names *
2165 * *
2166 ************************************************************************/
2167
2168/**
2169 * xmlSplitQName:
2170 * @ctxt: an XML parser context
2171 * @name: an XML qualified name string (UTF-8 encoded)
2172 * @prefix: a xmlChar **
2173 *
2174 * parse an UTF8 encoded XML qualified name string
2175 *
2176 * [NS 5] QName ::= (Prefix ':')? LocalPart
2177 *
2178 * [NS 6] Prefix ::= NCName
2179 *
2180 * [NS 7] LocalPart ::= NCName
2181 *
2182 * Returns the local part, and prefix is updated
2183 * to get the Prefix if any.
2184 */
2185
2186xmlChar *
2187xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
2188 xmlChar buf[XML_MAX_NAMELEN + 5];
2189 xmlChar *buffer = NULL;
2190 int len = 0;
2191 int max = XML_MAX_NAMELEN;
2192 xmlChar *ret = NULL;
2193 const xmlChar *cur = name;
2194 int c;
2195
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00002196 if (prefix == NULL) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002197 *prefix = NULL;
2198
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00002199 if (cur == NULL) return(NULL);
2200
Owen Taylor3473f882001-02-23 17:55:21 +00002201#ifndef XML_XML_NAMESPACE
2202 /* xml: prefix is not really a namespace */
2203 if ((cur[0] == 'x') && (cur[1] == 'm') &&
2204 (cur[2] == 'l') && (cur[3] == ':'))
2205 return(xmlStrdup(name));
2206#endif
2207
Daniel Veillard597bc482003-07-24 16:08:28 +00002208 /* nasty but well-formed */
Owen Taylor3473f882001-02-23 17:55:21 +00002209 if (cur[0] == ':')
2210 return(xmlStrdup(name));
2211
2212 c = *cur++;
2213 while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
2214 buf[len++] = c;
2215 c = *cur++;
2216 }
2217 if (len >= max) {
2218 /*
2219 * Okay someone managed to make a huge name, so he's ready to pay
2220 * for the processing speed.
2221 */
2222 max = len * 2;
2223
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002224 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002225 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002226 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002227 return(NULL);
2228 }
2229 memcpy(buffer, buf, len);
2230 while ((c != 0) && (c != ':')) { /* tested bigname.xml */
2231 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00002232 xmlChar *tmp;
2233
Owen Taylor3473f882001-02-23 17:55:21 +00002234 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00002235 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00002236 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00002237 if (tmp == NULL) {
2238 xmlFree(tmp);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002239 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002240 return(NULL);
2241 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00002242 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00002243 }
2244 buffer[len++] = c;
2245 c = *cur++;
2246 }
2247 buffer[len] = 0;
2248 }
2249
Daniel Veillard597bc482003-07-24 16:08:28 +00002250 /* nasty but well=formed
2251 if ((c == ':') && (*cur == 0)) {
2252 return(xmlStrdup(name));
2253 } */
2254
Owen Taylor3473f882001-02-23 17:55:21 +00002255 if (buffer == NULL)
2256 ret = xmlStrndup(buf, len);
2257 else {
2258 ret = buffer;
2259 buffer = NULL;
2260 max = XML_MAX_NAMELEN;
2261 }
2262
2263
2264 if (c == ':') {
Daniel Veillardbb284f42002-10-16 18:02:47 +00002265 c = *cur;
Owen Taylor3473f882001-02-23 17:55:21 +00002266 *prefix = ret;
Daniel Veillard597bc482003-07-24 16:08:28 +00002267 if (c == 0) {
Daniel Veillard8d73bcb2003-08-04 01:06:15 +00002268 return(xmlStrndup(BAD_CAST "", 0));
Daniel Veillard597bc482003-07-24 16:08:28 +00002269 }
Owen Taylor3473f882001-02-23 17:55:21 +00002270 len = 0;
2271
Daniel Veillardbb284f42002-10-16 18:02:47 +00002272 /*
2273 * Check that the first character is proper to start
2274 * a new name
2275 */
2276 if (!(((c >= 0x61) && (c <= 0x7A)) ||
2277 ((c >= 0x41) && (c <= 0x5A)) ||
2278 (c == '_') || (c == ':'))) {
2279 int l;
2280 int first = CUR_SCHAR(cur, l);
2281
2282 if (!IS_LETTER(first) && (first != '_')) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00002283 xmlFatalErrMsgStr(ctxt, XML_NS_ERR_QNAME,
Daniel Veillardbb284f42002-10-16 18:02:47 +00002284 "Name %s is not XML Namespace compliant\n",
Daniel Veillardbc92eca2003-09-15 09:48:06 +00002285 name);
Daniel Veillardbb284f42002-10-16 18:02:47 +00002286 }
2287 }
2288 cur++;
2289
Owen Taylor3473f882001-02-23 17:55:21 +00002290 while ((c != 0) && (len < max)) { /* tested bigname2.xml */
2291 buf[len++] = c;
2292 c = *cur++;
2293 }
2294 if (len >= max) {
2295 /*
2296 * Okay someone managed to make a huge name, so he's ready to pay
2297 * for the processing speed.
2298 */
2299 max = len * 2;
2300
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002301 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002302 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002303 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002304 return(NULL);
2305 }
2306 memcpy(buffer, buf, len);
2307 while (c != 0) { /* tested bigname2.xml */
2308 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00002309 xmlChar *tmp;
2310
Owen Taylor3473f882001-02-23 17:55:21 +00002311 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00002312 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00002313 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00002314 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002315 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00002316 xmlFree(buffer);
Owen Taylor3473f882001-02-23 17:55:21 +00002317 return(NULL);
2318 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00002319 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00002320 }
2321 buffer[len++] = c;
2322 c = *cur++;
2323 }
2324 buffer[len] = 0;
2325 }
2326
2327 if (buffer == NULL)
2328 ret = xmlStrndup(buf, len);
2329 else {
2330 ret = buffer;
2331 }
2332 }
2333
2334 return(ret);
2335}
2336
/************************************************************************
 *                                                                      *
 *              The parser itself                                       *
 *      Relates to http://www.w3.org/TR/REC-xml                         *
 *                                                                      *
 ************************************************************************/
2343
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002344static const xmlChar * xmlParseNameComplex(xmlParserCtxtPtr ctxt);
Daniel Veillarde57ec792003-09-10 10:50:59 +00002345static xmlChar * xmlParseAttValueInternal(xmlParserCtxtPtr ctxt,
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002346 int *len, int *alloc, int normalize);
Daniel Veillard0fb18932003-09-07 09:14:37 +00002347
Owen Taylor3473f882001-02-23 17:55:21 +00002348/**
2349 * xmlParseName:
2350 * @ctxt: an XML parser context
2351 *
2352 * parse an XML name.
2353 *
2354 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
2355 * CombiningChar | Extender
2356 *
2357 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
2358 *
Daniel Veillard807b4de2004-09-26 14:42:56 +00002359 * [6] Names ::= Name (#x20 Name)*
Owen Taylor3473f882001-02-23 17:55:21 +00002360 *
2361 * Returns the Name parsed or NULL
2362 */
2363
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002364const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00002365xmlParseName(xmlParserCtxtPtr ctxt) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00002366 const xmlChar *in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002367 const xmlChar *ret;
Owen Taylor3473f882001-02-23 17:55:21 +00002368 int count = 0;
2369
2370 GROW;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002371
2372 /*
2373 * Accelerator for simple ASCII names
2374 */
2375 in = ctxt->input->cur;
2376 if (((*in >= 0x61) && (*in <= 0x7A)) ||
2377 ((*in >= 0x41) && (*in <= 0x5A)) ||
2378 (*in == '_') || (*in == ':')) {
2379 in++;
2380 while (((*in >= 0x61) && (*in <= 0x7A)) ||
2381 ((*in >= 0x41) && (*in <= 0x5A)) ||
2382 ((*in >= 0x30) && (*in <= 0x39)) ||
Daniel Veillard76d66f42001-05-16 21:05:17 +00002383 (*in == '_') || (*in == '-') ||
2384 (*in == ':') || (*in == '.'))
Daniel Veillard48b2f892001-02-25 16:11:03 +00002385 in++;
Daniel Veillard76d66f42001-05-16 21:05:17 +00002386 if ((*in > 0) && (*in < 0x80)) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00002387 count = in - ctxt->input->cur;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002388 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
Daniel Veillard48b2f892001-02-25 16:11:03 +00002389 ctxt->input->cur = in;
Daniel Veillard77a90a72003-03-22 00:04:05 +00002390 ctxt->nbChars += count;
2391 ctxt->input->col += count;
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002392 if (ret == NULL)
2393 xmlErrMemory(ctxt, NULL);
Daniel Veillard48b2f892001-02-25 16:11:03 +00002394 return(ret);
2395 }
2396 }
Daniel Veillard2f362242001-03-02 17:36:21 +00002397 return(xmlParseNameComplex(ctxt));
Daniel Veillard21a0f912001-02-25 19:54:14 +00002398}
Daniel Veillard48b2f892001-02-25 16:11:03 +00002399
Daniel Veillard46de64e2002-05-29 08:21:33 +00002400/**
2401 * xmlParseNameAndCompare:
2402 * @ctxt: an XML parser context
2403 *
2404 * parse an XML name and compares for match
2405 * (specialized for endtag parsing)
2406 *
Daniel Veillard46de64e2002-05-29 08:21:33 +00002407 * Returns NULL for an illegal name, (xmlChar*) 1 for success
2408 * and the name for mismatch
2409 */
2410
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002411static const xmlChar *
Daniel Veillard46de64e2002-05-29 08:21:33 +00002412xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002413 register const xmlChar *cmp = other;
2414 register const xmlChar *in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002415 const xmlChar *ret;
Daniel Veillard46de64e2002-05-29 08:21:33 +00002416
2417 GROW;
2418
2419 in = ctxt->input->cur;
2420 while (*in != 0 && *in == *cmp) {
2421 ++in;
2422 ++cmp;
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00002423 ctxt->input->col++;
Daniel Veillard46de64e2002-05-29 08:21:33 +00002424 }
William M. Brack76e95df2003-10-18 16:20:14 +00002425 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
Daniel Veillard46de64e2002-05-29 08:21:33 +00002426 /* success */
2427 ctxt->input->cur = in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002428 return (const xmlChar*) 1;
Daniel Veillard46de64e2002-05-29 08:21:33 +00002429 }
2430 /* failure (or end of input buffer), check with full function */
2431 ret = xmlParseName (ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00002432 /* strings coming from the dictionnary direct compare possible */
2433 if (ret == other) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002434 return (const xmlChar*) 1;
Daniel Veillard46de64e2002-05-29 08:21:33 +00002435 }
2436 return ret;
2437}
2438
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002439static const xmlChar *
Daniel Veillard21a0f912001-02-25 19:54:14 +00002440xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
Daniel Veillard21a0f912001-02-25 19:54:14 +00002441 int len = 0, l;
2442 int c;
2443 int count = 0;
2444
2445 /*
2446 * Handler for more complex cases
2447 */
2448 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00002449 c = CUR_CHAR(l);
2450 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
2451 (!IS_LETTER(c) && (c != '_') &&
2452 (c != ':'))) {
2453 return(NULL);
2454 }
2455
2456 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
William M. Brack871611b2003-10-18 04:53:14 +00002457 ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
Owen Taylor3473f882001-02-23 17:55:21 +00002458 (c == '.') || (c == '-') ||
2459 (c == '_') || (c == ':') ||
William M. Brack871611b2003-10-18 04:53:14 +00002460 (IS_COMBINING(c)) ||
2461 (IS_EXTENDER(c)))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002462 if (count++ > 100) {
2463 count = 0;
2464 GROW;
2465 }
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002466 len += l;
Owen Taylor3473f882001-02-23 17:55:21 +00002467 NEXTL(l);
2468 c = CUR_CHAR(l);
Owen Taylor3473f882001-02-23 17:55:21 +00002469 }
Daniel Veillard96688262005-08-23 18:14:12 +00002470 if ((*ctxt->input->cur == '\n') && (ctxt->input->cur[-1] == '\r'))
2471 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - (len + 1), len));
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002472 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
Owen Taylor3473f882001-02-23 17:55:21 +00002473}
2474
2475/**
2476 * xmlParseStringName:
2477 * @ctxt: an XML parser context
2478 * @str: a pointer to the string pointer (IN/OUT)
2479 *
2480 * parse an XML name.
2481 *
2482 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
2483 * CombiningChar | Extender
2484 *
2485 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
2486 *
Daniel Veillard807b4de2004-09-26 14:42:56 +00002487 * [6] Names ::= Name (#x20 Name)*
Owen Taylor3473f882001-02-23 17:55:21 +00002488 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002489 * Returns the Name parsed or NULL. The @str pointer
Owen Taylor3473f882001-02-23 17:55:21 +00002490 * is updated to the current location in the string.
2491 */
2492
Daniel Veillard56a4cb82001-03-24 17:00:36 +00002493static xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00002494xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
2495 xmlChar buf[XML_MAX_NAMELEN + 5];
2496 const xmlChar *cur = *str;
2497 int len = 0, l;
2498 int c;
2499
2500 c = CUR_SCHAR(cur, l);
William M. Brack871611b2003-10-18 04:53:14 +00002501 if (!IS_LETTER(c) && (c != '_') &&
Owen Taylor3473f882001-02-23 17:55:21 +00002502 (c != ':')) {
2503 return(NULL);
2504 }
2505
William M. Brack871611b2003-10-18 04:53:14 +00002506 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigentname.xml */
Owen Taylor3473f882001-02-23 17:55:21 +00002507 (c == '.') || (c == '-') ||
2508 (c == '_') || (c == ':') ||
William M. Brack871611b2003-10-18 04:53:14 +00002509 (IS_COMBINING(c)) ||
2510 (IS_EXTENDER(c))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002511 COPY_BUF(l,buf,len,c);
2512 cur += l;
2513 c = CUR_SCHAR(cur, l);
2514 if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
2515 /*
2516 * Okay someone managed to make a huge name, so he's ready to pay
2517 * for the processing speed.
2518 */
2519 xmlChar *buffer;
2520 int max = len * 2;
2521
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002522 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002523 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002524 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002525 return(NULL);
2526 }
2527 memcpy(buffer, buf, len);
William M. Brack871611b2003-10-18 04:53:14 +00002528 while ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
Daniel Veillard73b013f2003-09-30 12:36:01 +00002529 /* test bigentname.xml */
Owen Taylor3473f882001-02-23 17:55:21 +00002530 (c == '.') || (c == '-') ||
2531 (c == '_') || (c == ':') ||
William M. Brack871611b2003-10-18 04:53:14 +00002532 (IS_COMBINING(c)) ||
2533 (IS_EXTENDER(c))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002534 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00002535 xmlChar *tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00002536 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00002537 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00002538 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00002539 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002540 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00002541 xmlFree(buffer);
Owen Taylor3473f882001-02-23 17:55:21 +00002542 return(NULL);
2543 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00002544 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00002545 }
2546 COPY_BUF(l,buffer,len,c);
2547 cur += l;
2548 c = CUR_SCHAR(cur, l);
2549 }
2550 buffer[len] = 0;
2551 *str = cur;
2552 return(buffer);
2553 }
2554 }
2555 *str = cur;
2556 return(xmlStrndup(buf, len));
2557}
2558
2559/**
2560 * xmlParseNmtoken:
2561 * @ctxt: an XML parser context
2562 *
2563 * parse an XML Nmtoken.
2564 *
2565 * [7] Nmtoken ::= (NameChar)+
2566 *
Daniel Veillard807b4de2004-09-26 14:42:56 +00002567 * [8] Nmtokens ::= Nmtoken (#x20 Nmtoken)*
Owen Taylor3473f882001-02-23 17:55:21 +00002568 *
2569 * Returns the Nmtoken parsed or NULL
2570 */
2571
2572xmlChar *
2573xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
2574 xmlChar buf[XML_MAX_NAMELEN + 5];
2575 int len = 0, l;
2576 int c;
2577 int count = 0;
2578
2579 GROW;
2580 c = CUR_CHAR(l);
2581
William M. Brack871611b2003-10-18 04:53:14 +00002582 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */
Owen Taylor3473f882001-02-23 17:55:21 +00002583 (c == '.') || (c == '-') ||
2584 (c == '_') || (c == ':') ||
William M. Brack871611b2003-10-18 04:53:14 +00002585 (IS_COMBINING(c)) ||
2586 (IS_EXTENDER(c))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002587 if (count++ > 100) {
2588 count = 0;
2589 GROW;
2590 }
2591 COPY_BUF(l,buf,len,c);
2592 NEXTL(l);
2593 c = CUR_CHAR(l);
2594 if (len >= XML_MAX_NAMELEN) {
2595 /*
2596 * Okay someone managed to make a huge token, so he's ready to pay
2597 * for the processing speed.
2598 */
2599 xmlChar *buffer;
2600 int max = len * 2;
2601
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002602 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002603 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002604 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002605 return(NULL);
2606 }
2607 memcpy(buffer, buf, len);
William M. Brack871611b2003-10-18 04:53:14 +00002608 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */
Owen Taylor3473f882001-02-23 17:55:21 +00002609 (c == '.') || (c == '-') ||
2610 (c == '_') || (c == ':') ||
William M. Brack871611b2003-10-18 04:53:14 +00002611 (IS_COMBINING(c)) ||
2612 (IS_EXTENDER(c))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002613 if (count++ > 100) {
2614 count = 0;
2615 GROW;
2616 }
2617 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00002618 xmlChar *tmp;
2619
Owen Taylor3473f882001-02-23 17:55:21 +00002620 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00002621 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00002622 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00002623 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002624 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00002625 xmlFree(buffer);
Owen Taylor3473f882001-02-23 17:55:21 +00002626 return(NULL);
2627 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00002628 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00002629 }
2630 COPY_BUF(l,buffer,len,c);
2631 NEXTL(l);
2632 c = CUR_CHAR(l);
2633 }
2634 buffer[len] = 0;
2635 return(buffer);
2636 }
2637 }
2638 if (len == 0)
2639 return(NULL);
2640 return(xmlStrndup(buf, len));
2641}
2642
2643/**
2644 * xmlParseEntityValue:
2645 * @ctxt: an XML parser context
2646 * @orig: if non-NULL store a copy of the original entity value
2647 *
2648 * parse a value for ENTITY declarations
2649 *
2650 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
2651 * "'" ([^%&'] | PEReference | Reference)* "'"
2652 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002653 * Returns the EntityValue parsed with reference substituted or NULL
Owen Taylor3473f882001-02-23 17:55:21 +00002654 */
2655
2656xmlChar *
2657xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
2658 xmlChar *buf = NULL;
2659 int len = 0;
2660 int size = XML_PARSER_BUFFER_SIZE;
2661 int c, l;
2662 xmlChar stop;
2663 xmlChar *ret = NULL;
2664 const xmlChar *cur = NULL;
2665 xmlParserInputPtr input;
2666
2667 if (RAW == '"') stop = '"';
2668 else if (RAW == '\'') stop = '\'';
2669 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002670 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002671 return(NULL);
2672 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002673 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002674 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002675 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002676 return(NULL);
2677 }
2678
2679 /*
2680 * The content of the entity definition is copied in a buffer.
2681 */
2682
2683 ctxt->instate = XML_PARSER_ENTITY_VALUE;
2684 input = ctxt->input;
2685 GROW;
2686 NEXT;
2687 c = CUR_CHAR(l);
2688 /*
2689 * NOTE: 4.4.5 Included in Literal
2690 * When a parameter entity reference appears in a literal entity
2691 * value, ... a single or double quote character in the replacement
2692 * text is always treated as a normal data character and will not
2693 * terminate the literal.
2694 * In practice it means we stop the loop only when back at parsing
2695 * the initial entity and the quote is found
2696 */
William M. Brack871611b2003-10-18 04:53:14 +00002697 while ((IS_CHAR(c)) && ((c != stop) || /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00002698 (ctxt->input != input))) {
2699 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00002700 xmlChar *tmp;
2701
Owen Taylor3473f882001-02-23 17:55:21 +00002702 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00002703 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2704 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002705 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00002706 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00002707 return(NULL);
2708 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00002709 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00002710 }
2711 COPY_BUF(l,buf,len,c);
2712 NEXTL(l);
2713 /*
2714 * Pop-up of finished entities.
2715 */
2716 while ((RAW == 0) && (ctxt->inputNr > 1)) /* non input consuming */
2717 xmlPopInput(ctxt);
2718
2719 GROW;
2720 c = CUR_CHAR(l);
2721 if (c == 0) {
2722 GROW;
2723 c = CUR_CHAR(l);
2724 }
2725 }
2726 buf[len] = 0;
2727
2728 /*
2729 * Raise problem w.r.t. '&' and '%' being used in non-entities
2730 * reference constructs. Note Charref will be handled in
2731 * xmlStringDecodeEntities()
2732 */
2733 cur = buf;
2734 while (*cur != 0) { /* non input consuming */
2735 if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
2736 xmlChar *name;
2737 xmlChar tmp = *cur;
2738
2739 cur++;
2740 name = xmlParseStringName(ctxt, &cur);
2741 if ((name == NULL) || (*cur != ';')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00002742 xmlFatalErrMsgInt(ctxt, XML_ERR_ENTITY_CHAR_ERROR,
Owen Taylor3473f882001-02-23 17:55:21 +00002743 "EntityValue: '%c' forbidden except for entities references\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00002744 tmp);
Owen Taylor3473f882001-02-23 17:55:21 +00002745 }
Daniel Veillard5151c062001-10-23 13:10:19 +00002746 if ((tmp == '%') && (ctxt->inSubset == 1) &&
2747 (ctxt->inputNr == 1)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002748 xmlFatalErr(ctxt, XML_ERR_ENTITY_PE_INTERNAL, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002749 }
2750 if (name != NULL)
2751 xmlFree(name);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00002752 if (*cur == 0)
2753 break;
Owen Taylor3473f882001-02-23 17:55:21 +00002754 }
2755 cur++;
2756 }
2757
2758 /*
2759 * Then PEReference entities are substituted.
2760 */
2761 if (c != stop) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002762 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002763 xmlFree(buf);
2764 } else {
2765 NEXT;
2766 /*
2767 * NOTE: 4.4.7 Bypassed
2768 * When a general entity reference appears in the EntityValue in
2769 * an entity declaration, it is bypassed and left as is.
2770 * so XML_SUBSTITUTE_REF is not set here.
2771 */
2772 ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF,
2773 0, 0, 0);
2774 if (orig != NULL)
2775 *orig = buf;
2776 else
2777 xmlFree(buf);
2778 }
2779
2780 return(ret);
2781}
2782
2783/**
Daniel Veillard01c13b52002-12-10 15:19:08 +00002784 * xmlParseAttValueComplex:
2785 * @ctxt: an XML parser context
Daniel Veillard0fb18932003-09-07 09:14:37 +00002786 * @len: the resulting attribute len
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002787 * @normalize: wether to apply the inner normalization
Daniel Veillard01c13b52002-12-10 15:19:08 +00002788 *
2789 * parse a value for an attribute, this is the fallback function
2790 * of xmlParseAttValue() when the attribute parsing requires handling
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002791 * of non-ASCII characters, or normalization compaction.
Daniel Veillard01c13b52002-12-10 15:19:08 +00002792 *
2793 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
2794 */
Daniel Veillard0fb18932003-09-07 09:14:37 +00002795static xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002796xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) {
Daniel Veillarde72c7562002-05-31 09:47:30 +00002797 xmlChar limit = 0;
2798 xmlChar *buf = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00002799 int len = 0;
2800 int buf_size = 0;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002801 int c, l, in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002802 xmlChar *current = NULL;
2803 xmlEntityPtr ent;
2804
Owen Taylor3473f882001-02-23 17:55:21 +00002805 if (NXT(0) == '"') {
2806 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
2807 limit = '"';
2808 NEXT;
2809 } else if (NXT(0) == '\'') {
2810 limit = '\'';
2811 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
2812 NEXT;
2813 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002814 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002815 return(NULL);
2816 }
2817
2818 /*
2819 * allocate a translation buffer.
2820 */
2821 buf_size = XML_PARSER_BUFFER_SIZE;
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002822 buf = (xmlChar *) xmlMallocAtomic(buf_size * sizeof(xmlChar));
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002823 if (buf == NULL) goto mem_error;
Owen Taylor3473f882001-02-23 17:55:21 +00002824
2825 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002826 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +00002827 */
2828 c = CUR_CHAR(l);
Daniel Veillardfdc91562002-07-01 21:52:03 +00002829 while ((NXT(0) != limit) && /* checked */
2830 (c != '<')) {
Owen Taylor3473f882001-02-23 17:55:21 +00002831 if (c == 0) break;
Daniel Veillardfdc91562002-07-01 21:52:03 +00002832 if (c == '&') {
Daniel Veillard62998c02003-09-15 12:56:36 +00002833 in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002834 if (NXT(1) == '#') {
2835 int val = xmlParseCharRef(ctxt);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002836
Owen Taylor3473f882001-02-23 17:55:21 +00002837 if (val == '&') {
Daniel Veillard319a7422001-09-11 09:27:09 +00002838 if (ctxt->replaceEntities) {
2839 if (len > buf_size - 10) {
2840 growBuffer(buf);
2841 }
2842 buf[len++] = '&';
2843 } else {
2844 /*
2845 * The reparsing will be done in xmlStringGetNodeList()
2846 * called by the attribute() function in SAX.c
2847 */
Daniel Veillard319a7422001-09-11 09:27:09 +00002848 if (len > buf_size - 10) {
2849 growBuffer(buf);
2850 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002851 buf[len++] = '&';
2852 buf[len++] = '#';
2853 buf[len++] = '3';
2854 buf[len++] = '8';
2855 buf[len++] = ';';
Owen Taylor3473f882001-02-23 17:55:21 +00002856 }
2857 } else {
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00002858 if (len > buf_size - 10) {
2859 growBuffer(buf);
2860 }
Owen Taylor3473f882001-02-23 17:55:21 +00002861 len += xmlCopyChar(0, &buf[len], val);
2862 }
2863 } else {
2864 ent = xmlParseEntityRef(ctxt);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002865 if ((ent != NULL) &&
2866 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
2867 if (len > buf_size - 10) {
2868 growBuffer(buf);
2869 }
2870 if ((ctxt->replaceEntities == 0) &&
2871 (ent->content[0] == '&')) {
2872 buf[len++] = '&';
2873 buf[len++] = '#';
2874 buf[len++] = '3';
2875 buf[len++] = '8';
2876 buf[len++] = ';';
2877 } else {
2878 buf[len++] = ent->content[0];
2879 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002880 } else if ((ent != NULL) &&
2881 (ctxt->replaceEntities != 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00002882 xmlChar *rep;
2883
2884 if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
2885 rep = xmlStringDecodeEntities(ctxt, ent->content,
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002886 XML_SUBSTITUTE_REF,
2887 0, 0, 0);
Owen Taylor3473f882001-02-23 17:55:21 +00002888 if (rep != NULL) {
2889 current = rep;
2890 while (*current != 0) { /* non input consuming */
2891 buf[len++] = *current++;
2892 if (len > buf_size - 10) {
2893 growBuffer(buf);
2894 }
2895 }
2896 xmlFree(rep);
2897 }
2898 } else {
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00002899 if (len > buf_size - 10) {
2900 growBuffer(buf);
2901 }
Owen Taylor3473f882001-02-23 17:55:21 +00002902 if (ent->content != NULL)
2903 buf[len++] = ent->content[0];
2904 }
2905 } else if (ent != NULL) {
2906 int i = xmlStrlen(ent->name);
2907 const xmlChar *cur = ent->name;
2908
2909 /*
2910 * This may look absurd but is needed to detect
2911 * entities problems
2912 */
2913 if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
2914 (ent->content != NULL)) {
2915 xmlChar *rep;
2916 rep = xmlStringDecodeEntities(ctxt, ent->content,
2917 XML_SUBSTITUTE_REF, 0, 0, 0);
2918 if (rep != NULL)
2919 xmlFree(rep);
2920 }
2921
2922 /*
2923 * Just output the reference
2924 */
2925 buf[len++] = '&';
2926 if (len > buf_size - i - 10) {
2927 growBuffer(buf);
2928 }
2929 for (;i > 0;i--)
2930 buf[len++] = *cur++;
2931 buf[len++] = ';';
2932 }
2933 }
2934 } else {
2935 if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002936 if ((len != 0) || (!normalize)) {
2937 if ((!normalize) || (!in_space)) {
2938 COPY_BUF(l,buf,len,0x20);
2939 if (len > buf_size - 10) {
2940 growBuffer(buf);
2941 }
2942 }
2943 in_space = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002944 }
2945 } else {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002946 in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002947 COPY_BUF(l,buf,len,c);
2948 if (len > buf_size - 10) {
2949 growBuffer(buf);
2950 }
2951 }
2952 NEXTL(l);
2953 }
2954 GROW;
2955 c = CUR_CHAR(l);
2956 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002957 if ((in_space) && (normalize)) {
2958 while (buf[len - 1] == 0x20) len--;
2959 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00002960 buf[len] = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002961 if (RAW == '<') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002962 xmlFatalErr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002963 } else if (RAW != limit) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00002964 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
2965 "AttValue: ' expected\n");
Owen Taylor3473f882001-02-23 17:55:21 +00002966 } else
2967 NEXT;
Daniel Veillard0fb18932003-09-07 09:14:37 +00002968 if (attlen != NULL) *attlen = len;
Owen Taylor3473f882001-02-23 17:55:21 +00002969 return(buf);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002970
2971mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002972 xmlErrMemory(ctxt, NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002973 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002974}
2975
2976/**
Daniel Veillard0fb18932003-09-07 09:14:37 +00002977 * xmlParseAttValue:
2978 * @ctxt: an XML parser context
2979 *
2980 * parse a value for an attribute
2981 * Note: the parser won't do substitution of entities here, this
2982 * will be handled later in xmlStringGetNodeList
2983 *
2984 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
2985 * "'" ([^<&'] | Reference)* "'"
2986 *
2987 * 3.3.3 Attribute-Value Normalization:
2988 * Before the value of an attribute is passed to the application or
2989 * checked for validity, the XML processor must normalize it as follows:
2990 * - a character reference is processed by appending the referenced
2991 * character to the attribute value
2992 * - an entity reference is processed by recursively processing the
2993 * replacement text of the entity
2994 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
2995 * appending #x20 to the normalized value, except that only a single
2996 * #x20 is appended for a "#xD#xA" sequence that is part of an external
2997 * parsed entity or the literal entity value of an internal parsed entity
2998 * - other characters are processed by appending them to the normalized value
2999 * If the declared value is not CDATA, then the XML processor must further
3000 * process the normalized attribute value by discarding any leading and
3001 * trailing space (#x20) characters, and by replacing sequences of space
3002 * (#x20) characters by a single space (#x20) character.
3003 * All attributes for which no declaration has been read should be treated
3004 * by a non-validating parser as if declared CDATA.
3005 *
3006 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
3007 */
3008
3009
3010xmlChar *
3011xmlParseAttValue(xmlParserCtxtPtr ctxt) {
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00003012 if ((ctxt == NULL) || (ctxt->input == NULL)) return(NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003013 return(xmlParseAttValueInternal(ctxt, NULL, NULL, 0));
Daniel Veillard0fb18932003-09-07 09:14:37 +00003014}
3015
3016/**
Owen Taylor3473f882001-02-23 17:55:21 +00003017 * xmlParseSystemLiteral:
3018 * @ctxt: an XML parser context
3019 *
3020 * parse an XML Literal
3021 *
3022 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
3023 *
3024 * Returns the SystemLiteral parsed or NULL
3025 */
3026
3027xmlChar *
3028xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
3029 xmlChar *buf = NULL;
3030 int len = 0;
3031 int size = XML_PARSER_BUFFER_SIZE;
3032 int cur, l;
3033 xmlChar stop;
3034 int state = ctxt->instate;
3035 int count = 0;
3036
3037 SHRINK;
3038 if (RAW == '"') {
3039 NEXT;
3040 stop = '"';
3041 } else if (RAW == '\'') {
3042 NEXT;
3043 stop = '\'';
3044 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003045 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003046 return(NULL);
3047 }
3048
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003049 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003050 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003051 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003052 return(NULL);
3053 }
3054 ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
3055 cur = CUR_CHAR(l);
William M. Brack871611b2003-10-18 04:53:14 +00003056 while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00003057 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003058 xmlChar *tmp;
3059
Owen Taylor3473f882001-02-23 17:55:21 +00003060 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003061 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3062 if (tmp == NULL) {
3063 xmlFree(buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003064 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003065 ctxt->instate = (xmlParserInputState) state;
3066 return(NULL);
3067 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003068 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003069 }
3070 count++;
3071 if (count > 50) {
3072 GROW;
3073 count = 0;
3074 }
3075 COPY_BUF(l,buf,len,cur);
3076 NEXTL(l);
3077 cur = CUR_CHAR(l);
3078 if (cur == 0) {
3079 GROW;
3080 SHRINK;
3081 cur = CUR_CHAR(l);
3082 }
3083 }
3084 buf[len] = 0;
3085 ctxt->instate = (xmlParserInputState) state;
William M. Brack871611b2003-10-18 04:53:14 +00003086 if (!IS_CHAR(cur)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003087 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003088 } else {
3089 NEXT;
3090 }
3091 return(buf);
3092}
3093
3094/**
3095 * xmlParsePubidLiteral:
3096 * @ctxt: an XML parser context
3097 *
3098 * parse an XML public literal
3099 *
3100 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
3101 *
3102 * Returns the PubidLiteral parsed or NULL.
3103 */
3104
3105xmlChar *
3106xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
3107 xmlChar *buf = NULL;
3108 int len = 0;
3109 int size = XML_PARSER_BUFFER_SIZE;
3110 xmlChar cur;
3111 xmlChar stop;
3112 int count = 0;
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003113 xmlParserInputState oldstate = ctxt->instate;
Owen Taylor3473f882001-02-23 17:55:21 +00003114
3115 SHRINK;
3116 if (RAW == '"') {
3117 NEXT;
3118 stop = '"';
3119 } else if (RAW == '\'') {
3120 NEXT;
3121 stop = '\'';
3122 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003123 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003124 return(NULL);
3125 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003126 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003127 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003128 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003129 return(NULL);
3130 }
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003131 ctxt->instate = XML_PARSER_PUBLIC_LITERAL;
Owen Taylor3473f882001-02-23 17:55:21 +00003132 cur = CUR;
William M. Brack76e95df2003-10-18 16:20:14 +00003133 while ((IS_PUBIDCHAR_CH(cur)) && (cur != stop)) { /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00003134 if (len + 1 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003135 xmlChar *tmp;
3136
Owen Taylor3473f882001-02-23 17:55:21 +00003137 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003138 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3139 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003140 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00003141 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00003142 return(NULL);
3143 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003144 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003145 }
3146 buf[len++] = cur;
3147 count++;
3148 if (count > 50) {
3149 GROW;
3150 count = 0;
3151 }
3152 NEXT;
3153 cur = CUR;
3154 if (cur == 0) {
3155 GROW;
3156 SHRINK;
3157 cur = CUR;
3158 }
3159 }
3160 buf[len] = 0;
3161 if (cur != stop) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003162 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003163 } else {
3164 NEXT;
3165 }
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003166 ctxt->instate = oldstate;
Owen Taylor3473f882001-02-23 17:55:21 +00003167 return(buf);
3168}
3169
Daniel Veillard48b2f892001-02-25 16:11:03 +00003170void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata);
Daniel Veillard0a119eb2005-07-20 13:46:00 +00003171
/*
 * Lookup table driving the inner scanning loop of xmlParseCharData().
 * It is indexed by the byte under the cursor; the loop keeps advancing
 * for as long as the entry is non-zero.
 *
 * An entry is non-zero (it holds the byte value itself) for TAB (0x09)
 * and for 0x20..0x7F with the exception of '&', '<' and ']'. Every other
 * byte, which includes NUL, CR, LF and all bytes from 0x80 up, maps to
 * 0x00 and therefore stops the loop.
 */
static const unsigned char test_char_data[256] = {
    /* 0x00 - 0x07 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x08 - 0x0F */ 0x00, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* TAB only; CR/LF are handled separately */
    /* 0x10 - 0x17 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x18 - 0x1F */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x20 - 0x27 */ 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x00, 0x27, /* '&' stops */
    /* 0x28 - 0x2F */ 0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
    /* 0x30 - 0x37 */ 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
    /* 0x38 - 0x3F */ 0x38, 0x39, 0x3A, 0x3B, 0x00, 0x3D, 0x3E, 0x3F, /* '<' stops */
    /* 0x40 - 0x47 */ 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
    /* 0x48 - 0x4F */ 0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F,
    /* 0x50 - 0x57 */ 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
    /* 0x58 - 0x5F */ 0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x00, 0x5E, 0x5F, /* ']' stops */
    /* 0x60 - 0x67 */ 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
    /* 0x68 - 0x6F */ 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
    /* 0x70 - 0x77 */ 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
    /* 0x78 - 0x7F */ 0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F,
    /* 0x80 - 0x87 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* non-ascii from here on */
    /* 0x88 - 0x8F */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x90 - 0x97 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x98 - 0x9F */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0xA0 - 0xA7 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0xA8 - 0xAF */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0xB0 - 0xB7 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0xB8 - 0xBF */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0xC0 - 0xC7 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0xC8 - 0xCF */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0xD0 - 0xD7 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0xD8 - 0xDF */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0xE0 - 0xE7 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0xE8 - 0xEF */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0xF0 - 0xF7 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0xF8 - 0xFF */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
};
3209
Owen Taylor3473f882001-02-23 17:55:21 +00003210/**
3211 * xmlParseCharData:
3212 * @ctxt: an XML parser context
3213 * @cdata: int indicating whether we are within a CDATA section
3214 *
3215 * parse a CharData section.
3216 * if we are within a CDATA section ']]>' marks an end of section.
3217 *
3218 * The right angle bracket (>) may be represented using the string "&gt;",
3219 * and must, for compatibility, be escaped using "&gt;" or a character
3220 * reference when it appears in the string "]]>" in content, when that
3221 * string is not marking the end of a CDATA section.
3222 *
3223 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
3224 */
3225
3226void
3227xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00003228 const xmlChar *in;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003229 int nbchar = 0;
Daniel Veillard50582112001-03-26 22:52:16 +00003230 int line = ctxt->input->line;
3231 int col = ctxt->input->col;
Daniel Veillard0714c5b2005-01-23 00:01:01 +00003232 int ccol;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003233
3234 SHRINK;
3235 GROW;
3236 /*
3237 * Accelerated common case where input don't need to be
3238 * modified before passing it to the handler.
3239 */
Daniel Veillardfdc91562002-07-01 21:52:03 +00003240 if (!cdata) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00003241 in = ctxt->input->cur;
3242 do {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003243get_more_space:
3244 while (*in == 0x20) in++;
3245 if (*in == 0xA) {
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00003246 ctxt->input->line++; ctxt->input->col = 1;
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003247 in++;
3248 while (*in == 0xA) {
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00003249 ctxt->input->line++; ctxt->input->col = 1;
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003250 in++;
3251 }
3252 goto get_more_space;
3253 }
3254 if (*in == '<') {
3255 nbchar = in - ctxt->input->cur;
3256 if (nbchar > 0) {
3257 const xmlChar *tmp = ctxt->input->cur;
3258 ctxt->input->cur = in;
3259
Daniel Veillard34099b42004-11-04 17:34:35 +00003260 if ((ctxt->sax != NULL) &&
3261 (ctxt->sax->ignorableWhitespace !=
3262 ctxt->sax->characters)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003263 if (areBlanks(ctxt, tmp, nbchar, 1)) {
Daniel Veillard32acf0c2005-03-31 14:12:37 +00003264 if (ctxt->sax->ignorableWhitespace != NULL)
3265 ctxt->sax->ignorableWhitespace(ctxt->userData,
3266 tmp, nbchar);
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003267 } else if (ctxt->sax->characters != NULL)
3268 ctxt->sax->characters(ctxt->userData,
3269 tmp, nbchar);
Daniel Veillard34099b42004-11-04 17:34:35 +00003270 } else if ((ctxt->sax != NULL) &&
3271 (ctxt->sax->characters != NULL)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003272 ctxt->sax->characters(ctxt->userData,
3273 tmp, nbchar);
3274 }
3275 }
3276 return;
3277 }
Daniel Veillard0714c5b2005-01-23 00:01:01 +00003278
Daniel Veillard3ed155f2001-04-29 19:56:59 +00003279get_more:
Daniel Veillard0714c5b2005-01-23 00:01:01 +00003280 ccol = ctxt->input->col;
Daniel Veillard0a119eb2005-07-20 13:46:00 +00003281 while (test_char_data[*in]) {
3282 in++;
3283 ccol++;
3284 }
Daniel Veillard0714c5b2005-01-23 00:01:01 +00003285 ctxt->input->col = ccol;
Daniel Veillard561b7f82002-03-20 21:55:57 +00003286 if (*in == 0xA) {
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00003287 ctxt->input->line++; ctxt->input->col = 1;
Daniel Veillard3ed155f2001-04-29 19:56:59 +00003288 in++;
Daniel Veillard561b7f82002-03-20 21:55:57 +00003289 while (*in == 0xA) {
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00003290 ctxt->input->line++; ctxt->input->col = 1;
Daniel Veillard3ed155f2001-04-29 19:56:59 +00003291 in++;
3292 }
3293 goto get_more;
Daniel Veillard561b7f82002-03-20 21:55:57 +00003294 }
3295 if (*in == ']') {
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00003296 if ((in[1] == ']') && (in[2] == '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003297 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00003298 ctxt->input->cur = in;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00003299 return;
3300 }
3301 in++;
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00003302 ctxt->input->col++;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00003303 goto get_more;
3304 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00003305 nbchar = in - ctxt->input->cur;
Daniel Veillard80f32572001-03-07 19:45:40 +00003306 if (nbchar > 0) {
Daniel Veillard34099b42004-11-04 17:34:35 +00003307 if ((ctxt->sax != NULL) &&
3308 (ctxt->sax->ignorableWhitespace !=
Daniel Veillard40412cd2003-09-03 13:28:32 +00003309 ctxt->sax->characters) &&
William M. Brack76e95df2003-10-18 16:20:14 +00003310 (IS_BLANK_CH(*ctxt->input->cur))) {
Daniel Veillarda7374592001-05-10 14:17:55 +00003311 const xmlChar *tmp = ctxt->input->cur;
3312 ctxt->input->cur = in;
Daniel Veillard40412cd2003-09-03 13:28:32 +00003313
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003314 if (areBlanks(ctxt, tmp, nbchar, 0)) {
Daniel Veillard32acf0c2005-03-31 14:12:37 +00003315 if (ctxt->sax->ignorableWhitespace != NULL)
3316 ctxt->sax->ignorableWhitespace(ctxt->userData,
3317 tmp, nbchar);
Daniel Veillard40412cd2003-09-03 13:28:32 +00003318 } else if (ctxt->sax->characters != NULL)
3319 ctxt->sax->characters(ctxt->userData,
3320 tmp, nbchar);
Daniel Veillard3ed27bd2001-06-17 17:58:17 +00003321 line = ctxt->input->line;
3322 col = ctxt->input->col;
Daniel Veillard34099b42004-11-04 17:34:35 +00003323 } else if (ctxt->sax != NULL) {
Daniel Veillard80f32572001-03-07 19:45:40 +00003324 if (ctxt->sax->characters != NULL)
3325 ctxt->sax->characters(ctxt->userData,
3326 ctxt->input->cur, nbchar);
Daniel Veillard3ed27bd2001-06-17 17:58:17 +00003327 line = ctxt->input->line;
3328 col = ctxt->input->col;
Daniel Veillard80f32572001-03-07 19:45:40 +00003329 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00003330 }
3331 ctxt->input->cur = in;
Daniel Veillard561b7f82002-03-20 21:55:57 +00003332 if (*in == 0xD) {
3333 in++;
3334 if (*in == 0xA) {
3335 ctxt->input->cur = in;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003336 in++;
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00003337 ctxt->input->line++; ctxt->input->col = 1;
Daniel Veillard561b7f82002-03-20 21:55:57 +00003338 continue; /* while */
Daniel Veillard48b2f892001-02-25 16:11:03 +00003339 }
Daniel Veillard561b7f82002-03-20 21:55:57 +00003340 in--;
3341 }
3342 if (*in == '<') {
3343 return;
3344 }
3345 if (*in == '&') {
3346 return;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003347 }
3348 SHRINK;
3349 GROW;
3350 in = ctxt->input->cur;
William M. Brackc07329e2003-09-08 01:57:30 +00003351 } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
Daniel Veillard48b2f892001-02-25 16:11:03 +00003352 nbchar = 0;
3353 }
Daniel Veillard50582112001-03-26 22:52:16 +00003354 ctxt->input->line = line;
3355 ctxt->input->col = col;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003356 xmlParseCharDataComplex(ctxt, cdata);
3357}
3358
Daniel Veillard01c13b52002-12-10 15:19:08 +00003359/**
3360 * xmlParseCharDataComplex:
3361 * @ctxt: an XML parser context
3362 * @cdata: int indicating whether we are within a CDATA section
3363 *
3364 * parse a CharData section.this is the fallback function
3365 * of xmlParseCharData() when the parsing requires handling
3366 * of non-ASCII characters.
3367 */
Daniel Veillard48b2f892001-02-25 16:11:03 +00003368void
3369xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) {
Owen Taylor3473f882001-02-23 17:55:21 +00003370 xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
3371 int nbchar = 0;
3372 int cur, l;
3373 int count = 0;
3374
3375 SHRINK;
3376 GROW;
3377 cur = CUR_CHAR(l);
Daniel Veillardfdc91562002-07-01 21:52:03 +00003378 while ((cur != '<') && /* checked */
3379 (cur != '&') &&
William M. Brack871611b2003-10-18 04:53:14 +00003380 (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
Owen Taylor3473f882001-02-23 17:55:21 +00003381 if ((cur == ']') && (NXT(1) == ']') &&
3382 (NXT(2) == '>')) {
3383 if (cdata) break;
3384 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003385 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003386 }
3387 }
3388 COPY_BUF(l,buf,nbchar,cur);
3389 if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
Daniel Veillard092643b2003-09-25 14:29:29 +00003390 buf[nbchar] = 0;
3391
Owen Taylor3473f882001-02-23 17:55:21 +00003392 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003393 * OK the segment is to be consumed as chars.
Owen Taylor3473f882001-02-23 17:55:21 +00003394 */
3395 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003396 if (areBlanks(ctxt, buf, nbchar, 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003397 if (ctxt->sax->ignorableWhitespace != NULL)
3398 ctxt->sax->ignorableWhitespace(ctxt->userData,
3399 buf, nbchar);
3400 } else {
3401 if (ctxt->sax->characters != NULL)
3402 ctxt->sax->characters(ctxt->userData, buf, nbchar);
3403 }
3404 }
3405 nbchar = 0;
3406 }
3407 count++;
3408 if (count > 50) {
3409 GROW;
3410 count = 0;
3411 }
3412 NEXTL(l);
3413 cur = CUR_CHAR(l);
3414 }
3415 if (nbchar != 0) {
Daniel Veillard092643b2003-09-25 14:29:29 +00003416 buf[nbchar] = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003417 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003418 * OK the segment is to be consumed as chars.
Owen Taylor3473f882001-02-23 17:55:21 +00003419 */
3420 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003421 if (areBlanks(ctxt, buf, nbchar, 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003422 if (ctxt->sax->ignorableWhitespace != NULL)
3423 ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
3424 } else {
3425 if (ctxt->sax->characters != NULL)
3426 ctxt->sax->characters(ctxt->userData, buf, nbchar);
3427 }
3428 }
3429 }
3430}
3431
3432/**
3433 * xmlParseExternalID:
3434 * @ctxt: an XML parser context
3435 * @publicID: a xmlChar** receiving PubidLiteral
3436 * @strict: indicate whether we should restrict parsing to only
3437 * production [75], see NOTE below
3438 *
3439 * Parse an External ID or a Public ID
3440 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003441 * NOTE: Productions [75] and [83] interact badly since [75] can generate
Owen Taylor3473f882001-02-23 17:55:21 +00003442 * 'PUBLIC' S PubidLiteral S SystemLiteral
3443 *
3444 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
3445 * | 'PUBLIC' S PubidLiteral S SystemLiteral
3446 *
3447 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
3448 *
3449 * Returns the function returns SystemLiteral and in the second
3450 * case publicID receives PubidLiteral, is strict is off
3451 * it is possible to return NULL and have publicID set.
3452 */
3453
3454xmlChar *
3455xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
3456 xmlChar *URI = NULL;
3457
3458 SHRINK;
Daniel Veillard146c9122001-03-22 15:22:27 +00003459
3460 *publicID = NULL;
Daniel Veillarda07050d2003-10-19 14:46:32 +00003461 if (CMP6(CUR_PTR, 'S', 'Y', 'S', 'T', 'E', 'M')) {
Owen Taylor3473f882001-02-23 17:55:21 +00003462 SKIP(6);
William M. Brack76e95df2003-10-18 16:20:14 +00003463 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003464 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
3465 "Space required after 'SYSTEM'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003466 }
3467 SKIP_BLANKS;
3468 URI = xmlParseSystemLiteral(ctxt);
3469 if (URI == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003470 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003471 }
Daniel Veillarda07050d2003-10-19 14:46:32 +00003472 } else if (CMP6(CUR_PTR, 'P', 'U', 'B', 'L', 'I', 'C')) {
Owen Taylor3473f882001-02-23 17:55:21 +00003473 SKIP(6);
William M. Brack76e95df2003-10-18 16:20:14 +00003474 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003475 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00003476 "Space required after 'PUBLIC'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003477 }
3478 SKIP_BLANKS;
3479 *publicID = xmlParsePubidLiteral(ctxt);
3480 if (*publicID == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003481 xmlFatalErr(ctxt, XML_ERR_PUBID_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003482 }
3483 if (strict) {
3484 /*
3485 * We don't handle [83] so "S SystemLiteral" is required.
3486 */
William M. Brack76e95df2003-10-18 16:20:14 +00003487 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003488 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00003489 "Space required after the Public Identifier\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003490 }
3491 } else {
3492 /*
3493 * We handle [83] so we return immediately, if
3494 * "S SystemLiteral" is not detected. From a purely parsing
3495 * point of view that's a nice mess.
3496 */
3497 const xmlChar *ptr;
3498 GROW;
3499
3500 ptr = CUR_PTR;
William M. Brack76e95df2003-10-18 16:20:14 +00003501 if (!IS_BLANK_CH(*ptr)) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003502
William M. Brack76e95df2003-10-18 16:20:14 +00003503 while (IS_BLANK_CH(*ptr)) ptr++; /* TODO: dangerous, fix ! */
Owen Taylor3473f882001-02-23 17:55:21 +00003504 if ((*ptr != '\'') && (*ptr != '"')) return(NULL);
3505 }
3506 SKIP_BLANKS;
3507 URI = xmlParseSystemLiteral(ctxt);
3508 if (URI == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003509 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003510 }
3511 }
3512 return(URI);
3513}
3514
3515/**
Daniel Veillard4c778d82005-01-23 17:37:44 +00003516 * xmlParseCommentComplex:
Owen Taylor3473f882001-02-23 17:55:21 +00003517 * @ctxt: an XML parser context
Daniel Veillard4c778d82005-01-23 17:37:44 +00003518 * @buf: the already parsed part of the buffer
3519 * @len: number of bytes filles in the buffer
3520 * @size: allocated size of the buffer
Owen Taylor3473f882001-02-23 17:55:21 +00003521 *
3522 * Skip an XML (SGML) comment <!-- .... -->
3523 * The spec says that "For compatibility, the string "--" (double-hyphen)
3524 * must not occur within comments. "
Daniel Veillard4c778d82005-01-23 17:37:44 +00003525 * This is the slow routine in case the accelerator for ascii didn't work
Owen Taylor3473f882001-02-23 17:55:21 +00003526 *
3527 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
3528 */
Daniel Veillard4c778d82005-01-23 17:37:44 +00003529static void
3530xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar *buf, int len, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +00003531 int q, ql;
3532 int r, rl;
3533 int cur, l;
Owen Taylor3473f882001-02-23 17:55:21 +00003534 xmlParserInputPtr input = ctxt->input;
3535 int count = 0;
3536
Owen Taylor3473f882001-02-23 17:55:21 +00003537 if (buf == NULL) {
Daniel Veillard4c778d82005-01-23 17:37:44 +00003538 len = 0;
3539 size = XML_PARSER_BUFFER_SIZE;
3540 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
3541 if (buf == NULL) {
3542 xmlErrMemory(ctxt, NULL);
3543 return;
3544 }
Owen Taylor3473f882001-02-23 17:55:21 +00003545 }
3546 q = CUR_CHAR(ql);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00003547 if (q == 0)
3548 goto not_terminated;
Owen Taylor3473f882001-02-23 17:55:21 +00003549 NEXTL(ql);
3550 r = CUR_CHAR(rl);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00003551 if (r == 0)
3552 goto not_terminated;
Owen Taylor3473f882001-02-23 17:55:21 +00003553 NEXTL(rl);
3554 cur = CUR_CHAR(l);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00003555 if (cur == 0)
3556 goto not_terminated;
William M. Brack871611b2003-10-18 04:53:14 +00003557 while (IS_CHAR(cur) && /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00003558 ((cur != '>') ||
3559 (r != '-') || (q != '-'))) {
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00003560 if ((r == '-') && (q == '-')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003561 xmlFatalErr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003562 }
3563 if (len + 5 >= size) {
William M. Bracka3215c72004-07-31 16:24:01 +00003564 xmlChar *new_buf;
Owen Taylor3473f882001-02-23 17:55:21 +00003565 size *= 2;
William M. Bracka3215c72004-07-31 16:24:01 +00003566 new_buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3567 if (new_buf == NULL) {
3568 xmlFree (buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003569 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003570 return;
3571 }
William M. Bracka3215c72004-07-31 16:24:01 +00003572 buf = new_buf;
Owen Taylor3473f882001-02-23 17:55:21 +00003573 }
3574 COPY_BUF(ql,buf,len,q);
3575 q = r;
3576 ql = rl;
3577 r = cur;
3578 rl = l;
3579
3580 count++;
3581 if (count > 50) {
3582 GROW;
3583 count = 0;
3584 }
3585 NEXTL(l);
3586 cur = CUR_CHAR(l);
3587 if (cur == 0) {
3588 SHRINK;
3589 GROW;
3590 cur = CUR_CHAR(l);
3591 }
3592 }
3593 buf[len] = 0;
William M. Brack871611b2003-10-18 04:53:14 +00003594 if (!IS_CHAR(cur)) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00003595 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00003596 "Comment not terminated \n<!--%.50s\n", buf);
Owen Taylor3473f882001-02-23 17:55:21 +00003597 xmlFree(buf);
3598 } else {
3599 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003600 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
3601 "Comment doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003602 }
3603 NEXT;
3604 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
3605 (!ctxt->disableSAX))
3606 ctxt->sax->comment(ctxt->userData, buf);
3607 xmlFree(buf);
3608 }
Daniel Veillard4aede2e2003-10-17 12:43:59 +00003609 return;
3610not_terminated:
3611 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
3612 "Comment not terminated\n", NULL);
3613 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00003614}
Daniel Veillard4c778d82005-01-23 17:37:44 +00003615/**
3616 * xmlParseComment:
3617 * @ctxt: an XML parser context
3618 *
3619 * Skip an XML (SGML) comment <!-- .... -->
3620 * The spec says that "For compatibility, the string "--" (double-hyphen)
3621 * must not occur within comments. "
3622 *
3623 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
3624 */
3625void
3626xmlParseComment(xmlParserCtxtPtr ctxt) {
3627 xmlChar *buf = NULL;
3628 int size = XML_PARSER_BUFFER_SIZE;
Daniel Veillard316a5c32005-01-23 22:56:39 +00003629 int len = 0;
Daniel Veillard4c778d82005-01-23 17:37:44 +00003630 xmlParserInputState state;
3631 const xmlChar *in;
3632 int nbchar = 0, ccol;
3633
3634 /*
3635 * Check that there is a comment right here.
3636 */
3637 if ((RAW != '<') || (NXT(1) != '!') ||
3638 (NXT(2) != '-') || (NXT(3) != '-')) return;
3639
3640 state = ctxt->instate;
3641 ctxt->instate = XML_PARSER_COMMENT;
3642 SKIP(4);
3643 SHRINK;
3644 GROW;
3645
3646 /*
3647 * Accelerated common case where input don't need to be
3648 * modified before passing it to the handler.
3649 */
3650 in = ctxt->input->cur;
3651 do {
3652 if (*in == 0xA) {
3653 ctxt->input->line++; ctxt->input->col = 1;
3654 in++;
3655 while (*in == 0xA) {
3656 ctxt->input->line++; ctxt->input->col = 1;
3657 in++;
3658 }
3659 }
3660get_more:
3661 ccol = ctxt->input->col;
3662 while (((*in > '-') && (*in <= 0x7F)) ||
3663 ((*in >= 0x20) && (*in < '-')) ||
3664 (*in == 0x09)) {
3665 in++;
3666 ccol++;
3667 }
3668 ctxt->input->col = ccol;
3669 if (*in == 0xA) {
3670 ctxt->input->line++; ctxt->input->col = 1;
3671 in++;
3672 while (*in == 0xA) {
3673 ctxt->input->line++; ctxt->input->col = 1;
3674 in++;
3675 }
3676 goto get_more;
3677 }
3678 nbchar = in - ctxt->input->cur;
3679 /*
3680 * save current set of data
3681 */
3682 if (nbchar > 0) {
3683 if ((ctxt->sax != NULL) &&
3684 (ctxt->sax->comment != NULL)) {
3685 if (buf == NULL) {
3686 if ((*in == '-') && (in[1] == '-'))
3687 size = nbchar + 1;
3688 else
3689 size = XML_PARSER_BUFFER_SIZE + nbchar;
3690 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
3691 if (buf == NULL) {
3692 xmlErrMemory(ctxt, NULL);
3693 ctxt->instate = state;
3694 return;
3695 }
3696 len = 0;
3697 } else if (len + nbchar + 1 >= size) {
3698 xmlChar *new_buf;
3699 size += len + nbchar + XML_PARSER_BUFFER_SIZE;
3700 new_buf = (xmlChar *) xmlRealloc(buf,
3701 size * sizeof(xmlChar));
3702 if (new_buf == NULL) {
3703 xmlFree (buf);
3704 xmlErrMemory(ctxt, NULL);
3705 ctxt->instate = state;
3706 return;
3707 }
3708 buf = new_buf;
3709 }
3710 memcpy(&buf[len], ctxt->input->cur, nbchar);
3711 len += nbchar;
3712 buf[len] = 0;
3713 }
3714 }
3715 ctxt->input->cur = in;
3716 if (*in == 0xA)
3717
3718 if (*in == 0xD) {
3719 in++;
3720 if (*in == 0xA) {
3721 ctxt->input->cur = in;
3722 in++;
3723 ctxt->input->line++; ctxt->input->col = 1;
3724 continue; /* while */
3725 }
3726 in--;
3727 }
3728 SHRINK;
3729 GROW;
3730 in = ctxt->input->cur;
3731 if (*in == '-') {
3732 if (in[1] == '-') {
3733 if (in[2] == '>') {
3734 SKIP(3);
3735 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
3736 (!ctxt->disableSAX)) {
3737 if (buf != NULL)
3738 ctxt->sax->comment(ctxt->userData, buf);
3739 else
3740 ctxt->sax->comment(ctxt->userData, BAD_CAST "");
3741 }
3742 if (buf != NULL)
3743 xmlFree(buf);
3744 ctxt->instate = state;
3745 return;
3746 }
3747 if (buf != NULL)
3748 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
3749 "Comment not terminated \n<!--%.50s\n",
3750 buf);
3751 else
3752 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
3753 "Comment not terminated \n", NULL);
3754 in++;
3755 ctxt->input->col++;
3756 }
3757 in++;
3758 ctxt->input->col++;
3759 goto get_more;
3760 }
3761 } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
3762 xmlParseCommentComplex(ctxt, buf, len, size);
3763 ctxt->instate = state;
3764 return;
3765}
3766
Owen Taylor3473f882001-02-23 17:55:21 +00003767
3768/**
3769 * xmlParsePITarget:
3770 * @ctxt: an XML parser context
3771 *
3772 * parse the name of a PI
3773 *
3774 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
3775 *
3776 * Returns the PITarget name or NULL
3777 */
3778
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003779const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00003780xmlParsePITarget(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003781 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00003782
3783 name = xmlParseName(ctxt);
3784 if ((name != NULL) &&
3785 ((name[0] == 'x') || (name[0] == 'X')) &&
3786 ((name[1] == 'm') || (name[1] == 'M')) &&
3787 ((name[2] == 'l') || (name[2] == 'L'))) {
3788 int i;
3789 if ((name[0] == 'x') && (name[1] == 'm') &&
3790 (name[2] == 'l') && (name[3] == 0)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003791 xmlFatalErrMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
Owen Taylor3473f882001-02-23 17:55:21 +00003792 "XML declaration allowed only at the start of the document\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003793 return(name);
3794 } else if (name[3] == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003795 xmlFatalErr(ctxt, XML_ERR_RESERVED_XML_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003796 return(name);
3797 }
3798 for (i = 0;;i++) {
3799 if (xmlW3CPIs[i] == NULL) break;
3800 if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
3801 return(name);
3802 }
Daniel Veillard24eb9782003-10-04 21:08:09 +00003803 xmlWarningMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
3804 "xmlParsePITarget: invalid name prefix 'xml'\n",
3805 NULL, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003806 }
3807 return(name);
3808}
3809
#ifdef LIBXML_CATALOG_ENABLED
/**
 * xmlParseCatalogPI:
 * @ctxt:  an XML parser context
 * @catalog:  the PI value string
 *
 * Parse an XML Catalog Processing Instruction.
 *
 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
 *
 * Occurs only if allowed by the user and if happening in the Misc
 * part of the document before any doctype information.
 * This will add the given catalog to the parsing context in order
 * to be used if there is a resolution need further down in the document.
 *
 * The expected content is: optional blanks, "catalog", optional blanks,
 * '=', optional blanks, a quoted value, optional blanks. A missing '='
 * makes the function return silently; any other mismatch produces an
 * XML_WAR_CATALOG_PI warning.
 */

static void
xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
    const xmlChar *p = catalog;
    const xmlChar *start;
    xmlChar *URL = NULL;
    xmlChar quote;

    /* pseudo-attribute name */
    for (; IS_BLANK_CH(*p); p++)
        ;
    if (xmlStrncmp(p, BAD_CAST"catalog", 7) != 0)
        goto error;
    p += 7;

    /* equal sign */
    for (; IS_BLANK_CH(*p); p++)
        ;
    if (*p != '=')
        return;
    p++;

    /* quoted value */
    for (; IS_BLANK_CH(*p); p++)
        ;
    quote = *p;
    if ((quote != '\'') && (quote != '"'))
        goto error;
    p++;
    for (start = p; (*p != 0) && (*p != quote); p++)
        ;
    if (*p == 0)
        goto error;
    URL = xmlStrndup(start, p - start);
    p++;

    /* only blanks may follow the closing quote */
    for (; IS_BLANK_CH(*p); p++)
        ;
    if (*p != 0)
        goto error;

    if (URL != NULL) {
        ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, URL);
        xmlFree(URL);
    }
    return;

error:
    xmlWarningMsg(ctxt, XML_WAR_CATALOG_PI,
                  "Catalog PI syntax error: %s\n",
                  catalog, NULL);
    if (URL != NULL)
        xmlFree(URL);
}
#endif
3871
Owen Taylor3473f882001-02-23 17:55:21 +00003872/**
3873 * xmlParsePI:
3874 * @ctxt: an XML parser context
3875 *
3876 * parse an XML Processing Instruction.
3877 *
3878 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
3879 *
3880 * The processing is transfered to SAX once parsed.
3881 */
3882
3883void
3884xmlParsePI(xmlParserCtxtPtr ctxt) {
3885 xmlChar *buf = NULL;
3886 int len = 0;
3887 int size = XML_PARSER_BUFFER_SIZE;
3888 int cur, l;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003889 const xmlChar *target;
Owen Taylor3473f882001-02-23 17:55:21 +00003890 xmlParserInputState state;
3891 int count = 0;
3892
3893 if ((RAW == '<') && (NXT(1) == '?')) {
3894 xmlParserInputPtr input = ctxt->input;
3895 state = ctxt->instate;
3896 ctxt->instate = XML_PARSER_PI;
3897 /*
3898 * this is a Processing Instruction.
3899 */
3900 SKIP(2);
3901 SHRINK;
3902
3903 /*
3904 * Parse the target name and check for special support like
3905 * namespace.
3906 */
3907 target = xmlParsePITarget(ctxt);
3908 if (target != NULL) {
3909 if ((RAW == '?') && (NXT(1) == '>')) {
3910 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003911 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
3912 "PI declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003913 }
3914 SKIP(2);
3915
3916 /*
3917 * SAX: PI detected.
3918 */
3919 if ((ctxt->sax) && (!ctxt->disableSAX) &&
3920 (ctxt->sax->processingInstruction != NULL))
3921 ctxt->sax->processingInstruction(ctxt->userData,
3922 target, NULL);
3923 ctxt->instate = state;
Owen Taylor3473f882001-02-23 17:55:21 +00003924 return;
3925 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003926 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003927 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003928 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003929 ctxt->instate = state;
3930 return;
3931 }
3932 cur = CUR;
3933 if (!IS_BLANK(cur)) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00003934 xmlFatalErrMsgStr(ctxt, XML_ERR_SPACE_REQUIRED,
3935 "ParsePI: PI %s space expected\n", target);
Owen Taylor3473f882001-02-23 17:55:21 +00003936 }
3937 SKIP_BLANKS;
3938 cur = CUR_CHAR(l);
William M. Brack871611b2003-10-18 04:53:14 +00003939 while (IS_CHAR(cur) && /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00003940 ((cur != '?') || (NXT(1) != '>'))) {
3941 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003942 xmlChar *tmp;
3943
Owen Taylor3473f882001-02-23 17:55:21 +00003944 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003945 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3946 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003947 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00003948 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00003949 ctxt->instate = state;
3950 return;
3951 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003952 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003953 }
3954 count++;
3955 if (count > 50) {
3956 GROW;
3957 count = 0;
3958 }
3959 COPY_BUF(l,buf,len,cur);
3960 NEXTL(l);
3961 cur = CUR_CHAR(l);
3962 if (cur == 0) {
3963 SHRINK;
3964 GROW;
3965 cur = CUR_CHAR(l);
3966 }
3967 }
3968 buf[len] = 0;
3969 if (cur != '?') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00003970 xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
3971 "ParsePI: PI %s never end ...\n", target);
Owen Taylor3473f882001-02-23 17:55:21 +00003972 } else {
3973 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003974 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
3975 "PI declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003976 }
3977 SKIP(2);
3978
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00003979#ifdef LIBXML_CATALOG_ENABLED
3980 if (((state == XML_PARSER_MISC) ||
3981 (state == XML_PARSER_START)) &&
3982 (xmlStrEqual(target, XML_CATALOG_PI))) {
3983 xmlCatalogAllow allow = xmlCatalogGetDefaults();
3984 if ((allow == XML_CATA_ALLOW_DOCUMENT) ||
3985 (allow == XML_CATA_ALLOW_ALL))
3986 xmlParseCatalogPI(ctxt, buf);
3987 }
3988#endif
3989
3990
Owen Taylor3473f882001-02-23 17:55:21 +00003991 /*
3992 * SAX: PI detected.
3993 */
3994 if ((ctxt->sax) && (!ctxt->disableSAX) &&
3995 (ctxt->sax->processingInstruction != NULL))
3996 ctxt->sax->processingInstruction(ctxt->userData,
3997 target, buf);
3998 }
3999 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00004000 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004001 xmlFatalErr(ctxt, XML_ERR_PI_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004002 }
4003 ctxt->instate = state;
4004 }
4005}
4006
4007/**
4008 * xmlParseNotationDecl:
4009 * @ctxt: an XML parser context
4010 *
4011 * parse a notation declaration
4012 *
4013 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
4014 *
4015 * Hence there is actually 3 choices:
4016 * 'PUBLIC' S PubidLiteral
4017 * 'PUBLIC' S PubidLiteral S SystemLiteral
4018 * and 'SYSTEM' S SystemLiteral
4019 *
4020 * See the NOTE on xmlParseExternalID().
4021 */
4022
4023void
4024xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004025 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00004026 xmlChar *Pubid;
4027 xmlChar *Systemid;
4028
Daniel Veillarda07050d2003-10-19 14:46:32 +00004029 if (CMP10(CUR_PTR, '<', '!', 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004030 xmlParserInputPtr input = ctxt->input;
4031 SHRINK;
4032 SKIP(10);
William M. Brack76e95df2003-10-18 16:20:14 +00004033 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004034 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4035 "Space required after '<!NOTATION'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004036 return;
4037 }
4038 SKIP_BLANKS;
4039
Daniel Veillard76d66f42001-05-16 21:05:17 +00004040 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004041 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004042 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004043 return;
4044 }
William M. Brack76e95df2003-10-18 16:20:14 +00004045 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004046 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004047 "Space required after the NOTATION name'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004048 return;
4049 }
4050 SKIP_BLANKS;
4051
4052 /*
4053 * Parse the IDs.
4054 */
4055 Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
4056 SKIP_BLANKS;
4057
4058 if (RAW == '>') {
4059 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004060 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4061 "Notation declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004062 }
4063 NEXT;
4064 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4065 (ctxt->sax->notationDecl != NULL))
4066 ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
4067 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004068 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004069 }
Owen Taylor3473f882001-02-23 17:55:21 +00004070 if (Systemid != NULL) xmlFree(Systemid);
4071 if (Pubid != NULL) xmlFree(Pubid);
4072 }
4073}
4074
4075/**
4076 * xmlParseEntityDecl:
4077 * @ctxt: an XML parser context
4078 *
4079 * parse <!ENTITY declarations
4080 *
4081 * [70] EntityDecl ::= GEDecl | PEDecl
4082 *
4083 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
4084 *
4085 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
4086 *
4087 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
4088 *
4089 * [74] PEDef ::= EntityValue | ExternalID
4090 *
4091 * [76] NDataDecl ::= S 'NDATA' S Name
4092 *
4093 * [ VC: Notation Declared ]
4094 * The Name must match the declared name of a notation.
4095 */
4096
4097void
4098xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004099 const xmlChar *name = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00004100 xmlChar *value = NULL;
4101 xmlChar *URI = NULL, *literal = NULL;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004102 const xmlChar *ndata = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00004103 int isParameter = 0;
4104 xmlChar *orig = NULL;
Daniel Veillardf5582f12002-06-11 10:08:16 +00004105 int skipped;
Owen Taylor3473f882001-02-23 17:55:21 +00004106
Daniel Veillard4c778d82005-01-23 17:37:44 +00004107 /* GROW; done in the caller */
Daniel Veillarda07050d2003-10-19 14:46:32 +00004108 if (CMP8(CUR_PTR, '<', '!', 'E', 'N', 'T', 'I', 'T', 'Y')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004109 xmlParserInputPtr input = ctxt->input;
Owen Taylor3473f882001-02-23 17:55:21 +00004110 SHRINK;
4111 SKIP(8);
Daniel Veillardf5582f12002-06-11 10:08:16 +00004112 skipped = SKIP_BLANKS;
4113 if (skipped == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004114 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4115 "Space required after '<!ENTITY'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004116 }
Owen Taylor3473f882001-02-23 17:55:21 +00004117
4118 if (RAW == '%') {
4119 NEXT;
Daniel Veillardf5582f12002-06-11 10:08:16 +00004120 skipped = SKIP_BLANKS;
4121 if (skipped == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004122 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4123 "Space required after '%'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004124 }
Owen Taylor3473f882001-02-23 17:55:21 +00004125 isParameter = 1;
4126 }
4127
Daniel Veillard76d66f42001-05-16 21:05:17 +00004128 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004129 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004130 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
4131 "xmlParseEntityDecl: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004132 return;
4133 }
Daniel Veillardf5582f12002-06-11 10:08:16 +00004134 skipped = SKIP_BLANKS;
4135 if (skipped == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004136 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4137 "Space required after the entity name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004138 }
Owen Taylor3473f882001-02-23 17:55:21 +00004139
Daniel Veillardf5582f12002-06-11 10:08:16 +00004140 ctxt->instate = XML_PARSER_ENTITY_DECL;
Owen Taylor3473f882001-02-23 17:55:21 +00004141 /*
4142 * handle the various case of definitions...
4143 */
4144 if (isParameter) {
4145 if ((RAW == '"') || (RAW == '\'')) {
4146 value = xmlParseEntityValue(ctxt, &orig);
4147 if (value) {
4148 if ((ctxt->sax != NULL) &&
4149 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
4150 ctxt->sax->entityDecl(ctxt->userData, name,
4151 XML_INTERNAL_PARAMETER_ENTITY,
4152 NULL, NULL, value);
4153 }
4154 } else {
4155 URI = xmlParseExternalID(ctxt, &literal, 1);
4156 if ((URI == NULL) && (literal == NULL)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004157 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004158 }
4159 if (URI) {
4160 xmlURIPtr uri;
4161
4162 uri = xmlParseURI((const char *) URI);
4163 if (uri == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00004164 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
4165 "Invalid URI: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00004166 /*
4167 * This really ought to be a well formedness error
4168 * but the XML Core WG decided otherwise c.f. issue
4169 * E26 of the XML erratas.
4170 */
Owen Taylor3473f882001-02-23 17:55:21 +00004171 } else {
4172 if (uri->fragment != NULL) {
Daniel Veillard4a7ae502002-02-18 19:18:17 +00004173 /*
4174 * Okay this is foolish to block those but not
4175 * invalid URIs.
4176 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004177 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004178 } else {
4179 if ((ctxt->sax != NULL) &&
4180 (!ctxt->disableSAX) &&
4181 (ctxt->sax->entityDecl != NULL))
4182 ctxt->sax->entityDecl(ctxt->userData, name,
4183 XML_EXTERNAL_PARAMETER_ENTITY,
4184 literal, URI, NULL);
4185 }
4186 xmlFreeURI(uri);
4187 }
4188 }
4189 }
4190 } else {
4191 if ((RAW == '"') || (RAW == '\'')) {
4192 value = xmlParseEntityValue(ctxt, &orig);
4193 if ((ctxt->sax != NULL) &&
4194 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
4195 ctxt->sax->entityDecl(ctxt->userData, name,
4196 XML_INTERNAL_GENERAL_ENTITY,
4197 NULL, NULL, value);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004198 /*
4199 * For expat compatibility in SAX mode.
4200 */
4201 if ((ctxt->myDoc == NULL) ||
4202 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
4203 if (ctxt->myDoc == NULL) {
4204 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
4205 }
4206 if (ctxt->myDoc->intSubset == NULL)
4207 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
4208 BAD_CAST "fake", NULL, NULL);
4209
Daniel Veillard1af9a412003-08-20 22:54:39 +00004210 xmlSAX2EntityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY,
4211 NULL, NULL, value);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004212 }
Owen Taylor3473f882001-02-23 17:55:21 +00004213 } else {
4214 URI = xmlParseExternalID(ctxt, &literal, 1);
4215 if ((URI == NULL) && (literal == NULL)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004216 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004217 }
4218 if (URI) {
4219 xmlURIPtr uri;
4220
4221 uri = xmlParseURI((const char *)URI);
4222 if (uri == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00004223 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
4224 "Invalid URI: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00004225 /*
4226 * This really ought to be a well formedness error
4227 * but the XML Core WG decided otherwise c.f. issue
4228 * E26 of the XML erratas.
4229 */
Owen Taylor3473f882001-02-23 17:55:21 +00004230 } else {
4231 if (uri->fragment != NULL) {
Daniel Veillard4a7ae502002-02-18 19:18:17 +00004232 /*
4233 * Okay this is foolish to block those but not
4234 * invalid URIs.
4235 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004236 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004237 }
4238 xmlFreeURI(uri);
4239 }
4240 }
William M. Brack76e95df2003-10-18 16:20:14 +00004241 if ((RAW != '>') && (!IS_BLANK_CH(CUR))) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004242 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4243 "Space required before 'NDATA'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004244 }
4245 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00004246 if (CMP5(CUR_PTR, 'N', 'D', 'A', 'T', 'A')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004247 SKIP(5);
William M. Brack76e95df2003-10-18 16:20:14 +00004248 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004249 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4250 "Space required after 'NDATA'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004251 }
4252 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004253 ndata = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004254 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4255 (ctxt->sax->unparsedEntityDecl != NULL))
4256 ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
4257 literal, URI, ndata);
4258 } else {
4259 if ((ctxt->sax != NULL) &&
4260 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
4261 ctxt->sax->entityDecl(ctxt->userData, name,
4262 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
4263 literal, URI, NULL);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004264 /*
4265 * For expat compatibility in SAX mode.
4266 * assuming the entity repalcement was asked for
4267 */
4268 if ((ctxt->replaceEntities != 0) &&
4269 ((ctxt->myDoc == NULL) ||
4270 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) {
4271 if (ctxt->myDoc == NULL) {
4272 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
4273 }
4274
4275 if (ctxt->myDoc->intSubset == NULL)
4276 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
4277 BAD_CAST "fake", NULL, NULL);
Daniel Veillard1af9a412003-08-20 22:54:39 +00004278 xmlSAX2EntityDecl(ctxt, name,
4279 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
4280 literal, URI, NULL);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004281 }
Owen Taylor3473f882001-02-23 17:55:21 +00004282 }
4283 }
4284 }
4285 SKIP_BLANKS;
4286 if (RAW != '>') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00004287 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00004288 "xmlParseEntityDecl: entity %s not terminated\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00004289 } else {
4290 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004291 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4292 "Entity declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004293 }
4294 NEXT;
4295 }
4296 if (orig != NULL) {
4297 /*
4298 * Ugly mechanism to save the raw entity value.
4299 */
4300 xmlEntityPtr cur = NULL;
4301
4302 if (isParameter) {
4303 if ((ctxt->sax != NULL) &&
4304 (ctxt->sax->getParameterEntity != NULL))
4305 cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
4306 } else {
4307 if ((ctxt->sax != NULL) &&
4308 (ctxt->sax->getEntity != NULL))
4309 cur = ctxt->sax->getEntity(ctxt->userData, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004310 if ((cur == NULL) && (ctxt->userData==ctxt)) {
Daniel Veillard1af9a412003-08-20 22:54:39 +00004311 cur = xmlSAX2GetEntity(ctxt, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004312 }
Owen Taylor3473f882001-02-23 17:55:21 +00004313 }
4314 if (cur != NULL) {
4315 if (cur->orig != NULL)
4316 xmlFree(orig);
4317 else
4318 cur->orig = orig;
4319 } else
4320 xmlFree(orig);
4321 }
Owen Taylor3473f882001-02-23 17:55:21 +00004322 if (value != NULL) xmlFree(value);
4323 if (URI != NULL) xmlFree(URI);
4324 if (literal != NULL) xmlFree(literal);
Owen Taylor3473f882001-02-23 17:55:21 +00004325 }
4326}
4327
4328/**
4329 * xmlParseDefaultDecl:
4330 * @ctxt: an XML parser context
4331 * @value: Receive a possible fixed default value for the attribute
4332 *
4333 * Parse an attribute default declaration
4334 *
4335 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
4336 *
4337 * [ VC: Required Attribute ]
4338 * if the default declaration is the keyword #REQUIRED, then the
4339 * attribute must be specified for all elements of the type in the
4340 * attribute-list declaration.
4341 *
4342 * [ VC: Attribute Default Legal ]
4343 * The declared default value must meet the lexical constraints of
4344 * the declared attribute type c.f. xmlValidateAttributeDecl()
4345 *
4346 * [ VC: Fixed Attribute Default ]
4347 * if an attribute has a default value declared with the #FIXED
4348 * keyword, instances of that attribute must match the default value.
4349 *
4350 * [ WFC: No < in Attribute Values ]
4351 * handled in xmlParseAttValue()
4352 *
4353 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
4354 * or XML_ATTRIBUTE_FIXED.
4355 */
4356
4357int
4358xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
4359 int val;
4360 xmlChar *ret;
4361
4362 *value = NULL;
Daniel Veillarda07050d2003-10-19 14:46:32 +00004363 if (CMP9(CUR_PTR, '#', 'R', 'E', 'Q', 'U', 'I', 'R', 'E', 'D')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004364 SKIP(9);
4365 return(XML_ATTRIBUTE_REQUIRED);
4366 }
Daniel Veillarda07050d2003-10-19 14:46:32 +00004367 if (CMP8(CUR_PTR, '#', 'I', 'M', 'P', 'L', 'I', 'E', 'D')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004368 SKIP(8);
4369 return(XML_ATTRIBUTE_IMPLIED);
4370 }
4371 val = XML_ATTRIBUTE_NONE;
Daniel Veillarda07050d2003-10-19 14:46:32 +00004372 if (CMP6(CUR_PTR, '#', 'F', 'I', 'X', 'E', 'D')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004373 SKIP(6);
4374 val = XML_ATTRIBUTE_FIXED;
William M. Brack76e95df2003-10-18 16:20:14 +00004375 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004376 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4377 "Space required after '#FIXED'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004378 }
4379 SKIP_BLANKS;
4380 }
4381 ret = xmlParseAttValue(ctxt);
4382 ctxt->instate = XML_PARSER_DTD;
4383 if (ret == NULL) {
William M. Brack7b9154b2003-09-27 19:23:50 +00004384 xmlFatalErrMsg(ctxt, (xmlParserErrors)ctxt->errNo,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004385 "Attribute default value declaration error\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004386 } else
4387 *value = ret;
4388 return(val);
4389}
4390
4391/**
4392 * xmlParseNotationType:
4393 * @ctxt: an XML parser context
4394 *
4395 * parse an Notation attribute type.
4396 *
4397 * Note: the leading 'NOTATION' S part has already being parsed...
4398 *
4399 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
4400 *
4401 * [ VC: Notation Attributes ]
4402 * Values of this type must match one of the notation names included
4403 * in the declaration; all notation names in the declaration must be declared.
4404 *
4405 * Returns: the notation attribute tree built while parsing
4406 */
4407
4408xmlEnumerationPtr
4409xmlParseNotationType(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004410 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00004411 xmlEnumerationPtr ret = NULL, last = NULL, cur;
4412
4413 if (RAW != '(') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004414 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004415 return(NULL);
4416 }
4417 SHRINK;
4418 do {
4419 NEXT;
4420 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004421 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004422 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004423 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
4424 "Name expected in NOTATION declaration\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004425 return(ret);
4426 }
4427 cur = xmlCreateEnumeration(name);
Owen Taylor3473f882001-02-23 17:55:21 +00004428 if (cur == NULL) return(ret);
4429 if (last == NULL) ret = last = cur;
4430 else {
4431 last->next = cur;
4432 last = cur;
4433 }
4434 SKIP_BLANKS;
4435 } while (RAW == '|');
4436 if (RAW != ')') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004437 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004438 if ((last != NULL) && (last != ret))
4439 xmlFreeEnumeration(last);
4440 return(ret);
4441 }
4442 NEXT;
4443 return(ret);
4444}
4445
4446/**
4447 * xmlParseEnumerationType:
4448 * @ctxt: an XML parser context
4449 *
4450 * parse an Enumeration attribute type.
4451 *
4452 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
4453 *
4454 * [ VC: Enumeration ]
4455 * Values of this type must match one of the Nmtoken tokens in
4456 * the declaration
4457 *
4458 * Returns: the enumeration attribute tree built while parsing
4459 */
4460
4461xmlEnumerationPtr
4462xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
4463 xmlChar *name;
4464 xmlEnumerationPtr ret = NULL, last = NULL, cur;
4465
4466 if (RAW != '(') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004467 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004468 return(NULL);
4469 }
4470 SHRINK;
4471 do {
4472 NEXT;
4473 SKIP_BLANKS;
4474 name = xmlParseNmtoken(ctxt);
4475 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004476 xmlFatalErr(ctxt, XML_ERR_NMTOKEN_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004477 return(ret);
4478 }
4479 cur = xmlCreateEnumeration(name);
4480 xmlFree(name);
4481 if (cur == NULL) return(ret);
4482 if (last == NULL) ret = last = cur;
4483 else {
4484 last->next = cur;
4485 last = cur;
4486 }
4487 SKIP_BLANKS;
4488 } while (RAW == '|');
4489 if (RAW != ')') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004490 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004491 return(ret);
4492 }
4493 NEXT;
4494 return(ret);
4495}
4496
4497/**
4498 * xmlParseEnumeratedType:
4499 * @ctxt: an XML parser context
4500 * @tree: the enumeration tree built while parsing
4501 *
4502 * parse an Enumerated attribute type.
4503 *
4504 * [57] EnumeratedType ::= NotationType | Enumeration
4505 *
4506 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
4507 *
4508 *
4509 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
4510 */
4511
4512int
4513xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
Daniel Veillarda07050d2003-10-19 14:46:32 +00004514 if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004515 SKIP(8);
William M. Brack76e95df2003-10-18 16:20:14 +00004516 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004517 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4518 "Space required after 'NOTATION'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004519 return(0);
4520 }
4521 SKIP_BLANKS;
4522 *tree = xmlParseNotationType(ctxt);
4523 if (*tree == NULL) return(0);
4524 return(XML_ATTRIBUTE_NOTATION);
4525 }
4526 *tree = xmlParseEnumerationType(ctxt);
4527 if (*tree == NULL) return(0);
4528 return(XML_ATTRIBUTE_ENUMERATION);
4529}
4530
4531/**
4532 * xmlParseAttributeType:
4533 * @ctxt: an XML parser context
4534 * @tree: the enumeration tree built while parsing
4535 *
4536 * parse the Attribute list def for an element
4537 *
4538 * [54] AttType ::= StringType | TokenizedType | EnumeratedType
4539 *
4540 * [55] StringType ::= 'CDATA'
4541 *
4542 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
4543 * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
4544 *
4545 * Validity constraints for attribute values syntax are checked in
4546 * xmlValidateAttributeValue()
4547 *
4548 * [ VC: ID ]
4549 * Values of type ID must match the Name production. A name must not
4550 * appear more than once in an XML document as a value of this type;
4551 * i.e., ID values must uniquely identify the elements which bear them.
4552 *
4553 * [ VC: One ID per Element Type ]
4554 * No element type may have more than one ID attribute specified.
4555 *
4556 * [ VC: ID Attribute Default ]
4557 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
4558 *
4559 * [ VC: IDREF ]
4560 * Values of type IDREF must match the Name production, and values
4561 * of type IDREFS must match Names; each IDREF Name must match the value
4562 * of an ID attribute on some element in the XML document; i.e. IDREF
4563 * values must match the value of some ID attribute.
4564 *
4565 * [ VC: Entity Name ]
4566 * Values of type ENTITY must match the Name production, values
4567 * of type ENTITIES must match Names; each Entity Name must match the
4568 * name of an unparsed entity declared in the DTD.
4569 *
4570 * [ VC: Name Token ]
4571 * Values of type NMTOKEN must match the Nmtoken production; values
4572 * of type NMTOKENS must match Nmtokens.
4573 *
4574 * Returns the attribute type
4575 */
4576int
4577xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
4578 SHRINK;
Daniel Veillarda07050d2003-10-19 14:46:32 +00004579 if (CMP5(CUR_PTR, 'C', 'D', 'A', 'T', 'A')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004580 SKIP(5);
4581 return(XML_ATTRIBUTE_CDATA);
Daniel Veillarda07050d2003-10-19 14:46:32 +00004582 } else if (CMP6(CUR_PTR, 'I', 'D', 'R', 'E', 'F', 'S')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004583 SKIP(6);
4584 return(XML_ATTRIBUTE_IDREFS);
Daniel Veillarda07050d2003-10-19 14:46:32 +00004585 } else if (CMP5(CUR_PTR, 'I', 'D', 'R', 'E', 'F')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004586 SKIP(5);
4587 return(XML_ATTRIBUTE_IDREF);
4588 } else if ((RAW == 'I') && (NXT(1) == 'D')) {
4589 SKIP(2);
4590 return(XML_ATTRIBUTE_ID);
Daniel Veillarda07050d2003-10-19 14:46:32 +00004591 } else if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004592 SKIP(6);
4593 return(XML_ATTRIBUTE_ENTITY);
Daniel Veillarda07050d2003-10-19 14:46:32 +00004594 } else if (CMP8(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'I', 'E', 'S')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004595 SKIP(8);
4596 return(XML_ATTRIBUTE_ENTITIES);
Daniel Veillarda07050d2003-10-19 14:46:32 +00004597 } else if (CMP8(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N', 'S')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004598 SKIP(8);
4599 return(XML_ATTRIBUTE_NMTOKENS);
Daniel Veillarda07050d2003-10-19 14:46:32 +00004600 } else if (CMP7(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004601 SKIP(7);
4602 return(XML_ATTRIBUTE_NMTOKEN);
4603 }
4604 return(xmlParseEnumeratedType(ctxt, tree));
4605}
4606
4607/**
4608 * xmlParseAttributeListDecl:
4609 * @ctxt: an XML parser context
4610 *
4611 * : parse the Attribute list def for an element
4612 *
4613 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
4614 *
4615 * [53] AttDef ::= S Name S AttType S DefaultDecl
4616 *
4617 */
4618void
4619xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004620 const xmlChar *elemName;
4621 const xmlChar *attrName;
Owen Taylor3473f882001-02-23 17:55:21 +00004622 xmlEnumerationPtr tree;
4623
Daniel Veillarda07050d2003-10-19 14:46:32 +00004624 if (CMP9(CUR_PTR, '<', '!', 'A', 'T', 'T', 'L', 'I', 'S', 'T')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004625 xmlParserInputPtr input = ctxt->input;
4626
4627 SKIP(9);
William M. Brack76e95df2003-10-18 16:20:14 +00004628 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004629 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004630 "Space required after '<!ATTLIST'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004631 }
4632 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004633 elemName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004634 if (elemName == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004635 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
4636 "ATTLIST: no name for Element\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004637 return;
4638 }
4639 SKIP_BLANKS;
4640 GROW;
4641 while (RAW != '>') {
4642 const xmlChar *check = CUR_PTR;
4643 int type;
4644 int def;
4645 xmlChar *defaultValue = NULL;
4646
4647 GROW;
4648 tree = NULL;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004649 attrName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004650 if (attrName == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004651 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
4652 "ATTLIST: no name for Attribute\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004653 break;
4654 }
4655 GROW;
William M. Brack76e95df2003-10-18 16:20:14 +00004656 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004657 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004658 "Space required after the attribute name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004659 if (defaultValue != NULL)
4660 xmlFree(defaultValue);
4661 break;
4662 }
4663 SKIP_BLANKS;
4664
4665 type = xmlParseAttributeType(ctxt, &tree);
4666 if (type <= 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00004667 if (defaultValue != NULL)
4668 xmlFree(defaultValue);
4669 break;
4670 }
4671
4672 GROW;
William M. Brack76e95df2003-10-18 16:20:14 +00004673 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004674 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4675 "Space required after the attribute type\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004676 if (defaultValue != NULL)
4677 xmlFree(defaultValue);
4678 if (tree != NULL)
4679 xmlFreeEnumeration(tree);
4680 break;
4681 }
4682 SKIP_BLANKS;
4683
4684 def = xmlParseDefaultDecl(ctxt, &defaultValue);
4685 if (def <= 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00004686 if (defaultValue != NULL)
4687 xmlFree(defaultValue);
4688 if (tree != NULL)
4689 xmlFreeEnumeration(tree);
4690 break;
4691 }
4692
4693 GROW;
4694 if (RAW != '>') {
William M. Brack76e95df2003-10-18 16:20:14 +00004695 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004696 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004697 "Space required after the attribute default value\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004698 if (defaultValue != NULL)
4699 xmlFree(defaultValue);
4700 if (tree != NULL)
4701 xmlFreeEnumeration(tree);
4702 break;
4703 }
4704 SKIP_BLANKS;
4705 }
4706 if (check == CUR_PTR) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004707 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
4708 "in xmlParseAttributeListDecl\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004709 if (defaultValue != NULL)
4710 xmlFree(defaultValue);
4711 if (tree != NULL)
4712 xmlFreeEnumeration(tree);
4713 break;
4714 }
4715 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4716 (ctxt->sax->attributeDecl != NULL))
4717 ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
4718 type, def, defaultValue, tree);
Daniel Veillarde57ec792003-09-10 10:50:59 +00004719 else if (tree != NULL)
4720 xmlFreeEnumeration(tree);
4721
4722 if ((ctxt->sax2) && (defaultValue != NULL) &&
4723 (def != XML_ATTRIBUTE_IMPLIED) &&
4724 (def != XML_ATTRIBUTE_REQUIRED)) {
4725 xmlAddDefAttrs(ctxt, elemName, attrName, defaultValue);
4726 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004727 if ((ctxt->sax2) && (type != XML_ATTRIBUTE_CDATA)) {
4728 xmlAddSpecialAttr(ctxt, elemName, attrName, type);
4729 }
Owen Taylor3473f882001-02-23 17:55:21 +00004730 if (defaultValue != NULL)
4731 xmlFree(defaultValue);
4732 GROW;
4733 }
4734 if (RAW == '>') {
4735 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004736 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4737 "Attribute list declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004738 }
4739 NEXT;
4740 }
Owen Taylor3473f882001-02-23 17:55:21 +00004741 }
4742}
4743
4744/**
4745 * xmlParseElementMixedContentDecl:
4746 * @ctxt: an XML parser context
Daniel Veillarda9b66d02002-12-11 14:23:49 +00004747 * @inputchk: the input used for the current entity, needed for boundary checks
Owen Taylor3473f882001-02-23 17:55:21 +00004748 *
4749 * parse the declaration for a Mixed Element content
4750 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
4751 *
4752 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
4753 * '(' S? '#PCDATA' S? ')'
4754 *
4755 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
4756 *
4757 * [ VC: No Duplicate Types ]
4758 * The same name must not appear more than once in a single
4759 * mixed-content declaration.
4760 *
4761 * returns: the list of the xmlElementContentPtr describing the element choices
4762 */
4763xmlElementContentPtr
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004764xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
Owen Taylor3473f882001-02-23 17:55:21 +00004765 xmlElementContentPtr ret = NULL, cur = NULL, n;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004766 const xmlChar *elem = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00004767
4768 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00004769 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004770 SKIP(7);
4771 SKIP_BLANKS;
4772 SHRINK;
4773 if (RAW == ')') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004774 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00004775 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
4776"Element content declaration doesn't start and stop in the same entity\n",
4777 NULL);
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004778 }
Owen Taylor3473f882001-02-23 17:55:21 +00004779 NEXT;
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00004780 ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
Owen Taylor3473f882001-02-23 17:55:21 +00004781 if (RAW == '*') {
4782 ret->ocur = XML_ELEMENT_CONTENT_MULT;
4783 NEXT;
4784 }
4785 return(ret);
4786 }
4787 if ((RAW == '(') || (RAW == '|')) {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00004788 ret = cur = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
Owen Taylor3473f882001-02-23 17:55:21 +00004789 if (ret == NULL) return(NULL);
4790 }
4791 while (RAW == '|') {
4792 NEXT;
4793 if (elem == NULL) {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00004794 ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
Owen Taylor3473f882001-02-23 17:55:21 +00004795 if (ret == NULL) return(NULL);
4796 ret->c1 = cur;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004797 if (cur != NULL)
4798 cur->parent = ret;
Owen Taylor3473f882001-02-23 17:55:21 +00004799 cur = ret;
4800 } else {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00004801 n = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
Owen Taylor3473f882001-02-23 17:55:21 +00004802 if (n == NULL) return(NULL);
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00004803 n->c1 = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004804 if (n->c1 != NULL)
4805 n->c1->parent = n;
Owen Taylor3473f882001-02-23 17:55:21 +00004806 cur->c2 = n;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004807 if (n != NULL)
4808 n->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004809 cur = n;
Owen Taylor3473f882001-02-23 17:55:21 +00004810 }
4811 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004812 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004813 if (elem == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004814 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004815 "xmlParseElementMixedContentDecl : Name expected\n");
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00004816 xmlFreeDocElementContent(ctxt->myDoc, cur);
Owen Taylor3473f882001-02-23 17:55:21 +00004817 return(NULL);
4818 }
4819 SKIP_BLANKS;
4820 GROW;
4821 }
4822 if ((RAW == ')') && (NXT(1) == '*')) {
4823 if (elem != NULL) {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00004824 cur->c2 = xmlNewDocElementContent(ctxt->myDoc, elem,
Owen Taylor3473f882001-02-23 17:55:21 +00004825 XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004826 if (cur->c2 != NULL)
4827 cur->c2->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004828 }
4829 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004830 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00004831 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
4832"Element content declaration doesn't start and stop in the same entity\n",
4833 NULL);
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004834 }
Owen Taylor3473f882001-02-23 17:55:21 +00004835 SKIP(2);
4836 } else {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00004837 xmlFreeDocElementContent(ctxt->myDoc, ret);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004838 xmlFatalErr(ctxt, XML_ERR_MIXED_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004839 return(NULL);
4840 }
4841
4842 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004843 xmlFatalErr(ctxt, XML_ERR_PCDATA_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004844 }
4845 return(ret);
4846}
4847
4848/**
4849 * xmlParseElementChildrenContentDecl:
4850 * @ctxt: an XML parser context
Daniel Veillarda9b66d02002-12-11 14:23:49 +00004851 * @inputchk: the input used for the current entity, needed for boundary checks
Owen Taylor3473f882001-02-23 17:55:21 +00004852 *
4853 * parse the declaration for a Mixed Element content
4854 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
4855 *
4856 *
4857 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
4858 *
4859 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
4860 *
4861 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
4862 *
4863 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
4864 *
4865 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
4866 * TODO Parameter-entity replacement text must be properly nested
Daniel Veillardcbaf3992001-12-31 16:16:02 +00004867 * with parenthesized groups. That is to say, if either of the
Owen Taylor3473f882001-02-23 17:55:21 +00004868 * opening or closing parentheses in a choice, seq, or Mixed
4869 * construct is contained in the replacement text for a parameter
4870 * entity, both must be contained in the same replacement text. For
4871 * interoperability, if a parameter-entity reference appears in a
4872 * choice, seq, or Mixed construct, its replacement text should not
4873 * be empty, and neither the first nor last non-blank character of
4874 * the replacement text should be a connector (| or ,).
4875 *
Daniel Veillard5e2dace2001-07-18 19:30:27 +00004876 * Returns the tree of xmlElementContentPtr describing the element
Owen Taylor3473f882001-02-23 17:55:21 +00004877 * hierarchy.
4878 */
4879xmlElementContentPtr
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004880xmlParseElementChildrenContentDecl (xmlParserCtxtPtr ctxt, int inputchk) {
Owen Taylor3473f882001-02-23 17:55:21 +00004881 xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004882 const xmlChar *elem;
Owen Taylor3473f882001-02-23 17:55:21 +00004883 xmlChar type = 0;
4884
4885 SKIP_BLANKS;
4886 GROW;
4887 if (RAW == '(') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004888 int inputid = ctxt->input->id;
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004889
Owen Taylor3473f882001-02-23 17:55:21 +00004890 /* Recurse on first child */
4891 NEXT;
4892 SKIP_BLANKS;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004893 cur = ret = xmlParseElementChildrenContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00004894 SKIP_BLANKS;
4895 GROW;
4896 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00004897 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004898 if (elem == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004899 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004900 return(NULL);
4901 }
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00004902 cur = ret = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00004903 if (cur == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004904 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00004905 return(NULL);
4906 }
Owen Taylor3473f882001-02-23 17:55:21 +00004907 GROW;
4908 if (RAW == '?') {
4909 cur->ocur = XML_ELEMENT_CONTENT_OPT;
4910 NEXT;
4911 } else if (RAW == '*') {
4912 cur->ocur = XML_ELEMENT_CONTENT_MULT;
4913 NEXT;
4914 } else if (RAW == '+') {
4915 cur->ocur = XML_ELEMENT_CONTENT_PLUS;
4916 NEXT;
4917 } else {
4918 cur->ocur = XML_ELEMENT_CONTENT_ONCE;
4919 }
Owen Taylor3473f882001-02-23 17:55:21 +00004920 GROW;
4921 }
4922 SKIP_BLANKS;
4923 SHRINK;
4924 while (RAW != ')') {
4925 /*
4926 * Each loop we parse one separator and one element.
4927 */
4928 if (RAW == ',') {
4929 if (type == 0) type = CUR;
4930
4931 /*
4932 * Detect "Name | Name , Name" error
4933 */
4934 else if (type != CUR) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004935 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004936 "xmlParseElementChildrenContentDecl : '%c' expected\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004937 type);
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00004938 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00004939 xmlFreeDocElementContent(ctxt->myDoc, last);
Owen Taylor3473f882001-02-23 17:55:21 +00004940 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00004941 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00004942 return(NULL);
4943 }
4944 NEXT;
4945
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00004946 op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_SEQ);
Owen Taylor3473f882001-02-23 17:55:21 +00004947 if (op == NULL) {
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00004948 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00004949 xmlFreeDocElementContent(ctxt->myDoc, last);
4950 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00004951 return(NULL);
4952 }
4953 if (last == NULL) {
4954 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004955 if (ret != NULL)
4956 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004957 ret = cur = op;
4958 } else {
4959 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004960 if (op != NULL)
4961 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004962 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004963 if (last != NULL)
4964 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004965 cur =op;
4966 last = NULL;
4967 }
4968 } else if (RAW == '|') {
4969 if (type == 0) type = CUR;
4970
4971 /*
4972 * Detect "Name , Name | Name" error
4973 */
4974 else if (type != CUR) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004975 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004976 "xmlParseElementChildrenContentDecl : '%c' expected\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004977 type);
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00004978 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00004979 xmlFreeDocElementContent(ctxt->myDoc, last);
Owen Taylor3473f882001-02-23 17:55:21 +00004980 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00004981 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00004982 return(NULL);
4983 }
4984 NEXT;
4985
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00004986 op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
Owen Taylor3473f882001-02-23 17:55:21 +00004987 if (op == NULL) {
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00004988 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00004989 xmlFreeDocElementContent(ctxt->myDoc, last);
Owen Taylor3473f882001-02-23 17:55:21 +00004990 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00004991 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00004992 return(NULL);
4993 }
4994 if (last == NULL) {
4995 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004996 if (ret != NULL)
4997 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004998 ret = cur = op;
4999 } else {
5000 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005001 if (op != NULL)
5002 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00005003 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005004 if (last != NULL)
5005 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00005006 cur =op;
5007 last = NULL;
5008 }
5009 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005010 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005011 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005012 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00005013 return(NULL);
5014 }
5015 GROW;
5016 SKIP_BLANKS;
5017 GROW;
5018 if (RAW == '(') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005019 int inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00005020 /* Recurse on second child */
5021 NEXT;
5022 SKIP_BLANKS;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005023 last = xmlParseElementChildrenContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00005024 SKIP_BLANKS;
5025 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00005026 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005027 if (elem == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005028 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005029 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005030 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00005031 return(NULL);
5032 }
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005033 last = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
Owen Taylor3473f882001-02-23 17:55:21 +00005034 if (RAW == '?') {
5035 last->ocur = XML_ELEMENT_CONTENT_OPT;
5036 NEXT;
5037 } else if (RAW == '*') {
5038 last->ocur = XML_ELEMENT_CONTENT_MULT;
5039 NEXT;
5040 } else if (RAW == '+') {
5041 last->ocur = XML_ELEMENT_CONTENT_PLUS;
5042 NEXT;
5043 } else {
5044 last->ocur = XML_ELEMENT_CONTENT_ONCE;
5045 }
5046 }
5047 SKIP_BLANKS;
5048 GROW;
5049 }
5050 if ((cur != NULL) && (last != NULL)) {
5051 cur->c2 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005052 if (last != NULL)
5053 last->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00005054 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005055 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00005056 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
5057"Element content declaration doesn't start and stop in the same entity\n",
5058 NULL);
Daniel Veillard8dc16a62002-02-19 21:08:48 +00005059 }
Owen Taylor3473f882001-02-23 17:55:21 +00005060 NEXT;
5061 if (RAW == '?') {
William M. Brackf8f2e8f2004-05-14 04:37:41 +00005062 if (ret != NULL) {
5063 if ((ret->ocur == XML_ELEMENT_CONTENT_PLUS) ||
5064 (ret->ocur == XML_ELEMENT_CONTENT_MULT))
5065 ret->ocur = XML_ELEMENT_CONTENT_MULT;
5066 else
5067 ret->ocur = XML_ELEMENT_CONTENT_OPT;
5068 }
Owen Taylor3473f882001-02-23 17:55:21 +00005069 NEXT;
5070 } else if (RAW == '*') {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00005071 if (ret != NULL) {
Daniel Veillarde470df72001-04-18 21:41:07 +00005072 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Daniel Veillardce2c2f02001-10-18 14:57:24 +00005073 cur = ret;
5074 /*
5075 * Some normalization:
5076 * (a | b* | c?)* == (a | b | c)*
5077 */
5078 while (cur->type == XML_ELEMENT_CONTENT_OR) {
5079 if ((cur->c1 != NULL) &&
5080 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
5081 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT)))
5082 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
5083 if ((cur->c2 != NULL) &&
5084 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
5085 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT)))
5086 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
5087 cur = cur->c2;
5088 }
5089 }
Owen Taylor3473f882001-02-23 17:55:21 +00005090 NEXT;
5091 } else if (RAW == '+') {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00005092 if (ret != NULL) {
5093 int found = 0;
5094
William M. Brackf8f2e8f2004-05-14 04:37:41 +00005095 if ((ret->ocur == XML_ELEMENT_CONTENT_OPT) ||
5096 (ret->ocur == XML_ELEMENT_CONTENT_MULT))
5097 ret->ocur = XML_ELEMENT_CONTENT_MULT;
William M. Brackeb8509c2004-05-14 03:48:02 +00005098 else
5099 ret->ocur = XML_ELEMENT_CONTENT_PLUS;
Daniel Veillardce2c2f02001-10-18 14:57:24 +00005100 /*
5101 * Some normalization:
5102 * (a | b*)+ == (a | b)*
5103 * (a | b?)+ == (a | b)*
5104 */
5105 while (cur->type == XML_ELEMENT_CONTENT_OR) {
5106 if ((cur->c1 != NULL) &&
5107 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
5108 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) {
5109 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
5110 found = 1;
5111 }
5112 if ((cur->c2 != NULL) &&
5113 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
5114 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) {
5115 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
5116 found = 1;
5117 }
5118 cur = cur->c2;
5119 }
5120 if (found)
5121 ret->ocur = XML_ELEMENT_CONTENT_MULT;
5122 }
Owen Taylor3473f882001-02-23 17:55:21 +00005123 NEXT;
5124 }
5125 return(ret);
5126}
5127
5128/**
5129 * xmlParseElementContentDecl:
5130 * @ctxt: an XML parser context
5131 * @name: the name of the element being defined.
5132 * @result: the Element Content pointer will be stored here if any
5133 *
5134 * parse the declaration for an Element content either Mixed or Children,
5135 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
5136 *
5137 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
5138 *
5139 * returns: the type of element content XML_ELEMENT_TYPE_xxx
5140 */
5141
5142int
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005143xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, const xmlChar *name,
Owen Taylor3473f882001-02-23 17:55:21 +00005144 xmlElementContentPtr *result) {
5145
5146 xmlElementContentPtr tree = NULL;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005147 int inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00005148 int res;
5149
5150 *result = NULL;
5151
5152 if (RAW != '(') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00005153 xmlFatalErrMsgStr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005154 "xmlParseElementContentDecl : %s '(' expected\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00005155 return(-1);
5156 }
5157 NEXT;
5158 GROW;
5159 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005160 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005161 tree = xmlParseElementMixedContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00005162 res = XML_ELEMENT_TYPE_MIXED;
5163 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005164 tree = xmlParseElementChildrenContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00005165 res = XML_ELEMENT_TYPE_ELEMENT;
5166 }
Owen Taylor3473f882001-02-23 17:55:21 +00005167 SKIP_BLANKS;
5168 *result = tree;
5169 return(res);
5170}
5171
5172/**
5173 * xmlParseElementDecl:
5174 * @ctxt: an XML parser context
5175 *
5176 * parse an Element declaration.
5177 *
5178 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
5179 *
5180 * [ VC: Unique Element Type Declaration ]
5181 * No element type may be declared more than once
5182 *
5183 * Returns the type of the element, or -1 in case of error
5184 */
5185int
5186xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005187 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00005188 int ret = -1;
5189 xmlElementContentPtr content = NULL;
5190
Daniel Veillard4c778d82005-01-23 17:37:44 +00005191 /* GROW; done in the caller */
Daniel Veillarda07050d2003-10-19 14:46:32 +00005192 if (CMP9(CUR_PTR, '<', '!', 'E', 'L', 'E', 'M', 'E', 'N', 'T')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005193 xmlParserInputPtr input = ctxt->input;
5194
5195 SKIP(9);
William M. Brack76e95df2003-10-18 16:20:14 +00005196 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005197 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5198 "Space required after 'ELEMENT'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005199 }
5200 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00005201 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005202 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005203 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5204 "xmlParseElementDecl: no name for Element\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005205 return(-1);
5206 }
5207 while ((RAW == 0) && (ctxt->inputNr > 1))
5208 xmlPopInput(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00005209 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005210 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5211 "Space required after the element name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005212 }
5213 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005214 if (CMP5(CUR_PTR, 'E', 'M', 'P', 'T', 'Y')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005215 SKIP(5);
5216 /*
5217 * Element must always be empty.
5218 */
5219 ret = XML_ELEMENT_TYPE_EMPTY;
5220 } else if ((RAW == 'A') && (NXT(1) == 'N') &&
5221 (NXT(2) == 'Y')) {
5222 SKIP(3);
5223 /*
5224 * Element is a generic container.
5225 */
5226 ret = XML_ELEMENT_TYPE_ANY;
5227 } else if (RAW == '(') {
5228 ret = xmlParseElementContentDecl(ctxt, name, &content);
5229 } else {
5230 /*
5231 * [ WFC: PEs in Internal Subset ] error handling.
5232 */
5233 if ((RAW == '%') && (ctxt->external == 0) &&
5234 (ctxt->inputNr == 1)) {
Daniel Veillardf403d292003-10-05 13:51:35 +00005235 xmlFatalErrMsg(ctxt, XML_ERR_PEREF_IN_INT_SUBSET,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005236 "PEReference: forbidden within markup decl in internal subset\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005237 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00005238 xmlFatalErrMsg(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
Owen Taylor3473f882001-02-23 17:55:21 +00005239 "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
5240 }
Owen Taylor3473f882001-02-23 17:55:21 +00005241 return(-1);
5242 }
5243
5244 SKIP_BLANKS;
5245 /*
5246 * Pop-up of finished entities.
5247 */
5248 while ((RAW == 0) && (ctxt->inputNr > 1))
5249 xmlPopInput(ctxt);
5250 SKIP_BLANKS;
5251
5252 if (RAW != '>') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005253 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005254 if (content != NULL) {
5255 xmlFreeDocElementContent(ctxt->myDoc, content);
5256 }
Owen Taylor3473f882001-02-23 17:55:21 +00005257 } else {
5258 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005259 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5260 "Element declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005261 }
5262
5263 NEXT;
5264 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005265 (ctxt->sax->elementDecl != NULL)) {
5266 if (content != NULL)
5267 content->parent = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005268 ctxt->sax->elementDecl(ctxt->userData, name, ret,
5269 content);
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005270 if ((content != NULL) && (content->parent == NULL)) {
5271 /*
5272 * this is a trick: if xmlAddElementDecl is called,
5273 * instead of copying the full tree it is plugged directly
5274 * if called from the parser. Avoid duplicating the
5275 * interfaces or change the API/ABI
5276 */
5277 xmlFreeDocElementContent(ctxt->myDoc, content);
5278 }
5279 } else if (content != NULL) {
5280 xmlFreeDocElementContent(ctxt->myDoc, content);
5281 }
Owen Taylor3473f882001-02-23 17:55:21 +00005282 }
Owen Taylor3473f882001-02-23 17:55:21 +00005283 }
5284 return(ret);
5285}
5286
5287/**
Owen Taylor3473f882001-02-23 17:55:21 +00005288 * xmlParseConditionalSections
5289 * @ctxt: an XML parser context
5290 *
5291 * [61] conditionalSect ::= includeSect | ignoreSect
5292 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
5293 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
5294 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
5295 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
5296 */
5297
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005298static void
Owen Taylor3473f882001-02-23 17:55:21 +00005299xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
5300 SKIP(3);
5301 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005302 if (CMP7(CUR_PTR, 'I', 'N', 'C', 'L', 'U', 'D', 'E')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005303 SKIP(7);
5304 SKIP_BLANKS;
5305 if (RAW != '[') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005306 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005307 } else {
5308 NEXT;
5309 }
5310 if (xmlParserDebugEntities) {
5311 if ((ctxt->input != NULL) && (ctxt->input->filename))
5312 xmlGenericError(xmlGenericErrorContext,
5313 "%s(%d): ", ctxt->input->filename,
5314 ctxt->input->line);
5315 xmlGenericError(xmlGenericErrorContext,
5316 "Entering INCLUDE Conditional Section\n");
5317 }
5318
5319 while ((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') ||
5320 (NXT(2) != '>'))) {
5321 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00005322 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00005323
5324 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5325 xmlParseConditionalSections(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00005326 } else if (IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005327 NEXT;
5328 } else if (RAW == '%') {
5329 xmlParsePEReference(ctxt);
5330 } else
5331 xmlParseMarkupDecl(ctxt);
5332
5333 /*
5334 * Pop-up of finished entities.
5335 */
5336 while ((RAW == 0) && (ctxt->inputNr > 1))
5337 xmlPopInput(ctxt);
5338
Daniel Veillardfdc91562002-07-01 21:52:03 +00005339 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005340 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005341 break;
5342 }
5343 }
5344 if (xmlParserDebugEntities) {
5345 if ((ctxt->input != NULL) && (ctxt->input->filename))
5346 xmlGenericError(xmlGenericErrorContext,
5347 "%s(%d): ", ctxt->input->filename,
5348 ctxt->input->line);
5349 xmlGenericError(xmlGenericErrorContext,
5350 "Leaving INCLUDE Conditional Section\n");
5351 }
5352
Daniel Veillarda07050d2003-10-19 14:46:32 +00005353 } else if (CMP6(CUR_PTR, 'I', 'G', 'N', 'O', 'R', 'E')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005354 int state;
William M. Brack78637da2003-07-31 14:47:38 +00005355 xmlParserInputState instate;
Owen Taylor3473f882001-02-23 17:55:21 +00005356 int depth = 0;
5357
5358 SKIP(6);
5359 SKIP_BLANKS;
5360 if (RAW != '[') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005361 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005362 } else {
5363 NEXT;
5364 }
5365 if (xmlParserDebugEntities) {
5366 if ((ctxt->input != NULL) && (ctxt->input->filename))
5367 xmlGenericError(xmlGenericErrorContext,
5368 "%s(%d): ", ctxt->input->filename,
5369 ctxt->input->line);
5370 xmlGenericError(xmlGenericErrorContext,
5371 "Entering IGNORE Conditional Section\n");
5372 }
5373
5374 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005375 * Parse up to the end of the conditional section
Owen Taylor3473f882001-02-23 17:55:21 +00005376 * But disable SAX event generating DTD building in the meantime
5377 */
5378 state = ctxt->disableSAX;
5379 instate = ctxt->instate;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005380 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005381 ctxt->instate = XML_PARSER_IGNORE;
5382
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00005383 while ((depth >= 0) && (RAW != 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005384 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5385 depth++;
5386 SKIP(3);
5387 continue;
5388 }
5389 if ((RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
5390 if (--depth >= 0) SKIP(3);
5391 continue;
5392 }
5393 NEXT;
5394 continue;
5395 }
5396
5397 ctxt->disableSAX = state;
5398 ctxt->instate = instate;
5399
5400 if (xmlParserDebugEntities) {
5401 if ((ctxt->input != NULL) && (ctxt->input->filename))
5402 xmlGenericError(xmlGenericErrorContext,
5403 "%s(%d): ", ctxt->input->filename,
5404 ctxt->input->line);
5405 xmlGenericError(xmlGenericErrorContext,
5406 "Leaving IGNORE Conditional Section\n");
5407 }
5408
5409 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005410 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID_KEYWORD, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005411 }
5412
5413 if (RAW == 0)
5414 SHRINK;
5415
5416 if (RAW == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005417 xmlFatalErr(ctxt, XML_ERR_CONDSEC_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005418 } else {
5419 SKIP(3);
5420 }
5421}
5422
5423/**
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005424 * xmlParseMarkupDecl:
5425 * @ctxt: an XML parser context
5426 *
5427 * parse Markup declarations
5428 *
5429 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
5430 * NotationDecl | PI | Comment
5431 *
5432 * [ VC: Proper Declaration/PE Nesting ]
5433 * Parameter-entity replacement text must be properly nested with
5434 * markup declarations. That is to say, if either the first character
5435 * or the last character of a markup declaration (markupdecl above) is
5436 * contained in the replacement text for a parameter-entity reference,
5437 * both must be contained in the same replacement text.
5438 *
5439 * [ WFC: PEs in Internal Subset ]
5440 * In the internal DTD subset, parameter-entity references can occur
5441 * only where markup declarations can occur, not within markup declarations.
5442 * (This does not apply to references that occur in external parameter
5443 * entities or to the external subset.)
5444 */
5445void
5446xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
5447 GROW;
Daniel Veillard4c778d82005-01-23 17:37:44 +00005448 if (CUR == '<') {
5449 if (NXT(1) == '!') {
5450 switch (NXT(2)) {
5451 case 'E':
5452 if (NXT(3) == 'L')
5453 xmlParseElementDecl(ctxt);
5454 else if (NXT(3) == 'N')
5455 xmlParseEntityDecl(ctxt);
5456 break;
5457 case 'A':
5458 xmlParseAttributeListDecl(ctxt);
5459 break;
5460 case 'N':
5461 xmlParseNotationDecl(ctxt);
5462 break;
5463 case '-':
5464 xmlParseComment(ctxt);
5465 break;
5466 default:
5467 /* there is an error but it will be detected later */
5468 break;
5469 }
5470 } else if (NXT(1) == '?') {
5471 xmlParsePI(ctxt);
5472 }
5473 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005474 /*
5475 * This is only for internal subset. On external entities,
5476 * the replacement is done before parsing stage
5477 */
5478 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
5479 xmlParsePEReference(ctxt);
5480
5481 /*
5482 * Conditional sections are allowed from entities included
5483 * by PE References in the internal subset.
5484 */
5485 if ((ctxt->external == 0) && (ctxt->inputNr > 1)) {
5486 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5487 xmlParseConditionalSections(ctxt);
5488 }
5489 }
5490
5491 ctxt->instate = XML_PARSER_DTD;
5492}
5493
5494/**
5495 * xmlParseTextDecl:
5496 * @ctxt: an XML parser context
5497 *
5498 * parse an XML declaration header for external entities
5499 *
5500 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
5501 *
5502 * Question: Seems that EncodingDecl is mandatory ? Is that a typo ?
5503 */
5504
5505void
5506xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
5507 xmlChar *version;
Daniel Veillardf5cb3cd2003-10-28 13:58:13 +00005508 const xmlChar *encoding;
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005509
5510 /*
5511 * We know that '<?xml' is here.
5512 */
Daniel Veillarda07050d2003-10-19 14:46:32 +00005513 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005514 SKIP(5);
5515 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005516 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_STARTED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005517 return;
5518 }
5519
William M. Brack76e95df2003-10-18 16:20:14 +00005520 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005521 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5522 "Space needed after '<?xml'\n");
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005523 }
5524 SKIP_BLANKS;
5525
5526 /*
5527 * We may have the VersionInfo here.
5528 */
5529 version = xmlParseVersionInfo(ctxt);
5530 if (version == NULL)
5531 version = xmlCharStrdup(XML_DEFAULT_VERSION);
Daniel Veillard401c2112002-01-07 16:54:10 +00005532 else {
William M. Brack76e95df2003-10-18 16:20:14 +00005533 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005534 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5535 "Space needed here\n");
Daniel Veillard401c2112002-01-07 16:54:10 +00005536 }
5537 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005538 ctxt->input->version = version;
5539
5540 /*
5541 * We must have the encoding declaration
5542 */
Daniel Veillardf5cb3cd2003-10-28 13:58:13 +00005543 encoding = xmlParseEncodingDecl(ctxt);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005544 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5545 /*
5546 * The XML REC instructs us to stop parsing right here
5547 */
5548 return;
5549 }
Daniel Veillardf5cb3cd2003-10-28 13:58:13 +00005550 if ((encoding == NULL) && (ctxt->errNo == XML_ERR_OK)) {
5551 xmlFatalErrMsg(ctxt, XML_ERR_MISSING_ENCODING,
5552 "Missing encoding in text declaration\n");
5553 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005554
5555 SKIP_BLANKS;
5556 if ((RAW == '?') && (NXT(1) == '>')) {
5557 SKIP(2);
5558 } else if (RAW == '>') {
5559 /* Deprecated old WD ... */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005560 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005561 NEXT;
5562 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005563 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005564 MOVETO_ENDTAG(CUR_PTR);
5565 NEXT;
5566 }
5567}
5568
5569/**
Owen Taylor3473f882001-02-23 17:55:21 +00005570 * xmlParseExternalSubset:
5571 * @ctxt: an XML parser context
5572 * @ExternalID: the external identifier
5573 * @SystemID: the system identifier (or URL)
5574 *
5575 * parse Markup declarations from an external subset
5576 *
5577 * [30] extSubset ::= textDecl? extSubsetDecl
5578 *
5579 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
5580 */
5581void
5582xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
5583 const xmlChar *SystemID) {
Daniel Veillard309f81d2003-09-23 09:02:53 +00005584 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005585 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005586 if (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005587 xmlParseTextDecl(ctxt);
5588 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5589 /*
5590 * The XML REC instructs us to stop parsing right here
5591 */
5592 ctxt->instate = XML_PARSER_EOF;
5593 return;
5594 }
5595 }
5596 if (ctxt->myDoc == NULL) {
5597 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
5598 }
5599 if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
5600 xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
5601
5602 ctxt->instate = XML_PARSER_DTD;
5603 ctxt->external = 1;
5604 while (((RAW == '<') && (NXT(1) == '?')) ||
5605 ((RAW == '<') && (NXT(1) == '!')) ||
William M. Brack76e95df2003-10-18 16:20:14 +00005606 (RAW == '%') || IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005607 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00005608 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00005609
5610 GROW;
5611 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5612 xmlParseConditionalSections(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00005613 } else if (IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005614 NEXT;
5615 } else if (RAW == '%') {
5616 xmlParsePEReference(ctxt);
5617 } else
5618 xmlParseMarkupDecl(ctxt);
5619
5620 /*
5621 * Pop-up of finished entities.
5622 */
5623 while ((RAW == 0) && (ctxt->inputNr > 1))
5624 xmlPopInput(ctxt);
5625
Daniel Veillardfdc91562002-07-01 21:52:03 +00005626 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005627 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005628 break;
5629 }
5630 }
5631
5632 if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005633 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005634 }
5635
5636}
5637
5638/**
5639 * xmlParseReference:
5640 * @ctxt: an XML parser context
5641 *
5642 * parse and handle entity references in content, depending on the SAX
5643 * interface, this may end-up in a call to character() if this is a
5644 * CharRef, a predefined entity, if there is no reference() callback.
5645 * or if the parser was asked to switch to that mode.
5646 *
5647 * [67] Reference ::= EntityRef | CharRef
5648 */
5649void
5650xmlParseReference(xmlParserCtxtPtr ctxt) {
5651 xmlEntityPtr ent;
5652 xmlChar *val;
5653 if (RAW != '&') return;
5654
5655 if (NXT(1) == '#') {
5656 int i = 0;
5657 xmlChar out[10];
5658 int hex = NXT(2);
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005659 int value = xmlParseCharRef(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005660
5661 if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
5662 /*
5663 * So we are using non-UTF-8 buffers
5664 * Check that the char fit on 8bits, if not
5665 * generate a CharRef.
5666 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005667 if (value <= 0xFF) {
5668 out[0] = value;
Owen Taylor3473f882001-02-23 17:55:21 +00005669 out[1] = 0;
5670 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5671 (!ctxt->disableSAX))
5672 ctxt->sax->characters(ctxt->userData, out, 1);
5673 } else {
5674 if ((hex == 'x') || (hex == 'X'))
Aleksey Sanin49cc9752002-06-14 17:07:10 +00005675 snprintf((char *)out, sizeof(out), "#x%X", value);
Owen Taylor3473f882001-02-23 17:55:21 +00005676 else
Aleksey Sanin49cc9752002-06-14 17:07:10 +00005677 snprintf((char *)out, sizeof(out), "#%d", value);
Owen Taylor3473f882001-02-23 17:55:21 +00005678 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
5679 (!ctxt->disableSAX))
5680 ctxt->sax->reference(ctxt->userData, out);
5681 }
5682 } else {
5683 /*
5684 * Just encode the value in UTF-8
5685 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005686 COPY_BUF(0 ,out, i, value);
Owen Taylor3473f882001-02-23 17:55:21 +00005687 out[i] = 0;
5688 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5689 (!ctxt->disableSAX))
5690 ctxt->sax->characters(ctxt->userData, out, i);
5691 }
5692 } else {
5693 ent = xmlParseEntityRef(ctxt);
5694 if (ent == NULL) return;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00005695 if (!ctxt->wellFormed)
5696 return;
Owen Taylor3473f882001-02-23 17:55:21 +00005697 if ((ent->name != NULL) &&
5698 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
5699 xmlNodePtr list = NULL;
Daniel Veillard7d515752003-09-26 19:12:37 +00005700 xmlParserErrors ret = XML_ERR_OK;
Owen Taylor3473f882001-02-23 17:55:21 +00005701
5702
5703 /*
5704 * The first reference to the entity trigger a parsing phase
5705 * where the ent->children is filled with the result from
5706 * the parsing.
5707 */
5708 if (ent->children == NULL) {
5709 xmlChar *value;
5710 value = ent->content;
5711
5712 /*
5713 * Check that this entity is well formed
5714 */
Daniel Veillard4aede2e2003-10-17 12:43:59 +00005715 if ((value != NULL) && (value[0] != 0) &&
Owen Taylor3473f882001-02-23 17:55:21 +00005716 (value[1] == 0) && (value[0] == '<') &&
5717 (xmlStrEqual(ent->name, BAD_CAST "lt"))) {
5718 /*
5719 * DONE: get definite answer on this !!!
5720 * Lots of entity decls are used to declare a single
5721 * char
5722 * <!ENTITY lt "<">
5723 * Which seems to be valid since
5724 * 2.4: The ampersand character (&) and the left angle
5725 * bracket (<) may appear in their literal form only
5726 * when used ... They are also legal within the literal
5727 * entity value of an internal entity declaration;i
5728 * see "4.3.2 Well-Formed Parsed Entities".
5729 * IMHO 2.4 and 4.3.2 are directly in contradiction.
5730 * Looking at the OASIS test suite and James Clark
5731 * tests, this is broken. However the XML REC uses
5732 * it. Is the XML REC not well-formed ????
5733 * This is a hack to avoid this problem
5734 *
5735 * ANSWER: since lt gt amp .. are already defined,
5736 * this is a redefinition and hence the fact that the
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005737 * content is not well balanced is not a Wf error, this
Owen Taylor3473f882001-02-23 17:55:21 +00005738 * is lousy but acceptable.
5739 */
5740 list = xmlNewDocText(ctxt->myDoc, value);
5741 if (list != NULL) {
5742 if ((ent->etype == XML_INTERNAL_GENERAL_ENTITY) &&
5743 (ent->children == NULL)) {
5744 ent->children = list;
5745 ent->last = list;
Daniel Veillard2d84a892002-12-30 00:01:08 +00005746 ent->owner = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005747 list->parent = (xmlNodePtr) ent;
5748 } else {
5749 xmlFreeNodeList(list);
5750 }
5751 } else if (list != NULL) {
5752 xmlFreeNodeList(list);
5753 }
5754 } else {
5755 /*
5756 * 4.3.2: An internal general parsed entity is well-formed
5757 * if its replacement text matches the production labeled
5758 * content.
5759 */
Daniel Veillardb5a60ec2002-03-18 11:45:56 +00005760
5761 void *user_data;
5762 /*
5763 * This is a bit hackish but this seems the best
5764 * way to make sure both SAX and DOM entity support
5765 * behaves okay.
5766 */
5767 if (ctxt->userData == ctxt)
5768 user_data = NULL;
5769 else
5770 user_data = ctxt->userData;
5771
Owen Taylor3473f882001-02-23 17:55:21 +00005772 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
5773 ctxt->depth++;
Daniel Veillard328f48c2002-11-15 15:24:34 +00005774 ret = xmlParseBalancedChunkMemoryInternal(ctxt,
5775 value, user_data, &list);
Owen Taylor3473f882001-02-23 17:55:21 +00005776 ctxt->depth--;
5777 } else if (ent->etype ==
5778 XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
5779 ctxt->depth++;
Daniel Veillarda97a19b2001-05-20 13:19:52 +00005780 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
Daniel Veillardb5a60ec2002-03-18 11:45:56 +00005781 ctxt->sax, user_data, ctxt->depth,
Daniel Veillarda97a19b2001-05-20 13:19:52 +00005782 ent->URI, ent->ExternalID, &list);
Owen Taylor3473f882001-02-23 17:55:21 +00005783 ctxt->depth--;
5784 } else {
Daniel Veillard7d515752003-09-26 19:12:37 +00005785 ret = XML_ERR_ENTITY_PE_INTERNAL;
Daniel Veillardf403d292003-10-05 13:51:35 +00005786 xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
5787 "invalid entity type found\n", NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005788 }
5789 if (ret == XML_ERR_ENTITY_LOOP) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005790 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00005791 return;
Daniel Veillard7d515752003-09-26 19:12:37 +00005792 } else if ((ret == XML_ERR_OK) && (list != NULL)) {
Daniel Veillard76d66f42001-05-16 21:05:17 +00005793 if (((ent->etype == XML_INTERNAL_GENERAL_ENTITY) ||
5794 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY))&&
Owen Taylor3473f882001-02-23 17:55:21 +00005795 (ent->children == NULL)) {
5796 ent->children = list;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005797 if (ctxt->replaceEntities) {
5798 /*
5799 * Prune it directly in the generated document
5800 * except for single text nodes.
5801 */
Daniel Veillard0df3bc32004-06-08 12:03:41 +00005802 if (((list->type == XML_TEXT_NODE) &&
5803 (list->next == NULL)) ||
5804 (ctxt->parseMode == XML_PARSE_READER)) {
Daniel Veillard62f313b2001-07-04 19:49:14 +00005805 list->parent = (xmlNodePtr) ent;
5806 list = NULL;
Daniel Veillard2d84a892002-12-30 00:01:08 +00005807 ent->owner = 1;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005808 } else {
Daniel Veillard2d84a892002-12-30 00:01:08 +00005809 ent->owner = 0;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005810 while (list != NULL) {
5811 list->parent = (xmlNodePtr) ctxt->node;
Daniel Veillard68e9e742002-11-16 15:35:11 +00005812 list->doc = ctxt->myDoc;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005813 if (list->next == NULL)
5814 ent->last = list;
5815 list = list->next;
Daniel Veillard8107a222002-01-13 14:10:10 +00005816 }
Daniel Veillard62f313b2001-07-04 19:49:14 +00005817 list = ent->children;
Daniel Veillard81273902003-09-30 00:43:48 +00005818#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +00005819 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
5820 xmlAddEntityReference(ent, list, NULL);
Daniel Veillard81273902003-09-30 00:43:48 +00005821#endif /* LIBXML_LEGACY_ENABLED */
Daniel Veillard62f313b2001-07-04 19:49:14 +00005822 }
5823 } else {
Daniel Veillard2d84a892002-12-30 00:01:08 +00005824 ent->owner = 1;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005825 while (list != NULL) {
5826 list->parent = (xmlNodePtr) ent;
5827 if (list->next == NULL)
5828 ent->last = list;
5829 list = list->next;
5830 }
Owen Taylor3473f882001-02-23 17:55:21 +00005831 }
5832 } else {
5833 xmlFreeNodeList(list);
Daniel Veillard62f313b2001-07-04 19:49:14 +00005834 list = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005835 }
William M. Brackb670e2e2003-09-27 01:05:55 +00005836 } else if ((ret != XML_ERR_OK) &&
5837 (ret != XML_WAR_UNDECLARED_ENTITY)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005838 xmlFatalErr(ctxt, ret, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005839 } else if (list != NULL) {
5840 xmlFreeNodeList(list);
Daniel Veillard62f313b2001-07-04 19:49:14 +00005841 list = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005842 }
5843 }
5844 }
5845 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
5846 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
5847 /*
5848 * Create a node.
5849 */
5850 ctxt->sax->reference(ctxt->userData, ent->name);
5851 return;
5852 } else if (ctxt->replaceEntities) {
William M. Brack1227fb32004-10-25 23:17:53 +00005853 /*
5854 * There is a problem on the handling of _private for entities
5855 * (bug 155816): Should we copy the content of the field from
5856 * the entity (possibly overwriting some value set by the user
5857 * when a copy is created), should we leave it alone, or should
5858 * we try to take care of different situations? The problem
5859 * is exacerbated by the usage of this field by the xmlReader.
5860 * To fix this bug, we look at _private on the created node
5861 * and, if it's NULL, we copy in whatever was in the entity.
5862 * If it's not NULL we leave it alone. This is somewhat of a
5863 * hack - maybe we should have further tests to determine
5864 * what to do.
5865 */
Owen Taylor3473f882001-02-23 17:55:21 +00005866 if ((ctxt->node != NULL) && (ent->children != NULL)) {
5867 /*
5868 * Seems we are generating the DOM content, do
Daniel Veillard62f313b2001-07-04 19:49:14 +00005869 * a simple tree copy for all references except the first
Daniel Veillard0df3bc32004-06-08 12:03:41 +00005870 * In the first occurrence list contains the replacement.
5871 * progressive == 2 means we are operating on the Reader
5872 * and since nodes are discarded we must copy all the time.
Owen Taylor3473f882001-02-23 17:55:21 +00005873 */
Daniel Veillard0df3bc32004-06-08 12:03:41 +00005874 if (((list == NULL) && (ent->owner == 0)) ||
5875 (ctxt->parseMode == XML_PARSE_READER)) {
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00005876 xmlNodePtr nw = NULL, cur, firstChild = NULL;
Daniel Veillard0df3bc32004-06-08 12:03:41 +00005877
5878 /*
5879 * when operating on a reader, the entities definitions
5880 * are always owning the entities subtree.
5881 if (ctxt->parseMode == XML_PARSE_READER)
5882 ent->owner = 1;
5883 */
5884
Daniel Veillard62f313b2001-07-04 19:49:14 +00005885 cur = ent->children;
5886 while (cur != NULL) {
Daniel Veillard03a53c32004-10-26 16:06:51 +00005887 nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00005888 if (nw != NULL) {
William M. Brack1227fb32004-10-25 23:17:53 +00005889 if (nw->_private == NULL)
5890 nw->_private = cur->_private;
Daniel Veillard8f872442003-01-09 23:19:02 +00005891 if (firstChild == NULL){
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00005892 firstChild = nw;
Daniel Veillard8f872442003-01-09 23:19:02 +00005893 }
Daniel Veillard0df3bc32004-06-08 12:03:41 +00005894 nw = xmlAddChild(ctxt->node, nw);
Daniel Veillard8107a222002-01-13 14:10:10 +00005895 }
Daniel Veillard0df3bc32004-06-08 12:03:41 +00005896 if (cur == ent->last) {
5897 /*
5898 * needed to detect some strange empty
5899 * node cases in the reader tests
5900 */
5901 if ((ctxt->parseMode == XML_PARSE_READER) &&
5902 (nw->type == XML_ELEMENT_NODE) &&
5903 (nw->children == NULL))
5904 nw->extra = 1;
5905
Daniel Veillard62f313b2001-07-04 19:49:14 +00005906 break;
Daniel Veillard0df3bc32004-06-08 12:03:41 +00005907 }
Daniel Veillard62f313b2001-07-04 19:49:14 +00005908 cur = cur->next;
5909 }
Daniel Veillard81273902003-09-30 00:43:48 +00005910#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +00005911 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00005912 xmlAddEntityReference(ent, firstChild, nw);
Daniel Veillard81273902003-09-30 00:43:48 +00005913#endif /* LIBXML_LEGACY_ENABLED */
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00005914 } else if (list == NULL) {
5915 xmlNodePtr nw = NULL, cur, next, last,
5916 firstChild = NULL;
5917 /*
5918 * Copy the entity child list and make it the new
5919 * entity child list. The goal is to make sure any
5920 * ID or REF referenced will be the one from the
5921 * document content and not the entity copy.
5922 */
5923 cur = ent->children;
5924 ent->children = NULL;
5925 last = ent->last;
5926 ent->last = NULL;
5927 while (cur != NULL) {
5928 next = cur->next;
5929 cur->next = NULL;
5930 cur->parent = NULL;
Daniel Veillard03a53c32004-10-26 16:06:51 +00005931 nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00005932 if (nw != NULL) {
William M. Brack1227fb32004-10-25 23:17:53 +00005933 if (nw->_private == NULL)
5934 nw->_private = cur->_private;
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00005935 if (firstChild == NULL){
5936 firstChild = cur;
5937 }
5938 xmlAddChild((xmlNodePtr) ent, nw);
5939 xmlAddChild(ctxt->node, cur);
5940 }
5941 if (cur == last)
5942 break;
5943 cur = next;
5944 }
5945 ent->owner = 1;
Daniel Veillard81273902003-09-30 00:43:48 +00005946#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00005947 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
5948 xmlAddEntityReference(ent, firstChild, nw);
Daniel Veillard81273902003-09-30 00:43:48 +00005949#endif /* LIBXML_LEGACY_ENABLED */
Daniel Veillard62f313b2001-07-04 19:49:14 +00005950 } else {
Daniel Veillard370ba3d2004-10-25 16:23:56 +00005951 const xmlChar *nbktext;
5952
Daniel Veillard62f313b2001-07-04 19:49:14 +00005953 /*
5954 * the name change is to avoid coalescing of the
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005955 * node with a possible previous text one which
5956 * would make ent->children a dangling pointer
Daniel Veillard62f313b2001-07-04 19:49:14 +00005957 */
Daniel Veillard370ba3d2004-10-25 16:23:56 +00005958 nbktext = xmlDictLookup(ctxt->dict, BAD_CAST "nbktext",
5959 -1);
Daniel Veillard62f313b2001-07-04 19:49:14 +00005960 if (ent->children->type == XML_TEXT_NODE)
Daniel Veillard370ba3d2004-10-25 16:23:56 +00005961 ent->children->name = nbktext;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005962 if ((ent->last != ent->children) &&
5963 (ent->last->type == XML_TEXT_NODE))
Daniel Veillard370ba3d2004-10-25 16:23:56 +00005964 ent->last->name = nbktext;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005965 xmlAddChildList(ctxt->node, ent->children);
5966 }
5967
Owen Taylor3473f882001-02-23 17:55:21 +00005968 /*
5969 * This is to avoid a nasty side effect, see
5970 * characters() in SAX.c
5971 */
5972 ctxt->nodemem = 0;
5973 ctxt->nodelen = 0;
5974 return;
5975 } else {
5976 /*
5977 * Probably running in SAX mode
5978 */
5979 xmlParserInputPtr input;
5980
5981 input = xmlNewEntityInputStream(ctxt, ent);
5982 xmlPushInput(ctxt, input);
5983 if ((ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) &&
Daniel Veillarda07050d2003-10-19 14:46:32 +00005984 (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
5985 (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +00005986 xmlParseTextDecl(ctxt);
5987 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5988 /*
5989 * The XML REC instructs us to stop parsing right here
5990 */
5991 ctxt->instate = XML_PARSER_EOF;
5992 return;
5993 }
5994 if (input->standalone == 1) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005995 xmlFatalErr(ctxt, XML_ERR_EXT_ENTITY_STANDALONE,
5996 NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005997 }
5998 }
5999 return;
6000 }
6001 }
6002 } else {
6003 val = ent->content;
6004 if (val == NULL) return;
6005 /*
6006 * inline the entity.
6007 */
6008 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
6009 (!ctxt->disableSAX))
6010 ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
6011 }
6012 }
6013}
6014
6015/**
6016 * xmlParseEntityRef:
6017 * @ctxt: an XML parser context
6018 *
6019 * parse ENTITY references declarations
6020 *
6021 * [68] EntityRef ::= '&' Name ';'
6022 *
6023 * [ WFC: Entity Declared ]
6024 * In a document without any DTD, a document with only an internal DTD
6025 * subset which contains no parameter entity references, or a document
6026 * with "standalone='yes'", the Name given in the entity reference
6027 * must match that in an entity declaration, except that well-formed
6028 * documents need not declare any of the following entities: amp, lt,
6029 * gt, apos, quot. The declaration of a parameter entity must precede
6030 * any reference to it. Similarly, the declaration of a general entity
6031 * must precede any reference to it which appears in a default value in an
6032 * attribute-list declaration. Note that if entities are declared in the
6033 * external subset or in external parameter entities, a non-validating
6034 * processor is not obligated to read and process their declarations;
6035 * for such documents, the rule that an entity must be declared is a
6036 * well-formedness constraint only if standalone='yes'.
6037 *
6038 * [ WFC: Parsed Entity ]
6039 * An entity reference must not contain the name of an unparsed entity
6040 *
6041 * Returns the xmlEntityPtr if found, or NULL otherwise.
6042 */
6043xmlEntityPtr
6044xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006045 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00006046 xmlEntityPtr ent = NULL;
6047
6048 GROW;
6049
6050 if (RAW == '&') {
6051 NEXT;
6052 name = xmlParseName(ctxt);
6053 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006054 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6055 "xmlParseEntityRef: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006056 } else {
6057 if (RAW == ';') {
6058 NEXT;
6059 /*
6060 * Ask first SAX for entity resolution, otherwise try the
6061 * predefined set.
6062 */
6063 if (ctxt->sax != NULL) {
6064 if (ctxt->sax->getEntity != NULL)
6065 ent = ctxt->sax->getEntity(ctxt->userData, name);
Daniel Veillard39eb88b2003-03-11 11:21:28 +00006066 if ((ctxt->wellFormed == 1 ) && (ent == NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00006067 ent = xmlGetPredefinedEntity(name);
Daniel Veillard39eb88b2003-03-11 11:21:28 +00006068 if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
6069 (ctxt->userData==ctxt)) {
Daniel Veillard1af9a412003-08-20 22:54:39 +00006070 ent = xmlSAX2GetEntity(ctxt, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00006071 }
Owen Taylor3473f882001-02-23 17:55:21 +00006072 }
6073 /*
6074 * [ WFC: Entity Declared ]
6075 * In a document without any DTD, a document with only an
6076 * internal DTD subset which contains no parameter entity
6077 * references, or a document with "standalone='yes'", the
6078 * Name given in the entity reference must match that in an
6079 * entity declaration, except that well-formed documents
6080 * need not declare any of the following entities: amp, lt,
6081 * gt, apos, quot.
6082 * The declaration of a parameter entity must precede any
6083 * reference to it.
6084 * Similarly, the declaration of a general entity must
6085 * precede any reference to it which appears in a default
6086 * value in an attribute-list declaration. Note that if
6087 * entities are declared in the external subset or in
6088 * external parameter entities, a non-validating processor
6089 * is not obligated to read and process their declarations;
6090 * for such documents, the rule that an entity must be
6091 * declared is a well-formedness constraint only if
6092 * standalone='yes'.
6093 */
6094 if (ent == NULL) {
6095 if ((ctxt->standalone == 1) ||
6096 ((ctxt->hasExternalSubset == 0) &&
6097 (ctxt->hasPErefs == 0))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006098 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00006099 "Entity '%s' not defined\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006100 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00006101 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00006102 "Entity '%s' not defined\n", name);
6103 }
Daniel Veillardf403d292003-10-05 13:51:35 +00006104 ctxt->valid = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00006105 }
6106
6107 /*
6108 * [ WFC: Parsed Entity ]
6109 * An entity reference must not contain the name of an
6110 * unparsed entity
6111 */
6112 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006113 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00006114 "Entity reference to unparsed entity %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006115 }
6116
6117 /*
6118 * [ WFC: No External Entity References ]
6119 * Attribute values cannot contain direct or indirect
6120 * entity references to external entities.
6121 */
6122 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
6123 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006124 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
6125 "Attribute references external entity '%s'\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006126 }
6127 /*
6128 * [ WFC: No < in Attribute Values ]
6129 * The replacement text of any entity referred to directly or
6130 * indirectly in an attribute value (other than "&lt;") must
6131 * not contain a <.
6132 */
6133 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
6134 (ent != NULL) &&
6135 (!xmlStrEqual(ent->name, BAD_CAST "lt")) &&
6136 (ent->content != NULL) &&
6137 (xmlStrchr(ent->content, '<'))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006138 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
Owen Taylor3473f882001-02-23 17:55:21 +00006139 "'<' in entity '%s' is not allowed in attributes values\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006140 }
6141
6142 /*
6143 * Internal check, no parameter entities here ...
6144 */
6145 else {
6146 switch (ent->etype) {
6147 case XML_INTERNAL_PARAMETER_ENTITY:
6148 case XML_EXTERNAL_PARAMETER_ENTITY:
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006149 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
6150 "Attempt to reference the parameter entity '%s'\n",
6151 name);
Owen Taylor3473f882001-02-23 17:55:21 +00006152 break;
6153 default:
6154 break;
6155 }
6156 }
6157
6158 /*
6159 * [ WFC: No Recursion ]
6160 * A parsed entity must not contain a recursive reference
6161 * to itself, either directly or indirectly.
6162 * Done somewhere else
6163 */
6164
6165 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006166 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006167 }
Owen Taylor3473f882001-02-23 17:55:21 +00006168 }
6169 }
6170 return(ent);
6171}
6172
6173/**
6174 * xmlParseStringEntityRef:
6175 * @ctxt: an XML parser context
6176 * @str: a pointer to an index in the string
6177 *
6178 * parse ENTITY references declarations, but this version parses it from
6179 * a string value.
6180 *
6181 * [68] EntityRef ::= '&' Name ';'
6182 *
6183 * [ WFC: Entity Declared ]
6184 * In a document without any DTD, a document with only an internal DTD
6185 * subset which contains no parameter entity references, or a document
6186 * with "standalone='yes'", the Name given in the entity reference
6187 * must match that in an entity declaration, except that well-formed
6188 * documents need not declare any of the following entities: amp, lt,
6189 * gt, apos, quot. The declaration of a parameter entity must precede
6190 * any reference to it. Similarly, the declaration of a general entity
6191 * must precede any reference to it which appears in a default value in an
6192 * attribute-list declaration. Note that if entities are declared in the
6193 * external subset or in external parameter entities, a non-validating
6194 * processor is not obligated to read and process their declarations;
6195 * for such documents, the rule that an entity must be declared is a
6196 * well-formedness constraint only if standalone='yes'.
6197 *
6198 * [ WFC: Parsed Entity ]
6199 * An entity reference must not contain the name of an unparsed entity
6200 *
6201 * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
6202 * is updated to the current location in the string.
6203 */
6204xmlEntityPtr
6205xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
6206 xmlChar *name;
6207 const xmlChar *ptr;
6208 xmlChar cur;
6209 xmlEntityPtr ent = NULL;
6210
6211 if ((str == NULL) || (*str == NULL))
6212 return(NULL);
6213 ptr = *str;
6214 cur = *ptr;
6215 if (cur == '&') {
6216 ptr++;
6217 cur = *ptr;
6218 name = xmlParseStringName(ctxt, &ptr);
6219 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006220 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6221 "xmlParseStringEntityRef: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006222 } else {
6223 if (*ptr == ';') {
6224 ptr++;
6225 /*
6226 * Ask first SAX for entity resolution, otherwise try the
6227 * predefined set.
6228 */
6229 if (ctxt->sax != NULL) {
6230 if (ctxt->sax->getEntity != NULL)
6231 ent = ctxt->sax->getEntity(ctxt->userData, name);
6232 if (ent == NULL)
6233 ent = xmlGetPredefinedEntity(name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00006234 if ((ent == NULL) && (ctxt->userData==ctxt)) {
Daniel Veillard1af9a412003-08-20 22:54:39 +00006235 ent = xmlSAX2GetEntity(ctxt, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00006236 }
Owen Taylor3473f882001-02-23 17:55:21 +00006237 }
6238 /*
6239 * [ WFC: Entity Declared ]
6240 * In a document without any DTD, a document with only an
6241 * internal DTD subset which contains no parameter entity
6242 * references, or a document with "standalone='yes'", the
6243 * Name given in the entity reference must match that in an
6244 * entity declaration, except that well-formed documents
6245 * need not declare any of the following entities: amp, lt,
6246 * gt, apos, quot.
6247 * The declaration of a parameter entity must precede any
6248 * reference to it.
6249 * Similarly, the declaration of a general entity must
6250 * precede any reference to it which appears in a default
6251 * value in an attribute-list declaration. Note that if
6252 * entities are declared in the external subset or in
6253 * external parameter entities, a non-validating processor
6254 * is not obligated to read and process their declarations;
6255 * for such documents, the rule that an entity must be
6256 * declared is a well-formedness constraint only if
6257 * standalone='yes'.
6258 */
6259 if (ent == NULL) {
6260 if ((ctxt->standalone == 1) ||
6261 ((ctxt->hasExternalSubset == 0) &&
6262 (ctxt->hasPErefs == 0))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006263 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00006264 "Entity '%s' not defined\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006265 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00006266 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
Daniel Veillard24eb9782003-10-04 21:08:09 +00006267 "Entity '%s' not defined\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00006268 name);
Owen Taylor3473f882001-02-23 17:55:21 +00006269 }
Daniel Veillard24eb9782003-10-04 21:08:09 +00006270 /* TODO ? check regressions ctxt->valid = 0; */
Owen Taylor3473f882001-02-23 17:55:21 +00006271 }
6272
6273 /*
6274 * [ WFC: Parsed Entity ]
6275 * An entity reference must not contain the name of an
6276 * unparsed entity
6277 */
6278 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
Daniel Veillardf403d292003-10-05 13:51:35 +00006279 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00006280 "Entity reference to unparsed entity %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006281 }
6282
6283 /*
6284 * [ WFC: No External Entity References ]
6285 * Attribute values cannot contain direct or indirect
6286 * entity references to external entities.
6287 */
6288 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
6289 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
Daniel Veillardf403d292003-10-05 13:51:35 +00006290 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
Owen Taylor3473f882001-02-23 17:55:21 +00006291 "Attribute references external entity '%s'\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006292 }
6293 /*
6294 * [ WFC: No < in Attribute Values ]
6295 * The replacement text of any entity referred to directly or
6296 * indirectly in an attribute value (other than "&lt;") must
6297 * not contain a <.
6298 */
6299 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
6300 (ent != NULL) &&
6301 (!xmlStrEqual(ent->name, BAD_CAST "lt")) &&
6302 (ent->content != NULL) &&
6303 (xmlStrchr(ent->content, '<'))) {
Daniel Veillardf403d292003-10-05 13:51:35 +00006304 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
6305 "'<' in entity '%s' is not allowed in attributes values\n",
6306 name);
Owen Taylor3473f882001-02-23 17:55:21 +00006307 }
6308
6309 /*
6310 * Internal check, no parameter entities here ...
6311 */
6312 else {
6313 switch (ent->etype) {
6314 case XML_INTERNAL_PARAMETER_ENTITY:
6315 case XML_EXTERNAL_PARAMETER_ENTITY:
Daniel Veillardf403d292003-10-05 13:51:35 +00006316 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
6317 "Attempt to reference the parameter entity '%s'\n",
6318 name);
Owen Taylor3473f882001-02-23 17:55:21 +00006319 break;
6320 default:
6321 break;
6322 }
6323 }
6324
6325 /*
6326 * [ WFC: No Recursion ]
6327 * A parsed entity must not contain a recursive reference
6328 * to itself, either directly or indirectly.
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006329 * Done somewhere else
Owen Taylor3473f882001-02-23 17:55:21 +00006330 */
6331
6332 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006333 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006334 }
6335 xmlFree(name);
6336 }
6337 }
6338 *str = ptr;
6339 return(ent);
6340}
6341
6342/**
6343 * xmlParsePEReference:
6344 * @ctxt: an XML parser context
6345 *
6346 * parse PEReference declarations
6347 * The entity content is handled directly by pushing it's content as
6348 * a new input stream.
6349 *
6350 * [69] PEReference ::= '%' Name ';'
6351 *
6352 * [ WFC: No Recursion ]
6353 * A parsed entity must not contain a recursive
6354 * reference to itself, either directly or indirectly.
6355 *
6356 * [ WFC: Entity Declared ]
6357 * In a document without any DTD, a document with only an internal DTD
6358 * subset which contains no parameter entity references, or a document
6359 * with "standalone='yes'", ... ... The declaration of a parameter
6360 * entity must precede any reference to it...
6361 *
6362 * [ VC: Entity Declared ]
6363 * In a document with an external subset or external parameter entities
6364 * with "standalone='no'", ... ... The declaration of a parameter entity
6365 * must precede any reference to it...
6366 *
6367 * [ WFC: In DTD ]
6368 * Parameter-entity references may only appear in the DTD.
6369 * NOTE: misleading but this is handled.
6370 */
6371void
Daniel Veillard8f597c32003-10-06 08:19:27 +00006372xmlParsePEReference(xmlParserCtxtPtr ctxt)
6373{
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006374 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00006375 xmlEntityPtr entity = NULL;
6376 xmlParserInputPtr input;
6377
6378 if (RAW == '%') {
6379 NEXT;
Daniel Veillard76d66f42001-05-16 21:05:17 +00006380 name = xmlParseName(ctxt);
Daniel Veillard8f597c32003-10-06 08:19:27 +00006381 if (name == NULL) {
6382 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6383 "xmlParsePEReference: no name\n");
6384 } else {
6385 if (RAW == ';') {
6386 NEXT;
6387 if ((ctxt->sax != NULL) &&
6388 (ctxt->sax->getParameterEntity != NULL))
6389 entity = ctxt->sax->getParameterEntity(ctxt->userData,
6390 name);
6391 if (entity == NULL) {
6392 /*
6393 * [ WFC: Entity Declared ]
6394 * In a document without any DTD, a document with only an
6395 * internal DTD subset which contains no parameter entity
6396 * references, or a document with "standalone='yes'", ...
6397 * ... The declaration of a parameter entity must precede
6398 * any reference to it...
6399 */
6400 if ((ctxt->standalone == 1) ||
6401 ((ctxt->hasExternalSubset == 0) &&
6402 (ctxt->hasPErefs == 0))) {
6403 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
6404 "PEReference: %%%s; not found\n",
6405 name);
6406 } else {
6407 /*
6408 * [ VC: Entity Declared ]
6409 * In a document with an external subset or external
6410 * parameter entities with "standalone='no'", ...
6411 * ... The declaration of a parameter entity must
6412 * precede any reference to it...
6413 */
6414 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
6415 "PEReference: %%%s; not found\n",
6416 name, NULL);
6417 ctxt->valid = 0;
6418 }
6419 } else {
6420 /*
6421 * Internal checking in case the entity quest barfed
6422 */
6423 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
6424 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
6425 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
6426 "Internal: %%%s; is not a parameter entity\n",
6427 name, NULL);
6428 } else if (ctxt->input->free != deallocblankswrapper) {
6429 input =
6430 xmlNewBlanksWrapperInputStream(ctxt, entity);
6431 xmlPushInput(ctxt, input);
6432 } else {
6433 /*
6434 * TODO !!!
6435 * handle the extra spaces added before and after
6436 * c.f. http://www.w3.org/TR/REC-xml#as-PE
6437 */
6438 input = xmlNewEntityInputStream(ctxt, entity);
6439 xmlPushInput(ctxt, input);
6440 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
Daniel Veillarda07050d2003-10-19 14:46:32 +00006441 (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
William M. Brack76e95df2003-10-18 16:20:14 +00006442 (IS_BLANK_CH(NXT(5)))) {
Daniel Veillard8f597c32003-10-06 08:19:27 +00006443 xmlParseTextDecl(ctxt);
6444 if (ctxt->errNo ==
6445 XML_ERR_UNSUPPORTED_ENCODING) {
6446 /*
6447 * The XML REC instructs us to stop parsing
6448 * right here
6449 */
6450 ctxt->instate = XML_PARSER_EOF;
6451 return;
6452 }
6453 }
6454 }
6455 }
6456 ctxt->hasPErefs = 1;
6457 } else {
6458 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
6459 }
6460 }
Owen Taylor3473f882001-02-23 17:55:21 +00006461 }
6462}
6463
6464/**
6465 * xmlParseStringPEReference:
6466 * @ctxt: an XML parser context
6467 * @str: a pointer to an index in the string
6468 *
6469 * parse PEReference declarations
6470 *
6471 * [69] PEReference ::= '%' Name ';'
6472 *
6473 * [ WFC: No Recursion ]
6474 * A parsed entity must not contain a recursive
6475 * reference to itself, either directly or indirectly.
6476 *
6477 * [ WFC: Entity Declared ]
6478 * In a document without any DTD, a document with only an internal DTD
6479 * subset which contains no parameter entity references, or a document
6480 * with "standalone='yes'", ... ... The declaration of a parameter
6481 * entity must precede any reference to it...
6482 *
6483 * [ VC: Entity Declared ]
6484 * In a document with an external subset or external parameter entities
6485 * with "standalone='no'", ... ... The declaration of a parameter entity
6486 * must precede any reference to it...
6487 *
6488 * [ WFC: In DTD ]
6489 * Parameter-entity references may only appear in the DTD.
6490 * NOTE: misleading but this is handled.
6491 *
6492 * Returns the string of the entity content.
6493 * str is updated to the current value of the index
6494 */
6495xmlEntityPtr
6496xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
6497 const xmlChar *ptr;
6498 xmlChar cur;
6499 xmlChar *name;
6500 xmlEntityPtr entity = NULL;
6501
6502 if ((str == NULL) || (*str == NULL)) return(NULL);
6503 ptr = *str;
6504 cur = *ptr;
6505 if (cur == '%') {
6506 ptr++;
6507 cur = *ptr;
6508 name = xmlParseStringName(ctxt, &ptr);
6509 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006510 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6511 "xmlParseStringPEReference: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006512 } else {
6513 cur = *ptr;
6514 if (cur == ';') {
6515 ptr++;
6516 cur = *ptr;
6517 if ((ctxt->sax != NULL) &&
6518 (ctxt->sax->getParameterEntity != NULL))
6519 entity = ctxt->sax->getParameterEntity(ctxt->userData,
6520 name);
6521 if (entity == NULL) {
6522 /*
6523 * [ WFC: Entity Declared ]
6524 * In a document without any DTD, a document with only an
6525 * internal DTD subset which contains no parameter entity
6526 * references, or a document with "standalone='yes'", ...
6527 * ... The declaration of a parameter entity must precede
6528 * any reference to it...
6529 */
6530 if ((ctxt->standalone == 1) ||
6531 ((ctxt->hasExternalSubset == 0) &&
6532 (ctxt->hasPErefs == 0))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006533 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00006534 "PEReference: %%%s; not found\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006535 } else {
6536 /*
6537 * [ VC: Entity Declared ]
6538 * In a document with an external subset or external
6539 * parameter entities with "standalone='no'", ...
6540 * ... The declaration of a parameter entity must
6541 * precede any reference to it...
6542 */
Daniel Veillard24eb9782003-10-04 21:08:09 +00006543 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
6544 "PEReference: %%%s; not found\n",
6545 name, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006546 ctxt->valid = 0;
6547 }
6548 } else {
6549 /*
6550 * Internal checking in case the entity quest barfed
6551 */
6552 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
6553 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00006554 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
6555 "%%%s; is not a parameter entity\n",
6556 name, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006557 }
6558 }
6559 ctxt->hasPErefs = 1;
6560 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006561 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006562 }
6563 xmlFree(name);
6564 }
6565 }
6566 *str = ptr;
6567 return(entity);
6568}
6569
6570/**
6571 * xmlParseDocTypeDecl:
6572 * @ctxt: an XML parser context
6573 *
6574 * parse a DOCTYPE declaration
6575 *
6576 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
6577 * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
6578 *
6579 * [ VC: Root Element Type ]
6580 * The Name in the document type declaration must match the element
6581 * type of the root element.
6582 */
6583
6584void
6585xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006586 const xmlChar *name = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00006587 xmlChar *ExternalID = NULL;
6588 xmlChar *URI = NULL;
6589
6590 /*
6591 * We know that '<!DOCTYPE' has been detected.
6592 */
6593 SKIP(9);
6594
6595 SKIP_BLANKS;
6596
6597 /*
6598 * Parse the DOCTYPE name.
6599 */
6600 name = xmlParseName(ctxt);
6601 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006602 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6603 "xmlParseDocTypeDecl : no DOCTYPE name !\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006604 }
6605 ctxt->intSubName = name;
6606
6607 SKIP_BLANKS;
6608
6609 /*
6610 * Check for SystemID and ExternalID
6611 */
6612 URI = xmlParseExternalID(ctxt, &ExternalID, 1);
6613
6614 if ((URI != NULL) || (ExternalID != NULL)) {
6615 ctxt->hasExternalSubset = 1;
6616 }
6617 ctxt->extSubURI = URI;
6618 ctxt->extSubSystem = ExternalID;
6619
6620 SKIP_BLANKS;
6621
6622 /*
6623 * Create and update the internal subset.
6624 */
6625 if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
6626 (!ctxt->disableSAX))
6627 ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
6628
6629 /*
6630 * Is there any internal subset declarations ?
6631 * they are handled separately in xmlParseInternalSubset()
6632 */
6633 if (RAW == '[')
6634 return;
6635
6636 /*
6637 * We should be at the end of the DOCTYPE declaration.
6638 */
6639 if (RAW != '>') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006640 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006641 }
6642 NEXT;
6643}
6644
6645/**
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006646 * xmlParseInternalSubset:
Owen Taylor3473f882001-02-23 17:55:21 +00006647 * @ctxt: an XML parser context
6648 *
6649 * parse the internal subset declaration
6650 *
6651 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
6652 */
6653
Daniel Veillard56a4cb82001-03-24 17:00:36 +00006654static void
Owen Taylor3473f882001-02-23 17:55:21 +00006655xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
6656 /*
6657 * Is there any DTD definition ?
6658 */
6659 if (RAW == '[') {
6660 ctxt->instate = XML_PARSER_DTD;
6661 NEXT;
6662 /*
6663 * Parse the succession of Markup declarations and
6664 * PEReferences.
6665 * Subsequence (markupdecl | PEReference | S)*
6666 */
6667 while (RAW != ']') {
6668 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00006669 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00006670
6671 SKIP_BLANKS;
6672 xmlParseMarkupDecl(ctxt);
6673 xmlParsePEReference(ctxt);
6674
6675 /*
6676 * Pop-up of finished entities.
6677 */
6678 while ((RAW == 0) && (ctxt->inputNr > 1))
6679 xmlPopInput(ctxt);
6680
6681 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006682 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
Owen Taylor3473f882001-02-23 17:55:21 +00006683 "xmlParseInternalSubset: error detected in Markup declaration\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006684 break;
6685 }
6686 }
6687 if (RAW == ']') {
6688 NEXT;
6689 SKIP_BLANKS;
6690 }
6691 }
6692
6693 /*
6694 * We should be at the end of the DOCTYPE declaration.
6695 */
6696 if (RAW != '>') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006697 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006698 }
6699 NEXT;
6700}
6701
Daniel Veillard81273902003-09-30 00:43:48 +00006702#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +00006703/**
6704 * xmlParseAttribute:
6705 * @ctxt: an XML parser context
6706 * @value: a xmlChar ** used to store the value of the attribute
6707 *
6708 * parse an attribute
6709 *
6710 * [41] Attribute ::= Name Eq AttValue
6711 *
6712 * [ WFC: No External Entity References ]
6713 * Attribute values cannot contain direct or indirect entity references
6714 * to external entities.
6715 *
6716 * [ WFC: No < in Attribute Values ]
6717 * The replacement text of any entity referred to directly or indirectly in
6718 * an attribute value (other than "&lt;") must not contain a <.
6719 *
6720 * [ VC: Attribute Value Type ]
6721 * The attribute must have been declared; the value must be of the type
6722 * declared for it.
6723 *
6724 * [25] Eq ::= S? '=' S?
6725 *
6726 * With namespace:
6727 *
6728 * [NS 11] Attribute ::= QName Eq AttValue
6729 *
6730 * Also the case QName == xmlns:??? is handled independently as a namespace
6731 * definition.
6732 *
6733 * Returns the attribute name, and the value in *value.
6734 */
6735
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006736const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00006737xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006738 const xmlChar *name;
6739 xmlChar *val;
Owen Taylor3473f882001-02-23 17:55:21 +00006740
6741 *value = NULL;
Daniel Veillard878eab02002-02-19 13:46:09 +00006742 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00006743 name = xmlParseName(ctxt);
6744 if (name == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00006745 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006746 "error parsing attribute name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006747 return(NULL);
6748 }
6749
6750 /*
6751 * read the value
6752 */
6753 SKIP_BLANKS;
6754 if (RAW == '=') {
6755 NEXT;
6756 SKIP_BLANKS;
6757 val = xmlParseAttValue(ctxt);
6758 ctxt->instate = XML_PARSER_CONTENT;
6759 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006760 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
Owen Taylor3473f882001-02-23 17:55:21 +00006761 "Specification mandate value for attribute %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006762 return(NULL);
6763 }
6764
6765 /*
6766 * Check that xml:lang conforms to the specification
6767 * No more registered as an error, just generate a warning now
6768 * since this was deprecated in XML second edition
6769 */
6770 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
6771 if (!xmlCheckLanguageID(val)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00006772 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
6773 "Malformed value for xml:lang : %s\n",
6774 val, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006775 }
6776 }
6777
6778 /*
6779 * Check that xml:space conforms to the specification
6780 */
6781 if (xmlStrEqual(name, BAD_CAST "xml:space")) {
6782 if (xmlStrEqual(val, BAD_CAST "default"))
6783 *(ctxt->space) = 0;
6784 else if (xmlStrEqual(val, BAD_CAST "preserve"))
6785 *(ctxt->space) = 1;
6786 else {
Daniel Veillardd8925572005-06-08 22:34:55 +00006787 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
Daniel Veillard642104e2003-03-26 16:32:05 +00006788"Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
Daniel Veillardd8925572005-06-08 22:34:55 +00006789 val, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006790 }
6791 }
6792
6793 *value = val;
6794 return(name);
6795}
6796
6797/**
6798 * xmlParseStartTag:
6799 * @ctxt: an XML parser context
6800 *
6801 * parse a start of tag either for rule element or
6802 * EmptyElement. In both case we don't parse the tag closing chars.
6803 *
6804 * [40] STag ::= '<' Name (S Attribute)* S? '>'
6805 *
6806 * [ WFC: Unique Att Spec ]
6807 * No attribute name may appear more than once in the same start-tag or
6808 * empty-element tag.
6809 *
6810 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
6811 *
6812 * [ WFC: Unique Att Spec ]
6813 * No attribute name may appear more than once in the same start-tag or
6814 * empty-element tag.
6815 *
6816 * With namespace:
6817 *
6818 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
6819 *
6820 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
6821 *
6822 * Returns the element name parsed
6823 */
6824
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006825const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00006826xmlParseStartTag(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006827 const xmlChar *name;
6828 const xmlChar *attname;
Owen Taylor3473f882001-02-23 17:55:21 +00006829 xmlChar *attvalue;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00006830 const xmlChar **atts = ctxt->atts;
Owen Taylor3473f882001-02-23 17:55:21 +00006831 int nbatts = 0;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00006832 int maxatts = ctxt->maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00006833 int i;
6834
6835 if (RAW != '<') return(NULL);
Daniel Veillard21a0f912001-02-25 19:54:14 +00006836 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00006837
6838 name = xmlParseName(ctxt);
6839 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006840 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00006841 "xmlParseStartTag: invalid element name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006842 return(NULL);
6843 }
6844
6845 /*
6846 * Now parse the attributes, it ends up with the ending
6847 *
6848 * (S Attribute)* S?
6849 */
6850 SKIP_BLANKS;
6851 GROW;
6852
Daniel Veillard21a0f912001-02-25 19:54:14 +00006853 while ((RAW != '>') &&
6854 ((RAW != '/') || (NXT(1) != '>')) &&
Daniel Veillard73b013f2003-09-30 12:36:01 +00006855 (IS_BYTE_CHAR(RAW))) {
Owen Taylor3473f882001-02-23 17:55:21 +00006856 const xmlChar *q = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00006857 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00006858
6859 attname = xmlParseAttribute(ctxt, &attvalue);
6860 if ((attname != NULL) && (attvalue != NULL)) {
6861 /*
6862 * [ WFC: Unique Att Spec ]
6863 * No attribute name may appear more than once in the same
6864 * start-tag or empty-element tag.
6865 */
6866 for (i = 0; i < nbatts;i += 2) {
6867 if (xmlStrEqual(atts[i], attname)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006868 xmlErrAttributeDup(ctxt, NULL, attname);
Owen Taylor3473f882001-02-23 17:55:21 +00006869 xmlFree(attvalue);
6870 goto failed;
6871 }
6872 }
Owen Taylor3473f882001-02-23 17:55:21 +00006873 /*
6874 * Add the pair to atts
6875 */
6876 if (atts == NULL) {
Daniel Veillard6155d8a2003-08-19 15:01:28 +00006877 maxatts = 22; /* allow for 10 attrs by default */
6878 atts = (const xmlChar **)
6879 xmlMalloc(maxatts * sizeof(xmlChar *));
Owen Taylor3473f882001-02-23 17:55:21 +00006880 if (atts == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006881 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006882 if (attvalue != NULL)
6883 xmlFree(attvalue);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006884 goto failed;
Owen Taylor3473f882001-02-23 17:55:21 +00006885 }
Daniel Veillard6155d8a2003-08-19 15:01:28 +00006886 ctxt->atts = atts;
6887 ctxt->maxatts = maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00006888 } else if (nbatts + 4 > maxatts) {
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006889 const xmlChar **n;
6890
Owen Taylor3473f882001-02-23 17:55:21 +00006891 maxatts *= 2;
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006892 n = (const xmlChar **) xmlRealloc((void *) atts,
Daniel Veillard6155d8a2003-08-19 15:01:28 +00006893 maxatts * sizeof(const xmlChar *));
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006894 if (n == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006895 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006896 if (attvalue != NULL)
6897 xmlFree(attvalue);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006898 goto failed;
Owen Taylor3473f882001-02-23 17:55:21 +00006899 }
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006900 atts = n;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00006901 ctxt->atts = atts;
6902 ctxt->maxatts = maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00006903 }
6904 atts[nbatts++] = attname;
6905 atts[nbatts++] = attvalue;
6906 atts[nbatts] = NULL;
6907 atts[nbatts + 1] = NULL;
6908 } else {
Owen Taylor3473f882001-02-23 17:55:21 +00006909 if (attvalue != NULL)
6910 xmlFree(attvalue);
6911 }
6912
6913failed:
6914
Daniel Veillard3772de32002-12-17 10:31:45 +00006915 GROW
Owen Taylor3473f882001-02-23 17:55:21 +00006916 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
6917 break;
William M. Brack76e95df2003-10-18 16:20:14 +00006918 if (!IS_BLANK_CH(RAW)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006919 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6920 "attributes construct error\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006921 }
6922 SKIP_BLANKS;
Daniel Veillard02111c12003-02-24 19:14:52 +00006923 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
6924 (attname == NULL) && (attvalue == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006925 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
6926 "xmlParseStartTag: problem parsing attributes\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006927 break;
6928 }
Daniel Veillard6155d8a2003-08-19 15:01:28 +00006929 SHRINK;
Owen Taylor3473f882001-02-23 17:55:21 +00006930 GROW;
6931 }
6932
6933 /*
6934 * SAX: Start of Element !
6935 */
6936 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
Daniel Veillard6155d8a2003-08-19 15:01:28 +00006937 (!ctxt->disableSAX)) {
6938 if (nbatts > 0)
6939 ctxt->sax->startElement(ctxt->userData, name, atts);
6940 else
6941 ctxt->sax->startElement(ctxt->userData, name, NULL);
6942 }
Owen Taylor3473f882001-02-23 17:55:21 +00006943
6944 if (atts != NULL) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006945 /* Free only the content strings */
6946 for (i = 1;i < nbatts;i+=2)
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006947 if (atts[i] != NULL)
6948 xmlFree((xmlChar *) atts[i]);
Owen Taylor3473f882001-02-23 17:55:21 +00006949 }
6950 return(name);
6951}
6952
6953/**
Daniel Veillard0fb18932003-09-07 09:14:37 +00006954 * xmlParseEndTag1:
Owen Taylor3473f882001-02-23 17:55:21 +00006955 * @ctxt: an XML parser context
Daniel Veillard0fb18932003-09-07 09:14:37 +00006956 * @line: line of the start tag
6957 * @nsNr: number of namespaces on the start tag
Owen Taylor3473f882001-02-23 17:55:21 +00006958 *
6959 * parse an end of tag
6960 *
6961 * [42] ETag ::= '</' Name S? '>'
6962 *
6963 * With namespace
6964 *
6965 * [NS 9] ETag ::= '</' QName S? '>'
6966 */
6967
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00006968static void
Daniel Veillard0fb18932003-09-07 09:14:37 +00006969xmlParseEndTag1(xmlParserCtxtPtr ctxt, int line) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006970 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00006971
6972 GROW;
6973 if ((RAW != '<') || (NXT(1) != '/')) {
Daniel Veillardf403d292003-10-05 13:51:35 +00006974 xmlFatalErrMsg(ctxt, XML_ERR_LTSLASH_REQUIRED,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006975 "xmlParseEndTag: '</' not found\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006976 return;
6977 }
6978 SKIP(2);
6979
Daniel Veillard46de64e2002-05-29 08:21:33 +00006980 name = xmlParseNameAndCompare(ctxt,ctxt->name);
Owen Taylor3473f882001-02-23 17:55:21 +00006981
6982 /*
6983 * We should definitely be at the ending "S? '>'" part
6984 */
6985 GROW;
6986 SKIP_BLANKS;
Daniel Veillard73b013f2003-09-30 12:36:01 +00006987 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006988 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006989 } else
Daniel Veillard21a0f912001-02-25 19:54:14 +00006990 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00006991
6992 /*
6993 * [ WFC: Element Type Match ]
6994 * The Name in an element's end-tag must match the element type in the
6995 * start-tag.
6996 *
6997 */
Daniel Veillard46de64e2002-05-29 08:21:33 +00006998 if (name != (xmlChar*)1) {
Daniel Veillardf403d292003-10-05 13:51:35 +00006999 if (name == NULL) name = BAD_CAST "unparseable";
7000 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00007001 "Opening and ending tag mismatch: %s line %d and %s\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00007002 ctxt->name, line, name);
Owen Taylor3473f882001-02-23 17:55:21 +00007003 }
7004
7005 /*
7006 * SAX: End of Tag
7007 */
7008 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
7009 (!ctxt->disableSAX))
Daniel Veillard46de64e2002-05-29 08:21:33 +00007010 ctxt->sax->endElement(ctxt->userData, ctxt->name);
Owen Taylor3473f882001-02-23 17:55:21 +00007011
Daniel Veillarde57ec792003-09-10 10:50:59 +00007012 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00007013 spacePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00007014 return;
7015}
7016
7017/**
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00007018 * xmlParseEndTag:
7019 * @ctxt: an XML parser context
7020 *
7021 * parse an end of tag
7022 *
7023 * [42] ETag ::= '</' Name S? '>'
7024 *
7025 * With namespace
7026 *
7027 * [NS 9] ETag ::= '</' QName S? '>'
7028 */
7029
7030void
7031xmlParseEndTag(xmlParserCtxtPtr ctxt) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007032 xmlParseEndTag1(ctxt, 0);
7033}
Daniel Veillard81273902003-09-30 00:43:48 +00007034#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillard0fb18932003-09-07 09:14:37 +00007035
7036/************************************************************************
7037 * *
7038 * SAX 2 specific operations *
7039 * *
7040 ************************************************************************/
7041
7042static const xmlChar *
7043xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) {
7044 int len = 0, l;
7045 int c;
7046 int count = 0;
7047
7048 /*
7049 * Handler for more complex cases
7050 */
7051 GROW;
7052 c = CUR_CHAR(l);
7053 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007054 (!IS_LETTER(c) && (c != '_'))) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007055 return(NULL);
7056 }
7057
7058 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
William M. Brack871611b2003-10-18 04:53:14 +00007059 ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007060 (c == '.') || (c == '-') || (c == '_') ||
William M. Brack871611b2003-10-18 04:53:14 +00007061 (IS_COMBINING(c)) ||
7062 (IS_EXTENDER(c)))) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007063 if (count++ > 100) {
7064 count = 0;
7065 GROW;
7066 }
7067 len += l;
7068 NEXTL(l);
7069 c = CUR_CHAR(l);
7070 }
7071 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
7072}
7073
7074/*
7075 * xmlGetNamespace:
7076 * @ctxt: an XML parser context
7077 * @prefix: the prefix to lookup
7078 *
7079 * Lookup the namespace name for the @prefix (which ca be NULL)
7080 * The prefix must come from the @ctxt->dict dictionnary
7081 *
7082 * Returns the namespace name or NULL if not bound
7083 */
7084static const xmlChar *
7085xmlGetNamespace(xmlParserCtxtPtr ctxt, const xmlChar *prefix) {
7086 int i;
7087
Daniel Veillarde57ec792003-09-10 10:50:59 +00007088 if (prefix == ctxt->str_xml) return(ctxt->str_xml_ns);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007089 for (i = ctxt->nsNr - 2;i >= 0;i-=2)
Daniel Veillarde57ec792003-09-10 10:50:59 +00007090 if (ctxt->nsTab[i] == prefix) {
7091 if ((prefix == NULL) && (*ctxt->nsTab[i + 1] == 0))
7092 return(NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007093 return(ctxt->nsTab[i + 1]);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007094 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007095 return(NULL);
7096}
7097
7098/**
7099 * xmlParseNCName:
7100 * @ctxt: an XML parser context
Daniel Veillardc82c57e2004-01-12 16:24:34 +00007101 * @len: lenght of the string parsed
Daniel Veillard0fb18932003-09-07 09:14:37 +00007102 *
7103 * parse an XML name.
7104 *
7105 * [4NS] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
7106 * CombiningChar | Extender
7107 *
7108 * [5NS] NCName ::= (Letter | '_') (NCNameChar)*
7109 *
7110 * Returns the Name parsed or NULL
7111 */
7112
7113static const xmlChar *
7114xmlParseNCName(xmlParserCtxtPtr ctxt) {
7115 const xmlChar *in;
7116 const xmlChar *ret;
7117 int count = 0;
7118
7119 /*
7120 * Accelerator for simple ASCII names
7121 */
7122 in = ctxt->input->cur;
7123 if (((*in >= 0x61) && (*in <= 0x7A)) ||
7124 ((*in >= 0x41) && (*in <= 0x5A)) ||
7125 (*in == '_')) {
7126 in++;
7127 while (((*in >= 0x61) && (*in <= 0x7A)) ||
7128 ((*in >= 0x41) && (*in <= 0x5A)) ||
7129 ((*in >= 0x30) && (*in <= 0x39)) ||
7130 (*in == '_') || (*in == '-') ||
7131 (*in == '.'))
7132 in++;
7133 if ((*in > 0) && (*in < 0x80)) {
7134 count = in - ctxt->input->cur;
7135 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
7136 ctxt->input->cur = in;
7137 ctxt->nbChars += count;
7138 ctxt->input->col += count;
7139 if (ret == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007140 xmlErrMemory(ctxt, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007141 }
7142 return(ret);
7143 }
7144 }
7145 return(xmlParseNCNameComplex(ctxt));
7146}
7147
7148/**
7149 * xmlParseQName:
7150 * @ctxt: an XML parser context
7151 * @prefix: pointer to store the prefix part
7152 *
7153 * parse an XML Namespace QName
7154 *
7155 * [6] QName ::= (Prefix ':')? LocalPart
7156 * [7] Prefix ::= NCName
7157 * [8] LocalPart ::= NCName
7158 *
7159 * Returns the Name parsed or NULL
7160 */
7161
7162static const xmlChar *
7163xmlParseQName(xmlParserCtxtPtr ctxt, const xmlChar **prefix) {
7164 const xmlChar *l, *p;
7165
7166 GROW;
7167
7168 l = xmlParseNCName(ctxt);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007169 if (l == NULL) {
7170 if (CUR == ':') {
7171 l = xmlParseName(ctxt);
7172 if (l != NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007173 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
7174 "Failed to parse QName '%s'\n", l, NULL, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007175 *prefix = NULL;
7176 return(l);
7177 }
7178 }
7179 return(NULL);
7180 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007181 if (CUR == ':') {
7182 NEXT;
7183 p = l;
7184 l = xmlParseNCName(ctxt);
7185 if (l == NULL) {
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007186 xmlChar *tmp;
7187
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007188 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
7189 "Failed to parse QName '%s:'\n", p, NULL, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007190 tmp = xmlBuildQName(BAD_CAST "", p, NULL, 0);
7191 p = xmlDictLookup(ctxt->dict, tmp, -1);
7192 if (tmp != NULL) xmlFree(tmp);
7193 *prefix = NULL;
7194 return(p);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007195 }
7196 if (CUR == ':') {
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007197 xmlChar *tmp;
7198
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007199 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
7200 "Failed to parse QName '%s:%s:'\n", p, l, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007201 NEXT;
7202 tmp = (xmlChar *) xmlParseName(ctxt);
7203 if (tmp != NULL) {
7204 tmp = xmlBuildQName(tmp, l, NULL, 0);
7205 l = xmlDictLookup(ctxt->dict, tmp, -1);
7206 if (tmp != NULL) xmlFree(tmp);
7207 *prefix = p;
7208 return(l);
7209 }
7210 tmp = xmlBuildQName(BAD_CAST "", l, NULL, 0);
7211 l = xmlDictLookup(ctxt->dict, tmp, -1);
7212 if (tmp != NULL) xmlFree(tmp);
7213 *prefix = p;
7214 return(l);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007215 }
7216 *prefix = p;
7217 } else
7218 *prefix = NULL;
7219 return(l);
7220}
7221
7222/**
7223 * xmlParseQNameAndCompare:
7224 * @ctxt: an XML parser context
7225 * @name: the localname
7226 * @prefix: the prefix, if any.
7227 *
7228 * parse an XML name and compares for match
7229 * (specialized for endtag parsing)
7230 *
7231 * Returns NULL for an illegal name, (xmlChar*) 1 for success
7232 * and the name for mismatch
7233 */
7234
7235static const xmlChar *
7236xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *name,
7237 xmlChar const *prefix) {
7238 const xmlChar *cmp = name;
7239 const xmlChar *in;
7240 const xmlChar *ret;
7241 const xmlChar *prefix2;
7242
7243 if (prefix == NULL) return(xmlParseNameAndCompare(ctxt, name));
7244
7245 GROW;
7246 in = ctxt->input->cur;
7247
7248 cmp = prefix;
7249 while (*in != 0 && *in == *cmp) {
7250 ++in;
7251 ++cmp;
7252 }
7253 if ((*cmp == 0) && (*in == ':')) {
7254 in++;
7255 cmp = name;
7256 while (*in != 0 && *in == *cmp) {
7257 ++in;
7258 ++cmp;
7259 }
William M. Brack76e95df2003-10-18 16:20:14 +00007260 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007261 /* success */
7262 ctxt->input->cur = in;
7263 return((const xmlChar*) 1);
7264 }
7265 }
7266 /*
7267 * all strings coms from the dictionary, equality can be done directly
7268 */
7269 ret = xmlParseQName (ctxt, &prefix2);
7270 if ((ret == name) && (prefix == prefix2))
7271 return((const xmlChar*) 1);
7272 return ret;
7273}
7274
7275/**
7276 * xmlParseAttValueInternal:
7277 * @ctxt: an XML parser context
7278 * @len: attribute len result
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007279 * @alloc: whether the attribute was reallocated as a new string
7280 * @normalize: if 1 then further non-CDATA normalization must be done
Daniel Veillard0fb18932003-09-07 09:14:37 +00007281 *
7282 * parse a value for an attribute.
7283 * NOTE: if no normalization is needed, the routine will return pointers
7284 * directly from the data buffer.
7285 *
7286 * 3.3.3 Attribute-Value Normalization:
7287 * Before the value of an attribute is passed to the application or
7288 * checked for validity, the XML processor must normalize it as follows:
7289 * - a character reference is processed by appending the referenced
7290 * character to the attribute value
7291 * - an entity reference is processed by recursively processing the
7292 * replacement text of the entity
7293 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
7294 * appending #x20 to the normalized value, except that only a single
7295 * #x20 is appended for a "#xD#xA" sequence that is part of an external
7296 * parsed entity or the literal entity value of an internal parsed entity
7297 * - other characters are processed by appending them to the normalized value
7298 * If the declared value is not CDATA, then the XML processor must further
7299 * process the normalized attribute value by discarding any leading and
7300 * trailing space (#x20) characters, and by replacing sequences of space
7301 * (#x20) characters by a single space (#x20) character.
7302 * All attributes for which no declaration has been read should be treated
7303 * by a non-validating parser as if declared CDATA.
7304 *
7305 * Returns the AttValue parsed or NULL. The value has to be freed by the
7306 * caller if it was copied, this can be detected by val[*len] == 0.
7307 */
7308
7309static xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007310xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *len, int *alloc,
7311 int normalize)
Daniel Veillarde57ec792003-09-10 10:50:59 +00007312{
Daniel Veillard0fb18932003-09-07 09:14:37 +00007313 xmlChar limit = 0;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007314 const xmlChar *in = NULL, *start, *end, *last;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007315 xmlChar *ret = NULL;
7316
7317 GROW;
7318 in = (xmlChar *) CUR_PTR;
7319 if (*in != '"' && *in != '\'') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007320 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007321 return (NULL);
7322 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007323 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007324
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007325 /*
7326 * try to handle in this routine the most common case where no
7327 * allocation of a new string is required and where content is
7328 * pure ASCII.
7329 */
7330 limit = *in++;
7331 end = ctxt->input->end;
7332 start = in;
7333 if (in >= end) {
7334 const xmlChar *oldbase = ctxt->input->base;
7335 GROW;
7336 if (oldbase != ctxt->input->base) {
7337 long delta = ctxt->input->base - oldbase;
7338 start = start + delta;
7339 in = in + delta;
7340 }
7341 end = ctxt->input->end;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007342 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007343 if (normalize) {
7344 /*
7345 * Skip any leading spaces
7346 */
7347 while ((in < end) && (*in != limit) &&
7348 ((*in == 0x20) || (*in == 0x9) ||
7349 (*in == 0xA) || (*in == 0xD))) {
7350 in++;
7351 start = in;
7352 if (in >= end) {
7353 const xmlChar *oldbase = ctxt->input->base;
7354 GROW;
7355 if (oldbase != ctxt->input->base) {
7356 long delta = ctxt->input->base - oldbase;
7357 start = start + delta;
7358 in = in + delta;
7359 }
7360 end = ctxt->input->end;
7361 }
7362 }
7363 while ((in < end) && (*in != limit) && (*in >= 0x20) &&
7364 (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
7365 if ((*in++ == 0x20) && (*in == 0x20)) break;
7366 if (in >= end) {
7367 const xmlChar *oldbase = ctxt->input->base;
7368 GROW;
7369 if (oldbase != ctxt->input->base) {
7370 long delta = ctxt->input->base - oldbase;
7371 start = start + delta;
7372 in = in + delta;
7373 }
7374 end = ctxt->input->end;
7375 }
7376 }
7377 last = in;
7378 /*
7379 * skip the trailing blanks
7380 */
Daniel Veillardc6e20e42003-09-11 16:30:26 +00007381 while ((last[-1] == 0x20) && (last > start)) last--;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007382 while ((in < end) && (*in != limit) &&
7383 ((*in == 0x20) || (*in == 0x9) ||
7384 (*in == 0xA) || (*in == 0xD))) {
7385 in++;
7386 if (in >= end) {
7387 const xmlChar *oldbase = ctxt->input->base;
7388 GROW;
7389 if (oldbase != ctxt->input->base) {
7390 long delta = ctxt->input->base - oldbase;
7391 start = start + delta;
7392 in = in + delta;
7393 last = last + delta;
7394 }
7395 end = ctxt->input->end;
7396 }
7397 }
7398 if (*in != limit) goto need_complex;
7399 } else {
7400 while ((in < end) && (*in != limit) && (*in >= 0x20) &&
7401 (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
7402 in++;
7403 if (in >= end) {
7404 const xmlChar *oldbase = ctxt->input->base;
7405 GROW;
7406 if (oldbase != ctxt->input->base) {
7407 long delta = ctxt->input->base - oldbase;
7408 start = start + delta;
7409 in = in + delta;
7410 }
7411 end = ctxt->input->end;
7412 }
7413 }
7414 last = in;
7415 if (*in != limit) goto need_complex;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007416 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007417 in++;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007418 if (len != NULL) {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007419 *len = last - start;
7420 ret = (xmlChar *) start;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007421 } else {
Daniel Veillarde57ec792003-09-10 10:50:59 +00007422 if (alloc) *alloc = 1;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007423 ret = xmlStrndup(start, last - start);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007424 }
7425 CUR_PTR = in;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007426 if (alloc) *alloc = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007427 return ret;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007428need_complex:
7429 if (alloc) *alloc = 1;
7430 return xmlParseAttValueComplex(ctxt, len, normalize);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007431}
7432
7433/**
7434 * xmlParseAttribute2:
7435 * @ctxt: an XML parser context
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007436 * @pref: the element prefix
7437 * @elem: the element name
7438 * @prefix: a xmlChar ** used to store the value of the attribute prefix
Daniel Veillard0fb18932003-09-07 09:14:37 +00007439 * @value: a xmlChar ** used to store the value of the attribute
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007440 * @len: an int * to save the length of the attribute
7441 * @alloc: an int * to indicate if the attribute was allocated
Daniel Veillard0fb18932003-09-07 09:14:37 +00007442 *
7443 * parse an attribute in the new SAX2 framework.
7444 *
7445 * Returns the attribute name, and the value in *value, .
7446 */
7447
7448static const xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007449xmlParseAttribute2(xmlParserCtxtPtr ctxt,
7450 const xmlChar *pref, const xmlChar *elem,
7451 const xmlChar **prefix, xmlChar **value,
7452 int *len, int *alloc) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007453 const xmlChar *name;
Daniel Veillardd8925572005-06-08 22:34:55 +00007454 xmlChar *val, *internal_val = NULL;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007455 int normalize = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007456
7457 *value = NULL;
7458 GROW;
7459 name = xmlParseQName(ctxt, prefix);
7460 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007461 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7462 "error parsing attribute name\n");
Daniel Veillard0fb18932003-09-07 09:14:37 +00007463 return(NULL);
7464 }
7465
7466 /*
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007467 * get the type if needed
7468 */
7469 if (ctxt->attsSpecial != NULL) {
7470 int type;
7471
7472 type = (int) (long) xmlHashQLookup2(ctxt->attsSpecial,
7473 pref, elem, *prefix, name);
7474 if (type != 0) normalize = 1;
7475 }
7476
7477 /*
Daniel Veillard0fb18932003-09-07 09:14:37 +00007478 * read the value
7479 */
7480 SKIP_BLANKS;
7481 if (RAW == '=') {
7482 NEXT;
7483 SKIP_BLANKS;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007484 val = xmlParseAttValueInternal(ctxt, len, alloc, normalize);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007485 ctxt->instate = XML_PARSER_CONTENT;
7486 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00007487 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
Daniel Veillard0fb18932003-09-07 09:14:37 +00007488 "Specification mandate value for attribute %s\n", name);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007489 return(NULL);
7490 }
7491
Daniel Veillardd8925572005-06-08 22:34:55 +00007492 if (*prefix == ctxt->str_xml) {
7493 /*
7494 * Check that xml:lang conforms to the specification
7495 * No more registered as an error, just generate a warning now
7496 * since this was deprecated in XML second edition
7497 */
7498 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "lang"))) {
7499 internal_val = xmlStrndup(val, *len);
7500 if (!xmlCheckLanguageID(internal_val)) {
7501 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
7502 "Malformed value for xml:lang : %s\n",
7503 internal_val, NULL);
7504 }
7505 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007506
Daniel Veillardd8925572005-06-08 22:34:55 +00007507 /*
7508 * Check that xml:space conforms to the specification
7509 */
7510 if (xmlStrEqual(name, BAD_CAST "space")) {
7511 internal_val = xmlStrndup(val, *len);
7512 if (xmlStrEqual(internal_val, BAD_CAST "default"))
7513 *(ctxt->space) = 0;
7514 else if (xmlStrEqual(internal_val, BAD_CAST "preserve"))
7515 *(ctxt->space) = 1;
7516 else {
7517 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
Daniel Veillard0fb18932003-09-07 09:14:37 +00007518"Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
Daniel Veillardd8925572005-06-08 22:34:55 +00007519 internal_val, NULL);
7520 }
7521 }
7522 if (internal_val) {
7523 xmlFree(internal_val);
7524 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007525 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007526
7527 *value = val;
7528 return(name);
7529}
7530
7531/**
7532 * xmlParseStartTag2:
7533 * @ctxt: an XML parser context
7534 *
7535 * parse a start of tag either for rule element or
7536 * EmptyElement. In both case we don't parse the tag closing chars.
7537 * This routine is called when running SAX2 parsing
7538 *
7539 * [40] STag ::= '<' Name (S Attribute)* S? '>'
7540 *
7541 * [ WFC: Unique Att Spec ]
7542 * No attribute name may appear more than once in the same start-tag or
7543 * empty-element tag.
7544 *
7545 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
7546 *
7547 * [ WFC: Unique Att Spec ]
7548 * No attribute name may appear more than once in the same start-tag or
7549 * empty-element tag.
7550 *
7551 * With namespace:
7552 *
7553 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
7554 *
7555 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
7556 *
7557 * Returns the element name parsed
7558 */
7559
7560static const xmlChar *
7561xmlParseStartTag2(xmlParserCtxtPtr ctxt, const xmlChar **pref,
Daniel Veillardc82c57e2004-01-12 16:24:34 +00007562 const xmlChar **URI, int *tlen) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007563 const xmlChar *localname;
7564 const xmlChar *prefix;
7565 const xmlChar *attname;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007566 const xmlChar *aprefix;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007567 const xmlChar *nsname;
7568 xmlChar *attvalue;
7569 const xmlChar **atts = ctxt->atts;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007570 int maxatts = ctxt->maxatts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007571 int nratts, nbatts, nbdef;
7572 int i, j, nbNs, attval;
7573 const xmlChar *base;
7574 unsigned long cur;
Daniel Veillard365cf672005-06-09 08:18:24 +00007575 int nsNr = ctxt->nsNr;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007576
7577 if (RAW != '<') return(NULL);
7578 NEXT1;
7579
7580 /*
7581 * NOTE: it is crucial with the SAX2 API to never call SHRINK beyond that
7582 * point since the attribute values may be stored as pointers to
7583 * the buffer and calling SHRINK would destroy them !
7584 * The Shrinking is only possible once the full set of attribute
7585 * callbacks have been done.
7586 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00007587reparse:
Daniel Veillard0fb18932003-09-07 09:14:37 +00007588 SHRINK;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007589 base = ctxt->input->base;
7590 cur = ctxt->input->cur - ctxt->input->base;
7591 nbatts = 0;
7592 nratts = 0;
7593 nbdef = 0;
7594 nbNs = 0;
7595 attval = 0;
Daniel Veillard365cf672005-06-09 08:18:24 +00007596 /* Forget any namespaces added during an earlier parse of this element. */
7597 ctxt->nsNr = nsNr;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007598
7599 localname = xmlParseQName(ctxt, &prefix);
7600 if (localname == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007601 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7602 "StartTag: invalid element name\n");
Daniel Veillard0fb18932003-09-07 09:14:37 +00007603 return(NULL);
7604 }
Daniel Veillardc82c57e2004-01-12 16:24:34 +00007605 *tlen = ctxt->input->cur - ctxt->input->base - cur;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007606
7607 /*
7608 * Now parse the attributes, it ends up with the ending
7609 *
7610 * (S Attribute)* S?
7611 */
7612 SKIP_BLANKS;
7613 GROW;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007614 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007615
7616 while ((RAW != '>') &&
7617 ((RAW != '/') || (NXT(1) != '>')) &&
Daniel Veillard73b013f2003-09-30 12:36:01 +00007618 (IS_BYTE_CHAR(RAW))) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007619 const xmlChar *q = CUR_PTR;
7620 unsigned int cons = ctxt->input->consumed;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007621 int len = -1, alloc = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007622
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007623 attname = xmlParseAttribute2(ctxt, prefix, localname,
7624 &aprefix, &attvalue, &len, &alloc);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007625 if ((attname != NULL) && (attvalue != NULL)) {
7626 if (len < 0) len = xmlStrlen(attvalue);
7627 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00007628 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
7629 xmlURIPtr uri;
7630
7631 if (*URL != 0) {
7632 uri = xmlParseURI((const char *) URL);
7633 if (uri == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00007634 xmlWarningMsg(ctxt, XML_WAR_NS_URI,
7635 "xmlns: %s not a valid URI\n",
7636 URL, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007637 } else {
7638 if (uri->scheme == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00007639 xmlWarningMsg(ctxt, XML_WAR_NS_URI_RELATIVE,
7640 "xmlns: URI %s is not absolute\n",
7641 URL, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007642 }
7643 xmlFreeURI(uri);
7644 }
7645 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007646 /*
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007647 * check that it's not a defined namespace
Daniel Veillard0fb18932003-09-07 09:14:37 +00007648 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007649 for (j = 1;j <= nbNs;j++)
7650 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
7651 break;
7652 if (j <= nbNs)
7653 xmlErrAttributeDup(ctxt, NULL, attname);
7654 else
7655 if (nsPush(ctxt, NULL, URL) > 0) nbNs++;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007656 if (alloc != 0) xmlFree(attvalue);
7657 SKIP_BLANKS;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007658 continue;
7659 }
7660 if (aprefix == ctxt->str_xmlns) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00007661 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
7662 xmlURIPtr uri;
7663
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007664 if (attname == ctxt->str_xml) {
7665 if (URL != ctxt->str_xml_ns) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007666 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
7667 "xml namespace prefix mapped to wrong URI\n",
7668 NULL, NULL, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007669 }
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007670 /*
7671 * Do not keep a namespace definition node
7672 */
7673 if (alloc != 0) xmlFree(attvalue);
7674 SKIP_BLANKS;
7675 continue;
7676 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00007677 uri = xmlParseURI((const char *) URL);
7678 if (uri == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00007679 xmlWarningMsg(ctxt, XML_WAR_NS_URI,
7680 "xmlns:%s: '%s' is not a valid URI\n",
7681 attname, URL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007682 } else {
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007683 if ((ctxt->pedantic) && (uri->scheme == NULL)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00007684 xmlWarningMsg(ctxt, XML_WAR_NS_URI_RELATIVE,
7685 "xmlns:%s: URI %s is not absolute\n",
7686 attname, URL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007687 }
7688 xmlFreeURI(uri);
7689 }
7690
Daniel Veillard0fb18932003-09-07 09:14:37 +00007691 /*
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007692 * check that it's not a defined namespace
Daniel Veillard0fb18932003-09-07 09:14:37 +00007693 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007694 for (j = 1;j <= nbNs;j++)
7695 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
7696 break;
7697 if (j <= nbNs)
7698 xmlErrAttributeDup(ctxt, aprefix, attname);
7699 else
7700 if (nsPush(ctxt, attname, URL) > 0) nbNs++;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007701 if (alloc != 0) xmlFree(attvalue);
7702 SKIP_BLANKS;
Daniel Veillardd3999c72004-03-10 16:27:03 +00007703 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007704 continue;
7705 }
7706
7707 /*
7708 * Add the pair to atts
7709 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00007710 if ((atts == NULL) || (nbatts + 5 > maxatts)) {
7711 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007712 if (attvalue[len] == 0)
7713 xmlFree(attvalue);
7714 goto failed;
7715 }
7716 maxatts = ctxt->maxatts;
7717 atts = ctxt->atts;
7718 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00007719 ctxt->attallocs[nratts++] = alloc;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007720 atts[nbatts++] = attname;
7721 atts[nbatts++] = aprefix;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007722 atts[nbatts++] = NULL; /* the URI will be fetched later */
Daniel Veillard0fb18932003-09-07 09:14:37 +00007723 atts[nbatts++] = attvalue;
7724 attvalue += len;
7725 atts[nbatts++] = attvalue;
7726 /*
7727 * tag if some deallocation is needed
7728 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00007729 if (alloc != 0) attval = 1;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007730 } else {
7731 if ((attvalue != NULL) && (attvalue[len] == 0))
7732 xmlFree(attvalue);
7733 }
7734
7735failed:
7736
7737 GROW
Daniel Veillarde57ec792003-09-10 10:50:59 +00007738 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007739 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
7740 break;
William M. Brack76e95df2003-10-18 16:20:14 +00007741 if (!IS_BLANK_CH(RAW)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007742 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
7743 "attributes construct error\n");
William M. Brack13dfa872004-09-18 04:52:08 +00007744 break;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007745 }
7746 SKIP_BLANKS;
7747 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
7748 (attname == NULL) && (attvalue == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007749 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
Daniel Veillard0fb18932003-09-07 09:14:37 +00007750 "xmlParseStartTag: problem parsing attributes\n");
Daniel Veillard0fb18932003-09-07 09:14:37 +00007751 break;
7752 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007753 GROW;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007754 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007755 }
7756
Daniel Veillard0fb18932003-09-07 09:14:37 +00007757 /*
Daniel Veillarde57ec792003-09-10 10:50:59 +00007758 * The attributes defaulting
7759 */
7760 if (ctxt->attsDefault != NULL) {
7761 xmlDefAttrsPtr defaults;
7762
7763 defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix);
7764 if (defaults != NULL) {
7765 for (i = 0;i < defaults->nbAttrs;i++) {
7766 attname = defaults->values[4 * i];
7767 aprefix = defaults->values[4 * i + 1];
7768
7769 /*
7770 * special work for namespaces defaulted defs
7771 */
7772 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
7773 /*
7774 * check that it's not a defined namespace
7775 */
7776 for (j = 1;j <= nbNs;j++)
7777 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
7778 break;
7779 if (j <= nbNs) continue;
7780
7781 nsname = xmlGetNamespace(ctxt, NULL);
7782 if (nsname != defaults->values[4 * i + 2]) {
7783 if (nsPush(ctxt, NULL,
7784 defaults->values[4 * i + 2]) > 0)
7785 nbNs++;
7786 }
7787 } else if (aprefix == ctxt->str_xmlns) {
7788 /*
7789 * check that it's not a defined namespace
7790 */
7791 for (j = 1;j <= nbNs;j++)
7792 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
7793 break;
7794 if (j <= nbNs) continue;
7795
7796 nsname = xmlGetNamespace(ctxt, attname);
7797 if (nsname != defaults->values[2]) {
7798 if (nsPush(ctxt, attname,
7799 defaults->values[4 * i + 2]) > 0)
7800 nbNs++;
7801 }
7802 } else {
7803 /*
7804 * check that it's not a defined attribute
7805 */
7806 for (j = 0;j < nbatts;j+=5) {
7807 if ((attname == atts[j]) && (aprefix == atts[j+1]))
7808 break;
7809 }
7810 if (j < nbatts) continue;
7811
7812 if ((atts == NULL) || (nbatts + 5 > maxatts)) {
7813 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
Daniel Veillard9ee35f32003-09-28 00:19:54 +00007814 return(NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007815 }
7816 maxatts = ctxt->maxatts;
7817 atts = ctxt->atts;
7818 }
7819 atts[nbatts++] = attname;
7820 atts[nbatts++] = aprefix;
7821 if (aprefix == NULL)
7822 atts[nbatts++] = NULL;
7823 else
7824 atts[nbatts++] = xmlGetNamespace(ctxt, aprefix);
7825 atts[nbatts++] = defaults->values[4 * i + 2];
7826 atts[nbatts++] = defaults->values[4 * i + 3];
7827 nbdef++;
7828 }
7829 }
7830 }
7831 }
7832
Daniel Veillarde70c8772003-11-25 07:21:18 +00007833 /*
7834 * The attributes checkings
7835 */
7836 for (i = 0; i < nbatts;i += 5) {
Kasimier T. Buchcik455472f2005-04-29 10:04:43 +00007837 /*
7838 * The default namespace does not apply to attribute names.
7839 */
7840 if (atts[i + 1] != NULL) {
7841 nsname = xmlGetNamespace(ctxt, atts[i + 1]);
7842 if (nsname == NULL) {
7843 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
7844 "Namespace prefix %s for %s on %s is not defined\n",
7845 atts[i + 1], atts[i], localname);
7846 }
7847 atts[i + 2] = nsname;
7848 } else
7849 nsname = NULL;
Daniel Veillarde70c8772003-11-25 07:21:18 +00007850 /*
7851 * [ WFC: Unique Att Spec ]
7852 * No attribute name may appear more than once in the same
7853 * start-tag or empty-element tag.
7854 * As extended by the Namespace in XML REC.
7855 */
7856 for (j = 0; j < i;j += 5) {
7857 if (atts[i] == atts[j]) {
7858 if (atts[i+1] == atts[j+1]) {
7859 xmlErrAttributeDup(ctxt, atts[i+1], atts[i]);
7860 break;
7861 }
7862 if ((nsname != NULL) && (atts[j + 2] == nsname)) {
7863 xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED,
7864 "Namespaced Attribute %s in '%s' redefined\n",
7865 atts[i], nsname, NULL);
7866 break;
7867 }
7868 }
7869 }
7870 }
7871
Daniel Veillarde57ec792003-09-10 10:50:59 +00007872 nsname = xmlGetNamespace(ctxt, prefix);
7873 if ((prefix != NULL) && (nsname == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007874 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
7875 "Namespace prefix %s on %s is not defined\n",
7876 prefix, localname, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007877 }
7878 *pref = prefix;
7879 *URI = nsname;
7880
7881 /*
7882 * SAX: Start of Element !
7883 */
7884 if ((ctxt->sax != NULL) && (ctxt->sax->startElementNs != NULL) &&
7885 (!ctxt->disableSAX)) {
7886 if (nbNs > 0)
7887 ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
7888 nsname, nbNs, &ctxt->nsTab[ctxt->nsNr - 2 * nbNs],
7889 nbatts / 5, nbdef, atts);
7890 else
7891 ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
7892 nsname, 0, NULL, nbatts / 5, nbdef, atts);
7893 }
7894
7895 /*
7896 * Free up attribute allocated strings if needed
7897 */
7898 if (attval != 0) {
7899 for (i = 3,j = 0; j < nratts;i += 5,j++)
7900 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
7901 xmlFree((xmlChar *) atts[i]);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007902 }
7903
7904 return(localname);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007905
7906base_changed:
7907 /*
7908 * the attribute strings are valid iif the base didn't changed
7909 */
7910 if (attval != 0) {
7911 for (i = 3,j = 0; j < nratts;i += 5,j++)
7912 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
7913 xmlFree((xmlChar *) atts[i]);
7914 }
7915 ctxt->input->cur = ctxt->input->base + cur;
7916 if (ctxt->wellFormed == 1) {
7917 goto reparse;
7918 }
7919 return(NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007920}
7921
7922/**
7923 * xmlParseEndTag2:
7924 * @ctxt: an XML parser context
7925 * @line: line of the start tag
7926 * @nsNr: number of namespaces on the start tag
7927 *
7928 * parse an end of tag
7929 *
7930 * [42] ETag ::= '</' Name S? '>'
7931 *
7932 * With namespace
7933 *
7934 * [NS 9] ETag ::= '</' QName S? '>'
7935 */
7936
7937static void
7938xmlParseEndTag2(xmlParserCtxtPtr ctxt, const xmlChar *prefix,
Daniel Veillardc82c57e2004-01-12 16:24:34 +00007939 const xmlChar *URI, int line, int nsNr, int tlen) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007940 const xmlChar *name;
7941
7942 GROW;
7943 if ((RAW != '<') || (NXT(1) != '/')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007944 xmlFatalErr(ctxt, XML_ERR_LTSLASH_REQUIRED, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007945 return;
7946 }
7947 SKIP(2);
7948
William M. Brack13dfa872004-09-18 04:52:08 +00007949 if ((tlen > 0) && (xmlStrncmp(ctxt->input->cur, ctxt->name, tlen) == 0)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00007950 if (ctxt->input->cur[tlen] == '>') {
7951 ctxt->input->cur += tlen + 1;
7952 goto done;
7953 }
7954 ctxt->input->cur += tlen;
7955 name = (xmlChar*)1;
7956 } else {
7957 if (prefix == NULL)
7958 name = xmlParseNameAndCompare(ctxt, ctxt->name);
7959 else
7960 name = xmlParseQNameAndCompare(ctxt, ctxt->name, prefix);
7961 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007962
7963 /*
7964 * We should definitely be at the ending "S? '>'" part
7965 */
7966 GROW;
7967 SKIP_BLANKS;
Daniel Veillard73b013f2003-09-30 12:36:01 +00007968 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007969 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007970 } else
7971 NEXT1;
7972
7973 /*
7974 * [ WFC: Element Type Match ]
7975 * The Name in an element's end-tag must match the element type in the
7976 * start-tag.
7977 *
7978 */
7979 if (name != (xmlChar*)1) {
Daniel Veillardf403d292003-10-05 13:51:35 +00007980 if (name == NULL) name = BAD_CAST "unparseable";
7981 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
Daniel Veillard0fb18932003-09-07 09:14:37 +00007982 "Opening and ending tag mismatch: %s line %d and %s\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00007983 ctxt->name, line, name);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007984 }
7985
7986 /*
7987 * SAX: End of Tag
7988 */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00007989done:
Daniel Veillard0fb18932003-09-07 09:14:37 +00007990 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
7991 (!ctxt->disableSAX))
7992 ctxt->sax->endElementNs(ctxt->userData, ctxt->name, prefix, URI);
7993
Daniel Veillard0fb18932003-09-07 09:14:37 +00007994 spacePop(ctxt);
7995 if (nsNr != 0)
7996 nsPop(ctxt, nsNr);
7997 return;
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00007998}
7999
8000/**
Owen Taylor3473f882001-02-23 17:55:21 +00008001 * xmlParseCDSect:
8002 * @ctxt: an XML parser context
8003 *
8004 * Parse escaped pure raw content.
8005 *
8006 * [18] CDSect ::= CDStart CData CDEnd
8007 *
8008 * [19] CDStart ::= '<![CDATA['
8009 *
8010 * [20] Data ::= (Char* - (Char* ']]>' Char*))
8011 *
8012 * [21] CDEnd ::= ']]>'
8013 */
8014void
8015xmlParseCDSect(xmlParserCtxtPtr ctxt) {
8016 xmlChar *buf = NULL;
8017 int len = 0;
8018 int size = XML_PARSER_BUFFER_SIZE;
8019 int r, rl;
8020 int s, sl;
8021 int cur, l;
8022 int count = 0;
8023
Daniel Veillard8f597c32003-10-06 08:19:27 +00008024 /* Check 2.6.0 was NXT(0) not RAW */
Daniel Veillarda07050d2003-10-19 14:46:32 +00008025 if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008026 SKIP(9);
8027 } else
8028 return;
8029
8030 ctxt->instate = XML_PARSER_CDATA_SECTION;
8031 r = CUR_CHAR(rl);
William M. Brack871611b2003-10-18 04:53:14 +00008032 if (!IS_CHAR(r)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008033 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008034 ctxt->instate = XML_PARSER_CONTENT;
8035 return;
8036 }
8037 NEXTL(rl);
8038 s = CUR_CHAR(sl);
William M. Brack871611b2003-10-18 04:53:14 +00008039 if (!IS_CHAR(s)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008040 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008041 ctxt->instate = XML_PARSER_CONTENT;
8042 return;
8043 }
8044 NEXTL(sl);
8045 cur = CUR_CHAR(l);
Daniel Veillard3c908dc2003-04-19 00:07:51 +00008046 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00008047 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008048 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008049 return;
8050 }
William M. Brack871611b2003-10-18 04:53:14 +00008051 while (IS_CHAR(cur) &&
Owen Taylor3473f882001-02-23 17:55:21 +00008052 ((r != ']') || (s != ']') || (cur != '>'))) {
8053 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00008054 xmlChar *tmp;
8055
Owen Taylor3473f882001-02-23 17:55:21 +00008056 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00008057 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
8058 if (tmp == NULL) {
8059 xmlFree(buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008060 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008061 return;
8062 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00008063 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00008064 }
8065 COPY_BUF(rl,buf,len,r);
8066 r = s;
8067 rl = sl;
8068 s = cur;
8069 sl = l;
8070 count++;
8071 if (count > 50) {
8072 GROW;
8073 count = 0;
8074 }
8075 NEXTL(l);
8076 cur = CUR_CHAR(l);
8077 }
8078 buf[len] = 0;
8079 ctxt->instate = XML_PARSER_CONTENT;
8080 if (cur != '>') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00008081 xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00008082 "CData section not finished\n%.50s\n", buf);
Owen Taylor3473f882001-02-23 17:55:21 +00008083 xmlFree(buf);
8084 return;
8085 }
8086 NEXTL(l);
8087
8088 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00008089 * OK the buffer is to be consumed as cdata.
Owen Taylor3473f882001-02-23 17:55:21 +00008090 */
8091 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
8092 if (ctxt->sax->cdataBlock != NULL)
8093 ctxt->sax->cdataBlock(ctxt->userData, buf, len);
Daniel Veillard7583a592001-07-08 13:15:55 +00008094 else if (ctxt->sax->characters != NULL)
8095 ctxt->sax->characters(ctxt->userData, buf, len);
Owen Taylor3473f882001-02-23 17:55:21 +00008096 }
8097 xmlFree(buf);
8098}
8099
8100/**
8101 * xmlParseContent:
8102 * @ctxt: an XML parser context
8103 *
8104 * Parse a content:
8105 *
8106 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
8107 */
8108
8109void
8110xmlParseContent(xmlParserCtxtPtr ctxt) {
8111 GROW;
Daniel Veillardfdc91562002-07-01 21:52:03 +00008112 while ((RAW != 0) &&
Owen Taylor3473f882001-02-23 17:55:21 +00008113 ((RAW != '<') || (NXT(1) != '/'))) {
8114 const xmlChar *test = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00008115 unsigned int cons = ctxt->input->consumed;
Daniel Veillard21a0f912001-02-25 19:54:14 +00008116 const xmlChar *cur = ctxt->input->cur;
Owen Taylor3473f882001-02-23 17:55:21 +00008117
8118 /*
Owen Taylor3473f882001-02-23 17:55:21 +00008119 * First case : a Processing Instruction.
8120 */
Daniel Veillardfdc91562002-07-01 21:52:03 +00008121 if ((*cur == '<') && (cur[1] == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008122 xmlParsePI(ctxt);
8123 }
8124
8125 /*
8126 * Second case : a CDSection
8127 */
Daniel Veillard8f597c32003-10-06 08:19:27 +00008128 /* 2.6.0 test was *cur not RAW */
Daniel Veillarda07050d2003-10-19 14:46:32 +00008129 else if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008130 xmlParseCDSect(ctxt);
8131 }
8132
8133 /*
8134 * Third case : a comment
8135 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00008136 else if ((*cur == '<') && (NXT(1) == '!') &&
Owen Taylor3473f882001-02-23 17:55:21 +00008137 (NXT(2) == '-') && (NXT(3) == '-')) {
8138 xmlParseComment(ctxt);
8139 ctxt->instate = XML_PARSER_CONTENT;
8140 }
8141
8142 /*
8143 * Fourth case : a sub-element.
8144 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00008145 else if (*cur == '<') {
Owen Taylor3473f882001-02-23 17:55:21 +00008146 xmlParseElement(ctxt);
8147 }
8148
8149 /*
8150 * Fifth case : a reference. If if has not been resolved,
8151 * parsing returns it's Name, create the node
8152 */
8153
Daniel Veillard21a0f912001-02-25 19:54:14 +00008154 else if (*cur == '&') {
Owen Taylor3473f882001-02-23 17:55:21 +00008155 xmlParseReference(ctxt);
8156 }
8157
8158 /*
8159 * Last case, text. Note that References are handled directly.
8160 */
8161 else {
8162 xmlParseCharData(ctxt, 0);
8163 }
8164
8165 GROW;
8166 /*
8167 * Pop-up of finished entities.
8168 */
Daniel Veillard561b7f82002-03-20 21:55:57 +00008169 while ((RAW == 0) && (ctxt->inputNr > 1))
Owen Taylor3473f882001-02-23 17:55:21 +00008170 xmlPopInput(ctxt);
8171 SHRINK;
8172
Daniel Veillardfdc91562002-07-01 21:52:03 +00008173 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008174 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8175 "detected an error in element content\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008176 ctxt->instate = XML_PARSER_EOF;
8177 break;
8178 }
8179 }
8180}
8181
8182/**
8183 * xmlParseElement:
8184 * @ctxt: an XML parser context
8185 *
8186 * parse an XML element, this is highly recursive
8187 *
8188 * [39] element ::= EmptyElemTag | STag content ETag
8189 *
8190 * [ WFC: Element Type Match ]
8191 * The Name in an element's end-tag must match the element type in the
8192 * start-tag.
8193 *
Owen Taylor3473f882001-02-23 17:55:21 +00008194 */
8195
8196void
8197xmlParseElement(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008198 const xmlChar *name;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008199 const xmlChar *prefix;
8200 const xmlChar *URI;
Owen Taylor3473f882001-02-23 17:55:21 +00008201 xmlParserNodeInfo node_info;
Daniel Veillardc82c57e2004-01-12 16:24:34 +00008202 int line, tlen;
Owen Taylor3473f882001-02-23 17:55:21 +00008203 xmlNodePtr ret;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008204 int nsNr = ctxt->nsNr;
Owen Taylor3473f882001-02-23 17:55:21 +00008205
8206 /* Capture start position */
8207 if (ctxt->record_info) {
8208 node_info.begin_pos = ctxt->input->consumed +
8209 (CUR_PTR - ctxt->input->base);
8210 node_info.begin_line = ctxt->input->line;
8211 }
8212
8213 if (ctxt->spaceNr == 0)
8214 spacePush(ctxt, -1);
8215 else
8216 spacePush(ctxt, *ctxt->space);
8217
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00008218 line = ctxt->input->line;
Daniel Veillard81273902003-09-30 00:43:48 +00008219#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +00008220 if (ctxt->sax2)
Daniel Veillard81273902003-09-30 00:43:48 +00008221#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00008222 name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
Daniel Veillard81273902003-09-30 00:43:48 +00008223#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +00008224 else
8225 name = xmlParseStartTag(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +00008226#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00008227 if (name == NULL) {
8228 spacePop(ctxt);
8229 return;
8230 }
8231 namePush(ctxt, name);
8232 ret = ctxt->node;
8233
Daniel Veillard4432df22003-09-28 18:58:27 +00008234#ifdef LIBXML_VALID_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +00008235 /*
8236 * [ VC: Root Element Type ]
8237 * The Name in the document type declaration must match the element
8238 * type of the root element.
8239 */
8240 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
8241 ctxt->node && (ctxt->node == ctxt->myDoc->children))
8242 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
Daniel Veillard4432df22003-09-28 18:58:27 +00008243#endif /* LIBXML_VALID_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00008244
8245 /*
8246 * Check for an Empty Element.
8247 */
8248 if ((RAW == '/') && (NXT(1) == '>')) {
8249 SKIP(2);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008250 if (ctxt->sax2) {
8251 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
8252 (!ctxt->disableSAX))
8253 ctxt->sax->endElementNs(ctxt->userData, name, prefix, URI);
Daniel Veillard81273902003-09-30 00:43:48 +00008254#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +00008255 } else {
8256 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
8257 (!ctxt->disableSAX))
8258 ctxt->sax->endElement(ctxt->userData, name);
Daniel Veillard81273902003-09-30 00:43:48 +00008259#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00008260 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008261 namePop(ctxt);
8262 spacePop(ctxt);
8263 if (nsNr != ctxt->nsNr)
8264 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +00008265 if ( ret != NULL && ctxt->record_info ) {
8266 node_info.end_pos = ctxt->input->consumed +
8267 (CUR_PTR - ctxt->input->base);
8268 node_info.end_line = ctxt->input->line;
8269 node_info.node = ret;
8270 xmlParserAddNodeInfo(ctxt, &node_info);
8271 }
8272 return;
8273 }
8274 if (RAW == '>') {
Daniel Veillard21a0f912001-02-25 19:54:14 +00008275 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00008276 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00008277 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_GT_REQUIRED,
8278 "Couldn't find end of Start Tag %s line %d\n",
8279 name, line, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008280
8281 /*
8282 * end of parsing of this node.
8283 */
8284 nodePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008285 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00008286 spacePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008287 if (nsNr != ctxt->nsNr)
8288 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +00008289
8290 /*
8291 * Capture end position and add node
8292 */
8293 if ( ret != NULL && ctxt->record_info ) {
8294 node_info.end_pos = ctxt->input->consumed +
8295 (CUR_PTR - ctxt->input->base);
8296 node_info.end_line = ctxt->input->line;
8297 node_info.node = ret;
8298 xmlParserAddNodeInfo(ctxt, &node_info);
8299 }
8300 return;
8301 }
8302
8303 /*
8304 * Parse the content of the element:
8305 */
8306 xmlParseContent(ctxt);
Daniel Veillard73b013f2003-09-30 12:36:01 +00008307 if (!IS_BYTE_CHAR(RAW)) {
Daniel Veillardf403d292003-10-05 13:51:35 +00008308 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
Daniel Veillard2b0f8792003-10-10 19:36:36 +00008309 "Premature end of data in tag %s line %d\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00008310 name, line, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008311
8312 /*
8313 * end of parsing of this node.
8314 */
8315 nodePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008316 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00008317 spacePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008318 if (nsNr != ctxt->nsNr)
8319 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +00008320 return;
8321 }
8322
8323 /*
8324 * parse the end of tag: '</' should be here.
8325 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00008326 if (ctxt->sax2) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00008327 xmlParseEndTag2(ctxt, prefix, URI, line, ctxt->nsNr - nsNr, tlen);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008328 namePop(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +00008329 }
8330#ifdef LIBXML_SAX1_ENABLED
8331 else
Daniel Veillard0fb18932003-09-07 09:14:37 +00008332 xmlParseEndTag1(ctxt, line);
Daniel Veillard81273902003-09-30 00:43:48 +00008333#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00008334
8335 /*
8336 * Capture end position and add node
8337 */
8338 if ( ret != NULL && ctxt->record_info ) {
8339 node_info.end_pos = ctxt->input->consumed +
8340 (CUR_PTR - ctxt->input->base);
8341 node_info.end_line = ctxt->input->line;
8342 node_info.node = ret;
8343 xmlParserAddNodeInfo(ctxt, &node_info);
8344 }
8345}
8346
8347/**
8348 * xmlParseVersionNum:
8349 * @ctxt: an XML parser context
8350 *
8351 * parse the XML version value.
8352 *
8353 * [26] VersionNum ::= ([a-zA-Z0-9_.:] | '-')+
8354 *
8355 * Returns the string giving the XML version number, or NULL
8356 */
8357xmlChar *
8358xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
8359 xmlChar *buf = NULL;
8360 int len = 0;
8361 int size = 10;
8362 xmlChar cur;
8363
Daniel Veillard3c908dc2003-04-19 00:07:51 +00008364 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00008365 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008366 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008367 return(NULL);
8368 }
8369 cur = CUR;
8370 while (((cur >= 'a') && (cur <= 'z')) ||
8371 ((cur >= 'A') && (cur <= 'Z')) ||
8372 ((cur >= '0') && (cur <= '9')) ||
8373 (cur == '_') || (cur == '.') ||
8374 (cur == ':') || (cur == '-')) {
8375 if (len + 1 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00008376 xmlChar *tmp;
8377
Owen Taylor3473f882001-02-23 17:55:21 +00008378 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00008379 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
8380 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008381 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008382 return(NULL);
8383 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00008384 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00008385 }
8386 buf[len++] = cur;
8387 NEXT;
8388 cur=CUR;
8389 }
8390 buf[len] = 0;
8391 return(buf);
8392}
8393
8394/**
8395 * xmlParseVersionInfo:
8396 * @ctxt: an XML parser context
8397 *
8398 * parse the XML version.
8399 *
8400 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
8401 *
8402 * [25] Eq ::= S? '=' S?
8403 *
8404 * Returns the version string, e.g. "1.0"
8405 */
8406
8407xmlChar *
8408xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
8409 xmlChar *version = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00008410
Daniel Veillarda07050d2003-10-19 14:46:32 +00008411 if (CMP7(CUR_PTR, 'v', 'e', 'r', 's', 'i', 'o', 'n')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008412 SKIP(7);
8413 SKIP_BLANKS;
8414 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008415 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008416 return(NULL);
8417 }
8418 NEXT;
8419 SKIP_BLANKS;
8420 if (RAW == '"') {
8421 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00008422 version = xmlParseVersionNum(ctxt);
8423 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008424 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008425 } else
8426 NEXT;
8427 } else if (RAW == '\''){
8428 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00008429 version = xmlParseVersionNum(ctxt);
8430 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008431 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008432 } else
8433 NEXT;
8434 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008435 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008436 }
8437 }
8438 return(version);
8439}
8440
8441/**
8442 * xmlParseEncName:
8443 * @ctxt: an XML parser context
8444 *
8445 * parse the XML encoding name
8446 *
8447 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
8448 *
8449 * Returns the encoding name value or NULL
8450 */
8451xmlChar *
8452xmlParseEncName(xmlParserCtxtPtr ctxt) {
8453 xmlChar *buf = NULL;
8454 int len = 0;
8455 int size = 10;
8456 xmlChar cur;
8457
8458 cur = CUR;
8459 if (((cur >= 'a') && (cur <= 'z')) ||
8460 ((cur >= 'A') && (cur <= 'Z'))) {
Daniel Veillard3c908dc2003-04-19 00:07:51 +00008461 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00008462 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008463 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008464 return(NULL);
8465 }
8466
8467 buf[len++] = cur;
8468 NEXT;
8469 cur = CUR;
8470 while (((cur >= 'a') && (cur <= 'z')) ||
8471 ((cur >= 'A') && (cur <= 'Z')) ||
8472 ((cur >= '0') && (cur <= '9')) ||
8473 (cur == '.') || (cur == '_') ||
8474 (cur == '-')) {
8475 if (len + 1 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00008476 xmlChar *tmp;
8477
Owen Taylor3473f882001-02-23 17:55:21 +00008478 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00008479 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
8480 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008481 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00008482 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00008483 return(NULL);
8484 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00008485 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00008486 }
8487 buf[len++] = cur;
8488 NEXT;
8489 cur = CUR;
8490 if (cur == 0) {
8491 SHRINK;
8492 GROW;
8493 cur = CUR;
8494 }
8495 }
8496 buf[len] = 0;
8497 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008498 xmlFatalErr(ctxt, XML_ERR_ENCODING_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008499 }
8500 return(buf);
8501}
8502
8503/**
8504 * xmlParseEncodingDecl:
8505 * @ctxt: an XML parser context
8506 *
8507 * parse the XML encoding declaration
8508 *
8509 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
8510 *
8511 * this setups the conversion filters.
8512 *
8513 * Returns the encoding value or NULL
8514 */
8515
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00008516const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00008517xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
8518 xmlChar *encoding = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00008519
8520 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00008521 if (CMP8(CUR_PTR, 'e', 'n', 'c', 'o', 'd', 'i', 'n', 'g')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008522 SKIP(8);
8523 SKIP_BLANKS;
8524 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008525 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008526 return(NULL);
8527 }
8528 NEXT;
8529 SKIP_BLANKS;
8530 if (RAW == '"') {
8531 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00008532 encoding = xmlParseEncName(ctxt);
8533 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008534 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008535 } else
8536 NEXT;
8537 } else if (RAW == '\''){
8538 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00008539 encoding = xmlParseEncName(ctxt);
8540 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008541 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008542 } else
8543 NEXT;
Daniel Veillard82ac6b02002-02-17 23:18:55 +00008544 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008545 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008546 }
Daniel Veillard6b621b82003-08-11 15:03:34 +00008547 /*
8548 * UTF-16 encoding stwich has already taken place at this stage,
8549 * more over the little-endian/big-endian selection is already done
8550 */
8551 if ((encoding != NULL) &&
8552 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-16")) ||
8553 (!xmlStrcasecmp(encoding, BAD_CAST "UTF16")))) {
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00008554 if (ctxt->encoding != NULL)
8555 xmlFree((xmlChar *) ctxt->encoding);
8556 ctxt->encoding = encoding;
Daniel Veillardb19ba832003-08-14 00:33:46 +00008557 }
8558 /*
8559 * UTF-8 encoding is handled natively
8560 */
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00008561 else if ((encoding != NULL) &&
Daniel Veillardb19ba832003-08-14 00:33:46 +00008562 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-8")) ||
8563 (!xmlStrcasecmp(encoding, BAD_CAST "UTF8")))) {
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00008564 if (ctxt->encoding != NULL)
8565 xmlFree((xmlChar *) ctxt->encoding);
8566 ctxt->encoding = encoding;
Daniel Veillard6b621b82003-08-11 15:03:34 +00008567 }
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00008568 else if (encoding != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +00008569 xmlCharEncodingHandlerPtr handler;
8570
8571 if (ctxt->input->encoding != NULL)
8572 xmlFree((xmlChar *) ctxt->input->encoding);
8573 ctxt->input->encoding = encoding;
8574
Daniel Veillarda6874ca2003-07-29 16:47:24 +00008575 handler = xmlFindCharEncodingHandler((const char *) encoding);
8576 if (handler != NULL) {
8577 xmlSwitchToEncoding(ctxt, handler);
Owen Taylor3473f882001-02-23 17:55:21 +00008578 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00008579 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
Daniel Veillarda6874ca2003-07-29 16:47:24 +00008580 "Unsupported encoding %s\n", encoding);
8581 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008582 }
8583 }
8584 }
8585 return(encoding);
8586}
8587
8588/**
8589 * xmlParseSDDecl:
8590 * @ctxt: an XML parser context
8591 *
8592 * parse the XML standalone declaration
8593 *
8594 * [32] SDDecl ::= S 'standalone' Eq
8595 * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
8596 *
8597 * [ VC: Standalone Document Declaration ]
8598 * TODO The standalone document declaration must have the value "no"
8599 * if any external markup declarations contain declarations of:
8600 * - attributes with default values, if elements to which these
8601 * attributes apply appear in the document without specifications
8602 * of values for these attributes, or
8603 * - entities (other than amp, lt, gt, apos, quot), if references
8604 * to those entities appear in the document, or
8605 * - attributes with values subject to normalization, where the
8606 * attribute appears in the document with a value which will change
8607 * as a result of normalization, or
8608 * - element types with element content, if white space occurs directly
8609 * within any instance of those types.
8610 *
8611 * Returns 1 if standalone, 0 otherwise
8612 */
8613
8614int
8615xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
8616 int standalone = -1;
8617
8618 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00008619 if (CMP10(CUR_PTR, 's', 't', 'a', 'n', 'd', 'a', 'l', 'o', 'n', 'e')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008620 SKIP(10);
8621 SKIP_BLANKS;
8622 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008623 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008624 return(standalone);
8625 }
8626 NEXT;
8627 SKIP_BLANKS;
8628 if (RAW == '\''){
8629 NEXT;
8630 if ((RAW == 'n') && (NXT(1) == 'o')) {
8631 standalone = 0;
8632 SKIP(2);
8633 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
8634 (NXT(2) == 's')) {
8635 standalone = 1;
8636 SKIP(3);
8637 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008638 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008639 }
8640 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008641 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008642 } else
8643 NEXT;
8644 } else if (RAW == '"'){
8645 NEXT;
8646 if ((RAW == 'n') && (NXT(1) == 'o')) {
8647 standalone = 0;
8648 SKIP(2);
8649 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
8650 (NXT(2) == 's')) {
8651 standalone = 1;
8652 SKIP(3);
8653 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008654 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008655 }
8656 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008657 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008658 } else
8659 NEXT;
8660 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008661 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008662 }
8663 }
8664 return(standalone);
8665}
8666
8667/**
8668 * xmlParseXMLDecl:
8669 * @ctxt: an XML parser context
8670 *
8671 * parse an XML declaration header
8672 *
8673 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
8674 */
8675
8676void
8677xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
8678 xmlChar *version;
8679
8680 /*
8681 * We know that '<?xml' is here.
8682 */
8683 SKIP(5);
8684
William M. Brack76e95df2003-10-18 16:20:14 +00008685 if (!IS_BLANK_CH(RAW)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008686 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
8687 "Blank needed after '<?xml'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008688 }
8689 SKIP_BLANKS;
8690
8691 /*
Daniel Veillard19840942001-11-29 16:11:38 +00008692 * We must have the VersionInfo here.
Owen Taylor3473f882001-02-23 17:55:21 +00008693 */
8694 version = xmlParseVersionInfo(ctxt);
Daniel Veillard19840942001-11-29 16:11:38 +00008695 if (version == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008696 xmlFatalErr(ctxt, XML_ERR_VERSION_MISSING, NULL);
Daniel Veillard19840942001-11-29 16:11:38 +00008697 } else {
8698 if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) {
8699 /*
8700 * TODO: Blueberry should be detected here
8701 */
Daniel Veillard24eb9782003-10-04 21:08:09 +00008702 xmlWarningMsg(ctxt, XML_WAR_UNKNOWN_VERSION,
8703 "Unsupported version '%s'\n",
8704 version, NULL);
Daniel Veillard19840942001-11-29 16:11:38 +00008705 }
8706 if (ctxt->version != NULL)
Daniel Veillardd3b08822001-12-05 12:03:33 +00008707 xmlFree((void *) ctxt->version);
Daniel Veillard19840942001-11-29 16:11:38 +00008708 ctxt->version = version;
Daniel Veillarda050d232001-09-05 15:51:05 +00008709 }
Owen Taylor3473f882001-02-23 17:55:21 +00008710
8711 /*
8712 * We may have the encoding declaration
8713 */
William M. Brack76e95df2003-10-18 16:20:14 +00008714 if (!IS_BLANK_CH(RAW)) {
Owen Taylor3473f882001-02-23 17:55:21 +00008715 if ((RAW == '?') && (NXT(1) == '>')) {
8716 SKIP(2);
8717 return;
8718 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008719 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008720 }
8721 xmlParseEncodingDecl(ctxt);
8722 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
8723 /*
8724 * The XML REC instructs us to stop parsing right here
8725 */
8726 return;
8727 }
8728
8729 /*
8730 * We may have the standalone status.
8731 */
William M. Brack76e95df2003-10-18 16:20:14 +00008732 if ((ctxt->input->encoding != NULL) && (!IS_BLANK_CH(RAW))) {
Owen Taylor3473f882001-02-23 17:55:21 +00008733 if ((RAW == '?') && (NXT(1) == '>')) {
8734 SKIP(2);
8735 return;
8736 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008737 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008738 }
8739 SKIP_BLANKS;
8740 ctxt->input->standalone = xmlParseSDDecl(ctxt);
8741
8742 SKIP_BLANKS;
8743 if ((RAW == '?') && (NXT(1) == '>')) {
8744 SKIP(2);
8745 } else if (RAW == '>') {
8746 /* Deprecated old WD ... */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008747 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008748 NEXT;
8749 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008750 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008751 MOVETO_ENDTAG(CUR_PTR);
8752 NEXT;
8753 }
8754}
8755
8756/**
8757 * xmlParseMisc:
8758 * @ctxt: an XML parser context
8759 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00008760 * parse an XML Misc* optional field.
Owen Taylor3473f882001-02-23 17:55:21 +00008761 *
8762 * [27] Misc ::= Comment | PI | S
8763 */
8764
8765void
8766xmlParseMisc(xmlParserCtxtPtr ctxt) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00008767 while (((RAW == '<') && (NXT(1) == '?')) ||
Daniel Veillarda07050d2003-10-19 14:46:32 +00008768 (CMP4(CUR_PTR, '<', '!', '-', '-')) ||
William M. Brack76e95df2003-10-18 16:20:14 +00008769 IS_BLANK_CH(CUR)) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00008770 if ((RAW == '<') && (NXT(1) == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008771 xmlParsePI(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00008772 } else if (IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00008773 NEXT;
8774 } else
8775 xmlParseComment(ctxt);
8776 }
8777}
8778
8779/**
8780 * xmlParseDocument:
8781 * @ctxt: an XML parser context
8782 *
8783 * parse an XML document (and build a tree if using the standard SAX
8784 * interface).
8785 *
8786 * [1] document ::= prolog element Misc*
8787 *
8788 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
8789 *
8790 * Returns 0, -1 in case of error. the parser context is augmented
8791 * as a result of the parsing.
8792 */
8793
8794int
8795xmlParseDocument(xmlParserCtxtPtr ctxt) {
8796 xmlChar start[4];
8797 xmlCharEncoding enc;
8798
8799 xmlInitParser();
8800
Daniel Veillard36e5cd52004-11-02 14:52:23 +00008801 if ((ctxt == NULL) || (ctxt->input == NULL))
8802 return(-1);
8803
Owen Taylor3473f882001-02-23 17:55:21 +00008804 GROW;
8805
8806 /*
Daniel Veillard0fb18932003-09-07 09:14:37 +00008807 * SAX: detecting the level.
8808 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00008809 xmlDetectSAX2(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008810
8811 /*
Owen Taylor3473f882001-02-23 17:55:21 +00008812 * SAX: beginning of the document processing.
8813 */
8814 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
8815 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
8816
Daniel Veillard4aede2e2003-10-17 12:43:59 +00008817 if ((ctxt->encoding == (const xmlChar *)XML_CHAR_ENCODING_NONE) &&
8818 ((ctxt->input->end - ctxt->input->cur) >= 4)) {
Daniel Veillard4aafa792001-07-28 17:21:12 +00008819 /*
8820 * Get the 4 first bytes and decode the charset
8821 * if enc != XML_CHAR_ENCODING_NONE
8822 * plug some encoding conversion routines.
8823 */
8824 start[0] = RAW;
8825 start[1] = NXT(1);
8826 start[2] = NXT(2);
8827 start[3] = NXT(3);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00008828 enc = xmlDetectCharEncoding(&start[0], 4);
Daniel Veillard4aafa792001-07-28 17:21:12 +00008829 if (enc != XML_CHAR_ENCODING_NONE) {
8830 xmlSwitchEncoding(ctxt, enc);
8831 }
Owen Taylor3473f882001-02-23 17:55:21 +00008832 }
8833
8834
8835 if (CUR == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008836 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008837 }
8838
8839 /*
8840 * Check for the XMLDecl in the Prolog.
8841 */
8842 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00008843 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +00008844
8845 /*
8846 * Note that we will switch encoding on the fly.
8847 */
8848 xmlParseXMLDecl(ctxt);
8849 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
8850 /*
8851 * The XML REC instructs us to stop parsing right here
8852 */
8853 return(-1);
8854 }
8855 ctxt->standalone = ctxt->input->standalone;
8856 SKIP_BLANKS;
8857 } else {
8858 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
8859 }
8860 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
8861 ctxt->sax->startDocument(ctxt->userData);
8862
8863 /*
8864 * The Misc part of the Prolog
8865 */
8866 GROW;
8867 xmlParseMisc(ctxt);
8868
8869 /*
8870 * Then possibly doc type declaration(s) and more Misc
8871 * (doctypedecl Misc*)?
8872 */
8873 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00008874 if (CMP9(CUR_PTR, '<', '!', 'D', 'O', 'C', 'T', 'Y', 'P', 'E')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008875
8876 ctxt->inSubset = 1;
8877 xmlParseDocTypeDecl(ctxt);
8878 if (RAW == '[') {
8879 ctxt->instate = XML_PARSER_DTD;
8880 xmlParseInternalSubset(ctxt);
8881 }
8882
8883 /*
8884 * Create and update the external subset.
8885 */
8886 ctxt->inSubset = 2;
8887 if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
8888 (!ctxt->disableSAX))
8889 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
8890 ctxt->extSubSystem, ctxt->extSubURI);
8891 ctxt->inSubset = 0;
8892
8893
8894 ctxt->instate = XML_PARSER_PROLOG;
8895 xmlParseMisc(ctxt);
8896 }
8897
8898 /*
8899 * Time to start parsing the tree itself
8900 */
8901 GROW;
8902 if (RAW != '<') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008903 xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
8904 "Start tag expected, '<' not found\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008905 } else {
8906 ctxt->instate = XML_PARSER_CONTENT;
8907 xmlParseElement(ctxt);
8908 ctxt->instate = XML_PARSER_EPILOG;
8909
8910
8911 /*
8912 * The Misc part at the end
8913 */
8914 xmlParseMisc(ctxt);
8915
Daniel Veillard561b7f82002-03-20 21:55:57 +00008916 if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008917 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008918 }
8919 ctxt->instate = XML_PARSER_EOF;
8920 }
8921
8922 /*
8923 * SAX: end of the document processing.
8924 */
Daniel Veillard8d24cc12002-03-05 15:41:29 +00008925 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00008926 ctxt->sax->endDocument(ctxt->userData);
8927
Daniel Veillard5997aca2002-03-18 18:36:20 +00008928 /*
8929 * Remove locally kept entity definitions if the tree was not built
8930 */
8931 if ((ctxt->myDoc != NULL) &&
8932 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
8933 xmlFreeDoc(ctxt->myDoc);
8934 ctxt->myDoc = NULL;
8935 }
8936
Daniel Veillardc7612992002-02-17 22:47:37 +00008937 if (! ctxt->wellFormed) {
8938 ctxt->valid = 0;
8939 return(-1);
8940 }
Owen Taylor3473f882001-02-23 17:55:21 +00008941 return(0);
8942}
8943
8944/**
8945 * xmlParseExtParsedEnt:
8946 * @ctxt: an XML parser context
8947 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00008948 * parse a general parsed entity
Owen Taylor3473f882001-02-23 17:55:21 +00008949 * An external general parsed entity is well-formed if it matches the
8950 * production labeled extParsedEnt.
8951 *
8952 * [78] extParsedEnt ::= TextDecl? content
8953 *
8954 * Returns 0, -1 in case of error. the parser context is augmented
8955 * as a result of the parsing.
8956 */
8957
8958int
8959xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
8960 xmlChar start[4];
8961 xmlCharEncoding enc;
8962
Daniel Veillard36e5cd52004-11-02 14:52:23 +00008963 if ((ctxt == NULL) || (ctxt->input == NULL))
8964 return(-1);
8965
Owen Taylor3473f882001-02-23 17:55:21 +00008966 xmlDefaultSAXHandlerInit();
8967
Daniel Veillard309f81d2003-09-23 09:02:53 +00008968 xmlDetectSAX2(ctxt);
8969
Owen Taylor3473f882001-02-23 17:55:21 +00008970 GROW;
8971
8972 /*
8973 * SAX: beginning of the document processing.
8974 */
8975 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
8976 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
8977
8978 /*
8979 * Get the 4 first bytes and decode the charset
8980 * if enc != XML_CHAR_ENCODING_NONE
8981 * plug some encoding conversion routines.
8982 */
Daniel Veillard4aede2e2003-10-17 12:43:59 +00008983 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
8984 start[0] = RAW;
8985 start[1] = NXT(1);
8986 start[2] = NXT(2);
8987 start[3] = NXT(3);
8988 enc = xmlDetectCharEncoding(start, 4);
8989 if (enc != XML_CHAR_ENCODING_NONE) {
8990 xmlSwitchEncoding(ctxt, enc);
8991 }
Owen Taylor3473f882001-02-23 17:55:21 +00008992 }
8993
8994
8995 if (CUR == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008996 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008997 }
8998
8999 /*
9000 * Check for the XMLDecl in the Prolog.
9001 */
9002 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00009003 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +00009004
9005 /*
9006 * Note that we will switch encoding on the fly.
9007 */
9008 xmlParseXMLDecl(ctxt);
9009 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
9010 /*
9011 * The XML REC instructs us to stop parsing right here
9012 */
9013 return(-1);
9014 }
9015 SKIP_BLANKS;
9016 } else {
9017 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
9018 }
9019 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
9020 ctxt->sax->startDocument(ctxt->userData);
9021
9022 /*
9023 * Doing validity checking on chunk doesn't make sense
9024 */
9025 ctxt->instate = XML_PARSER_CONTENT;
9026 ctxt->validate = 0;
9027 ctxt->loadsubset = 0;
9028 ctxt->depth = 0;
9029
9030 xmlParseContent(ctxt);
9031
9032 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009033 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009034 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009035 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009036 }
9037
9038 /*
9039 * SAX: end of the document processing.
9040 */
Daniel Veillard8d24cc12002-03-05 15:41:29 +00009041 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00009042 ctxt->sax->endDocument(ctxt->userData);
9043
9044 if (! ctxt->wellFormed) return(-1);
9045 return(0);
9046}
9047
Daniel Veillard73b013f2003-09-30 12:36:01 +00009048#ifdef LIBXML_PUSH_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +00009049/************************************************************************
9050 * *
9051 * Progressive parsing interfaces *
9052 * *
9053 ************************************************************************/
9054
9055/**
9056 * xmlParseLookupSequence:
9057 * @ctxt: an XML parser context
9058 * @first: the first char to lookup
9059 * @next: the next char to lookup or zero
9060 * @third: the next char to lookup or zero
9061 *
9062 * Try to find if a sequence (first, next, third) or just (first next) or
9063 * (first) is available in the input stream.
9064 * This function has a side effect of (possibly) incrementing ctxt->checkIndex
9065 * to avoid rescanning sequences of bytes, it DOES change the state of the
9066 * parser, do not use liberally.
9067 *
9068 * Returns the index to the current parsing point if the full sequence
9069 * is available, -1 otherwise.
9070 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00009071static int
Owen Taylor3473f882001-02-23 17:55:21 +00009072xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
9073 xmlChar next, xmlChar third) {
9074 int base, len;
9075 xmlParserInputPtr in;
9076 const xmlChar *buf;
9077
9078 in = ctxt->input;
9079 if (in == NULL) return(-1);
9080 base = in->cur - in->base;
9081 if (base < 0) return(-1);
9082 if (ctxt->checkIndex > base)
9083 base = ctxt->checkIndex;
9084 if (in->buf == NULL) {
9085 buf = in->base;
9086 len = in->length;
9087 } else {
9088 buf = in->buf->buffer->content;
9089 len = in->buf->buffer->use;
9090 }
9091 /* take into account the sequence length */
9092 if (third) len -= 2;
9093 else if (next) len --;
9094 for (;base < len;base++) {
9095 if (buf[base] == first) {
9096 if (third != 0) {
9097 if ((buf[base + 1] != next) ||
9098 (buf[base + 2] != third)) continue;
9099 } else if (next != 0) {
9100 if (buf[base + 1] != next) continue;
9101 }
9102 ctxt->checkIndex = 0;
9103#ifdef DEBUG_PUSH
9104 if (next == 0)
9105 xmlGenericError(xmlGenericErrorContext,
9106 "PP: lookup '%c' found at %d\n",
9107 first, base);
9108 else if (third == 0)
9109 xmlGenericError(xmlGenericErrorContext,
9110 "PP: lookup '%c%c' found at %d\n",
9111 first, next, base);
9112 else
9113 xmlGenericError(xmlGenericErrorContext,
9114 "PP: lookup '%c%c%c' found at %d\n",
9115 first, next, third, base);
9116#endif
9117 return(base - (in->cur - in->base));
9118 }
9119 }
9120 ctxt->checkIndex = base;
9121#ifdef DEBUG_PUSH
9122 if (next == 0)
9123 xmlGenericError(xmlGenericErrorContext,
9124 "PP: lookup '%c' failed\n", first);
9125 else if (third == 0)
9126 xmlGenericError(xmlGenericErrorContext,
9127 "PP: lookup '%c%c' failed\n", first, next);
9128 else
9129 xmlGenericError(xmlGenericErrorContext,
9130 "PP: lookup '%c%c%c' failed\n", first, next, third);
9131#endif
9132 return(-1);
9133}
9134
9135/**
Daniel Veillarda880b122003-04-21 21:36:41 +00009136 * xmlParseGetLasts:
9137 * @ctxt: an XML parser context
9138 * @lastlt: pointer to store the last '<' from the input
9139 * @lastgt: pointer to store the last '>' from the input
9140 *
9141 * Lookup the last < and > in the current chunk
9142 */
9143static void
9144xmlParseGetLasts(xmlParserCtxtPtr ctxt, const xmlChar **lastlt,
9145 const xmlChar **lastgt) {
9146 const xmlChar *tmp;
9147
9148 if ((ctxt == NULL) || (lastlt == NULL) || (lastgt == NULL)) {
9149 xmlGenericError(xmlGenericErrorContext,
9150 "Internal error: xmlParseGetLasts\n");
9151 return;
9152 }
Daniel Veillard0df3bc32004-06-08 12:03:41 +00009153 if ((ctxt->progressive != 0) && (ctxt->inputNr == 1)) {
Daniel Veillarda880b122003-04-21 21:36:41 +00009154 tmp = ctxt->input->end;
9155 tmp--;
Daniel Veillardeb70f932004-07-05 16:46:09 +00009156 while ((tmp >= ctxt->input->base) && (*tmp != '<')) tmp--;
Daniel Veillarda880b122003-04-21 21:36:41 +00009157 if (tmp < ctxt->input->base) {
9158 *lastlt = NULL;
9159 *lastgt = NULL;
Daniel Veillarda880b122003-04-21 21:36:41 +00009160 } else {
Daniel Veillardeb70f932004-07-05 16:46:09 +00009161 *lastlt = tmp;
9162 tmp++;
9163 while ((tmp < ctxt->input->end) && (*tmp != '>')) {
9164 if (*tmp == '\'') {
9165 tmp++;
9166 while ((tmp < ctxt->input->end) && (*tmp != '\'')) tmp++;
9167 if (tmp < ctxt->input->end) tmp++;
9168 } else if (*tmp == '"') {
9169 tmp++;
9170 while ((tmp < ctxt->input->end) && (*tmp != '"')) tmp++;
9171 if (tmp < ctxt->input->end) tmp++;
9172 } else
9173 tmp++;
9174 }
9175 if (tmp < ctxt->input->end)
9176 *lastgt = tmp;
9177 else {
9178 tmp = *lastlt;
9179 tmp--;
9180 while ((tmp >= ctxt->input->base) && (*tmp != '>')) tmp--;
9181 if (tmp >= ctxt->input->base)
9182 *lastgt = tmp;
9183 else
9184 *lastgt = NULL;
9185 }
Daniel Veillarda880b122003-04-21 21:36:41 +00009186 }
Daniel Veillarda880b122003-04-21 21:36:41 +00009187 } else {
9188 *lastlt = NULL;
9189 *lastgt = NULL;
9190 }
9191}
9192/**
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +00009193 * xmlCheckCdataPush:
9194 * @cur: pointer to the bock of characters
9195 * @len: length of the block in bytes
9196 *
9197 * Check that the block of characters is okay as SCdata content [20]
9198 *
9199 * Returns the number of bytes to pass if okay, a negative index where an
9200 * UTF-8 error occured otherwise
9201 */
9202static int
9203xmlCheckCdataPush(const xmlChar *utf, int len) {
9204 int ix;
9205 unsigned char c;
9206 int codepoint;
9207
9208 if ((utf == NULL) || (len <= 0))
9209 return(0);
9210
9211 for (ix = 0; ix < len;) { /* string is 0-terminated */
9212 c = utf[ix];
9213 if ((c & 0x80) == 0x00) { /* 1-byte code, starts with 10 */
9214 if (c >= 0x20)
9215 ix++;
9216 else if ((c == 0xA) || (c == 0xD) || (c == 0x9))
9217 ix++;
9218 else
9219 return(-ix);
9220 } else if ((c & 0xe0) == 0xc0) {/* 2-byte code, starts with 110 */
9221 if (ix + 2 > len) return(ix);
9222 if ((utf[ix+1] & 0xc0 ) != 0x80)
9223 return(-ix);
9224 codepoint = (utf[0] & 0x1f) << 6;
9225 codepoint |= utf[1] & 0x3f;
9226 if (!xmlIsCharQ(codepoint))
9227 return(-ix);
9228 ix += 2;
9229 } else if ((c & 0xf0) == 0xe0) {/* 3-byte code, starts with 1110 */
9230 if (ix + 3 > len) return(ix);
9231 if (((utf[ix+1] & 0xc0) != 0x80) ||
9232 ((utf[ix+2] & 0xc0) != 0x80))
9233 return(-ix);
9234 codepoint = (utf[0] & 0xf) << 12;
9235 codepoint |= (utf[1] & 0x3f) << 6;
9236 codepoint |= utf[2] & 0x3f;
9237 if (!xmlIsCharQ(codepoint))
9238 return(-ix);
9239 ix += 3;
9240 } else if ((c & 0xf8) == 0xf0) {/* 4-byte code, starts with 11110 */
9241 if (ix + 4 > len) return(ix);
9242 if (((utf[ix+1] & 0xc0) != 0x80) ||
9243 ((utf[ix+2] & 0xc0) != 0x80) ||
9244 ((utf[ix+3] & 0xc0) != 0x80))
9245 return(-ix);
9246 codepoint = (utf[0] & 0x7) << 18;
9247 codepoint |= (utf[1] & 0x3f) << 12;
9248 codepoint |= (utf[2] & 0x3f) << 6;
9249 codepoint |= utf[3] & 0x3f;
9250 if (!xmlIsCharQ(codepoint))
9251 return(-ix);
9252 ix += 4;
9253 } else /* unknown encoding */
9254 return(-ix);
9255 }
9256 return(ix);
9257}
9258
9259/**
Owen Taylor3473f882001-02-23 17:55:21 +00009260 * xmlParseTryOrFinish:
9261 * @ctxt: an XML parser context
9262 * @terminate: last chunk indicator
9263 *
9264 * Try to progress on parsing
9265 *
9266 * Returns zero if no parsing was possible
9267 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00009268static int
Owen Taylor3473f882001-02-23 17:55:21 +00009269xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
9270 int ret = 0;
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009271 int avail, tlen;
Owen Taylor3473f882001-02-23 17:55:21 +00009272 xmlChar cur, next;
Daniel Veillarda880b122003-04-21 21:36:41 +00009273 const xmlChar *lastlt, *lastgt;
Owen Taylor3473f882001-02-23 17:55:21 +00009274
Daniel Veillard36e5cd52004-11-02 14:52:23 +00009275 if (ctxt->input == NULL)
9276 return(0);
9277
Owen Taylor3473f882001-02-23 17:55:21 +00009278#ifdef DEBUG_PUSH
9279 switch (ctxt->instate) {
9280 case XML_PARSER_EOF:
9281 xmlGenericError(xmlGenericErrorContext,
9282 "PP: try EOF\n"); break;
9283 case XML_PARSER_START:
9284 xmlGenericError(xmlGenericErrorContext,
9285 "PP: try START\n"); break;
9286 case XML_PARSER_MISC:
9287 xmlGenericError(xmlGenericErrorContext,
9288 "PP: try MISC\n");break;
9289 case XML_PARSER_COMMENT:
9290 xmlGenericError(xmlGenericErrorContext,
9291 "PP: try COMMENT\n");break;
9292 case XML_PARSER_PROLOG:
9293 xmlGenericError(xmlGenericErrorContext,
9294 "PP: try PROLOG\n");break;
9295 case XML_PARSER_START_TAG:
9296 xmlGenericError(xmlGenericErrorContext,
9297 "PP: try START_TAG\n");break;
9298 case XML_PARSER_CONTENT:
9299 xmlGenericError(xmlGenericErrorContext,
9300 "PP: try CONTENT\n");break;
9301 case XML_PARSER_CDATA_SECTION:
9302 xmlGenericError(xmlGenericErrorContext,
9303 "PP: try CDATA_SECTION\n");break;
9304 case XML_PARSER_END_TAG:
9305 xmlGenericError(xmlGenericErrorContext,
9306 "PP: try END_TAG\n");break;
9307 case XML_PARSER_ENTITY_DECL:
9308 xmlGenericError(xmlGenericErrorContext,
9309 "PP: try ENTITY_DECL\n");break;
9310 case XML_PARSER_ENTITY_VALUE:
9311 xmlGenericError(xmlGenericErrorContext,
9312 "PP: try ENTITY_VALUE\n");break;
9313 case XML_PARSER_ATTRIBUTE_VALUE:
9314 xmlGenericError(xmlGenericErrorContext,
9315 "PP: try ATTRIBUTE_VALUE\n");break;
9316 case XML_PARSER_DTD:
9317 xmlGenericError(xmlGenericErrorContext,
9318 "PP: try DTD\n");break;
9319 case XML_PARSER_EPILOG:
9320 xmlGenericError(xmlGenericErrorContext,
9321 "PP: try EPILOG\n");break;
9322 case XML_PARSER_PI:
9323 xmlGenericError(xmlGenericErrorContext,
9324 "PP: try PI\n");break;
9325 case XML_PARSER_IGNORE:
9326 xmlGenericError(xmlGenericErrorContext,
9327 "PP: try IGNORE\n");break;
9328 }
9329#endif
9330
Daniel Veillard198c1bf2003-10-20 17:07:41 +00009331 if ((ctxt->input != NULL) &&
9332 (ctxt->input->cur - ctxt->input->base > 4096)) {
Daniel Veillarda880b122003-04-21 21:36:41 +00009333 xmlSHRINK(ctxt);
9334 ctxt->checkIndex = 0;
9335 }
9336 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Aleksey Sanine48a3182002-05-09 18:20:01 +00009337
Daniel Veillarda880b122003-04-21 21:36:41 +00009338 while (1) {
Daniel Veillard14412512005-01-21 23:53:26 +00009339 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00009340 return(0);
9341
9342
Owen Taylor3473f882001-02-23 17:55:21 +00009343 /*
9344 * Pop-up of finished entities.
9345 */
9346 while ((RAW == 0) && (ctxt->inputNr > 1))
9347 xmlPopInput(ctxt);
9348
Daniel Veillard198c1bf2003-10-20 17:07:41 +00009349 if (ctxt->input == NULL) break;
Owen Taylor3473f882001-02-23 17:55:21 +00009350 if (ctxt->input->buf == NULL)
Daniel Veillarda880b122003-04-21 21:36:41 +00009351 avail = ctxt->input->length -
9352 (ctxt->input->cur - ctxt->input->base);
Daniel Veillard158a4d22002-02-20 22:17:58 +00009353 else {
9354 /*
9355 * If we are operating on converted input, try to flush
9356 * remainng chars to avoid them stalling in the non-converted
9357 * buffer.
9358 */
9359 if ((ctxt->input->buf->raw != NULL) &&
9360 (ctxt->input->buf->raw->use > 0)) {
9361 int base = ctxt->input->base -
9362 ctxt->input->buf->buffer->content;
9363 int current = ctxt->input->cur - ctxt->input->base;
9364
9365 xmlParserInputBufferPush(ctxt->input->buf, 0, "");
9366 ctxt->input->base = ctxt->input->buf->buffer->content + base;
9367 ctxt->input->cur = ctxt->input->base + current;
9368 ctxt->input->end =
9369 &ctxt->input->buf->buffer->content[
9370 ctxt->input->buf->buffer->use];
9371 }
9372 avail = ctxt->input->buf->buffer->use -
9373 (ctxt->input->cur - ctxt->input->base);
9374 }
Owen Taylor3473f882001-02-23 17:55:21 +00009375 if (avail < 1)
9376 goto done;
9377 switch (ctxt->instate) {
9378 case XML_PARSER_EOF:
9379 /*
9380 * Document parsing is done !
9381 */
9382 goto done;
9383 case XML_PARSER_START:
Daniel Veillard0e4cd172001-06-28 12:13:56 +00009384 if (ctxt->charset == XML_CHAR_ENCODING_NONE) {
9385 xmlChar start[4];
9386 xmlCharEncoding enc;
9387
9388 /*
9389 * Very first chars read from the document flow.
9390 */
9391 if (avail < 4)
9392 goto done;
9393
9394 /*
9395 * Get the 4 first bytes and decode the charset
9396 * if enc != XML_CHAR_ENCODING_NONE
William M. Brack3a1cd212005-02-11 14:35:54 +00009397 * plug some encoding conversion routines,
9398 * else xmlSwitchEncoding will set to (default)
9399 * UTF8.
Daniel Veillard0e4cd172001-06-28 12:13:56 +00009400 */
9401 start[0] = RAW;
9402 start[1] = NXT(1);
9403 start[2] = NXT(2);
9404 start[3] = NXT(3);
9405 enc = xmlDetectCharEncoding(start, 4);
William M. Brack3a1cd212005-02-11 14:35:54 +00009406 xmlSwitchEncoding(ctxt, enc);
Daniel Veillard0e4cd172001-06-28 12:13:56 +00009407 break;
9408 }
Owen Taylor3473f882001-02-23 17:55:21 +00009409
Daniel Veillard2b8c4a12003-10-02 22:28:19 +00009410 if (avail < 2)
9411 goto done;
Owen Taylor3473f882001-02-23 17:55:21 +00009412 cur = ctxt->input->cur[0];
9413 next = ctxt->input->cur[1];
9414 if (cur == 0) {
9415 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
9416 ctxt->sax->setDocumentLocator(ctxt->userData,
9417 &xmlDefaultSAXLocator);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009418 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009419 ctxt->instate = XML_PARSER_EOF;
9420#ifdef DEBUG_PUSH
9421 xmlGenericError(xmlGenericErrorContext,
9422 "PP: entering EOF\n");
9423#endif
9424 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
9425 ctxt->sax->endDocument(ctxt->userData);
9426 goto done;
9427 }
9428 if ((cur == '<') && (next == '?')) {
9429 /* PI or XML decl */
9430 if (avail < 5) return(ret);
9431 if ((!terminate) &&
9432 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
9433 return(ret);
9434 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
9435 ctxt->sax->setDocumentLocator(ctxt->userData,
9436 &xmlDefaultSAXLocator);
9437 if ((ctxt->input->cur[2] == 'x') &&
9438 (ctxt->input->cur[3] == 'm') &&
9439 (ctxt->input->cur[4] == 'l') &&
William M. Brack76e95df2003-10-18 16:20:14 +00009440 (IS_BLANK_CH(ctxt->input->cur[5]))) {
Owen Taylor3473f882001-02-23 17:55:21 +00009441 ret += 5;
9442#ifdef DEBUG_PUSH
9443 xmlGenericError(xmlGenericErrorContext,
9444 "PP: Parsing XML Decl\n");
9445#endif
9446 xmlParseXMLDecl(ctxt);
9447 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
9448 /*
9449 * The XML REC instructs us to stop parsing right
9450 * here
9451 */
9452 ctxt->instate = XML_PARSER_EOF;
9453 return(0);
9454 }
9455 ctxt->standalone = ctxt->input->standalone;
9456 if ((ctxt->encoding == NULL) &&
9457 (ctxt->input->encoding != NULL))
9458 ctxt->encoding = xmlStrdup(ctxt->input->encoding);
9459 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
9460 (!ctxt->disableSAX))
9461 ctxt->sax->startDocument(ctxt->userData);
9462 ctxt->instate = XML_PARSER_MISC;
9463#ifdef DEBUG_PUSH
9464 xmlGenericError(xmlGenericErrorContext,
9465 "PP: entering MISC\n");
9466#endif
9467 } else {
9468 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
9469 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
9470 (!ctxt->disableSAX))
9471 ctxt->sax->startDocument(ctxt->userData);
9472 ctxt->instate = XML_PARSER_MISC;
9473#ifdef DEBUG_PUSH
9474 xmlGenericError(xmlGenericErrorContext,
9475 "PP: entering MISC\n");
9476#endif
9477 }
9478 } else {
9479 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
9480 ctxt->sax->setDocumentLocator(ctxt->userData,
9481 &xmlDefaultSAXLocator);
9482 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
William M. Bracka3215c72004-07-31 16:24:01 +00009483 if (ctxt->version == NULL) {
9484 xmlErrMemory(ctxt, NULL);
9485 break;
9486 }
Owen Taylor3473f882001-02-23 17:55:21 +00009487 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
9488 (!ctxt->disableSAX))
9489 ctxt->sax->startDocument(ctxt->userData);
9490 ctxt->instate = XML_PARSER_MISC;
9491#ifdef DEBUG_PUSH
9492 xmlGenericError(xmlGenericErrorContext,
9493 "PP: entering MISC\n");
9494#endif
9495 }
9496 break;
Daniel Veillarda880b122003-04-21 21:36:41 +00009497 case XML_PARSER_START_TAG: {
Daniel Veillarde57ec792003-09-10 10:50:59 +00009498 const xmlChar *name;
9499 const xmlChar *prefix;
9500 const xmlChar *URI;
9501 int nsNr = ctxt->nsNr;
Daniel Veillarda880b122003-04-21 21:36:41 +00009502
9503 if ((avail < 2) && (ctxt->inputNr == 1))
9504 goto done;
9505 cur = ctxt->input->cur[0];
9506 if (cur != '<') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009507 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Daniel Veillarda880b122003-04-21 21:36:41 +00009508 ctxt->instate = XML_PARSER_EOF;
Daniel Veillarda880b122003-04-21 21:36:41 +00009509 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
9510 ctxt->sax->endDocument(ctxt->userData);
9511 goto done;
9512 }
9513 if (!terminate) {
9514 if (ctxt->progressive) {
Daniel Veillardb3744002004-02-18 14:28:22 +00009515 /* > can be found unescaped in attribute values */
Daniel Veillardeb70f932004-07-05 16:46:09 +00009516 if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
Daniel Veillarda880b122003-04-21 21:36:41 +00009517 goto done;
9518 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
9519 goto done;
9520 }
9521 }
9522 if (ctxt->spaceNr == 0)
9523 spacePush(ctxt, -1);
9524 else
9525 spacePush(ctxt, *ctxt->space);
Daniel Veillard81273902003-09-30 00:43:48 +00009526#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +00009527 if (ctxt->sax2)
Daniel Veillard81273902003-09-30 00:43:48 +00009528#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009529 name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
Daniel Veillard81273902003-09-30 00:43:48 +00009530#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +00009531 else
9532 name = xmlParseStartTag(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +00009533#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarda880b122003-04-21 21:36:41 +00009534 if (name == NULL) {
9535 spacePop(ctxt);
9536 ctxt->instate = XML_PARSER_EOF;
Daniel Veillarda880b122003-04-21 21:36:41 +00009537 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
9538 ctxt->sax->endDocument(ctxt->userData);
9539 goto done;
9540 }
Daniel Veillard4432df22003-09-28 18:58:27 +00009541#ifdef LIBXML_VALID_ENABLED
Daniel Veillarda880b122003-04-21 21:36:41 +00009542 /*
9543 * [ VC: Root Element Type ]
9544 * The Name in the document type declaration must match
9545 * the element type of the root element.
9546 */
9547 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
9548 ctxt->node && (ctxt->node == ctxt->myDoc->children))
9549 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
Daniel Veillard4432df22003-09-28 18:58:27 +00009550#endif /* LIBXML_VALID_ENABLED */
Daniel Veillarda880b122003-04-21 21:36:41 +00009551
9552 /*
9553 * Check for an Empty Element.
9554 */
9555 if ((RAW == '/') && (NXT(1) == '>')) {
9556 SKIP(2);
Daniel Veillarde57ec792003-09-10 10:50:59 +00009557
9558 if (ctxt->sax2) {
9559 if ((ctxt->sax != NULL) &&
9560 (ctxt->sax->endElementNs != NULL) &&
9561 (!ctxt->disableSAX))
9562 ctxt->sax->endElementNs(ctxt->userData, name,
9563 prefix, URI);
Daniel Veillard48df9612005-01-04 21:50:05 +00009564 if (ctxt->nsNr - nsNr > 0)
9565 nsPop(ctxt, ctxt->nsNr - nsNr);
Daniel Veillard81273902003-09-30 00:43:48 +00009566#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +00009567 } else {
9568 if ((ctxt->sax != NULL) &&
9569 (ctxt->sax->endElement != NULL) &&
9570 (!ctxt->disableSAX))
9571 ctxt->sax->endElement(ctxt->userData, name);
Daniel Veillard81273902003-09-30 00:43:48 +00009572#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarda880b122003-04-21 21:36:41 +00009573 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00009574 spacePop(ctxt);
9575 if (ctxt->nameNr == 0) {
Daniel Veillarda880b122003-04-21 21:36:41 +00009576 ctxt->instate = XML_PARSER_EPILOG;
Daniel Veillarda880b122003-04-21 21:36:41 +00009577 } else {
9578 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillarda880b122003-04-21 21:36:41 +00009579 }
9580 break;
9581 }
9582 if (RAW == '>') {
9583 NEXT;
9584 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00009585 xmlFatalErrMsgStr(ctxt, XML_ERR_GT_REQUIRED,
Daniel Veillarda880b122003-04-21 21:36:41 +00009586 "Couldn't find end of Start Tag %s\n",
9587 name);
Daniel Veillarda880b122003-04-21 21:36:41 +00009588 nodePop(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +00009589 spacePop(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +00009590 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00009591 if (ctxt->sax2)
9592 nameNsPush(ctxt, name, prefix, URI, ctxt->nsNr - nsNr);
Daniel Veillard81273902003-09-30 00:43:48 +00009593#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +00009594 else
9595 namePush(ctxt, name);
Daniel Veillard81273902003-09-30 00:43:48 +00009596#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +00009597
Daniel Veillarda880b122003-04-21 21:36:41 +00009598 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillarda880b122003-04-21 21:36:41 +00009599 break;
9600 }
9601 case XML_PARSER_CONTENT: {
9602 const xmlChar *test;
9603 unsigned int cons;
9604 if ((avail < 2) && (ctxt->inputNr == 1))
9605 goto done;
9606 cur = ctxt->input->cur[0];
9607 next = ctxt->input->cur[1];
9608
9609 test = CUR_PTR;
9610 cons = ctxt->input->consumed;
9611 if ((cur == '<') && (next == '/')) {
9612 ctxt->instate = XML_PARSER_END_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +00009613 break;
9614 } else if ((cur == '<') && (next == '?')) {
9615 if ((!terminate) &&
9616 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
9617 goto done;
Daniel Veillarda880b122003-04-21 21:36:41 +00009618 xmlParsePI(ctxt);
9619 } else if ((cur == '<') && (next != '!')) {
9620 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +00009621 break;
9622 } else if ((cur == '<') && (next == '!') &&
9623 (ctxt->input->cur[2] == '-') &&
9624 (ctxt->input->cur[3] == '-')) {
9625 if ((!terminate) &&
9626 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
9627 goto done;
Daniel Veillarda880b122003-04-21 21:36:41 +00009628 xmlParseComment(ctxt);
9629 ctxt->instate = XML_PARSER_CONTENT;
9630 } else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
9631 (ctxt->input->cur[2] == '[') &&
9632 (ctxt->input->cur[3] == 'C') &&
9633 (ctxt->input->cur[4] == 'D') &&
9634 (ctxt->input->cur[5] == 'A') &&
9635 (ctxt->input->cur[6] == 'T') &&
9636 (ctxt->input->cur[7] == 'A') &&
9637 (ctxt->input->cur[8] == '[')) {
9638 SKIP(9);
9639 ctxt->instate = XML_PARSER_CDATA_SECTION;
Daniel Veillarda880b122003-04-21 21:36:41 +00009640 break;
9641 } else if ((cur == '<') && (next == '!') &&
9642 (avail < 9)) {
9643 goto done;
9644 } else if (cur == '&') {
9645 if ((!terminate) &&
9646 (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0))
9647 goto done;
Daniel Veillarda880b122003-04-21 21:36:41 +00009648 xmlParseReference(ctxt);
9649 } else {
9650 /* TODO Avoid the extra copy, handle directly !!! */
9651 /*
9652 * Goal of the following test is:
9653 * - minimize calls to the SAX 'character' callback
9654 * when they are mergeable
9655 * - handle an problem for isBlank when we only parse
9656 * a sequence of blank chars and the next one is
9657 * not available to check against '<' presence.
9658 * - tries to homogenize the differences in SAX
9659 * callbacks between the push and pull versions
9660 * of the parser.
9661 */
9662 if ((ctxt->inputNr == 1) &&
9663 (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
9664 if (!terminate) {
9665 if (ctxt->progressive) {
9666 if ((lastlt == NULL) ||
9667 (ctxt->input->cur > lastlt))
9668 goto done;
9669 } else if (xmlParseLookupSequence(ctxt,
9670 '<', 0, 0) < 0) {
9671 goto done;
9672 }
9673 }
9674 }
9675 ctxt->checkIndex = 0;
Daniel Veillarda880b122003-04-21 21:36:41 +00009676 xmlParseCharData(ctxt, 0);
9677 }
9678 /*
9679 * Pop-up of finished entities.
9680 */
9681 while ((RAW == 0) && (ctxt->inputNr > 1))
9682 xmlPopInput(ctxt);
9683 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009684 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
9685 "detected an error in element content\n");
Daniel Veillarda880b122003-04-21 21:36:41 +00009686 ctxt->instate = XML_PARSER_EOF;
9687 break;
9688 }
9689 break;
9690 }
9691 case XML_PARSER_END_TAG:
9692 if (avail < 2)
9693 goto done;
9694 if (!terminate) {
9695 if (ctxt->progressive) {
Daniel Veillardeb70f932004-07-05 16:46:09 +00009696 /* > can be found unescaped in attribute values */
9697 if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
Daniel Veillarda880b122003-04-21 21:36:41 +00009698 goto done;
9699 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
9700 goto done;
9701 }
9702 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00009703 if (ctxt->sax2) {
9704 xmlParseEndTag2(ctxt,
9705 (void *) ctxt->pushTab[ctxt->nameNr * 3 - 3],
9706 (void *) ctxt->pushTab[ctxt->nameNr * 3 - 2], 0,
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009707 (int) (long) ctxt->pushTab[ctxt->nameNr * 3 - 1], 0);
Daniel Veillarde57ec792003-09-10 10:50:59 +00009708 nameNsPop(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +00009709 }
9710#ifdef LIBXML_SAX1_ENABLED
9711 else
Daniel Veillarde57ec792003-09-10 10:50:59 +00009712 xmlParseEndTag1(ctxt, 0);
Daniel Veillard81273902003-09-30 00:43:48 +00009713#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +00009714 if (ctxt->nameNr == 0) {
Daniel Veillarda880b122003-04-21 21:36:41 +00009715 ctxt->instate = XML_PARSER_EPILOG;
Daniel Veillarda880b122003-04-21 21:36:41 +00009716 } else {
9717 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillarda880b122003-04-21 21:36:41 +00009718 }
9719 break;
9720 case XML_PARSER_CDATA_SECTION: {
9721 /*
9722 * The Push mode need to have the SAX callback for
9723 * cdataBlock merge back contiguous callbacks.
9724 */
9725 int base;
9726
9727 base = xmlParseLookupSequence(ctxt, ']', ']', '>');
9728 if (base < 0) {
9729 if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) {
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +00009730 int tmp;
9731
9732 tmp = xmlCheckCdataPush(ctxt->input->cur,
9733 XML_PARSER_BIG_BUFFER_SIZE);
9734 if (tmp < 0) {
9735 tmp = -tmp;
9736 ctxt->input->cur += tmp;
9737 goto encoding_error;
9738 }
Daniel Veillarda880b122003-04-21 21:36:41 +00009739 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
9740 if (ctxt->sax->cdataBlock != NULL)
Daniel Veillardd9d32ae2003-07-05 20:32:43 +00009741 ctxt->sax->cdataBlock(ctxt->userData,
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +00009742 ctxt->input->cur, tmp);
Daniel Veillardd9d32ae2003-07-05 20:32:43 +00009743 else if (ctxt->sax->characters != NULL)
9744 ctxt->sax->characters(ctxt->userData,
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +00009745 ctxt->input->cur, tmp);
Daniel Veillarda880b122003-04-21 21:36:41 +00009746 }
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +00009747 SKIPL(tmp);
Daniel Veillarda880b122003-04-21 21:36:41 +00009748 ctxt->checkIndex = 0;
9749 }
9750 goto done;
9751 } else {
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +00009752 int tmp;
9753
9754 tmp = xmlCheckCdataPush(ctxt->input->cur, base);
9755 if ((tmp < 0) || (tmp != base)) {
9756 tmp = -tmp;
9757 ctxt->input->cur += tmp;
9758 goto encoding_error;
9759 }
Daniel Veillarda880b122003-04-21 21:36:41 +00009760 if ((ctxt->sax != NULL) && (base > 0) &&
9761 (!ctxt->disableSAX)) {
9762 if (ctxt->sax->cdataBlock != NULL)
9763 ctxt->sax->cdataBlock(ctxt->userData,
9764 ctxt->input->cur, base);
Daniel Veillardd9d32ae2003-07-05 20:32:43 +00009765 else if (ctxt->sax->characters != NULL)
9766 ctxt->sax->characters(ctxt->userData,
9767 ctxt->input->cur, base);
Daniel Veillarda880b122003-04-21 21:36:41 +00009768 }
Daniel Veillard0b787f32004-03-26 17:29:53 +00009769 SKIPL(base + 3);
Daniel Veillarda880b122003-04-21 21:36:41 +00009770 ctxt->checkIndex = 0;
9771 ctxt->instate = XML_PARSER_CONTENT;
9772#ifdef DEBUG_PUSH
9773 xmlGenericError(xmlGenericErrorContext,
9774 "PP: entering CONTENT\n");
9775#endif
9776 }
9777 break;
9778 }
Owen Taylor3473f882001-02-23 17:55:21 +00009779 case XML_PARSER_MISC:
9780 SKIP_BLANKS;
9781 if (ctxt->input->buf == NULL)
Daniel Veillarda880b122003-04-21 21:36:41 +00009782 avail = ctxt->input->length -
9783 (ctxt->input->cur - ctxt->input->base);
Owen Taylor3473f882001-02-23 17:55:21 +00009784 else
Daniel Veillarda880b122003-04-21 21:36:41 +00009785 avail = ctxt->input->buf->buffer->use -
9786 (ctxt->input->cur - ctxt->input->base);
Owen Taylor3473f882001-02-23 17:55:21 +00009787 if (avail < 2)
9788 goto done;
9789 cur = ctxt->input->cur[0];
9790 next = ctxt->input->cur[1];
9791 if ((cur == '<') && (next == '?')) {
9792 if ((!terminate) &&
9793 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
9794 goto done;
9795#ifdef DEBUG_PUSH
9796 xmlGenericError(xmlGenericErrorContext,
9797 "PP: Parsing PI\n");
9798#endif
9799 xmlParsePI(ctxt);
9800 } else if ((cur == '<') && (next == '!') &&
Daniel Veillarda880b122003-04-21 21:36:41 +00009801 (ctxt->input->cur[2] == '-') &&
9802 (ctxt->input->cur[3] == '-')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009803 if ((!terminate) &&
9804 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
9805 goto done;
9806#ifdef DEBUG_PUSH
9807 xmlGenericError(xmlGenericErrorContext,
9808 "PP: Parsing Comment\n");
9809#endif
9810 xmlParseComment(ctxt);
9811 ctxt->instate = XML_PARSER_MISC;
9812 } else if ((cur == '<') && (next == '!') &&
Daniel Veillarda880b122003-04-21 21:36:41 +00009813 (ctxt->input->cur[2] == 'D') &&
9814 (ctxt->input->cur[3] == 'O') &&
9815 (ctxt->input->cur[4] == 'C') &&
9816 (ctxt->input->cur[5] == 'T') &&
9817 (ctxt->input->cur[6] == 'Y') &&
9818 (ctxt->input->cur[7] == 'P') &&
Owen Taylor3473f882001-02-23 17:55:21 +00009819 (ctxt->input->cur[8] == 'E')) {
9820 if ((!terminate) &&
9821 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
9822 goto done;
9823#ifdef DEBUG_PUSH
9824 xmlGenericError(xmlGenericErrorContext,
9825 "PP: Parsing internal subset\n");
9826#endif
9827 ctxt->inSubset = 1;
9828 xmlParseDocTypeDecl(ctxt);
9829 if (RAW == '[') {
9830 ctxt->instate = XML_PARSER_DTD;
9831#ifdef DEBUG_PUSH
9832 xmlGenericError(xmlGenericErrorContext,
9833 "PP: entering DTD\n");
9834#endif
9835 } else {
9836 /*
9837 * Create and update the external subset.
9838 */
9839 ctxt->inSubset = 2;
9840 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
9841 (ctxt->sax->externalSubset != NULL))
9842 ctxt->sax->externalSubset(ctxt->userData,
9843 ctxt->intSubName, ctxt->extSubSystem,
9844 ctxt->extSubURI);
9845 ctxt->inSubset = 0;
9846 ctxt->instate = XML_PARSER_PROLOG;
9847#ifdef DEBUG_PUSH
9848 xmlGenericError(xmlGenericErrorContext,
9849 "PP: entering PROLOG\n");
9850#endif
9851 }
9852 } else if ((cur == '<') && (next == '!') &&
9853 (avail < 9)) {
9854 goto done;
9855 } else {
9856 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +00009857 ctxt->progressive = 1;
9858 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Owen Taylor3473f882001-02-23 17:55:21 +00009859#ifdef DEBUG_PUSH
9860 xmlGenericError(xmlGenericErrorContext,
9861 "PP: entering START_TAG\n");
9862#endif
9863 }
9864 break;
Owen Taylor3473f882001-02-23 17:55:21 +00009865 case XML_PARSER_PROLOG:
9866 SKIP_BLANKS;
9867 if (ctxt->input->buf == NULL)
9868 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
9869 else
9870 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
9871 if (avail < 2)
9872 goto done;
9873 cur = ctxt->input->cur[0];
9874 next = ctxt->input->cur[1];
9875 if ((cur == '<') && (next == '?')) {
9876 if ((!terminate) &&
9877 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
9878 goto done;
9879#ifdef DEBUG_PUSH
9880 xmlGenericError(xmlGenericErrorContext,
9881 "PP: Parsing PI\n");
9882#endif
9883 xmlParsePI(ctxt);
9884 } else if ((cur == '<') && (next == '!') &&
9885 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
9886 if ((!terminate) &&
9887 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
9888 goto done;
9889#ifdef DEBUG_PUSH
9890 xmlGenericError(xmlGenericErrorContext,
9891 "PP: Parsing Comment\n");
9892#endif
9893 xmlParseComment(ctxt);
9894 ctxt->instate = XML_PARSER_PROLOG;
9895 } else if ((cur == '<') && (next == '!') &&
9896 (avail < 4)) {
9897 goto done;
9898 } else {
9899 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillard0df3bc32004-06-08 12:03:41 +00009900 if (ctxt->progressive == 0)
9901 ctxt->progressive = 1;
Daniel Veillarda880b122003-04-21 21:36:41 +00009902 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Owen Taylor3473f882001-02-23 17:55:21 +00009903#ifdef DEBUG_PUSH
9904 xmlGenericError(xmlGenericErrorContext,
9905 "PP: entering START_TAG\n");
9906#endif
9907 }
9908 break;
9909 case XML_PARSER_EPILOG:
9910 SKIP_BLANKS;
9911 if (ctxt->input->buf == NULL)
9912 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
9913 else
9914 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
9915 if (avail < 2)
9916 goto done;
9917 cur = ctxt->input->cur[0];
9918 next = ctxt->input->cur[1];
9919 if ((cur == '<') && (next == '?')) {
9920 if ((!terminate) &&
9921 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
9922 goto done;
9923#ifdef DEBUG_PUSH
9924 xmlGenericError(xmlGenericErrorContext,
9925 "PP: Parsing PI\n");
9926#endif
9927 xmlParsePI(ctxt);
9928 ctxt->instate = XML_PARSER_EPILOG;
9929 } else if ((cur == '<') && (next == '!') &&
9930 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
9931 if ((!terminate) &&
9932 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
9933 goto done;
9934#ifdef DEBUG_PUSH
9935 xmlGenericError(xmlGenericErrorContext,
9936 "PP: Parsing Comment\n");
9937#endif
9938 xmlParseComment(ctxt);
9939 ctxt->instate = XML_PARSER_EPILOG;
9940 } else if ((cur == '<') && (next == '!') &&
9941 (avail < 4)) {
9942 goto done;
9943 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009944 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009945 ctxt->instate = XML_PARSER_EOF;
9946#ifdef DEBUG_PUSH
9947 xmlGenericError(xmlGenericErrorContext,
9948 "PP: entering EOF\n");
9949#endif
Daniel Veillard8d24cc12002-03-05 15:41:29 +00009950 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00009951 ctxt->sax->endDocument(ctxt->userData);
9952 goto done;
9953 }
9954 break;
Owen Taylor3473f882001-02-23 17:55:21 +00009955 case XML_PARSER_DTD: {
9956 /*
9957 * Sorry but progressive parsing of the internal subset
9958 * is not expected to be supported. We first check that
9959 * the full content of the internal subset is available and
9960 * the parsing is launched only at that point.
9961 * Internal subset ends up with "']' S? '>'" in an unescaped
9962 * section and not in a ']]>' sequence which are conditional
9963 * sections (whoever argued to keep that crap in XML deserve
9964 * a place in hell !).
9965 */
9966 int base, i;
9967 xmlChar *buf;
9968 xmlChar quote = 0;
9969
9970 base = ctxt->input->cur - ctxt->input->base;
9971 if (base < 0) return(0);
9972 if (ctxt->checkIndex > base)
9973 base = ctxt->checkIndex;
9974 buf = ctxt->input->buf->buffer->content;
9975 for (;(unsigned int) base < ctxt->input->buf->buffer->use;
9976 base++) {
9977 if (quote != 0) {
9978 if (buf[base] == quote)
9979 quote = 0;
9980 continue;
9981 }
Daniel Veillard036143b2004-02-12 11:57:52 +00009982 if ((quote == 0) && (buf[base] == '<')) {
9983 int found = 0;
9984 /* special handling of comments */
9985 if (((unsigned int) base + 4 <
9986 ctxt->input->buf->buffer->use) &&
9987 (buf[base + 1] == '!') &&
9988 (buf[base + 2] == '-') &&
9989 (buf[base + 3] == '-')) {
9990 for (;(unsigned int) base + 3 <
9991 ctxt->input->buf->buffer->use; base++) {
9992 if ((buf[base] == '-') &&
9993 (buf[base + 1] == '-') &&
9994 (buf[base + 2] == '>')) {
9995 found = 1;
9996 base += 2;
9997 break;
9998 }
9999 }
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000010000 if (!found) {
10001#if 0
10002 fprintf(stderr, "unfinished comment\n");
10003#endif
10004 break; /* for */
10005 }
Daniel Veillard036143b2004-02-12 11:57:52 +000010006 continue;
10007 }
10008 }
Owen Taylor3473f882001-02-23 17:55:21 +000010009 if (buf[base] == '"') {
10010 quote = '"';
10011 continue;
10012 }
10013 if (buf[base] == '\'') {
10014 quote = '\'';
10015 continue;
10016 }
10017 if (buf[base] == ']') {
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000010018#if 0
10019 fprintf(stderr, "%c%c%c%c: ", buf[base],
10020 buf[base + 1], buf[base + 2], buf[base + 3]);
10021#endif
Owen Taylor3473f882001-02-23 17:55:21 +000010022 if ((unsigned int) base +1 >=
10023 ctxt->input->buf->buffer->use)
10024 break;
10025 if (buf[base + 1] == ']') {
10026 /* conditional crap, skip both ']' ! */
10027 base++;
10028 continue;
10029 }
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000010030 for (i = 1;
Owen Taylor3473f882001-02-23 17:55:21 +000010031 (unsigned int) base + i < ctxt->input->buf->buffer->use;
10032 i++) {
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000010033 if (buf[base + i] == '>') {
10034#if 0
10035 fprintf(stderr, "found\n");
10036#endif
Owen Taylor3473f882001-02-23 17:55:21 +000010037 goto found_end_int_subset;
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000010038 }
10039 if (!IS_BLANK_CH(buf[base + i])) {
10040#if 0
10041 fprintf(stderr, "not found\n");
10042#endif
10043 goto not_end_of_int_subset;
10044 }
Owen Taylor3473f882001-02-23 17:55:21 +000010045 }
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000010046#if 0
10047 fprintf(stderr, "end of stream\n");
10048#endif
Owen Taylor3473f882001-02-23 17:55:21 +000010049 break;
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000010050
Owen Taylor3473f882001-02-23 17:55:21 +000010051 }
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000010052not_end_of_int_subset:
10053 continue; /* for */
Owen Taylor3473f882001-02-23 17:55:21 +000010054 }
10055 /*
10056 * We didn't found the end of the Internal subset
10057 */
Owen Taylor3473f882001-02-23 17:55:21 +000010058#ifdef DEBUG_PUSH
10059 if (next == 0)
10060 xmlGenericError(xmlGenericErrorContext,
10061 "PP: lookup of int subset end filed\n");
10062#endif
10063 goto done;
10064
10065found_end_int_subset:
10066 xmlParseInternalSubset(ctxt);
10067 ctxt->inSubset = 2;
10068 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
10069 (ctxt->sax->externalSubset != NULL))
10070 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
10071 ctxt->extSubSystem, ctxt->extSubURI);
10072 ctxt->inSubset = 0;
10073 ctxt->instate = XML_PARSER_PROLOG;
10074 ctxt->checkIndex = 0;
10075#ifdef DEBUG_PUSH
10076 xmlGenericError(xmlGenericErrorContext,
10077 "PP: entering PROLOG\n");
10078#endif
10079 break;
10080 }
10081 case XML_PARSER_COMMENT:
10082 xmlGenericError(xmlGenericErrorContext,
10083 "PP: internal error, state == COMMENT\n");
10084 ctxt->instate = XML_PARSER_CONTENT;
10085#ifdef DEBUG_PUSH
10086 xmlGenericError(xmlGenericErrorContext,
10087 "PP: entering CONTENT\n");
10088#endif
10089 break;
Daniel Veillarda880b122003-04-21 21:36:41 +000010090 case XML_PARSER_IGNORE:
10091 xmlGenericError(xmlGenericErrorContext,
10092 "PP: internal error, state == IGNORE");
10093 ctxt->instate = XML_PARSER_DTD;
10094#ifdef DEBUG_PUSH
10095 xmlGenericError(xmlGenericErrorContext,
10096 "PP: entering DTD\n");
10097#endif
10098 break;
Owen Taylor3473f882001-02-23 17:55:21 +000010099 case XML_PARSER_PI:
10100 xmlGenericError(xmlGenericErrorContext,
10101 "PP: internal error, state == PI\n");
10102 ctxt->instate = XML_PARSER_CONTENT;
10103#ifdef DEBUG_PUSH
10104 xmlGenericError(xmlGenericErrorContext,
10105 "PP: entering CONTENT\n");
10106#endif
10107 break;
10108 case XML_PARSER_ENTITY_DECL:
10109 xmlGenericError(xmlGenericErrorContext,
10110 "PP: internal error, state == ENTITY_DECL\n");
10111 ctxt->instate = XML_PARSER_DTD;
10112#ifdef DEBUG_PUSH
10113 xmlGenericError(xmlGenericErrorContext,
10114 "PP: entering DTD\n");
10115#endif
10116 break;
10117 case XML_PARSER_ENTITY_VALUE:
10118 xmlGenericError(xmlGenericErrorContext,
10119 "PP: internal error, state == ENTITY_VALUE\n");
10120 ctxt->instate = XML_PARSER_CONTENT;
10121#ifdef DEBUG_PUSH
10122 xmlGenericError(xmlGenericErrorContext,
10123 "PP: entering DTD\n");
10124#endif
10125 break;
10126 case XML_PARSER_ATTRIBUTE_VALUE:
10127 xmlGenericError(xmlGenericErrorContext,
10128 "PP: internal error, state == ATTRIBUTE_VALUE\n");
10129 ctxt->instate = XML_PARSER_START_TAG;
10130#ifdef DEBUG_PUSH
10131 xmlGenericError(xmlGenericErrorContext,
10132 "PP: entering START_TAG\n");
10133#endif
10134 break;
10135 case XML_PARSER_SYSTEM_LITERAL:
10136 xmlGenericError(xmlGenericErrorContext,
10137 "PP: internal error, state == SYSTEM_LITERAL\n");
10138 ctxt->instate = XML_PARSER_START_TAG;
10139#ifdef DEBUG_PUSH
10140 xmlGenericError(xmlGenericErrorContext,
10141 "PP: entering START_TAG\n");
10142#endif
10143 break;
Daniel Veillard4a7ae502002-02-18 19:18:17 +000010144 case XML_PARSER_PUBLIC_LITERAL:
10145 xmlGenericError(xmlGenericErrorContext,
10146 "PP: internal error, state == PUBLIC_LITERAL\n");
10147 ctxt->instate = XML_PARSER_START_TAG;
10148#ifdef DEBUG_PUSH
10149 xmlGenericError(xmlGenericErrorContext,
10150 "PP: entering START_TAG\n");
10151#endif
10152 break;
Owen Taylor3473f882001-02-23 17:55:21 +000010153 }
10154 }
10155done:
10156#ifdef DEBUG_PUSH
10157 xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret);
10158#endif
10159 return(ret);
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000010160encoding_error:
10161 {
10162 char buffer[150];
10163
10164 snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
10165 ctxt->input->cur[0], ctxt->input->cur[1],
10166 ctxt->input->cur[2], ctxt->input->cur[3]);
10167 __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
10168 "Input is not proper UTF-8, indicate encoding !\n%s",
10169 BAD_CAST buffer, NULL);
10170 }
10171 return(0);
Owen Taylor3473f882001-02-23 17:55:21 +000010172}
10173
10174/**
Owen Taylor3473f882001-02-23 17:55:21 +000010175 * xmlParseChunk:
10176 * @ctxt: an XML parser context
10177 * @chunk: an char array
10178 * @size: the size in byte of the chunk
10179 * @terminate: last chunk indicator
10180 *
10181 * Parse a Chunk of memory
10182 *
10183 * Returns zero if no error, the xmlParserErrors otherwise.
10184 */
10185int
10186xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
10187 int terminate) {
Daniel Veillard36e5cd52004-11-02 14:52:23 +000010188 if (ctxt == NULL)
10189 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard14412512005-01-21 23:53:26 +000010190 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000010191 return(ctxt->errNo);
Daniel Veillard309f81d2003-09-23 09:02:53 +000010192 if (ctxt->instate == XML_PARSER_START)
10193 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010194 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
10195 (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) {
10196 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
10197 int cur = ctxt->input->cur - ctxt->input->base;
William M. Bracka3215c72004-07-31 16:24:01 +000010198 int res;
Owen Taylor3473f882001-02-23 17:55:21 +000010199
William M. Bracka3215c72004-07-31 16:24:01 +000010200 res =xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
10201 if (res < 0) {
10202 ctxt->errNo = XML_PARSER_EOF;
10203 ctxt->disableSAX = 1;
10204 return (XML_PARSER_EOF);
10205 }
Owen Taylor3473f882001-02-23 17:55:21 +000010206 ctxt->input->base = ctxt->input->buf->buffer->content + base;
10207 ctxt->input->cur = ctxt->input->base + cur;
Daniel Veillard48b2f892001-02-25 16:11:03 +000010208 ctxt->input->end =
10209 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000010210#ifdef DEBUG_PUSH
10211 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
10212#endif
10213
Owen Taylor3473f882001-02-23 17:55:21 +000010214 } else if (ctxt->instate != XML_PARSER_EOF) {
10215 if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
10216 xmlParserInputBufferPtr in = ctxt->input->buf;
10217 if ((in->encoder != NULL) && (in->buffer != NULL) &&
10218 (in->raw != NULL)) {
10219 int nbchars;
10220
10221 nbchars = xmlCharEncInFunc(in->encoder, in->buffer, in->raw);
10222 if (nbchars < 0) {
Daniel Veillard24eb9782003-10-04 21:08:09 +000010223 /* TODO 2.6.0 */
Owen Taylor3473f882001-02-23 17:55:21 +000010224 xmlGenericError(xmlGenericErrorContext,
10225 "xmlParseChunk: encoder error\n");
10226 return(XML_ERR_INVALID_ENCODING);
10227 }
10228 }
10229 }
10230 }
10231 xmlParseTryOrFinish(ctxt, terminate);
Daniel Veillard14412512005-01-21 23:53:26 +000010232 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000010233 return(ctxt->errNo);
Owen Taylor3473f882001-02-23 17:55:21 +000010234 if (terminate) {
10235 /*
10236 * Check for termination
10237 */
Daniel Veillard36e5cd52004-11-02 14:52:23 +000010238 int avail = 0;
10239
10240 if (ctxt->input != NULL) {
Daniel Veillard819d5cb2002-10-14 11:15:18 +000010241 if (ctxt->input->buf == NULL)
Daniel Veillard36e5cd52004-11-02 14:52:23 +000010242 avail = ctxt->input->length -
10243 (ctxt->input->cur - ctxt->input->base);
10244 else
10245 avail = ctxt->input->buf->buffer->use -
10246 (ctxt->input->cur - ctxt->input->base);
10247 }
Daniel Veillard819d5cb2002-10-14 11:15:18 +000010248
Owen Taylor3473f882001-02-23 17:55:21 +000010249 if ((ctxt->instate != XML_PARSER_EOF) &&
10250 (ctxt->instate != XML_PARSER_EPILOG)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010251 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010252 }
Daniel Veillard819d5cb2002-10-14 11:15:18 +000010253 if ((ctxt->instate == XML_PARSER_EPILOG) && (avail > 0)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010254 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Daniel Veillard819d5cb2002-10-14 11:15:18 +000010255 }
Owen Taylor3473f882001-02-23 17:55:21 +000010256 if (ctxt->instate != XML_PARSER_EOF) {
Daniel Veillard8d24cc12002-03-05 15:41:29 +000010257 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +000010258 ctxt->sax->endDocument(ctxt->userData);
10259 }
10260 ctxt->instate = XML_PARSER_EOF;
10261 }
10262 return((xmlParserErrors) ctxt->errNo);
10263}
10264
10265/************************************************************************
10266 * *
10267 * I/O front end functions to the parser *
10268 * *
10269 ************************************************************************/
10270
10271/**
Owen Taylor3473f882001-02-23 17:55:21 +000010272 * xmlCreatePushParserCtxt:
10273 * @sax: a SAX handler
10274 * @user_data: The user data returned on SAX callbacks
10275 * @chunk: a pointer to an array of chars
10276 * @size: number of chars in the array
10277 * @filename: an optional file name or URI
10278 *
Daniel Veillard176d99f2002-07-06 19:22:28 +000010279 * Create a parser context for using the XML parser in push mode.
10280 * If @buffer and @size are non-NULL, the data is used to detect
10281 * the encoding. The remaining characters will be parsed so they
10282 * don't need to be fed in again through xmlParseChunk.
Owen Taylor3473f882001-02-23 17:55:21 +000010283 * To allow content encoding detection, @size should be >= 4
10284 * The value of @filename is used for fetching external entities
10285 * and error/warning reports.
10286 *
10287 * Returns the new parser context or NULL
10288 */
Daniel Veillard176d99f2002-07-06 19:22:28 +000010289
Owen Taylor3473f882001-02-23 17:55:21 +000010290xmlParserCtxtPtr
10291xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
10292 const char *chunk, int size, const char *filename) {
10293 xmlParserCtxtPtr ctxt;
10294 xmlParserInputPtr inputStream;
10295 xmlParserInputBufferPtr buf;
10296 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
10297
10298 /*
10299 * plug some encoding conversion routines
10300 */
10301 if ((chunk != NULL) && (size >= 4))
10302 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
10303
10304 buf = xmlAllocParserInputBuffer(enc);
10305 if (buf == NULL) return(NULL);
10306
10307 ctxt = xmlNewParserCtxt();
10308 if (ctxt == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +000010309 xmlErrMemory(NULL, "creating parser: out of memory\n");
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000010310 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000010311 return(NULL);
10312 }
Daniel Veillard03a53c32004-10-26 16:06:51 +000010313 ctxt->dictNames = 1;
Daniel Veillarde57ec792003-09-10 10:50:59 +000010314 ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 * sizeof(xmlChar *));
10315 if (ctxt->pushTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010316 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +000010317 xmlFreeParserInputBuffer(buf);
10318 xmlFreeParserCtxt(ctxt);
10319 return(NULL);
10320 }
Owen Taylor3473f882001-02-23 17:55:21 +000010321 if (sax != NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +000010322#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard092643b2003-09-25 14:29:29 +000010323 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Daniel Veillard81273902003-09-30 00:43:48 +000010324#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000010325 xmlFree(ctxt->sax);
10326 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
10327 if (ctxt->sax == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010328 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000010329 xmlFreeParserInputBuffer(buf);
10330 xmlFreeParserCtxt(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010331 return(NULL);
10332 }
Daniel Veillard5ea30d72004-11-08 11:54:28 +000010333 memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
10334 if (sax->initialized == XML_SAX2_MAGIC)
10335 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
10336 else
10337 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
Owen Taylor3473f882001-02-23 17:55:21 +000010338 if (user_data != NULL)
10339 ctxt->userData = user_data;
10340 }
10341 if (filename == NULL) {
10342 ctxt->directory = NULL;
10343 } else {
10344 ctxt->directory = xmlParserGetDirectory(filename);
10345 }
10346
10347 inputStream = xmlNewInputStream(ctxt);
10348 if (inputStream == NULL) {
10349 xmlFreeParserCtxt(ctxt);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000010350 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000010351 return(NULL);
10352 }
10353
10354 if (filename == NULL)
10355 inputStream->filename = NULL;
William M. Bracka3215c72004-07-31 16:24:01 +000010356 else {
Daniel Veillardf4862f02002-09-10 11:13:43 +000010357 inputStream->filename = (char *)
Igor Zlatkovic5f9fada2003-02-19 14:51:00 +000010358 xmlCanonicPath((const xmlChar *) filename);
William M. Bracka3215c72004-07-31 16:24:01 +000010359 if (inputStream->filename == NULL) {
10360 xmlFreeParserCtxt(ctxt);
10361 xmlFreeParserInputBuffer(buf);
10362 return(NULL);
10363 }
10364 }
Owen Taylor3473f882001-02-23 17:55:21 +000010365 inputStream->buf = buf;
10366 inputStream->base = inputStream->buf->buffer->content;
10367 inputStream->cur = inputStream->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +000010368 inputStream->end =
10369 &inputStream->buf->buffer->content[inputStream->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000010370
10371 inputPush(ctxt, inputStream);
10372
William M. Brack3a1cd212005-02-11 14:35:54 +000010373 /*
10374 * If the caller didn't provide an initial 'chunk' for determining
10375 * the encoding, we set the context to XML_CHAR_ENCODING_NONE so
10376 * that it can be automatically determined later
10377 */
10378 if ((size == 0) || (chunk == NULL)) {
10379 ctxt->charset = XML_CHAR_ENCODING_NONE;
10380 } else if ((ctxt->input != NULL) && (ctxt->input->buf != NULL)) {
Daniel Veillardaa39a0f2002-01-06 12:47:22 +000010381 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
10382 int cur = ctxt->input->cur - ctxt->input->base;
10383
Owen Taylor3473f882001-02-23 17:55:21 +000010384 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
Daniel Veillardaa39a0f2002-01-06 12:47:22 +000010385
10386 ctxt->input->base = ctxt->input->buf->buffer->content + base;
10387 ctxt->input->cur = ctxt->input->base + cur;
10388 ctxt->input->end =
10389 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000010390#ifdef DEBUG_PUSH
10391 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
10392#endif
10393 }
10394
Daniel Veillard0e4cd172001-06-28 12:13:56 +000010395 if (enc != XML_CHAR_ENCODING_NONE) {
10396 xmlSwitchEncoding(ctxt, enc);
10397 }
10398
Owen Taylor3473f882001-02-23 17:55:21 +000010399 return(ctxt);
10400}
Daniel Veillard73b013f2003-09-30 12:36:01 +000010401#endif /* LIBXML_PUSH_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000010402
10403/**
Daniel Veillard39e5c892005-07-03 22:48:50 +000010404 * xmlStopParser:
10405 * @ctxt: an XML parser context
10406 *
10407 * Blocks further parser processing
10408 */
10409void
10410xmlStopParser(xmlParserCtxtPtr ctxt) {
10411 if (ctxt == NULL)
10412 return;
10413 ctxt->instate = XML_PARSER_EOF;
10414 ctxt->disableSAX = 1;
10415 if (ctxt->input != NULL) {
10416 ctxt->input->cur = BAD_CAST"";
10417 ctxt->input->base = ctxt->input->cur;
10418 }
10419}
10420
10421/**
Owen Taylor3473f882001-02-23 17:55:21 +000010422 * xmlCreateIOParserCtxt:
10423 * @sax: a SAX handler
10424 * @user_data: The user data returned on SAX callbacks
10425 * @ioread: an I/O read function
10426 * @ioclose: an I/O close function
10427 * @ioctx: an I/O handler
10428 * @enc: the charset encoding if known
10429 *
10430 * Create a parser context for using the XML parser with an existing
10431 * I/O stream
10432 *
10433 * Returns the new parser context or NULL
10434 */
10435xmlParserCtxtPtr
10436xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
10437 xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
10438 void *ioctx, xmlCharEncoding enc) {
10439 xmlParserCtxtPtr ctxt;
10440 xmlParserInputPtr inputStream;
10441 xmlParserInputBufferPtr buf;
Daniel Veillard42595322004-11-08 10:52:06 +000010442
10443 if (ioread == NULL) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010444
10445 buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
10446 if (buf == NULL) return(NULL);
10447
10448 ctxt = xmlNewParserCtxt();
10449 if (ctxt == NULL) {
Daniel Veillardf0af8ec2005-07-08 17:27:33 +000010450 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000010451 return(NULL);
10452 }
10453 if (sax != NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +000010454#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard092643b2003-09-25 14:29:29 +000010455 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Daniel Veillard81273902003-09-30 00:43:48 +000010456#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000010457 xmlFree(ctxt->sax);
10458 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
10459 if (ctxt->sax == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010460 xmlErrMemory(ctxt, NULL);
Daniel Veillardf0af8ec2005-07-08 17:27:33 +000010461 xmlFreeParserCtxt(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010462 return(NULL);
10463 }
Daniel Veillard5ea30d72004-11-08 11:54:28 +000010464 memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
10465 if (sax->initialized == XML_SAX2_MAGIC)
10466 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
10467 else
10468 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
Owen Taylor3473f882001-02-23 17:55:21 +000010469 if (user_data != NULL)
10470 ctxt->userData = user_data;
10471 }
10472
10473 inputStream = xmlNewIOInputStream(ctxt, buf, enc);
10474 if (inputStream == NULL) {
10475 xmlFreeParserCtxt(ctxt);
10476 return(NULL);
10477 }
10478 inputPush(ctxt, inputStream);
10479
10480 return(ctxt);
10481}
10482
Daniel Veillard4432df22003-09-28 18:58:27 +000010483#ifdef LIBXML_VALID_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000010484/************************************************************************
10485 * *
Daniel Veillardcbaf3992001-12-31 16:16:02 +000010486 * Front ends when parsing a DTD *
Owen Taylor3473f882001-02-23 17:55:21 +000010487 * *
10488 ************************************************************************/
10489
10490/**
10491 * xmlIOParseDTD:
10492 * @sax: the SAX handler block or NULL
10493 * @input: an Input Buffer
10494 * @enc: the charset encoding if known
10495 *
10496 * Load and parse a DTD
10497 *
10498 * Returns the resulting xmlDtdPtr or NULL in case of error.
10499 * @input will be freed at parsing end.
10500 */
10501
10502xmlDtdPtr
10503xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
10504 xmlCharEncoding enc) {
10505 xmlDtdPtr ret = NULL;
10506 xmlParserCtxtPtr ctxt;
10507 xmlParserInputPtr pinput = NULL;
Daniel Veillard87a764e2001-06-20 17:41:10 +000010508 xmlChar start[4];
Owen Taylor3473f882001-02-23 17:55:21 +000010509
10510 if (input == NULL)
10511 return(NULL);
10512
10513 ctxt = xmlNewParserCtxt();
10514 if (ctxt == NULL) {
10515 return(NULL);
10516 }
10517
10518 /*
10519 * Set-up the SAX context
10520 */
10521 if (sax != NULL) {
10522 if (ctxt->sax != NULL)
10523 xmlFree(ctxt->sax);
10524 ctxt->sax = sax;
Daniel Veillard500a1de2004-03-22 15:22:58 +000010525 ctxt->userData = ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000010526 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000010527 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010528
10529 /*
10530 * generate a parser input from the I/O handler
10531 */
10532
Daniel Veillard43caefb2003-12-07 19:32:22 +000010533 pinput = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
Owen Taylor3473f882001-02-23 17:55:21 +000010534 if (pinput == NULL) {
10535 if (sax != NULL) ctxt->sax = NULL;
10536 xmlFreeParserCtxt(ctxt);
10537 return(NULL);
10538 }
10539
10540 /*
10541 * plug some encoding conversion routines here.
10542 */
10543 xmlPushInput(ctxt, pinput);
Daniel Veillard43caefb2003-12-07 19:32:22 +000010544 if (enc != XML_CHAR_ENCODING_NONE) {
10545 xmlSwitchEncoding(ctxt, enc);
10546 }
Owen Taylor3473f882001-02-23 17:55:21 +000010547
10548 pinput->filename = NULL;
10549 pinput->line = 1;
10550 pinput->col = 1;
10551 pinput->base = ctxt->input->cur;
10552 pinput->cur = ctxt->input->cur;
10553 pinput->free = NULL;
10554
10555 /*
10556 * let's parse that entity knowing it's an external subset.
10557 */
10558 ctxt->inSubset = 2;
10559 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
10560 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
10561 BAD_CAST "none", BAD_CAST "none");
Daniel Veillard87a764e2001-06-20 17:41:10 +000010562
Daniel Veillard4aede2e2003-10-17 12:43:59 +000010563 if ((enc == XML_CHAR_ENCODING_NONE) &&
10564 ((ctxt->input->end - ctxt->input->cur) >= 4)) {
Daniel Veillard87a764e2001-06-20 17:41:10 +000010565 /*
10566 * Get the 4 first bytes and decode the charset
10567 * if enc != XML_CHAR_ENCODING_NONE
10568 * plug some encoding conversion routines.
10569 */
10570 start[0] = RAW;
10571 start[1] = NXT(1);
10572 start[2] = NXT(2);
10573 start[3] = NXT(3);
10574 enc = xmlDetectCharEncoding(start, 4);
10575 if (enc != XML_CHAR_ENCODING_NONE) {
10576 xmlSwitchEncoding(ctxt, enc);
10577 }
10578 }
10579
Owen Taylor3473f882001-02-23 17:55:21 +000010580 xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
10581
10582 if (ctxt->myDoc != NULL) {
10583 if (ctxt->wellFormed) {
10584 ret = ctxt->myDoc->extSubset;
10585 ctxt->myDoc->extSubset = NULL;
Daniel Veillard329456a2003-04-26 21:21:00 +000010586 if (ret != NULL) {
10587 xmlNodePtr tmp;
10588
10589 ret->doc = NULL;
10590 tmp = ret->children;
10591 while (tmp != NULL) {
10592 tmp->doc = NULL;
10593 tmp = tmp->next;
10594 }
10595 }
Owen Taylor3473f882001-02-23 17:55:21 +000010596 } else {
10597 ret = NULL;
10598 }
10599 xmlFreeDoc(ctxt->myDoc);
10600 ctxt->myDoc = NULL;
10601 }
10602 if (sax != NULL) ctxt->sax = NULL;
10603 xmlFreeParserCtxt(ctxt);
10604
10605 return(ret);
10606}
10607
10608/**
10609 * xmlSAXParseDTD:
10610 * @sax: the SAX handler block
10611 * @ExternalID: a NAME* containing the External ID of the DTD
10612 * @SystemID: a NAME* containing the URL to the DTD
10613 *
10614 * Load and parse an external subset.
10615 *
10616 * Returns the resulting xmlDtdPtr or NULL in case of error.
10617 */
10618
10619xmlDtdPtr
10620xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
10621 const xmlChar *SystemID) {
10622 xmlDtdPtr ret = NULL;
10623 xmlParserCtxtPtr ctxt;
10624 xmlParserInputPtr input = NULL;
10625 xmlCharEncoding enc;
Igor Zlatkovic07d59762004-08-24 19:12:51 +000010626 xmlChar* systemIdCanonic;
Owen Taylor3473f882001-02-23 17:55:21 +000010627
10628 if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
10629
10630 ctxt = xmlNewParserCtxt();
10631 if (ctxt == NULL) {
10632 return(NULL);
10633 }
10634
10635 /*
10636 * Set-up the SAX context
10637 */
10638 if (sax != NULL) {
10639 if (ctxt->sax != NULL)
10640 xmlFree(ctxt->sax);
10641 ctxt->sax = sax;
Daniel Veillardbf1e3d82003-08-14 23:57:26 +000010642 ctxt->userData = ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000010643 }
Igor Zlatkovic07d59762004-08-24 19:12:51 +000010644
10645 /*
10646 * Canonicalise the system ID
10647 */
10648 systemIdCanonic = xmlCanonicPath(SystemID);
Daniel Veillardc93a19f2004-10-04 11:53:20 +000010649 if ((SystemID != NULL) && (systemIdCanonic == NULL)) {
Igor Zlatkovic07d59762004-08-24 19:12:51 +000010650 xmlFreeParserCtxt(ctxt);
10651 return(NULL);
10652 }
Owen Taylor3473f882001-02-23 17:55:21 +000010653
10654 /*
10655 * Ask the Entity resolver to load the damn thing
10656 */
10657
10658 if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
Igor Zlatkovic07d59762004-08-24 19:12:51 +000010659 input = ctxt->sax->resolveEntity(ctxt, ExternalID, systemIdCanonic);
Owen Taylor3473f882001-02-23 17:55:21 +000010660 if (input == NULL) {
10661 if (sax != NULL) ctxt->sax = NULL;
10662 xmlFreeParserCtxt(ctxt);
Daniel Veillard34099b42004-11-04 17:34:35 +000010663 if (systemIdCanonic != NULL)
10664 xmlFree(systemIdCanonic);
Owen Taylor3473f882001-02-23 17:55:21 +000010665 return(NULL);
10666 }
10667
10668 /*
10669 * plug some encoding conversion routines here.
10670 */
10671 xmlPushInput(ctxt, input);
Daniel Veillard4aede2e2003-10-17 12:43:59 +000010672 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
10673 enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
10674 xmlSwitchEncoding(ctxt, enc);
10675 }
Owen Taylor3473f882001-02-23 17:55:21 +000010676
10677 if (input->filename == NULL)
Igor Zlatkovic07d59762004-08-24 19:12:51 +000010678 input->filename = (char *) systemIdCanonic;
10679 else
10680 xmlFree(systemIdCanonic);
Owen Taylor3473f882001-02-23 17:55:21 +000010681 input->line = 1;
10682 input->col = 1;
10683 input->base = ctxt->input->cur;
10684 input->cur = ctxt->input->cur;
10685 input->free = NULL;
10686
10687 /*
10688 * let's parse that entity knowing it's an external subset.
10689 */
10690 ctxt->inSubset = 2;
10691 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
10692 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
10693 ExternalID, SystemID);
10694 xmlParseExternalSubset(ctxt, ExternalID, SystemID);
10695
10696 if (ctxt->myDoc != NULL) {
10697 if (ctxt->wellFormed) {
10698 ret = ctxt->myDoc->extSubset;
10699 ctxt->myDoc->extSubset = NULL;
Daniel Veillardc5573462003-04-25 16:43:49 +000010700 if (ret != NULL) {
10701 xmlNodePtr tmp;
10702
10703 ret->doc = NULL;
10704 tmp = ret->children;
10705 while (tmp != NULL) {
10706 tmp->doc = NULL;
10707 tmp = tmp->next;
10708 }
10709 }
Owen Taylor3473f882001-02-23 17:55:21 +000010710 } else {
10711 ret = NULL;
10712 }
10713 xmlFreeDoc(ctxt->myDoc);
10714 ctxt->myDoc = NULL;
10715 }
10716 if (sax != NULL) ctxt->sax = NULL;
10717 xmlFreeParserCtxt(ctxt);
10718
10719 return(ret);
10720}
10721
Daniel Veillard4432df22003-09-28 18:58:27 +000010722
Owen Taylor3473f882001-02-23 17:55:21 +000010723/**
10724 * xmlParseDTD:
10725 * @ExternalID: a NAME* containing the External ID of the DTD
10726 * @SystemID: a NAME* containing the URL to the DTD
10727 *
10728 * Load and parse an external subset.
10729 *
10730 * Returns the resulting xmlDtdPtr or NULL in case of error.
10731 */
10732
10733xmlDtdPtr
10734xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
10735 return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
10736}
Daniel Veillard4432df22003-09-28 18:58:27 +000010737#endif /* LIBXML_VALID_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000010738
10739/************************************************************************
10740 * *
10741 * Front ends when parsing an Entity *
10742 * *
10743 ************************************************************************/
10744
10745/**
Owen Taylor3473f882001-02-23 17:55:21 +000010746 * xmlParseCtxtExternalEntity:
10747 * @ctx: the existing parsing context
10748 * @URL: the URL for the entity to load
10749 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +000010750 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +000010751 *
10752 * Parse an external general entity within an existing parsing context
10753 * An external general parsed entity is well-formed if it matches the
10754 * production labeled extParsedEnt.
10755 *
10756 * [78] extParsedEnt ::= TextDecl? content
10757 *
10758 * Returns 0 if the entity is well formed, -1 in case of args problem and
10759 * the parser error code otherwise
10760 */
10761
10762int
10763xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
Daniel Veillardcda96922001-08-21 10:56:31 +000010764 const xmlChar *ID, xmlNodePtr *lst) {
Owen Taylor3473f882001-02-23 17:55:21 +000010765 xmlParserCtxtPtr ctxt;
10766 xmlDocPtr newDoc;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000010767 xmlNodePtr newRoot;
Owen Taylor3473f882001-02-23 17:55:21 +000010768 xmlSAXHandlerPtr oldsax = NULL;
10769 int ret = 0;
Daniel Veillard87a764e2001-06-20 17:41:10 +000010770 xmlChar start[4];
10771 xmlCharEncoding enc;
Owen Taylor3473f882001-02-23 17:55:21 +000010772
Daniel Veillardce682bc2004-11-05 17:22:25 +000010773 if (ctx == NULL) return(-1);
10774
Owen Taylor3473f882001-02-23 17:55:21 +000010775 if (ctx->depth > 40) {
10776 return(XML_ERR_ENTITY_LOOP);
10777 }
10778
Daniel Veillardcda96922001-08-21 10:56:31 +000010779 if (lst != NULL)
10780 *lst = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000010781 if ((URL == NULL) && (ID == NULL))
10782 return(-1);
10783 if (ctx->myDoc == NULL) /* @@ relax but check for dereferences */
10784 return(-1);
10785
10786
10787 ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL);
10788 if (ctxt == NULL) return(-1);
10789 ctxt->userData = ctxt;
Daniel Veillarde2830f12003-01-08 17:47:49 +000010790 ctxt->_private = ctx->_private;
Owen Taylor3473f882001-02-23 17:55:21 +000010791 oldsax = ctxt->sax;
10792 ctxt->sax = ctx->sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000010793 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010794 newDoc = xmlNewDoc(BAD_CAST "1.0");
10795 if (newDoc == NULL) {
10796 xmlFreeParserCtxt(ctxt);
10797 return(-1);
10798 }
Daniel Veillard03a53c32004-10-26 16:06:51 +000010799 if (ctx->myDoc->dict) {
10800 newDoc->dict = ctx->myDoc->dict;
10801 xmlDictReference(newDoc->dict);
10802 }
Owen Taylor3473f882001-02-23 17:55:21 +000010803 if (ctx->myDoc != NULL) {
10804 newDoc->intSubset = ctx->myDoc->intSubset;
10805 newDoc->extSubset = ctx->myDoc->extSubset;
10806 }
10807 if (ctx->myDoc->URL != NULL) {
10808 newDoc->URL = xmlStrdup(ctx->myDoc->URL);
10809 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000010810 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
10811 if (newRoot == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000010812 ctxt->sax = oldsax;
10813 xmlFreeParserCtxt(ctxt);
10814 newDoc->intSubset = NULL;
10815 newDoc->extSubset = NULL;
10816 xmlFreeDoc(newDoc);
10817 return(-1);
10818 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000010819 xmlAddChild((xmlNodePtr) newDoc, newRoot);
Owen Taylor3473f882001-02-23 17:55:21 +000010820 nodePush(ctxt, newDoc->children);
10821 if (ctx->myDoc == NULL) {
10822 ctxt->myDoc = newDoc;
10823 } else {
10824 ctxt->myDoc = ctx->myDoc;
10825 newDoc->children->doc = ctx->myDoc;
10826 }
10827
Daniel Veillard87a764e2001-06-20 17:41:10 +000010828 /*
10829 * Get the 4 first bytes and decode the charset
10830 * if enc != XML_CHAR_ENCODING_NONE
10831 * plug some encoding conversion routines.
10832 */
10833 GROW
Daniel Veillard4aede2e2003-10-17 12:43:59 +000010834 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
10835 start[0] = RAW;
10836 start[1] = NXT(1);
10837 start[2] = NXT(2);
10838 start[3] = NXT(3);
10839 enc = xmlDetectCharEncoding(start, 4);
10840 if (enc != XML_CHAR_ENCODING_NONE) {
10841 xmlSwitchEncoding(ctxt, enc);
10842 }
Daniel Veillard87a764e2001-06-20 17:41:10 +000010843 }
10844
Owen Taylor3473f882001-02-23 17:55:21 +000010845 /*
10846 * Parse a possible text declaration first
10847 */
Daniel Veillarda07050d2003-10-19 14:46:32 +000010848 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +000010849 xmlParseTextDecl(ctxt);
10850 }
10851
10852 /*
10853 * Doing validity checking on chunk doesn't make sense
10854 */
10855 ctxt->instate = XML_PARSER_CONTENT;
10856 ctxt->validate = ctx->validate;
Daniel Veillard5f8d1a32003-03-23 21:02:00 +000010857 ctxt->valid = ctx->valid;
Owen Taylor3473f882001-02-23 17:55:21 +000010858 ctxt->loadsubset = ctx->loadsubset;
10859 ctxt->depth = ctx->depth + 1;
10860 ctxt->replaceEntities = ctx->replaceEntities;
10861 if (ctxt->validate) {
10862 ctxt->vctxt.error = ctx->vctxt.error;
10863 ctxt->vctxt.warning = ctx->vctxt.warning;
Owen Taylor3473f882001-02-23 17:55:21 +000010864 } else {
10865 ctxt->vctxt.error = NULL;
10866 ctxt->vctxt.warning = NULL;
10867 }
Daniel Veillarda9142e72001-06-19 11:07:54 +000010868 ctxt->vctxt.nodeTab = NULL;
10869 ctxt->vctxt.nodeNr = 0;
10870 ctxt->vctxt.nodeMax = 0;
10871 ctxt->vctxt.node = NULL;
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000010872 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
10873 ctxt->dict = ctx->dict;
Daniel Veillardfd343dc2003-10-31 10:55:22 +000010874 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
10875 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
10876 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000010877 ctxt->dictNames = ctx->dictNames;
10878 ctxt->attsDefault = ctx->attsDefault;
10879 ctxt->attsSpecial = ctx->attsSpecial;
William M. Brack503b6102004-08-19 02:17:27 +000010880 ctxt->linenumbers = ctx->linenumbers;
Owen Taylor3473f882001-02-23 17:55:21 +000010881
10882 xmlParseContent(ctxt);
10883
Daniel Veillard5f8d1a32003-03-23 21:02:00 +000010884 ctx->validate = ctxt->validate;
10885 ctx->valid = ctxt->valid;
Owen Taylor3473f882001-02-23 17:55:21 +000010886 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010887 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010888 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010889 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010890 }
10891 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010892 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010893 }
10894
10895 if (!ctxt->wellFormed) {
10896 if (ctxt->errNo == 0)
10897 ret = 1;
10898 else
10899 ret = ctxt->errNo;
10900 } else {
Daniel Veillardcda96922001-08-21 10:56:31 +000010901 if (lst != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000010902 xmlNodePtr cur;
10903
10904 /*
10905 * Return the newly created nodeset after unlinking it from
10906 * they pseudo parent.
10907 */
10908 cur = newDoc->children->children;
Daniel Veillardcda96922001-08-21 10:56:31 +000010909 *lst = cur;
Owen Taylor3473f882001-02-23 17:55:21 +000010910 while (cur != NULL) {
10911 cur->parent = NULL;
10912 cur = cur->next;
10913 }
10914 newDoc->children->children = NULL;
10915 }
10916 ret = 0;
10917 }
10918 ctxt->sax = oldsax;
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000010919 ctxt->dict = NULL;
10920 ctxt->attsDefault = NULL;
10921 ctxt->attsSpecial = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000010922 xmlFreeParserCtxt(ctxt);
10923 newDoc->intSubset = NULL;
10924 newDoc->extSubset = NULL;
10925 xmlFreeDoc(newDoc);
10926
10927 return(ret);
10928}
10929
10930/**
Daniel Veillard257d9102001-05-08 10:41:44 +000010931 * xmlParseExternalEntityPrivate:
Owen Taylor3473f882001-02-23 17:55:21 +000010932 * @doc: the document the chunk pertains to
Daniel Veillarda97a19b2001-05-20 13:19:52 +000010933 * @oldctxt: the previous parser context if available
Owen Taylor3473f882001-02-23 17:55:21 +000010934 * @sax: the SAX handler bloc (possibly NULL)
10935 * @user_data: The user data returned on SAX callbacks (possibly NULL)
10936 * @depth: Used for loop detection, use 0
10937 * @URL: the URL for the entity to load
10938 * @ID: the System ID for the entity to load
10939 * @list: the return value for the set of parsed nodes
10940 *
Daniel Veillard257d9102001-05-08 10:41:44 +000010941 * Private version of xmlParseExternalEntity()
Owen Taylor3473f882001-02-23 17:55:21 +000010942 *
10943 * Returns 0 if the entity is well formed, -1 in case of args problem and
10944 * the parser error code otherwise
10945 */
10946
Daniel Veillard7d515752003-09-26 19:12:37 +000010947static xmlParserErrors
Daniel Veillarda97a19b2001-05-20 13:19:52 +000010948xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
10949 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +000010950 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +000010951 const xmlChar *ID, xmlNodePtr *list) {
Owen Taylor3473f882001-02-23 17:55:21 +000010952 xmlParserCtxtPtr ctxt;
10953 xmlDocPtr newDoc;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000010954 xmlNodePtr newRoot;
Owen Taylor3473f882001-02-23 17:55:21 +000010955 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillard7d515752003-09-26 19:12:37 +000010956 xmlParserErrors ret = XML_ERR_OK;
Daniel Veillard87a764e2001-06-20 17:41:10 +000010957 xmlChar start[4];
10958 xmlCharEncoding enc;
Owen Taylor3473f882001-02-23 17:55:21 +000010959
10960 if (depth > 40) {
10961 return(XML_ERR_ENTITY_LOOP);
10962 }
10963
10964
10965
10966 if (list != NULL)
10967 *list = NULL;
10968 if ((URL == NULL) && (ID == NULL))
Daniel Veillard7d515752003-09-26 19:12:37 +000010969 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000010970 if (doc == NULL) /* @@ relax but check for dereferences */
Daniel Veillard7d515752003-09-26 19:12:37 +000010971 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000010972
10973
10974 ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL);
William M. Brackb670e2e2003-09-27 01:05:55 +000010975 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
Owen Taylor3473f882001-02-23 17:55:21 +000010976 ctxt->userData = ctxt;
Daniel Veillarda97a19b2001-05-20 13:19:52 +000010977 if (oldctxt != NULL) {
10978 ctxt->_private = oldctxt->_private;
10979 ctxt->loadsubset = oldctxt->loadsubset;
10980 ctxt->validate = oldctxt->validate;
10981 ctxt->external = oldctxt->external;
Daniel Veillard44e1dd02003-02-21 23:23:28 +000010982 ctxt->record_info = oldctxt->record_info;
10983 ctxt->node_seq.maximum = oldctxt->node_seq.maximum;
10984 ctxt->node_seq.length = oldctxt->node_seq.length;
10985 ctxt->node_seq.buffer = oldctxt->node_seq.buffer;
Daniel Veillarda97a19b2001-05-20 13:19:52 +000010986 } else {
10987 /*
10988 * Doing validity checking on chunk without context
10989 * doesn't make sense
10990 */
10991 ctxt->_private = NULL;
10992 ctxt->validate = 0;
10993 ctxt->external = 2;
10994 ctxt->loadsubset = 0;
10995 }
Owen Taylor3473f882001-02-23 17:55:21 +000010996 if (sax != NULL) {
10997 oldsax = ctxt->sax;
10998 ctxt->sax = sax;
10999 if (user_data != NULL)
11000 ctxt->userData = user_data;
11001 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000011002 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011003 newDoc = xmlNewDoc(BAD_CAST "1.0");
11004 if (newDoc == NULL) {
Daniel Veillard44e1dd02003-02-21 23:23:28 +000011005 ctxt->node_seq.maximum = 0;
11006 ctxt->node_seq.length = 0;
11007 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000011008 xmlFreeParserCtxt(ctxt);
Daniel Veillard7d515752003-09-26 19:12:37 +000011009 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000011010 }
11011 if (doc != NULL) {
11012 newDoc->intSubset = doc->intSubset;
11013 newDoc->extSubset = doc->extSubset;
Daniel Veillard03a53c32004-10-26 16:06:51 +000011014 newDoc->dict = doc->dict;
11015 } else if (oldctxt != NULL) {
11016 newDoc->dict = oldctxt->dict;
Owen Taylor3473f882001-02-23 17:55:21 +000011017 }
Daniel Veillard03a53c32004-10-26 16:06:51 +000011018 xmlDictReference(newDoc->dict);
11019
Owen Taylor3473f882001-02-23 17:55:21 +000011020 if (doc->URL != NULL) {
11021 newDoc->URL = xmlStrdup(doc->URL);
11022 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011023 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
11024 if (newRoot == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000011025 if (sax != NULL)
11026 ctxt->sax = oldsax;
Daniel Veillard44e1dd02003-02-21 23:23:28 +000011027 ctxt->node_seq.maximum = 0;
11028 ctxt->node_seq.length = 0;
11029 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000011030 xmlFreeParserCtxt(ctxt);
11031 newDoc->intSubset = NULL;
11032 newDoc->extSubset = NULL;
11033 xmlFreeDoc(newDoc);
Daniel Veillard7d515752003-09-26 19:12:37 +000011034 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000011035 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011036 xmlAddChild((xmlNodePtr) newDoc, newRoot);
Owen Taylor3473f882001-02-23 17:55:21 +000011037 nodePush(ctxt, newDoc->children);
11038 if (doc == NULL) {
11039 ctxt->myDoc = newDoc;
11040 } else {
11041 ctxt->myDoc = doc;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011042 newRoot->doc = doc;
Owen Taylor3473f882001-02-23 17:55:21 +000011043 }
11044
Daniel Veillard87a764e2001-06-20 17:41:10 +000011045 /*
11046 * Get the 4 first bytes and decode the charset
11047 * if enc != XML_CHAR_ENCODING_NONE
11048 * plug some encoding conversion routines.
11049 */
11050 GROW;
Daniel Veillard4aede2e2003-10-17 12:43:59 +000011051 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
11052 start[0] = RAW;
11053 start[1] = NXT(1);
11054 start[2] = NXT(2);
11055 start[3] = NXT(3);
11056 enc = xmlDetectCharEncoding(start, 4);
11057 if (enc != XML_CHAR_ENCODING_NONE) {
11058 xmlSwitchEncoding(ctxt, enc);
11059 }
Daniel Veillard87a764e2001-06-20 17:41:10 +000011060 }
11061
Owen Taylor3473f882001-02-23 17:55:21 +000011062 /*
11063 * Parse a possible text declaration first
11064 */
Daniel Veillarda07050d2003-10-19 14:46:32 +000011065 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +000011066 xmlParseTextDecl(ctxt);
11067 }
11068
Owen Taylor3473f882001-02-23 17:55:21 +000011069 ctxt->instate = XML_PARSER_CONTENT;
Owen Taylor3473f882001-02-23 17:55:21 +000011070 ctxt->depth = depth;
11071
11072 xmlParseContent(ctxt);
11073
Daniel Veillard561b7f82002-03-20 21:55:57 +000011074 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011075 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard561b7f82002-03-20 21:55:57 +000011076 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011077 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011078 }
11079 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011080 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011081 }
11082
11083 if (!ctxt->wellFormed) {
11084 if (ctxt->errNo == 0)
Daniel Veillard7d515752003-09-26 19:12:37 +000011085 ret = XML_ERR_INTERNAL_ERROR;
Owen Taylor3473f882001-02-23 17:55:21 +000011086 else
William M. Brack7b9154b2003-09-27 19:23:50 +000011087 ret = (xmlParserErrors)ctxt->errNo;
Owen Taylor3473f882001-02-23 17:55:21 +000011088 } else {
11089 if (list != NULL) {
11090 xmlNodePtr cur;
11091
11092 /*
11093 * Return the newly created nodeset after unlinking it from
11094 * they pseudo parent.
11095 */
11096 cur = newDoc->children->children;
11097 *list = cur;
11098 while (cur != NULL) {
11099 cur->parent = NULL;
11100 cur = cur->next;
11101 }
11102 newDoc->children->children = NULL;
11103 }
Daniel Veillard7d515752003-09-26 19:12:37 +000011104 ret = XML_ERR_OK;
Owen Taylor3473f882001-02-23 17:55:21 +000011105 }
11106 if (sax != NULL)
11107 ctxt->sax = oldsax;
Daniel Veillard0046c0f2003-02-23 13:52:30 +000011108 oldctxt->node_seq.maximum = ctxt->node_seq.maximum;
11109 oldctxt->node_seq.length = ctxt->node_seq.length;
11110 oldctxt->node_seq.buffer = ctxt->node_seq.buffer;
Daniel Veillard44e1dd02003-02-21 23:23:28 +000011111 ctxt->node_seq.maximum = 0;
11112 ctxt->node_seq.length = 0;
11113 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000011114 xmlFreeParserCtxt(ctxt);
11115 newDoc->intSubset = NULL;
11116 newDoc->extSubset = NULL;
11117 xmlFreeDoc(newDoc);
11118
11119 return(ret);
11120}
11121
Daniel Veillard81273902003-09-30 00:43:48 +000011122#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000011123/**
Daniel Veillard257d9102001-05-08 10:41:44 +000011124 * xmlParseExternalEntity:
11125 * @doc: the document the chunk pertains to
11126 * @sax: the SAX handler bloc (possibly NULL)
11127 * @user_data: The user data returned on SAX callbacks (possibly NULL)
11128 * @depth: Used for loop detection, use 0
11129 * @URL: the URL for the entity to load
11130 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +000011131 * @lst: the return value for the set of parsed nodes
Daniel Veillard257d9102001-05-08 10:41:44 +000011132 *
11133 * Parse an external general entity
11134 * An external general parsed entity is well-formed if it matches the
11135 * production labeled extParsedEnt.
11136 *
11137 * [78] extParsedEnt ::= TextDecl? content
11138 *
11139 * Returns 0 if the entity is well formed, -1 in case of args problem and
11140 * the parser error code otherwise
11141 */
11142
11143int
11144xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardcda96922001-08-21 10:56:31 +000011145 int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *lst) {
Daniel Veillarda97a19b2001-05-20 13:19:52 +000011146 return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL,
Daniel Veillardcda96922001-08-21 10:56:31 +000011147 ID, lst));
Daniel Veillard257d9102001-05-08 10:41:44 +000011148}
11149
11150/**
Daniel Veillarde020c3a2001-03-21 18:06:15 +000011151 * xmlParseBalancedChunkMemory:
Owen Taylor3473f882001-02-23 17:55:21 +000011152 * @doc: the document the chunk pertains to
11153 * @sax: the SAX handler bloc (possibly NULL)
11154 * @user_data: The user data returned on SAX callbacks (possibly NULL)
11155 * @depth: Used for loop detection, use 0
11156 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
Daniel Veillardcda96922001-08-21 10:56:31 +000011157 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +000011158 *
11159 * Parse a well-balanced chunk of an XML document
11160 * called by the parser
11161 * The allowed sequence for the Well Balanced Chunk is the one defined by
11162 * the content production in the XML grammar:
11163 *
11164 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
11165 *
11166 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
11167 * the parser error code otherwise
11168 */
11169
11170int
11171xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
Daniel Veillardcda96922001-08-21 10:56:31 +000011172 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) {
Daniel Veillard58e44c92002-08-02 22:19:49 +000011173 return xmlParseBalancedChunkMemoryRecover( doc, sax, user_data,
11174 depth, string, lst, 0 );
11175}
Daniel Veillard81273902003-09-30 00:43:48 +000011176#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillard58e44c92002-08-02 22:19:49 +000011177
11178/**
Daniel Veillard328f48c2002-11-15 15:24:34 +000011179 * xmlParseBalancedChunkMemoryInternal:
11180 * @oldctxt: the existing parsing context
11181 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
11182 * @user_data: the user data field for the parser context
11183 * @lst: the return value for the set of parsed nodes
11184 *
11185 *
11186 * Parse a well-balanced chunk of an XML document
11187 * called by the parser
11188 * The allowed sequence for the Well Balanced Chunk is the one defined by
11189 * the content production in the XML grammar:
11190 *
11191 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
11192 *
Daniel Veillard7d515752003-09-26 19:12:37 +000011193 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
11194 * error code otherwise
Daniel Veillard328f48c2002-11-15 15:24:34 +000011195 *
11196 * In case recover is set to 1, the nodelist will not be empty even if
11197 * the parsed chunk is not well balanced.
11198 */
Daniel Veillard7d515752003-09-26 19:12:37 +000011199static xmlParserErrors
Daniel Veillard328f48c2002-11-15 15:24:34 +000011200xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
11201 const xmlChar *string, void *user_data, xmlNodePtr *lst) {
11202 xmlParserCtxtPtr ctxt;
Daniel Veillard68e9e742002-11-16 15:35:11 +000011203 xmlDocPtr newDoc = NULL;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011204 xmlNodePtr newRoot;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011205 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000011206 xmlNodePtr content = NULL;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011207 xmlNodePtr last = NULL;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011208 int size;
Daniel Veillard7d515752003-09-26 19:12:37 +000011209 xmlParserErrors ret = XML_ERR_OK;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011210
11211 if (oldctxt->depth > 40) {
11212 return(XML_ERR_ENTITY_LOOP);
11213 }
11214
11215
11216 if (lst != NULL)
11217 *lst = NULL;
11218 if (string == NULL)
William M. Brack7b9154b2003-09-27 19:23:50 +000011219 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011220
11221 size = xmlStrlen(string);
11222
11223 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
William M. Brack7b9154b2003-09-27 19:23:50 +000011224 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011225 if (user_data != NULL)
11226 ctxt->userData = user_data;
11227 else
11228 ctxt->userData = ctxt;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000011229 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
11230 ctxt->dict = oldctxt->dict;
Daniel Veillardfd343dc2003-10-31 10:55:22 +000011231 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
11232 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
11233 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011234
11235 oldsax = ctxt->sax;
11236 ctxt->sax = oldctxt->sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000011237 xmlDetectSAX2(ctxt);
Daniel Veillard87ab1c12003-12-21 13:01:56 +000011238 ctxt->replaceEntities = oldctxt->replaceEntities;
11239 ctxt->options = oldctxt->options;
Daniel Veillarde57ec792003-09-10 10:50:59 +000011240
Daniel Veillarde1ca5032002-12-09 14:13:43 +000011241 ctxt->_private = oldctxt->_private;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011242 if (oldctxt->myDoc == NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000011243 newDoc = xmlNewDoc(BAD_CAST "1.0");
11244 if (newDoc == NULL) {
11245 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000011246 ctxt->dict = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000011247 xmlFreeParserCtxt(ctxt);
William M. Brack7b9154b2003-09-27 19:23:50 +000011248 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard68e9e742002-11-16 15:35:11 +000011249 }
Daniel Veillard03a53c32004-10-26 16:06:51 +000011250 newDoc->dict = ctxt->dict;
11251 xmlDictReference(newDoc->dict);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011252 ctxt->myDoc = newDoc;
Daniel Veillard68e9e742002-11-16 15:35:11 +000011253 } else {
11254 ctxt->myDoc = oldctxt->myDoc;
11255 content = ctxt->myDoc->children;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011256 last = ctxt->myDoc->last;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011257 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011258 newRoot = xmlNewDocNode(ctxt->myDoc, NULL, BAD_CAST "pseudoroot", NULL);
11259 if (newRoot == NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000011260 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000011261 ctxt->dict = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000011262 xmlFreeParserCtxt(ctxt);
Daniel Veillard03a53c32004-10-26 16:06:51 +000011263 if (newDoc != NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000011264 xmlFreeDoc(newDoc);
Daniel Veillard03a53c32004-10-26 16:06:51 +000011265 }
William M. Brack7b9154b2003-09-27 19:23:50 +000011266 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard68e9e742002-11-16 15:35:11 +000011267 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011268 ctxt->myDoc->children = NULL;
11269 ctxt->myDoc->last = NULL;
11270 xmlAddChild((xmlNodePtr) ctxt->myDoc, newRoot);
Daniel Veillard68e9e742002-11-16 15:35:11 +000011271 nodePush(ctxt, ctxt->myDoc->children);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011272 ctxt->instate = XML_PARSER_CONTENT;
11273 ctxt->depth = oldctxt->depth + 1;
11274
Daniel Veillard328f48c2002-11-15 15:24:34 +000011275 ctxt->validate = 0;
11276 ctxt->loadsubset = oldctxt->loadsubset;
Daniel Veillardef8dd7b2003-03-23 12:02:56 +000011277 if ((oldctxt->validate) || (oldctxt->replaceEntities != 0)) {
11278 /*
11279 * ID/IDREF registration will be done in xmlValidateElement below
11280 */
11281 ctxt->loadsubset |= XML_SKIP_IDS;
11282 }
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000011283 ctxt->dictNames = oldctxt->dictNames;
Daniel Veillard95d2d5b2003-10-27 14:54:49 +000011284 ctxt->attsDefault = oldctxt->attsDefault;
11285 ctxt->attsSpecial = oldctxt->attsSpecial;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011286
Daniel Veillard68e9e742002-11-16 15:35:11 +000011287 xmlParseContent(ctxt);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011288 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011289 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011290 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011291 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011292 }
Daniel Veillard68e9e742002-11-16 15:35:11 +000011293 if (ctxt->node != ctxt->myDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011294 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011295 }
11296
11297 if (!ctxt->wellFormed) {
11298 if (ctxt->errNo == 0)
Daniel Veillard7d515752003-09-26 19:12:37 +000011299 ret = XML_ERR_INTERNAL_ERROR;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011300 else
William M. Brack7b9154b2003-09-27 19:23:50 +000011301 ret = (xmlParserErrors)ctxt->errNo;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011302 } else {
William M. Brack7b9154b2003-09-27 19:23:50 +000011303 ret = XML_ERR_OK;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011304 }
11305
William M. Brack7b9154b2003-09-27 19:23:50 +000011306 if ((lst != NULL) && (ret == XML_ERR_OK)) {
Daniel Veillard328f48c2002-11-15 15:24:34 +000011307 xmlNodePtr cur;
11308
11309 /*
11310 * Return the newly created nodeset after unlinking it from
11311 * they pseudo parent.
11312 */
Daniel Veillard68e9e742002-11-16 15:35:11 +000011313 cur = ctxt->myDoc->children->children;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011314 *lst = cur;
11315 while (cur != NULL) {
Daniel Veillard4432df22003-09-28 18:58:27 +000011316#ifdef LIBXML_VALID_ENABLED
Daniel Veillard8d589042003-02-04 15:07:21 +000011317 if (oldctxt->validate && oldctxt->wellFormed &&
11318 oldctxt->myDoc && oldctxt->myDoc->intSubset) {
11319 oldctxt->valid &= xmlValidateElement(&oldctxt->vctxt,
11320 oldctxt->myDoc, cur);
11321 }
Daniel Veillard4432df22003-09-28 18:58:27 +000011322#endif /* LIBXML_VALID_ENABLED */
Daniel Veillard328f48c2002-11-15 15:24:34 +000011323 cur->parent = NULL;
11324 cur = cur->next;
11325 }
Daniel Veillard68e9e742002-11-16 15:35:11 +000011326 ctxt->myDoc->children->children = NULL;
11327 }
11328 if (ctxt->myDoc != NULL) {
11329 xmlFreeNode(ctxt->myDoc->children);
11330 ctxt->myDoc->children = content;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011331 ctxt->myDoc->last = last;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011332 }
11333
11334 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000011335 ctxt->dict = NULL;
Daniel Veillard95d2d5b2003-10-27 14:54:49 +000011336 ctxt->attsDefault = NULL;
11337 ctxt->attsSpecial = NULL;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011338 xmlFreeParserCtxt(ctxt);
Daniel Veillard03a53c32004-10-26 16:06:51 +000011339 if (newDoc != NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000011340 xmlFreeDoc(newDoc);
Daniel Veillard03a53c32004-10-26 16:06:51 +000011341 }
Daniel Veillard328f48c2002-11-15 15:24:34 +000011342
11343 return(ret);
11344}
11345
Daniel Veillard29b17482004-08-16 00:39:03 +000011346/**
11347 * xmlParseInNodeContext:
11348 * @node: the context node
11349 * @data: the input string
11350 * @datalen: the input string length in bytes
11351 * @options: a combination of xmlParserOption
11352 * @lst: the return value for the set of parsed nodes
11353 *
11354 * Parse a well-balanced chunk of an XML document
11355 * within the context (DTD, namespaces, etc ...) of the given node.
11356 *
11357 * The allowed sequence for the data is a Well Balanced Chunk defined by
11358 * the content production in the XML grammar:
11359 *
11360 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
11361 *
11362 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
11363 * error code otherwise
11364 */
11365xmlParserErrors
11366xmlParseInNodeContext(xmlNodePtr node, const char *data, int datalen,
11367 int options, xmlNodePtr *lst) {
11368#ifdef SAX2
11369 xmlParserCtxtPtr ctxt;
11370 xmlDocPtr doc = NULL;
11371 xmlNodePtr fake, cur;
11372 int nsnr = 0;
11373
11374 xmlParserErrors ret = XML_ERR_OK;
11375
11376 /*
11377 * check all input parameters, grab the document
11378 */
11379 if ((lst == NULL) || (node == NULL) || (data == NULL) || (datalen < 0))
11380 return(XML_ERR_INTERNAL_ERROR);
11381 switch (node->type) {
11382 case XML_ELEMENT_NODE:
11383 case XML_ATTRIBUTE_NODE:
11384 case XML_TEXT_NODE:
11385 case XML_CDATA_SECTION_NODE:
11386 case XML_ENTITY_REF_NODE:
11387 case XML_PI_NODE:
11388 case XML_COMMENT_NODE:
11389 case XML_DOCUMENT_NODE:
11390 case XML_HTML_DOCUMENT_NODE:
11391 break;
11392 default:
11393 return(XML_ERR_INTERNAL_ERROR);
11394
11395 }
11396 while ((node != NULL) && (node->type != XML_ELEMENT_NODE) &&
11397 (node->type != XML_DOCUMENT_NODE) &&
11398 (node->type != XML_HTML_DOCUMENT_NODE))
11399 node = node->parent;
11400 if (node == NULL)
11401 return(XML_ERR_INTERNAL_ERROR);
11402 if (node->type == XML_ELEMENT_NODE)
11403 doc = node->doc;
11404 else
11405 doc = (xmlDocPtr) node;
11406 if (doc == NULL)
11407 return(XML_ERR_INTERNAL_ERROR);
11408
11409 /*
11410 * allocate a context and set-up everything not related to the
11411 * node position in the tree
11412 */
11413 if (doc->type == XML_DOCUMENT_NODE)
11414 ctxt = xmlCreateMemoryParserCtxt((char *) data, datalen);
11415#ifdef LIBXML_HTML_ENABLED
11416 else if (doc->type == XML_HTML_DOCUMENT_NODE)
11417 ctxt = htmlCreateMemoryParserCtxt((char *) data, datalen);
11418#endif
11419 else
11420 return(XML_ERR_INTERNAL_ERROR);
11421
11422 if (ctxt == NULL)
11423 return(XML_ERR_NO_MEMORY);
11424 fake = xmlNewComment(NULL);
11425 if (fake == NULL) {
11426 xmlFreeParserCtxt(ctxt);
11427 return(XML_ERR_NO_MEMORY);
11428 }
11429 xmlAddChild(node, fake);
William M. Brackc3f81342004-10-03 01:22:44 +000011430
11431 /*
11432 * Use input doc's dict if present, else assure XML_PARSE_NODICT is set.
11433 * We need a dictionary for xmlDetectSAX2, so if there's no doc dict
11434 * we must wait until the last moment to free the original one.
11435 */
Daniel Veillard29b17482004-08-16 00:39:03 +000011436 if (doc->dict != NULL) {
William M. Brackc3f81342004-10-03 01:22:44 +000011437 if (ctxt->dict != NULL)
Daniel Veillard29b17482004-08-16 00:39:03 +000011438 xmlDictFree(ctxt->dict);
11439 ctxt->dict = doc->dict;
William M. Brackc3f81342004-10-03 01:22:44 +000011440 } else
11441 options |= XML_PARSE_NODICT;
11442
11443 xmlCtxtUseOptions(ctxt, options);
Daniel Veillard29b17482004-08-16 00:39:03 +000011444 xmlDetectSAX2(ctxt);
11445 ctxt->myDoc = doc;
11446
11447 if (node->type == XML_ELEMENT_NODE) {
11448 nodePush(ctxt, node);
11449 /*
11450 * initialize the SAX2 namespaces stack
11451 */
11452 cur = node;
11453 while ((cur != NULL) && (cur->type == XML_ELEMENT_NODE)) {
11454 xmlNsPtr ns = cur->nsDef;
11455 const xmlChar *iprefix, *ihref;
11456
11457 while (ns != NULL) {
11458 if (ctxt->dict) {
11459 iprefix = xmlDictLookup(ctxt->dict, ns->prefix, -1);
11460 ihref = xmlDictLookup(ctxt->dict, ns->href, -1);
11461 } else {
11462 iprefix = ns->prefix;
11463 ihref = ns->href;
11464 }
11465
11466 if (xmlGetNamespace(ctxt, iprefix) == NULL) {
11467 nsPush(ctxt, iprefix, ihref);
11468 nsnr++;
11469 }
11470 ns = ns->next;
11471 }
11472 cur = cur->parent;
11473 }
11474 ctxt->instate = XML_PARSER_CONTENT;
11475 }
11476
11477 if ((ctxt->validate) || (ctxt->replaceEntities != 0)) {
11478 /*
11479 * ID/IDREF registration will be done in xmlValidateElement below
11480 */
11481 ctxt->loadsubset |= XML_SKIP_IDS;
11482 }
11483
11484 xmlParseContent(ctxt);
11485 nsPop(ctxt, nsnr);
11486 if ((RAW == '<') && (NXT(1) == '/')) {
11487 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
11488 } else if (RAW != 0) {
11489 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
11490 }
11491 if ((ctxt->node != NULL) && (ctxt->node != node)) {
11492 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
11493 ctxt->wellFormed = 0;
11494 }
11495
11496 if (!ctxt->wellFormed) {
11497 if (ctxt->errNo == 0)
11498 ret = XML_ERR_INTERNAL_ERROR;
11499 else
11500 ret = (xmlParserErrors)ctxt->errNo;
11501 } else {
11502 ret = XML_ERR_OK;
11503 }
11504
11505 /*
11506 * Return the newly created nodeset after unlinking it from
11507 * the pseudo sibling.
11508 */
11509
11510 cur = fake->next;
11511 fake->next = NULL;
11512 node->last = fake;
11513
11514 if (cur != NULL) {
11515 cur->prev = NULL;
11516 }
11517
11518 *lst = cur;
11519
11520 while (cur != NULL) {
11521 cur->parent = NULL;
11522 cur = cur->next;
11523 }
11524
11525 xmlUnlinkNode(fake);
11526 xmlFreeNode(fake);
11527
11528
11529 if (ret != XML_ERR_OK) {
11530 xmlFreeNodeList(*lst);
11531 *lst = NULL;
11532 }
William M. Brackc3f81342004-10-03 01:22:44 +000011533
William M. Brackb7b54de2004-10-06 16:38:01 +000011534 if (doc->dict != NULL)
11535 ctxt->dict = NULL;
Daniel Veillard29b17482004-08-16 00:39:03 +000011536 xmlFreeParserCtxt(ctxt);
11537
11538 return(ret);
11539#else /* !SAX2 */
11540 return(XML_ERR_INTERNAL_ERROR);
11541#endif
11542}
11543
Daniel Veillard81273902003-09-30 00:43:48 +000011544#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard328f48c2002-11-15 15:24:34 +000011545/**
Daniel Veillard58e44c92002-08-02 22:19:49 +000011546 * xmlParseBalancedChunkMemoryRecover:
11547 * @doc: the document the chunk pertains to
11548 * @sax: the SAX handler bloc (possibly NULL)
11549 * @user_data: The user data returned on SAX callbacks (possibly NULL)
11550 * @depth: Used for loop detection, use 0
11551 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
11552 * @lst: the return value for the set of parsed nodes
11553 * @recover: return nodes even if the data is broken (use 0)
11554 *
11555 *
11556 * Parse a well-balanced chunk of an XML document
11557 * called by the parser
11558 * The allowed sequence for the Well Balanced Chunk is the one defined by
11559 * the content production in the XML grammar:
11560 *
11561 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
11562 *
11563 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
11564 * the parser error code otherwise
11565 *
11566 * In case recover is set to 1, the nodelist will not be empty even if
11567 * the parsed chunk is not well balanced.
11568 */
11569int
11570xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax,
11571 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst,
11572 int recover) {
Owen Taylor3473f882001-02-23 17:55:21 +000011573 xmlParserCtxtPtr ctxt;
11574 xmlDocPtr newDoc;
11575 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000011576 xmlNodePtr content, newRoot;
Owen Taylor3473f882001-02-23 17:55:21 +000011577 int size;
11578 int ret = 0;
11579
11580 if (depth > 40) {
11581 return(XML_ERR_ENTITY_LOOP);
11582 }
11583
11584
Daniel Veillardcda96922001-08-21 10:56:31 +000011585 if (lst != NULL)
11586 *lst = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000011587 if (string == NULL)
11588 return(-1);
11589
11590 size = xmlStrlen(string);
11591
11592 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
11593 if (ctxt == NULL) return(-1);
11594 ctxt->userData = ctxt;
11595 if (sax != NULL) {
11596 oldsax = ctxt->sax;
11597 ctxt->sax = sax;
11598 if (user_data != NULL)
11599 ctxt->userData = user_data;
11600 }
11601 newDoc = xmlNewDoc(BAD_CAST "1.0");
11602 if (newDoc == NULL) {
11603 xmlFreeParserCtxt(ctxt);
11604 return(-1);
11605 }
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000011606 if ((doc != NULL) && (doc->dict != NULL)) {
11607 xmlDictFree(ctxt->dict);
11608 ctxt->dict = doc->dict;
11609 xmlDictReference(ctxt->dict);
11610 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
11611 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
11612 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
11613 ctxt->dictNames = 1;
11614 } else {
11615 xmlCtxtUseOptions(ctxt, XML_PARSE_NODICT);
11616 }
Owen Taylor3473f882001-02-23 17:55:21 +000011617 if (doc != NULL) {
11618 newDoc->intSubset = doc->intSubset;
11619 newDoc->extSubset = doc->extSubset;
11620 }
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000011621 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
11622 if (newRoot == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000011623 if (sax != NULL)
11624 ctxt->sax = oldsax;
11625 xmlFreeParserCtxt(ctxt);
11626 newDoc->intSubset = NULL;
11627 newDoc->extSubset = NULL;
11628 xmlFreeDoc(newDoc);
11629 return(-1);
11630 }
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000011631 xmlAddChild((xmlNodePtr) newDoc, newRoot);
11632 nodePush(ctxt, newRoot);
Owen Taylor3473f882001-02-23 17:55:21 +000011633 if (doc == NULL) {
11634 ctxt->myDoc = newDoc;
11635 } else {
Daniel Veillard42766c02002-08-22 20:52:17 +000011636 ctxt->myDoc = newDoc;
Owen Taylor3473f882001-02-23 17:55:21 +000011637 newDoc->children->doc = doc;
11638 }
11639 ctxt->instate = XML_PARSER_CONTENT;
11640 ctxt->depth = depth;
11641
11642 /*
11643 * Doing validity checking on chunk doesn't make sense
11644 */
11645 ctxt->validate = 0;
11646 ctxt->loadsubset = 0;
Daniel Veillarde57ec792003-09-10 10:50:59 +000011647 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011648
Daniel Veillardb39bc392002-10-26 19:29:51 +000011649 if ( doc != NULL ){
11650 content = doc->children;
11651 doc->children = NULL;
11652 xmlParseContent(ctxt);
11653 doc->children = content;
11654 }
11655 else {
11656 xmlParseContent(ctxt);
11657 }
Owen Taylor3473f882001-02-23 17:55:21 +000011658 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011659 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011660 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011661 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011662 }
11663 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011664 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011665 }
11666
11667 if (!ctxt->wellFormed) {
11668 if (ctxt->errNo == 0)
11669 ret = 1;
11670 else
11671 ret = ctxt->errNo;
11672 } else {
Daniel Veillard58e44c92002-08-02 22:19:49 +000011673 ret = 0;
11674 }
11675
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000011676 if ((lst != NULL) && ((ret == 0) || (recover == 1))) {
11677 xmlNodePtr cur;
Owen Taylor3473f882001-02-23 17:55:21 +000011678
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000011679 /*
11680 * Return the newly created nodeset after unlinking it from
11681 * they pseudo parent.
11682 */
11683 cur = newDoc->children->children;
11684 *lst = cur;
11685 while (cur != NULL) {
11686 xmlSetTreeDoc(cur, doc);
11687 cur->parent = NULL;
11688 cur = cur->next;
Owen Taylor3473f882001-02-23 17:55:21 +000011689 }
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000011690 newDoc->children->children = NULL;
11691 }
Daniel Veillard58e44c92002-08-02 22:19:49 +000011692
Owen Taylor3473f882001-02-23 17:55:21 +000011693 if (sax != NULL)
11694 ctxt->sax = oldsax;
11695 xmlFreeParserCtxt(ctxt);
11696 newDoc->intSubset = NULL;
11697 newDoc->extSubset = NULL;
11698 xmlFreeDoc(newDoc);
11699
11700 return(ret);
11701}
11702
11703/**
11704 * xmlSAXParseEntity:
11705 * @sax: the SAX handler block
11706 * @filename: the filename
11707 *
11708 * parse an XML external entity out of context and build a tree.
11709 * It use the given SAX function block to handle the parsing callback.
11710 * If sax is NULL, fallback to the default DOM tree building routines.
11711 *
11712 * [78] extParsedEnt ::= TextDecl? content
11713 *
11714 * This correspond to a "Well Balanced" chunk
11715 *
11716 * Returns the resulting document tree
11717 */
11718
11719xmlDocPtr
11720xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
11721 xmlDocPtr ret;
11722 xmlParserCtxtPtr ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000011723
11724 ctxt = xmlCreateFileParserCtxt(filename);
11725 if (ctxt == NULL) {
11726 return(NULL);
11727 }
11728 if (sax != NULL) {
11729 if (ctxt->sax != NULL)
11730 xmlFree(ctxt->sax);
11731 ctxt->sax = sax;
11732 ctxt->userData = NULL;
11733 }
11734
Owen Taylor3473f882001-02-23 17:55:21 +000011735 xmlParseExtParsedEnt(ctxt);
11736
11737 if (ctxt->wellFormed)
11738 ret = ctxt->myDoc;
11739 else {
11740 ret = NULL;
11741 xmlFreeDoc(ctxt->myDoc);
11742 ctxt->myDoc = NULL;
11743 }
11744 if (sax != NULL)
11745 ctxt->sax = NULL;
11746 xmlFreeParserCtxt(ctxt);
11747
11748 return(ret);
11749}
11750
11751/**
11752 * xmlParseEntity:
11753 * @filename: the filename
11754 *
11755 * parse an XML external entity out of context and build a tree.
11756 *
11757 * [78] extParsedEnt ::= TextDecl? content
11758 *
11759 * This correspond to a "Well Balanced" chunk
11760 *
11761 * Returns the resulting document tree
11762 */
11763
11764xmlDocPtr
11765xmlParseEntity(const char *filename) {
11766 return(xmlSAXParseEntity(NULL, filename));
11767}
Daniel Veillard81273902003-09-30 00:43:48 +000011768#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000011769
11770/**
11771 * xmlCreateEntityParserCtxt:
11772 * @URL: the entity URL
11773 * @ID: the entity PUBLIC ID
Daniel Veillardcbaf3992001-12-31 16:16:02 +000011774 * @base: a possible base for the target URI
Owen Taylor3473f882001-02-23 17:55:21 +000011775 *
11776 * Create a parser context for an external entity
11777 * Automatic support for ZLIB/Compress compressed document is provided
11778 * by default if found at compile-time.
11779 *
11780 * Returns the new parser context or NULL
11781 */
11782xmlParserCtxtPtr
11783xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
11784 const xmlChar *base) {
11785 xmlParserCtxtPtr ctxt;
11786 xmlParserInputPtr inputStream;
11787 char *directory = NULL;
11788 xmlChar *uri;
11789
11790 ctxt = xmlNewParserCtxt();
11791 if (ctxt == NULL) {
11792 return(NULL);
11793 }
11794
11795 uri = xmlBuildURI(URL, base);
11796
11797 if (uri == NULL) {
11798 inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
11799 if (inputStream == NULL) {
11800 xmlFreeParserCtxt(ctxt);
11801 return(NULL);
11802 }
11803
11804 inputPush(ctxt, inputStream);
11805
11806 if ((ctxt->directory == NULL) && (directory == NULL))
11807 directory = xmlParserGetDirectory((char *)URL);
11808 if ((ctxt->directory == NULL) && (directory != NULL))
11809 ctxt->directory = directory;
11810 } else {
11811 inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt);
11812 if (inputStream == NULL) {
11813 xmlFree(uri);
11814 xmlFreeParserCtxt(ctxt);
11815 return(NULL);
11816 }
11817
11818 inputPush(ctxt, inputStream);
11819
11820 if ((ctxt->directory == NULL) && (directory == NULL))
11821 directory = xmlParserGetDirectory((char *)uri);
11822 if ((ctxt->directory == NULL) && (directory != NULL))
11823 ctxt->directory = directory;
11824 xmlFree(uri);
11825 }
Owen Taylor3473f882001-02-23 17:55:21 +000011826 return(ctxt);
11827}
11828
11829/************************************************************************
11830 * *
11831 * Front ends when parsing from a file *
11832 * *
11833 ************************************************************************/
11834
11835/**
Daniel Veillard61b93382003-11-03 14:28:31 +000011836 * xmlCreateURLParserCtxt:
11837 * @filename: the filename or URL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000011838 * @options: a combination of xmlParserOption
Owen Taylor3473f882001-02-23 17:55:21 +000011839 *
Daniel Veillard61b93382003-11-03 14:28:31 +000011840 * Create a parser context for a file or URL content.
Owen Taylor3473f882001-02-23 17:55:21 +000011841 * Automatic support for ZLIB/Compress compressed document is provided
Daniel Veillard61b93382003-11-03 14:28:31 +000011842 * by default if found at compile-time and for file accesses
Owen Taylor3473f882001-02-23 17:55:21 +000011843 *
11844 * Returns the new parser context or NULL
11845 */
11846xmlParserCtxtPtr
Daniel Veillard61b93382003-11-03 14:28:31 +000011847xmlCreateURLParserCtxt(const char *filename, int options)
Owen Taylor3473f882001-02-23 17:55:21 +000011848{
11849 xmlParserCtxtPtr ctxt;
11850 xmlParserInputPtr inputStream;
Owen Taylor3473f882001-02-23 17:55:21 +000011851 char *directory = NULL;
11852
Owen Taylor3473f882001-02-23 17:55:21 +000011853 ctxt = xmlNewParserCtxt();
11854 if (ctxt == NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +000011855 xmlErrMemory(NULL, "cannot allocate parser context");
Owen Taylor3473f882001-02-23 17:55:21 +000011856 return(NULL);
11857 }
11858
Daniel Veillarddf292f72005-01-16 19:00:15 +000011859 if (options)
11860 xmlCtxtUseOptions(ctxt, options);
11861 ctxt->linenumbers = 1;
Igor Zlatkovicce076162003-02-23 13:39:39 +000011862
Daniel Veillard4e9b1bc2003-06-09 10:30:33 +000011863 inputStream = xmlLoadExternalEntity(filename, NULL, ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011864 if (inputStream == NULL) {
11865 xmlFreeParserCtxt(ctxt);
11866 return(NULL);
11867 }
11868
Owen Taylor3473f882001-02-23 17:55:21 +000011869 inputPush(ctxt, inputStream);
11870 if ((ctxt->directory == NULL) && (directory == NULL))
Igor Zlatkovic5f9fada2003-02-19 14:51:00 +000011871 directory = xmlParserGetDirectory(filename);
Owen Taylor3473f882001-02-23 17:55:21 +000011872 if ((ctxt->directory == NULL) && (directory != NULL))
11873 ctxt->directory = directory;
11874
11875 return(ctxt);
11876}
11877
Daniel Veillard61b93382003-11-03 14:28:31 +000011878/**
11879 * xmlCreateFileParserCtxt:
11880 * @filename: the filename
11881 *
11882 * Create a parser context for a file content.
11883 * Automatic support for ZLIB/Compress compressed document is provided
11884 * by default if found at compile-time.
11885 *
11886 * Returns the new parser context or NULL
11887 */
11888xmlParserCtxtPtr
11889xmlCreateFileParserCtxt(const char *filename)
11890{
11891 return(xmlCreateURLParserCtxt(filename, 0));
11892}
11893
Daniel Veillard81273902003-09-30 00:43:48 +000011894#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000011895/**
Daniel Veillarda293c322001-10-02 13:54:14 +000011896 * xmlSAXParseFileWithData:
Owen Taylor3473f882001-02-23 17:55:21 +000011897 * @sax: the SAX handler block
11898 * @filename: the filename
11899 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
11900 * documents
Daniel Veillarda293c322001-10-02 13:54:14 +000011901 * @data: the userdata
Owen Taylor3473f882001-02-23 17:55:21 +000011902 *
11903 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
11904 * compressed document is provided by default if found at compile-time.
11905 * It use the given SAX function block to handle the parsing callback.
11906 * If sax is NULL, fallback to the default DOM tree building routines.
11907 *
Daniel Veillarde19fc232002-04-22 16:01:24 +000011908 * User data (void *) is stored within the parser context in the
11909 * context's _private member, so it is available nearly everywhere in libxml
Daniel Veillarda293c322001-10-02 13:54:14 +000011910 *
Owen Taylor3473f882001-02-23 17:55:21 +000011911 * Returns the resulting document tree
11912 */
11913
11914xmlDocPtr
Daniel Veillarda293c322001-10-02 13:54:14 +000011915xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename,
11916 int recovery, void *data) {
Owen Taylor3473f882001-02-23 17:55:21 +000011917 xmlDocPtr ret;
11918 xmlParserCtxtPtr ctxt;
11919 char *directory = NULL;
11920
Daniel Veillard635ef722001-10-29 11:48:19 +000011921 xmlInitParser();
11922
Owen Taylor3473f882001-02-23 17:55:21 +000011923 ctxt = xmlCreateFileParserCtxt(filename);
11924 if (ctxt == NULL) {
11925 return(NULL);
11926 }
11927 if (sax != NULL) {
11928 if (ctxt->sax != NULL)
11929 xmlFree(ctxt->sax);
11930 ctxt->sax = sax;
Owen Taylor3473f882001-02-23 17:55:21 +000011931 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000011932 xmlDetectSAX2(ctxt);
Daniel Veillarda293c322001-10-02 13:54:14 +000011933 if (data!=NULL) {
Daniel Veillardc790bf42003-10-11 10:50:10 +000011934 ctxt->_private = data;
Daniel Veillarda293c322001-10-02 13:54:14 +000011935 }
Owen Taylor3473f882001-02-23 17:55:21 +000011936
11937 if ((ctxt->directory == NULL) && (directory == NULL))
11938 directory = xmlParserGetDirectory(filename);
11939 if ((ctxt->directory == NULL) && (directory != NULL))
11940 ctxt->directory = (char *) xmlStrdup((xmlChar *) directory);
11941
Daniel Veillarddad3f682002-11-17 16:47:27 +000011942 ctxt->recovery = recovery;
11943
Owen Taylor3473f882001-02-23 17:55:21 +000011944 xmlParseDocument(ctxt);
11945
William M. Brackc07329e2003-09-08 01:57:30 +000011946 if ((ctxt->wellFormed) || recovery) {
11947 ret = ctxt->myDoc;
Daniel Veillardb65e12e2003-10-08 21:33:28 +000011948 if (ret != NULL) {
11949 if (ctxt->input->buf->compressed > 0)
11950 ret->compression = 9;
11951 else
11952 ret->compression = ctxt->input->buf->compressed;
11953 }
William M. Brackc07329e2003-09-08 01:57:30 +000011954 }
Owen Taylor3473f882001-02-23 17:55:21 +000011955 else {
11956 ret = NULL;
11957 xmlFreeDoc(ctxt->myDoc);
11958 ctxt->myDoc = NULL;
11959 }
11960 if (sax != NULL)
11961 ctxt->sax = NULL;
11962 xmlFreeParserCtxt(ctxt);
11963
11964 return(ret);
11965}
11966
11967/**
Daniel Veillarda293c322001-10-02 13:54:14 +000011968 * xmlSAXParseFile:
11969 * @sax: the SAX handler block
11970 * @filename: the filename
11971 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
11972 * documents
11973 *
11974 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
11975 * compressed document is provided by default if found at compile-time.
11976 * It use the given SAX function block to handle the parsing callback.
11977 * If sax is NULL, fallback to the default DOM tree building routines.
11978 *
11979 * Returns the resulting document tree
11980 */
11981
11982xmlDocPtr
11983xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
11984 int recovery) {
11985 return(xmlSAXParseFileWithData(sax,filename,recovery,NULL));
11986}
11987
11988/**
Owen Taylor3473f882001-02-23 17:55:21 +000011989 * xmlRecoverDoc:
11990 * @cur: a pointer to an array of xmlChar
11991 *
11992 * parse an XML in-memory document and build a tree.
11993 * In the case the document is not Well Formed, a tree is built anyway
11994 *
11995 * Returns the resulting document tree
11996 */
11997
11998xmlDocPtr
11999xmlRecoverDoc(xmlChar *cur) {
12000 return(xmlSAXParseDoc(NULL, cur, 1));
12001}
12002
12003/**
12004 * xmlParseFile:
12005 * @filename: the filename
12006 *
12007 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
12008 * compressed document is provided by default if found at compile-time.
12009 *
Daniel Veillard5d96fff2001-08-31 14:55:30 +000012010 * Returns the resulting document tree if the file was wellformed,
12011 * NULL otherwise.
Owen Taylor3473f882001-02-23 17:55:21 +000012012 */
12013
12014xmlDocPtr
12015xmlParseFile(const char *filename) {
12016 return(xmlSAXParseFile(NULL, filename, 0));
12017}
12018
12019/**
12020 * xmlRecoverFile:
12021 * @filename: the filename
12022 *
12023 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
12024 * compressed document is provided by default if found at compile-time.
12025 * In the case the document is not Well Formed, a tree is built anyway
12026 *
12027 * Returns the resulting document tree
12028 */
12029
12030xmlDocPtr
12031xmlRecoverFile(const char *filename) {
12032 return(xmlSAXParseFile(NULL, filename, 1));
12033}
12034
12035
12036/**
12037 * xmlSetupParserForBuffer:
12038 * @ctxt: an XML parser context
12039 * @buffer: a xmlChar * buffer
12040 * @filename: a file name
12041 *
12042 * Setup the parser context to parse a new buffer; Clears any prior
12043 * contents from the parser context. The buffer parameter must not be
12044 * NULL, but the filename parameter can be
12045 */
12046void
12047xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
12048 const char* filename)
12049{
12050 xmlParserInputPtr input;
12051
Daniel Veillard36e5cd52004-11-02 14:52:23 +000012052 if ((ctxt == NULL) || (buffer == NULL))
12053 return;
12054
Owen Taylor3473f882001-02-23 17:55:21 +000012055 input = xmlNewInputStream(ctxt);
12056 if (input == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +000012057 xmlErrMemory(NULL, "parsing new buffer: out of memory\n");
Daniel Veillard36e5cd52004-11-02 14:52:23 +000012058 xmlClearParserCtxt(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000012059 return;
12060 }
12061
12062 xmlClearParserCtxt(ctxt);
12063 if (filename != NULL)
Daniel Veillardc3ca5ba2003-05-09 22:26:28 +000012064 input->filename = (char *) xmlCanonicPath((const xmlChar *)filename);
Owen Taylor3473f882001-02-23 17:55:21 +000012065 input->base = buffer;
12066 input->cur = buffer;
Daniel Veillard48b2f892001-02-25 16:11:03 +000012067 input->end = &buffer[xmlStrlen(buffer)];
Owen Taylor3473f882001-02-23 17:55:21 +000012068 inputPush(ctxt, input);
12069}
12070
12071/**
12072 * xmlSAXUserParseFile:
12073 * @sax: a SAX handler
12074 * @user_data: The user data returned on SAX callbacks
12075 * @filename: a file name
12076 *
12077 * parse an XML file and call the given SAX handler routines.
12078 * Automatic support for ZLIB/Compress compressed document is provided
12079 *
12080 * Returns 0 in case of success or a error number otherwise
12081 */
12082int
12083xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
12084 const char *filename) {
12085 int ret = 0;
12086 xmlParserCtxtPtr ctxt;
12087
12088 ctxt = xmlCreateFileParserCtxt(filename);
12089 if (ctxt == NULL) return -1;
Daniel Veillard81273902003-09-30 00:43:48 +000012090#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard092643b2003-09-25 14:29:29 +000012091 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Daniel Veillard81273902003-09-30 00:43:48 +000012092#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012093 xmlFree(ctxt->sax);
12094 ctxt->sax = sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000012095 xmlDetectSAX2(ctxt);
12096
Owen Taylor3473f882001-02-23 17:55:21 +000012097 if (user_data != NULL)
12098 ctxt->userData = user_data;
12099
12100 xmlParseDocument(ctxt);
12101
12102 if (ctxt->wellFormed)
12103 ret = 0;
12104 else {
12105 if (ctxt->errNo != 0)
12106 ret = ctxt->errNo;
12107 else
12108 ret = -1;
12109 }
12110 if (sax != NULL)
12111 ctxt->sax = NULL;
Daniel Veillard34099b42004-11-04 17:34:35 +000012112 if (ctxt->myDoc != NULL) {
12113 xmlFreeDoc(ctxt->myDoc);
12114 ctxt->myDoc = NULL;
12115 }
Owen Taylor3473f882001-02-23 17:55:21 +000012116 xmlFreeParserCtxt(ctxt);
12117
12118 return ret;
12119}
Daniel Veillard81273902003-09-30 00:43:48 +000012120#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012121
12122/************************************************************************
12123 * *
12124 * Front ends when parsing from memory *
12125 * *
12126 ************************************************************************/
12127
12128/**
12129 * xmlCreateMemoryParserCtxt:
12130 * @buffer: a pointer to a char array
12131 * @size: the size of the array
12132 *
12133 * Create a parser context for an XML in-memory document.
12134 *
12135 * Returns the new parser context or NULL
12136 */
12137xmlParserCtxtPtr
Daniel Veillardfd7ddca2001-05-16 10:57:35 +000012138xmlCreateMemoryParserCtxt(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000012139 xmlParserCtxtPtr ctxt;
12140 xmlParserInputPtr input;
12141 xmlParserInputBufferPtr buf;
12142
12143 if (buffer == NULL)
12144 return(NULL);
12145 if (size <= 0)
12146 return(NULL);
12147
12148 ctxt = xmlNewParserCtxt();
12149 if (ctxt == NULL)
12150 return(NULL);
12151
Daniel Veillard53350552003-09-18 13:35:51 +000012152 /* TODO: xmlParserInputBufferCreateStatic, requires some serious changes */
Owen Taylor3473f882001-02-23 17:55:21 +000012153 buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
Daniel Veillarda7e05b42002-11-19 08:11:14 +000012154 if (buf == NULL) {
12155 xmlFreeParserCtxt(ctxt);
12156 return(NULL);
12157 }
Owen Taylor3473f882001-02-23 17:55:21 +000012158
12159 input = xmlNewInputStream(ctxt);
12160 if (input == NULL) {
Daniel Veillarda7e05b42002-11-19 08:11:14 +000012161 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000012162 xmlFreeParserCtxt(ctxt);
12163 return(NULL);
12164 }
12165
12166 input->filename = NULL;
12167 input->buf = buf;
12168 input->base = input->buf->buffer->content;
12169 input->cur = input->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +000012170 input->end = &input->buf->buffer->content[input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000012171
12172 inputPush(ctxt, input);
12173 return(ctxt);
12174}
12175
Daniel Veillard81273902003-09-30 00:43:48 +000012176#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000012177/**
Daniel Veillard8606bbb2002-11-12 12:36:52 +000012178 * xmlSAXParseMemoryWithData:
12179 * @sax: the SAX handler block
12180 * @buffer: an pointer to a char array
12181 * @size: the size of the array
12182 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
12183 * documents
12184 * @data: the userdata
12185 *
12186 * parse an XML in-memory block and use the given SAX function block
12187 * to handle the parsing callback. If sax is NULL, fallback to the default
12188 * DOM tree building routines.
12189 *
12190 * User data (void *) is stored within the parser context in the
12191 * context's _private member, so it is available nearly everywhere in libxml
12192 *
12193 * Returns the resulting document tree
12194 */
12195
12196xmlDocPtr
12197xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer,
12198 int size, int recovery, void *data) {
12199 xmlDocPtr ret;
12200 xmlParserCtxtPtr ctxt;
12201
12202 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
12203 if (ctxt == NULL) return(NULL);
12204 if (sax != NULL) {
12205 if (ctxt->sax != NULL)
12206 xmlFree(ctxt->sax);
12207 ctxt->sax = sax;
12208 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000012209 xmlDetectSAX2(ctxt);
Daniel Veillard8606bbb2002-11-12 12:36:52 +000012210 if (data!=NULL) {
12211 ctxt->_private=data;
12212 }
12213
Daniel Veillardadba5f12003-04-04 16:09:01 +000012214 ctxt->recovery = recovery;
12215
Daniel Veillard8606bbb2002-11-12 12:36:52 +000012216 xmlParseDocument(ctxt);
12217
12218 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
12219 else {
12220 ret = NULL;
12221 xmlFreeDoc(ctxt->myDoc);
12222 ctxt->myDoc = NULL;
12223 }
12224 if (sax != NULL)
12225 ctxt->sax = NULL;
12226 xmlFreeParserCtxt(ctxt);
12227
12228 return(ret);
12229}
12230
12231/**
Owen Taylor3473f882001-02-23 17:55:21 +000012232 * xmlSAXParseMemory:
12233 * @sax: the SAX handler block
12234 * @buffer: an pointer to a char array
12235 * @size: the size of the array
12236 * @recovery: work in recovery mode, i.e. tries to read not Well Formed
12237 * documents
12238 *
12239 * parse an XML in-memory block and use the given SAX function block
12240 * to handle the parsing callback. If sax is NULL, fallback to the default
12241 * DOM tree building routines.
12242 *
12243 * Returns the resulting document tree
12244 */
12245xmlDocPtr
Daniel Veillard50822cb2001-07-26 20:05:51 +000012246xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer,
12247 int size, int recovery) {
Daniel Veillard8606bbb2002-11-12 12:36:52 +000012248 return xmlSAXParseMemoryWithData(sax, buffer, size, recovery, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000012249}
12250
12251/**
12252 * xmlParseMemory:
12253 * @buffer: an pointer to a char array
12254 * @size: the size of the array
12255 *
12256 * parse an XML in-memory block and build a tree.
12257 *
12258 * Returns the resulting document tree
12259 */
12260
Daniel Veillard50822cb2001-07-26 20:05:51 +000012261xmlDocPtr xmlParseMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000012262 return(xmlSAXParseMemory(NULL, buffer, size, 0));
12263}
12264
12265/**
12266 * xmlRecoverMemory:
12267 * @buffer: an pointer to a char array
12268 * @size: the size of the array
12269 *
12270 * parse an XML in-memory block and build a tree.
12271 * In the case the document is not Well Formed, a tree is built anyway
12272 *
12273 * Returns the resulting document tree
12274 */
12275
Daniel Veillard50822cb2001-07-26 20:05:51 +000012276xmlDocPtr xmlRecoverMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000012277 return(xmlSAXParseMemory(NULL, buffer, size, 1));
12278}
12279
12280/**
12281 * xmlSAXUserParseMemory:
12282 * @sax: a SAX handler
12283 * @user_data: The user data returned on SAX callbacks
12284 * @buffer: an in-memory XML document input
12285 * @size: the length of the XML document in bytes
12286 *
12287 * A better SAX parsing routine.
12288 * parse an XML in-memory buffer and call the given SAX handler routines.
12289 *
12290 * Returns 0 in case of success or a error number otherwise
12291 */
12292int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardfd7ddca2001-05-16 10:57:35 +000012293 const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000012294 int ret = 0;
12295 xmlParserCtxtPtr ctxt;
12296 xmlSAXHandlerPtr oldsax = NULL;
12297
Daniel Veillard9e923512002-08-14 08:48:52 +000012298 if (sax == NULL) return -1;
Owen Taylor3473f882001-02-23 17:55:21 +000012299 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
12300 if (ctxt == NULL) return -1;
Daniel Veillard9e923512002-08-14 08:48:52 +000012301 oldsax = ctxt->sax;
12302 ctxt->sax = sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000012303 xmlDetectSAX2(ctxt);
Daniel Veillard30211a02001-04-26 09:33:18 +000012304 if (user_data != NULL)
12305 ctxt->userData = user_data;
Owen Taylor3473f882001-02-23 17:55:21 +000012306
12307 xmlParseDocument(ctxt);
12308
12309 if (ctxt->wellFormed)
12310 ret = 0;
12311 else {
12312 if (ctxt->errNo != 0)
12313 ret = ctxt->errNo;
12314 else
12315 ret = -1;
12316 }
Daniel Veillard9e923512002-08-14 08:48:52 +000012317 ctxt->sax = oldsax;
Daniel Veillard34099b42004-11-04 17:34:35 +000012318 if (ctxt->myDoc != NULL) {
12319 xmlFreeDoc(ctxt->myDoc);
12320 ctxt->myDoc = NULL;
12321 }
Owen Taylor3473f882001-02-23 17:55:21 +000012322 xmlFreeParserCtxt(ctxt);
12323
12324 return ret;
12325}
Daniel Veillard81273902003-09-30 00:43:48 +000012326#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012327
12328/**
12329 * xmlCreateDocParserCtxt:
12330 * @cur: a pointer to an array of xmlChar
12331 *
12332 * Creates a parser context for an XML in-memory document.
12333 *
12334 * Returns the new parser context or NULL
12335 */
12336xmlParserCtxtPtr
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012337xmlCreateDocParserCtxt(const xmlChar *cur) {
Owen Taylor3473f882001-02-23 17:55:21 +000012338 int len;
12339
12340 if (cur == NULL)
12341 return(NULL);
12342 len = xmlStrlen(cur);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012343 return(xmlCreateMemoryParserCtxt((const char *)cur, len));
Owen Taylor3473f882001-02-23 17:55:21 +000012344}
12345
Daniel Veillard81273902003-09-30 00:43:48 +000012346#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000012347/**
12348 * xmlSAXParseDoc:
12349 * @sax: the SAX handler block
12350 * @cur: a pointer to an array of xmlChar
12351 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
12352 * documents
12353 *
12354 * parse an XML in-memory document and build a tree.
12355 * It use the given SAX function block to handle the parsing callback.
12356 * If sax is NULL, fallback to the default DOM tree building routines.
12357 *
12358 * Returns the resulting document tree
12359 */
12360
12361xmlDocPtr
Daniel Veillard7331e5c2005-03-31 14:59:00 +000012362xmlSAXParseDoc(xmlSAXHandlerPtr sax, const xmlChar *cur, int recovery) {
Owen Taylor3473f882001-02-23 17:55:21 +000012363 xmlDocPtr ret;
12364 xmlParserCtxtPtr ctxt;
Daniel Veillard38936062004-11-04 17:45:11 +000012365 xmlSAXHandlerPtr oldsax = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000012366
Daniel Veillard38936062004-11-04 17:45:11 +000012367 if (cur == NULL) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000012368
12369
12370 ctxt = xmlCreateDocParserCtxt(cur);
12371 if (ctxt == NULL) return(NULL);
12372 if (sax != NULL) {
Daniel Veillard38936062004-11-04 17:45:11 +000012373 oldsax = ctxt->sax;
Owen Taylor3473f882001-02-23 17:55:21 +000012374 ctxt->sax = sax;
12375 ctxt->userData = NULL;
12376 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000012377 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000012378
12379 xmlParseDocument(ctxt);
12380 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
12381 else {
12382 ret = NULL;
12383 xmlFreeDoc(ctxt->myDoc);
12384 ctxt->myDoc = NULL;
12385 }
Daniel Veillard34099b42004-11-04 17:34:35 +000012386 if (sax != NULL)
Daniel Veillard38936062004-11-04 17:45:11 +000012387 ctxt->sax = oldsax;
Owen Taylor3473f882001-02-23 17:55:21 +000012388 xmlFreeParserCtxt(ctxt);
12389
12390 return(ret);
12391}
12392
12393/**
12394 * xmlParseDoc:
12395 * @cur: a pointer to an array of xmlChar
12396 *
12397 * parse an XML in-memory document and build a tree.
12398 *
12399 * Returns the resulting document tree
12400 */
12401
12402xmlDocPtr
Daniel Veillard7331e5c2005-03-31 14:59:00 +000012403xmlParseDoc(const xmlChar *cur) {
Owen Taylor3473f882001-02-23 17:55:21 +000012404 return(xmlSAXParseDoc(NULL, cur, 0));
12405}
Daniel Veillard81273902003-09-30 00:43:48 +000012406#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012407
Daniel Veillard81273902003-09-30 00:43:48 +000012408#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +000012409/************************************************************************
12410 * *
12411 * Specific function to keep track of entities references *
12412 * and used by the XSLT debugger *
12413 * *
12414 ************************************************************************/
12415
12416static xmlEntityReferenceFunc xmlEntityRefFunc = NULL;
12417
12418/**
12419 * xmlAddEntityReference:
12420 * @ent : A valid entity
12421 * @firstNode : A valid first node for children of entity
12422 * @lastNode : A valid last node of children entity
12423 *
12424 * Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY
12425 */
12426static void
12427xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
12428 xmlNodePtr lastNode)
12429{
12430 if (xmlEntityRefFunc != NULL) {
12431 (*xmlEntityRefFunc) (ent, firstNode, lastNode);
12432 }
12433}
12434
12435
12436/**
12437 * xmlSetEntityReferenceFunc:
Daniel Veillard01c13b52002-12-10 15:19:08 +000012438 * @func: A valid function
Daniel Veillard8107a222002-01-13 14:10:10 +000012439 *
12440 * Set the function to call call back when a xml reference has been made
12441 */
12442void
12443xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func)
12444{
12445 xmlEntityRefFunc = func;
12446}
Daniel Veillard81273902003-09-30 00:43:48 +000012447#endif /* LIBXML_LEGACY_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012448
12449/************************************************************************
12450 * *
12451 * Miscellaneous *
12452 * *
12453 ************************************************************************/
12454
12455#ifdef LIBXML_XPATH_ENABLED
12456#include <libxml/xpath.h>
12457#endif
12458
Daniel Veillardffa3c742005-07-21 13:24:09 +000012459extern void XMLCDECL xmlGenericErrorDefaultFunc(void *ctx, const char *msg, ...);
Owen Taylor3473f882001-02-23 17:55:21 +000012460static int xmlParserInitialized = 0;
12461
12462/**
12463 * xmlInitParser:
12464 *
12465 * Initialization function for the XML parser.
12466 * This is not reentrant. Call once before processing in case of
12467 * use in multithreaded programs.
12468 */
12469
12470void
12471xmlInitParser(void) {
Daniel Veillard3c01b1d2001-10-17 15:58:35 +000012472 if (xmlParserInitialized != 0)
12473 return;
Owen Taylor3473f882001-02-23 17:55:21 +000012474
Daniel Veillarddb5850a2002-01-18 11:49:26 +000012475 if ((xmlGenericError == xmlGenericErrorDefaultFunc) ||
12476 (xmlGenericError == NULL))
12477 initGenericErrorDefaultFunc(NULL);
Daniel Veillard781ac8b2003-05-15 22:11:36 +000012478 xmlInitGlobals();
Daniel Veillardd0463562001-10-13 09:15:48 +000012479 xmlInitThreads();
Daniel Veillard6f350292001-10-14 09:56:15 +000012480 xmlInitMemory();
Owen Taylor3473f882001-02-23 17:55:21 +000012481 xmlInitCharEncodingHandlers();
Owen Taylor3473f882001-02-23 17:55:21 +000012482 xmlDefaultSAXHandlerInit();
12483 xmlRegisterDefaultInputCallbacks();
Daniel Veillarda9cce9c2003-09-29 13:20:24 +000012484#ifdef LIBXML_OUTPUT_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000012485 xmlRegisterDefaultOutputCallbacks();
Daniel Veillarda9cce9c2003-09-29 13:20:24 +000012486#endif /* LIBXML_OUTPUT_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012487#ifdef LIBXML_HTML_ENABLED
12488 htmlInitAutoClose();
12489 htmlDefaultSAXHandlerInit();
12490#endif
12491#ifdef LIBXML_XPATH_ENABLED
12492 xmlXPathInit();
12493#endif
12494 xmlParserInitialized = 1;
12495}
12496
12497/**
12498 * xmlCleanupParser:
12499 *
Daniel Veillardd8cf9062003-11-11 21:12:36 +000012500 * Cleanup function for the XML library. It tries to reclaim all
12501 * parsing related global memory allocated for the library processing.
Owen Taylor3473f882001-02-23 17:55:21 +000012502 * It doesn't deallocate any document related memory. Calling this
Daniel Veillardd8cf9062003-11-11 21:12:36 +000012503 * function should not prevent reusing the library but one should
12504 * call xmlCleanupParser() only when the process has
Daniel Veillard7424eb62003-01-24 14:14:52 +000012505 * finished using the library or XML document built with it.
Owen Taylor3473f882001-02-23 17:55:21 +000012506 */
12507
12508void
12509xmlCleanupParser(void) {
Daniel Veillard7fb801f2003-08-17 21:07:26 +000012510 if (!xmlParserInitialized)
12511 return;
12512
Owen Taylor3473f882001-02-23 17:55:21 +000012513 xmlCleanupCharEncodingHandlers();
Daniel Veillarde2940dd2001-08-22 00:06:49 +000012514#ifdef LIBXML_CATALOG_ENABLED
12515 xmlCatalogCleanup();
12516#endif
Daniel Veillard14412512005-01-21 23:53:26 +000012517 xmlDictCleanup();
Daniel Veillard04054be2003-10-15 10:48:54 +000012518 xmlCleanupInputCallbacks();
12519#ifdef LIBXML_OUTPUT_ENABLED
12520 xmlCleanupOutputCallbacks();
12521#endif
Daniel Veillard36e5cd52004-11-02 14:52:23 +000012522#ifdef LIBXML_SCHEMAS_ENABLED
12523 xmlSchemaCleanupTypes();
Daniel Veillarddd6d3002004-11-03 14:20:29 +000012524 xmlRelaxNGCleanupTypes();
Daniel Veillard36e5cd52004-11-02 14:52:23 +000012525#endif
Daniel Veillard781ac8b2003-05-15 22:11:36 +000012526 xmlCleanupGlobals();
Daniel Veillard2b8c4a12003-10-02 22:28:19 +000012527 xmlResetLastError();
Daniel Veillard74c0e592003-11-25 07:01:38 +000012528 xmlCleanupThreads(); /* must be last if called not from the main thread */
William M. Brack72ee48d2003-12-30 08:30:19 +000012529 xmlCleanupMemory();
Daniel Veillardd0463562001-10-13 09:15:48 +000012530 xmlParserInitialized = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000012531}
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012532
12533/************************************************************************
12534 * *
12535 * New set (2.6.0) of simpler and more flexible APIs *
12536 * *
12537 ************************************************************************/
12538
/**
 * DICT_FREE:
 * @str:  a string
 *
 * Free a string if it is not owned by the "dict" dictionary in the
 * current scope. A variable named dict must be visible where the macro
 * is used; when it is NULL the string is always freed. A NULL @str is
 * ignored.
 *
 * The expansion is a single do { } while (0) statement, so the macro
 * is safe inside an unbraced if/else. The invocation must be followed
 * by a semicolon.
 */
#define DICT_FREE(str)						\
    do {							\
	if ((str) && ((!dict) ||				\
	    (xmlDictOwns(dict, (const xmlChar *)(str)) == 0)))	\
	    xmlFree((char *)(str));				\
    } while (0)
12550
12551/**
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012552 * xmlCtxtReset:
12553 * @ctxt: an XML parser context
12554 *
12555 * Reset a parser context
12556 */
12557void
12558xmlCtxtReset(xmlParserCtxtPtr ctxt)
12559{
12560 xmlParserInputPtr input;
Daniel Veillard36e5cd52004-11-02 14:52:23 +000012561 xmlDictPtr dict;
12562
12563 if (ctxt == NULL)
12564 return;
12565
12566 dict = ctxt->dict;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012567
12568 while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
12569 xmlFreeInputStream(input);
12570 }
12571 ctxt->inputNr = 0;
12572 ctxt->input = NULL;
12573
12574 ctxt->spaceNr = 0;
12575 ctxt->spaceTab[0] = -1;
12576 ctxt->space = &ctxt->spaceTab[0];
12577
12578
12579 ctxt->nodeNr = 0;
12580 ctxt->node = NULL;
12581
12582 ctxt->nameNr = 0;
12583 ctxt->name = NULL;
12584
Daniel Veillarde96a2a42003-09-24 21:23:56 +000012585 DICT_FREE(ctxt->version);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012586 ctxt->version = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000012587 DICT_FREE(ctxt->encoding);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012588 ctxt->encoding = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000012589 DICT_FREE(ctxt->directory);
12590 ctxt->directory = NULL;
12591 DICT_FREE(ctxt->extSubURI);
12592 ctxt->extSubURI = NULL;
12593 DICT_FREE(ctxt->extSubSystem);
12594 ctxt->extSubSystem = NULL;
12595 if (ctxt->myDoc != NULL)
12596 xmlFreeDoc(ctxt->myDoc);
12597 ctxt->myDoc = NULL;
12598
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012599 ctxt->standalone = -1;
12600 ctxt->hasExternalSubset = 0;
12601 ctxt->hasPErefs = 0;
12602 ctxt->html = 0;
12603 ctxt->external = 0;
12604 ctxt->instate = XML_PARSER_START;
12605 ctxt->token = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012606
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012607 ctxt->wellFormed = 1;
12608 ctxt->nsWellFormed = 1;
Daniel Veillardae289182004-01-21 16:00:43 +000012609 ctxt->disableSAX = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012610 ctxt->valid = 1;
Daniel Veillard766c4f92004-03-26 10:48:29 +000012611#if 0
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012612 ctxt->vctxt.userData = ctxt;
12613 ctxt->vctxt.error = xmlParserValidityError;
12614 ctxt->vctxt.warning = xmlParserValidityWarning;
Daniel Veillard766c4f92004-03-26 10:48:29 +000012615#endif
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012616 ctxt->record_info = 0;
12617 ctxt->nbChars = 0;
12618 ctxt->checkIndex = 0;
12619 ctxt->inSubset = 0;
12620 ctxt->errNo = XML_ERR_OK;
12621 ctxt->depth = 0;
12622 ctxt->charset = XML_CHAR_ENCODING_UTF8;
12623 ctxt->catalogs = NULL;
12624 xmlInitNodeInfoSeq(&ctxt->node_seq);
12625
12626 if (ctxt->attsDefault != NULL) {
12627 xmlHashFree(ctxt->attsDefault, (xmlHashDeallocator) xmlFree);
12628 ctxt->attsDefault = NULL;
12629 }
12630 if (ctxt->attsSpecial != NULL) {
12631 xmlHashFree(ctxt->attsSpecial, NULL);
12632 ctxt->attsSpecial = NULL;
12633 }
12634
Daniel Veillard4432df22003-09-28 18:58:27 +000012635#ifdef LIBXML_CATALOG_ENABLED
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012636 if (ctxt->catalogs != NULL)
12637 xmlCatalogFreeLocal(ctxt->catalogs);
Daniel Veillard4432df22003-09-28 18:58:27 +000012638#endif
Daniel Veillardcc199e02003-10-24 21:11:48 +000012639 if (ctxt->lastError.code != XML_ERR_OK)
12640 xmlResetError(&ctxt->lastError);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012641}
12642
12643/**
Daniel Veillard9ba8e382003-10-28 21:31:45 +000012644 * xmlCtxtResetPush:
12645 * @ctxt: an XML parser context
12646 * @chunk: a pointer to an array of chars
12647 * @size: number of chars in the array
12648 * @filename: an optional file name or URI
12649 * @encoding: the document encoding, or NULL
12650 *
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000012651 * Reset a push parser context
12652 *
12653 * Returns 0 in case of success and 1 in case of error
Daniel Veillard9ba8e382003-10-28 21:31:45 +000012654 */
12655int
12656xmlCtxtResetPush(xmlParserCtxtPtr ctxt, const char *chunk,
12657 int size, const char *filename, const char *encoding)
12658{
12659 xmlParserInputPtr inputStream;
12660 xmlParserInputBufferPtr buf;
12661 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
12662
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000012663 if (ctxt == NULL)
12664 return(1);
12665
Daniel Veillard9ba8e382003-10-28 21:31:45 +000012666 if ((encoding == NULL) && (chunk != NULL) && (size >= 4))
12667 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
12668
12669 buf = xmlAllocParserInputBuffer(enc);
12670 if (buf == NULL)
12671 return(1);
12672
12673 if (ctxt == NULL) {
12674 xmlFreeParserInputBuffer(buf);
12675 return(1);
12676 }
12677
12678 xmlCtxtReset(ctxt);
12679
12680 if (ctxt->pushTab == NULL) {
12681 ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 *
12682 sizeof(xmlChar *));
12683 if (ctxt->pushTab == NULL) {
12684 xmlErrMemory(ctxt, NULL);
12685 xmlFreeParserInputBuffer(buf);
12686 return(1);
12687 }
12688 }
12689
12690 if (filename == NULL) {
12691 ctxt->directory = NULL;
12692 } else {
12693 ctxt->directory = xmlParserGetDirectory(filename);
12694 }
12695
12696 inputStream = xmlNewInputStream(ctxt);
12697 if (inputStream == NULL) {
12698 xmlFreeParserInputBuffer(buf);
12699 return(1);
12700 }
12701
12702 if (filename == NULL)
12703 inputStream->filename = NULL;
12704 else
12705 inputStream->filename = (char *)
12706 xmlCanonicPath((const xmlChar *) filename);
12707 inputStream->buf = buf;
12708 inputStream->base = inputStream->buf->buffer->content;
12709 inputStream->cur = inputStream->buf->buffer->content;
12710 inputStream->end =
12711 &inputStream->buf->buffer->content[inputStream->buf->buffer->use];
12712
12713 inputPush(ctxt, inputStream);
12714
12715 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
12716 (ctxt->input->buf != NULL)) {
12717 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
12718 int cur = ctxt->input->cur - ctxt->input->base;
12719
12720 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
12721
12722 ctxt->input->base = ctxt->input->buf->buffer->content + base;
12723 ctxt->input->cur = ctxt->input->base + cur;
12724 ctxt->input->end =
12725 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->
12726 use];
12727#ifdef DEBUG_PUSH
12728 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
12729#endif
12730 }
12731
12732 if (encoding != NULL) {
12733 xmlCharEncodingHandlerPtr hdlr;
12734
12735 hdlr = xmlFindCharEncodingHandler(encoding);
12736 if (hdlr != NULL) {
12737 xmlSwitchToEncoding(ctxt, hdlr);
12738 } else {
12739 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
12740 "Unsupported encoding %s\n", BAD_CAST encoding);
12741 }
12742 } else if (enc != XML_CHAR_ENCODING_NONE) {
12743 xmlSwitchEncoding(ctxt, enc);
12744 }
12745
12746 return(0);
12747}
12748
12749/**
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012750 * xmlCtxtUseOptions:
12751 * @ctxt: an XML parser context
Daniel Veillard87ab1c12003-12-21 13:01:56 +000012752 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012753 *
12754 * Applies the options to the parser context
12755 *
12756 * Returns 0 in case of success, the set of unknown or unimplemented options
12757 * in case of error.
12758 */
12759int
12760xmlCtxtUseOptions(xmlParserCtxtPtr ctxt, int options)
12761{
Daniel Veillard36e5cd52004-11-02 14:52:23 +000012762 if (ctxt == NULL)
12763 return(-1);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012764 if (options & XML_PARSE_RECOVER) {
12765 ctxt->recovery = 1;
12766 options -= XML_PARSE_RECOVER;
12767 } else
12768 ctxt->recovery = 0;
12769 if (options & XML_PARSE_DTDLOAD) {
12770 ctxt->loadsubset = XML_DETECT_IDS;
12771 options -= XML_PARSE_DTDLOAD;
12772 } else
12773 ctxt->loadsubset = 0;
12774 if (options & XML_PARSE_DTDATTR) {
12775 ctxt->loadsubset |= XML_COMPLETE_ATTRS;
12776 options -= XML_PARSE_DTDATTR;
12777 }
12778 if (options & XML_PARSE_NOENT) {
12779 ctxt->replaceEntities = 1;
12780 /* ctxt->loadsubset |= XML_DETECT_IDS; */
12781 options -= XML_PARSE_NOENT;
12782 } else
12783 ctxt->replaceEntities = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012784 if (options & XML_PARSE_PEDANTIC) {
12785 ctxt->pedantic = 1;
12786 options -= XML_PARSE_PEDANTIC;
12787 } else
12788 ctxt->pedantic = 0;
12789 if (options & XML_PARSE_NOBLANKS) {
12790 ctxt->keepBlanks = 0;
12791 ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace;
12792 options -= XML_PARSE_NOBLANKS;
12793 } else
12794 ctxt->keepBlanks = 1;
12795 if (options & XML_PARSE_DTDVALID) {
12796 ctxt->validate = 1;
12797 if (options & XML_PARSE_NOWARNING)
12798 ctxt->vctxt.warning = NULL;
12799 if (options & XML_PARSE_NOERROR)
12800 ctxt->vctxt.error = NULL;
12801 options -= XML_PARSE_DTDVALID;
12802 } else
12803 ctxt->validate = 0;
Daniel Veillard971771e2005-07-09 17:32:57 +000012804 if (options & XML_PARSE_NOWARNING) {
12805 ctxt->sax->warning = NULL;
12806 options -= XML_PARSE_NOWARNING;
12807 }
12808 if (options & XML_PARSE_NOERROR) {
12809 ctxt->sax->error = NULL;
12810 ctxt->sax->fatalError = NULL;
12811 options -= XML_PARSE_NOERROR;
12812 }
Daniel Veillard81273902003-09-30 00:43:48 +000012813#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012814 if (options & XML_PARSE_SAX1) {
12815 ctxt->sax->startElement = xmlSAX2StartElement;
12816 ctxt->sax->endElement = xmlSAX2EndElement;
12817 ctxt->sax->startElementNs = NULL;
12818 ctxt->sax->endElementNs = NULL;
12819 ctxt->sax->initialized = 1;
12820 options -= XML_PARSE_SAX1;
12821 }
Daniel Veillard81273902003-09-30 00:43:48 +000012822#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde96a2a42003-09-24 21:23:56 +000012823 if (options & XML_PARSE_NODICT) {
12824 ctxt->dictNames = 0;
12825 options -= XML_PARSE_NODICT;
12826 } else {
12827 ctxt->dictNames = 1;
12828 }
Daniel Veillarddca8cc72003-09-26 13:53:14 +000012829 if (options & XML_PARSE_NOCDATA) {
12830 ctxt->sax->cdataBlock = NULL;
12831 options -= XML_PARSE_NOCDATA;
12832 }
12833 if (options & XML_PARSE_NSCLEAN) {
12834 ctxt->options |= XML_PARSE_NSCLEAN;
12835 options -= XML_PARSE_NSCLEAN;
12836 }
Daniel Veillard61b93382003-11-03 14:28:31 +000012837 if (options & XML_PARSE_NONET) {
12838 ctxt->options |= XML_PARSE_NONET;
12839 options -= XML_PARSE_NONET;
12840 }
Daniel Veillard7ec29972003-10-31 14:36:36 +000012841 ctxt->linenumbers = 1;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012842 return (options);
12843}
12844
12845/**
12846 * xmlDoRead:
12847 * @ctxt: an XML parser context
Daniel Veillard60942de2003-09-25 21:05:58 +000012848 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012849 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000012850 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012851 * @reuse: keep the context for reuse
12852 *
12853 * Common front-end for the xmlRead functions
12854 *
12855 * Returns the resulting document tree or NULL
12856 */
12857static xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000012858xmlDoRead(xmlParserCtxtPtr ctxt, const char *URL, const char *encoding,
12859 int options, int reuse)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012860{
12861 xmlDocPtr ret;
12862
12863 xmlCtxtUseOptions(ctxt, options);
12864 if (encoding != NULL) {
12865 xmlCharEncodingHandlerPtr hdlr;
12866
12867 hdlr = xmlFindCharEncodingHandler(encoding);
12868 if (hdlr != NULL)
12869 xmlSwitchToEncoding(ctxt, hdlr);
12870 }
Daniel Veillard60942de2003-09-25 21:05:58 +000012871 if ((URL != NULL) && (ctxt->input != NULL) &&
12872 (ctxt->input->filename == NULL))
12873 ctxt->input->filename = (char *) xmlStrdup((const xmlChar *) URL);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012874 xmlParseDocument(ctxt);
12875 if ((ctxt->wellFormed) || ctxt->recovery)
12876 ret = ctxt->myDoc;
12877 else {
12878 ret = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000012879 if (ctxt->myDoc != NULL) {
Daniel Veillarde96a2a42003-09-24 21:23:56 +000012880 xmlFreeDoc(ctxt->myDoc);
12881 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012882 }
Daniel Veillarde96a2a42003-09-24 21:23:56 +000012883 ctxt->myDoc = NULL;
12884 if (!reuse) {
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012885 xmlFreeParserCtxt(ctxt);
Daniel Veillarde96a2a42003-09-24 21:23:56 +000012886 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012887
12888 return (ret);
12889}
12890
12891/**
12892 * xmlReadDoc:
12893 * @cur: a pointer to a zero terminated string
Daniel Veillard60942de2003-09-25 21:05:58 +000012894 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012895 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000012896 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012897 *
12898 * parse an XML in-memory document and build a tree.
12899 *
12900 * Returns the resulting document tree
12901 */
12902xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000012903xmlReadDoc(const xmlChar * cur, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012904{
12905 xmlParserCtxtPtr ctxt;
12906
12907 if (cur == NULL)
12908 return (NULL);
12909
12910 ctxt = xmlCreateDocParserCtxt(cur);
12911 if (ctxt == NULL)
12912 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000012913 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012914}
12915
12916/**
12917 * xmlReadFile:
12918 * @filename: a file or URL
12919 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000012920 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012921 *
12922 * parse an XML file from the filesystem or the network.
12923 *
12924 * Returns the resulting document tree
12925 */
12926xmlDocPtr
12927xmlReadFile(const char *filename, const char *encoding, int options)
12928{
12929 xmlParserCtxtPtr ctxt;
12930
Daniel Veillard61b93382003-11-03 14:28:31 +000012931 ctxt = xmlCreateURLParserCtxt(filename, options);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012932 if (ctxt == NULL)
12933 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000012934 return (xmlDoRead(ctxt, NULL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012935}
12936
12937/**
12938 * xmlReadMemory:
12939 * @buffer: a pointer to a char array
12940 * @size: the size of the array
Daniel Veillard60942de2003-09-25 21:05:58 +000012941 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012942 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000012943 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012944 *
12945 * parse an XML in-memory document and build a tree.
12946 *
12947 * Returns the resulting document tree
12948 */
12949xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000012950xmlReadMemory(const char *buffer, int size, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012951{
12952 xmlParserCtxtPtr ctxt;
12953
12954 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
12955 if (ctxt == NULL)
12956 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000012957 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012958}
12959
12960/**
12961 * xmlReadFd:
12962 * @fd: an open file descriptor
Daniel Veillard60942de2003-09-25 21:05:58 +000012963 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012964 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000012965 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012966 *
12967 * parse an XML from a file descriptor and build a tree.
Daniel Veillard4bc5f432003-12-22 18:13:12 +000012968 * NOTE that the file descriptor will not be closed when the
12969 * reader is closed or reset.
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012970 *
12971 * Returns the resulting document tree
12972 */
12973xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000012974xmlReadFd(int fd, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012975{
12976 xmlParserCtxtPtr ctxt;
12977 xmlParserInputBufferPtr input;
12978 xmlParserInputPtr stream;
12979
12980 if (fd < 0)
12981 return (NULL);
12982
12983 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
12984 if (input == NULL)
12985 return (NULL);
Daniel Veillard4bc5f432003-12-22 18:13:12 +000012986 input->closecallback = NULL;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012987 ctxt = xmlNewParserCtxt();
12988 if (ctxt == NULL) {
12989 xmlFreeParserInputBuffer(input);
12990 return (NULL);
12991 }
12992 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
12993 if (stream == NULL) {
12994 xmlFreeParserInputBuffer(input);
12995 xmlFreeParserCtxt(ctxt);
12996 return (NULL);
12997 }
12998 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000012999 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013000}
13001
13002/**
13003 * xmlReadIO:
13004 * @ioread: an I/O read function
13005 * @ioclose: an I/O close function
13006 * @ioctx: an I/O handler
Daniel Veillard60942de2003-09-25 21:05:58 +000013007 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013008 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013009 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013010 *
13011 * parse an XML document from I/O functions and source and build a tree.
13012 *
13013 * Returns the resulting document tree
13014 */
13015xmlDocPtr
13016xmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
Daniel Veillard60942de2003-09-25 21:05:58 +000013017 void *ioctx, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013018{
13019 xmlParserCtxtPtr ctxt;
13020 xmlParserInputBufferPtr input;
13021 xmlParserInputPtr stream;
13022
13023 if (ioread == NULL)
13024 return (NULL);
13025
13026 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
13027 XML_CHAR_ENCODING_NONE);
13028 if (input == NULL)
13029 return (NULL);
13030 ctxt = xmlNewParserCtxt();
13031 if (ctxt == NULL) {
13032 xmlFreeParserInputBuffer(input);
13033 return (NULL);
13034 }
13035 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
13036 if (stream == NULL) {
13037 xmlFreeParserInputBuffer(input);
13038 xmlFreeParserCtxt(ctxt);
13039 return (NULL);
13040 }
13041 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000013042 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013043}
13044
13045/**
13046 * xmlCtxtReadDoc:
13047 * @ctxt: an XML parser context
13048 * @cur: a pointer to a zero terminated string
Daniel Veillard60942de2003-09-25 21:05:58 +000013049 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013050 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013051 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013052 *
13053 * parse an XML in-memory document and build a tree.
13054 * This reuses the existing @ctxt parser context
13055 *
13056 * Returns the resulting document tree
13057 */
13058xmlDocPtr
13059xmlCtxtReadDoc(xmlParserCtxtPtr ctxt, const xmlChar * cur,
Daniel Veillard60942de2003-09-25 21:05:58 +000013060 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013061{
13062 xmlParserInputPtr stream;
13063
13064 if (cur == NULL)
13065 return (NULL);
13066 if (ctxt == NULL)
13067 return (NULL);
13068
13069 xmlCtxtReset(ctxt);
13070
13071 stream = xmlNewStringInputStream(ctxt, cur);
13072 if (stream == NULL) {
13073 return (NULL);
13074 }
13075 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000013076 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013077}
13078
13079/**
13080 * xmlCtxtReadFile:
13081 * @ctxt: an XML parser context
13082 * @filename: a file or URL
13083 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013084 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013085 *
13086 * parse an XML file from the filesystem or the network.
13087 * This reuses the existing @ctxt parser context
13088 *
13089 * Returns the resulting document tree
13090 */
13091xmlDocPtr
13092xmlCtxtReadFile(xmlParserCtxtPtr ctxt, const char *filename,
13093 const char *encoding, int options)
13094{
13095 xmlParserInputPtr stream;
13096
13097 if (filename == NULL)
13098 return (NULL);
13099 if (ctxt == NULL)
13100 return (NULL);
13101
13102 xmlCtxtReset(ctxt);
13103
Daniel Veillard29614c72004-11-26 10:47:26 +000013104 stream = xmlLoadExternalEntity(filename, NULL, ctxt);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013105 if (stream == NULL) {
13106 return (NULL);
13107 }
13108 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000013109 return (xmlDoRead(ctxt, NULL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013110}
13111
13112/**
13113 * xmlCtxtReadMemory:
13114 * @ctxt: an XML parser context
13115 * @buffer: a pointer to a char array
13116 * @size: the size of the array
Daniel Veillard60942de2003-09-25 21:05:58 +000013117 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013118 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013119 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013120 *
13121 * parse an XML in-memory document and build a tree.
13122 * This reuses the existing @ctxt parser context
13123 *
13124 * Returns the resulting document tree
13125 */
13126xmlDocPtr
13127xmlCtxtReadMemory(xmlParserCtxtPtr ctxt, const char *buffer, int size,
Daniel Veillard60942de2003-09-25 21:05:58 +000013128 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013129{
13130 xmlParserInputBufferPtr input;
13131 xmlParserInputPtr stream;
13132
13133 if (ctxt == NULL)
13134 return (NULL);
13135 if (buffer == NULL)
13136 return (NULL);
13137
13138 xmlCtxtReset(ctxt);
13139
13140 input = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
13141 if (input == NULL) {
13142 return(NULL);
13143 }
13144
13145 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
13146 if (stream == NULL) {
13147 xmlFreeParserInputBuffer(input);
13148 return(NULL);
13149 }
13150
13151 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000013152 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013153}
13154
13155/**
13156 * xmlCtxtReadFd:
13157 * @ctxt: an XML parser context
13158 * @fd: an open file descriptor
Daniel Veillard60942de2003-09-25 21:05:58 +000013159 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013160 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013161 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013162 *
13163 * parse an XML from a file descriptor and build a tree.
13164 * This reuses the existing @ctxt parser context
Daniel Veillard4bc5f432003-12-22 18:13:12 +000013165 * NOTE that the file descriptor will not be closed when the
13166 * reader is closed or reset.
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013167 *
13168 * Returns the resulting document tree
13169 */
13170xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000013171xmlCtxtReadFd(xmlParserCtxtPtr ctxt, int fd,
13172 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013173{
13174 xmlParserInputBufferPtr input;
13175 xmlParserInputPtr stream;
13176
13177 if (fd < 0)
13178 return (NULL);
13179 if (ctxt == NULL)
13180 return (NULL);
13181
13182 xmlCtxtReset(ctxt);
13183
13184
13185 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
13186 if (input == NULL)
13187 return (NULL);
Daniel Veillard4bc5f432003-12-22 18:13:12 +000013188 input->closecallback = NULL;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013189 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
13190 if (stream == NULL) {
13191 xmlFreeParserInputBuffer(input);
13192 return (NULL);
13193 }
13194 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000013195 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013196}
13197
13198/**
13199 * xmlCtxtReadIO:
13200 * @ctxt: an XML parser context
13201 * @ioread: an I/O read function
13202 * @ioclose: an I/O close function
13203 * @ioctx: an I/O handler
Daniel Veillard60942de2003-09-25 21:05:58 +000013204 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013205 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013206 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013207 *
13208 * parse an XML document from I/O functions and source and build a tree.
13209 * This reuses the existing @ctxt parser context
13210 *
13211 * Returns the resulting document tree
13212 */
13213xmlDocPtr
13214xmlCtxtReadIO(xmlParserCtxtPtr ctxt, xmlInputReadCallback ioread,
13215 xmlInputCloseCallback ioclose, void *ioctx,
Daniel Veillard60942de2003-09-25 21:05:58 +000013216 const char *URL,
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013217 const char *encoding, int options)
13218{
13219 xmlParserInputBufferPtr input;
13220 xmlParserInputPtr stream;
13221
13222 if (ioread == NULL)
13223 return (NULL);
13224 if (ctxt == NULL)
13225 return (NULL);
13226
13227 xmlCtxtReset(ctxt);
13228
13229 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
13230 XML_CHAR_ENCODING_NONE);
13231 if (input == NULL)
13232 return (NULL);
13233 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
13234 if (stream == NULL) {
13235 xmlFreeParserInputBuffer(input);
13236 return (NULL);
13237 }
13238 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000013239 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013240}
Daniel Veillard5d4644e2005-04-01 13:11:58 +000013241
13242#define bottom_parser
13243#include "elfgcchack.h"