blob: 44e6180208f125de0317b86f794bf315d68279e6 [file] [log] [blame]
Owen Taylor3473f882001-02-23 17:55:21 +00001/*
2 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
3 * implemented on top of the SAX interfaces
4 *
5 * References:
6 * The XML specification:
7 * http://www.w3.org/TR/REC-xml
8 * Original 1.0 version:
9 * http://www.w3.org/TR/1998/REC-xml-19980210
10 * XML second edition working draft
11 * http://www.w3.org/TR/2000/WD-xml-2e-20000814
12 *
13 * Okay this is a big file, the parser core is around 7000 lines, then it
14 * is followed by the progressive parser top routines, then the various
Daniel Veillardcbaf3992001-12-31 16:16:02 +000015 * high level APIs to call the parser and a few miscellaneous functions.
Owen Taylor3473f882001-02-23 17:55:21 +000016 * A number of helper functions and deprecated ones have been moved to
17 * parserInternals.c to reduce this file size.
18 * As much as possible the functions are associated with their relative
19 * production in the XML specification. A few productions defining the
20 * different ranges of characters are actually implemented either in
21 * parserInternals.h or parserInternals.c
22 * The DOM tree build is realized from the default SAX callbacks in
23 * the module SAX.c.
24 * The routines doing the validation checks are in valid.c and called either
Daniel Veillardcbaf3992001-12-31 16:16:02 +000025 * from the SAX callbacks or as standalone functions using a preparsed
Owen Taylor3473f882001-02-23 17:55:21 +000026 * document.
27 *
28 * See Copyright for the status of this software.
29 *
Daniel Veillardc5d64342001-06-24 12:13:24 +000030 * daniel@veillard.com
Owen Taylor3473f882001-02-23 17:55:21 +000031 */
32
Daniel Veillard34ce8be2002-03-18 19:37:11 +000033#define IN_LIBXML
Bjorn Reese70a9da52001-04-21 16:57:29 +000034#include "libxml.h"
35
Daniel Veillard3c5ed912002-01-08 10:36:16 +000036#if defined(WIN32) && !defined (__CYGWIN__)
Owen Taylor3473f882001-02-23 17:55:21 +000037#define XML_DIR_SEP '\\'
38#else
Owen Taylor3473f882001-02-23 17:55:21 +000039#define XML_DIR_SEP '/'
40#endif
41
Owen Taylor3473f882001-02-23 17:55:21 +000042#include <stdlib.h>
43#include <string.h>
Aleksey Sanine7acf432003-10-02 20:05:27 +000044#include <stdarg.h>
Owen Taylor3473f882001-02-23 17:55:21 +000045#include <libxml/xmlmemory.h>
Daniel Veillardd0463562001-10-13 09:15:48 +000046#include <libxml/threads.h>
47#include <libxml/globals.h>
Owen Taylor3473f882001-02-23 17:55:21 +000048#include <libxml/tree.h>
49#include <libxml/parser.h>
50#include <libxml/parserInternals.h>
51#include <libxml/valid.h>
52#include <libxml/entities.h>
53#include <libxml/xmlerror.h>
54#include <libxml/encoding.h>
55#include <libxml/xmlIO.h>
56#include <libxml/uri.h>
Daniel Veillard5d90b6c2001-08-22 14:29:45 +000057#ifdef LIBXML_CATALOG_ENABLED
58#include <libxml/catalog.h>
59#endif
William M. Brack1d8c9b22004-12-25 10:14:57 +000060#ifdef LIBXML_SCHEMAS_ENABLED
61#include <libxml/xmlschemastypes.h>
62#include <libxml/relaxng.h>
63#endif
Owen Taylor3473f882001-02-23 17:55:21 +000064#ifdef HAVE_CTYPE_H
65#include <ctype.h>
66#endif
67#ifdef HAVE_STDLIB_H
68#include <stdlib.h>
69#endif
70#ifdef HAVE_SYS_STAT_H
71#include <sys/stat.h>
72#endif
73#ifdef HAVE_FCNTL_H
74#include <fcntl.h>
75#endif
76#ifdef HAVE_UNISTD_H
77#include <unistd.h>
78#endif
79#ifdef HAVE_ZLIB_H
80#include <zlib.h>
81#endif
82
/**
 * xmlParserMaxDepth:
 *
 * Arbitrary upper bound on the depth of the XML documents the parser
 * accepts to process. It is a safety boundary, not an intrinsic
 * limitation of the parser.
 */
unsigned int xmlParserMaxDepth = 1024;
Owen Taylor3473f882001-02-23 17:55:21 +000091
/* Marks this parser build as SAX2 based. */
#define SAX2 1

/* Sizes of the "big" and regular parser scratch buffers. */
#define XML_PARSER_BIG_BUFFER_SIZE 300
#define XML_PARSER_BUFFER_SIZE 100

/* Marker string for documents handled in SAX compatibility mode. */
#define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"
98
/*
 * NULL-terminated list of the "xml"-prefixed processing instruction
 * targets allowed by W3C specifications.
 */
static const char *xmlW3CPIs[] = { "xml-stylesheet", NULL };
107
Daniel Veillarda07050d2003-10-19 14:46:32 +0000108
Owen Taylor3473f882001-02-23 17:55:21 +0000109/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
Owen Taylor3473f882001-02-23 17:55:21 +0000110xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
111 const xmlChar **str);
112
Daniel Veillard7d515752003-09-26 19:12:37 +0000113static xmlParserErrors
Daniel Veillarda97a19b2001-05-20 13:19:52 +0000114xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
115 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +0000116 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +0000117 const xmlChar *ID, xmlNodePtr *list);
Owen Taylor3473f882001-02-23 17:55:21 +0000118
Daniel Veillard81273902003-09-30 00:43:48 +0000119#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +0000120static void
121xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
122 xmlNodePtr lastNode);
Daniel Veillard81273902003-09-30 00:43:48 +0000123#endif /* LIBXML_LEGACY_ENABLED */
Daniel Veillard8107a222002-01-13 14:10:10 +0000124
Daniel Veillard7d515752003-09-26 19:12:37 +0000125static xmlParserErrors
Daniel Veillard328f48c2002-11-15 15:24:34 +0000126xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
127 const xmlChar *string, void *user_data, xmlNodePtr *lst);
Daniel Veillarde57ec792003-09-10 10:50:59 +0000128
129/************************************************************************
130 * *
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000131 * Some factorized error routines *
132 * *
133 ************************************************************************/
134
135/**
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000136 * xmlErrAttributeDup:
137 * @ctxt: an XML parser context
138 * @prefix: the attribute prefix
139 * @localname: the attribute localname
140 *
141 * Handle a redefinition of attribute error
142 */
143static void
144xmlErrAttributeDup(xmlParserCtxtPtr ctxt, const xmlChar * prefix,
145 const xmlChar * localname)
146{
Daniel Veillard157fee02003-10-31 10:36:03 +0000147 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
148 (ctxt->instate == XML_PARSER_EOF))
149 return;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000150 ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
Daniel Veillard2b8c4a12003-10-02 22:28:19 +0000151 if (prefix == NULL)
Daniel Veillard659e71e2003-10-10 14:10:40 +0000152 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000153 ctxt->errNo, XML_ERR_FATAL, NULL, 0,
154 (const char *) localname, NULL, NULL, 0, 0,
155 "Attribute %s redefined\n", localname);
Daniel Veillard2b8c4a12003-10-02 22:28:19 +0000156 else
Daniel Veillard659e71e2003-10-10 14:10:40 +0000157 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000158 ctxt->errNo, XML_ERR_FATAL, NULL, 0,
159 (const char *) prefix, (const char *) localname,
160 NULL, 0, 0, "Attribute %s:%s redefined\n", prefix,
161 localname);
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000162 ctxt->wellFormed = 0;
163 if (ctxt->recovery == 0)
164 ctxt->disableSAX = 1;
165}
166
167/**
168 * xmlFatalErr:
169 * @ctxt: an XML parser context
170 * @error: the error number
171 * @extra: extra information string
172 *
173 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
174 */
175static void
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000176xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info)
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000177{
178 const char *errmsg;
179
Daniel Veillard157fee02003-10-31 10:36:03 +0000180 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
181 (ctxt->instate == XML_PARSER_EOF))
182 return;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000183 switch (error) {
184 case XML_ERR_INVALID_HEX_CHARREF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000185 errmsg = "CharRef: invalid hexadecimal value\n";
186 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000187 case XML_ERR_INVALID_DEC_CHARREF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000188 errmsg = "CharRef: invalid decimal value\n";
189 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000190 case XML_ERR_INVALID_CHARREF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000191 errmsg = "CharRef: invalid value\n";
192 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000193 case XML_ERR_INTERNAL_ERROR:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000194 errmsg = "internal error";
195 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000196 case XML_ERR_PEREF_AT_EOF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000197 errmsg = "PEReference at end of document\n";
198 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000199 case XML_ERR_PEREF_IN_PROLOG:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000200 errmsg = "PEReference in prolog\n";
201 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000202 case XML_ERR_PEREF_IN_EPILOG:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000203 errmsg = "PEReference in epilog\n";
204 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000205 case XML_ERR_PEREF_NO_NAME:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000206 errmsg = "PEReference: no name\n";
207 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000208 case XML_ERR_PEREF_SEMICOL_MISSING:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000209 errmsg = "PEReference: expecting ';'\n";
210 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000211 case XML_ERR_ENTITY_LOOP:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000212 errmsg = "Detected an entity reference loop\n";
213 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000214 case XML_ERR_ENTITY_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000215 errmsg = "EntityValue: \" or ' expected\n";
216 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000217 case XML_ERR_ENTITY_PE_INTERNAL:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000218 errmsg = "PEReferences forbidden in internal subset\n";
219 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000220 case XML_ERR_ENTITY_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000221 errmsg = "EntityValue: \" or ' expected\n";
222 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000223 case XML_ERR_ATTRIBUTE_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000224 errmsg = "AttValue: \" or ' expected\n";
225 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000226 case XML_ERR_LT_IN_ATTRIBUTE:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000227 errmsg = "Unescaped '<' not allowed in attributes values\n";
228 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000229 case XML_ERR_LITERAL_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000230 errmsg = "SystemLiteral \" or ' expected\n";
231 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000232 case XML_ERR_LITERAL_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000233 errmsg = "Unfinished System or Public ID \" or ' expected\n";
234 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000235 case XML_ERR_MISPLACED_CDATA_END:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000236 errmsg = "Sequence ']]>' not allowed in content\n";
237 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000238 case XML_ERR_URI_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000239 errmsg = "SYSTEM or PUBLIC, the URI is missing\n";
240 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000241 case XML_ERR_PUBID_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000242 errmsg = "PUBLIC, the Public Identifier is missing\n";
243 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000244 case XML_ERR_HYPHEN_IN_COMMENT:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000245 errmsg = "Comment must not contain '--' (double-hyphen)\n";
246 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000247 case XML_ERR_PI_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000248 errmsg = "xmlParsePI : no target name\n";
249 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000250 case XML_ERR_RESERVED_XML_NAME:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000251 errmsg = "Invalid PI name\n";
252 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000253 case XML_ERR_NOTATION_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000254 errmsg = "NOTATION: Name expected here\n";
255 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000256 case XML_ERR_NOTATION_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000257 errmsg = "'>' required to close NOTATION declaration\n";
258 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000259 case XML_ERR_VALUE_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000260 errmsg = "Entity value required\n";
261 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000262 case XML_ERR_URI_FRAGMENT:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000263 errmsg = "Fragment not allowed";
264 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000265 case XML_ERR_ATTLIST_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000266 errmsg = "'(' required to start ATTLIST enumeration\n";
267 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000268 case XML_ERR_NMTOKEN_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000269 errmsg = "NmToken expected in ATTLIST enumeration\n";
270 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000271 case XML_ERR_ATTLIST_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000272 errmsg = "')' required to finish ATTLIST enumeration\n";
273 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000274 case XML_ERR_MIXED_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000275 errmsg = "MixedContentDecl : '|' or ')*' expected\n";
276 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000277 case XML_ERR_PCDATA_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000278 errmsg = "MixedContentDecl : '#PCDATA' expected\n";
279 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000280 case XML_ERR_ELEMCONTENT_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000281 errmsg = "ContentDecl : Name or '(' expected\n";
282 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000283 case XML_ERR_ELEMCONTENT_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000284 errmsg = "ContentDecl : ',' '|' or ')' expected\n";
285 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000286 case XML_ERR_PEREF_IN_INT_SUBSET:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000287 errmsg =
288 "PEReference: forbidden within markup decl in internal subset\n";
289 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000290 case XML_ERR_GT_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000291 errmsg = "expected '>'\n";
292 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000293 case XML_ERR_CONDSEC_INVALID:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000294 errmsg = "XML conditional section '[' expected\n";
295 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000296 case XML_ERR_EXT_SUBSET_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000297 errmsg = "Content error in the external subset\n";
298 break;
299 case XML_ERR_CONDSEC_INVALID_KEYWORD:
300 errmsg =
301 "conditional section INCLUDE or IGNORE keyword expected\n";
302 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000303 case XML_ERR_CONDSEC_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000304 errmsg = "XML conditional section not closed\n";
305 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000306 case XML_ERR_XMLDECL_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000307 errmsg = "Text declaration '<?xml' required\n";
308 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000309 case XML_ERR_XMLDECL_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000310 errmsg = "parsing XML declaration: '?>' expected\n";
311 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000312 case XML_ERR_EXT_ENTITY_STANDALONE:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000313 errmsg = "external parsed entities cannot be standalone\n";
314 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000315 case XML_ERR_ENTITYREF_SEMICOL_MISSING:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000316 errmsg = "EntityRef: expecting ';'\n";
317 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000318 case XML_ERR_DOCTYPE_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000319 errmsg = "DOCTYPE improperly terminated\n";
320 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000321 case XML_ERR_LTSLASH_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000322 errmsg = "EndTag: '</' not found\n";
323 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000324 case XML_ERR_EQUAL_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000325 errmsg = "expected '='\n";
326 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000327 case XML_ERR_STRING_NOT_CLOSED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000328 errmsg = "String not closed expecting \" or '\n";
329 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000330 case XML_ERR_STRING_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000331 errmsg = "String not started expecting ' or \"\n";
332 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000333 case XML_ERR_ENCODING_NAME:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000334 errmsg = "Invalid XML encoding name\n";
335 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000336 case XML_ERR_STANDALONE_VALUE:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000337 errmsg = "standalone accepts only 'yes' or 'no'\n";
338 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000339 case XML_ERR_DOCUMENT_EMPTY:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000340 errmsg = "Document is empty\n";
341 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000342 case XML_ERR_DOCUMENT_END:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000343 errmsg = "Extra content at the end of the document\n";
344 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000345 case XML_ERR_NOT_WELL_BALANCED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000346 errmsg = "chunk is not well balanced\n";
347 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000348 case XML_ERR_EXTRA_CONTENT:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000349 errmsg = "extra content at the end of well balanced chunk\n";
350 break;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000351 case XML_ERR_VERSION_MISSING:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000352 errmsg = "Malformed declaration expecting version\n";
353 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000354#if 0
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000355 case:
356 errmsg = "\n";
357 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000358#endif
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000359 default:
360 errmsg = "Unregistered error message\n";
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000361 }
362 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000363 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000364 XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, errmsg,
365 info);
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000366 ctxt->wellFormed = 0;
367 if (ctxt->recovery == 0)
368 ctxt->disableSAX = 1;
369}
370
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000371/**
372 * xmlFatalErrMsg:
373 * @ctxt: an XML parser context
374 * @error: the error number
375 * @msg: the error message
376 *
377 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
378 */
379static void
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000380xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
381 const char *msg)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000382{
Daniel Veillard157fee02003-10-31 10:36:03 +0000383 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
384 (ctxt->instate == XML_PARSER_EOF))
385 return;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000386 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000387 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000388 XML_ERR_FATAL, NULL, 0, NULL, NULL, NULL, 0, 0, msg);
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000389 ctxt->wellFormed = 0;
390 if (ctxt->recovery == 0)
391 ctxt->disableSAX = 1;
392}
393
394/**
Daniel Veillard24eb9782003-10-04 21:08:09 +0000395 * xmlWarningMsg:
396 * @ctxt: an XML parser context
397 * @error: the error number
398 * @msg: the error message
399 * @str1: extra data
400 * @str2: extra data
401 *
402 * Handle a warning.
403 */
404static void
405xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
406 const char *msg, const xmlChar *str1, const xmlChar *str2)
407{
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000408 xmlStructuredErrorFunc schannel = NULL;
Daniel Veillardc790bf42003-10-11 10:50:10 +0000409
Daniel Veillard157fee02003-10-31 10:36:03 +0000410 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
411 (ctxt->instate == XML_PARSER_EOF))
412 return;
Daniel Veillardc790bf42003-10-11 10:50:10 +0000413 if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC))
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000414 schannel = ctxt->sax->serror;
415 __xmlRaiseError(schannel,
Daniel Veillard659e71e2003-10-10 14:10:40 +0000416 (ctxt->sax) ? ctxt->sax->warning : NULL,
417 ctxt->userData,
Daniel Veillard24eb9782003-10-04 21:08:09 +0000418 ctxt, NULL, XML_FROM_PARSER, error,
419 XML_ERR_WARNING, NULL, 0,
420 (const char *) str1, (const char *) str2, NULL, 0, 0,
421 msg, (const char *) str1, (const char *) str2);
422}
423
424/**
425 * xmlValidityError:
426 * @ctxt: an XML parser context
427 * @error: the error number
428 * @msg: the error message
429 * @str1: extra data
430 *
Daniel Veillardf88d8cf2003-12-08 10:25:02 +0000431 * Handle a validity error.
Daniel Veillard24eb9782003-10-04 21:08:09 +0000432 */
433static void
434xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error,
435 const char *msg, const xmlChar *str1)
436{
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000437 xmlStructuredErrorFunc schannel = NULL;
Daniel Veillard157fee02003-10-31 10:36:03 +0000438
439 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
440 (ctxt->instate == XML_PARSER_EOF))
441 return;
Daniel Veillard24eb9782003-10-04 21:08:09 +0000442 ctxt->errNo = error;
Daniel Veillardc790bf42003-10-11 10:50:10 +0000443 if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC))
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000444 schannel = ctxt->sax->serror;
Daniel Veillardc790bf42003-10-11 10:50:10 +0000445 __xmlRaiseError(schannel,
Daniel Veillard659e71e2003-10-10 14:10:40 +0000446 ctxt->vctxt.error, ctxt->vctxt.userData,
Daniel Veillard24eb9782003-10-04 21:08:09 +0000447 ctxt, NULL, XML_FROM_DTD, error,
448 XML_ERR_ERROR, NULL, 0, (const char *) str1,
449 NULL, NULL, 0, 0,
450 msg, (const char *) str1);
451 ctxt->valid = 0;
452}
453
454/**
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000455 * xmlFatalErrMsgInt:
456 * @ctxt: an XML parser context
457 * @error: the error number
458 * @msg: the error message
459 * @val: an integer value
460 *
461 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
462 */
463static void
464xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000465 const char *msg, int val)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000466{
Daniel Veillard157fee02003-10-31 10:36:03 +0000467 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
468 (ctxt->instate == XML_PARSER_EOF))
469 return;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000470 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000471 __xmlRaiseError(NULL, NULL, NULL,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000472 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
473 NULL, 0, NULL, NULL, NULL, val, 0, msg, val);
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000474 ctxt->wellFormed = 0;
475 if (ctxt->recovery == 0)
476 ctxt->disableSAX = 1;
477}
478
479/**
Daniel Veillardf403d292003-10-05 13:51:35 +0000480 * xmlFatalErrMsgStrIntStr:
481 * @ctxt: an XML parser context
482 * @error: the error number
483 * @msg: the error message
484 * @str1: an string info
485 * @val: an integer value
486 * @str2: an string info
487 *
488 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
489 */
490static void
491xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
492 const char *msg, const xmlChar *str1, int val,
493 const xmlChar *str2)
494{
Daniel Veillard157fee02003-10-31 10:36:03 +0000495 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
496 (ctxt->instate == XML_PARSER_EOF))
497 return;
Daniel Veillardf403d292003-10-05 13:51:35 +0000498 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000499 __xmlRaiseError(NULL, NULL, NULL,
Daniel Veillardf403d292003-10-05 13:51:35 +0000500 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
501 NULL, 0, (const char *) str1, (const char *) str2,
502 NULL, val, 0, msg, str1, val, str2);
503 ctxt->wellFormed = 0;
504 if (ctxt->recovery == 0)
505 ctxt->disableSAX = 1;
506}
507
508/**
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000509 * xmlFatalErrMsgStr:
510 * @ctxt: an XML parser context
511 * @error: the error number
512 * @msg: the error message
513 * @val: a string value
514 *
515 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
516 */
517static void
518xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000519 const char *msg, const xmlChar * val)
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000520{
Daniel Veillard157fee02003-10-31 10:36:03 +0000521 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
522 (ctxt->instate == XML_PARSER_EOF))
523 return;
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000524 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000525 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000526 XML_FROM_PARSER, error, XML_ERR_FATAL,
527 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
528 val);
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000529 ctxt->wellFormed = 0;
530 if (ctxt->recovery == 0)
531 ctxt->disableSAX = 1;
532}
533
534/**
Daniel Veillardf403d292003-10-05 13:51:35 +0000535 * xmlErrMsgStr:
536 * @ctxt: an XML parser context
537 * @error: the error number
538 * @msg: the error message
539 * @val: a string value
540 *
541 * Handle a non fatal parser error
542 */
543static void
544xmlErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
545 const char *msg, const xmlChar * val)
546{
Daniel Veillard157fee02003-10-31 10:36:03 +0000547 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
548 (ctxt->instate == XML_PARSER_EOF))
549 return;
Daniel Veillardf403d292003-10-05 13:51:35 +0000550 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000551 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
Daniel Veillardf403d292003-10-05 13:51:35 +0000552 XML_FROM_PARSER, error, XML_ERR_ERROR,
553 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
554 val);
555}
556
557/**
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000558 * xmlNsErr:
559 * @ctxt: an XML parser context
560 * @error: the error number
561 * @msg: the message
562 * @info1: extra information string
563 * @info2: extra information string
564 *
565 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
566 */
567static void
568xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
569 const char *msg,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000570 const xmlChar * info1, const xmlChar * info2,
571 const xmlChar * info3)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000572{
Daniel Veillard157fee02003-10-31 10:36:03 +0000573 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
574 (ctxt->instate == XML_PARSER_EOF))
575 return;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000576 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000577 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000578 XML_ERR_ERROR, NULL, 0, (const char *) info1,
579 (const char *) info2, (const char *) info3, 0, 0, msg,
580 info1, info2, info3);
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000581 ctxt->nsWellFormed = 0;
582}
583
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000584/************************************************************************
585 * *
Daniel Veillarde57ec792003-09-10 10:50:59 +0000586 * SAX2 defaulted attributes handling *
587 * *
588 ************************************************************************/
589
590/**
591 * xmlDetectSAX2:
592 * @ctxt: an XML parser context
593 *
594 * Do the SAX2 detection and specific intialization
595 */
596static void
597xmlDetectSAX2(xmlParserCtxtPtr ctxt) {
598 if (ctxt == NULL) return;
Daniel Veillard81273902003-09-30 00:43:48 +0000599#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +0000600 if ((ctxt->sax) && (ctxt->sax->initialized == XML_SAX2_MAGIC) &&
601 ((ctxt->sax->startElementNs != NULL) ||
602 (ctxt->sax->endElementNs != NULL))) ctxt->sax2 = 1;
Daniel Veillard81273902003-09-30 00:43:48 +0000603#else
604 ctxt->sax2 = 1;
605#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +0000606
607 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
608 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
609 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
William M. Brack9f797ab2004-07-28 07:40:12 +0000610 if ((ctxt->str_xml==NULL) || (ctxt->str_xmlns==NULL) ||
611 (ctxt->str_xml_ns == NULL)) {
612 xmlErrMemory(ctxt, NULL);
613 }
Daniel Veillarde57ec792003-09-10 10:50:59 +0000614}
615
Daniel Veillarde57ec792003-09-10 10:50:59 +0000616typedef struct _xmlDefAttrs xmlDefAttrs;
617typedef xmlDefAttrs *xmlDefAttrsPtr;
618struct _xmlDefAttrs {
619 int nbAttrs; /* number of defaulted attributes on that element */
620 int maxAttrs; /* the size of the array */
621 const xmlChar *values[4]; /* array of localname/prefix/values */
622};
Daniel Veillarde57ec792003-09-10 10:50:59 +0000623
624/**
625 * xmlAddDefAttrs:
626 * @ctxt: an XML parser context
627 * @fullname: the element fullname
628 * @fullattr: the attribute fullname
629 * @value: the attribute value
630 *
631 * Add a defaulted attribute for an element
632 */
633static void
634xmlAddDefAttrs(xmlParserCtxtPtr ctxt,
635 const xmlChar *fullname,
636 const xmlChar *fullattr,
637 const xmlChar *value) {
638 xmlDefAttrsPtr defaults;
639 int len;
640 const xmlChar *name;
641 const xmlChar *prefix;
642
643 if (ctxt->attsDefault == NULL) {
Daniel Veillard316a5c32005-01-23 22:56:39 +0000644 ctxt->attsDefault = xmlHashCreateDict(10, ctxt->dict);
Daniel Veillarde57ec792003-09-10 10:50:59 +0000645 if (ctxt->attsDefault == NULL)
646 goto mem_error;
647 }
648
649 /*
Daniel Veillard079f6a72004-09-23 13:15:03 +0000650 * split the element name into prefix:localname , the string found
651 * are within the DTD and then not associated to namespace names.
Daniel Veillarde57ec792003-09-10 10:50:59 +0000652 */
653 name = xmlSplitQName3(fullname, &len);
654 if (name == NULL) {
655 name = xmlDictLookup(ctxt->dict, fullname, -1);
656 prefix = NULL;
657 } else {
658 name = xmlDictLookup(ctxt->dict, name, -1);
659 prefix = xmlDictLookup(ctxt->dict, fullname, len);
660 }
661
662 /*
663 * make sure there is some storage
664 */
665 defaults = xmlHashLookup2(ctxt->attsDefault, name, prefix);
666 if (defaults == NULL) {
667 defaults = (xmlDefAttrsPtr) xmlMalloc(sizeof(xmlDefAttrs) +
Daniel Veillard079f6a72004-09-23 13:15:03 +0000668 (4 * 4) * sizeof(const xmlChar *));
Daniel Veillarde57ec792003-09-10 10:50:59 +0000669 if (defaults == NULL)
670 goto mem_error;
Daniel Veillarde57ec792003-09-10 10:50:59 +0000671 defaults->nbAttrs = 0;
Daniel Veillard079f6a72004-09-23 13:15:03 +0000672 defaults->maxAttrs = 4;
Daniel Veillarde57ec792003-09-10 10:50:59 +0000673 xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix, defaults, NULL);
674 } else if (defaults->nbAttrs >= defaults->maxAttrs) {
Daniel Veillard079f6a72004-09-23 13:15:03 +0000675 xmlDefAttrsPtr temp;
676
677 temp = (xmlDefAttrsPtr) xmlRealloc(defaults, sizeof(xmlDefAttrs) +
Daniel Veillarde57ec792003-09-10 10:50:59 +0000678 (2 * defaults->maxAttrs * 4) * sizeof(const xmlChar *));
Daniel Veillard079f6a72004-09-23 13:15:03 +0000679 if (temp == NULL)
Daniel Veillarde57ec792003-09-10 10:50:59 +0000680 goto mem_error;
Daniel Veillard079f6a72004-09-23 13:15:03 +0000681 defaults = temp;
Daniel Veillarde57ec792003-09-10 10:50:59 +0000682 defaults->maxAttrs *= 2;
683 xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix, defaults, NULL);
684 }
685
686 /*
687 * plit the element name into prefix:localname , the string found
688 * are within the DTD and hen not associated to namespace names.
689 */
690 name = xmlSplitQName3(fullattr, &len);
691 if (name == NULL) {
692 name = xmlDictLookup(ctxt->dict, fullattr, -1);
693 prefix = NULL;
694 } else {
695 name = xmlDictLookup(ctxt->dict, name, -1);
696 prefix = xmlDictLookup(ctxt->dict, fullattr, len);
697 }
698
699 defaults->values[4 * defaults->nbAttrs] = name;
700 defaults->values[4 * defaults->nbAttrs + 1] = prefix;
701 /* intern the string and precompute the end */
702 len = xmlStrlen(value);
703 value = xmlDictLookup(ctxt->dict, value, len);
704 defaults->values[4 * defaults->nbAttrs + 2] = value;
705 defaults->values[4 * defaults->nbAttrs + 3] = value + len;
706 defaults->nbAttrs++;
707
708 return;
709
710mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000711 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +0000712 return;
713}
714
Daniel Veillard8e36e6a2003-09-10 10:50:59 +0000715/**
716 * xmlAddSpecialAttr:
717 * @ctxt: an XML parser context
718 * @fullname: the element fullname
719 * @fullattr: the attribute fullname
720 * @type: the attribute type
721 *
722 * Register that this attribute is not CDATA
723 */
724static void
725xmlAddSpecialAttr(xmlParserCtxtPtr ctxt,
726 const xmlChar *fullname,
727 const xmlChar *fullattr,
728 int type)
729{
730 if (ctxt->attsSpecial == NULL) {
Daniel Veillard316a5c32005-01-23 22:56:39 +0000731 ctxt->attsSpecial = xmlHashCreateDict(10, ctxt->dict);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +0000732 if (ctxt->attsSpecial == NULL)
733 goto mem_error;
734 }
735
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +0000736 xmlHashAddEntry2(ctxt->attsSpecial, fullname, fullattr,
737 (void *) (long) type);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +0000738 return;
739
740mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000741 xmlErrMemory(ctxt, NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +0000742 return;
743}
744
Daniel Veillard4432df22003-09-28 18:58:27 +0000745/**
746 * xmlCheckLanguageID:
747 * @lang: pointer to the string value
748 *
749 * Checks that the value conforms to the LanguageID production:
750 *
751 * NOTE: this is somewhat deprecated, those productions were removed from
752 * the XML Second edition.
753 *
754 * [33] LanguageID ::= Langcode ('-' Subcode)*
755 * [34] Langcode ::= ISO639Code | IanaCode | UserCode
756 * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
757 * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
758 * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
759 * [38] Subcode ::= ([a-z] | [A-Z])+
760 *
761 * Returns 1 if correct 0 otherwise
762 **/
763int
764xmlCheckLanguageID(const xmlChar * lang)
765{
766 const xmlChar *cur = lang;
767
768 if (cur == NULL)
769 return (0);
770 if (((cur[0] == 'i') && (cur[1] == '-')) ||
771 ((cur[0] == 'I') && (cur[1] == '-'))) {
772 /*
773 * IANA code
774 */
775 cur += 2;
776 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
777 ((cur[0] >= 'a') && (cur[0] <= 'z')))
778 cur++;
779 } else if (((cur[0] == 'x') && (cur[1] == '-')) ||
780 ((cur[0] == 'X') && (cur[1] == '-'))) {
781 /*
782 * User code
783 */
784 cur += 2;
785 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
786 ((cur[0] >= 'a') && (cur[0] <= 'z')))
787 cur++;
788 } else if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
789 ((cur[0] >= 'a') && (cur[0] <= 'z'))) {
790 /*
791 * ISO639
792 */
793 cur++;
794 if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
795 ((cur[0] >= 'a') && (cur[0] <= 'z')))
796 cur++;
797 else
798 return (0);
799 } else
800 return (0);
801 while (cur[0] != 0) { /* non input consuming */
802 if (cur[0] != '-')
803 return (0);
804 cur++;
805 if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
806 ((cur[0] >= 'a') && (cur[0] <= 'z')))
807 cur++;
808 else
809 return (0);
810 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
811 ((cur[0] >= 'a') && (cur[0] <= 'z')))
812 cur++;
813 }
814 return (1);
815}
816
Owen Taylor3473f882001-02-23 17:55:21 +0000817/************************************************************************
818 * *
819 * Parser stacks related functions and macros *
820 * *
821 ************************************************************************/
822
823xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
824 const xmlChar ** str);
825
Daniel Veillard0fb18932003-09-07 09:14:37 +0000826#ifdef SAX2
827/**
828 * nsPush:
829 * @ctxt: an XML parser context
830 * @prefix: the namespace prefix or NULL
831 * @URL: the namespace name
832 *
833 * Pushes a new parser namespace on top of the ns stack
834 *
William M. Brack7b9154b2003-09-27 19:23:50 +0000835 * Returns -1 in case of error, -2 if the namespace should be discarded
836 * and the index in the stack otherwise.
Daniel Veillard0fb18932003-09-07 09:14:37 +0000837 */
838static int
839nsPush(xmlParserCtxtPtr ctxt, const xmlChar *prefix, const xmlChar *URL)
840{
Daniel Veillarddca8cc72003-09-26 13:53:14 +0000841 if (ctxt->options & XML_PARSE_NSCLEAN) {
842 int i;
843 for (i = 0;i < ctxt->nsNr;i += 2) {
844 if (ctxt->nsTab[i] == prefix) {
845 /* in scope */
846 if (ctxt->nsTab[i + 1] == URL)
847 return(-2);
848 /* out of scope keep it */
849 break;
850 }
851 }
852 }
Daniel Veillard0fb18932003-09-07 09:14:37 +0000853 if ((ctxt->nsMax == 0) || (ctxt->nsTab == NULL)) {
854 ctxt->nsMax = 10;
855 ctxt->nsNr = 0;
856 ctxt->nsTab = (const xmlChar **)
857 xmlMalloc(ctxt->nsMax * sizeof(xmlChar *));
858 if (ctxt->nsTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000859 xmlErrMemory(ctxt, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +0000860 ctxt->nsMax = 0;
861 return (-1);
862 }
863 } else if (ctxt->nsNr >= ctxt->nsMax) {
864 ctxt->nsMax *= 2;
865 ctxt->nsTab = (const xmlChar **)
Daniel Veillard5bb9ccd2004-02-09 12:39:02 +0000866 xmlRealloc((char *) ctxt->nsTab,
Daniel Veillard0fb18932003-09-07 09:14:37 +0000867 ctxt->nsMax * sizeof(ctxt->nsTab[0]));
868 if (ctxt->nsTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000869 xmlErrMemory(ctxt, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +0000870 ctxt->nsMax /= 2;
871 return (-1);
872 }
873 }
874 ctxt->nsTab[ctxt->nsNr++] = prefix;
875 ctxt->nsTab[ctxt->nsNr++] = URL;
876 return (ctxt->nsNr);
877}
878/**
879 * nsPop:
880 * @ctxt: an XML parser context
881 * @nr: the number to pop
882 *
883 * Pops the top @nr parser prefix/namespace from the ns stack
884 *
885 * Returns the number of namespaces removed
886 */
887static int
888nsPop(xmlParserCtxtPtr ctxt, int nr)
889{
890 int i;
891
892 if (ctxt->nsTab == NULL) return(0);
893 if (ctxt->nsNr < nr) {
894 xmlGenericError(xmlGenericErrorContext, "Pbm popping %d NS\n", nr);
895 nr = ctxt->nsNr;
896 }
897 if (ctxt->nsNr <= 0)
898 return (0);
899
900 for (i = 0;i < nr;i++) {
901 ctxt->nsNr--;
902 ctxt->nsTab[ctxt->nsNr] = NULL;
903 }
904 return(nr);
905}
906#endif
907
908static int
909xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt, int nr) {
910 const xmlChar **atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +0000911 int *attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +0000912 int maxatts;
913
914 if (ctxt->atts == NULL) {
Daniel Veillarde57ec792003-09-10 10:50:59 +0000915 maxatts = 55; /* allow for 10 attrs by default */
Daniel Veillard0fb18932003-09-07 09:14:37 +0000916 atts = (const xmlChar **)
917 xmlMalloc(maxatts * sizeof(xmlChar *));
Daniel Veillarde57ec792003-09-10 10:50:59 +0000918 if (atts == NULL) goto mem_error;
Daniel Veillard0fb18932003-09-07 09:14:37 +0000919 ctxt->atts = atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +0000920 attallocs = (int *) xmlMalloc((maxatts / 5) * sizeof(int));
921 if (attallocs == NULL) goto mem_error;
922 ctxt->attallocs = attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +0000923 ctxt->maxatts = maxatts;
Daniel Veillarde57ec792003-09-10 10:50:59 +0000924 } else if (nr + 5 > ctxt->maxatts) {
925 maxatts = (nr + 5) * 2;
Daniel Veillard0fb18932003-09-07 09:14:37 +0000926 atts = (const xmlChar **) xmlRealloc((void *) ctxt->atts,
927 maxatts * sizeof(const xmlChar *));
Daniel Veillarde57ec792003-09-10 10:50:59 +0000928 if (atts == NULL) goto mem_error;
Daniel Veillard0fb18932003-09-07 09:14:37 +0000929 ctxt->atts = atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +0000930 attallocs = (int *) xmlRealloc((void *) ctxt->attallocs,
931 (maxatts / 5) * sizeof(int));
932 if (attallocs == NULL) goto mem_error;
933 ctxt->attallocs = attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +0000934 ctxt->maxatts = maxatts;
935 }
936 return(ctxt->maxatts);
Daniel Veillarde57ec792003-09-10 10:50:59 +0000937mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000938 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +0000939 return(-1);
Daniel Veillard0fb18932003-09-07 09:14:37 +0000940}
941
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000942/**
943 * inputPush:
944 * @ctxt: an XML parser context
Daniel Veillard9d06d302002-01-22 18:15:52 +0000945 * @value: the parser input
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000946 *
947 * Pushes a new parser input on top of the input stack
Daniel Veillard9d06d302002-01-22 18:15:52 +0000948 *
949 * Returns 0 in case of error, the index in the stack otherwise
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000950 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +0000951int
Daniel Veillard1c732d22002-11-30 11:22:59 +0000952inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value)
953{
Daniel Veillard36e5cd52004-11-02 14:52:23 +0000954 if ((ctxt == NULL) || (value == NULL))
955 return(0);
Daniel Veillard1c732d22002-11-30 11:22:59 +0000956 if (ctxt->inputNr >= ctxt->inputMax) {
957 ctxt->inputMax *= 2;
958 ctxt->inputTab =
959 (xmlParserInputPtr *) xmlRealloc(ctxt->inputTab,
960 ctxt->inputMax *
961 sizeof(ctxt->inputTab[0]));
962 if (ctxt->inputTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000963 xmlErrMemory(ctxt, NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +0000964 return (0);
965 }
966 }
967 ctxt->inputTab[ctxt->inputNr] = value;
968 ctxt->input = value;
969 return (ctxt->inputNr++);
970}
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000971/**
Daniel Veillard1c732d22002-11-30 11:22:59 +0000972 * inputPop:
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000973 * @ctxt: an XML parser context
974 *
Daniel Veillard1c732d22002-11-30 11:22:59 +0000975 * Pops the top parser input from the input stack
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000976 *
Daniel Veillard1c732d22002-11-30 11:22:59 +0000977 * Returns the input just removed
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000978 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +0000979xmlParserInputPtr
Daniel Veillard1c732d22002-11-30 11:22:59 +0000980inputPop(xmlParserCtxtPtr ctxt)
981{
982 xmlParserInputPtr ret;
983
Daniel Veillard36e5cd52004-11-02 14:52:23 +0000984 if (ctxt == NULL)
985 return(NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +0000986 if (ctxt->inputNr <= 0)
Daniel Veillard24505b02005-07-28 23:49:35 +0000987 return (NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +0000988 ctxt->inputNr--;
989 if (ctxt->inputNr > 0)
990 ctxt->input = ctxt->inputTab[ctxt->inputNr - 1];
991 else
992 ctxt->input = NULL;
993 ret = ctxt->inputTab[ctxt->inputNr];
Daniel Veillard24505b02005-07-28 23:49:35 +0000994 ctxt->inputTab[ctxt->inputNr] = NULL;
Daniel Veillard1c732d22002-11-30 11:22:59 +0000995 return (ret);
996}
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000997/**
998 * nodePush:
999 * @ctxt: an XML parser context
Daniel Veillard9d06d302002-01-22 18:15:52 +00001000 * @value: the element node
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001001 *
1002 * Pushes a new element node on top of the node stack
Daniel Veillard9d06d302002-01-22 18:15:52 +00001003 *
1004 * Returns 0 in case of error, the index in the stack otherwise
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001005 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001006int
Daniel Veillard1c732d22002-11-30 11:22:59 +00001007nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value)
1008{
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001009 if (ctxt == NULL) return(0);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001010 if (ctxt->nodeNr >= ctxt->nodeMax) {
Daniel Veillardd0cf7f62004-11-09 16:17:02 +00001011 xmlNodePtr *tmp;
1012
1013 tmp = (xmlNodePtr *) xmlRealloc(ctxt->nodeTab,
1014 ctxt->nodeMax * 2 *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001015 sizeof(ctxt->nodeTab[0]));
Daniel Veillardd0cf7f62004-11-09 16:17:02 +00001016 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001017 xmlErrMemory(ctxt, NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001018 return (0);
1019 }
Daniel Veillardd0cf7f62004-11-09 16:17:02 +00001020 ctxt->nodeTab = tmp;
1021 ctxt->nodeMax *= 2;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001022 }
Daniel Veillard4aede2e2003-10-17 12:43:59 +00001023 if (((unsigned int) ctxt->nodeNr) > xmlParserMaxDepth) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00001024 xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
Daniel Veillard4aede2e2003-10-17 12:43:59 +00001025 "Excessive depth in document: change xmlParserMaxDepth = %d\n",
1026 xmlParserMaxDepth);
Daniel Veillard3b2e4e12003-02-03 08:52:58 +00001027 ctxt->instate = XML_PARSER_EOF;
Daniel Veillard3b2e4e12003-02-03 08:52:58 +00001028 return(0);
1029 }
Daniel Veillard1c732d22002-11-30 11:22:59 +00001030 ctxt->nodeTab[ctxt->nodeNr] = value;
1031 ctxt->node = value;
1032 return (ctxt->nodeNr++);
1033}
1034/**
1035 * nodePop:
1036 * @ctxt: an XML parser context
1037 *
1038 * Pops the top element node from the node stack
1039 *
1040 * Returns the node just removed
Owen Taylor3473f882001-02-23 17:55:21 +00001041 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001042xmlNodePtr
Daniel Veillard1c732d22002-11-30 11:22:59 +00001043nodePop(xmlParserCtxtPtr ctxt)
1044{
1045 xmlNodePtr ret;
1046
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001047 if (ctxt == NULL) return(NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001048 if (ctxt->nodeNr <= 0)
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001049 return (NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001050 ctxt->nodeNr--;
1051 if (ctxt->nodeNr > 0)
1052 ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1];
1053 else
1054 ctxt->node = NULL;
1055 ret = ctxt->nodeTab[ctxt->nodeNr];
Daniel Veillard24505b02005-07-28 23:49:35 +00001056 ctxt->nodeTab[ctxt->nodeNr] = NULL;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001057 return (ret);
1058}
Daniel Veillarda2351322004-06-27 12:08:10 +00001059
1060#ifdef LIBXML_PUSH_ENABLED
Daniel Veillard1c732d22002-11-30 11:22:59 +00001061/**
Daniel Veillarde57ec792003-09-10 10:50:59 +00001062 * nameNsPush:
1063 * @ctxt: an XML parser context
1064 * @value: the element name
1065 * @prefix: the element prefix
1066 * @URI: the element namespace name
1067 *
1068 * Pushes a new element name/prefix/URL on top of the name stack
1069 *
1070 * Returns -1 in case of error, the index in the stack otherwise
1071 */
1072static int
1073nameNsPush(xmlParserCtxtPtr ctxt, const xmlChar * value,
1074 const xmlChar *prefix, const xmlChar *URI, int nsNr)
1075{
1076 if (ctxt->nameNr >= ctxt->nameMax) {
1077 const xmlChar * *tmp;
1078 void **tmp2;
1079 ctxt->nameMax *= 2;
1080 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1081 ctxt->nameMax *
1082 sizeof(ctxt->nameTab[0]));
1083 if (tmp == NULL) {
1084 ctxt->nameMax /= 2;
1085 goto mem_error;
1086 }
1087 ctxt->nameTab = tmp;
1088 tmp2 = (void **) xmlRealloc((void * *)ctxt->pushTab,
1089 ctxt->nameMax * 3 *
1090 sizeof(ctxt->pushTab[0]));
1091 if (tmp2 == NULL) {
1092 ctxt->nameMax /= 2;
1093 goto mem_error;
1094 }
1095 ctxt->pushTab = tmp2;
1096 }
1097 ctxt->nameTab[ctxt->nameNr] = value;
1098 ctxt->name = value;
1099 ctxt->pushTab[ctxt->nameNr * 3] = (void *) prefix;
1100 ctxt->pushTab[ctxt->nameNr * 3 + 1] = (void *) URI;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +00001101 ctxt->pushTab[ctxt->nameNr * 3 + 2] = (void *) (long) nsNr;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001102 return (ctxt->nameNr++);
1103mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001104 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001105 return (-1);
1106}
1107/**
1108 * nameNsPop:
1109 * @ctxt: an XML parser context
1110 *
1111 * Pops the top element/prefix/URI name from the name stack
1112 *
1113 * Returns the name just removed
1114 */
1115static const xmlChar *
1116nameNsPop(xmlParserCtxtPtr ctxt)
1117{
1118 const xmlChar *ret;
1119
1120 if (ctxt->nameNr <= 0)
Daniel Veillard24505b02005-07-28 23:49:35 +00001121 return (NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001122 ctxt->nameNr--;
1123 if (ctxt->nameNr > 0)
1124 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1125 else
1126 ctxt->name = NULL;
1127 ret = ctxt->nameTab[ctxt->nameNr];
1128 ctxt->nameTab[ctxt->nameNr] = NULL;
1129 return (ret);
1130}
Daniel Veillarda2351322004-06-27 12:08:10 +00001131#endif /* LIBXML_PUSH_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +00001132
1133/**
Daniel Veillard1c732d22002-11-30 11:22:59 +00001134 * namePush:
1135 * @ctxt: an XML parser context
1136 * @value: the element name
1137 *
1138 * Pushes a new element name on top of the name stack
1139 *
Daniel Veillarde57ec792003-09-10 10:50:59 +00001140 * Returns -1 in case of error, the index in the stack otherwise
Daniel Veillard1c732d22002-11-30 11:22:59 +00001141 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001142int
Daniel Veillard2fdbd322003-08-18 12:15:38 +00001143namePush(xmlParserCtxtPtr ctxt, const xmlChar * value)
Daniel Veillard1c732d22002-11-30 11:22:59 +00001144{
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001145 if (ctxt == NULL) return (-1);
1146
Daniel Veillard1c732d22002-11-30 11:22:59 +00001147 if (ctxt->nameNr >= ctxt->nameMax) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00001148 const xmlChar * *tmp;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001149 ctxt->nameMax *= 2;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001150 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
Daniel Veillard1c732d22002-11-30 11:22:59 +00001151 ctxt->nameMax *
1152 sizeof(ctxt->nameTab[0]));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001153 if (tmp == NULL) {
1154 ctxt->nameMax /= 2;
1155 goto mem_error;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001156 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00001157 ctxt->nameTab = tmp;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001158 }
1159 ctxt->nameTab[ctxt->nameNr] = value;
1160 ctxt->name = value;
1161 return (ctxt->nameNr++);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001162mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001163 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001164 return (-1);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001165}
1166/**
1167 * namePop:
1168 * @ctxt: an XML parser context
1169 *
1170 * Pops the top element name from the name stack
1171 *
1172 * Returns the name just removed
1173 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001174const xmlChar *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001175namePop(xmlParserCtxtPtr ctxt)
1176{
Daniel Veillard2fdbd322003-08-18 12:15:38 +00001177 const xmlChar *ret;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001178
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001179 if ((ctxt == NULL) || (ctxt->nameNr <= 0))
1180 return (NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001181 ctxt->nameNr--;
1182 if (ctxt->nameNr > 0)
1183 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1184 else
1185 ctxt->name = NULL;
1186 ret = ctxt->nameTab[ctxt->nameNr];
Daniel Veillard24505b02005-07-28 23:49:35 +00001187 ctxt->nameTab[ctxt->nameNr] = NULL;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001188 return (ret);
1189}
Owen Taylor3473f882001-02-23 17:55:21 +00001190
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001191static int spacePush(xmlParserCtxtPtr ctxt, int val) {
Owen Taylor3473f882001-02-23 17:55:21 +00001192 if (ctxt->spaceNr >= ctxt->spaceMax) {
1193 ctxt->spaceMax *= 2;
1194 ctxt->spaceTab = (int *) xmlRealloc(ctxt->spaceTab,
1195 ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
1196 if (ctxt->spaceTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001197 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001198 return(0);
1199 }
1200 }
1201 ctxt->spaceTab[ctxt->spaceNr] = val;
1202 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr];
1203 return(ctxt->spaceNr++);
1204}
1205
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001206static int spacePop(xmlParserCtxtPtr ctxt) {
Owen Taylor3473f882001-02-23 17:55:21 +00001207 int ret;
1208 if (ctxt->spaceNr <= 0) return(0);
1209 ctxt->spaceNr--;
1210 if (ctxt->spaceNr > 0)
1211 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1];
1212 else
1213 ctxt->space = NULL;
1214 ret = ctxt->spaceTab[ctxt->spaceNr];
1215 ctxt->spaceTab[ctxt->spaceNr] = -1;
1216 return(ret);
1217}
1218
1219/*
1220 * Macros for accessing the content. Those should be used only by the parser,
1221 * and not exported.
1222 *
1223 * Dirty macros, i.e. one often need to make assumption on the context to
1224 * use them
1225 *
1226 * CUR_PTR return the current pointer to the xmlChar to be parsed.
1227 * To be used with extreme caution since operations consuming
1228 * characters may move the input buffer to a different location !
 *   CUR     returns the current xmlChar value, i.e. an 8 bit value.
 *           This should be used internally by the parser
 *           only to compare to ASCII values, otherwise it would break when
 *           running with UTF-8 encoding.
1233 * RAW same as CUR but in the input buffer, bypass any token
1234 * extraction that may have been done
 *   NXT(n)  returns the n'th next xmlChar. Same as CUR, it should be used only
 *           to compare on ASCII based substring.
1237 * SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
Daniel Veillard77a90a72003-03-22 00:04:05 +00001238 * strings without newlines within the parser.
 *   NEXT1   Skip 1 xmlChar, and must also be used only to skip 1 non-newline ASCII
 *           defined char within the parser.
Owen Taylor3473f882001-02-23 17:55:21 +00001241 * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
1242 *
1243 * NEXT Skip to the next character, this does the proper decoding
1244 * in UTF-8 mode. It also pop-up unfinished entities on the fly.
Daniel Veillard77a90a72003-03-22 00:04:05 +00001245 * NEXTL(l) Skip the current unicode character of l xmlChars long.
Owen Taylor3473f882001-02-23 17:55:21 +00001246 * CUR_CHAR(l) returns the current unicode character (int), set l
1247 * to the number of xmlChars used for the encoding [0-5].
1248 * CUR_SCHAR same but operate on a string instead of the context
1249 * COPY_BUF copy the current unicode char to the target buffer, increment
1250 * the index
1251 * GROW, SHRINK handling of input buffers
1252 */
1253
/* CUR_PTR: the current position in the current input */
#define CUR_PTR ctxt->input->cur
/* RAW and CUR: the byte found at the current position */
#define RAW (*CUR_PTR)
#define CUR (*CUR_PTR)
/* NXT(val): the byte found val positions after the current one */
#define NXT(val) CUR_PTR[(val)]
1258
/*
 * CMPn(s, c1, ..., cn): true when the first n bytes found at s are
 * exactly c1 ... cn.  The bytes are compared as unsigned char values
 * and the comparison stops at the first mismatch.
 * Every argument is parenthesized so that compound expressions can be
 * passed safely; s is evaluated once per byte compared.
 */
#define CMP4( s, c1, c2, c3, c4 ) \
  ( ((const unsigned char *) (s))[ 0 ] == (c1) && \
    ((const unsigned char *) (s))[ 1 ] == (c2) && \
    ((const unsigned char *) (s))[ 2 ] == (c3) && \
    ((const unsigned char *) (s))[ 3 ] == (c4) )
#define CMP5( s, c1, c2, c3, c4, c5 ) \
  ( CMP4( s, c1, c2, c3, c4 ) && \
    ((const unsigned char *) (s))[ 4 ] == (c5) )
#define CMP6( s, c1, c2, c3, c4, c5, c6 ) \
  ( CMP5( s, c1, c2, c3, c4, c5 ) && \
    ((const unsigned char *) (s))[ 5 ] == (c6) )
#define CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) \
  ( CMP6( s, c1, c2, c3, c4, c5, c6 ) && \
    ((const unsigned char *) (s))[ 6 ] == (c7) )
#define CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) \
  ( CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) && \
    ((const unsigned char *) (s))[ 7 ] == (c8) )
#define CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) \
  ( CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) && \
    ((const unsigned char *) (s))[ 8 ] == (c9) )
#define CMP10( s, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 ) \
  ( CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) && \
    ((const unsigned char *) (s))[ 9 ] == (c10) )
1276
/*
 * SKIP(val): move val bytes forward in the current input, counting them
 * in ctxt->nbChars and in the column.  Afterwards a '%' at the new
 * position triggers xmlParserHandlePEReference(), and an input which
 * is exhausted and cannot be refilled is popped.
 */
#define SKIP(val) do {							\
    ctxt->nbChars += (val);						\
    ctxt->input->cur += (val);						\
    ctxt->input->col += (val);						\
    if (*ctxt->input->cur == '%')					\
        xmlParserHandlePEReference(ctxt);				\
    if ((*ctxt->input->cur == 0) &&					\
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))		\
        xmlPopInput(ctxt);						\
  } while (0)
1284
/*
 * SKIPL(val): move val bytes forward in the current input one byte at a
 * time, so that line and column are kept accurate when newlines are
 * skipped.  Afterwards a '%' at the new position triggers
 * xmlParserHandlePEReference(), and an input which is exhausted and
 * cannot be refilled is popped.
 * val is parenthesized so that a compound expression can be passed.
 */
#define SKIPL(val) do {							\
    int skipl;								\
    for(skipl=0; skipl<(val); skipl++) {				\
        if (*(ctxt->input->cur) == '\n') {				\
            ctxt->input->line++; ctxt->input->col = 1;			\
        } else ctxt->input->col++;					\
        ctxt->nbChars++;						\
        ctxt->input->cur++;						\
    }									\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
    if ((*ctxt->input->cur == 0) &&					\
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))		\
        xmlPopInput(ctxt);						\
  } while (0)
1299
Daniel Veillarda880b122003-04-21 21:36:41 +00001300#define SHRINK if ((ctxt->progressive == 0) && \
Daniel Veillard6155d8a2003-08-19 15:01:28 +00001301 (ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \
1302 (ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \
Daniel Veillard46de64e2002-05-29 08:21:33 +00001303 xmlSHRINK (ctxt);
1304
1305static void xmlSHRINK (xmlParserCtxtPtr ctxt) {
1306 xmlParserInputShrink(ctxt->input);
1307 if ((*ctxt->input->cur == 0) &&
1308 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1309 xmlPopInput(ctxt);
Daniel Veillard48b2f892001-02-25 16:11:03 +00001310 }
Owen Taylor3473f882001-02-23 17:55:21 +00001311
Daniel Veillarda880b122003-04-21 21:36:41 +00001312#define GROW if ((ctxt->progressive == 0) && \
1313 (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK)) \
Daniel Veillard46de64e2002-05-29 08:21:33 +00001314 xmlGROW (ctxt);
1315
1316static void xmlGROW (xmlParserCtxtPtr ctxt) {
1317 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1318 if ((*ctxt->input->cur == 0) &&
1319 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1320 xmlPopInput(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +00001321}
Owen Taylor3473f882001-02-23 17:55:21 +00001322
/* SKIP_BLANKS: skip the blanks found at the current position */
#define SKIP_BLANKS xmlSkipBlankChars(ctxt)

/* NEXT: move to the next character through xmlNextChar() */
#define NEXT xmlNextChar(ctxt)

/*
 * NEXT1: move one byte forward, counting it in the column and in
 * ctxt->nbChars, and read more input if the end of the buffer is hit.
 * The expansion is a brace block, not a do/while statement.
 */
#define NEXT1 {								\
	ctxt->input->cur++;						\
	ctxt->input->col++;						\
	ctxt->nbChars++;						\
	if (*ctxt->input->cur == 0)					\
	    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);		\
    }

/*
 * NEXTL(l): move l bytes forward, updating line and column from the
 * byte at the starting position, then handle a '%' found at the new
 * position through xmlParserHandlePEReference().
 */
#define NEXTL(l) do {							\
    if (*(ctxt->input->cur) != '\n') {					\
	ctxt->input->col++;						\
    } else {								\
	ctxt->input->line++; ctxt->input->col = 1;			\
    }									\
    ctxt->input->cur += l;						\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
  } while (0)

/* CUR_CHAR(l): current character of the input, its length stored in l */
#define CUR_CHAR(l) xmlCurrentChar(ctxt, &l)
/* CUR_SCHAR(s, l): same as CUR_CHAR but reading from the string s */
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, s, &l)

/*
 * COPY_BUF(l,b,i,v): store the character v in the buffer b at index i
 * and advance i; a direct byte store is used when l is 1 and
 * xmlCopyCharMultiByte() otherwise.
 * The expansion is an unbraced if/else without its final ';'.
 */
#define COPY_BUF(l,b,i,v)						\
    if (l != 1) i += xmlCopyCharMultiByte(&b[i],v);			\
    else b[i++] = (xmlChar) v
Owen Taylor3473f882001-02-23 17:55:21 +00001349
1350/**
1351 * xmlSkipBlankChars:
1352 * @ctxt: the XML parser context
1353 *
1354 * skip all blanks character found at that point in the input streams.
1355 * It pops up finished entities in the process if allowable at that point.
1356 *
1357 * Returns the number of space chars skipped
1358 */
1359
1360int
1361xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
Daniel Veillard02141ea2001-04-30 11:46:40 +00001362 int res = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001363
1364 /*
1365 * It's Okay to use CUR/NEXT here since all the blanks are on
1366 * the ASCII range.
1367 */
Daniel Veillard02141ea2001-04-30 11:46:40 +00001368 if ((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) {
1369 const xmlChar *cur;
Owen Taylor3473f882001-02-23 17:55:21 +00001370 /*
Daniel Veillard02141ea2001-04-30 11:46:40 +00001371 * if we are in the document content, go really fast
Owen Taylor3473f882001-02-23 17:55:21 +00001372 */
Daniel Veillard02141ea2001-04-30 11:46:40 +00001373 cur = ctxt->input->cur;
William M. Brack76e95df2003-10-18 16:20:14 +00001374 while (IS_BLANK_CH(*cur)) {
Daniel Veillard02141ea2001-04-30 11:46:40 +00001375 if (*cur == '\n') {
1376 ctxt->input->line++; ctxt->input->col = 1;
1377 }
1378 cur++;
1379 res++;
1380 if (*cur == 0) {
1381 ctxt->input->cur = cur;
1382 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1383 cur = ctxt->input->cur;
1384 }
1385 }
1386 ctxt->input->cur = cur;
1387 } else {
1388 int cur;
1389 do {
1390 cur = CUR;
Daniel Veillard7da92702005-01-23 20:15:53 +00001391 while (IS_BLANK_CH(cur)) { /* CHECKED tstblanks.xml */
Daniel Veillard02141ea2001-04-30 11:46:40 +00001392 NEXT;
1393 cur = CUR;
1394 res++;
1395 }
1396 while ((cur == 0) && (ctxt->inputNr > 1) &&
1397 (ctxt->instate != XML_PARSER_COMMENT)) {
1398 xmlPopInput(ctxt);
1399 cur = CUR;
1400 }
1401 /*
1402 * Need to handle support of entities branching here
1403 */
1404 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);
1405 } while (IS_BLANK(cur)); /* CHECKED tstblanks.xml */
1406 }
Owen Taylor3473f882001-02-23 17:55:21 +00001407 return(res);
1408}
1409
1410/************************************************************************
1411 * *
1412 * Commodity functions to handle entities *
1413 * *
1414 ************************************************************************/
1415
1416/**
1417 * xmlPopInput:
1418 * @ctxt: an XML parser context
1419 *
1420 * xmlPopInput: the current input pointed by ctxt->input came to an end
1421 * pop it and return the next char.
1422 *
1423 * Returns the current xmlChar in the parser context
1424 */
1425xmlChar
1426xmlPopInput(xmlParserCtxtPtr ctxt) {
Daniel Veillarda82b1822004-11-08 16:24:57 +00001427 if ((ctxt == NULL) || (ctxt->inputNr <= 1)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00001428 if (xmlParserDebugEntities)
1429 xmlGenericError(xmlGenericErrorContext,
1430 "Popping input %d\n", ctxt->inputNr);
1431 xmlFreeInputStream(inputPop(ctxt));
Daniel Veillard561b7f82002-03-20 21:55:57 +00001432 if ((*ctxt->input->cur == 0) &&
Owen Taylor3473f882001-02-23 17:55:21 +00001433 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1434 return(xmlPopInput(ctxt));
1435 return(CUR);
1436}
1437
1438/**
1439 * xmlPushInput:
1440 * @ctxt: an XML parser context
1441 * @input: an XML parser input fragment (entity, XML fragment ...).
1442 *
1443 * xmlPushInput: switch to a new input stream which is stacked on top
1444 * of the previous one(s).
1445 */
1446void
1447xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
1448 if (input == NULL) return;
1449
1450 if (xmlParserDebugEntities) {
1451 if ((ctxt->input != NULL) && (ctxt->input->filename))
1452 xmlGenericError(xmlGenericErrorContext,
1453 "%s(%d): ", ctxt->input->filename,
1454 ctxt->input->line);
1455 xmlGenericError(xmlGenericErrorContext,
1456 "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
1457 }
1458 inputPush(ctxt, input);
1459 GROW;
1460}
1461
1462/**
1463 * xmlParseCharRef:
1464 * @ctxt: an XML parser context
1465 *
1466 * parse Reference declarations
1467 *
1468 * [66] CharRef ::= '&#' [0-9]+ ';' |
1469 * '&#x' [0-9a-fA-F]+ ';'
1470 *
1471 * [ WFC: Legal Character ]
1472 * Characters referred to using character references must match the
1473 * production for Char.
1474 *
1475 * Returns the value parsed (as an int), 0 in case of error
1476 */
1477int
1478xmlParseCharRef(xmlParserCtxtPtr ctxt) {
Daniel Veillard50582112001-03-26 22:52:16 +00001479 unsigned int val = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001480 int count = 0;
Daniel Veillard37fd3072004-06-03 11:22:31 +00001481 unsigned int outofrange = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001482
Owen Taylor3473f882001-02-23 17:55:21 +00001483 /*
1484 * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
1485 */
Daniel Veillard561b7f82002-03-20 21:55:57 +00001486 if ((RAW == '&') && (NXT(1) == '#') &&
Owen Taylor3473f882001-02-23 17:55:21 +00001487 (NXT(2) == 'x')) {
1488 SKIP(3);
1489 GROW;
1490 while (RAW != ';') { /* loop blocked by count */
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00001491 if (count++ > 20) {
1492 count = 0;
1493 GROW;
1494 }
1495 if ((RAW >= '0') && (RAW <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +00001496 val = val * 16 + (CUR - '0');
1497 else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
1498 val = val * 16 + (CUR - 'a') + 10;
1499 else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
1500 val = val * 16 + (CUR - 'A') + 10;
1501 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001502 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001503 val = 0;
1504 break;
1505 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00001506 if (val > 0x10FFFF)
1507 outofrange = val;
1508
Owen Taylor3473f882001-02-23 17:55:21 +00001509 NEXT;
1510 count++;
1511 }
1512 if (RAW == ';') {
1513 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
Daniel Veillard77a90a72003-03-22 00:04:05 +00001514 ctxt->input->col++;
Owen Taylor3473f882001-02-23 17:55:21 +00001515 ctxt->nbChars ++;
1516 ctxt->input->cur++;
1517 }
Daniel Veillard561b7f82002-03-20 21:55:57 +00001518 } else if ((RAW == '&') && (NXT(1) == '#')) {
Owen Taylor3473f882001-02-23 17:55:21 +00001519 SKIP(2);
1520 GROW;
1521 while (RAW != ';') { /* loop blocked by count */
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00001522 if (count++ > 20) {
1523 count = 0;
1524 GROW;
1525 }
1526 if ((RAW >= '0') && (RAW <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +00001527 val = val * 10 + (CUR - '0');
1528 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001529 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001530 val = 0;
1531 break;
1532 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00001533 if (val > 0x10FFFF)
1534 outofrange = val;
1535
Owen Taylor3473f882001-02-23 17:55:21 +00001536 NEXT;
1537 count++;
1538 }
1539 if (RAW == ';') {
1540 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
Daniel Veillard77a90a72003-03-22 00:04:05 +00001541 ctxt->input->col++;
Owen Taylor3473f882001-02-23 17:55:21 +00001542 ctxt->nbChars ++;
1543 ctxt->input->cur++;
1544 }
1545 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001546 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001547 }
1548
1549 /*
1550 * [ WFC: Legal Character ]
1551 * Characters referred to using character references must match the
1552 * production for Char.
1553 */
Daniel Veillard37fd3072004-06-03 11:22:31 +00001554 if ((IS_CHAR(val) && (outofrange == 0))) {
Owen Taylor3473f882001-02-23 17:55:21 +00001555 return(val);
1556 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00001557 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
1558 "xmlParseCharRef: invalid xmlChar value %d\n",
1559 val);
Owen Taylor3473f882001-02-23 17:55:21 +00001560 }
1561 return(0);
1562}
1563
1564/**
1565 * xmlParseStringCharRef:
1566 * @ctxt: an XML parser context
1567 * @str: a pointer to an index in the string
1568 *
1569 * parse Reference declarations, variant parsing from a string rather
1570 * than an an input flow.
1571 *
1572 * [66] CharRef ::= '&#' [0-9]+ ';' |
1573 * '&#x' [0-9a-fA-F]+ ';'
1574 *
1575 * [ WFC: Legal Character ]
1576 * Characters referred to using character references must match the
1577 * production for Char.
1578 *
1579 * Returns the value parsed (as an int), 0 in case of error, str will be
1580 * updated to the current value of the index
1581 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001582static int
Owen Taylor3473f882001-02-23 17:55:21 +00001583xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
1584 const xmlChar *ptr;
1585 xmlChar cur;
Daniel Veillard37fd3072004-06-03 11:22:31 +00001586 unsigned int val = 0;
1587 unsigned int outofrange = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001588
1589 if ((str == NULL) || (*str == NULL)) return(0);
1590 ptr = *str;
1591 cur = *ptr;
1592 if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
1593 ptr += 3;
1594 cur = *ptr;
1595 while (cur != ';') { /* Non input consuming loop */
1596 if ((cur >= '0') && (cur <= '9'))
1597 val = val * 16 + (cur - '0');
1598 else if ((cur >= 'a') && (cur <= 'f'))
1599 val = val * 16 + (cur - 'a') + 10;
1600 else if ((cur >= 'A') && (cur <= 'F'))
1601 val = val * 16 + (cur - 'A') + 10;
1602 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001603 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001604 val = 0;
1605 break;
1606 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00001607 if (val > 0x10FFFF)
1608 outofrange = val;
1609
Owen Taylor3473f882001-02-23 17:55:21 +00001610 ptr++;
1611 cur = *ptr;
1612 }
1613 if (cur == ';')
1614 ptr++;
1615 } else if ((cur == '&') && (ptr[1] == '#')){
1616 ptr += 2;
1617 cur = *ptr;
1618 while (cur != ';') { /* Non input consuming loops */
1619 if ((cur >= '0') && (cur <= '9'))
1620 val = val * 10 + (cur - '0');
1621 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001622 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001623 val = 0;
1624 break;
1625 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00001626 if (val > 0x10FFFF)
1627 outofrange = val;
1628
Owen Taylor3473f882001-02-23 17:55:21 +00001629 ptr++;
1630 cur = *ptr;
1631 }
1632 if (cur == ';')
1633 ptr++;
1634 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001635 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001636 return(0);
1637 }
1638 *str = ptr;
1639
1640 /*
1641 * [ WFC: Legal Character ]
1642 * Characters referred to using character references must match the
1643 * production for Char.
1644 */
Daniel Veillard37fd3072004-06-03 11:22:31 +00001645 if ((IS_CHAR(val) && (outofrange == 0))) {
Owen Taylor3473f882001-02-23 17:55:21 +00001646 return(val);
1647 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00001648 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
1649 "xmlParseStringCharRef: invalid xmlChar value %d\n",
1650 val);
Owen Taylor3473f882001-02-23 17:55:21 +00001651 }
1652 return(0);
1653}
1654
1655/**
Daniel Veillardf5582f12002-06-11 10:08:16 +00001656 * xmlNewBlanksWrapperInputStream:
1657 * @ctxt: an XML parser context
1658 * @entity: an Entity pointer
1659 *
1660 * Create a new input stream for wrapping
1661 * blanks around a PEReference
1662 *
1663 * Returns the new input stream or NULL
1664 */
1665
1666static void deallocblankswrapper (xmlChar *str) {xmlFree(str);}
1667
Daniel Veillardf4862f02002-09-10 11:13:43 +00001668static xmlParserInputPtr
Daniel Veillardf5582f12002-06-11 10:08:16 +00001669xmlNewBlanksWrapperInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
1670 xmlParserInputPtr input;
1671 xmlChar *buffer;
1672 size_t length;
1673 if (entity == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001674 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
1675 "xmlNewBlanksWrapperInputStream entity\n");
Daniel Veillardf5582f12002-06-11 10:08:16 +00001676 return(NULL);
1677 }
1678 if (xmlParserDebugEntities)
1679 xmlGenericError(xmlGenericErrorContext,
1680 "new blanks wrapper for entity: %s\n", entity->name);
1681 input = xmlNewInputStream(ctxt);
1682 if (input == NULL) {
1683 return(NULL);
1684 }
1685 length = xmlStrlen(entity->name) + 5;
Daniel Veillard3c908dc2003-04-19 00:07:51 +00001686 buffer = xmlMallocAtomic(length);
Daniel Veillardf5582f12002-06-11 10:08:16 +00001687 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001688 xmlErrMemory(ctxt, NULL);
Daniel Veillardf5582f12002-06-11 10:08:16 +00001689 return(NULL);
1690 }
1691 buffer [0] = ' ';
1692 buffer [1] = '%';
1693 buffer [length-3] = ';';
1694 buffer [length-2] = ' ';
1695 buffer [length-1] = 0;
1696 memcpy(buffer + 2, entity->name, length - 5);
1697 input->free = deallocblankswrapper;
1698 input->base = buffer;
1699 input->cur = buffer;
1700 input->length = length;
1701 input->end = &buffer[length];
1702 return(input);
1703}
1704
1705/**
Owen Taylor3473f882001-02-23 17:55:21 +00001706 * xmlParserHandlePEReference:
1707 * @ctxt: the parser context
1708 *
1709 * [69] PEReference ::= '%' Name ';'
1710 *
1711 * [ WFC: No Recursion ]
1712 * A parsed entity must not contain a recursive
1713 * reference to itself, either directly or indirectly.
1714 *
1715 * [ WFC: Entity Declared ]
1716 * In a document without any DTD, a document with only an internal DTD
1717 * subset which contains no parameter entity references, or a document
1718 * with "standalone='yes'", ... ... The declaration of a parameter
1719 * entity must precede any reference to it...
1720 *
1721 * [ VC: Entity Declared ]
1722 * In a document with an external subset or external parameter entities
1723 * with "standalone='no'", ... ... The declaration of a parameter entity
1724 * must precede any reference to it...
1725 *
1726 * [ WFC: In DTD ]
1727 * Parameter-entity references may only appear in the DTD.
1728 * NOTE: misleading but this is handled.
1729 *
1730 * A PEReference may have been detected in the current input stream
1731 * the handling is done accordingly to
1732 * http://www.w3.org/TR/REC-xml#entproc
1733 * i.e.
1734 * - Included in literal in entity values
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001735 * - Included as Parameter Entity reference within DTDs
Owen Taylor3473f882001-02-23 17:55:21 +00001736 */
1737void
1738xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00001739 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00001740 xmlEntityPtr entity = NULL;
1741 xmlParserInputPtr input;
1742
Owen Taylor3473f882001-02-23 17:55:21 +00001743 if (RAW != '%') return;
1744 switch(ctxt->instate) {
1745 case XML_PARSER_CDATA_SECTION:
1746 return;
1747 case XML_PARSER_COMMENT:
1748 return;
1749 case XML_PARSER_START_TAG:
1750 return;
1751 case XML_PARSER_END_TAG:
1752 return;
1753 case XML_PARSER_EOF:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001754 xmlFatalErr(ctxt, XML_ERR_PEREF_AT_EOF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001755 return;
1756 case XML_PARSER_PROLOG:
1757 case XML_PARSER_START:
1758 case XML_PARSER_MISC:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001759 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_PROLOG, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001760 return;
1761 case XML_PARSER_ENTITY_DECL:
1762 case XML_PARSER_CONTENT:
1763 case XML_PARSER_ATTRIBUTE_VALUE:
1764 case XML_PARSER_PI:
1765 case XML_PARSER_SYSTEM_LITERAL:
Daniel Veillard4a7ae502002-02-18 19:18:17 +00001766 case XML_PARSER_PUBLIC_LITERAL:
Owen Taylor3473f882001-02-23 17:55:21 +00001767 /* we just ignore it there */
1768 return;
1769 case XML_PARSER_EPILOG:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001770 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_EPILOG, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001771 return;
1772 case XML_PARSER_ENTITY_VALUE:
1773 /*
1774 * NOTE: in the case of entity values, we don't do the
1775 * substitution here since we need the literal
1776 * entity value to be able to save the internal
1777 * subset of the document.
1778 * This will be handled by xmlStringDecodeEntities
1779 */
1780 return;
1781 case XML_PARSER_DTD:
1782 /*
1783 * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
1784 * In the internal DTD subset, parameter-entity references
1785 * can occur only where markup declarations can occur, not
1786 * within markup declarations.
1787 * In that case this is handled in xmlParseMarkupDecl
1788 */
1789 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
1790 return;
William M. Brack76e95df2003-10-18 16:20:14 +00001791 if (IS_BLANK_CH(NXT(1)) || NXT(1) == 0)
Daniel Veillardf5582f12002-06-11 10:08:16 +00001792 return;
Owen Taylor3473f882001-02-23 17:55:21 +00001793 break;
1794 case XML_PARSER_IGNORE:
1795 return;
1796 }
1797
1798 NEXT;
1799 name = xmlParseName(ctxt);
1800 if (xmlParserDebugEntities)
1801 xmlGenericError(xmlGenericErrorContext,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001802 "PEReference: %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00001803 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001804 xmlFatalErr(ctxt, XML_ERR_PEREF_NO_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001805 } else {
1806 if (RAW == ';') {
1807 NEXT;
1808 if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL))
1809 entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
1810 if (entity == NULL) {
1811
1812 /*
1813 * [ WFC: Entity Declared ]
1814 * In a document without any DTD, a document with only an
1815 * internal DTD subset which contains no parameter entity
1816 * references, or a document with "standalone='yes'", ...
1817 * ... The declaration of a parameter entity must precede
1818 * any reference to it...
1819 */
1820 if ((ctxt->standalone == 1) ||
1821 ((ctxt->hasExternalSubset == 0) &&
1822 (ctxt->hasPErefs == 0))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00001823 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00001824 "PEReference: %%%s; not found\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00001825 } else {
1826 /*
1827 * [ VC: Entity Declared ]
1828 * In a document with an external subset or external
1829 * parameter entities with "standalone='no'", ...
1830 * ... The declaration of a parameter entity must precede
1831 * any reference to it...
1832 */
Daniel Veillard24eb9782003-10-04 21:08:09 +00001833 if ((ctxt->validate) && (ctxt->vctxt.error != NULL)) {
1834 xmlValidityError(ctxt, XML_WAR_UNDECLARED_ENTITY,
1835 "PEReference: %%%s; not found\n",
1836 name);
1837 } else
1838 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
1839 "PEReference: %%%s; not found\n",
1840 name, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001841 ctxt->valid = 0;
1842 }
Daniel Veillardf5582f12002-06-11 10:08:16 +00001843 } else if (ctxt->input->free != deallocblankswrapper) {
1844 input = xmlNewBlanksWrapperInputStream(ctxt, entity);
1845 xmlPushInput(ctxt, input);
Owen Taylor3473f882001-02-23 17:55:21 +00001846 } else {
1847 if ((entity->etype == XML_INTERNAL_PARAMETER_ENTITY) ||
1848 (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)) {
Daniel Veillard87a764e2001-06-20 17:41:10 +00001849 xmlChar start[4];
1850 xmlCharEncoding enc;
1851
Owen Taylor3473f882001-02-23 17:55:21 +00001852 /*
1853 * handle the extra spaces added before and after
1854 * c.f. http://www.w3.org/TR/REC-xml#as-PE
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001855 * this is done independently.
Owen Taylor3473f882001-02-23 17:55:21 +00001856 */
1857 input = xmlNewEntityInputStream(ctxt, entity);
1858 xmlPushInput(ctxt, input);
Daniel Veillard87a764e2001-06-20 17:41:10 +00001859
1860 /*
1861 * Get the 4 first bytes and decode the charset
1862 * if enc != XML_CHAR_ENCODING_NONE
1863 * plug some encoding conversion routines.
William M. Bracka0c48ad2004-04-16 15:58:29 +00001864 * Note that, since we may have some non-UTF8
1865 * encoding (like UTF16, bug 135229), the 'length'
1866 * is not known, but we can calculate based upon
1867 * the amount of data in the buffer.
Daniel Veillard87a764e2001-06-20 17:41:10 +00001868 */
1869 GROW
William M. Bracka0c48ad2004-04-16 15:58:29 +00001870 if ((ctxt->input->end - ctxt->input->cur)>=4) {
Daniel Veillarde059b892002-06-13 15:32:10 +00001871 start[0] = RAW;
1872 start[1] = NXT(1);
1873 start[2] = NXT(2);
1874 start[3] = NXT(3);
1875 enc = xmlDetectCharEncoding(start, 4);
1876 if (enc != XML_CHAR_ENCODING_NONE) {
1877 xmlSwitchEncoding(ctxt, enc);
1878 }
Daniel Veillard87a764e2001-06-20 17:41:10 +00001879 }
1880
Owen Taylor3473f882001-02-23 17:55:21 +00001881 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
Daniel Veillarda07050d2003-10-19 14:46:32 +00001882 (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l' )) &&
1883 (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +00001884 xmlParseTextDecl(ctxt);
1885 }
Owen Taylor3473f882001-02-23 17:55:21 +00001886 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00001887 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
1888 "PEReference: %s is not a parameter entity\n",
1889 name);
Owen Taylor3473f882001-02-23 17:55:21 +00001890 }
1891 }
1892 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001893 xmlFatalErr(ctxt, XML_ERR_PEREF_SEMICOL_MISSING, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001894 }
Owen Taylor3473f882001-02-23 17:55:21 +00001895 }
1896}
1897
/*
 * Macro used to grow the current buffer: the capacity is doubled.
 * The expansion relies on two names of the calling function: a
 * variable called <buffer>_size holding the capacity in xmlChar units,
 * and a mem_error label which is jumped to when the reallocation
 * fails (the buffer pointer is left untouched in that case).
 */
#define growBuffer(buffer) {						\
    xmlChar *grown;							\
    buffer##_size = buffer##_size * 2;					\
    grown = (xmlChar *) xmlRealloc(buffer,				\
                                   buffer##_size * sizeof(xmlChar));	\
    if (grown == NULL)							\
        goto mem_error;							\
    buffer = grown;							\
}
1909
1910/**
Daniel Veillard7a02cfe2003-09-25 12:18:34 +00001911 * xmlStringLenDecodeEntities:
Owen Taylor3473f882001-02-23 17:55:21 +00001912 * @ctxt: the parser context
1913 * @str: the input string
Daniel Veillarde57ec792003-09-10 10:50:59 +00001914 * @len: the string length
Owen Taylor3473f882001-02-23 17:55:21 +00001915 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
1916 * @end: an end marker xmlChar, 0 if none
1917 * @end2: an end marker xmlChar, 0 if none
1918 * @end3: an end marker xmlChar, 0 if none
1919 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001920 * Takes a entity string content and process to do the adequate substitutions.
Owen Taylor3473f882001-02-23 17:55:21 +00001921 *
1922 * [67] Reference ::= EntityRef | CharRef
1923 *
1924 * [69] PEReference ::= '%' Name ';'
1925 *
1926 * Returns A newly allocated string with the substitution done. The caller
1927 * must deallocate it !
1928 */
1929xmlChar *
Daniel Veillarde57ec792003-09-10 10:50:59 +00001930xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
1931 int what, xmlChar end, xmlChar end2, xmlChar end3) {
Owen Taylor3473f882001-02-23 17:55:21 +00001932 xmlChar *buffer = NULL;
1933 int buffer_size = 0;
1934
1935 xmlChar *current = NULL;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001936 const xmlChar *last;
Owen Taylor3473f882001-02-23 17:55:21 +00001937 xmlEntityPtr ent;
1938 int c,l;
1939 int nbchars = 0;
1940
Daniel Veillarda82b1822004-11-08 16:24:57 +00001941 if ((ctxt == NULL) || (str == NULL) || (len < 0))
Owen Taylor3473f882001-02-23 17:55:21 +00001942 return(NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001943 last = str + len;
Owen Taylor3473f882001-02-23 17:55:21 +00001944
1945 if (ctxt->depth > 40) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001946 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001947 return(NULL);
1948 }
1949
1950 /*
1951 * allocate a translation buffer.
1952 */
1953 buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
Daniel Veillard3c908dc2003-04-19 00:07:51 +00001954 buffer = (xmlChar *) xmlMallocAtomic(buffer_size * sizeof(xmlChar));
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001955 if (buffer == NULL) goto mem_error;
Owen Taylor3473f882001-02-23 17:55:21 +00001956
1957 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001958 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +00001959 * we are operating on already parsed values.
1960 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00001961 if (str < last)
1962 c = CUR_SCHAR(str, l);
1963 else
1964 c = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001965 while ((c != 0) && (c != end) && /* non input consuming loop */
1966 (c != end2) && (c != end3)) {
1967
1968 if (c == 0) break;
1969 if ((c == '&') && (str[1] == '#')) {
1970 int val = xmlParseStringCharRef(ctxt, &str);
1971 if (val != 0) {
1972 COPY_BUF(0,buffer,nbchars,val);
1973 }
1974 } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
1975 if (xmlParserDebugEntities)
1976 xmlGenericError(xmlGenericErrorContext,
1977 "String decoding Entity Reference: %.30s\n",
1978 str);
1979 ent = xmlParseStringEntityRef(ctxt, &str);
1980 if ((ent != NULL) &&
1981 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
1982 if (ent->content != NULL) {
1983 COPY_BUF(0,buffer,nbchars,ent->content[0]);
1984 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00001985 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
1986 "predefined entity has no content\n");
Owen Taylor3473f882001-02-23 17:55:21 +00001987 }
1988 } else if ((ent != NULL) && (ent->content != NULL)) {
1989 xmlChar *rep;
1990
1991 ctxt->depth++;
1992 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
1993 0, 0, 0);
1994 ctxt->depth--;
1995 if (rep != NULL) {
1996 current = rep;
1997 while (*current != 0) { /* non input consuming loop */
1998 buffer[nbchars++] = *current++;
1999 if (nbchars >
2000 buffer_size - XML_PARSER_BUFFER_SIZE) {
2001 growBuffer(buffer);
2002 }
2003 }
2004 xmlFree(rep);
2005 }
2006 } else if (ent != NULL) {
2007 int i = xmlStrlen(ent->name);
2008 const xmlChar *cur = ent->name;
2009
2010 buffer[nbchars++] = '&';
2011 if (nbchars > buffer_size - i - XML_PARSER_BUFFER_SIZE) {
2012 growBuffer(buffer);
2013 }
2014 for (;i > 0;i--)
2015 buffer[nbchars++] = *cur++;
2016 buffer[nbchars++] = ';';
2017 }
2018 } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
2019 if (xmlParserDebugEntities)
2020 xmlGenericError(xmlGenericErrorContext,
2021 "String decoding PE Reference: %.30s\n", str);
2022 ent = xmlParseStringPEReference(ctxt, &str);
2023 if (ent != NULL) {
2024 xmlChar *rep;
2025
2026 ctxt->depth++;
2027 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2028 0, 0, 0);
2029 ctxt->depth--;
2030 if (rep != NULL) {
2031 current = rep;
2032 while (*current != 0) { /* non input consuming loop */
2033 buffer[nbchars++] = *current++;
2034 if (nbchars >
2035 buffer_size - XML_PARSER_BUFFER_SIZE) {
2036 growBuffer(buffer);
2037 }
2038 }
2039 xmlFree(rep);
2040 }
2041 }
2042 } else {
2043 COPY_BUF(l,buffer,nbchars,c);
2044 str += l;
2045 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
2046 growBuffer(buffer);
2047 }
2048 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00002049 if (str < last)
2050 c = CUR_SCHAR(str, l);
2051 else
2052 c = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002053 }
2054 buffer[nbchars++] = 0;
2055 return(buffer);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002056
2057mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002058 xmlErrMemory(ctxt, NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002059 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002060}
2061
Daniel Veillarde57ec792003-09-10 10:50:59 +00002062/**
2063 * xmlStringDecodeEntities:
2064 * @ctxt: the parser context
2065 * @str: the input string
2066 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2067 * @end: an end marker xmlChar, 0 if none
2068 * @end2: an end marker xmlChar, 0 if none
2069 * @end3: an end marker xmlChar, 0 if none
2070 *
2071 * Takes a entity string content and process to do the adequate substitutions.
2072 *
2073 * [67] Reference ::= EntityRef | CharRef
2074 *
2075 * [69] PEReference ::= '%' Name ';'
2076 *
2077 * Returns A newly allocated string with the substitution done. The caller
2078 * must deallocate it !
2079 */
2080xmlChar *
2081xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
2082 xmlChar end, xmlChar end2, xmlChar end3) {
Daniel Veillarda82b1822004-11-08 16:24:57 +00002083 if ((ctxt == NULL) || (str == NULL)) return(NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00002084 return(xmlStringLenDecodeEntities(ctxt, str, xmlStrlen(str), what,
2085 end, end2, end3));
2086}
Owen Taylor3473f882001-02-23 17:55:21 +00002087
2088/************************************************************************
2089 * *
Owen Taylor3473f882001-02-23 17:55:21 +00002090 * Commodity functions, cleanup needed ? *
2091 * *
2092 ************************************************************************/
2093
2094/**
2095 * areBlanks:
2096 * @ctxt: an XML parser context
2097 * @str: a xmlChar *
2098 * @len: the size of @str
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002099 * @blank_chars: we know the chars are blanks
Owen Taylor3473f882001-02-23 17:55:21 +00002100 *
2101 * Is this a sequence of blank chars that one can ignore ?
2102 *
2103 * Returns 1 if ignorable 0 otherwise.
2104 */
2105
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002106static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2107 int blank_chars) {
Owen Taylor3473f882001-02-23 17:55:21 +00002108 int i, ret;
2109 xmlNodePtr lastChild;
2110
Daniel Veillard05c13a22001-09-09 08:38:09 +00002111 /*
2112 * Don't spend time trying to differentiate them, the same callback is
2113 * used !
2114 */
2115 if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters)
Daniel Veillard2f362242001-03-02 17:36:21 +00002116 return(0);
2117
Owen Taylor3473f882001-02-23 17:55:21 +00002118 /*
2119 * Check for xml:space value.
2120 */
2121 if (*(ctxt->space) == 1)
2122 return(0);
2123
2124 /*
2125 * Check that the string is made of blanks
2126 */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002127 if (blank_chars == 0) {
2128 for (i = 0;i < len;i++)
2129 if (!(IS_BLANK_CH(str[i]))) return(0);
2130 }
Owen Taylor3473f882001-02-23 17:55:21 +00002131
2132 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002133 * Look if the element is mixed content in the DTD if available
Owen Taylor3473f882001-02-23 17:55:21 +00002134 */
Daniel Veillard6dd398f2001-07-25 22:41:03 +00002135 if (ctxt->node == NULL) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002136 if (ctxt->myDoc != NULL) {
2137 ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
2138 if (ret == 0) return(1);
2139 if (ret == 1) return(0);
2140 }
2141
2142 /*
2143 * Otherwise, heuristic :-\
2144 */
Daniel Veillardabac41e2005-07-06 15:17:38 +00002145 if ((RAW != '<') && (RAW != 0xD)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002146 if ((ctxt->node->children == NULL) &&
2147 (RAW == '<') && (NXT(1) == '/')) return(0);
2148
2149 lastChild = xmlGetLastChild(ctxt->node);
2150 if (lastChild == NULL) {
Daniel Veillard7db37732001-07-12 01:20:08 +00002151 if ((ctxt->node->type != XML_ELEMENT_NODE) &&
2152 (ctxt->node->content != NULL)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002153 } else if (xmlNodeIsText(lastChild))
2154 return(0);
2155 else if ((ctxt->node->children != NULL) &&
2156 (xmlNodeIsText(ctxt->node->children)))
2157 return(0);
2158 return(1);
2159}
2160
Owen Taylor3473f882001-02-23 17:55:21 +00002161/************************************************************************
2162 * *
2163 * Extra stuff for namespace support *
2164 * Relates to http://www.w3.org/TR/WD-xml-names *
2165 * *
2166 ************************************************************************/
2167
2168/**
2169 * xmlSplitQName:
2170 * @ctxt: an XML parser context
 * @name:  an UTF8 encoded XML qualified name string
2172 * @prefix: a xmlChar **
2173 *
2174 * parse an UTF8 encoded XML qualified name string
2175 *
2176 * [NS 5] QName ::= (Prefix ':')? LocalPart
2177 *
2178 * [NS 6] Prefix ::= NCName
2179 *
2180 * [NS 7] LocalPart ::= NCName
2181 *
2182 * Returns the local part, and prefix is updated
2183 * to get the Prefix if any.
2184 */
2185
2186xmlChar *
2187xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
2188 xmlChar buf[XML_MAX_NAMELEN + 5];
2189 xmlChar *buffer = NULL;
2190 int len = 0;
2191 int max = XML_MAX_NAMELEN;
2192 xmlChar *ret = NULL;
2193 const xmlChar *cur = name;
2194 int c;
2195
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00002196 if (prefix == NULL) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002197 *prefix = NULL;
2198
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00002199 if (cur == NULL) return(NULL);
2200
Owen Taylor3473f882001-02-23 17:55:21 +00002201#ifndef XML_XML_NAMESPACE
2202 /* xml: prefix is not really a namespace */
2203 if ((cur[0] == 'x') && (cur[1] == 'm') &&
2204 (cur[2] == 'l') && (cur[3] == ':'))
2205 return(xmlStrdup(name));
2206#endif
2207
Daniel Veillard597bc482003-07-24 16:08:28 +00002208 /* nasty but well=formed */
Owen Taylor3473f882001-02-23 17:55:21 +00002209 if (cur[0] == ':')
2210 return(xmlStrdup(name));
2211
2212 c = *cur++;
2213 while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
2214 buf[len++] = c;
2215 c = *cur++;
2216 }
2217 if (len >= max) {
2218 /*
2219 * Okay someone managed to make a huge name, so he's ready to pay
2220 * for the processing speed.
2221 */
2222 max = len * 2;
2223
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002224 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002225 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002226 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002227 return(NULL);
2228 }
2229 memcpy(buffer, buf, len);
2230 while ((c != 0) && (c != ':')) { /* tested bigname.xml */
2231 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00002232 xmlChar *tmp;
2233
Owen Taylor3473f882001-02-23 17:55:21 +00002234 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00002235 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00002236 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00002237 if (tmp == NULL) {
2238 xmlFree(tmp);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002239 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002240 return(NULL);
2241 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00002242 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00002243 }
2244 buffer[len++] = c;
2245 c = *cur++;
2246 }
2247 buffer[len] = 0;
2248 }
2249
Daniel Veillard597bc482003-07-24 16:08:28 +00002250 /* nasty but well=formed
2251 if ((c == ':') && (*cur == 0)) {
2252 return(xmlStrdup(name));
2253 } */
2254
Owen Taylor3473f882001-02-23 17:55:21 +00002255 if (buffer == NULL)
2256 ret = xmlStrndup(buf, len);
2257 else {
2258 ret = buffer;
2259 buffer = NULL;
2260 max = XML_MAX_NAMELEN;
2261 }
2262
2263
2264 if (c == ':') {
Daniel Veillardbb284f42002-10-16 18:02:47 +00002265 c = *cur;
Owen Taylor3473f882001-02-23 17:55:21 +00002266 *prefix = ret;
Daniel Veillard597bc482003-07-24 16:08:28 +00002267 if (c == 0) {
Daniel Veillard8d73bcb2003-08-04 01:06:15 +00002268 return(xmlStrndup(BAD_CAST "", 0));
Daniel Veillard597bc482003-07-24 16:08:28 +00002269 }
Owen Taylor3473f882001-02-23 17:55:21 +00002270 len = 0;
2271
Daniel Veillardbb284f42002-10-16 18:02:47 +00002272 /*
2273 * Check that the first character is proper to start
2274 * a new name
2275 */
2276 if (!(((c >= 0x61) && (c <= 0x7A)) ||
2277 ((c >= 0x41) && (c <= 0x5A)) ||
2278 (c == '_') || (c == ':'))) {
2279 int l;
2280 int first = CUR_SCHAR(cur, l);
2281
2282 if (!IS_LETTER(first) && (first != '_')) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00002283 xmlFatalErrMsgStr(ctxt, XML_NS_ERR_QNAME,
Daniel Veillardbb284f42002-10-16 18:02:47 +00002284 "Name %s is not XML Namespace compliant\n",
Daniel Veillardbc92eca2003-09-15 09:48:06 +00002285 name);
Daniel Veillardbb284f42002-10-16 18:02:47 +00002286 }
2287 }
2288 cur++;
2289
Owen Taylor3473f882001-02-23 17:55:21 +00002290 while ((c != 0) && (len < max)) { /* tested bigname2.xml */
2291 buf[len++] = c;
2292 c = *cur++;
2293 }
2294 if (len >= max) {
2295 /*
2296 * Okay someone managed to make a huge name, so he's ready to pay
2297 * for the processing speed.
2298 */
2299 max = len * 2;
2300
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002301 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002302 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002303 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002304 return(NULL);
2305 }
2306 memcpy(buffer, buf, len);
2307 while (c != 0) { /* tested bigname2.xml */
2308 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00002309 xmlChar *tmp;
2310
Owen Taylor3473f882001-02-23 17:55:21 +00002311 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00002312 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00002313 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00002314 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002315 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00002316 xmlFree(buffer);
Owen Taylor3473f882001-02-23 17:55:21 +00002317 return(NULL);
2318 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00002319 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00002320 }
2321 buffer[len++] = c;
2322 c = *cur++;
2323 }
2324 buffer[len] = 0;
2325 }
2326
2327 if (buffer == NULL)
2328 ret = xmlStrndup(buf, len);
2329 else {
2330 ret = buffer;
2331 }
2332 }
2333
2334 return(ret);
2335}
2336
2337/************************************************************************
2338 * *
2339 * The parser itself *
2340 * Relates to http://www.w3.org/TR/REC-xml *
2341 * *
2342 ************************************************************************/
2343
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002344static const xmlChar * xmlParseNameComplex(xmlParserCtxtPtr ctxt);
Daniel Veillarde57ec792003-09-10 10:50:59 +00002345static xmlChar * xmlParseAttValueInternal(xmlParserCtxtPtr ctxt,
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002346 int *len, int *alloc, int normalize);
Daniel Veillard0fb18932003-09-07 09:14:37 +00002347
Owen Taylor3473f882001-02-23 17:55:21 +00002348/**
2349 * xmlParseName:
2350 * @ctxt: an XML parser context
2351 *
2352 * parse an XML name.
2353 *
2354 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
2355 * CombiningChar | Extender
2356 *
2357 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
2358 *
Daniel Veillard807b4de2004-09-26 14:42:56 +00002359 * [6] Names ::= Name (#x20 Name)*
Owen Taylor3473f882001-02-23 17:55:21 +00002360 *
2361 * Returns the Name parsed or NULL
2362 */
2363
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002364const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00002365xmlParseName(xmlParserCtxtPtr ctxt) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00002366 const xmlChar *in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002367 const xmlChar *ret;
Owen Taylor3473f882001-02-23 17:55:21 +00002368 int count = 0;
2369
2370 GROW;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002371
2372 /*
2373 * Accelerator for simple ASCII names
2374 */
2375 in = ctxt->input->cur;
2376 if (((*in >= 0x61) && (*in <= 0x7A)) ||
2377 ((*in >= 0x41) && (*in <= 0x5A)) ||
2378 (*in == '_') || (*in == ':')) {
2379 in++;
2380 while (((*in >= 0x61) && (*in <= 0x7A)) ||
2381 ((*in >= 0x41) && (*in <= 0x5A)) ||
2382 ((*in >= 0x30) && (*in <= 0x39)) ||
Daniel Veillard76d66f42001-05-16 21:05:17 +00002383 (*in == '_') || (*in == '-') ||
2384 (*in == ':') || (*in == '.'))
Daniel Veillard48b2f892001-02-25 16:11:03 +00002385 in++;
Daniel Veillard76d66f42001-05-16 21:05:17 +00002386 if ((*in > 0) && (*in < 0x80)) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00002387 count = in - ctxt->input->cur;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002388 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
Daniel Veillard48b2f892001-02-25 16:11:03 +00002389 ctxt->input->cur = in;
Daniel Veillard77a90a72003-03-22 00:04:05 +00002390 ctxt->nbChars += count;
2391 ctxt->input->col += count;
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002392 if (ret == NULL)
2393 xmlErrMemory(ctxt, NULL);
Daniel Veillard48b2f892001-02-25 16:11:03 +00002394 return(ret);
2395 }
2396 }
Daniel Veillard2f362242001-03-02 17:36:21 +00002397 return(xmlParseNameComplex(ctxt));
Daniel Veillard21a0f912001-02-25 19:54:14 +00002398}
Daniel Veillard48b2f892001-02-25 16:11:03 +00002399
Daniel Veillard46de64e2002-05-29 08:21:33 +00002400/**
2401 * xmlParseNameAndCompare:
2402 * @ctxt: an XML parser context
2403 *
2404 * parse an XML name and compares for match
2405 * (specialized for endtag parsing)
2406 *
Daniel Veillard46de64e2002-05-29 08:21:33 +00002407 * Returns NULL for an illegal name, (xmlChar*) 1 for success
2408 * and the name for mismatch
2409 */
2410
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002411static const xmlChar *
Daniel Veillard46de64e2002-05-29 08:21:33 +00002412xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002413 register const xmlChar *cmp = other;
2414 register const xmlChar *in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002415 const xmlChar *ret;
Daniel Veillard46de64e2002-05-29 08:21:33 +00002416
2417 GROW;
2418
2419 in = ctxt->input->cur;
2420 while (*in != 0 && *in == *cmp) {
2421 ++in;
2422 ++cmp;
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00002423 ctxt->input->col++;
Daniel Veillard46de64e2002-05-29 08:21:33 +00002424 }
William M. Brack76e95df2003-10-18 16:20:14 +00002425 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
Daniel Veillard46de64e2002-05-29 08:21:33 +00002426 /* success */
2427 ctxt->input->cur = in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002428 return (const xmlChar*) 1;
Daniel Veillard46de64e2002-05-29 08:21:33 +00002429 }
2430 /* failure (or end of input buffer), check with full function */
2431 ret = xmlParseName (ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00002432 /* strings coming from the dictionnary direct compare possible */
2433 if (ret == other) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002434 return (const xmlChar*) 1;
Daniel Veillard46de64e2002-05-29 08:21:33 +00002435 }
2436 return ret;
2437}
2438
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002439static const xmlChar *
Daniel Veillard21a0f912001-02-25 19:54:14 +00002440xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
Daniel Veillard21a0f912001-02-25 19:54:14 +00002441 int len = 0, l;
2442 int c;
2443 int count = 0;
2444
2445 /*
2446 * Handler for more complex cases
2447 */
2448 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00002449 c = CUR_CHAR(l);
2450 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
2451 (!IS_LETTER(c) && (c != '_') &&
2452 (c != ':'))) {
2453 return(NULL);
2454 }
2455
2456 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
William M. Brack871611b2003-10-18 04:53:14 +00002457 ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
Owen Taylor3473f882001-02-23 17:55:21 +00002458 (c == '.') || (c == '-') ||
2459 (c == '_') || (c == ':') ||
William M. Brack871611b2003-10-18 04:53:14 +00002460 (IS_COMBINING(c)) ||
2461 (IS_EXTENDER(c)))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002462 if (count++ > 100) {
2463 count = 0;
2464 GROW;
2465 }
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002466 len += l;
Owen Taylor3473f882001-02-23 17:55:21 +00002467 NEXTL(l);
2468 c = CUR_CHAR(l);
Owen Taylor3473f882001-02-23 17:55:21 +00002469 }
Daniel Veillard96688262005-08-23 18:14:12 +00002470 if ((*ctxt->input->cur == '\n') && (ctxt->input->cur[-1] == '\r'))
2471 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - (len + 1), len));
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002472 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
Owen Taylor3473f882001-02-23 17:55:21 +00002473}
2474
2475/**
2476 * xmlParseStringName:
2477 * @ctxt: an XML parser context
2478 * @str: a pointer to the string pointer (IN/OUT)
2479 *
2480 * parse an XML name.
2481 *
2482 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
2483 * CombiningChar | Extender
2484 *
2485 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
2486 *
Daniel Veillard807b4de2004-09-26 14:42:56 +00002487 * [6] Names ::= Name (#x20 Name)*
Owen Taylor3473f882001-02-23 17:55:21 +00002488 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002489 * Returns the Name parsed or NULL. The @str pointer
Owen Taylor3473f882001-02-23 17:55:21 +00002490 * is updated to the current location in the string.
2491 */
2492
Daniel Veillard56a4cb82001-03-24 17:00:36 +00002493static xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00002494xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
2495 xmlChar buf[XML_MAX_NAMELEN + 5];
2496 const xmlChar *cur = *str;
2497 int len = 0, l;
2498 int c;
2499
2500 c = CUR_SCHAR(cur, l);
William M. Brack871611b2003-10-18 04:53:14 +00002501 if (!IS_LETTER(c) && (c != '_') &&
Owen Taylor3473f882001-02-23 17:55:21 +00002502 (c != ':')) {
2503 return(NULL);
2504 }
2505
William M. Brack871611b2003-10-18 04:53:14 +00002506 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigentname.xml */
Owen Taylor3473f882001-02-23 17:55:21 +00002507 (c == '.') || (c == '-') ||
2508 (c == '_') || (c == ':') ||
William M. Brack871611b2003-10-18 04:53:14 +00002509 (IS_COMBINING(c)) ||
2510 (IS_EXTENDER(c))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002511 COPY_BUF(l,buf,len,c);
2512 cur += l;
2513 c = CUR_SCHAR(cur, l);
2514 if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
2515 /*
2516 * Okay someone managed to make a huge name, so he's ready to pay
2517 * for the processing speed.
2518 */
2519 xmlChar *buffer;
2520 int max = len * 2;
2521
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002522 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002523 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002524 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002525 return(NULL);
2526 }
2527 memcpy(buffer, buf, len);
William M. Brack871611b2003-10-18 04:53:14 +00002528 while ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
Daniel Veillard73b013f2003-09-30 12:36:01 +00002529 /* test bigentname.xml */
Owen Taylor3473f882001-02-23 17:55:21 +00002530 (c == '.') || (c == '-') ||
2531 (c == '_') || (c == ':') ||
William M. Brack871611b2003-10-18 04:53:14 +00002532 (IS_COMBINING(c)) ||
2533 (IS_EXTENDER(c))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002534 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00002535 xmlChar *tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00002536 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00002537 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00002538 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00002539 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002540 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00002541 xmlFree(buffer);
Owen Taylor3473f882001-02-23 17:55:21 +00002542 return(NULL);
2543 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00002544 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00002545 }
2546 COPY_BUF(l,buffer,len,c);
2547 cur += l;
2548 c = CUR_SCHAR(cur, l);
2549 }
2550 buffer[len] = 0;
2551 *str = cur;
2552 return(buffer);
2553 }
2554 }
2555 *str = cur;
2556 return(xmlStrndup(buf, len));
2557}
2558
2559/**
2560 * xmlParseNmtoken:
2561 * @ctxt: an XML parser context
2562 *
2563 * parse an XML Nmtoken.
2564 *
2565 * [7] Nmtoken ::= (NameChar)+
2566 *
Daniel Veillard807b4de2004-09-26 14:42:56 +00002567 * [8] Nmtokens ::= Nmtoken (#x20 Nmtoken)*
Owen Taylor3473f882001-02-23 17:55:21 +00002568 *
2569 * Returns the Nmtoken parsed or NULL
2570 */
2571
2572xmlChar *
2573xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
2574 xmlChar buf[XML_MAX_NAMELEN + 5];
2575 int len = 0, l;
2576 int c;
2577 int count = 0;
2578
2579 GROW;
2580 c = CUR_CHAR(l);
2581
William M. Brack871611b2003-10-18 04:53:14 +00002582 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */
Owen Taylor3473f882001-02-23 17:55:21 +00002583 (c == '.') || (c == '-') ||
2584 (c == '_') || (c == ':') ||
William M. Brack871611b2003-10-18 04:53:14 +00002585 (IS_COMBINING(c)) ||
2586 (IS_EXTENDER(c))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002587 if (count++ > 100) {
2588 count = 0;
2589 GROW;
2590 }
2591 COPY_BUF(l,buf,len,c);
2592 NEXTL(l);
2593 c = CUR_CHAR(l);
2594 if (len >= XML_MAX_NAMELEN) {
2595 /*
2596 * Okay someone managed to make a huge token, so he's ready to pay
2597 * for the processing speed.
2598 */
2599 xmlChar *buffer;
2600 int max = len * 2;
2601
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002602 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002603 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002604 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002605 return(NULL);
2606 }
2607 memcpy(buffer, buf, len);
William M. Brack871611b2003-10-18 04:53:14 +00002608 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */
Owen Taylor3473f882001-02-23 17:55:21 +00002609 (c == '.') || (c == '-') ||
2610 (c == '_') || (c == ':') ||
William M. Brack871611b2003-10-18 04:53:14 +00002611 (IS_COMBINING(c)) ||
2612 (IS_EXTENDER(c))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002613 if (count++ > 100) {
2614 count = 0;
2615 GROW;
2616 }
2617 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00002618 xmlChar *tmp;
2619
Owen Taylor3473f882001-02-23 17:55:21 +00002620 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00002621 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00002622 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00002623 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002624 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00002625 xmlFree(buffer);
Owen Taylor3473f882001-02-23 17:55:21 +00002626 return(NULL);
2627 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00002628 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00002629 }
2630 COPY_BUF(l,buffer,len,c);
2631 NEXTL(l);
2632 c = CUR_CHAR(l);
2633 }
2634 buffer[len] = 0;
2635 return(buffer);
2636 }
2637 }
2638 if (len == 0)
2639 return(NULL);
2640 return(xmlStrndup(buf, len));
2641}
2642
2643/**
2644 * xmlParseEntityValue:
2645 * @ctxt: an XML parser context
2646 * @orig: if non-NULL store a copy of the original entity value
2647 *
2648 * parse a value for ENTITY declarations
2649 *
2650 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
2651 * "'" ([^%&'] | PEReference | Reference)* "'"
2652 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002653 * Returns the EntityValue parsed with reference substituted or NULL
Owen Taylor3473f882001-02-23 17:55:21 +00002654 */
2655
2656xmlChar *
2657xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
2658 xmlChar *buf = NULL;
2659 int len = 0;
2660 int size = XML_PARSER_BUFFER_SIZE;
2661 int c, l;
2662 xmlChar stop;
2663 xmlChar *ret = NULL;
2664 const xmlChar *cur = NULL;
2665 xmlParserInputPtr input;
2666
2667 if (RAW == '"') stop = '"';
2668 else if (RAW == '\'') stop = '\'';
2669 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002670 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002671 return(NULL);
2672 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002673 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002674 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002675 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002676 return(NULL);
2677 }
2678
2679 /*
2680 * The content of the entity definition is copied in a buffer.
2681 */
2682
2683 ctxt->instate = XML_PARSER_ENTITY_VALUE;
2684 input = ctxt->input;
2685 GROW;
2686 NEXT;
2687 c = CUR_CHAR(l);
2688 /*
2689 * NOTE: 4.4.5 Included in Literal
2690 * When a parameter entity reference appears in a literal entity
2691 * value, ... a single or double quote character in the replacement
2692 * text is always treated as a normal data character and will not
2693 * terminate the literal.
2694 * In practice it means we stop the loop only when back at parsing
2695 * the initial entity and the quote is found
2696 */
William M. Brack871611b2003-10-18 04:53:14 +00002697 while ((IS_CHAR(c)) && ((c != stop) || /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00002698 (ctxt->input != input))) {
2699 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00002700 xmlChar *tmp;
2701
Owen Taylor3473f882001-02-23 17:55:21 +00002702 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00002703 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2704 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002705 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00002706 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00002707 return(NULL);
2708 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00002709 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00002710 }
2711 COPY_BUF(l,buf,len,c);
2712 NEXTL(l);
2713 /*
2714 * Pop-up of finished entities.
2715 */
2716 while ((RAW == 0) && (ctxt->inputNr > 1)) /* non input consuming */
2717 xmlPopInput(ctxt);
2718
2719 GROW;
2720 c = CUR_CHAR(l);
2721 if (c == 0) {
2722 GROW;
2723 c = CUR_CHAR(l);
2724 }
2725 }
2726 buf[len] = 0;
2727
2728 /*
2729 * Raise problem w.r.t. '&' and '%' being used in non-entities
2730 * reference constructs. Note Charref will be handled in
2731 * xmlStringDecodeEntities()
2732 */
2733 cur = buf;
2734 while (*cur != 0) { /* non input consuming */
2735 if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
2736 xmlChar *name;
2737 xmlChar tmp = *cur;
2738
2739 cur++;
2740 name = xmlParseStringName(ctxt, &cur);
2741 if ((name == NULL) || (*cur != ';')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00002742 xmlFatalErrMsgInt(ctxt, XML_ERR_ENTITY_CHAR_ERROR,
Owen Taylor3473f882001-02-23 17:55:21 +00002743 "EntityValue: '%c' forbidden except for entities references\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00002744 tmp);
Owen Taylor3473f882001-02-23 17:55:21 +00002745 }
Daniel Veillard5151c062001-10-23 13:10:19 +00002746 if ((tmp == '%') && (ctxt->inSubset == 1) &&
2747 (ctxt->inputNr == 1)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002748 xmlFatalErr(ctxt, XML_ERR_ENTITY_PE_INTERNAL, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002749 }
2750 if (name != NULL)
2751 xmlFree(name);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00002752 if (*cur == 0)
2753 break;
Owen Taylor3473f882001-02-23 17:55:21 +00002754 }
2755 cur++;
2756 }
2757
2758 /*
2759 * Then PEReference entities are substituted.
2760 */
2761 if (c != stop) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002762 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002763 xmlFree(buf);
2764 } else {
2765 NEXT;
2766 /*
2767 * NOTE: 4.4.7 Bypassed
2768 * When a general entity reference appears in the EntityValue in
2769 * an entity declaration, it is bypassed and left as is.
2770 * so XML_SUBSTITUTE_REF is not set here.
2771 */
2772 ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF,
2773 0, 0, 0);
2774 if (orig != NULL)
2775 *orig = buf;
2776 else
2777 xmlFree(buf);
2778 }
2779
2780 return(ret);
2781}
2782
2783/**
Daniel Veillard01c13b52002-12-10 15:19:08 +00002784 * xmlParseAttValueComplex:
2785 * @ctxt: an XML parser context
Daniel Veillard0fb18932003-09-07 09:14:37 +00002786 * @len: the resulting attribute len
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002787 * @normalize: wether to apply the inner normalization
Daniel Veillard01c13b52002-12-10 15:19:08 +00002788 *
2789 * parse a value for an attribute, this is the fallback function
2790 * of xmlParseAttValue() when the attribute parsing requires handling
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002791 * of non-ASCII characters, or normalization compaction.
Daniel Veillard01c13b52002-12-10 15:19:08 +00002792 *
2793 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
2794 */
Daniel Veillard0fb18932003-09-07 09:14:37 +00002795static xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002796xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) {
Daniel Veillarde72c7562002-05-31 09:47:30 +00002797 xmlChar limit = 0;
2798 xmlChar *buf = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00002799 int len = 0;
2800 int buf_size = 0;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002801 int c, l, in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002802 xmlChar *current = NULL;
2803 xmlEntityPtr ent;
2804
Owen Taylor3473f882001-02-23 17:55:21 +00002805 if (NXT(0) == '"') {
2806 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
2807 limit = '"';
2808 NEXT;
2809 } else if (NXT(0) == '\'') {
2810 limit = '\'';
2811 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
2812 NEXT;
2813 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002814 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002815 return(NULL);
2816 }
2817
2818 /*
2819 * allocate a translation buffer.
2820 */
2821 buf_size = XML_PARSER_BUFFER_SIZE;
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002822 buf = (xmlChar *) xmlMallocAtomic(buf_size * sizeof(xmlChar));
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002823 if (buf == NULL) goto mem_error;
Owen Taylor3473f882001-02-23 17:55:21 +00002824
2825 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002826 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +00002827 */
2828 c = CUR_CHAR(l);
Daniel Veillardfdc91562002-07-01 21:52:03 +00002829 while ((NXT(0) != limit) && /* checked */
2830 (c != '<')) {
Owen Taylor3473f882001-02-23 17:55:21 +00002831 if (c == 0) break;
Daniel Veillardfdc91562002-07-01 21:52:03 +00002832 if (c == '&') {
Daniel Veillard62998c02003-09-15 12:56:36 +00002833 in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002834 if (NXT(1) == '#') {
2835 int val = xmlParseCharRef(ctxt);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002836
Owen Taylor3473f882001-02-23 17:55:21 +00002837 if (val == '&') {
Daniel Veillard319a7422001-09-11 09:27:09 +00002838 if (ctxt->replaceEntities) {
2839 if (len > buf_size - 10) {
2840 growBuffer(buf);
2841 }
2842 buf[len++] = '&';
2843 } else {
2844 /*
2845 * The reparsing will be done in xmlStringGetNodeList()
2846 * called by the attribute() function in SAX.c
2847 */
Daniel Veillard319a7422001-09-11 09:27:09 +00002848 if (len > buf_size - 10) {
2849 growBuffer(buf);
2850 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002851 buf[len++] = '&';
2852 buf[len++] = '#';
2853 buf[len++] = '3';
2854 buf[len++] = '8';
2855 buf[len++] = ';';
Owen Taylor3473f882001-02-23 17:55:21 +00002856 }
2857 } else {
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00002858 if (len > buf_size - 10) {
2859 growBuffer(buf);
2860 }
Owen Taylor3473f882001-02-23 17:55:21 +00002861 len += xmlCopyChar(0, &buf[len], val);
2862 }
2863 } else {
2864 ent = xmlParseEntityRef(ctxt);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002865 if ((ent != NULL) &&
2866 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
2867 if (len > buf_size - 10) {
2868 growBuffer(buf);
2869 }
2870 if ((ctxt->replaceEntities == 0) &&
2871 (ent->content[0] == '&')) {
2872 buf[len++] = '&';
2873 buf[len++] = '#';
2874 buf[len++] = '3';
2875 buf[len++] = '8';
2876 buf[len++] = ';';
2877 } else {
2878 buf[len++] = ent->content[0];
2879 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002880 } else if ((ent != NULL) &&
2881 (ctxt->replaceEntities != 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00002882 xmlChar *rep;
2883
2884 if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
2885 rep = xmlStringDecodeEntities(ctxt, ent->content,
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002886 XML_SUBSTITUTE_REF,
2887 0, 0, 0);
Owen Taylor3473f882001-02-23 17:55:21 +00002888 if (rep != NULL) {
2889 current = rep;
2890 while (*current != 0) { /* non input consuming */
2891 buf[len++] = *current++;
2892 if (len > buf_size - 10) {
2893 growBuffer(buf);
2894 }
2895 }
2896 xmlFree(rep);
2897 }
2898 } else {
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00002899 if (len > buf_size - 10) {
2900 growBuffer(buf);
2901 }
Owen Taylor3473f882001-02-23 17:55:21 +00002902 if (ent->content != NULL)
2903 buf[len++] = ent->content[0];
2904 }
2905 } else if (ent != NULL) {
2906 int i = xmlStrlen(ent->name);
2907 const xmlChar *cur = ent->name;
2908
2909 /*
2910 * This may look absurd but is needed to detect
2911 * entities problems
2912 */
2913 if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
2914 (ent->content != NULL)) {
2915 xmlChar *rep;
2916 rep = xmlStringDecodeEntities(ctxt, ent->content,
2917 XML_SUBSTITUTE_REF, 0, 0, 0);
2918 if (rep != NULL)
2919 xmlFree(rep);
2920 }
2921
2922 /*
2923 * Just output the reference
2924 */
2925 buf[len++] = '&';
2926 if (len > buf_size - i - 10) {
2927 growBuffer(buf);
2928 }
2929 for (;i > 0;i--)
2930 buf[len++] = *cur++;
2931 buf[len++] = ';';
2932 }
2933 }
2934 } else {
2935 if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002936 if ((len != 0) || (!normalize)) {
2937 if ((!normalize) || (!in_space)) {
2938 COPY_BUF(l,buf,len,0x20);
2939 if (len > buf_size - 10) {
2940 growBuffer(buf);
2941 }
2942 }
2943 in_space = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002944 }
2945 } else {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002946 in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002947 COPY_BUF(l,buf,len,c);
2948 if (len > buf_size - 10) {
2949 growBuffer(buf);
2950 }
2951 }
2952 NEXTL(l);
2953 }
2954 GROW;
2955 c = CUR_CHAR(l);
2956 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002957 if ((in_space) && (normalize)) {
2958 while (buf[len - 1] == 0x20) len--;
2959 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00002960 buf[len] = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002961 if (RAW == '<') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002962 xmlFatalErr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002963 } else if (RAW != limit) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00002964 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
2965 "AttValue: ' expected\n");
Owen Taylor3473f882001-02-23 17:55:21 +00002966 } else
2967 NEXT;
Daniel Veillard0fb18932003-09-07 09:14:37 +00002968 if (attlen != NULL) *attlen = len;
Owen Taylor3473f882001-02-23 17:55:21 +00002969 return(buf);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002970
2971mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002972 xmlErrMemory(ctxt, NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002973 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002974}
2975
2976/**
Daniel Veillard0fb18932003-09-07 09:14:37 +00002977 * xmlParseAttValue:
2978 * @ctxt: an XML parser context
2979 *
2980 * parse a value for an attribute
2981 * Note: the parser won't do substitution of entities here, this
2982 * will be handled later in xmlStringGetNodeList
2983 *
2984 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
2985 * "'" ([^<&'] | Reference)* "'"
2986 *
2987 * 3.3.3 Attribute-Value Normalization:
2988 * Before the value of an attribute is passed to the application or
2989 * checked for validity, the XML processor must normalize it as follows:
2990 * - a character reference is processed by appending the referenced
2991 * character to the attribute value
2992 * - an entity reference is processed by recursively processing the
2993 * replacement text of the entity
2994 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
2995 * appending #x20 to the normalized value, except that only a single
2996 * #x20 is appended for a "#xD#xA" sequence that is part of an external
2997 * parsed entity or the literal entity value of an internal parsed entity
2998 * - other characters are processed by appending them to the normalized value
2999 * If the declared value is not CDATA, then the XML processor must further
3000 * process the normalized attribute value by discarding any leading and
3001 * trailing space (#x20) characters, and by replacing sequences of space
3002 * (#x20) characters by a single space (#x20) character.
3003 * All attributes for which no declaration has been read should be treated
3004 * by a non-validating parser as if declared CDATA.
3005 *
3006 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
3007 */
3008
3009
3010xmlChar *
3011xmlParseAttValue(xmlParserCtxtPtr ctxt) {
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00003012 if ((ctxt == NULL) || (ctxt->input == NULL)) return(NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003013 return(xmlParseAttValueInternal(ctxt, NULL, NULL, 0));
Daniel Veillard0fb18932003-09-07 09:14:37 +00003014}
3015
3016/**
Owen Taylor3473f882001-02-23 17:55:21 +00003017 * xmlParseSystemLiteral:
3018 * @ctxt: an XML parser context
3019 *
3020 * parse an XML Literal
3021 *
3022 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
3023 *
3024 * Returns the SystemLiteral parsed or NULL
3025 */
3026
3027xmlChar *
3028xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
3029 xmlChar *buf = NULL;
3030 int len = 0;
3031 int size = XML_PARSER_BUFFER_SIZE;
3032 int cur, l;
3033 xmlChar stop;
3034 int state = ctxt->instate;
3035 int count = 0;
3036
3037 SHRINK;
3038 if (RAW == '"') {
3039 NEXT;
3040 stop = '"';
3041 } else if (RAW == '\'') {
3042 NEXT;
3043 stop = '\'';
3044 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003045 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003046 return(NULL);
3047 }
3048
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003049 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003050 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003051 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003052 return(NULL);
3053 }
3054 ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
3055 cur = CUR_CHAR(l);
William M. Brack871611b2003-10-18 04:53:14 +00003056 while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00003057 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003058 xmlChar *tmp;
3059
Owen Taylor3473f882001-02-23 17:55:21 +00003060 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003061 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3062 if (tmp == NULL) {
3063 xmlFree(buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003064 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003065 ctxt->instate = (xmlParserInputState) state;
3066 return(NULL);
3067 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003068 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003069 }
3070 count++;
3071 if (count > 50) {
3072 GROW;
3073 count = 0;
3074 }
3075 COPY_BUF(l,buf,len,cur);
3076 NEXTL(l);
3077 cur = CUR_CHAR(l);
3078 if (cur == 0) {
3079 GROW;
3080 SHRINK;
3081 cur = CUR_CHAR(l);
3082 }
3083 }
3084 buf[len] = 0;
3085 ctxt->instate = (xmlParserInputState) state;
William M. Brack871611b2003-10-18 04:53:14 +00003086 if (!IS_CHAR(cur)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003087 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003088 } else {
3089 NEXT;
3090 }
3091 return(buf);
3092}
3093
3094/**
3095 * xmlParsePubidLiteral:
3096 * @ctxt: an XML parser context
3097 *
3098 * parse an XML public literal
3099 *
3100 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
3101 *
3102 * Returns the PubidLiteral parsed or NULL.
3103 */
3104
3105xmlChar *
3106xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
3107 xmlChar *buf = NULL;
3108 int len = 0;
3109 int size = XML_PARSER_BUFFER_SIZE;
3110 xmlChar cur;
3111 xmlChar stop;
3112 int count = 0;
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003113 xmlParserInputState oldstate = ctxt->instate;
Owen Taylor3473f882001-02-23 17:55:21 +00003114
3115 SHRINK;
3116 if (RAW == '"') {
3117 NEXT;
3118 stop = '"';
3119 } else if (RAW == '\'') {
3120 NEXT;
3121 stop = '\'';
3122 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003123 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003124 return(NULL);
3125 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003126 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003127 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003128 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003129 return(NULL);
3130 }
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003131 ctxt->instate = XML_PARSER_PUBLIC_LITERAL;
Owen Taylor3473f882001-02-23 17:55:21 +00003132 cur = CUR;
William M. Brack76e95df2003-10-18 16:20:14 +00003133 while ((IS_PUBIDCHAR_CH(cur)) && (cur != stop)) { /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00003134 if (len + 1 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003135 xmlChar *tmp;
3136
Owen Taylor3473f882001-02-23 17:55:21 +00003137 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003138 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3139 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003140 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00003141 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00003142 return(NULL);
3143 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003144 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003145 }
3146 buf[len++] = cur;
3147 count++;
3148 if (count > 50) {
3149 GROW;
3150 count = 0;
3151 }
3152 NEXT;
3153 cur = CUR;
3154 if (cur == 0) {
3155 GROW;
3156 SHRINK;
3157 cur = CUR;
3158 }
3159 }
3160 buf[len] = 0;
3161 if (cur != stop) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003162 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003163 } else {
3164 NEXT;
3165 }
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003166 ctxt->instate = oldstate;
Owen Taylor3473f882001-02-23 17:55:21 +00003167 return(buf);
3168}
3169
/*
 * Forward declaration: xmlParseCharData() calls this routine before its
 * definition appears further down in this file.
 */
void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata);
Daniel Veillard0a119eb2005-07-20 13:46:00 +00003171
/*
 * Byte classification table for the inner loop of the char data fast
 * path. An entry is non-zero (equal to its own index) when the byte can
 * be skipped as plain ASCII text; it is zero for every byte that must
 * stop the scan: control characters other than TAB (so LF and CR are
 * handled separately), '&', '<', ']' and all non-ASCII bytes.
 * Entries 0x80 to 0xFF are not listed and are therefore zero-initialized.
 */
static const unsigned char test_char_data[256] = {
    /* 0x00 */ 0,    0,    0,    0,    0,    0,    0,    0,
    /* 0x08 */ 0,    0x09, 0,    0,    0,    0,    0,    0,    /* TAB only */
    /* 0x10 */ 0,    0,    0,    0,    0,    0,    0,    0,
    /* 0x18 */ 0,    0,    0,    0,    0,    0,    0,    0,
    /* 0x20 */ 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0,    0x27, /* no '&' */
    /* 0x28 */ 0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
    /* 0x30 */ 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
    /* 0x38 */ 0x38, 0x39, 0x3A, 0x3B, 0,    0x3D, 0x3E, 0x3F, /* no '<' */
    /* 0x40 */ 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
    /* 0x48 */ 0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F,
    /* 0x50 */ 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
    /* 0x58 */ 0x58, 0x59, 0x5A, 0x5B, 0x5C, 0,    0x5E, 0x5F, /* no ']' */
    /* 0x60 */ 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
    /* 0x68 */ 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
    /* 0x70 */ 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
    /* 0x78 */ 0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F
    /* 0x80 - 0xFF: non-ASCII, implicitly zero */
};
3209
Owen Taylor3473f882001-02-23 17:55:21 +00003210/**
3211 * xmlParseCharData:
3212 * @ctxt: an XML parser context
3213 * @cdata: int indicating whether we are within a CDATA section
3214 *
3215 * parse a CharData section.
3216 * if we are within a CDATA section ']]>' marks an end of section.
3217 *
3218 * The right angle bracket (>) may be represented using the string "&gt;",
3219 * and must, for compatibility, be escaped using "&gt;" or a character
3220 * reference when it appears in the string "]]>" in content, when that
3221 * string is not marking the end of a CDATA section.
3222 *
3223 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
3224 */
3225
3226void
3227xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00003228 const xmlChar *in;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003229 int nbchar = 0;
Daniel Veillard50582112001-03-26 22:52:16 +00003230 int line = ctxt->input->line;
3231 int col = ctxt->input->col;
Daniel Veillard0714c5b2005-01-23 00:01:01 +00003232 int ccol;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003233
3234 SHRINK;
3235 GROW;
3236 /*
3237 * Accelerated common case where input don't need to be
3238 * modified before passing it to the handler.
3239 */
Daniel Veillardfdc91562002-07-01 21:52:03 +00003240 if (!cdata) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00003241 in = ctxt->input->cur;
3242 do {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003243get_more_space:
3244 while (*in == 0x20) in++;
3245 if (*in == 0xA) {
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00003246 ctxt->input->line++; ctxt->input->col = 1;
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003247 in++;
3248 while (*in == 0xA) {
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00003249 ctxt->input->line++; ctxt->input->col = 1;
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003250 in++;
3251 }
3252 goto get_more_space;
3253 }
3254 if (*in == '<') {
3255 nbchar = in - ctxt->input->cur;
3256 if (nbchar > 0) {
3257 const xmlChar *tmp = ctxt->input->cur;
3258 ctxt->input->cur = in;
3259
Daniel Veillard34099b42004-11-04 17:34:35 +00003260 if ((ctxt->sax != NULL) &&
3261 (ctxt->sax->ignorableWhitespace !=
3262 ctxt->sax->characters)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003263 if (areBlanks(ctxt, tmp, nbchar, 1)) {
Daniel Veillard32acf0c2005-03-31 14:12:37 +00003264 if (ctxt->sax->ignorableWhitespace != NULL)
3265 ctxt->sax->ignorableWhitespace(ctxt->userData,
3266 tmp, nbchar);
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003267 } else if (ctxt->sax->characters != NULL)
3268 ctxt->sax->characters(ctxt->userData,
3269 tmp, nbchar);
Daniel Veillard34099b42004-11-04 17:34:35 +00003270 } else if ((ctxt->sax != NULL) &&
3271 (ctxt->sax->characters != NULL)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003272 ctxt->sax->characters(ctxt->userData,
3273 tmp, nbchar);
3274 }
3275 }
3276 return;
3277 }
Daniel Veillard0714c5b2005-01-23 00:01:01 +00003278
Daniel Veillard3ed155f2001-04-29 19:56:59 +00003279get_more:
Daniel Veillard0714c5b2005-01-23 00:01:01 +00003280 ccol = ctxt->input->col;
Daniel Veillard0a119eb2005-07-20 13:46:00 +00003281 while (test_char_data[*in]) {
3282 in++;
3283 ccol++;
3284 }
Daniel Veillard0714c5b2005-01-23 00:01:01 +00003285 ctxt->input->col = ccol;
Daniel Veillard561b7f82002-03-20 21:55:57 +00003286 if (*in == 0xA) {
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00003287 ctxt->input->line++; ctxt->input->col = 1;
Daniel Veillard3ed155f2001-04-29 19:56:59 +00003288 in++;
Daniel Veillard561b7f82002-03-20 21:55:57 +00003289 while (*in == 0xA) {
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00003290 ctxt->input->line++; ctxt->input->col = 1;
Daniel Veillard3ed155f2001-04-29 19:56:59 +00003291 in++;
3292 }
3293 goto get_more;
Daniel Veillard561b7f82002-03-20 21:55:57 +00003294 }
3295 if (*in == ']') {
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00003296 if ((in[1] == ']') && (in[2] == '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003297 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00003298 ctxt->input->cur = in;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00003299 return;
3300 }
3301 in++;
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00003302 ctxt->input->col++;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00003303 goto get_more;
3304 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00003305 nbchar = in - ctxt->input->cur;
Daniel Veillard80f32572001-03-07 19:45:40 +00003306 if (nbchar > 0) {
Daniel Veillard34099b42004-11-04 17:34:35 +00003307 if ((ctxt->sax != NULL) &&
3308 (ctxt->sax->ignorableWhitespace !=
Daniel Veillard40412cd2003-09-03 13:28:32 +00003309 ctxt->sax->characters) &&
William M. Brack76e95df2003-10-18 16:20:14 +00003310 (IS_BLANK_CH(*ctxt->input->cur))) {
Daniel Veillarda7374592001-05-10 14:17:55 +00003311 const xmlChar *tmp = ctxt->input->cur;
3312 ctxt->input->cur = in;
Daniel Veillard40412cd2003-09-03 13:28:32 +00003313
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003314 if (areBlanks(ctxt, tmp, nbchar, 0)) {
Daniel Veillard32acf0c2005-03-31 14:12:37 +00003315 if (ctxt->sax->ignorableWhitespace != NULL)
3316 ctxt->sax->ignorableWhitespace(ctxt->userData,
3317 tmp, nbchar);
Daniel Veillard40412cd2003-09-03 13:28:32 +00003318 } else if (ctxt->sax->characters != NULL)
3319 ctxt->sax->characters(ctxt->userData,
3320 tmp, nbchar);
Daniel Veillard3ed27bd2001-06-17 17:58:17 +00003321 line = ctxt->input->line;
3322 col = ctxt->input->col;
Daniel Veillard34099b42004-11-04 17:34:35 +00003323 } else if (ctxt->sax != NULL) {
Daniel Veillard80f32572001-03-07 19:45:40 +00003324 if (ctxt->sax->characters != NULL)
3325 ctxt->sax->characters(ctxt->userData,
3326 ctxt->input->cur, nbchar);
Daniel Veillard3ed27bd2001-06-17 17:58:17 +00003327 line = ctxt->input->line;
3328 col = ctxt->input->col;
Daniel Veillard80f32572001-03-07 19:45:40 +00003329 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00003330 }
3331 ctxt->input->cur = in;
Daniel Veillard561b7f82002-03-20 21:55:57 +00003332 if (*in == 0xD) {
3333 in++;
3334 if (*in == 0xA) {
3335 ctxt->input->cur = in;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003336 in++;
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00003337 ctxt->input->line++; ctxt->input->col = 1;
Daniel Veillard561b7f82002-03-20 21:55:57 +00003338 continue; /* while */
Daniel Veillard48b2f892001-02-25 16:11:03 +00003339 }
Daniel Veillard561b7f82002-03-20 21:55:57 +00003340 in--;
3341 }
3342 if (*in == '<') {
3343 return;
3344 }
3345 if (*in == '&') {
3346 return;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003347 }
3348 SHRINK;
3349 GROW;
3350 in = ctxt->input->cur;
William M. Brackc07329e2003-09-08 01:57:30 +00003351 } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
Daniel Veillard48b2f892001-02-25 16:11:03 +00003352 nbchar = 0;
3353 }
Daniel Veillard50582112001-03-26 22:52:16 +00003354 ctxt->input->line = line;
3355 ctxt->input->col = col;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003356 xmlParseCharDataComplex(ctxt, cdata);
3357}
3358
Daniel Veillard01c13b52002-12-10 15:19:08 +00003359/**
3360 * xmlParseCharDataComplex:
3361 * @ctxt: an XML parser context
3362 * @cdata: int indicating whether we are within a CDATA section
3363 *
3364 * parse a CharData section.this is the fallback function
3365 * of xmlParseCharData() when the parsing requires handling
3366 * of non-ASCII characters.
3367 */
Daniel Veillard48b2f892001-02-25 16:11:03 +00003368void
3369xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) {
Owen Taylor3473f882001-02-23 17:55:21 +00003370 xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
3371 int nbchar = 0;
3372 int cur, l;
3373 int count = 0;
3374
3375 SHRINK;
3376 GROW;
3377 cur = CUR_CHAR(l);
Daniel Veillardfdc91562002-07-01 21:52:03 +00003378 while ((cur != '<') && /* checked */
3379 (cur != '&') &&
William M. Brack871611b2003-10-18 04:53:14 +00003380 (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
Owen Taylor3473f882001-02-23 17:55:21 +00003381 if ((cur == ']') && (NXT(1) == ']') &&
3382 (NXT(2) == '>')) {
3383 if (cdata) break;
3384 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003385 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003386 }
3387 }
3388 COPY_BUF(l,buf,nbchar,cur);
3389 if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
Daniel Veillard092643b2003-09-25 14:29:29 +00003390 buf[nbchar] = 0;
3391
Owen Taylor3473f882001-02-23 17:55:21 +00003392 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003393 * OK the segment is to be consumed as chars.
Owen Taylor3473f882001-02-23 17:55:21 +00003394 */
3395 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003396 if (areBlanks(ctxt, buf, nbchar, 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003397 if (ctxt->sax->ignorableWhitespace != NULL)
3398 ctxt->sax->ignorableWhitespace(ctxt->userData,
3399 buf, nbchar);
3400 } else {
3401 if (ctxt->sax->characters != NULL)
3402 ctxt->sax->characters(ctxt->userData, buf, nbchar);
3403 }
3404 }
3405 nbchar = 0;
3406 }
3407 count++;
3408 if (count > 50) {
3409 GROW;
3410 count = 0;
3411 }
3412 NEXTL(l);
3413 cur = CUR_CHAR(l);
3414 }
3415 if (nbchar != 0) {
Daniel Veillard092643b2003-09-25 14:29:29 +00003416 buf[nbchar] = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003417 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003418 * OK the segment is to be consumed as chars.
Owen Taylor3473f882001-02-23 17:55:21 +00003419 */
3420 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003421 if (areBlanks(ctxt, buf, nbchar, 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003422 if (ctxt->sax->ignorableWhitespace != NULL)
3423 ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
3424 } else {
3425 if (ctxt->sax->characters != NULL)
3426 ctxt->sax->characters(ctxt->userData, buf, nbchar);
3427 }
3428 }
3429 }
3430}
3431
3432/**
3433 * xmlParseExternalID:
3434 * @ctxt: an XML parser context
3435 * @publicID: a xmlChar** receiving PubidLiteral
3436 * @strict: indicate whether we should restrict parsing to only
3437 * production [75], see NOTE below
3438 *
3439 * Parse an External ID or a Public ID
3440 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003441 * NOTE: Productions [75] and [83] interact badly since [75] can generate
Owen Taylor3473f882001-02-23 17:55:21 +00003442 * 'PUBLIC' S PubidLiteral S SystemLiteral
3443 *
3444 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
3445 * | 'PUBLIC' S PubidLiteral S SystemLiteral
3446 *
3447 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
3448 *
3449 * Returns the function returns SystemLiteral and in the second
3450 * case publicID receives PubidLiteral, is strict is off
3451 * it is possible to return NULL and have publicID set.
3452 */
3453
3454xmlChar *
3455xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
3456 xmlChar *URI = NULL;
3457
3458 SHRINK;
Daniel Veillard146c9122001-03-22 15:22:27 +00003459
3460 *publicID = NULL;
Daniel Veillarda07050d2003-10-19 14:46:32 +00003461 if (CMP6(CUR_PTR, 'S', 'Y', 'S', 'T', 'E', 'M')) {
Owen Taylor3473f882001-02-23 17:55:21 +00003462 SKIP(6);
William M. Brack76e95df2003-10-18 16:20:14 +00003463 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003464 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
3465 "Space required after 'SYSTEM'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003466 }
3467 SKIP_BLANKS;
3468 URI = xmlParseSystemLiteral(ctxt);
3469 if (URI == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003470 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003471 }
Daniel Veillarda07050d2003-10-19 14:46:32 +00003472 } else if (CMP6(CUR_PTR, 'P', 'U', 'B', 'L', 'I', 'C')) {
Owen Taylor3473f882001-02-23 17:55:21 +00003473 SKIP(6);
William M. Brack76e95df2003-10-18 16:20:14 +00003474 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003475 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00003476 "Space required after 'PUBLIC'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003477 }
3478 SKIP_BLANKS;
3479 *publicID = xmlParsePubidLiteral(ctxt);
3480 if (*publicID == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003481 xmlFatalErr(ctxt, XML_ERR_PUBID_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003482 }
3483 if (strict) {
3484 /*
3485 * We don't handle [83] so "S SystemLiteral" is required.
3486 */
William M. Brack76e95df2003-10-18 16:20:14 +00003487 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003488 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00003489 "Space required after the Public Identifier\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003490 }
3491 } else {
3492 /*
3493 * We handle [83] so we return immediately, if
3494 * "S SystemLiteral" is not detected. From a purely parsing
3495 * point of view that's a nice mess.
3496 */
3497 const xmlChar *ptr;
3498 GROW;
3499
3500 ptr = CUR_PTR;
William M. Brack76e95df2003-10-18 16:20:14 +00003501 if (!IS_BLANK_CH(*ptr)) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003502
William M. Brack76e95df2003-10-18 16:20:14 +00003503 while (IS_BLANK_CH(*ptr)) ptr++; /* TODO: dangerous, fix ! */
Owen Taylor3473f882001-02-23 17:55:21 +00003504 if ((*ptr != '\'') && (*ptr != '"')) return(NULL);
3505 }
3506 SKIP_BLANKS;
3507 URI = xmlParseSystemLiteral(ctxt);
3508 if (URI == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003509 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003510 }
3511 }
3512 return(URI);
3513}
3514
3515/**
Daniel Veillard4c778d82005-01-23 17:37:44 +00003516 * xmlParseCommentComplex:
Owen Taylor3473f882001-02-23 17:55:21 +00003517 * @ctxt: an XML parser context
Daniel Veillard4c778d82005-01-23 17:37:44 +00003518 * @buf: the already parsed part of the buffer
3519 * @len: number of bytes filles in the buffer
3520 * @size: allocated size of the buffer
Owen Taylor3473f882001-02-23 17:55:21 +00003521 *
3522 * Skip an XML (SGML) comment <!-- .... -->
3523 * The spec says that "For compatibility, the string "--" (double-hyphen)
3524 * must not occur within comments. "
Daniel Veillard4c778d82005-01-23 17:37:44 +00003525 * This is the slow routine in case the accelerator for ascii didn't work
Owen Taylor3473f882001-02-23 17:55:21 +00003526 *
3527 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
3528 */
Daniel Veillard4c778d82005-01-23 17:37:44 +00003529static void
3530xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar *buf, int len, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +00003531 int q, ql;
3532 int r, rl;
3533 int cur, l;
Owen Taylor3473f882001-02-23 17:55:21 +00003534 xmlParserInputPtr input = ctxt->input;
3535 int count = 0;
3536
Owen Taylor3473f882001-02-23 17:55:21 +00003537 if (buf == NULL) {
Daniel Veillard4c778d82005-01-23 17:37:44 +00003538 len = 0;
3539 size = XML_PARSER_BUFFER_SIZE;
3540 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
3541 if (buf == NULL) {
3542 xmlErrMemory(ctxt, NULL);
3543 return;
3544 }
Owen Taylor3473f882001-02-23 17:55:21 +00003545 }
3546 q = CUR_CHAR(ql);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00003547 if (q == 0)
3548 goto not_terminated;
Owen Taylor3473f882001-02-23 17:55:21 +00003549 NEXTL(ql);
3550 r = CUR_CHAR(rl);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00003551 if (r == 0)
3552 goto not_terminated;
Owen Taylor3473f882001-02-23 17:55:21 +00003553 NEXTL(rl);
3554 cur = CUR_CHAR(l);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00003555 if (cur == 0)
3556 goto not_terminated;
William M. Brack871611b2003-10-18 04:53:14 +00003557 while (IS_CHAR(cur) && /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00003558 ((cur != '>') ||
3559 (r != '-') || (q != '-'))) {
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00003560 if ((r == '-') && (q == '-')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003561 xmlFatalErr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003562 }
3563 if (len + 5 >= size) {
William M. Bracka3215c72004-07-31 16:24:01 +00003564 xmlChar *new_buf;
Owen Taylor3473f882001-02-23 17:55:21 +00003565 size *= 2;
William M. Bracka3215c72004-07-31 16:24:01 +00003566 new_buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3567 if (new_buf == NULL) {
3568 xmlFree (buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003569 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003570 return;
3571 }
William M. Bracka3215c72004-07-31 16:24:01 +00003572 buf = new_buf;
Owen Taylor3473f882001-02-23 17:55:21 +00003573 }
3574 COPY_BUF(ql,buf,len,q);
3575 q = r;
3576 ql = rl;
3577 r = cur;
3578 rl = l;
3579
3580 count++;
3581 if (count > 50) {
3582 GROW;
3583 count = 0;
3584 }
3585 NEXTL(l);
3586 cur = CUR_CHAR(l);
3587 if (cur == 0) {
3588 SHRINK;
3589 GROW;
3590 cur = CUR_CHAR(l);
3591 }
3592 }
3593 buf[len] = 0;
William M. Brack871611b2003-10-18 04:53:14 +00003594 if (!IS_CHAR(cur)) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00003595 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00003596 "Comment not terminated \n<!--%.50s\n", buf);
Owen Taylor3473f882001-02-23 17:55:21 +00003597 xmlFree(buf);
3598 } else {
3599 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003600 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
3601 "Comment doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003602 }
3603 NEXT;
3604 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
3605 (!ctxt->disableSAX))
3606 ctxt->sax->comment(ctxt->userData, buf);
3607 xmlFree(buf);
3608 }
Daniel Veillard4aede2e2003-10-17 12:43:59 +00003609 return;
3610not_terminated:
3611 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
3612 "Comment not terminated\n", NULL);
3613 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00003614}
Daniel Veillard4c778d82005-01-23 17:37:44 +00003615/**
3616 * xmlParseComment:
3617 * @ctxt: an XML parser context
3618 *
3619 * Skip an XML (SGML) comment <!-- .... -->
3620 * The spec says that "For compatibility, the string "--" (double-hyphen)
3621 * must not occur within comments. "
3622 *
3623 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
3624 */
3625void
3626xmlParseComment(xmlParserCtxtPtr ctxt) {
3627 xmlChar *buf = NULL;
3628 int size = XML_PARSER_BUFFER_SIZE;
Daniel Veillard316a5c32005-01-23 22:56:39 +00003629 int len = 0;
Daniel Veillard4c778d82005-01-23 17:37:44 +00003630 xmlParserInputState state;
3631 const xmlChar *in;
3632 int nbchar = 0, ccol;
3633
3634 /*
3635 * Check that there is a comment right here.
3636 */
3637 if ((RAW != '<') || (NXT(1) != '!') ||
3638 (NXT(2) != '-') || (NXT(3) != '-')) return;
3639
3640 state = ctxt->instate;
3641 ctxt->instate = XML_PARSER_COMMENT;
3642 SKIP(4);
3643 SHRINK;
3644 GROW;
3645
3646 /*
3647 * Accelerated common case where input don't need to be
3648 * modified before passing it to the handler.
3649 */
3650 in = ctxt->input->cur;
3651 do {
3652 if (*in == 0xA) {
3653 ctxt->input->line++; ctxt->input->col = 1;
3654 in++;
3655 while (*in == 0xA) {
3656 ctxt->input->line++; ctxt->input->col = 1;
3657 in++;
3658 }
3659 }
3660get_more:
3661 ccol = ctxt->input->col;
3662 while (((*in > '-') && (*in <= 0x7F)) ||
3663 ((*in >= 0x20) && (*in < '-')) ||
3664 (*in == 0x09)) {
3665 in++;
3666 ccol++;
3667 }
3668 ctxt->input->col = ccol;
3669 if (*in == 0xA) {
3670 ctxt->input->line++; ctxt->input->col = 1;
3671 in++;
3672 while (*in == 0xA) {
3673 ctxt->input->line++; ctxt->input->col = 1;
3674 in++;
3675 }
3676 goto get_more;
3677 }
3678 nbchar = in - ctxt->input->cur;
3679 /*
3680 * save current set of data
3681 */
3682 if (nbchar > 0) {
3683 if ((ctxt->sax != NULL) &&
3684 (ctxt->sax->comment != NULL)) {
3685 if (buf == NULL) {
3686 if ((*in == '-') && (in[1] == '-'))
3687 size = nbchar + 1;
3688 else
3689 size = XML_PARSER_BUFFER_SIZE + nbchar;
3690 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
3691 if (buf == NULL) {
3692 xmlErrMemory(ctxt, NULL);
3693 ctxt->instate = state;
3694 return;
3695 }
3696 len = 0;
3697 } else if (len + nbchar + 1 >= size) {
3698 xmlChar *new_buf;
3699 size += len + nbchar + XML_PARSER_BUFFER_SIZE;
3700 new_buf = (xmlChar *) xmlRealloc(buf,
3701 size * sizeof(xmlChar));
3702 if (new_buf == NULL) {
3703 xmlFree (buf);
3704 xmlErrMemory(ctxt, NULL);
3705 ctxt->instate = state;
3706 return;
3707 }
3708 buf = new_buf;
3709 }
3710 memcpy(&buf[len], ctxt->input->cur, nbchar);
3711 len += nbchar;
3712 buf[len] = 0;
3713 }
3714 }
3715 ctxt->input->cur = in;
3716 if (*in == 0xA)
3717
3718 if (*in == 0xD) {
3719 in++;
3720 if (*in == 0xA) {
3721 ctxt->input->cur = in;
3722 in++;
3723 ctxt->input->line++; ctxt->input->col = 1;
3724 continue; /* while */
3725 }
3726 in--;
3727 }
3728 SHRINK;
3729 GROW;
3730 in = ctxt->input->cur;
3731 if (*in == '-') {
3732 if (in[1] == '-') {
3733 if (in[2] == '>') {
3734 SKIP(3);
3735 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
3736 (!ctxt->disableSAX)) {
3737 if (buf != NULL)
3738 ctxt->sax->comment(ctxt->userData, buf);
3739 else
3740 ctxt->sax->comment(ctxt->userData, BAD_CAST "");
3741 }
3742 if (buf != NULL)
3743 xmlFree(buf);
3744 ctxt->instate = state;
3745 return;
3746 }
3747 if (buf != NULL)
3748 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
3749 "Comment not terminated \n<!--%.50s\n",
3750 buf);
3751 else
3752 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
3753 "Comment not terminated \n", NULL);
3754 in++;
3755 ctxt->input->col++;
3756 }
3757 in++;
3758 ctxt->input->col++;
3759 goto get_more;
3760 }
3761 } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
3762 xmlParseCommentComplex(ctxt, buf, len, size);
3763 ctxt->instate = state;
3764 return;
3765}
3766
Owen Taylor3473f882001-02-23 17:55:21 +00003767
3768/**
3769 * xmlParsePITarget:
3770 * @ctxt: an XML parser context
3771 *
3772 * parse the name of a PI
3773 *
3774 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
3775 *
3776 * Returns the PITarget name or NULL
3777 */
3778
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003779const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00003780xmlParsePITarget(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003781 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00003782
3783 name = xmlParseName(ctxt);
3784 if ((name != NULL) &&
3785 ((name[0] == 'x') || (name[0] == 'X')) &&
3786 ((name[1] == 'm') || (name[1] == 'M')) &&
3787 ((name[2] == 'l') || (name[2] == 'L'))) {
3788 int i;
3789 if ((name[0] == 'x') && (name[1] == 'm') &&
3790 (name[2] == 'l') && (name[3] == 0)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003791 xmlFatalErrMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
Owen Taylor3473f882001-02-23 17:55:21 +00003792 "XML declaration allowed only at the start of the document\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003793 return(name);
3794 } else if (name[3] == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003795 xmlFatalErr(ctxt, XML_ERR_RESERVED_XML_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003796 return(name);
3797 }
3798 for (i = 0;;i++) {
3799 if (xmlW3CPIs[i] == NULL) break;
3800 if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
3801 return(name);
3802 }
Daniel Veillard24eb9782003-10-04 21:08:09 +00003803 xmlWarningMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
3804 "xmlParsePITarget: invalid name prefix 'xml'\n",
3805 NULL, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003806 }
3807 return(name);
3808}
3809
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00003810#ifdef LIBXML_CATALOG_ENABLED
3811/**
3812 * xmlParseCatalogPI:
3813 * @ctxt: an XML parser context
3814 * @catalog: the PI value string
3815 *
3816 * parse an XML Catalog Processing Instruction.
3817 *
3818 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
3819 *
3820 * Occurs only if allowed by the user and if happening in the Misc
3821 * part of the document before any doctype informations
3822 * This will add the given catalog to the parsing context in order
3823 * to be used if there is a resolution need further down in the document
3824 */
3825
3826static void
3827xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
3828 xmlChar *URL = NULL;
3829 const xmlChar *tmp, *base;
3830 xmlChar marker;
3831
3832 tmp = catalog;
William M. Brack76e95df2003-10-18 16:20:14 +00003833 while (IS_BLANK_CH(*tmp)) tmp++;
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00003834 if (xmlStrncmp(tmp, BAD_CAST"catalog", 7))
3835 goto error;
3836 tmp += 7;
William M. Brack76e95df2003-10-18 16:20:14 +00003837 while (IS_BLANK_CH(*tmp)) tmp++;
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00003838 if (*tmp != '=') {
3839 return;
3840 }
3841 tmp++;
William M. Brack76e95df2003-10-18 16:20:14 +00003842 while (IS_BLANK_CH(*tmp)) tmp++;
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00003843 marker = *tmp;
3844 if ((marker != '\'') && (marker != '"'))
3845 goto error;
3846 tmp++;
3847 base = tmp;
3848 while ((*tmp != 0) && (*tmp != marker)) tmp++;
3849 if (*tmp == 0)
3850 goto error;
3851 URL = xmlStrndup(base, tmp - base);
3852 tmp++;
William M. Brack76e95df2003-10-18 16:20:14 +00003853 while (IS_BLANK_CH(*tmp)) tmp++;
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00003854 if (*tmp != 0)
3855 goto error;
3856
3857 if (URL != NULL) {
3858 ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, URL);
3859 xmlFree(URL);
3860 }
3861 return;
3862
3863error:
Daniel Veillard24eb9782003-10-04 21:08:09 +00003864 xmlWarningMsg(ctxt, XML_WAR_CATALOG_PI,
3865 "Catalog PI syntax error: %s\n",
3866 catalog, NULL);
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00003867 if (URL != NULL)
3868 xmlFree(URL);
3869}
3870#endif
3871
Owen Taylor3473f882001-02-23 17:55:21 +00003872/**
3873 * xmlParsePI:
3874 * @ctxt: an XML parser context
3875 *
3876 * parse an XML Processing Instruction.
3877 *
3878 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
3879 *
 * The processing is transferred to SAX once parsed.
3881 */
3882
3883void
3884xmlParsePI(xmlParserCtxtPtr ctxt) {
3885 xmlChar *buf = NULL;
3886 int len = 0;
3887 int size = XML_PARSER_BUFFER_SIZE;
3888 int cur, l;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003889 const xmlChar *target;
Owen Taylor3473f882001-02-23 17:55:21 +00003890 xmlParserInputState state;
3891 int count = 0;
3892
3893 if ((RAW == '<') && (NXT(1) == '?')) {
3894 xmlParserInputPtr input = ctxt->input;
3895 state = ctxt->instate;
3896 ctxt->instate = XML_PARSER_PI;
3897 /*
3898 * this is a Processing Instruction.
3899 */
3900 SKIP(2);
3901 SHRINK;
3902
3903 /*
3904 * Parse the target name and check for special support like
3905 * namespace.
3906 */
3907 target = xmlParsePITarget(ctxt);
3908 if (target != NULL) {
3909 if ((RAW == '?') && (NXT(1) == '>')) {
3910 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003911 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
3912 "PI declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003913 }
3914 SKIP(2);
3915
3916 /*
3917 * SAX: PI detected.
3918 */
3919 if ((ctxt->sax) && (!ctxt->disableSAX) &&
3920 (ctxt->sax->processingInstruction != NULL))
3921 ctxt->sax->processingInstruction(ctxt->userData,
3922 target, NULL);
3923 ctxt->instate = state;
Owen Taylor3473f882001-02-23 17:55:21 +00003924 return;
3925 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003926 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003927 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003928 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003929 ctxt->instate = state;
3930 return;
3931 }
3932 cur = CUR;
3933 if (!IS_BLANK(cur)) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00003934 xmlFatalErrMsgStr(ctxt, XML_ERR_SPACE_REQUIRED,
3935 "ParsePI: PI %s space expected\n", target);
Owen Taylor3473f882001-02-23 17:55:21 +00003936 }
3937 SKIP_BLANKS;
3938 cur = CUR_CHAR(l);
William M. Brack871611b2003-10-18 04:53:14 +00003939 while (IS_CHAR(cur) && /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00003940 ((cur != '?') || (NXT(1) != '>'))) {
3941 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003942 xmlChar *tmp;
3943
Owen Taylor3473f882001-02-23 17:55:21 +00003944 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003945 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3946 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003947 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00003948 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00003949 ctxt->instate = state;
3950 return;
3951 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003952 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003953 }
3954 count++;
3955 if (count > 50) {
3956 GROW;
3957 count = 0;
3958 }
3959 COPY_BUF(l,buf,len,cur);
3960 NEXTL(l);
3961 cur = CUR_CHAR(l);
3962 if (cur == 0) {
3963 SHRINK;
3964 GROW;
3965 cur = CUR_CHAR(l);
3966 }
3967 }
3968 buf[len] = 0;
3969 if (cur != '?') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00003970 xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
3971 "ParsePI: PI %s never end ...\n", target);
Owen Taylor3473f882001-02-23 17:55:21 +00003972 } else {
3973 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003974 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
3975 "PI declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003976 }
3977 SKIP(2);
3978
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00003979#ifdef LIBXML_CATALOG_ENABLED
3980 if (((state == XML_PARSER_MISC) ||
3981 (state == XML_PARSER_START)) &&
3982 (xmlStrEqual(target, XML_CATALOG_PI))) {
3983 xmlCatalogAllow allow = xmlCatalogGetDefaults();
3984 if ((allow == XML_CATA_ALLOW_DOCUMENT) ||
3985 (allow == XML_CATA_ALLOW_ALL))
3986 xmlParseCatalogPI(ctxt, buf);
3987 }
3988#endif
3989
3990
Owen Taylor3473f882001-02-23 17:55:21 +00003991 /*
3992 * SAX: PI detected.
3993 */
3994 if ((ctxt->sax) && (!ctxt->disableSAX) &&
3995 (ctxt->sax->processingInstruction != NULL))
3996 ctxt->sax->processingInstruction(ctxt->userData,
3997 target, buf);
3998 }
3999 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00004000 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004001 xmlFatalErr(ctxt, XML_ERR_PI_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004002 }
4003 ctxt->instate = state;
4004 }
4005}
4006
4007/**
4008 * xmlParseNotationDecl:
4009 * @ctxt: an XML parser context
4010 *
4011 * parse a notation declaration
4012 *
4013 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
4014 *
4015 * Hence there is actually 3 choices:
4016 * 'PUBLIC' S PubidLiteral
4017 * 'PUBLIC' S PubidLiteral S SystemLiteral
4018 * and 'SYSTEM' S SystemLiteral
4019 *
4020 * See the NOTE on xmlParseExternalID().
4021 */
4022
4023void
4024xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004025 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00004026 xmlChar *Pubid;
4027 xmlChar *Systemid;
4028
Daniel Veillarda07050d2003-10-19 14:46:32 +00004029 if (CMP10(CUR_PTR, '<', '!', 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004030 xmlParserInputPtr input = ctxt->input;
4031 SHRINK;
4032 SKIP(10);
William M. Brack76e95df2003-10-18 16:20:14 +00004033 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004034 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4035 "Space required after '<!NOTATION'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004036 return;
4037 }
4038 SKIP_BLANKS;
4039
Daniel Veillard76d66f42001-05-16 21:05:17 +00004040 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004041 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004042 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004043 return;
4044 }
William M. Brack76e95df2003-10-18 16:20:14 +00004045 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004046 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004047 "Space required after the NOTATION name'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004048 return;
4049 }
4050 SKIP_BLANKS;
4051
4052 /*
4053 * Parse the IDs.
4054 */
4055 Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
4056 SKIP_BLANKS;
4057
4058 if (RAW == '>') {
4059 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004060 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4061 "Notation declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004062 }
4063 NEXT;
4064 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4065 (ctxt->sax->notationDecl != NULL))
4066 ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
4067 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004068 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004069 }
Owen Taylor3473f882001-02-23 17:55:21 +00004070 if (Systemid != NULL) xmlFree(Systemid);
4071 if (Pubid != NULL) xmlFree(Pubid);
4072 }
4073}
4074
4075/**
4076 * xmlParseEntityDecl:
4077 * @ctxt: an XML parser context
4078 *
4079 * parse <!ENTITY declarations
4080 *
4081 * [70] EntityDecl ::= GEDecl | PEDecl
4082 *
4083 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
4084 *
4085 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
4086 *
4087 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
4088 *
4089 * [74] PEDef ::= EntityValue | ExternalID
4090 *
4091 * [76] NDataDecl ::= S 'NDATA' S Name
4092 *
4093 * [ VC: Notation Declared ]
4094 * The Name must match the declared name of a notation.
4095 */
4096
4097void
4098xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004099 const xmlChar *name = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00004100 xmlChar *value = NULL;
4101 xmlChar *URI = NULL, *literal = NULL;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004102 const xmlChar *ndata = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00004103 int isParameter = 0;
4104 xmlChar *orig = NULL;
Daniel Veillardf5582f12002-06-11 10:08:16 +00004105 int skipped;
Owen Taylor3473f882001-02-23 17:55:21 +00004106
Daniel Veillard4c778d82005-01-23 17:37:44 +00004107 /* GROW; done in the caller */
Daniel Veillarda07050d2003-10-19 14:46:32 +00004108 if (CMP8(CUR_PTR, '<', '!', 'E', 'N', 'T', 'I', 'T', 'Y')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004109 xmlParserInputPtr input = ctxt->input;
Owen Taylor3473f882001-02-23 17:55:21 +00004110 SHRINK;
4111 SKIP(8);
Daniel Veillardf5582f12002-06-11 10:08:16 +00004112 skipped = SKIP_BLANKS;
4113 if (skipped == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004114 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4115 "Space required after '<!ENTITY'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004116 }
Owen Taylor3473f882001-02-23 17:55:21 +00004117
4118 if (RAW == '%') {
4119 NEXT;
Daniel Veillardf5582f12002-06-11 10:08:16 +00004120 skipped = SKIP_BLANKS;
4121 if (skipped == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004122 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4123 "Space required after '%'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004124 }
Owen Taylor3473f882001-02-23 17:55:21 +00004125 isParameter = 1;
4126 }
4127
Daniel Veillard76d66f42001-05-16 21:05:17 +00004128 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004129 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004130 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
4131 "xmlParseEntityDecl: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004132 return;
4133 }
Daniel Veillardf5582f12002-06-11 10:08:16 +00004134 skipped = SKIP_BLANKS;
4135 if (skipped == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004136 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4137 "Space required after the entity name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004138 }
Owen Taylor3473f882001-02-23 17:55:21 +00004139
Daniel Veillardf5582f12002-06-11 10:08:16 +00004140 ctxt->instate = XML_PARSER_ENTITY_DECL;
Owen Taylor3473f882001-02-23 17:55:21 +00004141 /*
4142 * handle the various case of definitions...
4143 */
4144 if (isParameter) {
4145 if ((RAW == '"') || (RAW == '\'')) {
4146 value = xmlParseEntityValue(ctxt, &orig);
4147 if (value) {
4148 if ((ctxt->sax != NULL) &&
4149 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
4150 ctxt->sax->entityDecl(ctxt->userData, name,
4151 XML_INTERNAL_PARAMETER_ENTITY,
4152 NULL, NULL, value);
4153 }
4154 } else {
4155 URI = xmlParseExternalID(ctxt, &literal, 1);
4156 if ((URI == NULL) && (literal == NULL)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004157 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004158 }
4159 if (URI) {
4160 xmlURIPtr uri;
4161
4162 uri = xmlParseURI((const char *) URI);
4163 if (uri == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00004164 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
4165 "Invalid URI: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00004166 /*
4167 * This really ought to be a well formedness error
4168 * but the XML Core WG decided otherwise c.f. issue
4169 * E26 of the XML erratas.
4170 */
Owen Taylor3473f882001-02-23 17:55:21 +00004171 } else {
4172 if (uri->fragment != NULL) {
Daniel Veillard4a7ae502002-02-18 19:18:17 +00004173 /*
4174 * Okay this is foolish to block those but not
4175 * invalid URIs.
4176 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004177 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004178 } else {
4179 if ((ctxt->sax != NULL) &&
4180 (!ctxt->disableSAX) &&
4181 (ctxt->sax->entityDecl != NULL))
4182 ctxt->sax->entityDecl(ctxt->userData, name,
4183 XML_EXTERNAL_PARAMETER_ENTITY,
4184 literal, URI, NULL);
4185 }
4186 xmlFreeURI(uri);
4187 }
4188 }
4189 }
4190 } else {
4191 if ((RAW == '"') || (RAW == '\'')) {
4192 value = xmlParseEntityValue(ctxt, &orig);
4193 if ((ctxt->sax != NULL) &&
4194 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
4195 ctxt->sax->entityDecl(ctxt->userData, name,
4196 XML_INTERNAL_GENERAL_ENTITY,
4197 NULL, NULL, value);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004198 /*
4199 * For expat compatibility in SAX mode.
4200 */
4201 if ((ctxt->myDoc == NULL) ||
4202 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
4203 if (ctxt->myDoc == NULL) {
4204 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
4205 }
4206 if (ctxt->myDoc->intSubset == NULL)
4207 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
4208 BAD_CAST "fake", NULL, NULL);
4209
Daniel Veillard1af9a412003-08-20 22:54:39 +00004210 xmlSAX2EntityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY,
4211 NULL, NULL, value);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004212 }
Owen Taylor3473f882001-02-23 17:55:21 +00004213 } else {
4214 URI = xmlParseExternalID(ctxt, &literal, 1);
4215 if ((URI == NULL) && (literal == NULL)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004216 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004217 }
4218 if (URI) {
4219 xmlURIPtr uri;
4220
4221 uri = xmlParseURI((const char *)URI);
4222 if (uri == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00004223 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
4224 "Invalid URI: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00004225 /*
4226 * This really ought to be a well formedness error
4227 * but the XML Core WG decided otherwise c.f. issue
4228 * E26 of the XML erratas.
4229 */
Owen Taylor3473f882001-02-23 17:55:21 +00004230 } else {
4231 if (uri->fragment != NULL) {
Daniel Veillard4a7ae502002-02-18 19:18:17 +00004232 /*
4233 * Okay this is foolish to block those but not
4234 * invalid URIs.
4235 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004236 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004237 }
4238 xmlFreeURI(uri);
4239 }
4240 }
William M. Brack76e95df2003-10-18 16:20:14 +00004241 if ((RAW != '>') && (!IS_BLANK_CH(CUR))) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004242 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4243 "Space required before 'NDATA'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004244 }
4245 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00004246 if (CMP5(CUR_PTR, 'N', 'D', 'A', 'T', 'A')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004247 SKIP(5);
William M. Brack76e95df2003-10-18 16:20:14 +00004248 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004249 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4250 "Space required after 'NDATA'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004251 }
4252 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004253 ndata = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004254 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4255 (ctxt->sax->unparsedEntityDecl != NULL))
4256 ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
4257 literal, URI, ndata);
4258 } else {
4259 if ((ctxt->sax != NULL) &&
4260 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
4261 ctxt->sax->entityDecl(ctxt->userData, name,
4262 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
4263 literal, URI, NULL);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004264 /*
4265 * For expat compatibility in SAX mode.
4266 * assuming the entity repalcement was asked for
4267 */
4268 if ((ctxt->replaceEntities != 0) &&
4269 ((ctxt->myDoc == NULL) ||
4270 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) {
4271 if (ctxt->myDoc == NULL) {
4272 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
4273 }
4274
4275 if (ctxt->myDoc->intSubset == NULL)
4276 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
4277 BAD_CAST "fake", NULL, NULL);
Daniel Veillard1af9a412003-08-20 22:54:39 +00004278 xmlSAX2EntityDecl(ctxt, name,
4279 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
4280 literal, URI, NULL);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004281 }
Owen Taylor3473f882001-02-23 17:55:21 +00004282 }
4283 }
4284 }
4285 SKIP_BLANKS;
4286 if (RAW != '>') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00004287 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00004288 "xmlParseEntityDecl: entity %s not terminated\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00004289 } else {
4290 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004291 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4292 "Entity declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004293 }
4294 NEXT;
4295 }
4296 if (orig != NULL) {
4297 /*
4298 * Ugly mechanism to save the raw entity value.
4299 */
4300 xmlEntityPtr cur = NULL;
4301
4302 if (isParameter) {
4303 if ((ctxt->sax != NULL) &&
4304 (ctxt->sax->getParameterEntity != NULL))
4305 cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
4306 } else {
4307 if ((ctxt->sax != NULL) &&
4308 (ctxt->sax->getEntity != NULL))
4309 cur = ctxt->sax->getEntity(ctxt->userData, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004310 if ((cur == NULL) && (ctxt->userData==ctxt)) {
Daniel Veillard1af9a412003-08-20 22:54:39 +00004311 cur = xmlSAX2GetEntity(ctxt, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004312 }
Owen Taylor3473f882001-02-23 17:55:21 +00004313 }
4314 if (cur != NULL) {
4315 if (cur->orig != NULL)
4316 xmlFree(orig);
4317 else
4318 cur->orig = orig;
4319 } else
4320 xmlFree(orig);
4321 }
Owen Taylor3473f882001-02-23 17:55:21 +00004322 if (value != NULL) xmlFree(value);
4323 if (URI != NULL) xmlFree(URI);
4324 if (literal != NULL) xmlFree(literal);
Owen Taylor3473f882001-02-23 17:55:21 +00004325 }
4326}
4327
4328/**
4329 * xmlParseDefaultDecl:
4330 * @ctxt: an XML parser context
4331 * @value: Receive a possible fixed default value for the attribute
4332 *
4333 * Parse an attribute default declaration
4334 *
4335 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
4336 *
4337 * [ VC: Required Attribute ]
4338 * if the default declaration is the keyword #REQUIRED, then the
4339 * attribute must be specified for all elements of the type in the
4340 * attribute-list declaration.
4341 *
4342 * [ VC: Attribute Default Legal ]
4343 * The declared default value must meet the lexical constraints of
4344 * the declared attribute type c.f. xmlValidateAttributeDecl()
4345 *
4346 * [ VC: Fixed Attribute Default ]
4347 * if an attribute has a default value declared with the #FIXED
4348 * keyword, instances of that attribute must match the default value.
4349 *
4350 * [ WFC: No < in Attribute Values ]
4351 * handled in xmlParseAttValue()
4352 *
4353 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
4354 * or XML_ATTRIBUTE_FIXED.
4355 */
4356
4357int
4358xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
4359 int val;
4360 xmlChar *ret;
4361
4362 *value = NULL;
Daniel Veillarda07050d2003-10-19 14:46:32 +00004363 if (CMP9(CUR_PTR, '#', 'R', 'E', 'Q', 'U', 'I', 'R', 'E', 'D')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004364 SKIP(9);
4365 return(XML_ATTRIBUTE_REQUIRED);
4366 }
Daniel Veillarda07050d2003-10-19 14:46:32 +00004367 if (CMP8(CUR_PTR, '#', 'I', 'M', 'P', 'L', 'I', 'E', 'D')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004368 SKIP(8);
4369 return(XML_ATTRIBUTE_IMPLIED);
4370 }
4371 val = XML_ATTRIBUTE_NONE;
Daniel Veillarda07050d2003-10-19 14:46:32 +00004372 if (CMP6(CUR_PTR, '#', 'F', 'I', 'X', 'E', 'D')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004373 SKIP(6);
4374 val = XML_ATTRIBUTE_FIXED;
William M. Brack76e95df2003-10-18 16:20:14 +00004375 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004376 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4377 "Space required after '#FIXED'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004378 }
4379 SKIP_BLANKS;
4380 }
4381 ret = xmlParseAttValue(ctxt);
4382 ctxt->instate = XML_PARSER_DTD;
4383 if (ret == NULL) {
William M. Brack7b9154b2003-09-27 19:23:50 +00004384 xmlFatalErrMsg(ctxt, (xmlParserErrors)ctxt->errNo,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004385 "Attribute default value declaration error\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004386 } else
4387 *value = ret;
4388 return(val);
4389}
4390
4391/**
4392 * xmlParseNotationType:
4393 * @ctxt: an XML parser context
4394 *
4395 * parse an Notation attribute type.
4396 *
4397 * Note: the leading 'NOTATION' S part has already being parsed...
4398 *
4399 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
4400 *
4401 * [ VC: Notation Attributes ]
4402 * Values of this type must match one of the notation names included
4403 * in the declaration; all notation names in the declaration must be declared.
4404 *
4405 * Returns: the notation attribute tree built while parsing
4406 */
4407
4408xmlEnumerationPtr
4409xmlParseNotationType(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004410 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00004411 xmlEnumerationPtr ret = NULL, last = NULL, cur;
4412
4413 if (RAW != '(') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004414 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004415 return(NULL);
4416 }
4417 SHRINK;
4418 do {
4419 NEXT;
4420 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004421 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004422 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004423 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
4424 "Name expected in NOTATION declaration\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004425 return(ret);
4426 }
4427 cur = xmlCreateEnumeration(name);
Owen Taylor3473f882001-02-23 17:55:21 +00004428 if (cur == NULL) return(ret);
4429 if (last == NULL) ret = last = cur;
4430 else {
4431 last->next = cur;
4432 last = cur;
4433 }
4434 SKIP_BLANKS;
4435 } while (RAW == '|');
4436 if (RAW != ')') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004437 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004438 if ((last != NULL) && (last != ret))
4439 xmlFreeEnumeration(last);
4440 return(ret);
4441 }
4442 NEXT;
4443 return(ret);
4444}
4445
4446/**
4447 * xmlParseEnumerationType:
4448 * @ctxt: an XML parser context
4449 *
4450 * parse an Enumeration attribute type.
4451 *
4452 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
4453 *
4454 * [ VC: Enumeration ]
4455 * Values of this type must match one of the Nmtoken tokens in
4456 * the declaration
4457 *
4458 * Returns: the enumeration attribute tree built while parsing
4459 */
4460
4461xmlEnumerationPtr
4462xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
4463 xmlChar *name;
4464 xmlEnumerationPtr ret = NULL, last = NULL, cur;
4465
4466 if (RAW != '(') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004467 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004468 return(NULL);
4469 }
4470 SHRINK;
4471 do {
4472 NEXT;
4473 SKIP_BLANKS;
4474 name = xmlParseNmtoken(ctxt);
4475 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004476 xmlFatalErr(ctxt, XML_ERR_NMTOKEN_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004477 return(ret);
4478 }
4479 cur = xmlCreateEnumeration(name);
4480 xmlFree(name);
4481 if (cur == NULL) return(ret);
4482 if (last == NULL) ret = last = cur;
4483 else {
4484 last->next = cur;
4485 last = cur;
4486 }
4487 SKIP_BLANKS;
4488 } while (RAW == '|');
4489 if (RAW != ')') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004490 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004491 return(ret);
4492 }
4493 NEXT;
4494 return(ret);
4495}
4496
4497/**
4498 * xmlParseEnumeratedType:
4499 * @ctxt: an XML parser context
4500 * @tree: the enumeration tree built while parsing
4501 *
4502 * parse an Enumerated attribute type.
4503 *
4504 * [57] EnumeratedType ::= NotationType | Enumeration
4505 *
4506 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
4507 *
4508 *
4509 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
4510 */
4511
4512int
4513xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
Daniel Veillarda07050d2003-10-19 14:46:32 +00004514 if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004515 SKIP(8);
William M. Brack76e95df2003-10-18 16:20:14 +00004516 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004517 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4518 "Space required after 'NOTATION'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004519 return(0);
4520 }
4521 SKIP_BLANKS;
4522 *tree = xmlParseNotationType(ctxt);
4523 if (*tree == NULL) return(0);
4524 return(XML_ATTRIBUTE_NOTATION);
4525 }
4526 *tree = xmlParseEnumerationType(ctxt);
4527 if (*tree == NULL) return(0);
4528 return(XML_ATTRIBUTE_ENUMERATION);
4529}
4530
4531/**
4532 * xmlParseAttributeType:
4533 * @ctxt: an XML parser context
4534 * @tree: the enumeration tree built while parsing
4535 *
4536 * parse the Attribute list def for an element
4537 *
4538 * [54] AttType ::= StringType | TokenizedType | EnumeratedType
4539 *
4540 * [55] StringType ::= 'CDATA'
4541 *
4542 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
4543 * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
4544 *
4545 * Validity constraints for attribute values syntax are checked in
4546 * xmlValidateAttributeValue()
4547 *
4548 * [ VC: ID ]
4549 * Values of type ID must match the Name production. A name must not
4550 * appear more than once in an XML document as a value of this type;
4551 * i.e., ID values must uniquely identify the elements which bear them.
4552 *
4553 * [ VC: One ID per Element Type ]
4554 * No element type may have more than one ID attribute specified.
4555 *
4556 * [ VC: ID Attribute Default ]
4557 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
4558 *
4559 * [ VC: IDREF ]
4560 * Values of type IDREF must match the Name production, and values
4561 * of type IDREFS must match Names; each IDREF Name must match the value
4562 * of an ID attribute on some element in the XML document; i.e. IDREF
4563 * values must match the value of some ID attribute.
4564 *
4565 * [ VC: Entity Name ]
4566 * Values of type ENTITY must match the Name production, values
4567 * of type ENTITIES must match Names; each Entity Name must match the
4568 * name of an unparsed entity declared in the DTD.
4569 *
4570 * [ VC: Name Token ]
4571 * Values of type NMTOKEN must match the Nmtoken production; values
4572 * of type NMTOKENS must match Nmtokens.
4573 *
4574 * Returns the attribute type
4575 */
4576int
4577xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
4578 SHRINK;
Daniel Veillarda07050d2003-10-19 14:46:32 +00004579 if (CMP5(CUR_PTR, 'C', 'D', 'A', 'T', 'A')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004580 SKIP(5);
4581 return(XML_ATTRIBUTE_CDATA);
Daniel Veillarda07050d2003-10-19 14:46:32 +00004582 } else if (CMP6(CUR_PTR, 'I', 'D', 'R', 'E', 'F', 'S')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004583 SKIP(6);
4584 return(XML_ATTRIBUTE_IDREFS);
Daniel Veillarda07050d2003-10-19 14:46:32 +00004585 } else if (CMP5(CUR_PTR, 'I', 'D', 'R', 'E', 'F')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004586 SKIP(5);
4587 return(XML_ATTRIBUTE_IDREF);
4588 } else if ((RAW == 'I') && (NXT(1) == 'D')) {
4589 SKIP(2);
4590 return(XML_ATTRIBUTE_ID);
Daniel Veillarda07050d2003-10-19 14:46:32 +00004591 } else if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004592 SKIP(6);
4593 return(XML_ATTRIBUTE_ENTITY);
Daniel Veillarda07050d2003-10-19 14:46:32 +00004594 } else if (CMP8(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'I', 'E', 'S')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004595 SKIP(8);
4596 return(XML_ATTRIBUTE_ENTITIES);
Daniel Veillarda07050d2003-10-19 14:46:32 +00004597 } else if (CMP8(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N', 'S')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004598 SKIP(8);
4599 return(XML_ATTRIBUTE_NMTOKENS);
Daniel Veillarda07050d2003-10-19 14:46:32 +00004600 } else if (CMP7(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004601 SKIP(7);
4602 return(XML_ATTRIBUTE_NMTOKEN);
4603 }
4604 return(xmlParseEnumeratedType(ctxt, tree));
4605}
4606
4607/**
4608 * xmlParseAttributeListDecl:
4609 * @ctxt: an XML parser context
4610 *
4611 * : parse the Attribute list def for an element
4612 *
4613 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
4614 *
4615 * [53] AttDef ::= S Name S AttType S DefaultDecl
4616 *
4617 */
4618void
4619xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004620 const xmlChar *elemName;
4621 const xmlChar *attrName;
Owen Taylor3473f882001-02-23 17:55:21 +00004622 xmlEnumerationPtr tree;
4623
Daniel Veillarda07050d2003-10-19 14:46:32 +00004624 if (CMP9(CUR_PTR, '<', '!', 'A', 'T', 'T', 'L', 'I', 'S', 'T')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004625 xmlParserInputPtr input = ctxt->input;
4626
4627 SKIP(9);
William M. Brack76e95df2003-10-18 16:20:14 +00004628 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004629 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004630 "Space required after '<!ATTLIST'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004631 }
4632 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004633 elemName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004634 if (elemName == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004635 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
4636 "ATTLIST: no name for Element\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004637 return;
4638 }
4639 SKIP_BLANKS;
4640 GROW;
4641 while (RAW != '>') {
4642 const xmlChar *check = CUR_PTR;
4643 int type;
4644 int def;
4645 xmlChar *defaultValue = NULL;
4646
4647 GROW;
4648 tree = NULL;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004649 attrName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004650 if (attrName == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004651 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
4652 "ATTLIST: no name for Attribute\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004653 break;
4654 }
4655 GROW;
William M. Brack76e95df2003-10-18 16:20:14 +00004656 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004657 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004658 "Space required after the attribute name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004659 if (defaultValue != NULL)
4660 xmlFree(defaultValue);
4661 break;
4662 }
4663 SKIP_BLANKS;
4664
4665 type = xmlParseAttributeType(ctxt, &tree);
4666 if (type <= 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00004667 if (defaultValue != NULL)
4668 xmlFree(defaultValue);
4669 break;
4670 }
4671
4672 GROW;
William M. Brack76e95df2003-10-18 16:20:14 +00004673 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004674 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4675 "Space required after the attribute type\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004676 if (defaultValue != NULL)
4677 xmlFree(defaultValue);
4678 if (tree != NULL)
4679 xmlFreeEnumeration(tree);
4680 break;
4681 }
4682 SKIP_BLANKS;
4683
4684 def = xmlParseDefaultDecl(ctxt, &defaultValue);
4685 if (def <= 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00004686 if (defaultValue != NULL)
4687 xmlFree(defaultValue);
4688 if (tree != NULL)
4689 xmlFreeEnumeration(tree);
4690 break;
4691 }
4692
4693 GROW;
4694 if (RAW != '>') {
William M. Brack76e95df2003-10-18 16:20:14 +00004695 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004696 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004697 "Space required after the attribute default value\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004698 if (defaultValue != NULL)
4699 xmlFree(defaultValue);
4700 if (tree != NULL)
4701 xmlFreeEnumeration(tree);
4702 break;
4703 }
4704 SKIP_BLANKS;
4705 }
4706 if (check == CUR_PTR) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004707 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
4708 "in xmlParseAttributeListDecl\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004709 if (defaultValue != NULL)
4710 xmlFree(defaultValue);
4711 if (tree != NULL)
4712 xmlFreeEnumeration(tree);
4713 break;
4714 }
4715 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4716 (ctxt->sax->attributeDecl != NULL))
4717 ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
4718 type, def, defaultValue, tree);
Daniel Veillarde57ec792003-09-10 10:50:59 +00004719 else if (tree != NULL)
4720 xmlFreeEnumeration(tree);
4721
4722 if ((ctxt->sax2) && (defaultValue != NULL) &&
4723 (def != XML_ATTRIBUTE_IMPLIED) &&
4724 (def != XML_ATTRIBUTE_REQUIRED)) {
4725 xmlAddDefAttrs(ctxt, elemName, attrName, defaultValue);
4726 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004727 if ((ctxt->sax2) && (type != XML_ATTRIBUTE_CDATA)) {
4728 xmlAddSpecialAttr(ctxt, elemName, attrName, type);
4729 }
Owen Taylor3473f882001-02-23 17:55:21 +00004730 if (defaultValue != NULL)
4731 xmlFree(defaultValue);
4732 GROW;
4733 }
4734 if (RAW == '>') {
4735 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004736 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4737 "Attribute list declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004738 }
4739 NEXT;
4740 }
Owen Taylor3473f882001-02-23 17:55:21 +00004741 }
4742}
4743
4744/**
4745 * xmlParseElementMixedContentDecl:
4746 * @ctxt: an XML parser context
Daniel Veillarda9b66d02002-12-11 14:23:49 +00004747 * @inputchk: the input used for the current entity, needed for boundary checks
Owen Taylor3473f882001-02-23 17:55:21 +00004748 *
4749 * parse the declaration for a Mixed Element content
4750 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
4751 *
4752 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
4753 * '(' S? '#PCDATA' S? ')'
4754 *
4755 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
4756 *
4757 * [ VC: No Duplicate Types ]
4758 * The same name must not appear more than once in a single
4759 * mixed-content declaration.
4760 *
4761 * returns: the list of the xmlElementContentPtr describing the element choices
4762 */
4763xmlElementContentPtr
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004764xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
Owen Taylor3473f882001-02-23 17:55:21 +00004765 xmlElementContentPtr ret = NULL, cur = NULL, n;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004766 const xmlChar *elem = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00004767
4768 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00004769 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004770 SKIP(7);
4771 SKIP_BLANKS;
4772 SHRINK;
4773 if (RAW == ')') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004774 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00004775 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
4776"Element content declaration doesn't start and stop in the same entity\n",
4777 NULL);
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004778 }
Owen Taylor3473f882001-02-23 17:55:21 +00004779 NEXT;
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00004780 ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
Owen Taylor3473f882001-02-23 17:55:21 +00004781 if (RAW == '*') {
4782 ret->ocur = XML_ELEMENT_CONTENT_MULT;
4783 NEXT;
4784 }
4785 return(ret);
4786 }
4787 if ((RAW == '(') || (RAW == '|')) {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00004788 ret = cur = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
Owen Taylor3473f882001-02-23 17:55:21 +00004789 if (ret == NULL) return(NULL);
4790 }
4791 while (RAW == '|') {
4792 NEXT;
4793 if (elem == NULL) {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00004794 ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
Owen Taylor3473f882001-02-23 17:55:21 +00004795 if (ret == NULL) return(NULL);
4796 ret->c1 = cur;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004797 if (cur != NULL)
4798 cur->parent = ret;
Owen Taylor3473f882001-02-23 17:55:21 +00004799 cur = ret;
4800 } else {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00004801 n = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
Owen Taylor3473f882001-02-23 17:55:21 +00004802 if (n == NULL) return(NULL);
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00004803 n->c1 = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004804 if (n->c1 != NULL)
4805 n->c1->parent = n;
Owen Taylor3473f882001-02-23 17:55:21 +00004806 cur->c2 = n;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004807 if (n != NULL)
4808 n->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004809 cur = n;
Owen Taylor3473f882001-02-23 17:55:21 +00004810 }
4811 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004812 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004813 if (elem == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004814 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004815 "xmlParseElementMixedContentDecl : Name expected\n");
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00004816 xmlFreeDocElementContent(ctxt->myDoc, cur);
Owen Taylor3473f882001-02-23 17:55:21 +00004817 return(NULL);
4818 }
4819 SKIP_BLANKS;
4820 GROW;
4821 }
4822 if ((RAW == ')') && (NXT(1) == '*')) {
4823 if (elem != NULL) {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00004824 cur->c2 = xmlNewDocElementContent(ctxt->myDoc, elem,
Owen Taylor3473f882001-02-23 17:55:21 +00004825 XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004826 if (cur->c2 != NULL)
4827 cur->c2->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004828 }
4829 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004830 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00004831 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
4832"Element content declaration doesn't start and stop in the same entity\n",
4833 NULL);
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004834 }
Owen Taylor3473f882001-02-23 17:55:21 +00004835 SKIP(2);
4836 } else {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00004837 xmlFreeDocElementContent(ctxt->myDoc, ret);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004838 xmlFatalErr(ctxt, XML_ERR_MIXED_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004839 return(NULL);
4840 }
4841
4842 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004843 xmlFatalErr(ctxt, XML_ERR_PCDATA_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004844 }
4845 return(ret);
4846}
4847
4848/**
4849 * xmlParseElementChildrenContentDecl:
4850 * @ctxt: an XML parser context
Daniel Veillarda9b66d02002-12-11 14:23:49 +00004851 * @inputchk: the input used for the current entity, needed for boundary checks
Owen Taylor3473f882001-02-23 17:55:21 +00004852 *
4853 * parse the declaration for a Mixed Element content
4854 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
4855 *
4856 *
4857 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
4858 *
4859 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
4860 *
4861 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
4862 *
4863 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
4864 *
4865 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
4866 * TODO Parameter-entity replacement text must be properly nested
Daniel Veillardcbaf3992001-12-31 16:16:02 +00004867 * with parenthesized groups. That is to say, if either of the
Owen Taylor3473f882001-02-23 17:55:21 +00004868 * opening or closing parentheses in a choice, seq, or Mixed
4869 * construct is contained in the replacement text for a parameter
4870 * entity, both must be contained in the same replacement text. For
4871 * interoperability, if a parameter-entity reference appears in a
4872 * choice, seq, or Mixed construct, its replacement text should not
4873 * be empty, and neither the first nor last non-blank character of
4874 * the replacement text should be a connector (| or ,).
4875 *
Daniel Veillard5e2dace2001-07-18 19:30:27 +00004876 * Returns the tree of xmlElementContentPtr describing the element
Owen Taylor3473f882001-02-23 17:55:21 +00004877 * hierarchy.
4878 */
4879xmlElementContentPtr
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004880xmlParseElementChildrenContentDecl (xmlParserCtxtPtr ctxt, int inputchk) {
Owen Taylor3473f882001-02-23 17:55:21 +00004881 xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004882 const xmlChar *elem;
Owen Taylor3473f882001-02-23 17:55:21 +00004883 xmlChar type = 0;
4884
4885 SKIP_BLANKS;
4886 GROW;
4887 if (RAW == '(') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004888 int inputid = ctxt->input->id;
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004889
Owen Taylor3473f882001-02-23 17:55:21 +00004890 /* Recurse on first child */
4891 NEXT;
4892 SKIP_BLANKS;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004893 cur = ret = xmlParseElementChildrenContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00004894 SKIP_BLANKS;
4895 GROW;
4896 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00004897 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004898 if (elem == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004899 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004900 return(NULL);
4901 }
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00004902 cur = ret = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00004903 if (cur == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004904 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00004905 return(NULL);
4906 }
Owen Taylor3473f882001-02-23 17:55:21 +00004907 GROW;
4908 if (RAW == '?') {
4909 cur->ocur = XML_ELEMENT_CONTENT_OPT;
4910 NEXT;
4911 } else if (RAW == '*') {
4912 cur->ocur = XML_ELEMENT_CONTENT_MULT;
4913 NEXT;
4914 } else if (RAW == '+') {
4915 cur->ocur = XML_ELEMENT_CONTENT_PLUS;
4916 NEXT;
4917 } else {
4918 cur->ocur = XML_ELEMENT_CONTENT_ONCE;
4919 }
Owen Taylor3473f882001-02-23 17:55:21 +00004920 GROW;
4921 }
4922 SKIP_BLANKS;
4923 SHRINK;
4924 while (RAW != ')') {
4925 /*
4926 * Each loop we parse one separator and one element.
4927 */
4928 if (RAW == ',') {
4929 if (type == 0) type = CUR;
4930
4931 /*
4932 * Detect "Name | Name , Name" error
4933 */
4934 else if (type != CUR) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004935 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004936 "xmlParseElementChildrenContentDecl : '%c' expected\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004937 type);
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00004938 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00004939 xmlFreeDocElementContent(ctxt->myDoc, last);
Owen Taylor3473f882001-02-23 17:55:21 +00004940 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00004941 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00004942 return(NULL);
4943 }
4944 NEXT;
4945
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00004946 op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_SEQ);
Owen Taylor3473f882001-02-23 17:55:21 +00004947 if (op == NULL) {
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00004948 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00004949 xmlFreeDocElementContent(ctxt->myDoc, last);
4950 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00004951 return(NULL);
4952 }
4953 if (last == NULL) {
4954 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004955 if (ret != NULL)
4956 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004957 ret = cur = op;
4958 } else {
4959 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004960 if (op != NULL)
4961 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004962 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004963 if (last != NULL)
4964 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004965 cur =op;
4966 last = NULL;
4967 }
4968 } else if (RAW == '|') {
4969 if (type == 0) type = CUR;
4970
4971 /*
4972 * Detect "Name , Name | Name" error
4973 */
4974 else if (type != CUR) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004975 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004976 "xmlParseElementChildrenContentDecl : '%c' expected\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004977 type);
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00004978 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00004979 xmlFreeDocElementContent(ctxt->myDoc, last);
Owen Taylor3473f882001-02-23 17:55:21 +00004980 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00004981 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00004982 return(NULL);
4983 }
4984 NEXT;
4985
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00004986 op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
Owen Taylor3473f882001-02-23 17:55:21 +00004987 if (op == NULL) {
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00004988 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00004989 xmlFreeDocElementContent(ctxt->myDoc, last);
Owen Taylor3473f882001-02-23 17:55:21 +00004990 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00004991 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00004992 return(NULL);
4993 }
4994 if (last == NULL) {
4995 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004996 if (ret != NULL)
4997 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004998 ret = cur = op;
4999 } else {
5000 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005001 if (op != NULL)
5002 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00005003 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005004 if (last != NULL)
5005 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00005006 cur =op;
5007 last = NULL;
5008 }
5009 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005010 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005011 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005012 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00005013 return(NULL);
5014 }
5015 GROW;
5016 SKIP_BLANKS;
5017 GROW;
5018 if (RAW == '(') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005019 int inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00005020 /* Recurse on second child */
5021 NEXT;
5022 SKIP_BLANKS;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005023 last = xmlParseElementChildrenContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00005024 SKIP_BLANKS;
5025 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00005026 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005027 if (elem == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005028 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005029 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005030 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00005031 return(NULL);
5032 }
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005033 last = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
Owen Taylor3473f882001-02-23 17:55:21 +00005034 if (RAW == '?') {
5035 last->ocur = XML_ELEMENT_CONTENT_OPT;
5036 NEXT;
5037 } else if (RAW == '*') {
5038 last->ocur = XML_ELEMENT_CONTENT_MULT;
5039 NEXT;
5040 } else if (RAW == '+') {
5041 last->ocur = XML_ELEMENT_CONTENT_PLUS;
5042 NEXT;
5043 } else {
5044 last->ocur = XML_ELEMENT_CONTENT_ONCE;
5045 }
5046 }
5047 SKIP_BLANKS;
5048 GROW;
5049 }
5050 if ((cur != NULL) && (last != NULL)) {
5051 cur->c2 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005052 if (last != NULL)
5053 last->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00005054 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005055 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00005056 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
5057"Element content declaration doesn't start and stop in the same entity\n",
5058 NULL);
Daniel Veillard8dc16a62002-02-19 21:08:48 +00005059 }
Owen Taylor3473f882001-02-23 17:55:21 +00005060 NEXT;
5061 if (RAW == '?') {
William M. Brackf8f2e8f2004-05-14 04:37:41 +00005062 if (ret != NULL) {
5063 if ((ret->ocur == XML_ELEMENT_CONTENT_PLUS) ||
5064 (ret->ocur == XML_ELEMENT_CONTENT_MULT))
5065 ret->ocur = XML_ELEMENT_CONTENT_MULT;
5066 else
5067 ret->ocur = XML_ELEMENT_CONTENT_OPT;
5068 }
Owen Taylor3473f882001-02-23 17:55:21 +00005069 NEXT;
5070 } else if (RAW == '*') {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00005071 if (ret != NULL) {
Daniel Veillarde470df72001-04-18 21:41:07 +00005072 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Daniel Veillardce2c2f02001-10-18 14:57:24 +00005073 cur = ret;
5074 /*
5075 * Some normalization:
5076 * (a | b* | c?)* == (a | b | c)*
5077 */
5078 while (cur->type == XML_ELEMENT_CONTENT_OR) {
5079 if ((cur->c1 != NULL) &&
5080 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
5081 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT)))
5082 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
5083 if ((cur->c2 != NULL) &&
5084 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
5085 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT)))
5086 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
5087 cur = cur->c2;
5088 }
5089 }
Owen Taylor3473f882001-02-23 17:55:21 +00005090 NEXT;
5091 } else if (RAW == '+') {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00005092 if (ret != NULL) {
5093 int found = 0;
5094
William M. Brackf8f2e8f2004-05-14 04:37:41 +00005095 if ((ret->ocur == XML_ELEMENT_CONTENT_OPT) ||
5096 (ret->ocur == XML_ELEMENT_CONTENT_MULT))
5097 ret->ocur = XML_ELEMENT_CONTENT_MULT;
William M. Brackeb8509c2004-05-14 03:48:02 +00005098 else
5099 ret->ocur = XML_ELEMENT_CONTENT_PLUS;
Daniel Veillardce2c2f02001-10-18 14:57:24 +00005100 /*
5101 * Some normalization:
5102 * (a | b*)+ == (a | b)*
5103 * (a | b?)+ == (a | b)*
5104 */
5105 while (cur->type == XML_ELEMENT_CONTENT_OR) {
5106 if ((cur->c1 != NULL) &&
5107 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
5108 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) {
5109 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
5110 found = 1;
5111 }
5112 if ((cur->c2 != NULL) &&
5113 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
5114 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) {
5115 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
5116 found = 1;
5117 }
5118 cur = cur->c2;
5119 }
5120 if (found)
5121 ret->ocur = XML_ELEMENT_CONTENT_MULT;
5122 }
Owen Taylor3473f882001-02-23 17:55:21 +00005123 NEXT;
5124 }
5125 return(ret);
5126}
5127
5128/**
5129 * xmlParseElementContentDecl:
5130 * @ctxt: an XML parser context
5131 * @name: the name of the element being defined.
5132 * @result: the Element Content pointer will be stored here if any
5133 *
5134 * parse the declaration for an Element content either Mixed or Children,
5135 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
5136 *
5137 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
5138 *
5139 * returns: the type of element content XML_ELEMENT_TYPE_xxx
5140 */
5141
5142int
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005143xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, const xmlChar *name,
Owen Taylor3473f882001-02-23 17:55:21 +00005144 xmlElementContentPtr *result) {
5145
5146 xmlElementContentPtr tree = NULL;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005147 int inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00005148 int res;
5149
5150 *result = NULL;
5151
5152 if (RAW != '(') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00005153 xmlFatalErrMsgStr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005154 "xmlParseElementContentDecl : %s '(' expected\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00005155 return(-1);
5156 }
5157 NEXT;
5158 GROW;
5159 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005160 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005161 tree = xmlParseElementMixedContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00005162 res = XML_ELEMENT_TYPE_MIXED;
5163 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005164 tree = xmlParseElementChildrenContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00005165 res = XML_ELEMENT_TYPE_ELEMENT;
5166 }
Owen Taylor3473f882001-02-23 17:55:21 +00005167 SKIP_BLANKS;
5168 *result = tree;
5169 return(res);
5170}
5171
5172/**
5173 * xmlParseElementDecl:
5174 * @ctxt: an XML parser context
5175 *
5176 * parse an Element declaration.
5177 *
5178 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
5179 *
5180 * [ VC: Unique Element Type Declaration ]
5181 * No element type may be declared more than once
5182 *
5183 * Returns the type of the element, or -1 in case of error
5184 */
5185int
5186xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005187 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00005188 int ret = -1;
5189 xmlElementContentPtr content = NULL;
5190
Daniel Veillard4c778d82005-01-23 17:37:44 +00005191 /* GROW; done in the caller */
Daniel Veillarda07050d2003-10-19 14:46:32 +00005192 if (CMP9(CUR_PTR, '<', '!', 'E', 'L', 'E', 'M', 'E', 'N', 'T')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005193 xmlParserInputPtr input = ctxt->input;
5194
5195 SKIP(9);
William M. Brack76e95df2003-10-18 16:20:14 +00005196 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005197 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5198 "Space required after 'ELEMENT'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005199 }
5200 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00005201 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005202 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005203 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5204 "xmlParseElementDecl: no name for Element\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005205 return(-1);
5206 }
5207 while ((RAW == 0) && (ctxt->inputNr > 1))
5208 xmlPopInput(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00005209 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005210 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5211 "Space required after the element name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005212 }
5213 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005214 if (CMP5(CUR_PTR, 'E', 'M', 'P', 'T', 'Y')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005215 SKIP(5);
5216 /*
5217 * Element must always be empty.
5218 */
5219 ret = XML_ELEMENT_TYPE_EMPTY;
5220 } else if ((RAW == 'A') && (NXT(1) == 'N') &&
5221 (NXT(2) == 'Y')) {
5222 SKIP(3);
5223 /*
5224 * Element is a generic container.
5225 */
5226 ret = XML_ELEMENT_TYPE_ANY;
5227 } else if (RAW == '(') {
5228 ret = xmlParseElementContentDecl(ctxt, name, &content);
5229 } else {
5230 /*
5231 * [ WFC: PEs in Internal Subset ] error handling.
5232 */
5233 if ((RAW == '%') && (ctxt->external == 0) &&
5234 (ctxt->inputNr == 1)) {
Daniel Veillardf403d292003-10-05 13:51:35 +00005235 xmlFatalErrMsg(ctxt, XML_ERR_PEREF_IN_INT_SUBSET,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005236 "PEReference: forbidden within markup decl in internal subset\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005237 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00005238 xmlFatalErrMsg(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
Owen Taylor3473f882001-02-23 17:55:21 +00005239 "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
5240 }
Owen Taylor3473f882001-02-23 17:55:21 +00005241 return(-1);
5242 }
5243
5244 SKIP_BLANKS;
5245 /*
5246 * Pop-up of finished entities.
5247 */
5248 while ((RAW == 0) && (ctxt->inputNr > 1))
5249 xmlPopInput(ctxt);
5250 SKIP_BLANKS;
5251
5252 if (RAW != '>') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005253 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005254 if (content != NULL) {
5255 xmlFreeDocElementContent(ctxt->myDoc, content);
5256 }
Owen Taylor3473f882001-02-23 17:55:21 +00005257 } else {
5258 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005259 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5260 "Element declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005261 }
5262
5263 NEXT;
5264 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005265 (ctxt->sax->elementDecl != NULL)) {
5266 if (content != NULL)
5267 content->parent = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005268 ctxt->sax->elementDecl(ctxt->userData, name, ret,
5269 content);
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005270 if ((content != NULL) && (content->parent == NULL)) {
5271 /*
5272 * this is a trick: if xmlAddElementDecl is called,
5273 * instead of copying the full tree it is plugged directly
5274 * if called from the parser. Avoid duplicating the
5275 * interfaces or change the API/ABI
5276 */
5277 xmlFreeDocElementContent(ctxt->myDoc, content);
5278 }
5279 } else if (content != NULL) {
5280 xmlFreeDocElementContent(ctxt->myDoc, content);
5281 }
Owen Taylor3473f882001-02-23 17:55:21 +00005282 }
Owen Taylor3473f882001-02-23 17:55:21 +00005283 }
5284 return(ret);
5285}
5286
5287/**
Owen Taylor3473f882001-02-23 17:55:21 +00005288 * xmlParseConditionalSections
5289 * @ctxt: an XML parser context
5290 *
5291 * [61] conditionalSect ::= includeSect | ignoreSect
5292 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
5293 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
5294 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
5295 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
5296 */
5297
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005298static void
Owen Taylor3473f882001-02-23 17:55:21 +00005299xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
5300 SKIP(3);
5301 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005302 if (CMP7(CUR_PTR, 'I', 'N', 'C', 'L', 'U', 'D', 'E')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005303 SKIP(7);
5304 SKIP_BLANKS;
5305 if (RAW != '[') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005306 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005307 } else {
5308 NEXT;
5309 }
5310 if (xmlParserDebugEntities) {
5311 if ((ctxt->input != NULL) && (ctxt->input->filename))
5312 xmlGenericError(xmlGenericErrorContext,
5313 "%s(%d): ", ctxt->input->filename,
5314 ctxt->input->line);
5315 xmlGenericError(xmlGenericErrorContext,
5316 "Entering INCLUDE Conditional Section\n");
5317 }
5318
5319 while ((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') ||
5320 (NXT(2) != '>'))) {
5321 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00005322 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00005323
5324 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5325 xmlParseConditionalSections(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00005326 } else if (IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005327 NEXT;
5328 } else if (RAW == '%') {
5329 xmlParsePEReference(ctxt);
5330 } else
5331 xmlParseMarkupDecl(ctxt);
5332
5333 /*
5334 * Pop-up of finished entities.
5335 */
5336 while ((RAW == 0) && (ctxt->inputNr > 1))
5337 xmlPopInput(ctxt);
5338
Daniel Veillardfdc91562002-07-01 21:52:03 +00005339 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005340 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005341 break;
5342 }
5343 }
5344 if (xmlParserDebugEntities) {
5345 if ((ctxt->input != NULL) && (ctxt->input->filename))
5346 xmlGenericError(xmlGenericErrorContext,
5347 "%s(%d): ", ctxt->input->filename,
5348 ctxt->input->line);
5349 xmlGenericError(xmlGenericErrorContext,
5350 "Leaving INCLUDE Conditional Section\n");
5351 }
5352
Daniel Veillarda07050d2003-10-19 14:46:32 +00005353 } else if (CMP6(CUR_PTR, 'I', 'G', 'N', 'O', 'R', 'E')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005354 int state;
William M. Brack78637da2003-07-31 14:47:38 +00005355 xmlParserInputState instate;
Owen Taylor3473f882001-02-23 17:55:21 +00005356 int depth = 0;
5357
5358 SKIP(6);
5359 SKIP_BLANKS;
5360 if (RAW != '[') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005361 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005362 } else {
5363 NEXT;
5364 }
5365 if (xmlParserDebugEntities) {
5366 if ((ctxt->input != NULL) && (ctxt->input->filename))
5367 xmlGenericError(xmlGenericErrorContext,
5368 "%s(%d): ", ctxt->input->filename,
5369 ctxt->input->line);
5370 xmlGenericError(xmlGenericErrorContext,
5371 "Entering IGNORE Conditional Section\n");
5372 }
5373
5374 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005375 * Parse up to the end of the conditional section
Owen Taylor3473f882001-02-23 17:55:21 +00005376 * But disable SAX event generating DTD building in the meantime
5377 */
5378 state = ctxt->disableSAX;
5379 instate = ctxt->instate;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005380 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005381 ctxt->instate = XML_PARSER_IGNORE;
5382
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00005383 while ((depth >= 0) && (RAW != 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005384 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5385 depth++;
5386 SKIP(3);
5387 continue;
5388 }
5389 if ((RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
5390 if (--depth >= 0) SKIP(3);
5391 continue;
5392 }
5393 NEXT;
5394 continue;
5395 }
5396
5397 ctxt->disableSAX = state;
5398 ctxt->instate = instate;
5399
5400 if (xmlParserDebugEntities) {
5401 if ((ctxt->input != NULL) && (ctxt->input->filename))
5402 xmlGenericError(xmlGenericErrorContext,
5403 "%s(%d): ", ctxt->input->filename,
5404 ctxt->input->line);
5405 xmlGenericError(xmlGenericErrorContext,
5406 "Leaving IGNORE Conditional Section\n");
5407 }
5408
5409 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005410 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID_KEYWORD, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005411 }
5412
5413 if (RAW == 0)
5414 SHRINK;
5415
5416 if (RAW == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005417 xmlFatalErr(ctxt, XML_ERR_CONDSEC_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005418 } else {
5419 SKIP(3);
5420 }
5421}
5422
5423/**
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005424 * xmlParseMarkupDecl:
5425 * @ctxt: an XML parser context
5426 *
5427 * parse Markup declarations
5428 *
5429 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
5430 * NotationDecl | PI | Comment
5431 *
5432 * [ VC: Proper Declaration/PE Nesting ]
5433 * Parameter-entity replacement text must be properly nested with
5434 * markup declarations. That is to say, if either the first character
5435 * or the last character of a markup declaration (markupdecl above) is
5436 * contained in the replacement text for a parameter-entity reference,
5437 * both must be contained in the same replacement text.
5438 *
5439 * [ WFC: PEs in Internal Subset ]
5440 * In the internal DTD subset, parameter-entity references can occur
5441 * only where markup declarations can occur, not within markup declarations.
5442 * (This does not apply to references that occur in external parameter
5443 * entities or to the external subset.)
5444 */
5445void
5446xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
5447 GROW;
Daniel Veillard4c778d82005-01-23 17:37:44 +00005448 if (CUR == '<') {
5449 if (NXT(1) == '!') {
5450 switch (NXT(2)) {
5451 case 'E':
5452 if (NXT(3) == 'L')
5453 xmlParseElementDecl(ctxt);
5454 else if (NXT(3) == 'N')
5455 xmlParseEntityDecl(ctxt);
5456 break;
5457 case 'A':
5458 xmlParseAttributeListDecl(ctxt);
5459 break;
5460 case 'N':
5461 xmlParseNotationDecl(ctxt);
5462 break;
5463 case '-':
5464 xmlParseComment(ctxt);
5465 break;
5466 default:
5467 /* there is an error but it will be detected later */
5468 break;
5469 }
5470 } else if (NXT(1) == '?') {
5471 xmlParsePI(ctxt);
5472 }
5473 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005474 /*
5475 * This is only for internal subset. On external entities,
5476 * the replacement is done before parsing stage
5477 */
5478 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
5479 xmlParsePEReference(ctxt);
5480
5481 /*
5482 * Conditional sections are allowed from entities included
5483 * by PE References in the internal subset.
5484 */
5485 if ((ctxt->external == 0) && (ctxt->inputNr > 1)) {
5486 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5487 xmlParseConditionalSections(ctxt);
5488 }
5489 }
5490
5491 ctxt->instate = XML_PARSER_DTD;
5492}
5493
5494/**
5495 * xmlParseTextDecl:
5496 * @ctxt: an XML parser context
5497 *
5498 * parse an XML declaration header for external entities
5499 *
5500 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
5501 *
5502 * Question: Seems that EncodingDecl is mandatory ? Is that a typo ?
5503 */
5504
5505void
5506xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
5507 xmlChar *version;
Daniel Veillardf5cb3cd2003-10-28 13:58:13 +00005508 const xmlChar *encoding;
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005509
5510 /*
5511 * We know that '<?xml' is here.
5512 */
Daniel Veillarda07050d2003-10-19 14:46:32 +00005513 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005514 SKIP(5);
5515 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005516 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_STARTED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005517 return;
5518 }
5519
William M. Brack76e95df2003-10-18 16:20:14 +00005520 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005521 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5522 "Space needed after '<?xml'\n");
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005523 }
5524 SKIP_BLANKS;
5525
5526 /*
5527 * We may have the VersionInfo here.
5528 */
5529 version = xmlParseVersionInfo(ctxt);
5530 if (version == NULL)
5531 version = xmlCharStrdup(XML_DEFAULT_VERSION);
Daniel Veillard401c2112002-01-07 16:54:10 +00005532 else {
William M. Brack76e95df2003-10-18 16:20:14 +00005533 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005534 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5535 "Space needed here\n");
Daniel Veillard401c2112002-01-07 16:54:10 +00005536 }
5537 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005538 ctxt->input->version = version;
5539
5540 /*
5541 * We must have the encoding declaration
5542 */
Daniel Veillardf5cb3cd2003-10-28 13:58:13 +00005543 encoding = xmlParseEncodingDecl(ctxt);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005544 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5545 /*
5546 * The XML REC instructs us to stop parsing right here
5547 */
5548 return;
5549 }
Daniel Veillardf5cb3cd2003-10-28 13:58:13 +00005550 if ((encoding == NULL) && (ctxt->errNo == XML_ERR_OK)) {
5551 xmlFatalErrMsg(ctxt, XML_ERR_MISSING_ENCODING,
5552 "Missing encoding in text declaration\n");
5553 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005554
5555 SKIP_BLANKS;
5556 if ((RAW == '?') && (NXT(1) == '>')) {
5557 SKIP(2);
5558 } else if (RAW == '>') {
5559 /* Deprecated old WD ... */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005560 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005561 NEXT;
5562 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005563 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005564 MOVETO_ENDTAG(CUR_PTR);
5565 NEXT;
5566 }
5567}
5568
5569/**
Owen Taylor3473f882001-02-23 17:55:21 +00005570 * xmlParseExternalSubset:
5571 * @ctxt: an XML parser context
5572 * @ExternalID: the external identifier
5573 * @SystemID: the system identifier (or URL)
5574 *
5575 * parse Markup declarations from an external subset
5576 *
5577 * [30] extSubset ::= textDecl? extSubsetDecl
5578 *
5579 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
5580 */
5581void
5582xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
5583 const xmlChar *SystemID) {
Daniel Veillard309f81d2003-09-23 09:02:53 +00005584 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005585 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005586 if (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005587 xmlParseTextDecl(ctxt);
5588 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5589 /*
5590 * The XML REC instructs us to stop parsing right here
5591 */
5592 ctxt->instate = XML_PARSER_EOF;
5593 return;
5594 }
5595 }
5596 if (ctxt->myDoc == NULL) {
5597 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
5598 }
5599 if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
5600 xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
5601
5602 ctxt->instate = XML_PARSER_DTD;
5603 ctxt->external = 1;
5604 while (((RAW == '<') && (NXT(1) == '?')) ||
5605 ((RAW == '<') && (NXT(1) == '!')) ||
William M. Brack76e95df2003-10-18 16:20:14 +00005606 (RAW == '%') || IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005607 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00005608 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00005609
5610 GROW;
5611 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5612 xmlParseConditionalSections(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00005613 } else if (IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005614 NEXT;
5615 } else if (RAW == '%') {
5616 xmlParsePEReference(ctxt);
5617 } else
5618 xmlParseMarkupDecl(ctxt);
5619
5620 /*
5621 * Pop-up of finished entities.
5622 */
5623 while ((RAW == 0) && (ctxt->inputNr > 1))
5624 xmlPopInput(ctxt);
5625
Daniel Veillardfdc91562002-07-01 21:52:03 +00005626 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005627 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005628 break;
5629 }
5630 }
5631
5632 if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005633 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005634 }
5635
5636}
5637
5638/**
5639 * xmlParseReference:
5640 * @ctxt: an XML parser context
5641 *
5642 * parse and handle entity references in content, depending on the SAX
5643 * interface, this may end-up in a call to character() if this is a
5644 * CharRef, a predefined entity, if there is no reference() callback.
5645 * or if the parser was asked to switch to that mode.
5646 *
5647 * [67] Reference ::= EntityRef | CharRef
5648 */
5649void
5650xmlParseReference(xmlParserCtxtPtr ctxt) {
5651 xmlEntityPtr ent;
5652 xmlChar *val;
5653 if (RAW != '&') return;
5654
5655 if (NXT(1) == '#') {
5656 int i = 0;
5657 xmlChar out[10];
5658 int hex = NXT(2);
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005659 int value = xmlParseCharRef(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005660
5661 if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
5662 /*
5663 * So we are using non-UTF-8 buffers
5664 * Check that the char fit on 8bits, if not
5665 * generate a CharRef.
5666 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005667 if (value <= 0xFF) {
5668 out[0] = value;
Owen Taylor3473f882001-02-23 17:55:21 +00005669 out[1] = 0;
5670 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5671 (!ctxt->disableSAX))
5672 ctxt->sax->characters(ctxt->userData, out, 1);
5673 } else {
5674 if ((hex == 'x') || (hex == 'X'))
Aleksey Sanin49cc9752002-06-14 17:07:10 +00005675 snprintf((char *)out, sizeof(out), "#x%X", value);
Owen Taylor3473f882001-02-23 17:55:21 +00005676 else
Aleksey Sanin49cc9752002-06-14 17:07:10 +00005677 snprintf((char *)out, sizeof(out), "#%d", value);
Owen Taylor3473f882001-02-23 17:55:21 +00005678 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
5679 (!ctxt->disableSAX))
5680 ctxt->sax->reference(ctxt->userData, out);
5681 }
5682 } else {
5683 /*
5684 * Just encode the value in UTF-8
5685 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005686 COPY_BUF(0 ,out, i, value);
Owen Taylor3473f882001-02-23 17:55:21 +00005687 out[i] = 0;
5688 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5689 (!ctxt->disableSAX))
5690 ctxt->sax->characters(ctxt->userData, out, i);
5691 }
5692 } else {
5693 ent = xmlParseEntityRef(ctxt);
5694 if (ent == NULL) return;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00005695 if (!ctxt->wellFormed)
5696 return;
Owen Taylor3473f882001-02-23 17:55:21 +00005697 if ((ent->name != NULL) &&
5698 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
5699 xmlNodePtr list = NULL;
Daniel Veillard7d515752003-09-26 19:12:37 +00005700 xmlParserErrors ret = XML_ERR_OK;
Owen Taylor3473f882001-02-23 17:55:21 +00005701
5702
5703 /*
5704 * The first reference to the entity trigger a parsing phase
5705 * where the ent->children is filled with the result from
5706 * the parsing.
5707 */
5708 if (ent->children == NULL) {
5709 xmlChar *value;
5710 value = ent->content;
5711
5712 /*
5713 * Check that this entity is well formed
5714 */
Daniel Veillard4aede2e2003-10-17 12:43:59 +00005715 if ((value != NULL) && (value[0] != 0) &&
Owen Taylor3473f882001-02-23 17:55:21 +00005716 (value[1] == 0) && (value[0] == '<') &&
5717 (xmlStrEqual(ent->name, BAD_CAST "lt"))) {
5718 /*
5719 * DONE: get definite answer on this !!!
5720 * Lots of entity decls are used to declare a single
5721 * char
5722 * <!ENTITY lt "<">
5723 * Which seems to be valid since
5724 * 2.4: The ampersand character (&) and the left angle
5725 * bracket (<) may appear in their literal form only
5726 * when used ... They are also legal within the literal
5727 * entity value of an internal entity declaration;i
5728 * see "4.3.2 Well-Formed Parsed Entities".
5729 * IMHO 2.4 and 4.3.2 are directly in contradiction.
5730 * Looking at the OASIS test suite and James Clark
5731 * tests, this is broken. However the XML REC uses
5732 * it. Is the XML REC not well-formed ????
5733 * This is a hack to avoid this problem
5734 *
5735 * ANSWER: since lt gt amp .. are already defined,
5736 * this is a redefinition and hence the fact that the
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005737 * content is not well balanced is not a Wf error, this
Owen Taylor3473f882001-02-23 17:55:21 +00005738 * is lousy but acceptable.
5739 */
5740 list = xmlNewDocText(ctxt->myDoc, value);
5741 if (list != NULL) {
5742 if ((ent->etype == XML_INTERNAL_GENERAL_ENTITY) &&
5743 (ent->children == NULL)) {
5744 ent->children = list;
5745 ent->last = list;
Daniel Veillard2d84a892002-12-30 00:01:08 +00005746 ent->owner = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005747 list->parent = (xmlNodePtr) ent;
5748 } else {
5749 xmlFreeNodeList(list);
5750 }
5751 } else if (list != NULL) {
5752 xmlFreeNodeList(list);
5753 }
5754 } else {
5755 /*
5756 * 4.3.2: An internal general parsed entity is well-formed
5757 * if its replacement text matches the production labeled
5758 * content.
5759 */
Daniel Veillardb5a60ec2002-03-18 11:45:56 +00005760
5761 void *user_data;
5762 /*
5763 * This is a bit hackish but this seems the best
5764 * way to make sure both SAX and DOM entity support
5765 * behaves okay.
5766 */
5767 if (ctxt->userData == ctxt)
5768 user_data = NULL;
5769 else
5770 user_data = ctxt->userData;
5771
Owen Taylor3473f882001-02-23 17:55:21 +00005772 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
5773 ctxt->depth++;
Daniel Veillard328f48c2002-11-15 15:24:34 +00005774 ret = xmlParseBalancedChunkMemoryInternal(ctxt,
5775 value, user_data, &list);
Owen Taylor3473f882001-02-23 17:55:21 +00005776 ctxt->depth--;
5777 } else if (ent->etype ==
5778 XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
5779 ctxt->depth++;
Daniel Veillarda97a19b2001-05-20 13:19:52 +00005780 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
Daniel Veillardb5a60ec2002-03-18 11:45:56 +00005781 ctxt->sax, user_data, ctxt->depth,
Daniel Veillarda97a19b2001-05-20 13:19:52 +00005782 ent->URI, ent->ExternalID, &list);
Owen Taylor3473f882001-02-23 17:55:21 +00005783 ctxt->depth--;
5784 } else {
Daniel Veillard7d515752003-09-26 19:12:37 +00005785 ret = XML_ERR_ENTITY_PE_INTERNAL;
Daniel Veillardf403d292003-10-05 13:51:35 +00005786 xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
5787 "invalid entity type found\n", NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005788 }
5789 if (ret == XML_ERR_ENTITY_LOOP) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005790 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00005791 return;
Daniel Veillard7d515752003-09-26 19:12:37 +00005792 } else if ((ret == XML_ERR_OK) && (list != NULL)) {
Daniel Veillard76d66f42001-05-16 21:05:17 +00005793 if (((ent->etype == XML_INTERNAL_GENERAL_ENTITY) ||
5794 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY))&&
Owen Taylor3473f882001-02-23 17:55:21 +00005795 (ent->children == NULL)) {
5796 ent->children = list;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005797 if (ctxt->replaceEntities) {
5798 /*
5799 * Prune it directly in the generated document
5800 * except for single text nodes.
5801 */
Daniel Veillard0df3bc32004-06-08 12:03:41 +00005802 if (((list->type == XML_TEXT_NODE) &&
5803 (list->next == NULL)) ||
5804 (ctxt->parseMode == XML_PARSE_READER)) {
Daniel Veillard62f313b2001-07-04 19:49:14 +00005805 list->parent = (xmlNodePtr) ent;
5806 list = NULL;
Daniel Veillard2d84a892002-12-30 00:01:08 +00005807 ent->owner = 1;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005808 } else {
Daniel Veillard2d84a892002-12-30 00:01:08 +00005809 ent->owner = 0;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005810 while (list != NULL) {
5811 list->parent = (xmlNodePtr) ctxt->node;
Daniel Veillard68e9e742002-11-16 15:35:11 +00005812 list->doc = ctxt->myDoc;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005813 if (list->next == NULL)
5814 ent->last = list;
5815 list = list->next;
Daniel Veillard8107a222002-01-13 14:10:10 +00005816 }
Daniel Veillard62f313b2001-07-04 19:49:14 +00005817 list = ent->children;
Daniel Veillard81273902003-09-30 00:43:48 +00005818#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +00005819 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
5820 xmlAddEntityReference(ent, list, NULL);
Daniel Veillard81273902003-09-30 00:43:48 +00005821#endif /* LIBXML_LEGACY_ENABLED */
Daniel Veillard62f313b2001-07-04 19:49:14 +00005822 }
5823 } else {
Daniel Veillard2d84a892002-12-30 00:01:08 +00005824 ent->owner = 1;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005825 while (list != NULL) {
5826 list->parent = (xmlNodePtr) ent;
5827 if (list->next == NULL)
5828 ent->last = list;
5829 list = list->next;
5830 }
Owen Taylor3473f882001-02-23 17:55:21 +00005831 }
5832 } else {
5833 xmlFreeNodeList(list);
Daniel Veillard62f313b2001-07-04 19:49:14 +00005834 list = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005835 }
William M. Brackb670e2e2003-09-27 01:05:55 +00005836 } else if ((ret != XML_ERR_OK) &&
5837 (ret != XML_WAR_UNDECLARED_ENTITY)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005838 xmlFatalErr(ctxt, ret, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005839 } else if (list != NULL) {
5840 xmlFreeNodeList(list);
Daniel Veillard62f313b2001-07-04 19:49:14 +00005841 list = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005842 }
5843 }
5844 }
5845 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
5846 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
5847 /*
5848 * Create a node.
5849 */
5850 ctxt->sax->reference(ctxt->userData, ent->name);
5851 return;
5852 } else if (ctxt->replaceEntities) {
William M. Brack1227fb32004-10-25 23:17:53 +00005853 /*
5854 * There is a problem on the handling of _private for entities
5855 * (bug 155816): Should we copy the content of the field from
5856 * the entity (possibly overwriting some value set by the user
5857 * when a copy is created), should we leave it alone, or should
5858 * we try to take care of different situations? The problem
5859 * is exacerbated by the usage of this field by the xmlReader.
5860 * To fix this bug, we look at _private on the created node
5861 * and, if it's NULL, we copy in whatever was in the entity.
5862 * If it's not NULL we leave it alone. This is somewhat of a
5863 * hack - maybe we should have further tests to determine
5864 * what to do.
5865 */
Owen Taylor3473f882001-02-23 17:55:21 +00005866 if ((ctxt->node != NULL) && (ent->children != NULL)) {
5867 /*
5868 * Seems we are generating the DOM content, do
Daniel Veillard62f313b2001-07-04 19:49:14 +00005869 * a simple tree copy for all references except the first
Daniel Veillard0df3bc32004-06-08 12:03:41 +00005870 * In the first occurrence list contains the replacement.
5871 * progressive == 2 means we are operating on the Reader
5872 * and since nodes are discarded we must copy all the time.
Owen Taylor3473f882001-02-23 17:55:21 +00005873 */
Daniel Veillard0df3bc32004-06-08 12:03:41 +00005874 if (((list == NULL) && (ent->owner == 0)) ||
5875 (ctxt->parseMode == XML_PARSE_READER)) {
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00005876 xmlNodePtr nw = NULL, cur, firstChild = NULL;
Daniel Veillard0df3bc32004-06-08 12:03:41 +00005877
5878 /*
5879 * when operating on a reader, the entities definitions
5880 * are always owning the entities subtree.
5881 if (ctxt->parseMode == XML_PARSE_READER)
5882 ent->owner = 1;
5883 */
5884
Daniel Veillard62f313b2001-07-04 19:49:14 +00005885 cur = ent->children;
5886 while (cur != NULL) {
Daniel Veillard03a53c32004-10-26 16:06:51 +00005887 nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00005888 if (nw != NULL) {
William M. Brack1227fb32004-10-25 23:17:53 +00005889 if (nw->_private == NULL)
5890 nw->_private = cur->_private;
Daniel Veillard8f872442003-01-09 23:19:02 +00005891 if (firstChild == NULL){
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00005892 firstChild = nw;
Daniel Veillard8f872442003-01-09 23:19:02 +00005893 }
Daniel Veillard0df3bc32004-06-08 12:03:41 +00005894 nw = xmlAddChild(ctxt->node, nw);
Daniel Veillard8107a222002-01-13 14:10:10 +00005895 }
Daniel Veillard0df3bc32004-06-08 12:03:41 +00005896 if (cur == ent->last) {
5897 /*
5898 * needed to detect some strange empty
5899 * node cases in the reader tests
5900 */
5901 if ((ctxt->parseMode == XML_PARSE_READER) &&
5902 (nw->type == XML_ELEMENT_NODE) &&
5903 (nw->children == NULL))
5904 nw->extra = 1;
5905
Daniel Veillard62f313b2001-07-04 19:49:14 +00005906 break;
Daniel Veillard0df3bc32004-06-08 12:03:41 +00005907 }
Daniel Veillard62f313b2001-07-04 19:49:14 +00005908 cur = cur->next;
5909 }
Daniel Veillard81273902003-09-30 00:43:48 +00005910#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +00005911 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00005912 xmlAddEntityReference(ent, firstChild, nw);
Daniel Veillard81273902003-09-30 00:43:48 +00005913#endif /* LIBXML_LEGACY_ENABLED */
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00005914 } else if (list == NULL) {
5915 xmlNodePtr nw = NULL, cur, next, last,
5916 firstChild = NULL;
5917 /*
5918 * Copy the entity child list and make it the new
5919 * entity child list. The goal is to make sure any
5920 * ID or REF referenced will be the one from the
5921 * document content and not the entity copy.
5922 */
5923 cur = ent->children;
5924 ent->children = NULL;
5925 last = ent->last;
5926 ent->last = NULL;
5927 while (cur != NULL) {
5928 next = cur->next;
5929 cur->next = NULL;
5930 cur->parent = NULL;
Daniel Veillard03a53c32004-10-26 16:06:51 +00005931 nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00005932 if (nw != NULL) {
William M. Brack1227fb32004-10-25 23:17:53 +00005933 if (nw->_private == NULL)
5934 nw->_private = cur->_private;
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00005935 if (firstChild == NULL){
5936 firstChild = cur;
5937 }
5938 xmlAddChild((xmlNodePtr) ent, nw);
5939 xmlAddChild(ctxt->node, cur);
5940 }
5941 if (cur == last)
5942 break;
5943 cur = next;
5944 }
5945 ent->owner = 1;
Daniel Veillard81273902003-09-30 00:43:48 +00005946#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00005947 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
5948 xmlAddEntityReference(ent, firstChild, nw);
Daniel Veillard81273902003-09-30 00:43:48 +00005949#endif /* LIBXML_LEGACY_ENABLED */
Daniel Veillard62f313b2001-07-04 19:49:14 +00005950 } else {
Daniel Veillard370ba3d2004-10-25 16:23:56 +00005951 const xmlChar *nbktext;
5952
Daniel Veillard62f313b2001-07-04 19:49:14 +00005953 /*
5954 * the name change is to avoid coalescing of the
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005955 * node with a possible previous text one which
5956 * would make ent->children a dangling pointer
Daniel Veillard62f313b2001-07-04 19:49:14 +00005957 */
Daniel Veillard370ba3d2004-10-25 16:23:56 +00005958 nbktext = xmlDictLookup(ctxt->dict, BAD_CAST "nbktext",
5959 -1);
Daniel Veillard62f313b2001-07-04 19:49:14 +00005960 if (ent->children->type == XML_TEXT_NODE)
Daniel Veillard370ba3d2004-10-25 16:23:56 +00005961 ent->children->name = nbktext;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005962 if ((ent->last != ent->children) &&
5963 (ent->last->type == XML_TEXT_NODE))
Daniel Veillard370ba3d2004-10-25 16:23:56 +00005964 ent->last->name = nbktext;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005965 xmlAddChildList(ctxt->node, ent->children);
5966 }
5967
Owen Taylor3473f882001-02-23 17:55:21 +00005968 /*
5969 * This is to avoid a nasty side effect, see
5970 * characters() in SAX.c
5971 */
5972 ctxt->nodemem = 0;
5973 ctxt->nodelen = 0;
5974 return;
5975 } else {
5976 /*
5977 * Probably running in SAX mode
5978 */
5979 xmlParserInputPtr input;
5980
5981 input = xmlNewEntityInputStream(ctxt, ent);
5982 xmlPushInput(ctxt, input);
5983 if ((ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) &&
Daniel Veillarda07050d2003-10-19 14:46:32 +00005984 (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
5985 (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +00005986 xmlParseTextDecl(ctxt);
5987 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5988 /*
5989 * The XML REC instructs us to stop parsing right here
5990 */
5991 ctxt->instate = XML_PARSER_EOF;
5992 return;
5993 }
5994 if (input->standalone == 1) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005995 xmlFatalErr(ctxt, XML_ERR_EXT_ENTITY_STANDALONE,
5996 NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005997 }
5998 }
5999 return;
6000 }
6001 }
6002 } else {
6003 val = ent->content;
6004 if (val == NULL) return;
6005 /*
6006 * inline the entity.
6007 */
6008 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
6009 (!ctxt->disableSAX))
6010 ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
6011 }
6012 }
6013}
6014
6015/**
6016 * xmlParseEntityRef:
6017 * @ctxt: an XML parser context
6018 *
6019 * parse ENTITY references declarations
6020 *
6021 * [68] EntityRef ::= '&' Name ';'
6022 *
6023 * [ WFC: Entity Declared ]
6024 * In a document without any DTD, a document with only an internal DTD
6025 * subset which contains no parameter entity references, or a document
6026 * with "standalone='yes'", the Name given in the entity reference
6027 * must match that in an entity declaration, except that well-formed
6028 * documents need not declare any of the following entities: amp, lt,
6029 * gt, apos, quot. The declaration of a parameter entity must precede
6030 * any reference to it. Similarly, the declaration of a general entity
6031 * must precede any reference to it which appears in a default value in an
6032 * attribute-list declaration. Note that if entities are declared in the
6033 * external subset or in external parameter entities, a non-validating
6034 * processor is not obligated to read and process their declarations;
6035 * for such documents, the rule that an entity must be declared is a
6036 * well-formedness constraint only if standalone='yes'.
6037 *
6038 * [ WFC: Parsed Entity ]
6039 * An entity reference must not contain the name of an unparsed entity
6040 *
6041 * Returns the xmlEntityPtr if found, or NULL otherwise.
6042 */
6043xmlEntityPtr
6044xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006045 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00006046 xmlEntityPtr ent = NULL;
6047
6048 GROW;
6049
6050 if (RAW == '&') {
6051 NEXT;
6052 name = xmlParseName(ctxt);
6053 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006054 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6055 "xmlParseEntityRef: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006056 } else {
6057 if (RAW == ';') {
6058 NEXT;
6059 /*
6060 * Ask first SAX for entity resolution, otherwise try the
6061 * predefined set.
6062 */
6063 if (ctxt->sax != NULL) {
6064 if (ctxt->sax->getEntity != NULL)
6065 ent = ctxt->sax->getEntity(ctxt->userData, name);
Daniel Veillard39eb88b2003-03-11 11:21:28 +00006066 if ((ctxt->wellFormed == 1 ) && (ent == NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00006067 ent = xmlGetPredefinedEntity(name);
Daniel Veillard39eb88b2003-03-11 11:21:28 +00006068 if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
6069 (ctxt->userData==ctxt)) {
Daniel Veillard1af9a412003-08-20 22:54:39 +00006070 ent = xmlSAX2GetEntity(ctxt, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00006071 }
Owen Taylor3473f882001-02-23 17:55:21 +00006072 }
6073 /*
6074 * [ WFC: Entity Declared ]
6075 * In a document without any DTD, a document with only an
6076 * internal DTD subset which contains no parameter entity
6077 * references, or a document with "standalone='yes'", the
6078 * Name given in the entity reference must match that in an
6079 * entity declaration, except that well-formed documents
6080 * need not declare any of the following entities: amp, lt,
6081 * gt, apos, quot.
6082 * The declaration of a parameter entity must precede any
6083 * reference to it.
6084 * Similarly, the declaration of a general entity must
6085 * precede any reference to it which appears in a default
6086 * value in an attribute-list declaration. Note that if
6087 * entities are declared in the external subset or in
6088 * external parameter entities, a non-validating processor
6089 * is not obligated to read and process their declarations;
6090 * for such documents, the rule that an entity must be
6091 * declared is a well-formedness constraint only if
6092 * standalone='yes'.
6093 */
6094 if (ent == NULL) {
6095 if ((ctxt->standalone == 1) ||
6096 ((ctxt->hasExternalSubset == 0) &&
6097 (ctxt->hasPErefs == 0))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006098 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00006099 "Entity '%s' not defined\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006100 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00006101 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00006102 "Entity '%s' not defined\n", name);
Daniel Veillard53bd1f32005-08-24 14:46:07 +00006103 if ((ctxt->inSubset == 0) &&
6104 (ctxt->sax != NULL) &&
6105 (ctxt->sax->reference != NULL)) {
6106 ctxt->sax->reference(ctxt, name);
6107 }
Owen Taylor3473f882001-02-23 17:55:21 +00006108 }
Daniel Veillardf403d292003-10-05 13:51:35 +00006109 ctxt->valid = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00006110 }
6111
6112 /*
6113 * [ WFC: Parsed Entity ]
6114 * An entity reference must not contain the name of an
6115 * unparsed entity
6116 */
6117 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006118 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00006119 "Entity reference to unparsed entity %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006120 }
6121
6122 /*
6123 * [ WFC: No External Entity References ]
6124 * Attribute values cannot contain direct or indirect
6125 * entity references to external entities.
6126 */
6127 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
6128 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006129 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
6130 "Attribute references external entity '%s'\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006131 }
6132 /*
6133 * [ WFC: No < in Attribute Values ]
6134 * The replacement text of any entity referred to directly or
6135 * indirectly in an attribute value (other than "&lt;") must
6136 * not contain a <.
6137 */
6138 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
6139 (ent != NULL) &&
6140 (!xmlStrEqual(ent->name, BAD_CAST "lt")) &&
6141 (ent->content != NULL) &&
6142 (xmlStrchr(ent->content, '<'))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006143 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
Owen Taylor3473f882001-02-23 17:55:21 +00006144 "'<' in entity '%s' is not allowed in attributes values\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006145 }
6146
6147 /*
6148 * Internal check, no parameter entities here ...
6149 */
6150 else {
6151 switch (ent->etype) {
6152 case XML_INTERNAL_PARAMETER_ENTITY:
6153 case XML_EXTERNAL_PARAMETER_ENTITY:
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006154 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
6155 "Attempt to reference the parameter entity '%s'\n",
6156 name);
Owen Taylor3473f882001-02-23 17:55:21 +00006157 break;
6158 default:
6159 break;
6160 }
6161 }
6162
6163 /*
6164 * [ WFC: No Recursion ]
6165 * A parsed entity must not contain a recursive reference
6166 * to itself, either directly or indirectly.
6167 * Done somewhere else
6168 */
6169
6170 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006171 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006172 }
Owen Taylor3473f882001-02-23 17:55:21 +00006173 }
6174 }
6175 return(ent);
6176}
6177
6178/**
6179 * xmlParseStringEntityRef:
6180 * @ctxt: an XML parser context
6181 * @str: a pointer to an index in the string
6182 *
6183 * parse ENTITY references declarations, but this version parses it from
6184 * a string value.
6185 *
6186 * [68] EntityRef ::= '&' Name ';'
6187 *
6188 * [ WFC: Entity Declared ]
6189 * In a document without any DTD, a document with only an internal DTD
6190 * subset which contains no parameter entity references, or a document
6191 * with "standalone='yes'", the Name given in the entity reference
6192 * must match that in an entity declaration, except that well-formed
6193 * documents need not declare any of the following entities: amp, lt,
6194 * gt, apos, quot. The declaration of a parameter entity must precede
6195 * any reference to it. Similarly, the declaration of a general entity
6196 * must precede any reference to it which appears in a default value in an
6197 * attribute-list declaration. Note that if entities are declared in the
6198 * external subset or in external parameter entities, a non-validating
6199 * processor is not obligated to read and process their declarations;
6200 * for such documents, the rule that an entity must be declared is a
6201 * well-formedness constraint only if standalone='yes'.
6202 *
6203 * [ WFC: Parsed Entity ]
6204 * An entity reference must not contain the name of an unparsed entity
6205 *
6206 * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
6207 * is updated to the current location in the string.
6208 */
6209xmlEntityPtr
6210xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
6211 xmlChar *name;
6212 const xmlChar *ptr;
6213 xmlChar cur;
6214 xmlEntityPtr ent = NULL;
6215
6216 if ((str == NULL) || (*str == NULL))
6217 return(NULL);
6218 ptr = *str;
6219 cur = *ptr;
6220 if (cur == '&') {
6221 ptr++;
6222 cur = *ptr;
6223 name = xmlParseStringName(ctxt, &ptr);
6224 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006225 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6226 "xmlParseStringEntityRef: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006227 } else {
6228 if (*ptr == ';') {
6229 ptr++;
6230 /*
6231 * Ask first SAX for entity resolution, otherwise try the
6232 * predefined set.
6233 */
6234 if (ctxt->sax != NULL) {
6235 if (ctxt->sax->getEntity != NULL)
6236 ent = ctxt->sax->getEntity(ctxt->userData, name);
6237 if (ent == NULL)
6238 ent = xmlGetPredefinedEntity(name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00006239 if ((ent == NULL) && (ctxt->userData==ctxt)) {
Daniel Veillard1af9a412003-08-20 22:54:39 +00006240 ent = xmlSAX2GetEntity(ctxt, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00006241 }
Owen Taylor3473f882001-02-23 17:55:21 +00006242 }
6243 /*
6244 * [ WFC: Entity Declared ]
6245 * In a document without any DTD, a document with only an
6246 * internal DTD subset which contains no parameter entity
6247 * references, or a document with "standalone='yes'", the
6248 * Name given in the entity reference must match that in an
6249 * entity declaration, except that well-formed documents
6250 * need not declare any of the following entities: amp, lt,
6251 * gt, apos, quot.
6252 * The declaration of a parameter entity must precede any
6253 * reference to it.
6254 * Similarly, the declaration of a general entity must
6255 * precede any reference to it which appears in a default
6256 * value in an attribute-list declaration. Note that if
6257 * entities are declared in the external subset or in
6258 * external parameter entities, a non-validating processor
6259 * is not obligated to read and process their declarations;
6260 * for such documents, the rule that an entity must be
6261 * declared is a well-formedness constraint only if
6262 * standalone='yes'.
6263 */
6264 if (ent == NULL) {
6265 if ((ctxt->standalone == 1) ||
6266 ((ctxt->hasExternalSubset == 0) &&
6267 (ctxt->hasPErefs == 0))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006268 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00006269 "Entity '%s' not defined\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006270 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00006271 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
Daniel Veillard24eb9782003-10-04 21:08:09 +00006272 "Entity '%s' not defined\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00006273 name);
Owen Taylor3473f882001-02-23 17:55:21 +00006274 }
Daniel Veillard24eb9782003-10-04 21:08:09 +00006275 /* TODO ? check regressions ctxt->valid = 0; */
Owen Taylor3473f882001-02-23 17:55:21 +00006276 }
6277
6278 /*
6279 * [ WFC: Parsed Entity ]
6280 * An entity reference must not contain the name of an
6281 * unparsed entity
6282 */
6283 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
Daniel Veillardf403d292003-10-05 13:51:35 +00006284 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00006285 "Entity reference to unparsed entity %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006286 }
6287
6288 /*
6289 * [ WFC: No External Entity References ]
6290 * Attribute values cannot contain direct or indirect
6291 * entity references to external entities.
6292 */
6293 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
6294 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
Daniel Veillardf403d292003-10-05 13:51:35 +00006295 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
Owen Taylor3473f882001-02-23 17:55:21 +00006296 "Attribute references external entity '%s'\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006297 }
6298 /*
6299 * [ WFC: No < in Attribute Values ]
6300 * The replacement text of any entity referred to directly or
6301 * indirectly in an attribute value (other than "&lt;") must
6302 * not contain a <.
6303 */
6304 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
6305 (ent != NULL) &&
6306 (!xmlStrEqual(ent->name, BAD_CAST "lt")) &&
6307 (ent->content != NULL) &&
6308 (xmlStrchr(ent->content, '<'))) {
Daniel Veillardf403d292003-10-05 13:51:35 +00006309 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
6310 "'<' in entity '%s' is not allowed in attributes values\n",
6311 name);
Owen Taylor3473f882001-02-23 17:55:21 +00006312 }
6313
6314 /*
6315 * Internal check, no parameter entities here ...
6316 */
6317 else {
6318 switch (ent->etype) {
6319 case XML_INTERNAL_PARAMETER_ENTITY:
6320 case XML_EXTERNAL_PARAMETER_ENTITY:
Daniel Veillardf403d292003-10-05 13:51:35 +00006321 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
6322 "Attempt to reference the parameter entity '%s'\n",
6323 name);
Owen Taylor3473f882001-02-23 17:55:21 +00006324 break;
6325 default:
6326 break;
6327 }
6328 }
6329
6330 /*
6331 * [ WFC: No Recursion ]
6332 * A parsed entity must not contain a recursive reference
6333 * to itself, either directly or indirectly.
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006334 * Done somewhere else
Owen Taylor3473f882001-02-23 17:55:21 +00006335 */
6336
6337 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006338 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006339 }
6340 xmlFree(name);
6341 }
6342 }
6343 *str = ptr;
6344 return(ent);
6345}
6346
6347/**
6348 * xmlParsePEReference:
6349 * @ctxt: an XML parser context
6350 *
6351 * parse PEReference declarations
6352 * The entity content is handled directly by pushing it's content as
6353 * a new input stream.
6354 *
6355 * [69] PEReference ::= '%' Name ';'
6356 *
6357 * [ WFC: No Recursion ]
6358 * A parsed entity must not contain a recursive
6359 * reference to itself, either directly or indirectly.
6360 *
6361 * [ WFC: Entity Declared ]
6362 * In a document without any DTD, a document with only an internal DTD
6363 * subset which contains no parameter entity references, or a document
6364 * with "standalone='yes'", ... ... The declaration of a parameter
6365 * entity must precede any reference to it...
6366 *
6367 * [ VC: Entity Declared ]
6368 * In a document with an external subset or external parameter entities
6369 * with "standalone='no'", ... ... The declaration of a parameter entity
6370 * must precede any reference to it...
6371 *
6372 * [ WFC: In DTD ]
6373 * Parameter-entity references may only appear in the DTD.
6374 * NOTE: misleading but this is handled.
6375 */
6376void
Daniel Veillard8f597c32003-10-06 08:19:27 +00006377xmlParsePEReference(xmlParserCtxtPtr ctxt)
6378{
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006379 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00006380 xmlEntityPtr entity = NULL;
6381 xmlParserInputPtr input;
6382
6383 if (RAW == '%') {
6384 NEXT;
Daniel Veillard76d66f42001-05-16 21:05:17 +00006385 name = xmlParseName(ctxt);
Daniel Veillard8f597c32003-10-06 08:19:27 +00006386 if (name == NULL) {
6387 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6388 "xmlParsePEReference: no name\n");
6389 } else {
6390 if (RAW == ';') {
6391 NEXT;
6392 if ((ctxt->sax != NULL) &&
6393 (ctxt->sax->getParameterEntity != NULL))
6394 entity = ctxt->sax->getParameterEntity(ctxt->userData,
6395 name);
6396 if (entity == NULL) {
6397 /*
6398 * [ WFC: Entity Declared ]
6399 * In a document without any DTD, a document with only an
6400 * internal DTD subset which contains no parameter entity
6401 * references, or a document with "standalone='yes'", ...
6402 * ... The declaration of a parameter entity must precede
6403 * any reference to it...
6404 */
6405 if ((ctxt->standalone == 1) ||
6406 ((ctxt->hasExternalSubset == 0) &&
6407 (ctxt->hasPErefs == 0))) {
6408 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
6409 "PEReference: %%%s; not found\n",
6410 name);
6411 } else {
6412 /*
6413 * [ VC: Entity Declared ]
6414 * In a document with an external subset or external
6415 * parameter entities with "standalone='no'", ...
6416 * ... The declaration of a parameter entity must
6417 * precede any reference to it...
6418 */
6419 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
6420 "PEReference: %%%s; not found\n",
6421 name, NULL);
6422 ctxt->valid = 0;
6423 }
6424 } else {
6425 /*
6426 * Internal checking in case the entity quest barfed
6427 */
6428 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
6429 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
6430 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
6431 "Internal: %%%s; is not a parameter entity\n",
6432 name, NULL);
6433 } else if (ctxt->input->free != deallocblankswrapper) {
6434 input =
6435 xmlNewBlanksWrapperInputStream(ctxt, entity);
6436 xmlPushInput(ctxt, input);
6437 } else {
6438 /*
6439 * TODO !!!
6440 * handle the extra spaces added before and after
6441 * c.f. http://www.w3.org/TR/REC-xml#as-PE
6442 */
6443 input = xmlNewEntityInputStream(ctxt, entity);
6444 xmlPushInput(ctxt, input);
6445 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
Daniel Veillarda07050d2003-10-19 14:46:32 +00006446 (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
William M. Brack76e95df2003-10-18 16:20:14 +00006447 (IS_BLANK_CH(NXT(5)))) {
Daniel Veillard8f597c32003-10-06 08:19:27 +00006448 xmlParseTextDecl(ctxt);
6449 if (ctxt->errNo ==
6450 XML_ERR_UNSUPPORTED_ENCODING) {
6451 /*
6452 * The XML REC instructs us to stop parsing
6453 * right here
6454 */
6455 ctxt->instate = XML_PARSER_EOF;
6456 return;
6457 }
6458 }
6459 }
6460 }
6461 ctxt->hasPErefs = 1;
6462 } else {
6463 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
6464 }
6465 }
Owen Taylor3473f882001-02-23 17:55:21 +00006466 }
6467}
6468
6469/**
6470 * xmlParseStringPEReference:
6471 * @ctxt: an XML parser context
6472 * @str: a pointer to an index in the string
6473 *
6474 * parse PEReference declarations
6475 *
6476 * [69] PEReference ::= '%' Name ';'
6477 *
6478 * [ WFC: No Recursion ]
6479 * A parsed entity must not contain a recursive
6480 * reference to itself, either directly or indirectly.
6481 *
6482 * [ WFC: Entity Declared ]
6483 * In a document without any DTD, a document with only an internal DTD
6484 * subset which contains no parameter entity references, or a document
6485 * with "standalone='yes'", ... ... The declaration of a parameter
6486 * entity must precede any reference to it...
6487 *
6488 * [ VC: Entity Declared ]
6489 * In a document with an external subset or external parameter entities
6490 * with "standalone='no'", ... ... The declaration of a parameter entity
6491 * must precede any reference to it...
6492 *
6493 * [ WFC: In DTD ]
6494 * Parameter-entity references may only appear in the DTD.
6495 * NOTE: misleading but this is handled.
6496 *
6497 * Returns the string of the entity content.
6498 * str is updated to the current value of the index
6499 */
6500xmlEntityPtr
6501xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
6502 const xmlChar *ptr;
6503 xmlChar cur;
6504 xmlChar *name;
6505 xmlEntityPtr entity = NULL;
6506
6507 if ((str == NULL) || (*str == NULL)) return(NULL);
6508 ptr = *str;
6509 cur = *ptr;
6510 if (cur == '%') {
6511 ptr++;
6512 cur = *ptr;
6513 name = xmlParseStringName(ctxt, &ptr);
6514 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006515 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6516 "xmlParseStringPEReference: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006517 } else {
6518 cur = *ptr;
6519 if (cur == ';') {
6520 ptr++;
6521 cur = *ptr;
6522 if ((ctxt->sax != NULL) &&
6523 (ctxt->sax->getParameterEntity != NULL))
6524 entity = ctxt->sax->getParameterEntity(ctxt->userData,
6525 name);
6526 if (entity == NULL) {
6527 /*
6528 * [ WFC: Entity Declared ]
6529 * In a document without any DTD, a document with only an
6530 * internal DTD subset which contains no parameter entity
6531 * references, or a document with "standalone='yes'", ...
6532 * ... The declaration of a parameter entity must precede
6533 * any reference to it...
6534 */
6535 if ((ctxt->standalone == 1) ||
6536 ((ctxt->hasExternalSubset == 0) &&
6537 (ctxt->hasPErefs == 0))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006538 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00006539 "PEReference: %%%s; not found\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006540 } else {
6541 /*
6542 * [ VC: Entity Declared ]
6543 * In a document with an external subset or external
6544 * parameter entities with "standalone='no'", ...
6545 * ... The declaration of a parameter entity must
6546 * precede any reference to it...
6547 */
Daniel Veillard24eb9782003-10-04 21:08:09 +00006548 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
6549 "PEReference: %%%s; not found\n",
6550 name, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006551 ctxt->valid = 0;
6552 }
6553 } else {
6554 /*
6555 * Internal checking in case the entity quest barfed
6556 */
6557 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
6558 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00006559 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
6560 "%%%s; is not a parameter entity\n",
6561 name, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006562 }
6563 }
6564 ctxt->hasPErefs = 1;
6565 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006566 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006567 }
6568 xmlFree(name);
6569 }
6570 }
6571 *str = ptr;
6572 return(entity);
6573}
6574
6575/**
6576 * xmlParseDocTypeDecl:
6577 * @ctxt: an XML parser context
6578 *
6579 * parse a DOCTYPE declaration
6580 *
6581 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
6582 * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
6583 *
6584 * [ VC: Root Element Type ]
6585 * The Name in the document type declaration must match the element
6586 * type of the root element.
6587 */
6588
6589void
6590xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006591 const xmlChar *name = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00006592 xmlChar *ExternalID = NULL;
6593 xmlChar *URI = NULL;
6594
6595 /*
6596 * We know that '<!DOCTYPE' has been detected.
6597 */
6598 SKIP(9);
6599
6600 SKIP_BLANKS;
6601
6602 /*
6603 * Parse the DOCTYPE name.
6604 */
6605 name = xmlParseName(ctxt);
6606 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006607 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6608 "xmlParseDocTypeDecl : no DOCTYPE name !\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006609 }
6610 ctxt->intSubName = name;
6611
6612 SKIP_BLANKS;
6613
6614 /*
6615 * Check for SystemID and ExternalID
6616 */
6617 URI = xmlParseExternalID(ctxt, &ExternalID, 1);
6618
6619 if ((URI != NULL) || (ExternalID != NULL)) {
6620 ctxt->hasExternalSubset = 1;
6621 }
6622 ctxt->extSubURI = URI;
6623 ctxt->extSubSystem = ExternalID;
6624
6625 SKIP_BLANKS;
6626
6627 /*
6628 * Create and update the internal subset.
6629 */
6630 if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
6631 (!ctxt->disableSAX))
6632 ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
6633
6634 /*
6635 * Is there any internal subset declarations ?
6636 * they are handled separately in xmlParseInternalSubset()
6637 */
6638 if (RAW == '[')
6639 return;
6640
6641 /*
6642 * We should be at the end of the DOCTYPE declaration.
6643 */
6644 if (RAW != '>') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006645 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006646 }
6647 NEXT;
6648}
6649
6650/**
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006651 * xmlParseInternalSubset:
Owen Taylor3473f882001-02-23 17:55:21 +00006652 * @ctxt: an XML parser context
6653 *
6654 * parse the internal subset declaration
6655 *
6656 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
6657 */
6658
Daniel Veillard56a4cb82001-03-24 17:00:36 +00006659static void
Owen Taylor3473f882001-02-23 17:55:21 +00006660xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
6661 /*
6662 * Is there any DTD definition ?
6663 */
6664 if (RAW == '[') {
6665 ctxt->instate = XML_PARSER_DTD;
6666 NEXT;
6667 /*
6668 * Parse the succession of Markup declarations and
6669 * PEReferences.
6670 * Subsequence (markupdecl | PEReference | S)*
6671 */
6672 while (RAW != ']') {
6673 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00006674 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00006675
6676 SKIP_BLANKS;
6677 xmlParseMarkupDecl(ctxt);
6678 xmlParsePEReference(ctxt);
6679
6680 /*
6681 * Pop-up of finished entities.
6682 */
6683 while ((RAW == 0) && (ctxt->inputNr > 1))
6684 xmlPopInput(ctxt);
6685
6686 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006687 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
Owen Taylor3473f882001-02-23 17:55:21 +00006688 "xmlParseInternalSubset: error detected in Markup declaration\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006689 break;
6690 }
6691 }
6692 if (RAW == ']') {
6693 NEXT;
6694 SKIP_BLANKS;
6695 }
6696 }
6697
6698 /*
6699 * We should be at the end of the DOCTYPE declaration.
6700 */
6701 if (RAW != '>') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006702 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006703 }
6704 NEXT;
6705}
6706
Daniel Veillard81273902003-09-30 00:43:48 +00006707#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +00006708/**
6709 * xmlParseAttribute:
6710 * @ctxt: an XML parser context
6711 * @value: a xmlChar ** used to store the value of the attribute
6712 *
6713 * parse an attribute
6714 *
6715 * [41] Attribute ::= Name Eq AttValue
6716 *
6717 * [ WFC: No External Entity References ]
6718 * Attribute values cannot contain direct or indirect entity references
6719 * to external entities.
6720 *
6721 * [ WFC: No < in Attribute Values ]
6722 * The replacement text of any entity referred to directly or indirectly in
6723 * an attribute value (other than "&lt;") must not contain a <.
6724 *
6725 * [ VC: Attribute Value Type ]
6726 * The attribute must have been declared; the value must be of the type
6727 * declared for it.
6728 *
6729 * [25] Eq ::= S? '=' S?
6730 *
6731 * With namespace:
6732 *
6733 * [NS 11] Attribute ::= QName Eq AttValue
6734 *
6735 * Also the case QName == xmlns:??? is handled independently as a namespace
6736 * definition.
6737 *
6738 * Returns the attribute name, and the value in *value.
6739 */
6740
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006741const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00006742xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006743 const xmlChar *name;
6744 xmlChar *val;
Owen Taylor3473f882001-02-23 17:55:21 +00006745
6746 *value = NULL;
Daniel Veillard878eab02002-02-19 13:46:09 +00006747 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00006748 name = xmlParseName(ctxt);
6749 if (name == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00006750 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006751 "error parsing attribute name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006752 return(NULL);
6753 }
6754
6755 /*
6756 * read the value
6757 */
6758 SKIP_BLANKS;
6759 if (RAW == '=') {
6760 NEXT;
6761 SKIP_BLANKS;
6762 val = xmlParseAttValue(ctxt);
6763 ctxt->instate = XML_PARSER_CONTENT;
6764 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006765 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
Owen Taylor3473f882001-02-23 17:55:21 +00006766 "Specification mandate value for attribute %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006767 return(NULL);
6768 }
6769
6770 /*
6771 * Check that xml:lang conforms to the specification
6772 * No more registered as an error, just generate a warning now
6773 * since this was deprecated in XML second edition
6774 */
6775 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
6776 if (!xmlCheckLanguageID(val)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00006777 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
6778 "Malformed value for xml:lang : %s\n",
6779 val, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006780 }
6781 }
6782
6783 /*
6784 * Check that xml:space conforms to the specification
6785 */
6786 if (xmlStrEqual(name, BAD_CAST "xml:space")) {
6787 if (xmlStrEqual(val, BAD_CAST "default"))
6788 *(ctxt->space) = 0;
6789 else if (xmlStrEqual(val, BAD_CAST "preserve"))
6790 *(ctxt->space) = 1;
6791 else {
Daniel Veillardd8925572005-06-08 22:34:55 +00006792 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
Daniel Veillard642104e2003-03-26 16:32:05 +00006793"Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
Daniel Veillardd8925572005-06-08 22:34:55 +00006794 val, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006795 }
6796 }
6797
6798 *value = val;
6799 return(name);
6800}
6801
6802/**
6803 * xmlParseStartTag:
6804 * @ctxt: an XML parser context
6805 *
6806 * parse a start of tag either for rule element or
6807 * EmptyElement. In both case we don't parse the tag closing chars.
6808 *
6809 * [40] STag ::= '<' Name (S Attribute)* S? '>'
6810 *
6811 * [ WFC: Unique Att Spec ]
6812 * No attribute name may appear more than once in the same start-tag or
6813 * empty-element tag.
6814 *
6815 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
6816 *
6817 * [ WFC: Unique Att Spec ]
6818 * No attribute name may appear more than once in the same start-tag or
6819 * empty-element tag.
6820 *
6821 * With namespace:
6822 *
6823 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
6824 *
6825 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
6826 *
6827 * Returns the element name parsed
6828 */
6829
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006830const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00006831xmlParseStartTag(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006832 const xmlChar *name;
6833 const xmlChar *attname;
Owen Taylor3473f882001-02-23 17:55:21 +00006834 xmlChar *attvalue;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00006835 const xmlChar **atts = ctxt->atts;
Owen Taylor3473f882001-02-23 17:55:21 +00006836 int nbatts = 0;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00006837 int maxatts = ctxt->maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00006838 int i;
6839
6840 if (RAW != '<') return(NULL);
Daniel Veillard21a0f912001-02-25 19:54:14 +00006841 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00006842
6843 name = xmlParseName(ctxt);
6844 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006845 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00006846 "xmlParseStartTag: invalid element name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006847 return(NULL);
6848 }
6849
6850 /*
6851 * Now parse the attributes, it ends up with the ending
6852 *
6853 * (S Attribute)* S?
6854 */
6855 SKIP_BLANKS;
6856 GROW;
6857
Daniel Veillard21a0f912001-02-25 19:54:14 +00006858 while ((RAW != '>') &&
6859 ((RAW != '/') || (NXT(1) != '>')) &&
Daniel Veillard73b013f2003-09-30 12:36:01 +00006860 (IS_BYTE_CHAR(RAW))) {
Owen Taylor3473f882001-02-23 17:55:21 +00006861 const xmlChar *q = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00006862 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00006863
6864 attname = xmlParseAttribute(ctxt, &attvalue);
6865 if ((attname != NULL) && (attvalue != NULL)) {
6866 /*
6867 * [ WFC: Unique Att Spec ]
6868 * No attribute name may appear more than once in the same
6869 * start-tag or empty-element tag.
6870 */
6871 for (i = 0; i < nbatts;i += 2) {
6872 if (xmlStrEqual(atts[i], attname)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006873 xmlErrAttributeDup(ctxt, NULL, attname);
Owen Taylor3473f882001-02-23 17:55:21 +00006874 xmlFree(attvalue);
6875 goto failed;
6876 }
6877 }
Owen Taylor3473f882001-02-23 17:55:21 +00006878 /*
6879 * Add the pair to atts
6880 */
6881 if (atts == NULL) {
Daniel Veillard6155d8a2003-08-19 15:01:28 +00006882 maxatts = 22; /* allow for 10 attrs by default */
6883 atts = (const xmlChar **)
6884 xmlMalloc(maxatts * sizeof(xmlChar *));
Owen Taylor3473f882001-02-23 17:55:21 +00006885 if (atts == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006886 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006887 if (attvalue != NULL)
6888 xmlFree(attvalue);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006889 goto failed;
Owen Taylor3473f882001-02-23 17:55:21 +00006890 }
Daniel Veillard6155d8a2003-08-19 15:01:28 +00006891 ctxt->atts = atts;
6892 ctxt->maxatts = maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00006893 } else if (nbatts + 4 > maxatts) {
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006894 const xmlChar **n;
6895
Owen Taylor3473f882001-02-23 17:55:21 +00006896 maxatts *= 2;
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006897 n = (const xmlChar **) xmlRealloc((void *) atts,
Daniel Veillard6155d8a2003-08-19 15:01:28 +00006898 maxatts * sizeof(const xmlChar *));
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006899 if (n == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006900 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006901 if (attvalue != NULL)
6902 xmlFree(attvalue);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006903 goto failed;
Owen Taylor3473f882001-02-23 17:55:21 +00006904 }
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006905 atts = n;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00006906 ctxt->atts = atts;
6907 ctxt->maxatts = maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00006908 }
6909 atts[nbatts++] = attname;
6910 atts[nbatts++] = attvalue;
6911 atts[nbatts] = NULL;
6912 atts[nbatts + 1] = NULL;
6913 } else {
Owen Taylor3473f882001-02-23 17:55:21 +00006914 if (attvalue != NULL)
6915 xmlFree(attvalue);
6916 }
6917
6918failed:
6919
Daniel Veillard3772de32002-12-17 10:31:45 +00006920 GROW
Owen Taylor3473f882001-02-23 17:55:21 +00006921 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
6922 break;
William M. Brack76e95df2003-10-18 16:20:14 +00006923 if (!IS_BLANK_CH(RAW)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006924 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6925 "attributes construct error\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006926 }
6927 SKIP_BLANKS;
Daniel Veillard02111c12003-02-24 19:14:52 +00006928 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
6929 (attname == NULL) && (attvalue == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006930 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
6931 "xmlParseStartTag: problem parsing attributes\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006932 break;
6933 }
Daniel Veillard6155d8a2003-08-19 15:01:28 +00006934 SHRINK;
Owen Taylor3473f882001-02-23 17:55:21 +00006935 GROW;
6936 }
6937
6938 /*
6939 * SAX: Start of Element !
6940 */
6941 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
Daniel Veillard6155d8a2003-08-19 15:01:28 +00006942 (!ctxt->disableSAX)) {
6943 if (nbatts > 0)
6944 ctxt->sax->startElement(ctxt->userData, name, atts);
6945 else
6946 ctxt->sax->startElement(ctxt->userData, name, NULL);
6947 }
Owen Taylor3473f882001-02-23 17:55:21 +00006948
6949 if (atts != NULL) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006950 /* Free only the content strings */
6951 for (i = 1;i < nbatts;i+=2)
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006952 if (atts[i] != NULL)
6953 xmlFree((xmlChar *) atts[i]);
Owen Taylor3473f882001-02-23 17:55:21 +00006954 }
6955 return(name);
6956}
6957
6958/**
Daniel Veillard0fb18932003-09-07 09:14:37 +00006959 * xmlParseEndTag1:
Owen Taylor3473f882001-02-23 17:55:21 +00006960 * @ctxt: an XML parser context
Daniel Veillard0fb18932003-09-07 09:14:37 +00006961 * @line: line of the start tag
6962 * @nsNr: number of namespaces on the start tag
Owen Taylor3473f882001-02-23 17:55:21 +00006963 *
6964 * parse an end of tag
6965 *
6966 * [42] ETag ::= '</' Name S? '>'
6967 *
6968 * With namespace
6969 *
6970 * [NS 9] ETag ::= '</' QName S? '>'
6971 */
6972
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00006973static void
Daniel Veillard0fb18932003-09-07 09:14:37 +00006974xmlParseEndTag1(xmlParserCtxtPtr ctxt, int line) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006975 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00006976
6977 GROW;
6978 if ((RAW != '<') || (NXT(1) != '/')) {
Daniel Veillardf403d292003-10-05 13:51:35 +00006979 xmlFatalErrMsg(ctxt, XML_ERR_LTSLASH_REQUIRED,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006980 "xmlParseEndTag: '</' not found\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006981 return;
6982 }
6983 SKIP(2);
6984
Daniel Veillard46de64e2002-05-29 08:21:33 +00006985 name = xmlParseNameAndCompare(ctxt,ctxt->name);
Owen Taylor3473f882001-02-23 17:55:21 +00006986
6987 /*
6988 * We should definitely be at the ending "S? '>'" part
6989 */
6990 GROW;
6991 SKIP_BLANKS;
Daniel Veillard73b013f2003-09-30 12:36:01 +00006992 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006993 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006994 } else
Daniel Veillard21a0f912001-02-25 19:54:14 +00006995 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00006996
6997 /*
6998 * [ WFC: Element Type Match ]
6999 * The Name in an element's end-tag must match the element type in the
7000 * start-tag.
7001 *
7002 */
Daniel Veillard46de64e2002-05-29 08:21:33 +00007003 if (name != (xmlChar*)1) {
Daniel Veillardf403d292003-10-05 13:51:35 +00007004 if (name == NULL) name = BAD_CAST "unparseable";
7005 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00007006 "Opening and ending tag mismatch: %s line %d and %s\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00007007 ctxt->name, line, name);
Owen Taylor3473f882001-02-23 17:55:21 +00007008 }
7009
7010 /*
7011 * SAX: End of Tag
7012 */
7013 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
7014 (!ctxt->disableSAX))
Daniel Veillard46de64e2002-05-29 08:21:33 +00007015 ctxt->sax->endElement(ctxt->userData, ctxt->name);
Owen Taylor3473f882001-02-23 17:55:21 +00007016
Daniel Veillarde57ec792003-09-10 10:50:59 +00007017 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00007018 spacePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00007019 return;
7020}
7021
7022/**
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00007023 * xmlParseEndTag:
7024 * @ctxt: an XML parser context
7025 *
7026 * parse an end of tag
7027 *
7028 * [42] ETag ::= '</' Name S? '>'
7029 *
7030 * With namespace
7031 *
7032 * [NS 9] ETag ::= '</' QName S? '>'
7033 */
7034
7035void
7036xmlParseEndTag(xmlParserCtxtPtr ctxt) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007037 xmlParseEndTag1(ctxt, 0);
7038}
Daniel Veillard81273902003-09-30 00:43:48 +00007039#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillard0fb18932003-09-07 09:14:37 +00007040
7041/************************************************************************
7042 * *
7043 * SAX 2 specific operations *
7044 * *
7045 ************************************************************************/
7046
7047static const xmlChar *
7048xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) {
7049 int len = 0, l;
7050 int c;
7051 int count = 0;
7052
7053 /*
7054 * Handler for more complex cases
7055 */
7056 GROW;
7057 c = CUR_CHAR(l);
7058 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007059 (!IS_LETTER(c) && (c != '_'))) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007060 return(NULL);
7061 }
7062
7063 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
William M. Brack871611b2003-10-18 04:53:14 +00007064 ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007065 (c == '.') || (c == '-') || (c == '_') ||
William M. Brack871611b2003-10-18 04:53:14 +00007066 (IS_COMBINING(c)) ||
7067 (IS_EXTENDER(c)))) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007068 if (count++ > 100) {
7069 count = 0;
7070 GROW;
7071 }
7072 len += l;
7073 NEXTL(l);
7074 c = CUR_CHAR(l);
7075 }
7076 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
7077}
7078
7079/*
7080 * xmlGetNamespace:
7081 * @ctxt: an XML parser context
7082 * @prefix: the prefix to lookup
7083 *
7084 * Lookup the namespace name for the @prefix (which ca be NULL)
7085 * The prefix must come from the @ctxt->dict dictionnary
7086 *
7087 * Returns the namespace name or NULL if not bound
7088 */
7089static const xmlChar *
7090xmlGetNamespace(xmlParserCtxtPtr ctxt, const xmlChar *prefix) {
7091 int i;
7092
Daniel Veillarde57ec792003-09-10 10:50:59 +00007093 if (prefix == ctxt->str_xml) return(ctxt->str_xml_ns);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007094 for (i = ctxt->nsNr - 2;i >= 0;i-=2)
Daniel Veillarde57ec792003-09-10 10:50:59 +00007095 if (ctxt->nsTab[i] == prefix) {
7096 if ((prefix == NULL) && (*ctxt->nsTab[i + 1] == 0))
7097 return(NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007098 return(ctxt->nsTab[i + 1]);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007099 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007100 return(NULL);
7101}
7102
7103/**
7104 * xmlParseNCName:
7105 * @ctxt: an XML parser context
Daniel Veillardc82c57e2004-01-12 16:24:34 +00007106 * @len: lenght of the string parsed
Daniel Veillard0fb18932003-09-07 09:14:37 +00007107 *
7108 * parse an XML name.
7109 *
7110 * [4NS] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
7111 * CombiningChar | Extender
7112 *
7113 * [5NS] NCName ::= (Letter | '_') (NCNameChar)*
7114 *
7115 * Returns the Name parsed or NULL
7116 */
7117
7118static const xmlChar *
7119xmlParseNCName(xmlParserCtxtPtr ctxt) {
7120 const xmlChar *in;
7121 const xmlChar *ret;
7122 int count = 0;
7123
7124 /*
7125 * Accelerator for simple ASCII names
7126 */
7127 in = ctxt->input->cur;
7128 if (((*in >= 0x61) && (*in <= 0x7A)) ||
7129 ((*in >= 0x41) && (*in <= 0x5A)) ||
7130 (*in == '_')) {
7131 in++;
7132 while (((*in >= 0x61) && (*in <= 0x7A)) ||
7133 ((*in >= 0x41) && (*in <= 0x5A)) ||
7134 ((*in >= 0x30) && (*in <= 0x39)) ||
7135 (*in == '_') || (*in == '-') ||
7136 (*in == '.'))
7137 in++;
7138 if ((*in > 0) && (*in < 0x80)) {
7139 count = in - ctxt->input->cur;
7140 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
7141 ctxt->input->cur = in;
7142 ctxt->nbChars += count;
7143 ctxt->input->col += count;
7144 if (ret == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007145 xmlErrMemory(ctxt, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007146 }
7147 return(ret);
7148 }
7149 }
7150 return(xmlParseNCNameComplex(ctxt));
7151}
7152
7153/**
7154 * xmlParseQName:
7155 * @ctxt: an XML parser context
7156 * @prefix: pointer to store the prefix part
7157 *
7158 * parse an XML Namespace QName
7159 *
7160 * [6] QName ::= (Prefix ':')? LocalPart
7161 * [7] Prefix ::= NCName
7162 * [8] LocalPart ::= NCName
7163 *
7164 * Returns the Name parsed or NULL
7165 */
7166
7167static const xmlChar *
7168xmlParseQName(xmlParserCtxtPtr ctxt, const xmlChar **prefix) {
7169 const xmlChar *l, *p;
7170
7171 GROW;
7172
7173 l = xmlParseNCName(ctxt);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007174 if (l == NULL) {
7175 if (CUR == ':') {
7176 l = xmlParseName(ctxt);
7177 if (l != NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007178 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
7179 "Failed to parse QName '%s'\n", l, NULL, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007180 *prefix = NULL;
7181 return(l);
7182 }
7183 }
7184 return(NULL);
7185 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007186 if (CUR == ':') {
7187 NEXT;
7188 p = l;
7189 l = xmlParseNCName(ctxt);
7190 if (l == NULL) {
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007191 xmlChar *tmp;
7192
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007193 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
7194 "Failed to parse QName '%s:'\n", p, NULL, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007195 tmp = xmlBuildQName(BAD_CAST "", p, NULL, 0);
7196 p = xmlDictLookup(ctxt->dict, tmp, -1);
7197 if (tmp != NULL) xmlFree(tmp);
7198 *prefix = NULL;
7199 return(p);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007200 }
7201 if (CUR == ':') {
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007202 xmlChar *tmp;
7203
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007204 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
7205 "Failed to parse QName '%s:%s:'\n", p, l, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007206 NEXT;
7207 tmp = (xmlChar *) xmlParseName(ctxt);
7208 if (tmp != NULL) {
7209 tmp = xmlBuildQName(tmp, l, NULL, 0);
7210 l = xmlDictLookup(ctxt->dict, tmp, -1);
7211 if (tmp != NULL) xmlFree(tmp);
7212 *prefix = p;
7213 return(l);
7214 }
7215 tmp = xmlBuildQName(BAD_CAST "", l, NULL, 0);
7216 l = xmlDictLookup(ctxt->dict, tmp, -1);
7217 if (tmp != NULL) xmlFree(tmp);
7218 *prefix = p;
7219 return(l);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007220 }
7221 *prefix = p;
7222 } else
7223 *prefix = NULL;
7224 return(l);
7225}
7226
7227/**
7228 * xmlParseQNameAndCompare:
7229 * @ctxt: an XML parser context
7230 * @name: the localname
7231 * @prefix: the prefix, if any.
7232 *
7233 * parse an XML name and compares for match
7234 * (specialized for endtag parsing)
7235 *
7236 * Returns NULL for an illegal name, (xmlChar*) 1 for success
7237 * and the name for mismatch
7238 */
7239
7240static const xmlChar *
7241xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *name,
7242 xmlChar const *prefix) {
7243 const xmlChar *cmp = name;
7244 const xmlChar *in;
7245 const xmlChar *ret;
7246 const xmlChar *prefix2;
7247
7248 if (prefix == NULL) return(xmlParseNameAndCompare(ctxt, name));
7249
7250 GROW;
7251 in = ctxt->input->cur;
7252
7253 cmp = prefix;
7254 while (*in != 0 && *in == *cmp) {
7255 ++in;
7256 ++cmp;
7257 }
7258 if ((*cmp == 0) && (*in == ':')) {
7259 in++;
7260 cmp = name;
7261 while (*in != 0 && *in == *cmp) {
7262 ++in;
7263 ++cmp;
7264 }
William M. Brack76e95df2003-10-18 16:20:14 +00007265 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007266 /* success */
7267 ctxt->input->cur = in;
7268 return((const xmlChar*) 1);
7269 }
7270 }
7271 /*
7272 * all strings coms from the dictionary, equality can be done directly
7273 */
7274 ret = xmlParseQName (ctxt, &prefix2);
7275 if ((ret == name) && (prefix == prefix2))
7276 return((const xmlChar*) 1);
7277 return ret;
7278}
7279
7280/**
7281 * xmlParseAttValueInternal:
7282 * @ctxt: an XML parser context
7283 * @len: attribute len result
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007284 * @alloc: whether the attribute was reallocated as a new string
7285 * @normalize: if 1 then further non-CDATA normalization must be done
Daniel Veillard0fb18932003-09-07 09:14:37 +00007286 *
7287 * parse a value for an attribute.
7288 * NOTE: if no normalization is needed, the routine will return pointers
7289 * directly from the data buffer.
7290 *
7291 * 3.3.3 Attribute-Value Normalization:
7292 * Before the value of an attribute is passed to the application or
7293 * checked for validity, the XML processor must normalize it as follows:
7294 * - a character reference is processed by appending the referenced
7295 * character to the attribute value
7296 * - an entity reference is processed by recursively processing the
7297 * replacement text of the entity
7298 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
7299 * appending #x20 to the normalized value, except that only a single
7300 * #x20 is appended for a "#xD#xA" sequence that is part of an external
7301 * parsed entity or the literal entity value of an internal parsed entity
7302 * - other characters are processed by appending them to the normalized value
7303 * If the declared value is not CDATA, then the XML processor must further
7304 * process the normalized attribute value by discarding any leading and
7305 * trailing space (#x20) characters, and by replacing sequences of space
7306 * (#x20) characters by a single space (#x20) character.
7307 * All attributes for which no declaration has been read should be treated
7308 * by a non-validating parser as if declared CDATA.
7309 *
7310 * Returns the AttValue parsed or NULL. The value has to be freed by the
7311 * caller if it was copied, this can be detected by val[*len] == 0.
7312 */
7313
7314static xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007315xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *len, int *alloc,
7316 int normalize)
Daniel Veillarde57ec792003-09-10 10:50:59 +00007317{
Daniel Veillard0fb18932003-09-07 09:14:37 +00007318 xmlChar limit = 0;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007319 const xmlChar *in = NULL, *start, *end, *last;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007320 xmlChar *ret = NULL;
7321
7322 GROW;
7323 in = (xmlChar *) CUR_PTR;
7324 if (*in != '"' && *in != '\'') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007325 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007326 return (NULL);
7327 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007328 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007329
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007330 /*
7331 * try to handle in this routine the most common case where no
7332 * allocation of a new string is required and where content is
7333 * pure ASCII.
7334 */
7335 limit = *in++;
7336 end = ctxt->input->end;
7337 start = in;
7338 if (in >= end) {
7339 const xmlChar *oldbase = ctxt->input->base;
7340 GROW;
7341 if (oldbase != ctxt->input->base) {
7342 long delta = ctxt->input->base - oldbase;
7343 start = start + delta;
7344 in = in + delta;
7345 }
7346 end = ctxt->input->end;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007347 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007348 if (normalize) {
7349 /*
7350 * Skip any leading spaces
7351 */
7352 while ((in < end) && (*in != limit) &&
7353 ((*in == 0x20) || (*in == 0x9) ||
7354 (*in == 0xA) || (*in == 0xD))) {
7355 in++;
7356 start = in;
7357 if (in >= end) {
7358 const xmlChar *oldbase = ctxt->input->base;
7359 GROW;
7360 if (oldbase != ctxt->input->base) {
7361 long delta = ctxt->input->base - oldbase;
7362 start = start + delta;
7363 in = in + delta;
7364 }
7365 end = ctxt->input->end;
7366 }
7367 }
7368 while ((in < end) && (*in != limit) && (*in >= 0x20) &&
7369 (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
7370 if ((*in++ == 0x20) && (*in == 0x20)) break;
7371 if (in >= end) {
7372 const xmlChar *oldbase = ctxt->input->base;
7373 GROW;
7374 if (oldbase != ctxt->input->base) {
7375 long delta = ctxt->input->base - oldbase;
7376 start = start + delta;
7377 in = in + delta;
7378 }
7379 end = ctxt->input->end;
7380 }
7381 }
7382 last = in;
7383 /*
7384 * skip the trailing blanks
7385 */
Daniel Veillardc6e20e42003-09-11 16:30:26 +00007386 while ((last[-1] == 0x20) && (last > start)) last--;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007387 while ((in < end) && (*in != limit) &&
7388 ((*in == 0x20) || (*in == 0x9) ||
7389 (*in == 0xA) || (*in == 0xD))) {
7390 in++;
7391 if (in >= end) {
7392 const xmlChar *oldbase = ctxt->input->base;
7393 GROW;
7394 if (oldbase != ctxt->input->base) {
7395 long delta = ctxt->input->base - oldbase;
7396 start = start + delta;
7397 in = in + delta;
7398 last = last + delta;
7399 }
7400 end = ctxt->input->end;
7401 }
7402 }
7403 if (*in != limit) goto need_complex;
7404 } else {
7405 while ((in < end) && (*in != limit) && (*in >= 0x20) &&
7406 (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
7407 in++;
7408 if (in >= end) {
7409 const xmlChar *oldbase = ctxt->input->base;
7410 GROW;
7411 if (oldbase != ctxt->input->base) {
7412 long delta = ctxt->input->base - oldbase;
7413 start = start + delta;
7414 in = in + delta;
7415 }
7416 end = ctxt->input->end;
7417 }
7418 }
7419 last = in;
7420 if (*in != limit) goto need_complex;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007421 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007422 in++;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007423 if (len != NULL) {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007424 *len = last - start;
7425 ret = (xmlChar *) start;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007426 } else {
Daniel Veillarde57ec792003-09-10 10:50:59 +00007427 if (alloc) *alloc = 1;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007428 ret = xmlStrndup(start, last - start);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007429 }
7430 CUR_PTR = in;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007431 if (alloc) *alloc = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007432 return ret;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007433need_complex:
7434 if (alloc) *alloc = 1;
7435 return xmlParseAttValueComplex(ctxt, len, normalize);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007436}
7437
7438/**
7439 * xmlParseAttribute2:
7440 * @ctxt: an XML parser context
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007441 * @pref: the element prefix
7442 * @elem: the element name
7443 * @prefix: a xmlChar ** used to store the value of the attribute prefix
Daniel Veillard0fb18932003-09-07 09:14:37 +00007444 * @value: a xmlChar ** used to store the value of the attribute
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007445 * @len: an int * to save the length of the attribute
7446 * @alloc: an int * to indicate if the attribute was allocated
Daniel Veillard0fb18932003-09-07 09:14:37 +00007447 *
7448 * parse an attribute in the new SAX2 framework.
7449 *
7450 * Returns the attribute name, and the value in *value, .
7451 */
7452
7453static const xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007454xmlParseAttribute2(xmlParserCtxtPtr ctxt,
7455 const xmlChar *pref, const xmlChar *elem,
7456 const xmlChar **prefix, xmlChar **value,
7457 int *len, int *alloc) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007458 const xmlChar *name;
Daniel Veillardd8925572005-06-08 22:34:55 +00007459 xmlChar *val, *internal_val = NULL;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007460 int normalize = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007461
7462 *value = NULL;
7463 GROW;
7464 name = xmlParseQName(ctxt, prefix);
7465 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007466 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7467 "error parsing attribute name\n");
Daniel Veillard0fb18932003-09-07 09:14:37 +00007468 return(NULL);
7469 }
7470
7471 /*
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007472 * get the type if needed
7473 */
7474 if (ctxt->attsSpecial != NULL) {
7475 int type;
7476
7477 type = (int) (long) xmlHashQLookup2(ctxt->attsSpecial,
7478 pref, elem, *prefix, name);
7479 if (type != 0) normalize = 1;
7480 }
7481
7482 /*
Daniel Veillard0fb18932003-09-07 09:14:37 +00007483 * read the value
7484 */
7485 SKIP_BLANKS;
7486 if (RAW == '=') {
7487 NEXT;
7488 SKIP_BLANKS;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007489 val = xmlParseAttValueInternal(ctxt, len, alloc, normalize);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007490 ctxt->instate = XML_PARSER_CONTENT;
7491 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00007492 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
Daniel Veillard0fb18932003-09-07 09:14:37 +00007493 "Specification mandate value for attribute %s\n", name);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007494 return(NULL);
7495 }
7496
Daniel Veillardd8925572005-06-08 22:34:55 +00007497 if (*prefix == ctxt->str_xml) {
7498 /*
7499 * Check that xml:lang conforms to the specification
7500 * No more registered as an error, just generate a warning now
7501 * since this was deprecated in XML second edition
7502 */
7503 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "lang"))) {
7504 internal_val = xmlStrndup(val, *len);
7505 if (!xmlCheckLanguageID(internal_val)) {
7506 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
7507 "Malformed value for xml:lang : %s\n",
7508 internal_val, NULL);
7509 }
7510 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007511
Daniel Veillardd8925572005-06-08 22:34:55 +00007512 /*
7513 * Check that xml:space conforms to the specification
7514 */
7515 if (xmlStrEqual(name, BAD_CAST "space")) {
7516 internal_val = xmlStrndup(val, *len);
7517 if (xmlStrEqual(internal_val, BAD_CAST "default"))
7518 *(ctxt->space) = 0;
7519 else if (xmlStrEqual(internal_val, BAD_CAST "preserve"))
7520 *(ctxt->space) = 1;
7521 else {
7522 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
Daniel Veillard0fb18932003-09-07 09:14:37 +00007523"Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
Daniel Veillardd8925572005-06-08 22:34:55 +00007524 internal_val, NULL);
7525 }
7526 }
7527 if (internal_val) {
7528 xmlFree(internal_val);
7529 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007530 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007531
7532 *value = val;
7533 return(name);
7534}
7535
7536/**
7537 * xmlParseStartTag2:
7538 * @ctxt: an XML parser context
7539 *
7540 * parse a start of tag either for rule element or
7541 * EmptyElement. In both case we don't parse the tag closing chars.
7542 * This routine is called when running SAX2 parsing
7543 *
7544 * [40] STag ::= '<' Name (S Attribute)* S? '>'
7545 *
7546 * [ WFC: Unique Att Spec ]
7547 * No attribute name may appear more than once in the same start-tag or
7548 * empty-element tag.
7549 *
7550 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
7551 *
7552 * [ WFC: Unique Att Spec ]
7553 * No attribute name may appear more than once in the same start-tag or
7554 * empty-element tag.
7555 *
7556 * With namespace:
7557 *
7558 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
7559 *
7560 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
7561 *
7562 * Returns the element name parsed
7563 */
7564
7565static const xmlChar *
7566xmlParseStartTag2(xmlParserCtxtPtr ctxt, const xmlChar **pref,
Daniel Veillardc82c57e2004-01-12 16:24:34 +00007567 const xmlChar **URI, int *tlen) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007568 const xmlChar *localname;
7569 const xmlChar *prefix;
7570 const xmlChar *attname;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007571 const xmlChar *aprefix;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007572 const xmlChar *nsname;
7573 xmlChar *attvalue;
7574 const xmlChar **atts = ctxt->atts;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007575 int maxatts = ctxt->maxatts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007576 int nratts, nbatts, nbdef;
7577 int i, j, nbNs, attval;
7578 const xmlChar *base;
7579 unsigned long cur;
Daniel Veillard365cf672005-06-09 08:18:24 +00007580 int nsNr = ctxt->nsNr;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007581
7582 if (RAW != '<') return(NULL);
7583 NEXT1;
7584
7585 /*
7586 * NOTE: it is crucial with the SAX2 API to never call SHRINK beyond that
7587 * point since the attribute values may be stored as pointers to
7588 * the buffer and calling SHRINK would destroy them !
7589 * The Shrinking is only possible once the full set of attribute
7590 * callbacks have been done.
7591 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00007592reparse:
Daniel Veillard0fb18932003-09-07 09:14:37 +00007593 SHRINK;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007594 base = ctxt->input->base;
7595 cur = ctxt->input->cur - ctxt->input->base;
7596 nbatts = 0;
7597 nratts = 0;
7598 nbdef = 0;
7599 nbNs = 0;
7600 attval = 0;
Daniel Veillard365cf672005-06-09 08:18:24 +00007601 /* Forget any namespaces added during an earlier parse of this element. */
7602 ctxt->nsNr = nsNr;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007603
7604 localname = xmlParseQName(ctxt, &prefix);
7605 if (localname == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007606 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7607 "StartTag: invalid element name\n");
Daniel Veillard0fb18932003-09-07 09:14:37 +00007608 return(NULL);
7609 }
Daniel Veillardc82c57e2004-01-12 16:24:34 +00007610 *tlen = ctxt->input->cur - ctxt->input->base - cur;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007611
7612 /*
7613 * Now parse the attributes, it ends up with the ending
7614 *
7615 * (S Attribute)* S?
7616 */
7617 SKIP_BLANKS;
7618 GROW;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007619 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007620
7621 while ((RAW != '>') &&
7622 ((RAW != '/') || (NXT(1) != '>')) &&
Daniel Veillard73b013f2003-09-30 12:36:01 +00007623 (IS_BYTE_CHAR(RAW))) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007624 const xmlChar *q = CUR_PTR;
7625 unsigned int cons = ctxt->input->consumed;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007626 int len = -1, alloc = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007627
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007628 attname = xmlParseAttribute2(ctxt, prefix, localname,
7629 &aprefix, &attvalue, &len, &alloc);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007630 if ((attname != NULL) && (attvalue != NULL)) {
7631 if (len < 0) len = xmlStrlen(attvalue);
7632 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00007633 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
7634 xmlURIPtr uri;
7635
7636 if (*URL != 0) {
7637 uri = xmlParseURI((const char *) URL);
7638 if (uri == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00007639 xmlWarningMsg(ctxt, XML_WAR_NS_URI,
7640 "xmlns: %s not a valid URI\n",
7641 URL, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007642 } else {
7643 if (uri->scheme == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00007644 xmlWarningMsg(ctxt, XML_WAR_NS_URI_RELATIVE,
7645 "xmlns: URI %s is not absolute\n",
7646 URL, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007647 }
7648 xmlFreeURI(uri);
7649 }
7650 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007651 /*
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007652 * check that it's not a defined namespace
Daniel Veillard0fb18932003-09-07 09:14:37 +00007653 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007654 for (j = 1;j <= nbNs;j++)
7655 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
7656 break;
7657 if (j <= nbNs)
7658 xmlErrAttributeDup(ctxt, NULL, attname);
7659 else
7660 if (nsPush(ctxt, NULL, URL) > 0) nbNs++;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007661 if (alloc != 0) xmlFree(attvalue);
7662 SKIP_BLANKS;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007663 continue;
7664 }
7665 if (aprefix == ctxt->str_xmlns) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00007666 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
7667 xmlURIPtr uri;
7668
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007669 if (attname == ctxt->str_xml) {
7670 if (URL != ctxt->str_xml_ns) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007671 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
7672 "xml namespace prefix mapped to wrong URI\n",
7673 NULL, NULL, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007674 }
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007675 /*
7676 * Do not keep a namespace definition node
7677 */
7678 if (alloc != 0) xmlFree(attvalue);
7679 SKIP_BLANKS;
7680 continue;
7681 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00007682 uri = xmlParseURI((const char *) URL);
7683 if (uri == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00007684 xmlWarningMsg(ctxt, XML_WAR_NS_URI,
7685 "xmlns:%s: '%s' is not a valid URI\n",
7686 attname, URL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007687 } else {
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007688 if ((ctxt->pedantic) && (uri->scheme == NULL)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00007689 xmlWarningMsg(ctxt, XML_WAR_NS_URI_RELATIVE,
7690 "xmlns:%s: URI %s is not absolute\n",
7691 attname, URL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007692 }
7693 xmlFreeURI(uri);
7694 }
7695
Daniel Veillard0fb18932003-09-07 09:14:37 +00007696 /*
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007697 * check that it's not a defined namespace
Daniel Veillard0fb18932003-09-07 09:14:37 +00007698 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007699 for (j = 1;j <= nbNs;j++)
7700 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
7701 break;
7702 if (j <= nbNs)
7703 xmlErrAttributeDup(ctxt, aprefix, attname);
7704 else
7705 if (nsPush(ctxt, attname, URL) > 0) nbNs++;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007706 if (alloc != 0) xmlFree(attvalue);
7707 SKIP_BLANKS;
Daniel Veillardd3999c72004-03-10 16:27:03 +00007708 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007709 continue;
7710 }
7711
7712 /*
7713 * Add the pair to atts
7714 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00007715 if ((atts == NULL) || (nbatts + 5 > maxatts)) {
7716 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007717 if (attvalue[len] == 0)
7718 xmlFree(attvalue);
7719 goto failed;
7720 }
7721 maxatts = ctxt->maxatts;
7722 atts = ctxt->atts;
7723 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00007724 ctxt->attallocs[nratts++] = alloc;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007725 atts[nbatts++] = attname;
7726 atts[nbatts++] = aprefix;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007727 atts[nbatts++] = NULL; /* the URI will be fetched later */
Daniel Veillard0fb18932003-09-07 09:14:37 +00007728 atts[nbatts++] = attvalue;
7729 attvalue += len;
7730 atts[nbatts++] = attvalue;
7731 /*
7732 * tag if some deallocation is needed
7733 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00007734 if (alloc != 0) attval = 1;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007735 } else {
7736 if ((attvalue != NULL) && (attvalue[len] == 0))
7737 xmlFree(attvalue);
7738 }
7739
7740failed:
7741
7742 GROW
Daniel Veillarde57ec792003-09-10 10:50:59 +00007743 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007744 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
7745 break;
William M. Brack76e95df2003-10-18 16:20:14 +00007746 if (!IS_BLANK_CH(RAW)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007747 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
7748 "attributes construct error\n");
William M. Brack13dfa872004-09-18 04:52:08 +00007749 break;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007750 }
7751 SKIP_BLANKS;
7752 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
7753 (attname == NULL) && (attvalue == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007754 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
Daniel Veillard0fb18932003-09-07 09:14:37 +00007755 "xmlParseStartTag: problem parsing attributes\n");
Daniel Veillard0fb18932003-09-07 09:14:37 +00007756 break;
7757 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007758 GROW;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007759 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007760 }
7761
Daniel Veillard0fb18932003-09-07 09:14:37 +00007762 /*
Daniel Veillarde57ec792003-09-10 10:50:59 +00007763 * The attributes defaulting
7764 */
7765 if (ctxt->attsDefault != NULL) {
7766 xmlDefAttrsPtr defaults;
7767
7768 defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix);
7769 if (defaults != NULL) {
7770 for (i = 0;i < defaults->nbAttrs;i++) {
7771 attname = defaults->values[4 * i];
7772 aprefix = defaults->values[4 * i + 1];
7773
7774 /*
7775 * special work for namespaces defaulted defs
7776 */
7777 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
7778 /*
7779 * check that it's not a defined namespace
7780 */
7781 for (j = 1;j <= nbNs;j++)
7782 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
7783 break;
7784 if (j <= nbNs) continue;
7785
7786 nsname = xmlGetNamespace(ctxt, NULL);
7787 if (nsname != defaults->values[4 * i + 2]) {
7788 if (nsPush(ctxt, NULL,
7789 defaults->values[4 * i + 2]) > 0)
7790 nbNs++;
7791 }
7792 } else if (aprefix == ctxt->str_xmlns) {
7793 /*
7794 * check that it's not a defined namespace
7795 */
7796 for (j = 1;j <= nbNs;j++)
7797 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
7798 break;
7799 if (j <= nbNs) continue;
7800
7801 nsname = xmlGetNamespace(ctxt, attname);
7802 if (nsname != defaults->values[2]) {
7803 if (nsPush(ctxt, attname,
7804 defaults->values[4 * i + 2]) > 0)
7805 nbNs++;
7806 }
7807 } else {
7808 /*
7809 * check that it's not a defined attribute
7810 */
7811 for (j = 0;j < nbatts;j+=5) {
7812 if ((attname == atts[j]) && (aprefix == atts[j+1]))
7813 break;
7814 }
7815 if (j < nbatts) continue;
7816
7817 if ((atts == NULL) || (nbatts + 5 > maxatts)) {
7818 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
Daniel Veillard9ee35f32003-09-28 00:19:54 +00007819 return(NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007820 }
7821 maxatts = ctxt->maxatts;
7822 atts = ctxt->atts;
7823 }
7824 atts[nbatts++] = attname;
7825 atts[nbatts++] = aprefix;
7826 if (aprefix == NULL)
7827 atts[nbatts++] = NULL;
7828 else
7829 atts[nbatts++] = xmlGetNamespace(ctxt, aprefix);
7830 atts[nbatts++] = defaults->values[4 * i + 2];
7831 atts[nbatts++] = defaults->values[4 * i + 3];
7832 nbdef++;
7833 }
7834 }
7835 }
7836 }
7837
Daniel Veillarde70c8772003-11-25 07:21:18 +00007838 /*
7839 * The attributes checkings
7840 */
7841 for (i = 0; i < nbatts;i += 5) {
Kasimier T. Buchcik455472f2005-04-29 10:04:43 +00007842 /*
7843 * The default namespace does not apply to attribute names.
7844 */
7845 if (atts[i + 1] != NULL) {
7846 nsname = xmlGetNamespace(ctxt, atts[i + 1]);
7847 if (nsname == NULL) {
7848 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
7849 "Namespace prefix %s for %s on %s is not defined\n",
7850 atts[i + 1], atts[i], localname);
7851 }
7852 atts[i + 2] = nsname;
7853 } else
7854 nsname = NULL;
Daniel Veillarde70c8772003-11-25 07:21:18 +00007855 /*
7856 * [ WFC: Unique Att Spec ]
7857 * No attribute name may appear more than once in the same
7858 * start-tag or empty-element tag.
7859 * As extended by the Namespace in XML REC.
7860 */
7861 for (j = 0; j < i;j += 5) {
7862 if (atts[i] == atts[j]) {
7863 if (atts[i+1] == atts[j+1]) {
7864 xmlErrAttributeDup(ctxt, atts[i+1], atts[i]);
7865 break;
7866 }
7867 if ((nsname != NULL) && (atts[j + 2] == nsname)) {
7868 xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED,
7869 "Namespaced Attribute %s in '%s' redefined\n",
7870 atts[i], nsname, NULL);
7871 break;
7872 }
7873 }
7874 }
7875 }
7876
Daniel Veillarde57ec792003-09-10 10:50:59 +00007877 nsname = xmlGetNamespace(ctxt, prefix);
7878 if ((prefix != NULL) && (nsname == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007879 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
7880 "Namespace prefix %s on %s is not defined\n",
7881 prefix, localname, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007882 }
7883 *pref = prefix;
7884 *URI = nsname;
7885
7886 /*
7887 * SAX: Start of Element !
7888 */
7889 if ((ctxt->sax != NULL) && (ctxt->sax->startElementNs != NULL) &&
7890 (!ctxt->disableSAX)) {
7891 if (nbNs > 0)
7892 ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
7893 nsname, nbNs, &ctxt->nsTab[ctxt->nsNr - 2 * nbNs],
7894 nbatts / 5, nbdef, atts);
7895 else
7896 ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
7897 nsname, 0, NULL, nbatts / 5, nbdef, atts);
7898 }
7899
7900 /*
7901 * Free up attribute allocated strings if needed
7902 */
7903 if (attval != 0) {
7904 for (i = 3,j = 0; j < nratts;i += 5,j++)
7905 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
7906 xmlFree((xmlChar *) atts[i]);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007907 }
7908
7909 return(localname);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007910
7911base_changed:
7912 /*
7913 * the attribute strings are valid iif the base didn't changed
7914 */
7915 if (attval != 0) {
7916 for (i = 3,j = 0; j < nratts;i += 5,j++)
7917 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
7918 xmlFree((xmlChar *) atts[i]);
7919 }
7920 ctxt->input->cur = ctxt->input->base + cur;
7921 if (ctxt->wellFormed == 1) {
7922 goto reparse;
7923 }
7924 return(NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007925}
7926
7927/**
7928 * xmlParseEndTag2:
7929 * @ctxt: an XML parser context
7930 * @line: line of the start tag
7931 * @nsNr: number of namespaces on the start tag
7932 *
7933 * parse an end of tag
7934 *
7935 * [42] ETag ::= '</' Name S? '>'
7936 *
7937 * With namespace
7938 *
7939 * [NS 9] ETag ::= '</' QName S? '>'
7940 */
7941
7942static void
7943xmlParseEndTag2(xmlParserCtxtPtr ctxt, const xmlChar *prefix,
Daniel Veillardc82c57e2004-01-12 16:24:34 +00007944 const xmlChar *URI, int line, int nsNr, int tlen) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007945 const xmlChar *name;
7946
7947 GROW;
7948 if ((RAW != '<') || (NXT(1) != '/')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007949 xmlFatalErr(ctxt, XML_ERR_LTSLASH_REQUIRED, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007950 return;
7951 }
7952 SKIP(2);
7953
William M. Brack13dfa872004-09-18 04:52:08 +00007954 if ((tlen > 0) && (xmlStrncmp(ctxt->input->cur, ctxt->name, tlen) == 0)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00007955 if (ctxt->input->cur[tlen] == '>') {
7956 ctxt->input->cur += tlen + 1;
7957 goto done;
7958 }
7959 ctxt->input->cur += tlen;
7960 name = (xmlChar*)1;
7961 } else {
7962 if (prefix == NULL)
7963 name = xmlParseNameAndCompare(ctxt, ctxt->name);
7964 else
7965 name = xmlParseQNameAndCompare(ctxt, ctxt->name, prefix);
7966 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007967
7968 /*
7969 * We should definitely be at the ending "S? '>'" part
7970 */
7971 GROW;
7972 SKIP_BLANKS;
Daniel Veillard73b013f2003-09-30 12:36:01 +00007973 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007974 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007975 } else
7976 NEXT1;
7977
7978 /*
7979 * [ WFC: Element Type Match ]
7980 * The Name in an element's end-tag must match the element type in the
7981 * start-tag.
7982 *
7983 */
7984 if (name != (xmlChar*)1) {
Daniel Veillardf403d292003-10-05 13:51:35 +00007985 if (name == NULL) name = BAD_CAST "unparseable";
7986 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
Daniel Veillard0fb18932003-09-07 09:14:37 +00007987 "Opening and ending tag mismatch: %s line %d and %s\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00007988 ctxt->name, line, name);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007989 }
7990
7991 /*
7992 * SAX: End of Tag
7993 */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00007994done:
Daniel Veillard0fb18932003-09-07 09:14:37 +00007995 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
7996 (!ctxt->disableSAX))
7997 ctxt->sax->endElementNs(ctxt->userData, ctxt->name, prefix, URI);
7998
Daniel Veillard0fb18932003-09-07 09:14:37 +00007999 spacePop(ctxt);
8000 if (nsNr != 0)
8001 nsPop(ctxt, nsNr);
8002 return;
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00008003}
8004
8005/**
Owen Taylor3473f882001-02-23 17:55:21 +00008006 * xmlParseCDSect:
8007 * @ctxt: an XML parser context
8008 *
8009 * Parse escaped pure raw content.
8010 *
8011 * [18] CDSect ::= CDStart CData CDEnd
8012 *
8013 * [19] CDStart ::= '<![CDATA['
8014 *
8015 * [20] Data ::= (Char* - (Char* ']]>' Char*))
8016 *
8017 * [21] CDEnd ::= ']]>'
8018 */
8019void
8020xmlParseCDSect(xmlParserCtxtPtr ctxt) {
8021 xmlChar *buf = NULL;
8022 int len = 0;
8023 int size = XML_PARSER_BUFFER_SIZE;
8024 int r, rl;
8025 int s, sl;
8026 int cur, l;
8027 int count = 0;
8028
Daniel Veillard8f597c32003-10-06 08:19:27 +00008029 /* Check 2.6.0 was NXT(0) not RAW */
Daniel Veillarda07050d2003-10-19 14:46:32 +00008030 if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008031 SKIP(9);
8032 } else
8033 return;
8034
8035 ctxt->instate = XML_PARSER_CDATA_SECTION;
8036 r = CUR_CHAR(rl);
William M. Brack871611b2003-10-18 04:53:14 +00008037 if (!IS_CHAR(r)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008038 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008039 ctxt->instate = XML_PARSER_CONTENT;
8040 return;
8041 }
8042 NEXTL(rl);
8043 s = CUR_CHAR(sl);
William M. Brack871611b2003-10-18 04:53:14 +00008044 if (!IS_CHAR(s)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008045 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008046 ctxt->instate = XML_PARSER_CONTENT;
8047 return;
8048 }
8049 NEXTL(sl);
8050 cur = CUR_CHAR(l);
Daniel Veillard3c908dc2003-04-19 00:07:51 +00008051 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00008052 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008053 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008054 return;
8055 }
William M. Brack871611b2003-10-18 04:53:14 +00008056 while (IS_CHAR(cur) &&
Owen Taylor3473f882001-02-23 17:55:21 +00008057 ((r != ']') || (s != ']') || (cur != '>'))) {
8058 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00008059 xmlChar *tmp;
8060
Owen Taylor3473f882001-02-23 17:55:21 +00008061 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00008062 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
8063 if (tmp == NULL) {
8064 xmlFree(buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008065 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008066 return;
8067 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00008068 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00008069 }
8070 COPY_BUF(rl,buf,len,r);
8071 r = s;
8072 rl = sl;
8073 s = cur;
8074 sl = l;
8075 count++;
8076 if (count > 50) {
8077 GROW;
8078 count = 0;
8079 }
8080 NEXTL(l);
8081 cur = CUR_CHAR(l);
8082 }
8083 buf[len] = 0;
8084 ctxt->instate = XML_PARSER_CONTENT;
8085 if (cur != '>') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00008086 xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00008087 "CData section not finished\n%.50s\n", buf);
Owen Taylor3473f882001-02-23 17:55:21 +00008088 xmlFree(buf);
8089 return;
8090 }
8091 NEXTL(l);
8092
8093 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00008094 * OK the buffer is to be consumed as cdata.
Owen Taylor3473f882001-02-23 17:55:21 +00008095 */
8096 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
8097 if (ctxt->sax->cdataBlock != NULL)
8098 ctxt->sax->cdataBlock(ctxt->userData, buf, len);
Daniel Veillard7583a592001-07-08 13:15:55 +00008099 else if (ctxt->sax->characters != NULL)
8100 ctxt->sax->characters(ctxt->userData, buf, len);
Owen Taylor3473f882001-02-23 17:55:21 +00008101 }
8102 xmlFree(buf);
8103}
8104
8105/**
8106 * xmlParseContent:
8107 * @ctxt: an XML parser context
8108 *
8109 * Parse a content:
8110 *
8111 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
8112 */
8113
8114void
8115xmlParseContent(xmlParserCtxtPtr ctxt) {
8116 GROW;
Daniel Veillardfdc91562002-07-01 21:52:03 +00008117 while ((RAW != 0) &&
Owen Taylor3473f882001-02-23 17:55:21 +00008118 ((RAW != '<') || (NXT(1) != '/'))) {
8119 const xmlChar *test = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00008120 unsigned int cons = ctxt->input->consumed;
Daniel Veillard21a0f912001-02-25 19:54:14 +00008121 const xmlChar *cur = ctxt->input->cur;
Owen Taylor3473f882001-02-23 17:55:21 +00008122
8123 /*
Owen Taylor3473f882001-02-23 17:55:21 +00008124 * First case : a Processing Instruction.
8125 */
Daniel Veillardfdc91562002-07-01 21:52:03 +00008126 if ((*cur == '<') && (cur[1] == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008127 xmlParsePI(ctxt);
8128 }
8129
8130 /*
8131 * Second case : a CDSection
8132 */
Daniel Veillard8f597c32003-10-06 08:19:27 +00008133 /* 2.6.0 test was *cur not RAW */
Daniel Veillarda07050d2003-10-19 14:46:32 +00008134 else if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008135 xmlParseCDSect(ctxt);
8136 }
8137
8138 /*
8139 * Third case : a comment
8140 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00008141 else if ((*cur == '<') && (NXT(1) == '!') &&
Owen Taylor3473f882001-02-23 17:55:21 +00008142 (NXT(2) == '-') && (NXT(3) == '-')) {
8143 xmlParseComment(ctxt);
8144 ctxt->instate = XML_PARSER_CONTENT;
8145 }
8146
8147 /*
8148 * Fourth case : a sub-element.
8149 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00008150 else if (*cur == '<') {
Owen Taylor3473f882001-02-23 17:55:21 +00008151 xmlParseElement(ctxt);
8152 }
8153
8154 /*
8155 * Fifth case : a reference. If if has not been resolved,
8156 * parsing returns it's Name, create the node
8157 */
8158
Daniel Veillard21a0f912001-02-25 19:54:14 +00008159 else if (*cur == '&') {
Owen Taylor3473f882001-02-23 17:55:21 +00008160 xmlParseReference(ctxt);
8161 }
8162
8163 /*
8164 * Last case, text. Note that References are handled directly.
8165 */
8166 else {
8167 xmlParseCharData(ctxt, 0);
8168 }
8169
8170 GROW;
8171 /*
8172 * Pop-up of finished entities.
8173 */
Daniel Veillard561b7f82002-03-20 21:55:57 +00008174 while ((RAW == 0) && (ctxt->inputNr > 1))
Owen Taylor3473f882001-02-23 17:55:21 +00008175 xmlPopInput(ctxt);
8176 SHRINK;
8177
Daniel Veillardfdc91562002-07-01 21:52:03 +00008178 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008179 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8180 "detected an error in element content\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008181 ctxt->instate = XML_PARSER_EOF;
8182 break;
8183 }
8184 }
8185}
8186
8187/**
8188 * xmlParseElement:
8189 * @ctxt: an XML parser context
8190 *
8191 * parse an XML element, this is highly recursive
8192 *
8193 * [39] element ::= EmptyElemTag | STag content ETag
8194 *
8195 * [ WFC: Element Type Match ]
8196 * The Name in an element's end-tag must match the element type in the
8197 * start-tag.
8198 *
Owen Taylor3473f882001-02-23 17:55:21 +00008199 */
8200
8201void
8202xmlParseElement(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008203 const xmlChar *name;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008204 const xmlChar *prefix;
8205 const xmlChar *URI;
Owen Taylor3473f882001-02-23 17:55:21 +00008206 xmlParserNodeInfo node_info;
Daniel Veillardc82c57e2004-01-12 16:24:34 +00008207 int line, tlen;
Owen Taylor3473f882001-02-23 17:55:21 +00008208 xmlNodePtr ret;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008209 int nsNr = ctxt->nsNr;
Owen Taylor3473f882001-02-23 17:55:21 +00008210
8211 /* Capture start position */
8212 if (ctxt->record_info) {
8213 node_info.begin_pos = ctxt->input->consumed +
8214 (CUR_PTR - ctxt->input->base);
8215 node_info.begin_line = ctxt->input->line;
8216 }
8217
8218 if (ctxt->spaceNr == 0)
8219 spacePush(ctxt, -1);
8220 else
8221 spacePush(ctxt, *ctxt->space);
8222
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00008223 line = ctxt->input->line;
Daniel Veillard81273902003-09-30 00:43:48 +00008224#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +00008225 if (ctxt->sax2)
Daniel Veillard81273902003-09-30 00:43:48 +00008226#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00008227 name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
Daniel Veillard81273902003-09-30 00:43:48 +00008228#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +00008229 else
8230 name = xmlParseStartTag(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +00008231#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00008232 if (name == NULL) {
8233 spacePop(ctxt);
8234 return;
8235 }
8236 namePush(ctxt, name);
8237 ret = ctxt->node;
8238
Daniel Veillard4432df22003-09-28 18:58:27 +00008239#ifdef LIBXML_VALID_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +00008240 /*
8241 * [ VC: Root Element Type ]
8242 * The Name in the document type declaration must match the element
8243 * type of the root element.
8244 */
8245 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
8246 ctxt->node && (ctxt->node == ctxt->myDoc->children))
8247 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
Daniel Veillard4432df22003-09-28 18:58:27 +00008248#endif /* LIBXML_VALID_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00008249
8250 /*
8251 * Check for an Empty Element.
8252 */
8253 if ((RAW == '/') && (NXT(1) == '>')) {
8254 SKIP(2);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008255 if (ctxt->sax2) {
8256 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
8257 (!ctxt->disableSAX))
8258 ctxt->sax->endElementNs(ctxt->userData, name, prefix, URI);
Daniel Veillard81273902003-09-30 00:43:48 +00008259#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +00008260 } else {
8261 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
8262 (!ctxt->disableSAX))
8263 ctxt->sax->endElement(ctxt->userData, name);
Daniel Veillard81273902003-09-30 00:43:48 +00008264#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00008265 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008266 namePop(ctxt);
8267 spacePop(ctxt);
8268 if (nsNr != ctxt->nsNr)
8269 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +00008270 if ( ret != NULL && ctxt->record_info ) {
8271 node_info.end_pos = ctxt->input->consumed +
8272 (CUR_PTR - ctxt->input->base);
8273 node_info.end_line = ctxt->input->line;
8274 node_info.node = ret;
8275 xmlParserAddNodeInfo(ctxt, &node_info);
8276 }
8277 return;
8278 }
8279 if (RAW == '>') {
Daniel Veillard21a0f912001-02-25 19:54:14 +00008280 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00008281 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00008282 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_GT_REQUIRED,
8283 "Couldn't find end of Start Tag %s line %d\n",
8284 name, line, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008285
8286 /*
8287 * end of parsing of this node.
8288 */
8289 nodePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008290 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00008291 spacePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008292 if (nsNr != ctxt->nsNr)
8293 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +00008294
8295 /*
8296 * Capture end position and add node
8297 */
8298 if ( ret != NULL && ctxt->record_info ) {
8299 node_info.end_pos = ctxt->input->consumed +
8300 (CUR_PTR - ctxt->input->base);
8301 node_info.end_line = ctxt->input->line;
8302 node_info.node = ret;
8303 xmlParserAddNodeInfo(ctxt, &node_info);
8304 }
8305 return;
8306 }
8307
8308 /*
8309 * Parse the content of the element:
8310 */
8311 xmlParseContent(ctxt);
Daniel Veillard73b013f2003-09-30 12:36:01 +00008312 if (!IS_BYTE_CHAR(RAW)) {
Daniel Veillardf403d292003-10-05 13:51:35 +00008313 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
Daniel Veillard2b0f8792003-10-10 19:36:36 +00008314 "Premature end of data in tag %s line %d\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00008315 name, line, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008316
8317 /*
8318 * end of parsing of this node.
8319 */
8320 nodePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008321 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00008322 spacePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008323 if (nsNr != ctxt->nsNr)
8324 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +00008325 return;
8326 }
8327
8328 /*
8329 * parse the end of tag: '</' should be here.
8330 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00008331 if (ctxt->sax2) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00008332 xmlParseEndTag2(ctxt, prefix, URI, line, ctxt->nsNr - nsNr, tlen);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008333 namePop(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +00008334 }
8335#ifdef LIBXML_SAX1_ENABLED
8336 else
Daniel Veillard0fb18932003-09-07 09:14:37 +00008337 xmlParseEndTag1(ctxt, line);
Daniel Veillard81273902003-09-30 00:43:48 +00008338#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00008339
8340 /*
8341 * Capture end position and add node
8342 */
8343 if ( ret != NULL && ctxt->record_info ) {
8344 node_info.end_pos = ctxt->input->consumed +
8345 (CUR_PTR - ctxt->input->base);
8346 node_info.end_line = ctxt->input->line;
8347 node_info.node = ret;
8348 xmlParserAddNodeInfo(ctxt, &node_info);
8349 }
8350}
8351
8352/**
8353 * xmlParseVersionNum:
8354 * @ctxt: an XML parser context
8355 *
8356 * parse the XML version value.
8357 *
8358 * [26] VersionNum ::= ([a-zA-Z0-9_.:] | '-')+
8359 *
8360 * Returns the string giving the XML version number, or NULL
8361 */
8362xmlChar *
8363xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
8364 xmlChar *buf = NULL;
8365 int len = 0;
8366 int size = 10;
8367 xmlChar cur;
8368
Daniel Veillard3c908dc2003-04-19 00:07:51 +00008369 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00008370 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008371 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008372 return(NULL);
8373 }
8374 cur = CUR;
8375 while (((cur >= 'a') && (cur <= 'z')) ||
8376 ((cur >= 'A') && (cur <= 'Z')) ||
8377 ((cur >= '0') && (cur <= '9')) ||
8378 (cur == '_') || (cur == '.') ||
8379 (cur == ':') || (cur == '-')) {
8380 if (len + 1 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00008381 xmlChar *tmp;
8382
Owen Taylor3473f882001-02-23 17:55:21 +00008383 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00008384 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
8385 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008386 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008387 return(NULL);
8388 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00008389 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00008390 }
8391 buf[len++] = cur;
8392 NEXT;
8393 cur=CUR;
8394 }
8395 buf[len] = 0;
8396 return(buf);
8397}
8398
8399/**
8400 * xmlParseVersionInfo:
8401 * @ctxt: an XML parser context
8402 *
8403 * parse the XML version.
8404 *
8405 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
8406 *
8407 * [25] Eq ::= S? '=' S?
8408 *
8409 * Returns the version string, e.g. "1.0"
8410 */
8411
8412xmlChar *
8413xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
8414 xmlChar *version = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00008415
Daniel Veillarda07050d2003-10-19 14:46:32 +00008416 if (CMP7(CUR_PTR, 'v', 'e', 'r', 's', 'i', 'o', 'n')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008417 SKIP(7);
8418 SKIP_BLANKS;
8419 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008420 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008421 return(NULL);
8422 }
8423 NEXT;
8424 SKIP_BLANKS;
8425 if (RAW == '"') {
8426 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00008427 version = xmlParseVersionNum(ctxt);
8428 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008429 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008430 } else
8431 NEXT;
8432 } else if (RAW == '\''){
8433 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00008434 version = xmlParseVersionNum(ctxt);
8435 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008436 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008437 } else
8438 NEXT;
8439 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008440 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008441 }
8442 }
8443 return(version);
8444}
8445
8446/**
8447 * xmlParseEncName:
8448 * @ctxt: an XML parser context
8449 *
8450 * parse the XML encoding name
8451 *
8452 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
8453 *
8454 * Returns the encoding name value or NULL
8455 */
8456xmlChar *
8457xmlParseEncName(xmlParserCtxtPtr ctxt) {
8458 xmlChar *buf = NULL;
8459 int len = 0;
8460 int size = 10;
8461 xmlChar cur;
8462
8463 cur = CUR;
8464 if (((cur >= 'a') && (cur <= 'z')) ||
8465 ((cur >= 'A') && (cur <= 'Z'))) {
Daniel Veillard3c908dc2003-04-19 00:07:51 +00008466 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00008467 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008468 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008469 return(NULL);
8470 }
8471
8472 buf[len++] = cur;
8473 NEXT;
8474 cur = CUR;
8475 while (((cur >= 'a') && (cur <= 'z')) ||
8476 ((cur >= 'A') && (cur <= 'Z')) ||
8477 ((cur >= '0') && (cur <= '9')) ||
8478 (cur == '.') || (cur == '_') ||
8479 (cur == '-')) {
8480 if (len + 1 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00008481 xmlChar *tmp;
8482
Owen Taylor3473f882001-02-23 17:55:21 +00008483 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00008484 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
8485 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008486 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00008487 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00008488 return(NULL);
8489 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00008490 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00008491 }
8492 buf[len++] = cur;
8493 NEXT;
8494 cur = CUR;
8495 if (cur == 0) {
8496 SHRINK;
8497 GROW;
8498 cur = CUR;
8499 }
8500 }
8501 buf[len] = 0;
8502 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008503 xmlFatalErr(ctxt, XML_ERR_ENCODING_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008504 }
8505 return(buf);
8506}
8507
8508/**
8509 * xmlParseEncodingDecl:
8510 * @ctxt: an XML parser context
8511 *
8512 * parse the XML encoding declaration
8513 *
8514 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
8515 *
8516 * this setups the conversion filters.
8517 *
8518 * Returns the encoding value or NULL
8519 */
8520
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00008521const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00008522xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
8523 xmlChar *encoding = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00008524
8525 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00008526 if (CMP8(CUR_PTR, 'e', 'n', 'c', 'o', 'd', 'i', 'n', 'g')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008527 SKIP(8);
8528 SKIP_BLANKS;
8529 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008530 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008531 return(NULL);
8532 }
8533 NEXT;
8534 SKIP_BLANKS;
8535 if (RAW == '"') {
8536 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00008537 encoding = xmlParseEncName(ctxt);
8538 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008539 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008540 } else
8541 NEXT;
8542 } else if (RAW == '\''){
8543 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00008544 encoding = xmlParseEncName(ctxt);
8545 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008546 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008547 } else
8548 NEXT;
Daniel Veillard82ac6b02002-02-17 23:18:55 +00008549 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008550 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008551 }
Daniel Veillard6b621b82003-08-11 15:03:34 +00008552 /*
8553 * UTF-16 encoding stwich has already taken place at this stage,
8554 * more over the little-endian/big-endian selection is already done
8555 */
8556 if ((encoding != NULL) &&
8557 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-16")) ||
8558 (!xmlStrcasecmp(encoding, BAD_CAST "UTF16")))) {
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00008559 if (ctxt->encoding != NULL)
8560 xmlFree((xmlChar *) ctxt->encoding);
8561 ctxt->encoding = encoding;
Daniel Veillardb19ba832003-08-14 00:33:46 +00008562 }
8563 /*
8564 * UTF-8 encoding is handled natively
8565 */
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00008566 else if ((encoding != NULL) &&
Daniel Veillardb19ba832003-08-14 00:33:46 +00008567 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-8")) ||
8568 (!xmlStrcasecmp(encoding, BAD_CAST "UTF8")))) {
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00008569 if (ctxt->encoding != NULL)
8570 xmlFree((xmlChar *) ctxt->encoding);
8571 ctxt->encoding = encoding;
Daniel Veillard6b621b82003-08-11 15:03:34 +00008572 }
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00008573 else if (encoding != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +00008574 xmlCharEncodingHandlerPtr handler;
8575
8576 if (ctxt->input->encoding != NULL)
8577 xmlFree((xmlChar *) ctxt->input->encoding);
8578 ctxt->input->encoding = encoding;
8579
Daniel Veillarda6874ca2003-07-29 16:47:24 +00008580 handler = xmlFindCharEncodingHandler((const char *) encoding);
8581 if (handler != NULL) {
8582 xmlSwitchToEncoding(ctxt, handler);
Owen Taylor3473f882001-02-23 17:55:21 +00008583 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00008584 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
Daniel Veillarda6874ca2003-07-29 16:47:24 +00008585 "Unsupported encoding %s\n", encoding);
8586 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008587 }
8588 }
8589 }
8590 return(encoding);
8591}
8592
8593/**
8594 * xmlParseSDDecl:
8595 * @ctxt: an XML parser context
8596 *
8597 * parse the XML standalone declaration
8598 *
8599 * [32] SDDecl ::= S 'standalone' Eq
8600 * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
8601 *
8602 * [ VC: Standalone Document Declaration ]
8603 * TODO The standalone document declaration must have the value "no"
8604 * if any external markup declarations contain declarations of:
8605 * - attributes with default values, if elements to which these
8606 * attributes apply appear in the document without specifications
8607 * of values for these attributes, or
8608 * - entities (other than amp, lt, gt, apos, quot), if references
8609 * to those entities appear in the document, or
8610 * - attributes with values subject to normalization, where the
8611 * attribute appears in the document with a value which will change
8612 * as a result of normalization, or
8613 * - element types with element content, if white space occurs directly
8614 * within any instance of those types.
8615 *
8616 * Returns 1 if standalone, 0 otherwise
8617 */
8618
8619int
8620xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
8621 int standalone = -1;
8622
8623 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00008624 if (CMP10(CUR_PTR, 's', 't', 'a', 'n', 'd', 'a', 'l', 'o', 'n', 'e')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008625 SKIP(10);
8626 SKIP_BLANKS;
8627 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008628 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008629 return(standalone);
8630 }
8631 NEXT;
8632 SKIP_BLANKS;
8633 if (RAW == '\''){
8634 NEXT;
8635 if ((RAW == 'n') && (NXT(1) == 'o')) {
8636 standalone = 0;
8637 SKIP(2);
8638 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
8639 (NXT(2) == 's')) {
8640 standalone = 1;
8641 SKIP(3);
8642 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008643 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008644 }
8645 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008646 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008647 } else
8648 NEXT;
8649 } else if (RAW == '"'){
8650 NEXT;
8651 if ((RAW == 'n') && (NXT(1) == 'o')) {
8652 standalone = 0;
8653 SKIP(2);
8654 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
8655 (NXT(2) == 's')) {
8656 standalone = 1;
8657 SKIP(3);
8658 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008659 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008660 }
8661 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008662 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008663 } else
8664 NEXT;
8665 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008666 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008667 }
8668 }
8669 return(standalone);
8670}
8671
8672/**
8673 * xmlParseXMLDecl:
8674 * @ctxt: an XML parser context
8675 *
8676 * parse an XML declaration header
8677 *
8678 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
8679 */
8680
8681void
8682xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
8683 xmlChar *version;
8684
8685 /*
8686 * We know that '<?xml' is here.
8687 */
8688 SKIP(5);
8689
William M. Brack76e95df2003-10-18 16:20:14 +00008690 if (!IS_BLANK_CH(RAW)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008691 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
8692 "Blank needed after '<?xml'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008693 }
8694 SKIP_BLANKS;
8695
8696 /*
Daniel Veillard19840942001-11-29 16:11:38 +00008697 * We must have the VersionInfo here.
Owen Taylor3473f882001-02-23 17:55:21 +00008698 */
8699 version = xmlParseVersionInfo(ctxt);
Daniel Veillard19840942001-11-29 16:11:38 +00008700 if (version == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008701 xmlFatalErr(ctxt, XML_ERR_VERSION_MISSING, NULL);
Daniel Veillard19840942001-11-29 16:11:38 +00008702 } else {
8703 if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) {
8704 /*
8705 * TODO: Blueberry should be detected here
8706 */
Daniel Veillard24eb9782003-10-04 21:08:09 +00008707 xmlWarningMsg(ctxt, XML_WAR_UNKNOWN_VERSION,
8708 "Unsupported version '%s'\n",
8709 version, NULL);
Daniel Veillard19840942001-11-29 16:11:38 +00008710 }
8711 if (ctxt->version != NULL)
Daniel Veillardd3b08822001-12-05 12:03:33 +00008712 xmlFree((void *) ctxt->version);
Daniel Veillard19840942001-11-29 16:11:38 +00008713 ctxt->version = version;
Daniel Veillarda050d232001-09-05 15:51:05 +00008714 }
Owen Taylor3473f882001-02-23 17:55:21 +00008715
8716 /*
8717 * We may have the encoding declaration
8718 */
William M. Brack76e95df2003-10-18 16:20:14 +00008719 if (!IS_BLANK_CH(RAW)) {
Owen Taylor3473f882001-02-23 17:55:21 +00008720 if ((RAW == '?') && (NXT(1) == '>')) {
8721 SKIP(2);
8722 return;
8723 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008724 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008725 }
8726 xmlParseEncodingDecl(ctxt);
8727 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
8728 /*
8729 * The XML REC instructs us to stop parsing right here
8730 */
8731 return;
8732 }
8733
8734 /*
8735 * We may have the standalone status.
8736 */
William M. Brack76e95df2003-10-18 16:20:14 +00008737 if ((ctxt->input->encoding != NULL) && (!IS_BLANK_CH(RAW))) {
Owen Taylor3473f882001-02-23 17:55:21 +00008738 if ((RAW == '?') && (NXT(1) == '>')) {
8739 SKIP(2);
8740 return;
8741 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008742 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008743 }
8744 SKIP_BLANKS;
8745 ctxt->input->standalone = xmlParseSDDecl(ctxt);
8746
8747 SKIP_BLANKS;
8748 if ((RAW == '?') && (NXT(1) == '>')) {
8749 SKIP(2);
8750 } else if (RAW == '>') {
8751 /* Deprecated old WD ... */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008752 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008753 NEXT;
8754 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008755 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008756 MOVETO_ENDTAG(CUR_PTR);
8757 NEXT;
8758 }
8759}
8760
8761/**
8762 * xmlParseMisc:
8763 * @ctxt: an XML parser context
8764 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00008765 * parse an XML Misc* optional field.
Owen Taylor3473f882001-02-23 17:55:21 +00008766 *
8767 * [27] Misc ::= Comment | PI | S
8768 */
8769
8770void
8771xmlParseMisc(xmlParserCtxtPtr ctxt) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00008772 while (((RAW == '<') && (NXT(1) == '?')) ||
Daniel Veillarda07050d2003-10-19 14:46:32 +00008773 (CMP4(CUR_PTR, '<', '!', '-', '-')) ||
William M. Brack76e95df2003-10-18 16:20:14 +00008774 IS_BLANK_CH(CUR)) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00008775 if ((RAW == '<') && (NXT(1) == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008776 xmlParsePI(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00008777 } else if (IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00008778 NEXT;
8779 } else
8780 xmlParseComment(ctxt);
8781 }
8782}
8783
8784/**
8785 * xmlParseDocument:
8786 * @ctxt: an XML parser context
8787 *
8788 * parse an XML document (and build a tree if using the standard SAX
8789 * interface).
8790 *
8791 * [1] document ::= prolog element Misc*
8792 *
8793 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
8794 *
8795 * Returns 0, -1 in case of error. the parser context is augmented
8796 * as a result of the parsing.
8797 */
8798
8799int
8800xmlParseDocument(xmlParserCtxtPtr ctxt) {
8801 xmlChar start[4];
8802 xmlCharEncoding enc;
8803
8804 xmlInitParser();
8805
Daniel Veillard36e5cd52004-11-02 14:52:23 +00008806 if ((ctxt == NULL) || (ctxt->input == NULL))
8807 return(-1);
8808
Owen Taylor3473f882001-02-23 17:55:21 +00008809 GROW;
8810
8811 /*
Daniel Veillard0fb18932003-09-07 09:14:37 +00008812 * SAX: detecting the level.
8813 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00008814 xmlDetectSAX2(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008815
8816 /*
Owen Taylor3473f882001-02-23 17:55:21 +00008817 * SAX: beginning of the document processing.
8818 */
8819 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
8820 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
8821
Daniel Veillard4aede2e2003-10-17 12:43:59 +00008822 if ((ctxt->encoding == (const xmlChar *)XML_CHAR_ENCODING_NONE) &&
8823 ((ctxt->input->end - ctxt->input->cur) >= 4)) {
Daniel Veillard4aafa792001-07-28 17:21:12 +00008824 /*
8825 * Get the 4 first bytes and decode the charset
8826 * if enc != XML_CHAR_ENCODING_NONE
8827 * plug some encoding conversion routines.
8828 */
8829 start[0] = RAW;
8830 start[1] = NXT(1);
8831 start[2] = NXT(2);
8832 start[3] = NXT(3);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00008833 enc = xmlDetectCharEncoding(&start[0], 4);
Daniel Veillard4aafa792001-07-28 17:21:12 +00008834 if (enc != XML_CHAR_ENCODING_NONE) {
8835 xmlSwitchEncoding(ctxt, enc);
8836 }
Owen Taylor3473f882001-02-23 17:55:21 +00008837 }
8838
8839
8840 if (CUR == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008841 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008842 }
8843
8844 /*
8845 * Check for the XMLDecl in the Prolog.
8846 */
8847 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00008848 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +00008849
8850 /*
8851 * Note that we will switch encoding on the fly.
8852 */
8853 xmlParseXMLDecl(ctxt);
8854 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
8855 /*
8856 * The XML REC instructs us to stop parsing right here
8857 */
8858 return(-1);
8859 }
8860 ctxt->standalone = ctxt->input->standalone;
8861 SKIP_BLANKS;
8862 } else {
8863 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
8864 }
8865 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
8866 ctxt->sax->startDocument(ctxt->userData);
8867
8868 /*
8869 * The Misc part of the Prolog
8870 */
8871 GROW;
8872 xmlParseMisc(ctxt);
8873
8874 /*
8875 * Then possibly doc type declaration(s) and more Misc
8876 * (doctypedecl Misc*)?
8877 */
8878 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00008879 if (CMP9(CUR_PTR, '<', '!', 'D', 'O', 'C', 'T', 'Y', 'P', 'E')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008880
8881 ctxt->inSubset = 1;
8882 xmlParseDocTypeDecl(ctxt);
8883 if (RAW == '[') {
8884 ctxt->instate = XML_PARSER_DTD;
8885 xmlParseInternalSubset(ctxt);
8886 }
8887
8888 /*
8889 * Create and update the external subset.
8890 */
8891 ctxt->inSubset = 2;
8892 if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
8893 (!ctxt->disableSAX))
8894 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
8895 ctxt->extSubSystem, ctxt->extSubURI);
8896 ctxt->inSubset = 0;
8897
8898
8899 ctxt->instate = XML_PARSER_PROLOG;
8900 xmlParseMisc(ctxt);
8901 }
8902
8903 /*
8904 * Time to start parsing the tree itself
8905 */
8906 GROW;
8907 if (RAW != '<') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008908 xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
8909 "Start tag expected, '<' not found\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008910 } else {
8911 ctxt->instate = XML_PARSER_CONTENT;
8912 xmlParseElement(ctxt);
8913 ctxt->instate = XML_PARSER_EPILOG;
8914
8915
8916 /*
8917 * The Misc part at the end
8918 */
8919 xmlParseMisc(ctxt);
8920
Daniel Veillard561b7f82002-03-20 21:55:57 +00008921 if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008922 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008923 }
8924 ctxt->instate = XML_PARSER_EOF;
8925 }
8926
8927 /*
8928 * SAX: end of the document processing.
8929 */
Daniel Veillard8d24cc12002-03-05 15:41:29 +00008930 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00008931 ctxt->sax->endDocument(ctxt->userData);
8932
Daniel Veillard5997aca2002-03-18 18:36:20 +00008933 /*
8934 * Remove locally kept entity definitions if the tree was not built
8935 */
8936 if ((ctxt->myDoc != NULL) &&
8937 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
8938 xmlFreeDoc(ctxt->myDoc);
8939 ctxt->myDoc = NULL;
8940 }
8941
Daniel Veillardc7612992002-02-17 22:47:37 +00008942 if (! ctxt->wellFormed) {
8943 ctxt->valid = 0;
8944 return(-1);
8945 }
Owen Taylor3473f882001-02-23 17:55:21 +00008946 return(0);
8947}
8948
8949/**
8950 * xmlParseExtParsedEnt:
8951 * @ctxt: an XML parser context
8952 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00008953 * parse a general parsed entity
Owen Taylor3473f882001-02-23 17:55:21 +00008954 * An external general parsed entity is well-formed if it matches the
8955 * production labeled extParsedEnt.
8956 *
8957 * [78] extParsedEnt ::= TextDecl? content
8958 *
8959 * Returns 0, -1 in case of error. the parser context is augmented
8960 * as a result of the parsing.
8961 */
8962
8963int
8964xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
8965 xmlChar start[4];
8966 xmlCharEncoding enc;
8967
Daniel Veillard36e5cd52004-11-02 14:52:23 +00008968 if ((ctxt == NULL) || (ctxt->input == NULL))
8969 return(-1);
8970
Owen Taylor3473f882001-02-23 17:55:21 +00008971 xmlDefaultSAXHandlerInit();
8972
Daniel Veillard309f81d2003-09-23 09:02:53 +00008973 xmlDetectSAX2(ctxt);
8974
Owen Taylor3473f882001-02-23 17:55:21 +00008975 GROW;
8976
8977 /*
8978 * SAX: beginning of the document processing.
8979 */
8980 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
8981 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
8982
8983 /*
8984 * Get the 4 first bytes and decode the charset
8985 * if enc != XML_CHAR_ENCODING_NONE
8986 * plug some encoding conversion routines.
8987 */
Daniel Veillard4aede2e2003-10-17 12:43:59 +00008988 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
8989 start[0] = RAW;
8990 start[1] = NXT(1);
8991 start[2] = NXT(2);
8992 start[3] = NXT(3);
8993 enc = xmlDetectCharEncoding(start, 4);
8994 if (enc != XML_CHAR_ENCODING_NONE) {
8995 xmlSwitchEncoding(ctxt, enc);
8996 }
Owen Taylor3473f882001-02-23 17:55:21 +00008997 }
8998
8999
9000 if (CUR == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009001 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009002 }
9003
9004 /*
9005 * Check for the XMLDecl in the Prolog.
9006 */
9007 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00009008 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +00009009
9010 /*
9011 * Note that we will switch encoding on the fly.
9012 */
9013 xmlParseXMLDecl(ctxt);
9014 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
9015 /*
9016 * The XML REC instructs us to stop parsing right here
9017 */
9018 return(-1);
9019 }
9020 SKIP_BLANKS;
9021 } else {
9022 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
9023 }
9024 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
9025 ctxt->sax->startDocument(ctxt->userData);
9026
9027 /*
9028 * Doing validity checking on chunk doesn't make sense
9029 */
9030 ctxt->instate = XML_PARSER_CONTENT;
9031 ctxt->validate = 0;
9032 ctxt->loadsubset = 0;
9033 ctxt->depth = 0;
9034
9035 xmlParseContent(ctxt);
9036
9037 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009038 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009039 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009040 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009041 }
9042
9043 /*
9044 * SAX: end of the document processing.
9045 */
Daniel Veillard8d24cc12002-03-05 15:41:29 +00009046 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00009047 ctxt->sax->endDocument(ctxt->userData);
9048
9049 if (! ctxt->wellFormed) return(-1);
9050 return(0);
9051}
9052
Daniel Veillard73b013f2003-09-30 12:36:01 +00009053#ifdef LIBXML_PUSH_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +00009054/************************************************************************
9055 * *
9056 * Progressive parsing interfaces *
9057 * *
9058 ************************************************************************/
9059
9060/**
9061 * xmlParseLookupSequence:
9062 * @ctxt: an XML parser context
9063 * @first: the first char to lookup
9064 * @next: the next char to lookup or zero
9065 * @third: the next char to lookup or zero
9066 *
9067 * Try to find if a sequence (first, next, third) or just (first next) or
9068 * (first) is available in the input stream.
9069 * This function has a side effect of (possibly) incrementing ctxt->checkIndex
9070 * to avoid rescanning sequences of bytes, it DOES change the state of the
9071 * parser, do not use liberally.
9072 *
9073 * Returns the index to the current parsing point if the full sequence
9074 * is available, -1 otherwise.
9075 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00009076static int
Owen Taylor3473f882001-02-23 17:55:21 +00009077xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
9078 xmlChar next, xmlChar third) {
9079 int base, len;
9080 xmlParserInputPtr in;
9081 const xmlChar *buf;
9082
9083 in = ctxt->input;
9084 if (in == NULL) return(-1);
9085 base = in->cur - in->base;
9086 if (base < 0) return(-1);
9087 if (ctxt->checkIndex > base)
9088 base = ctxt->checkIndex;
9089 if (in->buf == NULL) {
9090 buf = in->base;
9091 len = in->length;
9092 } else {
9093 buf = in->buf->buffer->content;
9094 len = in->buf->buffer->use;
9095 }
9096 /* take into account the sequence length */
9097 if (third) len -= 2;
9098 else if (next) len --;
9099 for (;base < len;base++) {
9100 if (buf[base] == first) {
9101 if (third != 0) {
9102 if ((buf[base + 1] != next) ||
9103 (buf[base + 2] != third)) continue;
9104 } else if (next != 0) {
9105 if (buf[base + 1] != next) continue;
9106 }
9107 ctxt->checkIndex = 0;
9108#ifdef DEBUG_PUSH
9109 if (next == 0)
9110 xmlGenericError(xmlGenericErrorContext,
9111 "PP: lookup '%c' found at %d\n",
9112 first, base);
9113 else if (third == 0)
9114 xmlGenericError(xmlGenericErrorContext,
9115 "PP: lookup '%c%c' found at %d\n",
9116 first, next, base);
9117 else
9118 xmlGenericError(xmlGenericErrorContext,
9119 "PP: lookup '%c%c%c' found at %d\n",
9120 first, next, third, base);
9121#endif
9122 return(base - (in->cur - in->base));
9123 }
9124 }
9125 ctxt->checkIndex = base;
9126#ifdef DEBUG_PUSH
9127 if (next == 0)
9128 xmlGenericError(xmlGenericErrorContext,
9129 "PP: lookup '%c' failed\n", first);
9130 else if (third == 0)
9131 xmlGenericError(xmlGenericErrorContext,
9132 "PP: lookup '%c%c' failed\n", first, next);
9133 else
9134 xmlGenericError(xmlGenericErrorContext,
9135 "PP: lookup '%c%c%c' failed\n", first, next, third);
9136#endif
9137 return(-1);
9138}
9139
9140/**
Daniel Veillarda880b122003-04-21 21:36:41 +00009141 * xmlParseGetLasts:
9142 * @ctxt: an XML parser context
9143 * @lastlt: pointer to store the last '<' from the input
9144 * @lastgt: pointer to store the last '>' from the input
9145 *
9146 * Lookup the last < and > in the current chunk
9147 */
9148static void
9149xmlParseGetLasts(xmlParserCtxtPtr ctxt, const xmlChar **lastlt,
9150 const xmlChar **lastgt) {
9151 const xmlChar *tmp;
9152
9153 if ((ctxt == NULL) || (lastlt == NULL) || (lastgt == NULL)) {
9154 xmlGenericError(xmlGenericErrorContext,
9155 "Internal error: xmlParseGetLasts\n");
9156 return;
9157 }
Daniel Veillard0df3bc32004-06-08 12:03:41 +00009158 if ((ctxt->progressive != 0) && (ctxt->inputNr == 1)) {
Daniel Veillarda880b122003-04-21 21:36:41 +00009159 tmp = ctxt->input->end;
9160 tmp--;
Daniel Veillardeb70f932004-07-05 16:46:09 +00009161 while ((tmp >= ctxt->input->base) && (*tmp != '<')) tmp--;
Daniel Veillarda880b122003-04-21 21:36:41 +00009162 if (tmp < ctxt->input->base) {
9163 *lastlt = NULL;
9164 *lastgt = NULL;
Daniel Veillarda880b122003-04-21 21:36:41 +00009165 } else {
Daniel Veillardeb70f932004-07-05 16:46:09 +00009166 *lastlt = tmp;
9167 tmp++;
9168 while ((tmp < ctxt->input->end) && (*tmp != '>')) {
9169 if (*tmp == '\'') {
9170 tmp++;
9171 while ((tmp < ctxt->input->end) && (*tmp != '\'')) tmp++;
9172 if (tmp < ctxt->input->end) tmp++;
9173 } else if (*tmp == '"') {
9174 tmp++;
9175 while ((tmp < ctxt->input->end) && (*tmp != '"')) tmp++;
9176 if (tmp < ctxt->input->end) tmp++;
9177 } else
9178 tmp++;
9179 }
9180 if (tmp < ctxt->input->end)
9181 *lastgt = tmp;
9182 else {
9183 tmp = *lastlt;
9184 tmp--;
9185 while ((tmp >= ctxt->input->base) && (*tmp != '>')) tmp--;
9186 if (tmp >= ctxt->input->base)
9187 *lastgt = tmp;
9188 else
9189 *lastgt = NULL;
9190 }
Daniel Veillarda880b122003-04-21 21:36:41 +00009191 }
Daniel Veillarda880b122003-04-21 21:36:41 +00009192 } else {
9193 *lastlt = NULL;
9194 *lastgt = NULL;
9195 }
9196}
9197/**
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +00009198 * xmlCheckCdataPush:
9199 * @cur: pointer to the bock of characters
9200 * @len: length of the block in bytes
9201 *
9202 * Check that the block of characters is okay as SCdata content [20]
9203 *
9204 * Returns the number of bytes to pass if okay, a negative index where an
9205 * UTF-8 error occured otherwise
9206 */
9207static int
9208xmlCheckCdataPush(const xmlChar *utf, int len) {
9209 int ix;
9210 unsigned char c;
9211 int codepoint;
9212
9213 if ((utf == NULL) || (len <= 0))
9214 return(0);
9215
9216 for (ix = 0; ix < len;) { /* string is 0-terminated */
9217 c = utf[ix];
9218 if ((c & 0x80) == 0x00) { /* 1-byte code, starts with 10 */
9219 if (c >= 0x20)
9220 ix++;
9221 else if ((c == 0xA) || (c == 0xD) || (c == 0x9))
9222 ix++;
9223 else
9224 return(-ix);
9225 } else if ((c & 0xe0) == 0xc0) {/* 2-byte code, starts with 110 */
9226 if (ix + 2 > len) return(ix);
9227 if ((utf[ix+1] & 0xc0 ) != 0x80)
9228 return(-ix);
9229 codepoint = (utf[0] & 0x1f) << 6;
9230 codepoint |= utf[1] & 0x3f;
9231 if (!xmlIsCharQ(codepoint))
9232 return(-ix);
9233 ix += 2;
9234 } else if ((c & 0xf0) == 0xe0) {/* 3-byte code, starts with 1110 */
9235 if (ix + 3 > len) return(ix);
9236 if (((utf[ix+1] & 0xc0) != 0x80) ||
9237 ((utf[ix+2] & 0xc0) != 0x80))
9238 return(-ix);
9239 codepoint = (utf[0] & 0xf) << 12;
9240 codepoint |= (utf[1] & 0x3f) << 6;
9241 codepoint |= utf[2] & 0x3f;
9242 if (!xmlIsCharQ(codepoint))
9243 return(-ix);
9244 ix += 3;
9245 } else if ((c & 0xf8) == 0xf0) {/* 4-byte code, starts with 11110 */
9246 if (ix + 4 > len) return(ix);
9247 if (((utf[ix+1] & 0xc0) != 0x80) ||
9248 ((utf[ix+2] & 0xc0) != 0x80) ||
9249 ((utf[ix+3] & 0xc0) != 0x80))
9250 return(-ix);
9251 codepoint = (utf[0] & 0x7) << 18;
9252 codepoint |= (utf[1] & 0x3f) << 12;
9253 codepoint |= (utf[2] & 0x3f) << 6;
9254 codepoint |= utf[3] & 0x3f;
9255 if (!xmlIsCharQ(codepoint))
9256 return(-ix);
9257 ix += 4;
9258 } else /* unknown encoding */
9259 return(-ix);
9260 }
9261 return(ix);
9262}
9263
9264/**
Owen Taylor3473f882001-02-23 17:55:21 +00009265 * xmlParseTryOrFinish:
9266 * @ctxt: an XML parser context
9267 * @terminate: last chunk indicator
9268 *
9269 * Try to progress on parsing
9270 *
9271 * Returns zero if no parsing was possible
9272 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00009273static int
Owen Taylor3473f882001-02-23 17:55:21 +00009274xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
9275 int ret = 0;
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009276 int avail, tlen;
Owen Taylor3473f882001-02-23 17:55:21 +00009277 xmlChar cur, next;
Daniel Veillarda880b122003-04-21 21:36:41 +00009278 const xmlChar *lastlt, *lastgt;
Owen Taylor3473f882001-02-23 17:55:21 +00009279
Daniel Veillard36e5cd52004-11-02 14:52:23 +00009280 if (ctxt->input == NULL)
9281 return(0);
9282
Owen Taylor3473f882001-02-23 17:55:21 +00009283#ifdef DEBUG_PUSH
9284 switch (ctxt->instate) {
9285 case XML_PARSER_EOF:
9286 xmlGenericError(xmlGenericErrorContext,
9287 "PP: try EOF\n"); break;
9288 case XML_PARSER_START:
9289 xmlGenericError(xmlGenericErrorContext,
9290 "PP: try START\n"); break;
9291 case XML_PARSER_MISC:
9292 xmlGenericError(xmlGenericErrorContext,
9293 "PP: try MISC\n");break;
9294 case XML_PARSER_COMMENT:
9295 xmlGenericError(xmlGenericErrorContext,
9296 "PP: try COMMENT\n");break;
9297 case XML_PARSER_PROLOG:
9298 xmlGenericError(xmlGenericErrorContext,
9299 "PP: try PROLOG\n");break;
9300 case XML_PARSER_START_TAG:
9301 xmlGenericError(xmlGenericErrorContext,
9302 "PP: try START_TAG\n");break;
9303 case XML_PARSER_CONTENT:
9304 xmlGenericError(xmlGenericErrorContext,
9305 "PP: try CONTENT\n");break;
9306 case XML_PARSER_CDATA_SECTION:
9307 xmlGenericError(xmlGenericErrorContext,
9308 "PP: try CDATA_SECTION\n");break;
9309 case XML_PARSER_END_TAG:
9310 xmlGenericError(xmlGenericErrorContext,
9311 "PP: try END_TAG\n");break;
9312 case XML_PARSER_ENTITY_DECL:
9313 xmlGenericError(xmlGenericErrorContext,
9314 "PP: try ENTITY_DECL\n");break;
9315 case XML_PARSER_ENTITY_VALUE:
9316 xmlGenericError(xmlGenericErrorContext,
9317 "PP: try ENTITY_VALUE\n");break;
9318 case XML_PARSER_ATTRIBUTE_VALUE:
9319 xmlGenericError(xmlGenericErrorContext,
9320 "PP: try ATTRIBUTE_VALUE\n");break;
9321 case XML_PARSER_DTD:
9322 xmlGenericError(xmlGenericErrorContext,
9323 "PP: try DTD\n");break;
9324 case XML_PARSER_EPILOG:
9325 xmlGenericError(xmlGenericErrorContext,
9326 "PP: try EPILOG\n");break;
9327 case XML_PARSER_PI:
9328 xmlGenericError(xmlGenericErrorContext,
9329 "PP: try PI\n");break;
9330 case XML_PARSER_IGNORE:
9331 xmlGenericError(xmlGenericErrorContext,
9332 "PP: try IGNORE\n");break;
9333 }
9334#endif
9335
Daniel Veillard198c1bf2003-10-20 17:07:41 +00009336 if ((ctxt->input != NULL) &&
9337 (ctxt->input->cur - ctxt->input->base > 4096)) {
Daniel Veillarda880b122003-04-21 21:36:41 +00009338 xmlSHRINK(ctxt);
9339 ctxt->checkIndex = 0;
9340 }
9341 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Aleksey Sanine48a3182002-05-09 18:20:01 +00009342
Daniel Veillarda880b122003-04-21 21:36:41 +00009343 while (1) {
Daniel Veillard14412512005-01-21 23:53:26 +00009344 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00009345 return(0);
9346
9347
Owen Taylor3473f882001-02-23 17:55:21 +00009348 /*
9349 * Pop-up of finished entities.
9350 */
9351 while ((RAW == 0) && (ctxt->inputNr > 1))
9352 xmlPopInput(ctxt);
9353
Daniel Veillard198c1bf2003-10-20 17:07:41 +00009354 if (ctxt->input == NULL) break;
Owen Taylor3473f882001-02-23 17:55:21 +00009355 if (ctxt->input->buf == NULL)
Daniel Veillarda880b122003-04-21 21:36:41 +00009356 avail = ctxt->input->length -
9357 (ctxt->input->cur - ctxt->input->base);
Daniel Veillard158a4d22002-02-20 22:17:58 +00009358 else {
9359 /*
9360 * If we are operating on converted input, try to flush
9361 * remainng chars to avoid them stalling in the non-converted
9362 * buffer.
9363 */
9364 if ((ctxt->input->buf->raw != NULL) &&
9365 (ctxt->input->buf->raw->use > 0)) {
9366 int base = ctxt->input->base -
9367 ctxt->input->buf->buffer->content;
9368 int current = ctxt->input->cur - ctxt->input->base;
9369
9370 xmlParserInputBufferPush(ctxt->input->buf, 0, "");
9371 ctxt->input->base = ctxt->input->buf->buffer->content + base;
9372 ctxt->input->cur = ctxt->input->base + current;
9373 ctxt->input->end =
9374 &ctxt->input->buf->buffer->content[
9375 ctxt->input->buf->buffer->use];
9376 }
9377 avail = ctxt->input->buf->buffer->use -
9378 (ctxt->input->cur - ctxt->input->base);
9379 }
Owen Taylor3473f882001-02-23 17:55:21 +00009380 if (avail < 1)
9381 goto done;
9382 switch (ctxt->instate) {
9383 case XML_PARSER_EOF:
9384 /*
9385 * Document parsing is done !
9386 */
9387 goto done;
9388 case XML_PARSER_START:
Daniel Veillard0e4cd172001-06-28 12:13:56 +00009389 if (ctxt->charset == XML_CHAR_ENCODING_NONE) {
9390 xmlChar start[4];
9391 xmlCharEncoding enc;
9392
9393 /*
9394 * Very first chars read from the document flow.
9395 */
9396 if (avail < 4)
9397 goto done;
9398
9399 /*
9400 * Get the 4 first bytes and decode the charset
9401 * if enc != XML_CHAR_ENCODING_NONE
William M. Brack3a1cd212005-02-11 14:35:54 +00009402 * plug some encoding conversion routines,
9403 * else xmlSwitchEncoding will set to (default)
9404 * UTF8.
Daniel Veillard0e4cd172001-06-28 12:13:56 +00009405 */
9406 start[0] = RAW;
9407 start[1] = NXT(1);
9408 start[2] = NXT(2);
9409 start[3] = NXT(3);
9410 enc = xmlDetectCharEncoding(start, 4);
William M. Brack3a1cd212005-02-11 14:35:54 +00009411 xmlSwitchEncoding(ctxt, enc);
Daniel Veillard0e4cd172001-06-28 12:13:56 +00009412 break;
9413 }
Owen Taylor3473f882001-02-23 17:55:21 +00009414
Daniel Veillard2b8c4a12003-10-02 22:28:19 +00009415 if (avail < 2)
9416 goto done;
Owen Taylor3473f882001-02-23 17:55:21 +00009417 cur = ctxt->input->cur[0];
9418 next = ctxt->input->cur[1];
9419 if (cur == 0) {
9420 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
9421 ctxt->sax->setDocumentLocator(ctxt->userData,
9422 &xmlDefaultSAXLocator);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009423 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009424 ctxt->instate = XML_PARSER_EOF;
9425#ifdef DEBUG_PUSH
9426 xmlGenericError(xmlGenericErrorContext,
9427 "PP: entering EOF\n");
9428#endif
9429 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
9430 ctxt->sax->endDocument(ctxt->userData);
9431 goto done;
9432 }
9433 if ((cur == '<') && (next == '?')) {
9434 /* PI or XML decl */
9435 if (avail < 5) return(ret);
9436 if ((!terminate) &&
9437 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
9438 return(ret);
9439 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
9440 ctxt->sax->setDocumentLocator(ctxt->userData,
9441 &xmlDefaultSAXLocator);
9442 if ((ctxt->input->cur[2] == 'x') &&
9443 (ctxt->input->cur[3] == 'm') &&
9444 (ctxt->input->cur[4] == 'l') &&
William M. Brack76e95df2003-10-18 16:20:14 +00009445 (IS_BLANK_CH(ctxt->input->cur[5]))) {
Owen Taylor3473f882001-02-23 17:55:21 +00009446 ret += 5;
9447#ifdef DEBUG_PUSH
9448 xmlGenericError(xmlGenericErrorContext,
9449 "PP: Parsing XML Decl\n");
9450#endif
9451 xmlParseXMLDecl(ctxt);
9452 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
9453 /*
9454 * The XML REC instructs us to stop parsing right
9455 * here
9456 */
9457 ctxt->instate = XML_PARSER_EOF;
9458 return(0);
9459 }
9460 ctxt->standalone = ctxt->input->standalone;
9461 if ((ctxt->encoding == NULL) &&
9462 (ctxt->input->encoding != NULL))
9463 ctxt->encoding = xmlStrdup(ctxt->input->encoding);
9464 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
9465 (!ctxt->disableSAX))
9466 ctxt->sax->startDocument(ctxt->userData);
9467 ctxt->instate = XML_PARSER_MISC;
9468#ifdef DEBUG_PUSH
9469 xmlGenericError(xmlGenericErrorContext,
9470 "PP: entering MISC\n");
9471#endif
9472 } else {
9473 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
9474 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
9475 (!ctxt->disableSAX))
9476 ctxt->sax->startDocument(ctxt->userData);
9477 ctxt->instate = XML_PARSER_MISC;
9478#ifdef DEBUG_PUSH
9479 xmlGenericError(xmlGenericErrorContext,
9480 "PP: entering MISC\n");
9481#endif
9482 }
9483 } else {
9484 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
9485 ctxt->sax->setDocumentLocator(ctxt->userData,
9486 &xmlDefaultSAXLocator);
9487 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
William M. Bracka3215c72004-07-31 16:24:01 +00009488 if (ctxt->version == NULL) {
9489 xmlErrMemory(ctxt, NULL);
9490 break;
9491 }
Owen Taylor3473f882001-02-23 17:55:21 +00009492 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
9493 (!ctxt->disableSAX))
9494 ctxt->sax->startDocument(ctxt->userData);
9495 ctxt->instate = XML_PARSER_MISC;
9496#ifdef DEBUG_PUSH
9497 xmlGenericError(xmlGenericErrorContext,
9498 "PP: entering MISC\n");
9499#endif
9500 }
9501 break;
Daniel Veillarda880b122003-04-21 21:36:41 +00009502 case XML_PARSER_START_TAG: {
Daniel Veillarde57ec792003-09-10 10:50:59 +00009503 const xmlChar *name;
9504 const xmlChar *prefix;
9505 const xmlChar *URI;
9506 int nsNr = ctxt->nsNr;
Daniel Veillarda880b122003-04-21 21:36:41 +00009507
9508 if ((avail < 2) && (ctxt->inputNr == 1))
9509 goto done;
9510 cur = ctxt->input->cur[0];
9511 if (cur != '<') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009512 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Daniel Veillarda880b122003-04-21 21:36:41 +00009513 ctxt->instate = XML_PARSER_EOF;
Daniel Veillarda880b122003-04-21 21:36:41 +00009514 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
9515 ctxt->sax->endDocument(ctxt->userData);
9516 goto done;
9517 }
9518 if (!terminate) {
9519 if (ctxt->progressive) {
Daniel Veillardb3744002004-02-18 14:28:22 +00009520 /* > can be found unescaped in attribute values */
Daniel Veillardeb70f932004-07-05 16:46:09 +00009521 if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
Daniel Veillarda880b122003-04-21 21:36:41 +00009522 goto done;
9523 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
9524 goto done;
9525 }
9526 }
9527 if (ctxt->spaceNr == 0)
9528 spacePush(ctxt, -1);
9529 else
9530 spacePush(ctxt, *ctxt->space);
Daniel Veillard81273902003-09-30 00:43:48 +00009531#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +00009532 if (ctxt->sax2)
Daniel Veillard81273902003-09-30 00:43:48 +00009533#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009534 name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
Daniel Veillard81273902003-09-30 00:43:48 +00009535#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +00009536 else
9537 name = xmlParseStartTag(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +00009538#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarda880b122003-04-21 21:36:41 +00009539 if (name == NULL) {
9540 spacePop(ctxt);
9541 ctxt->instate = XML_PARSER_EOF;
Daniel Veillarda880b122003-04-21 21:36:41 +00009542 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
9543 ctxt->sax->endDocument(ctxt->userData);
9544 goto done;
9545 }
Daniel Veillard4432df22003-09-28 18:58:27 +00009546#ifdef LIBXML_VALID_ENABLED
Daniel Veillarda880b122003-04-21 21:36:41 +00009547 /*
9548 * [ VC: Root Element Type ]
9549 * The Name in the document type declaration must match
9550 * the element type of the root element.
9551 */
9552 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
9553 ctxt->node && (ctxt->node == ctxt->myDoc->children))
9554 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
Daniel Veillard4432df22003-09-28 18:58:27 +00009555#endif /* LIBXML_VALID_ENABLED */
Daniel Veillarda880b122003-04-21 21:36:41 +00009556
9557 /*
9558 * Check for an Empty Element.
9559 */
9560 if ((RAW == '/') && (NXT(1) == '>')) {
9561 SKIP(2);
Daniel Veillarde57ec792003-09-10 10:50:59 +00009562
9563 if (ctxt->sax2) {
9564 if ((ctxt->sax != NULL) &&
9565 (ctxt->sax->endElementNs != NULL) &&
9566 (!ctxt->disableSAX))
9567 ctxt->sax->endElementNs(ctxt->userData, name,
9568 prefix, URI);
Daniel Veillard48df9612005-01-04 21:50:05 +00009569 if (ctxt->nsNr - nsNr > 0)
9570 nsPop(ctxt, ctxt->nsNr - nsNr);
Daniel Veillard81273902003-09-30 00:43:48 +00009571#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +00009572 } else {
9573 if ((ctxt->sax != NULL) &&
9574 (ctxt->sax->endElement != NULL) &&
9575 (!ctxt->disableSAX))
9576 ctxt->sax->endElement(ctxt->userData, name);
Daniel Veillard81273902003-09-30 00:43:48 +00009577#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarda880b122003-04-21 21:36:41 +00009578 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00009579 spacePop(ctxt);
9580 if (ctxt->nameNr == 0) {
Daniel Veillarda880b122003-04-21 21:36:41 +00009581 ctxt->instate = XML_PARSER_EPILOG;
Daniel Veillarda880b122003-04-21 21:36:41 +00009582 } else {
9583 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillarda880b122003-04-21 21:36:41 +00009584 }
9585 break;
9586 }
9587 if (RAW == '>') {
9588 NEXT;
9589 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00009590 xmlFatalErrMsgStr(ctxt, XML_ERR_GT_REQUIRED,
Daniel Veillarda880b122003-04-21 21:36:41 +00009591 "Couldn't find end of Start Tag %s\n",
9592 name);
Daniel Veillarda880b122003-04-21 21:36:41 +00009593 nodePop(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +00009594 spacePop(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +00009595 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00009596 if (ctxt->sax2)
9597 nameNsPush(ctxt, name, prefix, URI, ctxt->nsNr - nsNr);
Daniel Veillard81273902003-09-30 00:43:48 +00009598#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +00009599 else
9600 namePush(ctxt, name);
Daniel Veillard81273902003-09-30 00:43:48 +00009601#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +00009602
Daniel Veillarda880b122003-04-21 21:36:41 +00009603 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillarda880b122003-04-21 21:36:41 +00009604 break;
9605 }
9606 case XML_PARSER_CONTENT: {
9607 const xmlChar *test;
9608 unsigned int cons;
9609 if ((avail < 2) && (ctxt->inputNr == 1))
9610 goto done;
9611 cur = ctxt->input->cur[0];
9612 next = ctxt->input->cur[1];
9613
9614 test = CUR_PTR;
9615 cons = ctxt->input->consumed;
9616 if ((cur == '<') && (next == '/')) {
9617 ctxt->instate = XML_PARSER_END_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +00009618 break;
9619 } else if ((cur == '<') && (next == '?')) {
9620 if ((!terminate) &&
9621 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
9622 goto done;
Daniel Veillarda880b122003-04-21 21:36:41 +00009623 xmlParsePI(ctxt);
9624 } else if ((cur == '<') && (next != '!')) {
9625 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +00009626 break;
9627 } else if ((cur == '<') && (next == '!') &&
9628 (ctxt->input->cur[2] == '-') &&
9629 (ctxt->input->cur[3] == '-')) {
9630 if ((!terminate) &&
9631 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
9632 goto done;
Daniel Veillarda880b122003-04-21 21:36:41 +00009633 xmlParseComment(ctxt);
9634 ctxt->instate = XML_PARSER_CONTENT;
9635 } else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
9636 (ctxt->input->cur[2] == '[') &&
9637 (ctxt->input->cur[3] == 'C') &&
9638 (ctxt->input->cur[4] == 'D') &&
9639 (ctxt->input->cur[5] == 'A') &&
9640 (ctxt->input->cur[6] == 'T') &&
9641 (ctxt->input->cur[7] == 'A') &&
9642 (ctxt->input->cur[8] == '[')) {
9643 SKIP(9);
9644 ctxt->instate = XML_PARSER_CDATA_SECTION;
Daniel Veillarda880b122003-04-21 21:36:41 +00009645 break;
9646 } else if ((cur == '<') && (next == '!') &&
9647 (avail < 9)) {
9648 goto done;
9649 } else if (cur == '&') {
9650 if ((!terminate) &&
9651 (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0))
9652 goto done;
Daniel Veillarda880b122003-04-21 21:36:41 +00009653 xmlParseReference(ctxt);
9654 } else {
9655 /* TODO Avoid the extra copy, handle directly !!! */
9656 /*
9657 * Goal of the following test is:
9658 * - minimize calls to the SAX 'character' callback
9659 * when they are mergeable
9660 * - handle an problem for isBlank when we only parse
9661 * a sequence of blank chars and the next one is
9662 * not available to check against '<' presence.
9663 * - tries to homogenize the differences in SAX
9664 * callbacks between the push and pull versions
9665 * of the parser.
9666 */
9667 if ((ctxt->inputNr == 1) &&
9668 (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
9669 if (!terminate) {
9670 if (ctxt->progressive) {
9671 if ((lastlt == NULL) ||
9672 (ctxt->input->cur > lastlt))
9673 goto done;
9674 } else if (xmlParseLookupSequence(ctxt,
9675 '<', 0, 0) < 0) {
9676 goto done;
9677 }
9678 }
9679 }
9680 ctxt->checkIndex = 0;
Daniel Veillarda880b122003-04-21 21:36:41 +00009681 xmlParseCharData(ctxt, 0);
9682 }
9683 /*
9684 * Pop-up of finished entities.
9685 */
9686 while ((RAW == 0) && (ctxt->inputNr > 1))
9687 xmlPopInput(ctxt);
9688 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009689 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
9690 "detected an error in element content\n");
Daniel Veillarda880b122003-04-21 21:36:41 +00009691 ctxt->instate = XML_PARSER_EOF;
9692 break;
9693 }
9694 break;
9695 }
9696 case XML_PARSER_END_TAG:
9697 if (avail < 2)
9698 goto done;
9699 if (!terminate) {
9700 if (ctxt->progressive) {
Daniel Veillardeb70f932004-07-05 16:46:09 +00009701 /* > can be found unescaped in attribute values */
9702 if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
Daniel Veillarda880b122003-04-21 21:36:41 +00009703 goto done;
9704 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
9705 goto done;
9706 }
9707 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00009708 if (ctxt->sax2) {
9709 xmlParseEndTag2(ctxt,
9710 (void *) ctxt->pushTab[ctxt->nameNr * 3 - 3],
9711 (void *) ctxt->pushTab[ctxt->nameNr * 3 - 2], 0,
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009712 (int) (long) ctxt->pushTab[ctxt->nameNr * 3 - 1], 0);
Daniel Veillarde57ec792003-09-10 10:50:59 +00009713 nameNsPop(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +00009714 }
9715#ifdef LIBXML_SAX1_ENABLED
9716 else
Daniel Veillarde57ec792003-09-10 10:50:59 +00009717 xmlParseEndTag1(ctxt, 0);
Daniel Veillard81273902003-09-30 00:43:48 +00009718#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +00009719 if (ctxt->nameNr == 0) {
Daniel Veillarda880b122003-04-21 21:36:41 +00009720 ctxt->instate = XML_PARSER_EPILOG;
Daniel Veillarda880b122003-04-21 21:36:41 +00009721 } else {
9722 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillarda880b122003-04-21 21:36:41 +00009723 }
9724 break;
9725 case XML_PARSER_CDATA_SECTION: {
9726 /*
9727 * The Push mode need to have the SAX callback for
9728 * cdataBlock merge back contiguous callbacks.
9729 */
9730 int base;
9731
9732 base = xmlParseLookupSequence(ctxt, ']', ']', '>');
9733 if (base < 0) {
9734 if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) {
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +00009735 int tmp;
9736
9737 tmp = xmlCheckCdataPush(ctxt->input->cur,
9738 XML_PARSER_BIG_BUFFER_SIZE);
9739 if (tmp < 0) {
9740 tmp = -tmp;
9741 ctxt->input->cur += tmp;
9742 goto encoding_error;
9743 }
Daniel Veillarda880b122003-04-21 21:36:41 +00009744 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
9745 if (ctxt->sax->cdataBlock != NULL)
Daniel Veillardd9d32ae2003-07-05 20:32:43 +00009746 ctxt->sax->cdataBlock(ctxt->userData,
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +00009747 ctxt->input->cur, tmp);
Daniel Veillardd9d32ae2003-07-05 20:32:43 +00009748 else if (ctxt->sax->characters != NULL)
9749 ctxt->sax->characters(ctxt->userData,
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +00009750 ctxt->input->cur, tmp);
Daniel Veillarda880b122003-04-21 21:36:41 +00009751 }
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +00009752 SKIPL(tmp);
Daniel Veillarda880b122003-04-21 21:36:41 +00009753 ctxt->checkIndex = 0;
9754 }
9755 goto done;
9756 } else {
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +00009757 int tmp;
9758
9759 tmp = xmlCheckCdataPush(ctxt->input->cur, base);
9760 if ((tmp < 0) || (tmp != base)) {
9761 tmp = -tmp;
9762 ctxt->input->cur += tmp;
9763 goto encoding_error;
9764 }
Daniel Veillarda880b122003-04-21 21:36:41 +00009765 if ((ctxt->sax != NULL) && (base > 0) &&
9766 (!ctxt->disableSAX)) {
9767 if (ctxt->sax->cdataBlock != NULL)
9768 ctxt->sax->cdataBlock(ctxt->userData,
9769 ctxt->input->cur, base);
Daniel Veillardd9d32ae2003-07-05 20:32:43 +00009770 else if (ctxt->sax->characters != NULL)
9771 ctxt->sax->characters(ctxt->userData,
9772 ctxt->input->cur, base);
Daniel Veillarda880b122003-04-21 21:36:41 +00009773 }
Daniel Veillard0b787f32004-03-26 17:29:53 +00009774 SKIPL(base + 3);
Daniel Veillarda880b122003-04-21 21:36:41 +00009775 ctxt->checkIndex = 0;
9776 ctxt->instate = XML_PARSER_CONTENT;
9777#ifdef DEBUG_PUSH
9778 xmlGenericError(xmlGenericErrorContext,
9779 "PP: entering CONTENT\n");
9780#endif
9781 }
9782 break;
9783 }
Owen Taylor3473f882001-02-23 17:55:21 +00009784 case XML_PARSER_MISC:
9785 SKIP_BLANKS;
9786 if (ctxt->input->buf == NULL)
Daniel Veillarda880b122003-04-21 21:36:41 +00009787 avail = ctxt->input->length -
9788 (ctxt->input->cur - ctxt->input->base);
Owen Taylor3473f882001-02-23 17:55:21 +00009789 else
Daniel Veillarda880b122003-04-21 21:36:41 +00009790 avail = ctxt->input->buf->buffer->use -
9791 (ctxt->input->cur - ctxt->input->base);
Owen Taylor3473f882001-02-23 17:55:21 +00009792 if (avail < 2)
9793 goto done;
9794 cur = ctxt->input->cur[0];
9795 next = ctxt->input->cur[1];
9796 if ((cur == '<') && (next == '?')) {
9797 if ((!terminate) &&
9798 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
9799 goto done;
9800#ifdef DEBUG_PUSH
9801 xmlGenericError(xmlGenericErrorContext,
9802 "PP: Parsing PI\n");
9803#endif
9804 xmlParsePI(ctxt);
9805 } else if ((cur == '<') && (next == '!') &&
Daniel Veillarda880b122003-04-21 21:36:41 +00009806 (ctxt->input->cur[2] == '-') &&
9807 (ctxt->input->cur[3] == '-')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009808 if ((!terminate) &&
9809 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
9810 goto done;
9811#ifdef DEBUG_PUSH
9812 xmlGenericError(xmlGenericErrorContext,
9813 "PP: Parsing Comment\n");
9814#endif
9815 xmlParseComment(ctxt);
9816 ctxt->instate = XML_PARSER_MISC;
9817 } else if ((cur == '<') && (next == '!') &&
Daniel Veillarda880b122003-04-21 21:36:41 +00009818 (ctxt->input->cur[2] == 'D') &&
9819 (ctxt->input->cur[3] == 'O') &&
9820 (ctxt->input->cur[4] == 'C') &&
9821 (ctxt->input->cur[5] == 'T') &&
9822 (ctxt->input->cur[6] == 'Y') &&
9823 (ctxt->input->cur[7] == 'P') &&
Owen Taylor3473f882001-02-23 17:55:21 +00009824 (ctxt->input->cur[8] == 'E')) {
9825 if ((!terminate) &&
9826 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
9827 goto done;
9828#ifdef DEBUG_PUSH
9829 xmlGenericError(xmlGenericErrorContext,
9830 "PP: Parsing internal subset\n");
9831#endif
9832 ctxt->inSubset = 1;
9833 xmlParseDocTypeDecl(ctxt);
9834 if (RAW == '[') {
9835 ctxt->instate = XML_PARSER_DTD;
9836#ifdef DEBUG_PUSH
9837 xmlGenericError(xmlGenericErrorContext,
9838 "PP: entering DTD\n");
9839#endif
9840 } else {
9841 /*
9842 * Create and update the external subset.
9843 */
9844 ctxt->inSubset = 2;
9845 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
9846 (ctxt->sax->externalSubset != NULL))
9847 ctxt->sax->externalSubset(ctxt->userData,
9848 ctxt->intSubName, ctxt->extSubSystem,
9849 ctxt->extSubURI);
9850 ctxt->inSubset = 0;
9851 ctxt->instate = XML_PARSER_PROLOG;
9852#ifdef DEBUG_PUSH
9853 xmlGenericError(xmlGenericErrorContext,
9854 "PP: entering PROLOG\n");
9855#endif
9856 }
9857 } else if ((cur == '<') && (next == '!') &&
9858 (avail < 9)) {
9859 goto done;
9860 } else {
9861 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +00009862 ctxt->progressive = 1;
9863 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Owen Taylor3473f882001-02-23 17:55:21 +00009864#ifdef DEBUG_PUSH
9865 xmlGenericError(xmlGenericErrorContext,
9866 "PP: entering START_TAG\n");
9867#endif
9868 }
9869 break;
Owen Taylor3473f882001-02-23 17:55:21 +00009870 case XML_PARSER_PROLOG:
9871 SKIP_BLANKS;
9872 if (ctxt->input->buf == NULL)
9873 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
9874 else
9875 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
9876 if (avail < 2)
9877 goto done;
9878 cur = ctxt->input->cur[0];
9879 next = ctxt->input->cur[1];
9880 if ((cur == '<') && (next == '?')) {
9881 if ((!terminate) &&
9882 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
9883 goto done;
9884#ifdef DEBUG_PUSH
9885 xmlGenericError(xmlGenericErrorContext,
9886 "PP: Parsing PI\n");
9887#endif
9888 xmlParsePI(ctxt);
9889 } else if ((cur == '<') && (next == '!') &&
9890 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
9891 if ((!terminate) &&
9892 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
9893 goto done;
9894#ifdef DEBUG_PUSH
9895 xmlGenericError(xmlGenericErrorContext,
9896 "PP: Parsing Comment\n");
9897#endif
9898 xmlParseComment(ctxt);
9899 ctxt->instate = XML_PARSER_PROLOG;
9900 } else if ((cur == '<') && (next == '!') &&
9901 (avail < 4)) {
9902 goto done;
9903 } else {
9904 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillard0df3bc32004-06-08 12:03:41 +00009905 if (ctxt->progressive == 0)
9906 ctxt->progressive = 1;
Daniel Veillarda880b122003-04-21 21:36:41 +00009907 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Owen Taylor3473f882001-02-23 17:55:21 +00009908#ifdef DEBUG_PUSH
9909 xmlGenericError(xmlGenericErrorContext,
9910 "PP: entering START_TAG\n");
9911#endif
9912 }
9913 break;
9914 case XML_PARSER_EPILOG:
9915 SKIP_BLANKS;
9916 if (ctxt->input->buf == NULL)
9917 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
9918 else
9919 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
9920 if (avail < 2)
9921 goto done;
9922 cur = ctxt->input->cur[0];
9923 next = ctxt->input->cur[1];
9924 if ((cur == '<') && (next == '?')) {
9925 if ((!terminate) &&
9926 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
9927 goto done;
9928#ifdef DEBUG_PUSH
9929 xmlGenericError(xmlGenericErrorContext,
9930 "PP: Parsing PI\n");
9931#endif
9932 xmlParsePI(ctxt);
9933 ctxt->instate = XML_PARSER_EPILOG;
9934 } else if ((cur == '<') && (next == '!') &&
9935 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
9936 if ((!terminate) &&
9937 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
9938 goto done;
9939#ifdef DEBUG_PUSH
9940 xmlGenericError(xmlGenericErrorContext,
9941 "PP: Parsing Comment\n");
9942#endif
9943 xmlParseComment(ctxt);
9944 ctxt->instate = XML_PARSER_EPILOG;
9945 } else if ((cur == '<') && (next == '!') &&
9946 (avail < 4)) {
9947 goto done;
9948 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009949 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009950 ctxt->instate = XML_PARSER_EOF;
9951#ifdef DEBUG_PUSH
9952 xmlGenericError(xmlGenericErrorContext,
9953 "PP: entering EOF\n");
9954#endif
Daniel Veillard8d24cc12002-03-05 15:41:29 +00009955 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00009956 ctxt->sax->endDocument(ctxt->userData);
9957 goto done;
9958 }
9959 break;
Owen Taylor3473f882001-02-23 17:55:21 +00009960 case XML_PARSER_DTD: {
9961 /*
9962 * Sorry but progressive parsing of the internal subset
9963 * is not expected to be supported. We first check that
9964 * the full content of the internal subset is available and
9965 * the parsing is launched only at that point.
9966 * Internal subset ends up with "']' S? '>'" in an unescaped
9967 * section and not in a ']]>' sequence which are conditional
9968 * sections (whoever argued to keep that crap in XML deserve
9969 * a place in hell !).
9970 */
9971 int base, i;
9972 xmlChar *buf;
9973 xmlChar quote = 0;
9974
9975 base = ctxt->input->cur - ctxt->input->base;
9976 if (base < 0) return(0);
9977 if (ctxt->checkIndex > base)
9978 base = ctxt->checkIndex;
9979 buf = ctxt->input->buf->buffer->content;
9980 for (;(unsigned int) base < ctxt->input->buf->buffer->use;
9981 base++) {
9982 if (quote != 0) {
9983 if (buf[base] == quote)
9984 quote = 0;
9985 continue;
9986 }
Daniel Veillard036143b2004-02-12 11:57:52 +00009987 if ((quote == 0) && (buf[base] == '<')) {
9988 int found = 0;
9989 /* special handling of comments */
9990 if (((unsigned int) base + 4 <
9991 ctxt->input->buf->buffer->use) &&
9992 (buf[base + 1] == '!') &&
9993 (buf[base + 2] == '-') &&
9994 (buf[base + 3] == '-')) {
9995 for (;(unsigned int) base + 3 <
9996 ctxt->input->buf->buffer->use; base++) {
9997 if ((buf[base] == '-') &&
9998 (buf[base + 1] == '-') &&
9999 (buf[base + 2] == '>')) {
10000 found = 1;
10001 base += 2;
10002 break;
10003 }
10004 }
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000010005 if (!found) {
10006#if 0
10007 fprintf(stderr, "unfinished comment\n");
10008#endif
10009 break; /* for */
10010 }
Daniel Veillard036143b2004-02-12 11:57:52 +000010011 continue;
10012 }
10013 }
Owen Taylor3473f882001-02-23 17:55:21 +000010014 if (buf[base] == '"') {
10015 quote = '"';
10016 continue;
10017 }
10018 if (buf[base] == '\'') {
10019 quote = '\'';
10020 continue;
10021 }
10022 if (buf[base] == ']') {
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000010023#if 0
10024 fprintf(stderr, "%c%c%c%c: ", buf[base],
10025 buf[base + 1], buf[base + 2], buf[base + 3]);
10026#endif
Owen Taylor3473f882001-02-23 17:55:21 +000010027 if ((unsigned int) base +1 >=
10028 ctxt->input->buf->buffer->use)
10029 break;
10030 if (buf[base + 1] == ']') {
10031 /* conditional crap, skip both ']' ! */
10032 base++;
10033 continue;
10034 }
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000010035 for (i = 1;
Owen Taylor3473f882001-02-23 17:55:21 +000010036 (unsigned int) base + i < ctxt->input->buf->buffer->use;
10037 i++) {
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000010038 if (buf[base + i] == '>') {
10039#if 0
10040 fprintf(stderr, "found\n");
10041#endif
Owen Taylor3473f882001-02-23 17:55:21 +000010042 goto found_end_int_subset;
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000010043 }
10044 if (!IS_BLANK_CH(buf[base + i])) {
10045#if 0
10046 fprintf(stderr, "not found\n");
10047#endif
10048 goto not_end_of_int_subset;
10049 }
Owen Taylor3473f882001-02-23 17:55:21 +000010050 }
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000010051#if 0
10052 fprintf(stderr, "end of stream\n");
10053#endif
Owen Taylor3473f882001-02-23 17:55:21 +000010054 break;
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000010055
Owen Taylor3473f882001-02-23 17:55:21 +000010056 }
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000010057not_end_of_int_subset:
10058 continue; /* for */
Owen Taylor3473f882001-02-23 17:55:21 +000010059 }
10060 /*
10061 * We didn't found the end of the Internal subset
10062 */
Owen Taylor3473f882001-02-23 17:55:21 +000010063#ifdef DEBUG_PUSH
10064 if (next == 0)
10065 xmlGenericError(xmlGenericErrorContext,
10066 "PP: lookup of int subset end filed\n");
10067#endif
10068 goto done;
10069
10070found_end_int_subset:
10071 xmlParseInternalSubset(ctxt);
10072 ctxt->inSubset = 2;
10073 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
10074 (ctxt->sax->externalSubset != NULL))
10075 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
10076 ctxt->extSubSystem, ctxt->extSubURI);
10077 ctxt->inSubset = 0;
10078 ctxt->instate = XML_PARSER_PROLOG;
10079 ctxt->checkIndex = 0;
10080#ifdef DEBUG_PUSH
10081 xmlGenericError(xmlGenericErrorContext,
10082 "PP: entering PROLOG\n");
10083#endif
10084 break;
10085 }
10086 case XML_PARSER_COMMENT:
10087 xmlGenericError(xmlGenericErrorContext,
10088 "PP: internal error, state == COMMENT\n");
10089 ctxt->instate = XML_PARSER_CONTENT;
10090#ifdef DEBUG_PUSH
10091 xmlGenericError(xmlGenericErrorContext,
10092 "PP: entering CONTENT\n");
10093#endif
10094 break;
Daniel Veillarda880b122003-04-21 21:36:41 +000010095 case XML_PARSER_IGNORE:
10096 xmlGenericError(xmlGenericErrorContext,
10097 "PP: internal error, state == IGNORE");
10098 ctxt->instate = XML_PARSER_DTD;
10099#ifdef DEBUG_PUSH
10100 xmlGenericError(xmlGenericErrorContext,
10101 "PP: entering DTD\n");
10102#endif
10103 break;
Owen Taylor3473f882001-02-23 17:55:21 +000010104 case XML_PARSER_PI:
10105 xmlGenericError(xmlGenericErrorContext,
10106 "PP: internal error, state == PI\n");
10107 ctxt->instate = XML_PARSER_CONTENT;
10108#ifdef DEBUG_PUSH
10109 xmlGenericError(xmlGenericErrorContext,
10110 "PP: entering CONTENT\n");
10111#endif
10112 break;
10113 case XML_PARSER_ENTITY_DECL:
10114 xmlGenericError(xmlGenericErrorContext,
10115 "PP: internal error, state == ENTITY_DECL\n");
10116 ctxt->instate = XML_PARSER_DTD;
10117#ifdef DEBUG_PUSH
10118 xmlGenericError(xmlGenericErrorContext,
10119 "PP: entering DTD\n");
10120#endif
10121 break;
10122 case XML_PARSER_ENTITY_VALUE:
10123 xmlGenericError(xmlGenericErrorContext,
10124 "PP: internal error, state == ENTITY_VALUE\n");
10125 ctxt->instate = XML_PARSER_CONTENT;
10126#ifdef DEBUG_PUSH
10127 xmlGenericError(xmlGenericErrorContext,
10128 "PP: entering DTD\n");
10129#endif
10130 break;
10131 case XML_PARSER_ATTRIBUTE_VALUE:
10132 xmlGenericError(xmlGenericErrorContext,
10133 "PP: internal error, state == ATTRIBUTE_VALUE\n");
10134 ctxt->instate = XML_PARSER_START_TAG;
10135#ifdef DEBUG_PUSH
10136 xmlGenericError(xmlGenericErrorContext,
10137 "PP: entering START_TAG\n");
10138#endif
10139 break;
10140 case XML_PARSER_SYSTEM_LITERAL:
10141 xmlGenericError(xmlGenericErrorContext,
10142 "PP: internal error, state == SYSTEM_LITERAL\n");
10143 ctxt->instate = XML_PARSER_START_TAG;
10144#ifdef DEBUG_PUSH
10145 xmlGenericError(xmlGenericErrorContext,
10146 "PP: entering START_TAG\n");
10147#endif
10148 break;
Daniel Veillard4a7ae502002-02-18 19:18:17 +000010149 case XML_PARSER_PUBLIC_LITERAL:
10150 xmlGenericError(xmlGenericErrorContext,
10151 "PP: internal error, state == PUBLIC_LITERAL\n");
10152 ctxt->instate = XML_PARSER_START_TAG;
10153#ifdef DEBUG_PUSH
10154 xmlGenericError(xmlGenericErrorContext,
10155 "PP: entering START_TAG\n");
10156#endif
10157 break;
Owen Taylor3473f882001-02-23 17:55:21 +000010158 }
10159 }
10160done:
10161#ifdef DEBUG_PUSH
10162 xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret);
10163#endif
10164 return(ret);
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000010165encoding_error:
10166 {
10167 char buffer[150];
10168
10169 snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
10170 ctxt->input->cur[0], ctxt->input->cur[1],
10171 ctxt->input->cur[2], ctxt->input->cur[3]);
10172 __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
10173 "Input is not proper UTF-8, indicate encoding !\n%s",
10174 BAD_CAST buffer, NULL);
10175 }
10176 return(0);
Owen Taylor3473f882001-02-23 17:55:21 +000010177}
10178
10179/**
Owen Taylor3473f882001-02-23 17:55:21 +000010180 * xmlParseChunk:
10181 * @ctxt: an XML parser context
10182 * @chunk: an char array
10183 * @size: the size in byte of the chunk
10184 * @terminate: last chunk indicator
10185 *
10186 * Parse a Chunk of memory
10187 *
10188 * Returns zero if no error, the xmlParserErrors otherwise.
10189 */
10190int
10191xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
10192 int terminate) {
Daniel Veillard36e5cd52004-11-02 14:52:23 +000010193 if (ctxt == NULL)
10194 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard14412512005-01-21 23:53:26 +000010195 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000010196 return(ctxt->errNo);
Daniel Veillard309f81d2003-09-23 09:02:53 +000010197 if (ctxt->instate == XML_PARSER_START)
10198 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010199 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
10200 (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) {
10201 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
10202 int cur = ctxt->input->cur - ctxt->input->base;
William M. Bracka3215c72004-07-31 16:24:01 +000010203 int res;
Owen Taylor3473f882001-02-23 17:55:21 +000010204
William M. Bracka3215c72004-07-31 16:24:01 +000010205 res =xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
10206 if (res < 0) {
10207 ctxt->errNo = XML_PARSER_EOF;
10208 ctxt->disableSAX = 1;
10209 return (XML_PARSER_EOF);
10210 }
Owen Taylor3473f882001-02-23 17:55:21 +000010211 ctxt->input->base = ctxt->input->buf->buffer->content + base;
10212 ctxt->input->cur = ctxt->input->base + cur;
Daniel Veillard48b2f892001-02-25 16:11:03 +000010213 ctxt->input->end =
10214 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000010215#ifdef DEBUG_PUSH
10216 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
10217#endif
10218
Owen Taylor3473f882001-02-23 17:55:21 +000010219 } else if (ctxt->instate != XML_PARSER_EOF) {
10220 if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
10221 xmlParserInputBufferPtr in = ctxt->input->buf;
10222 if ((in->encoder != NULL) && (in->buffer != NULL) &&
10223 (in->raw != NULL)) {
10224 int nbchars;
10225
10226 nbchars = xmlCharEncInFunc(in->encoder, in->buffer, in->raw);
10227 if (nbchars < 0) {
Daniel Veillard24eb9782003-10-04 21:08:09 +000010228 /* TODO 2.6.0 */
Owen Taylor3473f882001-02-23 17:55:21 +000010229 xmlGenericError(xmlGenericErrorContext,
10230 "xmlParseChunk: encoder error\n");
10231 return(XML_ERR_INVALID_ENCODING);
10232 }
10233 }
10234 }
10235 }
10236 xmlParseTryOrFinish(ctxt, terminate);
Daniel Veillard14412512005-01-21 23:53:26 +000010237 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000010238 return(ctxt->errNo);
Owen Taylor3473f882001-02-23 17:55:21 +000010239 if (terminate) {
10240 /*
10241 * Check for termination
10242 */
Daniel Veillard36e5cd52004-11-02 14:52:23 +000010243 int avail = 0;
10244
10245 if (ctxt->input != NULL) {
Daniel Veillard819d5cb2002-10-14 11:15:18 +000010246 if (ctxt->input->buf == NULL)
Daniel Veillard36e5cd52004-11-02 14:52:23 +000010247 avail = ctxt->input->length -
10248 (ctxt->input->cur - ctxt->input->base);
10249 else
10250 avail = ctxt->input->buf->buffer->use -
10251 (ctxt->input->cur - ctxt->input->base);
10252 }
Daniel Veillard819d5cb2002-10-14 11:15:18 +000010253
Owen Taylor3473f882001-02-23 17:55:21 +000010254 if ((ctxt->instate != XML_PARSER_EOF) &&
10255 (ctxt->instate != XML_PARSER_EPILOG)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010256 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010257 }
Daniel Veillard819d5cb2002-10-14 11:15:18 +000010258 if ((ctxt->instate == XML_PARSER_EPILOG) && (avail > 0)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010259 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Daniel Veillard819d5cb2002-10-14 11:15:18 +000010260 }
Owen Taylor3473f882001-02-23 17:55:21 +000010261 if (ctxt->instate != XML_PARSER_EOF) {
Daniel Veillard8d24cc12002-03-05 15:41:29 +000010262 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +000010263 ctxt->sax->endDocument(ctxt->userData);
10264 }
10265 ctxt->instate = XML_PARSER_EOF;
10266 }
10267 return((xmlParserErrors) ctxt->errNo);
10268}
10269
10270/************************************************************************
10271 * *
10272 * I/O front end functions to the parser *
10273 * *
10274 ************************************************************************/
10275
10276/**
Owen Taylor3473f882001-02-23 17:55:21 +000010277 * xmlCreatePushParserCtxt:
10278 * @sax: a SAX handler
10279 * @user_data: The user data returned on SAX callbacks
10280 * @chunk: a pointer to an array of chars
10281 * @size: number of chars in the array
10282 * @filename: an optional file name or URI
10283 *
Daniel Veillard176d99f2002-07-06 19:22:28 +000010284 * Create a parser context for using the XML parser in push mode.
10285 * If @buffer and @size are non-NULL, the data is used to detect
10286 * the encoding. The remaining characters will be parsed so they
10287 * don't need to be fed in again through xmlParseChunk.
Owen Taylor3473f882001-02-23 17:55:21 +000010288 * To allow content encoding detection, @size should be >= 4
10289 * The value of @filename is used for fetching external entities
10290 * and error/warning reports.
10291 *
10292 * Returns the new parser context or NULL
10293 */
Daniel Veillard176d99f2002-07-06 19:22:28 +000010294
Owen Taylor3473f882001-02-23 17:55:21 +000010295xmlParserCtxtPtr
10296xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
10297 const char *chunk, int size, const char *filename) {
10298 xmlParserCtxtPtr ctxt;
10299 xmlParserInputPtr inputStream;
10300 xmlParserInputBufferPtr buf;
10301 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
10302
10303 /*
10304 * plug some encoding conversion routines
10305 */
10306 if ((chunk != NULL) && (size >= 4))
10307 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
10308
10309 buf = xmlAllocParserInputBuffer(enc);
10310 if (buf == NULL) return(NULL);
10311
10312 ctxt = xmlNewParserCtxt();
10313 if (ctxt == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +000010314 xmlErrMemory(NULL, "creating parser: out of memory\n");
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000010315 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000010316 return(NULL);
10317 }
Daniel Veillard03a53c32004-10-26 16:06:51 +000010318 ctxt->dictNames = 1;
Daniel Veillarde57ec792003-09-10 10:50:59 +000010319 ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 * sizeof(xmlChar *));
10320 if (ctxt->pushTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010321 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +000010322 xmlFreeParserInputBuffer(buf);
10323 xmlFreeParserCtxt(ctxt);
10324 return(NULL);
10325 }
Owen Taylor3473f882001-02-23 17:55:21 +000010326 if (sax != NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +000010327#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard092643b2003-09-25 14:29:29 +000010328 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Daniel Veillard81273902003-09-30 00:43:48 +000010329#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000010330 xmlFree(ctxt->sax);
10331 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
10332 if (ctxt->sax == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010333 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000010334 xmlFreeParserInputBuffer(buf);
10335 xmlFreeParserCtxt(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010336 return(NULL);
10337 }
Daniel Veillard5ea30d72004-11-08 11:54:28 +000010338 memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
10339 if (sax->initialized == XML_SAX2_MAGIC)
10340 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
10341 else
10342 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
Owen Taylor3473f882001-02-23 17:55:21 +000010343 if (user_data != NULL)
10344 ctxt->userData = user_data;
10345 }
10346 if (filename == NULL) {
10347 ctxt->directory = NULL;
10348 } else {
10349 ctxt->directory = xmlParserGetDirectory(filename);
10350 }
10351
10352 inputStream = xmlNewInputStream(ctxt);
10353 if (inputStream == NULL) {
10354 xmlFreeParserCtxt(ctxt);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000010355 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000010356 return(NULL);
10357 }
10358
10359 if (filename == NULL)
10360 inputStream->filename = NULL;
William M. Bracka3215c72004-07-31 16:24:01 +000010361 else {
Daniel Veillardf4862f02002-09-10 11:13:43 +000010362 inputStream->filename = (char *)
Igor Zlatkovic5f9fada2003-02-19 14:51:00 +000010363 xmlCanonicPath((const xmlChar *) filename);
William M. Bracka3215c72004-07-31 16:24:01 +000010364 if (inputStream->filename == NULL) {
10365 xmlFreeParserCtxt(ctxt);
10366 xmlFreeParserInputBuffer(buf);
10367 return(NULL);
10368 }
10369 }
Owen Taylor3473f882001-02-23 17:55:21 +000010370 inputStream->buf = buf;
10371 inputStream->base = inputStream->buf->buffer->content;
10372 inputStream->cur = inputStream->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +000010373 inputStream->end =
10374 &inputStream->buf->buffer->content[inputStream->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000010375
10376 inputPush(ctxt, inputStream);
10377
William M. Brack3a1cd212005-02-11 14:35:54 +000010378 /*
10379 * If the caller didn't provide an initial 'chunk' for determining
10380 * the encoding, we set the context to XML_CHAR_ENCODING_NONE so
10381 * that it can be automatically determined later
10382 */
10383 if ((size == 0) || (chunk == NULL)) {
10384 ctxt->charset = XML_CHAR_ENCODING_NONE;
10385 } else if ((ctxt->input != NULL) && (ctxt->input->buf != NULL)) {
Daniel Veillardaa39a0f2002-01-06 12:47:22 +000010386 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
10387 int cur = ctxt->input->cur - ctxt->input->base;
10388
Owen Taylor3473f882001-02-23 17:55:21 +000010389 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
Daniel Veillardaa39a0f2002-01-06 12:47:22 +000010390
10391 ctxt->input->base = ctxt->input->buf->buffer->content + base;
10392 ctxt->input->cur = ctxt->input->base + cur;
10393 ctxt->input->end =
10394 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000010395#ifdef DEBUG_PUSH
10396 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
10397#endif
10398 }
10399
Daniel Veillard0e4cd172001-06-28 12:13:56 +000010400 if (enc != XML_CHAR_ENCODING_NONE) {
10401 xmlSwitchEncoding(ctxt, enc);
10402 }
10403
Owen Taylor3473f882001-02-23 17:55:21 +000010404 return(ctxt);
10405}
Daniel Veillard73b013f2003-09-30 12:36:01 +000010406#endif /* LIBXML_PUSH_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000010407
10408/**
Daniel Veillard39e5c892005-07-03 22:48:50 +000010409 * xmlStopParser:
10410 * @ctxt: an XML parser context
10411 *
10412 * Blocks further parser processing
10413 */
10414void
10415xmlStopParser(xmlParserCtxtPtr ctxt) {
10416 if (ctxt == NULL)
10417 return;
10418 ctxt->instate = XML_PARSER_EOF;
10419 ctxt->disableSAX = 1;
10420 if (ctxt->input != NULL) {
10421 ctxt->input->cur = BAD_CAST"";
10422 ctxt->input->base = ctxt->input->cur;
10423 }
10424}
10425
10426/**
Owen Taylor3473f882001-02-23 17:55:21 +000010427 * xmlCreateIOParserCtxt:
10428 * @sax: a SAX handler
10429 * @user_data: The user data returned on SAX callbacks
10430 * @ioread: an I/O read function
10431 * @ioclose: an I/O close function
10432 * @ioctx: an I/O handler
10433 * @enc: the charset encoding if known
10434 *
10435 * Create a parser context for using the XML parser with an existing
10436 * I/O stream
10437 *
10438 * Returns the new parser context or NULL
10439 */
10440xmlParserCtxtPtr
10441xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
10442 xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
10443 void *ioctx, xmlCharEncoding enc) {
10444 xmlParserCtxtPtr ctxt;
10445 xmlParserInputPtr inputStream;
10446 xmlParserInputBufferPtr buf;
Daniel Veillard42595322004-11-08 10:52:06 +000010447
10448 if (ioread == NULL) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010449
10450 buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
10451 if (buf == NULL) return(NULL);
10452
10453 ctxt = xmlNewParserCtxt();
10454 if (ctxt == NULL) {
Daniel Veillardf0af8ec2005-07-08 17:27:33 +000010455 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000010456 return(NULL);
10457 }
10458 if (sax != NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +000010459#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard092643b2003-09-25 14:29:29 +000010460 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Daniel Veillard81273902003-09-30 00:43:48 +000010461#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000010462 xmlFree(ctxt->sax);
10463 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
10464 if (ctxt->sax == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010465 xmlErrMemory(ctxt, NULL);
Daniel Veillardf0af8ec2005-07-08 17:27:33 +000010466 xmlFreeParserCtxt(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010467 return(NULL);
10468 }
Daniel Veillard5ea30d72004-11-08 11:54:28 +000010469 memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
10470 if (sax->initialized == XML_SAX2_MAGIC)
10471 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
10472 else
10473 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
Owen Taylor3473f882001-02-23 17:55:21 +000010474 if (user_data != NULL)
10475 ctxt->userData = user_data;
10476 }
10477
10478 inputStream = xmlNewIOInputStream(ctxt, buf, enc);
10479 if (inputStream == NULL) {
10480 xmlFreeParserCtxt(ctxt);
10481 return(NULL);
10482 }
10483 inputPush(ctxt, inputStream);
10484
10485 return(ctxt);
10486}
10487
Daniel Veillard4432df22003-09-28 18:58:27 +000010488#ifdef LIBXML_VALID_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000010489/************************************************************************
10490 * *
Daniel Veillardcbaf3992001-12-31 16:16:02 +000010491 * Front ends when parsing a DTD *
Owen Taylor3473f882001-02-23 17:55:21 +000010492 * *
10493 ************************************************************************/
10494
10495/**
10496 * xmlIOParseDTD:
10497 * @sax: the SAX handler block or NULL
10498 * @input: an Input Buffer
10499 * @enc: the charset encoding if known
10500 *
10501 * Load and parse a DTD
10502 *
10503 * Returns the resulting xmlDtdPtr or NULL in case of error.
10504 * @input will be freed at parsing end.
10505 */
10506
10507xmlDtdPtr
10508xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
10509 xmlCharEncoding enc) {
10510 xmlDtdPtr ret = NULL;
10511 xmlParserCtxtPtr ctxt;
10512 xmlParserInputPtr pinput = NULL;
Daniel Veillard87a764e2001-06-20 17:41:10 +000010513 xmlChar start[4];
Owen Taylor3473f882001-02-23 17:55:21 +000010514
10515 if (input == NULL)
10516 return(NULL);
10517
10518 ctxt = xmlNewParserCtxt();
10519 if (ctxt == NULL) {
10520 return(NULL);
10521 }
10522
10523 /*
10524 * Set-up the SAX context
10525 */
10526 if (sax != NULL) {
10527 if (ctxt->sax != NULL)
10528 xmlFree(ctxt->sax);
10529 ctxt->sax = sax;
Daniel Veillard500a1de2004-03-22 15:22:58 +000010530 ctxt->userData = ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000010531 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000010532 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010533
10534 /*
10535 * generate a parser input from the I/O handler
10536 */
10537
Daniel Veillard43caefb2003-12-07 19:32:22 +000010538 pinput = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
Owen Taylor3473f882001-02-23 17:55:21 +000010539 if (pinput == NULL) {
10540 if (sax != NULL) ctxt->sax = NULL;
10541 xmlFreeParserCtxt(ctxt);
10542 return(NULL);
10543 }
10544
10545 /*
10546 * plug some encoding conversion routines here.
10547 */
10548 xmlPushInput(ctxt, pinput);
Daniel Veillard43caefb2003-12-07 19:32:22 +000010549 if (enc != XML_CHAR_ENCODING_NONE) {
10550 xmlSwitchEncoding(ctxt, enc);
10551 }
Owen Taylor3473f882001-02-23 17:55:21 +000010552
10553 pinput->filename = NULL;
10554 pinput->line = 1;
10555 pinput->col = 1;
10556 pinput->base = ctxt->input->cur;
10557 pinput->cur = ctxt->input->cur;
10558 pinput->free = NULL;
10559
10560 /*
10561 * let's parse that entity knowing it's an external subset.
10562 */
10563 ctxt->inSubset = 2;
10564 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
10565 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
10566 BAD_CAST "none", BAD_CAST "none");
Daniel Veillard87a764e2001-06-20 17:41:10 +000010567
Daniel Veillard4aede2e2003-10-17 12:43:59 +000010568 if ((enc == XML_CHAR_ENCODING_NONE) &&
10569 ((ctxt->input->end - ctxt->input->cur) >= 4)) {
Daniel Veillard87a764e2001-06-20 17:41:10 +000010570 /*
10571 * Get the 4 first bytes and decode the charset
10572 * if enc != XML_CHAR_ENCODING_NONE
10573 * plug some encoding conversion routines.
10574 */
10575 start[0] = RAW;
10576 start[1] = NXT(1);
10577 start[2] = NXT(2);
10578 start[3] = NXT(3);
10579 enc = xmlDetectCharEncoding(start, 4);
10580 if (enc != XML_CHAR_ENCODING_NONE) {
10581 xmlSwitchEncoding(ctxt, enc);
10582 }
10583 }
10584
Owen Taylor3473f882001-02-23 17:55:21 +000010585 xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
10586
10587 if (ctxt->myDoc != NULL) {
10588 if (ctxt->wellFormed) {
10589 ret = ctxt->myDoc->extSubset;
10590 ctxt->myDoc->extSubset = NULL;
Daniel Veillard329456a2003-04-26 21:21:00 +000010591 if (ret != NULL) {
10592 xmlNodePtr tmp;
10593
10594 ret->doc = NULL;
10595 tmp = ret->children;
10596 while (tmp != NULL) {
10597 tmp->doc = NULL;
10598 tmp = tmp->next;
10599 }
10600 }
Owen Taylor3473f882001-02-23 17:55:21 +000010601 } else {
10602 ret = NULL;
10603 }
10604 xmlFreeDoc(ctxt->myDoc);
10605 ctxt->myDoc = NULL;
10606 }
10607 if (sax != NULL) ctxt->sax = NULL;
10608 xmlFreeParserCtxt(ctxt);
10609
10610 return(ret);
10611}
10612
10613/**
10614 * xmlSAXParseDTD:
10615 * @sax: the SAX handler block
10616 * @ExternalID: a NAME* containing the External ID of the DTD
10617 * @SystemID: a NAME* containing the URL to the DTD
10618 *
10619 * Load and parse an external subset.
10620 *
10621 * Returns the resulting xmlDtdPtr or NULL in case of error.
10622 */
10623
10624xmlDtdPtr
10625xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
10626 const xmlChar *SystemID) {
10627 xmlDtdPtr ret = NULL;
10628 xmlParserCtxtPtr ctxt;
10629 xmlParserInputPtr input = NULL;
10630 xmlCharEncoding enc;
Igor Zlatkovic07d59762004-08-24 19:12:51 +000010631 xmlChar* systemIdCanonic;
Owen Taylor3473f882001-02-23 17:55:21 +000010632
10633 if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
10634
10635 ctxt = xmlNewParserCtxt();
10636 if (ctxt == NULL) {
10637 return(NULL);
10638 }
10639
10640 /*
10641 * Set-up the SAX context
10642 */
10643 if (sax != NULL) {
10644 if (ctxt->sax != NULL)
10645 xmlFree(ctxt->sax);
10646 ctxt->sax = sax;
Daniel Veillardbf1e3d82003-08-14 23:57:26 +000010647 ctxt->userData = ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000010648 }
Igor Zlatkovic07d59762004-08-24 19:12:51 +000010649
10650 /*
10651 * Canonicalise the system ID
10652 */
10653 systemIdCanonic = xmlCanonicPath(SystemID);
Daniel Veillardc93a19f2004-10-04 11:53:20 +000010654 if ((SystemID != NULL) && (systemIdCanonic == NULL)) {
Igor Zlatkovic07d59762004-08-24 19:12:51 +000010655 xmlFreeParserCtxt(ctxt);
10656 return(NULL);
10657 }
Owen Taylor3473f882001-02-23 17:55:21 +000010658
10659 /*
10660 * Ask the Entity resolver to load the damn thing
10661 */
10662
10663 if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
Igor Zlatkovic07d59762004-08-24 19:12:51 +000010664 input = ctxt->sax->resolveEntity(ctxt, ExternalID, systemIdCanonic);
Owen Taylor3473f882001-02-23 17:55:21 +000010665 if (input == NULL) {
10666 if (sax != NULL) ctxt->sax = NULL;
10667 xmlFreeParserCtxt(ctxt);
Daniel Veillard34099b42004-11-04 17:34:35 +000010668 if (systemIdCanonic != NULL)
10669 xmlFree(systemIdCanonic);
Owen Taylor3473f882001-02-23 17:55:21 +000010670 return(NULL);
10671 }
10672
10673 /*
10674 * plug some encoding conversion routines here.
10675 */
10676 xmlPushInput(ctxt, input);
Daniel Veillard4aede2e2003-10-17 12:43:59 +000010677 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
10678 enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
10679 xmlSwitchEncoding(ctxt, enc);
10680 }
Owen Taylor3473f882001-02-23 17:55:21 +000010681
10682 if (input->filename == NULL)
Igor Zlatkovic07d59762004-08-24 19:12:51 +000010683 input->filename = (char *) systemIdCanonic;
10684 else
10685 xmlFree(systemIdCanonic);
Owen Taylor3473f882001-02-23 17:55:21 +000010686 input->line = 1;
10687 input->col = 1;
10688 input->base = ctxt->input->cur;
10689 input->cur = ctxt->input->cur;
10690 input->free = NULL;
10691
10692 /*
10693 * let's parse that entity knowing it's an external subset.
10694 */
10695 ctxt->inSubset = 2;
10696 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
10697 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
10698 ExternalID, SystemID);
10699 xmlParseExternalSubset(ctxt, ExternalID, SystemID);
10700
10701 if (ctxt->myDoc != NULL) {
10702 if (ctxt->wellFormed) {
10703 ret = ctxt->myDoc->extSubset;
10704 ctxt->myDoc->extSubset = NULL;
Daniel Veillardc5573462003-04-25 16:43:49 +000010705 if (ret != NULL) {
10706 xmlNodePtr tmp;
10707
10708 ret->doc = NULL;
10709 tmp = ret->children;
10710 while (tmp != NULL) {
10711 tmp->doc = NULL;
10712 tmp = tmp->next;
10713 }
10714 }
Owen Taylor3473f882001-02-23 17:55:21 +000010715 } else {
10716 ret = NULL;
10717 }
10718 xmlFreeDoc(ctxt->myDoc);
10719 ctxt->myDoc = NULL;
10720 }
10721 if (sax != NULL) ctxt->sax = NULL;
10722 xmlFreeParserCtxt(ctxt);
10723
10724 return(ret);
10725}
10726
Daniel Veillard4432df22003-09-28 18:58:27 +000010727
Owen Taylor3473f882001-02-23 17:55:21 +000010728/**
10729 * xmlParseDTD:
10730 * @ExternalID: a NAME* containing the External ID of the DTD
10731 * @SystemID: a NAME* containing the URL to the DTD
10732 *
10733 * Load and parse an external subset.
10734 *
10735 * Returns the resulting xmlDtdPtr or NULL in case of error.
10736 */
10737
10738xmlDtdPtr
10739xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
10740 return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
10741}
Daniel Veillard4432df22003-09-28 18:58:27 +000010742#endif /* LIBXML_VALID_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000010743
10744/************************************************************************
10745 * *
10746 * Front ends when parsing an Entity *
10747 * *
10748 ************************************************************************/
10749
10750/**
Owen Taylor3473f882001-02-23 17:55:21 +000010751 * xmlParseCtxtExternalEntity:
10752 * @ctx: the existing parsing context
10753 * @URL: the URL for the entity to load
10754 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +000010755 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +000010756 *
10757 * Parse an external general entity within an existing parsing context
10758 * An external general parsed entity is well-formed if it matches the
10759 * production labeled extParsedEnt.
10760 *
10761 * [78] extParsedEnt ::= TextDecl? content
10762 *
10763 * Returns 0 if the entity is well formed, -1 in case of args problem and
10764 * the parser error code otherwise
10765 */
10766
10767int
10768xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
Daniel Veillardcda96922001-08-21 10:56:31 +000010769 const xmlChar *ID, xmlNodePtr *lst) {
Owen Taylor3473f882001-02-23 17:55:21 +000010770 xmlParserCtxtPtr ctxt;
10771 xmlDocPtr newDoc;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000010772 xmlNodePtr newRoot;
Owen Taylor3473f882001-02-23 17:55:21 +000010773 xmlSAXHandlerPtr oldsax = NULL;
10774 int ret = 0;
Daniel Veillard87a764e2001-06-20 17:41:10 +000010775 xmlChar start[4];
10776 xmlCharEncoding enc;
Owen Taylor3473f882001-02-23 17:55:21 +000010777
Daniel Veillardce682bc2004-11-05 17:22:25 +000010778 if (ctx == NULL) return(-1);
10779
Owen Taylor3473f882001-02-23 17:55:21 +000010780 if (ctx->depth > 40) {
10781 return(XML_ERR_ENTITY_LOOP);
10782 }
10783
Daniel Veillardcda96922001-08-21 10:56:31 +000010784 if (lst != NULL)
10785 *lst = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000010786 if ((URL == NULL) && (ID == NULL))
10787 return(-1);
10788 if (ctx->myDoc == NULL) /* @@ relax but check for dereferences */
10789 return(-1);
10790
10791
10792 ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL);
10793 if (ctxt == NULL) return(-1);
10794 ctxt->userData = ctxt;
Daniel Veillarde2830f12003-01-08 17:47:49 +000010795 ctxt->_private = ctx->_private;
Owen Taylor3473f882001-02-23 17:55:21 +000010796 oldsax = ctxt->sax;
10797 ctxt->sax = ctx->sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000010798 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010799 newDoc = xmlNewDoc(BAD_CAST "1.0");
10800 if (newDoc == NULL) {
10801 xmlFreeParserCtxt(ctxt);
10802 return(-1);
10803 }
Daniel Veillard03a53c32004-10-26 16:06:51 +000010804 if (ctx->myDoc->dict) {
10805 newDoc->dict = ctx->myDoc->dict;
10806 xmlDictReference(newDoc->dict);
10807 }
Owen Taylor3473f882001-02-23 17:55:21 +000010808 if (ctx->myDoc != NULL) {
10809 newDoc->intSubset = ctx->myDoc->intSubset;
10810 newDoc->extSubset = ctx->myDoc->extSubset;
10811 }
10812 if (ctx->myDoc->URL != NULL) {
10813 newDoc->URL = xmlStrdup(ctx->myDoc->URL);
10814 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000010815 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
10816 if (newRoot == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000010817 ctxt->sax = oldsax;
10818 xmlFreeParserCtxt(ctxt);
10819 newDoc->intSubset = NULL;
10820 newDoc->extSubset = NULL;
10821 xmlFreeDoc(newDoc);
10822 return(-1);
10823 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000010824 xmlAddChild((xmlNodePtr) newDoc, newRoot);
Owen Taylor3473f882001-02-23 17:55:21 +000010825 nodePush(ctxt, newDoc->children);
10826 if (ctx->myDoc == NULL) {
10827 ctxt->myDoc = newDoc;
10828 } else {
10829 ctxt->myDoc = ctx->myDoc;
10830 newDoc->children->doc = ctx->myDoc;
10831 }
10832
Daniel Veillard87a764e2001-06-20 17:41:10 +000010833 /*
10834 * Get the 4 first bytes and decode the charset
10835 * if enc != XML_CHAR_ENCODING_NONE
10836 * plug some encoding conversion routines.
10837 */
10838 GROW
Daniel Veillard4aede2e2003-10-17 12:43:59 +000010839 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
10840 start[0] = RAW;
10841 start[1] = NXT(1);
10842 start[2] = NXT(2);
10843 start[3] = NXT(3);
10844 enc = xmlDetectCharEncoding(start, 4);
10845 if (enc != XML_CHAR_ENCODING_NONE) {
10846 xmlSwitchEncoding(ctxt, enc);
10847 }
Daniel Veillard87a764e2001-06-20 17:41:10 +000010848 }
10849
Owen Taylor3473f882001-02-23 17:55:21 +000010850 /*
10851 * Parse a possible text declaration first
10852 */
Daniel Veillarda07050d2003-10-19 14:46:32 +000010853 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +000010854 xmlParseTextDecl(ctxt);
10855 }
10856
10857 /*
10858 * Doing validity checking on chunk doesn't make sense
10859 */
10860 ctxt->instate = XML_PARSER_CONTENT;
10861 ctxt->validate = ctx->validate;
Daniel Veillard5f8d1a32003-03-23 21:02:00 +000010862 ctxt->valid = ctx->valid;
Owen Taylor3473f882001-02-23 17:55:21 +000010863 ctxt->loadsubset = ctx->loadsubset;
10864 ctxt->depth = ctx->depth + 1;
10865 ctxt->replaceEntities = ctx->replaceEntities;
10866 if (ctxt->validate) {
10867 ctxt->vctxt.error = ctx->vctxt.error;
10868 ctxt->vctxt.warning = ctx->vctxt.warning;
Owen Taylor3473f882001-02-23 17:55:21 +000010869 } else {
10870 ctxt->vctxt.error = NULL;
10871 ctxt->vctxt.warning = NULL;
10872 }
Daniel Veillarda9142e72001-06-19 11:07:54 +000010873 ctxt->vctxt.nodeTab = NULL;
10874 ctxt->vctxt.nodeNr = 0;
10875 ctxt->vctxt.nodeMax = 0;
10876 ctxt->vctxt.node = NULL;
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000010877 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
10878 ctxt->dict = ctx->dict;
Daniel Veillardfd343dc2003-10-31 10:55:22 +000010879 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
10880 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
10881 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000010882 ctxt->dictNames = ctx->dictNames;
10883 ctxt->attsDefault = ctx->attsDefault;
10884 ctxt->attsSpecial = ctx->attsSpecial;
William M. Brack503b6102004-08-19 02:17:27 +000010885 ctxt->linenumbers = ctx->linenumbers;
Owen Taylor3473f882001-02-23 17:55:21 +000010886
10887 xmlParseContent(ctxt);
10888
Daniel Veillard5f8d1a32003-03-23 21:02:00 +000010889 ctx->validate = ctxt->validate;
10890 ctx->valid = ctxt->valid;
Owen Taylor3473f882001-02-23 17:55:21 +000010891 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010892 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010893 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010894 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010895 }
10896 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010897 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010898 }
10899
10900 if (!ctxt->wellFormed) {
10901 if (ctxt->errNo == 0)
10902 ret = 1;
10903 else
10904 ret = ctxt->errNo;
10905 } else {
Daniel Veillardcda96922001-08-21 10:56:31 +000010906 if (lst != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000010907 xmlNodePtr cur;
10908
10909 /*
10910 * Return the newly created nodeset after unlinking it from
10911 * they pseudo parent.
10912 */
10913 cur = newDoc->children->children;
Daniel Veillardcda96922001-08-21 10:56:31 +000010914 *lst = cur;
Owen Taylor3473f882001-02-23 17:55:21 +000010915 while (cur != NULL) {
10916 cur->parent = NULL;
10917 cur = cur->next;
10918 }
10919 newDoc->children->children = NULL;
10920 }
10921 ret = 0;
10922 }
10923 ctxt->sax = oldsax;
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000010924 ctxt->dict = NULL;
10925 ctxt->attsDefault = NULL;
10926 ctxt->attsSpecial = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000010927 xmlFreeParserCtxt(ctxt);
10928 newDoc->intSubset = NULL;
10929 newDoc->extSubset = NULL;
10930 xmlFreeDoc(newDoc);
10931
10932 return(ret);
10933}
10934
10935/**
Daniel Veillard257d9102001-05-08 10:41:44 +000010936 * xmlParseExternalEntityPrivate:
Owen Taylor3473f882001-02-23 17:55:21 +000010937 * @doc: the document the chunk pertains to
Daniel Veillarda97a19b2001-05-20 13:19:52 +000010938 * @oldctxt: the previous parser context if available
Owen Taylor3473f882001-02-23 17:55:21 +000010939 * @sax: the SAX handler bloc (possibly NULL)
10940 * @user_data: The user data returned on SAX callbacks (possibly NULL)
10941 * @depth: Used for loop detection, use 0
10942 * @URL: the URL for the entity to load
10943 * @ID: the System ID for the entity to load
10944 * @list: the return value for the set of parsed nodes
10945 *
Daniel Veillard257d9102001-05-08 10:41:44 +000010946 * Private version of xmlParseExternalEntity()
Owen Taylor3473f882001-02-23 17:55:21 +000010947 *
10948 * Returns 0 if the entity is well formed, -1 in case of args problem and
10949 * the parser error code otherwise
10950 */
10951
Daniel Veillard7d515752003-09-26 19:12:37 +000010952static xmlParserErrors
Daniel Veillarda97a19b2001-05-20 13:19:52 +000010953xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
10954 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +000010955 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +000010956 const xmlChar *ID, xmlNodePtr *list) {
Owen Taylor3473f882001-02-23 17:55:21 +000010957 xmlParserCtxtPtr ctxt;
10958 xmlDocPtr newDoc;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000010959 xmlNodePtr newRoot;
Owen Taylor3473f882001-02-23 17:55:21 +000010960 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillard7d515752003-09-26 19:12:37 +000010961 xmlParserErrors ret = XML_ERR_OK;
Daniel Veillard87a764e2001-06-20 17:41:10 +000010962 xmlChar start[4];
10963 xmlCharEncoding enc;
Owen Taylor3473f882001-02-23 17:55:21 +000010964
10965 if (depth > 40) {
10966 return(XML_ERR_ENTITY_LOOP);
10967 }
10968
10969
10970
10971 if (list != NULL)
10972 *list = NULL;
10973 if ((URL == NULL) && (ID == NULL))
Daniel Veillard7d515752003-09-26 19:12:37 +000010974 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000010975 if (doc == NULL) /* @@ relax but check for dereferences */
Daniel Veillard7d515752003-09-26 19:12:37 +000010976 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000010977
10978
10979 ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL);
William M. Brackb670e2e2003-09-27 01:05:55 +000010980 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
Owen Taylor3473f882001-02-23 17:55:21 +000010981 ctxt->userData = ctxt;
Daniel Veillarda97a19b2001-05-20 13:19:52 +000010982 if (oldctxt != NULL) {
10983 ctxt->_private = oldctxt->_private;
10984 ctxt->loadsubset = oldctxt->loadsubset;
10985 ctxt->validate = oldctxt->validate;
10986 ctxt->external = oldctxt->external;
Daniel Veillard44e1dd02003-02-21 23:23:28 +000010987 ctxt->record_info = oldctxt->record_info;
10988 ctxt->node_seq.maximum = oldctxt->node_seq.maximum;
10989 ctxt->node_seq.length = oldctxt->node_seq.length;
10990 ctxt->node_seq.buffer = oldctxt->node_seq.buffer;
Daniel Veillarda97a19b2001-05-20 13:19:52 +000010991 } else {
10992 /*
10993 * Doing validity checking on chunk without context
10994 * doesn't make sense
10995 */
10996 ctxt->_private = NULL;
10997 ctxt->validate = 0;
10998 ctxt->external = 2;
10999 ctxt->loadsubset = 0;
11000 }
Owen Taylor3473f882001-02-23 17:55:21 +000011001 if (sax != NULL) {
11002 oldsax = ctxt->sax;
11003 ctxt->sax = sax;
11004 if (user_data != NULL)
11005 ctxt->userData = user_data;
11006 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000011007 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011008 newDoc = xmlNewDoc(BAD_CAST "1.0");
11009 if (newDoc == NULL) {
Daniel Veillard44e1dd02003-02-21 23:23:28 +000011010 ctxt->node_seq.maximum = 0;
11011 ctxt->node_seq.length = 0;
11012 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000011013 xmlFreeParserCtxt(ctxt);
Daniel Veillard7d515752003-09-26 19:12:37 +000011014 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000011015 }
11016 if (doc != NULL) {
11017 newDoc->intSubset = doc->intSubset;
11018 newDoc->extSubset = doc->extSubset;
Daniel Veillard03a53c32004-10-26 16:06:51 +000011019 newDoc->dict = doc->dict;
11020 } else if (oldctxt != NULL) {
11021 newDoc->dict = oldctxt->dict;
Owen Taylor3473f882001-02-23 17:55:21 +000011022 }
Daniel Veillard03a53c32004-10-26 16:06:51 +000011023 xmlDictReference(newDoc->dict);
11024
Owen Taylor3473f882001-02-23 17:55:21 +000011025 if (doc->URL != NULL) {
11026 newDoc->URL = xmlStrdup(doc->URL);
11027 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011028 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
11029 if (newRoot == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000011030 if (sax != NULL)
11031 ctxt->sax = oldsax;
Daniel Veillard44e1dd02003-02-21 23:23:28 +000011032 ctxt->node_seq.maximum = 0;
11033 ctxt->node_seq.length = 0;
11034 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000011035 xmlFreeParserCtxt(ctxt);
11036 newDoc->intSubset = NULL;
11037 newDoc->extSubset = NULL;
11038 xmlFreeDoc(newDoc);
Daniel Veillard7d515752003-09-26 19:12:37 +000011039 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000011040 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011041 xmlAddChild((xmlNodePtr) newDoc, newRoot);
Owen Taylor3473f882001-02-23 17:55:21 +000011042 nodePush(ctxt, newDoc->children);
11043 if (doc == NULL) {
11044 ctxt->myDoc = newDoc;
11045 } else {
11046 ctxt->myDoc = doc;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011047 newRoot->doc = doc;
Owen Taylor3473f882001-02-23 17:55:21 +000011048 }
11049
Daniel Veillard87a764e2001-06-20 17:41:10 +000011050 /*
11051 * Get the 4 first bytes and decode the charset
11052 * if enc != XML_CHAR_ENCODING_NONE
11053 * plug some encoding conversion routines.
11054 */
11055 GROW;
Daniel Veillard4aede2e2003-10-17 12:43:59 +000011056 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
11057 start[0] = RAW;
11058 start[1] = NXT(1);
11059 start[2] = NXT(2);
11060 start[3] = NXT(3);
11061 enc = xmlDetectCharEncoding(start, 4);
11062 if (enc != XML_CHAR_ENCODING_NONE) {
11063 xmlSwitchEncoding(ctxt, enc);
11064 }
Daniel Veillard87a764e2001-06-20 17:41:10 +000011065 }
11066
Owen Taylor3473f882001-02-23 17:55:21 +000011067 /*
11068 * Parse a possible text declaration first
11069 */
Daniel Veillarda07050d2003-10-19 14:46:32 +000011070 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +000011071 xmlParseTextDecl(ctxt);
11072 }
11073
Owen Taylor3473f882001-02-23 17:55:21 +000011074 ctxt->instate = XML_PARSER_CONTENT;
Owen Taylor3473f882001-02-23 17:55:21 +000011075 ctxt->depth = depth;
11076
11077 xmlParseContent(ctxt);
11078
Daniel Veillard561b7f82002-03-20 21:55:57 +000011079 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011080 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard561b7f82002-03-20 21:55:57 +000011081 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011082 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011083 }
11084 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011085 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011086 }
11087
11088 if (!ctxt->wellFormed) {
11089 if (ctxt->errNo == 0)
Daniel Veillard7d515752003-09-26 19:12:37 +000011090 ret = XML_ERR_INTERNAL_ERROR;
Owen Taylor3473f882001-02-23 17:55:21 +000011091 else
William M. Brack7b9154b2003-09-27 19:23:50 +000011092 ret = (xmlParserErrors)ctxt->errNo;
Owen Taylor3473f882001-02-23 17:55:21 +000011093 } else {
11094 if (list != NULL) {
11095 xmlNodePtr cur;
11096
11097 /*
11098 * Return the newly created nodeset after unlinking it from
11099 * they pseudo parent.
11100 */
11101 cur = newDoc->children->children;
11102 *list = cur;
11103 while (cur != NULL) {
11104 cur->parent = NULL;
11105 cur = cur->next;
11106 }
11107 newDoc->children->children = NULL;
11108 }
Daniel Veillard7d515752003-09-26 19:12:37 +000011109 ret = XML_ERR_OK;
Owen Taylor3473f882001-02-23 17:55:21 +000011110 }
11111 if (sax != NULL)
11112 ctxt->sax = oldsax;
Daniel Veillard0046c0f2003-02-23 13:52:30 +000011113 oldctxt->node_seq.maximum = ctxt->node_seq.maximum;
11114 oldctxt->node_seq.length = ctxt->node_seq.length;
11115 oldctxt->node_seq.buffer = ctxt->node_seq.buffer;
Daniel Veillard44e1dd02003-02-21 23:23:28 +000011116 ctxt->node_seq.maximum = 0;
11117 ctxt->node_seq.length = 0;
11118 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000011119 xmlFreeParserCtxt(ctxt);
11120 newDoc->intSubset = NULL;
11121 newDoc->extSubset = NULL;
11122 xmlFreeDoc(newDoc);
11123
11124 return(ret);
11125}
11126
Daniel Veillard81273902003-09-30 00:43:48 +000011127#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000011128/**
Daniel Veillard257d9102001-05-08 10:41:44 +000011129 * xmlParseExternalEntity:
11130 * @doc: the document the chunk pertains to
11131 * @sax: the SAX handler bloc (possibly NULL)
11132 * @user_data: The user data returned on SAX callbacks (possibly NULL)
11133 * @depth: Used for loop detection, use 0
11134 * @URL: the URL for the entity to load
11135 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +000011136 * @lst: the return value for the set of parsed nodes
Daniel Veillard257d9102001-05-08 10:41:44 +000011137 *
11138 * Parse an external general entity
11139 * An external general parsed entity is well-formed if it matches the
11140 * production labeled extParsedEnt.
11141 *
11142 * [78] extParsedEnt ::= TextDecl? content
11143 *
11144 * Returns 0 if the entity is well formed, -1 in case of args problem and
11145 * the parser error code otherwise
11146 */
11147
11148int
11149xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardcda96922001-08-21 10:56:31 +000011150 int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *lst) {
Daniel Veillarda97a19b2001-05-20 13:19:52 +000011151 return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL,
Daniel Veillardcda96922001-08-21 10:56:31 +000011152 ID, lst));
Daniel Veillard257d9102001-05-08 10:41:44 +000011153}
11154
11155/**
Daniel Veillarde020c3a2001-03-21 18:06:15 +000011156 * xmlParseBalancedChunkMemory:
Owen Taylor3473f882001-02-23 17:55:21 +000011157 * @doc: the document the chunk pertains to
11158 * @sax: the SAX handler bloc (possibly NULL)
11159 * @user_data: The user data returned on SAX callbacks (possibly NULL)
11160 * @depth: Used for loop detection, use 0
11161 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
Daniel Veillardcda96922001-08-21 10:56:31 +000011162 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +000011163 *
11164 * Parse a well-balanced chunk of an XML document
11165 * called by the parser
11166 * The allowed sequence for the Well Balanced Chunk is the one defined by
11167 * the content production in the XML grammar:
11168 *
11169 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
11170 *
11171 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
11172 * the parser error code otherwise
11173 */
11174
11175int
11176xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
Daniel Veillardcda96922001-08-21 10:56:31 +000011177 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) {
Daniel Veillard58e44c92002-08-02 22:19:49 +000011178 return xmlParseBalancedChunkMemoryRecover( doc, sax, user_data,
11179 depth, string, lst, 0 );
11180}
Daniel Veillard81273902003-09-30 00:43:48 +000011181#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillard58e44c92002-08-02 22:19:49 +000011182
11183/**
Daniel Veillard328f48c2002-11-15 15:24:34 +000011184 * xmlParseBalancedChunkMemoryInternal:
11185 * @oldctxt: the existing parsing context
11186 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
11187 * @user_data: the user data field for the parser context
11188 * @lst: the return value for the set of parsed nodes
11189 *
11190 *
11191 * Parse a well-balanced chunk of an XML document
11192 * called by the parser
11193 * The allowed sequence for the Well Balanced Chunk is the one defined by
11194 * the content production in the XML grammar:
11195 *
11196 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
11197 *
Daniel Veillard7d515752003-09-26 19:12:37 +000011198 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
11199 * error code otherwise
Daniel Veillard328f48c2002-11-15 15:24:34 +000011200 *
11201 * In case recover is set to 1, the nodelist will not be empty even if
11202 * the parsed chunk is not well balanced.
11203 */
Daniel Veillard7d515752003-09-26 19:12:37 +000011204static xmlParserErrors
Daniel Veillard328f48c2002-11-15 15:24:34 +000011205xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
11206 const xmlChar *string, void *user_data, xmlNodePtr *lst) {
11207 xmlParserCtxtPtr ctxt;
Daniel Veillard68e9e742002-11-16 15:35:11 +000011208 xmlDocPtr newDoc = NULL;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011209 xmlNodePtr newRoot;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011210 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000011211 xmlNodePtr content = NULL;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011212 xmlNodePtr last = NULL;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011213 int size;
Daniel Veillard7d515752003-09-26 19:12:37 +000011214 xmlParserErrors ret = XML_ERR_OK;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011215
11216 if (oldctxt->depth > 40) {
11217 return(XML_ERR_ENTITY_LOOP);
11218 }
11219
11220
11221 if (lst != NULL)
11222 *lst = NULL;
11223 if (string == NULL)
William M. Brack7b9154b2003-09-27 19:23:50 +000011224 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011225
11226 size = xmlStrlen(string);
11227
11228 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
William M. Brack7b9154b2003-09-27 19:23:50 +000011229 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011230 if (user_data != NULL)
11231 ctxt->userData = user_data;
11232 else
11233 ctxt->userData = ctxt;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000011234 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
11235 ctxt->dict = oldctxt->dict;
Daniel Veillardfd343dc2003-10-31 10:55:22 +000011236 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
11237 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
11238 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011239
11240 oldsax = ctxt->sax;
11241 ctxt->sax = oldctxt->sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000011242 xmlDetectSAX2(ctxt);
Daniel Veillard87ab1c12003-12-21 13:01:56 +000011243 ctxt->replaceEntities = oldctxt->replaceEntities;
11244 ctxt->options = oldctxt->options;
Daniel Veillarde57ec792003-09-10 10:50:59 +000011245
Daniel Veillarde1ca5032002-12-09 14:13:43 +000011246 ctxt->_private = oldctxt->_private;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011247 if (oldctxt->myDoc == NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000011248 newDoc = xmlNewDoc(BAD_CAST "1.0");
11249 if (newDoc == NULL) {
11250 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000011251 ctxt->dict = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000011252 xmlFreeParserCtxt(ctxt);
William M. Brack7b9154b2003-09-27 19:23:50 +000011253 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard68e9e742002-11-16 15:35:11 +000011254 }
Daniel Veillard03a53c32004-10-26 16:06:51 +000011255 newDoc->dict = ctxt->dict;
11256 xmlDictReference(newDoc->dict);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011257 ctxt->myDoc = newDoc;
Daniel Veillard68e9e742002-11-16 15:35:11 +000011258 } else {
11259 ctxt->myDoc = oldctxt->myDoc;
11260 content = ctxt->myDoc->children;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011261 last = ctxt->myDoc->last;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011262 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011263 newRoot = xmlNewDocNode(ctxt->myDoc, NULL, BAD_CAST "pseudoroot", NULL);
11264 if (newRoot == NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000011265 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000011266 ctxt->dict = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000011267 xmlFreeParserCtxt(ctxt);
Daniel Veillard03a53c32004-10-26 16:06:51 +000011268 if (newDoc != NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000011269 xmlFreeDoc(newDoc);
Daniel Veillard03a53c32004-10-26 16:06:51 +000011270 }
William M. Brack7b9154b2003-09-27 19:23:50 +000011271 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard68e9e742002-11-16 15:35:11 +000011272 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011273 ctxt->myDoc->children = NULL;
11274 ctxt->myDoc->last = NULL;
11275 xmlAddChild((xmlNodePtr) ctxt->myDoc, newRoot);
Daniel Veillard68e9e742002-11-16 15:35:11 +000011276 nodePush(ctxt, ctxt->myDoc->children);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011277 ctxt->instate = XML_PARSER_CONTENT;
11278 ctxt->depth = oldctxt->depth + 1;
11279
Daniel Veillard328f48c2002-11-15 15:24:34 +000011280 ctxt->validate = 0;
11281 ctxt->loadsubset = oldctxt->loadsubset;
Daniel Veillardef8dd7b2003-03-23 12:02:56 +000011282 if ((oldctxt->validate) || (oldctxt->replaceEntities != 0)) {
11283 /*
11284 * ID/IDREF registration will be done in xmlValidateElement below
11285 */
11286 ctxt->loadsubset |= XML_SKIP_IDS;
11287 }
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000011288 ctxt->dictNames = oldctxt->dictNames;
Daniel Veillard95d2d5b2003-10-27 14:54:49 +000011289 ctxt->attsDefault = oldctxt->attsDefault;
11290 ctxt->attsSpecial = oldctxt->attsSpecial;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011291
Daniel Veillard68e9e742002-11-16 15:35:11 +000011292 xmlParseContent(ctxt);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011293 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011294 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011295 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011296 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011297 }
Daniel Veillard68e9e742002-11-16 15:35:11 +000011298 if (ctxt->node != ctxt->myDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011299 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011300 }
11301
11302 if (!ctxt->wellFormed) {
11303 if (ctxt->errNo == 0)
Daniel Veillard7d515752003-09-26 19:12:37 +000011304 ret = XML_ERR_INTERNAL_ERROR;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011305 else
William M. Brack7b9154b2003-09-27 19:23:50 +000011306 ret = (xmlParserErrors)ctxt->errNo;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011307 } else {
William M. Brack7b9154b2003-09-27 19:23:50 +000011308 ret = XML_ERR_OK;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011309 }
11310
William M. Brack7b9154b2003-09-27 19:23:50 +000011311 if ((lst != NULL) && (ret == XML_ERR_OK)) {
Daniel Veillard328f48c2002-11-15 15:24:34 +000011312 xmlNodePtr cur;
11313
11314 /*
11315 * Return the newly created nodeset after unlinking it from
11316 * they pseudo parent.
11317 */
Daniel Veillard68e9e742002-11-16 15:35:11 +000011318 cur = ctxt->myDoc->children->children;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011319 *lst = cur;
11320 while (cur != NULL) {
Daniel Veillard4432df22003-09-28 18:58:27 +000011321#ifdef LIBXML_VALID_ENABLED
Daniel Veillard8d589042003-02-04 15:07:21 +000011322 if (oldctxt->validate && oldctxt->wellFormed &&
11323 oldctxt->myDoc && oldctxt->myDoc->intSubset) {
11324 oldctxt->valid &= xmlValidateElement(&oldctxt->vctxt,
11325 oldctxt->myDoc, cur);
11326 }
Daniel Veillard4432df22003-09-28 18:58:27 +000011327#endif /* LIBXML_VALID_ENABLED */
Daniel Veillard328f48c2002-11-15 15:24:34 +000011328 cur->parent = NULL;
11329 cur = cur->next;
11330 }
Daniel Veillard68e9e742002-11-16 15:35:11 +000011331 ctxt->myDoc->children->children = NULL;
11332 }
11333 if (ctxt->myDoc != NULL) {
11334 xmlFreeNode(ctxt->myDoc->children);
11335 ctxt->myDoc->children = content;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011336 ctxt->myDoc->last = last;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011337 }
11338
11339 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000011340 ctxt->dict = NULL;
Daniel Veillard95d2d5b2003-10-27 14:54:49 +000011341 ctxt->attsDefault = NULL;
11342 ctxt->attsSpecial = NULL;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011343 xmlFreeParserCtxt(ctxt);
Daniel Veillard03a53c32004-10-26 16:06:51 +000011344 if (newDoc != NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000011345 xmlFreeDoc(newDoc);
Daniel Veillard03a53c32004-10-26 16:06:51 +000011346 }
Daniel Veillard328f48c2002-11-15 15:24:34 +000011347
11348 return(ret);
11349}
11350
Daniel Veillard29b17482004-08-16 00:39:03 +000011351/**
11352 * xmlParseInNodeContext:
11353 * @node: the context node
11354 * @data: the input string
11355 * @datalen: the input string length in bytes
11356 * @options: a combination of xmlParserOption
11357 * @lst: the return value for the set of parsed nodes
11358 *
11359 * Parse a well-balanced chunk of an XML document
11360 * within the context (DTD, namespaces, etc ...) of the given node.
11361 *
11362 * The allowed sequence for the data is a Well Balanced Chunk defined by
11363 * the content production in the XML grammar:
11364 *
11365 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
11366 *
11367 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
11368 * error code otherwise
11369 */
11370xmlParserErrors
11371xmlParseInNodeContext(xmlNodePtr node, const char *data, int datalen,
11372 int options, xmlNodePtr *lst) {
11373#ifdef SAX2
11374 xmlParserCtxtPtr ctxt;
11375 xmlDocPtr doc = NULL;
11376 xmlNodePtr fake, cur;
11377 int nsnr = 0;
11378
11379 xmlParserErrors ret = XML_ERR_OK;
11380
11381 /*
11382 * check all input parameters, grab the document
11383 */
11384 if ((lst == NULL) || (node == NULL) || (data == NULL) || (datalen < 0))
11385 return(XML_ERR_INTERNAL_ERROR);
11386 switch (node->type) {
11387 case XML_ELEMENT_NODE:
11388 case XML_ATTRIBUTE_NODE:
11389 case XML_TEXT_NODE:
11390 case XML_CDATA_SECTION_NODE:
11391 case XML_ENTITY_REF_NODE:
11392 case XML_PI_NODE:
11393 case XML_COMMENT_NODE:
11394 case XML_DOCUMENT_NODE:
11395 case XML_HTML_DOCUMENT_NODE:
11396 break;
11397 default:
11398 return(XML_ERR_INTERNAL_ERROR);
11399
11400 }
11401 while ((node != NULL) && (node->type != XML_ELEMENT_NODE) &&
11402 (node->type != XML_DOCUMENT_NODE) &&
11403 (node->type != XML_HTML_DOCUMENT_NODE))
11404 node = node->parent;
11405 if (node == NULL)
11406 return(XML_ERR_INTERNAL_ERROR);
11407 if (node->type == XML_ELEMENT_NODE)
11408 doc = node->doc;
11409 else
11410 doc = (xmlDocPtr) node;
11411 if (doc == NULL)
11412 return(XML_ERR_INTERNAL_ERROR);
11413
11414 /*
11415 * allocate a context and set-up everything not related to the
11416 * node position in the tree
11417 */
11418 if (doc->type == XML_DOCUMENT_NODE)
11419 ctxt = xmlCreateMemoryParserCtxt((char *) data, datalen);
11420#ifdef LIBXML_HTML_ENABLED
11421 else if (doc->type == XML_HTML_DOCUMENT_NODE)
11422 ctxt = htmlCreateMemoryParserCtxt((char *) data, datalen);
11423#endif
11424 else
11425 return(XML_ERR_INTERNAL_ERROR);
11426
11427 if (ctxt == NULL)
11428 return(XML_ERR_NO_MEMORY);
11429 fake = xmlNewComment(NULL);
11430 if (fake == NULL) {
11431 xmlFreeParserCtxt(ctxt);
11432 return(XML_ERR_NO_MEMORY);
11433 }
11434 xmlAddChild(node, fake);
William M. Brackc3f81342004-10-03 01:22:44 +000011435
11436 /*
11437 * Use input doc's dict if present, else assure XML_PARSE_NODICT is set.
11438 * We need a dictionary for xmlDetectSAX2, so if there's no doc dict
11439 * we must wait until the last moment to free the original one.
11440 */
Daniel Veillard29b17482004-08-16 00:39:03 +000011441 if (doc->dict != NULL) {
William M. Brackc3f81342004-10-03 01:22:44 +000011442 if (ctxt->dict != NULL)
Daniel Veillard29b17482004-08-16 00:39:03 +000011443 xmlDictFree(ctxt->dict);
11444 ctxt->dict = doc->dict;
William M. Brackc3f81342004-10-03 01:22:44 +000011445 } else
11446 options |= XML_PARSE_NODICT;
11447
11448 xmlCtxtUseOptions(ctxt, options);
Daniel Veillard29b17482004-08-16 00:39:03 +000011449 xmlDetectSAX2(ctxt);
11450 ctxt->myDoc = doc;
11451
11452 if (node->type == XML_ELEMENT_NODE) {
11453 nodePush(ctxt, node);
11454 /*
11455 * initialize the SAX2 namespaces stack
11456 */
11457 cur = node;
11458 while ((cur != NULL) && (cur->type == XML_ELEMENT_NODE)) {
11459 xmlNsPtr ns = cur->nsDef;
11460 const xmlChar *iprefix, *ihref;
11461
11462 while (ns != NULL) {
11463 if (ctxt->dict) {
11464 iprefix = xmlDictLookup(ctxt->dict, ns->prefix, -1);
11465 ihref = xmlDictLookup(ctxt->dict, ns->href, -1);
11466 } else {
11467 iprefix = ns->prefix;
11468 ihref = ns->href;
11469 }
11470
11471 if (xmlGetNamespace(ctxt, iprefix) == NULL) {
11472 nsPush(ctxt, iprefix, ihref);
11473 nsnr++;
11474 }
11475 ns = ns->next;
11476 }
11477 cur = cur->parent;
11478 }
11479 ctxt->instate = XML_PARSER_CONTENT;
11480 }
11481
11482 if ((ctxt->validate) || (ctxt->replaceEntities != 0)) {
11483 /*
11484 * ID/IDREF registration will be done in xmlValidateElement below
11485 */
11486 ctxt->loadsubset |= XML_SKIP_IDS;
11487 }
11488
11489 xmlParseContent(ctxt);
11490 nsPop(ctxt, nsnr);
11491 if ((RAW == '<') && (NXT(1) == '/')) {
11492 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
11493 } else if (RAW != 0) {
11494 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
11495 }
11496 if ((ctxt->node != NULL) && (ctxt->node != node)) {
11497 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
11498 ctxt->wellFormed = 0;
11499 }
11500
11501 if (!ctxt->wellFormed) {
11502 if (ctxt->errNo == 0)
11503 ret = XML_ERR_INTERNAL_ERROR;
11504 else
11505 ret = (xmlParserErrors)ctxt->errNo;
11506 } else {
11507 ret = XML_ERR_OK;
11508 }
11509
11510 /*
11511 * Return the newly created nodeset after unlinking it from
11512 * the pseudo sibling.
11513 */
11514
11515 cur = fake->next;
11516 fake->next = NULL;
11517 node->last = fake;
11518
11519 if (cur != NULL) {
11520 cur->prev = NULL;
11521 }
11522
11523 *lst = cur;
11524
11525 while (cur != NULL) {
11526 cur->parent = NULL;
11527 cur = cur->next;
11528 }
11529
11530 xmlUnlinkNode(fake);
11531 xmlFreeNode(fake);
11532
11533
11534 if (ret != XML_ERR_OK) {
11535 xmlFreeNodeList(*lst);
11536 *lst = NULL;
11537 }
William M. Brackc3f81342004-10-03 01:22:44 +000011538
William M. Brackb7b54de2004-10-06 16:38:01 +000011539 if (doc->dict != NULL)
11540 ctxt->dict = NULL;
Daniel Veillard29b17482004-08-16 00:39:03 +000011541 xmlFreeParserCtxt(ctxt);
11542
11543 return(ret);
11544#else /* !SAX2 */
11545 return(XML_ERR_INTERNAL_ERROR);
11546#endif
11547}
11548
Daniel Veillard81273902003-09-30 00:43:48 +000011549#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard328f48c2002-11-15 15:24:34 +000011550/**
Daniel Veillard58e44c92002-08-02 22:19:49 +000011551 * xmlParseBalancedChunkMemoryRecover:
11552 * @doc: the document the chunk pertains to
11553 * @sax: the SAX handler bloc (possibly NULL)
11554 * @user_data: The user data returned on SAX callbacks (possibly NULL)
11555 * @depth: Used for loop detection, use 0
11556 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
11557 * @lst: the return value for the set of parsed nodes
11558 * @recover: return nodes even if the data is broken (use 0)
11559 *
11560 *
11561 * Parse a well-balanced chunk of an XML document
11562 * called by the parser
11563 * The allowed sequence for the Well Balanced Chunk is the one defined by
11564 * the content production in the XML grammar:
11565 *
11566 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
11567 *
11568 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
11569 * the parser error code otherwise
11570 *
11571 * In case recover is set to 1, the nodelist will not be empty even if
11572 * the parsed chunk is not well balanced.
11573 */
11574int
11575xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax,
11576 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst,
11577 int recover) {
Owen Taylor3473f882001-02-23 17:55:21 +000011578 xmlParserCtxtPtr ctxt;
11579 xmlDocPtr newDoc;
11580 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000011581 xmlNodePtr content, newRoot;
Owen Taylor3473f882001-02-23 17:55:21 +000011582 int size;
11583 int ret = 0;
11584
11585 if (depth > 40) {
11586 return(XML_ERR_ENTITY_LOOP);
11587 }
11588
11589
Daniel Veillardcda96922001-08-21 10:56:31 +000011590 if (lst != NULL)
11591 *lst = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000011592 if (string == NULL)
11593 return(-1);
11594
11595 size = xmlStrlen(string);
11596
11597 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
11598 if (ctxt == NULL) return(-1);
11599 ctxt->userData = ctxt;
11600 if (sax != NULL) {
11601 oldsax = ctxt->sax;
11602 ctxt->sax = sax;
11603 if (user_data != NULL)
11604 ctxt->userData = user_data;
11605 }
11606 newDoc = xmlNewDoc(BAD_CAST "1.0");
11607 if (newDoc == NULL) {
11608 xmlFreeParserCtxt(ctxt);
11609 return(-1);
11610 }
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000011611 if ((doc != NULL) && (doc->dict != NULL)) {
11612 xmlDictFree(ctxt->dict);
11613 ctxt->dict = doc->dict;
11614 xmlDictReference(ctxt->dict);
11615 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
11616 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
11617 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
11618 ctxt->dictNames = 1;
11619 } else {
11620 xmlCtxtUseOptions(ctxt, XML_PARSE_NODICT);
11621 }
Owen Taylor3473f882001-02-23 17:55:21 +000011622 if (doc != NULL) {
11623 newDoc->intSubset = doc->intSubset;
11624 newDoc->extSubset = doc->extSubset;
11625 }
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000011626 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
11627 if (newRoot == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000011628 if (sax != NULL)
11629 ctxt->sax = oldsax;
11630 xmlFreeParserCtxt(ctxt);
11631 newDoc->intSubset = NULL;
11632 newDoc->extSubset = NULL;
11633 xmlFreeDoc(newDoc);
11634 return(-1);
11635 }
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000011636 xmlAddChild((xmlNodePtr) newDoc, newRoot);
11637 nodePush(ctxt, newRoot);
Owen Taylor3473f882001-02-23 17:55:21 +000011638 if (doc == NULL) {
11639 ctxt->myDoc = newDoc;
11640 } else {
Daniel Veillard42766c02002-08-22 20:52:17 +000011641 ctxt->myDoc = newDoc;
Owen Taylor3473f882001-02-23 17:55:21 +000011642 newDoc->children->doc = doc;
11643 }
11644 ctxt->instate = XML_PARSER_CONTENT;
11645 ctxt->depth = depth;
11646
11647 /*
11648 * Doing validity checking on chunk doesn't make sense
11649 */
11650 ctxt->validate = 0;
11651 ctxt->loadsubset = 0;
Daniel Veillarde57ec792003-09-10 10:50:59 +000011652 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011653
Daniel Veillardb39bc392002-10-26 19:29:51 +000011654 if ( doc != NULL ){
11655 content = doc->children;
11656 doc->children = NULL;
11657 xmlParseContent(ctxt);
11658 doc->children = content;
11659 }
11660 else {
11661 xmlParseContent(ctxt);
11662 }
Owen Taylor3473f882001-02-23 17:55:21 +000011663 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011664 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011665 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011666 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011667 }
11668 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011669 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011670 }
11671
11672 if (!ctxt->wellFormed) {
11673 if (ctxt->errNo == 0)
11674 ret = 1;
11675 else
11676 ret = ctxt->errNo;
11677 } else {
Daniel Veillard58e44c92002-08-02 22:19:49 +000011678 ret = 0;
11679 }
11680
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000011681 if ((lst != NULL) && ((ret == 0) || (recover == 1))) {
11682 xmlNodePtr cur;
Owen Taylor3473f882001-02-23 17:55:21 +000011683
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000011684 /*
11685 * Return the newly created nodeset after unlinking it from
11686 * they pseudo parent.
11687 */
11688 cur = newDoc->children->children;
11689 *lst = cur;
11690 while (cur != NULL) {
11691 xmlSetTreeDoc(cur, doc);
11692 cur->parent = NULL;
11693 cur = cur->next;
Owen Taylor3473f882001-02-23 17:55:21 +000011694 }
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000011695 newDoc->children->children = NULL;
11696 }
Daniel Veillard58e44c92002-08-02 22:19:49 +000011697
Owen Taylor3473f882001-02-23 17:55:21 +000011698 if (sax != NULL)
11699 ctxt->sax = oldsax;
11700 xmlFreeParserCtxt(ctxt);
11701 newDoc->intSubset = NULL;
11702 newDoc->extSubset = NULL;
11703 xmlFreeDoc(newDoc);
11704
11705 return(ret);
11706}
11707
11708/**
11709 * xmlSAXParseEntity:
11710 * @sax: the SAX handler block
11711 * @filename: the filename
11712 *
11713 * parse an XML external entity out of context and build a tree.
11714 * It use the given SAX function block to handle the parsing callback.
11715 * If sax is NULL, fallback to the default DOM tree building routines.
11716 *
11717 * [78] extParsedEnt ::= TextDecl? content
11718 *
11719 * This correspond to a "Well Balanced" chunk
11720 *
11721 * Returns the resulting document tree
11722 */
11723
11724xmlDocPtr
11725xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
11726 xmlDocPtr ret;
11727 xmlParserCtxtPtr ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000011728
11729 ctxt = xmlCreateFileParserCtxt(filename);
11730 if (ctxt == NULL) {
11731 return(NULL);
11732 }
11733 if (sax != NULL) {
11734 if (ctxt->sax != NULL)
11735 xmlFree(ctxt->sax);
11736 ctxt->sax = sax;
11737 ctxt->userData = NULL;
11738 }
11739
Owen Taylor3473f882001-02-23 17:55:21 +000011740 xmlParseExtParsedEnt(ctxt);
11741
11742 if (ctxt->wellFormed)
11743 ret = ctxt->myDoc;
11744 else {
11745 ret = NULL;
11746 xmlFreeDoc(ctxt->myDoc);
11747 ctxt->myDoc = NULL;
11748 }
11749 if (sax != NULL)
11750 ctxt->sax = NULL;
11751 xmlFreeParserCtxt(ctxt);
11752
11753 return(ret);
11754}
11755
11756/**
11757 * xmlParseEntity:
11758 * @filename: the filename
11759 *
11760 * parse an XML external entity out of context and build a tree.
11761 *
11762 * [78] extParsedEnt ::= TextDecl? content
11763 *
11764 * This correspond to a "Well Balanced" chunk
11765 *
11766 * Returns the resulting document tree
11767 */
11768
11769xmlDocPtr
11770xmlParseEntity(const char *filename) {
11771 return(xmlSAXParseEntity(NULL, filename));
11772}
Daniel Veillard81273902003-09-30 00:43:48 +000011773#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000011774
11775/**
11776 * xmlCreateEntityParserCtxt:
11777 * @URL: the entity URL
11778 * @ID: the entity PUBLIC ID
Daniel Veillardcbaf3992001-12-31 16:16:02 +000011779 * @base: a possible base for the target URI
Owen Taylor3473f882001-02-23 17:55:21 +000011780 *
11781 * Create a parser context for an external entity
11782 * Automatic support for ZLIB/Compress compressed document is provided
11783 * by default if found at compile-time.
11784 *
11785 * Returns the new parser context or NULL
11786 */
11787xmlParserCtxtPtr
11788xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
11789 const xmlChar *base) {
11790 xmlParserCtxtPtr ctxt;
11791 xmlParserInputPtr inputStream;
11792 char *directory = NULL;
11793 xmlChar *uri;
11794
11795 ctxt = xmlNewParserCtxt();
11796 if (ctxt == NULL) {
11797 return(NULL);
11798 }
11799
11800 uri = xmlBuildURI(URL, base);
11801
11802 if (uri == NULL) {
11803 inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
11804 if (inputStream == NULL) {
11805 xmlFreeParserCtxt(ctxt);
11806 return(NULL);
11807 }
11808
11809 inputPush(ctxt, inputStream);
11810
11811 if ((ctxt->directory == NULL) && (directory == NULL))
11812 directory = xmlParserGetDirectory((char *)URL);
11813 if ((ctxt->directory == NULL) && (directory != NULL))
11814 ctxt->directory = directory;
11815 } else {
11816 inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt);
11817 if (inputStream == NULL) {
11818 xmlFree(uri);
11819 xmlFreeParserCtxt(ctxt);
11820 return(NULL);
11821 }
11822
11823 inputPush(ctxt, inputStream);
11824
11825 if ((ctxt->directory == NULL) && (directory == NULL))
11826 directory = xmlParserGetDirectory((char *)uri);
11827 if ((ctxt->directory == NULL) && (directory != NULL))
11828 ctxt->directory = directory;
11829 xmlFree(uri);
11830 }
Owen Taylor3473f882001-02-23 17:55:21 +000011831 return(ctxt);
11832}
11833
11834/************************************************************************
11835 * *
11836 * Front ends when parsing from a file *
11837 * *
11838 ************************************************************************/
11839
11840/**
Daniel Veillard61b93382003-11-03 14:28:31 +000011841 * xmlCreateURLParserCtxt:
11842 * @filename: the filename or URL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000011843 * @options: a combination of xmlParserOption
Owen Taylor3473f882001-02-23 17:55:21 +000011844 *
Daniel Veillard61b93382003-11-03 14:28:31 +000011845 * Create a parser context for a file or URL content.
Owen Taylor3473f882001-02-23 17:55:21 +000011846 * Automatic support for ZLIB/Compress compressed document is provided
Daniel Veillard61b93382003-11-03 14:28:31 +000011847 * by default if found at compile-time and for file accesses
Owen Taylor3473f882001-02-23 17:55:21 +000011848 *
11849 * Returns the new parser context or NULL
11850 */
11851xmlParserCtxtPtr
Daniel Veillard61b93382003-11-03 14:28:31 +000011852xmlCreateURLParserCtxt(const char *filename, int options)
Owen Taylor3473f882001-02-23 17:55:21 +000011853{
11854 xmlParserCtxtPtr ctxt;
11855 xmlParserInputPtr inputStream;
Owen Taylor3473f882001-02-23 17:55:21 +000011856 char *directory = NULL;
11857
Owen Taylor3473f882001-02-23 17:55:21 +000011858 ctxt = xmlNewParserCtxt();
11859 if (ctxt == NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +000011860 xmlErrMemory(NULL, "cannot allocate parser context");
Owen Taylor3473f882001-02-23 17:55:21 +000011861 return(NULL);
11862 }
11863
Daniel Veillarddf292f72005-01-16 19:00:15 +000011864 if (options)
11865 xmlCtxtUseOptions(ctxt, options);
11866 ctxt->linenumbers = 1;
Igor Zlatkovicce076162003-02-23 13:39:39 +000011867
Daniel Veillard4e9b1bc2003-06-09 10:30:33 +000011868 inputStream = xmlLoadExternalEntity(filename, NULL, ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011869 if (inputStream == NULL) {
11870 xmlFreeParserCtxt(ctxt);
11871 return(NULL);
11872 }
11873
Owen Taylor3473f882001-02-23 17:55:21 +000011874 inputPush(ctxt, inputStream);
11875 if ((ctxt->directory == NULL) && (directory == NULL))
Igor Zlatkovic5f9fada2003-02-19 14:51:00 +000011876 directory = xmlParserGetDirectory(filename);
Owen Taylor3473f882001-02-23 17:55:21 +000011877 if ((ctxt->directory == NULL) && (directory != NULL))
11878 ctxt->directory = directory;
11879
11880 return(ctxt);
11881}
11882
Daniel Veillard61b93382003-11-03 14:28:31 +000011883/**
11884 * xmlCreateFileParserCtxt:
11885 * @filename: the filename
11886 *
11887 * Create a parser context for a file content.
11888 * Automatic support for ZLIB/Compress compressed document is provided
11889 * by default if found at compile-time.
11890 *
11891 * Returns the new parser context or NULL
11892 */
11893xmlParserCtxtPtr
11894xmlCreateFileParserCtxt(const char *filename)
11895{
11896 return(xmlCreateURLParserCtxt(filename, 0));
11897}
11898
Daniel Veillard81273902003-09-30 00:43:48 +000011899#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000011900/**
Daniel Veillarda293c322001-10-02 13:54:14 +000011901 * xmlSAXParseFileWithData:
Owen Taylor3473f882001-02-23 17:55:21 +000011902 * @sax: the SAX handler block
11903 * @filename: the filename
11904 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
11905 * documents
Daniel Veillarda293c322001-10-02 13:54:14 +000011906 * @data: the userdata
Owen Taylor3473f882001-02-23 17:55:21 +000011907 *
11908 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
11909 * compressed document is provided by default if found at compile-time.
11910 * It use the given SAX function block to handle the parsing callback.
11911 * If sax is NULL, fallback to the default DOM tree building routines.
11912 *
Daniel Veillarde19fc232002-04-22 16:01:24 +000011913 * User data (void *) is stored within the parser context in the
11914 * context's _private member, so it is available nearly everywhere in libxml
Daniel Veillarda293c322001-10-02 13:54:14 +000011915 *
Owen Taylor3473f882001-02-23 17:55:21 +000011916 * Returns the resulting document tree
11917 */
11918
11919xmlDocPtr
Daniel Veillarda293c322001-10-02 13:54:14 +000011920xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename,
11921 int recovery, void *data) {
Owen Taylor3473f882001-02-23 17:55:21 +000011922 xmlDocPtr ret;
11923 xmlParserCtxtPtr ctxt;
11924 char *directory = NULL;
11925
Daniel Veillard635ef722001-10-29 11:48:19 +000011926 xmlInitParser();
11927
Owen Taylor3473f882001-02-23 17:55:21 +000011928 ctxt = xmlCreateFileParserCtxt(filename);
11929 if (ctxt == NULL) {
11930 return(NULL);
11931 }
11932 if (sax != NULL) {
11933 if (ctxt->sax != NULL)
11934 xmlFree(ctxt->sax);
11935 ctxt->sax = sax;
Owen Taylor3473f882001-02-23 17:55:21 +000011936 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000011937 xmlDetectSAX2(ctxt);
Daniel Veillarda293c322001-10-02 13:54:14 +000011938 if (data!=NULL) {
Daniel Veillardc790bf42003-10-11 10:50:10 +000011939 ctxt->_private = data;
Daniel Veillarda293c322001-10-02 13:54:14 +000011940 }
Owen Taylor3473f882001-02-23 17:55:21 +000011941
11942 if ((ctxt->directory == NULL) && (directory == NULL))
11943 directory = xmlParserGetDirectory(filename);
11944 if ((ctxt->directory == NULL) && (directory != NULL))
11945 ctxt->directory = (char *) xmlStrdup((xmlChar *) directory);
11946
Daniel Veillarddad3f682002-11-17 16:47:27 +000011947 ctxt->recovery = recovery;
11948
Owen Taylor3473f882001-02-23 17:55:21 +000011949 xmlParseDocument(ctxt);
11950
William M. Brackc07329e2003-09-08 01:57:30 +000011951 if ((ctxt->wellFormed) || recovery) {
11952 ret = ctxt->myDoc;
Daniel Veillardb65e12e2003-10-08 21:33:28 +000011953 if (ret != NULL) {
11954 if (ctxt->input->buf->compressed > 0)
11955 ret->compression = 9;
11956 else
11957 ret->compression = ctxt->input->buf->compressed;
11958 }
William M. Brackc07329e2003-09-08 01:57:30 +000011959 }
Owen Taylor3473f882001-02-23 17:55:21 +000011960 else {
11961 ret = NULL;
11962 xmlFreeDoc(ctxt->myDoc);
11963 ctxt->myDoc = NULL;
11964 }
11965 if (sax != NULL)
11966 ctxt->sax = NULL;
11967 xmlFreeParserCtxt(ctxt);
11968
11969 return(ret);
11970}
11971
11972/**
Daniel Veillarda293c322001-10-02 13:54:14 +000011973 * xmlSAXParseFile:
11974 * @sax: the SAX handler block
11975 * @filename: the filename
11976 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
11977 * documents
11978 *
11979 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
11980 * compressed document is provided by default if found at compile-time.
11981 * It use the given SAX function block to handle the parsing callback.
11982 * If sax is NULL, fallback to the default DOM tree building routines.
11983 *
11984 * Returns the resulting document tree
11985 */
11986
11987xmlDocPtr
11988xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
11989 int recovery) {
11990 return(xmlSAXParseFileWithData(sax,filename,recovery,NULL));
11991}
11992
11993/**
Owen Taylor3473f882001-02-23 17:55:21 +000011994 * xmlRecoverDoc:
11995 * @cur: a pointer to an array of xmlChar
11996 *
11997 * parse an XML in-memory document and build a tree.
11998 * In the case the document is not Well Formed, a tree is built anyway
11999 *
12000 * Returns the resulting document tree
12001 */
12002
12003xmlDocPtr
12004xmlRecoverDoc(xmlChar *cur) {
12005 return(xmlSAXParseDoc(NULL, cur, 1));
12006}
12007
12008/**
12009 * xmlParseFile:
12010 * @filename: the filename
12011 *
12012 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
12013 * compressed document is provided by default if found at compile-time.
12014 *
Daniel Veillard5d96fff2001-08-31 14:55:30 +000012015 * Returns the resulting document tree if the file was wellformed,
12016 * NULL otherwise.
Owen Taylor3473f882001-02-23 17:55:21 +000012017 */
12018
12019xmlDocPtr
12020xmlParseFile(const char *filename) {
12021 return(xmlSAXParseFile(NULL, filename, 0));
12022}
12023
12024/**
12025 * xmlRecoverFile:
12026 * @filename: the filename
12027 *
12028 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
12029 * compressed document is provided by default if found at compile-time.
12030 * In the case the document is not Well Formed, a tree is built anyway
12031 *
12032 * Returns the resulting document tree
12033 */
12034
12035xmlDocPtr
12036xmlRecoverFile(const char *filename) {
12037 return(xmlSAXParseFile(NULL, filename, 1));
12038}
12039
12040
12041/**
12042 * xmlSetupParserForBuffer:
12043 * @ctxt: an XML parser context
12044 * @buffer: a xmlChar * buffer
12045 * @filename: a file name
12046 *
12047 * Setup the parser context to parse a new buffer; Clears any prior
12048 * contents from the parser context. The buffer parameter must not be
12049 * NULL, but the filename parameter can be
12050 */
12051void
12052xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
12053 const char* filename)
12054{
12055 xmlParserInputPtr input;
12056
Daniel Veillard36e5cd52004-11-02 14:52:23 +000012057 if ((ctxt == NULL) || (buffer == NULL))
12058 return;
12059
Owen Taylor3473f882001-02-23 17:55:21 +000012060 input = xmlNewInputStream(ctxt);
12061 if (input == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +000012062 xmlErrMemory(NULL, "parsing new buffer: out of memory\n");
Daniel Veillard36e5cd52004-11-02 14:52:23 +000012063 xmlClearParserCtxt(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000012064 return;
12065 }
12066
12067 xmlClearParserCtxt(ctxt);
12068 if (filename != NULL)
Daniel Veillardc3ca5ba2003-05-09 22:26:28 +000012069 input->filename = (char *) xmlCanonicPath((const xmlChar *)filename);
Owen Taylor3473f882001-02-23 17:55:21 +000012070 input->base = buffer;
12071 input->cur = buffer;
Daniel Veillard48b2f892001-02-25 16:11:03 +000012072 input->end = &buffer[xmlStrlen(buffer)];
Owen Taylor3473f882001-02-23 17:55:21 +000012073 inputPush(ctxt, input);
12074}
12075
12076/**
12077 * xmlSAXUserParseFile:
12078 * @sax: a SAX handler
12079 * @user_data: The user data returned on SAX callbacks
12080 * @filename: a file name
12081 *
12082 * parse an XML file and call the given SAX handler routines.
12083 * Automatic support for ZLIB/Compress compressed document is provided
12084 *
12085 * Returns 0 in case of success or a error number otherwise
12086 */
12087int
12088xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
12089 const char *filename) {
12090 int ret = 0;
12091 xmlParserCtxtPtr ctxt;
12092
12093 ctxt = xmlCreateFileParserCtxt(filename);
12094 if (ctxt == NULL) return -1;
Daniel Veillard81273902003-09-30 00:43:48 +000012095#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard092643b2003-09-25 14:29:29 +000012096 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Daniel Veillard81273902003-09-30 00:43:48 +000012097#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012098 xmlFree(ctxt->sax);
12099 ctxt->sax = sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000012100 xmlDetectSAX2(ctxt);
12101
Owen Taylor3473f882001-02-23 17:55:21 +000012102 if (user_data != NULL)
12103 ctxt->userData = user_data;
12104
12105 xmlParseDocument(ctxt);
12106
12107 if (ctxt->wellFormed)
12108 ret = 0;
12109 else {
12110 if (ctxt->errNo != 0)
12111 ret = ctxt->errNo;
12112 else
12113 ret = -1;
12114 }
12115 if (sax != NULL)
12116 ctxt->sax = NULL;
Daniel Veillard34099b42004-11-04 17:34:35 +000012117 if (ctxt->myDoc != NULL) {
12118 xmlFreeDoc(ctxt->myDoc);
12119 ctxt->myDoc = NULL;
12120 }
Owen Taylor3473f882001-02-23 17:55:21 +000012121 xmlFreeParserCtxt(ctxt);
12122
12123 return ret;
12124}
Daniel Veillard81273902003-09-30 00:43:48 +000012125#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012126
12127/************************************************************************
12128 * *
12129 * Front ends when parsing from memory *
12130 * *
12131 ************************************************************************/
12132
12133/**
12134 * xmlCreateMemoryParserCtxt:
12135 * @buffer: a pointer to a char array
12136 * @size: the size of the array
12137 *
12138 * Create a parser context for an XML in-memory document.
12139 *
12140 * Returns the new parser context or NULL
12141 */
12142xmlParserCtxtPtr
Daniel Veillardfd7ddca2001-05-16 10:57:35 +000012143xmlCreateMemoryParserCtxt(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000012144 xmlParserCtxtPtr ctxt;
12145 xmlParserInputPtr input;
12146 xmlParserInputBufferPtr buf;
12147
12148 if (buffer == NULL)
12149 return(NULL);
12150 if (size <= 0)
12151 return(NULL);
12152
12153 ctxt = xmlNewParserCtxt();
12154 if (ctxt == NULL)
12155 return(NULL);
12156
Daniel Veillard53350552003-09-18 13:35:51 +000012157 /* TODO: xmlParserInputBufferCreateStatic, requires some serious changes */
Owen Taylor3473f882001-02-23 17:55:21 +000012158 buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
Daniel Veillarda7e05b42002-11-19 08:11:14 +000012159 if (buf == NULL) {
12160 xmlFreeParserCtxt(ctxt);
12161 return(NULL);
12162 }
Owen Taylor3473f882001-02-23 17:55:21 +000012163
12164 input = xmlNewInputStream(ctxt);
12165 if (input == NULL) {
Daniel Veillarda7e05b42002-11-19 08:11:14 +000012166 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000012167 xmlFreeParserCtxt(ctxt);
12168 return(NULL);
12169 }
12170
12171 input->filename = NULL;
12172 input->buf = buf;
12173 input->base = input->buf->buffer->content;
12174 input->cur = input->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +000012175 input->end = &input->buf->buffer->content[input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000012176
12177 inputPush(ctxt, input);
12178 return(ctxt);
12179}
12180
Daniel Veillard81273902003-09-30 00:43:48 +000012181#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000012182/**
Daniel Veillard8606bbb2002-11-12 12:36:52 +000012183 * xmlSAXParseMemoryWithData:
12184 * @sax: the SAX handler block
12185 * @buffer: an pointer to a char array
12186 * @size: the size of the array
12187 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
12188 * documents
12189 * @data: the userdata
12190 *
12191 * parse an XML in-memory block and use the given SAX function block
12192 * to handle the parsing callback. If sax is NULL, fallback to the default
12193 * DOM tree building routines.
12194 *
12195 * User data (void *) is stored within the parser context in the
12196 * context's _private member, so it is available nearly everywhere in libxml
12197 *
12198 * Returns the resulting document tree
12199 */
12200
12201xmlDocPtr
12202xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer,
12203 int size, int recovery, void *data) {
12204 xmlDocPtr ret;
12205 xmlParserCtxtPtr ctxt;
12206
12207 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
12208 if (ctxt == NULL) return(NULL);
12209 if (sax != NULL) {
12210 if (ctxt->sax != NULL)
12211 xmlFree(ctxt->sax);
12212 ctxt->sax = sax;
12213 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000012214 xmlDetectSAX2(ctxt);
Daniel Veillard8606bbb2002-11-12 12:36:52 +000012215 if (data!=NULL) {
12216 ctxt->_private=data;
12217 }
12218
Daniel Veillardadba5f12003-04-04 16:09:01 +000012219 ctxt->recovery = recovery;
12220
Daniel Veillard8606bbb2002-11-12 12:36:52 +000012221 xmlParseDocument(ctxt);
12222
12223 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
12224 else {
12225 ret = NULL;
12226 xmlFreeDoc(ctxt->myDoc);
12227 ctxt->myDoc = NULL;
12228 }
12229 if (sax != NULL)
12230 ctxt->sax = NULL;
12231 xmlFreeParserCtxt(ctxt);
12232
12233 return(ret);
12234}
12235
12236/**
Owen Taylor3473f882001-02-23 17:55:21 +000012237 * xmlSAXParseMemory:
12238 * @sax: the SAX handler block
12239 * @buffer: an pointer to a char array
12240 * @size: the size of the array
12241 * @recovery: work in recovery mode, i.e. tries to read not Well Formed
12242 * documents
12243 *
12244 * parse an XML in-memory block and use the given SAX function block
12245 * to handle the parsing callback. If sax is NULL, fallback to the default
12246 * DOM tree building routines.
12247 *
12248 * Returns the resulting document tree
12249 */
12250xmlDocPtr
Daniel Veillard50822cb2001-07-26 20:05:51 +000012251xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer,
12252 int size, int recovery) {
Daniel Veillard8606bbb2002-11-12 12:36:52 +000012253 return xmlSAXParseMemoryWithData(sax, buffer, size, recovery, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000012254}
12255
12256/**
12257 * xmlParseMemory:
12258 * @buffer: an pointer to a char array
12259 * @size: the size of the array
12260 *
12261 * parse an XML in-memory block and build a tree.
12262 *
12263 * Returns the resulting document tree
12264 */
12265
Daniel Veillard50822cb2001-07-26 20:05:51 +000012266xmlDocPtr xmlParseMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000012267 return(xmlSAXParseMemory(NULL, buffer, size, 0));
12268}
12269
12270/**
12271 * xmlRecoverMemory:
12272 * @buffer: an pointer to a char array
12273 * @size: the size of the array
12274 *
12275 * parse an XML in-memory block and build a tree.
12276 * In the case the document is not Well Formed, a tree is built anyway
12277 *
12278 * Returns the resulting document tree
12279 */
12280
Daniel Veillard50822cb2001-07-26 20:05:51 +000012281xmlDocPtr xmlRecoverMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000012282 return(xmlSAXParseMemory(NULL, buffer, size, 1));
12283}
12284
12285/**
12286 * xmlSAXUserParseMemory:
12287 * @sax: a SAX handler
12288 * @user_data: The user data returned on SAX callbacks
12289 * @buffer: an in-memory XML document input
12290 * @size: the length of the XML document in bytes
12291 *
12292 * A better SAX parsing routine.
12293 * parse an XML in-memory buffer and call the given SAX handler routines.
12294 *
12295 * Returns 0 in case of success or a error number otherwise
12296 */
12297int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardfd7ddca2001-05-16 10:57:35 +000012298 const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000012299 int ret = 0;
12300 xmlParserCtxtPtr ctxt;
12301 xmlSAXHandlerPtr oldsax = NULL;
12302
Daniel Veillard9e923512002-08-14 08:48:52 +000012303 if (sax == NULL) return -1;
Owen Taylor3473f882001-02-23 17:55:21 +000012304 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
12305 if (ctxt == NULL) return -1;
Daniel Veillard9e923512002-08-14 08:48:52 +000012306 oldsax = ctxt->sax;
12307 ctxt->sax = sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000012308 xmlDetectSAX2(ctxt);
Daniel Veillard30211a02001-04-26 09:33:18 +000012309 if (user_data != NULL)
12310 ctxt->userData = user_data;
Owen Taylor3473f882001-02-23 17:55:21 +000012311
12312 xmlParseDocument(ctxt);
12313
12314 if (ctxt->wellFormed)
12315 ret = 0;
12316 else {
12317 if (ctxt->errNo != 0)
12318 ret = ctxt->errNo;
12319 else
12320 ret = -1;
12321 }
Daniel Veillard9e923512002-08-14 08:48:52 +000012322 ctxt->sax = oldsax;
Daniel Veillard34099b42004-11-04 17:34:35 +000012323 if (ctxt->myDoc != NULL) {
12324 xmlFreeDoc(ctxt->myDoc);
12325 ctxt->myDoc = NULL;
12326 }
Owen Taylor3473f882001-02-23 17:55:21 +000012327 xmlFreeParserCtxt(ctxt);
12328
12329 return ret;
12330}
Daniel Veillard81273902003-09-30 00:43:48 +000012331#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012332
12333/**
12334 * xmlCreateDocParserCtxt:
12335 * @cur: a pointer to an array of xmlChar
12336 *
12337 * Creates a parser context for an XML in-memory document.
12338 *
12339 * Returns the new parser context or NULL
12340 */
12341xmlParserCtxtPtr
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012342xmlCreateDocParserCtxt(const xmlChar *cur) {
Owen Taylor3473f882001-02-23 17:55:21 +000012343 int len;
12344
12345 if (cur == NULL)
12346 return(NULL);
12347 len = xmlStrlen(cur);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012348 return(xmlCreateMemoryParserCtxt((const char *)cur, len));
Owen Taylor3473f882001-02-23 17:55:21 +000012349}
12350
Daniel Veillard81273902003-09-30 00:43:48 +000012351#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000012352/**
12353 * xmlSAXParseDoc:
12354 * @sax: the SAX handler block
12355 * @cur: a pointer to an array of xmlChar
12356 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
12357 * documents
12358 *
12359 * parse an XML in-memory document and build a tree.
12360 * It use the given SAX function block to handle the parsing callback.
12361 * If sax is NULL, fallback to the default DOM tree building routines.
12362 *
12363 * Returns the resulting document tree
12364 */
12365
12366xmlDocPtr
Daniel Veillard7331e5c2005-03-31 14:59:00 +000012367xmlSAXParseDoc(xmlSAXHandlerPtr sax, const xmlChar *cur, int recovery) {
Owen Taylor3473f882001-02-23 17:55:21 +000012368 xmlDocPtr ret;
12369 xmlParserCtxtPtr ctxt;
Daniel Veillard38936062004-11-04 17:45:11 +000012370 xmlSAXHandlerPtr oldsax = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000012371
Daniel Veillard38936062004-11-04 17:45:11 +000012372 if (cur == NULL) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000012373
12374
12375 ctxt = xmlCreateDocParserCtxt(cur);
12376 if (ctxt == NULL) return(NULL);
12377 if (sax != NULL) {
Daniel Veillard38936062004-11-04 17:45:11 +000012378 oldsax = ctxt->sax;
Owen Taylor3473f882001-02-23 17:55:21 +000012379 ctxt->sax = sax;
12380 ctxt->userData = NULL;
12381 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000012382 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000012383
12384 xmlParseDocument(ctxt);
12385 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
12386 else {
12387 ret = NULL;
12388 xmlFreeDoc(ctxt->myDoc);
12389 ctxt->myDoc = NULL;
12390 }
Daniel Veillard34099b42004-11-04 17:34:35 +000012391 if (sax != NULL)
Daniel Veillard38936062004-11-04 17:45:11 +000012392 ctxt->sax = oldsax;
Owen Taylor3473f882001-02-23 17:55:21 +000012393 xmlFreeParserCtxt(ctxt);
12394
12395 return(ret);
12396}
12397
12398/**
12399 * xmlParseDoc:
12400 * @cur: a pointer to an array of xmlChar
12401 *
12402 * parse an XML in-memory document and build a tree.
12403 *
12404 * Returns the resulting document tree
12405 */
12406
12407xmlDocPtr
Daniel Veillard7331e5c2005-03-31 14:59:00 +000012408xmlParseDoc(const xmlChar *cur) {
Owen Taylor3473f882001-02-23 17:55:21 +000012409 return(xmlSAXParseDoc(NULL, cur, 0));
12410}
Daniel Veillard81273902003-09-30 00:43:48 +000012411#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012412
Daniel Veillard81273902003-09-30 00:43:48 +000012413#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +000012414/************************************************************************
12415 * *
12416 * Specific function to keep track of entities references *
12417 * and used by the XSLT debugger *
12418 * *
12419 ************************************************************************/
12420
12421static xmlEntityReferenceFunc xmlEntityRefFunc = NULL;
12422
12423/**
12424 * xmlAddEntityReference:
12425 * @ent : A valid entity
12426 * @firstNode : A valid first node for children of entity
12427 * @lastNode : A valid last node of children entity
12428 *
12429 * Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY
12430 */
12431static void
12432xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
12433 xmlNodePtr lastNode)
12434{
12435 if (xmlEntityRefFunc != NULL) {
12436 (*xmlEntityRefFunc) (ent, firstNode, lastNode);
12437 }
12438}
12439
12440
12441/**
12442 * xmlSetEntityReferenceFunc:
Daniel Veillard01c13b52002-12-10 15:19:08 +000012443 * @func: A valid function
Daniel Veillard8107a222002-01-13 14:10:10 +000012444 *
12445 * Set the function to call call back when a xml reference has been made
12446 */
12447void
12448xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func)
12449{
12450 xmlEntityRefFunc = func;
12451}
Daniel Veillard81273902003-09-30 00:43:48 +000012452#endif /* LIBXML_LEGACY_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012453
12454/************************************************************************
12455 * *
12456 * Miscellaneous *
12457 * *
12458 ************************************************************************/
12459
12460#ifdef LIBXML_XPATH_ENABLED
12461#include <libxml/xpath.h>
12462#endif
12463
Daniel Veillardffa3c742005-07-21 13:24:09 +000012464extern void XMLCDECL xmlGenericErrorDefaultFunc(void *ctx, const char *msg, ...);
Owen Taylor3473f882001-02-23 17:55:21 +000012465static int xmlParserInitialized = 0;
12466
12467/**
12468 * xmlInitParser:
12469 *
12470 * Initialization function for the XML parser.
12471 * This is not reentrant. Call once before processing in case of
12472 * use in multithreaded programs.
12473 */
12474
12475void
12476xmlInitParser(void) {
Daniel Veillard3c01b1d2001-10-17 15:58:35 +000012477 if (xmlParserInitialized != 0)
12478 return;
Owen Taylor3473f882001-02-23 17:55:21 +000012479
Daniel Veillarddb5850a2002-01-18 11:49:26 +000012480 if ((xmlGenericError == xmlGenericErrorDefaultFunc) ||
12481 (xmlGenericError == NULL))
12482 initGenericErrorDefaultFunc(NULL);
Daniel Veillard781ac8b2003-05-15 22:11:36 +000012483 xmlInitGlobals();
Daniel Veillardd0463562001-10-13 09:15:48 +000012484 xmlInitThreads();
Daniel Veillard6f350292001-10-14 09:56:15 +000012485 xmlInitMemory();
Owen Taylor3473f882001-02-23 17:55:21 +000012486 xmlInitCharEncodingHandlers();
Owen Taylor3473f882001-02-23 17:55:21 +000012487 xmlDefaultSAXHandlerInit();
12488 xmlRegisterDefaultInputCallbacks();
Daniel Veillarda9cce9c2003-09-29 13:20:24 +000012489#ifdef LIBXML_OUTPUT_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000012490 xmlRegisterDefaultOutputCallbacks();
Daniel Veillarda9cce9c2003-09-29 13:20:24 +000012491#endif /* LIBXML_OUTPUT_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012492#ifdef LIBXML_HTML_ENABLED
12493 htmlInitAutoClose();
12494 htmlDefaultSAXHandlerInit();
12495#endif
12496#ifdef LIBXML_XPATH_ENABLED
12497 xmlXPathInit();
12498#endif
12499 xmlParserInitialized = 1;
12500}
12501
12502/**
12503 * xmlCleanupParser:
12504 *
Daniel Veillardd8cf9062003-11-11 21:12:36 +000012505 * Cleanup function for the XML library. It tries to reclaim all
12506 * parsing related global memory allocated for the library processing.
Owen Taylor3473f882001-02-23 17:55:21 +000012507 * It doesn't deallocate any document related memory. Calling this
Daniel Veillardd8cf9062003-11-11 21:12:36 +000012508 * function should not prevent reusing the library but one should
12509 * call xmlCleanupParser() only when the process has
Daniel Veillard7424eb62003-01-24 14:14:52 +000012510 * finished using the library or XML document built with it.
Owen Taylor3473f882001-02-23 17:55:21 +000012511 */
12512
12513void
12514xmlCleanupParser(void) {
Daniel Veillard7fb801f2003-08-17 21:07:26 +000012515 if (!xmlParserInitialized)
12516 return;
12517
Owen Taylor3473f882001-02-23 17:55:21 +000012518 xmlCleanupCharEncodingHandlers();
Daniel Veillarde2940dd2001-08-22 00:06:49 +000012519#ifdef LIBXML_CATALOG_ENABLED
12520 xmlCatalogCleanup();
12521#endif
Daniel Veillard14412512005-01-21 23:53:26 +000012522 xmlDictCleanup();
Daniel Veillard04054be2003-10-15 10:48:54 +000012523 xmlCleanupInputCallbacks();
12524#ifdef LIBXML_OUTPUT_ENABLED
12525 xmlCleanupOutputCallbacks();
12526#endif
Daniel Veillard36e5cd52004-11-02 14:52:23 +000012527#ifdef LIBXML_SCHEMAS_ENABLED
12528 xmlSchemaCleanupTypes();
Daniel Veillarddd6d3002004-11-03 14:20:29 +000012529 xmlRelaxNGCleanupTypes();
Daniel Veillard36e5cd52004-11-02 14:52:23 +000012530#endif
Daniel Veillard781ac8b2003-05-15 22:11:36 +000012531 xmlCleanupGlobals();
Daniel Veillard2b8c4a12003-10-02 22:28:19 +000012532 xmlResetLastError();
Daniel Veillard74c0e592003-11-25 07:01:38 +000012533 xmlCleanupThreads(); /* must be last if called not from the main thread */
William M. Brack72ee48d2003-12-30 08:30:19 +000012534 xmlCleanupMemory();
Daniel Veillardd0463562001-10-13 09:15:48 +000012535 xmlParserInitialized = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000012536}
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012537
12538/************************************************************************
12539 * *
12540 * New set (2.6.0) of simpler and more flexible APIs *
12541 * *
12542 ************************************************************************/
12543
/**
 * DICT_FREE:
 * @str:  a string
 *
 * Free a string if it is not owned by the "dict" dictionary in the
 * current scope. A NULL @str is ignored, and when "dict" is NULL the
 * string is always freed.
 *
 * The body is wrapped in do { } while (0) so that the macro expands to a
 * single statement and can be used safely in an unbraced if/else; the
 * invocation must be followed by a semicolon. Note that @str is
 * evaluated more than once.
 */
#define DICT_FREE(str)						\
    do {							\
        if ((str) && ((!dict) ||				\
            (xmlDictOwns(dict, (const xmlChar *)(str)) == 0)))	\
            xmlFree((char *)(str));				\
    } while (0)
12555
12556/**
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012557 * xmlCtxtReset:
12558 * @ctxt: an XML parser context
12559 *
12560 * Reset a parser context
12561 */
12562void
12563xmlCtxtReset(xmlParserCtxtPtr ctxt)
12564{
12565 xmlParserInputPtr input;
Daniel Veillard36e5cd52004-11-02 14:52:23 +000012566 xmlDictPtr dict;
12567
12568 if (ctxt == NULL)
12569 return;
12570
12571 dict = ctxt->dict;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012572
12573 while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
12574 xmlFreeInputStream(input);
12575 }
12576 ctxt->inputNr = 0;
12577 ctxt->input = NULL;
12578
12579 ctxt->spaceNr = 0;
12580 ctxt->spaceTab[0] = -1;
12581 ctxt->space = &ctxt->spaceTab[0];
12582
12583
12584 ctxt->nodeNr = 0;
12585 ctxt->node = NULL;
12586
12587 ctxt->nameNr = 0;
12588 ctxt->name = NULL;
12589
Daniel Veillarde96a2a42003-09-24 21:23:56 +000012590 DICT_FREE(ctxt->version);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012591 ctxt->version = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000012592 DICT_FREE(ctxt->encoding);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012593 ctxt->encoding = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000012594 DICT_FREE(ctxt->directory);
12595 ctxt->directory = NULL;
12596 DICT_FREE(ctxt->extSubURI);
12597 ctxt->extSubURI = NULL;
12598 DICT_FREE(ctxt->extSubSystem);
12599 ctxt->extSubSystem = NULL;
12600 if (ctxt->myDoc != NULL)
12601 xmlFreeDoc(ctxt->myDoc);
12602 ctxt->myDoc = NULL;
12603
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012604 ctxt->standalone = -1;
12605 ctxt->hasExternalSubset = 0;
12606 ctxt->hasPErefs = 0;
12607 ctxt->html = 0;
12608 ctxt->external = 0;
12609 ctxt->instate = XML_PARSER_START;
12610 ctxt->token = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012611
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012612 ctxt->wellFormed = 1;
12613 ctxt->nsWellFormed = 1;
Daniel Veillardae289182004-01-21 16:00:43 +000012614 ctxt->disableSAX = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012615 ctxt->valid = 1;
Daniel Veillard766c4f92004-03-26 10:48:29 +000012616#if 0
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012617 ctxt->vctxt.userData = ctxt;
12618 ctxt->vctxt.error = xmlParserValidityError;
12619 ctxt->vctxt.warning = xmlParserValidityWarning;
Daniel Veillard766c4f92004-03-26 10:48:29 +000012620#endif
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012621 ctxt->record_info = 0;
12622 ctxt->nbChars = 0;
12623 ctxt->checkIndex = 0;
12624 ctxt->inSubset = 0;
12625 ctxt->errNo = XML_ERR_OK;
12626 ctxt->depth = 0;
12627 ctxt->charset = XML_CHAR_ENCODING_UTF8;
12628 ctxt->catalogs = NULL;
12629 xmlInitNodeInfoSeq(&ctxt->node_seq);
12630
12631 if (ctxt->attsDefault != NULL) {
12632 xmlHashFree(ctxt->attsDefault, (xmlHashDeallocator) xmlFree);
12633 ctxt->attsDefault = NULL;
12634 }
12635 if (ctxt->attsSpecial != NULL) {
12636 xmlHashFree(ctxt->attsSpecial, NULL);
12637 ctxt->attsSpecial = NULL;
12638 }
12639
Daniel Veillard4432df22003-09-28 18:58:27 +000012640#ifdef LIBXML_CATALOG_ENABLED
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012641 if (ctxt->catalogs != NULL)
12642 xmlCatalogFreeLocal(ctxt->catalogs);
Daniel Veillard4432df22003-09-28 18:58:27 +000012643#endif
Daniel Veillardcc199e02003-10-24 21:11:48 +000012644 if (ctxt->lastError.code != XML_ERR_OK)
12645 xmlResetError(&ctxt->lastError);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012646}
12647
12648/**
Daniel Veillard9ba8e382003-10-28 21:31:45 +000012649 * xmlCtxtResetPush:
12650 * @ctxt: an XML parser context
12651 * @chunk: a pointer to an array of chars
12652 * @size: number of chars in the array
12653 * @filename: an optional file name or URI
12654 * @encoding: the document encoding, or NULL
12655 *
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000012656 * Reset a push parser context
12657 *
12658 * Returns 0 in case of success and 1 in case of error
Daniel Veillard9ba8e382003-10-28 21:31:45 +000012659 */
12660int
12661xmlCtxtResetPush(xmlParserCtxtPtr ctxt, const char *chunk,
12662 int size, const char *filename, const char *encoding)
12663{
12664 xmlParserInputPtr inputStream;
12665 xmlParserInputBufferPtr buf;
12666 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
12667
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000012668 if (ctxt == NULL)
12669 return(1);
12670
Daniel Veillard9ba8e382003-10-28 21:31:45 +000012671 if ((encoding == NULL) && (chunk != NULL) && (size >= 4))
12672 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
12673
12674 buf = xmlAllocParserInputBuffer(enc);
12675 if (buf == NULL)
12676 return(1);
12677
12678 if (ctxt == NULL) {
12679 xmlFreeParserInputBuffer(buf);
12680 return(1);
12681 }
12682
12683 xmlCtxtReset(ctxt);
12684
12685 if (ctxt->pushTab == NULL) {
12686 ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 *
12687 sizeof(xmlChar *));
12688 if (ctxt->pushTab == NULL) {
12689 xmlErrMemory(ctxt, NULL);
12690 xmlFreeParserInputBuffer(buf);
12691 return(1);
12692 }
12693 }
12694
12695 if (filename == NULL) {
12696 ctxt->directory = NULL;
12697 } else {
12698 ctxt->directory = xmlParserGetDirectory(filename);
12699 }
12700
12701 inputStream = xmlNewInputStream(ctxt);
12702 if (inputStream == NULL) {
12703 xmlFreeParserInputBuffer(buf);
12704 return(1);
12705 }
12706
12707 if (filename == NULL)
12708 inputStream->filename = NULL;
12709 else
12710 inputStream->filename = (char *)
12711 xmlCanonicPath((const xmlChar *) filename);
12712 inputStream->buf = buf;
12713 inputStream->base = inputStream->buf->buffer->content;
12714 inputStream->cur = inputStream->buf->buffer->content;
12715 inputStream->end =
12716 &inputStream->buf->buffer->content[inputStream->buf->buffer->use];
12717
12718 inputPush(ctxt, inputStream);
12719
12720 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
12721 (ctxt->input->buf != NULL)) {
12722 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
12723 int cur = ctxt->input->cur - ctxt->input->base;
12724
12725 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
12726
12727 ctxt->input->base = ctxt->input->buf->buffer->content + base;
12728 ctxt->input->cur = ctxt->input->base + cur;
12729 ctxt->input->end =
12730 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->
12731 use];
12732#ifdef DEBUG_PUSH
12733 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
12734#endif
12735 }
12736
12737 if (encoding != NULL) {
12738 xmlCharEncodingHandlerPtr hdlr;
12739
12740 hdlr = xmlFindCharEncodingHandler(encoding);
12741 if (hdlr != NULL) {
12742 xmlSwitchToEncoding(ctxt, hdlr);
12743 } else {
12744 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
12745 "Unsupported encoding %s\n", BAD_CAST encoding);
12746 }
12747 } else if (enc != XML_CHAR_ENCODING_NONE) {
12748 xmlSwitchEncoding(ctxt, enc);
12749 }
12750
12751 return(0);
12752}
12753
12754/**
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012755 * xmlCtxtUseOptions:
12756 * @ctxt: an XML parser context
Daniel Veillard87ab1c12003-12-21 13:01:56 +000012757 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012758 *
12759 * Applies the options to the parser context
12760 *
12761 * Returns 0 in case of success, the set of unknown or unimplemented options
12762 * in case of error.
12763 */
12764int
12765xmlCtxtUseOptions(xmlParserCtxtPtr ctxt, int options)
12766{
Daniel Veillard36e5cd52004-11-02 14:52:23 +000012767 if (ctxt == NULL)
12768 return(-1);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012769 if (options & XML_PARSE_RECOVER) {
12770 ctxt->recovery = 1;
12771 options -= XML_PARSE_RECOVER;
12772 } else
12773 ctxt->recovery = 0;
12774 if (options & XML_PARSE_DTDLOAD) {
12775 ctxt->loadsubset = XML_DETECT_IDS;
12776 options -= XML_PARSE_DTDLOAD;
12777 } else
12778 ctxt->loadsubset = 0;
12779 if (options & XML_PARSE_DTDATTR) {
12780 ctxt->loadsubset |= XML_COMPLETE_ATTRS;
12781 options -= XML_PARSE_DTDATTR;
12782 }
12783 if (options & XML_PARSE_NOENT) {
12784 ctxt->replaceEntities = 1;
12785 /* ctxt->loadsubset |= XML_DETECT_IDS; */
12786 options -= XML_PARSE_NOENT;
12787 } else
12788 ctxt->replaceEntities = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012789 if (options & XML_PARSE_PEDANTIC) {
12790 ctxt->pedantic = 1;
12791 options -= XML_PARSE_PEDANTIC;
12792 } else
12793 ctxt->pedantic = 0;
12794 if (options & XML_PARSE_NOBLANKS) {
12795 ctxt->keepBlanks = 0;
12796 ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace;
12797 options -= XML_PARSE_NOBLANKS;
12798 } else
12799 ctxt->keepBlanks = 1;
12800 if (options & XML_PARSE_DTDVALID) {
12801 ctxt->validate = 1;
12802 if (options & XML_PARSE_NOWARNING)
12803 ctxt->vctxt.warning = NULL;
12804 if (options & XML_PARSE_NOERROR)
12805 ctxt->vctxt.error = NULL;
12806 options -= XML_PARSE_DTDVALID;
12807 } else
12808 ctxt->validate = 0;
Daniel Veillard971771e2005-07-09 17:32:57 +000012809 if (options & XML_PARSE_NOWARNING) {
12810 ctxt->sax->warning = NULL;
12811 options -= XML_PARSE_NOWARNING;
12812 }
12813 if (options & XML_PARSE_NOERROR) {
12814 ctxt->sax->error = NULL;
12815 ctxt->sax->fatalError = NULL;
12816 options -= XML_PARSE_NOERROR;
12817 }
Daniel Veillard81273902003-09-30 00:43:48 +000012818#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012819 if (options & XML_PARSE_SAX1) {
12820 ctxt->sax->startElement = xmlSAX2StartElement;
12821 ctxt->sax->endElement = xmlSAX2EndElement;
12822 ctxt->sax->startElementNs = NULL;
12823 ctxt->sax->endElementNs = NULL;
12824 ctxt->sax->initialized = 1;
12825 options -= XML_PARSE_SAX1;
12826 }
Daniel Veillard81273902003-09-30 00:43:48 +000012827#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde96a2a42003-09-24 21:23:56 +000012828 if (options & XML_PARSE_NODICT) {
12829 ctxt->dictNames = 0;
12830 options -= XML_PARSE_NODICT;
12831 } else {
12832 ctxt->dictNames = 1;
12833 }
Daniel Veillarddca8cc72003-09-26 13:53:14 +000012834 if (options & XML_PARSE_NOCDATA) {
12835 ctxt->sax->cdataBlock = NULL;
12836 options -= XML_PARSE_NOCDATA;
12837 }
12838 if (options & XML_PARSE_NSCLEAN) {
12839 ctxt->options |= XML_PARSE_NSCLEAN;
12840 options -= XML_PARSE_NSCLEAN;
12841 }
Daniel Veillard61b93382003-11-03 14:28:31 +000012842 if (options & XML_PARSE_NONET) {
12843 ctxt->options |= XML_PARSE_NONET;
12844 options -= XML_PARSE_NONET;
12845 }
Daniel Veillard7ec29972003-10-31 14:36:36 +000012846 ctxt->linenumbers = 1;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012847 return (options);
12848}
12849
12850/**
12851 * xmlDoRead:
12852 * @ctxt: an XML parser context
Daniel Veillard60942de2003-09-25 21:05:58 +000012853 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012854 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000012855 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012856 * @reuse: keep the context for reuse
12857 *
12858 * Common front-end for the xmlRead functions
12859 *
12860 * Returns the resulting document tree or NULL
12861 */
12862static xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000012863xmlDoRead(xmlParserCtxtPtr ctxt, const char *URL, const char *encoding,
12864 int options, int reuse)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012865{
12866 xmlDocPtr ret;
12867
12868 xmlCtxtUseOptions(ctxt, options);
12869 if (encoding != NULL) {
12870 xmlCharEncodingHandlerPtr hdlr;
12871
12872 hdlr = xmlFindCharEncodingHandler(encoding);
12873 if (hdlr != NULL)
12874 xmlSwitchToEncoding(ctxt, hdlr);
12875 }
Daniel Veillard60942de2003-09-25 21:05:58 +000012876 if ((URL != NULL) && (ctxt->input != NULL) &&
12877 (ctxt->input->filename == NULL))
12878 ctxt->input->filename = (char *) xmlStrdup((const xmlChar *) URL);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012879 xmlParseDocument(ctxt);
12880 if ((ctxt->wellFormed) || ctxt->recovery)
12881 ret = ctxt->myDoc;
12882 else {
12883 ret = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000012884 if (ctxt->myDoc != NULL) {
Daniel Veillarde96a2a42003-09-24 21:23:56 +000012885 xmlFreeDoc(ctxt->myDoc);
12886 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012887 }
Daniel Veillarde96a2a42003-09-24 21:23:56 +000012888 ctxt->myDoc = NULL;
12889 if (!reuse) {
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012890 xmlFreeParserCtxt(ctxt);
Daniel Veillarde96a2a42003-09-24 21:23:56 +000012891 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012892
12893 return (ret);
12894}
12895
12896/**
12897 * xmlReadDoc:
12898 * @cur: a pointer to a zero terminated string
Daniel Veillard60942de2003-09-25 21:05:58 +000012899 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012900 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000012901 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012902 *
12903 * parse an XML in-memory document and build a tree.
12904 *
12905 * Returns the resulting document tree
12906 */
12907xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000012908xmlReadDoc(const xmlChar * cur, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012909{
12910 xmlParserCtxtPtr ctxt;
12911
12912 if (cur == NULL)
12913 return (NULL);
12914
12915 ctxt = xmlCreateDocParserCtxt(cur);
12916 if (ctxt == NULL)
12917 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000012918 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012919}
12920
12921/**
12922 * xmlReadFile:
12923 * @filename: a file or URL
12924 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000012925 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012926 *
12927 * parse an XML file from the filesystem or the network.
12928 *
12929 * Returns the resulting document tree
12930 */
12931xmlDocPtr
12932xmlReadFile(const char *filename, const char *encoding, int options)
12933{
12934 xmlParserCtxtPtr ctxt;
12935
Daniel Veillard61b93382003-11-03 14:28:31 +000012936 ctxt = xmlCreateURLParserCtxt(filename, options);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012937 if (ctxt == NULL)
12938 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000012939 return (xmlDoRead(ctxt, NULL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012940}
12941
12942/**
12943 * xmlReadMemory:
12944 * @buffer: a pointer to a char array
12945 * @size: the size of the array
Daniel Veillard60942de2003-09-25 21:05:58 +000012946 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012947 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000012948 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012949 *
12950 * parse an XML in-memory document and build a tree.
12951 *
12952 * Returns the resulting document tree
12953 */
12954xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000012955xmlReadMemory(const char *buffer, int size, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012956{
12957 xmlParserCtxtPtr ctxt;
12958
12959 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
12960 if (ctxt == NULL)
12961 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000012962 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012963}
12964
12965/**
12966 * xmlReadFd:
12967 * @fd: an open file descriptor
Daniel Veillard60942de2003-09-25 21:05:58 +000012968 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012969 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000012970 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012971 *
12972 * parse an XML from a file descriptor and build a tree.
Daniel Veillard4bc5f432003-12-22 18:13:12 +000012973 * NOTE that the file descriptor will not be closed when the
12974 * reader is closed or reset.
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012975 *
12976 * Returns the resulting document tree
12977 */
12978xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000012979xmlReadFd(int fd, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012980{
12981 xmlParserCtxtPtr ctxt;
12982 xmlParserInputBufferPtr input;
12983 xmlParserInputPtr stream;
12984
12985 if (fd < 0)
12986 return (NULL);
12987
12988 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
12989 if (input == NULL)
12990 return (NULL);
Daniel Veillard4bc5f432003-12-22 18:13:12 +000012991 input->closecallback = NULL;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012992 ctxt = xmlNewParserCtxt();
12993 if (ctxt == NULL) {
12994 xmlFreeParserInputBuffer(input);
12995 return (NULL);
12996 }
12997 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
12998 if (stream == NULL) {
12999 xmlFreeParserInputBuffer(input);
13000 xmlFreeParserCtxt(ctxt);
13001 return (NULL);
13002 }
13003 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000013004 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013005}
13006
13007/**
13008 * xmlReadIO:
13009 * @ioread: an I/O read function
13010 * @ioclose: an I/O close function
13011 * @ioctx: an I/O handler
Daniel Veillard60942de2003-09-25 21:05:58 +000013012 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013013 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013014 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013015 *
13016 * parse an XML document from I/O functions and source and build a tree.
13017 *
13018 * Returns the resulting document tree
13019 */
13020xmlDocPtr
13021xmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
Daniel Veillard60942de2003-09-25 21:05:58 +000013022 void *ioctx, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013023{
13024 xmlParserCtxtPtr ctxt;
13025 xmlParserInputBufferPtr input;
13026 xmlParserInputPtr stream;
13027
13028 if (ioread == NULL)
13029 return (NULL);
13030
13031 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
13032 XML_CHAR_ENCODING_NONE);
13033 if (input == NULL)
13034 return (NULL);
13035 ctxt = xmlNewParserCtxt();
13036 if (ctxt == NULL) {
13037 xmlFreeParserInputBuffer(input);
13038 return (NULL);
13039 }
13040 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
13041 if (stream == NULL) {
13042 xmlFreeParserInputBuffer(input);
13043 xmlFreeParserCtxt(ctxt);
13044 return (NULL);
13045 }
13046 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000013047 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013048}
13049
13050/**
13051 * xmlCtxtReadDoc:
13052 * @ctxt: an XML parser context
13053 * @cur: a pointer to a zero terminated string
Daniel Veillard60942de2003-09-25 21:05:58 +000013054 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013055 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013056 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013057 *
13058 * parse an XML in-memory document and build a tree.
13059 * This reuses the existing @ctxt parser context
13060 *
13061 * Returns the resulting document tree
13062 */
13063xmlDocPtr
13064xmlCtxtReadDoc(xmlParserCtxtPtr ctxt, const xmlChar * cur,
Daniel Veillard60942de2003-09-25 21:05:58 +000013065 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013066{
13067 xmlParserInputPtr stream;
13068
13069 if (cur == NULL)
13070 return (NULL);
13071 if (ctxt == NULL)
13072 return (NULL);
13073
13074 xmlCtxtReset(ctxt);
13075
13076 stream = xmlNewStringInputStream(ctxt, cur);
13077 if (stream == NULL) {
13078 return (NULL);
13079 }
13080 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000013081 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013082}
13083
13084/**
13085 * xmlCtxtReadFile:
13086 * @ctxt: an XML parser context
13087 * @filename: a file or URL
13088 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013089 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013090 *
13091 * parse an XML file from the filesystem or the network.
13092 * This reuses the existing @ctxt parser context
13093 *
13094 * Returns the resulting document tree
13095 */
13096xmlDocPtr
13097xmlCtxtReadFile(xmlParserCtxtPtr ctxt, const char *filename,
13098 const char *encoding, int options)
13099{
13100 xmlParserInputPtr stream;
13101
13102 if (filename == NULL)
13103 return (NULL);
13104 if (ctxt == NULL)
13105 return (NULL);
13106
13107 xmlCtxtReset(ctxt);
13108
Daniel Veillard29614c72004-11-26 10:47:26 +000013109 stream = xmlLoadExternalEntity(filename, NULL, ctxt);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013110 if (stream == NULL) {
13111 return (NULL);
13112 }
13113 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000013114 return (xmlDoRead(ctxt, NULL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013115}
13116
13117/**
13118 * xmlCtxtReadMemory:
13119 * @ctxt: an XML parser context
13120 * @buffer: a pointer to a char array
13121 * @size: the size of the array
Daniel Veillard60942de2003-09-25 21:05:58 +000013122 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013123 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013124 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013125 *
13126 * parse an XML in-memory document and build a tree.
13127 * This reuses the existing @ctxt parser context
13128 *
13129 * Returns the resulting document tree
13130 */
13131xmlDocPtr
13132xmlCtxtReadMemory(xmlParserCtxtPtr ctxt, const char *buffer, int size,
Daniel Veillard60942de2003-09-25 21:05:58 +000013133 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013134{
13135 xmlParserInputBufferPtr input;
13136 xmlParserInputPtr stream;
13137
13138 if (ctxt == NULL)
13139 return (NULL);
13140 if (buffer == NULL)
13141 return (NULL);
13142
13143 xmlCtxtReset(ctxt);
13144
13145 input = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
13146 if (input == NULL) {
13147 return(NULL);
13148 }
13149
13150 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
13151 if (stream == NULL) {
13152 xmlFreeParserInputBuffer(input);
13153 return(NULL);
13154 }
13155
13156 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000013157 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013158}
13159
13160/**
13161 * xmlCtxtReadFd:
13162 * @ctxt: an XML parser context
13163 * @fd: an open file descriptor
Daniel Veillard60942de2003-09-25 21:05:58 +000013164 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013165 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013166 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013167 *
13168 * parse an XML from a file descriptor and build a tree.
13169 * This reuses the existing @ctxt parser context
Daniel Veillard4bc5f432003-12-22 18:13:12 +000013170 * NOTE that the file descriptor will not be closed when the
13171 * reader is closed or reset.
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013172 *
13173 * Returns the resulting document tree
13174 */
13175xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000013176xmlCtxtReadFd(xmlParserCtxtPtr ctxt, int fd,
13177 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013178{
13179 xmlParserInputBufferPtr input;
13180 xmlParserInputPtr stream;
13181
13182 if (fd < 0)
13183 return (NULL);
13184 if (ctxt == NULL)
13185 return (NULL);
13186
13187 xmlCtxtReset(ctxt);
13188
13189
13190 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
13191 if (input == NULL)
13192 return (NULL);
Daniel Veillard4bc5f432003-12-22 18:13:12 +000013193 input->closecallback = NULL;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013194 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
13195 if (stream == NULL) {
13196 xmlFreeParserInputBuffer(input);
13197 return (NULL);
13198 }
13199 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000013200 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013201}
13202
13203/**
13204 * xmlCtxtReadIO:
13205 * @ctxt: an XML parser context
13206 * @ioread: an I/O read function
13207 * @ioclose: an I/O close function
13208 * @ioctx: an I/O handler
Daniel Veillard60942de2003-09-25 21:05:58 +000013209 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013210 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013211 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013212 *
13213 * parse an XML document from I/O functions and source and build a tree.
13214 * This reuses the existing @ctxt parser context
13215 *
13216 * Returns the resulting document tree
13217 */
13218xmlDocPtr
13219xmlCtxtReadIO(xmlParserCtxtPtr ctxt, xmlInputReadCallback ioread,
13220 xmlInputCloseCallback ioclose, void *ioctx,
Daniel Veillard60942de2003-09-25 21:05:58 +000013221 const char *URL,
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013222 const char *encoding, int options)
13223{
13224 xmlParserInputBufferPtr input;
13225 xmlParserInputPtr stream;
13226
13227 if (ioread == NULL)
13228 return (NULL);
13229 if (ctxt == NULL)
13230 return (NULL);
13231
13232 xmlCtxtReset(ctxt);
13233
13234 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
13235 XML_CHAR_ENCODING_NONE);
13236 if (input == NULL)
13237 return (NULL);
13238 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
13239 if (stream == NULL) {
13240 xmlFreeParserInputBuffer(input);
13241 return (NULL);
13242 }
13243 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000013244 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013245}
Daniel Veillard5d4644e2005-04-01 13:11:58 +000013246
13247#define bottom_parser
13248#include "elfgcchack.h"