blob: ce6272ce9c1bab6089b57183e7ed028a59e7da45 [file] [log] [blame]
Owen Taylor3473f882001-02-23 17:55:21 +00001/*
2 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
3 * implemented on top of the SAX interfaces
4 *
5 * References:
6 * The XML specification:
7 * http://www.w3.org/TR/REC-xml
8 * Original 1.0 version:
9 * http://www.w3.org/TR/1998/REC-xml-19980210
10 * XML second edition working draft
11 * http://www.w3.org/TR/2000/WD-xml-2e-20000814
12 *
13 * Okay this is a big file, the parser core is around 7000 lines, then it
14 * is followed by the progressive parser top routines, then the various
Daniel Veillardcbaf3992001-12-31 16:16:02 +000015 * high level APIs to call the parser and a few miscellaneous functions.
Owen Taylor3473f882001-02-23 17:55:21 +000016 * A number of helper functions and deprecated ones have been moved to
17 * parserInternals.c to reduce this file size.
18 * As much as possible the functions are associated with their relative
19 * production in the XML specification. A few productions defining the
20 * different ranges of character are actually implemented either in
21 * parserInternals.h or parserInternals.c
22 * The DOM tree build is realized from the default SAX callbacks in
23 * the module SAX.c.
24 * The routines doing the validation checks are in valid.c and called either
Daniel Veillardcbaf3992001-12-31 16:16:02 +000025 * from the SAX callbacks or as standalone functions using a preparsed
Owen Taylor3473f882001-02-23 17:55:21 +000026 * document.
27 *
28 * See Copyright for the status of this software.
29 *
Daniel Veillardc5d64342001-06-24 12:13:24 +000030 * daniel@veillard.com
Owen Taylor3473f882001-02-23 17:55:21 +000031 */
32
Daniel Veillard34ce8be2002-03-18 19:37:11 +000033#define IN_LIBXML
Bjorn Reese70a9da52001-04-21 16:57:29 +000034#include "libxml.h"
35
Daniel Veillard3c5ed912002-01-08 10:36:16 +000036#if defined(WIN32) && !defined (__CYGWIN__)
Owen Taylor3473f882001-02-23 17:55:21 +000037#define XML_DIR_SEP '\\'
38#else
Owen Taylor3473f882001-02-23 17:55:21 +000039#define XML_DIR_SEP '/'
40#endif
41
Owen Taylor3473f882001-02-23 17:55:21 +000042#include <stdlib.h>
43#include <string.h>
Aleksey Sanine7acf432003-10-02 20:05:27 +000044#include <stdarg.h>
Owen Taylor3473f882001-02-23 17:55:21 +000045#include <libxml/xmlmemory.h>
Daniel Veillardd0463562001-10-13 09:15:48 +000046#include <libxml/threads.h>
47#include <libxml/globals.h>
Owen Taylor3473f882001-02-23 17:55:21 +000048#include <libxml/tree.h>
49#include <libxml/parser.h>
50#include <libxml/parserInternals.h>
51#include <libxml/valid.h>
52#include <libxml/entities.h>
53#include <libxml/xmlerror.h>
54#include <libxml/encoding.h>
55#include <libxml/xmlIO.h>
56#include <libxml/uri.h>
Daniel Veillard5d90b6c2001-08-22 14:29:45 +000057#ifdef LIBXML_CATALOG_ENABLED
58#include <libxml/catalog.h>
59#endif
Owen Taylor3473f882001-02-23 17:55:21 +000060
61#ifdef HAVE_CTYPE_H
62#include <ctype.h>
63#endif
64#ifdef HAVE_STDLIB_H
65#include <stdlib.h>
66#endif
67#ifdef HAVE_SYS_STAT_H
68#include <sys/stat.h>
69#endif
70#ifdef HAVE_FCNTL_H
71#include <fcntl.h>
72#endif
73#ifdef HAVE_UNISTD_H
74#include <unistd.h>
75#endif
76#ifdef HAVE_ZLIB_H
77#include <zlib.h>
78#endif
79
/**
 * xmlParserMaxDepth:
 *
 * Arbitrary upper bound on the depth of the XML documents the parser
 * accepts to process. It is a safety boundary, not an intrinsic
 * limitation of the parser. The default value is 1024.
 */
unsigned int xmlParserMaxDepth = 1024;
Owen Taylor3473f882001-02-23 17:55:21 +000088
/* Compile-time switch set to 1 in this file. */
#define SAX2 1

/* Sizes, in bytes, of the two scratch-buffer classes named below. */
#define XML_PARSER_BIG_BUFFER_SIZE 300
#define XML_PARSER_BUFFER_SIZE 100

/* Marker string identifying a document built in SAX compatibility mode. */
#define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"
95
/*
 * NULL-terminated list of the processing-instruction targets starting
 * with "xml" that the W3C specifications allow.
 */
static const char *xmlW3CPIs[] = { "xml-stylesheet", NULL };
104
Daniel Veillarda07050d2003-10-19 14:46:32 +0000105
Owen Taylor3473f882001-02-23 17:55:21 +0000106/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
Owen Taylor3473f882001-02-23 17:55:21 +0000107xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
108 const xmlChar **str);
109
Daniel Veillard7d515752003-09-26 19:12:37 +0000110static xmlParserErrors
Daniel Veillarda97a19b2001-05-20 13:19:52 +0000111xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
112 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +0000113 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +0000114 const xmlChar *ID, xmlNodePtr *list);
Owen Taylor3473f882001-02-23 17:55:21 +0000115
Daniel Veillard81273902003-09-30 00:43:48 +0000116#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +0000117static void
118xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
119 xmlNodePtr lastNode);
Daniel Veillard81273902003-09-30 00:43:48 +0000120#endif /* LIBXML_LEGACY_ENABLED */
Daniel Veillard8107a222002-01-13 14:10:10 +0000121
Daniel Veillard7d515752003-09-26 19:12:37 +0000122static xmlParserErrors
Daniel Veillard328f48c2002-11-15 15:24:34 +0000123xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
124 const xmlChar *string, void *user_data, xmlNodePtr *lst);
Daniel Veillarde57ec792003-09-10 10:50:59 +0000125
126/************************************************************************
127 * *
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000128 * Some factorized error routines *
129 * *
130 ************************************************************************/
131
132/**
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000133 * xmlErrAttributeDup:
134 * @ctxt: an XML parser context
135 * @prefix: the attribute prefix
136 * @localname: the attribute localname
137 *
138 * Handle a redefinition of attribute error
139 */
140static void
141xmlErrAttributeDup(xmlParserCtxtPtr ctxt, const xmlChar * prefix,
142 const xmlChar * localname)
143{
144 ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
Daniel Veillard2b8c4a12003-10-02 22:28:19 +0000145 if (prefix == NULL)
Daniel Veillard659e71e2003-10-10 14:10:40 +0000146 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000147 ctxt->errNo, XML_ERR_FATAL, NULL, 0,
148 (const char *) localname, NULL, NULL, 0, 0,
149 "Attribute %s redefined\n", localname);
Daniel Veillard2b8c4a12003-10-02 22:28:19 +0000150 else
Daniel Veillard659e71e2003-10-10 14:10:40 +0000151 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000152 ctxt->errNo, XML_ERR_FATAL, NULL, 0,
153 (const char *) prefix, (const char *) localname,
154 NULL, 0, 0, "Attribute %s:%s redefined\n", prefix,
155 localname);
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000156 ctxt->wellFormed = 0;
157 if (ctxt->recovery == 0)
158 ctxt->disableSAX = 1;
159}
160
161/**
162 * xmlFatalErr:
163 * @ctxt: an XML parser context
164 * @error: the error number
165 * @extra: extra information string
166 *
167 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
168 */
169static void
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000170xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info)
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000171{
172 const char *errmsg;
173
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000174 switch (error) {
175 case XML_ERR_INVALID_HEX_CHARREF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000176 errmsg = "CharRef: invalid hexadecimal value\n";
177 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000178 case XML_ERR_INVALID_DEC_CHARREF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000179 errmsg = "CharRef: invalid decimal value\n";
180 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000181 case XML_ERR_INVALID_CHARREF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000182 errmsg = "CharRef: invalid value\n";
183 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000184 case XML_ERR_INTERNAL_ERROR:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000185 errmsg = "internal error";
186 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000187 case XML_ERR_PEREF_AT_EOF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000188 errmsg = "PEReference at end of document\n";
189 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000190 case XML_ERR_PEREF_IN_PROLOG:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000191 errmsg = "PEReference in prolog\n";
192 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000193 case XML_ERR_PEREF_IN_EPILOG:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000194 errmsg = "PEReference in epilog\n";
195 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000196 case XML_ERR_PEREF_NO_NAME:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000197 errmsg = "PEReference: no name\n";
198 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000199 case XML_ERR_PEREF_SEMICOL_MISSING:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000200 errmsg = "PEReference: expecting ';'\n";
201 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000202 case XML_ERR_ENTITY_LOOP:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000203 errmsg = "Detected an entity reference loop\n";
204 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000205 case XML_ERR_ENTITY_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000206 errmsg = "EntityValue: \" or ' expected\n";
207 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000208 case XML_ERR_ENTITY_PE_INTERNAL:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000209 errmsg = "PEReferences forbidden in internal subset\n";
210 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000211 case XML_ERR_ENTITY_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000212 errmsg = "EntityValue: \" or ' expected\n";
213 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000214 case XML_ERR_ATTRIBUTE_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000215 errmsg = "AttValue: \" or ' expected\n";
216 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000217 case XML_ERR_LT_IN_ATTRIBUTE:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000218 errmsg = "Unescaped '<' not allowed in attributes values\n";
219 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000220 case XML_ERR_LITERAL_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000221 errmsg = "SystemLiteral \" or ' expected\n";
222 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000223 case XML_ERR_LITERAL_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000224 errmsg = "Unfinished System or Public ID \" or ' expected\n";
225 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000226 case XML_ERR_MISPLACED_CDATA_END:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000227 errmsg = "Sequence ']]>' not allowed in content\n";
228 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000229 case XML_ERR_URI_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000230 errmsg = "SYSTEM or PUBLIC, the URI is missing\n";
231 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000232 case XML_ERR_PUBID_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000233 errmsg = "PUBLIC, the Public Identifier is missing\n";
234 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000235 case XML_ERR_HYPHEN_IN_COMMENT:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000236 errmsg = "Comment must not contain '--' (double-hyphen)\n";
237 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000238 case XML_ERR_PI_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000239 errmsg = "xmlParsePI : no target name\n";
240 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000241 case XML_ERR_RESERVED_XML_NAME:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000242 errmsg = "Invalid PI name\n";
243 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000244 case XML_ERR_NOTATION_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000245 errmsg = "NOTATION: Name expected here\n";
246 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000247 case XML_ERR_NOTATION_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000248 errmsg = "'>' required to close NOTATION declaration\n";
249 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000250 case XML_ERR_VALUE_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000251 errmsg = "Entity value required\n";
252 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000253 case XML_ERR_URI_FRAGMENT:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000254 errmsg = "Fragment not allowed";
255 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000256 case XML_ERR_ATTLIST_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000257 errmsg = "'(' required to start ATTLIST enumeration\n";
258 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000259 case XML_ERR_NMTOKEN_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000260 errmsg = "NmToken expected in ATTLIST enumeration\n";
261 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000262 case XML_ERR_ATTLIST_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000263 errmsg = "')' required to finish ATTLIST enumeration\n";
264 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000265 case XML_ERR_MIXED_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000266 errmsg = "MixedContentDecl : '|' or ')*' expected\n";
267 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000268 case XML_ERR_PCDATA_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000269 errmsg = "MixedContentDecl : '#PCDATA' expected\n";
270 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000271 case XML_ERR_ELEMCONTENT_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000272 errmsg = "ContentDecl : Name or '(' expected\n";
273 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000274 case XML_ERR_ELEMCONTENT_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000275 errmsg = "ContentDecl : ',' '|' or ')' expected\n";
276 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000277 case XML_ERR_PEREF_IN_INT_SUBSET:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000278 errmsg =
279 "PEReference: forbidden within markup decl in internal subset\n";
280 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000281 case XML_ERR_GT_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000282 errmsg = "expected '>'\n";
283 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000284 case XML_ERR_CONDSEC_INVALID:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000285 errmsg = "XML conditional section '[' expected\n";
286 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000287 case XML_ERR_EXT_SUBSET_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000288 errmsg = "Content error in the external subset\n";
289 break;
290 case XML_ERR_CONDSEC_INVALID_KEYWORD:
291 errmsg =
292 "conditional section INCLUDE or IGNORE keyword expected\n";
293 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000294 case XML_ERR_CONDSEC_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000295 errmsg = "XML conditional section not closed\n";
296 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000297 case XML_ERR_XMLDECL_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000298 errmsg = "Text declaration '<?xml' required\n";
299 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000300 case XML_ERR_XMLDECL_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000301 errmsg = "parsing XML declaration: '?>' expected\n";
302 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000303 case XML_ERR_EXT_ENTITY_STANDALONE:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000304 errmsg = "external parsed entities cannot be standalone\n";
305 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000306 case XML_ERR_ENTITYREF_SEMICOL_MISSING:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000307 errmsg = "EntityRef: expecting ';'\n";
308 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000309 case XML_ERR_DOCTYPE_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000310 errmsg = "DOCTYPE improperly terminated\n";
311 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000312 case XML_ERR_LTSLASH_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000313 errmsg = "EndTag: '</' not found\n";
314 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000315 case XML_ERR_EQUAL_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000316 errmsg = "expected '='\n";
317 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000318 case XML_ERR_STRING_NOT_CLOSED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000319 errmsg = "String not closed expecting \" or '\n";
320 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000321 case XML_ERR_STRING_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000322 errmsg = "String not started expecting ' or \"\n";
323 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000324 case XML_ERR_ENCODING_NAME:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000325 errmsg = "Invalid XML encoding name\n";
326 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000327 case XML_ERR_STANDALONE_VALUE:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000328 errmsg = "standalone accepts only 'yes' or 'no'\n";
329 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000330 case XML_ERR_DOCUMENT_EMPTY:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000331 errmsg = "Document is empty\n";
332 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000333 case XML_ERR_DOCUMENT_END:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000334 errmsg = "Extra content at the end of the document\n";
335 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000336 case XML_ERR_NOT_WELL_BALANCED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000337 errmsg = "chunk is not well balanced\n";
338 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000339 case XML_ERR_EXTRA_CONTENT:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000340 errmsg = "extra content at the end of well balanced chunk\n";
341 break;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000342 case XML_ERR_VERSION_MISSING:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000343 errmsg = "Malformed declaration expecting version\n";
344 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000345#if 0
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000346 case:
347 errmsg = "\n";
348 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000349#endif
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000350 default:
351 errmsg = "Unregistered error message\n";
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000352 }
353 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000354 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000355 XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, errmsg,
356 info);
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000357 ctxt->wellFormed = 0;
358 if (ctxt->recovery == 0)
359 ctxt->disableSAX = 1;
360}
361
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000362/**
363 * xmlFatalErrMsg:
364 * @ctxt: an XML parser context
365 * @error: the error number
366 * @msg: the error message
367 *
368 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
369 */
370static void
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000371xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
372 const char *msg)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000373{
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000374 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000375 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000376 XML_ERR_FATAL, NULL, 0, NULL, NULL, NULL, 0, 0, msg);
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000377 ctxt->wellFormed = 0;
378 if (ctxt->recovery == 0)
379 ctxt->disableSAX = 1;
380}
381
382/**
Daniel Veillard24eb9782003-10-04 21:08:09 +0000383 * xmlWarningMsg:
384 * @ctxt: an XML parser context
385 * @error: the error number
386 * @msg: the error message
387 * @str1: extra data
388 * @str2: extra data
389 *
390 * Handle a warning.
391 */
392static void
393xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
394 const char *msg, const xmlChar *str1, const xmlChar *str2)
395{
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000396 xmlStructuredErrorFunc schannel = NULL;
Daniel Veillardc790bf42003-10-11 10:50:10 +0000397
Daniel Veillard24eb9782003-10-04 21:08:09 +0000398 ctxt->errNo = error;
Daniel Veillardc790bf42003-10-11 10:50:10 +0000399 if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC))
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000400 schannel = ctxt->sax->serror;
401 __xmlRaiseError(schannel,
Daniel Veillard659e71e2003-10-10 14:10:40 +0000402 (ctxt->sax) ? ctxt->sax->warning : NULL,
403 ctxt->userData,
Daniel Veillard24eb9782003-10-04 21:08:09 +0000404 ctxt, NULL, XML_FROM_PARSER, error,
405 XML_ERR_WARNING, NULL, 0,
406 (const char *) str1, (const char *) str2, NULL, 0, 0,
407 msg, (const char *) str1, (const char *) str2);
408}
409
410/**
411 * xmlValidityError:
412 * @ctxt: an XML parser context
413 * @error: the error number
414 * @msg: the error message
415 * @str1: extra data
416 *
417 * Handle a warning.
418 */
419static void
420xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error,
421 const char *msg, const xmlChar *str1)
422{
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000423 xmlStructuredErrorFunc schannel = NULL;
Daniel Veillard24eb9782003-10-04 21:08:09 +0000424 ctxt->errNo = error;
Daniel Veillardc790bf42003-10-11 10:50:10 +0000425 if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC))
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000426 schannel = ctxt->sax->serror;
Daniel Veillardc790bf42003-10-11 10:50:10 +0000427 __xmlRaiseError(schannel,
Daniel Veillard659e71e2003-10-10 14:10:40 +0000428 ctxt->vctxt.error, ctxt->vctxt.userData,
Daniel Veillard24eb9782003-10-04 21:08:09 +0000429 ctxt, NULL, XML_FROM_DTD, error,
430 XML_ERR_ERROR, NULL, 0, (const char *) str1,
431 NULL, NULL, 0, 0,
432 msg, (const char *) str1);
433 ctxt->valid = 0;
434}
435
436/**
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000437 * xmlFatalErrMsgInt:
438 * @ctxt: an XML parser context
439 * @error: the error number
440 * @msg: the error message
441 * @val: an integer value
442 *
443 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
444 */
445static void
446xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000447 const char *msg, int val)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000448{
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000449 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000450 __xmlRaiseError(NULL, NULL, NULL,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000451 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
452 NULL, 0, NULL, NULL, NULL, val, 0, msg, val);
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000453 ctxt->wellFormed = 0;
454 if (ctxt->recovery == 0)
455 ctxt->disableSAX = 1;
456}
457
458/**
Daniel Veillardf403d292003-10-05 13:51:35 +0000459 * xmlFatalErrMsgStrIntStr:
460 * @ctxt: an XML parser context
461 * @error: the error number
462 * @msg: the error message
463 * @str1: an string info
464 * @val: an integer value
465 * @str2: an string info
466 *
467 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
468 */
469static void
470xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
471 const char *msg, const xmlChar *str1, int val,
472 const xmlChar *str2)
473{
474 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000475 __xmlRaiseError(NULL, NULL, NULL,
Daniel Veillardf403d292003-10-05 13:51:35 +0000476 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
477 NULL, 0, (const char *) str1, (const char *) str2,
478 NULL, val, 0, msg, str1, val, str2);
479 ctxt->wellFormed = 0;
480 if (ctxt->recovery == 0)
481 ctxt->disableSAX = 1;
482}
483
484/**
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000485 * xmlFatalErrMsgStr:
486 * @ctxt: an XML parser context
487 * @error: the error number
488 * @msg: the error message
489 * @val: a string value
490 *
491 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
492 */
493static void
494xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000495 const char *msg, const xmlChar * val)
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000496{
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000497 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000498 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000499 XML_FROM_PARSER, error, XML_ERR_FATAL,
500 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
501 val);
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000502 ctxt->wellFormed = 0;
503 if (ctxt->recovery == 0)
504 ctxt->disableSAX = 1;
505}
506
507/**
Daniel Veillardf403d292003-10-05 13:51:35 +0000508 * xmlErrMsgStr:
509 * @ctxt: an XML parser context
510 * @error: the error number
511 * @msg: the error message
512 * @val: a string value
513 *
514 * Handle a non fatal parser error
515 */
516static void
517xmlErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
518 const char *msg, const xmlChar * val)
519{
520 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000521 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
Daniel Veillardf403d292003-10-05 13:51:35 +0000522 XML_FROM_PARSER, error, XML_ERR_ERROR,
523 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
524 val);
525}
526
527/**
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000528 * xmlNsErr:
529 * @ctxt: an XML parser context
530 * @error: the error number
531 * @msg: the message
532 * @info1: extra information string
533 * @info2: extra information string
534 *
535 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
536 */
537static void
538xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
539 const char *msg,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000540 const xmlChar * info1, const xmlChar * info2,
541 const xmlChar * info3)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000542{
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000543 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000544 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000545 XML_ERR_ERROR, NULL, 0, (const char *) info1,
546 (const char *) info2, (const char *) info3, 0, 0, msg,
547 info1, info2, info3);
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000548 ctxt->nsWellFormed = 0;
549}
550
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000551/************************************************************************
552 * *
Daniel Veillarde57ec792003-09-10 10:50:59 +0000553 * SAX2 defaulted attributes handling *
554 * *
555 ************************************************************************/
556
557/**
558 * xmlDetectSAX2:
559 * @ctxt: an XML parser context
560 *
561 * Do the SAX2 detection and specific intialization
562 */
563static void
564xmlDetectSAX2(xmlParserCtxtPtr ctxt) {
565 if (ctxt == NULL) return;
Daniel Veillard81273902003-09-30 00:43:48 +0000566#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +0000567 if ((ctxt->sax) && (ctxt->sax->initialized == XML_SAX2_MAGIC) &&
568 ((ctxt->sax->startElementNs != NULL) ||
569 (ctxt->sax->endElementNs != NULL))) ctxt->sax2 = 1;
Daniel Veillard81273902003-09-30 00:43:48 +0000570#else
571 ctxt->sax2 = 1;
572#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +0000573
574 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
575 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
576 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
577}
578
Daniel Veillarde57ec792003-09-10 10:50:59 +0000579typedef struct _xmlDefAttrs xmlDefAttrs;
580typedef xmlDefAttrs *xmlDefAttrsPtr;
581struct _xmlDefAttrs {
582 int nbAttrs; /* number of defaulted attributes on that element */
583 int maxAttrs; /* the size of the array */
584 const xmlChar *values[4]; /* array of localname/prefix/values */
585};
Daniel Veillarde57ec792003-09-10 10:50:59 +0000586
587/**
588 * xmlAddDefAttrs:
589 * @ctxt: an XML parser context
590 * @fullname: the element fullname
591 * @fullattr: the attribute fullname
592 * @value: the attribute value
593 *
594 * Add a defaulted attribute for an element
595 */
596static void
597xmlAddDefAttrs(xmlParserCtxtPtr ctxt,
598 const xmlChar *fullname,
599 const xmlChar *fullattr,
600 const xmlChar *value) {
601 xmlDefAttrsPtr defaults;
602 int len;
603 const xmlChar *name;
604 const xmlChar *prefix;
605
606 if (ctxt->attsDefault == NULL) {
607 ctxt->attsDefault = xmlHashCreate(10);
608 if (ctxt->attsDefault == NULL)
609 goto mem_error;
610 }
611
612 /*
613 * plit the element name into prefix:localname , the string found
614 * are within the DTD and hen not associated to namespace names.
615 */
616 name = xmlSplitQName3(fullname, &len);
617 if (name == NULL) {
618 name = xmlDictLookup(ctxt->dict, fullname, -1);
619 prefix = NULL;
620 } else {
621 name = xmlDictLookup(ctxt->dict, name, -1);
622 prefix = xmlDictLookup(ctxt->dict, fullname, len);
623 }
624
625 /*
626 * make sure there is some storage
627 */
628 defaults = xmlHashLookup2(ctxt->attsDefault, name, prefix);
629 if (defaults == NULL) {
630 defaults = (xmlDefAttrsPtr) xmlMalloc(sizeof(xmlDefAttrs) +
631 12 * sizeof(const xmlChar *));
632 if (defaults == NULL)
633 goto mem_error;
634 defaults->maxAttrs = 4;
635 defaults->nbAttrs = 0;
636 xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix, defaults, NULL);
637 } else if (defaults->nbAttrs >= defaults->maxAttrs) {
638 defaults = (xmlDefAttrsPtr) xmlRealloc(defaults, sizeof(xmlDefAttrs) +
639 (2 * defaults->maxAttrs * 4) * sizeof(const xmlChar *));
640 if (defaults == NULL)
641 goto mem_error;
642 defaults->maxAttrs *= 2;
643 xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix, defaults, NULL);
644 }
645
646 /*
647 * plit the element name into prefix:localname , the string found
648 * are within the DTD and hen not associated to namespace names.
649 */
650 name = xmlSplitQName3(fullattr, &len);
651 if (name == NULL) {
652 name = xmlDictLookup(ctxt->dict, fullattr, -1);
653 prefix = NULL;
654 } else {
655 name = xmlDictLookup(ctxt->dict, name, -1);
656 prefix = xmlDictLookup(ctxt->dict, fullattr, len);
657 }
658
659 defaults->values[4 * defaults->nbAttrs] = name;
660 defaults->values[4 * defaults->nbAttrs + 1] = prefix;
661 /* intern the string and precompute the end */
662 len = xmlStrlen(value);
663 value = xmlDictLookup(ctxt->dict, value, len);
664 defaults->values[4 * defaults->nbAttrs + 2] = value;
665 defaults->values[4 * defaults->nbAttrs + 3] = value + len;
666 defaults->nbAttrs++;
667
668 return;
669
670mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000671 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +0000672 return;
673}
674
Daniel Veillard8e36e6a2003-09-10 10:50:59 +0000675/**
676 * xmlAddSpecialAttr:
677 * @ctxt: an XML parser context
678 * @fullname: the element fullname
679 * @fullattr: the attribute fullname
680 * @type: the attribute type
681 *
682 * Register that this attribute is not CDATA
683 */
684static void
685xmlAddSpecialAttr(xmlParserCtxtPtr ctxt,
686 const xmlChar *fullname,
687 const xmlChar *fullattr,
688 int type)
689{
690 if (ctxt->attsSpecial == NULL) {
691 ctxt->attsSpecial = xmlHashCreate(10);
692 if (ctxt->attsSpecial == NULL)
693 goto mem_error;
694 }
695
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +0000696 xmlHashAddEntry2(ctxt->attsSpecial, fullname, fullattr,
697 (void *) (long) type);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +0000698 return;
699
700mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000701 xmlErrMemory(ctxt, NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +0000702 return;
703}
704
Daniel Veillard4432df22003-09-28 18:58:27 +0000705/**
706 * xmlCheckLanguageID:
707 * @lang: pointer to the string value
708 *
709 * Checks that the value conforms to the LanguageID production:
710 *
711 * NOTE: this is somewhat deprecated, those productions were removed from
712 * the XML Second edition.
713 *
714 * [33] LanguageID ::= Langcode ('-' Subcode)*
715 * [34] Langcode ::= ISO639Code | IanaCode | UserCode
716 * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
717 * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
718 * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
719 * [38] Subcode ::= ([a-z] | [A-Z])+
720 *
721 * Returns 1 if correct 0 otherwise
722 **/
723int
724xmlCheckLanguageID(const xmlChar * lang)
725{
726 const xmlChar *cur = lang;
727
728 if (cur == NULL)
729 return (0);
730 if (((cur[0] == 'i') && (cur[1] == '-')) ||
731 ((cur[0] == 'I') && (cur[1] == '-'))) {
732 /*
733 * IANA code
734 */
735 cur += 2;
736 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
737 ((cur[0] >= 'a') && (cur[0] <= 'z')))
738 cur++;
739 } else if (((cur[0] == 'x') && (cur[1] == '-')) ||
740 ((cur[0] == 'X') && (cur[1] == '-'))) {
741 /*
742 * User code
743 */
744 cur += 2;
745 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
746 ((cur[0] >= 'a') && (cur[0] <= 'z')))
747 cur++;
748 } else if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
749 ((cur[0] >= 'a') && (cur[0] <= 'z'))) {
750 /*
751 * ISO639
752 */
753 cur++;
754 if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
755 ((cur[0] >= 'a') && (cur[0] <= 'z')))
756 cur++;
757 else
758 return (0);
759 } else
760 return (0);
761 while (cur[0] != 0) { /* non input consuming */
762 if (cur[0] != '-')
763 return (0);
764 cur++;
765 if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
766 ((cur[0] >= 'a') && (cur[0] <= 'z')))
767 cur++;
768 else
769 return (0);
770 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
771 ((cur[0] >= 'a') && (cur[0] <= 'z')))
772 cur++;
773 }
774 return (1);
775}
776
Owen Taylor3473f882001-02-23 17:55:21 +0000777/************************************************************************
778 * *
779 * Parser stacks related functions and macros *
780 * *
781 ************************************************************************/
782
783xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
784 const xmlChar ** str);
785
Daniel Veillard0fb18932003-09-07 09:14:37 +0000786#ifdef SAX2
787/**
788 * nsPush:
789 * @ctxt: an XML parser context
790 * @prefix: the namespace prefix or NULL
791 * @URL: the namespace name
792 *
793 * Pushes a new parser namespace on top of the ns stack
794 *
William M. Brack7b9154b2003-09-27 19:23:50 +0000795 * Returns -1 in case of error, -2 if the namespace should be discarded
796 * and the index in the stack otherwise.
Daniel Veillard0fb18932003-09-07 09:14:37 +0000797 */
798static int
799nsPush(xmlParserCtxtPtr ctxt, const xmlChar *prefix, const xmlChar *URL)
800{
Daniel Veillarddca8cc72003-09-26 13:53:14 +0000801 if (ctxt->options & XML_PARSE_NSCLEAN) {
802 int i;
803 for (i = 0;i < ctxt->nsNr;i += 2) {
804 if (ctxt->nsTab[i] == prefix) {
805 /* in scope */
806 if (ctxt->nsTab[i + 1] == URL)
807 return(-2);
808 /* out of scope keep it */
809 break;
810 }
811 }
812 }
Daniel Veillard0fb18932003-09-07 09:14:37 +0000813 if ((ctxt->nsMax == 0) || (ctxt->nsTab == NULL)) {
814 ctxt->nsMax = 10;
815 ctxt->nsNr = 0;
816 ctxt->nsTab = (const xmlChar **)
817 xmlMalloc(ctxt->nsMax * sizeof(xmlChar *));
818 if (ctxt->nsTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000819 xmlErrMemory(ctxt, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +0000820 ctxt->nsMax = 0;
821 return (-1);
822 }
823 } else if (ctxt->nsNr >= ctxt->nsMax) {
824 ctxt->nsMax *= 2;
825 ctxt->nsTab = (const xmlChar **)
826 xmlRealloc(ctxt->nsTab,
827 ctxt->nsMax * sizeof(ctxt->nsTab[0]));
828 if (ctxt->nsTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000829 xmlErrMemory(ctxt, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +0000830 ctxt->nsMax /= 2;
831 return (-1);
832 }
833 }
834 ctxt->nsTab[ctxt->nsNr++] = prefix;
835 ctxt->nsTab[ctxt->nsNr++] = URL;
836 return (ctxt->nsNr);
837}
838/**
839 * nsPop:
840 * @ctxt: an XML parser context
841 * @nr: the number to pop
842 *
843 * Pops the top @nr parser prefix/namespace from the ns stack
844 *
845 * Returns the number of namespaces removed
846 */
847static int
848nsPop(xmlParserCtxtPtr ctxt, int nr)
849{
850 int i;
851
852 if (ctxt->nsTab == NULL) return(0);
853 if (ctxt->nsNr < nr) {
854 xmlGenericError(xmlGenericErrorContext, "Pbm popping %d NS\n", nr);
855 nr = ctxt->nsNr;
856 }
857 if (ctxt->nsNr <= 0)
858 return (0);
859
860 for (i = 0;i < nr;i++) {
861 ctxt->nsNr--;
862 ctxt->nsTab[ctxt->nsNr] = NULL;
863 }
864 return(nr);
865}
866#endif
867
868static int
869xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt, int nr) {
870 const xmlChar **atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +0000871 int *attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +0000872 int maxatts;
873
874 if (ctxt->atts == NULL) {
Daniel Veillarde57ec792003-09-10 10:50:59 +0000875 maxatts = 55; /* allow for 10 attrs by default */
Daniel Veillard0fb18932003-09-07 09:14:37 +0000876 atts = (const xmlChar **)
877 xmlMalloc(maxatts * sizeof(xmlChar *));
Daniel Veillarde57ec792003-09-10 10:50:59 +0000878 if (atts == NULL) goto mem_error;
Daniel Veillard0fb18932003-09-07 09:14:37 +0000879 ctxt->atts = atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +0000880 attallocs = (int *) xmlMalloc((maxatts / 5) * sizeof(int));
881 if (attallocs == NULL) goto mem_error;
882 ctxt->attallocs = attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +0000883 ctxt->maxatts = maxatts;
Daniel Veillarde57ec792003-09-10 10:50:59 +0000884 } else if (nr + 5 > ctxt->maxatts) {
885 maxatts = (nr + 5) * 2;
Daniel Veillard0fb18932003-09-07 09:14:37 +0000886 atts = (const xmlChar **) xmlRealloc((void *) ctxt->atts,
887 maxatts * sizeof(const xmlChar *));
Daniel Veillarde57ec792003-09-10 10:50:59 +0000888 if (atts == NULL) goto mem_error;
Daniel Veillard0fb18932003-09-07 09:14:37 +0000889 ctxt->atts = atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +0000890 attallocs = (int *) xmlRealloc((void *) ctxt->attallocs,
891 (maxatts / 5) * sizeof(int));
892 if (attallocs == NULL) goto mem_error;
893 ctxt->attallocs = attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +0000894 ctxt->maxatts = maxatts;
895 }
896 return(ctxt->maxatts);
Daniel Veillarde57ec792003-09-10 10:50:59 +0000897mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000898 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +0000899 return(-1);
Daniel Veillard0fb18932003-09-07 09:14:37 +0000900}
901
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000902/**
903 * inputPush:
904 * @ctxt: an XML parser context
Daniel Veillard9d06d302002-01-22 18:15:52 +0000905 * @value: the parser input
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000906 *
907 * Pushes a new parser input on top of the input stack
Daniel Veillard9d06d302002-01-22 18:15:52 +0000908 *
909 * Returns 0 in case of error, the index in the stack otherwise
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000910 */
Daniel Veillard1c732d22002-11-30 11:22:59 +0000911extern int
912inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value)
913{
914 if (ctxt->inputNr >= ctxt->inputMax) {
915 ctxt->inputMax *= 2;
916 ctxt->inputTab =
917 (xmlParserInputPtr *) xmlRealloc(ctxt->inputTab,
918 ctxt->inputMax *
919 sizeof(ctxt->inputTab[0]));
920 if (ctxt->inputTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000921 xmlErrMemory(ctxt, NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +0000922 return (0);
923 }
924 }
925 ctxt->inputTab[ctxt->inputNr] = value;
926 ctxt->input = value;
927 return (ctxt->inputNr++);
928}
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000929/**
Daniel Veillard1c732d22002-11-30 11:22:59 +0000930 * inputPop:
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000931 * @ctxt: an XML parser context
932 *
Daniel Veillard1c732d22002-11-30 11:22:59 +0000933 * Pops the top parser input from the input stack
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000934 *
Daniel Veillard1c732d22002-11-30 11:22:59 +0000935 * Returns the input just removed
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000936 */
Daniel Veillard1c732d22002-11-30 11:22:59 +0000937extern xmlParserInputPtr
938inputPop(xmlParserCtxtPtr ctxt)
939{
940 xmlParserInputPtr ret;
941
942 if (ctxt->inputNr <= 0)
943 return (0);
944 ctxt->inputNr--;
945 if (ctxt->inputNr > 0)
946 ctxt->input = ctxt->inputTab[ctxt->inputNr - 1];
947 else
948 ctxt->input = NULL;
949 ret = ctxt->inputTab[ctxt->inputNr];
950 ctxt->inputTab[ctxt->inputNr] = 0;
951 return (ret);
952}
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000953/**
954 * nodePush:
955 * @ctxt: an XML parser context
Daniel Veillard9d06d302002-01-22 18:15:52 +0000956 * @value: the element node
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000957 *
958 * Pushes a new element node on top of the node stack
Daniel Veillard9d06d302002-01-22 18:15:52 +0000959 *
960 * Returns 0 in case of error, the index in the stack otherwise
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000961 */
Daniel Veillard1c732d22002-11-30 11:22:59 +0000962extern int
963nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value)
964{
965 if (ctxt->nodeNr >= ctxt->nodeMax) {
966 ctxt->nodeMax *= 2;
967 ctxt->nodeTab =
968 (xmlNodePtr *) xmlRealloc(ctxt->nodeTab,
969 ctxt->nodeMax *
970 sizeof(ctxt->nodeTab[0]));
971 if (ctxt->nodeTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000972 xmlErrMemory(ctxt, NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +0000973 return (0);
974 }
975 }
Daniel Veillard4aede2e2003-10-17 12:43:59 +0000976 if (((unsigned int) ctxt->nodeNr) > xmlParserMaxDepth) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000977 xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
Daniel Veillard4aede2e2003-10-17 12:43:59 +0000978 "Excessive depth in document: change xmlParserMaxDepth = %d\n",
979 xmlParserMaxDepth);
Daniel Veillard3b2e4e12003-02-03 08:52:58 +0000980 ctxt->instate = XML_PARSER_EOF;
Daniel Veillard3b2e4e12003-02-03 08:52:58 +0000981 return(0);
982 }
Daniel Veillard1c732d22002-11-30 11:22:59 +0000983 ctxt->nodeTab[ctxt->nodeNr] = value;
984 ctxt->node = value;
985 return (ctxt->nodeNr++);
986}
987/**
988 * nodePop:
989 * @ctxt: an XML parser context
990 *
991 * Pops the top element node from the node stack
992 *
993 * Returns the node just removed
Owen Taylor3473f882001-02-23 17:55:21 +0000994 */
Daniel Veillard1c732d22002-11-30 11:22:59 +0000995extern xmlNodePtr
996nodePop(xmlParserCtxtPtr ctxt)
997{
998 xmlNodePtr ret;
999
1000 if (ctxt->nodeNr <= 0)
1001 return (0);
1002 ctxt->nodeNr--;
1003 if (ctxt->nodeNr > 0)
1004 ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1];
1005 else
1006 ctxt->node = NULL;
1007 ret = ctxt->nodeTab[ctxt->nodeNr];
1008 ctxt->nodeTab[ctxt->nodeNr] = 0;
1009 return (ret);
1010}
1011/**
Daniel Veillarde57ec792003-09-10 10:50:59 +00001012 * nameNsPush:
1013 * @ctxt: an XML parser context
1014 * @value: the element name
1015 * @prefix: the element prefix
1016 * @URI: the element namespace name
1017 *
1018 * Pushes a new element name/prefix/URL on top of the name stack
1019 *
1020 * Returns -1 in case of error, the index in the stack otherwise
1021 */
1022static int
1023nameNsPush(xmlParserCtxtPtr ctxt, const xmlChar * value,
1024 const xmlChar *prefix, const xmlChar *URI, int nsNr)
1025{
1026 if (ctxt->nameNr >= ctxt->nameMax) {
1027 const xmlChar * *tmp;
1028 void **tmp2;
1029 ctxt->nameMax *= 2;
1030 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1031 ctxt->nameMax *
1032 sizeof(ctxt->nameTab[0]));
1033 if (tmp == NULL) {
1034 ctxt->nameMax /= 2;
1035 goto mem_error;
1036 }
1037 ctxt->nameTab = tmp;
1038 tmp2 = (void **) xmlRealloc((void * *)ctxt->pushTab,
1039 ctxt->nameMax * 3 *
1040 sizeof(ctxt->pushTab[0]));
1041 if (tmp2 == NULL) {
1042 ctxt->nameMax /= 2;
1043 goto mem_error;
1044 }
1045 ctxt->pushTab = tmp2;
1046 }
1047 ctxt->nameTab[ctxt->nameNr] = value;
1048 ctxt->name = value;
1049 ctxt->pushTab[ctxt->nameNr * 3] = (void *) prefix;
1050 ctxt->pushTab[ctxt->nameNr * 3 + 1] = (void *) URI;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +00001051 ctxt->pushTab[ctxt->nameNr * 3 + 2] = (void *) (long) nsNr;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001052 return (ctxt->nameNr++);
1053mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001054 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001055 return (-1);
1056}
1057/**
1058 * nameNsPop:
1059 * @ctxt: an XML parser context
1060 *
1061 * Pops the top element/prefix/URI name from the name stack
1062 *
1063 * Returns the name just removed
1064 */
1065static const xmlChar *
1066nameNsPop(xmlParserCtxtPtr ctxt)
1067{
1068 const xmlChar *ret;
1069
1070 if (ctxt->nameNr <= 0)
1071 return (0);
1072 ctxt->nameNr--;
1073 if (ctxt->nameNr > 0)
1074 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1075 else
1076 ctxt->name = NULL;
1077 ret = ctxt->nameTab[ctxt->nameNr];
1078 ctxt->nameTab[ctxt->nameNr] = NULL;
1079 return (ret);
1080}
1081
1082/**
Daniel Veillard1c732d22002-11-30 11:22:59 +00001083 * namePush:
1084 * @ctxt: an XML parser context
1085 * @value: the element name
1086 *
1087 * Pushes a new element name on top of the name stack
1088 *
Daniel Veillarde57ec792003-09-10 10:50:59 +00001089 * Returns -1 in case of error, the index in the stack otherwise
Daniel Veillard1c732d22002-11-30 11:22:59 +00001090 */
1091extern int
Daniel Veillard2fdbd322003-08-18 12:15:38 +00001092namePush(xmlParserCtxtPtr ctxt, const xmlChar * value)
Daniel Veillard1c732d22002-11-30 11:22:59 +00001093{
1094 if (ctxt->nameNr >= ctxt->nameMax) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00001095 const xmlChar * *tmp;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001096 ctxt->nameMax *= 2;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001097 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
Daniel Veillard1c732d22002-11-30 11:22:59 +00001098 ctxt->nameMax *
1099 sizeof(ctxt->nameTab[0]));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001100 if (tmp == NULL) {
1101 ctxt->nameMax /= 2;
1102 goto mem_error;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001103 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00001104 ctxt->nameTab = tmp;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001105 }
1106 ctxt->nameTab[ctxt->nameNr] = value;
1107 ctxt->name = value;
1108 return (ctxt->nameNr++);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001109mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001110 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001111 return (-1);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001112}
1113/**
1114 * namePop:
1115 * @ctxt: an XML parser context
1116 *
1117 * Pops the top element name from the name stack
1118 *
1119 * Returns the name just removed
1120 */
Daniel Veillard2fdbd322003-08-18 12:15:38 +00001121extern const xmlChar *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001122namePop(xmlParserCtxtPtr ctxt)
1123{
Daniel Veillard2fdbd322003-08-18 12:15:38 +00001124 const xmlChar *ret;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001125
1126 if (ctxt->nameNr <= 0)
1127 return (0);
1128 ctxt->nameNr--;
1129 if (ctxt->nameNr > 0)
1130 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1131 else
1132 ctxt->name = NULL;
1133 ret = ctxt->nameTab[ctxt->nameNr];
1134 ctxt->nameTab[ctxt->nameNr] = 0;
1135 return (ret);
1136}
Owen Taylor3473f882001-02-23 17:55:21 +00001137
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001138static int spacePush(xmlParserCtxtPtr ctxt, int val) {
Owen Taylor3473f882001-02-23 17:55:21 +00001139 if (ctxt->spaceNr >= ctxt->spaceMax) {
1140 ctxt->spaceMax *= 2;
1141 ctxt->spaceTab = (int *) xmlRealloc(ctxt->spaceTab,
1142 ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
1143 if (ctxt->spaceTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001144 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001145 return(0);
1146 }
1147 }
1148 ctxt->spaceTab[ctxt->spaceNr] = val;
1149 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr];
1150 return(ctxt->spaceNr++);
1151}
1152
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001153static int spacePop(xmlParserCtxtPtr ctxt) {
Owen Taylor3473f882001-02-23 17:55:21 +00001154 int ret;
1155 if (ctxt->spaceNr <= 0) return(0);
1156 ctxt->spaceNr--;
1157 if (ctxt->spaceNr > 0)
1158 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1];
1159 else
1160 ctxt->space = NULL;
1161 ret = ctxt->spaceTab[ctxt->spaceNr];
1162 ctxt->spaceTab[ctxt->spaceNr] = -1;
1163 return(ret);
1164}
1165
1166/*
1167 * Macros for accessing the content. Those should be used only by the parser,
1168 * and not exported.
1169 *
1170 * Dirty macros, i.e. one often need to make assumption on the context to
1171 * use them
1172 *
1173 * CUR_PTR return the current pointer to the xmlChar to be parsed.
1174 * To be used with extreme caution since operations consuming
1175 * characters may move the input buffer to a different location !
1176 * CUR returns the current xmlChar value, i.e. a 8 bit value if compiled
1177 * This should be used internally by the parser
1178 * only to compare to ASCII values otherwise it would break when
1179 * running with UTF-8 encoding.
1180 * RAW same as CUR but in the input buffer, bypass any token
1181 * extraction that may have been done
1182 * NXT(n) returns the n'th next xmlChar. Same as CUR is should be used only
1183 * to compare on ASCII based substring.
1184 * SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
Daniel Veillard77a90a72003-03-22 00:04:05 +00001185 * strings without newlines within the parser.
1186 * NEXT1(l) Skip 1 xmlChar, and must also be used only to skip 1 non-newline ASCII
1187 * defined char within the parser.
Owen Taylor3473f882001-02-23 17:55:21 +00001188 * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
1189 *
1190 * NEXT Skip to the next character, this does the proper decoding
1191 * in UTF-8 mode. It also pop-up unfinished entities on the fly.
Daniel Veillard77a90a72003-03-22 00:04:05 +00001192 * NEXTL(l) Skip the current unicode character of l xmlChars long.
Owen Taylor3473f882001-02-23 17:55:21 +00001193 * CUR_CHAR(l) returns the current unicode character (int), set l
1194 * to the number of xmlChars used for the encoding [0-5].
1195 * CUR_SCHAR same but operate on a string instead of the context
1196 * COPY_BUF copy the current unicode char to the target buffer, increment
1197 * the index
1198 * GROW, SHRINK handling of input buffers
1199 */
1200
/*
 * Raw accessors on the current input of the parser context. They all
 * expect a variable named ctxt to be in scope at the point of use.
 */
#define RAW (*ctxt->input->cur)
#define CUR (*ctxt->input->cur)
#define NXT(val) ctxt->input->cur[(val)]
#define CUR_PTR ctxt->input->cur

/*
 * CMPn(s, c1, ..., cn): true when the first n bytes found at s are
 * exactly c1 ... cn. The bytes are read as unsigned char so values
 * above 0x7F compare as expected. The comparison stops at the first
 * mismatch, so nothing is read past a terminating 0 as long as the
 * expected bytes are all non-zero.
 * The s argument is fully parenthesized so pointer expressions can be
 * passed safely, and constness of the pointed-to data is preserved.
 */
#define CMP4( s, c1, c2, c3, c4 ) \
  ( ((const unsigned char *) (s))[ 0 ] == (c1) && \
    ((const unsigned char *) (s))[ 1 ] == (c2) && \
    ((const unsigned char *) (s))[ 2 ] == (c3) && \
    ((const unsigned char *) (s))[ 3 ] == (c4) )
#define CMP5( s, c1, c2, c3, c4, c5 ) \
  ( CMP4( s, c1, c2, c3, c4 ) && \
    ((const unsigned char *) (s))[ 4 ] == (c5) )
#define CMP6( s, c1, c2, c3, c4, c5, c6 ) \
  ( CMP5( s, c1, c2, c3, c4, c5 ) && \
    ((const unsigned char *) (s))[ 5 ] == (c6) )
#define CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) \
  ( CMP6( s, c1, c2, c3, c4, c5, c6 ) && \
    ((const unsigned char *) (s))[ 6 ] == (c7) )
#define CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) \
  ( CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) && \
    ((const unsigned char *) (s))[ 7 ] == (c8) )
#define CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) \
  ( CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) && \
    ((const unsigned char *) (s))[ 8 ] == (c9) )
#define CMP10( s, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 ) \
  ( CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) && \
    ((const unsigned char *) (s))[ 9 ] == (c10) )
1223
/*
 * SKIP(val): advance the current input by val xmlChars, updating the
 * character counter and the column accordingly. A '%' found at the new
 * position is handed to xmlParserHandlePEReference(); if the input is
 * exhausted and cannot be grown it is popped.
 */
#define SKIP(val) do {							\
    ctxt->nbChars += (val);						\
    ctxt->input->cur += (val);						\
    ctxt->input->col += (val);						\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
    if ((*ctxt->input->cur == 0) &&					\
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))		\
	    xmlPopInput(ctxt);						\
  } while (0)
1231
Daniel Veillarda880b122003-04-21 21:36:41 +00001232#define SHRINK if ((ctxt->progressive == 0) && \
Daniel Veillard6155d8a2003-08-19 15:01:28 +00001233 (ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \
1234 (ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \
Daniel Veillard46de64e2002-05-29 08:21:33 +00001235 xmlSHRINK (ctxt);
1236
1237static void xmlSHRINK (xmlParserCtxtPtr ctxt) {
1238 xmlParserInputShrink(ctxt->input);
1239 if ((*ctxt->input->cur == 0) &&
1240 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1241 xmlPopInput(ctxt);
Daniel Veillard48b2f892001-02-25 16:11:03 +00001242 }
Owen Taylor3473f882001-02-23 17:55:21 +00001243
Daniel Veillarda880b122003-04-21 21:36:41 +00001244#define GROW if ((ctxt->progressive == 0) && \
1245 (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK)) \
Daniel Veillard46de64e2002-05-29 08:21:33 +00001246 xmlGROW (ctxt);
1247
1248static void xmlGROW (xmlParserCtxtPtr ctxt) {
1249 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1250 if ((*ctxt->input->cur == 0) &&
1251 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1252 xmlPopInput(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +00001253}
Owen Taylor3473f882001-02-23 17:55:21 +00001254
/* skip blanks at the current position, see xmlSkipBlankChars() */
#define SKIP_BLANKS xmlSkipBlankChars(ctxt)

/* move to the next character through xmlNextChar() */
#define NEXT xmlNextChar(ctxt)

/*
 * NEXT1: step over exactly one xmlChar, bumping column and character
 * counters, and try to refill the buffer when a 0 byte is reached.
 * NOTE: expands to a brace block, not to a do { } while (0) statement.
 */
#define NEXT1 {								\
	ctxt->input->col++;						\
	ctxt->input->cur++;						\
	ctxt->nbChars++;						\
	if (*ctxt->input->cur == 0)					\
	    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);		\
    }

/*
 * NEXTL(l): step over the current character which is l xmlChars long,
 * maintaining line and column; a '%' found at the new position is handed
 * to xmlParserHandlePEReference().
 */
#define NEXTL(l) do {							\
    if (*(ctxt->input->cur) == '\n') {					\
	ctxt->input->line++; ctxt->input->col = 1;			\
    } else ctxt->input->col++;						\
    ctxt->input->cur += (l);						\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
  } while (0)

/* current character of the context input, its length is stored in l */
#define CUR_CHAR(l) xmlCurrentChar(ctxt, &l)
/* current character of the string s, its length is stored in l */
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, s, &l)

/*
 * COPY_BUF(l,b,i,v): append character v to buffer b at index i and
 * advance i; a length l of 1 is stored directly, anything else goes
 * through xmlCopyCharMultiByte().
 * NOTE: expands to a bare if/else statement.
 */
#define COPY_BUF(l,b,i,v)						\
    if (l == 1) b[i++] = (xmlChar) v;					\
    else i += xmlCopyCharMultiByte(&b[i],v)
Owen Taylor3473f882001-02-23 17:55:21 +00001281
1282/**
1283 * xmlSkipBlankChars:
1284 * @ctxt: the XML parser context
1285 *
1286 * skip all blanks character found at that point in the input streams.
1287 * It pops up finished entities in the process if allowable at that point.
1288 *
1289 * Returns the number of space chars skipped
1290 */
1291
1292int
1293xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
Daniel Veillard02141ea2001-04-30 11:46:40 +00001294 int res = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001295
1296 /*
1297 * It's Okay to use CUR/NEXT here since all the blanks are on
1298 * the ASCII range.
1299 */
Daniel Veillard02141ea2001-04-30 11:46:40 +00001300 if ((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) {
1301 const xmlChar *cur;
Owen Taylor3473f882001-02-23 17:55:21 +00001302 /*
Daniel Veillard02141ea2001-04-30 11:46:40 +00001303 * if we are in the document content, go really fast
Owen Taylor3473f882001-02-23 17:55:21 +00001304 */
Daniel Veillard02141ea2001-04-30 11:46:40 +00001305 cur = ctxt->input->cur;
William M. Brack76e95df2003-10-18 16:20:14 +00001306 while (IS_BLANK_CH(*cur)) {
Daniel Veillard02141ea2001-04-30 11:46:40 +00001307 if (*cur == '\n') {
1308 ctxt->input->line++; ctxt->input->col = 1;
1309 }
1310 cur++;
1311 res++;
1312 if (*cur == 0) {
1313 ctxt->input->cur = cur;
1314 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1315 cur = ctxt->input->cur;
1316 }
1317 }
1318 ctxt->input->cur = cur;
1319 } else {
1320 int cur;
1321 do {
1322 cur = CUR;
1323 while (IS_BLANK(cur)) { /* CHECKED tstblanks.xml */
1324 NEXT;
1325 cur = CUR;
1326 res++;
1327 }
1328 while ((cur == 0) && (ctxt->inputNr > 1) &&
1329 (ctxt->instate != XML_PARSER_COMMENT)) {
1330 xmlPopInput(ctxt);
1331 cur = CUR;
1332 }
1333 /*
1334 * Need to handle support of entities branching here
1335 */
1336 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);
1337 } while (IS_BLANK(cur)); /* CHECKED tstblanks.xml */
1338 }
Owen Taylor3473f882001-02-23 17:55:21 +00001339 return(res);
1340}
1341
1342/************************************************************************
1343 * *
1344 * Commodity functions to handle entities *
1345 * *
1346 ************************************************************************/
1347
1348/**
1349 * xmlPopInput:
1350 * @ctxt: an XML parser context
1351 *
1352 * xmlPopInput: the current input pointed by ctxt->input came to an end
1353 * pop it and return the next char.
1354 *
1355 * Returns the current xmlChar in the parser context
1356 */
1357xmlChar
1358xmlPopInput(xmlParserCtxtPtr ctxt) {
1359 if (ctxt->inputNr == 1) return(0); /* End of main Input */
1360 if (xmlParserDebugEntities)
1361 xmlGenericError(xmlGenericErrorContext,
1362 "Popping input %d\n", ctxt->inputNr);
1363 xmlFreeInputStream(inputPop(ctxt));
Daniel Veillard561b7f82002-03-20 21:55:57 +00001364 if ((*ctxt->input->cur == 0) &&
Owen Taylor3473f882001-02-23 17:55:21 +00001365 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1366 return(xmlPopInput(ctxt));
1367 return(CUR);
1368}
1369
1370/**
1371 * xmlPushInput:
1372 * @ctxt: an XML parser context
1373 * @input: an XML parser input fragment (entity, XML fragment ...).
1374 *
1375 * xmlPushInput: switch to a new input stream which is stacked on top
1376 * of the previous one(s).
1377 */
1378void
1379xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
1380 if (input == NULL) return;
1381
1382 if (xmlParserDebugEntities) {
1383 if ((ctxt->input != NULL) && (ctxt->input->filename))
1384 xmlGenericError(xmlGenericErrorContext,
1385 "%s(%d): ", ctxt->input->filename,
1386 ctxt->input->line);
1387 xmlGenericError(xmlGenericErrorContext,
1388 "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
1389 }
1390 inputPush(ctxt, input);
1391 GROW;
1392}
1393
1394/**
1395 * xmlParseCharRef:
1396 * @ctxt: an XML parser context
1397 *
1398 * parse Reference declarations
1399 *
1400 * [66] CharRef ::= '&#' [0-9]+ ';' |
1401 * '&#x' [0-9a-fA-F]+ ';'
1402 *
1403 * [ WFC: Legal Character ]
1404 * Characters referred to using character references must match the
1405 * production for Char.
1406 *
1407 * Returns the value parsed (as an int), 0 in case of error
1408 */
1409int
1410xmlParseCharRef(xmlParserCtxtPtr ctxt) {
Daniel Veillard50582112001-03-26 22:52:16 +00001411 unsigned int val = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001412 int count = 0;
1413
Owen Taylor3473f882001-02-23 17:55:21 +00001414 /*
1415 * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
1416 */
Daniel Veillard561b7f82002-03-20 21:55:57 +00001417 if ((RAW == '&') && (NXT(1) == '#') &&
Owen Taylor3473f882001-02-23 17:55:21 +00001418 (NXT(2) == 'x')) {
1419 SKIP(3);
1420 GROW;
1421 while (RAW != ';') { /* loop blocked by count */
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00001422 if (count++ > 20) {
1423 count = 0;
1424 GROW;
1425 }
1426 if ((RAW >= '0') && (RAW <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +00001427 val = val * 16 + (CUR - '0');
1428 else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
1429 val = val * 16 + (CUR - 'a') + 10;
1430 else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
1431 val = val * 16 + (CUR - 'A') + 10;
1432 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001433 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001434 val = 0;
1435 break;
1436 }
1437 NEXT;
1438 count++;
1439 }
1440 if (RAW == ';') {
1441 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
Daniel Veillard77a90a72003-03-22 00:04:05 +00001442 ctxt->input->col++;
Owen Taylor3473f882001-02-23 17:55:21 +00001443 ctxt->nbChars ++;
1444 ctxt->input->cur++;
1445 }
Daniel Veillard561b7f82002-03-20 21:55:57 +00001446 } else if ((RAW == '&') && (NXT(1) == '#')) {
Owen Taylor3473f882001-02-23 17:55:21 +00001447 SKIP(2);
1448 GROW;
1449 while (RAW != ';') { /* loop blocked by count */
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00001450 if (count++ > 20) {
1451 count = 0;
1452 GROW;
1453 }
1454 if ((RAW >= '0') && (RAW <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +00001455 val = val * 10 + (CUR - '0');
1456 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001457 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001458 val = 0;
1459 break;
1460 }
1461 NEXT;
1462 count++;
1463 }
1464 if (RAW == ';') {
1465 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
Daniel Veillard77a90a72003-03-22 00:04:05 +00001466 ctxt->input->col++;
Owen Taylor3473f882001-02-23 17:55:21 +00001467 ctxt->nbChars ++;
1468 ctxt->input->cur++;
1469 }
1470 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001471 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001472 }
1473
1474 /*
1475 * [ WFC: Legal Character ]
1476 * Characters referred to using character references must match the
1477 * production for Char.
1478 */
William M. Brack871611b2003-10-18 04:53:14 +00001479 if (IS_CHAR(val)) {
Owen Taylor3473f882001-02-23 17:55:21 +00001480 return(val);
1481 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00001482 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
1483 "xmlParseCharRef: invalid xmlChar value %d\n",
1484 val);
Owen Taylor3473f882001-02-23 17:55:21 +00001485 }
1486 return(0);
1487}
1488
1489/**
1490 * xmlParseStringCharRef:
1491 * @ctxt: an XML parser context
1492 * @str: a pointer to an index in the string
1493 *
1494 * parse Reference declarations, variant parsing from a string rather
1495 * than an an input flow.
1496 *
1497 * [66] CharRef ::= '&#' [0-9]+ ';' |
1498 * '&#x' [0-9a-fA-F]+ ';'
1499 *
1500 * [ WFC: Legal Character ]
1501 * Characters referred to using character references must match the
1502 * production for Char.
1503 *
1504 * Returns the value parsed (as an int), 0 in case of error, str will be
1505 * updated to the current value of the index
1506 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001507static int
Owen Taylor3473f882001-02-23 17:55:21 +00001508xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
1509 const xmlChar *ptr;
1510 xmlChar cur;
1511 int val = 0;
1512
1513 if ((str == NULL) || (*str == NULL)) return(0);
1514 ptr = *str;
1515 cur = *ptr;
1516 if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
1517 ptr += 3;
1518 cur = *ptr;
1519 while (cur != ';') { /* Non input consuming loop */
1520 if ((cur >= '0') && (cur <= '9'))
1521 val = val * 16 + (cur - '0');
1522 else if ((cur >= 'a') && (cur <= 'f'))
1523 val = val * 16 + (cur - 'a') + 10;
1524 else if ((cur >= 'A') && (cur <= 'F'))
1525 val = val * 16 + (cur - 'A') + 10;
1526 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001527 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001528 val = 0;
1529 break;
1530 }
1531 ptr++;
1532 cur = *ptr;
1533 }
1534 if (cur == ';')
1535 ptr++;
1536 } else if ((cur == '&') && (ptr[1] == '#')){
1537 ptr += 2;
1538 cur = *ptr;
1539 while (cur != ';') { /* Non input consuming loops */
1540 if ((cur >= '0') && (cur <= '9'))
1541 val = val * 10 + (cur - '0');
1542 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001543 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001544 val = 0;
1545 break;
1546 }
1547 ptr++;
1548 cur = *ptr;
1549 }
1550 if (cur == ';')
1551 ptr++;
1552 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001553 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001554 return(0);
1555 }
1556 *str = ptr;
1557
1558 /*
1559 * [ WFC: Legal Character ]
1560 * Characters referred to using character references must match the
1561 * production for Char.
1562 */
William M. Brack871611b2003-10-18 04:53:14 +00001563 if (IS_CHAR(val)) {
Owen Taylor3473f882001-02-23 17:55:21 +00001564 return(val);
1565 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00001566 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
1567 "xmlParseStringCharRef: invalid xmlChar value %d\n",
1568 val);
Owen Taylor3473f882001-02-23 17:55:21 +00001569 }
1570 return(0);
1571}
1572
1573/**
Daniel Veillardf5582f12002-06-11 10:08:16 +00001574 * xmlNewBlanksWrapperInputStream:
1575 * @ctxt: an XML parser context
1576 * @entity: an Entity pointer
1577 *
1578 * Create a new input stream for wrapping
1579 * blanks around a PEReference
1580 *
1581 * Returns the new input stream or NULL
1582 */
1583
1584static void deallocblankswrapper (xmlChar *str) {xmlFree(str);}
1585
Daniel Veillardf4862f02002-09-10 11:13:43 +00001586static xmlParserInputPtr
Daniel Veillardf5582f12002-06-11 10:08:16 +00001587xmlNewBlanksWrapperInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
1588 xmlParserInputPtr input;
1589 xmlChar *buffer;
1590 size_t length;
1591 if (entity == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001592 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
1593 "xmlNewBlanksWrapperInputStream entity\n");
Daniel Veillardf5582f12002-06-11 10:08:16 +00001594 return(NULL);
1595 }
1596 if (xmlParserDebugEntities)
1597 xmlGenericError(xmlGenericErrorContext,
1598 "new blanks wrapper for entity: %s\n", entity->name);
1599 input = xmlNewInputStream(ctxt);
1600 if (input == NULL) {
1601 return(NULL);
1602 }
1603 length = xmlStrlen(entity->name) + 5;
Daniel Veillard3c908dc2003-04-19 00:07:51 +00001604 buffer = xmlMallocAtomic(length);
Daniel Veillardf5582f12002-06-11 10:08:16 +00001605 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001606 xmlErrMemory(ctxt, NULL);
Daniel Veillardf5582f12002-06-11 10:08:16 +00001607 return(NULL);
1608 }
1609 buffer [0] = ' ';
1610 buffer [1] = '%';
1611 buffer [length-3] = ';';
1612 buffer [length-2] = ' ';
1613 buffer [length-1] = 0;
1614 memcpy(buffer + 2, entity->name, length - 5);
1615 input->free = deallocblankswrapper;
1616 input->base = buffer;
1617 input->cur = buffer;
1618 input->length = length;
1619 input->end = &buffer[length];
1620 return(input);
1621}
1622
1623/**
Owen Taylor3473f882001-02-23 17:55:21 +00001624 * xmlParserHandlePEReference:
1625 * @ctxt: the parser context
1626 *
1627 * [69] PEReference ::= '%' Name ';'
1628 *
1629 * [ WFC: No Recursion ]
1630 * A parsed entity must not contain a recursive
1631 * reference to itself, either directly or indirectly.
1632 *
1633 * [ WFC: Entity Declared ]
1634 * In a document without any DTD, a document with only an internal DTD
1635 * subset which contains no parameter entity references, or a document
1636 * with "standalone='yes'", ... ... The declaration of a parameter
1637 * entity must precede any reference to it...
1638 *
1639 * [ VC: Entity Declared ]
1640 * In a document with an external subset or external parameter entities
1641 * with "standalone='no'", ... ... The declaration of a parameter entity
1642 * must precede any reference to it...
1643 *
1644 * [ WFC: In DTD ]
1645 * Parameter-entity references may only appear in the DTD.
1646 * NOTE: misleading but this is handled.
1647 *
1648 * A PEReference may have been detected in the current input stream
1649 * the handling is done accordingly to
1650 * http://www.w3.org/TR/REC-xml#entproc
1651 * i.e.
1652 * - Included in literal in entity values
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001653 * - Included as Parameter Entity reference within DTDs
Owen Taylor3473f882001-02-23 17:55:21 +00001654 */
1655void
1656xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00001657 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00001658 xmlEntityPtr entity = NULL;
1659 xmlParserInputPtr input;
1660
Owen Taylor3473f882001-02-23 17:55:21 +00001661 if (RAW != '%') return;
1662 switch(ctxt->instate) {
1663 case XML_PARSER_CDATA_SECTION:
1664 return;
1665 case XML_PARSER_COMMENT:
1666 return;
1667 case XML_PARSER_START_TAG:
1668 return;
1669 case XML_PARSER_END_TAG:
1670 return;
1671 case XML_PARSER_EOF:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001672 xmlFatalErr(ctxt, XML_ERR_PEREF_AT_EOF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001673 return;
1674 case XML_PARSER_PROLOG:
1675 case XML_PARSER_START:
1676 case XML_PARSER_MISC:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001677 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_PROLOG, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001678 return;
1679 case XML_PARSER_ENTITY_DECL:
1680 case XML_PARSER_CONTENT:
1681 case XML_PARSER_ATTRIBUTE_VALUE:
1682 case XML_PARSER_PI:
1683 case XML_PARSER_SYSTEM_LITERAL:
Daniel Veillard4a7ae502002-02-18 19:18:17 +00001684 case XML_PARSER_PUBLIC_LITERAL:
Owen Taylor3473f882001-02-23 17:55:21 +00001685 /* we just ignore it there */
1686 return;
1687 case XML_PARSER_EPILOG:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001688 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_EPILOG, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001689 return;
1690 case XML_PARSER_ENTITY_VALUE:
1691 /*
1692 * NOTE: in the case of entity values, we don't do the
1693 * substitution here since we need the literal
1694 * entity value to be able to save the internal
1695 * subset of the document.
1696 * This will be handled by xmlStringDecodeEntities
1697 */
1698 return;
1699 case XML_PARSER_DTD:
1700 /*
1701 * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
1702 * In the internal DTD subset, parameter-entity references
1703 * can occur only where markup declarations can occur, not
1704 * within markup declarations.
1705 * In that case this is handled in xmlParseMarkupDecl
1706 */
1707 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
1708 return;
William M. Brack76e95df2003-10-18 16:20:14 +00001709 if (IS_BLANK_CH(NXT(1)) || NXT(1) == 0)
Daniel Veillardf5582f12002-06-11 10:08:16 +00001710 return;
Owen Taylor3473f882001-02-23 17:55:21 +00001711 break;
1712 case XML_PARSER_IGNORE:
1713 return;
1714 }
1715
1716 NEXT;
1717 name = xmlParseName(ctxt);
1718 if (xmlParserDebugEntities)
1719 xmlGenericError(xmlGenericErrorContext,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001720 "PEReference: %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00001721 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001722 xmlFatalErr(ctxt, XML_ERR_PEREF_NO_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001723 } else {
1724 if (RAW == ';') {
1725 NEXT;
1726 if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL))
1727 entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
1728 if (entity == NULL) {
1729
1730 /*
1731 * [ WFC: Entity Declared ]
1732 * In a document without any DTD, a document with only an
1733 * internal DTD subset which contains no parameter entity
1734 * references, or a document with "standalone='yes'", ...
1735 * ... The declaration of a parameter entity must precede
1736 * any reference to it...
1737 */
1738 if ((ctxt->standalone == 1) ||
1739 ((ctxt->hasExternalSubset == 0) &&
1740 (ctxt->hasPErefs == 0))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00001741 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00001742 "PEReference: %%%s; not found\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00001743 } else {
1744 /*
1745 * [ VC: Entity Declared ]
1746 * In a document with an external subset or external
1747 * parameter entities with "standalone='no'", ...
1748 * ... The declaration of a parameter entity must precede
1749 * any reference to it...
1750 */
Daniel Veillard24eb9782003-10-04 21:08:09 +00001751 if ((ctxt->validate) && (ctxt->vctxt.error != NULL)) {
1752 xmlValidityError(ctxt, XML_WAR_UNDECLARED_ENTITY,
1753 "PEReference: %%%s; not found\n",
1754 name);
1755 } else
1756 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
1757 "PEReference: %%%s; not found\n",
1758 name, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001759 ctxt->valid = 0;
1760 }
Daniel Veillardf5582f12002-06-11 10:08:16 +00001761 } else if (ctxt->input->free != deallocblankswrapper) {
1762 input = xmlNewBlanksWrapperInputStream(ctxt, entity);
1763 xmlPushInput(ctxt, input);
Owen Taylor3473f882001-02-23 17:55:21 +00001764 } else {
1765 if ((entity->etype == XML_INTERNAL_PARAMETER_ENTITY) ||
1766 (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)) {
Daniel Veillard87a764e2001-06-20 17:41:10 +00001767 xmlChar start[4];
1768 xmlCharEncoding enc;
1769
Owen Taylor3473f882001-02-23 17:55:21 +00001770 /*
1771 * handle the extra spaces added before and after
1772 * c.f. http://www.w3.org/TR/REC-xml#as-PE
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001773 * this is done independently.
Owen Taylor3473f882001-02-23 17:55:21 +00001774 */
1775 input = xmlNewEntityInputStream(ctxt, entity);
1776 xmlPushInput(ctxt, input);
Daniel Veillard87a764e2001-06-20 17:41:10 +00001777
1778 /*
1779 * Get the 4 first bytes and decode the charset
1780 * if enc != XML_CHAR_ENCODING_NONE
1781 * plug some encoding conversion routines.
1782 */
1783 GROW
Daniel Veillarde059b892002-06-13 15:32:10 +00001784 if (entity->length >= 4) {
1785 start[0] = RAW;
1786 start[1] = NXT(1);
1787 start[2] = NXT(2);
1788 start[3] = NXT(3);
1789 enc = xmlDetectCharEncoding(start, 4);
1790 if (enc != XML_CHAR_ENCODING_NONE) {
1791 xmlSwitchEncoding(ctxt, enc);
1792 }
Daniel Veillard87a764e2001-06-20 17:41:10 +00001793 }
1794
Owen Taylor3473f882001-02-23 17:55:21 +00001795 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
Daniel Veillarda07050d2003-10-19 14:46:32 +00001796 (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l' )) &&
1797 (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +00001798 xmlParseTextDecl(ctxt);
1799 }
Owen Taylor3473f882001-02-23 17:55:21 +00001800 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00001801 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
1802 "PEReference: %s is not a parameter entity\n",
1803 name);
Owen Taylor3473f882001-02-23 17:55:21 +00001804 }
1805 }
1806 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001807 xmlFatalErr(ctxt, XML_ERR_PEREF_SEMICOL_MISSING, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001808 }
Owen Taylor3473f882001-02-23 17:55:21 +00001809 }
1810}
1811
/*
 * Macro used to grow the current buffer.
 *
 * Doubles the allocation of <buffer>, whose current size is held in the
 * companion variable <buffer>_size. The user must provide a "mem_error"
 * label: it is jumped to when the reallocation fails. In that case both
 * <buffer> and <buffer>_size are left untouched, so the original block is
 * still valid and remains the caller's to release.
 */
#define growBuffer(buffer) {						\
    xmlChar *buffer##_tmp = (xmlChar *)					\
	xmlRealloc(buffer, 2 * buffer##_size * sizeof(xmlChar));	\
    if (buffer##_tmp == NULL) goto mem_error;				\
    buffer = buffer##_tmp;						\
    buffer##_size *= 2;							\
}
1821
1822/**
Daniel Veillard7a02cfe2003-09-25 12:18:34 +00001823 * xmlStringLenDecodeEntities:
Owen Taylor3473f882001-02-23 17:55:21 +00001824 * @ctxt: the parser context
1825 * @str: the input string
Daniel Veillarde57ec792003-09-10 10:50:59 +00001826 * @len: the string length
Owen Taylor3473f882001-02-23 17:55:21 +00001827 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
1828 * @end: an end marker xmlChar, 0 if none
1829 * @end2: an end marker xmlChar, 0 if none
1830 * @end3: an end marker xmlChar, 0 if none
1831 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001832 * Takes a entity string content and process to do the adequate substitutions.
Owen Taylor3473f882001-02-23 17:55:21 +00001833 *
1834 * [67] Reference ::= EntityRef | CharRef
1835 *
1836 * [69] PEReference ::= '%' Name ';'
1837 *
1838 * Returns A newly allocated string with the substitution done. The caller
1839 * must deallocate it !
1840 */
1841xmlChar *
Daniel Veillarde57ec792003-09-10 10:50:59 +00001842xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
1843 int what, xmlChar end, xmlChar end2, xmlChar end3) {
Owen Taylor3473f882001-02-23 17:55:21 +00001844 xmlChar *buffer = NULL;
1845 int buffer_size = 0;
1846
1847 xmlChar *current = NULL;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001848 const xmlChar *last;
Owen Taylor3473f882001-02-23 17:55:21 +00001849 xmlEntityPtr ent;
1850 int c,l;
1851 int nbchars = 0;
1852
Daniel Veillarde57ec792003-09-10 10:50:59 +00001853 if ((str == NULL) || (len < 0))
Owen Taylor3473f882001-02-23 17:55:21 +00001854 return(NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001855 last = str + len;
Owen Taylor3473f882001-02-23 17:55:21 +00001856
1857 if (ctxt->depth > 40) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001858 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001859 return(NULL);
1860 }
1861
1862 /*
1863 * allocate a translation buffer.
1864 */
1865 buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
Daniel Veillard3c908dc2003-04-19 00:07:51 +00001866 buffer = (xmlChar *) xmlMallocAtomic(buffer_size * sizeof(xmlChar));
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001867 if (buffer == NULL) goto mem_error;
Owen Taylor3473f882001-02-23 17:55:21 +00001868
1869 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001870 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +00001871 * we are operating on already parsed values.
1872 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00001873 if (str < last)
1874 c = CUR_SCHAR(str, l);
1875 else
1876 c = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001877 while ((c != 0) && (c != end) && /* non input consuming loop */
1878 (c != end2) && (c != end3)) {
1879
1880 if (c == 0) break;
1881 if ((c == '&') && (str[1] == '#')) {
1882 int val = xmlParseStringCharRef(ctxt, &str);
1883 if (val != 0) {
1884 COPY_BUF(0,buffer,nbchars,val);
1885 }
1886 } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
1887 if (xmlParserDebugEntities)
1888 xmlGenericError(xmlGenericErrorContext,
1889 "String decoding Entity Reference: %.30s\n",
1890 str);
1891 ent = xmlParseStringEntityRef(ctxt, &str);
1892 if ((ent != NULL) &&
1893 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
1894 if (ent->content != NULL) {
1895 COPY_BUF(0,buffer,nbchars,ent->content[0]);
1896 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00001897 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
1898 "predefined entity has no content\n");
Owen Taylor3473f882001-02-23 17:55:21 +00001899 }
1900 } else if ((ent != NULL) && (ent->content != NULL)) {
1901 xmlChar *rep;
1902
1903 ctxt->depth++;
1904 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
1905 0, 0, 0);
1906 ctxt->depth--;
1907 if (rep != NULL) {
1908 current = rep;
1909 while (*current != 0) { /* non input consuming loop */
1910 buffer[nbchars++] = *current++;
1911 if (nbchars >
1912 buffer_size - XML_PARSER_BUFFER_SIZE) {
1913 growBuffer(buffer);
1914 }
1915 }
1916 xmlFree(rep);
1917 }
1918 } else if (ent != NULL) {
1919 int i = xmlStrlen(ent->name);
1920 const xmlChar *cur = ent->name;
1921
1922 buffer[nbchars++] = '&';
1923 if (nbchars > buffer_size - i - XML_PARSER_BUFFER_SIZE) {
1924 growBuffer(buffer);
1925 }
1926 for (;i > 0;i--)
1927 buffer[nbchars++] = *cur++;
1928 buffer[nbchars++] = ';';
1929 }
1930 } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
1931 if (xmlParserDebugEntities)
1932 xmlGenericError(xmlGenericErrorContext,
1933 "String decoding PE Reference: %.30s\n", str);
1934 ent = xmlParseStringPEReference(ctxt, &str);
1935 if (ent != NULL) {
1936 xmlChar *rep;
1937
1938 ctxt->depth++;
1939 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
1940 0, 0, 0);
1941 ctxt->depth--;
1942 if (rep != NULL) {
1943 current = rep;
1944 while (*current != 0) { /* non input consuming loop */
1945 buffer[nbchars++] = *current++;
1946 if (nbchars >
1947 buffer_size - XML_PARSER_BUFFER_SIZE) {
1948 growBuffer(buffer);
1949 }
1950 }
1951 xmlFree(rep);
1952 }
1953 }
1954 } else {
1955 COPY_BUF(l,buffer,nbchars,c);
1956 str += l;
1957 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
1958 growBuffer(buffer);
1959 }
1960 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00001961 if (str < last)
1962 c = CUR_SCHAR(str, l);
1963 else
1964 c = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001965 }
1966 buffer[nbchars++] = 0;
1967 return(buffer);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001968
1969mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001970 xmlErrMemory(ctxt, NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001971 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001972}
1973
Daniel Veillarde57ec792003-09-10 10:50:59 +00001974/**
1975 * xmlStringDecodeEntities:
1976 * @ctxt: the parser context
1977 * @str: the input string
1978 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
1979 * @end: an end marker xmlChar, 0 if none
1980 * @end2: an end marker xmlChar, 0 if none
1981 * @end3: an end marker xmlChar, 0 if none
1982 *
1983 * Takes a entity string content and process to do the adequate substitutions.
1984 *
1985 * [67] Reference ::= EntityRef | CharRef
1986 *
1987 * [69] PEReference ::= '%' Name ';'
1988 *
1989 * Returns A newly allocated string with the substitution done. The caller
1990 * must deallocate it !
1991 */
1992xmlChar *
1993xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
1994 xmlChar end, xmlChar end2, xmlChar end3) {
1995 return(xmlStringLenDecodeEntities(ctxt, str, xmlStrlen(str), what,
1996 end, end2, end3));
1997}
Owen Taylor3473f882001-02-23 17:55:21 +00001998
1999/************************************************************************
2000 * *
2001 * Commodity functions to handle xmlChars *
2002 * *
2003 ************************************************************************/
2004
2005/**
2006 * xmlStrndup:
2007 * @cur: the input xmlChar *
2008 * @len: the len of @cur
2009 *
2010 * a strndup for array of xmlChar's
2011 *
2012 * Returns a new xmlChar * or NULL
2013 */
2014xmlChar *
2015xmlStrndup(const xmlChar *cur, int len) {
2016 xmlChar *ret;
2017
2018 if ((cur == NULL) || (len < 0)) return(NULL);
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002019 ret = (xmlChar *) xmlMallocAtomic((len + 1) * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002020 if (ret == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002021 xmlErrMemory(NULL, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002022 return(NULL);
2023 }
2024 memcpy(ret, cur, len * sizeof(xmlChar));
2025 ret[len] = 0;
2026 return(ret);
2027}
2028
2029/**
2030 * xmlStrdup:
2031 * @cur: the input xmlChar *
2032 *
2033 * a strdup for array of xmlChar's. Since they are supposed to be
2034 * encoded in UTF-8 or an encoding with 8bit based chars, we assume
2035 * a termination mark of '0'.
2036 *
2037 * Returns a new xmlChar * or NULL
2038 */
2039xmlChar *
2040xmlStrdup(const xmlChar *cur) {
2041 const xmlChar *p = cur;
2042
2043 if (cur == NULL) return(NULL);
2044 while (*p != 0) p++; /* non input consuming */
2045 return(xmlStrndup(cur, p - cur));
2046}
2047
2048/**
2049 * xmlCharStrndup:
2050 * @cur: the input char *
2051 * @len: the len of @cur
2052 *
2053 * a strndup for char's to xmlChar's
2054 *
2055 * Returns a new xmlChar * or NULL
2056 */
2057
2058xmlChar *
2059xmlCharStrndup(const char *cur, int len) {
2060 int i;
2061 xmlChar *ret;
2062
2063 if ((cur == NULL) || (len < 0)) return(NULL);
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002064 ret = (xmlChar *) xmlMallocAtomic((len + 1) * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002065 if (ret == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002066 xmlErrMemory(NULL, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002067 return(NULL);
2068 }
2069 for (i = 0;i < len;i++)
2070 ret[i] = (xmlChar) cur[i];
2071 ret[len] = 0;
2072 return(ret);
2073}
2074
2075/**
2076 * xmlCharStrdup:
2077 * @cur: the input char *
Owen Taylor3473f882001-02-23 17:55:21 +00002078 *
2079 * a strdup for char's to xmlChar's
2080 *
2081 * Returns a new xmlChar * or NULL
2082 */
2083
2084xmlChar *
2085xmlCharStrdup(const char *cur) {
2086 const char *p = cur;
2087
2088 if (cur == NULL) return(NULL);
2089 while (*p != '\0') p++; /* non input consuming */
2090 return(xmlCharStrndup(cur, p - cur));
2091}
2092
2093/**
2094 * xmlStrcmp:
2095 * @str1: the first xmlChar *
2096 * @str2: the second xmlChar *
2097 *
2098 * a strcmp for xmlChar's
2099 *
2100 * Returns the integer result of the comparison
2101 */
2102
2103int
2104xmlStrcmp(const xmlChar *str1, const xmlChar *str2) {
2105 register int tmp;
2106
2107 if (str1 == str2) return(0);
2108 if (str1 == NULL) return(-1);
2109 if (str2 == NULL) return(1);
2110 do {
2111 tmp = *str1++ - *str2;
2112 if (tmp != 0) return(tmp);
2113 } while (*str2++ != 0);
2114 return 0;
2115}
2116
2117/**
2118 * xmlStrEqual:
2119 * @str1: the first xmlChar *
2120 * @str2: the second xmlChar *
2121 *
2122 * Check if both string are equal of have same content
2123 * Should be a bit more readable and faster than xmlStrEqual()
2124 *
2125 * Returns 1 if they are equal, 0 if they are different
2126 */
2127
2128int
2129xmlStrEqual(const xmlChar *str1, const xmlChar *str2) {
2130 if (str1 == str2) return(1);
2131 if (str1 == NULL) return(0);
2132 if (str2 == NULL) return(0);
2133 do {
2134 if (*str1++ != *str2) return(0);
2135 } while (*str2++);
2136 return(1);
2137}
2138
2139/**
Daniel Veillarde57ec792003-09-10 10:50:59 +00002140 * xmlStrQEqual:
2141 * @pref: the prefix of the QName
2142 * @name: the localname of the QName
2143 * @str: the second xmlChar *
2144 *
2145 * Check if a QName is Equal to a given string
2146 *
2147 * Returns 1 if they are equal, 0 if they are different
2148 */
2149
2150int
2151xmlStrQEqual(const xmlChar *pref, const xmlChar *name, const xmlChar *str) {
2152 if (pref == NULL) return(xmlStrEqual(name, str));
2153 if (name == NULL) return(0);
2154 if (str == NULL) return(0);
2155
2156 do {
2157 if (*pref++ != *str) return(0);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002158 } while ((*str++) && (*pref));
Daniel Veillarde57ec792003-09-10 10:50:59 +00002159 if (*str++ != ':') return(0);
2160 do {
2161 if (*name++ != *str) return(0);
2162 } while (*str++);
2163 return(1);
2164}
2165
2166/**
Owen Taylor3473f882001-02-23 17:55:21 +00002167 * xmlStrncmp:
2168 * @str1: the first xmlChar *
2169 * @str2: the second xmlChar *
2170 * @len: the max comparison length
2171 *
2172 * a strncmp for xmlChar's
2173 *
2174 * Returns the integer result of the comparison
2175 */
2176
2177int
2178xmlStrncmp(const xmlChar *str1, const xmlChar *str2, int len) {
2179 register int tmp;
2180
2181 if (len <= 0) return(0);
2182 if (str1 == str2) return(0);
2183 if (str1 == NULL) return(-1);
2184 if (str2 == NULL) return(1);
2185 do {
2186 tmp = *str1++ - *str2;
2187 if (tmp != 0 || --len == 0) return(tmp);
2188 } while (*str2++ != 0);
2189 return 0;
2190}
2191
Daniel Veillardb44025c2001-10-11 22:55:55 +00002192static const xmlChar casemap[256] = {
Owen Taylor3473f882001-02-23 17:55:21 +00002193 0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,
2194 0x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,0x0F,
2195 0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,
2196 0x18,0x19,0x1A,0x1B,0x1C,0x1D,0x1E,0x1F,
2197 0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,
2198 0x28,0x29,0x2A,0x2B,0x2C,0x2D,0x2E,0x2F,
2199 0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,
2200 0x38,0x39,0x3A,0x3B,0x3C,0x3D,0x3E,0x3F,
2201 0x40,0x61,0x62,0x63,0x64,0x65,0x66,0x67,
2202 0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
2203 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,
2204 0x78,0x79,0x7A,0x7B,0x5C,0x5D,0x5E,0x5F,
2205 0x60,0x61,0x62,0x63,0x64,0x65,0x66,0x67,
2206 0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
2207 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,
2208 0x78,0x79,0x7A,0x7B,0x7C,0x7D,0x7E,0x7F,
2209 0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87,
2210 0x88,0x89,0x8A,0x8B,0x8C,0x8D,0x8E,0x8F,
2211 0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97,
2212 0x98,0x99,0x9A,0x9B,0x9C,0x9D,0x9E,0x9F,
2213 0xA0,0xA1,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7,
2214 0xA8,0xA9,0xAA,0xAB,0xAC,0xAD,0xAE,0xAF,
2215 0xB0,0xB1,0xB2,0xB3,0xB4,0xB5,0xB6,0xB7,
2216 0xB8,0xB9,0xBA,0xBB,0xBC,0xBD,0xBE,0xBF,
2217 0xC0,0xC1,0xC2,0xC3,0xC4,0xC5,0xC6,0xC7,
2218 0xC8,0xC9,0xCA,0xCB,0xCC,0xCD,0xCE,0xCF,
2219 0xD0,0xD1,0xD2,0xD3,0xD4,0xD5,0xD6,0xD7,
2220 0xD8,0xD9,0xDA,0xDB,0xDC,0xDD,0xDE,0xDF,
2221 0xE0,0xE1,0xE2,0xE3,0xE4,0xE5,0xE6,0xE7,
2222 0xE8,0xE9,0xEA,0xEB,0xEC,0xED,0xEE,0xEF,
2223 0xF0,0xF1,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7,
2224 0xF8,0xF9,0xFA,0xFB,0xFC,0xFD,0xFE,0xFF
2225};
2226
2227/**
2228 * xmlStrcasecmp:
2229 * @str1: the first xmlChar *
2230 * @str2: the second xmlChar *
2231 *
2232 * a strcasecmp for xmlChar's
2233 *
2234 * Returns the integer result of the comparison
2235 */
2236
2237int
2238xmlStrcasecmp(const xmlChar *str1, const xmlChar *str2) {
2239 register int tmp;
2240
2241 if (str1 == str2) return(0);
2242 if (str1 == NULL) return(-1);
2243 if (str2 == NULL) return(1);
2244 do {
2245 tmp = casemap[*str1++] - casemap[*str2];
2246 if (tmp != 0) return(tmp);
2247 } while (*str2++ != 0);
2248 return 0;
2249}
2250
2251/**
2252 * xmlStrncasecmp:
2253 * @str1: the first xmlChar *
2254 * @str2: the second xmlChar *
2255 * @len: the max comparison length
2256 *
2257 * a strncasecmp for xmlChar's
2258 *
2259 * Returns the integer result of the comparison
2260 */
2261
2262int
2263xmlStrncasecmp(const xmlChar *str1, const xmlChar *str2, int len) {
2264 register int tmp;
2265
2266 if (len <= 0) return(0);
2267 if (str1 == str2) return(0);
2268 if (str1 == NULL) return(-1);
2269 if (str2 == NULL) return(1);
2270 do {
2271 tmp = casemap[*str1++] - casemap[*str2];
2272 if (tmp != 0 || --len == 0) return(tmp);
2273 } while (*str2++ != 0);
2274 return 0;
2275}
2276
2277/**
2278 * xmlStrchr:
2279 * @str: the xmlChar * array
2280 * @val: the xmlChar to search
2281 *
2282 * a strchr for xmlChar's
2283 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002284 * Returns the xmlChar * for the first occurrence or NULL.
Owen Taylor3473f882001-02-23 17:55:21 +00002285 */
2286
2287const xmlChar *
2288xmlStrchr(const xmlChar *str, xmlChar val) {
2289 if (str == NULL) return(NULL);
2290 while (*str != 0) { /* non input consuming */
2291 if (*str == val) return((xmlChar *) str);
2292 str++;
2293 }
2294 return(NULL);
2295}
2296
2297/**
2298 * xmlStrstr:
2299 * @str: the xmlChar * array (haystack)
2300 * @val: the xmlChar to search (needle)
2301 *
2302 * a strstr for xmlChar's
2303 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002304 * Returns the xmlChar * for the first occurrence or NULL.
Owen Taylor3473f882001-02-23 17:55:21 +00002305 */
2306
2307const xmlChar *
Daniel Veillard77044732001-06-29 21:31:07 +00002308xmlStrstr(const xmlChar *str, const xmlChar *val) {
Owen Taylor3473f882001-02-23 17:55:21 +00002309 int n;
2310
2311 if (str == NULL) return(NULL);
2312 if (val == NULL) return(NULL);
2313 n = xmlStrlen(val);
2314
2315 if (n == 0) return(str);
2316 while (*str != 0) { /* non input consuming */
2317 if (*str == *val) {
2318 if (!xmlStrncmp(str, val, n)) return((const xmlChar *) str);
2319 }
2320 str++;
2321 }
2322 return(NULL);
2323}
2324
2325/**
2326 * xmlStrcasestr:
2327 * @str: the xmlChar * array (haystack)
2328 * @val: the xmlChar to search (needle)
2329 *
2330 * a case-ignoring strstr for xmlChar's
2331 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002332 * Returns the xmlChar * for the first occurrence or NULL.
Owen Taylor3473f882001-02-23 17:55:21 +00002333 */
2334
2335const xmlChar *
2336xmlStrcasestr(const xmlChar *str, xmlChar *val) {
2337 int n;
2338
2339 if (str == NULL) return(NULL);
2340 if (val == NULL) return(NULL);
2341 n = xmlStrlen(val);
2342
2343 if (n == 0) return(str);
2344 while (*str != 0) { /* non input consuming */
2345 if (casemap[*str] == casemap[*val])
2346 if (!xmlStrncasecmp(str, val, n)) return(str);
2347 str++;
2348 }
2349 return(NULL);
2350}
2351
2352/**
2353 * xmlStrsub:
2354 * @str: the xmlChar * array (haystack)
2355 * @start: the index of the first char (zero based)
2356 * @len: the length of the substring
2357 *
2358 * Extract a substring of a given string
2359 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002360 * Returns the xmlChar * for the first occurrence or NULL.
Owen Taylor3473f882001-02-23 17:55:21 +00002361 */
2362
2363xmlChar *
2364xmlStrsub(const xmlChar *str, int start, int len) {
2365 int i;
2366
2367 if (str == NULL) return(NULL);
2368 if (start < 0) return(NULL);
2369 if (len < 0) return(NULL);
2370
2371 for (i = 0;i < start;i++) {
2372 if (*str == 0) return(NULL);
2373 str++;
2374 }
2375 if (*str == 0) return(NULL);
2376 return(xmlStrndup(str, len));
2377}
2378
2379/**
2380 * xmlStrlen:
2381 * @str: the xmlChar * array
2382 *
2383 * length of a xmlChar's string
2384 *
2385 * Returns the number of xmlChar contained in the ARRAY.
2386 */
2387
2388int
2389xmlStrlen(const xmlChar *str) {
2390 int len = 0;
2391
2392 if (str == NULL) return(0);
2393 while (*str != 0) { /* non input consuming */
2394 str++;
2395 len++;
2396 }
2397 return(len);
2398}
2399
2400/**
2401 * xmlStrncat:
2402 * @cur: the original xmlChar * array
2403 * @add: the xmlChar * array added
2404 * @len: the length of @add
2405 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002406 * a strncat for array of xmlChar's, it will extend @cur with the len
Owen Taylor3473f882001-02-23 17:55:21 +00002407 * first bytes of @add.
2408 *
2409 * Returns a new xmlChar *, the original @cur is reallocated if needed
2410 * and should not be freed
2411 */
2412
2413xmlChar *
2414xmlStrncat(xmlChar *cur, const xmlChar *add, int len) {
2415 int size;
2416 xmlChar *ret;
2417
2418 if ((add == NULL) || (len == 0))
2419 return(cur);
2420 if (cur == NULL)
2421 return(xmlStrndup(add, len));
2422
2423 size = xmlStrlen(cur);
2424 ret = (xmlChar *) xmlRealloc(cur, (size + len + 1) * sizeof(xmlChar));
2425 if (ret == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002426 xmlErrMemory(NULL, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002427 return(cur);
2428 }
2429 memcpy(&ret[size], add, len * sizeof(xmlChar));
2430 ret[size + len] = 0;
2431 return(ret);
2432}
2433
2434/**
2435 * xmlStrcat:
2436 * @cur: the original xmlChar * array
2437 * @add: the xmlChar * array added
2438 *
2439 * a strcat for array of xmlChar's. Since they are supposed to be
2440 * encoded in UTF-8 or an encoding with 8bit based chars, we assume
2441 * a termination mark of '0'.
2442 *
2443 * Returns a new xmlChar * containing the concatenated string.
2444 */
2445xmlChar *
2446xmlStrcat(xmlChar *cur, const xmlChar *add) {
2447 const xmlChar *p = add;
2448
2449 if (add == NULL) return(cur);
2450 if (cur == NULL)
2451 return(xmlStrdup(add));
2452
2453 while (*p != 0) p++; /* non input consuming */
2454 return(xmlStrncat(cur, add, p - add));
2455}
2456
Aleksey Sanine7acf432003-10-02 20:05:27 +00002457/**
2458 * xmlStrPrintf:
2459 * @buf: the result buffer.
2460 * @len: the result buffer length.
2461 * @msg: the message with printf formatting.
2462 * @...: extra parameters for the message.
2463 *
2464 * Formats @msg and places result into @buf.
2465 *
2466 * Returns the number of characters written to @buf or -1 if an error occurs.
2467 */
2468int
2469xmlStrPrintf(xmlChar *buf, int len, const xmlChar *msg, ...) {
2470 va_list args;
2471 int ret;
2472
2473 if((buf == NULL) || (msg == NULL)) {
2474 return(-1);
2475 }
2476
2477 va_start(args, msg);
Daniel Veillardbb5abab2003-10-03 22:21:51 +00002478 ret = vsnprintf((char *) buf, len, (const char *) msg, args);
Aleksey Sanine7acf432003-10-02 20:05:27 +00002479 va_end(args);
Daniel Veillardd96f6d32003-10-07 21:25:12 +00002480 buf[len - 1] = 0; /* be safe ! */
Aleksey Sanine7acf432003-10-02 20:05:27 +00002481
2482 return(ret);
2483}
2484
Owen Taylor3473f882001-02-23 17:55:21 +00002485/************************************************************************
2486 * *
2487 * Commodity functions, cleanup needed ? *
2488 * *
2489 ************************************************************************/
2490
2491/**
2492 * areBlanks:
2493 * @ctxt: an XML parser context
2494 * @str: a xmlChar *
2495 * @len: the size of @str
2496 *
2497 * Is this a sequence of blank chars that one can ignore ?
2498 *
2499 * Returns 1 if ignorable 0 otherwise.
2500 */
2501
2502static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len) {
2503 int i, ret;
2504 xmlNodePtr lastChild;
2505
Daniel Veillard05c13a22001-09-09 08:38:09 +00002506 /*
2507 * Don't spend time trying to differentiate them, the same callback is
2508 * used !
2509 */
2510 if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters)
Daniel Veillard2f362242001-03-02 17:36:21 +00002511 return(0);
2512
Owen Taylor3473f882001-02-23 17:55:21 +00002513 /*
2514 * Check for xml:space value.
2515 */
2516 if (*(ctxt->space) == 1)
2517 return(0);
2518
2519 /*
2520 * Check that the string is made of blanks
2521 */
2522 for (i = 0;i < len;i++)
William M. Brack76e95df2003-10-18 16:20:14 +00002523 if (!(IS_BLANK_CH(str[i]))) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002524
2525 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002526 * Look if the element is mixed content in the DTD if available
Owen Taylor3473f882001-02-23 17:55:21 +00002527 */
Daniel Veillard6dd398f2001-07-25 22:41:03 +00002528 if (ctxt->node == NULL) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002529 if (ctxt->myDoc != NULL) {
2530 ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
2531 if (ret == 0) return(1);
2532 if (ret == 1) return(0);
2533 }
2534
2535 /*
2536 * Otherwise, heuristic :-\
2537 */
Daniel Veillard561b7f82002-03-20 21:55:57 +00002538 if (RAW != '<') return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002539 if ((ctxt->node->children == NULL) &&
2540 (RAW == '<') && (NXT(1) == '/')) return(0);
2541
2542 lastChild = xmlGetLastChild(ctxt->node);
2543 if (lastChild == NULL) {
Daniel Veillard7db37732001-07-12 01:20:08 +00002544 if ((ctxt->node->type != XML_ELEMENT_NODE) &&
2545 (ctxt->node->content != NULL)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002546 } else if (xmlNodeIsText(lastChild))
2547 return(0);
2548 else if ((ctxt->node->children != NULL) &&
2549 (xmlNodeIsText(ctxt->node->children)))
2550 return(0);
2551 return(1);
2552}
2553
Owen Taylor3473f882001-02-23 17:55:21 +00002554/************************************************************************
2555 * *
2556 * Extra stuff for namespace support *
2557 * Relates to http://www.w3.org/TR/WD-xml-names *
2558 * *
2559 ************************************************************************/
2560
2561/**
2562 * xmlSplitQName:
2563 * @ctxt: an XML parser context
2564 * @name: an XML parser context
2565 * @prefix: a xmlChar **
2566 *
2567 * parse an UTF8 encoded XML qualified name string
2568 *
2569 * [NS 5] QName ::= (Prefix ':')? LocalPart
2570 *
2571 * [NS 6] Prefix ::= NCName
2572 *
2573 * [NS 7] LocalPart ::= NCName
2574 *
2575 * Returns the local part, and prefix is updated
2576 * to get the Prefix if any.
2577 */
2578
2579xmlChar *
2580xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
2581 xmlChar buf[XML_MAX_NAMELEN + 5];
2582 xmlChar *buffer = NULL;
2583 int len = 0;
2584 int max = XML_MAX_NAMELEN;
2585 xmlChar *ret = NULL;
2586 const xmlChar *cur = name;
2587 int c;
2588
2589 *prefix = NULL;
2590
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00002591 if (cur == NULL) return(NULL);
2592
Owen Taylor3473f882001-02-23 17:55:21 +00002593#ifndef XML_XML_NAMESPACE
2594 /* xml: prefix is not really a namespace */
2595 if ((cur[0] == 'x') && (cur[1] == 'm') &&
2596 (cur[2] == 'l') && (cur[3] == ':'))
2597 return(xmlStrdup(name));
2598#endif
2599
Daniel Veillard597bc482003-07-24 16:08:28 +00002600 /* nasty but well=formed */
Owen Taylor3473f882001-02-23 17:55:21 +00002601 if (cur[0] == ':')
2602 return(xmlStrdup(name));
2603
2604 c = *cur++;
2605 while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
2606 buf[len++] = c;
2607 c = *cur++;
2608 }
2609 if (len >= max) {
2610 /*
2611 * Okay someone managed to make a huge name, so he's ready to pay
2612 * for the processing speed.
2613 */
2614 max = len * 2;
2615
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002616 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002617 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002618 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002619 return(NULL);
2620 }
2621 memcpy(buffer, buf, len);
2622 while ((c != 0) && (c != ':')) { /* tested bigname.xml */
2623 if (len + 10 > max) {
2624 max *= 2;
2625 buffer = (xmlChar *) xmlRealloc(buffer,
2626 max * sizeof(xmlChar));
2627 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002628 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002629 return(NULL);
2630 }
2631 }
2632 buffer[len++] = c;
2633 c = *cur++;
2634 }
2635 buffer[len] = 0;
2636 }
2637
Daniel Veillard597bc482003-07-24 16:08:28 +00002638 /* nasty but well=formed
2639 if ((c == ':') && (*cur == 0)) {
2640 return(xmlStrdup(name));
2641 } */
2642
Owen Taylor3473f882001-02-23 17:55:21 +00002643 if (buffer == NULL)
2644 ret = xmlStrndup(buf, len);
2645 else {
2646 ret = buffer;
2647 buffer = NULL;
2648 max = XML_MAX_NAMELEN;
2649 }
2650
2651
2652 if (c == ':') {
Daniel Veillardbb284f42002-10-16 18:02:47 +00002653 c = *cur;
Owen Taylor3473f882001-02-23 17:55:21 +00002654 *prefix = ret;
Daniel Veillard597bc482003-07-24 16:08:28 +00002655 if (c == 0) {
Daniel Veillard8d73bcb2003-08-04 01:06:15 +00002656 return(xmlStrndup(BAD_CAST "", 0));
Daniel Veillard597bc482003-07-24 16:08:28 +00002657 }
Owen Taylor3473f882001-02-23 17:55:21 +00002658 len = 0;
2659
Daniel Veillardbb284f42002-10-16 18:02:47 +00002660 /*
2661 * Check that the first character is proper to start
2662 * a new name
2663 */
2664 if (!(((c >= 0x61) && (c <= 0x7A)) ||
2665 ((c >= 0x41) && (c <= 0x5A)) ||
2666 (c == '_') || (c == ':'))) {
2667 int l;
2668 int first = CUR_SCHAR(cur, l);
2669
2670 if (!IS_LETTER(first) && (first != '_')) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00002671 xmlFatalErrMsgStr(ctxt, XML_NS_ERR_QNAME,
Daniel Veillardbb284f42002-10-16 18:02:47 +00002672 "Name %s is not XML Namespace compliant\n",
Daniel Veillardbc92eca2003-09-15 09:48:06 +00002673 name);
Daniel Veillardbb284f42002-10-16 18:02:47 +00002674 }
2675 }
2676 cur++;
2677
Owen Taylor3473f882001-02-23 17:55:21 +00002678 while ((c != 0) && (len < max)) { /* tested bigname2.xml */
2679 buf[len++] = c;
2680 c = *cur++;
2681 }
2682 if (len >= max) {
2683 /*
2684 * Okay someone managed to make a huge name, so he's ready to pay
2685 * for the processing speed.
2686 */
2687 max = len * 2;
2688
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002689 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002690 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002691 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002692 return(NULL);
2693 }
2694 memcpy(buffer, buf, len);
2695 while (c != 0) { /* tested bigname2.xml */
2696 if (len + 10 > max) {
2697 max *= 2;
2698 buffer = (xmlChar *) xmlRealloc(buffer,
2699 max * sizeof(xmlChar));
2700 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002701 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002702 return(NULL);
2703 }
2704 }
2705 buffer[len++] = c;
2706 c = *cur++;
2707 }
2708 buffer[len] = 0;
2709 }
2710
2711 if (buffer == NULL)
2712 ret = xmlStrndup(buf, len);
2713 else {
2714 ret = buffer;
2715 }
2716 }
2717
2718 return(ret);
2719}
2720
2721/************************************************************************
2722 * *
2723 * The parser itself *
2724 * Relates to http://www.w3.org/TR/REC-xml *
2725 * *
2726 ************************************************************************/
2727
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002728static const xmlChar * xmlParseNameComplex(xmlParserCtxtPtr ctxt);
Daniel Veillarde57ec792003-09-10 10:50:59 +00002729static xmlChar * xmlParseAttValueInternal(xmlParserCtxtPtr ctxt,
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002730 int *len, int *alloc, int normalize);
Daniel Veillard0fb18932003-09-07 09:14:37 +00002731
Owen Taylor3473f882001-02-23 17:55:21 +00002732/**
2733 * xmlParseName:
2734 * @ctxt: an XML parser context
2735 *
2736 * parse an XML name.
2737 *
2738 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
2739 * CombiningChar | Extender
2740 *
2741 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
2742 *
2743 * [6] Names ::= Name (S Name)*
2744 *
2745 * Returns the Name parsed or NULL
2746 */
2747
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002748const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00002749xmlParseName(xmlParserCtxtPtr ctxt) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00002750 const xmlChar *in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002751 const xmlChar *ret;
Owen Taylor3473f882001-02-23 17:55:21 +00002752 int count = 0;
2753
2754 GROW;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002755
2756 /*
2757 * Accelerator for simple ASCII names
2758 */
2759 in = ctxt->input->cur;
2760 if (((*in >= 0x61) && (*in <= 0x7A)) ||
2761 ((*in >= 0x41) && (*in <= 0x5A)) ||
2762 (*in == '_') || (*in == ':')) {
2763 in++;
2764 while (((*in >= 0x61) && (*in <= 0x7A)) ||
2765 ((*in >= 0x41) && (*in <= 0x5A)) ||
2766 ((*in >= 0x30) && (*in <= 0x39)) ||
Daniel Veillard76d66f42001-05-16 21:05:17 +00002767 (*in == '_') || (*in == '-') ||
2768 (*in == ':') || (*in == '.'))
Daniel Veillard48b2f892001-02-25 16:11:03 +00002769 in++;
Daniel Veillard76d66f42001-05-16 21:05:17 +00002770 if ((*in > 0) && (*in < 0x80)) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00002771 count = in - ctxt->input->cur;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002772 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
Daniel Veillard48b2f892001-02-25 16:11:03 +00002773 ctxt->input->cur = in;
Daniel Veillard77a90a72003-03-22 00:04:05 +00002774 ctxt->nbChars += count;
2775 ctxt->input->col += count;
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002776 if (ret == NULL)
2777 xmlErrMemory(ctxt, NULL);
Daniel Veillard48b2f892001-02-25 16:11:03 +00002778 return(ret);
2779 }
2780 }
Daniel Veillard2f362242001-03-02 17:36:21 +00002781 return(xmlParseNameComplex(ctxt));
Daniel Veillard21a0f912001-02-25 19:54:14 +00002782}
Daniel Veillard48b2f892001-02-25 16:11:03 +00002783
Daniel Veillard46de64e2002-05-29 08:21:33 +00002784/**
2785 * xmlParseNameAndCompare:
2786 * @ctxt: an XML parser context
2787 *
2788 * parse an XML name and compares for match
2789 * (specialized for endtag parsing)
2790 *
Daniel Veillard46de64e2002-05-29 08:21:33 +00002791 * Returns NULL for an illegal name, (xmlChar*) 1 for success
2792 * and the name for mismatch
2793 */
2794
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002795static const xmlChar *
Daniel Veillard46de64e2002-05-29 08:21:33 +00002796xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) {
2797 const xmlChar *cmp = other;
2798 const xmlChar *in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002799 const xmlChar *ret;
Daniel Veillard46de64e2002-05-29 08:21:33 +00002800
2801 GROW;
2802
2803 in = ctxt->input->cur;
2804 while (*in != 0 && *in == *cmp) {
2805 ++in;
2806 ++cmp;
2807 }
William M. Brack76e95df2003-10-18 16:20:14 +00002808 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
Daniel Veillard46de64e2002-05-29 08:21:33 +00002809 /* success */
2810 ctxt->input->cur = in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002811 return (const xmlChar*) 1;
Daniel Veillard46de64e2002-05-29 08:21:33 +00002812 }
2813 /* failure (or end of input buffer), check with full function */
2814 ret = xmlParseName (ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00002815 /* strings coming from the dictionnary direct compare possible */
2816 if (ret == other) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002817 return (const xmlChar*) 1;
Daniel Veillard46de64e2002-05-29 08:21:33 +00002818 }
2819 return ret;
2820}
2821
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002822static const xmlChar *
Daniel Veillard21a0f912001-02-25 19:54:14 +00002823xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
Daniel Veillard21a0f912001-02-25 19:54:14 +00002824 int len = 0, l;
2825 int c;
2826 int count = 0;
2827
2828 /*
2829 * Handler for more complex cases
2830 */
2831 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00002832 c = CUR_CHAR(l);
2833 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
2834 (!IS_LETTER(c) && (c != '_') &&
2835 (c != ':'))) {
2836 return(NULL);
2837 }
2838
2839 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
William M. Brack871611b2003-10-18 04:53:14 +00002840 ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
Owen Taylor3473f882001-02-23 17:55:21 +00002841 (c == '.') || (c == '-') ||
2842 (c == '_') || (c == ':') ||
William M. Brack871611b2003-10-18 04:53:14 +00002843 (IS_COMBINING(c)) ||
2844 (IS_EXTENDER(c)))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002845 if (count++ > 100) {
2846 count = 0;
2847 GROW;
2848 }
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002849 len += l;
Owen Taylor3473f882001-02-23 17:55:21 +00002850 NEXTL(l);
2851 c = CUR_CHAR(l);
Owen Taylor3473f882001-02-23 17:55:21 +00002852 }
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002853 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
Owen Taylor3473f882001-02-23 17:55:21 +00002854}
2855
2856/**
2857 * xmlParseStringName:
2858 * @ctxt: an XML parser context
2859 * @str: a pointer to the string pointer (IN/OUT)
2860 *
2861 * parse an XML name.
2862 *
2863 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
2864 * CombiningChar | Extender
2865 *
2866 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
2867 *
2868 * [6] Names ::= Name (S Name)*
2869 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002870 * Returns the Name parsed or NULL. The @str pointer
Owen Taylor3473f882001-02-23 17:55:21 +00002871 * is updated to the current location in the string.
2872 */
2873
Daniel Veillard56a4cb82001-03-24 17:00:36 +00002874static xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00002875xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
2876 xmlChar buf[XML_MAX_NAMELEN + 5];
2877 const xmlChar *cur = *str;
2878 int len = 0, l;
2879 int c;
2880
2881 c = CUR_SCHAR(cur, l);
William M. Brack871611b2003-10-18 04:53:14 +00002882 if (!IS_LETTER(c) && (c != '_') &&
Owen Taylor3473f882001-02-23 17:55:21 +00002883 (c != ':')) {
2884 return(NULL);
2885 }
2886
William M. Brack871611b2003-10-18 04:53:14 +00002887 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigentname.xml */
Owen Taylor3473f882001-02-23 17:55:21 +00002888 (c == '.') || (c == '-') ||
2889 (c == '_') || (c == ':') ||
William M. Brack871611b2003-10-18 04:53:14 +00002890 (IS_COMBINING(c)) ||
2891 (IS_EXTENDER(c))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002892 COPY_BUF(l,buf,len,c);
2893 cur += l;
2894 c = CUR_SCHAR(cur, l);
2895 if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
2896 /*
2897 * Okay someone managed to make a huge name, so he's ready to pay
2898 * for the processing speed.
2899 */
2900 xmlChar *buffer;
2901 int max = len * 2;
2902
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002903 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002904 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002905 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002906 return(NULL);
2907 }
2908 memcpy(buffer, buf, len);
William M. Brack871611b2003-10-18 04:53:14 +00002909 while ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
Daniel Veillard73b013f2003-09-30 12:36:01 +00002910 /* test bigentname.xml */
Owen Taylor3473f882001-02-23 17:55:21 +00002911 (c == '.') || (c == '-') ||
2912 (c == '_') || (c == ':') ||
William M. Brack871611b2003-10-18 04:53:14 +00002913 (IS_COMBINING(c)) ||
2914 (IS_EXTENDER(c))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002915 if (len + 10 > max) {
2916 max *= 2;
2917 buffer = (xmlChar *) xmlRealloc(buffer,
2918 max * sizeof(xmlChar));
2919 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002920 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002921 return(NULL);
2922 }
2923 }
2924 COPY_BUF(l,buffer,len,c);
2925 cur += l;
2926 c = CUR_SCHAR(cur, l);
2927 }
2928 buffer[len] = 0;
2929 *str = cur;
2930 return(buffer);
2931 }
2932 }
2933 *str = cur;
2934 return(xmlStrndup(buf, len));
2935}
2936
2937/**
2938 * xmlParseNmtoken:
2939 * @ctxt: an XML parser context
2940 *
2941 * parse an XML Nmtoken.
2942 *
2943 * [7] Nmtoken ::= (NameChar)+
2944 *
2945 * [8] Nmtokens ::= Nmtoken (S Nmtoken)*
2946 *
2947 * Returns the Nmtoken parsed or NULL
2948 */
2949
2950xmlChar *
2951xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
2952 xmlChar buf[XML_MAX_NAMELEN + 5];
2953 int len = 0, l;
2954 int c;
2955 int count = 0;
2956
2957 GROW;
2958 c = CUR_CHAR(l);
2959
William M. Brack871611b2003-10-18 04:53:14 +00002960 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */
Owen Taylor3473f882001-02-23 17:55:21 +00002961 (c == '.') || (c == '-') ||
2962 (c == '_') || (c == ':') ||
William M. Brack871611b2003-10-18 04:53:14 +00002963 (IS_COMBINING(c)) ||
2964 (IS_EXTENDER(c))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002965 if (count++ > 100) {
2966 count = 0;
2967 GROW;
2968 }
2969 COPY_BUF(l,buf,len,c);
2970 NEXTL(l);
2971 c = CUR_CHAR(l);
2972 if (len >= XML_MAX_NAMELEN) {
2973 /*
2974 * Okay someone managed to make a huge token, so he's ready to pay
2975 * for the processing speed.
2976 */
2977 xmlChar *buffer;
2978 int max = len * 2;
2979
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002980 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002981 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002982 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002983 return(NULL);
2984 }
2985 memcpy(buffer, buf, len);
William M. Brack871611b2003-10-18 04:53:14 +00002986 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */
Owen Taylor3473f882001-02-23 17:55:21 +00002987 (c == '.') || (c == '-') ||
2988 (c == '_') || (c == ':') ||
William M. Brack871611b2003-10-18 04:53:14 +00002989 (IS_COMBINING(c)) ||
2990 (IS_EXTENDER(c))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002991 if (count++ > 100) {
2992 count = 0;
2993 GROW;
2994 }
2995 if (len + 10 > max) {
2996 max *= 2;
2997 buffer = (xmlChar *) xmlRealloc(buffer,
2998 max * sizeof(xmlChar));
2999 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003000 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003001 return(NULL);
3002 }
3003 }
3004 COPY_BUF(l,buffer,len,c);
3005 NEXTL(l);
3006 c = CUR_CHAR(l);
3007 }
3008 buffer[len] = 0;
3009 return(buffer);
3010 }
3011 }
3012 if (len == 0)
3013 return(NULL);
3014 return(xmlStrndup(buf, len));
3015}
3016
3017/**
3018 * xmlParseEntityValue:
3019 * @ctxt: an XML parser context
3020 * @orig: if non-NULL store a copy of the original entity value
3021 *
3022 * parse a value for ENTITY declarations
3023 *
3024 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
3025 * "'" ([^%&'] | PEReference | Reference)* "'"
3026 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003027 * Returns the EntityValue parsed with reference substituted or NULL
Owen Taylor3473f882001-02-23 17:55:21 +00003028 */
3029
3030xmlChar *
3031xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
3032 xmlChar *buf = NULL;
3033 int len = 0;
3034 int size = XML_PARSER_BUFFER_SIZE;
3035 int c, l;
3036 xmlChar stop;
3037 xmlChar *ret = NULL;
3038 const xmlChar *cur = NULL;
3039 xmlParserInputPtr input;
3040
3041 if (RAW == '"') stop = '"';
3042 else if (RAW == '\'') stop = '\'';
3043 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003044 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003045 return(NULL);
3046 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003047 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003048 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003049 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003050 return(NULL);
3051 }
3052
3053 /*
3054 * The content of the entity definition is copied in a buffer.
3055 */
3056
3057 ctxt->instate = XML_PARSER_ENTITY_VALUE;
3058 input = ctxt->input;
3059 GROW;
3060 NEXT;
3061 c = CUR_CHAR(l);
3062 /*
3063 * NOTE: 4.4.5 Included in Literal
3064 * When a parameter entity reference appears in a literal entity
3065 * value, ... a single or double quote character in the replacement
3066 * text is always treated as a normal data character and will not
3067 * terminate the literal.
3068 * In practice it means we stop the loop only when back at parsing
3069 * the initial entity and the quote is found
3070 */
William M. Brack871611b2003-10-18 04:53:14 +00003071 while ((IS_CHAR(c)) && ((c != stop) || /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00003072 (ctxt->input != input))) {
3073 if (len + 5 >= size) {
3074 size *= 2;
3075 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3076 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003077 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003078 return(NULL);
3079 }
3080 }
3081 COPY_BUF(l,buf,len,c);
3082 NEXTL(l);
3083 /*
3084 * Pop-up of finished entities.
3085 */
3086 while ((RAW == 0) && (ctxt->inputNr > 1)) /* non input consuming */
3087 xmlPopInput(ctxt);
3088
3089 GROW;
3090 c = CUR_CHAR(l);
3091 if (c == 0) {
3092 GROW;
3093 c = CUR_CHAR(l);
3094 }
3095 }
3096 buf[len] = 0;
3097
3098 /*
3099 * Raise problem w.r.t. '&' and '%' being used in non-entities
3100 * reference constructs. Note Charref will be handled in
3101 * xmlStringDecodeEntities()
3102 */
3103 cur = buf;
3104 while (*cur != 0) { /* non input consuming */
3105 if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
3106 xmlChar *name;
3107 xmlChar tmp = *cur;
3108
3109 cur++;
3110 name = xmlParseStringName(ctxt, &cur);
3111 if ((name == NULL) || (*cur != ';')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003112 xmlFatalErrMsgInt(ctxt, XML_ERR_ENTITY_CHAR_ERROR,
Owen Taylor3473f882001-02-23 17:55:21 +00003113 "EntityValue: '%c' forbidden except for entities references\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003114 tmp);
Owen Taylor3473f882001-02-23 17:55:21 +00003115 }
Daniel Veillard5151c062001-10-23 13:10:19 +00003116 if ((tmp == '%') && (ctxt->inSubset == 1) &&
3117 (ctxt->inputNr == 1)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003118 xmlFatalErr(ctxt, XML_ERR_ENTITY_PE_INTERNAL, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003119 }
3120 if (name != NULL)
3121 xmlFree(name);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00003122 if (*cur == 0)
3123 break;
Owen Taylor3473f882001-02-23 17:55:21 +00003124 }
3125 cur++;
3126 }
3127
3128 /*
3129 * Then PEReference entities are substituted.
3130 */
3131 if (c != stop) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003132 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003133 xmlFree(buf);
3134 } else {
3135 NEXT;
3136 /*
3137 * NOTE: 4.4.7 Bypassed
3138 * When a general entity reference appears in the EntityValue in
3139 * an entity declaration, it is bypassed and left as is.
3140 * so XML_SUBSTITUTE_REF is not set here.
3141 */
3142 ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF,
3143 0, 0, 0);
3144 if (orig != NULL)
3145 *orig = buf;
3146 else
3147 xmlFree(buf);
3148 }
3149
3150 return(ret);
3151}
3152
3153/**
Daniel Veillard01c13b52002-12-10 15:19:08 +00003154 * xmlParseAttValueComplex:
3155 * @ctxt: an XML parser context
Daniel Veillard0fb18932003-09-07 09:14:37 +00003156 * @len: the resulting attribute len
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003157 * @normalize: wether to apply the inner normalization
Daniel Veillard01c13b52002-12-10 15:19:08 +00003158 *
3159 * parse a value for an attribute, this is the fallback function
3160 * of xmlParseAttValue() when the attribute parsing requires handling
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003161 * of non-ASCII characters, or normalization compaction.
Daniel Veillard01c13b52002-12-10 15:19:08 +00003162 *
3163 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
3164 */
Daniel Veillard0fb18932003-09-07 09:14:37 +00003165static xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003166xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) {
Daniel Veillarde72c7562002-05-31 09:47:30 +00003167 xmlChar limit = 0;
3168 xmlChar *buf = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00003169 int len = 0;
3170 int buf_size = 0;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003171 int c, l, in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003172 xmlChar *current = NULL;
3173 xmlEntityPtr ent;
3174
Owen Taylor3473f882001-02-23 17:55:21 +00003175 if (NXT(0) == '"') {
3176 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3177 limit = '"';
3178 NEXT;
3179 } else if (NXT(0) == '\'') {
3180 limit = '\'';
3181 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3182 NEXT;
3183 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003184 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003185 return(NULL);
3186 }
3187
3188 /*
3189 * allocate a translation buffer.
3190 */
3191 buf_size = XML_PARSER_BUFFER_SIZE;
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003192 buf = (xmlChar *) xmlMallocAtomic(buf_size * sizeof(xmlChar));
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003193 if (buf == NULL) goto mem_error;
Owen Taylor3473f882001-02-23 17:55:21 +00003194
3195 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003196 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +00003197 */
3198 c = CUR_CHAR(l);
Daniel Veillardfdc91562002-07-01 21:52:03 +00003199 while ((NXT(0) != limit) && /* checked */
3200 (c != '<')) {
Owen Taylor3473f882001-02-23 17:55:21 +00003201 if (c == 0) break;
Daniel Veillardfdc91562002-07-01 21:52:03 +00003202 if (c == '&') {
Daniel Veillard62998c02003-09-15 12:56:36 +00003203 in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003204 if (NXT(1) == '#') {
3205 int val = xmlParseCharRef(ctxt);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003206
Owen Taylor3473f882001-02-23 17:55:21 +00003207 if (val == '&') {
Daniel Veillard319a7422001-09-11 09:27:09 +00003208 if (ctxt->replaceEntities) {
3209 if (len > buf_size - 10) {
3210 growBuffer(buf);
3211 }
3212 buf[len++] = '&';
3213 } else {
3214 /*
3215 * The reparsing will be done in xmlStringGetNodeList()
3216 * called by the attribute() function in SAX.c
3217 */
Daniel Veillard319a7422001-09-11 09:27:09 +00003218 if (len > buf_size - 10) {
3219 growBuffer(buf);
3220 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003221 buf[len++] = '&';
3222 buf[len++] = '#';
3223 buf[len++] = '3';
3224 buf[len++] = '8';
3225 buf[len++] = ';';
Owen Taylor3473f882001-02-23 17:55:21 +00003226 }
3227 } else {
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00003228 if (len > buf_size - 10) {
3229 growBuffer(buf);
3230 }
Owen Taylor3473f882001-02-23 17:55:21 +00003231 len += xmlCopyChar(0, &buf[len], val);
3232 }
3233 } else {
3234 ent = xmlParseEntityRef(ctxt);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003235 if ((ent != NULL) &&
3236 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
3237 if (len > buf_size - 10) {
3238 growBuffer(buf);
3239 }
3240 if ((ctxt->replaceEntities == 0) &&
3241 (ent->content[0] == '&')) {
3242 buf[len++] = '&';
3243 buf[len++] = '#';
3244 buf[len++] = '3';
3245 buf[len++] = '8';
3246 buf[len++] = ';';
3247 } else {
3248 buf[len++] = ent->content[0];
3249 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003250 } else if ((ent != NULL) &&
3251 (ctxt->replaceEntities != 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003252 xmlChar *rep;
3253
3254 if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
3255 rep = xmlStringDecodeEntities(ctxt, ent->content,
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003256 XML_SUBSTITUTE_REF,
3257 0, 0, 0);
Owen Taylor3473f882001-02-23 17:55:21 +00003258 if (rep != NULL) {
3259 current = rep;
3260 while (*current != 0) { /* non input consuming */
3261 buf[len++] = *current++;
3262 if (len > buf_size - 10) {
3263 growBuffer(buf);
3264 }
3265 }
3266 xmlFree(rep);
3267 }
3268 } else {
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00003269 if (len > buf_size - 10) {
3270 growBuffer(buf);
3271 }
Owen Taylor3473f882001-02-23 17:55:21 +00003272 if (ent->content != NULL)
3273 buf[len++] = ent->content[0];
3274 }
3275 } else if (ent != NULL) {
3276 int i = xmlStrlen(ent->name);
3277 const xmlChar *cur = ent->name;
3278
3279 /*
3280 * This may look absurd but is needed to detect
3281 * entities problems
3282 */
3283 if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
3284 (ent->content != NULL)) {
3285 xmlChar *rep;
3286 rep = xmlStringDecodeEntities(ctxt, ent->content,
3287 XML_SUBSTITUTE_REF, 0, 0, 0);
3288 if (rep != NULL)
3289 xmlFree(rep);
3290 }
3291
3292 /*
3293 * Just output the reference
3294 */
3295 buf[len++] = '&';
3296 if (len > buf_size - i - 10) {
3297 growBuffer(buf);
3298 }
3299 for (;i > 0;i--)
3300 buf[len++] = *cur++;
3301 buf[len++] = ';';
3302 }
3303 }
3304 } else {
3305 if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003306 if ((len != 0) || (!normalize)) {
3307 if ((!normalize) || (!in_space)) {
3308 COPY_BUF(l,buf,len,0x20);
3309 if (len > buf_size - 10) {
3310 growBuffer(buf);
3311 }
3312 }
3313 in_space = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003314 }
3315 } else {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003316 in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003317 COPY_BUF(l,buf,len,c);
3318 if (len > buf_size - 10) {
3319 growBuffer(buf);
3320 }
3321 }
3322 NEXTL(l);
3323 }
3324 GROW;
3325 c = CUR_CHAR(l);
3326 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003327 if ((in_space) && (normalize)) {
3328 while (buf[len - 1] == 0x20) len--;
3329 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00003330 buf[len] = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003331 if (RAW == '<') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003332 xmlFatalErr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003333 } else if (RAW != limit) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003334 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
3335 "AttValue: ' expected\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003336 } else
3337 NEXT;
Daniel Veillard0fb18932003-09-07 09:14:37 +00003338 if (attlen != NULL) *attlen = len;
Owen Taylor3473f882001-02-23 17:55:21 +00003339 return(buf);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003340
3341mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003342 xmlErrMemory(ctxt, NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003343 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003344}
3345
3346/**
Daniel Veillard0fb18932003-09-07 09:14:37 +00003347 * xmlParseAttValue:
3348 * @ctxt: an XML parser context
3349 *
3350 * parse a value for an attribute
3351 * Note: the parser won't do substitution of entities here, this
3352 * will be handled later in xmlStringGetNodeList
3353 *
3354 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
3355 * "'" ([^<&'] | Reference)* "'"
3356 *
3357 * 3.3.3 Attribute-Value Normalization:
3358 * Before the value of an attribute is passed to the application or
3359 * checked for validity, the XML processor must normalize it as follows:
3360 * - a character reference is processed by appending the referenced
3361 * character to the attribute value
3362 * - an entity reference is processed by recursively processing the
3363 * replacement text of the entity
3364 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
3365 * appending #x20 to the normalized value, except that only a single
3366 * #x20 is appended for a "#xD#xA" sequence that is part of an external
3367 * parsed entity or the literal entity value of an internal parsed entity
3368 * - other characters are processed by appending them to the normalized value
3369 * If the declared value is not CDATA, then the XML processor must further
3370 * process the normalized attribute value by discarding any leading and
3371 * trailing space (#x20) characters, and by replacing sequences of space
3372 * (#x20) characters by a single space (#x20) character.
3373 * All attributes for which no declaration has been read should be treated
3374 * by a non-validating parser as if declared CDATA.
3375 *
3376 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
3377 */
3378
3379
3380xmlChar *
3381xmlParseAttValue(xmlParserCtxtPtr ctxt) {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003382 return(xmlParseAttValueInternal(ctxt, NULL, NULL, 0));
Daniel Veillard0fb18932003-09-07 09:14:37 +00003383}
3384
3385/**
Owen Taylor3473f882001-02-23 17:55:21 +00003386 * xmlParseSystemLiteral:
3387 * @ctxt: an XML parser context
3388 *
3389 * parse an XML Literal
3390 *
3391 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
3392 *
3393 * Returns the SystemLiteral parsed or NULL
3394 */
3395
3396xmlChar *
3397xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
3398 xmlChar *buf = NULL;
3399 int len = 0;
3400 int size = XML_PARSER_BUFFER_SIZE;
3401 int cur, l;
3402 xmlChar stop;
3403 int state = ctxt->instate;
3404 int count = 0;
3405
3406 SHRINK;
3407 if (RAW == '"') {
3408 NEXT;
3409 stop = '"';
3410 } else if (RAW == '\'') {
3411 NEXT;
3412 stop = '\'';
3413 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003414 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003415 return(NULL);
3416 }
3417
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003418 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003419 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003420 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003421 return(NULL);
3422 }
3423 ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
3424 cur = CUR_CHAR(l);
William M. Brack871611b2003-10-18 04:53:14 +00003425 while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00003426 if (len + 5 >= size) {
3427 size *= 2;
3428 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3429 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003430 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003431 ctxt->instate = (xmlParserInputState) state;
3432 return(NULL);
3433 }
3434 }
3435 count++;
3436 if (count > 50) {
3437 GROW;
3438 count = 0;
3439 }
3440 COPY_BUF(l,buf,len,cur);
3441 NEXTL(l);
3442 cur = CUR_CHAR(l);
3443 if (cur == 0) {
3444 GROW;
3445 SHRINK;
3446 cur = CUR_CHAR(l);
3447 }
3448 }
3449 buf[len] = 0;
3450 ctxt->instate = (xmlParserInputState) state;
William M. Brack871611b2003-10-18 04:53:14 +00003451 if (!IS_CHAR(cur)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003452 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003453 } else {
3454 NEXT;
3455 }
3456 return(buf);
3457}
3458
3459/**
3460 * xmlParsePubidLiteral:
3461 * @ctxt: an XML parser context
3462 *
3463 * parse an XML public literal
3464 *
3465 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
3466 *
3467 * Returns the PubidLiteral parsed or NULL.
3468 */
3469
3470xmlChar *
3471xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
3472 xmlChar *buf = NULL;
3473 int len = 0;
3474 int size = XML_PARSER_BUFFER_SIZE;
3475 xmlChar cur;
3476 xmlChar stop;
3477 int count = 0;
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003478 xmlParserInputState oldstate = ctxt->instate;
Owen Taylor3473f882001-02-23 17:55:21 +00003479
3480 SHRINK;
3481 if (RAW == '"') {
3482 NEXT;
3483 stop = '"';
3484 } else if (RAW == '\'') {
3485 NEXT;
3486 stop = '\'';
3487 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003488 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003489 return(NULL);
3490 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003491 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003492 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003493 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003494 return(NULL);
3495 }
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003496 ctxt->instate = XML_PARSER_PUBLIC_LITERAL;
Owen Taylor3473f882001-02-23 17:55:21 +00003497 cur = CUR;
William M. Brack76e95df2003-10-18 16:20:14 +00003498 while ((IS_PUBIDCHAR_CH(cur)) && (cur != stop)) { /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00003499 if (len + 1 >= size) {
3500 size *= 2;
3501 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3502 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003503 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003504 return(NULL);
3505 }
3506 }
3507 buf[len++] = cur;
3508 count++;
3509 if (count > 50) {
3510 GROW;
3511 count = 0;
3512 }
3513 NEXT;
3514 cur = CUR;
3515 if (cur == 0) {
3516 GROW;
3517 SHRINK;
3518 cur = CUR;
3519 }
3520 }
3521 buf[len] = 0;
3522 if (cur != stop) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003523 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003524 } else {
3525 NEXT;
3526 }
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003527 ctxt->instate = oldstate;
Owen Taylor3473f882001-02-23 17:55:21 +00003528 return(buf);
3529}
3530
Daniel Veillard48b2f892001-02-25 16:11:03 +00003531void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata);
Owen Taylor3473f882001-02-23 17:55:21 +00003532/**
3533 * xmlParseCharData:
3534 * @ctxt: an XML parser context
3535 * @cdata: int indicating whether we are within a CDATA section
3536 *
3537 * parse a CharData section.
3538 * if we are within a CDATA section ']]>' marks an end of section.
3539 *
3540 * The right angle bracket (>) may be represented using the string "&gt;",
3541 * and must, for compatibility, be escaped using "&gt;" or a character
3542 * reference when it appears in the string "]]>" in content, when that
3543 * string is not marking the end of a CDATA section.
3544 *
3545 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
3546 */
3547
3548void
3549xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00003550 const xmlChar *in;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003551 int nbchar = 0;
Daniel Veillard50582112001-03-26 22:52:16 +00003552 int line = ctxt->input->line;
3553 int col = ctxt->input->col;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003554
3555 SHRINK;
3556 GROW;
3557 /*
3558 * Accelerated common case where input don't need to be
3559 * modified before passing it to the handler.
3560 */
Daniel Veillardfdc91562002-07-01 21:52:03 +00003561 if (!cdata) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00003562 in = ctxt->input->cur;
3563 do {
Daniel Veillard3ed155f2001-04-29 19:56:59 +00003564get_more:
Daniel Veillard561b7f82002-03-20 21:55:57 +00003565 while (((*in >= 0x20) && (*in != '<') && (*in != ']') &&
3566 (*in != '&') && (*in <= 0x7F)) || (*in == 0x09))
Daniel Veillard48b2f892001-02-25 16:11:03 +00003567 in++;
Daniel Veillard561b7f82002-03-20 21:55:57 +00003568 if (*in == 0xA) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00003569 ctxt->input->line++;
Daniel Veillard3ed155f2001-04-29 19:56:59 +00003570 in++;
Daniel Veillard561b7f82002-03-20 21:55:57 +00003571 while (*in == 0xA) {
Daniel Veillard3ed155f2001-04-29 19:56:59 +00003572 ctxt->input->line++;
3573 in++;
3574 }
3575 goto get_more;
Daniel Veillard561b7f82002-03-20 21:55:57 +00003576 }
3577 if (*in == ']') {
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00003578 if ((in[1] == ']') && (in[2] == '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003579 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00003580 ctxt->input->cur = in;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00003581 return;
3582 }
3583 in++;
3584 goto get_more;
3585 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00003586 nbchar = in - ctxt->input->cur;
Daniel Veillard80f32572001-03-07 19:45:40 +00003587 if (nbchar > 0) {
Daniel Veillard40412cd2003-09-03 13:28:32 +00003588 if ((ctxt->sax->ignorableWhitespace !=
3589 ctxt->sax->characters) &&
William M. Brack76e95df2003-10-18 16:20:14 +00003590 (IS_BLANK_CH(*ctxt->input->cur))) {
Daniel Veillarda7374592001-05-10 14:17:55 +00003591 const xmlChar *tmp = ctxt->input->cur;
3592 ctxt->input->cur = in;
Daniel Veillard40412cd2003-09-03 13:28:32 +00003593
Daniel Veillarda7374592001-05-10 14:17:55 +00003594 if (areBlanks(ctxt, tmp, nbchar)) {
Daniel Veillard40412cd2003-09-03 13:28:32 +00003595 ctxt->sax->ignorableWhitespace(ctxt->userData,
3596 tmp, nbchar);
3597 } else if (ctxt->sax->characters != NULL)
3598 ctxt->sax->characters(ctxt->userData,
3599 tmp, nbchar);
Daniel Veillard3ed27bd2001-06-17 17:58:17 +00003600 line = ctxt->input->line;
3601 col = ctxt->input->col;
Daniel Veillard80f32572001-03-07 19:45:40 +00003602 } else {
3603 if (ctxt->sax->characters != NULL)
3604 ctxt->sax->characters(ctxt->userData,
3605 ctxt->input->cur, nbchar);
Daniel Veillard3ed27bd2001-06-17 17:58:17 +00003606 line = ctxt->input->line;
3607 col = ctxt->input->col;
Daniel Veillard80f32572001-03-07 19:45:40 +00003608 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00003609 }
3610 ctxt->input->cur = in;
Daniel Veillard561b7f82002-03-20 21:55:57 +00003611 if (*in == 0xD) {
3612 in++;
3613 if (*in == 0xA) {
3614 ctxt->input->cur = in;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003615 in++;
Daniel Veillard561b7f82002-03-20 21:55:57 +00003616 ctxt->input->line++;
3617 continue; /* while */
Daniel Veillard48b2f892001-02-25 16:11:03 +00003618 }
Daniel Veillard561b7f82002-03-20 21:55:57 +00003619 in--;
3620 }
3621 if (*in == '<') {
3622 return;
3623 }
3624 if (*in == '&') {
3625 return;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003626 }
3627 SHRINK;
3628 GROW;
3629 in = ctxt->input->cur;
William M. Brackc07329e2003-09-08 01:57:30 +00003630 } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
Daniel Veillard48b2f892001-02-25 16:11:03 +00003631 nbchar = 0;
3632 }
Daniel Veillard50582112001-03-26 22:52:16 +00003633 ctxt->input->line = line;
3634 ctxt->input->col = col;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003635 xmlParseCharDataComplex(ctxt, cdata);
3636}
3637
Daniel Veillard01c13b52002-12-10 15:19:08 +00003638/**
3639 * xmlParseCharDataComplex:
3640 * @ctxt: an XML parser context
3641 * @cdata: int indicating whether we are within a CDATA section
3642 *
3643 * parse a CharData section.this is the fallback function
3644 * of xmlParseCharData() when the parsing requires handling
3645 * of non-ASCII characters.
3646 */
Daniel Veillard48b2f892001-02-25 16:11:03 +00003647void
3648xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) {
Owen Taylor3473f882001-02-23 17:55:21 +00003649 xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
3650 int nbchar = 0;
3651 int cur, l;
3652 int count = 0;
3653
3654 SHRINK;
3655 GROW;
3656 cur = CUR_CHAR(l);
Daniel Veillardfdc91562002-07-01 21:52:03 +00003657 while ((cur != '<') && /* checked */
3658 (cur != '&') &&
William M. Brack871611b2003-10-18 04:53:14 +00003659 (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
Owen Taylor3473f882001-02-23 17:55:21 +00003660 if ((cur == ']') && (NXT(1) == ']') &&
3661 (NXT(2) == '>')) {
3662 if (cdata) break;
3663 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003664 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003665 }
3666 }
3667 COPY_BUF(l,buf,nbchar,cur);
3668 if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
Daniel Veillard092643b2003-09-25 14:29:29 +00003669 buf[nbchar] = 0;
3670
Owen Taylor3473f882001-02-23 17:55:21 +00003671 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003672 * OK the segment is to be consumed as chars.
Owen Taylor3473f882001-02-23 17:55:21 +00003673 */
3674 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
3675 if (areBlanks(ctxt, buf, nbchar)) {
3676 if (ctxt->sax->ignorableWhitespace != NULL)
3677 ctxt->sax->ignorableWhitespace(ctxt->userData,
3678 buf, nbchar);
3679 } else {
3680 if (ctxt->sax->characters != NULL)
3681 ctxt->sax->characters(ctxt->userData, buf, nbchar);
3682 }
3683 }
3684 nbchar = 0;
3685 }
3686 count++;
3687 if (count > 50) {
3688 GROW;
3689 count = 0;
3690 }
3691 NEXTL(l);
3692 cur = CUR_CHAR(l);
3693 }
3694 if (nbchar != 0) {
Daniel Veillard092643b2003-09-25 14:29:29 +00003695 buf[nbchar] = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003696 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003697 * OK the segment is to be consumed as chars.
Owen Taylor3473f882001-02-23 17:55:21 +00003698 */
3699 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
3700 if (areBlanks(ctxt, buf, nbchar)) {
3701 if (ctxt->sax->ignorableWhitespace != NULL)
3702 ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
3703 } else {
3704 if (ctxt->sax->characters != NULL)
3705 ctxt->sax->characters(ctxt->userData, buf, nbchar);
3706 }
3707 }
3708 }
3709}
3710
3711/**
3712 * xmlParseExternalID:
3713 * @ctxt: an XML parser context
3714 * @publicID: a xmlChar** receiving PubidLiteral
3715 * @strict: indicate whether we should restrict parsing to only
3716 * production [75], see NOTE below
3717 *
3718 * Parse an External ID or a Public ID
3719 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003720 * NOTE: Productions [75] and [83] interact badly since [75] can generate
Owen Taylor3473f882001-02-23 17:55:21 +00003721 * 'PUBLIC' S PubidLiteral S SystemLiteral
3722 *
3723 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
3724 * | 'PUBLIC' S PubidLiteral S SystemLiteral
3725 *
3726 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
3727 *
3728 * Returns the function returns SystemLiteral and in the second
3729 * case publicID receives PubidLiteral, is strict is off
3730 * it is possible to return NULL and have publicID set.
3731 */
3732
3733xmlChar *
3734xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
3735 xmlChar *URI = NULL;
3736
3737 SHRINK;
Daniel Veillard146c9122001-03-22 15:22:27 +00003738
3739 *publicID = NULL;
Daniel Veillarda07050d2003-10-19 14:46:32 +00003740 if (CMP6(CUR_PTR, 'S', 'Y', 'S', 'T', 'E', 'M')) {
Owen Taylor3473f882001-02-23 17:55:21 +00003741 SKIP(6);
William M. Brack76e95df2003-10-18 16:20:14 +00003742 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003743 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
3744 "Space required after 'SYSTEM'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003745 }
3746 SKIP_BLANKS;
3747 URI = xmlParseSystemLiteral(ctxt);
3748 if (URI == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003749 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003750 }
Daniel Veillarda07050d2003-10-19 14:46:32 +00003751 } else if (CMP6(CUR_PTR, 'P', 'U', 'B', 'L', 'I', 'C')) {
Owen Taylor3473f882001-02-23 17:55:21 +00003752 SKIP(6);
William M. Brack76e95df2003-10-18 16:20:14 +00003753 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003754 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00003755 "Space required after 'PUBLIC'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003756 }
3757 SKIP_BLANKS;
3758 *publicID = xmlParsePubidLiteral(ctxt);
3759 if (*publicID == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003760 xmlFatalErr(ctxt, XML_ERR_PUBID_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003761 }
3762 if (strict) {
3763 /*
3764 * We don't handle [83] so "S SystemLiteral" is required.
3765 */
William M. Brack76e95df2003-10-18 16:20:14 +00003766 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003767 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00003768 "Space required after the Public Identifier\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003769 }
3770 } else {
3771 /*
3772 * We handle [83] so we return immediately, if
3773 * "S SystemLiteral" is not detected. From a purely parsing
3774 * point of view that's a nice mess.
3775 */
3776 const xmlChar *ptr;
3777 GROW;
3778
3779 ptr = CUR_PTR;
William M. Brack76e95df2003-10-18 16:20:14 +00003780 if (!IS_BLANK_CH(*ptr)) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003781
William M. Brack76e95df2003-10-18 16:20:14 +00003782 while (IS_BLANK_CH(*ptr)) ptr++; /* TODO: dangerous, fix ! */
Owen Taylor3473f882001-02-23 17:55:21 +00003783 if ((*ptr != '\'') && (*ptr != '"')) return(NULL);
3784 }
3785 SKIP_BLANKS;
3786 URI = xmlParseSystemLiteral(ctxt);
3787 if (URI == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003788 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003789 }
3790 }
3791 return(URI);
3792}
3793
3794/**
3795 * xmlParseComment:
3796 * @ctxt: an XML parser context
3797 *
3798 * Skip an XML (SGML) comment <!-- .... -->
3799 * The spec says that "For compatibility, the string "--" (double-hyphen)
3800 * must not occur within comments. "
3801 *
3802 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
3803 */
3804void
3805xmlParseComment(xmlParserCtxtPtr ctxt) {
3806 xmlChar *buf = NULL;
3807 int len;
3808 int size = XML_PARSER_BUFFER_SIZE;
3809 int q, ql;
3810 int r, rl;
3811 int cur, l;
3812 xmlParserInputState state;
3813 xmlParserInputPtr input = ctxt->input;
3814 int count = 0;
3815
3816 /*
3817 * Check that there is a comment right here.
3818 */
3819 if ((RAW != '<') || (NXT(1) != '!') ||
3820 (NXT(2) != '-') || (NXT(3) != '-')) return;
3821
3822 state = ctxt->instate;
3823 ctxt->instate = XML_PARSER_COMMENT;
3824 SHRINK;
3825 SKIP(4);
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003826 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003827 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003828 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003829 ctxt->instate = state;
3830 return;
3831 }
3832 q = CUR_CHAR(ql);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00003833 if (q == 0)
3834 goto not_terminated;
Owen Taylor3473f882001-02-23 17:55:21 +00003835 NEXTL(ql);
3836 r = CUR_CHAR(rl);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00003837 if (r == 0)
3838 goto not_terminated;
Owen Taylor3473f882001-02-23 17:55:21 +00003839 NEXTL(rl);
3840 cur = CUR_CHAR(l);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00003841 if (cur == 0)
3842 goto not_terminated;
Owen Taylor3473f882001-02-23 17:55:21 +00003843 len = 0;
William M. Brack871611b2003-10-18 04:53:14 +00003844 while (IS_CHAR(cur) && /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00003845 ((cur != '>') ||
3846 (r != '-') || (q != '-'))) {
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00003847 if ((r == '-') && (q == '-')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003848 xmlFatalErr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003849 }
3850 if (len + 5 >= size) {
3851 size *= 2;
3852 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3853 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003854 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003855 ctxt->instate = state;
3856 return;
3857 }
3858 }
3859 COPY_BUF(ql,buf,len,q);
3860 q = r;
3861 ql = rl;
3862 r = cur;
3863 rl = l;
3864
3865 count++;
3866 if (count > 50) {
3867 GROW;
3868 count = 0;
3869 }
3870 NEXTL(l);
3871 cur = CUR_CHAR(l);
3872 if (cur == 0) {
3873 SHRINK;
3874 GROW;
3875 cur = CUR_CHAR(l);
3876 }
3877 }
3878 buf[len] = 0;
William M. Brack871611b2003-10-18 04:53:14 +00003879 if (!IS_CHAR(cur)) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00003880 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00003881 "Comment not terminated \n<!--%.50s\n", buf);
Owen Taylor3473f882001-02-23 17:55:21 +00003882 xmlFree(buf);
3883 } else {
3884 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003885 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
3886 "Comment doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003887 }
3888 NEXT;
3889 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
3890 (!ctxt->disableSAX))
3891 ctxt->sax->comment(ctxt->userData, buf);
3892 xmlFree(buf);
3893 }
3894 ctxt->instate = state;
Daniel Veillard4aede2e2003-10-17 12:43:59 +00003895 return;
3896not_terminated:
3897 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
3898 "Comment not terminated\n", NULL);
3899 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00003900}
3901
3902/**
3903 * xmlParsePITarget:
3904 * @ctxt: an XML parser context
3905 *
3906 * parse the name of a PI
3907 *
3908 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
3909 *
3910 * Returns the PITarget name or NULL
3911 */
3912
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003913const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00003914xmlParsePITarget(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003915 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00003916
3917 name = xmlParseName(ctxt);
3918 if ((name != NULL) &&
3919 ((name[0] == 'x') || (name[0] == 'X')) &&
3920 ((name[1] == 'm') || (name[1] == 'M')) &&
3921 ((name[2] == 'l') || (name[2] == 'L'))) {
3922 int i;
3923 if ((name[0] == 'x') && (name[1] == 'm') &&
3924 (name[2] == 'l') && (name[3] == 0)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003925 xmlFatalErrMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
Owen Taylor3473f882001-02-23 17:55:21 +00003926 "XML declaration allowed only at the start of the document\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003927 return(name);
3928 } else if (name[3] == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003929 xmlFatalErr(ctxt, XML_ERR_RESERVED_XML_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003930 return(name);
3931 }
3932 for (i = 0;;i++) {
3933 if (xmlW3CPIs[i] == NULL) break;
3934 if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
3935 return(name);
3936 }
Daniel Veillard24eb9782003-10-04 21:08:09 +00003937 xmlWarningMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
3938 "xmlParsePITarget: invalid name prefix 'xml'\n",
3939 NULL, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003940 }
3941 return(name);
3942}
3943
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00003944#ifdef LIBXML_CATALOG_ENABLED
3945/**
3946 * xmlParseCatalogPI:
3947 * @ctxt: an XML parser context
3948 * @catalog: the PI value string
3949 *
3950 * parse an XML Catalog Processing Instruction.
3951 *
3952 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
3953 *
3954 * Occurs only if allowed by the user and if happening in the Misc
3955 * part of the document before any doctype informations
3956 * This will add the given catalog to the parsing context in order
3957 * to be used if there is a resolution need further down in the document
3958 */
3959
3960static void
3961xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
3962 xmlChar *URL = NULL;
3963 const xmlChar *tmp, *base;
3964 xmlChar marker;
3965
3966 tmp = catalog;
William M. Brack76e95df2003-10-18 16:20:14 +00003967 while (IS_BLANK_CH(*tmp)) tmp++;
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00003968 if (xmlStrncmp(tmp, BAD_CAST"catalog", 7))
3969 goto error;
3970 tmp += 7;
William M. Brack76e95df2003-10-18 16:20:14 +00003971 while (IS_BLANK_CH(*tmp)) tmp++;
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00003972 if (*tmp != '=') {
3973 return;
3974 }
3975 tmp++;
William M. Brack76e95df2003-10-18 16:20:14 +00003976 while (IS_BLANK_CH(*tmp)) tmp++;
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00003977 marker = *tmp;
3978 if ((marker != '\'') && (marker != '"'))
3979 goto error;
3980 tmp++;
3981 base = tmp;
3982 while ((*tmp != 0) && (*tmp != marker)) tmp++;
3983 if (*tmp == 0)
3984 goto error;
3985 URL = xmlStrndup(base, tmp - base);
3986 tmp++;
William M. Brack76e95df2003-10-18 16:20:14 +00003987 while (IS_BLANK_CH(*tmp)) tmp++;
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00003988 if (*tmp != 0)
3989 goto error;
3990
3991 if (URL != NULL) {
3992 ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, URL);
3993 xmlFree(URL);
3994 }
3995 return;
3996
3997error:
Daniel Veillard24eb9782003-10-04 21:08:09 +00003998 xmlWarningMsg(ctxt, XML_WAR_CATALOG_PI,
3999 "Catalog PI syntax error: %s\n",
4000 catalog, NULL);
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00004001 if (URL != NULL)
4002 xmlFree(URL);
4003}
4004#endif
4005
Owen Taylor3473f882001-02-23 17:55:21 +00004006/**
4007 * xmlParsePI:
4008 * @ctxt: an XML parser context
4009 *
4010 * parse an XML Processing Instruction.
4011 *
4012 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
4013 *
4014 * The processing is transfered to SAX once parsed.
4015 */
4016
4017void
4018xmlParsePI(xmlParserCtxtPtr ctxt) {
4019 xmlChar *buf = NULL;
4020 int len = 0;
4021 int size = XML_PARSER_BUFFER_SIZE;
4022 int cur, l;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004023 const xmlChar *target;
Owen Taylor3473f882001-02-23 17:55:21 +00004024 xmlParserInputState state;
4025 int count = 0;
4026
4027 if ((RAW == '<') && (NXT(1) == '?')) {
4028 xmlParserInputPtr input = ctxt->input;
4029 state = ctxt->instate;
4030 ctxt->instate = XML_PARSER_PI;
4031 /*
4032 * this is a Processing Instruction.
4033 */
4034 SKIP(2);
4035 SHRINK;
4036
4037 /*
4038 * Parse the target name and check for special support like
4039 * namespace.
4040 */
4041 target = xmlParsePITarget(ctxt);
4042 if (target != NULL) {
4043 if ((RAW == '?') && (NXT(1) == '>')) {
4044 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004045 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4046 "PI declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004047 }
4048 SKIP(2);
4049
4050 /*
4051 * SAX: PI detected.
4052 */
4053 if ((ctxt->sax) && (!ctxt->disableSAX) &&
4054 (ctxt->sax->processingInstruction != NULL))
4055 ctxt->sax->processingInstruction(ctxt->userData,
4056 target, NULL);
4057 ctxt->instate = state;
Owen Taylor3473f882001-02-23 17:55:21 +00004058 return;
4059 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00004060 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00004061 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004062 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004063 ctxt->instate = state;
4064 return;
4065 }
4066 cur = CUR;
4067 if (!IS_BLANK(cur)) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00004068 xmlFatalErrMsgStr(ctxt, XML_ERR_SPACE_REQUIRED,
4069 "ParsePI: PI %s space expected\n", target);
Owen Taylor3473f882001-02-23 17:55:21 +00004070 }
4071 SKIP_BLANKS;
4072 cur = CUR_CHAR(l);
William M. Brack871611b2003-10-18 04:53:14 +00004073 while (IS_CHAR(cur) && /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00004074 ((cur != '?') || (NXT(1) != '>'))) {
4075 if (len + 5 >= size) {
4076 size *= 2;
4077 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4078 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004079 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004080 ctxt->instate = state;
4081 return;
4082 }
4083 }
4084 count++;
4085 if (count > 50) {
4086 GROW;
4087 count = 0;
4088 }
4089 COPY_BUF(l,buf,len,cur);
4090 NEXTL(l);
4091 cur = CUR_CHAR(l);
4092 if (cur == 0) {
4093 SHRINK;
4094 GROW;
4095 cur = CUR_CHAR(l);
4096 }
4097 }
4098 buf[len] = 0;
4099 if (cur != '?') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00004100 xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
4101 "ParsePI: PI %s never end ...\n", target);
Owen Taylor3473f882001-02-23 17:55:21 +00004102 } else {
4103 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004104 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4105 "PI declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004106 }
4107 SKIP(2);
4108
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00004109#ifdef LIBXML_CATALOG_ENABLED
4110 if (((state == XML_PARSER_MISC) ||
4111 (state == XML_PARSER_START)) &&
4112 (xmlStrEqual(target, XML_CATALOG_PI))) {
4113 xmlCatalogAllow allow = xmlCatalogGetDefaults();
4114 if ((allow == XML_CATA_ALLOW_DOCUMENT) ||
4115 (allow == XML_CATA_ALLOW_ALL))
4116 xmlParseCatalogPI(ctxt, buf);
4117 }
4118#endif
4119
4120
Owen Taylor3473f882001-02-23 17:55:21 +00004121 /*
4122 * SAX: PI detected.
4123 */
4124 if ((ctxt->sax) && (!ctxt->disableSAX) &&
4125 (ctxt->sax->processingInstruction != NULL))
4126 ctxt->sax->processingInstruction(ctxt->userData,
4127 target, buf);
4128 }
4129 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00004130 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004131 xmlFatalErr(ctxt, XML_ERR_PI_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004132 }
4133 ctxt->instate = state;
4134 }
4135}
4136
4137/**
4138 * xmlParseNotationDecl:
4139 * @ctxt: an XML parser context
4140 *
4141 * parse a notation declaration
4142 *
4143 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
4144 *
4145 * Hence there is actually 3 choices:
4146 * 'PUBLIC' S PubidLiteral
4147 * 'PUBLIC' S PubidLiteral S SystemLiteral
4148 * and 'SYSTEM' S SystemLiteral
4149 *
4150 * See the NOTE on xmlParseExternalID().
4151 */
4152
4153void
4154xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004155 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00004156 xmlChar *Pubid;
4157 xmlChar *Systemid;
4158
Daniel Veillarda07050d2003-10-19 14:46:32 +00004159 if (CMP10(CUR_PTR, '<', '!', 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004160 xmlParserInputPtr input = ctxt->input;
4161 SHRINK;
4162 SKIP(10);
William M. Brack76e95df2003-10-18 16:20:14 +00004163 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004164 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4165 "Space required after '<!NOTATION'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004166 return;
4167 }
4168 SKIP_BLANKS;
4169
Daniel Veillard76d66f42001-05-16 21:05:17 +00004170 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004171 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004172 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004173 return;
4174 }
William M. Brack76e95df2003-10-18 16:20:14 +00004175 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004176 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004177 "Space required after the NOTATION name'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004178 return;
4179 }
4180 SKIP_BLANKS;
4181
4182 /*
4183 * Parse the IDs.
4184 */
4185 Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
4186 SKIP_BLANKS;
4187
4188 if (RAW == '>') {
4189 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004190 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4191 "Notation declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004192 }
4193 NEXT;
4194 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4195 (ctxt->sax->notationDecl != NULL))
4196 ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
4197 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004198 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004199 }
Owen Taylor3473f882001-02-23 17:55:21 +00004200 if (Systemid != NULL) xmlFree(Systemid);
4201 if (Pubid != NULL) xmlFree(Pubid);
4202 }
4203}
4204
4205/**
4206 * xmlParseEntityDecl:
4207 * @ctxt: an XML parser context
4208 *
4209 * parse <!ENTITY declarations
4210 *
4211 * [70] EntityDecl ::= GEDecl | PEDecl
4212 *
4213 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
4214 *
4215 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
4216 *
4217 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
4218 *
4219 * [74] PEDef ::= EntityValue | ExternalID
4220 *
4221 * [76] NDataDecl ::= S 'NDATA' S Name
4222 *
4223 * [ VC: Notation Declared ]
4224 * The Name must match the declared name of a notation.
4225 */
4226
4227void
4228xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004229 const xmlChar *name = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00004230 xmlChar *value = NULL;
4231 xmlChar *URI = NULL, *literal = NULL;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004232 const xmlChar *ndata = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00004233 int isParameter = 0;
4234 xmlChar *orig = NULL;
Daniel Veillardf5582f12002-06-11 10:08:16 +00004235 int skipped;
Owen Taylor3473f882001-02-23 17:55:21 +00004236
4237 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00004238 if (CMP8(CUR_PTR, '<', '!', 'E', 'N', 'T', 'I', 'T', 'Y')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004239 xmlParserInputPtr input = ctxt->input;
Owen Taylor3473f882001-02-23 17:55:21 +00004240 SHRINK;
4241 SKIP(8);
Daniel Veillardf5582f12002-06-11 10:08:16 +00004242 skipped = SKIP_BLANKS;
4243 if (skipped == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004244 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4245 "Space required after '<!ENTITY'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004246 }
Owen Taylor3473f882001-02-23 17:55:21 +00004247
4248 if (RAW == '%') {
4249 NEXT;
Daniel Veillardf5582f12002-06-11 10:08:16 +00004250 skipped = SKIP_BLANKS;
4251 if (skipped == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004252 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4253 "Space required after '%'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004254 }
Owen Taylor3473f882001-02-23 17:55:21 +00004255 isParameter = 1;
4256 }
4257
Daniel Veillard76d66f42001-05-16 21:05:17 +00004258 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004259 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004260 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
4261 "xmlParseEntityDecl: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004262 return;
4263 }
Daniel Veillardf5582f12002-06-11 10:08:16 +00004264 skipped = SKIP_BLANKS;
4265 if (skipped == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004266 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4267 "Space required after the entity name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004268 }
Owen Taylor3473f882001-02-23 17:55:21 +00004269
Daniel Veillardf5582f12002-06-11 10:08:16 +00004270 ctxt->instate = XML_PARSER_ENTITY_DECL;
Owen Taylor3473f882001-02-23 17:55:21 +00004271 /*
4272 * handle the various case of definitions...
4273 */
4274 if (isParameter) {
4275 if ((RAW == '"') || (RAW == '\'')) {
4276 value = xmlParseEntityValue(ctxt, &orig);
4277 if (value) {
4278 if ((ctxt->sax != NULL) &&
4279 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
4280 ctxt->sax->entityDecl(ctxt->userData, name,
4281 XML_INTERNAL_PARAMETER_ENTITY,
4282 NULL, NULL, value);
4283 }
4284 } else {
4285 URI = xmlParseExternalID(ctxt, &literal, 1);
4286 if ((URI == NULL) && (literal == NULL)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004287 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004288 }
4289 if (URI) {
4290 xmlURIPtr uri;
4291
4292 uri = xmlParseURI((const char *) URI);
4293 if (uri == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00004294 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
4295 "Invalid URI: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00004296 /*
4297 * This really ought to be a well formedness error
4298 * but the XML Core WG decided otherwise c.f. issue
4299 * E26 of the XML erratas.
4300 */
Owen Taylor3473f882001-02-23 17:55:21 +00004301 } else {
4302 if (uri->fragment != NULL) {
Daniel Veillard4a7ae502002-02-18 19:18:17 +00004303 /*
4304 * Okay this is foolish to block those but not
4305 * invalid URIs.
4306 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004307 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004308 } else {
4309 if ((ctxt->sax != NULL) &&
4310 (!ctxt->disableSAX) &&
4311 (ctxt->sax->entityDecl != NULL))
4312 ctxt->sax->entityDecl(ctxt->userData, name,
4313 XML_EXTERNAL_PARAMETER_ENTITY,
4314 literal, URI, NULL);
4315 }
4316 xmlFreeURI(uri);
4317 }
4318 }
4319 }
4320 } else {
4321 if ((RAW == '"') || (RAW == '\'')) {
4322 value = xmlParseEntityValue(ctxt, &orig);
4323 if ((ctxt->sax != NULL) &&
4324 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
4325 ctxt->sax->entityDecl(ctxt->userData, name,
4326 XML_INTERNAL_GENERAL_ENTITY,
4327 NULL, NULL, value);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004328 /*
4329 * For expat compatibility in SAX mode.
4330 */
4331 if ((ctxt->myDoc == NULL) ||
4332 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
4333 if (ctxt->myDoc == NULL) {
4334 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
4335 }
4336 if (ctxt->myDoc->intSubset == NULL)
4337 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
4338 BAD_CAST "fake", NULL, NULL);
4339
Daniel Veillard1af9a412003-08-20 22:54:39 +00004340 xmlSAX2EntityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY,
4341 NULL, NULL, value);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004342 }
Owen Taylor3473f882001-02-23 17:55:21 +00004343 } else {
4344 URI = xmlParseExternalID(ctxt, &literal, 1);
4345 if ((URI == NULL) && (literal == NULL)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004346 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004347 }
4348 if (URI) {
4349 xmlURIPtr uri;
4350
4351 uri = xmlParseURI((const char *)URI);
4352 if (uri == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00004353 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
4354 "Invalid URI: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00004355 /*
4356 * This really ought to be a well formedness error
4357 * but the XML Core WG decided otherwise c.f. issue
4358 * E26 of the XML erratas.
4359 */
Owen Taylor3473f882001-02-23 17:55:21 +00004360 } else {
4361 if (uri->fragment != NULL) {
Daniel Veillard4a7ae502002-02-18 19:18:17 +00004362 /*
4363 * Okay this is foolish to block those but not
4364 * invalid URIs.
4365 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004366 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004367 }
4368 xmlFreeURI(uri);
4369 }
4370 }
William M. Brack76e95df2003-10-18 16:20:14 +00004371 if ((RAW != '>') && (!IS_BLANK_CH(CUR))) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004372 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4373 "Space required before 'NDATA'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004374 }
4375 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00004376 if (CMP5(CUR_PTR, 'N', 'D', 'A', 'T', 'A')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004377 SKIP(5);
William M. Brack76e95df2003-10-18 16:20:14 +00004378 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004379 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4380 "Space required after 'NDATA'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004381 }
4382 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004383 ndata = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004384 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4385 (ctxt->sax->unparsedEntityDecl != NULL))
4386 ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
4387 literal, URI, ndata);
4388 } else {
4389 if ((ctxt->sax != NULL) &&
4390 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
4391 ctxt->sax->entityDecl(ctxt->userData, name,
4392 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
4393 literal, URI, NULL);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004394 /*
4395 * For expat compatibility in SAX mode.
4396 * assuming the entity repalcement was asked for
4397 */
4398 if ((ctxt->replaceEntities != 0) &&
4399 ((ctxt->myDoc == NULL) ||
4400 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) {
4401 if (ctxt->myDoc == NULL) {
4402 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
4403 }
4404
4405 if (ctxt->myDoc->intSubset == NULL)
4406 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
4407 BAD_CAST "fake", NULL, NULL);
Daniel Veillard1af9a412003-08-20 22:54:39 +00004408 xmlSAX2EntityDecl(ctxt, name,
4409 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
4410 literal, URI, NULL);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004411 }
Owen Taylor3473f882001-02-23 17:55:21 +00004412 }
4413 }
4414 }
4415 SKIP_BLANKS;
4416 if (RAW != '>') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00004417 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00004418 "xmlParseEntityDecl: entity %s not terminated\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00004419 } else {
4420 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004421 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4422 "Entity declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004423 }
4424 NEXT;
4425 }
4426 if (orig != NULL) {
4427 /*
4428 * Ugly mechanism to save the raw entity value.
4429 */
4430 xmlEntityPtr cur = NULL;
4431
4432 if (isParameter) {
4433 if ((ctxt->sax != NULL) &&
4434 (ctxt->sax->getParameterEntity != NULL))
4435 cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
4436 } else {
4437 if ((ctxt->sax != NULL) &&
4438 (ctxt->sax->getEntity != NULL))
4439 cur = ctxt->sax->getEntity(ctxt->userData, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004440 if ((cur == NULL) && (ctxt->userData==ctxt)) {
Daniel Veillard1af9a412003-08-20 22:54:39 +00004441 cur = xmlSAX2GetEntity(ctxt, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004442 }
Owen Taylor3473f882001-02-23 17:55:21 +00004443 }
4444 if (cur != NULL) {
4445 if (cur->orig != NULL)
4446 xmlFree(orig);
4447 else
4448 cur->orig = orig;
4449 } else
4450 xmlFree(orig);
4451 }
Owen Taylor3473f882001-02-23 17:55:21 +00004452 if (value != NULL) xmlFree(value);
4453 if (URI != NULL) xmlFree(URI);
4454 if (literal != NULL) xmlFree(literal);
Owen Taylor3473f882001-02-23 17:55:21 +00004455 }
4456}
4457
4458/**
4459 * xmlParseDefaultDecl:
4460 * @ctxt: an XML parser context
4461 * @value: Receive a possible fixed default value for the attribute
4462 *
4463 * Parse an attribute default declaration
4464 *
4465 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
4466 *
4467 * [ VC: Required Attribute ]
4468 * if the default declaration is the keyword #REQUIRED, then the
4469 * attribute must be specified for all elements of the type in the
4470 * attribute-list declaration.
4471 *
4472 * [ VC: Attribute Default Legal ]
4473 * The declared default value must meet the lexical constraints of
4474 * the declared attribute type c.f. xmlValidateAttributeDecl()
4475 *
4476 * [ VC: Fixed Attribute Default ]
4477 * if an attribute has a default value declared with the #FIXED
4478 * keyword, instances of that attribute must match the default value.
4479 *
4480 * [ WFC: No < in Attribute Values ]
4481 * handled in xmlParseAttValue()
4482 *
4483 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
4484 * or XML_ATTRIBUTE_FIXED.
4485 */
4486
4487int
4488xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
4489 int val;
4490 xmlChar *ret;
4491
4492 *value = NULL;
Daniel Veillarda07050d2003-10-19 14:46:32 +00004493 if (CMP9(CUR_PTR, '#', 'R', 'E', 'Q', 'U', 'I', 'R', 'E', 'D')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004494 SKIP(9);
4495 return(XML_ATTRIBUTE_REQUIRED);
4496 }
Daniel Veillarda07050d2003-10-19 14:46:32 +00004497 if (CMP8(CUR_PTR, '#', 'I', 'M', 'P', 'L', 'I', 'E', 'D')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004498 SKIP(8);
4499 return(XML_ATTRIBUTE_IMPLIED);
4500 }
4501 val = XML_ATTRIBUTE_NONE;
Daniel Veillarda07050d2003-10-19 14:46:32 +00004502 if (CMP6(CUR_PTR, '#', 'F', 'I', 'X', 'E', 'D')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004503 SKIP(6);
4504 val = XML_ATTRIBUTE_FIXED;
William M. Brack76e95df2003-10-18 16:20:14 +00004505 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004506 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4507 "Space required after '#FIXED'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004508 }
4509 SKIP_BLANKS;
4510 }
4511 ret = xmlParseAttValue(ctxt);
4512 ctxt->instate = XML_PARSER_DTD;
4513 if (ret == NULL) {
William M. Brack7b9154b2003-09-27 19:23:50 +00004514 xmlFatalErrMsg(ctxt, (xmlParserErrors)ctxt->errNo,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004515 "Attribute default value declaration error\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004516 } else
4517 *value = ret;
4518 return(val);
4519}
4520
4521/**
4522 * xmlParseNotationType:
4523 * @ctxt: an XML parser context
4524 *
4525 * parse an Notation attribute type.
4526 *
4527 * Note: the leading 'NOTATION' S part has already being parsed...
4528 *
4529 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
4530 *
4531 * [ VC: Notation Attributes ]
4532 * Values of this type must match one of the notation names included
4533 * in the declaration; all notation names in the declaration must be declared.
4534 *
4535 * Returns: the notation attribute tree built while parsing
4536 */
4537
4538xmlEnumerationPtr
4539xmlParseNotationType(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004540 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00004541 xmlEnumerationPtr ret = NULL, last = NULL, cur;
4542
4543 if (RAW != '(') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004544 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004545 return(NULL);
4546 }
4547 SHRINK;
4548 do {
4549 NEXT;
4550 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004551 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004552 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004553 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
4554 "Name expected in NOTATION declaration\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004555 return(ret);
4556 }
4557 cur = xmlCreateEnumeration(name);
Owen Taylor3473f882001-02-23 17:55:21 +00004558 if (cur == NULL) return(ret);
4559 if (last == NULL) ret = last = cur;
4560 else {
4561 last->next = cur;
4562 last = cur;
4563 }
4564 SKIP_BLANKS;
4565 } while (RAW == '|');
4566 if (RAW != ')') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004567 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004568 if ((last != NULL) && (last != ret))
4569 xmlFreeEnumeration(last);
4570 return(ret);
4571 }
4572 NEXT;
4573 return(ret);
4574}
4575
4576/**
4577 * xmlParseEnumerationType:
4578 * @ctxt: an XML parser context
4579 *
4580 * parse an Enumeration attribute type.
4581 *
4582 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
4583 *
4584 * [ VC: Enumeration ]
4585 * Values of this type must match one of the Nmtoken tokens in
4586 * the declaration
4587 *
4588 * Returns: the enumeration attribute tree built while parsing
4589 */
4590
4591xmlEnumerationPtr
4592xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
4593 xmlChar *name;
4594 xmlEnumerationPtr ret = NULL, last = NULL, cur;
4595
4596 if (RAW != '(') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004597 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004598 return(NULL);
4599 }
4600 SHRINK;
4601 do {
4602 NEXT;
4603 SKIP_BLANKS;
4604 name = xmlParseNmtoken(ctxt);
4605 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004606 xmlFatalErr(ctxt, XML_ERR_NMTOKEN_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004607 return(ret);
4608 }
4609 cur = xmlCreateEnumeration(name);
4610 xmlFree(name);
4611 if (cur == NULL) return(ret);
4612 if (last == NULL) ret = last = cur;
4613 else {
4614 last->next = cur;
4615 last = cur;
4616 }
4617 SKIP_BLANKS;
4618 } while (RAW == '|');
4619 if (RAW != ')') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004620 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004621 return(ret);
4622 }
4623 NEXT;
4624 return(ret);
4625}
4626
4627/**
4628 * xmlParseEnumeratedType:
4629 * @ctxt: an XML parser context
4630 * @tree: the enumeration tree built while parsing
4631 *
4632 * parse an Enumerated attribute type.
4633 *
4634 * [57] EnumeratedType ::= NotationType | Enumeration
4635 *
4636 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
4637 *
4638 *
4639 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
4640 */
4641
4642int
4643xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
Daniel Veillarda07050d2003-10-19 14:46:32 +00004644 if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004645 SKIP(8);
William M. Brack76e95df2003-10-18 16:20:14 +00004646 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004647 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4648 "Space required after 'NOTATION'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004649 return(0);
4650 }
4651 SKIP_BLANKS;
4652 *tree = xmlParseNotationType(ctxt);
4653 if (*tree == NULL) return(0);
4654 return(XML_ATTRIBUTE_NOTATION);
4655 }
4656 *tree = xmlParseEnumerationType(ctxt);
4657 if (*tree == NULL) return(0);
4658 return(XML_ATTRIBUTE_ENUMERATION);
4659}
4660
4661/**
4662 * xmlParseAttributeType:
4663 * @ctxt: an XML parser context
4664 * @tree: the enumeration tree built while parsing
4665 *
4666 * parse the Attribute list def for an element
4667 *
4668 * [54] AttType ::= StringType | TokenizedType | EnumeratedType
4669 *
4670 * [55] StringType ::= 'CDATA'
4671 *
4672 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
4673 * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
4674 *
4675 * Validity constraints for attribute values syntax are checked in
4676 * xmlValidateAttributeValue()
4677 *
4678 * [ VC: ID ]
4679 * Values of type ID must match the Name production. A name must not
4680 * appear more than once in an XML document as a value of this type;
4681 * i.e., ID values must uniquely identify the elements which bear them.
4682 *
4683 * [ VC: One ID per Element Type ]
4684 * No element type may have more than one ID attribute specified.
4685 *
4686 * [ VC: ID Attribute Default ]
4687 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
4688 *
4689 * [ VC: IDREF ]
4690 * Values of type IDREF must match the Name production, and values
4691 * of type IDREFS must match Names; each IDREF Name must match the value
4692 * of an ID attribute on some element in the XML document; i.e. IDREF
4693 * values must match the value of some ID attribute.
4694 *
4695 * [ VC: Entity Name ]
4696 * Values of type ENTITY must match the Name production, values
4697 * of type ENTITIES must match Names; each Entity Name must match the
4698 * name of an unparsed entity declared in the DTD.
4699 *
4700 * [ VC: Name Token ]
4701 * Values of type NMTOKEN must match the Nmtoken production; values
4702 * of type NMTOKENS must match Nmtokens.
4703 *
4704 * Returns the attribute type
4705 */
4706int
4707xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
4708 SHRINK;
Daniel Veillarda07050d2003-10-19 14:46:32 +00004709 if (CMP5(CUR_PTR, 'C', 'D', 'A', 'T', 'A')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004710 SKIP(5);
4711 return(XML_ATTRIBUTE_CDATA);
Daniel Veillarda07050d2003-10-19 14:46:32 +00004712 } else if (CMP6(CUR_PTR, 'I', 'D', 'R', 'E', 'F', 'S')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004713 SKIP(6);
4714 return(XML_ATTRIBUTE_IDREFS);
Daniel Veillarda07050d2003-10-19 14:46:32 +00004715 } else if (CMP5(CUR_PTR, 'I', 'D', 'R', 'E', 'F')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004716 SKIP(5);
4717 return(XML_ATTRIBUTE_IDREF);
4718 } else if ((RAW == 'I') && (NXT(1) == 'D')) {
4719 SKIP(2);
4720 return(XML_ATTRIBUTE_ID);
Daniel Veillarda07050d2003-10-19 14:46:32 +00004721 } else if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004722 SKIP(6);
4723 return(XML_ATTRIBUTE_ENTITY);
Daniel Veillarda07050d2003-10-19 14:46:32 +00004724 } else if (CMP8(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'I', 'E', 'S')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004725 SKIP(8);
4726 return(XML_ATTRIBUTE_ENTITIES);
Daniel Veillarda07050d2003-10-19 14:46:32 +00004727 } else if (CMP8(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N', 'S')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004728 SKIP(8);
4729 return(XML_ATTRIBUTE_NMTOKENS);
Daniel Veillarda07050d2003-10-19 14:46:32 +00004730 } else if (CMP7(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004731 SKIP(7);
4732 return(XML_ATTRIBUTE_NMTOKEN);
4733 }
4734 return(xmlParseEnumeratedType(ctxt, tree));
4735}
4736
4737/**
4738 * xmlParseAttributeListDecl:
4739 * @ctxt: an XML parser context
4740 *
4741 * : parse the Attribute list def for an element
4742 *
4743 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
4744 *
4745 * [53] AttDef ::= S Name S AttType S DefaultDecl
4746 *
4747 */
4748void
4749xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004750 const xmlChar *elemName;
4751 const xmlChar *attrName;
Owen Taylor3473f882001-02-23 17:55:21 +00004752 xmlEnumerationPtr tree;
4753
Daniel Veillarda07050d2003-10-19 14:46:32 +00004754 if (CMP9(CUR_PTR, '<', '!', 'A', 'T', 'T', 'L', 'I', 'S', 'T')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004755 xmlParserInputPtr input = ctxt->input;
4756
4757 SKIP(9);
William M. Brack76e95df2003-10-18 16:20:14 +00004758 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004759 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004760 "Space required after '<!ATTLIST'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004761 }
4762 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004763 elemName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004764 if (elemName == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004765 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
4766 "ATTLIST: no name for Element\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004767 return;
4768 }
4769 SKIP_BLANKS;
4770 GROW;
4771 while (RAW != '>') {
4772 const xmlChar *check = CUR_PTR;
4773 int type;
4774 int def;
4775 xmlChar *defaultValue = NULL;
4776
4777 GROW;
4778 tree = NULL;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004779 attrName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004780 if (attrName == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004781 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
4782 "ATTLIST: no name for Attribute\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004783 break;
4784 }
4785 GROW;
William M. Brack76e95df2003-10-18 16:20:14 +00004786 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004787 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004788 "Space required after the attribute name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004789 if (defaultValue != NULL)
4790 xmlFree(defaultValue);
4791 break;
4792 }
4793 SKIP_BLANKS;
4794
4795 type = xmlParseAttributeType(ctxt, &tree);
4796 if (type <= 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00004797 if (defaultValue != NULL)
4798 xmlFree(defaultValue);
4799 break;
4800 }
4801
4802 GROW;
William M. Brack76e95df2003-10-18 16:20:14 +00004803 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004804 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4805 "Space required after the attribute type\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004806 if (defaultValue != NULL)
4807 xmlFree(defaultValue);
4808 if (tree != NULL)
4809 xmlFreeEnumeration(tree);
4810 break;
4811 }
4812 SKIP_BLANKS;
4813
4814 def = xmlParseDefaultDecl(ctxt, &defaultValue);
4815 if (def <= 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00004816 if (defaultValue != NULL)
4817 xmlFree(defaultValue);
4818 if (tree != NULL)
4819 xmlFreeEnumeration(tree);
4820 break;
4821 }
4822
4823 GROW;
4824 if (RAW != '>') {
William M. Brack76e95df2003-10-18 16:20:14 +00004825 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004826 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004827 "Space required after the attribute default value\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004828 if (defaultValue != NULL)
4829 xmlFree(defaultValue);
4830 if (tree != NULL)
4831 xmlFreeEnumeration(tree);
4832 break;
4833 }
4834 SKIP_BLANKS;
4835 }
4836 if (check == CUR_PTR) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004837 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
4838 "in xmlParseAttributeListDecl\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004839 if (defaultValue != NULL)
4840 xmlFree(defaultValue);
4841 if (tree != NULL)
4842 xmlFreeEnumeration(tree);
4843 break;
4844 }
4845 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4846 (ctxt->sax->attributeDecl != NULL))
4847 ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
4848 type, def, defaultValue, tree);
Daniel Veillarde57ec792003-09-10 10:50:59 +00004849 else if (tree != NULL)
4850 xmlFreeEnumeration(tree);
4851
4852 if ((ctxt->sax2) && (defaultValue != NULL) &&
4853 (def != XML_ATTRIBUTE_IMPLIED) &&
4854 (def != XML_ATTRIBUTE_REQUIRED)) {
4855 xmlAddDefAttrs(ctxt, elemName, attrName, defaultValue);
4856 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004857 if ((ctxt->sax2) && (type != XML_ATTRIBUTE_CDATA)) {
4858 xmlAddSpecialAttr(ctxt, elemName, attrName, type);
4859 }
Owen Taylor3473f882001-02-23 17:55:21 +00004860 if (defaultValue != NULL)
4861 xmlFree(defaultValue);
4862 GROW;
4863 }
4864 if (RAW == '>') {
4865 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004866 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4867 "Attribute list declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004868 }
4869 NEXT;
4870 }
Owen Taylor3473f882001-02-23 17:55:21 +00004871 }
4872}
4873
4874/**
4875 * xmlParseElementMixedContentDecl:
4876 * @ctxt: an XML parser context
Daniel Veillarda9b66d02002-12-11 14:23:49 +00004877 * @inputchk: the input used for the current entity, needed for boundary checks
Owen Taylor3473f882001-02-23 17:55:21 +00004878 *
4879 * parse the declaration for a Mixed Element content
4880 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
4881 *
4882 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
4883 * '(' S? '#PCDATA' S? ')'
4884 *
4885 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
4886 *
4887 * [ VC: No Duplicate Types ]
4888 * The same name must not appear more than once in a single
4889 * mixed-content declaration.
4890 *
4891 * returns: the list of the xmlElementContentPtr describing the element choices
4892 */
4893xmlElementContentPtr
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004894xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
Owen Taylor3473f882001-02-23 17:55:21 +00004895 xmlElementContentPtr ret = NULL, cur = NULL, n;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004896 const xmlChar *elem = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00004897
4898 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00004899 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004900 SKIP(7);
4901 SKIP_BLANKS;
4902 SHRINK;
4903 if (RAW == ')') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004904 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00004905 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
4906"Element content declaration doesn't start and stop in the same entity\n",
4907 NULL);
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004908 }
Owen Taylor3473f882001-02-23 17:55:21 +00004909 NEXT;
4910 ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA);
4911 if (RAW == '*') {
4912 ret->ocur = XML_ELEMENT_CONTENT_MULT;
4913 NEXT;
4914 }
4915 return(ret);
4916 }
4917 if ((RAW == '(') || (RAW == '|')) {
4918 ret = cur = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA);
4919 if (ret == NULL) return(NULL);
4920 }
4921 while (RAW == '|') {
4922 NEXT;
4923 if (elem == NULL) {
4924 ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
4925 if (ret == NULL) return(NULL);
4926 ret->c1 = cur;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004927 if (cur != NULL)
4928 cur->parent = ret;
Owen Taylor3473f882001-02-23 17:55:21 +00004929 cur = ret;
4930 } else {
4931 n = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
4932 if (n == NULL) return(NULL);
4933 n->c1 = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004934 if (n->c1 != NULL)
4935 n->c1->parent = n;
Owen Taylor3473f882001-02-23 17:55:21 +00004936 cur->c2 = n;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004937 if (n != NULL)
4938 n->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004939 cur = n;
Owen Taylor3473f882001-02-23 17:55:21 +00004940 }
4941 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004942 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004943 if (elem == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004944 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004945 "xmlParseElementMixedContentDecl : Name expected\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004946 xmlFreeElementContent(cur);
4947 return(NULL);
4948 }
4949 SKIP_BLANKS;
4950 GROW;
4951 }
4952 if ((RAW == ')') && (NXT(1) == '*')) {
4953 if (elem != NULL) {
4954 cur->c2 = xmlNewElementContent(elem,
4955 XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004956 if (cur->c2 != NULL)
4957 cur->c2->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004958 }
4959 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004960 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00004961 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
4962"Element content declaration doesn't start and stop in the same entity\n",
4963 NULL);
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004964 }
Owen Taylor3473f882001-02-23 17:55:21 +00004965 SKIP(2);
4966 } else {
Owen Taylor3473f882001-02-23 17:55:21 +00004967 xmlFreeElementContent(ret);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004968 xmlFatalErr(ctxt, XML_ERR_MIXED_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004969 return(NULL);
4970 }
4971
4972 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004973 xmlFatalErr(ctxt, XML_ERR_PCDATA_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004974 }
4975 return(ret);
4976}
4977
4978/**
4979 * xmlParseElementChildrenContentDecl:
4980 * @ctxt: an XML parser context
Daniel Veillarda9b66d02002-12-11 14:23:49 +00004981 * @inputchk: the input used for the current entity, needed for boundary checks
Owen Taylor3473f882001-02-23 17:55:21 +00004982 *
4983 * parse the declaration for a Mixed Element content
4984 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
4985 *
4986 *
4987 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
4988 *
4989 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
4990 *
4991 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
4992 *
4993 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
4994 *
4995 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
4996 * TODO Parameter-entity replacement text must be properly nested
Daniel Veillardcbaf3992001-12-31 16:16:02 +00004997 * with parenthesized groups. That is to say, if either of the
Owen Taylor3473f882001-02-23 17:55:21 +00004998 * opening or closing parentheses in a choice, seq, or Mixed
4999 * construct is contained in the replacement text for a parameter
5000 * entity, both must be contained in the same replacement text. For
5001 * interoperability, if a parameter-entity reference appears in a
5002 * choice, seq, or Mixed construct, its replacement text should not
5003 * be empty, and neither the first nor last non-blank character of
5004 * the replacement text should be a connector (| or ,).
5005 *
Daniel Veillard5e2dace2001-07-18 19:30:27 +00005006 * Returns the tree of xmlElementContentPtr describing the element
Owen Taylor3473f882001-02-23 17:55:21 +00005007 * hierarchy.
5008 */
5009xmlElementContentPtr
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005010xmlParseElementChildrenContentDecl (xmlParserCtxtPtr ctxt, int inputchk) {
Owen Taylor3473f882001-02-23 17:55:21 +00005011 xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005012 const xmlChar *elem;
Owen Taylor3473f882001-02-23 17:55:21 +00005013 xmlChar type = 0;
5014
5015 SKIP_BLANKS;
5016 GROW;
5017 if (RAW == '(') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005018 int inputid = ctxt->input->id;
Daniel Veillard8dc16a62002-02-19 21:08:48 +00005019
Owen Taylor3473f882001-02-23 17:55:21 +00005020 /* Recurse on first child */
5021 NEXT;
5022 SKIP_BLANKS;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005023 cur = ret = xmlParseElementChildrenContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00005024 SKIP_BLANKS;
5025 GROW;
5026 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00005027 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005028 if (elem == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005029 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005030 return(NULL);
5031 }
5032 cur = ret = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00005033 if (cur == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005034 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00005035 return(NULL);
5036 }
Owen Taylor3473f882001-02-23 17:55:21 +00005037 GROW;
5038 if (RAW == '?') {
5039 cur->ocur = XML_ELEMENT_CONTENT_OPT;
5040 NEXT;
5041 } else if (RAW == '*') {
5042 cur->ocur = XML_ELEMENT_CONTENT_MULT;
5043 NEXT;
5044 } else if (RAW == '+') {
5045 cur->ocur = XML_ELEMENT_CONTENT_PLUS;
5046 NEXT;
5047 } else {
5048 cur->ocur = XML_ELEMENT_CONTENT_ONCE;
5049 }
Owen Taylor3473f882001-02-23 17:55:21 +00005050 GROW;
5051 }
5052 SKIP_BLANKS;
5053 SHRINK;
5054 while (RAW != ')') {
5055 /*
5056 * Each loop we parse one separator and one element.
5057 */
5058 if (RAW == ',') {
5059 if (type == 0) type = CUR;
5060
5061 /*
5062 * Detect "Name | Name , Name" error
5063 */
5064 else if (type != CUR) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005065 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005066 "xmlParseElementChildrenContentDecl : '%c' expected\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005067 type);
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00005068 if ((last != NULL) && (last != ret))
Owen Taylor3473f882001-02-23 17:55:21 +00005069 xmlFreeElementContent(last);
5070 if (ret != NULL)
5071 xmlFreeElementContent(ret);
5072 return(NULL);
5073 }
5074 NEXT;
5075
5076 op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_SEQ);
5077 if (op == NULL) {
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00005078 if ((last != NULL) && (last != ret))
5079 xmlFreeElementContent(last);
Owen Taylor3473f882001-02-23 17:55:21 +00005080 xmlFreeElementContent(ret);
5081 return(NULL);
5082 }
5083 if (last == NULL) {
5084 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005085 if (ret != NULL)
5086 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00005087 ret = cur = op;
5088 } else {
5089 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005090 if (op != NULL)
5091 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00005092 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005093 if (last != NULL)
5094 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00005095 cur =op;
5096 last = NULL;
5097 }
5098 } else if (RAW == '|') {
5099 if (type == 0) type = CUR;
5100
5101 /*
5102 * Detect "Name , Name | Name" error
5103 */
5104 else if (type != CUR) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005105 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005106 "xmlParseElementChildrenContentDecl : '%c' expected\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005107 type);
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00005108 if ((last != NULL) && (last != ret))
Owen Taylor3473f882001-02-23 17:55:21 +00005109 xmlFreeElementContent(last);
5110 if (ret != NULL)
5111 xmlFreeElementContent(ret);
5112 return(NULL);
5113 }
5114 NEXT;
5115
5116 op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
5117 if (op == NULL) {
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00005118 if ((last != NULL) && (last != ret))
Owen Taylor3473f882001-02-23 17:55:21 +00005119 xmlFreeElementContent(last);
5120 if (ret != NULL)
5121 xmlFreeElementContent(ret);
5122 return(NULL);
5123 }
5124 if (last == NULL) {
5125 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005126 if (ret != NULL)
5127 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00005128 ret = cur = op;
5129 } else {
5130 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005131 if (op != NULL)
5132 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00005133 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005134 if (last != NULL)
5135 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00005136 cur =op;
5137 last = NULL;
5138 }
5139 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005140 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005141 if (ret != NULL)
5142 xmlFreeElementContent(ret);
5143 return(NULL);
5144 }
5145 GROW;
5146 SKIP_BLANKS;
5147 GROW;
5148 if (RAW == '(') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005149 int inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00005150 /* Recurse on second child */
5151 NEXT;
5152 SKIP_BLANKS;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005153 last = xmlParseElementChildrenContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00005154 SKIP_BLANKS;
5155 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00005156 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005157 if (elem == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005158 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005159 if (ret != NULL)
5160 xmlFreeElementContent(ret);
5161 return(NULL);
5162 }
5163 last = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
Owen Taylor3473f882001-02-23 17:55:21 +00005164 if (RAW == '?') {
5165 last->ocur = XML_ELEMENT_CONTENT_OPT;
5166 NEXT;
5167 } else if (RAW == '*') {
5168 last->ocur = XML_ELEMENT_CONTENT_MULT;
5169 NEXT;
5170 } else if (RAW == '+') {
5171 last->ocur = XML_ELEMENT_CONTENT_PLUS;
5172 NEXT;
5173 } else {
5174 last->ocur = XML_ELEMENT_CONTENT_ONCE;
5175 }
5176 }
5177 SKIP_BLANKS;
5178 GROW;
5179 }
5180 if ((cur != NULL) && (last != NULL)) {
5181 cur->c2 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005182 if (last != NULL)
5183 last->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00005184 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005185 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00005186 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
5187"Element content declaration doesn't start and stop in the same entity\n",
5188 NULL);
Daniel Veillard8dc16a62002-02-19 21:08:48 +00005189 }
Owen Taylor3473f882001-02-23 17:55:21 +00005190 NEXT;
5191 if (RAW == '?') {
Daniel Veillarde470df72001-04-18 21:41:07 +00005192 if (ret != NULL)
5193 ret->ocur = XML_ELEMENT_CONTENT_OPT;
Owen Taylor3473f882001-02-23 17:55:21 +00005194 NEXT;
5195 } else if (RAW == '*') {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00005196 if (ret != NULL) {
Daniel Veillarde470df72001-04-18 21:41:07 +00005197 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Daniel Veillardce2c2f02001-10-18 14:57:24 +00005198 cur = ret;
5199 /*
5200 * Some normalization:
5201 * (a | b* | c?)* == (a | b | c)*
5202 */
5203 while (cur->type == XML_ELEMENT_CONTENT_OR) {
5204 if ((cur->c1 != NULL) &&
5205 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
5206 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT)))
5207 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
5208 if ((cur->c2 != NULL) &&
5209 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
5210 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT)))
5211 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
5212 cur = cur->c2;
5213 }
5214 }
Owen Taylor3473f882001-02-23 17:55:21 +00005215 NEXT;
5216 } else if (RAW == '+') {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00005217 if (ret != NULL) {
5218 int found = 0;
5219
Daniel Veillarde470df72001-04-18 21:41:07 +00005220 ret->ocur = XML_ELEMENT_CONTENT_PLUS;
Daniel Veillardce2c2f02001-10-18 14:57:24 +00005221 /*
5222 * Some normalization:
5223 * (a | b*)+ == (a | b)*
5224 * (a | b?)+ == (a | b)*
5225 */
5226 while (cur->type == XML_ELEMENT_CONTENT_OR) {
5227 if ((cur->c1 != NULL) &&
5228 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
5229 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) {
5230 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
5231 found = 1;
5232 }
5233 if ((cur->c2 != NULL) &&
5234 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
5235 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) {
5236 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
5237 found = 1;
5238 }
5239 cur = cur->c2;
5240 }
5241 if (found)
5242 ret->ocur = XML_ELEMENT_CONTENT_MULT;
5243 }
Owen Taylor3473f882001-02-23 17:55:21 +00005244 NEXT;
5245 }
5246 return(ret);
5247}
5248
5249/**
5250 * xmlParseElementContentDecl:
5251 * @ctxt: an XML parser context
5252 * @name: the name of the element being defined.
5253 * @result: the Element Content pointer will be stored here if any
5254 *
5255 * parse the declaration for an Element content either Mixed or Children,
5256 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
5257 *
5258 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
5259 *
5260 * returns: the type of element content XML_ELEMENT_TYPE_xxx
5261 */
5262
5263int
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005264xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, const xmlChar *name,
Owen Taylor3473f882001-02-23 17:55:21 +00005265 xmlElementContentPtr *result) {
5266
5267 xmlElementContentPtr tree = NULL;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005268 int inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00005269 int res;
5270
5271 *result = NULL;
5272
5273 if (RAW != '(') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00005274 xmlFatalErrMsgStr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005275 "xmlParseElementContentDecl : %s '(' expected\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00005276 return(-1);
5277 }
5278 NEXT;
5279 GROW;
5280 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005281 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005282 tree = xmlParseElementMixedContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00005283 res = XML_ELEMENT_TYPE_MIXED;
5284 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005285 tree = xmlParseElementChildrenContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00005286 res = XML_ELEMENT_TYPE_ELEMENT;
5287 }
Owen Taylor3473f882001-02-23 17:55:21 +00005288 SKIP_BLANKS;
5289 *result = tree;
5290 return(res);
5291}
5292
5293/**
5294 * xmlParseElementDecl:
5295 * @ctxt: an XML parser context
5296 *
5297 * parse an Element declaration.
5298 *
5299 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
5300 *
5301 * [ VC: Unique Element Type Declaration ]
5302 * No element type may be declared more than once
5303 *
5304 * Returns the type of the element, or -1 in case of error
5305 */
5306int
5307xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005308 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00005309 int ret = -1;
5310 xmlElementContentPtr content = NULL;
5311
5312 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005313 if (CMP9(CUR_PTR, '<', '!', 'E', 'L', 'E', 'M', 'E', 'N', 'T')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005314 xmlParserInputPtr input = ctxt->input;
5315
5316 SKIP(9);
William M. Brack76e95df2003-10-18 16:20:14 +00005317 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005318 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5319 "Space required after 'ELEMENT'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005320 }
5321 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00005322 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005323 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005324 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5325 "xmlParseElementDecl: no name for Element\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005326 return(-1);
5327 }
5328 while ((RAW == 0) && (ctxt->inputNr > 1))
5329 xmlPopInput(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00005330 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005331 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5332 "Space required after the element name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005333 }
5334 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005335 if (CMP5(CUR_PTR, 'E', 'M', 'P', 'T', 'Y')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005336 SKIP(5);
5337 /*
5338 * Element must always be empty.
5339 */
5340 ret = XML_ELEMENT_TYPE_EMPTY;
5341 } else if ((RAW == 'A') && (NXT(1) == 'N') &&
5342 (NXT(2) == 'Y')) {
5343 SKIP(3);
5344 /*
5345 * Element is a generic container.
5346 */
5347 ret = XML_ELEMENT_TYPE_ANY;
5348 } else if (RAW == '(') {
5349 ret = xmlParseElementContentDecl(ctxt, name, &content);
5350 } else {
5351 /*
5352 * [ WFC: PEs in Internal Subset ] error handling.
5353 */
5354 if ((RAW == '%') && (ctxt->external == 0) &&
5355 (ctxt->inputNr == 1)) {
Daniel Veillardf403d292003-10-05 13:51:35 +00005356 xmlFatalErrMsg(ctxt, XML_ERR_PEREF_IN_INT_SUBSET,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005357 "PEReference: forbidden within markup decl in internal subset\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005358 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00005359 xmlFatalErrMsg(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
Owen Taylor3473f882001-02-23 17:55:21 +00005360 "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
5361 }
Owen Taylor3473f882001-02-23 17:55:21 +00005362 return(-1);
5363 }
5364
5365 SKIP_BLANKS;
5366 /*
5367 * Pop-up of finished entities.
5368 */
5369 while ((RAW == 0) && (ctxt->inputNr > 1))
5370 xmlPopInput(ctxt);
5371 SKIP_BLANKS;
5372
5373 if (RAW != '>') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005374 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005375 } else {
5376 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005377 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5378 "Element declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005379 }
5380
5381 NEXT;
5382 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5383 (ctxt->sax->elementDecl != NULL))
5384 ctxt->sax->elementDecl(ctxt->userData, name, ret,
5385 content);
5386 }
5387 if (content != NULL) {
5388 xmlFreeElementContent(content);
5389 }
Owen Taylor3473f882001-02-23 17:55:21 +00005390 }
5391 return(ret);
5392}
5393
5394/**
Owen Taylor3473f882001-02-23 17:55:21 +00005395 * xmlParseConditionalSections
5396 * @ctxt: an XML parser context
5397 *
5398 * [61] conditionalSect ::= includeSect | ignoreSect
5399 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
5400 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
5401 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
5402 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
5403 */
5404
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005405static void
Owen Taylor3473f882001-02-23 17:55:21 +00005406xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
5407 SKIP(3);
5408 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005409 if (CMP7(CUR_PTR, 'I', 'N', 'C', 'L', 'U', 'D', 'E')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005410 SKIP(7);
5411 SKIP_BLANKS;
5412 if (RAW != '[') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005413 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005414 } else {
5415 NEXT;
5416 }
5417 if (xmlParserDebugEntities) {
5418 if ((ctxt->input != NULL) && (ctxt->input->filename))
5419 xmlGenericError(xmlGenericErrorContext,
5420 "%s(%d): ", ctxt->input->filename,
5421 ctxt->input->line);
5422 xmlGenericError(xmlGenericErrorContext,
5423 "Entering INCLUDE Conditional Section\n");
5424 }
5425
5426 while ((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') ||
5427 (NXT(2) != '>'))) {
5428 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00005429 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00005430
5431 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5432 xmlParseConditionalSections(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00005433 } else if (IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005434 NEXT;
5435 } else if (RAW == '%') {
5436 xmlParsePEReference(ctxt);
5437 } else
5438 xmlParseMarkupDecl(ctxt);
5439
5440 /*
5441 * Pop-up of finished entities.
5442 */
5443 while ((RAW == 0) && (ctxt->inputNr > 1))
5444 xmlPopInput(ctxt);
5445
Daniel Veillardfdc91562002-07-01 21:52:03 +00005446 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005447 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005448 break;
5449 }
5450 }
5451 if (xmlParserDebugEntities) {
5452 if ((ctxt->input != NULL) && (ctxt->input->filename))
5453 xmlGenericError(xmlGenericErrorContext,
5454 "%s(%d): ", ctxt->input->filename,
5455 ctxt->input->line);
5456 xmlGenericError(xmlGenericErrorContext,
5457 "Leaving INCLUDE Conditional Section\n");
5458 }
5459
Daniel Veillarda07050d2003-10-19 14:46:32 +00005460 } else if (CMP6(CUR_PTR, 'I', 'G', 'N', 'O', 'R', 'E')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005461 int state;
William M. Brack78637da2003-07-31 14:47:38 +00005462 xmlParserInputState instate;
Owen Taylor3473f882001-02-23 17:55:21 +00005463 int depth = 0;
5464
5465 SKIP(6);
5466 SKIP_BLANKS;
5467 if (RAW != '[') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005468 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005469 } else {
5470 NEXT;
5471 }
5472 if (xmlParserDebugEntities) {
5473 if ((ctxt->input != NULL) && (ctxt->input->filename))
5474 xmlGenericError(xmlGenericErrorContext,
5475 "%s(%d): ", ctxt->input->filename,
5476 ctxt->input->line);
5477 xmlGenericError(xmlGenericErrorContext,
5478 "Entering IGNORE Conditional Section\n");
5479 }
5480
5481 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005482 * Parse up to the end of the conditional section
Owen Taylor3473f882001-02-23 17:55:21 +00005483 * But disable SAX event generating DTD building in the meantime
5484 */
5485 state = ctxt->disableSAX;
5486 instate = ctxt->instate;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005487 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005488 ctxt->instate = XML_PARSER_IGNORE;
5489
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00005490 while ((depth >= 0) && (RAW != 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005491 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5492 depth++;
5493 SKIP(3);
5494 continue;
5495 }
5496 if ((RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
5497 if (--depth >= 0) SKIP(3);
5498 continue;
5499 }
5500 NEXT;
5501 continue;
5502 }
5503
5504 ctxt->disableSAX = state;
5505 ctxt->instate = instate;
5506
5507 if (xmlParserDebugEntities) {
5508 if ((ctxt->input != NULL) && (ctxt->input->filename))
5509 xmlGenericError(xmlGenericErrorContext,
5510 "%s(%d): ", ctxt->input->filename,
5511 ctxt->input->line);
5512 xmlGenericError(xmlGenericErrorContext,
5513 "Leaving IGNORE Conditional Section\n");
5514 }
5515
5516 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005517 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID_KEYWORD, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005518 }
5519
5520 if (RAW == 0)
5521 SHRINK;
5522
5523 if (RAW == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005524 xmlFatalErr(ctxt, XML_ERR_CONDSEC_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005525 } else {
5526 SKIP(3);
5527 }
5528}
5529
5530/**
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005531 * xmlParseMarkupDecl:
5532 * @ctxt: an XML parser context
5533 *
5534 * parse Markup declarations
5535 *
5536 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
5537 * NotationDecl | PI | Comment
5538 *
5539 * [ VC: Proper Declaration/PE Nesting ]
5540 * Parameter-entity replacement text must be properly nested with
5541 * markup declarations. That is to say, if either the first character
5542 * or the last character of a markup declaration (markupdecl above) is
5543 * contained in the replacement text for a parameter-entity reference,
5544 * both must be contained in the same replacement text.
5545 *
5546 * [ WFC: PEs in Internal Subset ]
5547 * In the internal DTD subset, parameter-entity references can occur
5548 * only where markup declarations can occur, not within markup declarations.
5549 * (This does not apply to references that occur in external parameter
5550 * entities or to the external subset.)
5551 */
5552void
5553xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
5554 GROW;
5555 xmlParseElementDecl(ctxt);
5556 xmlParseAttributeListDecl(ctxt);
5557 xmlParseEntityDecl(ctxt);
5558 xmlParseNotationDecl(ctxt);
5559 xmlParsePI(ctxt);
5560 xmlParseComment(ctxt);
5561 /*
5562 * This is only for internal subset. On external entities,
5563 * the replacement is done before parsing stage
5564 */
5565 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
5566 xmlParsePEReference(ctxt);
5567
5568 /*
5569 * Conditional sections are allowed from entities included
5570 * by PE References in the internal subset.
5571 */
5572 if ((ctxt->external == 0) && (ctxt->inputNr > 1)) {
5573 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5574 xmlParseConditionalSections(ctxt);
5575 }
5576 }
5577
5578 ctxt->instate = XML_PARSER_DTD;
5579}
5580
5581/**
5582 * xmlParseTextDecl:
5583 * @ctxt: an XML parser context
5584 *
5585 * parse an XML declaration header for external entities
5586 *
5587 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
5588 *
5589 * Question: Seems that EncodingDecl is mandatory ? Is that a typo ?
5590 */
5591
5592void
5593xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
5594 xmlChar *version;
Daniel Veillardf5cb3cd2003-10-28 13:58:13 +00005595 const xmlChar *encoding;
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005596
5597 /*
5598 * We know that '<?xml' is here.
5599 */
Daniel Veillarda07050d2003-10-19 14:46:32 +00005600 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005601 SKIP(5);
5602 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005603 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_STARTED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005604 return;
5605 }
5606
William M. Brack76e95df2003-10-18 16:20:14 +00005607 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005608 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5609 "Space needed after '<?xml'\n");
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005610 }
5611 SKIP_BLANKS;
5612
5613 /*
5614 * We may have the VersionInfo here.
5615 */
5616 version = xmlParseVersionInfo(ctxt);
5617 if (version == NULL)
5618 version = xmlCharStrdup(XML_DEFAULT_VERSION);
Daniel Veillard401c2112002-01-07 16:54:10 +00005619 else {
William M. Brack76e95df2003-10-18 16:20:14 +00005620 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005621 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5622 "Space needed here\n");
Daniel Veillard401c2112002-01-07 16:54:10 +00005623 }
5624 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005625 ctxt->input->version = version;
5626
5627 /*
5628 * We must have the encoding declaration
5629 */
Daniel Veillardf5cb3cd2003-10-28 13:58:13 +00005630 encoding = xmlParseEncodingDecl(ctxt);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005631 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5632 /*
5633 * The XML REC instructs us to stop parsing right here
5634 */
5635 return;
5636 }
Daniel Veillardf5cb3cd2003-10-28 13:58:13 +00005637 if ((encoding == NULL) && (ctxt->errNo == XML_ERR_OK)) {
5638 xmlFatalErrMsg(ctxt, XML_ERR_MISSING_ENCODING,
5639 "Missing encoding in text declaration\n");
5640 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005641
5642 SKIP_BLANKS;
5643 if ((RAW == '?') && (NXT(1) == '>')) {
5644 SKIP(2);
5645 } else if (RAW == '>') {
5646 /* Deprecated old WD ... */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005647 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005648 NEXT;
5649 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005650 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005651 MOVETO_ENDTAG(CUR_PTR);
5652 NEXT;
5653 }
5654}
5655
5656/**
Owen Taylor3473f882001-02-23 17:55:21 +00005657 * xmlParseExternalSubset:
5658 * @ctxt: an XML parser context
5659 * @ExternalID: the external identifier
5660 * @SystemID: the system identifier (or URL)
5661 *
5662 * parse Markup declarations from an external subset
5663 *
5664 * [30] extSubset ::= textDecl? extSubsetDecl
5665 *
5666 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
5667 */
5668void
5669xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
5670 const xmlChar *SystemID) {
Daniel Veillard309f81d2003-09-23 09:02:53 +00005671 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005672 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005673 if (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005674 xmlParseTextDecl(ctxt);
5675 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5676 /*
5677 * The XML REC instructs us to stop parsing right here
5678 */
5679 ctxt->instate = XML_PARSER_EOF;
5680 return;
5681 }
5682 }
5683 if (ctxt->myDoc == NULL) {
5684 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
5685 }
5686 if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
5687 xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
5688
5689 ctxt->instate = XML_PARSER_DTD;
5690 ctxt->external = 1;
5691 while (((RAW == '<') && (NXT(1) == '?')) ||
5692 ((RAW == '<') && (NXT(1) == '!')) ||
William M. Brack76e95df2003-10-18 16:20:14 +00005693 (RAW == '%') || IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005694 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00005695 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00005696
5697 GROW;
5698 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5699 xmlParseConditionalSections(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00005700 } else if (IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005701 NEXT;
5702 } else if (RAW == '%') {
5703 xmlParsePEReference(ctxt);
5704 } else
5705 xmlParseMarkupDecl(ctxt);
5706
5707 /*
5708 * Pop-up of finished entities.
5709 */
5710 while ((RAW == 0) && (ctxt->inputNr > 1))
5711 xmlPopInput(ctxt);
5712
Daniel Veillardfdc91562002-07-01 21:52:03 +00005713 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005714 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005715 break;
5716 }
5717 }
5718
5719 if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005720 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005721 }
5722
5723}
5724
5725/**
5726 * xmlParseReference:
5727 * @ctxt: an XML parser context
5728 *
5729 * parse and handle entity references in content, depending on the SAX
5730 * interface, this may end-up in a call to character() if this is a
5731 * CharRef, a predefined entity, if there is no reference() callback.
5732 * or if the parser was asked to switch to that mode.
5733 *
5734 * [67] Reference ::= EntityRef | CharRef
5735 */
5736void
5737xmlParseReference(xmlParserCtxtPtr ctxt) {
5738 xmlEntityPtr ent;
5739 xmlChar *val;
5740 if (RAW != '&') return;
5741
5742 if (NXT(1) == '#') {
5743 int i = 0;
5744 xmlChar out[10];
5745 int hex = NXT(2);
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005746 int value = xmlParseCharRef(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005747
5748 if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
5749 /*
5750 * So we are using non-UTF-8 buffers
5751 * Check that the char fit on 8bits, if not
5752 * generate a CharRef.
5753 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005754 if (value <= 0xFF) {
5755 out[0] = value;
Owen Taylor3473f882001-02-23 17:55:21 +00005756 out[1] = 0;
5757 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5758 (!ctxt->disableSAX))
5759 ctxt->sax->characters(ctxt->userData, out, 1);
5760 } else {
5761 if ((hex == 'x') || (hex == 'X'))
Aleksey Sanin49cc9752002-06-14 17:07:10 +00005762 snprintf((char *)out, sizeof(out), "#x%X", value);
Owen Taylor3473f882001-02-23 17:55:21 +00005763 else
Aleksey Sanin49cc9752002-06-14 17:07:10 +00005764 snprintf((char *)out, sizeof(out), "#%d", value);
Owen Taylor3473f882001-02-23 17:55:21 +00005765 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
5766 (!ctxt->disableSAX))
5767 ctxt->sax->reference(ctxt->userData, out);
5768 }
5769 } else {
5770 /*
5771 * Just encode the value in UTF-8
5772 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005773 COPY_BUF(0 ,out, i, value);
Owen Taylor3473f882001-02-23 17:55:21 +00005774 out[i] = 0;
5775 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5776 (!ctxt->disableSAX))
5777 ctxt->sax->characters(ctxt->userData, out, i);
5778 }
5779 } else {
5780 ent = xmlParseEntityRef(ctxt);
5781 if (ent == NULL) return;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00005782 if (!ctxt->wellFormed)
5783 return;
Owen Taylor3473f882001-02-23 17:55:21 +00005784 if ((ent->name != NULL) &&
5785 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
5786 xmlNodePtr list = NULL;
Daniel Veillard7d515752003-09-26 19:12:37 +00005787 xmlParserErrors ret = XML_ERR_OK;
Owen Taylor3473f882001-02-23 17:55:21 +00005788
5789
5790 /*
5791 * The first reference to the entity trigger a parsing phase
5792 * where the ent->children is filled with the result from
5793 * the parsing.
5794 */
5795 if (ent->children == NULL) {
5796 xmlChar *value;
5797 value = ent->content;
5798
5799 /*
5800 * Check that this entity is well formed
5801 */
Daniel Veillard4aede2e2003-10-17 12:43:59 +00005802 if ((value != NULL) && (value[0] != 0) &&
Owen Taylor3473f882001-02-23 17:55:21 +00005803 (value[1] == 0) && (value[0] == '<') &&
5804 (xmlStrEqual(ent->name, BAD_CAST "lt"))) {
5805 /*
5806 * DONE: get definite answer on this !!!
5807 * Lots of entity decls are used to declare a single
5808 * char
5809 * <!ENTITY lt "<">
5810 * Which seems to be valid since
5811 * 2.4: The ampersand character (&) and the left angle
5812 * bracket (<) may appear in their literal form only
5813 * when used ... They are also legal within the literal
5814 * entity value of an internal entity declaration;i
5815 * see "4.3.2 Well-Formed Parsed Entities".
5816 * IMHO 2.4 and 4.3.2 are directly in contradiction.
5817 * Looking at the OASIS test suite and James Clark
5818 * tests, this is broken. However the XML REC uses
5819 * it. Is the XML REC not well-formed ????
5820 * This is a hack to avoid this problem
5821 *
5822 * ANSWER: since lt gt amp .. are already defined,
5823 * this is a redefinition and hence the fact that the
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005824 * content is not well balanced is not a Wf error, this
Owen Taylor3473f882001-02-23 17:55:21 +00005825 * is lousy but acceptable.
5826 */
5827 list = xmlNewDocText(ctxt->myDoc, value);
5828 if (list != NULL) {
5829 if ((ent->etype == XML_INTERNAL_GENERAL_ENTITY) &&
5830 (ent->children == NULL)) {
5831 ent->children = list;
5832 ent->last = list;
Daniel Veillard2d84a892002-12-30 00:01:08 +00005833 ent->owner = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005834 list->parent = (xmlNodePtr) ent;
5835 } else {
5836 xmlFreeNodeList(list);
5837 }
5838 } else if (list != NULL) {
5839 xmlFreeNodeList(list);
5840 }
5841 } else {
5842 /*
5843 * 4.3.2: An internal general parsed entity is well-formed
5844 * if its replacement text matches the production labeled
5845 * content.
5846 */
Daniel Veillardb5a60ec2002-03-18 11:45:56 +00005847
5848 void *user_data;
5849 /*
5850 * This is a bit hackish but this seems the best
5851 * way to make sure both SAX and DOM entity support
5852 * behaves okay.
5853 */
5854 if (ctxt->userData == ctxt)
5855 user_data = NULL;
5856 else
5857 user_data = ctxt->userData;
5858
Owen Taylor3473f882001-02-23 17:55:21 +00005859 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
5860 ctxt->depth++;
Daniel Veillard328f48c2002-11-15 15:24:34 +00005861 ret = xmlParseBalancedChunkMemoryInternal(ctxt,
5862 value, user_data, &list);
Owen Taylor3473f882001-02-23 17:55:21 +00005863 ctxt->depth--;
5864 } else if (ent->etype ==
5865 XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
5866 ctxt->depth++;
Daniel Veillarda97a19b2001-05-20 13:19:52 +00005867 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
Daniel Veillardb5a60ec2002-03-18 11:45:56 +00005868 ctxt->sax, user_data, ctxt->depth,
Daniel Veillarda97a19b2001-05-20 13:19:52 +00005869 ent->URI, ent->ExternalID, &list);
Owen Taylor3473f882001-02-23 17:55:21 +00005870 ctxt->depth--;
5871 } else {
Daniel Veillard7d515752003-09-26 19:12:37 +00005872 ret = XML_ERR_ENTITY_PE_INTERNAL;
Daniel Veillardf403d292003-10-05 13:51:35 +00005873 xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
5874 "invalid entity type found\n", NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005875 }
5876 if (ret == XML_ERR_ENTITY_LOOP) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005877 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00005878 return;
Daniel Veillard7d515752003-09-26 19:12:37 +00005879 } else if ((ret == XML_ERR_OK) && (list != NULL)) {
Daniel Veillard76d66f42001-05-16 21:05:17 +00005880 if (((ent->etype == XML_INTERNAL_GENERAL_ENTITY) ||
5881 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY))&&
Owen Taylor3473f882001-02-23 17:55:21 +00005882 (ent->children == NULL)) {
5883 ent->children = list;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005884 if (ctxt->replaceEntities) {
5885 /*
5886 * Prune it directly in the generated document
5887 * except for single text nodes.
5888 */
5889 if ((list->type == XML_TEXT_NODE) &&
5890 (list->next == NULL)) {
5891 list->parent = (xmlNodePtr) ent;
5892 list = NULL;
Daniel Veillard2d84a892002-12-30 00:01:08 +00005893 ent->owner = 1;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005894 } else {
Daniel Veillard2d84a892002-12-30 00:01:08 +00005895 ent->owner = 0;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005896 while (list != NULL) {
5897 list->parent = (xmlNodePtr) ctxt->node;
Daniel Veillard68e9e742002-11-16 15:35:11 +00005898 list->doc = ctxt->myDoc;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005899 if (list->next == NULL)
5900 ent->last = list;
5901 list = list->next;
Daniel Veillard8107a222002-01-13 14:10:10 +00005902 }
Daniel Veillard62f313b2001-07-04 19:49:14 +00005903 list = ent->children;
Daniel Veillard81273902003-09-30 00:43:48 +00005904#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +00005905 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
5906 xmlAddEntityReference(ent, list, NULL);
Daniel Veillard81273902003-09-30 00:43:48 +00005907#endif /* LIBXML_LEGACY_ENABLED */
Daniel Veillard62f313b2001-07-04 19:49:14 +00005908 }
5909 } else {
Daniel Veillard2d84a892002-12-30 00:01:08 +00005910 ent->owner = 1;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005911 while (list != NULL) {
5912 list->parent = (xmlNodePtr) ent;
5913 if (list->next == NULL)
5914 ent->last = list;
5915 list = list->next;
5916 }
Owen Taylor3473f882001-02-23 17:55:21 +00005917 }
5918 } else {
5919 xmlFreeNodeList(list);
Daniel Veillard62f313b2001-07-04 19:49:14 +00005920 list = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005921 }
William M. Brackb670e2e2003-09-27 01:05:55 +00005922 } else if ((ret != XML_ERR_OK) &&
5923 (ret != XML_WAR_UNDECLARED_ENTITY)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005924 xmlFatalErr(ctxt, ret, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005925 } else if (list != NULL) {
5926 xmlFreeNodeList(list);
Daniel Veillard62f313b2001-07-04 19:49:14 +00005927 list = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005928 }
5929 }
5930 }
5931 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
5932 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
5933 /*
5934 * Create a node.
5935 */
5936 ctxt->sax->reference(ctxt->userData, ent->name);
5937 return;
5938 } else if (ctxt->replaceEntities) {
5939 if ((ctxt->node != NULL) && (ent->children != NULL)) {
5940 /*
5941 * Seems we are generating the DOM content, do
Daniel Veillard62f313b2001-07-04 19:49:14 +00005942 * a simple tree copy for all references except the first
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005943 * In the first occurrence list contains the replacement
Owen Taylor3473f882001-02-23 17:55:21 +00005944 */
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00005945 if ((list == NULL) && (ent->owner == 0)) {
5946 xmlNodePtr nw = NULL, cur, firstChild = NULL;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005947 cur = ent->children;
5948 while (cur != NULL) {
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00005949 nw = xmlCopyNode(cur, 1);
5950 if (nw != NULL) {
5951 nw->_private = cur->_private;
Daniel Veillard8f872442003-01-09 23:19:02 +00005952 if (firstChild == NULL){
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00005953 firstChild = nw;
Daniel Veillard8f872442003-01-09 23:19:02 +00005954 }
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00005955 xmlAddChild(ctxt->node, nw);
Daniel Veillard8107a222002-01-13 14:10:10 +00005956 }
Daniel Veillard62f313b2001-07-04 19:49:14 +00005957 if (cur == ent->last)
5958 break;
5959 cur = cur->next;
5960 }
Daniel Veillard81273902003-09-30 00:43:48 +00005961#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +00005962 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00005963 xmlAddEntityReference(ent, firstChild, nw);
Daniel Veillard81273902003-09-30 00:43:48 +00005964#endif /* LIBXML_LEGACY_ENABLED */
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00005965 } else if (list == NULL) {
5966 xmlNodePtr nw = NULL, cur, next, last,
5967 firstChild = NULL;
5968 /*
5969 * Copy the entity child list and make it the new
5970 * entity child list. The goal is to make sure any
5971 * ID or REF referenced will be the one from the
5972 * document content and not the entity copy.
5973 */
5974 cur = ent->children;
5975 ent->children = NULL;
5976 last = ent->last;
5977 ent->last = NULL;
5978 while (cur != NULL) {
5979 next = cur->next;
5980 cur->next = NULL;
5981 cur->parent = NULL;
5982 nw = xmlCopyNode(cur, 1);
5983 if (nw != NULL) {
5984 nw->_private = cur->_private;
5985 if (firstChild == NULL){
5986 firstChild = cur;
5987 }
5988 xmlAddChild((xmlNodePtr) ent, nw);
5989 xmlAddChild(ctxt->node, cur);
5990 }
5991 if (cur == last)
5992 break;
5993 cur = next;
5994 }
5995 ent->owner = 1;
Daniel Veillard81273902003-09-30 00:43:48 +00005996#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00005997 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
5998 xmlAddEntityReference(ent, firstChild, nw);
Daniel Veillard81273902003-09-30 00:43:48 +00005999#endif /* LIBXML_LEGACY_ENABLED */
Daniel Veillard62f313b2001-07-04 19:49:14 +00006000 } else {
6001 /*
6002 * the name change is to avoid coalescing of the
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006003 * node with a possible previous text one which
6004 * would make ent->children a dangling pointer
Daniel Veillard62f313b2001-07-04 19:49:14 +00006005 */
6006 if (ent->children->type == XML_TEXT_NODE)
6007 ent->children->name = xmlStrdup(BAD_CAST "nbktext");
6008 if ((ent->last != ent->children) &&
6009 (ent->last->type == XML_TEXT_NODE))
6010 ent->last->name = xmlStrdup(BAD_CAST "nbktext");
6011 xmlAddChildList(ctxt->node, ent->children);
6012 }
6013
Owen Taylor3473f882001-02-23 17:55:21 +00006014 /*
6015 * This is to avoid a nasty side effect, see
6016 * characters() in SAX.c
6017 */
6018 ctxt->nodemem = 0;
6019 ctxt->nodelen = 0;
6020 return;
6021 } else {
6022 /*
6023 * Probably running in SAX mode
6024 */
6025 xmlParserInputPtr input;
6026
6027 input = xmlNewEntityInputStream(ctxt, ent);
6028 xmlPushInput(ctxt, input);
6029 if ((ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) &&
Daniel Veillarda07050d2003-10-19 14:46:32 +00006030 (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
6031 (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +00006032 xmlParseTextDecl(ctxt);
6033 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
6034 /*
6035 * The XML REC instructs us to stop parsing right here
6036 */
6037 ctxt->instate = XML_PARSER_EOF;
6038 return;
6039 }
6040 if (input->standalone == 1) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006041 xmlFatalErr(ctxt, XML_ERR_EXT_ENTITY_STANDALONE,
6042 NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006043 }
6044 }
6045 return;
6046 }
6047 }
6048 } else {
6049 val = ent->content;
6050 if (val == NULL) return;
6051 /*
6052 * inline the entity.
6053 */
6054 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
6055 (!ctxt->disableSAX))
6056 ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
6057 }
6058 }
6059}
6060
6061/**
6062 * xmlParseEntityRef:
6063 * @ctxt: an XML parser context
6064 *
6065 * parse ENTITY references declarations
6066 *
6067 * [68] EntityRef ::= '&' Name ';'
6068 *
6069 * [ WFC: Entity Declared ]
6070 * In a document without any DTD, a document with only an internal DTD
6071 * subset which contains no parameter entity references, or a document
6072 * with "standalone='yes'", the Name given in the entity reference
6073 * must match that in an entity declaration, except that well-formed
6074 * documents need not declare any of the following entities: amp, lt,
6075 * gt, apos, quot. The declaration of a parameter entity must precede
6076 * any reference to it. Similarly, the declaration of a general entity
6077 * must precede any reference to it which appears in a default value in an
6078 * attribute-list declaration. Note that if entities are declared in the
6079 * external subset or in external parameter entities, a non-validating
6080 * processor is not obligated to read and process their declarations;
6081 * for such documents, the rule that an entity must be declared is a
6082 * well-formedness constraint only if standalone='yes'.
6083 *
6084 * [ WFC: Parsed Entity ]
6085 * An entity reference must not contain the name of an unparsed entity
6086 *
6087 * Returns the xmlEntityPtr if found, or NULL otherwise.
6088 */
6089xmlEntityPtr
6090xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006091 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00006092 xmlEntityPtr ent = NULL;
6093
6094 GROW;
6095
6096 if (RAW == '&') {
6097 NEXT;
6098 name = xmlParseName(ctxt);
6099 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006100 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6101 "xmlParseEntityRef: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006102 } else {
6103 if (RAW == ';') {
6104 NEXT;
6105 /*
6106 * Ask first SAX for entity resolution, otherwise try the
6107 * predefined set.
6108 */
6109 if (ctxt->sax != NULL) {
6110 if (ctxt->sax->getEntity != NULL)
6111 ent = ctxt->sax->getEntity(ctxt->userData, name);
Daniel Veillard39eb88b2003-03-11 11:21:28 +00006112 if ((ctxt->wellFormed == 1 ) && (ent == NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00006113 ent = xmlGetPredefinedEntity(name);
Daniel Veillard39eb88b2003-03-11 11:21:28 +00006114 if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
6115 (ctxt->userData==ctxt)) {
Daniel Veillard1af9a412003-08-20 22:54:39 +00006116 ent = xmlSAX2GetEntity(ctxt, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00006117 }
Owen Taylor3473f882001-02-23 17:55:21 +00006118 }
6119 /*
6120 * [ WFC: Entity Declared ]
6121 * In a document without any DTD, a document with only an
6122 * internal DTD subset which contains no parameter entity
6123 * references, or a document with "standalone='yes'", the
6124 * Name given in the entity reference must match that in an
6125 * entity declaration, except that well-formed documents
6126 * need not declare any of the following entities: amp, lt,
6127 * gt, apos, quot.
6128 * The declaration of a parameter entity must precede any
6129 * reference to it.
6130 * Similarly, the declaration of a general entity must
6131 * precede any reference to it which appears in a default
6132 * value in an attribute-list declaration. Note that if
6133 * entities are declared in the external subset or in
6134 * external parameter entities, a non-validating processor
6135 * is not obligated to read and process their declarations;
6136 * for such documents, the rule that an entity must be
6137 * declared is a well-formedness constraint only if
6138 * standalone='yes'.
6139 */
6140 if (ent == NULL) {
6141 if ((ctxt->standalone == 1) ||
6142 ((ctxt->hasExternalSubset == 0) &&
6143 (ctxt->hasPErefs == 0))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006144 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00006145 "Entity '%s' not defined\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006146 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00006147 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00006148 "Entity '%s' not defined\n", name);
6149 }
Daniel Veillardf403d292003-10-05 13:51:35 +00006150 ctxt->valid = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00006151 }
6152
6153 /*
6154 * [ WFC: Parsed Entity ]
6155 * An entity reference must not contain the name of an
6156 * unparsed entity
6157 */
6158 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006159 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00006160 "Entity reference to unparsed entity %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006161 }
6162
6163 /*
6164 * [ WFC: No External Entity References ]
6165 * Attribute values cannot contain direct or indirect
6166 * entity references to external entities.
6167 */
6168 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
6169 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006170 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
6171 "Attribute references external entity '%s'\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006172 }
6173 /*
6174 * [ WFC: No < in Attribute Values ]
6175 * The replacement text of any entity referred to directly or
6176 * indirectly in an attribute value (other than "&lt;") must
6177 * not contain a <.
6178 */
6179 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
6180 (ent != NULL) &&
6181 (!xmlStrEqual(ent->name, BAD_CAST "lt")) &&
6182 (ent->content != NULL) &&
6183 (xmlStrchr(ent->content, '<'))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006184 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
Owen Taylor3473f882001-02-23 17:55:21 +00006185 "'<' in entity '%s' is not allowed in attributes values\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006186 }
6187
6188 /*
6189 * Internal check, no parameter entities here ...
6190 */
6191 else {
6192 switch (ent->etype) {
6193 case XML_INTERNAL_PARAMETER_ENTITY:
6194 case XML_EXTERNAL_PARAMETER_ENTITY:
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006195 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
6196 "Attempt to reference the parameter entity '%s'\n",
6197 name);
Owen Taylor3473f882001-02-23 17:55:21 +00006198 break;
6199 default:
6200 break;
6201 }
6202 }
6203
6204 /*
6205 * [ WFC: No Recursion ]
6206 * A parsed entity must not contain a recursive reference
6207 * to itself, either directly or indirectly.
6208 * Done somewhere else
6209 */
6210
6211 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006212 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006213 }
Owen Taylor3473f882001-02-23 17:55:21 +00006214 }
6215 }
6216 return(ent);
6217}
6218
6219/**
6220 * xmlParseStringEntityRef:
6221 * @ctxt: an XML parser context
6222 * @str: a pointer to an index in the string
6223 *
6224 * parse ENTITY references declarations, but this version parses it from
6225 * a string value.
6226 *
6227 * [68] EntityRef ::= '&' Name ';'
6228 *
6229 * [ WFC: Entity Declared ]
6230 * In a document without any DTD, a document with only an internal DTD
6231 * subset which contains no parameter entity references, or a document
6232 * with "standalone='yes'", the Name given in the entity reference
6233 * must match that in an entity declaration, except that well-formed
6234 * documents need not declare any of the following entities: amp, lt,
6235 * gt, apos, quot. The declaration of a parameter entity must precede
6236 * any reference to it. Similarly, the declaration of a general entity
6237 * must precede any reference to it which appears in a default value in an
6238 * attribute-list declaration. Note that if entities are declared in the
6239 * external subset or in external parameter entities, a non-validating
6240 * processor is not obligated to read and process their declarations;
6241 * for such documents, the rule that an entity must be declared is a
6242 * well-formedness constraint only if standalone='yes'.
6243 *
6244 * [ WFC: Parsed Entity ]
6245 * An entity reference must not contain the name of an unparsed entity
6246 *
6247 * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
6248 * is updated to the current location in the string.
6249 */
6250xmlEntityPtr
6251xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
6252 xmlChar *name;
6253 const xmlChar *ptr;
6254 xmlChar cur;
6255 xmlEntityPtr ent = NULL;
6256
6257 if ((str == NULL) || (*str == NULL))
6258 return(NULL);
6259 ptr = *str;
6260 cur = *ptr;
6261 if (cur == '&') {
6262 ptr++;
6263 cur = *ptr;
6264 name = xmlParseStringName(ctxt, &ptr);
6265 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006266 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6267 "xmlParseStringEntityRef: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006268 } else {
6269 if (*ptr == ';') {
6270 ptr++;
6271 /*
6272 * Ask first SAX for entity resolution, otherwise try the
6273 * predefined set.
6274 */
6275 if (ctxt->sax != NULL) {
6276 if (ctxt->sax->getEntity != NULL)
6277 ent = ctxt->sax->getEntity(ctxt->userData, name);
6278 if (ent == NULL)
6279 ent = xmlGetPredefinedEntity(name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00006280 if ((ent == NULL) && (ctxt->userData==ctxt)) {
Daniel Veillard1af9a412003-08-20 22:54:39 +00006281 ent = xmlSAX2GetEntity(ctxt, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00006282 }
Owen Taylor3473f882001-02-23 17:55:21 +00006283 }
6284 /*
6285 * [ WFC: Entity Declared ]
6286 * In a document without any DTD, a document with only an
6287 * internal DTD subset which contains no parameter entity
6288 * references, or a document with "standalone='yes'", the
6289 * Name given in the entity reference must match that in an
6290 * entity declaration, except that well-formed documents
6291 * need not declare any of the following entities: amp, lt,
6292 * gt, apos, quot.
6293 * The declaration of a parameter entity must precede any
6294 * reference to it.
6295 * Similarly, the declaration of a general entity must
6296 * precede any reference to it which appears in a default
6297 * value in an attribute-list declaration. Note that if
6298 * entities are declared in the external subset or in
6299 * external parameter entities, a non-validating processor
6300 * is not obligated to read and process their declarations;
6301 * for such documents, the rule that an entity must be
6302 * declared is a well-formedness constraint only if
6303 * standalone='yes'.
6304 */
6305 if (ent == NULL) {
6306 if ((ctxt->standalone == 1) ||
6307 ((ctxt->hasExternalSubset == 0) &&
6308 (ctxt->hasPErefs == 0))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006309 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00006310 "Entity '%s' not defined\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006311 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00006312 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
Daniel Veillard24eb9782003-10-04 21:08:09 +00006313 "Entity '%s' not defined\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00006314 name);
Owen Taylor3473f882001-02-23 17:55:21 +00006315 }
Daniel Veillard24eb9782003-10-04 21:08:09 +00006316 /* TODO ? check regressions ctxt->valid = 0; */
Owen Taylor3473f882001-02-23 17:55:21 +00006317 }
6318
6319 /*
6320 * [ WFC: Parsed Entity ]
6321 * An entity reference must not contain the name of an
6322 * unparsed entity
6323 */
6324 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
Daniel Veillardf403d292003-10-05 13:51:35 +00006325 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00006326 "Entity reference to unparsed entity %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006327 }
6328
6329 /*
6330 * [ WFC: No External Entity References ]
6331 * Attribute values cannot contain direct or indirect
6332 * entity references to external entities.
6333 */
6334 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
6335 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
Daniel Veillardf403d292003-10-05 13:51:35 +00006336 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
Owen Taylor3473f882001-02-23 17:55:21 +00006337 "Attribute references external entity '%s'\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006338 }
6339 /*
6340 * [ WFC: No < in Attribute Values ]
6341 * The replacement text of any entity referred to directly or
6342 * indirectly in an attribute value (other than "&lt;") must
6343 * not contain a <.
6344 */
6345 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
6346 (ent != NULL) &&
6347 (!xmlStrEqual(ent->name, BAD_CAST "lt")) &&
6348 (ent->content != NULL) &&
6349 (xmlStrchr(ent->content, '<'))) {
Daniel Veillardf403d292003-10-05 13:51:35 +00006350 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
6351 "'<' in entity '%s' is not allowed in attributes values\n",
6352 name);
Owen Taylor3473f882001-02-23 17:55:21 +00006353 }
6354
6355 /*
6356 * Internal check, no parameter entities here ...
6357 */
6358 else {
6359 switch (ent->etype) {
6360 case XML_INTERNAL_PARAMETER_ENTITY:
6361 case XML_EXTERNAL_PARAMETER_ENTITY:
Daniel Veillardf403d292003-10-05 13:51:35 +00006362 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
6363 "Attempt to reference the parameter entity '%s'\n",
6364 name);
Owen Taylor3473f882001-02-23 17:55:21 +00006365 break;
6366 default:
6367 break;
6368 }
6369 }
6370
6371 /*
6372 * [ WFC: No Recursion ]
6373 * A parsed entity must not contain a recursive reference
6374 * to itself, either directly or indirectly.
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006375 * Done somewhere else
Owen Taylor3473f882001-02-23 17:55:21 +00006376 */
6377
6378 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006379 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006380 }
6381 xmlFree(name);
6382 }
6383 }
6384 *str = ptr;
6385 return(ent);
6386}
6387
6388/**
6389 * xmlParsePEReference:
6390 * @ctxt: an XML parser context
6391 *
6392 * parse PEReference declarations
6393 * The entity content is handled directly by pushing it's content as
6394 * a new input stream.
6395 *
6396 * [69] PEReference ::= '%' Name ';'
6397 *
6398 * [ WFC: No Recursion ]
6399 * A parsed entity must not contain a recursive
6400 * reference to itself, either directly or indirectly.
6401 *
6402 * [ WFC: Entity Declared ]
6403 * In a document without any DTD, a document with only an internal DTD
6404 * subset which contains no parameter entity references, or a document
6405 * with "standalone='yes'", ... ... The declaration of a parameter
6406 * entity must precede any reference to it...
6407 *
6408 * [ VC: Entity Declared ]
6409 * In a document with an external subset or external parameter entities
6410 * with "standalone='no'", ... ... The declaration of a parameter entity
6411 * must precede any reference to it...
6412 *
6413 * [ WFC: In DTD ]
6414 * Parameter-entity references may only appear in the DTD.
6415 * NOTE: misleading but this is handled.
6416 */
6417void
Daniel Veillard8f597c32003-10-06 08:19:27 +00006418xmlParsePEReference(xmlParserCtxtPtr ctxt)
6419{
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006420 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00006421 xmlEntityPtr entity = NULL;
6422 xmlParserInputPtr input;
6423
6424 if (RAW == '%') {
6425 NEXT;
Daniel Veillard76d66f42001-05-16 21:05:17 +00006426 name = xmlParseName(ctxt);
Daniel Veillard8f597c32003-10-06 08:19:27 +00006427 if (name == NULL) {
6428 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6429 "xmlParsePEReference: no name\n");
6430 } else {
6431 if (RAW == ';') {
6432 NEXT;
6433 if ((ctxt->sax != NULL) &&
6434 (ctxt->sax->getParameterEntity != NULL))
6435 entity = ctxt->sax->getParameterEntity(ctxt->userData,
6436 name);
6437 if (entity == NULL) {
6438 /*
6439 * [ WFC: Entity Declared ]
6440 * In a document without any DTD, a document with only an
6441 * internal DTD subset which contains no parameter entity
6442 * references, or a document with "standalone='yes'", ...
6443 * ... The declaration of a parameter entity must precede
6444 * any reference to it...
6445 */
6446 if ((ctxt->standalone == 1) ||
6447 ((ctxt->hasExternalSubset == 0) &&
6448 (ctxt->hasPErefs == 0))) {
6449 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
6450 "PEReference: %%%s; not found\n",
6451 name);
6452 } else {
6453 /*
6454 * [ VC: Entity Declared ]
6455 * In a document with an external subset or external
6456 * parameter entities with "standalone='no'", ...
6457 * ... The declaration of a parameter entity must
6458 * precede any reference to it...
6459 */
6460 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
6461 "PEReference: %%%s; not found\n",
6462 name, NULL);
6463 ctxt->valid = 0;
6464 }
6465 } else {
6466 /*
6467 * Internal checking in case the entity quest barfed
6468 */
6469 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
6470 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
6471 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
6472 "Internal: %%%s; is not a parameter entity\n",
6473 name, NULL);
6474 } else if (ctxt->input->free != deallocblankswrapper) {
6475 input =
6476 xmlNewBlanksWrapperInputStream(ctxt, entity);
6477 xmlPushInput(ctxt, input);
6478 } else {
6479 /*
6480 * TODO !!!
6481 * handle the extra spaces added before and after
6482 * c.f. http://www.w3.org/TR/REC-xml#as-PE
6483 */
6484 input = xmlNewEntityInputStream(ctxt, entity);
6485 xmlPushInput(ctxt, input);
6486 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
Daniel Veillarda07050d2003-10-19 14:46:32 +00006487 (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
William M. Brack76e95df2003-10-18 16:20:14 +00006488 (IS_BLANK_CH(NXT(5)))) {
Daniel Veillard8f597c32003-10-06 08:19:27 +00006489 xmlParseTextDecl(ctxt);
6490 if (ctxt->errNo ==
6491 XML_ERR_UNSUPPORTED_ENCODING) {
6492 /*
6493 * The XML REC instructs us to stop parsing
6494 * right here
6495 */
6496 ctxt->instate = XML_PARSER_EOF;
6497 return;
6498 }
6499 }
6500 }
6501 }
6502 ctxt->hasPErefs = 1;
6503 } else {
6504 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
6505 }
6506 }
Owen Taylor3473f882001-02-23 17:55:21 +00006507 }
6508}
6509
6510/**
6511 * xmlParseStringPEReference:
6512 * @ctxt: an XML parser context
6513 * @str: a pointer to an index in the string
6514 *
6515 * parse PEReference declarations
6516 *
6517 * [69] PEReference ::= '%' Name ';'
6518 *
6519 * [ WFC: No Recursion ]
6520 * A parsed entity must not contain a recursive
6521 * reference to itself, either directly or indirectly.
6522 *
6523 * [ WFC: Entity Declared ]
6524 * In a document without any DTD, a document with only an internal DTD
6525 * subset which contains no parameter entity references, or a document
6526 * with "standalone='yes'", ... ... The declaration of a parameter
6527 * entity must precede any reference to it...
6528 *
6529 * [ VC: Entity Declared ]
6530 * In a document with an external subset or external parameter entities
6531 * with "standalone='no'", ... ... The declaration of a parameter entity
6532 * must precede any reference to it...
6533 *
6534 * [ WFC: In DTD ]
6535 * Parameter-entity references may only appear in the DTD.
6536 * NOTE: misleading but this is handled.
6537 *
6538 * Returns the string of the entity content.
6539 * str is updated to the current value of the index
6540 */
6541xmlEntityPtr
6542xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
6543 const xmlChar *ptr;
6544 xmlChar cur;
6545 xmlChar *name;
6546 xmlEntityPtr entity = NULL;
6547
6548 if ((str == NULL) || (*str == NULL)) return(NULL);
6549 ptr = *str;
6550 cur = *ptr;
6551 if (cur == '%') {
6552 ptr++;
6553 cur = *ptr;
6554 name = xmlParseStringName(ctxt, &ptr);
6555 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006556 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6557 "xmlParseStringPEReference: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006558 } else {
6559 cur = *ptr;
6560 if (cur == ';') {
6561 ptr++;
6562 cur = *ptr;
6563 if ((ctxt->sax != NULL) &&
6564 (ctxt->sax->getParameterEntity != NULL))
6565 entity = ctxt->sax->getParameterEntity(ctxt->userData,
6566 name);
6567 if (entity == NULL) {
6568 /*
6569 * [ WFC: Entity Declared ]
6570 * In a document without any DTD, a document with only an
6571 * internal DTD subset which contains no parameter entity
6572 * references, or a document with "standalone='yes'", ...
6573 * ... The declaration of a parameter entity must precede
6574 * any reference to it...
6575 */
6576 if ((ctxt->standalone == 1) ||
6577 ((ctxt->hasExternalSubset == 0) &&
6578 (ctxt->hasPErefs == 0))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006579 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00006580 "PEReference: %%%s; not found\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006581 } else {
6582 /*
6583 * [ VC: Entity Declared ]
6584 * In a document with an external subset or external
6585 * parameter entities with "standalone='no'", ...
6586 * ... The declaration of a parameter entity must
6587 * precede any reference to it...
6588 */
Daniel Veillard24eb9782003-10-04 21:08:09 +00006589 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
6590 "PEReference: %%%s; not found\n",
6591 name, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006592 ctxt->valid = 0;
6593 }
6594 } else {
6595 /*
6596 * Internal checking in case the entity quest barfed
6597 */
6598 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
6599 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00006600 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
6601 "%%%s; is not a parameter entity\n",
6602 name, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006603 }
6604 }
6605 ctxt->hasPErefs = 1;
6606 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006607 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006608 }
6609 xmlFree(name);
6610 }
6611 }
6612 *str = ptr;
6613 return(entity);
6614}
6615
6616/**
6617 * xmlParseDocTypeDecl:
6618 * @ctxt: an XML parser context
6619 *
6620 * parse a DOCTYPE declaration
6621 *
6622 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
6623 * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
6624 *
6625 * [ VC: Root Element Type ]
6626 * The Name in the document type declaration must match the element
6627 * type of the root element.
6628 */
6629
6630void
6631xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006632 const xmlChar *name = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00006633 xmlChar *ExternalID = NULL;
6634 xmlChar *URI = NULL;
6635
6636 /*
6637 * We know that '<!DOCTYPE' has been detected.
6638 */
6639 SKIP(9);
6640
6641 SKIP_BLANKS;
6642
6643 /*
6644 * Parse the DOCTYPE name.
6645 */
6646 name = xmlParseName(ctxt);
6647 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006648 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6649 "xmlParseDocTypeDecl : no DOCTYPE name !\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006650 }
6651 ctxt->intSubName = name;
6652
6653 SKIP_BLANKS;
6654
6655 /*
6656 * Check for SystemID and ExternalID
6657 */
6658 URI = xmlParseExternalID(ctxt, &ExternalID, 1);
6659
6660 if ((URI != NULL) || (ExternalID != NULL)) {
6661 ctxt->hasExternalSubset = 1;
6662 }
6663 ctxt->extSubURI = URI;
6664 ctxt->extSubSystem = ExternalID;
6665
6666 SKIP_BLANKS;
6667
6668 /*
6669 * Create and update the internal subset.
6670 */
6671 if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
6672 (!ctxt->disableSAX))
6673 ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
6674
6675 /*
6676 * Is there any internal subset declarations ?
6677 * they are handled separately in xmlParseInternalSubset()
6678 */
6679 if (RAW == '[')
6680 return;
6681
6682 /*
6683 * We should be at the end of the DOCTYPE declaration.
6684 */
6685 if (RAW != '>') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006686 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006687 }
6688 NEXT;
6689}
6690
6691/**
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006692 * xmlParseInternalSubset:
Owen Taylor3473f882001-02-23 17:55:21 +00006693 * @ctxt: an XML parser context
6694 *
6695 * parse the internal subset declaration
6696 *
6697 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
6698 */
6699
Daniel Veillard56a4cb82001-03-24 17:00:36 +00006700static void
Owen Taylor3473f882001-02-23 17:55:21 +00006701xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
6702 /*
6703 * Is there any DTD definition ?
6704 */
6705 if (RAW == '[') {
6706 ctxt->instate = XML_PARSER_DTD;
6707 NEXT;
6708 /*
6709 * Parse the succession of Markup declarations and
6710 * PEReferences.
6711 * Subsequence (markupdecl | PEReference | S)*
6712 */
6713 while (RAW != ']') {
6714 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00006715 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00006716
6717 SKIP_BLANKS;
6718 xmlParseMarkupDecl(ctxt);
6719 xmlParsePEReference(ctxt);
6720
6721 /*
6722 * Pop-up of finished entities.
6723 */
6724 while ((RAW == 0) && (ctxt->inputNr > 1))
6725 xmlPopInput(ctxt);
6726
6727 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006728 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
Owen Taylor3473f882001-02-23 17:55:21 +00006729 "xmlParseInternalSubset: error detected in Markup declaration\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006730 break;
6731 }
6732 }
6733 if (RAW == ']') {
6734 NEXT;
6735 SKIP_BLANKS;
6736 }
6737 }
6738
6739 /*
6740 * We should be at the end of the DOCTYPE declaration.
6741 */
6742 if (RAW != '>') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006743 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006744 }
6745 NEXT;
6746}
6747
Daniel Veillard81273902003-09-30 00:43:48 +00006748#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +00006749/**
6750 * xmlParseAttribute:
6751 * @ctxt: an XML parser context
6752 * @value: a xmlChar ** used to store the value of the attribute
6753 *
6754 * parse an attribute
6755 *
6756 * [41] Attribute ::= Name Eq AttValue
6757 *
6758 * [ WFC: No External Entity References ]
6759 * Attribute values cannot contain direct or indirect entity references
6760 * to external entities.
6761 *
6762 * [ WFC: No < in Attribute Values ]
6763 * The replacement text of any entity referred to directly or indirectly in
6764 * an attribute value (other than "&lt;") must not contain a <.
6765 *
6766 * [ VC: Attribute Value Type ]
6767 * The attribute must have been declared; the value must be of the type
6768 * declared for it.
6769 *
6770 * [25] Eq ::= S? '=' S?
6771 *
6772 * With namespace:
6773 *
6774 * [NS 11] Attribute ::= QName Eq AttValue
6775 *
6776 * Also the case QName == xmlns:??? is handled independently as a namespace
6777 * definition.
6778 *
6779 * Returns the attribute name, and the value in *value.
6780 */
6781
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006782const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00006783xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006784 const xmlChar *name;
6785 xmlChar *val;
Owen Taylor3473f882001-02-23 17:55:21 +00006786
6787 *value = NULL;
Daniel Veillard878eab02002-02-19 13:46:09 +00006788 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00006789 name = xmlParseName(ctxt);
6790 if (name == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00006791 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006792 "error parsing attribute name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006793 return(NULL);
6794 }
6795
6796 /*
6797 * read the value
6798 */
6799 SKIP_BLANKS;
6800 if (RAW == '=') {
6801 NEXT;
6802 SKIP_BLANKS;
6803 val = xmlParseAttValue(ctxt);
6804 ctxt->instate = XML_PARSER_CONTENT;
6805 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006806 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
Owen Taylor3473f882001-02-23 17:55:21 +00006807 "Specification mandate value for attribute %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006808 return(NULL);
6809 }
6810
6811 /*
6812 * Check that xml:lang conforms to the specification
6813 * No more registered as an error, just generate a warning now
6814 * since this was deprecated in XML second edition
6815 */
6816 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
6817 if (!xmlCheckLanguageID(val)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00006818 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
6819 "Malformed value for xml:lang : %s\n",
6820 val, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006821 }
6822 }
6823
6824 /*
6825 * Check that xml:space conforms to the specification
6826 */
6827 if (xmlStrEqual(name, BAD_CAST "xml:space")) {
6828 if (xmlStrEqual(val, BAD_CAST "default"))
6829 *(ctxt->space) = 0;
6830 else if (xmlStrEqual(val, BAD_CAST "preserve"))
6831 *(ctxt->space) = 1;
6832 else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006833 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
Daniel Veillard642104e2003-03-26 16:32:05 +00006834"Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
Owen Taylor3473f882001-02-23 17:55:21 +00006835 val);
Owen Taylor3473f882001-02-23 17:55:21 +00006836 }
6837 }
6838
6839 *value = val;
6840 return(name);
6841}
6842
6843/**
6844 * xmlParseStartTag:
6845 * @ctxt: an XML parser context
6846 *
6847 * parse a start of tag either for rule element or
6848 * EmptyElement. In both case we don't parse the tag closing chars.
6849 *
6850 * [40] STag ::= '<' Name (S Attribute)* S? '>'
6851 *
6852 * [ WFC: Unique Att Spec ]
6853 * No attribute name may appear more than once in the same start-tag or
6854 * empty-element tag.
6855 *
6856 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
6857 *
6858 * [ WFC: Unique Att Spec ]
6859 * No attribute name may appear more than once in the same start-tag or
6860 * empty-element tag.
6861 *
6862 * With namespace:
6863 *
6864 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
6865 *
6866 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
6867 *
6868 * Returns the element name parsed
6869 */
6870
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006871const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00006872xmlParseStartTag(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006873 const xmlChar *name;
6874 const xmlChar *attname;
Owen Taylor3473f882001-02-23 17:55:21 +00006875 xmlChar *attvalue;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00006876 const xmlChar **atts = ctxt->atts;
Owen Taylor3473f882001-02-23 17:55:21 +00006877 int nbatts = 0;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00006878 int maxatts = ctxt->maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00006879 int i;
6880
6881 if (RAW != '<') return(NULL);
Daniel Veillard21a0f912001-02-25 19:54:14 +00006882 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00006883
6884 name = xmlParseName(ctxt);
6885 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006886 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00006887 "xmlParseStartTag: invalid element name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006888 return(NULL);
6889 }
6890
6891 /*
6892 * Now parse the attributes, it ends up with the ending
6893 *
6894 * (S Attribute)* S?
6895 */
6896 SKIP_BLANKS;
6897 GROW;
6898
Daniel Veillard21a0f912001-02-25 19:54:14 +00006899 while ((RAW != '>') &&
6900 ((RAW != '/') || (NXT(1) != '>')) &&
Daniel Veillard73b013f2003-09-30 12:36:01 +00006901 (IS_BYTE_CHAR(RAW))) {
Owen Taylor3473f882001-02-23 17:55:21 +00006902 const xmlChar *q = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00006903 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00006904
6905 attname = xmlParseAttribute(ctxt, &attvalue);
6906 if ((attname != NULL) && (attvalue != NULL)) {
6907 /*
6908 * [ WFC: Unique Att Spec ]
6909 * No attribute name may appear more than once in the same
6910 * start-tag or empty-element tag.
6911 */
6912 for (i = 0; i < nbatts;i += 2) {
6913 if (xmlStrEqual(atts[i], attname)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006914 xmlErrAttributeDup(ctxt, NULL, attname);
Owen Taylor3473f882001-02-23 17:55:21 +00006915 xmlFree(attvalue);
6916 goto failed;
6917 }
6918 }
Owen Taylor3473f882001-02-23 17:55:21 +00006919 /*
6920 * Add the pair to atts
6921 */
6922 if (atts == NULL) {
Daniel Veillard6155d8a2003-08-19 15:01:28 +00006923 maxatts = 22; /* allow for 10 attrs by default */
6924 atts = (const xmlChar **)
6925 xmlMalloc(maxatts * sizeof(xmlChar *));
Owen Taylor3473f882001-02-23 17:55:21 +00006926 if (atts == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006927 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006928 if (attvalue != NULL)
6929 xmlFree(attvalue);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006930 goto failed;
Owen Taylor3473f882001-02-23 17:55:21 +00006931 }
Daniel Veillard6155d8a2003-08-19 15:01:28 +00006932 ctxt->atts = atts;
6933 ctxt->maxatts = maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00006934 } else if (nbatts + 4 > maxatts) {
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006935 const xmlChar **n;
6936
Owen Taylor3473f882001-02-23 17:55:21 +00006937 maxatts *= 2;
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006938 n = (const xmlChar **) xmlRealloc((void *) atts,
Daniel Veillard6155d8a2003-08-19 15:01:28 +00006939 maxatts * sizeof(const xmlChar *));
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006940 if (n == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006941 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006942 if (attvalue != NULL)
6943 xmlFree(attvalue);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006944 goto failed;
Owen Taylor3473f882001-02-23 17:55:21 +00006945 }
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006946 atts = n;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00006947 ctxt->atts = atts;
6948 ctxt->maxatts = maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00006949 }
6950 atts[nbatts++] = attname;
6951 atts[nbatts++] = attvalue;
6952 atts[nbatts] = NULL;
6953 atts[nbatts + 1] = NULL;
6954 } else {
Owen Taylor3473f882001-02-23 17:55:21 +00006955 if (attvalue != NULL)
6956 xmlFree(attvalue);
6957 }
6958
6959failed:
6960
Daniel Veillard3772de32002-12-17 10:31:45 +00006961 GROW
Owen Taylor3473f882001-02-23 17:55:21 +00006962 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
6963 break;
William M. Brack76e95df2003-10-18 16:20:14 +00006964 if (!IS_BLANK_CH(RAW)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006965 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6966 "attributes construct error\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006967 }
6968 SKIP_BLANKS;
Daniel Veillard02111c12003-02-24 19:14:52 +00006969 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
6970 (attname == NULL) && (attvalue == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006971 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
6972 "xmlParseStartTag: problem parsing attributes\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006973 break;
6974 }
Daniel Veillard6155d8a2003-08-19 15:01:28 +00006975 SHRINK;
Owen Taylor3473f882001-02-23 17:55:21 +00006976 GROW;
6977 }
6978
6979 /*
6980 * SAX: Start of Element !
6981 */
6982 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
Daniel Veillard6155d8a2003-08-19 15:01:28 +00006983 (!ctxt->disableSAX)) {
6984 if (nbatts > 0)
6985 ctxt->sax->startElement(ctxt->userData, name, atts);
6986 else
6987 ctxt->sax->startElement(ctxt->userData, name, NULL);
6988 }
Owen Taylor3473f882001-02-23 17:55:21 +00006989
6990 if (atts != NULL) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006991 /* Free only the content strings */
6992 for (i = 1;i < nbatts;i+=2)
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006993 if (atts[i] != NULL)
6994 xmlFree((xmlChar *) atts[i]);
Owen Taylor3473f882001-02-23 17:55:21 +00006995 }
6996 return(name);
6997}
6998
6999/**
Daniel Veillard0fb18932003-09-07 09:14:37 +00007000 * xmlParseEndTag1:
Owen Taylor3473f882001-02-23 17:55:21 +00007001 * @ctxt: an XML parser context
Daniel Veillard0fb18932003-09-07 09:14:37 +00007002 * @line: line of the start tag
7003 * @nsNr: number of namespaces on the start tag
Owen Taylor3473f882001-02-23 17:55:21 +00007004 *
7005 * parse an end of tag
7006 *
7007 * [42] ETag ::= '</' Name S? '>'
7008 *
7009 * With namespace
7010 *
7011 * [NS 9] ETag ::= '</' QName S? '>'
7012 */
7013
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00007014static void
Daniel Veillard0fb18932003-09-07 09:14:37 +00007015xmlParseEndTag1(xmlParserCtxtPtr ctxt, int line) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007016 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00007017
7018 GROW;
7019 if ((RAW != '<') || (NXT(1) != '/')) {
Daniel Veillardf403d292003-10-05 13:51:35 +00007020 xmlFatalErrMsg(ctxt, XML_ERR_LTSLASH_REQUIRED,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007021 "xmlParseEndTag: '</' not found\n");
Owen Taylor3473f882001-02-23 17:55:21 +00007022 return;
7023 }
7024 SKIP(2);
7025
Daniel Veillard46de64e2002-05-29 08:21:33 +00007026 name = xmlParseNameAndCompare(ctxt,ctxt->name);
Owen Taylor3473f882001-02-23 17:55:21 +00007027
7028 /*
7029 * We should definitely be at the ending "S? '>'" part
7030 */
7031 GROW;
7032 SKIP_BLANKS;
Daniel Veillard73b013f2003-09-30 12:36:01 +00007033 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007034 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007035 } else
Daniel Veillard21a0f912001-02-25 19:54:14 +00007036 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00007037
7038 /*
7039 * [ WFC: Element Type Match ]
7040 * The Name in an element's end-tag must match the element type in the
7041 * start-tag.
7042 *
7043 */
Daniel Veillard46de64e2002-05-29 08:21:33 +00007044 if (name != (xmlChar*)1) {
Daniel Veillardf403d292003-10-05 13:51:35 +00007045 if (name == NULL) name = BAD_CAST "unparseable";
7046 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00007047 "Opening and ending tag mismatch: %s line %d and %s\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00007048 ctxt->name, line, name);
Owen Taylor3473f882001-02-23 17:55:21 +00007049 }
7050
7051 /*
7052 * SAX: End of Tag
7053 */
7054 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
7055 (!ctxt->disableSAX))
Daniel Veillard46de64e2002-05-29 08:21:33 +00007056 ctxt->sax->endElement(ctxt->userData, ctxt->name);
Owen Taylor3473f882001-02-23 17:55:21 +00007057
Daniel Veillarde57ec792003-09-10 10:50:59 +00007058 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00007059 spacePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00007060 return;
7061}
7062
7063/**
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00007064 * xmlParseEndTag:
7065 * @ctxt: an XML parser context
7066 *
7067 * parse an end of tag
7068 *
7069 * [42] ETag ::= '</' Name S? '>'
7070 *
7071 * With namespace
7072 *
7073 * [NS 9] ETag ::= '</' QName S? '>'
7074 */
7075
7076void
7077xmlParseEndTag(xmlParserCtxtPtr ctxt) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007078 xmlParseEndTag1(ctxt, 0);
7079}
Daniel Veillard81273902003-09-30 00:43:48 +00007080#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillard0fb18932003-09-07 09:14:37 +00007081
7082/************************************************************************
7083 * *
7084 * SAX 2 specific operations *
7085 * *
7086 ************************************************************************/
7087
7088static const xmlChar *
7089xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) {
7090 int len = 0, l;
7091 int c;
7092 int count = 0;
7093
7094 /*
7095 * Handler for more complex cases
7096 */
7097 GROW;
7098 c = CUR_CHAR(l);
7099 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007100 (!IS_LETTER(c) && (c != '_'))) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007101 return(NULL);
7102 }
7103
7104 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
William M. Brack871611b2003-10-18 04:53:14 +00007105 ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007106 (c == '.') || (c == '-') || (c == '_') ||
William M. Brack871611b2003-10-18 04:53:14 +00007107 (IS_COMBINING(c)) ||
7108 (IS_EXTENDER(c)))) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007109 if (count++ > 100) {
7110 count = 0;
7111 GROW;
7112 }
7113 len += l;
7114 NEXTL(l);
7115 c = CUR_CHAR(l);
7116 }
7117 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
7118}
7119
7120/*
7121 * xmlGetNamespace:
7122 * @ctxt: an XML parser context
7123 * @prefix: the prefix to lookup
7124 *
7125 * Lookup the namespace name for the @prefix (which ca be NULL)
7126 * The prefix must come from the @ctxt->dict dictionnary
7127 *
7128 * Returns the namespace name or NULL if not bound
7129 */
7130static const xmlChar *
7131xmlGetNamespace(xmlParserCtxtPtr ctxt, const xmlChar *prefix) {
7132 int i;
7133
Daniel Veillarde57ec792003-09-10 10:50:59 +00007134 if (prefix == ctxt->str_xml) return(ctxt->str_xml_ns);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007135 for (i = ctxt->nsNr - 2;i >= 0;i-=2)
Daniel Veillarde57ec792003-09-10 10:50:59 +00007136 if (ctxt->nsTab[i] == prefix) {
7137 if ((prefix == NULL) && (*ctxt->nsTab[i + 1] == 0))
7138 return(NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007139 return(ctxt->nsTab[i + 1]);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007140 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007141 return(NULL);
7142}
7143
7144/**
7145 * xmlParseNCName:
7146 * @ctxt: an XML parser context
7147 *
7148 * parse an XML name.
7149 *
7150 * [4NS] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
7151 * CombiningChar | Extender
7152 *
7153 * [5NS] NCName ::= (Letter | '_') (NCNameChar)*
7154 *
7155 * Returns the Name parsed or NULL
7156 */
7157
7158static const xmlChar *
7159xmlParseNCName(xmlParserCtxtPtr ctxt) {
7160 const xmlChar *in;
7161 const xmlChar *ret;
7162 int count = 0;
7163
7164 /*
7165 * Accelerator for simple ASCII names
7166 */
7167 in = ctxt->input->cur;
7168 if (((*in >= 0x61) && (*in <= 0x7A)) ||
7169 ((*in >= 0x41) && (*in <= 0x5A)) ||
7170 (*in == '_')) {
7171 in++;
7172 while (((*in >= 0x61) && (*in <= 0x7A)) ||
7173 ((*in >= 0x41) && (*in <= 0x5A)) ||
7174 ((*in >= 0x30) && (*in <= 0x39)) ||
7175 (*in == '_') || (*in == '-') ||
7176 (*in == '.'))
7177 in++;
7178 if ((*in > 0) && (*in < 0x80)) {
7179 count = in - ctxt->input->cur;
7180 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
7181 ctxt->input->cur = in;
7182 ctxt->nbChars += count;
7183 ctxt->input->col += count;
7184 if (ret == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007185 xmlErrMemory(ctxt, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007186 }
7187 return(ret);
7188 }
7189 }
7190 return(xmlParseNCNameComplex(ctxt));
7191}
7192
7193/**
7194 * xmlParseQName:
7195 * @ctxt: an XML parser context
7196 * @prefix: pointer to store the prefix part
7197 *
7198 * parse an XML Namespace QName
7199 *
7200 * [6] QName ::= (Prefix ':')? LocalPart
7201 * [7] Prefix ::= NCName
7202 * [8] LocalPart ::= NCName
7203 *
7204 * Returns the Name parsed or NULL
7205 */
7206
7207static const xmlChar *
7208xmlParseQName(xmlParserCtxtPtr ctxt, const xmlChar **prefix) {
7209 const xmlChar *l, *p;
7210
7211 GROW;
7212
7213 l = xmlParseNCName(ctxt);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007214 if (l == NULL) {
7215 if (CUR == ':') {
7216 l = xmlParseName(ctxt);
7217 if (l != NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007218 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
7219 "Failed to parse QName '%s'\n", l, NULL, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007220 *prefix = NULL;
7221 return(l);
7222 }
7223 }
7224 return(NULL);
7225 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007226 if (CUR == ':') {
7227 NEXT;
7228 p = l;
7229 l = xmlParseNCName(ctxt);
7230 if (l == NULL) {
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007231 xmlChar *tmp;
7232
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007233 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
7234 "Failed to parse QName '%s:'\n", p, NULL, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007235 tmp = xmlBuildQName(BAD_CAST "", p, NULL, 0);
7236 p = xmlDictLookup(ctxt->dict, tmp, -1);
7237 if (tmp != NULL) xmlFree(tmp);
7238 *prefix = NULL;
7239 return(p);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007240 }
7241 if (CUR == ':') {
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007242 xmlChar *tmp;
7243
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007244 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
7245 "Failed to parse QName '%s:%s:'\n", p, l, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007246 NEXT;
7247 tmp = (xmlChar *) xmlParseName(ctxt);
7248 if (tmp != NULL) {
7249 tmp = xmlBuildQName(tmp, l, NULL, 0);
7250 l = xmlDictLookup(ctxt->dict, tmp, -1);
7251 if (tmp != NULL) xmlFree(tmp);
7252 *prefix = p;
7253 return(l);
7254 }
7255 tmp = xmlBuildQName(BAD_CAST "", l, NULL, 0);
7256 l = xmlDictLookup(ctxt->dict, tmp, -1);
7257 if (tmp != NULL) xmlFree(tmp);
7258 *prefix = p;
7259 return(l);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007260 }
7261 *prefix = p;
7262 } else
7263 *prefix = NULL;
7264 return(l);
7265}
7266
7267/**
7268 * xmlParseQNameAndCompare:
7269 * @ctxt: an XML parser context
7270 * @name: the localname
7271 * @prefix: the prefix, if any.
7272 *
7273 * parse an XML name and compares for match
7274 * (specialized for endtag parsing)
7275 *
7276 * Returns NULL for an illegal name, (xmlChar*) 1 for success
7277 * and the name for mismatch
7278 */
7279
7280static const xmlChar *
7281xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *name,
7282 xmlChar const *prefix) {
7283 const xmlChar *cmp = name;
7284 const xmlChar *in;
7285 const xmlChar *ret;
7286 const xmlChar *prefix2;
7287
7288 if (prefix == NULL) return(xmlParseNameAndCompare(ctxt, name));
7289
7290 GROW;
7291 in = ctxt->input->cur;
7292
7293 cmp = prefix;
7294 while (*in != 0 && *in == *cmp) {
7295 ++in;
7296 ++cmp;
7297 }
7298 if ((*cmp == 0) && (*in == ':')) {
7299 in++;
7300 cmp = name;
7301 while (*in != 0 && *in == *cmp) {
7302 ++in;
7303 ++cmp;
7304 }
William M. Brack76e95df2003-10-18 16:20:14 +00007305 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007306 /* success */
7307 ctxt->input->cur = in;
7308 return((const xmlChar*) 1);
7309 }
7310 }
7311 /*
7312 * all strings coms from the dictionary, equality can be done directly
7313 */
7314 ret = xmlParseQName (ctxt, &prefix2);
7315 if ((ret == name) && (prefix == prefix2))
7316 return((const xmlChar*) 1);
7317 return ret;
7318}
7319
7320/**
7321 * xmlParseAttValueInternal:
7322 * @ctxt: an XML parser context
7323 * @len: attribute len result
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007324 * @alloc: whether the attribute was reallocated as a new string
7325 * @normalize: if 1 then further non-CDATA normalization must be done
Daniel Veillard0fb18932003-09-07 09:14:37 +00007326 *
7327 * parse a value for an attribute.
7328 * NOTE: if no normalization is needed, the routine will return pointers
7329 * directly from the data buffer.
7330 *
7331 * 3.3.3 Attribute-Value Normalization:
7332 * Before the value of an attribute is passed to the application or
7333 * checked for validity, the XML processor must normalize it as follows:
7334 * - a character reference is processed by appending the referenced
7335 * character to the attribute value
7336 * - an entity reference is processed by recursively processing the
7337 * replacement text of the entity
7338 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
7339 * appending #x20 to the normalized value, except that only a single
7340 * #x20 is appended for a "#xD#xA" sequence that is part of an external
7341 * parsed entity or the literal entity value of an internal parsed entity
7342 * - other characters are processed by appending them to the normalized value
7343 * If the declared value is not CDATA, then the XML processor must further
7344 * process the normalized attribute value by discarding any leading and
7345 * trailing space (#x20) characters, and by replacing sequences of space
7346 * (#x20) characters by a single space (#x20) character.
7347 * All attributes for which no declaration has been read should be treated
7348 * by a non-validating parser as if declared CDATA.
7349 *
7350 * Returns the AttValue parsed or NULL. The value has to be freed by the
7351 * caller if it was copied, this can be detected by val[*len] == 0.
7352 */
7353
7354static xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007355xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *len, int *alloc,
7356 int normalize)
Daniel Veillarde57ec792003-09-10 10:50:59 +00007357{
Daniel Veillard0fb18932003-09-07 09:14:37 +00007358 xmlChar limit = 0;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007359 const xmlChar *in = NULL, *start, *end, *last;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007360 xmlChar *ret = NULL;
7361
7362 GROW;
7363 in = (xmlChar *) CUR_PTR;
7364 if (*in != '"' && *in != '\'') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007365 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007366 return (NULL);
7367 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007368 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007369
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007370 /*
7371 * try to handle in this routine the most common case where no
7372 * allocation of a new string is required and where content is
7373 * pure ASCII.
7374 */
7375 limit = *in++;
7376 end = ctxt->input->end;
7377 start = in;
7378 if (in >= end) {
7379 const xmlChar *oldbase = ctxt->input->base;
7380 GROW;
7381 if (oldbase != ctxt->input->base) {
7382 long delta = ctxt->input->base - oldbase;
7383 start = start + delta;
7384 in = in + delta;
7385 }
7386 end = ctxt->input->end;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007387 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007388 if (normalize) {
7389 /*
7390 * Skip any leading spaces
7391 */
7392 while ((in < end) && (*in != limit) &&
7393 ((*in == 0x20) || (*in == 0x9) ||
7394 (*in == 0xA) || (*in == 0xD))) {
7395 in++;
7396 start = in;
7397 if (in >= end) {
7398 const xmlChar *oldbase = ctxt->input->base;
7399 GROW;
7400 if (oldbase != ctxt->input->base) {
7401 long delta = ctxt->input->base - oldbase;
7402 start = start + delta;
7403 in = in + delta;
7404 }
7405 end = ctxt->input->end;
7406 }
7407 }
7408 while ((in < end) && (*in != limit) && (*in >= 0x20) &&
7409 (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
7410 if ((*in++ == 0x20) && (*in == 0x20)) break;
7411 if (in >= end) {
7412 const xmlChar *oldbase = ctxt->input->base;
7413 GROW;
7414 if (oldbase != ctxt->input->base) {
7415 long delta = ctxt->input->base - oldbase;
7416 start = start + delta;
7417 in = in + delta;
7418 }
7419 end = ctxt->input->end;
7420 }
7421 }
7422 last = in;
7423 /*
7424 * skip the trailing blanks
7425 */
Daniel Veillardc6e20e42003-09-11 16:30:26 +00007426 while ((last[-1] == 0x20) && (last > start)) last--;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007427 while ((in < end) && (*in != limit) &&
7428 ((*in == 0x20) || (*in == 0x9) ||
7429 (*in == 0xA) || (*in == 0xD))) {
7430 in++;
7431 if (in >= end) {
7432 const xmlChar *oldbase = ctxt->input->base;
7433 GROW;
7434 if (oldbase != ctxt->input->base) {
7435 long delta = ctxt->input->base - oldbase;
7436 start = start + delta;
7437 in = in + delta;
7438 last = last + delta;
7439 }
7440 end = ctxt->input->end;
7441 }
7442 }
7443 if (*in != limit) goto need_complex;
7444 } else {
7445 while ((in < end) && (*in != limit) && (*in >= 0x20) &&
7446 (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
7447 in++;
7448 if (in >= end) {
7449 const xmlChar *oldbase = ctxt->input->base;
7450 GROW;
7451 if (oldbase != ctxt->input->base) {
7452 long delta = ctxt->input->base - oldbase;
7453 start = start + delta;
7454 in = in + delta;
7455 }
7456 end = ctxt->input->end;
7457 }
7458 }
7459 last = in;
7460 if (*in != limit) goto need_complex;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007461 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007462 in++;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007463 if (len != NULL) {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007464 *len = last - start;
7465 ret = (xmlChar *) start;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007466 } else {
Daniel Veillarde57ec792003-09-10 10:50:59 +00007467 if (alloc) *alloc = 1;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007468 ret = xmlStrndup(start, last - start);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007469 }
7470 CUR_PTR = in;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007471 if (alloc) *alloc = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007472 return ret;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007473need_complex:
7474 if (alloc) *alloc = 1;
7475 return xmlParseAttValueComplex(ctxt, len, normalize);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007476}
7477
7478/**
7479 * xmlParseAttribute2:
7480 * @ctxt: an XML parser context
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007481 * @pref: the element prefix
7482 * @elem: the element name
7483 * @prefix: a xmlChar ** used to store the value of the attribute prefix
Daniel Veillard0fb18932003-09-07 09:14:37 +00007484 * @value: a xmlChar ** used to store the value of the attribute
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007485 * @len: an int * to save the length of the attribute
7486 * @alloc: an int * to indicate if the attribute was allocated
Daniel Veillard0fb18932003-09-07 09:14:37 +00007487 *
7488 * parse an attribute in the new SAX2 framework.
7489 *
7490 * Returns the attribute name, and the value in *value, .
7491 */
7492
7493static const xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007494xmlParseAttribute2(xmlParserCtxtPtr ctxt,
7495 const xmlChar *pref, const xmlChar *elem,
7496 const xmlChar **prefix, xmlChar **value,
7497 int *len, int *alloc) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007498 const xmlChar *name;
7499 xmlChar *val;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007500 int normalize = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007501
7502 *value = NULL;
7503 GROW;
7504 name = xmlParseQName(ctxt, prefix);
7505 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007506 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7507 "error parsing attribute name\n");
Daniel Veillard0fb18932003-09-07 09:14:37 +00007508 return(NULL);
7509 }
7510
7511 /*
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007512 * get the type if needed
7513 */
7514 if (ctxt->attsSpecial != NULL) {
7515 int type;
7516
7517 type = (int) (long) xmlHashQLookup2(ctxt->attsSpecial,
7518 pref, elem, *prefix, name);
7519 if (type != 0) normalize = 1;
7520 }
7521
7522 /*
Daniel Veillard0fb18932003-09-07 09:14:37 +00007523 * read the value
7524 */
7525 SKIP_BLANKS;
7526 if (RAW == '=') {
7527 NEXT;
7528 SKIP_BLANKS;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007529 val = xmlParseAttValueInternal(ctxt, len, alloc, normalize);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007530 ctxt->instate = XML_PARSER_CONTENT;
7531 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00007532 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
Daniel Veillard0fb18932003-09-07 09:14:37 +00007533 "Specification mandate value for attribute %s\n", name);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007534 return(NULL);
7535 }
7536
7537 /*
7538 * Check that xml:lang conforms to the specification
7539 * No more registered as an error, just generate a warning now
7540 * since this was deprecated in XML second edition
7541 */
7542 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
7543 if (!xmlCheckLanguageID(val)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00007544 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
7545 "Malformed value for xml:lang : %s\n",
7546 val, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007547 }
7548 }
7549
7550 /*
7551 * Check that xml:space conforms to the specification
7552 */
7553 if (xmlStrEqual(name, BAD_CAST "xml:space")) {
7554 if (xmlStrEqual(val, BAD_CAST "default"))
7555 *(ctxt->space) = 0;
7556 else if (xmlStrEqual(val, BAD_CAST "preserve"))
7557 *(ctxt->space) = 1;
7558 else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00007559 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
Daniel Veillard0fb18932003-09-07 09:14:37 +00007560"Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
7561 val);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007562 }
7563 }
7564
7565 *value = val;
7566 return(name);
7567}
7568
7569/**
7570 * xmlParseStartTag2:
7571 * @ctxt: an XML parser context
7572 *
7573 * parse a start of tag either for rule element or
7574 * EmptyElement. In both case we don't parse the tag closing chars.
7575 * This routine is called when running SAX2 parsing
7576 *
7577 * [40] STag ::= '<' Name (S Attribute)* S? '>'
7578 *
7579 * [ WFC: Unique Att Spec ]
7580 * No attribute name may appear more than once in the same start-tag or
7581 * empty-element tag.
7582 *
7583 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
7584 *
7585 * [ WFC: Unique Att Spec ]
7586 * No attribute name may appear more than once in the same start-tag or
7587 * empty-element tag.
7588 *
7589 * With namespace:
7590 *
7591 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
7592 *
7593 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
7594 *
7595 * Returns the element name parsed
7596 */
7597
7598static const xmlChar *
7599xmlParseStartTag2(xmlParserCtxtPtr ctxt, const xmlChar **pref,
7600 const xmlChar **URI) {
7601 const xmlChar *localname;
7602 const xmlChar *prefix;
7603 const xmlChar *attname;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007604 const xmlChar *aprefix;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007605 const xmlChar *nsname;
7606 xmlChar *attvalue;
7607 const xmlChar **atts = ctxt->atts;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007608 int maxatts = ctxt->maxatts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007609 int nratts, nbatts, nbdef;
7610 int i, j, nbNs, attval;
7611 const xmlChar *base;
7612 unsigned long cur;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007613
7614 if (RAW != '<') return(NULL);
7615 NEXT1;
7616
7617 /*
7618 * NOTE: it is crucial with the SAX2 API to never call SHRINK beyond that
7619 * point since the attribute values may be stored as pointers to
7620 * the buffer and calling SHRINK would destroy them !
7621 * The Shrinking is only possible once the full set of attribute
7622 * callbacks have been done.
7623 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00007624reparse:
Daniel Veillard0fb18932003-09-07 09:14:37 +00007625 SHRINK;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007626 base = ctxt->input->base;
7627 cur = ctxt->input->cur - ctxt->input->base;
7628 nbatts = 0;
7629 nratts = 0;
7630 nbdef = 0;
7631 nbNs = 0;
7632 attval = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007633
7634 localname = xmlParseQName(ctxt, &prefix);
7635 if (localname == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007636 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7637 "StartTag: invalid element name\n");
Daniel Veillard0fb18932003-09-07 09:14:37 +00007638 return(NULL);
7639 }
7640
7641 /*
7642 * Now parse the attributes, it ends up with the ending
7643 *
7644 * (S Attribute)* S?
7645 */
7646 SKIP_BLANKS;
7647 GROW;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007648 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007649
7650 while ((RAW != '>') &&
7651 ((RAW != '/') || (NXT(1) != '>')) &&
Daniel Veillard73b013f2003-09-30 12:36:01 +00007652 (IS_BYTE_CHAR(RAW))) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007653 const xmlChar *q = CUR_PTR;
7654 unsigned int cons = ctxt->input->consumed;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007655 int len = -1, alloc = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007656
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007657 attname = xmlParseAttribute2(ctxt, prefix, localname,
7658 &aprefix, &attvalue, &len, &alloc);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007659 if ((attname != NULL) && (attvalue != NULL)) {
7660 if (len < 0) len = xmlStrlen(attvalue);
7661 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00007662 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
7663 xmlURIPtr uri;
7664
7665 if (*URL != 0) {
7666 uri = xmlParseURI((const char *) URL);
7667 if (uri == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00007668 xmlWarningMsg(ctxt, XML_WAR_NS_URI,
7669 "xmlns: %s not a valid URI\n",
7670 URL, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007671 } else {
7672 if (uri->scheme == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00007673 xmlWarningMsg(ctxt, XML_WAR_NS_URI_RELATIVE,
7674 "xmlns: URI %s is not absolute\n",
7675 URL, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007676 }
7677 xmlFreeURI(uri);
7678 }
7679 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007680 /*
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007681 * check that it's not a defined namespace
Daniel Veillard0fb18932003-09-07 09:14:37 +00007682 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007683 for (j = 1;j <= nbNs;j++)
7684 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
7685 break;
7686 if (j <= nbNs)
7687 xmlErrAttributeDup(ctxt, NULL, attname);
7688 else
7689 if (nsPush(ctxt, NULL, URL) > 0) nbNs++;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007690 if (alloc != 0) xmlFree(attvalue);
7691 SKIP_BLANKS;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007692 continue;
7693 }
7694 if (aprefix == ctxt->str_xmlns) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00007695 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
7696 xmlURIPtr uri;
7697
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007698 if (attname == ctxt->str_xml) {
7699 if (URL != ctxt->str_xml_ns) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007700 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
7701 "xml namespace prefix mapped to wrong URI\n",
7702 NULL, NULL, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007703 }
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007704 /*
7705 * Do not keep a namespace definition node
7706 */
7707 if (alloc != 0) xmlFree(attvalue);
7708 SKIP_BLANKS;
7709 continue;
7710 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00007711 uri = xmlParseURI((const char *) URL);
7712 if (uri == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00007713 xmlWarningMsg(ctxt, XML_WAR_NS_URI,
7714 "xmlns:%s: '%s' is not a valid URI\n",
7715 attname, URL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007716 } else {
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007717 if ((ctxt->pedantic) && (uri->scheme == NULL)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00007718 xmlWarningMsg(ctxt, XML_WAR_NS_URI_RELATIVE,
7719 "xmlns:%s: URI %s is not absolute\n",
7720 attname, URL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007721 }
7722 xmlFreeURI(uri);
7723 }
7724
Daniel Veillard0fb18932003-09-07 09:14:37 +00007725 /*
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007726 * check that it's not a defined namespace
Daniel Veillard0fb18932003-09-07 09:14:37 +00007727 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007728 for (j = 1;j <= nbNs;j++)
7729 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
7730 break;
7731 if (j <= nbNs)
7732 xmlErrAttributeDup(ctxt, aprefix, attname);
7733 else
7734 if (nsPush(ctxt, attname, URL) > 0) nbNs++;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007735 if (alloc != 0) xmlFree(attvalue);
7736 SKIP_BLANKS;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007737 continue;
7738 }
7739
7740 /*
7741 * Add the pair to atts
7742 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00007743 if ((atts == NULL) || (nbatts + 5 > maxatts)) {
7744 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007745 if (attvalue[len] == 0)
7746 xmlFree(attvalue);
7747 goto failed;
7748 }
7749 maxatts = ctxt->maxatts;
7750 atts = ctxt->atts;
7751 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00007752 ctxt->attallocs[nratts++] = alloc;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007753 atts[nbatts++] = attname;
7754 atts[nbatts++] = aprefix;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007755 atts[nbatts++] = NULL; /* the URI will be fetched later */
Daniel Veillard0fb18932003-09-07 09:14:37 +00007756 atts[nbatts++] = attvalue;
7757 attvalue += len;
7758 atts[nbatts++] = attvalue;
7759 /*
7760 * tag if some deallocation is needed
7761 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00007762 if (alloc != 0) attval = 1;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007763 } else {
7764 if ((attvalue != NULL) && (attvalue[len] == 0))
7765 xmlFree(attvalue);
7766 }
7767
7768failed:
7769
7770 GROW
Daniel Veillarde57ec792003-09-10 10:50:59 +00007771 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007772 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
7773 break;
William M. Brack76e95df2003-10-18 16:20:14 +00007774 if (!IS_BLANK_CH(RAW)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007775 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
7776 "attributes construct error\n");
Daniel Veillard0fb18932003-09-07 09:14:37 +00007777 }
7778 SKIP_BLANKS;
7779 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
7780 (attname == NULL) && (attvalue == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007781 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
Daniel Veillard0fb18932003-09-07 09:14:37 +00007782 "xmlParseStartTag: problem parsing attributes\n");
Daniel Veillard0fb18932003-09-07 09:14:37 +00007783 break;
7784 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007785 GROW;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007786 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007787 }
7788
Daniel Veillard0fb18932003-09-07 09:14:37 +00007789 /*
Daniel Veillarde57ec792003-09-10 10:50:59 +00007790 * The attributes checkings
Daniel Veillard0fb18932003-09-07 09:14:37 +00007791 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00007792 for (i = 0; i < nbatts;i += 5) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007793 nsname = xmlGetNamespace(ctxt, atts[i + 1]);
7794 if ((atts[i + 1] != NULL) && (nsname == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007795 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007796 "Namespace prefix %s for %s on %s is not defined\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007797 atts[i + 1], atts[i], localname);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007798 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00007799 atts[i + 2] = nsname;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007800 /*
7801 * [ WFC: Unique Att Spec ]
7802 * No attribute name may appear more than once in the same
7803 * start-tag or empty-element tag.
7804 * As extended by the Namespace in XML REC.
7805 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00007806 for (j = 0; j < i;j += 5) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007807 if (atts[i] == atts[j]) {
7808 if (atts[i+1] == atts[j+1]) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007809 xmlErrAttributeDup(ctxt, atts[i+1], atts[i]);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007810 break;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007811 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00007812 if ((nsname != NULL) && (atts[j + 2] == nsname)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007813 xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED,
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007814 "Namespaced Attribute %s in '%s' redefined\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007815 atts[i], nsname, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007816 break;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007817 }
7818 }
7819 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00007820 }
7821
7822 /*
7823 * The attributes defaulting
7824 */
7825 if (ctxt->attsDefault != NULL) {
7826 xmlDefAttrsPtr defaults;
7827
7828 defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix);
7829 if (defaults != NULL) {
7830 for (i = 0;i < defaults->nbAttrs;i++) {
7831 attname = defaults->values[4 * i];
7832 aprefix = defaults->values[4 * i + 1];
7833
7834 /*
7835 * special work for namespaces defaulted defs
7836 */
7837 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
7838 /*
7839 * check that it's not a defined namespace
7840 */
7841 for (j = 1;j <= nbNs;j++)
7842 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
7843 break;
7844 if (j <= nbNs) continue;
7845
7846 nsname = xmlGetNamespace(ctxt, NULL);
7847 if (nsname != defaults->values[4 * i + 2]) {
7848 if (nsPush(ctxt, NULL,
7849 defaults->values[4 * i + 2]) > 0)
7850 nbNs++;
7851 }
7852 } else if (aprefix == ctxt->str_xmlns) {
7853 /*
7854 * check that it's not a defined namespace
7855 */
7856 for (j = 1;j <= nbNs;j++)
7857 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
7858 break;
7859 if (j <= nbNs) continue;
7860
7861 nsname = xmlGetNamespace(ctxt, attname);
7862 if (nsname != defaults->values[2]) {
7863 if (nsPush(ctxt, attname,
7864 defaults->values[4 * i + 2]) > 0)
7865 nbNs++;
7866 }
7867 } else {
7868 /*
7869 * check that it's not a defined attribute
7870 */
7871 for (j = 0;j < nbatts;j+=5) {
7872 if ((attname == atts[j]) && (aprefix == atts[j+1]))
7873 break;
7874 }
7875 if (j < nbatts) continue;
7876
7877 if ((atts == NULL) || (nbatts + 5 > maxatts)) {
7878 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
Daniel Veillard9ee35f32003-09-28 00:19:54 +00007879 return(NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007880 }
7881 maxatts = ctxt->maxatts;
7882 atts = ctxt->atts;
7883 }
7884 atts[nbatts++] = attname;
7885 atts[nbatts++] = aprefix;
7886 if (aprefix == NULL)
7887 atts[nbatts++] = NULL;
7888 else
7889 atts[nbatts++] = xmlGetNamespace(ctxt, aprefix);
7890 atts[nbatts++] = defaults->values[4 * i + 2];
7891 atts[nbatts++] = defaults->values[4 * i + 3];
7892 nbdef++;
7893 }
7894 }
7895 }
7896 }
7897
7898 nsname = xmlGetNamespace(ctxt, prefix);
7899 if ((prefix != NULL) && (nsname == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007900 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
7901 "Namespace prefix %s on %s is not defined\n",
7902 prefix, localname, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007903 }
7904 *pref = prefix;
7905 *URI = nsname;
7906
7907 /*
7908 * SAX: Start of Element !
7909 */
7910 if ((ctxt->sax != NULL) && (ctxt->sax->startElementNs != NULL) &&
7911 (!ctxt->disableSAX)) {
7912 if (nbNs > 0)
7913 ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
7914 nsname, nbNs, &ctxt->nsTab[ctxt->nsNr - 2 * nbNs],
7915 nbatts / 5, nbdef, atts);
7916 else
7917 ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
7918 nsname, 0, NULL, nbatts / 5, nbdef, atts);
7919 }
7920
7921 /*
7922 * Free up attribute allocated strings if needed
7923 */
7924 if (attval != 0) {
7925 for (i = 3,j = 0; j < nratts;i += 5,j++)
7926 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
7927 xmlFree((xmlChar *) atts[i]);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007928 }
7929
7930 return(localname);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007931
7932base_changed:
7933 /*
7934 * the attribute strings are valid iif the base didn't changed
7935 */
7936 if (attval != 0) {
7937 for (i = 3,j = 0; j < nratts;i += 5,j++)
7938 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
7939 xmlFree((xmlChar *) atts[i]);
7940 }
7941 ctxt->input->cur = ctxt->input->base + cur;
7942 if (ctxt->wellFormed == 1) {
7943 goto reparse;
7944 }
7945 return(NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007946}
7947
7948/**
7949 * xmlParseEndTag2:
7950 * @ctxt: an XML parser context
7951 * @line: line of the start tag
7952 * @nsNr: number of namespaces on the start tag
7953 *
7954 * parse an end of tag
7955 *
7956 * [42] ETag ::= '</' Name S? '>'
7957 *
7958 * With namespace
7959 *
7960 * [NS 9] ETag ::= '</' QName S? '>'
7961 */
7962
7963static void
7964xmlParseEndTag2(xmlParserCtxtPtr ctxt, const xmlChar *prefix,
7965 const xmlChar *URI, int line, int nsNr) {
7966 const xmlChar *name;
7967
7968 GROW;
7969 if ((RAW != '<') || (NXT(1) != '/')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007970 xmlFatalErr(ctxt, XML_ERR_LTSLASH_REQUIRED, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007971 return;
7972 }
7973 SKIP(2);
7974
7975 name = xmlParseQNameAndCompare(ctxt, ctxt->name, prefix);
7976
7977 /*
7978 * We should definitely be at the ending "S? '>'" part
7979 */
7980 GROW;
7981 SKIP_BLANKS;
Daniel Veillard73b013f2003-09-30 12:36:01 +00007982 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007983 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007984 } else
7985 NEXT1;
7986
7987 /*
7988 * [ WFC: Element Type Match ]
7989 * The Name in an element's end-tag must match the element type in the
7990 * start-tag.
7991 *
7992 */
7993 if (name != (xmlChar*)1) {
Daniel Veillardf403d292003-10-05 13:51:35 +00007994 if (name == NULL) name = BAD_CAST "unparseable";
7995 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
Daniel Veillard0fb18932003-09-07 09:14:37 +00007996 "Opening and ending tag mismatch: %s line %d and %s\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00007997 ctxt->name, line, name);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007998 }
7999
8000 /*
8001 * SAX: End of Tag
8002 */
8003 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
8004 (!ctxt->disableSAX))
8005 ctxt->sax->endElementNs(ctxt->userData, ctxt->name, prefix, URI);
8006
Daniel Veillard0fb18932003-09-07 09:14:37 +00008007 spacePop(ctxt);
8008 if (nsNr != 0)
8009 nsPop(ctxt, nsNr);
8010 return;
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00008011}
8012
8013/**
Owen Taylor3473f882001-02-23 17:55:21 +00008014 * xmlParseCDSect:
8015 * @ctxt: an XML parser context
8016 *
8017 * Parse escaped pure raw content.
8018 *
8019 * [18] CDSect ::= CDStart CData CDEnd
8020 *
8021 * [19] CDStart ::= '<![CDATA['
8022 *
8023 * [20] Data ::= (Char* - (Char* ']]>' Char*))
8024 *
8025 * [21] CDEnd ::= ']]>'
8026 */
8027void
8028xmlParseCDSect(xmlParserCtxtPtr ctxt) {
8029 xmlChar *buf = NULL;
8030 int len = 0;
8031 int size = XML_PARSER_BUFFER_SIZE;
8032 int r, rl;
8033 int s, sl;
8034 int cur, l;
8035 int count = 0;
8036
Daniel Veillard8f597c32003-10-06 08:19:27 +00008037 /* Check 2.6.0 was NXT(0) not RAW */
Daniel Veillarda07050d2003-10-19 14:46:32 +00008038 if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008039 SKIP(9);
8040 } else
8041 return;
8042
8043 ctxt->instate = XML_PARSER_CDATA_SECTION;
8044 r = CUR_CHAR(rl);
William M. Brack871611b2003-10-18 04:53:14 +00008045 if (!IS_CHAR(r)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008046 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008047 ctxt->instate = XML_PARSER_CONTENT;
8048 return;
8049 }
8050 NEXTL(rl);
8051 s = CUR_CHAR(sl);
William M. Brack871611b2003-10-18 04:53:14 +00008052 if (!IS_CHAR(s)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008053 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008054 ctxt->instate = XML_PARSER_CONTENT;
8055 return;
8056 }
8057 NEXTL(sl);
8058 cur = CUR_CHAR(l);
Daniel Veillard3c908dc2003-04-19 00:07:51 +00008059 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00008060 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008061 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008062 return;
8063 }
William M. Brack871611b2003-10-18 04:53:14 +00008064 while (IS_CHAR(cur) &&
Owen Taylor3473f882001-02-23 17:55:21 +00008065 ((r != ']') || (s != ']') || (cur != '>'))) {
8066 if (len + 5 >= size) {
8067 size *= 2;
8068 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
8069 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008070 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008071 return;
8072 }
8073 }
8074 COPY_BUF(rl,buf,len,r);
8075 r = s;
8076 rl = sl;
8077 s = cur;
8078 sl = l;
8079 count++;
8080 if (count > 50) {
8081 GROW;
8082 count = 0;
8083 }
8084 NEXTL(l);
8085 cur = CUR_CHAR(l);
8086 }
8087 buf[len] = 0;
8088 ctxt->instate = XML_PARSER_CONTENT;
8089 if (cur != '>') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00008090 xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00008091 "CData section not finished\n%.50s\n", buf);
Owen Taylor3473f882001-02-23 17:55:21 +00008092 xmlFree(buf);
8093 return;
8094 }
8095 NEXTL(l);
8096
8097 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00008098 * OK the buffer is to be consumed as cdata.
Owen Taylor3473f882001-02-23 17:55:21 +00008099 */
8100 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
8101 if (ctxt->sax->cdataBlock != NULL)
8102 ctxt->sax->cdataBlock(ctxt->userData, buf, len);
Daniel Veillard7583a592001-07-08 13:15:55 +00008103 else if (ctxt->sax->characters != NULL)
8104 ctxt->sax->characters(ctxt->userData, buf, len);
Owen Taylor3473f882001-02-23 17:55:21 +00008105 }
8106 xmlFree(buf);
8107}
8108
8109/**
8110 * xmlParseContent:
8111 * @ctxt: an XML parser context
8112 *
8113 * Parse a content:
8114 *
8115 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
8116 */
8117
8118void
8119xmlParseContent(xmlParserCtxtPtr ctxt) {
8120 GROW;
Daniel Veillardfdc91562002-07-01 21:52:03 +00008121 while ((RAW != 0) &&
Owen Taylor3473f882001-02-23 17:55:21 +00008122 ((RAW != '<') || (NXT(1) != '/'))) {
8123 const xmlChar *test = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00008124 unsigned int cons = ctxt->input->consumed;
Daniel Veillard21a0f912001-02-25 19:54:14 +00008125 const xmlChar *cur = ctxt->input->cur;
Owen Taylor3473f882001-02-23 17:55:21 +00008126
8127 /*
Owen Taylor3473f882001-02-23 17:55:21 +00008128 * First case : a Processing Instruction.
8129 */
Daniel Veillardfdc91562002-07-01 21:52:03 +00008130 if ((*cur == '<') && (cur[1] == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008131 xmlParsePI(ctxt);
8132 }
8133
8134 /*
8135 * Second case : a CDSection
8136 */
Daniel Veillard8f597c32003-10-06 08:19:27 +00008137 /* 2.6.0 test was *cur not RAW */
Daniel Veillarda07050d2003-10-19 14:46:32 +00008138 else if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008139 xmlParseCDSect(ctxt);
8140 }
8141
8142 /*
8143 * Third case : a comment
8144 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00008145 else if ((*cur == '<') && (NXT(1) == '!') &&
Owen Taylor3473f882001-02-23 17:55:21 +00008146 (NXT(2) == '-') && (NXT(3) == '-')) {
8147 xmlParseComment(ctxt);
8148 ctxt->instate = XML_PARSER_CONTENT;
8149 }
8150
8151 /*
8152 * Fourth case : a sub-element.
8153 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00008154 else if (*cur == '<') {
Owen Taylor3473f882001-02-23 17:55:21 +00008155 xmlParseElement(ctxt);
8156 }
8157
8158 /*
8159 * Fifth case : a reference. If if has not been resolved,
8160 * parsing returns it's Name, create the node
8161 */
8162
Daniel Veillard21a0f912001-02-25 19:54:14 +00008163 else if (*cur == '&') {
Owen Taylor3473f882001-02-23 17:55:21 +00008164 xmlParseReference(ctxt);
8165 }
8166
8167 /*
8168 * Last case, text. Note that References are handled directly.
8169 */
8170 else {
8171 xmlParseCharData(ctxt, 0);
8172 }
8173
8174 GROW;
8175 /*
8176 * Pop-up of finished entities.
8177 */
Daniel Veillard561b7f82002-03-20 21:55:57 +00008178 while ((RAW == 0) && (ctxt->inputNr > 1))
Owen Taylor3473f882001-02-23 17:55:21 +00008179 xmlPopInput(ctxt);
8180 SHRINK;
8181
Daniel Veillardfdc91562002-07-01 21:52:03 +00008182 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008183 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8184 "detected an error in element content\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008185 ctxt->instate = XML_PARSER_EOF;
8186 break;
8187 }
8188 }
8189}
8190
8191/**
8192 * xmlParseElement:
8193 * @ctxt: an XML parser context
8194 *
8195 * parse an XML element, this is highly recursive
8196 *
8197 * [39] element ::= EmptyElemTag | STag content ETag
8198 *
8199 * [ WFC: Element Type Match ]
8200 * The Name in an element's end-tag must match the element type in the
8201 * start-tag.
8202 *
Owen Taylor3473f882001-02-23 17:55:21 +00008203 */
8204
8205void
8206xmlParseElement(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008207 const xmlChar *name;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008208 const xmlChar *prefix;
8209 const xmlChar *URI;
Owen Taylor3473f882001-02-23 17:55:21 +00008210 xmlParserNodeInfo node_info;
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00008211 int line;
Owen Taylor3473f882001-02-23 17:55:21 +00008212 xmlNodePtr ret;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008213 int nsNr = ctxt->nsNr;
Owen Taylor3473f882001-02-23 17:55:21 +00008214
8215 /* Capture start position */
8216 if (ctxt->record_info) {
8217 node_info.begin_pos = ctxt->input->consumed +
8218 (CUR_PTR - ctxt->input->base);
8219 node_info.begin_line = ctxt->input->line;
8220 }
8221
8222 if (ctxt->spaceNr == 0)
8223 spacePush(ctxt, -1);
8224 else
8225 spacePush(ctxt, *ctxt->space);
8226
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00008227 line = ctxt->input->line;
Daniel Veillard81273902003-09-30 00:43:48 +00008228#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +00008229 if (ctxt->sax2)
Daniel Veillard81273902003-09-30 00:43:48 +00008230#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillard0fb18932003-09-07 09:14:37 +00008231 name = xmlParseStartTag2(ctxt, &prefix, &URI);
Daniel Veillard81273902003-09-30 00:43:48 +00008232#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +00008233 else
8234 name = xmlParseStartTag(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +00008235#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00008236 if (name == NULL) {
8237 spacePop(ctxt);
8238 return;
8239 }
8240 namePush(ctxt, name);
8241 ret = ctxt->node;
8242
Daniel Veillard4432df22003-09-28 18:58:27 +00008243#ifdef LIBXML_VALID_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +00008244 /*
8245 * [ VC: Root Element Type ]
8246 * The Name in the document type declaration must match the element
8247 * type of the root element.
8248 */
8249 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
8250 ctxt->node && (ctxt->node == ctxt->myDoc->children))
8251 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
Daniel Veillard4432df22003-09-28 18:58:27 +00008252#endif /* LIBXML_VALID_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00008253
8254 /*
8255 * Check for an Empty Element.
8256 */
8257 if ((RAW == '/') && (NXT(1) == '>')) {
8258 SKIP(2);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008259 if (ctxt->sax2) {
8260 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
8261 (!ctxt->disableSAX))
8262 ctxt->sax->endElementNs(ctxt->userData, name, prefix, URI);
Daniel Veillard81273902003-09-30 00:43:48 +00008263#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +00008264 } else {
8265 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
8266 (!ctxt->disableSAX))
8267 ctxt->sax->endElement(ctxt->userData, name);
Daniel Veillard81273902003-09-30 00:43:48 +00008268#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00008269 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008270 namePop(ctxt);
8271 spacePop(ctxt);
8272 if (nsNr != ctxt->nsNr)
8273 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +00008274 if ( ret != NULL && ctxt->record_info ) {
8275 node_info.end_pos = ctxt->input->consumed +
8276 (CUR_PTR - ctxt->input->base);
8277 node_info.end_line = ctxt->input->line;
8278 node_info.node = ret;
8279 xmlParserAddNodeInfo(ctxt, &node_info);
8280 }
8281 return;
8282 }
8283 if (RAW == '>') {
Daniel Veillard21a0f912001-02-25 19:54:14 +00008284 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00008285 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00008286 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_GT_REQUIRED,
8287 "Couldn't find end of Start Tag %s line %d\n",
8288 name, line, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008289
8290 /*
8291 * end of parsing of this node.
8292 */
8293 nodePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008294 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00008295 spacePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008296 if (nsNr != ctxt->nsNr)
8297 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +00008298
8299 /*
8300 * Capture end position and add node
8301 */
8302 if ( ret != NULL && ctxt->record_info ) {
8303 node_info.end_pos = ctxt->input->consumed +
8304 (CUR_PTR - ctxt->input->base);
8305 node_info.end_line = ctxt->input->line;
8306 node_info.node = ret;
8307 xmlParserAddNodeInfo(ctxt, &node_info);
8308 }
8309 return;
8310 }
8311
8312 /*
8313 * Parse the content of the element:
8314 */
8315 xmlParseContent(ctxt);
Daniel Veillard73b013f2003-09-30 12:36:01 +00008316 if (!IS_BYTE_CHAR(RAW)) {
Daniel Veillardf403d292003-10-05 13:51:35 +00008317 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
Daniel Veillard2b0f8792003-10-10 19:36:36 +00008318 "Premature end of data in tag %s line %d\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00008319 name, line, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008320
8321 /*
8322 * end of parsing of this node.
8323 */
8324 nodePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008325 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00008326 spacePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008327 if (nsNr != ctxt->nsNr)
8328 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +00008329 return;
8330 }
8331
8332 /*
8333 * parse the end of tag: '</' should be here.
8334 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00008335 if (ctxt->sax2) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00008336 xmlParseEndTag2(ctxt, prefix, URI, line, ctxt->nsNr - nsNr);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008337 namePop(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +00008338 }
8339#ifdef LIBXML_SAX1_ENABLED
8340 else
Daniel Veillard0fb18932003-09-07 09:14:37 +00008341 xmlParseEndTag1(ctxt, line);
Daniel Veillard81273902003-09-30 00:43:48 +00008342#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00008343
8344 /*
8345 * Capture end position and add node
8346 */
8347 if ( ret != NULL && ctxt->record_info ) {
8348 node_info.end_pos = ctxt->input->consumed +
8349 (CUR_PTR - ctxt->input->base);
8350 node_info.end_line = ctxt->input->line;
8351 node_info.node = ret;
8352 xmlParserAddNodeInfo(ctxt, &node_info);
8353 }
8354}
8355
8356/**
8357 * xmlParseVersionNum:
8358 * @ctxt: an XML parser context
8359 *
8360 * parse the XML version value.
8361 *
8362 * [26] VersionNum ::= ([a-zA-Z0-9_.:] | '-')+
8363 *
8364 * Returns the string giving the XML version number, or NULL
8365 */
8366xmlChar *
8367xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
8368 xmlChar *buf = NULL;
8369 int len = 0;
8370 int size = 10;
8371 xmlChar cur;
8372
Daniel Veillard3c908dc2003-04-19 00:07:51 +00008373 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00008374 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008375 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008376 return(NULL);
8377 }
8378 cur = CUR;
8379 while (((cur >= 'a') && (cur <= 'z')) ||
8380 ((cur >= 'A') && (cur <= 'Z')) ||
8381 ((cur >= '0') && (cur <= '9')) ||
8382 (cur == '_') || (cur == '.') ||
8383 (cur == ':') || (cur == '-')) {
8384 if (len + 1 >= size) {
8385 size *= 2;
8386 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
8387 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008388 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008389 return(NULL);
8390 }
8391 }
8392 buf[len++] = cur;
8393 NEXT;
8394 cur=CUR;
8395 }
8396 buf[len] = 0;
8397 return(buf);
8398}
8399
8400/**
8401 * xmlParseVersionInfo:
8402 * @ctxt: an XML parser context
8403 *
8404 * parse the XML version.
8405 *
8406 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
8407 *
8408 * [25] Eq ::= S? '=' S?
8409 *
8410 * Returns the version string, e.g. "1.0"
8411 */
8412
8413xmlChar *
8414xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
8415 xmlChar *version = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00008416
Daniel Veillarda07050d2003-10-19 14:46:32 +00008417 if (CMP7(CUR_PTR, 'v', 'e', 'r', 's', 'i', 'o', 'n')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008418 SKIP(7);
8419 SKIP_BLANKS;
8420 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008421 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008422 return(NULL);
8423 }
8424 NEXT;
8425 SKIP_BLANKS;
8426 if (RAW == '"') {
8427 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00008428 version = xmlParseVersionNum(ctxt);
8429 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008430 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008431 } else
8432 NEXT;
8433 } else if (RAW == '\''){
8434 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00008435 version = xmlParseVersionNum(ctxt);
8436 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008437 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008438 } else
8439 NEXT;
8440 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008441 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008442 }
8443 }
8444 return(version);
8445}
8446
8447/**
8448 * xmlParseEncName:
8449 * @ctxt: an XML parser context
8450 *
8451 * parse the XML encoding name
8452 *
8453 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
8454 *
8455 * Returns the encoding name value or NULL
8456 */
8457xmlChar *
8458xmlParseEncName(xmlParserCtxtPtr ctxt) {
8459 xmlChar *buf = NULL;
8460 int len = 0;
8461 int size = 10;
8462 xmlChar cur;
8463
8464 cur = CUR;
8465 if (((cur >= 'a') && (cur <= 'z')) ||
8466 ((cur >= 'A') && (cur <= 'Z'))) {
Daniel Veillard3c908dc2003-04-19 00:07:51 +00008467 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00008468 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008469 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008470 return(NULL);
8471 }
8472
8473 buf[len++] = cur;
8474 NEXT;
8475 cur = CUR;
8476 while (((cur >= 'a') && (cur <= 'z')) ||
8477 ((cur >= 'A') && (cur <= 'Z')) ||
8478 ((cur >= '0') && (cur <= '9')) ||
8479 (cur == '.') || (cur == '_') ||
8480 (cur == '-')) {
8481 if (len + 1 >= size) {
8482 size *= 2;
8483 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
8484 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008485 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008486 return(NULL);
8487 }
8488 }
8489 buf[len++] = cur;
8490 NEXT;
8491 cur = CUR;
8492 if (cur == 0) {
8493 SHRINK;
8494 GROW;
8495 cur = CUR;
8496 }
8497 }
8498 buf[len] = 0;
8499 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008500 xmlFatalErr(ctxt, XML_ERR_ENCODING_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008501 }
8502 return(buf);
8503}
8504
8505/**
8506 * xmlParseEncodingDecl:
8507 * @ctxt: an XML parser context
8508 *
8509 * parse the XML encoding declaration
8510 *
8511 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
8512 *
8513 * this setups the conversion filters.
8514 *
8515 * Returns the encoding value or NULL
8516 */
8517
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00008518const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00008519xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
8520 xmlChar *encoding = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00008521
8522 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00008523 if (CMP8(CUR_PTR, 'e', 'n', 'c', 'o', 'd', 'i', 'n', 'g')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008524 SKIP(8);
8525 SKIP_BLANKS;
8526 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008527 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008528 return(NULL);
8529 }
8530 NEXT;
8531 SKIP_BLANKS;
8532 if (RAW == '"') {
8533 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00008534 encoding = xmlParseEncName(ctxt);
8535 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008536 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008537 } else
8538 NEXT;
8539 } else if (RAW == '\''){
8540 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00008541 encoding = xmlParseEncName(ctxt);
8542 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008543 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008544 } else
8545 NEXT;
Daniel Veillard82ac6b02002-02-17 23:18:55 +00008546 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008547 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008548 }
Daniel Veillard6b621b82003-08-11 15:03:34 +00008549 /*
8550 * UTF-16 encoding stwich has already taken place at this stage,
8551 * more over the little-endian/big-endian selection is already done
8552 */
8553 if ((encoding != NULL) &&
8554 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-16")) ||
8555 (!xmlStrcasecmp(encoding, BAD_CAST "UTF16")))) {
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00008556 if (ctxt->encoding != NULL)
8557 xmlFree((xmlChar *) ctxt->encoding);
8558 ctxt->encoding = encoding;
Daniel Veillardb19ba832003-08-14 00:33:46 +00008559 }
8560 /*
8561 * UTF-8 encoding is handled natively
8562 */
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00008563 else if ((encoding != NULL) &&
Daniel Veillardb19ba832003-08-14 00:33:46 +00008564 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-8")) ||
8565 (!xmlStrcasecmp(encoding, BAD_CAST "UTF8")))) {
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00008566 if (ctxt->encoding != NULL)
8567 xmlFree((xmlChar *) ctxt->encoding);
8568 ctxt->encoding = encoding;
Daniel Veillard6b621b82003-08-11 15:03:34 +00008569 }
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00008570 else if (encoding != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +00008571 xmlCharEncodingHandlerPtr handler;
8572
8573 if (ctxt->input->encoding != NULL)
8574 xmlFree((xmlChar *) ctxt->input->encoding);
8575 ctxt->input->encoding = encoding;
8576
Daniel Veillarda6874ca2003-07-29 16:47:24 +00008577 handler = xmlFindCharEncodingHandler((const char *) encoding);
8578 if (handler != NULL) {
8579 xmlSwitchToEncoding(ctxt, handler);
Owen Taylor3473f882001-02-23 17:55:21 +00008580 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00008581 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
Daniel Veillarda6874ca2003-07-29 16:47:24 +00008582 "Unsupported encoding %s\n", encoding);
8583 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008584 }
8585 }
8586 }
8587 return(encoding);
8588}
8589
8590/**
8591 * xmlParseSDDecl:
8592 * @ctxt: an XML parser context
8593 *
8594 * parse the XML standalone declaration
8595 *
8596 * [32] SDDecl ::= S 'standalone' Eq
8597 * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
8598 *
8599 * [ VC: Standalone Document Declaration ]
8600 * TODO The standalone document declaration must have the value "no"
8601 * if any external markup declarations contain declarations of:
8602 * - attributes with default values, if elements to which these
8603 * attributes apply appear in the document without specifications
8604 * of values for these attributes, or
8605 * - entities (other than amp, lt, gt, apos, quot), if references
8606 * to those entities appear in the document, or
8607 * - attributes with values subject to normalization, where the
8608 * attribute appears in the document with a value which will change
8609 * as a result of normalization, or
8610 * - element types with element content, if white space occurs directly
8611 * within any instance of those types.
8612 *
8613 * Returns 1 if standalone, 0 otherwise
8614 */
8615
8616int
8617xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
8618 int standalone = -1;
8619
8620 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00008621 if (CMP10(CUR_PTR, 's', 't', 'a', 'n', 'd', 'a', 'l', 'o', 'n', 'e')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008622 SKIP(10);
8623 SKIP_BLANKS;
8624 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008625 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008626 return(standalone);
8627 }
8628 NEXT;
8629 SKIP_BLANKS;
8630 if (RAW == '\''){
8631 NEXT;
8632 if ((RAW == 'n') && (NXT(1) == 'o')) {
8633 standalone = 0;
8634 SKIP(2);
8635 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
8636 (NXT(2) == 's')) {
8637 standalone = 1;
8638 SKIP(3);
8639 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008640 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008641 }
8642 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008643 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008644 } else
8645 NEXT;
8646 } else if (RAW == '"'){
8647 NEXT;
8648 if ((RAW == 'n') && (NXT(1) == 'o')) {
8649 standalone = 0;
8650 SKIP(2);
8651 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
8652 (NXT(2) == 's')) {
8653 standalone = 1;
8654 SKIP(3);
8655 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008656 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008657 }
8658 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008659 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008660 } else
8661 NEXT;
8662 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008663 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008664 }
8665 }
8666 return(standalone);
8667}
8668
8669/**
8670 * xmlParseXMLDecl:
8671 * @ctxt: an XML parser context
8672 *
8673 * parse an XML declaration header
8674 *
8675 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
8676 */
8677
8678void
8679xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
8680 xmlChar *version;
8681
8682 /*
8683 * We know that '<?xml' is here.
8684 */
8685 SKIP(5);
8686
William M. Brack76e95df2003-10-18 16:20:14 +00008687 if (!IS_BLANK_CH(RAW)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008688 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
8689 "Blank needed after '<?xml'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008690 }
8691 SKIP_BLANKS;
8692
8693 /*
Daniel Veillard19840942001-11-29 16:11:38 +00008694 * We must have the VersionInfo here.
Owen Taylor3473f882001-02-23 17:55:21 +00008695 */
8696 version = xmlParseVersionInfo(ctxt);
Daniel Veillard19840942001-11-29 16:11:38 +00008697 if (version == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008698 xmlFatalErr(ctxt, XML_ERR_VERSION_MISSING, NULL);
Daniel Veillard19840942001-11-29 16:11:38 +00008699 } else {
8700 if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) {
8701 /*
8702 * TODO: Blueberry should be detected here
8703 */
Daniel Veillard24eb9782003-10-04 21:08:09 +00008704 xmlWarningMsg(ctxt, XML_WAR_UNKNOWN_VERSION,
8705 "Unsupported version '%s'\n",
8706 version, NULL);
Daniel Veillard19840942001-11-29 16:11:38 +00008707 }
8708 if (ctxt->version != NULL)
Daniel Veillardd3b08822001-12-05 12:03:33 +00008709 xmlFree((void *) ctxt->version);
Daniel Veillard19840942001-11-29 16:11:38 +00008710 ctxt->version = version;
Daniel Veillarda050d232001-09-05 15:51:05 +00008711 }
Owen Taylor3473f882001-02-23 17:55:21 +00008712
8713 /*
8714 * We may have the encoding declaration
8715 */
William M. Brack76e95df2003-10-18 16:20:14 +00008716 if (!IS_BLANK_CH(RAW)) {
Owen Taylor3473f882001-02-23 17:55:21 +00008717 if ((RAW == '?') && (NXT(1) == '>')) {
8718 SKIP(2);
8719 return;
8720 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008721 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008722 }
8723 xmlParseEncodingDecl(ctxt);
8724 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
8725 /*
8726 * The XML REC instructs us to stop parsing right here
8727 */
8728 return;
8729 }
8730
8731 /*
8732 * We may have the standalone status.
8733 */
William M. Brack76e95df2003-10-18 16:20:14 +00008734 if ((ctxt->input->encoding != NULL) && (!IS_BLANK_CH(RAW))) {
Owen Taylor3473f882001-02-23 17:55:21 +00008735 if ((RAW == '?') && (NXT(1) == '>')) {
8736 SKIP(2);
8737 return;
8738 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008739 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008740 }
8741 SKIP_BLANKS;
8742 ctxt->input->standalone = xmlParseSDDecl(ctxt);
8743
8744 SKIP_BLANKS;
8745 if ((RAW == '?') && (NXT(1) == '>')) {
8746 SKIP(2);
8747 } else if (RAW == '>') {
8748 /* Deprecated old WD ... */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008749 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008750 NEXT;
8751 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008752 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008753 MOVETO_ENDTAG(CUR_PTR);
8754 NEXT;
8755 }
8756}
8757
8758/**
8759 * xmlParseMisc:
8760 * @ctxt: an XML parser context
8761 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00008762 * parse an XML Misc* optional field.
Owen Taylor3473f882001-02-23 17:55:21 +00008763 *
8764 * [27] Misc ::= Comment | PI | S
8765 */
8766
8767void
8768xmlParseMisc(xmlParserCtxtPtr ctxt) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00008769 while (((RAW == '<') && (NXT(1) == '?')) ||
Daniel Veillarda07050d2003-10-19 14:46:32 +00008770 (CMP4(CUR_PTR, '<', '!', '-', '-')) ||
William M. Brack76e95df2003-10-18 16:20:14 +00008771 IS_BLANK_CH(CUR)) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00008772 if ((RAW == '<') && (NXT(1) == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008773 xmlParsePI(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00008774 } else if (IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00008775 NEXT;
8776 } else
8777 xmlParseComment(ctxt);
8778 }
8779}
8780
8781/**
8782 * xmlParseDocument:
8783 * @ctxt: an XML parser context
8784 *
8785 * parse an XML document (and build a tree if using the standard SAX
8786 * interface).
8787 *
8788 * [1] document ::= prolog element Misc*
8789 *
8790 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
8791 *
8792 * Returns 0, -1 in case of error. the parser context is augmented
8793 * as a result of the parsing.
8794 */
8795
8796int
8797xmlParseDocument(xmlParserCtxtPtr ctxt) {
8798 xmlChar start[4];
8799 xmlCharEncoding enc;
8800
8801 xmlInitParser();
8802
8803 GROW;
8804
8805 /*
Daniel Veillard0fb18932003-09-07 09:14:37 +00008806 * SAX: detecting the level.
8807 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00008808 xmlDetectSAX2(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008809
8810 /*
Owen Taylor3473f882001-02-23 17:55:21 +00008811 * SAX: beginning of the document processing.
8812 */
8813 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
8814 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
8815
Daniel Veillard4aede2e2003-10-17 12:43:59 +00008816 if ((ctxt->encoding == (const xmlChar *)XML_CHAR_ENCODING_NONE) &&
8817 ((ctxt->input->end - ctxt->input->cur) >= 4)) {
Daniel Veillard4aafa792001-07-28 17:21:12 +00008818 /*
8819 * Get the 4 first bytes and decode the charset
8820 * if enc != XML_CHAR_ENCODING_NONE
8821 * plug some encoding conversion routines.
8822 */
8823 start[0] = RAW;
8824 start[1] = NXT(1);
8825 start[2] = NXT(2);
8826 start[3] = NXT(3);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00008827 enc = xmlDetectCharEncoding(&start[0], 4);
Daniel Veillard4aafa792001-07-28 17:21:12 +00008828 if (enc != XML_CHAR_ENCODING_NONE) {
8829 xmlSwitchEncoding(ctxt, enc);
8830 }
Owen Taylor3473f882001-02-23 17:55:21 +00008831 }
8832
8833
8834 if (CUR == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008835 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008836 }
8837
8838 /*
8839 * Check for the XMLDecl in the Prolog.
8840 */
8841 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00008842 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +00008843
8844 /*
8845 * Note that we will switch encoding on the fly.
8846 */
8847 xmlParseXMLDecl(ctxt);
8848 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
8849 /*
8850 * The XML REC instructs us to stop parsing right here
8851 */
8852 return(-1);
8853 }
8854 ctxt->standalone = ctxt->input->standalone;
8855 SKIP_BLANKS;
8856 } else {
8857 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
8858 }
8859 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
8860 ctxt->sax->startDocument(ctxt->userData);
8861
8862 /*
8863 * The Misc part of the Prolog
8864 */
8865 GROW;
8866 xmlParseMisc(ctxt);
8867
8868 /*
8869 * Then possibly doc type declaration(s) and more Misc
8870 * (doctypedecl Misc*)?
8871 */
8872 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00008873 if (CMP9(CUR_PTR, '<', '!', 'D', 'O', 'C', 'T', 'Y', 'P', 'E')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008874
8875 ctxt->inSubset = 1;
8876 xmlParseDocTypeDecl(ctxt);
8877 if (RAW == '[') {
8878 ctxt->instate = XML_PARSER_DTD;
8879 xmlParseInternalSubset(ctxt);
8880 }
8881
8882 /*
8883 * Create and update the external subset.
8884 */
8885 ctxt->inSubset = 2;
8886 if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
8887 (!ctxt->disableSAX))
8888 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
8889 ctxt->extSubSystem, ctxt->extSubURI);
8890 ctxt->inSubset = 0;
8891
8892
8893 ctxt->instate = XML_PARSER_PROLOG;
8894 xmlParseMisc(ctxt);
8895 }
8896
8897 /*
8898 * Time to start parsing the tree itself
8899 */
8900 GROW;
8901 if (RAW != '<') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008902 xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
8903 "Start tag expected, '<' not found\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008904 } else {
8905 ctxt->instate = XML_PARSER_CONTENT;
8906 xmlParseElement(ctxt);
8907 ctxt->instate = XML_PARSER_EPILOG;
8908
8909
8910 /*
8911 * The Misc part at the end
8912 */
8913 xmlParseMisc(ctxt);
8914
Daniel Veillard561b7f82002-03-20 21:55:57 +00008915 if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008916 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008917 }
8918 ctxt->instate = XML_PARSER_EOF;
8919 }
8920
8921 /*
8922 * SAX: end of the document processing.
8923 */
Daniel Veillard8d24cc12002-03-05 15:41:29 +00008924 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00008925 ctxt->sax->endDocument(ctxt->userData);
8926
Daniel Veillard5997aca2002-03-18 18:36:20 +00008927 /*
8928 * Remove locally kept entity definitions if the tree was not built
8929 */
8930 if ((ctxt->myDoc != NULL) &&
8931 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
8932 xmlFreeDoc(ctxt->myDoc);
8933 ctxt->myDoc = NULL;
8934 }
8935
Daniel Veillardc7612992002-02-17 22:47:37 +00008936 if (! ctxt->wellFormed) {
8937 ctxt->valid = 0;
8938 return(-1);
8939 }
Owen Taylor3473f882001-02-23 17:55:21 +00008940 return(0);
8941}
8942
8943/**
8944 * xmlParseExtParsedEnt:
8945 * @ctxt: an XML parser context
8946 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00008947 * parse a general parsed entity
Owen Taylor3473f882001-02-23 17:55:21 +00008948 * An external general parsed entity is well-formed if it matches the
8949 * production labeled extParsedEnt.
8950 *
8951 * [78] extParsedEnt ::= TextDecl? content
8952 *
8953 * Returns 0, -1 in case of error. the parser context is augmented
8954 * as a result of the parsing.
8955 */
8956
8957int
8958xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
8959 xmlChar start[4];
8960 xmlCharEncoding enc;
8961
8962 xmlDefaultSAXHandlerInit();
8963
Daniel Veillard309f81d2003-09-23 09:02:53 +00008964 xmlDetectSAX2(ctxt);
8965
Owen Taylor3473f882001-02-23 17:55:21 +00008966 GROW;
8967
8968 /*
8969 * SAX: beginning of the document processing.
8970 */
8971 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
8972 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
8973
8974 /*
8975 * Get the 4 first bytes and decode the charset
8976 * if enc != XML_CHAR_ENCODING_NONE
8977 * plug some encoding conversion routines.
8978 */
Daniel Veillard4aede2e2003-10-17 12:43:59 +00008979 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
8980 start[0] = RAW;
8981 start[1] = NXT(1);
8982 start[2] = NXT(2);
8983 start[3] = NXT(3);
8984 enc = xmlDetectCharEncoding(start, 4);
8985 if (enc != XML_CHAR_ENCODING_NONE) {
8986 xmlSwitchEncoding(ctxt, enc);
8987 }
Owen Taylor3473f882001-02-23 17:55:21 +00008988 }
8989
8990
8991 if (CUR == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008992 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008993 }
8994
8995 /*
8996 * Check for the XMLDecl in the Prolog.
8997 */
8998 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00008999 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +00009000
9001 /*
9002 * Note that we will switch encoding on the fly.
9003 */
9004 xmlParseXMLDecl(ctxt);
9005 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
9006 /*
9007 * The XML REC instructs us to stop parsing right here
9008 */
9009 return(-1);
9010 }
9011 SKIP_BLANKS;
9012 } else {
9013 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
9014 }
9015 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
9016 ctxt->sax->startDocument(ctxt->userData);
9017
9018 /*
9019 * Doing validity checking on chunk doesn't make sense
9020 */
9021 ctxt->instate = XML_PARSER_CONTENT;
9022 ctxt->validate = 0;
9023 ctxt->loadsubset = 0;
9024 ctxt->depth = 0;
9025
9026 xmlParseContent(ctxt);
9027
9028 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009029 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009030 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009031 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009032 }
9033
9034 /*
9035 * SAX: end of the document processing.
9036 */
Daniel Veillard8d24cc12002-03-05 15:41:29 +00009037 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00009038 ctxt->sax->endDocument(ctxt->userData);
9039
9040 if (! ctxt->wellFormed) return(-1);
9041 return(0);
9042}
9043
Daniel Veillard73b013f2003-09-30 12:36:01 +00009044#ifdef LIBXML_PUSH_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +00009045/************************************************************************
9046 * *
9047 * Progressive parsing interfaces *
9048 * *
9049 ************************************************************************/
9050
9051/**
9052 * xmlParseLookupSequence:
9053 * @ctxt: an XML parser context
9054 * @first: the first char to lookup
9055 * @next: the next char to lookup or zero
9056 * @third: the next char to lookup or zero
9057 *
9058 * Try to find if a sequence (first, next, third) or just (first next) or
9059 * (first) is available in the input stream.
9060 * This function has a side effect of (possibly) incrementing ctxt->checkIndex
9061 * to avoid rescanning sequences of bytes, it DOES change the state of the
9062 * parser, do not use liberally.
9063 *
9064 * Returns the index to the current parsing point if the full sequence
9065 * is available, -1 otherwise.
9066 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00009067static int
Owen Taylor3473f882001-02-23 17:55:21 +00009068xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
9069 xmlChar next, xmlChar third) {
9070 int base, len;
9071 xmlParserInputPtr in;
9072 const xmlChar *buf;
9073
9074 in = ctxt->input;
9075 if (in == NULL) return(-1);
9076 base = in->cur - in->base;
9077 if (base < 0) return(-1);
9078 if (ctxt->checkIndex > base)
9079 base = ctxt->checkIndex;
9080 if (in->buf == NULL) {
9081 buf = in->base;
9082 len = in->length;
9083 } else {
9084 buf = in->buf->buffer->content;
9085 len = in->buf->buffer->use;
9086 }
9087 /* take into account the sequence length */
9088 if (third) len -= 2;
9089 else if (next) len --;
9090 for (;base < len;base++) {
9091 if (buf[base] == first) {
9092 if (third != 0) {
9093 if ((buf[base + 1] != next) ||
9094 (buf[base + 2] != third)) continue;
9095 } else if (next != 0) {
9096 if (buf[base + 1] != next) continue;
9097 }
9098 ctxt->checkIndex = 0;
9099#ifdef DEBUG_PUSH
9100 if (next == 0)
9101 xmlGenericError(xmlGenericErrorContext,
9102 "PP: lookup '%c' found at %d\n",
9103 first, base);
9104 else if (third == 0)
9105 xmlGenericError(xmlGenericErrorContext,
9106 "PP: lookup '%c%c' found at %d\n",
9107 first, next, base);
9108 else
9109 xmlGenericError(xmlGenericErrorContext,
9110 "PP: lookup '%c%c%c' found at %d\n",
9111 first, next, third, base);
9112#endif
9113 return(base - (in->cur - in->base));
9114 }
9115 }
9116 ctxt->checkIndex = base;
9117#ifdef DEBUG_PUSH
9118 if (next == 0)
9119 xmlGenericError(xmlGenericErrorContext,
9120 "PP: lookup '%c' failed\n", first);
9121 else if (third == 0)
9122 xmlGenericError(xmlGenericErrorContext,
9123 "PP: lookup '%c%c' failed\n", first, next);
9124 else
9125 xmlGenericError(xmlGenericErrorContext,
9126 "PP: lookup '%c%c%c' failed\n", first, next, third);
9127#endif
9128 return(-1);
9129}
9130
9131/**
Daniel Veillarda880b122003-04-21 21:36:41 +00009132 * xmlParseGetLasts:
9133 * @ctxt: an XML parser context
9134 * @lastlt: pointer to store the last '<' from the input
9135 * @lastgt: pointer to store the last '>' from the input
9136 *
9137 * Lookup the last < and > in the current chunk
9138 */
9139static void
9140xmlParseGetLasts(xmlParserCtxtPtr ctxt, const xmlChar **lastlt,
9141 const xmlChar **lastgt) {
9142 const xmlChar *tmp;
9143
9144 if ((ctxt == NULL) || (lastlt == NULL) || (lastgt == NULL)) {
9145 xmlGenericError(xmlGenericErrorContext,
9146 "Internal error: xmlParseGetLasts\n");
9147 return;
9148 }
9149 if ((ctxt->progressive == 1) && (ctxt->inputNr == 1)) {
9150 tmp = ctxt->input->end;
9151 tmp--;
9152 while ((tmp >= ctxt->input->base) && (*tmp != '<') &&
9153 (*tmp != '>')) tmp--;
9154 if (tmp < ctxt->input->base) {
9155 *lastlt = NULL;
9156 *lastgt = NULL;
9157 } else if (*tmp == '<') {
9158 *lastlt = tmp;
9159 tmp--;
9160 while ((tmp >= ctxt->input->base) && (*tmp != '>')) tmp--;
9161 if (tmp < ctxt->input->base)
9162 *lastgt = NULL;
9163 else
9164 *lastgt = tmp;
9165 } else {
9166 *lastgt = tmp;
9167 tmp--;
9168 while ((tmp >= ctxt->input->base) && (*tmp != '<')) tmp--;
9169 if (tmp < ctxt->input->base)
9170 *lastlt = NULL;
9171 else
9172 *lastlt = tmp;
9173 }
9174
9175 } else {
9176 *lastlt = NULL;
9177 *lastgt = NULL;
9178 }
9179}
9180/**
Owen Taylor3473f882001-02-23 17:55:21 +00009181 * xmlParseTryOrFinish:
9182 * @ctxt: an XML parser context
9183 * @terminate: last chunk indicator
9184 *
9185 * Try to progress on parsing
9186 *
9187 * Returns zero if no parsing was possible
9188 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00009189static int
Owen Taylor3473f882001-02-23 17:55:21 +00009190xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
9191 int ret = 0;
9192 int avail;
9193 xmlChar cur, next;
Daniel Veillarda880b122003-04-21 21:36:41 +00009194 const xmlChar *lastlt, *lastgt;
Owen Taylor3473f882001-02-23 17:55:21 +00009195
9196#ifdef DEBUG_PUSH
9197 switch (ctxt->instate) {
9198 case XML_PARSER_EOF:
9199 xmlGenericError(xmlGenericErrorContext,
9200 "PP: try EOF\n"); break;
9201 case XML_PARSER_START:
9202 xmlGenericError(xmlGenericErrorContext,
9203 "PP: try START\n"); break;
9204 case XML_PARSER_MISC:
9205 xmlGenericError(xmlGenericErrorContext,
9206 "PP: try MISC\n");break;
9207 case XML_PARSER_COMMENT:
9208 xmlGenericError(xmlGenericErrorContext,
9209 "PP: try COMMENT\n");break;
9210 case XML_PARSER_PROLOG:
9211 xmlGenericError(xmlGenericErrorContext,
9212 "PP: try PROLOG\n");break;
9213 case XML_PARSER_START_TAG:
9214 xmlGenericError(xmlGenericErrorContext,
9215 "PP: try START_TAG\n");break;
9216 case XML_PARSER_CONTENT:
9217 xmlGenericError(xmlGenericErrorContext,
9218 "PP: try CONTENT\n");break;
9219 case XML_PARSER_CDATA_SECTION:
9220 xmlGenericError(xmlGenericErrorContext,
9221 "PP: try CDATA_SECTION\n");break;
9222 case XML_PARSER_END_TAG:
9223 xmlGenericError(xmlGenericErrorContext,
9224 "PP: try END_TAG\n");break;
9225 case XML_PARSER_ENTITY_DECL:
9226 xmlGenericError(xmlGenericErrorContext,
9227 "PP: try ENTITY_DECL\n");break;
9228 case XML_PARSER_ENTITY_VALUE:
9229 xmlGenericError(xmlGenericErrorContext,
9230 "PP: try ENTITY_VALUE\n");break;
9231 case XML_PARSER_ATTRIBUTE_VALUE:
9232 xmlGenericError(xmlGenericErrorContext,
9233 "PP: try ATTRIBUTE_VALUE\n");break;
9234 case XML_PARSER_DTD:
9235 xmlGenericError(xmlGenericErrorContext,
9236 "PP: try DTD\n");break;
9237 case XML_PARSER_EPILOG:
9238 xmlGenericError(xmlGenericErrorContext,
9239 "PP: try EPILOG\n");break;
9240 case XML_PARSER_PI:
9241 xmlGenericError(xmlGenericErrorContext,
9242 "PP: try PI\n");break;
9243 case XML_PARSER_IGNORE:
9244 xmlGenericError(xmlGenericErrorContext,
9245 "PP: try IGNORE\n");break;
9246 }
9247#endif
9248
Daniel Veillard198c1bf2003-10-20 17:07:41 +00009249 if ((ctxt->input != NULL) &&
9250 (ctxt->input->cur - ctxt->input->base > 4096)) {
Daniel Veillarda880b122003-04-21 21:36:41 +00009251 xmlSHRINK(ctxt);
9252 ctxt->checkIndex = 0;
9253 }
9254 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Aleksey Sanine48a3182002-05-09 18:20:01 +00009255
Daniel Veillarda880b122003-04-21 21:36:41 +00009256 while (1) {
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00009257 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
9258 return(0);
9259
9260
Owen Taylor3473f882001-02-23 17:55:21 +00009261 /*
9262 * Pop-up of finished entities.
9263 */
9264 while ((RAW == 0) && (ctxt->inputNr > 1))
9265 xmlPopInput(ctxt);
9266
Daniel Veillard198c1bf2003-10-20 17:07:41 +00009267 if (ctxt->input == NULL) break;
Owen Taylor3473f882001-02-23 17:55:21 +00009268 if (ctxt->input->buf == NULL)
Daniel Veillarda880b122003-04-21 21:36:41 +00009269 avail = ctxt->input->length -
9270 (ctxt->input->cur - ctxt->input->base);
Daniel Veillard158a4d22002-02-20 22:17:58 +00009271 else {
9272 /*
9273 * If we are operating on converted input, try to flush
9274 * remainng chars to avoid them stalling in the non-converted
9275 * buffer.
9276 */
9277 if ((ctxt->input->buf->raw != NULL) &&
9278 (ctxt->input->buf->raw->use > 0)) {
9279 int base = ctxt->input->base -
9280 ctxt->input->buf->buffer->content;
9281 int current = ctxt->input->cur - ctxt->input->base;
9282
9283 xmlParserInputBufferPush(ctxt->input->buf, 0, "");
9284 ctxt->input->base = ctxt->input->buf->buffer->content + base;
9285 ctxt->input->cur = ctxt->input->base + current;
9286 ctxt->input->end =
9287 &ctxt->input->buf->buffer->content[
9288 ctxt->input->buf->buffer->use];
9289 }
9290 avail = ctxt->input->buf->buffer->use -
9291 (ctxt->input->cur - ctxt->input->base);
9292 }
Owen Taylor3473f882001-02-23 17:55:21 +00009293 if (avail < 1)
9294 goto done;
9295 switch (ctxt->instate) {
9296 case XML_PARSER_EOF:
9297 /*
9298 * Document parsing is done !
9299 */
9300 goto done;
9301 case XML_PARSER_START:
Daniel Veillard0e4cd172001-06-28 12:13:56 +00009302 if (ctxt->charset == XML_CHAR_ENCODING_NONE) {
9303 xmlChar start[4];
9304 xmlCharEncoding enc;
9305
9306 /*
9307 * Very first chars read from the document flow.
9308 */
9309 if (avail < 4)
9310 goto done;
9311
9312 /*
9313 * Get the 4 first bytes and decode the charset
9314 * if enc != XML_CHAR_ENCODING_NONE
9315 * plug some encoding conversion routines.
9316 */
9317 start[0] = RAW;
9318 start[1] = NXT(1);
9319 start[2] = NXT(2);
9320 start[3] = NXT(3);
9321 enc = xmlDetectCharEncoding(start, 4);
9322 if (enc != XML_CHAR_ENCODING_NONE) {
9323 xmlSwitchEncoding(ctxt, enc);
9324 }
9325 break;
9326 }
Owen Taylor3473f882001-02-23 17:55:21 +00009327
Daniel Veillard2b8c4a12003-10-02 22:28:19 +00009328 if (avail < 2)
9329 goto done;
Owen Taylor3473f882001-02-23 17:55:21 +00009330 cur = ctxt->input->cur[0];
9331 next = ctxt->input->cur[1];
9332 if (cur == 0) {
9333 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
9334 ctxt->sax->setDocumentLocator(ctxt->userData,
9335 &xmlDefaultSAXLocator);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009336 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009337 ctxt->instate = XML_PARSER_EOF;
9338#ifdef DEBUG_PUSH
9339 xmlGenericError(xmlGenericErrorContext,
9340 "PP: entering EOF\n");
9341#endif
9342 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
9343 ctxt->sax->endDocument(ctxt->userData);
9344 goto done;
9345 }
9346 if ((cur == '<') && (next == '?')) {
9347 /* PI or XML decl */
9348 if (avail < 5) return(ret);
9349 if ((!terminate) &&
9350 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
9351 return(ret);
9352 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
9353 ctxt->sax->setDocumentLocator(ctxt->userData,
9354 &xmlDefaultSAXLocator);
9355 if ((ctxt->input->cur[2] == 'x') &&
9356 (ctxt->input->cur[3] == 'm') &&
9357 (ctxt->input->cur[4] == 'l') &&
William M. Brack76e95df2003-10-18 16:20:14 +00009358 (IS_BLANK_CH(ctxt->input->cur[5]))) {
Owen Taylor3473f882001-02-23 17:55:21 +00009359 ret += 5;
9360#ifdef DEBUG_PUSH
9361 xmlGenericError(xmlGenericErrorContext,
9362 "PP: Parsing XML Decl\n");
9363#endif
9364 xmlParseXMLDecl(ctxt);
9365 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
9366 /*
9367 * The XML REC instructs us to stop parsing right
9368 * here
9369 */
9370 ctxt->instate = XML_PARSER_EOF;
9371 return(0);
9372 }
9373 ctxt->standalone = ctxt->input->standalone;
9374 if ((ctxt->encoding == NULL) &&
9375 (ctxt->input->encoding != NULL))
9376 ctxt->encoding = xmlStrdup(ctxt->input->encoding);
9377 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
9378 (!ctxt->disableSAX))
9379 ctxt->sax->startDocument(ctxt->userData);
9380 ctxt->instate = XML_PARSER_MISC;
9381#ifdef DEBUG_PUSH
9382 xmlGenericError(xmlGenericErrorContext,
9383 "PP: entering MISC\n");
9384#endif
9385 } else {
9386 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
9387 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
9388 (!ctxt->disableSAX))
9389 ctxt->sax->startDocument(ctxt->userData);
9390 ctxt->instate = XML_PARSER_MISC;
9391#ifdef DEBUG_PUSH
9392 xmlGenericError(xmlGenericErrorContext,
9393 "PP: entering MISC\n");
9394#endif
9395 }
9396 } else {
9397 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
9398 ctxt->sax->setDocumentLocator(ctxt->userData,
9399 &xmlDefaultSAXLocator);
9400 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
9401 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
9402 (!ctxt->disableSAX))
9403 ctxt->sax->startDocument(ctxt->userData);
9404 ctxt->instate = XML_PARSER_MISC;
9405#ifdef DEBUG_PUSH
9406 xmlGenericError(xmlGenericErrorContext,
9407 "PP: entering MISC\n");
9408#endif
9409 }
9410 break;
Daniel Veillarda880b122003-04-21 21:36:41 +00009411 case XML_PARSER_START_TAG: {
Daniel Veillarde57ec792003-09-10 10:50:59 +00009412 const xmlChar *name;
9413 const xmlChar *prefix;
9414 const xmlChar *URI;
9415 int nsNr = ctxt->nsNr;
Daniel Veillarda880b122003-04-21 21:36:41 +00009416
9417 if ((avail < 2) && (ctxt->inputNr == 1))
9418 goto done;
9419 cur = ctxt->input->cur[0];
9420 if (cur != '<') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009421 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Daniel Veillarda880b122003-04-21 21:36:41 +00009422 ctxt->instate = XML_PARSER_EOF;
Daniel Veillarda880b122003-04-21 21:36:41 +00009423 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
9424 ctxt->sax->endDocument(ctxt->userData);
9425 goto done;
9426 }
9427 if (!terminate) {
9428 if (ctxt->progressive) {
9429 if ((lastgt == NULL) || (ctxt->input->cur > lastgt))
9430 goto done;
9431 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
9432 goto done;
9433 }
9434 }
9435 if (ctxt->spaceNr == 0)
9436 spacePush(ctxt, -1);
9437 else
9438 spacePush(ctxt, *ctxt->space);
Daniel Veillard81273902003-09-30 00:43:48 +00009439#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +00009440 if (ctxt->sax2)
Daniel Veillard81273902003-09-30 00:43:48 +00009441#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +00009442 name = xmlParseStartTag2(ctxt, &prefix, &URI);
Daniel Veillard81273902003-09-30 00:43:48 +00009443#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +00009444 else
9445 name = xmlParseStartTag(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +00009446#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarda880b122003-04-21 21:36:41 +00009447 if (name == NULL) {
9448 spacePop(ctxt);
9449 ctxt->instate = XML_PARSER_EOF;
Daniel Veillarda880b122003-04-21 21:36:41 +00009450 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
9451 ctxt->sax->endDocument(ctxt->userData);
9452 goto done;
9453 }
Daniel Veillard4432df22003-09-28 18:58:27 +00009454#ifdef LIBXML_VALID_ENABLED
Daniel Veillarda880b122003-04-21 21:36:41 +00009455 /*
9456 * [ VC: Root Element Type ]
9457 * The Name in the document type declaration must match
9458 * the element type of the root element.
9459 */
9460 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
9461 ctxt->node && (ctxt->node == ctxt->myDoc->children))
9462 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
Daniel Veillard4432df22003-09-28 18:58:27 +00009463#endif /* LIBXML_VALID_ENABLED */
Daniel Veillarda880b122003-04-21 21:36:41 +00009464
9465 /*
9466 * Check for an Empty Element.
9467 */
9468 if ((RAW == '/') && (NXT(1) == '>')) {
9469 SKIP(2);
Daniel Veillarde57ec792003-09-10 10:50:59 +00009470
9471 if (ctxt->sax2) {
9472 if ((ctxt->sax != NULL) &&
9473 (ctxt->sax->endElementNs != NULL) &&
9474 (!ctxt->disableSAX))
9475 ctxt->sax->endElementNs(ctxt->userData, name,
9476 prefix, URI);
Daniel Veillard81273902003-09-30 00:43:48 +00009477#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +00009478 } else {
9479 if ((ctxt->sax != NULL) &&
9480 (ctxt->sax->endElement != NULL) &&
9481 (!ctxt->disableSAX))
9482 ctxt->sax->endElement(ctxt->userData, name);
Daniel Veillard81273902003-09-30 00:43:48 +00009483#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarda880b122003-04-21 21:36:41 +00009484 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00009485 spacePop(ctxt);
9486 if (ctxt->nameNr == 0) {
Daniel Veillarda880b122003-04-21 21:36:41 +00009487 ctxt->instate = XML_PARSER_EPILOG;
Daniel Veillarda880b122003-04-21 21:36:41 +00009488 } else {
9489 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillarda880b122003-04-21 21:36:41 +00009490 }
9491 break;
9492 }
9493 if (RAW == '>') {
9494 NEXT;
9495 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00009496 xmlFatalErrMsgStr(ctxt, XML_ERR_GT_REQUIRED,
Daniel Veillarda880b122003-04-21 21:36:41 +00009497 "Couldn't find end of Start Tag %s\n",
9498 name);
Daniel Veillarda880b122003-04-21 21:36:41 +00009499 nodePop(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +00009500 spacePop(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +00009501 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00009502 if (ctxt->sax2)
9503 nameNsPush(ctxt, name, prefix, URI, ctxt->nsNr - nsNr);
Daniel Veillard81273902003-09-30 00:43:48 +00009504#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +00009505 else
9506 namePush(ctxt, name);
Daniel Veillard81273902003-09-30 00:43:48 +00009507#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +00009508
Daniel Veillarda880b122003-04-21 21:36:41 +00009509 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillarda880b122003-04-21 21:36:41 +00009510 break;
9511 }
9512 case XML_PARSER_CONTENT: {
9513 const xmlChar *test;
9514 unsigned int cons;
9515 if ((avail < 2) && (ctxt->inputNr == 1))
9516 goto done;
9517 cur = ctxt->input->cur[0];
9518 next = ctxt->input->cur[1];
9519
9520 test = CUR_PTR;
9521 cons = ctxt->input->consumed;
9522 if ((cur == '<') && (next == '/')) {
9523 ctxt->instate = XML_PARSER_END_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +00009524 break;
9525 } else if ((cur == '<') && (next == '?')) {
9526 if ((!terminate) &&
9527 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
9528 goto done;
Daniel Veillarda880b122003-04-21 21:36:41 +00009529 xmlParsePI(ctxt);
9530 } else if ((cur == '<') && (next != '!')) {
9531 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +00009532 break;
9533 } else if ((cur == '<') && (next == '!') &&
9534 (ctxt->input->cur[2] == '-') &&
9535 (ctxt->input->cur[3] == '-')) {
9536 if ((!terminate) &&
9537 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
9538 goto done;
Daniel Veillarda880b122003-04-21 21:36:41 +00009539 xmlParseComment(ctxt);
9540 ctxt->instate = XML_PARSER_CONTENT;
9541 } else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
9542 (ctxt->input->cur[2] == '[') &&
9543 (ctxt->input->cur[3] == 'C') &&
9544 (ctxt->input->cur[4] == 'D') &&
9545 (ctxt->input->cur[5] == 'A') &&
9546 (ctxt->input->cur[6] == 'T') &&
9547 (ctxt->input->cur[7] == 'A') &&
9548 (ctxt->input->cur[8] == '[')) {
9549 SKIP(9);
9550 ctxt->instate = XML_PARSER_CDATA_SECTION;
Daniel Veillarda880b122003-04-21 21:36:41 +00009551 break;
9552 } else if ((cur == '<') && (next == '!') &&
9553 (avail < 9)) {
9554 goto done;
9555 } else if (cur == '&') {
9556 if ((!terminate) &&
9557 (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0))
9558 goto done;
Daniel Veillarda880b122003-04-21 21:36:41 +00009559 xmlParseReference(ctxt);
9560 } else {
9561 /* TODO Avoid the extra copy, handle directly !!! */
9562 /*
9563 * Goal of the following test is:
9564 * - minimize calls to the SAX 'character' callback
9565 * when they are mergeable
9566 * - handle an problem for isBlank when we only parse
9567 * a sequence of blank chars and the next one is
9568 * not available to check against '<' presence.
9569 * - tries to homogenize the differences in SAX
9570 * callbacks between the push and pull versions
9571 * of the parser.
9572 */
9573 if ((ctxt->inputNr == 1) &&
9574 (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
9575 if (!terminate) {
9576 if (ctxt->progressive) {
9577 if ((lastlt == NULL) ||
9578 (ctxt->input->cur > lastlt))
9579 goto done;
9580 } else if (xmlParseLookupSequence(ctxt,
9581 '<', 0, 0) < 0) {
9582 goto done;
9583 }
9584 }
9585 }
9586 ctxt->checkIndex = 0;
Daniel Veillarda880b122003-04-21 21:36:41 +00009587 xmlParseCharData(ctxt, 0);
9588 }
9589 /*
9590 * Pop-up of finished entities.
9591 */
9592 while ((RAW == 0) && (ctxt->inputNr > 1))
9593 xmlPopInput(ctxt);
9594 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009595 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
9596 "detected an error in element content\n");
Daniel Veillarda880b122003-04-21 21:36:41 +00009597 ctxt->instate = XML_PARSER_EOF;
9598 break;
9599 }
9600 break;
9601 }
9602 case XML_PARSER_END_TAG:
9603 if (avail < 2)
9604 goto done;
9605 if (!terminate) {
9606 if (ctxt->progressive) {
9607 if ((lastgt == NULL) || (ctxt->input->cur > lastgt))
9608 goto done;
9609 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
9610 goto done;
9611 }
9612 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00009613 if (ctxt->sax2) {
9614 xmlParseEndTag2(ctxt,
9615 (void *) ctxt->pushTab[ctxt->nameNr * 3 - 3],
9616 (void *) ctxt->pushTab[ctxt->nameNr * 3 - 2], 0,
9617 (int) (long) ctxt->pushTab[ctxt->nameNr * 3 - 1]);
9618 nameNsPop(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +00009619 }
9620#ifdef LIBXML_SAX1_ENABLED
9621 else
Daniel Veillarde57ec792003-09-10 10:50:59 +00009622 xmlParseEndTag1(ctxt, 0);
Daniel Veillard81273902003-09-30 00:43:48 +00009623#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +00009624 if (ctxt->nameNr == 0) {
Daniel Veillarda880b122003-04-21 21:36:41 +00009625 ctxt->instate = XML_PARSER_EPILOG;
Daniel Veillarda880b122003-04-21 21:36:41 +00009626 } else {
9627 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillarda880b122003-04-21 21:36:41 +00009628 }
9629 break;
9630 case XML_PARSER_CDATA_SECTION: {
9631 /*
9632 * The Push mode need to have the SAX callback for
9633 * cdataBlock merge back contiguous callbacks.
9634 */
9635 int base;
9636
9637 base = xmlParseLookupSequence(ctxt, ']', ']', '>');
9638 if (base < 0) {
9639 if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) {
9640 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
9641 if (ctxt->sax->cdataBlock != NULL)
Daniel Veillardd9d32ae2003-07-05 20:32:43 +00009642 ctxt->sax->cdataBlock(ctxt->userData,
9643 ctxt->input->cur,
9644 XML_PARSER_BIG_BUFFER_SIZE);
9645 else if (ctxt->sax->characters != NULL)
9646 ctxt->sax->characters(ctxt->userData,
9647 ctxt->input->cur,
Daniel Veillarda880b122003-04-21 21:36:41 +00009648 XML_PARSER_BIG_BUFFER_SIZE);
9649 }
9650 SKIP(XML_PARSER_BIG_BUFFER_SIZE);
9651 ctxt->checkIndex = 0;
9652 }
9653 goto done;
9654 } else {
9655 if ((ctxt->sax != NULL) && (base > 0) &&
9656 (!ctxt->disableSAX)) {
9657 if (ctxt->sax->cdataBlock != NULL)
9658 ctxt->sax->cdataBlock(ctxt->userData,
9659 ctxt->input->cur, base);
Daniel Veillardd9d32ae2003-07-05 20:32:43 +00009660 else if (ctxt->sax->characters != NULL)
9661 ctxt->sax->characters(ctxt->userData,
9662 ctxt->input->cur, base);
Daniel Veillarda880b122003-04-21 21:36:41 +00009663 }
9664 SKIP(base + 3);
9665 ctxt->checkIndex = 0;
9666 ctxt->instate = XML_PARSER_CONTENT;
9667#ifdef DEBUG_PUSH
9668 xmlGenericError(xmlGenericErrorContext,
9669 "PP: entering CONTENT\n");
9670#endif
9671 }
9672 break;
9673 }
Owen Taylor3473f882001-02-23 17:55:21 +00009674 case XML_PARSER_MISC:
9675 SKIP_BLANKS;
9676 if (ctxt->input->buf == NULL)
Daniel Veillarda880b122003-04-21 21:36:41 +00009677 avail = ctxt->input->length -
9678 (ctxt->input->cur - ctxt->input->base);
Owen Taylor3473f882001-02-23 17:55:21 +00009679 else
Daniel Veillarda880b122003-04-21 21:36:41 +00009680 avail = ctxt->input->buf->buffer->use -
9681 (ctxt->input->cur - ctxt->input->base);
Owen Taylor3473f882001-02-23 17:55:21 +00009682 if (avail < 2)
9683 goto done;
9684 cur = ctxt->input->cur[0];
9685 next = ctxt->input->cur[1];
9686 if ((cur == '<') && (next == '?')) {
9687 if ((!terminate) &&
9688 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
9689 goto done;
9690#ifdef DEBUG_PUSH
9691 xmlGenericError(xmlGenericErrorContext,
9692 "PP: Parsing PI\n");
9693#endif
9694 xmlParsePI(ctxt);
9695 } else if ((cur == '<') && (next == '!') &&
Daniel Veillarda880b122003-04-21 21:36:41 +00009696 (ctxt->input->cur[2] == '-') &&
9697 (ctxt->input->cur[3] == '-')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009698 if ((!terminate) &&
9699 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
9700 goto done;
9701#ifdef DEBUG_PUSH
9702 xmlGenericError(xmlGenericErrorContext,
9703 "PP: Parsing Comment\n");
9704#endif
9705 xmlParseComment(ctxt);
9706 ctxt->instate = XML_PARSER_MISC;
9707 } else if ((cur == '<') && (next == '!') &&
Daniel Veillarda880b122003-04-21 21:36:41 +00009708 (ctxt->input->cur[2] == 'D') &&
9709 (ctxt->input->cur[3] == 'O') &&
9710 (ctxt->input->cur[4] == 'C') &&
9711 (ctxt->input->cur[5] == 'T') &&
9712 (ctxt->input->cur[6] == 'Y') &&
9713 (ctxt->input->cur[7] == 'P') &&
Owen Taylor3473f882001-02-23 17:55:21 +00009714 (ctxt->input->cur[8] == 'E')) {
9715 if ((!terminate) &&
9716 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
9717 goto done;
9718#ifdef DEBUG_PUSH
9719 xmlGenericError(xmlGenericErrorContext,
9720 "PP: Parsing internal subset\n");
9721#endif
9722 ctxt->inSubset = 1;
9723 xmlParseDocTypeDecl(ctxt);
9724 if (RAW == '[') {
9725 ctxt->instate = XML_PARSER_DTD;
9726#ifdef DEBUG_PUSH
9727 xmlGenericError(xmlGenericErrorContext,
9728 "PP: entering DTD\n");
9729#endif
9730 } else {
9731 /*
9732 * Create and update the external subset.
9733 */
9734 ctxt->inSubset = 2;
9735 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
9736 (ctxt->sax->externalSubset != NULL))
9737 ctxt->sax->externalSubset(ctxt->userData,
9738 ctxt->intSubName, ctxt->extSubSystem,
9739 ctxt->extSubURI);
9740 ctxt->inSubset = 0;
9741 ctxt->instate = XML_PARSER_PROLOG;
9742#ifdef DEBUG_PUSH
9743 xmlGenericError(xmlGenericErrorContext,
9744 "PP: entering PROLOG\n");
9745#endif
9746 }
9747 } else if ((cur == '<') && (next == '!') &&
9748 (avail < 9)) {
9749 goto done;
9750 } else {
9751 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +00009752 ctxt->progressive = 1;
9753 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Owen Taylor3473f882001-02-23 17:55:21 +00009754#ifdef DEBUG_PUSH
9755 xmlGenericError(xmlGenericErrorContext,
9756 "PP: entering START_TAG\n");
9757#endif
9758 }
9759 break;
Owen Taylor3473f882001-02-23 17:55:21 +00009760 case XML_PARSER_PROLOG:
9761 SKIP_BLANKS;
9762 if (ctxt->input->buf == NULL)
9763 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
9764 else
9765 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
9766 if (avail < 2)
9767 goto done;
9768 cur = ctxt->input->cur[0];
9769 next = ctxt->input->cur[1];
9770 if ((cur == '<') && (next == '?')) {
9771 if ((!terminate) &&
9772 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
9773 goto done;
9774#ifdef DEBUG_PUSH
9775 xmlGenericError(xmlGenericErrorContext,
9776 "PP: Parsing PI\n");
9777#endif
9778 xmlParsePI(ctxt);
9779 } else if ((cur == '<') && (next == '!') &&
9780 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
9781 if ((!terminate) &&
9782 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
9783 goto done;
9784#ifdef DEBUG_PUSH
9785 xmlGenericError(xmlGenericErrorContext,
9786 "PP: Parsing Comment\n");
9787#endif
9788 xmlParseComment(ctxt);
9789 ctxt->instate = XML_PARSER_PROLOG;
9790 } else if ((cur == '<') && (next == '!') &&
9791 (avail < 4)) {
9792 goto done;
9793 } else {
9794 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +00009795 ctxt->progressive = 1;
9796 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Owen Taylor3473f882001-02-23 17:55:21 +00009797#ifdef DEBUG_PUSH
9798 xmlGenericError(xmlGenericErrorContext,
9799 "PP: entering START_TAG\n");
9800#endif
9801 }
9802 break;
9803 case XML_PARSER_EPILOG:
9804 SKIP_BLANKS;
9805 if (ctxt->input->buf == NULL)
9806 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
9807 else
9808 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
9809 if (avail < 2)
9810 goto done;
9811 cur = ctxt->input->cur[0];
9812 next = ctxt->input->cur[1];
9813 if ((cur == '<') && (next == '?')) {
9814 if ((!terminate) &&
9815 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
9816 goto done;
9817#ifdef DEBUG_PUSH
9818 xmlGenericError(xmlGenericErrorContext,
9819 "PP: Parsing PI\n");
9820#endif
9821 xmlParsePI(ctxt);
9822 ctxt->instate = XML_PARSER_EPILOG;
9823 } else if ((cur == '<') && (next == '!') &&
9824 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
9825 if ((!terminate) &&
9826 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
9827 goto done;
9828#ifdef DEBUG_PUSH
9829 xmlGenericError(xmlGenericErrorContext,
9830 "PP: Parsing Comment\n");
9831#endif
9832 xmlParseComment(ctxt);
9833 ctxt->instate = XML_PARSER_EPILOG;
9834 } else if ((cur == '<') && (next == '!') &&
9835 (avail < 4)) {
9836 goto done;
9837 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009838 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009839 ctxt->instate = XML_PARSER_EOF;
9840#ifdef DEBUG_PUSH
9841 xmlGenericError(xmlGenericErrorContext,
9842 "PP: entering EOF\n");
9843#endif
Daniel Veillard8d24cc12002-03-05 15:41:29 +00009844 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00009845 ctxt->sax->endDocument(ctxt->userData);
9846 goto done;
9847 }
9848 break;
Owen Taylor3473f882001-02-23 17:55:21 +00009849 case XML_PARSER_DTD: {
9850 /*
9851 * Sorry but progressive parsing of the internal subset
9852 * is not expected to be supported. We first check that
9853 * the full content of the internal subset is available and
9854 * the parsing is launched only at that point.
9855 * Internal subset ends up with "']' S? '>'" in an unescaped
9856 * section and not in a ']]>' sequence which are conditional
9857 * sections (whoever argued to keep that crap in XML deserve
9858 * a place in hell !).
9859 */
9860 int base, i;
9861 xmlChar *buf;
9862 xmlChar quote = 0;
9863
9864 base = ctxt->input->cur - ctxt->input->base;
9865 if (base < 0) return(0);
9866 if (ctxt->checkIndex > base)
9867 base = ctxt->checkIndex;
9868 buf = ctxt->input->buf->buffer->content;
9869 for (;(unsigned int) base < ctxt->input->buf->buffer->use;
9870 base++) {
9871 if (quote != 0) {
9872 if (buf[base] == quote)
9873 quote = 0;
9874 continue;
9875 }
9876 if (buf[base] == '"') {
9877 quote = '"';
9878 continue;
9879 }
9880 if (buf[base] == '\'') {
9881 quote = '\'';
9882 continue;
9883 }
9884 if (buf[base] == ']') {
9885 if ((unsigned int) base +1 >=
9886 ctxt->input->buf->buffer->use)
9887 break;
9888 if (buf[base + 1] == ']') {
9889 /* conditional crap, skip both ']' ! */
9890 base++;
9891 continue;
9892 }
9893 for (i = 0;
9894 (unsigned int) base + i < ctxt->input->buf->buffer->use;
9895 i++) {
9896 if (buf[base + i] == '>')
9897 goto found_end_int_subset;
9898 }
9899 break;
9900 }
9901 }
9902 /*
9903 * We didn't found the end of the Internal subset
9904 */
9905 if (quote == 0)
9906 ctxt->checkIndex = base;
9907#ifdef DEBUG_PUSH
9908 if (next == 0)
9909 xmlGenericError(xmlGenericErrorContext,
9910 "PP: lookup of int subset end filed\n");
9911#endif
9912 goto done;
9913
9914found_end_int_subset:
9915 xmlParseInternalSubset(ctxt);
9916 ctxt->inSubset = 2;
9917 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
9918 (ctxt->sax->externalSubset != NULL))
9919 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
9920 ctxt->extSubSystem, ctxt->extSubURI);
9921 ctxt->inSubset = 0;
9922 ctxt->instate = XML_PARSER_PROLOG;
9923 ctxt->checkIndex = 0;
9924#ifdef DEBUG_PUSH
9925 xmlGenericError(xmlGenericErrorContext,
9926 "PP: entering PROLOG\n");
9927#endif
9928 break;
9929 }
9930 case XML_PARSER_COMMENT:
9931 xmlGenericError(xmlGenericErrorContext,
9932 "PP: internal error, state == COMMENT\n");
9933 ctxt->instate = XML_PARSER_CONTENT;
9934#ifdef DEBUG_PUSH
9935 xmlGenericError(xmlGenericErrorContext,
9936 "PP: entering CONTENT\n");
9937#endif
9938 break;
Daniel Veillarda880b122003-04-21 21:36:41 +00009939 case XML_PARSER_IGNORE:
9940 xmlGenericError(xmlGenericErrorContext,
9941 "PP: internal error, state == IGNORE");
9942 ctxt->instate = XML_PARSER_DTD;
9943#ifdef DEBUG_PUSH
9944 xmlGenericError(xmlGenericErrorContext,
9945 "PP: entering DTD\n");
9946#endif
9947 break;
Owen Taylor3473f882001-02-23 17:55:21 +00009948 case XML_PARSER_PI:
9949 xmlGenericError(xmlGenericErrorContext,
9950 "PP: internal error, state == PI\n");
9951 ctxt->instate = XML_PARSER_CONTENT;
9952#ifdef DEBUG_PUSH
9953 xmlGenericError(xmlGenericErrorContext,
9954 "PP: entering CONTENT\n");
9955#endif
9956 break;
9957 case XML_PARSER_ENTITY_DECL:
9958 xmlGenericError(xmlGenericErrorContext,
9959 "PP: internal error, state == ENTITY_DECL\n");
9960 ctxt->instate = XML_PARSER_DTD;
9961#ifdef DEBUG_PUSH
9962 xmlGenericError(xmlGenericErrorContext,
9963 "PP: entering DTD\n");
9964#endif
9965 break;
9966 case XML_PARSER_ENTITY_VALUE:
9967 xmlGenericError(xmlGenericErrorContext,
9968 "PP: internal error, state == ENTITY_VALUE\n");
9969 ctxt->instate = XML_PARSER_CONTENT;
9970#ifdef DEBUG_PUSH
9971 xmlGenericError(xmlGenericErrorContext,
9972 "PP: entering DTD\n");
9973#endif
9974 break;
9975 case XML_PARSER_ATTRIBUTE_VALUE:
9976 xmlGenericError(xmlGenericErrorContext,
9977 "PP: internal error, state == ATTRIBUTE_VALUE\n");
9978 ctxt->instate = XML_PARSER_START_TAG;
9979#ifdef DEBUG_PUSH
9980 xmlGenericError(xmlGenericErrorContext,
9981 "PP: entering START_TAG\n");
9982#endif
9983 break;
9984 case XML_PARSER_SYSTEM_LITERAL:
9985 xmlGenericError(xmlGenericErrorContext,
9986 "PP: internal error, state == SYSTEM_LITERAL\n");
9987 ctxt->instate = XML_PARSER_START_TAG;
9988#ifdef DEBUG_PUSH
9989 xmlGenericError(xmlGenericErrorContext,
9990 "PP: entering START_TAG\n");
9991#endif
9992 break;
Daniel Veillard4a7ae502002-02-18 19:18:17 +00009993 case XML_PARSER_PUBLIC_LITERAL:
9994 xmlGenericError(xmlGenericErrorContext,
9995 "PP: internal error, state == PUBLIC_LITERAL\n");
9996 ctxt->instate = XML_PARSER_START_TAG;
9997#ifdef DEBUG_PUSH
9998 xmlGenericError(xmlGenericErrorContext,
9999 "PP: entering START_TAG\n");
10000#endif
10001 break;
Owen Taylor3473f882001-02-23 17:55:21 +000010002 }
10003 }
10004done:
10005#ifdef DEBUG_PUSH
10006 xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret);
10007#endif
10008 return(ret);
10009}
10010
10011/**
Owen Taylor3473f882001-02-23 17:55:21 +000010012 * xmlParseChunk:
10013 * @ctxt: an XML parser context
10014 * @chunk: an char array
10015 * @size: the size in byte of the chunk
10016 * @terminate: last chunk indicator
10017 *
10018 * Parse a Chunk of memory
10019 *
10020 * Returns zero if no error, the xmlParserErrors otherwise.
10021 */
10022int
10023xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
10024 int terminate) {
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000010025 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
10026 return(ctxt->errNo);
Daniel Veillard309f81d2003-09-23 09:02:53 +000010027 if (ctxt->instate == XML_PARSER_START)
10028 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010029 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
10030 (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) {
10031 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
10032 int cur = ctxt->input->cur - ctxt->input->base;
10033
10034 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
10035 ctxt->input->base = ctxt->input->buf->buffer->content + base;
10036 ctxt->input->cur = ctxt->input->base + cur;
Daniel Veillard48b2f892001-02-25 16:11:03 +000010037 ctxt->input->end =
10038 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000010039#ifdef DEBUG_PUSH
10040 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
10041#endif
10042
Owen Taylor3473f882001-02-23 17:55:21 +000010043 } else if (ctxt->instate != XML_PARSER_EOF) {
10044 if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
10045 xmlParserInputBufferPtr in = ctxt->input->buf;
10046 if ((in->encoder != NULL) && (in->buffer != NULL) &&
10047 (in->raw != NULL)) {
10048 int nbchars;
10049
10050 nbchars = xmlCharEncInFunc(in->encoder, in->buffer, in->raw);
10051 if (nbchars < 0) {
Daniel Veillard24eb9782003-10-04 21:08:09 +000010052 /* TODO 2.6.0 */
Owen Taylor3473f882001-02-23 17:55:21 +000010053 xmlGenericError(xmlGenericErrorContext,
10054 "xmlParseChunk: encoder error\n");
10055 return(XML_ERR_INVALID_ENCODING);
10056 }
10057 }
10058 }
10059 }
10060 xmlParseTryOrFinish(ctxt, terminate);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000010061 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
10062 return(ctxt->errNo);
Owen Taylor3473f882001-02-23 17:55:21 +000010063 if (terminate) {
10064 /*
10065 * Check for termination
10066 */
Daniel Veillard819d5cb2002-10-14 11:15:18 +000010067 int avail = 0;
10068 if (ctxt->input->buf == NULL)
10069 avail = ctxt->input->length -
10070 (ctxt->input->cur - ctxt->input->base);
10071 else
10072 avail = ctxt->input->buf->buffer->use -
10073 (ctxt->input->cur - ctxt->input->base);
10074
Owen Taylor3473f882001-02-23 17:55:21 +000010075 if ((ctxt->instate != XML_PARSER_EOF) &&
10076 (ctxt->instate != XML_PARSER_EPILOG)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010077 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010078 }
Daniel Veillard819d5cb2002-10-14 11:15:18 +000010079 if ((ctxt->instate == XML_PARSER_EPILOG) && (avail > 0)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010080 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Daniel Veillard819d5cb2002-10-14 11:15:18 +000010081 }
Owen Taylor3473f882001-02-23 17:55:21 +000010082 if (ctxt->instate != XML_PARSER_EOF) {
Daniel Veillard8d24cc12002-03-05 15:41:29 +000010083 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +000010084 ctxt->sax->endDocument(ctxt->userData);
10085 }
10086 ctxt->instate = XML_PARSER_EOF;
10087 }
10088 return((xmlParserErrors) ctxt->errNo);
10089}
10090
10091/************************************************************************
10092 * *
10093 * I/O front end functions to the parser *
10094 * *
10095 ************************************************************************/
10096
10097/**
10098 * xmlStopParser:
10099 * @ctxt: an XML parser context
10100 *
10101 * Blocks further parser processing
10102 */
10103void
10104xmlStopParser(xmlParserCtxtPtr ctxt) {
10105 ctxt->instate = XML_PARSER_EOF;
10106 if (ctxt->input != NULL)
10107 ctxt->input->cur = BAD_CAST"";
10108}
10109
10110/**
10111 * xmlCreatePushParserCtxt:
10112 * @sax: a SAX handler
10113 * @user_data: The user data returned on SAX callbacks
10114 * @chunk: a pointer to an array of chars
10115 * @size: number of chars in the array
10116 * @filename: an optional file name or URI
10117 *
Daniel Veillard176d99f2002-07-06 19:22:28 +000010118 * Create a parser context for using the XML parser in push mode.
10119 * If @buffer and @size are non-NULL, the data is used to detect
10120 * the encoding. The remaining characters will be parsed so they
10121 * don't need to be fed in again through xmlParseChunk.
Owen Taylor3473f882001-02-23 17:55:21 +000010122 * To allow content encoding detection, @size should be >= 4
10123 * The value of @filename is used for fetching external entities
10124 * and error/warning reports.
10125 *
10126 * Returns the new parser context or NULL
10127 */
Daniel Veillard176d99f2002-07-06 19:22:28 +000010128
Owen Taylor3473f882001-02-23 17:55:21 +000010129xmlParserCtxtPtr
10130xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
10131 const char *chunk, int size, const char *filename) {
10132 xmlParserCtxtPtr ctxt;
10133 xmlParserInputPtr inputStream;
10134 xmlParserInputBufferPtr buf;
10135 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
10136
10137 /*
10138 * plug some encoding conversion routines
10139 */
10140 if ((chunk != NULL) && (size >= 4))
10141 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
10142
10143 buf = xmlAllocParserInputBuffer(enc);
10144 if (buf == NULL) return(NULL);
10145
10146 ctxt = xmlNewParserCtxt();
10147 if (ctxt == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +000010148 xmlErrMemory(NULL, "creating parser: out of memory\n");
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000010149 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000010150 return(NULL);
10151 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000010152 ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 * sizeof(xmlChar *));
10153 if (ctxt->pushTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010154 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +000010155 xmlFreeParserInputBuffer(buf);
10156 xmlFreeParserCtxt(ctxt);
10157 return(NULL);
10158 }
Owen Taylor3473f882001-02-23 17:55:21 +000010159 if (sax != NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +000010160#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard092643b2003-09-25 14:29:29 +000010161 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Daniel Veillard81273902003-09-30 00:43:48 +000010162#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000010163 xmlFree(ctxt->sax);
10164 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
10165 if (ctxt->sax == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010166 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000010167 xmlFreeParserInputBuffer(buf);
10168 xmlFreeParserCtxt(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010169 return(NULL);
10170 }
10171 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
10172 if (user_data != NULL)
10173 ctxt->userData = user_data;
10174 }
10175 if (filename == NULL) {
10176 ctxt->directory = NULL;
10177 } else {
10178 ctxt->directory = xmlParserGetDirectory(filename);
10179 }
10180
10181 inputStream = xmlNewInputStream(ctxt);
10182 if (inputStream == NULL) {
10183 xmlFreeParserCtxt(ctxt);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000010184 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000010185 return(NULL);
10186 }
10187
10188 if (filename == NULL)
10189 inputStream->filename = NULL;
10190 else
Daniel Veillardf4862f02002-09-10 11:13:43 +000010191 inputStream->filename = (char *)
Igor Zlatkovic5f9fada2003-02-19 14:51:00 +000010192 xmlCanonicPath((const xmlChar *) filename);
Owen Taylor3473f882001-02-23 17:55:21 +000010193 inputStream->buf = buf;
10194 inputStream->base = inputStream->buf->buffer->content;
10195 inputStream->cur = inputStream->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +000010196 inputStream->end =
10197 &inputStream->buf->buffer->content[inputStream->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000010198
10199 inputPush(ctxt, inputStream);
10200
10201 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
10202 (ctxt->input->buf != NULL)) {
Daniel Veillardaa39a0f2002-01-06 12:47:22 +000010203 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
10204 int cur = ctxt->input->cur - ctxt->input->base;
10205
Owen Taylor3473f882001-02-23 17:55:21 +000010206 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
Daniel Veillardaa39a0f2002-01-06 12:47:22 +000010207
10208 ctxt->input->base = ctxt->input->buf->buffer->content + base;
10209 ctxt->input->cur = ctxt->input->base + cur;
10210 ctxt->input->end =
10211 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000010212#ifdef DEBUG_PUSH
10213 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
10214#endif
10215 }
10216
Daniel Veillard0e4cd172001-06-28 12:13:56 +000010217 if (enc != XML_CHAR_ENCODING_NONE) {
10218 xmlSwitchEncoding(ctxt, enc);
10219 }
10220
Owen Taylor3473f882001-02-23 17:55:21 +000010221 return(ctxt);
10222}
Daniel Veillard73b013f2003-09-30 12:36:01 +000010223#endif /* LIBXML_PUSH_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000010224
10225/**
10226 * xmlCreateIOParserCtxt:
10227 * @sax: a SAX handler
10228 * @user_data: The user data returned on SAX callbacks
10229 * @ioread: an I/O read function
10230 * @ioclose: an I/O close function
10231 * @ioctx: an I/O handler
10232 * @enc: the charset encoding if known
10233 *
10234 * Create a parser context for using the XML parser with an existing
10235 * I/O stream
10236 *
10237 * Returns the new parser context or NULL
10238 */
10239xmlParserCtxtPtr
10240xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
10241 xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
10242 void *ioctx, xmlCharEncoding enc) {
10243 xmlParserCtxtPtr ctxt;
10244 xmlParserInputPtr inputStream;
10245 xmlParserInputBufferPtr buf;
10246
10247 buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
10248 if (buf == NULL) return(NULL);
10249
10250 ctxt = xmlNewParserCtxt();
10251 if (ctxt == NULL) {
10252 xmlFree(buf);
10253 return(NULL);
10254 }
10255 if (sax != NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +000010256#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard092643b2003-09-25 14:29:29 +000010257 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Daniel Veillard81273902003-09-30 00:43:48 +000010258#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000010259 xmlFree(ctxt->sax);
10260 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
10261 if (ctxt->sax == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010262 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010263 xmlFree(ctxt);
10264 return(NULL);
10265 }
10266 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
10267 if (user_data != NULL)
10268 ctxt->userData = user_data;
10269 }
10270
10271 inputStream = xmlNewIOInputStream(ctxt, buf, enc);
10272 if (inputStream == NULL) {
10273 xmlFreeParserCtxt(ctxt);
10274 return(NULL);
10275 }
10276 inputPush(ctxt, inputStream);
10277
10278 return(ctxt);
10279}
10280
Daniel Veillard4432df22003-09-28 18:58:27 +000010281#ifdef LIBXML_VALID_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000010282/************************************************************************
10283 * *
Daniel Veillardcbaf3992001-12-31 16:16:02 +000010284 * Front ends when parsing a DTD *
Owen Taylor3473f882001-02-23 17:55:21 +000010285 * *
10286 ************************************************************************/
10287
10288/**
10289 * xmlIOParseDTD:
10290 * @sax: the SAX handler block or NULL
10291 * @input: an Input Buffer
10292 * @enc: the charset encoding if known
10293 *
10294 * Load and parse a DTD
10295 *
10296 * Returns the resulting xmlDtdPtr or NULL in case of error.
10297 * @input will be freed at parsing end.
10298 */
10299
10300xmlDtdPtr
10301xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
10302 xmlCharEncoding enc) {
10303 xmlDtdPtr ret = NULL;
10304 xmlParserCtxtPtr ctxt;
10305 xmlParserInputPtr pinput = NULL;
Daniel Veillard87a764e2001-06-20 17:41:10 +000010306 xmlChar start[4];
Owen Taylor3473f882001-02-23 17:55:21 +000010307
10308 if (input == NULL)
10309 return(NULL);
10310
10311 ctxt = xmlNewParserCtxt();
10312 if (ctxt == NULL) {
10313 return(NULL);
10314 }
10315
10316 /*
10317 * Set-up the SAX context
10318 */
10319 if (sax != NULL) {
10320 if (ctxt->sax != NULL)
10321 xmlFree(ctxt->sax);
10322 ctxt->sax = sax;
10323 ctxt->userData = NULL;
10324 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000010325 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010326
10327 /*
10328 * generate a parser input from the I/O handler
10329 */
10330
10331 pinput = xmlNewIOInputStream(ctxt, input, enc);
10332 if (pinput == NULL) {
10333 if (sax != NULL) ctxt->sax = NULL;
10334 xmlFreeParserCtxt(ctxt);
10335 return(NULL);
10336 }
10337
10338 /*
10339 * plug some encoding conversion routines here.
10340 */
10341 xmlPushInput(ctxt, pinput);
10342
10343 pinput->filename = NULL;
10344 pinput->line = 1;
10345 pinput->col = 1;
10346 pinput->base = ctxt->input->cur;
10347 pinput->cur = ctxt->input->cur;
10348 pinput->free = NULL;
10349
10350 /*
10351 * let's parse that entity knowing it's an external subset.
10352 */
10353 ctxt->inSubset = 2;
10354 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
10355 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
10356 BAD_CAST "none", BAD_CAST "none");
Daniel Veillard87a764e2001-06-20 17:41:10 +000010357
Daniel Veillard4aede2e2003-10-17 12:43:59 +000010358 if ((enc == XML_CHAR_ENCODING_NONE) &&
10359 ((ctxt->input->end - ctxt->input->cur) >= 4)) {
Daniel Veillard87a764e2001-06-20 17:41:10 +000010360 /*
10361 * Get the 4 first bytes and decode the charset
10362 * if enc != XML_CHAR_ENCODING_NONE
10363 * plug some encoding conversion routines.
10364 */
10365 start[0] = RAW;
10366 start[1] = NXT(1);
10367 start[2] = NXT(2);
10368 start[3] = NXT(3);
10369 enc = xmlDetectCharEncoding(start, 4);
10370 if (enc != XML_CHAR_ENCODING_NONE) {
10371 xmlSwitchEncoding(ctxt, enc);
10372 }
10373 }
10374
Owen Taylor3473f882001-02-23 17:55:21 +000010375 xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
10376
10377 if (ctxt->myDoc != NULL) {
10378 if (ctxt->wellFormed) {
10379 ret = ctxt->myDoc->extSubset;
10380 ctxt->myDoc->extSubset = NULL;
Daniel Veillard329456a2003-04-26 21:21:00 +000010381 if (ret != NULL) {
10382 xmlNodePtr tmp;
10383
10384 ret->doc = NULL;
10385 tmp = ret->children;
10386 while (tmp != NULL) {
10387 tmp->doc = NULL;
10388 tmp = tmp->next;
10389 }
10390 }
Owen Taylor3473f882001-02-23 17:55:21 +000010391 } else {
10392 ret = NULL;
10393 }
10394 xmlFreeDoc(ctxt->myDoc);
10395 ctxt->myDoc = NULL;
10396 }
10397 if (sax != NULL) ctxt->sax = NULL;
10398 xmlFreeParserCtxt(ctxt);
10399
10400 return(ret);
10401}
10402
10403/**
10404 * xmlSAXParseDTD:
10405 * @sax: the SAX handler block
10406 * @ExternalID: a NAME* containing the External ID of the DTD
10407 * @SystemID: a NAME* containing the URL to the DTD
10408 *
10409 * Load and parse an external subset.
10410 *
10411 * Returns the resulting xmlDtdPtr or NULL in case of error.
10412 */
10413
10414xmlDtdPtr
10415xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
10416 const xmlChar *SystemID) {
10417 xmlDtdPtr ret = NULL;
10418 xmlParserCtxtPtr ctxt;
10419 xmlParserInputPtr input = NULL;
10420 xmlCharEncoding enc;
10421
10422 if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
10423
10424 ctxt = xmlNewParserCtxt();
10425 if (ctxt == NULL) {
10426 return(NULL);
10427 }
10428
10429 /*
10430 * Set-up the SAX context
10431 */
10432 if (sax != NULL) {
10433 if (ctxt->sax != NULL)
10434 xmlFree(ctxt->sax);
10435 ctxt->sax = sax;
Daniel Veillardbf1e3d82003-08-14 23:57:26 +000010436 ctxt->userData = ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000010437 }
10438
10439 /*
10440 * Ask the Entity resolver to load the damn thing
10441 */
10442
10443 if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
Daniel Veillardc6abc3d2003-04-26 13:27:30 +000010444 input = ctxt->sax->resolveEntity(ctxt, ExternalID, SystemID);
Owen Taylor3473f882001-02-23 17:55:21 +000010445 if (input == NULL) {
10446 if (sax != NULL) ctxt->sax = NULL;
10447 xmlFreeParserCtxt(ctxt);
10448 return(NULL);
10449 }
10450
10451 /*
10452 * plug some encoding conversion routines here.
10453 */
10454 xmlPushInput(ctxt, input);
Daniel Veillard4aede2e2003-10-17 12:43:59 +000010455 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
10456 enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
10457 xmlSwitchEncoding(ctxt, enc);
10458 }
Owen Taylor3473f882001-02-23 17:55:21 +000010459
10460 if (input->filename == NULL)
Daniel Veillard85095e22003-04-23 13:56:44 +000010461 input->filename = (char *) xmlCanonicPath(SystemID);
Owen Taylor3473f882001-02-23 17:55:21 +000010462 input->line = 1;
10463 input->col = 1;
10464 input->base = ctxt->input->cur;
10465 input->cur = ctxt->input->cur;
10466 input->free = NULL;
10467
10468 /*
10469 * let's parse that entity knowing it's an external subset.
10470 */
10471 ctxt->inSubset = 2;
10472 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
10473 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
10474 ExternalID, SystemID);
10475 xmlParseExternalSubset(ctxt, ExternalID, SystemID);
10476
10477 if (ctxt->myDoc != NULL) {
10478 if (ctxt->wellFormed) {
10479 ret = ctxt->myDoc->extSubset;
10480 ctxt->myDoc->extSubset = NULL;
Daniel Veillardc5573462003-04-25 16:43:49 +000010481 if (ret != NULL) {
10482 xmlNodePtr tmp;
10483
10484 ret->doc = NULL;
10485 tmp = ret->children;
10486 while (tmp != NULL) {
10487 tmp->doc = NULL;
10488 tmp = tmp->next;
10489 }
10490 }
Owen Taylor3473f882001-02-23 17:55:21 +000010491 } else {
10492 ret = NULL;
10493 }
10494 xmlFreeDoc(ctxt->myDoc);
10495 ctxt->myDoc = NULL;
10496 }
10497 if (sax != NULL) ctxt->sax = NULL;
10498 xmlFreeParserCtxt(ctxt);
10499
10500 return(ret);
10501}
10502
Daniel Veillard4432df22003-09-28 18:58:27 +000010503
Owen Taylor3473f882001-02-23 17:55:21 +000010504/**
10505 * xmlParseDTD:
10506 * @ExternalID: a NAME* containing the External ID of the DTD
10507 * @SystemID: a NAME* containing the URL to the DTD
10508 *
10509 * Load and parse an external subset.
10510 *
10511 * Returns the resulting xmlDtdPtr or NULL in case of error.
10512 */
10513
10514xmlDtdPtr
10515xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
10516 return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
10517}
Daniel Veillard4432df22003-09-28 18:58:27 +000010518#endif /* LIBXML_VALID_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000010519
10520/************************************************************************
10521 * *
10522 * Front ends when parsing an Entity *
10523 * *
10524 ************************************************************************/
10525
10526/**
Owen Taylor3473f882001-02-23 17:55:21 +000010527 * xmlParseCtxtExternalEntity:
10528 * @ctx: the existing parsing context
10529 * @URL: the URL for the entity to load
10530 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +000010531 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +000010532 *
10533 * Parse an external general entity within an existing parsing context
10534 * An external general parsed entity is well-formed if it matches the
10535 * production labeled extParsedEnt.
10536 *
10537 * [78] extParsedEnt ::= TextDecl? content
10538 *
10539 * Returns 0 if the entity is well formed, -1 in case of args problem and
10540 * the parser error code otherwise
10541 */
10542
10543int
10544xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
Daniel Veillardcda96922001-08-21 10:56:31 +000010545 const xmlChar *ID, xmlNodePtr *lst) {
Owen Taylor3473f882001-02-23 17:55:21 +000010546 xmlParserCtxtPtr ctxt;
10547 xmlDocPtr newDoc;
10548 xmlSAXHandlerPtr oldsax = NULL;
10549 int ret = 0;
Daniel Veillard87a764e2001-06-20 17:41:10 +000010550 xmlChar start[4];
10551 xmlCharEncoding enc;
Owen Taylor3473f882001-02-23 17:55:21 +000010552
10553 if (ctx->depth > 40) {
10554 return(XML_ERR_ENTITY_LOOP);
10555 }
10556
Daniel Veillardcda96922001-08-21 10:56:31 +000010557 if (lst != NULL)
10558 *lst = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000010559 if ((URL == NULL) && (ID == NULL))
10560 return(-1);
10561 if (ctx->myDoc == NULL) /* @@ relax but check for dereferences */
10562 return(-1);
10563
10564
10565 ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL);
10566 if (ctxt == NULL) return(-1);
10567 ctxt->userData = ctxt;
Daniel Veillarde2830f12003-01-08 17:47:49 +000010568 ctxt->_private = ctx->_private;
Owen Taylor3473f882001-02-23 17:55:21 +000010569 oldsax = ctxt->sax;
10570 ctxt->sax = ctx->sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000010571 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010572 newDoc = xmlNewDoc(BAD_CAST "1.0");
10573 if (newDoc == NULL) {
10574 xmlFreeParserCtxt(ctxt);
10575 return(-1);
10576 }
10577 if (ctx->myDoc != NULL) {
10578 newDoc->intSubset = ctx->myDoc->intSubset;
10579 newDoc->extSubset = ctx->myDoc->extSubset;
10580 }
10581 if (ctx->myDoc->URL != NULL) {
10582 newDoc->URL = xmlStrdup(ctx->myDoc->URL);
10583 }
10584 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
10585 if (newDoc->children == NULL) {
10586 ctxt->sax = oldsax;
10587 xmlFreeParserCtxt(ctxt);
10588 newDoc->intSubset = NULL;
10589 newDoc->extSubset = NULL;
10590 xmlFreeDoc(newDoc);
10591 return(-1);
10592 }
10593 nodePush(ctxt, newDoc->children);
10594 if (ctx->myDoc == NULL) {
10595 ctxt->myDoc = newDoc;
10596 } else {
10597 ctxt->myDoc = ctx->myDoc;
10598 newDoc->children->doc = ctx->myDoc;
10599 }
10600
Daniel Veillard87a764e2001-06-20 17:41:10 +000010601 /*
10602 * Get the 4 first bytes and decode the charset
10603 * if enc != XML_CHAR_ENCODING_NONE
10604 * plug some encoding conversion routines.
10605 */
10606 GROW
Daniel Veillard4aede2e2003-10-17 12:43:59 +000010607 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
10608 start[0] = RAW;
10609 start[1] = NXT(1);
10610 start[2] = NXT(2);
10611 start[3] = NXT(3);
10612 enc = xmlDetectCharEncoding(start, 4);
10613 if (enc != XML_CHAR_ENCODING_NONE) {
10614 xmlSwitchEncoding(ctxt, enc);
10615 }
Daniel Veillard87a764e2001-06-20 17:41:10 +000010616 }
10617
Owen Taylor3473f882001-02-23 17:55:21 +000010618 /*
10619 * Parse a possible text declaration first
10620 */
Daniel Veillarda07050d2003-10-19 14:46:32 +000010621 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +000010622 xmlParseTextDecl(ctxt);
10623 }
10624
10625 /*
10626 * Doing validity checking on chunk doesn't make sense
10627 */
10628 ctxt->instate = XML_PARSER_CONTENT;
10629 ctxt->validate = ctx->validate;
Daniel Veillard5f8d1a32003-03-23 21:02:00 +000010630 ctxt->valid = ctx->valid;
Owen Taylor3473f882001-02-23 17:55:21 +000010631 ctxt->loadsubset = ctx->loadsubset;
10632 ctxt->depth = ctx->depth + 1;
10633 ctxt->replaceEntities = ctx->replaceEntities;
10634 if (ctxt->validate) {
10635 ctxt->vctxt.error = ctx->vctxt.error;
10636 ctxt->vctxt.warning = ctx->vctxt.warning;
Owen Taylor3473f882001-02-23 17:55:21 +000010637 } else {
10638 ctxt->vctxt.error = NULL;
10639 ctxt->vctxt.warning = NULL;
10640 }
Daniel Veillarda9142e72001-06-19 11:07:54 +000010641 ctxt->vctxt.nodeTab = NULL;
10642 ctxt->vctxt.nodeNr = 0;
10643 ctxt->vctxt.nodeMax = 0;
10644 ctxt->vctxt.node = NULL;
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000010645 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
10646 ctxt->dict = ctx->dict;
10647 ctxt->dictNames = ctx->dictNames;
10648 ctxt->attsDefault = ctx->attsDefault;
10649 ctxt->attsSpecial = ctx->attsSpecial;
Owen Taylor3473f882001-02-23 17:55:21 +000010650
10651 xmlParseContent(ctxt);
10652
Daniel Veillard5f8d1a32003-03-23 21:02:00 +000010653 ctx->validate = ctxt->validate;
10654 ctx->valid = ctxt->valid;
Owen Taylor3473f882001-02-23 17:55:21 +000010655 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010656 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010657 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010658 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010659 }
10660 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010661 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010662 }
10663
10664 if (!ctxt->wellFormed) {
10665 if (ctxt->errNo == 0)
10666 ret = 1;
10667 else
10668 ret = ctxt->errNo;
10669 } else {
Daniel Veillardcda96922001-08-21 10:56:31 +000010670 if (lst != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000010671 xmlNodePtr cur;
10672
10673 /*
10674 * Return the newly created nodeset after unlinking it from
10675 * they pseudo parent.
10676 */
10677 cur = newDoc->children->children;
Daniel Veillardcda96922001-08-21 10:56:31 +000010678 *lst = cur;
Owen Taylor3473f882001-02-23 17:55:21 +000010679 while (cur != NULL) {
10680 cur->parent = NULL;
10681 cur = cur->next;
10682 }
10683 newDoc->children->children = NULL;
10684 }
10685 ret = 0;
10686 }
10687 ctxt->sax = oldsax;
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000010688 ctxt->dict = NULL;
10689 ctxt->attsDefault = NULL;
10690 ctxt->attsSpecial = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000010691 xmlFreeParserCtxt(ctxt);
10692 newDoc->intSubset = NULL;
10693 newDoc->extSubset = NULL;
10694 xmlFreeDoc(newDoc);
10695
10696 return(ret);
10697}
10698
10699/**
Daniel Veillard257d9102001-05-08 10:41:44 +000010700 * xmlParseExternalEntityPrivate:
Owen Taylor3473f882001-02-23 17:55:21 +000010701 * @doc: the document the chunk pertains to
Daniel Veillarda97a19b2001-05-20 13:19:52 +000010702 * @oldctxt: the previous parser context if available
Owen Taylor3473f882001-02-23 17:55:21 +000010703 * @sax: the SAX handler bloc (possibly NULL)
10704 * @user_data: The user data returned on SAX callbacks (possibly NULL)
10705 * @depth: Used for loop detection, use 0
10706 * @URL: the URL for the entity to load
10707 * @ID: the System ID for the entity to load
10708 * @list: the return value for the set of parsed nodes
10709 *
Daniel Veillard257d9102001-05-08 10:41:44 +000010710 * Private version of xmlParseExternalEntity()
Owen Taylor3473f882001-02-23 17:55:21 +000010711 *
10712 * Returns 0 if the entity is well formed, -1 in case of args problem and
10713 * the parser error code otherwise
10714 */
10715
Daniel Veillard7d515752003-09-26 19:12:37 +000010716static xmlParserErrors
Daniel Veillarda97a19b2001-05-20 13:19:52 +000010717xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
10718 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +000010719 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +000010720 const xmlChar *ID, xmlNodePtr *list) {
Owen Taylor3473f882001-02-23 17:55:21 +000010721 xmlParserCtxtPtr ctxt;
10722 xmlDocPtr newDoc;
10723 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillard7d515752003-09-26 19:12:37 +000010724 xmlParserErrors ret = XML_ERR_OK;
Daniel Veillard87a764e2001-06-20 17:41:10 +000010725 xmlChar start[4];
10726 xmlCharEncoding enc;
Owen Taylor3473f882001-02-23 17:55:21 +000010727
10728 if (depth > 40) {
10729 return(XML_ERR_ENTITY_LOOP);
10730 }
10731
10732
10733
10734 if (list != NULL)
10735 *list = NULL;
10736 if ((URL == NULL) && (ID == NULL))
Daniel Veillard7d515752003-09-26 19:12:37 +000010737 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000010738 if (doc == NULL) /* @@ relax but check for dereferences */
Daniel Veillard7d515752003-09-26 19:12:37 +000010739 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000010740
10741
10742 ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL);
William M. Brackb670e2e2003-09-27 01:05:55 +000010743 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
Owen Taylor3473f882001-02-23 17:55:21 +000010744 ctxt->userData = ctxt;
Daniel Veillarda97a19b2001-05-20 13:19:52 +000010745 if (oldctxt != NULL) {
10746 ctxt->_private = oldctxt->_private;
10747 ctxt->loadsubset = oldctxt->loadsubset;
10748 ctxt->validate = oldctxt->validate;
10749 ctxt->external = oldctxt->external;
Daniel Veillard44e1dd02003-02-21 23:23:28 +000010750 ctxt->record_info = oldctxt->record_info;
10751 ctxt->node_seq.maximum = oldctxt->node_seq.maximum;
10752 ctxt->node_seq.length = oldctxt->node_seq.length;
10753 ctxt->node_seq.buffer = oldctxt->node_seq.buffer;
Daniel Veillarda97a19b2001-05-20 13:19:52 +000010754 } else {
10755 /*
10756 * Doing validity checking on chunk without context
10757 * doesn't make sense
10758 */
10759 ctxt->_private = NULL;
10760 ctxt->validate = 0;
10761 ctxt->external = 2;
10762 ctxt->loadsubset = 0;
10763 }
Owen Taylor3473f882001-02-23 17:55:21 +000010764 if (sax != NULL) {
10765 oldsax = ctxt->sax;
10766 ctxt->sax = sax;
10767 if (user_data != NULL)
10768 ctxt->userData = user_data;
10769 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000010770 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010771 newDoc = xmlNewDoc(BAD_CAST "1.0");
10772 if (newDoc == NULL) {
Daniel Veillard44e1dd02003-02-21 23:23:28 +000010773 ctxt->node_seq.maximum = 0;
10774 ctxt->node_seq.length = 0;
10775 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000010776 xmlFreeParserCtxt(ctxt);
Daniel Veillard7d515752003-09-26 19:12:37 +000010777 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000010778 }
10779 if (doc != NULL) {
10780 newDoc->intSubset = doc->intSubset;
10781 newDoc->extSubset = doc->extSubset;
10782 }
10783 if (doc->URL != NULL) {
10784 newDoc->URL = xmlStrdup(doc->URL);
10785 }
10786 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
10787 if (newDoc->children == NULL) {
10788 if (sax != NULL)
10789 ctxt->sax = oldsax;
Daniel Veillard44e1dd02003-02-21 23:23:28 +000010790 ctxt->node_seq.maximum = 0;
10791 ctxt->node_seq.length = 0;
10792 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000010793 xmlFreeParserCtxt(ctxt);
10794 newDoc->intSubset = NULL;
10795 newDoc->extSubset = NULL;
10796 xmlFreeDoc(newDoc);
Daniel Veillard7d515752003-09-26 19:12:37 +000010797 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000010798 }
10799 nodePush(ctxt, newDoc->children);
10800 if (doc == NULL) {
10801 ctxt->myDoc = newDoc;
10802 } else {
10803 ctxt->myDoc = doc;
10804 newDoc->children->doc = doc;
10805 }
10806
Daniel Veillard87a764e2001-06-20 17:41:10 +000010807 /*
10808 * Get the 4 first bytes and decode the charset
10809 * if enc != XML_CHAR_ENCODING_NONE
10810 * plug some encoding conversion routines.
10811 */
10812 GROW;
Daniel Veillard4aede2e2003-10-17 12:43:59 +000010813 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
10814 start[0] = RAW;
10815 start[1] = NXT(1);
10816 start[2] = NXT(2);
10817 start[3] = NXT(3);
10818 enc = xmlDetectCharEncoding(start, 4);
10819 if (enc != XML_CHAR_ENCODING_NONE) {
10820 xmlSwitchEncoding(ctxt, enc);
10821 }
Daniel Veillard87a764e2001-06-20 17:41:10 +000010822 }
10823
Owen Taylor3473f882001-02-23 17:55:21 +000010824 /*
10825 * Parse a possible text declaration first
10826 */
Daniel Veillarda07050d2003-10-19 14:46:32 +000010827 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +000010828 xmlParseTextDecl(ctxt);
10829 }
10830
Owen Taylor3473f882001-02-23 17:55:21 +000010831 ctxt->instate = XML_PARSER_CONTENT;
Owen Taylor3473f882001-02-23 17:55:21 +000010832 ctxt->depth = depth;
10833
10834 xmlParseContent(ctxt);
10835
Daniel Veillard561b7f82002-03-20 21:55:57 +000010836 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010837 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard561b7f82002-03-20 21:55:57 +000010838 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010839 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010840 }
10841 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010842 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010843 }
10844
10845 if (!ctxt->wellFormed) {
10846 if (ctxt->errNo == 0)
Daniel Veillard7d515752003-09-26 19:12:37 +000010847 ret = XML_ERR_INTERNAL_ERROR;
Owen Taylor3473f882001-02-23 17:55:21 +000010848 else
William M. Brack7b9154b2003-09-27 19:23:50 +000010849 ret = (xmlParserErrors)ctxt->errNo;
Owen Taylor3473f882001-02-23 17:55:21 +000010850 } else {
10851 if (list != NULL) {
10852 xmlNodePtr cur;
10853
10854 /*
10855 * Return the newly created nodeset after unlinking it from
10856 * they pseudo parent.
10857 */
10858 cur = newDoc->children->children;
10859 *list = cur;
10860 while (cur != NULL) {
10861 cur->parent = NULL;
10862 cur = cur->next;
10863 }
10864 newDoc->children->children = NULL;
10865 }
Daniel Veillard7d515752003-09-26 19:12:37 +000010866 ret = XML_ERR_OK;
Owen Taylor3473f882001-02-23 17:55:21 +000010867 }
10868 if (sax != NULL)
10869 ctxt->sax = oldsax;
Daniel Veillard0046c0f2003-02-23 13:52:30 +000010870 oldctxt->node_seq.maximum = ctxt->node_seq.maximum;
10871 oldctxt->node_seq.length = ctxt->node_seq.length;
10872 oldctxt->node_seq.buffer = ctxt->node_seq.buffer;
Daniel Veillard44e1dd02003-02-21 23:23:28 +000010873 ctxt->node_seq.maximum = 0;
10874 ctxt->node_seq.length = 0;
10875 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000010876 xmlFreeParserCtxt(ctxt);
10877 newDoc->intSubset = NULL;
10878 newDoc->extSubset = NULL;
10879 xmlFreeDoc(newDoc);
10880
10881 return(ret);
10882}
10883
Daniel Veillard81273902003-09-30 00:43:48 +000010884#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000010885/**
Daniel Veillard257d9102001-05-08 10:41:44 +000010886 * xmlParseExternalEntity:
10887 * @doc: the document the chunk pertains to
10888 * @sax: the SAX handler bloc (possibly NULL)
10889 * @user_data: The user data returned on SAX callbacks (possibly NULL)
10890 * @depth: Used for loop detection, use 0
10891 * @URL: the URL for the entity to load
10892 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +000010893 * @lst: the return value for the set of parsed nodes
Daniel Veillard257d9102001-05-08 10:41:44 +000010894 *
10895 * Parse an external general entity
10896 * An external general parsed entity is well-formed if it matches the
10897 * production labeled extParsedEnt.
10898 *
10899 * [78] extParsedEnt ::= TextDecl? content
10900 *
10901 * Returns 0 if the entity is well formed, -1 in case of args problem and
10902 * the parser error code otherwise
10903 */
10904
10905int
10906xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardcda96922001-08-21 10:56:31 +000010907 int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *lst) {
Daniel Veillarda97a19b2001-05-20 13:19:52 +000010908 return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL,
Daniel Veillardcda96922001-08-21 10:56:31 +000010909 ID, lst));
Daniel Veillard257d9102001-05-08 10:41:44 +000010910}
10911
10912/**
Daniel Veillarde020c3a2001-03-21 18:06:15 +000010913 * xmlParseBalancedChunkMemory:
Owen Taylor3473f882001-02-23 17:55:21 +000010914 * @doc: the document the chunk pertains to
10915 * @sax: the SAX handler bloc (possibly NULL)
10916 * @user_data: The user data returned on SAX callbacks (possibly NULL)
10917 * @depth: Used for loop detection, use 0
10918 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
Daniel Veillardcda96922001-08-21 10:56:31 +000010919 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +000010920 *
10921 * Parse a well-balanced chunk of an XML document
10922 * called by the parser
10923 * The allowed sequence for the Well Balanced Chunk is the one defined by
10924 * the content production in the XML grammar:
10925 *
10926 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
10927 *
10928 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
10929 * the parser error code otherwise
10930 */
10931
10932int
10933xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
Daniel Veillardcda96922001-08-21 10:56:31 +000010934 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) {
Daniel Veillard58e44c92002-08-02 22:19:49 +000010935 return xmlParseBalancedChunkMemoryRecover( doc, sax, user_data,
10936 depth, string, lst, 0 );
10937}
Daniel Veillard81273902003-09-30 00:43:48 +000010938#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillard58e44c92002-08-02 22:19:49 +000010939
10940/**
Daniel Veillard328f48c2002-11-15 15:24:34 +000010941 * xmlParseBalancedChunkMemoryInternal:
10942 * @oldctxt: the existing parsing context
10943 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
10944 * @user_data: the user data field for the parser context
10945 * @lst: the return value for the set of parsed nodes
10946 *
10947 *
10948 * Parse a well-balanced chunk of an XML document
10949 * called by the parser
10950 * The allowed sequence for the Well Balanced Chunk is the one defined by
10951 * the content production in the XML grammar:
10952 *
10953 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
10954 *
Daniel Veillard7d515752003-09-26 19:12:37 +000010955 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
10956 * error code otherwise
Daniel Veillard328f48c2002-11-15 15:24:34 +000010957 *
10958 * In case recover is set to 1, the nodelist will not be empty even if
10959 * the parsed chunk is not well balanced.
10960 */
Daniel Veillard7d515752003-09-26 19:12:37 +000010961static xmlParserErrors
Daniel Veillard328f48c2002-11-15 15:24:34 +000010962xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
10963 const xmlChar *string, void *user_data, xmlNodePtr *lst) {
10964 xmlParserCtxtPtr ctxt;
Daniel Veillard68e9e742002-11-16 15:35:11 +000010965 xmlDocPtr newDoc = NULL;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010966 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000010967 xmlNodePtr content = NULL;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010968 int size;
Daniel Veillard7d515752003-09-26 19:12:37 +000010969 xmlParserErrors ret = XML_ERR_OK;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010970
10971 if (oldctxt->depth > 40) {
10972 return(XML_ERR_ENTITY_LOOP);
10973 }
10974
10975
10976 if (lst != NULL)
10977 *lst = NULL;
10978 if (string == NULL)
William M. Brack7b9154b2003-09-27 19:23:50 +000010979 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard328f48c2002-11-15 15:24:34 +000010980
10981 size = xmlStrlen(string);
10982
10983 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
William M. Brack7b9154b2003-09-27 19:23:50 +000010984 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
Daniel Veillard328f48c2002-11-15 15:24:34 +000010985 if (user_data != NULL)
10986 ctxt->userData = user_data;
10987 else
10988 ctxt->userData = ctxt;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000010989 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
10990 ctxt->dict = oldctxt->dict;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010991
10992 oldsax = ctxt->sax;
10993 ctxt->sax = oldctxt->sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000010994 xmlDetectSAX2(ctxt);
10995
Daniel Veillarde1ca5032002-12-09 14:13:43 +000010996 ctxt->_private = oldctxt->_private;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010997 if (oldctxt->myDoc == NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000010998 newDoc = xmlNewDoc(BAD_CAST "1.0");
10999 if (newDoc == NULL) {
11000 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000011001 ctxt->dict = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000011002 xmlFreeParserCtxt(ctxt);
William M. Brack7b9154b2003-09-27 19:23:50 +000011003 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard68e9e742002-11-16 15:35:11 +000011004 }
Daniel Veillard328f48c2002-11-15 15:24:34 +000011005 ctxt->myDoc = newDoc;
Daniel Veillard68e9e742002-11-16 15:35:11 +000011006 } else {
11007 ctxt->myDoc = oldctxt->myDoc;
11008 content = ctxt->myDoc->children;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011009 }
Daniel Veillard9bc53102002-11-25 13:20:04 +000011010 ctxt->myDoc->children = xmlNewDocNode(ctxt->myDoc, NULL,
Daniel Veillard68e9e742002-11-16 15:35:11 +000011011 BAD_CAST "pseudoroot", NULL);
11012 if (ctxt->myDoc->children == NULL) {
11013 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000011014 ctxt->dict = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000011015 xmlFreeParserCtxt(ctxt);
11016 if (newDoc != NULL)
11017 xmlFreeDoc(newDoc);
William M. Brack7b9154b2003-09-27 19:23:50 +000011018 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard68e9e742002-11-16 15:35:11 +000011019 }
11020 nodePush(ctxt, ctxt->myDoc->children);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011021 ctxt->instate = XML_PARSER_CONTENT;
11022 ctxt->depth = oldctxt->depth + 1;
11023
Daniel Veillard328f48c2002-11-15 15:24:34 +000011024 ctxt->validate = 0;
11025 ctxt->loadsubset = oldctxt->loadsubset;
Daniel Veillardef8dd7b2003-03-23 12:02:56 +000011026 if ((oldctxt->validate) || (oldctxt->replaceEntities != 0)) {
11027 /*
11028 * ID/IDREF registration will be done in xmlValidateElement below
11029 */
11030 ctxt->loadsubset |= XML_SKIP_IDS;
11031 }
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000011032 ctxt->dictNames = oldctxt->dictNames;
Daniel Veillard95d2d5b2003-10-27 14:54:49 +000011033 ctxt->attsDefault = oldctxt->attsDefault;
11034 ctxt->attsSpecial = oldctxt->attsSpecial;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011035
Daniel Veillard68e9e742002-11-16 15:35:11 +000011036 xmlParseContent(ctxt);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011037 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011038 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011039 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011040 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011041 }
Daniel Veillard68e9e742002-11-16 15:35:11 +000011042 if (ctxt->node != ctxt->myDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011043 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011044 }
11045
11046 if (!ctxt->wellFormed) {
11047 if (ctxt->errNo == 0)
Daniel Veillard7d515752003-09-26 19:12:37 +000011048 ret = XML_ERR_INTERNAL_ERROR;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011049 else
William M. Brack7b9154b2003-09-27 19:23:50 +000011050 ret = (xmlParserErrors)ctxt->errNo;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011051 } else {
William M. Brack7b9154b2003-09-27 19:23:50 +000011052 ret = XML_ERR_OK;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011053 }
11054
William M. Brack7b9154b2003-09-27 19:23:50 +000011055 if ((lst != NULL) && (ret == XML_ERR_OK)) {
Daniel Veillard328f48c2002-11-15 15:24:34 +000011056 xmlNodePtr cur;
11057
11058 /*
11059 * Return the newly created nodeset after unlinking it from
11060 * they pseudo parent.
11061 */
Daniel Veillard68e9e742002-11-16 15:35:11 +000011062 cur = ctxt->myDoc->children->children;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011063 *lst = cur;
11064 while (cur != NULL) {
Daniel Veillard4432df22003-09-28 18:58:27 +000011065#ifdef LIBXML_VALID_ENABLED
Daniel Veillard8d589042003-02-04 15:07:21 +000011066 if (oldctxt->validate && oldctxt->wellFormed &&
11067 oldctxt->myDoc && oldctxt->myDoc->intSubset) {
11068 oldctxt->valid &= xmlValidateElement(&oldctxt->vctxt,
11069 oldctxt->myDoc, cur);
11070 }
Daniel Veillard4432df22003-09-28 18:58:27 +000011071#endif /* LIBXML_VALID_ENABLED */
Daniel Veillard328f48c2002-11-15 15:24:34 +000011072 cur->parent = NULL;
11073 cur = cur->next;
11074 }
Daniel Veillard68e9e742002-11-16 15:35:11 +000011075 ctxt->myDoc->children->children = NULL;
11076 }
11077 if (ctxt->myDoc != NULL) {
11078 xmlFreeNode(ctxt->myDoc->children);
11079 ctxt->myDoc->children = content;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011080 }
11081
11082 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000011083 ctxt->dict = NULL;
Daniel Veillard95d2d5b2003-10-27 14:54:49 +000011084 ctxt->attsDefault = NULL;
11085 ctxt->attsSpecial = NULL;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011086 xmlFreeParserCtxt(ctxt);
Daniel Veillard68e9e742002-11-16 15:35:11 +000011087 if (newDoc != NULL)
11088 xmlFreeDoc(newDoc);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011089
11090 return(ret);
11091}
11092
Daniel Veillard81273902003-09-30 00:43:48 +000011093#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard328f48c2002-11-15 15:24:34 +000011094/**
Daniel Veillard58e44c92002-08-02 22:19:49 +000011095 * xmlParseBalancedChunkMemoryRecover:
11096 * @doc: the document the chunk pertains to
11097 * @sax: the SAX handler bloc (possibly NULL)
11098 * @user_data: The user data returned on SAX callbacks (possibly NULL)
11099 * @depth: Used for loop detection, use 0
11100 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
11101 * @lst: the return value for the set of parsed nodes
11102 * @recover: return nodes even if the data is broken (use 0)
11103 *
11104 *
11105 * Parse a well-balanced chunk of an XML document
11106 * called by the parser
11107 * The allowed sequence for the Well Balanced Chunk is the one defined by
11108 * the content production in the XML grammar:
11109 *
11110 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
11111 *
11112 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
11113 * the parser error code otherwise
11114 *
11115 * In case recover is set to 1, the nodelist will not be empty even if
11116 * the parsed chunk is not well balanced.
11117 */
11118int
11119xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax,
11120 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst,
11121 int recover) {
Owen Taylor3473f882001-02-23 17:55:21 +000011122 xmlParserCtxtPtr ctxt;
11123 xmlDocPtr newDoc;
11124 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillard935494a2002-10-22 14:22:46 +000011125 xmlNodePtr content;
Owen Taylor3473f882001-02-23 17:55:21 +000011126 int size;
11127 int ret = 0;
11128
11129 if (depth > 40) {
11130 return(XML_ERR_ENTITY_LOOP);
11131 }
11132
11133
Daniel Veillardcda96922001-08-21 10:56:31 +000011134 if (lst != NULL)
11135 *lst = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000011136 if (string == NULL)
11137 return(-1);
11138
11139 size = xmlStrlen(string);
11140
11141 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
11142 if (ctxt == NULL) return(-1);
11143 ctxt->userData = ctxt;
11144 if (sax != NULL) {
11145 oldsax = ctxt->sax;
11146 ctxt->sax = sax;
11147 if (user_data != NULL)
11148 ctxt->userData = user_data;
11149 }
11150 newDoc = xmlNewDoc(BAD_CAST "1.0");
11151 if (newDoc == NULL) {
11152 xmlFreeParserCtxt(ctxt);
11153 return(-1);
11154 }
11155 if (doc != NULL) {
11156 newDoc->intSubset = doc->intSubset;
11157 newDoc->extSubset = doc->extSubset;
11158 }
11159 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
11160 if (newDoc->children == NULL) {
11161 if (sax != NULL)
11162 ctxt->sax = oldsax;
11163 xmlFreeParserCtxt(ctxt);
11164 newDoc->intSubset = NULL;
11165 newDoc->extSubset = NULL;
11166 xmlFreeDoc(newDoc);
11167 return(-1);
11168 }
11169 nodePush(ctxt, newDoc->children);
11170 if (doc == NULL) {
11171 ctxt->myDoc = newDoc;
11172 } else {
Daniel Veillard42766c02002-08-22 20:52:17 +000011173 ctxt->myDoc = newDoc;
Owen Taylor3473f882001-02-23 17:55:21 +000011174 newDoc->children->doc = doc;
11175 }
11176 ctxt->instate = XML_PARSER_CONTENT;
11177 ctxt->depth = depth;
11178
11179 /*
11180 * Doing validity checking on chunk doesn't make sense
11181 */
11182 ctxt->validate = 0;
11183 ctxt->loadsubset = 0;
Daniel Veillarde57ec792003-09-10 10:50:59 +000011184 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011185
Daniel Veillardb39bc392002-10-26 19:29:51 +000011186 if ( doc != NULL ){
11187 content = doc->children;
11188 doc->children = NULL;
11189 xmlParseContent(ctxt);
11190 doc->children = content;
11191 }
11192 else {
11193 xmlParseContent(ctxt);
11194 }
Owen Taylor3473f882001-02-23 17:55:21 +000011195 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011196 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011197 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011198 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011199 }
11200 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011201 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011202 }
11203
11204 if (!ctxt->wellFormed) {
11205 if (ctxt->errNo == 0)
11206 ret = 1;
11207 else
11208 ret = ctxt->errNo;
11209 } else {
Daniel Veillard58e44c92002-08-02 22:19:49 +000011210 ret = 0;
11211 }
11212
11213 if (lst != NULL && (ret == 0 || recover == 1)) {
11214 xmlNodePtr cur;
Owen Taylor3473f882001-02-23 17:55:21 +000011215
11216 /*
11217 * Return the newly created nodeset after unlinking it from
11218 * they pseudo parent.
11219 */
11220 cur = newDoc->children->children;
Daniel Veillardcda96922001-08-21 10:56:31 +000011221 *lst = cur;
Owen Taylor3473f882001-02-23 17:55:21 +000011222 while (cur != NULL) {
11223 cur->parent = NULL;
11224 cur = cur->next;
11225 }
11226 newDoc->children->children = NULL;
11227 }
Daniel Veillard58e44c92002-08-02 22:19:49 +000011228
Owen Taylor3473f882001-02-23 17:55:21 +000011229 if (sax != NULL)
11230 ctxt->sax = oldsax;
11231 xmlFreeParserCtxt(ctxt);
11232 newDoc->intSubset = NULL;
11233 newDoc->extSubset = NULL;
11234 xmlFreeDoc(newDoc);
11235
11236 return(ret);
11237}
11238
11239/**
11240 * xmlSAXParseEntity:
11241 * @sax: the SAX handler block
11242 * @filename: the filename
11243 *
11244 * parse an XML external entity out of context and build a tree.
11245 * It use the given SAX function block to handle the parsing callback.
11246 * If sax is NULL, fallback to the default DOM tree building routines.
11247 *
11248 * [78] extParsedEnt ::= TextDecl? content
11249 *
11250 * This correspond to a "Well Balanced" chunk
11251 *
11252 * Returns the resulting document tree
11253 */
11254
11255xmlDocPtr
11256xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
11257 xmlDocPtr ret;
11258 xmlParserCtxtPtr ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000011259
11260 ctxt = xmlCreateFileParserCtxt(filename);
11261 if (ctxt == NULL) {
11262 return(NULL);
11263 }
11264 if (sax != NULL) {
11265 if (ctxt->sax != NULL)
11266 xmlFree(ctxt->sax);
11267 ctxt->sax = sax;
11268 ctxt->userData = NULL;
11269 }
11270
Owen Taylor3473f882001-02-23 17:55:21 +000011271 xmlParseExtParsedEnt(ctxt);
11272
11273 if (ctxt->wellFormed)
11274 ret = ctxt->myDoc;
11275 else {
11276 ret = NULL;
11277 xmlFreeDoc(ctxt->myDoc);
11278 ctxt->myDoc = NULL;
11279 }
11280 if (sax != NULL)
11281 ctxt->sax = NULL;
11282 xmlFreeParserCtxt(ctxt);
11283
11284 return(ret);
11285}
11286
11287/**
11288 * xmlParseEntity:
11289 * @filename: the filename
11290 *
11291 * parse an XML external entity out of context and build a tree.
11292 *
11293 * [78] extParsedEnt ::= TextDecl? content
11294 *
11295 * This correspond to a "Well Balanced" chunk
11296 *
11297 * Returns the resulting document tree
11298 */
11299
11300xmlDocPtr
11301xmlParseEntity(const char *filename) {
11302 return(xmlSAXParseEntity(NULL, filename));
11303}
Daniel Veillard81273902003-09-30 00:43:48 +000011304#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000011305
11306/**
11307 * xmlCreateEntityParserCtxt:
11308 * @URL: the entity URL
11309 * @ID: the entity PUBLIC ID
Daniel Veillardcbaf3992001-12-31 16:16:02 +000011310 * @base: a possible base for the target URI
Owen Taylor3473f882001-02-23 17:55:21 +000011311 *
11312 * Create a parser context for an external entity
11313 * Automatic support for ZLIB/Compress compressed document is provided
11314 * by default if found at compile-time.
11315 *
11316 * Returns the new parser context or NULL
11317 */
11318xmlParserCtxtPtr
11319xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
11320 const xmlChar *base) {
11321 xmlParserCtxtPtr ctxt;
11322 xmlParserInputPtr inputStream;
11323 char *directory = NULL;
11324 xmlChar *uri;
11325
11326 ctxt = xmlNewParserCtxt();
11327 if (ctxt == NULL) {
11328 return(NULL);
11329 }
11330
11331 uri = xmlBuildURI(URL, base);
11332
11333 if (uri == NULL) {
11334 inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
11335 if (inputStream == NULL) {
11336 xmlFreeParserCtxt(ctxt);
11337 return(NULL);
11338 }
11339
11340 inputPush(ctxt, inputStream);
11341
11342 if ((ctxt->directory == NULL) && (directory == NULL))
11343 directory = xmlParserGetDirectory((char *)URL);
11344 if ((ctxt->directory == NULL) && (directory != NULL))
11345 ctxt->directory = directory;
11346 } else {
11347 inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt);
11348 if (inputStream == NULL) {
11349 xmlFree(uri);
11350 xmlFreeParserCtxt(ctxt);
11351 return(NULL);
11352 }
11353
11354 inputPush(ctxt, inputStream);
11355
11356 if ((ctxt->directory == NULL) && (directory == NULL))
11357 directory = xmlParserGetDirectory((char *)uri);
11358 if ((ctxt->directory == NULL) && (directory != NULL))
11359 ctxt->directory = directory;
11360 xmlFree(uri);
11361 }
Owen Taylor3473f882001-02-23 17:55:21 +000011362 return(ctxt);
11363}
11364
11365/************************************************************************
11366 * *
11367 * Front ends when parsing from a file *
11368 * *
11369 ************************************************************************/
11370
11371/**
11372 * xmlCreateFileParserCtxt:
11373 * @filename: the filename
11374 *
11375 * Create a parser context for a file content.
11376 * Automatic support for ZLIB/Compress compressed document is provided
11377 * by default if found at compile-time.
11378 *
11379 * Returns the new parser context or NULL
11380 */
11381xmlParserCtxtPtr
11382xmlCreateFileParserCtxt(const char *filename)
11383{
11384 xmlParserCtxtPtr ctxt;
11385 xmlParserInputPtr inputStream;
Owen Taylor3473f882001-02-23 17:55:21 +000011386 char *directory = NULL;
11387
Owen Taylor3473f882001-02-23 17:55:21 +000011388 ctxt = xmlNewParserCtxt();
11389 if (ctxt == NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +000011390 xmlErrMemory(NULL, "cannot allocate parser context");
Owen Taylor3473f882001-02-23 17:55:21 +000011391 return(NULL);
11392 }
11393
Igor Zlatkovicce076162003-02-23 13:39:39 +000011394
Daniel Veillard4e9b1bc2003-06-09 10:30:33 +000011395 inputStream = xmlLoadExternalEntity(filename, NULL, ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011396 if (inputStream == NULL) {
11397 xmlFreeParserCtxt(ctxt);
11398 return(NULL);
11399 }
11400
Owen Taylor3473f882001-02-23 17:55:21 +000011401 inputPush(ctxt, inputStream);
11402 if ((ctxt->directory == NULL) && (directory == NULL))
Igor Zlatkovic5f9fada2003-02-19 14:51:00 +000011403 directory = xmlParserGetDirectory(filename);
Owen Taylor3473f882001-02-23 17:55:21 +000011404 if ((ctxt->directory == NULL) && (directory != NULL))
11405 ctxt->directory = directory;
11406
11407 return(ctxt);
11408}
11409
Daniel Veillard81273902003-09-30 00:43:48 +000011410#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000011411/**
Daniel Veillarda293c322001-10-02 13:54:14 +000011412 * xmlSAXParseFileWithData:
Owen Taylor3473f882001-02-23 17:55:21 +000011413 * @sax: the SAX handler block
11414 * @filename: the filename
11415 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
11416 * documents
Daniel Veillarda293c322001-10-02 13:54:14 +000011417 * @data: the userdata
Owen Taylor3473f882001-02-23 17:55:21 +000011418 *
11419 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
11420 * compressed document is provided by default if found at compile-time.
11421 * It use the given SAX function block to handle the parsing callback.
11422 * If sax is NULL, fallback to the default DOM tree building routines.
11423 *
Daniel Veillarde19fc232002-04-22 16:01:24 +000011424 * User data (void *) is stored within the parser context in the
11425 * context's _private member, so it is available nearly everywhere in libxml
Daniel Veillarda293c322001-10-02 13:54:14 +000011426 *
Owen Taylor3473f882001-02-23 17:55:21 +000011427 * Returns the resulting document tree
11428 */
11429
11430xmlDocPtr
Daniel Veillarda293c322001-10-02 13:54:14 +000011431xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename,
11432 int recovery, void *data) {
Owen Taylor3473f882001-02-23 17:55:21 +000011433 xmlDocPtr ret;
11434 xmlParserCtxtPtr ctxt;
11435 char *directory = NULL;
11436
Daniel Veillard635ef722001-10-29 11:48:19 +000011437 xmlInitParser();
11438
Owen Taylor3473f882001-02-23 17:55:21 +000011439 ctxt = xmlCreateFileParserCtxt(filename);
11440 if (ctxt == NULL) {
11441 return(NULL);
11442 }
11443 if (sax != NULL) {
11444 if (ctxt->sax != NULL)
11445 xmlFree(ctxt->sax);
11446 ctxt->sax = sax;
Owen Taylor3473f882001-02-23 17:55:21 +000011447 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000011448 xmlDetectSAX2(ctxt);
Daniel Veillarda293c322001-10-02 13:54:14 +000011449 if (data!=NULL) {
Daniel Veillardc790bf42003-10-11 10:50:10 +000011450 ctxt->_private = data;
Daniel Veillarda293c322001-10-02 13:54:14 +000011451 }
Owen Taylor3473f882001-02-23 17:55:21 +000011452
11453 if ((ctxt->directory == NULL) && (directory == NULL))
11454 directory = xmlParserGetDirectory(filename);
11455 if ((ctxt->directory == NULL) && (directory != NULL))
11456 ctxt->directory = (char *) xmlStrdup((xmlChar *) directory);
11457
Daniel Veillarddad3f682002-11-17 16:47:27 +000011458 ctxt->recovery = recovery;
11459
Owen Taylor3473f882001-02-23 17:55:21 +000011460 xmlParseDocument(ctxt);
11461
William M. Brackc07329e2003-09-08 01:57:30 +000011462 if ((ctxt->wellFormed) || recovery) {
11463 ret = ctxt->myDoc;
Daniel Veillardb65e12e2003-10-08 21:33:28 +000011464 if (ret != NULL) {
11465 if (ctxt->input->buf->compressed > 0)
11466 ret->compression = 9;
11467 else
11468 ret->compression = ctxt->input->buf->compressed;
11469 }
William M. Brackc07329e2003-09-08 01:57:30 +000011470 }
Owen Taylor3473f882001-02-23 17:55:21 +000011471 else {
11472 ret = NULL;
11473 xmlFreeDoc(ctxt->myDoc);
11474 ctxt->myDoc = NULL;
11475 }
11476 if (sax != NULL)
11477 ctxt->sax = NULL;
11478 xmlFreeParserCtxt(ctxt);
11479
11480 return(ret);
11481}
11482
11483/**
Daniel Veillarda293c322001-10-02 13:54:14 +000011484 * xmlSAXParseFile:
11485 * @sax: the SAX handler block
11486 * @filename: the filename
11487 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
11488 * documents
11489 *
11490 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
11491 * compressed document is provided by default if found at compile-time.
11492 * It use the given SAX function block to handle the parsing callback.
11493 * If sax is NULL, fallback to the default DOM tree building routines.
11494 *
11495 * Returns the resulting document tree
11496 */
11497
11498xmlDocPtr
11499xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
11500 int recovery) {
11501 return(xmlSAXParseFileWithData(sax,filename,recovery,NULL));
11502}
11503
11504/**
Owen Taylor3473f882001-02-23 17:55:21 +000011505 * xmlRecoverDoc:
11506 * @cur: a pointer to an array of xmlChar
11507 *
11508 * parse an XML in-memory document and build a tree.
11509 * In the case the document is not Well Formed, a tree is built anyway
11510 *
11511 * Returns the resulting document tree
11512 */
11513
11514xmlDocPtr
11515xmlRecoverDoc(xmlChar *cur) {
11516 return(xmlSAXParseDoc(NULL, cur, 1));
11517}
11518
11519/**
11520 * xmlParseFile:
11521 * @filename: the filename
11522 *
11523 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
11524 * compressed document is provided by default if found at compile-time.
11525 *
Daniel Veillard5d96fff2001-08-31 14:55:30 +000011526 * Returns the resulting document tree if the file was wellformed,
11527 * NULL otherwise.
Owen Taylor3473f882001-02-23 17:55:21 +000011528 */
11529
11530xmlDocPtr
11531xmlParseFile(const char *filename) {
11532 return(xmlSAXParseFile(NULL, filename, 0));
11533}
11534
11535/**
11536 * xmlRecoverFile:
11537 * @filename: the filename
11538 *
11539 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
11540 * compressed document is provided by default if found at compile-time.
11541 * In the case the document is not Well Formed, a tree is built anyway
11542 *
11543 * Returns the resulting document tree
11544 */
11545
11546xmlDocPtr
11547xmlRecoverFile(const char *filename) {
11548 return(xmlSAXParseFile(NULL, filename, 1));
11549}
11550
11551
11552/**
11553 * xmlSetupParserForBuffer:
11554 * @ctxt: an XML parser context
11555 * @buffer: a xmlChar * buffer
11556 * @filename: a file name
11557 *
11558 * Setup the parser context to parse a new buffer; Clears any prior
11559 * contents from the parser context. The buffer parameter must not be
11560 * NULL, but the filename parameter can be
11561 */
11562void
11563xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
11564 const char* filename)
11565{
11566 xmlParserInputPtr input;
11567
11568 input = xmlNewInputStream(ctxt);
11569 if (input == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +000011570 xmlErrMemory(NULL, "parsing new buffer: out of memory\n");
Owen Taylor3473f882001-02-23 17:55:21 +000011571 xmlFree(ctxt);
11572 return;
11573 }
11574
11575 xmlClearParserCtxt(ctxt);
11576 if (filename != NULL)
Daniel Veillardc3ca5ba2003-05-09 22:26:28 +000011577 input->filename = (char *) xmlCanonicPath((const xmlChar *)filename);
Owen Taylor3473f882001-02-23 17:55:21 +000011578 input->base = buffer;
11579 input->cur = buffer;
Daniel Veillard48b2f892001-02-25 16:11:03 +000011580 input->end = &buffer[xmlStrlen(buffer)];
Owen Taylor3473f882001-02-23 17:55:21 +000011581 inputPush(ctxt, input);
11582}
11583
11584/**
11585 * xmlSAXUserParseFile:
11586 * @sax: a SAX handler
11587 * @user_data: The user data returned on SAX callbacks
11588 * @filename: a file name
11589 *
11590 * parse an XML file and call the given SAX handler routines.
11591 * Automatic support for ZLIB/Compress compressed document is provided
11592 *
11593 * Returns 0 in case of success or a error number otherwise
11594 */
11595int
11596xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
11597 const char *filename) {
11598 int ret = 0;
11599 xmlParserCtxtPtr ctxt;
11600
11601 ctxt = xmlCreateFileParserCtxt(filename);
11602 if (ctxt == NULL) return -1;
Daniel Veillard81273902003-09-30 00:43:48 +000011603#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard092643b2003-09-25 14:29:29 +000011604 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Daniel Veillard81273902003-09-30 00:43:48 +000011605#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000011606 xmlFree(ctxt->sax);
11607 ctxt->sax = sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000011608 xmlDetectSAX2(ctxt);
11609
Owen Taylor3473f882001-02-23 17:55:21 +000011610 if (user_data != NULL)
11611 ctxt->userData = user_data;
11612
11613 xmlParseDocument(ctxt);
11614
11615 if (ctxt->wellFormed)
11616 ret = 0;
11617 else {
11618 if (ctxt->errNo != 0)
11619 ret = ctxt->errNo;
11620 else
11621 ret = -1;
11622 }
11623 if (sax != NULL)
11624 ctxt->sax = NULL;
11625 xmlFreeParserCtxt(ctxt);
11626
11627 return ret;
11628}
Daniel Veillard81273902003-09-30 00:43:48 +000011629#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000011630
11631/************************************************************************
11632 * *
11633 * Front ends when parsing from memory *
11634 * *
11635 ************************************************************************/
11636
11637/**
11638 * xmlCreateMemoryParserCtxt:
11639 * @buffer: a pointer to a char array
11640 * @size: the size of the array
11641 *
11642 * Create a parser context for an XML in-memory document.
11643 *
11644 * Returns the new parser context or NULL
11645 */
11646xmlParserCtxtPtr
Daniel Veillardfd7ddca2001-05-16 10:57:35 +000011647xmlCreateMemoryParserCtxt(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000011648 xmlParserCtxtPtr ctxt;
11649 xmlParserInputPtr input;
11650 xmlParserInputBufferPtr buf;
11651
11652 if (buffer == NULL)
11653 return(NULL);
11654 if (size <= 0)
11655 return(NULL);
11656
11657 ctxt = xmlNewParserCtxt();
11658 if (ctxt == NULL)
11659 return(NULL);
11660
Daniel Veillard53350552003-09-18 13:35:51 +000011661 /* TODO: xmlParserInputBufferCreateStatic, requires some serious changes */
Owen Taylor3473f882001-02-23 17:55:21 +000011662 buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
Daniel Veillarda7e05b42002-11-19 08:11:14 +000011663 if (buf == NULL) {
11664 xmlFreeParserCtxt(ctxt);
11665 return(NULL);
11666 }
Owen Taylor3473f882001-02-23 17:55:21 +000011667
11668 input = xmlNewInputStream(ctxt);
11669 if (input == NULL) {
Daniel Veillarda7e05b42002-11-19 08:11:14 +000011670 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000011671 xmlFreeParserCtxt(ctxt);
11672 return(NULL);
11673 }
11674
11675 input->filename = NULL;
11676 input->buf = buf;
11677 input->base = input->buf->buffer->content;
11678 input->cur = input->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +000011679 input->end = &input->buf->buffer->content[input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000011680
11681 inputPush(ctxt, input);
11682 return(ctxt);
11683}
11684
Daniel Veillard81273902003-09-30 00:43:48 +000011685#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000011686/**
Daniel Veillard8606bbb2002-11-12 12:36:52 +000011687 * xmlSAXParseMemoryWithData:
11688 * @sax: the SAX handler block
11689 * @buffer: an pointer to a char array
11690 * @size: the size of the array
11691 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
11692 * documents
11693 * @data: the userdata
11694 *
11695 * parse an XML in-memory block and use the given SAX function block
11696 * to handle the parsing callback. If sax is NULL, fallback to the default
11697 * DOM tree building routines.
11698 *
11699 * User data (void *) is stored within the parser context in the
11700 * context's _private member, so it is available nearly everywhere in libxml
11701 *
11702 * Returns the resulting document tree
11703 */
11704
11705xmlDocPtr
11706xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer,
11707 int size, int recovery, void *data) {
11708 xmlDocPtr ret;
11709 xmlParserCtxtPtr ctxt;
11710
11711 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
11712 if (ctxt == NULL) return(NULL);
11713 if (sax != NULL) {
11714 if (ctxt->sax != NULL)
11715 xmlFree(ctxt->sax);
11716 ctxt->sax = sax;
11717 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000011718 xmlDetectSAX2(ctxt);
Daniel Veillard8606bbb2002-11-12 12:36:52 +000011719 if (data!=NULL) {
11720 ctxt->_private=data;
11721 }
11722
Daniel Veillardadba5f12003-04-04 16:09:01 +000011723 ctxt->recovery = recovery;
11724
Daniel Veillard8606bbb2002-11-12 12:36:52 +000011725 xmlParseDocument(ctxt);
11726
11727 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
11728 else {
11729 ret = NULL;
11730 xmlFreeDoc(ctxt->myDoc);
11731 ctxt->myDoc = NULL;
11732 }
11733 if (sax != NULL)
11734 ctxt->sax = NULL;
11735 xmlFreeParserCtxt(ctxt);
11736
11737 return(ret);
11738}
11739
11740/**
Owen Taylor3473f882001-02-23 17:55:21 +000011741 * xmlSAXParseMemory:
11742 * @sax: the SAX handler block
11743 * @buffer: an pointer to a char array
11744 * @size: the size of the array
11745 * @recovery: work in recovery mode, i.e. tries to read not Well Formed
11746 * documents
11747 *
11748 * parse an XML in-memory block and use the given SAX function block
11749 * to handle the parsing callback. If sax is NULL, fallback to the default
11750 * DOM tree building routines.
11751 *
11752 * Returns the resulting document tree
11753 */
11754xmlDocPtr
Daniel Veillard50822cb2001-07-26 20:05:51 +000011755xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer,
11756 int size, int recovery) {
Daniel Veillard8606bbb2002-11-12 12:36:52 +000011757 return xmlSAXParseMemoryWithData(sax, buffer, size, recovery, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011758}
11759
11760/**
11761 * xmlParseMemory:
11762 * @buffer: an pointer to a char array
11763 * @size: the size of the array
11764 *
11765 * parse an XML in-memory block and build a tree.
11766 *
11767 * Returns the resulting document tree
11768 */
11769
Daniel Veillard50822cb2001-07-26 20:05:51 +000011770xmlDocPtr xmlParseMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000011771 return(xmlSAXParseMemory(NULL, buffer, size, 0));
11772}
11773
11774/**
11775 * xmlRecoverMemory:
11776 * @buffer: an pointer to a char array
11777 * @size: the size of the array
11778 *
11779 * parse an XML in-memory block and build a tree.
11780 * In the case the document is not Well Formed, a tree is built anyway
11781 *
11782 * Returns the resulting document tree
11783 */
11784
Daniel Veillard50822cb2001-07-26 20:05:51 +000011785xmlDocPtr xmlRecoverMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000011786 return(xmlSAXParseMemory(NULL, buffer, size, 1));
11787}
11788
11789/**
11790 * xmlSAXUserParseMemory:
11791 * @sax: a SAX handler
11792 * @user_data: The user data returned on SAX callbacks
11793 * @buffer: an in-memory XML document input
11794 * @size: the length of the XML document in bytes
11795 *
11796 * A better SAX parsing routine.
11797 * parse an XML in-memory buffer and call the given SAX handler routines.
11798 *
11799 * Returns 0 in case of success or a error number otherwise
11800 */
11801int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardfd7ddca2001-05-16 10:57:35 +000011802 const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000011803 int ret = 0;
11804 xmlParserCtxtPtr ctxt;
11805 xmlSAXHandlerPtr oldsax = NULL;
11806
Daniel Veillard9e923512002-08-14 08:48:52 +000011807 if (sax == NULL) return -1;
Owen Taylor3473f882001-02-23 17:55:21 +000011808 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
11809 if (ctxt == NULL) return -1;
Daniel Veillard9e923512002-08-14 08:48:52 +000011810 oldsax = ctxt->sax;
11811 ctxt->sax = sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000011812 xmlDetectSAX2(ctxt);
Daniel Veillard30211a02001-04-26 09:33:18 +000011813 if (user_data != NULL)
11814 ctxt->userData = user_data;
Owen Taylor3473f882001-02-23 17:55:21 +000011815
11816 xmlParseDocument(ctxt);
11817
11818 if (ctxt->wellFormed)
11819 ret = 0;
11820 else {
11821 if (ctxt->errNo != 0)
11822 ret = ctxt->errNo;
11823 else
11824 ret = -1;
11825 }
Daniel Veillard9e923512002-08-14 08:48:52 +000011826 ctxt->sax = oldsax;
Owen Taylor3473f882001-02-23 17:55:21 +000011827 xmlFreeParserCtxt(ctxt);
11828
11829 return ret;
11830}
Daniel Veillard81273902003-09-30 00:43:48 +000011831#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000011832
11833/**
11834 * xmlCreateDocParserCtxt:
11835 * @cur: a pointer to an array of xmlChar
11836 *
11837 * Creates a parser context for an XML in-memory document.
11838 *
11839 * Returns the new parser context or NULL
11840 */
11841xmlParserCtxtPtr
Daniel Veillard16fa96c2003-09-23 21:50:54 +000011842xmlCreateDocParserCtxt(const xmlChar *cur) {
Owen Taylor3473f882001-02-23 17:55:21 +000011843 int len;
11844
11845 if (cur == NULL)
11846 return(NULL);
11847 len = xmlStrlen(cur);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000011848 return(xmlCreateMemoryParserCtxt((const char *)cur, len));
Owen Taylor3473f882001-02-23 17:55:21 +000011849}
11850
Daniel Veillard81273902003-09-30 00:43:48 +000011851#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000011852/**
11853 * xmlSAXParseDoc:
11854 * @sax: the SAX handler block
11855 * @cur: a pointer to an array of xmlChar
11856 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
11857 * documents
11858 *
11859 * parse an XML in-memory document and build a tree.
11860 * It use the given SAX function block to handle the parsing callback.
11861 * If sax is NULL, fallback to the default DOM tree building routines.
11862 *
11863 * Returns the resulting document tree
11864 */
11865
11866xmlDocPtr
11867xmlSAXParseDoc(xmlSAXHandlerPtr sax, xmlChar *cur, int recovery) {
11868 xmlDocPtr ret;
11869 xmlParserCtxtPtr ctxt;
11870
11871 if (cur == NULL) return(NULL);
11872
11873
11874 ctxt = xmlCreateDocParserCtxt(cur);
11875 if (ctxt == NULL) return(NULL);
11876 if (sax != NULL) {
11877 ctxt->sax = sax;
11878 ctxt->userData = NULL;
11879 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000011880 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011881
11882 xmlParseDocument(ctxt);
11883 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
11884 else {
11885 ret = NULL;
11886 xmlFreeDoc(ctxt->myDoc);
11887 ctxt->myDoc = NULL;
11888 }
11889 if (sax != NULL)
11890 ctxt->sax = NULL;
11891 xmlFreeParserCtxt(ctxt);
11892
11893 return(ret);
11894}
11895
11896/**
11897 * xmlParseDoc:
11898 * @cur: a pointer to an array of xmlChar
11899 *
11900 * parse an XML in-memory document and build a tree.
11901 *
11902 * Returns the resulting document tree
11903 */
11904
11905xmlDocPtr
11906xmlParseDoc(xmlChar *cur) {
11907 return(xmlSAXParseDoc(NULL, cur, 0));
11908}
Daniel Veillard81273902003-09-30 00:43:48 +000011909#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000011910
Daniel Veillard81273902003-09-30 00:43:48 +000011911#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +000011912/************************************************************************
11913 * *
11914 * Specific function to keep track of entities references *
11915 * and used by the XSLT debugger *
11916 * *
11917 ************************************************************************/
11918
11919static xmlEntityReferenceFunc xmlEntityRefFunc = NULL;
11920
11921/**
11922 * xmlAddEntityReference:
11923 * @ent : A valid entity
11924 * @firstNode : A valid first node for children of entity
11925 * @lastNode : A valid last node of children entity
11926 *
11927 * Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY
11928 */
11929static void
11930xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
11931 xmlNodePtr lastNode)
11932{
11933 if (xmlEntityRefFunc != NULL) {
11934 (*xmlEntityRefFunc) (ent, firstNode, lastNode);
11935 }
11936}
11937
11938
11939/**
11940 * xmlSetEntityReferenceFunc:
Daniel Veillard01c13b52002-12-10 15:19:08 +000011941 * @func: A valid function
Daniel Veillard8107a222002-01-13 14:10:10 +000011942 *
11943 * Set the function to call call back when a xml reference has been made
11944 */
11945void
11946xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func)
11947{
11948 xmlEntityRefFunc = func;
11949}
Daniel Veillard81273902003-09-30 00:43:48 +000011950#endif /* LIBXML_LEGACY_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000011951
11952/************************************************************************
11953 * *
11954 * Miscellaneous *
11955 * *
11956 ************************************************************************/
11957
11958#ifdef LIBXML_XPATH_ENABLED
11959#include <libxml/xpath.h>
11960#endif
11961
Daniel Veillarddb5850a2002-01-18 11:49:26 +000011962extern void xmlGenericErrorDefaultFunc(void *ctx, const char *msg, ...);
Owen Taylor3473f882001-02-23 17:55:21 +000011963static int xmlParserInitialized = 0;
11964
11965/**
11966 * xmlInitParser:
11967 *
11968 * Initialization function for the XML parser.
11969 * This is not reentrant. Call once before processing in case of
11970 * use in multithreaded programs.
11971 */
11972
11973void
11974xmlInitParser(void) {
Daniel Veillard3c01b1d2001-10-17 15:58:35 +000011975 if (xmlParserInitialized != 0)
11976 return;
Owen Taylor3473f882001-02-23 17:55:21 +000011977
Daniel Veillarddb5850a2002-01-18 11:49:26 +000011978 if ((xmlGenericError == xmlGenericErrorDefaultFunc) ||
11979 (xmlGenericError == NULL))
11980 initGenericErrorDefaultFunc(NULL);
Daniel Veillard781ac8b2003-05-15 22:11:36 +000011981 xmlInitGlobals();
Daniel Veillardd0463562001-10-13 09:15:48 +000011982 xmlInitThreads();
Daniel Veillard6f350292001-10-14 09:56:15 +000011983 xmlInitMemory();
Owen Taylor3473f882001-02-23 17:55:21 +000011984 xmlInitCharEncodingHandlers();
Owen Taylor3473f882001-02-23 17:55:21 +000011985 xmlDefaultSAXHandlerInit();
11986 xmlRegisterDefaultInputCallbacks();
Daniel Veillarda9cce9c2003-09-29 13:20:24 +000011987#ifdef LIBXML_OUTPUT_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000011988 xmlRegisterDefaultOutputCallbacks();
Daniel Veillarda9cce9c2003-09-29 13:20:24 +000011989#endif /* LIBXML_OUTPUT_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000011990#ifdef LIBXML_HTML_ENABLED
11991 htmlInitAutoClose();
11992 htmlDefaultSAXHandlerInit();
11993#endif
11994#ifdef LIBXML_XPATH_ENABLED
11995 xmlXPathInit();
11996#endif
11997 xmlParserInitialized = 1;
11998}
11999
12000/**
12001 * xmlCleanupParser:
12002 *
12003 * Cleanup function for the XML parser. It tries to reclaim all
12004 * parsing related global memory allocated for the parser processing.
12005 * It doesn't deallocate any document related memory. Calling this
12006 * function should not prevent reusing the parser.
Daniel Veillard7424eb62003-01-24 14:14:52 +000012007 * One should call xmlCleanupParser() only when the process has
12008 * finished using the library or XML document built with it.
Owen Taylor3473f882001-02-23 17:55:21 +000012009 */
12010
12011void
12012xmlCleanupParser(void) {
Daniel Veillard7fb801f2003-08-17 21:07:26 +000012013 if (!xmlParserInitialized)
12014 return;
12015
Owen Taylor3473f882001-02-23 17:55:21 +000012016 xmlCleanupCharEncodingHandlers();
Daniel Veillarde2940dd2001-08-22 00:06:49 +000012017#ifdef LIBXML_CATALOG_ENABLED
12018 xmlCatalogCleanup();
12019#endif
Daniel Veillard04054be2003-10-15 10:48:54 +000012020 xmlCleanupInputCallbacks();
12021#ifdef LIBXML_OUTPUT_ENABLED
12022 xmlCleanupOutputCallbacks();
12023#endif
Daniel Veillardd0463562001-10-13 09:15:48 +000012024 xmlCleanupThreads();
Daniel Veillard781ac8b2003-05-15 22:11:36 +000012025 xmlCleanupGlobals();
Daniel Veillard2b8c4a12003-10-02 22:28:19 +000012026 xmlResetLastError();
Daniel Veillardd0463562001-10-13 09:15:48 +000012027 xmlParserInitialized = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000012028}
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012029
12030/************************************************************************
12031 * *
12032 * New set (2.6.0) of simpler and more flexible APIs *
12033 * *
12034 ************************************************************************/
12035
/**
 * DICT_FREE:
 * @str:  a string
 *
 * Free a string if it is not owned by the "dict" dictionary in the
 * current scope. A NULL @str is ignored, and a NULL "dict" means the
 * string is always freed. A variable named "dict" must be visible
 * where the macro is expanded.
 *
 * The body is wrapped in do { } while (0) so that the macro behaves as
 * a single statement: it must be followed by a semicolon and is safe
 * inside an unbraced if/else.
 */
#define DICT_FREE(str)                                                  \
    do {                                                                \
        if ((str) && ((!dict) ||                                        \
            (xmlDictOwns(dict, (const xmlChar *)(str)) == 0)))          \
            xmlFree((char *)(str));                                     \
    } while (0)
12047
12048/**
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012049 * xmlCtxtReset:
12050 * @ctxt: an XML parser context
12051 *
12052 * Reset a parser context
12053 */
12054void
12055xmlCtxtReset(xmlParserCtxtPtr ctxt)
12056{
12057 xmlParserInputPtr input;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000012058 xmlDictPtr dict = ctxt->dict;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012059
12060 while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
12061 xmlFreeInputStream(input);
12062 }
12063 ctxt->inputNr = 0;
12064 ctxt->input = NULL;
12065
12066 ctxt->spaceNr = 0;
12067 ctxt->spaceTab[0] = -1;
12068 ctxt->space = &ctxt->spaceTab[0];
12069
12070
12071 ctxt->nodeNr = 0;
12072 ctxt->node = NULL;
12073
12074 ctxt->nameNr = 0;
12075 ctxt->name = NULL;
12076
Daniel Veillarde96a2a42003-09-24 21:23:56 +000012077 DICT_FREE(ctxt->version);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012078 ctxt->version = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000012079 DICT_FREE(ctxt->encoding);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012080 ctxt->encoding = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000012081 DICT_FREE(ctxt->directory);
12082 ctxt->directory = NULL;
12083 DICT_FREE(ctxt->extSubURI);
12084 ctxt->extSubURI = NULL;
12085 DICT_FREE(ctxt->extSubSystem);
12086 ctxt->extSubSystem = NULL;
12087 if (ctxt->myDoc != NULL)
12088 xmlFreeDoc(ctxt->myDoc);
12089 ctxt->myDoc = NULL;
12090
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012091 ctxt->standalone = -1;
12092 ctxt->hasExternalSubset = 0;
12093 ctxt->hasPErefs = 0;
12094 ctxt->html = 0;
12095 ctxt->external = 0;
12096 ctxt->instate = XML_PARSER_START;
12097 ctxt->token = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012098
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012099 ctxt->wellFormed = 1;
12100 ctxt->nsWellFormed = 1;
12101 ctxt->valid = 1;
12102 ctxt->vctxt.userData = ctxt;
12103 ctxt->vctxt.error = xmlParserValidityError;
12104 ctxt->vctxt.warning = xmlParserValidityWarning;
12105 ctxt->record_info = 0;
12106 ctxt->nbChars = 0;
12107 ctxt->checkIndex = 0;
12108 ctxt->inSubset = 0;
12109 ctxt->errNo = XML_ERR_OK;
12110 ctxt->depth = 0;
12111 ctxt->charset = XML_CHAR_ENCODING_UTF8;
12112 ctxt->catalogs = NULL;
12113 xmlInitNodeInfoSeq(&ctxt->node_seq);
12114
12115 if (ctxt->attsDefault != NULL) {
12116 xmlHashFree(ctxt->attsDefault, (xmlHashDeallocator) xmlFree);
12117 ctxt->attsDefault = NULL;
12118 }
12119 if (ctxt->attsSpecial != NULL) {
12120 xmlHashFree(ctxt->attsSpecial, NULL);
12121 ctxt->attsSpecial = NULL;
12122 }
12123
Daniel Veillard4432df22003-09-28 18:58:27 +000012124#ifdef LIBXML_CATALOG_ENABLED
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012125 if (ctxt->catalogs != NULL)
12126 xmlCatalogFreeLocal(ctxt->catalogs);
Daniel Veillard4432df22003-09-28 18:58:27 +000012127#endif
Daniel Veillardcc199e02003-10-24 21:11:48 +000012128 if (ctxt->lastError.code != XML_ERR_OK)
12129 xmlResetError(&ctxt->lastError);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012130}
12131
12132/**
Daniel Veillard9ba8e382003-10-28 21:31:45 +000012133 * xmlCtxtResetPush:
12134 * @ctxt: an XML parser context
12135 * @chunk: a pointer to an array of chars
12136 * @size: number of chars in the array
12137 * @filename: an optional file name or URI
12138 * @encoding: the document encoding, or NULL
12139 *
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000012140 * Reset a push parser context
12141 *
12142 * Returns 0 in case of success and 1 in case of error
Daniel Veillard9ba8e382003-10-28 21:31:45 +000012143 */
12144int
12145xmlCtxtResetPush(xmlParserCtxtPtr ctxt, const char *chunk,
12146 int size, const char *filename, const char *encoding)
12147{
12148 xmlParserInputPtr inputStream;
12149 xmlParserInputBufferPtr buf;
12150 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
12151
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000012152 if (ctxt == NULL)
12153 return(1);
12154
Daniel Veillard9ba8e382003-10-28 21:31:45 +000012155 if ((encoding == NULL) && (chunk != NULL) && (size >= 4))
12156 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
12157
12158 buf = xmlAllocParserInputBuffer(enc);
12159 if (buf == NULL)
12160 return(1);
12161
12162 if (ctxt == NULL) {
12163 xmlFreeParserInputBuffer(buf);
12164 return(1);
12165 }
12166
12167 xmlCtxtReset(ctxt);
12168
12169 if (ctxt->pushTab == NULL) {
12170 ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 *
12171 sizeof(xmlChar *));
12172 if (ctxt->pushTab == NULL) {
12173 xmlErrMemory(ctxt, NULL);
12174 xmlFreeParserInputBuffer(buf);
12175 return(1);
12176 }
12177 }
12178
12179 if (filename == NULL) {
12180 ctxt->directory = NULL;
12181 } else {
12182 ctxt->directory = xmlParserGetDirectory(filename);
12183 }
12184
12185 inputStream = xmlNewInputStream(ctxt);
12186 if (inputStream == NULL) {
12187 xmlFreeParserInputBuffer(buf);
12188 return(1);
12189 }
12190
12191 if (filename == NULL)
12192 inputStream->filename = NULL;
12193 else
12194 inputStream->filename = (char *)
12195 xmlCanonicPath((const xmlChar *) filename);
12196 inputStream->buf = buf;
12197 inputStream->base = inputStream->buf->buffer->content;
12198 inputStream->cur = inputStream->buf->buffer->content;
12199 inputStream->end =
12200 &inputStream->buf->buffer->content[inputStream->buf->buffer->use];
12201
12202 inputPush(ctxt, inputStream);
12203
12204 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
12205 (ctxt->input->buf != NULL)) {
12206 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
12207 int cur = ctxt->input->cur - ctxt->input->base;
12208
12209 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
12210
12211 ctxt->input->base = ctxt->input->buf->buffer->content + base;
12212 ctxt->input->cur = ctxt->input->base + cur;
12213 ctxt->input->end =
12214 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->
12215 use];
12216#ifdef DEBUG_PUSH
12217 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
12218#endif
12219 }
12220
12221 if (encoding != NULL) {
12222 xmlCharEncodingHandlerPtr hdlr;
12223
12224 hdlr = xmlFindCharEncodingHandler(encoding);
12225 if (hdlr != NULL) {
12226 xmlSwitchToEncoding(ctxt, hdlr);
12227 } else {
12228 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
12229 "Unsupported encoding %s\n", BAD_CAST encoding);
12230 }
12231 } else if (enc != XML_CHAR_ENCODING_NONE) {
12232 xmlSwitchEncoding(ctxt, enc);
12233 }
12234
12235 return(0);
12236}
12237
12238/**
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012239 * xmlCtxtUseOptions:
12240 * @ctxt: an XML parser context
12241 * @options: a combination of xmlParserOption(s)
12242 *
12243 * Applies the options to the parser context
12244 *
12245 * Returns 0 in case of success, the set of unknown or unimplemented options
12246 * in case of error.
12247 */
12248int
12249xmlCtxtUseOptions(xmlParserCtxtPtr ctxt, int options)
12250{
12251 if (options & XML_PARSE_RECOVER) {
12252 ctxt->recovery = 1;
12253 options -= XML_PARSE_RECOVER;
12254 } else
12255 ctxt->recovery = 0;
12256 if (options & XML_PARSE_DTDLOAD) {
12257 ctxt->loadsubset = XML_DETECT_IDS;
12258 options -= XML_PARSE_DTDLOAD;
12259 } else
12260 ctxt->loadsubset = 0;
12261 if (options & XML_PARSE_DTDATTR) {
12262 ctxt->loadsubset |= XML_COMPLETE_ATTRS;
12263 options -= XML_PARSE_DTDATTR;
12264 }
12265 if (options & XML_PARSE_NOENT) {
12266 ctxt->replaceEntities = 1;
12267 /* ctxt->loadsubset |= XML_DETECT_IDS; */
12268 options -= XML_PARSE_NOENT;
12269 } else
12270 ctxt->replaceEntities = 0;
12271 if (options & XML_PARSE_NOWARNING) {
12272 ctxt->sax->warning = NULL;
12273 options -= XML_PARSE_NOWARNING;
12274 }
12275 if (options & XML_PARSE_NOERROR) {
12276 ctxt->sax->error = NULL;
12277 ctxt->sax->fatalError = NULL;
12278 options -= XML_PARSE_NOERROR;
12279 }
12280 if (options & XML_PARSE_PEDANTIC) {
12281 ctxt->pedantic = 1;
12282 options -= XML_PARSE_PEDANTIC;
12283 } else
12284 ctxt->pedantic = 0;
12285 if (options & XML_PARSE_NOBLANKS) {
12286 ctxt->keepBlanks = 0;
12287 ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace;
12288 options -= XML_PARSE_NOBLANKS;
12289 } else
12290 ctxt->keepBlanks = 1;
12291 if (options & XML_PARSE_DTDVALID) {
12292 ctxt->validate = 1;
12293 if (options & XML_PARSE_NOWARNING)
12294 ctxt->vctxt.warning = NULL;
12295 if (options & XML_PARSE_NOERROR)
12296 ctxt->vctxt.error = NULL;
12297 options -= XML_PARSE_DTDVALID;
12298 } else
12299 ctxt->validate = 0;
Daniel Veillard81273902003-09-30 00:43:48 +000012300#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012301 if (options & XML_PARSE_SAX1) {
12302 ctxt->sax->startElement = xmlSAX2StartElement;
12303 ctxt->sax->endElement = xmlSAX2EndElement;
12304 ctxt->sax->startElementNs = NULL;
12305 ctxt->sax->endElementNs = NULL;
12306 ctxt->sax->initialized = 1;
12307 options -= XML_PARSE_SAX1;
12308 }
Daniel Veillard81273902003-09-30 00:43:48 +000012309#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde96a2a42003-09-24 21:23:56 +000012310 if (options & XML_PARSE_NODICT) {
12311 ctxt->dictNames = 0;
12312 options -= XML_PARSE_NODICT;
12313 } else {
12314 ctxt->dictNames = 1;
12315 }
Daniel Veillarddca8cc72003-09-26 13:53:14 +000012316 if (options & XML_PARSE_NOCDATA) {
12317 ctxt->sax->cdataBlock = NULL;
12318 options -= XML_PARSE_NOCDATA;
12319 }
12320 if (options & XML_PARSE_NSCLEAN) {
12321 ctxt->options |= XML_PARSE_NSCLEAN;
12322 options -= XML_PARSE_NSCLEAN;
12323 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012324 return (options);
12325}
12326
12327/**
12328 * xmlDoRead:
12329 * @ctxt: an XML parser context
Daniel Veillard60942de2003-09-25 21:05:58 +000012330 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012331 * @encoding: the document encoding, or NULL
12332 * @options: a combination of xmlParserOption(s)
12333 * @reuse: keep the context for reuse
12334 *
12335 * Common front-end for the xmlRead functions
12336 *
12337 * Returns the resulting document tree or NULL
12338 */
12339static xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000012340xmlDoRead(xmlParserCtxtPtr ctxt, const char *URL, const char *encoding,
12341 int options, int reuse)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012342{
12343 xmlDocPtr ret;
12344
12345 xmlCtxtUseOptions(ctxt, options);
12346 if (encoding != NULL) {
12347 xmlCharEncodingHandlerPtr hdlr;
12348
12349 hdlr = xmlFindCharEncodingHandler(encoding);
12350 if (hdlr != NULL)
12351 xmlSwitchToEncoding(ctxt, hdlr);
12352 }
Daniel Veillard60942de2003-09-25 21:05:58 +000012353 if ((URL != NULL) && (ctxt->input != NULL) &&
12354 (ctxt->input->filename == NULL))
12355 ctxt->input->filename = (char *) xmlStrdup((const xmlChar *) URL);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012356 xmlParseDocument(ctxt);
12357 if ((ctxt->wellFormed) || ctxt->recovery)
12358 ret = ctxt->myDoc;
12359 else {
12360 ret = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000012361 if (ctxt->myDoc != NULL) {
Daniel Veillard9d8c1df2003-09-26 23:27:25 +000012362 if ((ctxt->dictNames) &&
12363 (ctxt->myDoc->dict == ctxt->dict))
12364 xmlDictReference(ctxt->dict);
Daniel Veillarde96a2a42003-09-24 21:23:56 +000012365 xmlFreeDoc(ctxt->myDoc);
12366 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012367 }
Daniel Veillarde96a2a42003-09-24 21:23:56 +000012368 ctxt->myDoc = NULL;
12369 if (!reuse) {
12370 if ((ctxt->dictNames) &&
12371 (ret != NULL) &&
12372 (ret->dict == ctxt->dict))
12373 ctxt->dict = NULL;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012374 xmlFreeParserCtxt(ctxt);
Daniel Veillarde96a2a42003-09-24 21:23:56 +000012375 } else {
12376 /* Must duplicate the reference to the dictionary */
12377 if ((ctxt->dictNames) &&
12378 (ret != NULL) &&
12379 (ret->dict == ctxt->dict))
12380 xmlDictReference(ctxt->dict);
12381 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012382
12383 return (ret);
12384}
12385
12386/**
12387 * xmlReadDoc:
12388 * @cur: a pointer to a zero terminated string
Daniel Veillard60942de2003-09-25 21:05:58 +000012389 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012390 * @encoding: the document encoding, or NULL
12391 * @options: a combination of xmlParserOption(s)
12392 *
12393 * parse an XML in-memory document and build a tree.
12394 *
12395 * Returns the resulting document tree
12396 */
12397xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000012398xmlReadDoc(const xmlChar * cur, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012399{
12400 xmlParserCtxtPtr ctxt;
12401
12402 if (cur == NULL)
12403 return (NULL);
12404
12405 ctxt = xmlCreateDocParserCtxt(cur);
12406 if (ctxt == NULL)
12407 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000012408 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012409}
12410
12411/**
12412 * xmlReadFile:
12413 * @filename: a file or URL
12414 * @encoding: the document encoding, or NULL
12415 * @options: a combination of xmlParserOption(s)
12416 *
12417 * parse an XML file from the filesystem or the network.
12418 *
12419 * Returns the resulting document tree
12420 */
12421xmlDocPtr
12422xmlReadFile(const char *filename, const char *encoding, int options)
12423{
12424 xmlParserCtxtPtr ctxt;
12425
12426 ctxt = xmlCreateFileParserCtxt(filename);
12427 if (ctxt == NULL)
12428 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000012429 return (xmlDoRead(ctxt, NULL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012430}
12431
12432/**
12433 * xmlReadMemory:
12434 * @buffer: a pointer to a char array
12435 * @size: the size of the array
Daniel Veillard60942de2003-09-25 21:05:58 +000012436 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012437 * @encoding: the document encoding, or NULL
12438 * @options: a combination of xmlParserOption(s)
12439 *
12440 * parse an XML in-memory document and build a tree.
12441 *
12442 * Returns the resulting document tree
12443 */
12444xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000012445xmlReadMemory(const char *buffer, int size, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012446{
12447 xmlParserCtxtPtr ctxt;
12448
12449 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
12450 if (ctxt == NULL)
12451 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000012452 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012453}
12454
12455/**
12456 * xmlReadFd:
12457 * @fd: an open file descriptor
Daniel Veillard60942de2003-09-25 21:05:58 +000012458 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012459 * @encoding: the document encoding, or NULL
12460 * @options: a combination of xmlParserOption(s)
12461 *
12462 * parse an XML from a file descriptor and build a tree.
12463 *
12464 * Returns the resulting document tree
12465 */
12466xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000012467xmlReadFd(int fd, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012468{
12469 xmlParserCtxtPtr ctxt;
12470 xmlParserInputBufferPtr input;
12471 xmlParserInputPtr stream;
12472
12473 if (fd < 0)
12474 return (NULL);
12475
12476 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
12477 if (input == NULL)
12478 return (NULL);
12479 ctxt = xmlNewParserCtxt();
12480 if (ctxt == NULL) {
12481 xmlFreeParserInputBuffer(input);
12482 return (NULL);
12483 }
12484 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
12485 if (stream == NULL) {
12486 xmlFreeParserInputBuffer(input);
12487 xmlFreeParserCtxt(ctxt);
12488 return (NULL);
12489 }
12490 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000012491 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012492}
12493
12494/**
12495 * xmlReadIO:
12496 * @ioread: an I/O read function
12497 * @ioclose: an I/O close function
12498 * @ioctx: an I/O handler
Daniel Veillard60942de2003-09-25 21:05:58 +000012499 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012500 * @encoding: the document encoding, or NULL
12501 * @options: a combination of xmlParserOption(s)
12502 *
12503 * parse an XML document from I/O functions and source and build a tree.
12504 *
12505 * Returns the resulting document tree
12506 */
12507xmlDocPtr
12508xmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
Daniel Veillard60942de2003-09-25 21:05:58 +000012509 void *ioctx, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012510{
12511 xmlParserCtxtPtr ctxt;
12512 xmlParserInputBufferPtr input;
12513 xmlParserInputPtr stream;
12514
12515 if (ioread == NULL)
12516 return (NULL);
12517
12518 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
12519 XML_CHAR_ENCODING_NONE);
12520 if (input == NULL)
12521 return (NULL);
12522 ctxt = xmlNewParserCtxt();
12523 if (ctxt == NULL) {
12524 xmlFreeParserInputBuffer(input);
12525 return (NULL);
12526 }
12527 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
12528 if (stream == NULL) {
12529 xmlFreeParserInputBuffer(input);
12530 xmlFreeParserCtxt(ctxt);
12531 return (NULL);
12532 }
12533 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000012534 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012535}
12536
12537/**
12538 * xmlCtxtReadDoc:
12539 * @ctxt: an XML parser context
12540 * @cur: a pointer to a zero terminated string
Daniel Veillard60942de2003-09-25 21:05:58 +000012541 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012542 * @encoding: the document encoding, or NULL
12543 * @options: a combination of xmlParserOption(s)
12544 *
12545 * parse an XML in-memory document and build a tree.
12546 * This reuses the existing @ctxt parser context
12547 *
12548 * Returns the resulting document tree
12549 */
12550xmlDocPtr
12551xmlCtxtReadDoc(xmlParserCtxtPtr ctxt, const xmlChar * cur,
Daniel Veillard60942de2003-09-25 21:05:58 +000012552 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012553{
12554 xmlParserInputPtr stream;
12555
12556 if (cur == NULL)
12557 return (NULL);
12558 if (ctxt == NULL)
12559 return (NULL);
12560
12561 xmlCtxtReset(ctxt);
12562
12563 stream = xmlNewStringInputStream(ctxt, cur);
12564 if (stream == NULL) {
12565 return (NULL);
12566 }
12567 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000012568 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012569}
12570
12571/**
12572 * xmlCtxtReadFile:
12573 * @ctxt: an XML parser context
12574 * @filename: a file or URL
12575 * @encoding: the document encoding, or NULL
12576 * @options: a combination of xmlParserOption(s)
12577 *
12578 * parse an XML file from the filesystem or the network.
12579 * This reuses the existing @ctxt parser context
12580 *
12581 * Returns the resulting document tree
12582 */
12583xmlDocPtr
12584xmlCtxtReadFile(xmlParserCtxtPtr ctxt, const char *filename,
12585 const char *encoding, int options)
12586{
12587 xmlParserInputPtr stream;
12588
12589 if (filename == NULL)
12590 return (NULL);
12591 if (ctxt == NULL)
12592 return (NULL);
12593
12594 xmlCtxtReset(ctxt);
12595
12596 stream = xmlNewInputFromFile(ctxt, filename);
12597 if (stream == NULL) {
12598 return (NULL);
12599 }
12600 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000012601 return (xmlDoRead(ctxt, NULL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012602}
12603
12604/**
12605 * xmlCtxtReadMemory:
12606 * @ctxt: an XML parser context
12607 * @buffer: a pointer to a char array
12608 * @size: the size of the array
Daniel Veillard60942de2003-09-25 21:05:58 +000012609 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012610 * @encoding: the document encoding, or NULL
12611 * @options: a combination of xmlParserOption(s)
12612 *
12613 * parse an XML in-memory document and build a tree.
12614 * This reuses the existing @ctxt parser context
12615 *
12616 * Returns the resulting document tree
12617 */
12618xmlDocPtr
12619xmlCtxtReadMemory(xmlParserCtxtPtr ctxt, const char *buffer, int size,
Daniel Veillard60942de2003-09-25 21:05:58 +000012620 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012621{
12622 xmlParserInputBufferPtr input;
12623 xmlParserInputPtr stream;
12624
12625 if (ctxt == NULL)
12626 return (NULL);
12627 if (buffer == NULL)
12628 return (NULL);
12629
12630 xmlCtxtReset(ctxt);
12631
12632 input = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
12633 if (input == NULL) {
12634 return(NULL);
12635 }
12636
12637 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
12638 if (stream == NULL) {
12639 xmlFreeParserInputBuffer(input);
12640 return(NULL);
12641 }
12642
12643 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000012644 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012645}
12646
12647/**
12648 * xmlCtxtReadFd:
12649 * @ctxt: an XML parser context
12650 * @fd: an open file descriptor
Daniel Veillard60942de2003-09-25 21:05:58 +000012651 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012652 * @encoding: the document encoding, or NULL
12653 * @options: a combination of xmlParserOption(s)
12654 *
12655 * parse an XML from a file descriptor and build a tree.
12656 * This reuses the existing @ctxt parser context
12657 *
12658 * Returns the resulting document tree
12659 */
12660xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000012661xmlCtxtReadFd(xmlParserCtxtPtr ctxt, int fd,
12662 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012663{
12664 xmlParserInputBufferPtr input;
12665 xmlParserInputPtr stream;
12666
12667 if (fd < 0)
12668 return (NULL);
12669 if (ctxt == NULL)
12670 return (NULL);
12671
12672 xmlCtxtReset(ctxt);
12673
12674
12675 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
12676 if (input == NULL)
12677 return (NULL);
12678 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
12679 if (stream == NULL) {
12680 xmlFreeParserInputBuffer(input);
12681 return (NULL);
12682 }
12683 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000012684 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012685}
12686
12687/**
12688 * xmlCtxtReadIO:
12689 * @ctxt: an XML parser context
12690 * @ioread: an I/O read function
12691 * @ioclose: an I/O close function
12692 * @ioctx: an I/O handler
Daniel Veillard60942de2003-09-25 21:05:58 +000012693 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012694 * @encoding: the document encoding, or NULL
12695 * @options: a combination of xmlParserOption(s)
12696 *
12697 * parse an XML document from I/O functions and source and build a tree.
12698 * This reuses the existing @ctxt parser context
12699 *
12700 * Returns the resulting document tree
12701 */
12702xmlDocPtr
12703xmlCtxtReadIO(xmlParserCtxtPtr ctxt, xmlInputReadCallback ioread,
12704 xmlInputCloseCallback ioclose, void *ioctx,
Daniel Veillard60942de2003-09-25 21:05:58 +000012705 const char *URL,
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012706 const char *encoding, int options)
12707{
12708 xmlParserInputBufferPtr input;
12709 xmlParserInputPtr stream;
12710
12711 if (ioread == NULL)
12712 return (NULL);
12713 if (ctxt == NULL)
12714 return (NULL);
12715
12716 xmlCtxtReset(ctxt);
12717
12718 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
12719 XML_CHAR_ENCODING_NONE);
12720 if (input == NULL)
12721 return (NULL);
12722 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
12723 if (stream == NULL) {
12724 xmlFreeParserInputBuffer(input);
12725 return (NULL);
12726 }
12727 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000012728 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012729}