blob: e5ce1dac94cd8677edddda2e383f9132c4f665e4 [file] [log] [blame]
Owen Taylor3473f882001-02-23 17:55:21 +00001/*
2 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
3 * implemented on top of the SAX interfaces
4 *
5 * References:
6 * The XML specification:
7 * http://www.w3.org/TR/REC-xml
8 * Original 1.0 version:
9 * http://www.w3.org/TR/1998/REC-xml-19980210
10 * XML second edition working draft
11 * http://www.w3.org/TR/2000/WD-xml-2e-20000814
12 *
13 * Okay this is a big file, the parser core is around 7000 lines, then it
14 * is followed by the progressive parser top routines, then the various
Daniel Veillardcbaf3992001-12-31 16:16:02 +000015 * high level APIs to call the parser and a few miscellaneous functions.
Owen Taylor3473f882001-02-23 17:55:21 +000016 * A number of helper functions and deprecated ones have been moved to
17 * parserInternals.c to reduce this file size.
18 * As much as possible the functions are associated with their relative
19 * production in the XML specification. A few productions defining the
 * different ranges of characters are actually implemented either in
21 * parserInternals.h or parserInternals.c
22 * The DOM tree build is realized from the default SAX callbacks in
23 * the module SAX.c.
24 * The routines doing the validation checks are in valid.c and called either
Daniel Veillardcbaf3992001-12-31 16:16:02 +000025 * from the SAX callbacks or as standalone functions using a preparsed
Owen Taylor3473f882001-02-23 17:55:21 +000026 * document.
27 *
28 * See Copyright for the status of this software.
29 *
Daniel Veillardc5d64342001-06-24 12:13:24 +000030 * daniel@veillard.com
Owen Taylor3473f882001-02-23 17:55:21 +000031 */
32
Daniel Veillard34ce8be2002-03-18 19:37:11 +000033#define IN_LIBXML
Bjorn Reese70a9da52001-04-21 16:57:29 +000034#include "libxml.h"
35
/*
 * XML_DIR_SEP: directory separator character.
 * A backslash on native Windows builds (WIN32 without Cygwin),
 * a forward slash everywhere else.
 */
#if !defined(WIN32) || defined(__CYGWIN__)
#define XML_DIR_SEP '/'
#else
#define XML_DIR_SEP '\\'
#endif
41
Owen Taylor3473f882001-02-23 17:55:21 +000042#include <stdlib.h>
43#include <string.h>
Aleksey Sanine7acf432003-10-02 20:05:27 +000044#include <stdarg.h>
Owen Taylor3473f882001-02-23 17:55:21 +000045#include <libxml/xmlmemory.h>
Daniel Veillardd0463562001-10-13 09:15:48 +000046#include <libxml/threads.h>
47#include <libxml/globals.h>
Owen Taylor3473f882001-02-23 17:55:21 +000048#include <libxml/tree.h>
49#include <libxml/parser.h>
50#include <libxml/parserInternals.h>
51#include <libxml/valid.h>
52#include <libxml/entities.h>
53#include <libxml/xmlerror.h>
54#include <libxml/encoding.h>
55#include <libxml/xmlIO.h>
56#include <libxml/uri.h>
Daniel Veillard5d90b6c2001-08-22 14:29:45 +000057#ifdef LIBXML_CATALOG_ENABLED
58#include <libxml/catalog.h>
59#endif
Owen Taylor3473f882001-02-23 17:55:21 +000060
61#ifdef HAVE_CTYPE_H
62#include <ctype.h>
63#endif
64#ifdef HAVE_STDLIB_H
65#include <stdlib.h>
66#endif
67#ifdef HAVE_SYS_STAT_H
68#include <sys/stat.h>
69#endif
70#ifdef HAVE_FCNTL_H
71#include <fcntl.h>
72#endif
73#ifdef HAVE_UNISTD_H
74#include <unistd.h>
75#endif
76#ifdef HAVE_ZLIB_H
77#include <zlib.h>
78#endif
79
/**
 * MAX_DEPTH:
 *
 * Arbitrary limit on the depth of the XML documents we accept to
 * process. It is a safety boundary, not a limitation of the parser.
 */
#define MAX_DEPTH 1024

/* Compile-time switch guarding the SAX2-specific code of this file. */
#define SAX2 1

/* Buffer size constants. */
#define XML_PARSER_BIG_BUFFER_SIZE 300
#define XML_PARSER_BUFFER_SIZE 100

/* Marker string for documents built in SAX compatibility mode. */
#define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"
95
/*
 * NULL-terminated list of the "xml"-prefixed processing instruction
 * targets allowed by the W3C specifications.
 */
static const char *xmlW3CPIs[] = {
    "xml-stylesheet",
    NULL
};
104
105/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
Owen Taylor3473f882001-02-23 17:55:21 +0000106xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
107 const xmlChar **str);
108
Daniel Veillard7d515752003-09-26 19:12:37 +0000109static xmlParserErrors
Daniel Veillarda97a19b2001-05-20 13:19:52 +0000110xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
111 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +0000112 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +0000113 const xmlChar *ID, xmlNodePtr *list);
Owen Taylor3473f882001-02-23 17:55:21 +0000114
Daniel Veillard81273902003-09-30 00:43:48 +0000115#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +0000116static void
117xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
118 xmlNodePtr lastNode);
Daniel Veillard81273902003-09-30 00:43:48 +0000119#endif /* LIBXML_LEGACY_ENABLED */
Daniel Veillard8107a222002-01-13 14:10:10 +0000120
Daniel Veillard7d515752003-09-26 19:12:37 +0000121static xmlParserErrors
Daniel Veillard328f48c2002-11-15 15:24:34 +0000122xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
123 const xmlChar *string, void *user_data, xmlNodePtr *lst);
Daniel Veillarde57ec792003-09-10 10:50:59 +0000124
125/************************************************************************
126 * *
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000127 * Some factorized error routines *
128 * *
129 ************************************************************************/
130
131/**
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000132 * xmlErrAttributeDup:
133 * @ctxt: an XML parser context
134 * @prefix: the attribute prefix
135 * @localname: the attribute localname
136 *
137 * Handle a redefinition of attribute error
138 */
139static void
140xmlErrAttributeDup(xmlParserCtxtPtr ctxt, const xmlChar * prefix,
141 const xmlChar * localname)
142{
143 ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
Daniel Veillard2b8c4a12003-10-02 22:28:19 +0000144 if (prefix == NULL)
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000145 __xmlRaiseError(NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
146 ctxt->errNo, XML_ERR_FATAL, NULL, 0,
147 (const char *) localname, NULL, NULL, 0, 0,
148 "Attribute %s redefined\n", localname);
Daniel Veillard2b8c4a12003-10-02 22:28:19 +0000149 else
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000150 __xmlRaiseError(NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
151 ctxt->errNo, XML_ERR_FATAL, NULL, 0,
152 (const char *) prefix, (const char *) localname,
153 NULL, 0, 0, "Attribute %s:%s redefined\n", prefix,
154 localname);
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000155 ctxt->wellFormed = 0;
156 if (ctxt->recovery == 0)
157 ctxt->disableSAX = 1;
158}
159
160/**
161 * xmlFatalErr:
162 * @ctxt: an XML parser context
163 * @error: the error number
164 * @extra: extra information string
165 *
166 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
167 */
168static void
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000169xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info)
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000170{
171 const char *errmsg;
172
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000173 switch (error) {
174 case XML_ERR_INVALID_HEX_CHARREF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000175 errmsg = "CharRef: invalid hexadecimal value\n";
176 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000177 case XML_ERR_INVALID_DEC_CHARREF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000178 errmsg = "CharRef: invalid decimal value\n";
179 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000180 case XML_ERR_INVALID_CHARREF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000181 errmsg = "CharRef: invalid value\n";
182 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000183 case XML_ERR_INTERNAL_ERROR:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000184 errmsg = "internal error";
185 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000186 case XML_ERR_PEREF_AT_EOF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000187 errmsg = "PEReference at end of document\n";
188 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000189 case XML_ERR_PEREF_IN_PROLOG:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000190 errmsg = "PEReference in prolog\n";
191 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000192 case XML_ERR_PEREF_IN_EPILOG:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000193 errmsg = "PEReference in epilog\n";
194 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000195 case XML_ERR_PEREF_NO_NAME:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000196 errmsg = "PEReference: no name\n";
197 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000198 case XML_ERR_PEREF_SEMICOL_MISSING:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000199 errmsg = "PEReference: expecting ';'\n";
200 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000201 case XML_ERR_ENTITY_LOOP:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000202 errmsg = "Detected an entity reference loop\n";
203 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000204 case XML_ERR_ENTITY_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000205 errmsg = "EntityValue: \" or ' expected\n";
206 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000207 case XML_ERR_ENTITY_PE_INTERNAL:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000208 errmsg = "PEReferences forbidden in internal subset\n";
209 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000210 case XML_ERR_ENTITY_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000211 errmsg = "EntityValue: \" or ' expected\n";
212 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000213 case XML_ERR_ATTRIBUTE_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000214 errmsg = "AttValue: \" or ' expected\n";
215 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000216 case XML_ERR_LT_IN_ATTRIBUTE:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000217 errmsg = "Unescaped '<' not allowed in attributes values\n";
218 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000219 case XML_ERR_LITERAL_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000220 errmsg = "SystemLiteral \" or ' expected\n";
221 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000222 case XML_ERR_LITERAL_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000223 errmsg = "Unfinished System or Public ID \" or ' expected\n";
224 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000225 case XML_ERR_MISPLACED_CDATA_END:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000226 errmsg = "Sequence ']]>' not allowed in content\n";
227 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000228 case XML_ERR_URI_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000229 errmsg = "SYSTEM or PUBLIC, the URI is missing\n";
230 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000231 case XML_ERR_PUBID_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000232 errmsg = "PUBLIC, the Public Identifier is missing\n";
233 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000234 case XML_ERR_HYPHEN_IN_COMMENT:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000235 errmsg = "Comment must not contain '--' (double-hyphen)\n";
236 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000237 case XML_ERR_PI_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000238 errmsg = "xmlParsePI : no target name\n";
239 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000240 case XML_ERR_RESERVED_XML_NAME:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000241 errmsg = "Invalid PI name\n";
242 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000243 case XML_ERR_NOTATION_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000244 errmsg = "NOTATION: Name expected here\n";
245 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000246 case XML_ERR_NOTATION_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000247 errmsg = "'>' required to close NOTATION declaration\n";
248 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000249 case XML_ERR_VALUE_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000250 errmsg = "Entity value required\n";
251 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000252 case XML_ERR_URI_FRAGMENT:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000253 errmsg = "Fragment not allowed";
254 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000255 case XML_ERR_ATTLIST_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000256 errmsg = "'(' required to start ATTLIST enumeration\n";
257 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000258 case XML_ERR_NMTOKEN_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000259 errmsg = "NmToken expected in ATTLIST enumeration\n";
260 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000261 case XML_ERR_ATTLIST_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000262 errmsg = "')' required to finish ATTLIST enumeration\n";
263 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000264 case XML_ERR_MIXED_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000265 errmsg = "MixedContentDecl : '|' or ')*' expected\n";
266 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000267 case XML_ERR_PCDATA_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000268 errmsg = "MixedContentDecl : '#PCDATA' expected\n";
269 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000270 case XML_ERR_ELEMCONTENT_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000271 errmsg = "ContentDecl : Name or '(' expected\n";
272 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000273 case XML_ERR_ELEMCONTENT_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000274 errmsg = "ContentDecl : ',' '|' or ')' expected\n";
275 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000276 case XML_ERR_PEREF_IN_INT_SUBSET:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000277 errmsg =
278 "PEReference: forbidden within markup decl in internal subset\n";
279 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000280 case XML_ERR_GT_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000281 errmsg = "expected '>'\n";
282 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000283 case XML_ERR_CONDSEC_INVALID:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000284 errmsg = "XML conditional section '[' expected\n";
285 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000286 case XML_ERR_EXT_SUBSET_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000287 errmsg = "Content error in the external subset\n";
288 break;
289 case XML_ERR_CONDSEC_INVALID_KEYWORD:
290 errmsg =
291 "conditional section INCLUDE or IGNORE keyword expected\n";
292 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000293 case XML_ERR_CONDSEC_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000294 errmsg = "XML conditional section not closed\n";
295 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000296 case XML_ERR_XMLDECL_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000297 errmsg = "Text declaration '<?xml' required\n";
298 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000299 case XML_ERR_XMLDECL_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000300 errmsg = "parsing XML declaration: '?>' expected\n";
301 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000302 case XML_ERR_EXT_ENTITY_STANDALONE:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000303 errmsg = "external parsed entities cannot be standalone\n";
304 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000305 case XML_ERR_ENTITYREF_SEMICOL_MISSING:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000306 errmsg = "EntityRef: expecting ';'\n";
307 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000308 case XML_ERR_DOCTYPE_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000309 errmsg = "DOCTYPE improperly terminated\n";
310 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000311 case XML_ERR_LTSLASH_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000312 errmsg = "EndTag: '</' not found\n";
313 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000314 case XML_ERR_EQUAL_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000315 errmsg = "expected '='\n";
316 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000317 case XML_ERR_STRING_NOT_CLOSED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000318 errmsg = "String not closed expecting \" or '\n";
319 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000320 case XML_ERR_STRING_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000321 errmsg = "String not started expecting ' or \"\n";
322 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000323 case XML_ERR_ENCODING_NAME:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000324 errmsg = "Invalid XML encoding name\n";
325 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000326 case XML_ERR_STANDALONE_VALUE:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000327 errmsg = "standalone accepts only 'yes' or 'no'\n";
328 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000329 case XML_ERR_DOCUMENT_EMPTY:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000330 errmsg = "Document is empty\n";
331 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000332 case XML_ERR_DOCUMENT_END:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000333 errmsg = "Extra content at the end of the document\n";
334 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000335 case XML_ERR_NOT_WELL_BALANCED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000336 errmsg = "chunk is not well balanced\n";
337 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000338 case XML_ERR_EXTRA_CONTENT:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000339 errmsg = "extra content at the end of well balanced chunk\n";
340 break;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000341 case XML_ERR_VERSION_MISSING:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000342 errmsg = "Malformed declaration expecting version\n";
343 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000344#if 0
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000345 case:
346 errmsg = "\n";
347 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000348#endif
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000349 default:
350 errmsg = "Unregistered error message\n";
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000351 }
352 ctxt->errNo = error;
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000353 __xmlRaiseError(NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
354 XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, errmsg,
355 info);
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000356 ctxt->wellFormed = 0;
357 if (ctxt->recovery == 0)
358 ctxt->disableSAX = 1;
359}
360
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000361/**
362 * xmlFatalErrMsg:
363 * @ctxt: an XML parser context
364 * @error: the error number
365 * @msg: the error message
366 *
367 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
368 */
369static void
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000370xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
371 const char *msg)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000372{
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000373 ctxt->errNo = error;
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000374 __xmlRaiseError(NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
375 XML_ERR_FATAL, NULL, 0, NULL, NULL, NULL, 0, 0, msg);
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000376 ctxt->wellFormed = 0;
377 if (ctxt->recovery == 0)
378 ctxt->disableSAX = 1;
379}
380
381/**
Daniel Veillard24eb9782003-10-04 21:08:09 +0000382 * xmlWarningMsg:
383 * @ctxt: an XML parser context
384 * @error: the error number
385 * @msg: the error message
386 * @str1: extra data
387 * @str2: extra data
388 *
389 * Handle a warning.
390 */
391static void
392xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
393 const char *msg, const xmlChar *str1, const xmlChar *str2)
394{
395 ctxt->errNo = error;
396 __xmlRaiseError((ctxt->sax) ? ctxt->sax->warning : NULL, ctxt->userData,
397 ctxt, NULL, XML_FROM_PARSER, error,
398 XML_ERR_WARNING, NULL, 0,
399 (const char *) str1, (const char *) str2, NULL, 0, 0,
400 msg, (const char *) str1, (const char *) str2);
401}
402
403/**
404 * xmlValidityError:
405 * @ctxt: an XML parser context
406 * @error: the error number
407 * @msg: the error message
408 * @str1: extra data
409 *
410 * Handle a warning.
411 */
412static void
413xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error,
414 const char *msg, const xmlChar *str1)
415{
416 ctxt->errNo = error;
417 __xmlRaiseError(ctxt->vctxt.error, ctxt->vctxt.userData,
418 ctxt, NULL, XML_FROM_DTD, error,
419 XML_ERR_ERROR, NULL, 0, (const char *) str1,
420 NULL, NULL, 0, 0,
421 msg, (const char *) str1);
422 ctxt->valid = 0;
423}
424
425/**
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000426 * xmlFatalErrMsgInt:
427 * @ctxt: an XML parser context
428 * @error: the error number
429 * @msg: the error message
430 * @val: an integer value
431 *
432 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
433 */
434static void
435xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000436 const char *msg, int val)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000437{
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000438 ctxt->errNo = error;
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000439 __xmlRaiseError(NULL, NULL,
440 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
441 NULL, 0, NULL, NULL, NULL, val, 0, msg, val);
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000442 ctxt->wellFormed = 0;
443 if (ctxt->recovery == 0)
444 ctxt->disableSAX = 1;
445}
446
447/**
Daniel Veillardf403d292003-10-05 13:51:35 +0000448 * xmlFatalErrMsgStrIntStr:
449 * @ctxt: an XML parser context
450 * @error: the error number
451 * @msg: the error message
452 * @str1: an string info
453 * @val: an integer value
454 * @str2: an string info
455 *
456 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
457 */
458static void
459xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
460 const char *msg, const xmlChar *str1, int val,
461 const xmlChar *str2)
462{
463 ctxt->errNo = error;
464 __xmlRaiseError(NULL, NULL,
465 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
466 NULL, 0, (const char *) str1, (const char *) str2,
467 NULL, val, 0, msg, str1, val, str2);
468 ctxt->wellFormed = 0;
469 if (ctxt->recovery == 0)
470 ctxt->disableSAX = 1;
471}
472
473/**
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000474 * xmlFatalErrMsgStr:
475 * @ctxt: an XML parser context
476 * @error: the error number
477 * @msg: the error message
478 * @val: a string value
479 *
480 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
481 */
482static void
483xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000484 const char *msg, const xmlChar * val)
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000485{
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000486 ctxt->errNo = error;
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000487 __xmlRaiseError(NULL, NULL, ctxt, NULL,
488 XML_FROM_PARSER, error, XML_ERR_FATAL,
489 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
490 val);
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000491 ctxt->wellFormed = 0;
492 if (ctxt->recovery == 0)
493 ctxt->disableSAX = 1;
494}
495
496/**
Daniel Veillardf403d292003-10-05 13:51:35 +0000497 * xmlErrMsgStr:
498 * @ctxt: an XML parser context
499 * @error: the error number
500 * @msg: the error message
501 * @val: a string value
502 *
503 * Handle a non fatal parser error
504 */
505static void
506xmlErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
507 const char *msg, const xmlChar * val)
508{
509 ctxt->errNo = error;
510 __xmlRaiseError(NULL, NULL, ctxt, NULL,
511 XML_FROM_PARSER, error, XML_ERR_ERROR,
512 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
513 val);
514}
515
516/**
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000517 * xmlNsErr:
518 * @ctxt: an XML parser context
519 * @error: the error number
520 * @msg: the message
521 * @info1: extra information string
522 * @info2: extra information string
523 *
524 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
525 */
526static void
527xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
528 const char *msg,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000529 const xmlChar * info1, const xmlChar * info2,
530 const xmlChar * info3)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000531{
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000532 ctxt->errNo = error;
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000533 __xmlRaiseError(NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
534 XML_ERR_ERROR, NULL, 0, (const char *) info1,
535 (const char *) info2, (const char *) info3, 0, 0, msg,
536 info1, info2, info3);
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000537 ctxt->nsWellFormed = 0;
538}
539
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000540/************************************************************************
541 * *
Daniel Veillarde57ec792003-09-10 10:50:59 +0000542 * SAX2 defaulted attributes handling *
543 * *
544 ************************************************************************/
545
546/**
547 * xmlDetectSAX2:
548 * @ctxt: an XML parser context
549 *
550 * Do the SAX2 detection and specific intialization
551 */
552static void
553xmlDetectSAX2(xmlParserCtxtPtr ctxt) {
554 if (ctxt == NULL) return;
Daniel Veillard81273902003-09-30 00:43:48 +0000555#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +0000556 if ((ctxt->sax) && (ctxt->sax->initialized == XML_SAX2_MAGIC) &&
557 ((ctxt->sax->startElementNs != NULL) ||
558 (ctxt->sax->endElementNs != NULL))) ctxt->sax2 = 1;
Daniel Veillard81273902003-09-30 00:43:48 +0000559#else
560 ctxt->sax2 = 1;
561#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +0000562
563 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
564 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
565 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
566}
567
Daniel Veillarde57ec792003-09-10 10:50:59 +0000568typedef struct _xmlDefAttrs xmlDefAttrs;
569typedef xmlDefAttrs *xmlDefAttrsPtr;
570struct _xmlDefAttrs {
571 int nbAttrs; /* number of defaulted attributes on that element */
572 int maxAttrs; /* the size of the array */
573 const xmlChar *values[4]; /* array of localname/prefix/values */
574};
Daniel Veillarde57ec792003-09-10 10:50:59 +0000575
576/**
577 * xmlAddDefAttrs:
578 * @ctxt: an XML parser context
579 * @fullname: the element fullname
580 * @fullattr: the attribute fullname
581 * @value: the attribute value
582 *
583 * Add a defaulted attribute for an element
584 */
585static void
586xmlAddDefAttrs(xmlParserCtxtPtr ctxt,
587 const xmlChar *fullname,
588 const xmlChar *fullattr,
589 const xmlChar *value) {
590 xmlDefAttrsPtr defaults;
591 int len;
592 const xmlChar *name;
593 const xmlChar *prefix;
594
595 if (ctxt->attsDefault == NULL) {
596 ctxt->attsDefault = xmlHashCreate(10);
597 if (ctxt->attsDefault == NULL)
598 goto mem_error;
599 }
600
601 /*
602 * plit the element name into prefix:localname , the string found
603 * are within the DTD and hen not associated to namespace names.
604 */
605 name = xmlSplitQName3(fullname, &len);
606 if (name == NULL) {
607 name = xmlDictLookup(ctxt->dict, fullname, -1);
608 prefix = NULL;
609 } else {
610 name = xmlDictLookup(ctxt->dict, name, -1);
611 prefix = xmlDictLookup(ctxt->dict, fullname, len);
612 }
613
614 /*
615 * make sure there is some storage
616 */
617 defaults = xmlHashLookup2(ctxt->attsDefault, name, prefix);
618 if (defaults == NULL) {
619 defaults = (xmlDefAttrsPtr) xmlMalloc(sizeof(xmlDefAttrs) +
620 12 * sizeof(const xmlChar *));
621 if (defaults == NULL)
622 goto mem_error;
623 defaults->maxAttrs = 4;
624 defaults->nbAttrs = 0;
625 xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix, defaults, NULL);
626 } else if (defaults->nbAttrs >= defaults->maxAttrs) {
627 defaults = (xmlDefAttrsPtr) xmlRealloc(defaults, sizeof(xmlDefAttrs) +
628 (2 * defaults->maxAttrs * 4) * sizeof(const xmlChar *));
629 if (defaults == NULL)
630 goto mem_error;
631 defaults->maxAttrs *= 2;
632 xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix, defaults, NULL);
633 }
634
635 /*
636 * plit the element name into prefix:localname , the string found
637 * are within the DTD and hen not associated to namespace names.
638 */
639 name = xmlSplitQName3(fullattr, &len);
640 if (name == NULL) {
641 name = xmlDictLookup(ctxt->dict, fullattr, -1);
642 prefix = NULL;
643 } else {
644 name = xmlDictLookup(ctxt->dict, name, -1);
645 prefix = xmlDictLookup(ctxt->dict, fullattr, len);
646 }
647
648 defaults->values[4 * defaults->nbAttrs] = name;
649 defaults->values[4 * defaults->nbAttrs + 1] = prefix;
650 /* intern the string and precompute the end */
651 len = xmlStrlen(value);
652 value = xmlDictLookup(ctxt->dict, value, len);
653 defaults->values[4 * defaults->nbAttrs + 2] = value;
654 defaults->values[4 * defaults->nbAttrs + 3] = value + len;
655 defaults->nbAttrs++;
656
657 return;
658
659mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000660 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +0000661 return;
662}
663
Daniel Veillard8e36e6a2003-09-10 10:50:59 +0000664/**
665 * xmlAddSpecialAttr:
666 * @ctxt: an XML parser context
667 * @fullname: the element fullname
668 * @fullattr: the attribute fullname
669 * @type: the attribute type
670 *
671 * Register that this attribute is not CDATA
672 */
673static void
674xmlAddSpecialAttr(xmlParserCtxtPtr ctxt,
675 const xmlChar *fullname,
676 const xmlChar *fullattr,
677 int type)
678{
679 if (ctxt->attsSpecial == NULL) {
680 ctxt->attsSpecial = xmlHashCreate(10);
681 if (ctxt->attsSpecial == NULL)
682 goto mem_error;
683 }
684
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +0000685 xmlHashAddEntry2(ctxt->attsSpecial, fullname, fullattr,
686 (void *) (long) type);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +0000687 return;
688
689mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000690 xmlErrMemory(ctxt, NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +0000691 return;
692}
693
/*
 * xmlLanguageIDIsAlpha:
 * @c:  the character to test
 *
 * Returns 1 if @c is an ASCII letter ([a-z] | [A-Z]), 0 otherwise
 */
static int
xmlLanguageIDIsAlpha(xmlChar c)
{
    return (((c >= 'A') && (c <= 'Z')) || ((c >= 'a') && (c <= 'z')));
}

/**
 * xmlCheckLanguageID:
 * @lang:  pointer to the string value
 *
 * Checks that the value conforms to the LanguageID production:
 *
 * NOTE: this is somewhat deprecated, those productions were removed from
 *       the XML Second edition.
 *
 * [33] LanguageID ::= Langcode ('-' Subcode)*
 * [34] Langcode ::= ISO639Code |  IanaCode |  UserCode
 * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
 * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
 * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
 * [38] Subcode ::= ([a-z] | [A-Z])+
 *
 * Returns 1 if correct 0 otherwise (including when @lang is NULL)
 **/
int
xmlCheckLanguageID(const xmlChar * lang)
{
    const xmlChar *cur = lang;

    if (cur == NULL)
        return (0);

    if (((cur[0] == 'i') || (cur[0] == 'I') ||
         (cur[0] == 'x') || (cur[0] == 'X')) && (cur[1] == '-')) {
        /*
         * IANA code or User code: productions [36] and [37] require at
         * least one letter after the dash.
         */
        cur += 2;
        if (!xmlLanguageIDIsAlpha(cur[0]))
            return (0);
        while (xmlLanguageIDIsAlpha(cur[0]))
            cur++;
    } else if (xmlLanguageIDIsAlpha(cur[0])) {
        /*
         * ISO639: exactly two letters
         */
        if (!xmlLanguageIDIsAlpha(cur[1]))
            return (0);
        cur += 2;
    } else
        return (0);

    /*
     * any number of '-' Subcode, each made of one letter or more
     */
    while (cur[0] != 0) {
        if (cur[0] != '-')
            return (0);
        cur++;
        if (!xmlLanguageIDIsAlpha(cur[0]))
            return (0);
        while (xmlLanguageIDIsAlpha(cur[0]))
            cur++;
    }
    return (1);
}
765
Owen Taylor3473f882001-02-23 17:55:21 +0000766/************************************************************************
767 * *
768 * Parser stacks related functions and macros *
769 * *
770 ************************************************************************/
771
772xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
773 const xmlChar ** str);
774
Daniel Veillard0fb18932003-09-07 09:14:37 +0000775#ifdef SAX2
776/**
777 * nsPush:
778 * @ctxt: an XML parser context
779 * @prefix: the namespace prefix or NULL
780 * @URL: the namespace name
781 *
782 * Pushes a new parser namespace on top of the ns stack
783 *
William M. Brack7b9154b2003-09-27 19:23:50 +0000784 * Returns -1 in case of error, -2 if the namespace should be discarded
785 * and the index in the stack otherwise.
Daniel Veillard0fb18932003-09-07 09:14:37 +0000786 */
787static int
788nsPush(xmlParserCtxtPtr ctxt, const xmlChar *prefix, const xmlChar *URL)
789{
Daniel Veillarddca8cc72003-09-26 13:53:14 +0000790 if (ctxt->options & XML_PARSE_NSCLEAN) {
791 int i;
792 for (i = 0;i < ctxt->nsNr;i += 2) {
793 if (ctxt->nsTab[i] == prefix) {
794 /* in scope */
795 if (ctxt->nsTab[i + 1] == URL)
796 return(-2);
797 /* out of scope keep it */
798 break;
799 }
800 }
801 }
Daniel Veillard0fb18932003-09-07 09:14:37 +0000802 if ((ctxt->nsMax == 0) || (ctxt->nsTab == NULL)) {
803 ctxt->nsMax = 10;
804 ctxt->nsNr = 0;
805 ctxt->nsTab = (const xmlChar **)
806 xmlMalloc(ctxt->nsMax * sizeof(xmlChar *));
807 if (ctxt->nsTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000808 xmlErrMemory(ctxt, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +0000809 ctxt->nsMax = 0;
810 return (-1);
811 }
812 } else if (ctxt->nsNr >= ctxt->nsMax) {
813 ctxt->nsMax *= 2;
814 ctxt->nsTab = (const xmlChar **)
815 xmlRealloc(ctxt->nsTab,
816 ctxt->nsMax * sizeof(ctxt->nsTab[0]));
817 if (ctxt->nsTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000818 xmlErrMemory(ctxt, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +0000819 ctxt->nsMax /= 2;
820 return (-1);
821 }
822 }
823 ctxt->nsTab[ctxt->nsNr++] = prefix;
824 ctxt->nsTab[ctxt->nsNr++] = URL;
825 return (ctxt->nsNr);
826}
827/**
828 * nsPop:
829 * @ctxt: an XML parser context
830 * @nr: the number to pop
831 *
832 * Pops the top @nr parser prefix/namespace from the ns stack
833 *
834 * Returns the number of namespaces removed
835 */
836static int
837nsPop(xmlParserCtxtPtr ctxt, int nr)
838{
839 int i;
840
841 if (ctxt->nsTab == NULL) return(0);
842 if (ctxt->nsNr < nr) {
843 xmlGenericError(xmlGenericErrorContext, "Pbm popping %d NS\n", nr);
844 nr = ctxt->nsNr;
845 }
846 if (ctxt->nsNr <= 0)
847 return (0);
848
849 for (i = 0;i < nr;i++) {
850 ctxt->nsNr--;
851 ctxt->nsTab[ctxt->nsNr] = NULL;
852 }
853 return(nr);
854}
855#endif
856
857static int
858xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt, int nr) {
859 const xmlChar **atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +0000860 int *attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +0000861 int maxatts;
862
863 if (ctxt->atts == NULL) {
Daniel Veillarde57ec792003-09-10 10:50:59 +0000864 maxatts = 55; /* allow for 10 attrs by default */
Daniel Veillard0fb18932003-09-07 09:14:37 +0000865 atts = (const xmlChar **)
866 xmlMalloc(maxatts * sizeof(xmlChar *));
Daniel Veillarde57ec792003-09-10 10:50:59 +0000867 if (atts == NULL) goto mem_error;
Daniel Veillard0fb18932003-09-07 09:14:37 +0000868 ctxt->atts = atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +0000869 attallocs = (int *) xmlMalloc((maxatts / 5) * sizeof(int));
870 if (attallocs == NULL) goto mem_error;
871 ctxt->attallocs = attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +0000872 ctxt->maxatts = maxatts;
Daniel Veillarde57ec792003-09-10 10:50:59 +0000873 } else if (nr + 5 > ctxt->maxatts) {
874 maxatts = (nr + 5) * 2;
Daniel Veillard0fb18932003-09-07 09:14:37 +0000875 atts = (const xmlChar **) xmlRealloc((void *) ctxt->atts,
876 maxatts * sizeof(const xmlChar *));
Daniel Veillarde57ec792003-09-10 10:50:59 +0000877 if (atts == NULL) goto mem_error;
Daniel Veillard0fb18932003-09-07 09:14:37 +0000878 ctxt->atts = atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +0000879 attallocs = (int *) xmlRealloc((void *) ctxt->attallocs,
880 (maxatts / 5) * sizeof(int));
881 if (attallocs == NULL) goto mem_error;
882 ctxt->attallocs = attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +0000883 ctxt->maxatts = maxatts;
884 }
885 return(ctxt->maxatts);
Daniel Veillarde57ec792003-09-10 10:50:59 +0000886mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000887 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +0000888 return(-1);
Daniel Veillard0fb18932003-09-07 09:14:37 +0000889}
890
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000891/**
892 * inputPush:
893 * @ctxt: an XML parser context
Daniel Veillard9d06d302002-01-22 18:15:52 +0000894 * @value: the parser input
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000895 *
896 * Pushes a new parser input on top of the input stack
Daniel Veillard9d06d302002-01-22 18:15:52 +0000897 *
898 * Returns 0 in case of error, the index in the stack otherwise
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000899 */
Daniel Veillard1c732d22002-11-30 11:22:59 +0000900extern int
901inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value)
902{
903 if (ctxt->inputNr >= ctxt->inputMax) {
904 ctxt->inputMax *= 2;
905 ctxt->inputTab =
906 (xmlParserInputPtr *) xmlRealloc(ctxt->inputTab,
907 ctxt->inputMax *
908 sizeof(ctxt->inputTab[0]));
909 if (ctxt->inputTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000910 xmlErrMemory(ctxt, NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +0000911 return (0);
912 }
913 }
914 ctxt->inputTab[ctxt->inputNr] = value;
915 ctxt->input = value;
916 return (ctxt->inputNr++);
917}
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000918/**
Daniel Veillard1c732d22002-11-30 11:22:59 +0000919 * inputPop:
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000920 * @ctxt: an XML parser context
921 *
Daniel Veillard1c732d22002-11-30 11:22:59 +0000922 * Pops the top parser input from the input stack
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000923 *
Daniel Veillard1c732d22002-11-30 11:22:59 +0000924 * Returns the input just removed
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000925 */
Daniel Veillard1c732d22002-11-30 11:22:59 +0000926extern xmlParserInputPtr
927inputPop(xmlParserCtxtPtr ctxt)
928{
929 xmlParserInputPtr ret;
930
931 if (ctxt->inputNr <= 0)
932 return (0);
933 ctxt->inputNr--;
934 if (ctxt->inputNr > 0)
935 ctxt->input = ctxt->inputTab[ctxt->inputNr - 1];
936 else
937 ctxt->input = NULL;
938 ret = ctxt->inputTab[ctxt->inputNr];
939 ctxt->inputTab[ctxt->inputNr] = 0;
940 return (ret);
941}
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000942/**
943 * nodePush:
944 * @ctxt: an XML parser context
Daniel Veillard9d06d302002-01-22 18:15:52 +0000945 * @value: the element node
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000946 *
947 * Pushes a new element node on top of the node stack
Daniel Veillard9d06d302002-01-22 18:15:52 +0000948 *
949 * Returns 0 in case of error, the index in the stack otherwise
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000950 */
Daniel Veillard1c732d22002-11-30 11:22:59 +0000951extern int
952nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value)
953{
954 if (ctxt->nodeNr >= ctxt->nodeMax) {
955 ctxt->nodeMax *= 2;
956 ctxt->nodeTab =
957 (xmlNodePtr *) xmlRealloc(ctxt->nodeTab,
958 ctxt->nodeMax *
959 sizeof(ctxt->nodeTab[0]));
960 if (ctxt->nodeTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000961 xmlErrMemory(ctxt, NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +0000962 return (0);
963 }
964 }
Daniel Veillard3b2e4e12003-02-03 08:52:58 +0000965#ifdef MAX_DEPTH
966 if (ctxt->nodeNr > MAX_DEPTH) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000967 xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
Daniel Veillard3b2e4e12003-02-03 08:52:58 +0000968 "Excessive depth in document: change MAX_DEPTH = %d\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000969 MAX_DEPTH);
Daniel Veillard3b2e4e12003-02-03 08:52:58 +0000970 ctxt->instate = XML_PARSER_EOF;
Daniel Veillard3b2e4e12003-02-03 08:52:58 +0000971 return(0);
972 }
973#endif
Daniel Veillard1c732d22002-11-30 11:22:59 +0000974 ctxt->nodeTab[ctxt->nodeNr] = value;
975 ctxt->node = value;
976 return (ctxt->nodeNr++);
977}
978/**
979 * nodePop:
980 * @ctxt: an XML parser context
981 *
982 * Pops the top element node from the node stack
983 *
984 * Returns the node just removed
Owen Taylor3473f882001-02-23 17:55:21 +0000985 */
Daniel Veillard1c732d22002-11-30 11:22:59 +0000986extern xmlNodePtr
987nodePop(xmlParserCtxtPtr ctxt)
988{
989 xmlNodePtr ret;
990
991 if (ctxt->nodeNr <= 0)
992 return (0);
993 ctxt->nodeNr--;
994 if (ctxt->nodeNr > 0)
995 ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1];
996 else
997 ctxt->node = NULL;
998 ret = ctxt->nodeTab[ctxt->nodeNr];
999 ctxt->nodeTab[ctxt->nodeNr] = 0;
1000 return (ret);
1001}
1002/**
Daniel Veillarde57ec792003-09-10 10:50:59 +00001003 * nameNsPush:
1004 * @ctxt: an XML parser context
1005 * @value: the element name
1006 * @prefix: the element prefix
1007 * @URI: the element namespace name
1008 *
1009 * Pushes a new element name/prefix/URL on top of the name stack
1010 *
1011 * Returns -1 in case of error, the index in the stack otherwise
1012 */
1013static int
1014nameNsPush(xmlParserCtxtPtr ctxt, const xmlChar * value,
1015 const xmlChar *prefix, const xmlChar *URI, int nsNr)
1016{
1017 if (ctxt->nameNr >= ctxt->nameMax) {
1018 const xmlChar * *tmp;
1019 void **tmp2;
1020 ctxt->nameMax *= 2;
1021 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1022 ctxt->nameMax *
1023 sizeof(ctxt->nameTab[0]));
1024 if (tmp == NULL) {
1025 ctxt->nameMax /= 2;
1026 goto mem_error;
1027 }
1028 ctxt->nameTab = tmp;
1029 tmp2 = (void **) xmlRealloc((void * *)ctxt->pushTab,
1030 ctxt->nameMax * 3 *
1031 sizeof(ctxt->pushTab[0]));
1032 if (tmp2 == NULL) {
1033 ctxt->nameMax /= 2;
1034 goto mem_error;
1035 }
1036 ctxt->pushTab = tmp2;
1037 }
1038 ctxt->nameTab[ctxt->nameNr] = value;
1039 ctxt->name = value;
1040 ctxt->pushTab[ctxt->nameNr * 3] = (void *) prefix;
1041 ctxt->pushTab[ctxt->nameNr * 3 + 1] = (void *) URI;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +00001042 ctxt->pushTab[ctxt->nameNr * 3 + 2] = (void *) (long) nsNr;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001043 return (ctxt->nameNr++);
1044mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001045 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001046 return (-1);
1047}
1048/**
1049 * nameNsPop:
1050 * @ctxt: an XML parser context
1051 *
1052 * Pops the top element/prefix/URI name from the name stack
1053 *
1054 * Returns the name just removed
1055 */
1056static const xmlChar *
1057nameNsPop(xmlParserCtxtPtr ctxt)
1058{
1059 const xmlChar *ret;
1060
1061 if (ctxt->nameNr <= 0)
1062 return (0);
1063 ctxt->nameNr--;
1064 if (ctxt->nameNr > 0)
1065 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1066 else
1067 ctxt->name = NULL;
1068 ret = ctxt->nameTab[ctxt->nameNr];
1069 ctxt->nameTab[ctxt->nameNr] = NULL;
1070 return (ret);
1071}
1072
1073/**
Daniel Veillard1c732d22002-11-30 11:22:59 +00001074 * namePush:
1075 * @ctxt: an XML parser context
1076 * @value: the element name
1077 *
1078 * Pushes a new element name on top of the name stack
1079 *
Daniel Veillarde57ec792003-09-10 10:50:59 +00001080 * Returns -1 in case of error, the index in the stack otherwise
Daniel Veillard1c732d22002-11-30 11:22:59 +00001081 */
1082extern int
Daniel Veillard2fdbd322003-08-18 12:15:38 +00001083namePush(xmlParserCtxtPtr ctxt, const xmlChar * value)
Daniel Veillard1c732d22002-11-30 11:22:59 +00001084{
1085 if (ctxt->nameNr >= ctxt->nameMax) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00001086 const xmlChar * *tmp;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001087 ctxt->nameMax *= 2;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001088 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
Daniel Veillard1c732d22002-11-30 11:22:59 +00001089 ctxt->nameMax *
1090 sizeof(ctxt->nameTab[0]));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001091 if (tmp == NULL) {
1092 ctxt->nameMax /= 2;
1093 goto mem_error;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001094 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00001095 ctxt->nameTab = tmp;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001096 }
1097 ctxt->nameTab[ctxt->nameNr] = value;
1098 ctxt->name = value;
1099 return (ctxt->nameNr++);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001100mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001101 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001102 return (-1);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001103}
1104/**
1105 * namePop:
1106 * @ctxt: an XML parser context
1107 *
1108 * Pops the top element name from the name stack
1109 *
1110 * Returns the name just removed
1111 */
Daniel Veillard2fdbd322003-08-18 12:15:38 +00001112extern const xmlChar *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001113namePop(xmlParserCtxtPtr ctxt)
1114{
Daniel Veillard2fdbd322003-08-18 12:15:38 +00001115 const xmlChar *ret;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001116
1117 if (ctxt->nameNr <= 0)
1118 return (0);
1119 ctxt->nameNr--;
1120 if (ctxt->nameNr > 0)
1121 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1122 else
1123 ctxt->name = NULL;
1124 ret = ctxt->nameTab[ctxt->nameNr];
1125 ctxt->nameTab[ctxt->nameNr] = 0;
1126 return (ret);
1127}
Owen Taylor3473f882001-02-23 17:55:21 +00001128
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001129static int spacePush(xmlParserCtxtPtr ctxt, int val) {
Owen Taylor3473f882001-02-23 17:55:21 +00001130 if (ctxt->spaceNr >= ctxt->spaceMax) {
1131 ctxt->spaceMax *= 2;
1132 ctxt->spaceTab = (int *) xmlRealloc(ctxt->spaceTab,
1133 ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
1134 if (ctxt->spaceTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001135 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001136 return(0);
1137 }
1138 }
1139 ctxt->spaceTab[ctxt->spaceNr] = val;
1140 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr];
1141 return(ctxt->spaceNr++);
1142}
1143
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001144static int spacePop(xmlParserCtxtPtr ctxt) {
Owen Taylor3473f882001-02-23 17:55:21 +00001145 int ret;
1146 if (ctxt->spaceNr <= 0) return(0);
1147 ctxt->spaceNr--;
1148 if (ctxt->spaceNr > 0)
1149 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1];
1150 else
1151 ctxt->space = NULL;
1152 ret = ctxt->spaceTab[ctxt->spaceNr];
1153 ctxt->spaceTab[ctxt->spaceNr] = -1;
1154 return(ret);
1155}
1156
/*
 * Macros for accessing the content. They are meant for the parser only
 * and are not exported. All of them expect a variable named ctxt (the
 * parser context) to be in scope at the point of use.
 *
 * Dirty macros, i.e. one often needs to make assumptions on the context
 * to use them:
 *
 *   CUR_PTR   the current pointer to the xmlChar to be parsed.
 *             To be used with extreme caution since operations consuming
 *             characters may move the input buffer to a different location !
 *   CUR       the current xmlChar value. It should be used internally by
 *             the parser only to compare to ASCII values, otherwise it
 *             would break when running with UTF-8 encoding.
 *   RAW       same as CUR, read directly from the input buffer.
 *   NXT(n)    the n'th next xmlChar. Like CUR, it should be used only
 *             to compare against ASCII based substrings.
 *   SKIP(n)   skip n xmlChar; must be used only to skip ASCII defined
 *             strings without newlines within the parser.
 *   NEXT1     skip 1 xmlChar; must be used only to skip 1 non-newline
 *             ASCII defined char within the parser.
 *
 * Clean macros, not dependent on an ASCII context, expect UTF-8 encoding:
 *
 *   NEXT      skip to the next character, this does the proper decoding
 *             in UTF-8 mode. It also pops finished entities on the fly.
 *   NEXTL(l)  skip the current unicode character of l xmlChars long.
 *   CUR_CHAR(l) returns the current unicode character (int), sets l
 *             to the number of xmlChars used for the encoding [0-5].
 *   CUR_SCHAR same but operates on a string instead of the context.
 *   COPY_BUF  copy the current unicode char to the target buffer,
 *             incrementing the index.
 *   GROW, SHRINK  handling of input buffers.
 */

#define RAW (*ctxt->input->cur)
#define CUR (*ctxt->input->cur)
#define NXT(val) ctxt->input->cur[(val)]
#define CUR_PTR ctxt->input->cur

/*
 * SKIP advances the cursor, the column and the character count by val,
 * then hands over to the PE reference handler if the cursor sits on '%'
 * and pops the input if it is exhausted and cannot be grown.
 * Note that val is evaluated three times.
 */
#define SKIP(val) do {							\
    ctxt->nbChars += (val);						\
    ctxt->input->cur += (val);						\
    ctxt->input->col += (val);						\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
    if ((*ctxt->input->cur == 0) &&					\
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))		\
	    xmlPopInput(ctxt);						\
  } while (0)
1204
Daniel Veillarda880b122003-04-21 21:36:41 +00001205#define SHRINK if ((ctxt->progressive == 0) && \
Daniel Veillard6155d8a2003-08-19 15:01:28 +00001206 (ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \
1207 (ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \
Daniel Veillard46de64e2002-05-29 08:21:33 +00001208 xmlSHRINK (ctxt);
1209
1210static void xmlSHRINK (xmlParserCtxtPtr ctxt) {
1211 xmlParserInputShrink(ctxt->input);
1212 if ((*ctxt->input->cur == 0) &&
1213 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1214 xmlPopInput(ctxt);
Daniel Veillard48b2f892001-02-25 16:11:03 +00001215 }
Owen Taylor3473f882001-02-23 17:55:21 +00001216
Daniel Veillarda880b122003-04-21 21:36:41 +00001217#define GROW if ((ctxt->progressive == 0) && \
1218 (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK)) \
Daniel Veillard46de64e2002-05-29 08:21:33 +00001219 xmlGROW (ctxt);
1220
1221static void xmlGROW (xmlParserCtxtPtr ctxt) {
1222 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1223 if ((*ctxt->input->cur == 0) &&
1224 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1225 xmlPopInput(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +00001226}
Owen Taylor3473f882001-02-23 17:55:21 +00001227
/* Thin aliases over the corresponding functions, applied to ctxt. */
#define SKIP_BLANKS xmlSkipBlankChars(ctxt)

#define NEXT xmlNextChar(ctxt)

/*
 * NEXT1 moves one xmlChar forward, bumping the column and the character
 * count, and asks for more input when the cursor lands on a 0 byte.
 * The expansion is a plain brace block, not a do/while(0).
 */
#define NEXT1 {								\
	ctxt->input->col++;						\
	ctxt->input->cur++;						\
	ctxt->nbChars++;						\
	if (*ctxt->input->cur == 0)					\
	    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);		\
    }

/*
 * NEXTL moves the cursor l xmlChars forward. Line and column are updated
 * from the byte under the cursor before the move; afterwards the PE
 * reference handler is invoked if the cursor sits on '%'.
 */
#define NEXTL(l) do {							\
    if (*(ctxt->input->cur) == '\n') {					\
	ctxt->input->line++; ctxt->input->col = 1;			\
    } else ctxt->input->col++;						\
    ctxt->input->cur += l;				\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
  } while (0)

/* l must be an lvalue: its address is passed to receive the length. */
#define CUR_CHAR(l) xmlCurrentChar(ctxt, &l)
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, s, &l)

/*
 * COPY_BUF stores character v at b[i] and advances i: directly when l is
 * 1, through xmlCopyCharMultiByte otherwise. The expansion is an
 * unbraced if/else without a trailing ';'.
 */
#define COPY_BUF(l,b,i,v)						\
    if (l == 1) b[i++] = (xmlChar) v;					\
    else i += xmlCopyCharMultiByte(&b[i],v)
Owen Taylor3473f882001-02-23 17:55:21 +00001254
1255/**
1256 * xmlSkipBlankChars:
1257 * @ctxt: the XML parser context
1258 *
1259 * skip all blanks character found at that point in the input streams.
1260 * It pops up finished entities in the process if allowable at that point.
1261 *
1262 * Returns the number of space chars skipped
1263 */
1264
1265int
1266xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
Daniel Veillard02141ea2001-04-30 11:46:40 +00001267 int res = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001268
1269 /*
1270 * It's Okay to use CUR/NEXT here since all the blanks are on
1271 * the ASCII range.
1272 */
Daniel Veillard02141ea2001-04-30 11:46:40 +00001273 if ((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) {
1274 const xmlChar *cur;
Owen Taylor3473f882001-02-23 17:55:21 +00001275 /*
Daniel Veillard02141ea2001-04-30 11:46:40 +00001276 * if we are in the document content, go really fast
Owen Taylor3473f882001-02-23 17:55:21 +00001277 */
Daniel Veillard02141ea2001-04-30 11:46:40 +00001278 cur = ctxt->input->cur;
1279 while (IS_BLANK(*cur)) {
1280 if (*cur == '\n') {
1281 ctxt->input->line++; ctxt->input->col = 1;
1282 }
1283 cur++;
1284 res++;
1285 if (*cur == 0) {
1286 ctxt->input->cur = cur;
1287 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1288 cur = ctxt->input->cur;
1289 }
1290 }
1291 ctxt->input->cur = cur;
1292 } else {
1293 int cur;
1294 do {
1295 cur = CUR;
1296 while (IS_BLANK(cur)) { /* CHECKED tstblanks.xml */
1297 NEXT;
1298 cur = CUR;
1299 res++;
1300 }
1301 while ((cur == 0) && (ctxt->inputNr > 1) &&
1302 (ctxt->instate != XML_PARSER_COMMENT)) {
1303 xmlPopInput(ctxt);
1304 cur = CUR;
1305 }
1306 /*
1307 * Need to handle support of entities branching here
1308 */
1309 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);
1310 } while (IS_BLANK(cur)); /* CHECKED tstblanks.xml */
1311 }
Owen Taylor3473f882001-02-23 17:55:21 +00001312 return(res);
1313}
1314
1315/************************************************************************
1316 * *
1317 * Commodity functions to handle entities *
1318 * *
1319 ************************************************************************/
1320
1321/**
1322 * xmlPopInput:
1323 * @ctxt: an XML parser context
1324 *
1325 * xmlPopInput: the current input pointed by ctxt->input came to an end
1326 * pop it and return the next char.
1327 *
1328 * Returns the current xmlChar in the parser context
1329 */
1330xmlChar
1331xmlPopInput(xmlParserCtxtPtr ctxt) {
1332 if (ctxt->inputNr == 1) return(0); /* End of main Input */
1333 if (xmlParserDebugEntities)
1334 xmlGenericError(xmlGenericErrorContext,
1335 "Popping input %d\n", ctxt->inputNr);
1336 xmlFreeInputStream(inputPop(ctxt));
Daniel Veillard561b7f82002-03-20 21:55:57 +00001337 if ((*ctxt->input->cur == 0) &&
Owen Taylor3473f882001-02-23 17:55:21 +00001338 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1339 return(xmlPopInput(ctxt));
1340 return(CUR);
1341}
1342
1343/**
1344 * xmlPushInput:
1345 * @ctxt: an XML parser context
1346 * @input: an XML parser input fragment (entity, XML fragment ...).
1347 *
1348 * xmlPushInput: switch to a new input stream which is stacked on top
1349 * of the previous one(s).
1350 */
1351void
1352xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
1353 if (input == NULL) return;
1354
1355 if (xmlParserDebugEntities) {
1356 if ((ctxt->input != NULL) && (ctxt->input->filename))
1357 xmlGenericError(xmlGenericErrorContext,
1358 "%s(%d): ", ctxt->input->filename,
1359 ctxt->input->line);
1360 xmlGenericError(xmlGenericErrorContext,
1361 "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
1362 }
1363 inputPush(ctxt, input);
1364 GROW;
1365}
1366
1367/**
1368 * xmlParseCharRef:
1369 * @ctxt: an XML parser context
1370 *
1371 * parse Reference declarations
1372 *
1373 * [66] CharRef ::= '&#' [0-9]+ ';' |
1374 * '&#x' [0-9a-fA-F]+ ';'
1375 *
1376 * [ WFC: Legal Character ]
1377 * Characters referred to using character references must match the
1378 * production for Char.
1379 *
1380 * Returns the value parsed (as an int), 0 in case of error
1381 */
1382int
1383xmlParseCharRef(xmlParserCtxtPtr ctxt) {
Daniel Veillard50582112001-03-26 22:52:16 +00001384 unsigned int val = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001385 int count = 0;
1386
Owen Taylor3473f882001-02-23 17:55:21 +00001387 /*
1388 * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
1389 */
Daniel Veillard561b7f82002-03-20 21:55:57 +00001390 if ((RAW == '&') && (NXT(1) == '#') &&
Owen Taylor3473f882001-02-23 17:55:21 +00001391 (NXT(2) == 'x')) {
1392 SKIP(3);
1393 GROW;
1394 while (RAW != ';') { /* loop blocked by count */
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00001395 if (count++ > 20) {
1396 count = 0;
1397 GROW;
1398 }
1399 if ((RAW >= '0') && (RAW <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +00001400 val = val * 16 + (CUR - '0');
1401 else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
1402 val = val * 16 + (CUR - 'a') + 10;
1403 else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
1404 val = val * 16 + (CUR - 'A') + 10;
1405 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001406 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001407 val = 0;
1408 break;
1409 }
1410 NEXT;
1411 count++;
1412 }
1413 if (RAW == ';') {
1414 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
Daniel Veillard77a90a72003-03-22 00:04:05 +00001415 ctxt->input->col++;
Owen Taylor3473f882001-02-23 17:55:21 +00001416 ctxt->nbChars ++;
1417 ctxt->input->cur++;
1418 }
Daniel Veillard561b7f82002-03-20 21:55:57 +00001419 } else if ((RAW == '&') && (NXT(1) == '#')) {
Owen Taylor3473f882001-02-23 17:55:21 +00001420 SKIP(2);
1421 GROW;
1422 while (RAW != ';') { /* loop blocked by count */
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00001423 if (count++ > 20) {
1424 count = 0;
1425 GROW;
1426 }
1427 if ((RAW >= '0') && (RAW <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +00001428 val = val * 10 + (CUR - '0');
1429 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001430 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001431 val = 0;
1432 break;
1433 }
1434 NEXT;
1435 count++;
1436 }
1437 if (RAW == ';') {
1438 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
Daniel Veillard77a90a72003-03-22 00:04:05 +00001439 ctxt->input->col++;
Owen Taylor3473f882001-02-23 17:55:21 +00001440 ctxt->nbChars ++;
1441 ctxt->input->cur++;
1442 }
1443 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001444 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001445 }
1446
1447 /*
1448 * [ WFC: Legal Character ]
1449 * Characters referred to using character references must match the
1450 * production for Char.
1451 */
Daniel Veillard73b013f2003-09-30 12:36:01 +00001452 if (xmlIsChar(val)) {
Owen Taylor3473f882001-02-23 17:55:21 +00001453 return(val);
1454 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00001455 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
1456 "xmlParseCharRef: invalid xmlChar value %d\n",
1457 val);
Owen Taylor3473f882001-02-23 17:55:21 +00001458 }
1459 return(0);
1460}
1461
1462/**
1463 * xmlParseStringCharRef:
1464 * @ctxt: an XML parser context
1465 * @str: a pointer to an index in the string
1466 *
1467 * parse Reference declarations, variant parsing from a string rather
1468 * than an an input flow.
1469 *
1470 * [66] CharRef ::= '&#' [0-9]+ ';' |
1471 * '&#x' [0-9a-fA-F]+ ';'
1472 *
1473 * [ WFC: Legal Character ]
1474 * Characters referred to using character references must match the
1475 * production for Char.
1476 *
1477 * Returns the value parsed (as an int), 0 in case of error, str will be
1478 * updated to the current value of the index
1479 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001480static int
Owen Taylor3473f882001-02-23 17:55:21 +00001481xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
1482 const xmlChar *ptr;
1483 xmlChar cur;
1484 int val = 0;
1485
1486 if ((str == NULL) || (*str == NULL)) return(0);
1487 ptr = *str;
1488 cur = *ptr;
1489 if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
1490 ptr += 3;
1491 cur = *ptr;
1492 while (cur != ';') { /* Non input consuming loop */
1493 if ((cur >= '0') && (cur <= '9'))
1494 val = val * 16 + (cur - '0');
1495 else if ((cur >= 'a') && (cur <= 'f'))
1496 val = val * 16 + (cur - 'a') + 10;
1497 else if ((cur >= 'A') && (cur <= 'F'))
1498 val = val * 16 + (cur - 'A') + 10;
1499 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001500 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001501 val = 0;
1502 break;
1503 }
1504 ptr++;
1505 cur = *ptr;
1506 }
1507 if (cur == ';')
1508 ptr++;
1509 } else if ((cur == '&') && (ptr[1] == '#')){
1510 ptr += 2;
1511 cur = *ptr;
1512 while (cur != ';') { /* Non input consuming loops */
1513 if ((cur >= '0') && (cur <= '9'))
1514 val = val * 10 + (cur - '0');
1515 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001516 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001517 val = 0;
1518 break;
1519 }
1520 ptr++;
1521 cur = *ptr;
1522 }
1523 if (cur == ';')
1524 ptr++;
1525 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001526 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001527 return(0);
1528 }
1529 *str = ptr;
1530
1531 /*
1532 * [ WFC: Legal Character ]
1533 * Characters referred to using character references must match the
1534 * production for Char.
1535 */
Daniel Veillard73b013f2003-09-30 12:36:01 +00001536 if (xmlIsChar(val)) {
Owen Taylor3473f882001-02-23 17:55:21 +00001537 return(val);
1538 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00001539 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
1540 "xmlParseStringCharRef: invalid xmlChar value %d\n",
1541 val);
Owen Taylor3473f882001-02-23 17:55:21 +00001542 }
1543 return(0);
1544}
1545
1546/**
Daniel Veillardf5582f12002-06-11 10:08:16 +00001547 * xmlNewBlanksWrapperInputStream:
1548 * @ctxt: an XML parser context
1549 * @entity: an Entity pointer
1550 *
1551 * Create a new input stream for wrapping
1552 * blanks around a PEReference
1553 *
1554 * Returns the new input stream or NULL
1555 */
1556
1557static void deallocblankswrapper (xmlChar *str) {xmlFree(str);}
1558
Daniel Veillardf4862f02002-09-10 11:13:43 +00001559static xmlParserInputPtr
Daniel Veillardf5582f12002-06-11 10:08:16 +00001560xmlNewBlanksWrapperInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
1561 xmlParserInputPtr input;
1562 xmlChar *buffer;
1563 size_t length;
1564 if (entity == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001565 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
1566 "xmlNewBlanksWrapperInputStream entity\n");
Daniel Veillardf5582f12002-06-11 10:08:16 +00001567 return(NULL);
1568 }
1569 if (xmlParserDebugEntities)
1570 xmlGenericError(xmlGenericErrorContext,
1571 "new blanks wrapper for entity: %s\n", entity->name);
1572 input = xmlNewInputStream(ctxt);
1573 if (input == NULL) {
1574 return(NULL);
1575 }
1576 length = xmlStrlen(entity->name) + 5;
Daniel Veillard3c908dc2003-04-19 00:07:51 +00001577 buffer = xmlMallocAtomic(length);
Daniel Veillardf5582f12002-06-11 10:08:16 +00001578 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001579 xmlErrMemory(ctxt, NULL);
Daniel Veillardf5582f12002-06-11 10:08:16 +00001580 return(NULL);
1581 }
1582 buffer [0] = ' ';
1583 buffer [1] = '%';
1584 buffer [length-3] = ';';
1585 buffer [length-2] = ' ';
1586 buffer [length-1] = 0;
1587 memcpy(buffer + 2, entity->name, length - 5);
1588 input->free = deallocblankswrapper;
1589 input->base = buffer;
1590 input->cur = buffer;
1591 input->length = length;
1592 input->end = &buffer[length];
1593 return(input);
1594}
1595
1596/**
Owen Taylor3473f882001-02-23 17:55:21 +00001597 * xmlParserHandlePEReference:
1598 * @ctxt: the parser context
1599 *
1600 * [69] PEReference ::= '%' Name ';'
1601 *
1602 * [ WFC: No Recursion ]
1603 * A parsed entity must not contain a recursive
1604 * reference to itself, either directly or indirectly.
1605 *
1606 * [ WFC: Entity Declared ]
1607 * In a document without any DTD, a document with only an internal DTD
1608 * subset which contains no parameter entity references, or a document
1609 * with "standalone='yes'", ... ... The declaration of a parameter
1610 * entity must precede any reference to it...
1611 *
1612 * [ VC: Entity Declared ]
1613 * In a document with an external subset or external parameter entities
1614 * with "standalone='no'", ... ... The declaration of a parameter entity
1615 * must precede any reference to it...
1616 *
1617 * [ WFC: In DTD ]
1618 * Parameter-entity references may only appear in the DTD.
1619 * NOTE: misleading but this is handled.
1620 *
1621 * A PEReference may have been detected in the current input stream
1622 * the handling is done accordingly to
1623 * http://www.w3.org/TR/REC-xml#entproc
1624 * i.e.
1625 * - Included in literal in entity values
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001626 * - Included as Parameter Entity reference within DTDs
Owen Taylor3473f882001-02-23 17:55:21 +00001627 */
1628void
1629xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00001630 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00001631 xmlEntityPtr entity = NULL;
1632 xmlParserInputPtr input;
1633
Owen Taylor3473f882001-02-23 17:55:21 +00001634 if (RAW != '%') return;
1635 switch(ctxt->instate) {
1636 case XML_PARSER_CDATA_SECTION:
1637 return;
1638 case XML_PARSER_COMMENT:
1639 return;
1640 case XML_PARSER_START_TAG:
1641 return;
1642 case XML_PARSER_END_TAG:
1643 return;
1644 case XML_PARSER_EOF:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001645 xmlFatalErr(ctxt, XML_ERR_PEREF_AT_EOF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001646 return;
1647 case XML_PARSER_PROLOG:
1648 case XML_PARSER_START:
1649 case XML_PARSER_MISC:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001650 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_PROLOG, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001651 return;
1652 case XML_PARSER_ENTITY_DECL:
1653 case XML_PARSER_CONTENT:
1654 case XML_PARSER_ATTRIBUTE_VALUE:
1655 case XML_PARSER_PI:
1656 case XML_PARSER_SYSTEM_LITERAL:
Daniel Veillard4a7ae502002-02-18 19:18:17 +00001657 case XML_PARSER_PUBLIC_LITERAL:
Owen Taylor3473f882001-02-23 17:55:21 +00001658 /* we just ignore it there */
1659 return;
1660 case XML_PARSER_EPILOG:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001661 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_EPILOG, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001662 return;
1663 case XML_PARSER_ENTITY_VALUE:
1664 /*
1665 * NOTE: in the case of entity values, we don't do the
1666 * substitution here since we need the literal
1667 * entity value to be able to save the internal
1668 * subset of the document.
1669 * This will be handled by xmlStringDecodeEntities
1670 */
1671 return;
1672 case XML_PARSER_DTD:
1673 /*
1674 * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
1675 * In the internal DTD subset, parameter-entity references
1676 * can occur only where markup declarations can occur, not
1677 * within markup declarations.
1678 * In that case this is handled in xmlParseMarkupDecl
1679 */
1680 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
1681 return;
Daniel Veillardf5582f12002-06-11 10:08:16 +00001682 if (IS_BLANK(NXT(1)) || NXT(1) == 0)
1683 return;
Owen Taylor3473f882001-02-23 17:55:21 +00001684 break;
1685 case XML_PARSER_IGNORE:
1686 return;
1687 }
1688
1689 NEXT;
1690 name = xmlParseName(ctxt);
1691 if (xmlParserDebugEntities)
1692 xmlGenericError(xmlGenericErrorContext,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001693 "PEReference: %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00001694 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001695 xmlFatalErr(ctxt, XML_ERR_PEREF_NO_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001696 } else {
1697 if (RAW == ';') {
1698 NEXT;
1699 if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL))
1700 entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
1701 if (entity == NULL) {
1702
1703 /*
1704 * [ WFC: Entity Declared ]
1705 * In a document without any DTD, a document with only an
1706 * internal DTD subset which contains no parameter entity
1707 * references, or a document with "standalone='yes'", ...
1708 * ... The declaration of a parameter entity must precede
1709 * any reference to it...
1710 */
1711 if ((ctxt->standalone == 1) ||
1712 ((ctxt->hasExternalSubset == 0) &&
1713 (ctxt->hasPErefs == 0))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00001714 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00001715 "PEReference: %%%s; not found\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00001716 } else {
1717 /*
1718 * [ VC: Entity Declared ]
1719 * In a document with an external subset or external
1720 * parameter entities with "standalone='no'", ...
1721 * ... The declaration of a parameter entity must precede
1722 * any reference to it...
1723 */
Daniel Veillard24eb9782003-10-04 21:08:09 +00001724 if ((ctxt->validate) && (ctxt->vctxt.error != NULL)) {
1725 xmlValidityError(ctxt, XML_WAR_UNDECLARED_ENTITY,
1726 "PEReference: %%%s; not found\n",
1727 name);
1728 } else
1729 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
1730 "PEReference: %%%s; not found\n",
1731 name, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001732 ctxt->valid = 0;
1733 }
Daniel Veillardf5582f12002-06-11 10:08:16 +00001734 } else if (ctxt->input->free != deallocblankswrapper) {
1735 input = xmlNewBlanksWrapperInputStream(ctxt, entity);
1736 xmlPushInput(ctxt, input);
Owen Taylor3473f882001-02-23 17:55:21 +00001737 } else {
1738 if ((entity->etype == XML_INTERNAL_PARAMETER_ENTITY) ||
1739 (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)) {
Daniel Veillard87a764e2001-06-20 17:41:10 +00001740 xmlChar start[4];
1741 xmlCharEncoding enc;
1742
Owen Taylor3473f882001-02-23 17:55:21 +00001743 /*
1744 * handle the extra spaces added before and after
1745 * c.f. http://www.w3.org/TR/REC-xml#as-PE
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001746 * this is done independently.
Owen Taylor3473f882001-02-23 17:55:21 +00001747 */
1748 input = xmlNewEntityInputStream(ctxt, entity);
1749 xmlPushInput(ctxt, input);
Daniel Veillard87a764e2001-06-20 17:41:10 +00001750
1751 /*
1752 * Get the 4 first bytes and decode the charset
1753 * if enc != XML_CHAR_ENCODING_NONE
1754 * plug some encoding conversion routines.
1755 */
1756 GROW
Daniel Veillarde059b892002-06-13 15:32:10 +00001757 if (entity->length >= 4) {
1758 start[0] = RAW;
1759 start[1] = NXT(1);
1760 start[2] = NXT(2);
1761 start[3] = NXT(3);
1762 enc = xmlDetectCharEncoding(start, 4);
1763 if (enc != XML_CHAR_ENCODING_NONE) {
1764 xmlSwitchEncoding(ctxt, enc);
1765 }
Daniel Veillard87a764e2001-06-20 17:41:10 +00001766 }
1767
Owen Taylor3473f882001-02-23 17:55:21 +00001768 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
Daniel Veillard8f597c32003-10-06 08:19:27 +00001769 (memcmp(CUR_PTR, "<?xml", 5) == 0) && (IS_BLANK(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +00001770 xmlParseTextDecl(ctxt);
1771 }
Owen Taylor3473f882001-02-23 17:55:21 +00001772 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00001773 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
1774 "PEReference: %s is not a parameter entity\n",
1775 name);
Owen Taylor3473f882001-02-23 17:55:21 +00001776 }
1777 }
1778 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001779 xmlFatalErr(ctxt, XML_ERR_PEREF_SEMICOL_MISSING, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001780 }
Owen Taylor3473f882001-02-23 17:55:21 +00001781 }
1782}
1783
/*
 * Macro used to grow the current buffer.
 *
 * Doubles the capacity tracked in <buffer>_size and jumps to the
 * enclosing function's mem_error label when the reallocation fails.
 * The result of xmlRealloc() goes through a temporary so that, on
 * failure, @buffer still points to the original allocation (and can be
 * released by the caller) and <buffer>_size still describes it.
 */
#define growBuffer(buffer) {						\
    xmlChar *growBufferTmp;						\
    growBufferTmp = (xmlChar *)						\
	xmlRealloc(buffer, buffer##_size * 2 * sizeof(xmlChar));	\
    if (growBufferTmp == NULL) goto mem_error;				\
    buffer = growBufferTmp;						\
    buffer##_size *= 2;							\
}
1793
1794/**
Daniel Veillard7a02cfe2003-09-25 12:18:34 +00001795 * xmlStringLenDecodeEntities:
Owen Taylor3473f882001-02-23 17:55:21 +00001796 * @ctxt: the parser context
1797 * @str: the input string
Daniel Veillarde57ec792003-09-10 10:50:59 +00001798 * @len: the string length
Owen Taylor3473f882001-02-23 17:55:21 +00001799 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
1800 * @end: an end marker xmlChar, 0 if none
1801 * @end2: an end marker xmlChar, 0 if none
1802 * @end3: an end marker xmlChar, 0 if none
1803 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001804 * Takes a entity string content and process to do the adequate substitutions.
Owen Taylor3473f882001-02-23 17:55:21 +00001805 *
1806 * [67] Reference ::= EntityRef | CharRef
1807 *
1808 * [69] PEReference ::= '%' Name ';'
1809 *
1810 * Returns A newly allocated string with the substitution done. The caller
1811 * must deallocate it !
1812 */
1813xmlChar *
Daniel Veillarde57ec792003-09-10 10:50:59 +00001814xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
1815 int what, xmlChar end, xmlChar end2, xmlChar end3) {
Owen Taylor3473f882001-02-23 17:55:21 +00001816 xmlChar *buffer = NULL;
1817 int buffer_size = 0;
1818
1819 xmlChar *current = NULL;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001820 const xmlChar *last;
Owen Taylor3473f882001-02-23 17:55:21 +00001821 xmlEntityPtr ent;
1822 int c,l;
1823 int nbchars = 0;
1824
Daniel Veillarde57ec792003-09-10 10:50:59 +00001825 if ((str == NULL) || (len < 0))
Owen Taylor3473f882001-02-23 17:55:21 +00001826 return(NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001827 last = str + len;
Owen Taylor3473f882001-02-23 17:55:21 +00001828
1829 if (ctxt->depth > 40) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001830 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001831 return(NULL);
1832 }
1833
1834 /*
1835 * allocate a translation buffer.
1836 */
1837 buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
Daniel Veillard3c908dc2003-04-19 00:07:51 +00001838 buffer = (xmlChar *) xmlMallocAtomic(buffer_size * sizeof(xmlChar));
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001839 if (buffer == NULL) goto mem_error;
Owen Taylor3473f882001-02-23 17:55:21 +00001840
1841 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001842 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +00001843 * we are operating on already parsed values.
1844 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00001845 if (str < last)
1846 c = CUR_SCHAR(str, l);
1847 else
1848 c = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001849 while ((c != 0) && (c != end) && /* non input consuming loop */
1850 (c != end2) && (c != end3)) {
1851
1852 if (c == 0) break;
1853 if ((c == '&') && (str[1] == '#')) {
1854 int val = xmlParseStringCharRef(ctxt, &str);
1855 if (val != 0) {
1856 COPY_BUF(0,buffer,nbchars,val);
1857 }
1858 } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
1859 if (xmlParserDebugEntities)
1860 xmlGenericError(xmlGenericErrorContext,
1861 "String decoding Entity Reference: %.30s\n",
1862 str);
1863 ent = xmlParseStringEntityRef(ctxt, &str);
1864 if ((ent != NULL) &&
1865 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
1866 if (ent->content != NULL) {
1867 COPY_BUF(0,buffer,nbchars,ent->content[0]);
1868 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00001869 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
1870 "predefined entity has no content\n");
Owen Taylor3473f882001-02-23 17:55:21 +00001871 }
1872 } else if ((ent != NULL) && (ent->content != NULL)) {
1873 xmlChar *rep;
1874
1875 ctxt->depth++;
1876 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
1877 0, 0, 0);
1878 ctxt->depth--;
1879 if (rep != NULL) {
1880 current = rep;
1881 while (*current != 0) { /* non input consuming loop */
1882 buffer[nbchars++] = *current++;
1883 if (nbchars >
1884 buffer_size - XML_PARSER_BUFFER_SIZE) {
1885 growBuffer(buffer);
1886 }
1887 }
1888 xmlFree(rep);
1889 }
1890 } else if (ent != NULL) {
1891 int i = xmlStrlen(ent->name);
1892 const xmlChar *cur = ent->name;
1893
1894 buffer[nbchars++] = '&';
1895 if (nbchars > buffer_size - i - XML_PARSER_BUFFER_SIZE) {
1896 growBuffer(buffer);
1897 }
1898 for (;i > 0;i--)
1899 buffer[nbchars++] = *cur++;
1900 buffer[nbchars++] = ';';
1901 }
1902 } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
1903 if (xmlParserDebugEntities)
1904 xmlGenericError(xmlGenericErrorContext,
1905 "String decoding PE Reference: %.30s\n", str);
1906 ent = xmlParseStringPEReference(ctxt, &str);
1907 if (ent != NULL) {
1908 xmlChar *rep;
1909
1910 ctxt->depth++;
1911 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
1912 0, 0, 0);
1913 ctxt->depth--;
1914 if (rep != NULL) {
1915 current = rep;
1916 while (*current != 0) { /* non input consuming loop */
1917 buffer[nbchars++] = *current++;
1918 if (nbchars >
1919 buffer_size - XML_PARSER_BUFFER_SIZE) {
1920 growBuffer(buffer);
1921 }
1922 }
1923 xmlFree(rep);
1924 }
1925 }
1926 } else {
1927 COPY_BUF(l,buffer,nbchars,c);
1928 str += l;
1929 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
1930 growBuffer(buffer);
1931 }
1932 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00001933 if (str < last)
1934 c = CUR_SCHAR(str, l);
1935 else
1936 c = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001937 }
1938 buffer[nbchars++] = 0;
1939 return(buffer);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001940
1941mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001942 xmlErrMemory(ctxt, NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001943 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001944}
1945
Daniel Veillarde57ec792003-09-10 10:50:59 +00001946/**
1947 * xmlStringDecodeEntities:
1948 * @ctxt: the parser context
1949 * @str: the input string
1950 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
1951 * @end: an end marker xmlChar, 0 if none
1952 * @end2: an end marker xmlChar, 0 if none
1953 * @end3: an end marker xmlChar, 0 if none
1954 *
1955 * Takes a entity string content and process to do the adequate substitutions.
1956 *
1957 * [67] Reference ::= EntityRef | CharRef
1958 *
1959 * [69] PEReference ::= '%' Name ';'
1960 *
1961 * Returns A newly allocated string with the substitution done. The caller
1962 * must deallocate it !
1963 */
1964xmlChar *
1965xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
1966 xmlChar end, xmlChar end2, xmlChar end3) {
1967 return(xmlStringLenDecodeEntities(ctxt, str, xmlStrlen(str), what,
1968 end, end2, end3));
1969}
Owen Taylor3473f882001-02-23 17:55:21 +00001970
1971/************************************************************************
1972 * *
1973 * Commodity functions to handle xmlChars *
1974 * *
1975 ************************************************************************/
1976
1977/**
1978 * xmlStrndup:
1979 * @cur: the input xmlChar *
1980 * @len: the len of @cur
1981 *
1982 * a strndup for array of xmlChar's
1983 *
1984 * Returns a new xmlChar * or NULL
1985 */
1986xmlChar *
1987xmlStrndup(const xmlChar *cur, int len) {
1988 xmlChar *ret;
1989
1990 if ((cur == NULL) || (len < 0)) return(NULL);
Daniel Veillard3c908dc2003-04-19 00:07:51 +00001991 ret = (xmlChar *) xmlMallocAtomic((len + 1) * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00001992 if (ret == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001993 xmlErrMemory(NULL, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001994 return(NULL);
1995 }
1996 memcpy(ret, cur, len * sizeof(xmlChar));
1997 ret[len] = 0;
1998 return(ret);
1999}
2000
2001/**
2002 * xmlStrdup:
2003 * @cur: the input xmlChar *
2004 *
2005 * a strdup for array of xmlChar's. Since they are supposed to be
2006 * encoded in UTF-8 or an encoding with 8bit based chars, we assume
2007 * a termination mark of '0'.
2008 *
2009 * Returns a new xmlChar * or NULL
2010 */
2011xmlChar *
2012xmlStrdup(const xmlChar *cur) {
2013 const xmlChar *p = cur;
2014
2015 if (cur == NULL) return(NULL);
2016 while (*p != 0) p++; /* non input consuming */
2017 return(xmlStrndup(cur, p - cur));
2018}
2019
2020/**
2021 * xmlCharStrndup:
2022 * @cur: the input char *
2023 * @len: the len of @cur
2024 *
2025 * a strndup for char's to xmlChar's
2026 *
2027 * Returns a new xmlChar * or NULL
2028 */
2029
2030xmlChar *
2031xmlCharStrndup(const char *cur, int len) {
2032 int i;
2033 xmlChar *ret;
2034
2035 if ((cur == NULL) || (len < 0)) return(NULL);
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002036 ret = (xmlChar *) xmlMallocAtomic((len + 1) * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002037 if (ret == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002038 xmlErrMemory(NULL, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002039 return(NULL);
2040 }
2041 for (i = 0;i < len;i++)
2042 ret[i] = (xmlChar) cur[i];
2043 ret[len] = 0;
2044 return(ret);
2045}
2046
2047/**
2048 * xmlCharStrdup:
2049 * @cur: the input char *
Owen Taylor3473f882001-02-23 17:55:21 +00002050 *
2051 * a strdup for char's to xmlChar's
2052 *
2053 * Returns a new xmlChar * or NULL
2054 */
2055
2056xmlChar *
2057xmlCharStrdup(const char *cur) {
2058 const char *p = cur;
2059
2060 if (cur == NULL) return(NULL);
2061 while (*p != '\0') p++; /* non input consuming */
2062 return(xmlCharStrndup(cur, p - cur));
2063}
2064
2065/**
2066 * xmlStrcmp:
2067 * @str1: the first xmlChar *
2068 * @str2: the second xmlChar *
2069 *
2070 * a strcmp for xmlChar's
2071 *
2072 * Returns the integer result of the comparison
2073 */
2074
2075int
2076xmlStrcmp(const xmlChar *str1, const xmlChar *str2) {
2077 register int tmp;
2078
2079 if (str1 == str2) return(0);
2080 if (str1 == NULL) return(-1);
2081 if (str2 == NULL) return(1);
2082 do {
2083 tmp = *str1++ - *str2;
2084 if (tmp != 0) return(tmp);
2085 } while (*str2++ != 0);
2086 return 0;
2087}
2088
2089/**
2090 * xmlStrEqual:
2091 * @str1: the first xmlChar *
2092 * @str2: the second xmlChar *
2093 *
2094 * Check if both string are equal of have same content
2095 * Should be a bit more readable and faster than xmlStrEqual()
2096 *
2097 * Returns 1 if they are equal, 0 if they are different
2098 */
2099
2100int
2101xmlStrEqual(const xmlChar *str1, const xmlChar *str2) {
2102 if (str1 == str2) return(1);
2103 if (str1 == NULL) return(0);
2104 if (str2 == NULL) return(0);
2105 do {
2106 if (*str1++ != *str2) return(0);
2107 } while (*str2++);
2108 return(1);
2109}
2110
2111/**
Daniel Veillarde57ec792003-09-10 10:50:59 +00002112 * xmlStrQEqual:
2113 * @pref: the prefix of the QName
2114 * @name: the localname of the QName
2115 * @str: the second xmlChar *
2116 *
2117 * Check if a QName is Equal to a given string
2118 *
2119 * Returns 1 if they are equal, 0 if they are different
2120 */
2121
2122int
2123xmlStrQEqual(const xmlChar *pref, const xmlChar *name, const xmlChar *str) {
2124 if (pref == NULL) return(xmlStrEqual(name, str));
2125 if (name == NULL) return(0);
2126 if (str == NULL) return(0);
2127
2128 do {
2129 if (*pref++ != *str) return(0);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002130 } while ((*str++) && (*pref));
Daniel Veillarde57ec792003-09-10 10:50:59 +00002131 if (*str++ != ':') return(0);
2132 do {
2133 if (*name++ != *str) return(0);
2134 } while (*str++);
2135 return(1);
2136}
2137
2138/**
Owen Taylor3473f882001-02-23 17:55:21 +00002139 * xmlStrncmp:
2140 * @str1: the first xmlChar *
2141 * @str2: the second xmlChar *
2142 * @len: the max comparison length
2143 *
2144 * a strncmp for xmlChar's
2145 *
2146 * Returns the integer result of the comparison
2147 */
2148
2149int
2150xmlStrncmp(const xmlChar *str1, const xmlChar *str2, int len) {
2151 register int tmp;
2152
2153 if (len <= 0) return(0);
2154 if (str1 == str2) return(0);
2155 if (str1 == NULL) return(-1);
2156 if (str2 == NULL) return(1);
2157 do {
2158 tmp = *str1++ - *str2;
2159 if (tmp != 0 || --len == 0) return(tmp);
2160 } while (*str2++ != 0);
2161 return 0;
2162}
2163
Daniel Veillardb44025c2001-10-11 22:55:55 +00002164static const xmlChar casemap[256] = {
Owen Taylor3473f882001-02-23 17:55:21 +00002165 0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,
2166 0x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,0x0F,
2167 0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,
2168 0x18,0x19,0x1A,0x1B,0x1C,0x1D,0x1E,0x1F,
2169 0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,
2170 0x28,0x29,0x2A,0x2B,0x2C,0x2D,0x2E,0x2F,
2171 0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,
2172 0x38,0x39,0x3A,0x3B,0x3C,0x3D,0x3E,0x3F,
2173 0x40,0x61,0x62,0x63,0x64,0x65,0x66,0x67,
2174 0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
2175 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,
2176 0x78,0x79,0x7A,0x7B,0x5C,0x5D,0x5E,0x5F,
2177 0x60,0x61,0x62,0x63,0x64,0x65,0x66,0x67,
2178 0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
2179 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,
2180 0x78,0x79,0x7A,0x7B,0x7C,0x7D,0x7E,0x7F,
2181 0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87,
2182 0x88,0x89,0x8A,0x8B,0x8C,0x8D,0x8E,0x8F,
2183 0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97,
2184 0x98,0x99,0x9A,0x9B,0x9C,0x9D,0x9E,0x9F,
2185 0xA0,0xA1,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7,
2186 0xA8,0xA9,0xAA,0xAB,0xAC,0xAD,0xAE,0xAF,
2187 0xB0,0xB1,0xB2,0xB3,0xB4,0xB5,0xB6,0xB7,
2188 0xB8,0xB9,0xBA,0xBB,0xBC,0xBD,0xBE,0xBF,
2189 0xC0,0xC1,0xC2,0xC3,0xC4,0xC5,0xC6,0xC7,
2190 0xC8,0xC9,0xCA,0xCB,0xCC,0xCD,0xCE,0xCF,
2191 0xD0,0xD1,0xD2,0xD3,0xD4,0xD5,0xD6,0xD7,
2192 0xD8,0xD9,0xDA,0xDB,0xDC,0xDD,0xDE,0xDF,
2193 0xE0,0xE1,0xE2,0xE3,0xE4,0xE5,0xE6,0xE7,
2194 0xE8,0xE9,0xEA,0xEB,0xEC,0xED,0xEE,0xEF,
2195 0xF0,0xF1,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7,
2196 0xF8,0xF9,0xFA,0xFB,0xFC,0xFD,0xFE,0xFF
2197};
2198
2199/**
2200 * xmlStrcasecmp:
2201 * @str1: the first xmlChar *
2202 * @str2: the second xmlChar *
2203 *
2204 * a strcasecmp for xmlChar's
2205 *
2206 * Returns the integer result of the comparison
2207 */
2208
2209int
2210xmlStrcasecmp(const xmlChar *str1, const xmlChar *str2) {
2211 register int tmp;
2212
2213 if (str1 == str2) return(0);
2214 if (str1 == NULL) return(-1);
2215 if (str2 == NULL) return(1);
2216 do {
2217 tmp = casemap[*str1++] - casemap[*str2];
2218 if (tmp != 0) return(tmp);
2219 } while (*str2++ != 0);
2220 return 0;
2221}
2222
2223/**
2224 * xmlStrncasecmp:
2225 * @str1: the first xmlChar *
2226 * @str2: the second xmlChar *
2227 * @len: the max comparison length
2228 *
2229 * a strncasecmp for xmlChar's
2230 *
2231 * Returns the integer result of the comparison
2232 */
2233
2234int
2235xmlStrncasecmp(const xmlChar *str1, const xmlChar *str2, int len) {
2236 register int tmp;
2237
2238 if (len <= 0) return(0);
2239 if (str1 == str2) return(0);
2240 if (str1 == NULL) return(-1);
2241 if (str2 == NULL) return(1);
2242 do {
2243 tmp = casemap[*str1++] - casemap[*str2];
2244 if (tmp != 0 || --len == 0) return(tmp);
2245 } while (*str2++ != 0);
2246 return 0;
2247}
2248
2249/**
2250 * xmlStrchr:
2251 * @str: the xmlChar * array
2252 * @val: the xmlChar to search
2253 *
2254 * a strchr for xmlChar's
2255 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002256 * Returns the xmlChar * for the first occurrence or NULL.
Owen Taylor3473f882001-02-23 17:55:21 +00002257 */
2258
2259const xmlChar *
2260xmlStrchr(const xmlChar *str, xmlChar val) {
2261 if (str == NULL) return(NULL);
2262 while (*str != 0) { /* non input consuming */
2263 if (*str == val) return((xmlChar *) str);
2264 str++;
2265 }
2266 return(NULL);
2267}
2268
2269/**
2270 * xmlStrstr:
2271 * @str: the xmlChar * array (haystack)
2272 * @val: the xmlChar to search (needle)
2273 *
2274 * a strstr for xmlChar's
2275 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002276 * Returns the xmlChar * for the first occurrence or NULL.
Owen Taylor3473f882001-02-23 17:55:21 +00002277 */
2278
2279const xmlChar *
Daniel Veillard77044732001-06-29 21:31:07 +00002280xmlStrstr(const xmlChar *str, const xmlChar *val) {
Owen Taylor3473f882001-02-23 17:55:21 +00002281 int n;
2282
2283 if (str == NULL) return(NULL);
2284 if (val == NULL) return(NULL);
2285 n = xmlStrlen(val);
2286
2287 if (n == 0) return(str);
2288 while (*str != 0) { /* non input consuming */
2289 if (*str == *val) {
2290 if (!xmlStrncmp(str, val, n)) return((const xmlChar *) str);
2291 }
2292 str++;
2293 }
2294 return(NULL);
2295}
2296
2297/**
2298 * xmlStrcasestr:
2299 * @str: the xmlChar * array (haystack)
2300 * @val: the xmlChar to search (needle)
2301 *
2302 * a case-ignoring strstr for xmlChar's
2303 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002304 * Returns the xmlChar * for the first occurrence or NULL.
Owen Taylor3473f882001-02-23 17:55:21 +00002305 */
2306
2307const xmlChar *
2308xmlStrcasestr(const xmlChar *str, xmlChar *val) {
2309 int n;
2310
2311 if (str == NULL) return(NULL);
2312 if (val == NULL) return(NULL);
2313 n = xmlStrlen(val);
2314
2315 if (n == 0) return(str);
2316 while (*str != 0) { /* non input consuming */
2317 if (casemap[*str] == casemap[*val])
2318 if (!xmlStrncasecmp(str, val, n)) return(str);
2319 str++;
2320 }
2321 return(NULL);
2322}
2323
2324/**
2325 * xmlStrsub:
2326 * @str: the xmlChar * array (haystack)
2327 * @start: the index of the first char (zero based)
2328 * @len: the length of the substring
2329 *
2330 * Extract a substring of a given string
2331 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002332 * Returns the xmlChar * for the first occurrence or NULL.
Owen Taylor3473f882001-02-23 17:55:21 +00002333 */
2334
2335xmlChar *
2336xmlStrsub(const xmlChar *str, int start, int len) {
2337 int i;
2338
2339 if (str == NULL) return(NULL);
2340 if (start < 0) return(NULL);
2341 if (len < 0) return(NULL);
2342
2343 for (i = 0;i < start;i++) {
2344 if (*str == 0) return(NULL);
2345 str++;
2346 }
2347 if (*str == 0) return(NULL);
2348 return(xmlStrndup(str, len));
2349}
2350
2351/**
2352 * xmlStrlen:
2353 * @str: the xmlChar * array
2354 *
2355 * length of a xmlChar's string
2356 *
2357 * Returns the number of xmlChar contained in the ARRAY.
2358 */
2359
2360int
2361xmlStrlen(const xmlChar *str) {
2362 int len = 0;
2363
2364 if (str == NULL) return(0);
2365 while (*str != 0) { /* non input consuming */
2366 str++;
2367 len++;
2368 }
2369 return(len);
2370}
2371
2372/**
2373 * xmlStrncat:
2374 * @cur: the original xmlChar * array
2375 * @add: the xmlChar * array added
2376 * @len: the length of @add
2377 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002378 * a strncat for array of xmlChar's, it will extend @cur with the len
Owen Taylor3473f882001-02-23 17:55:21 +00002379 * first bytes of @add.
2380 *
2381 * Returns a new xmlChar *, the original @cur is reallocated if needed
2382 * and should not be freed
2383 */
2384
2385xmlChar *
2386xmlStrncat(xmlChar *cur, const xmlChar *add, int len) {
2387 int size;
2388 xmlChar *ret;
2389
2390 if ((add == NULL) || (len == 0))
2391 return(cur);
2392 if (cur == NULL)
2393 return(xmlStrndup(add, len));
2394
2395 size = xmlStrlen(cur);
2396 ret = (xmlChar *) xmlRealloc(cur, (size + len + 1) * sizeof(xmlChar));
2397 if (ret == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002398 xmlErrMemory(NULL, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002399 return(cur);
2400 }
2401 memcpy(&ret[size], add, len * sizeof(xmlChar));
2402 ret[size + len] = 0;
2403 return(ret);
2404}
2405
2406/**
2407 * xmlStrcat:
2408 * @cur: the original xmlChar * array
2409 * @add: the xmlChar * array added
2410 *
2411 * a strcat for array of xmlChar's. Since they are supposed to be
2412 * encoded in UTF-8 or an encoding with 8bit based chars, we assume
2413 * a termination mark of '0'.
2414 *
2415 * Returns a new xmlChar * containing the concatenated string.
2416 */
2417xmlChar *
2418xmlStrcat(xmlChar *cur, const xmlChar *add) {
2419 const xmlChar *p = add;
2420
2421 if (add == NULL) return(cur);
2422 if (cur == NULL)
2423 return(xmlStrdup(add));
2424
2425 while (*p != 0) p++; /* non input consuming */
2426 return(xmlStrncat(cur, add, p - add));
2427}
2428
Aleksey Sanine7acf432003-10-02 20:05:27 +00002429/**
2430 * xmlStrPrintf:
2431 * @buf: the result buffer.
2432 * @len: the result buffer length.
2433 * @msg: the message with printf formatting.
2434 * @...: extra parameters for the message.
2435 *
2436 * Formats @msg and places result into @buf.
2437 *
2438 * Returns the number of characters written to @buf or -1 if an error occurs.
2439 */
2440int
2441xmlStrPrintf(xmlChar *buf, int len, const xmlChar *msg, ...) {
2442 va_list args;
2443 int ret;
2444
2445 if((buf == NULL) || (msg == NULL)) {
2446 return(-1);
2447 }
2448
2449 va_start(args, msg);
Daniel Veillardbb5abab2003-10-03 22:21:51 +00002450 ret = vsnprintf((char *) buf, len, (const char *) msg, args);
Aleksey Sanine7acf432003-10-02 20:05:27 +00002451 va_end(args);
2452
2453 return(ret);
2454}
2455
Owen Taylor3473f882001-02-23 17:55:21 +00002456/************************************************************************
2457 * *
2458 * Commodity functions, cleanup needed ? *
2459 * *
2460 ************************************************************************/
2461
2462/**
2463 * areBlanks:
2464 * @ctxt: an XML parser context
2465 * @str: a xmlChar *
2466 * @len: the size of @str
2467 *
2468 * Is this a sequence of blank chars that one can ignore ?
2469 *
2470 * Returns 1 if ignorable 0 otherwise.
2471 */
2472
2473static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len) {
2474 int i, ret;
2475 xmlNodePtr lastChild;
2476
Daniel Veillard05c13a22001-09-09 08:38:09 +00002477 /*
2478 * Don't spend time trying to differentiate them, the same callback is
2479 * used !
2480 */
2481 if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters)
Daniel Veillard2f362242001-03-02 17:36:21 +00002482 return(0);
2483
Owen Taylor3473f882001-02-23 17:55:21 +00002484 /*
2485 * Check for xml:space value.
2486 */
2487 if (*(ctxt->space) == 1)
2488 return(0);
2489
2490 /*
2491 * Check that the string is made of blanks
2492 */
2493 for (i = 0;i < len;i++)
2494 if (!(IS_BLANK(str[i]))) return(0);
2495
2496 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002497 * Look if the element is mixed content in the DTD if available
Owen Taylor3473f882001-02-23 17:55:21 +00002498 */
Daniel Veillard6dd398f2001-07-25 22:41:03 +00002499 if (ctxt->node == NULL) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002500 if (ctxt->myDoc != NULL) {
2501 ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
2502 if (ret == 0) return(1);
2503 if (ret == 1) return(0);
2504 }
2505
2506 /*
2507 * Otherwise, heuristic :-\
2508 */
Daniel Veillard561b7f82002-03-20 21:55:57 +00002509 if (RAW != '<') return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002510 if ((ctxt->node->children == NULL) &&
2511 (RAW == '<') && (NXT(1) == '/')) return(0);
2512
2513 lastChild = xmlGetLastChild(ctxt->node);
2514 if (lastChild == NULL) {
Daniel Veillard7db37732001-07-12 01:20:08 +00002515 if ((ctxt->node->type != XML_ELEMENT_NODE) &&
2516 (ctxt->node->content != NULL)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002517 } else if (xmlNodeIsText(lastChild))
2518 return(0);
2519 else if ((ctxt->node->children != NULL) &&
2520 (xmlNodeIsText(ctxt->node->children)))
2521 return(0);
2522 return(1);
2523}
2524
Owen Taylor3473f882001-02-23 17:55:21 +00002525/************************************************************************
2526 * *
2527 * Extra stuff for namespace support *
2528 * Relates to http://www.w3.org/TR/WD-xml-names *
2529 * *
2530 ************************************************************************/
2531
2532/**
2533 * xmlSplitQName:
2534 * @ctxt: an XML parser context
2535 * @name: an XML parser context
2536 * @prefix: a xmlChar **
2537 *
2538 * parse an UTF8 encoded XML qualified name string
2539 *
2540 * [NS 5] QName ::= (Prefix ':')? LocalPart
2541 *
2542 * [NS 6] Prefix ::= NCName
2543 *
2544 * [NS 7] LocalPart ::= NCName
2545 *
2546 * Returns the local part, and prefix is updated
2547 * to get the Prefix if any.
2548 */
2549
2550xmlChar *
2551xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
2552 xmlChar buf[XML_MAX_NAMELEN + 5];
2553 xmlChar *buffer = NULL;
2554 int len = 0;
2555 int max = XML_MAX_NAMELEN;
2556 xmlChar *ret = NULL;
2557 const xmlChar *cur = name;
2558 int c;
2559
2560 *prefix = NULL;
2561
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00002562 if (cur == NULL) return(NULL);
2563
Owen Taylor3473f882001-02-23 17:55:21 +00002564#ifndef XML_XML_NAMESPACE
2565 /* xml: prefix is not really a namespace */
2566 if ((cur[0] == 'x') && (cur[1] == 'm') &&
2567 (cur[2] == 'l') && (cur[3] == ':'))
2568 return(xmlStrdup(name));
2569#endif
2570
Daniel Veillard597bc482003-07-24 16:08:28 +00002571 /* nasty but well=formed */
Owen Taylor3473f882001-02-23 17:55:21 +00002572 if (cur[0] == ':')
2573 return(xmlStrdup(name));
2574
2575 c = *cur++;
2576 while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
2577 buf[len++] = c;
2578 c = *cur++;
2579 }
2580 if (len >= max) {
2581 /*
2582 * Okay someone managed to make a huge name, so he's ready to pay
2583 * for the processing speed.
2584 */
2585 max = len * 2;
2586
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002587 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002588 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002589 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002590 return(NULL);
2591 }
2592 memcpy(buffer, buf, len);
2593 while ((c != 0) && (c != ':')) { /* tested bigname.xml */
2594 if (len + 10 > max) {
2595 max *= 2;
2596 buffer = (xmlChar *) xmlRealloc(buffer,
2597 max * sizeof(xmlChar));
2598 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002599 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002600 return(NULL);
2601 }
2602 }
2603 buffer[len++] = c;
2604 c = *cur++;
2605 }
2606 buffer[len] = 0;
2607 }
2608
Daniel Veillard597bc482003-07-24 16:08:28 +00002609 /* nasty but well=formed
2610 if ((c == ':') && (*cur == 0)) {
2611 return(xmlStrdup(name));
2612 } */
2613
Owen Taylor3473f882001-02-23 17:55:21 +00002614 if (buffer == NULL)
2615 ret = xmlStrndup(buf, len);
2616 else {
2617 ret = buffer;
2618 buffer = NULL;
2619 max = XML_MAX_NAMELEN;
2620 }
2621
2622
2623 if (c == ':') {
Daniel Veillardbb284f42002-10-16 18:02:47 +00002624 c = *cur;
Owen Taylor3473f882001-02-23 17:55:21 +00002625 *prefix = ret;
Daniel Veillard597bc482003-07-24 16:08:28 +00002626 if (c == 0) {
Daniel Veillard8d73bcb2003-08-04 01:06:15 +00002627 return(xmlStrndup(BAD_CAST "", 0));
Daniel Veillard597bc482003-07-24 16:08:28 +00002628 }
Owen Taylor3473f882001-02-23 17:55:21 +00002629 len = 0;
2630
Daniel Veillardbb284f42002-10-16 18:02:47 +00002631 /*
2632 * Check that the first character is proper to start
2633 * a new name
2634 */
2635 if (!(((c >= 0x61) && (c <= 0x7A)) ||
2636 ((c >= 0x41) && (c <= 0x5A)) ||
2637 (c == '_') || (c == ':'))) {
2638 int l;
2639 int first = CUR_SCHAR(cur, l);
2640
2641 if (!IS_LETTER(first) && (first != '_')) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00002642 xmlFatalErrMsgStr(ctxt, XML_NS_ERR_QNAME,
Daniel Veillardbb284f42002-10-16 18:02:47 +00002643 "Name %s is not XML Namespace compliant\n",
Daniel Veillardbc92eca2003-09-15 09:48:06 +00002644 name);
Daniel Veillardbb284f42002-10-16 18:02:47 +00002645 }
2646 }
2647 cur++;
2648
Owen Taylor3473f882001-02-23 17:55:21 +00002649 while ((c != 0) && (len < max)) { /* tested bigname2.xml */
2650 buf[len++] = c;
2651 c = *cur++;
2652 }
2653 if (len >= max) {
2654 /*
2655 * Okay someone managed to make a huge name, so he's ready to pay
2656 * for the processing speed.
2657 */
2658 max = len * 2;
2659
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002660 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002661 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002662 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002663 return(NULL);
2664 }
2665 memcpy(buffer, buf, len);
2666 while (c != 0) { /* tested bigname2.xml */
2667 if (len + 10 > max) {
2668 max *= 2;
2669 buffer = (xmlChar *) xmlRealloc(buffer,
2670 max * sizeof(xmlChar));
2671 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002672 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002673 return(NULL);
2674 }
2675 }
2676 buffer[len++] = c;
2677 c = *cur++;
2678 }
2679 buffer[len] = 0;
2680 }
2681
2682 if (buffer == NULL)
2683 ret = xmlStrndup(buf, len);
2684 else {
2685 ret = buffer;
2686 }
2687 }
2688
2689 return(ret);
2690}
2691
2692/************************************************************************
2693 * *
2694 * The parser itself *
2695 * Relates to http://www.w3.org/TR/REC-xml *
2696 * *
2697 ************************************************************************/
2698
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002699static const xmlChar * xmlParseNameComplex(xmlParserCtxtPtr ctxt);
Daniel Veillarde57ec792003-09-10 10:50:59 +00002700static xmlChar * xmlParseAttValueInternal(xmlParserCtxtPtr ctxt,
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002701 int *len, int *alloc, int normalize);
Daniel Veillard0fb18932003-09-07 09:14:37 +00002702
Owen Taylor3473f882001-02-23 17:55:21 +00002703/**
2704 * xmlParseName:
2705 * @ctxt: an XML parser context
2706 *
2707 * parse an XML name.
2708 *
2709 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
2710 * CombiningChar | Extender
2711 *
2712 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
2713 *
2714 * [6] Names ::= Name (S Name)*
2715 *
2716 * Returns the Name parsed or NULL
2717 */
2718
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002719const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00002720xmlParseName(xmlParserCtxtPtr ctxt) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00002721 const xmlChar *in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002722 const xmlChar *ret;
Owen Taylor3473f882001-02-23 17:55:21 +00002723 int count = 0;
2724
2725 GROW;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002726
2727 /*
2728 * Accelerator for simple ASCII names
2729 */
2730 in = ctxt->input->cur;
2731 if (((*in >= 0x61) && (*in <= 0x7A)) ||
2732 ((*in >= 0x41) && (*in <= 0x5A)) ||
2733 (*in == '_') || (*in == ':')) {
2734 in++;
2735 while (((*in >= 0x61) && (*in <= 0x7A)) ||
2736 ((*in >= 0x41) && (*in <= 0x5A)) ||
2737 ((*in >= 0x30) && (*in <= 0x39)) ||
Daniel Veillard76d66f42001-05-16 21:05:17 +00002738 (*in == '_') || (*in == '-') ||
2739 (*in == ':') || (*in == '.'))
Daniel Veillard48b2f892001-02-25 16:11:03 +00002740 in++;
Daniel Veillard76d66f42001-05-16 21:05:17 +00002741 if ((*in > 0) && (*in < 0x80)) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00002742 count = in - ctxt->input->cur;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002743 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
Daniel Veillard48b2f892001-02-25 16:11:03 +00002744 ctxt->input->cur = in;
Daniel Veillard77a90a72003-03-22 00:04:05 +00002745 ctxt->nbChars += count;
2746 ctxt->input->col += count;
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002747 if (ret == NULL)
2748 xmlErrMemory(ctxt, NULL);
Daniel Veillard48b2f892001-02-25 16:11:03 +00002749 return(ret);
2750 }
2751 }
Daniel Veillard2f362242001-03-02 17:36:21 +00002752 return(xmlParseNameComplex(ctxt));
Daniel Veillard21a0f912001-02-25 19:54:14 +00002753}
Daniel Veillard48b2f892001-02-25 16:11:03 +00002754
Daniel Veillard46de64e2002-05-29 08:21:33 +00002755/**
2756 * xmlParseNameAndCompare:
2757 * @ctxt: an XML parser context
2758 *
2759 * parse an XML name and compares for match
2760 * (specialized for endtag parsing)
2761 *
Daniel Veillard46de64e2002-05-29 08:21:33 +00002762 * Returns NULL for an illegal name, (xmlChar*) 1 for success
2763 * and the name for mismatch
2764 */
2765
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002766static const xmlChar *
Daniel Veillard46de64e2002-05-29 08:21:33 +00002767xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) {
2768 const xmlChar *cmp = other;
2769 const xmlChar *in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002770 const xmlChar *ret;
Daniel Veillard46de64e2002-05-29 08:21:33 +00002771
2772 GROW;
2773
2774 in = ctxt->input->cur;
2775 while (*in != 0 && *in == *cmp) {
2776 ++in;
2777 ++cmp;
2778 }
2779 if (*cmp == 0 && (*in == '>' || IS_BLANK (*in))) {
2780 /* success */
2781 ctxt->input->cur = in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002782 return (const xmlChar*) 1;
Daniel Veillard46de64e2002-05-29 08:21:33 +00002783 }
2784 /* failure (or end of input buffer), check with full function */
2785 ret = xmlParseName (ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00002786 /* strings coming from the dictionnary direct compare possible */
2787 if (ret == other) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002788 return (const xmlChar*) 1;
Daniel Veillard46de64e2002-05-29 08:21:33 +00002789 }
2790 return ret;
2791}
2792
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002793static const xmlChar *
Daniel Veillard21a0f912001-02-25 19:54:14 +00002794xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
Daniel Veillard21a0f912001-02-25 19:54:14 +00002795 int len = 0, l;
2796 int c;
2797 int count = 0;
2798
2799 /*
2800 * Handler for more complex cases
2801 */
2802 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00002803 c = CUR_CHAR(l);
2804 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
2805 (!IS_LETTER(c) && (c != '_') &&
2806 (c != ':'))) {
2807 return(NULL);
2808 }
2809
2810 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
Daniel Veillard73b013f2003-09-30 12:36:01 +00002811 ((xmlIsLetter(c)) || (xmlIsDigit(c)) ||
Owen Taylor3473f882001-02-23 17:55:21 +00002812 (c == '.') || (c == '-') ||
2813 (c == '_') || (c == ':') ||
Daniel Veillard73b013f2003-09-30 12:36:01 +00002814 (xmlIsCombining(c)) ||
2815 (xmlIsExtender(c)))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002816 if (count++ > 100) {
2817 count = 0;
2818 GROW;
2819 }
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002820 len += l;
Owen Taylor3473f882001-02-23 17:55:21 +00002821 NEXTL(l);
2822 c = CUR_CHAR(l);
Owen Taylor3473f882001-02-23 17:55:21 +00002823 }
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002824 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
Owen Taylor3473f882001-02-23 17:55:21 +00002825}
2826
2827/**
2828 * xmlParseStringName:
2829 * @ctxt: an XML parser context
2830 * @str: a pointer to the string pointer (IN/OUT)
2831 *
2832 * parse an XML name.
2833 *
2834 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
2835 * CombiningChar | Extender
2836 *
2837 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
2838 *
2839 * [6] Names ::= Name (S Name)*
2840 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002841 * Returns the Name parsed or NULL. The @str pointer
Owen Taylor3473f882001-02-23 17:55:21 +00002842 * is updated to the current location in the string.
2843 */
2844
Daniel Veillard56a4cb82001-03-24 17:00:36 +00002845static xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00002846xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
2847 xmlChar buf[XML_MAX_NAMELEN + 5];
2848 const xmlChar *cur = *str;
2849 int len = 0, l;
2850 int c;
2851
2852 c = CUR_SCHAR(cur, l);
Daniel Veillard73b013f2003-09-30 12:36:01 +00002853 if (!xmlIsLetter(c) && (c != '_') &&
Owen Taylor3473f882001-02-23 17:55:21 +00002854 (c != ':')) {
2855 return(NULL);
2856 }
2857
Daniel Veillard73b013f2003-09-30 12:36:01 +00002858 while ((xmlIsLetter(c)) || (xmlIsDigit(c)) || /* test bigentname.xml */
Owen Taylor3473f882001-02-23 17:55:21 +00002859 (c == '.') || (c == '-') ||
2860 (c == '_') || (c == ':') ||
Daniel Veillard73b013f2003-09-30 12:36:01 +00002861 (xmlIsCombining(c)) ||
2862 (xmlIsExtender(c))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002863 COPY_BUF(l,buf,len,c);
2864 cur += l;
2865 c = CUR_SCHAR(cur, l);
2866 if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
2867 /*
2868 * Okay someone managed to make a huge name, so he's ready to pay
2869 * for the processing speed.
2870 */
2871 xmlChar *buffer;
2872 int max = len * 2;
2873
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002874 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002875 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002876 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002877 return(NULL);
2878 }
2879 memcpy(buffer, buf, len);
Daniel Veillard73b013f2003-09-30 12:36:01 +00002880 while ((xmlIsLetter(c)) || (xmlIsDigit(c)) ||
2881 /* test bigentname.xml */
Owen Taylor3473f882001-02-23 17:55:21 +00002882 (c == '.') || (c == '-') ||
2883 (c == '_') || (c == ':') ||
Daniel Veillard73b013f2003-09-30 12:36:01 +00002884 (xmlIsCombining(c)) ||
2885 (xmlIsExtender(c))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002886 if (len + 10 > max) {
2887 max *= 2;
2888 buffer = (xmlChar *) xmlRealloc(buffer,
2889 max * sizeof(xmlChar));
2890 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002891 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002892 return(NULL);
2893 }
2894 }
2895 COPY_BUF(l,buffer,len,c);
2896 cur += l;
2897 c = CUR_SCHAR(cur, l);
2898 }
2899 buffer[len] = 0;
2900 *str = cur;
2901 return(buffer);
2902 }
2903 }
2904 *str = cur;
2905 return(xmlStrndup(buf, len));
2906}
2907
2908/**
2909 * xmlParseNmtoken:
2910 * @ctxt: an XML parser context
2911 *
2912 * parse an XML Nmtoken.
2913 *
2914 * [7] Nmtoken ::= (NameChar)+
2915 *
2916 * [8] Nmtokens ::= Nmtoken (S Nmtoken)*
2917 *
2918 * Returns the Nmtoken parsed or NULL
2919 */
2920
2921xmlChar *
2922xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
2923 xmlChar buf[XML_MAX_NAMELEN + 5];
2924 int len = 0, l;
2925 int c;
2926 int count = 0;
2927
2928 GROW;
2929 c = CUR_CHAR(l);
2930
Daniel Veillard73b013f2003-09-30 12:36:01 +00002931 while ((xmlIsLetter(c)) || (xmlIsDigit(c)) || /* test bigtoken.xml */
Owen Taylor3473f882001-02-23 17:55:21 +00002932 (c == '.') || (c == '-') ||
2933 (c == '_') || (c == ':') ||
Daniel Veillard73b013f2003-09-30 12:36:01 +00002934 (xmlIsCombining(c)) ||
2935 (xmlIsExtender(c))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002936 if (count++ > 100) {
2937 count = 0;
2938 GROW;
2939 }
2940 COPY_BUF(l,buf,len,c);
2941 NEXTL(l);
2942 c = CUR_CHAR(l);
2943 if (len >= XML_MAX_NAMELEN) {
2944 /*
2945 * Okay someone managed to make a huge token, so he's ready to pay
2946 * for the processing speed.
2947 */
2948 xmlChar *buffer;
2949 int max = len * 2;
2950
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002951 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002952 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002953 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002954 return(NULL);
2955 }
2956 memcpy(buffer, buf, len);
Daniel Veillard73b013f2003-09-30 12:36:01 +00002957 while ((xmlIsLetter(c)) || (xmlIsDigit(c)) || /* test bigtoken.xml */
Owen Taylor3473f882001-02-23 17:55:21 +00002958 (c == '.') || (c == '-') ||
2959 (c == '_') || (c == ':') ||
Daniel Veillard73b013f2003-09-30 12:36:01 +00002960 (xmlIsCombining(c)) ||
2961 (xmlIsExtender(c))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002962 if (count++ > 100) {
2963 count = 0;
2964 GROW;
2965 }
2966 if (len + 10 > max) {
2967 max *= 2;
2968 buffer = (xmlChar *) xmlRealloc(buffer,
2969 max * sizeof(xmlChar));
2970 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002971 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002972 return(NULL);
2973 }
2974 }
2975 COPY_BUF(l,buffer,len,c);
2976 NEXTL(l);
2977 c = CUR_CHAR(l);
2978 }
2979 buffer[len] = 0;
2980 return(buffer);
2981 }
2982 }
2983 if (len == 0)
2984 return(NULL);
2985 return(xmlStrndup(buf, len));
2986}
2987
2988/**
2989 * xmlParseEntityValue:
2990 * @ctxt: an XML parser context
2991 * @orig: if non-NULL store a copy of the original entity value
2992 *
2993 * parse a value for ENTITY declarations
2994 *
2995 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
2996 * "'" ([^%&'] | PEReference | Reference)* "'"
2997 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002998 * Returns the EntityValue parsed with reference substituted or NULL
Owen Taylor3473f882001-02-23 17:55:21 +00002999 */
3000
3001xmlChar *
3002xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
3003 xmlChar *buf = NULL;
3004 int len = 0;
3005 int size = XML_PARSER_BUFFER_SIZE;
3006 int c, l;
3007 xmlChar stop;
3008 xmlChar *ret = NULL;
3009 const xmlChar *cur = NULL;
3010 xmlParserInputPtr input;
3011
3012 if (RAW == '"') stop = '"';
3013 else if (RAW == '\'') stop = '\'';
3014 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003015 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003016 return(NULL);
3017 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003018 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003019 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003020 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003021 return(NULL);
3022 }
3023
3024 /*
3025 * The content of the entity definition is copied in a buffer.
3026 */
3027
3028 ctxt->instate = XML_PARSER_ENTITY_VALUE;
3029 input = ctxt->input;
3030 GROW;
3031 NEXT;
3032 c = CUR_CHAR(l);
3033 /*
3034 * NOTE: 4.4.5 Included in Literal
3035 * When a parameter entity reference appears in a literal entity
3036 * value, ... a single or double quote character in the replacement
3037 * text is always treated as a normal data character and will not
3038 * terminate the literal.
3039 * In practice it means we stop the loop only when back at parsing
3040 * the initial entity and the quote is found
3041 */
Daniel Veillard73b013f2003-09-30 12:36:01 +00003042 while ((xmlIsChar(c)) && ((c != stop) || /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00003043 (ctxt->input != input))) {
3044 if (len + 5 >= size) {
3045 size *= 2;
3046 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3047 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003048 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003049 return(NULL);
3050 }
3051 }
3052 COPY_BUF(l,buf,len,c);
3053 NEXTL(l);
3054 /*
3055 * Pop-up of finished entities.
3056 */
3057 while ((RAW == 0) && (ctxt->inputNr > 1)) /* non input consuming */
3058 xmlPopInput(ctxt);
3059
3060 GROW;
3061 c = CUR_CHAR(l);
3062 if (c == 0) {
3063 GROW;
3064 c = CUR_CHAR(l);
3065 }
3066 }
3067 buf[len] = 0;
3068
3069 /*
3070 * Raise problem w.r.t. '&' and '%' being used in non-entities
3071 * reference constructs. Note Charref will be handled in
3072 * xmlStringDecodeEntities()
3073 */
3074 cur = buf;
3075 while (*cur != 0) { /* non input consuming */
3076 if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
3077 xmlChar *name;
3078 xmlChar tmp = *cur;
3079
3080 cur++;
3081 name = xmlParseStringName(ctxt, &cur);
3082 if ((name == NULL) || (*cur != ';')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003083 xmlFatalErrMsgInt(ctxt, XML_ERR_ENTITY_CHAR_ERROR,
Owen Taylor3473f882001-02-23 17:55:21 +00003084 "EntityValue: '%c' forbidden except for entities references\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003085 tmp);
Owen Taylor3473f882001-02-23 17:55:21 +00003086 }
Daniel Veillard5151c062001-10-23 13:10:19 +00003087 if ((tmp == '%') && (ctxt->inSubset == 1) &&
3088 (ctxt->inputNr == 1)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003089 xmlFatalErr(ctxt, XML_ERR_ENTITY_PE_INTERNAL, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003090 }
3091 if (name != NULL)
3092 xmlFree(name);
3093 }
3094 cur++;
3095 }
3096
3097 /*
3098 * Then PEReference entities are substituted.
3099 */
3100 if (c != stop) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003101 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003102 xmlFree(buf);
3103 } else {
3104 NEXT;
3105 /*
3106 * NOTE: 4.4.7 Bypassed
3107 * When a general entity reference appears in the EntityValue in
3108 * an entity declaration, it is bypassed and left as is.
3109 * so XML_SUBSTITUTE_REF is not set here.
3110 */
3111 ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF,
3112 0, 0, 0);
3113 if (orig != NULL)
3114 *orig = buf;
3115 else
3116 xmlFree(buf);
3117 }
3118
3119 return(ret);
3120}
3121
3122/**
Daniel Veillard01c13b52002-12-10 15:19:08 +00003123 * xmlParseAttValueComplex:
3124 * @ctxt: an XML parser context
Daniel Veillard0fb18932003-09-07 09:14:37 +00003125 * @len: the resulting attribute len
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003126 * @normalize: wether to apply the inner normalization
Daniel Veillard01c13b52002-12-10 15:19:08 +00003127 *
3128 * parse a value for an attribute, this is the fallback function
3129 * of xmlParseAttValue() when the attribute parsing requires handling
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003130 * of non-ASCII characters, or normalization compaction.
Daniel Veillard01c13b52002-12-10 15:19:08 +00003131 *
3132 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
3133 */
Daniel Veillard0fb18932003-09-07 09:14:37 +00003134static xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003135xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) {
Daniel Veillarde72c7562002-05-31 09:47:30 +00003136 xmlChar limit = 0;
3137 xmlChar *buf = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00003138 int len = 0;
3139 int buf_size = 0;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003140 int c, l, in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003141 xmlChar *current = NULL;
3142 xmlEntityPtr ent;
3143
Owen Taylor3473f882001-02-23 17:55:21 +00003144 if (NXT(0) == '"') {
3145 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3146 limit = '"';
3147 NEXT;
3148 } else if (NXT(0) == '\'') {
3149 limit = '\'';
3150 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3151 NEXT;
3152 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003153 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003154 return(NULL);
3155 }
3156
3157 /*
3158 * allocate a translation buffer.
3159 */
3160 buf_size = XML_PARSER_BUFFER_SIZE;
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003161 buf = (xmlChar *) xmlMallocAtomic(buf_size * sizeof(xmlChar));
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003162 if (buf == NULL) goto mem_error;
Owen Taylor3473f882001-02-23 17:55:21 +00003163
3164 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003165 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +00003166 */
3167 c = CUR_CHAR(l);
Daniel Veillardfdc91562002-07-01 21:52:03 +00003168 while ((NXT(0) != limit) && /* checked */
3169 (c != '<')) {
Owen Taylor3473f882001-02-23 17:55:21 +00003170 if (c == 0) break;
Daniel Veillardfdc91562002-07-01 21:52:03 +00003171 if (c == '&') {
Daniel Veillard62998c02003-09-15 12:56:36 +00003172 in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003173 if (NXT(1) == '#') {
3174 int val = xmlParseCharRef(ctxt);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003175
Owen Taylor3473f882001-02-23 17:55:21 +00003176 if (val == '&') {
Daniel Veillard319a7422001-09-11 09:27:09 +00003177 if (ctxt->replaceEntities) {
3178 if (len > buf_size - 10) {
3179 growBuffer(buf);
3180 }
3181 buf[len++] = '&';
3182 } else {
3183 /*
3184 * The reparsing will be done in xmlStringGetNodeList()
3185 * called by the attribute() function in SAX.c
3186 */
Daniel Veillard319a7422001-09-11 09:27:09 +00003187 if (len > buf_size - 10) {
3188 growBuffer(buf);
3189 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003190 buf[len++] = '&';
3191 buf[len++] = '#';
3192 buf[len++] = '3';
3193 buf[len++] = '8';
3194 buf[len++] = ';';
Owen Taylor3473f882001-02-23 17:55:21 +00003195 }
3196 } else {
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00003197 if (len > buf_size - 10) {
3198 growBuffer(buf);
3199 }
Owen Taylor3473f882001-02-23 17:55:21 +00003200 len += xmlCopyChar(0, &buf[len], val);
3201 }
3202 } else {
3203 ent = xmlParseEntityRef(ctxt);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003204 if ((ent != NULL) &&
3205 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
3206 if (len > buf_size - 10) {
3207 growBuffer(buf);
3208 }
3209 if ((ctxt->replaceEntities == 0) &&
3210 (ent->content[0] == '&')) {
3211 buf[len++] = '&';
3212 buf[len++] = '#';
3213 buf[len++] = '3';
3214 buf[len++] = '8';
3215 buf[len++] = ';';
3216 } else {
3217 buf[len++] = ent->content[0];
3218 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003219 } else if ((ent != NULL) &&
3220 (ctxt->replaceEntities != 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003221 xmlChar *rep;
3222
3223 if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
3224 rep = xmlStringDecodeEntities(ctxt, ent->content,
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003225 XML_SUBSTITUTE_REF,
3226 0, 0, 0);
Owen Taylor3473f882001-02-23 17:55:21 +00003227 if (rep != NULL) {
3228 current = rep;
3229 while (*current != 0) { /* non input consuming */
3230 buf[len++] = *current++;
3231 if (len > buf_size - 10) {
3232 growBuffer(buf);
3233 }
3234 }
3235 xmlFree(rep);
3236 }
3237 } else {
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00003238 if (len > buf_size - 10) {
3239 growBuffer(buf);
3240 }
Owen Taylor3473f882001-02-23 17:55:21 +00003241 if (ent->content != NULL)
3242 buf[len++] = ent->content[0];
3243 }
3244 } else if (ent != NULL) {
3245 int i = xmlStrlen(ent->name);
3246 const xmlChar *cur = ent->name;
3247
3248 /*
3249 * This may look absurd but is needed to detect
3250 * entities problems
3251 */
3252 if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
3253 (ent->content != NULL)) {
3254 xmlChar *rep;
3255 rep = xmlStringDecodeEntities(ctxt, ent->content,
3256 XML_SUBSTITUTE_REF, 0, 0, 0);
3257 if (rep != NULL)
3258 xmlFree(rep);
3259 }
3260
3261 /*
3262 * Just output the reference
3263 */
3264 buf[len++] = '&';
3265 if (len > buf_size - i - 10) {
3266 growBuffer(buf);
3267 }
3268 for (;i > 0;i--)
3269 buf[len++] = *cur++;
3270 buf[len++] = ';';
3271 }
3272 }
3273 } else {
3274 if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003275 if ((len != 0) || (!normalize)) {
3276 if ((!normalize) || (!in_space)) {
3277 COPY_BUF(l,buf,len,0x20);
3278 if (len > buf_size - 10) {
3279 growBuffer(buf);
3280 }
3281 }
3282 in_space = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003283 }
3284 } else {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003285 in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003286 COPY_BUF(l,buf,len,c);
3287 if (len > buf_size - 10) {
3288 growBuffer(buf);
3289 }
3290 }
3291 NEXTL(l);
3292 }
3293 GROW;
3294 c = CUR_CHAR(l);
3295 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003296 if ((in_space) && (normalize)) {
3297 while (buf[len - 1] == 0x20) len--;
3298 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00003299 buf[len] = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003300 if (RAW == '<') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003301 xmlFatalErr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003302 } else if (RAW != limit) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003303 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
3304 "AttValue: ' expected\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003305 } else
3306 NEXT;
Daniel Veillard0fb18932003-09-07 09:14:37 +00003307 if (attlen != NULL) *attlen = len;
Owen Taylor3473f882001-02-23 17:55:21 +00003308 return(buf);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003309
3310mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003311 xmlErrMemory(ctxt, NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003312 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003313}
3314
3315/**
Daniel Veillard0fb18932003-09-07 09:14:37 +00003316 * xmlParseAttValue:
3317 * @ctxt: an XML parser context
3318 *
3319 * parse a value for an attribute
3320 * Note: the parser won't do substitution of entities here, this
3321 * will be handled later in xmlStringGetNodeList
3322 *
3323 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
3324 * "'" ([^<&'] | Reference)* "'"
3325 *
3326 * 3.3.3 Attribute-Value Normalization:
3327 * Before the value of an attribute is passed to the application or
3328 * checked for validity, the XML processor must normalize it as follows:
3329 * - a character reference is processed by appending the referenced
3330 * character to the attribute value
3331 * - an entity reference is processed by recursively processing the
3332 * replacement text of the entity
3333 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
3334 * appending #x20 to the normalized value, except that only a single
3335 * #x20 is appended for a "#xD#xA" sequence that is part of an external
3336 * parsed entity or the literal entity value of an internal parsed entity
3337 * - other characters are processed by appending them to the normalized value
3338 * If the declared value is not CDATA, then the XML processor must further
3339 * process the normalized attribute value by discarding any leading and
3340 * trailing space (#x20) characters, and by replacing sequences of space
3341 * (#x20) characters by a single space (#x20) character.
3342 * All attributes for which no declaration has been read should be treated
3343 * by a non-validating parser as if declared CDATA.
3344 *
3345 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
3346 */
3347
3348
3349xmlChar *
3350xmlParseAttValue(xmlParserCtxtPtr ctxt) {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003351 return(xmlParseAttValueInternal(ctxt, NULL, NULL, 0));
Daniel Veillard0fb18932003-09-07 09:14:37 +00003352}
3353
3354/**
Owen Taylor3473f882001-02-23 17:55:21 +00003355 * xmlParseSystemLiteral:
3356 * @ctxt: an XML parser context
3357 *
3358 * parse an XML Literal
3359 *
3360 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
3361 *
3362 * Returns the SystemLiteral parsed or NULL
3363 */
3364
3365xmlChar *
3366xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
3367 xmlChar *buf = NULL;
3368 int len = 0;
3369 int size = XML_PARSER_BUFFER_SIZE;
3370 int cur, l;
3371 xmlChar stop;
3372 int state = ctxt->instate;
3373 int count = 0;
3374
3375 SHRINK;
3376 if (RAW == '"') {
3377 NEXT;
3378 stop = '"';
3379 } else if (RAW == '\'') {
3380 NEXT;
3381 stop = '\'';
3382 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003383 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003384 return(NULL);
3385 }
3386
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003387 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003388 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003389 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003390 return(NULL);
3391 }
3392 ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
3393 cur = CUR_CHAR(l);
Daniel Veillard73b013f2003-09-30 12:36:01 +00003394 while ((xmlIsChar(cur)) && (cur != stop)) { /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00003395 if (len + 5 >= size) {
3396 size *= 2;
3397 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3398 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003399 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003400 ctxt->instate = (xmlParserInputState) state;
3401 return(NULL);
3402 }
3403 }
3404 count++;
3405 if (count > 50) {
3406 GROW;
3407 count = 0;
3408 }
3409 COPY_BUF(l,buf,len,cur);
3410 NEXTL(l);
3411 cur = CUR_CHAR(l);
3412 if (cur == 0) {
3413 GROW;
3414 SHRINK;
3415 cur = CUR_CHAR(l);
3416 }
3417 }
3418 buf[len] = 0;
3419 ctxt->instate = (xmlParserInputState) state;
Daniel Veillard73b013f2003-09-30 12:36:01 +00003420 if (!xmlIsChar(cur)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003421 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003422 } else {
3423 NEXT;
3424 }
3425 return(buf);
3426}
3427
3428/**
3429 * xmlParsePubidLiteral:
3430 * @ctxt: an XML parser context
3431 *
3432 * parse an XML public literal
3433 *
3434 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
3435 *
3436 * Returns the PubidLiteral parsed or NULL.
3437 */
3438
3439xmlChar *
3440xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
3441 xmlChar *buf = NULL;
3442 int len = 0;
3443 int size = XML_PARSER_BUFFER_SIZE;
3444 xmlChar cur;
3445 xmlChar stop;
3446 int count = 0;
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003447 xmlParserInputState oldstate = ctxt->instate;
Owen Taylor3473f882001-02-23 17:55:21 +00003448
3449 SHRINK;
3450 if (RAW == '"') {
3451 NEXT;
3452 stop = '"';
3453 } else if (RAW == '\'') {
3454 NEXT;
3455 stop = '\'';
3456 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003457 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003458 return(NULL);
3459 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003460 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003461 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003462 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003463 return(NULL);
3464 }
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003465 ctxt->instate = XML_PARSER_PUBLIC_LITERAL;
Owen Taylor3473f882001-02-23 17:55:21 +00003466 cur = CUR;
3467 while ((IS_PUBIDCHAR(cur)) && (cur != stop)) { /* checked */
3468 if (len + 1 >= size) {
3469 size *= 2;
3470 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3471 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003472 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003473 return(NULL);
3474 }
3475 }
3476 buf[len++] = cur;
3477 count++;
3478 if (count > 50) {
3479 GROW;
3480 count = 0;
3481 }
3482 NEXT;
3483 cur = CUR;
3484 if (cur == 0) {
3485 GROW;
3486 SHRINK;
3487 cur = CUR;
3488 }
3489 }
3490 buf[len] = 0;
3491 if (cur != stop) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003492 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003493 } else {
3494 NEXT;
3495 }
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003496 ctxt->instate = oldstate;
Owen Taylor3473f882001-02-23 17:55:21 +00003497 return(buf);
3498}
3499
Daniel Veillard48b2f892001-02-25 16:11:03 +00003500void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata);
Owen Taylor3473f882001-02-23 17:55:21 +00003501/**
3502 * xmlParseCharData:
3503 * @ctxt: an XML parser context
3504 * @cdata: int indicating whether we are within a CDATA section
3505 *
3506 * parse a CharData section.
3507 * if we are within a CDATA section ']]>' marks an end of section.
3508 *
3509 * The right angle bracket (>) may be represented using the string "&gt;",
3510 * and must, for compatibility, be escaped using "&gt;" or a character
3511 * reference when it appears in the string "]]>" in content, when that
3512 * string is not marking the end of a CDATA section.
3513 *
3514 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
3515 */
3516
3517void
3518xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00003519 const xmlChar *in;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003520 int nbchar = 0;
Daniel Veillard50582112001-03-26 22:52:16 +00003521 int line = ctxt->input->line;
3522 int col = ctxt->input->col;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003523
3524 SHRINK;
3525 GROW;
3526 /*
3527 * Accelerated common case where input don't need to be
3528 * modified before passing it to the handler.
3529 */
Daniel Veillardfdc91562002-07-01 21:52:03 +00003530 if (!cdata) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00003531 in = ctxt->input->cur;
3532 do {
Daniel Veillard3ed155f2001-04-29 19:56:59 +00003533get_more:
Daniel Veillard561b7f82002-03-20 21:55:57 +00003534 while (((*in >= 0x20) && (*in != '<') && (*in != ']') &&
3535 (*in != '&') && (*in <= 0x7F)) || (*in == 0x09))
Daniel Veillard48b2f892001-02-25 16:11:03 +00003536 in++;
Daniel Veillard561b7f82002-03-20 21:55:57 +00003537 if (*in == 0xA) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00003538 ctxt->input->line++;
Daniel Veillard3ed155f2001-04-29 19:56:59 +00003539 in++;
Daniel Veillard561b7f82002-03-20 21:55:57 +00003540 while (*in == 0xA) {
Daniel Veillard3ed155f2001-04-29 19:56:59 +00003541 ctxt->input->line++;
3542 in++;
3543 }
3544 goto get_more;
Daniel Veillard561b7f82002-03-20 21:55:57 +00003545 }
3546 if (*in == ']') {
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00003547 if ((in[1] == ']') && (in[2] == '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003548 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00003549 ctxt->input->cur = in;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00003550 return;
3551 }
3552 in++;
3553 goto get_more;
3554 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00003555 nbchar = in - ctxt->input->cur;
Daniel Veillard80f32572001-03-07 19:45:40 +00003556 if (nbchar > 0) {
Daniel Veillard40412cd2003-09-03 13:28:32 +00003557 if ((ctxt->sax->ignorableWhitespace !=
3558 ctxt->sax->characters) &&
3559 (IS_BLANK(*ctxt->input->cur))) {
Daniel Veillarda7374592001-05-10 14:17:55 +00003560 const xmlChar *tmp = ctxt->input->cur;
3561 ctxt->input->cur = in;
Daniel Veillard40412cd2003-09-03 13:28:32 +00003562
Daniel Veillarda7374592001-05-10 14:17:55 +00003563 if (areBlanks(ctxt, tmp, nbchar)) {
Daniel Veillard40412cd2003-09-03 13:28:32 +00003564 ctxt->sax->ignorableWhitespace(ctxt->userData,
3565 tmp, nbchar);
3566 } else if (ctxt->sax->characters != NULL)
3567 ctxt->sax->characters(ctxt->userData,
3568 tmp, nbchar);
Daniel Veillard3ed27bd2001-06-17 17:58:17 +00003569 line = ctxt->input->line;
3570 col = ctxt->input->col;
Daniel Veillard80f32572001-03-07 19:45:40 +00003571 } else {
3572 if (ctxt->sax->characters != NULL)
3573 ctxt->sax->characters(ctxt->userData,
3574 ctxt->input->cur, nbchar);
Daniel Veillard3ed27bd2001-06-17 17:58:17 +00003575 line = ctxt->input->line;
3576 col = ctxt->input->col;
Daniel Veillard80f32572001-03-07 19:45:40 +00003577 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00003578 }
3579 ctxt->input->cur = in;
Daniel Veillard561b7f82002-03-20 21:55:57 +00003580 if (*in == 0xD) {
3581 in++;
3582 if (*in == 0xA) {
3583 ctxt->input->cur = in;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003584 in++;
Daniel Veillard561b7f82002-03-20 21:55:57 +00003585 ctxt->input->line++;
3586 continue; /* while */
Daniel Veillard48b2f892001-02-25 16:11:03 +00003587 }
Daniel Veillard561b7f82002-03-20 21:55:57 +00003588 in--;
3589 }
3590 if (*in == '<') {
3591 return;
3592 }
3593 if (*in == '&') {
3594 return;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003595 }
3596 SHRINK;
3597 GROW;
3598 in = ctxt->input->cur;
William M. Brackc07329e2003-09-08 01:57:30 +00003599 } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
Daniel Veillard48b2f892001-02-25 16:11:03 +00003600 nbchar = 0;
3601 }
Daniel Veillard50582112001-03-26 22:52:16 +00003602 ctxt->input->line = line;
3603 ctxt->input->col = col;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003604 xmlParseCharDataComplex(ctxt, cdata);
3605}
3606
Daniel Veillard01c13b52002-12-10 15:19:08 +00003607/**
3608 * xmlParseCharDataComplex:
3609 * @ctxt: an XML parser context
3610 * @cdata: int indicating whether we are within a CDATA section
3611 *
3612 * parse a CharData section.this is the fallback function
3613 * of xmlParseCharData() when the parsing requires handling
3614 * of non-ASCII characters.
3615 */
Daniel Veillard48b2f892001-02-25 16:11:03 +00003616void
3617xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) {
Owen Taylor3473f882001-02-23 17:55:21 +00003618 xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
3619 int nbchar = 0;
3620 int cur, l;
3621 int count = 0;
3622
3623 SHRINK;
3624 GROW;
3625 cur = CUR_CHAR(l);
Daniel Veillardfdc91562002-07-01 21:52:03 +00003626 while ((cur != '<') && /* checked */
3627 (cur != '&') &&
Daniel Veillard73b013f2003-09-30 12:36:01 +00003628 (xmlIsChar(cur))) /* test also done in xmlCurrentChar() */ {
Owen Taylor3473f882001-02-23 17:55:21 +00003629 if ((cur == ']') && (NXT(1) == ']') &&
3630 (NXT(2) == '>')) {
3631 if (cdata) break;
3632 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003633 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003634 }
3635 }
3636 COPY_BUF(l,buf,nbchar,cur);
3637 if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
Daniel Veillard092643b2003-09-25 14:29:29 +00003638 buf[nbchar] = 0;
3639
Owen Taylor3473f882001-02-23 17:55:21 +00003640 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003641 * OK the segment is to be consumed as chars.
Owen Taylor3473f882001-02-23 17:55:21 +00003642 */
3643 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
3644 if (areBlanks(ctxt, buf, nbchar)) {
3645 if (ctxt->sax->ignorableWhitespace != NULL)
3646 ctxt->sax->ignorableWhitespace(ctxt->userData,
3647 buf, nbchar);
3648 } else {
3649 if (ctxt->sax->characters != NULL)
3650 ctxt->sax->characters(ctxt->userData, buf, nbchar);
3651 }
3652 }
3653 nbchar = 0;
3654 }
3655 count++;
3656 if (count > 50) {
3657 GROW;
3658 count = 0;
3659 }
3660 NEXTL(l);
3661 cur = CUR_CHAR(l);
3662 }
3663 if (nbchar != 0) {
Daniel Veillard092643b2003-09-25 14:29:29 +00003664 buf[nbchar] = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003665 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003666 * OK the segment is to be consumed as chars.
Owen Taylor3473f882001-02-23 17:55:21 +00003667 */
3668 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
3669 if (areBlanks(ctxt, buf, nbchar)) {
3670 if (ctxt->sax->ignorableWhitespace != NULL)
3671 ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
3672 } else {
3673 if (ctxt->sax->characters != NULL)
3674 ctxt->sax->characters(ctxt->userData, buf, nbchar);
3675 }
3676 }
3677 }
3678}
3679
3680/**
3681 * xmlParseExternalID:
3682 * @ctxt: an XML parser context
3683 * @publicID: a xmlChar** receiving PubidLiteral
3684 * @strict: indicate whether we should restrict parsing to only
3685 * production [75], see NOTE below
3686 *
3687 * Parse an External ID or a Public ID
3688 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003689 * NOTE: Productions [75] and [83] interact badly since [75] can generate
Owen Taylor3473f882001-02-23 17:55:21 +00003690 * 'PUBLIC' S PubidLiteral S SystemLiteral
3691 *
3692 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
3693 * | 'PUBLIC' S PubidLiteral S SystemLiteral
3694 *
3695 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
3696 *
3697 * Returns the function returns SystemLiteral and in the second
3698 * case publicID receives PubidLiteral, is strict is off
3699 * it is possible to return NULL and have publicID set.
3700 */
3701
3702xmlChar *
3703xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
3704 xmlChar *URI = NULL;
3705
3706 SHRINK;
Daniel Veillard146c9122001-03-22 15:22:27 +00003707
3708 *publicID = NULL;
Daniel Veillard8f597c32003-10-06 08:19:27 +00003709 if (memcmp(CUR_PTR, "SYSTEM", 6) == 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00003710 SKIP(6);
3711 if (!IS_BLANK(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003712 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
3713 "Space required after 'SYSTEM'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003714 }
3715 SKIP_BLANKS;
3716 URI = xmlParseSystemLiteral(ctxt);
3717 if (URI == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003718 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003719 }
Daniel Veillard8f597c32003-10-06 08:19:27 +00003720 } else if (memcmp(CUR_PTR, "PUBLIC", 6) == 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00003721 SKIP(6);
3722 if (!IS_BLANK(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003723 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00003724 "Space required after 'PUBLIC'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003725 }
3726 SKIP_BLANKS;
3727 *publicID = xmlParsePubidLiteral(ctxt);
3728 if (*publicID == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003729 xmlFatalErr(ctxt, XML_ERR_PUBID_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003730 }
3731 if (strict) {
3732 /*
3733 * We don't handle [83] so "S SystemLiteral" is required.
3734 */
3735 if (!IS_BLANK(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003736 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00003737 "Space required after the Public Identifier\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003738 }
3739 } else {
3740 /*
3741 * We handle [83] so we return immediately, if
3742 * "S SystemLiteral" is not detected. From a purely parsing
3743 * point of view that's a nice mess.
3744 */
3745 const xmlChar *ptr;
3746 GROW;
3747
3748 ptr = CUR_PTR;
3749 if (!IS_BLANK(*ptr)) return(NULL);
3750
3751 while (IS_BLANK(*ptr)) ptr++; /* TODO: dangerous, fix ! */
3752 if ((*ptr != '\'') && (*ptr != '"')) return(NULL);
3753 }
3754 SKIP_BLANKS;
3755 URI = xmlParseSystemLiteral(ctxt);
3756 if (URI == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003757 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003758 }
3759 }
3760 return(URI);
3761}
3762
3763/**
3764 * xmlParseComment:
3765 * @ctxt: an XML parser context
3766 *
3767 * Skip an XML (SGML) comment <!-- .... -->
3768 * The spec says that "For compatibility, the string "--" (double-hyphen)
3769 * must not occur within comments. "
3770 *
3771 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
3772 */
3773void
3774xmlParseComment(xmlParserCtxtPtr ctxt) {
3775 xmlChar *buf = NULL;
3776 int len;
3777 int size = XML_PARSER_BUFFER_SIZE;
3778 int q, ql;
3779 int r, rl;
3780 int cur, l;
3781 xmlParserInputState state;
3782 xmlParserInputPtr input = ctxt->input;
3783 int count = 0;
3784
3785 /*
3786 * Check that there is a comment right here.
3787 */
3788 if ((RAW != '<') || (NXT(1) != '!') ||
3789 (NXT(2) != '-') || (NXT(3) != '-')) return;
3790
3791 state = ctxt->instate;
3792 ctxt->instate = XML_PARSER_COMMENT;
3793 SHRINK;
3794 SKIP(4);
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003795 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003796 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003797 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003798 ctxt->instate = state;
3799 return;
3800 }
3801 q = CUR_CHAR(ql);
3802 NEXTL(ql);
3803 r = CUR_CHAR(rl);
3804 NEXTL(rl);
3805 cur = CUR_CHAR(l);
3806 len = 0;
Daniel Veillard73b013f2003-09-30 12:36:01 +00003807 while (xmlIsChar(cur) && /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00003808 ((cur != '>') ||
3809 (r != '-') || (q != '-'))) {
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00003810 if ((r == '-') && (q == '-')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003811 xmlFatalErr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003812 }
3813 if (len + 5 >= size) {
3814 size *= 2;
3815 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3816 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003817 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003818 ctxt->instate = state;
3819 return;
3820 }
3821 }
3822 COPY_BUF(ql,buf,len,q);
3823 q = r;
3824 ql = rl;
3825 r = cur;
3826 rl = l;
3827
3828 count++;
3829 if (count > 50) {
3830 GROW;
3831 count = 0;
3832 }
3833 NEXTL(l);
3834 cur = CUR_CHAR(l);
3835 if (cur == 0) {
3836 SHRINK;
3837 GROW;
3838 cur = CUR_CHAR(l);
3839 }
3840 }
3841 buf[len] = 0;
Daniel Veillard73b013f2003-09-30 12:36:01 +00003842 if (!xmlIsChar(cur)) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00003843 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00003844 "Comment not terminated \n<!--%.50s\n", buf);
Owen Taylor3473f882001-02-23 17:55:21 +00003845 xmlFree(buf);
3846 } else {
3847 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003848 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
3849 "Comment doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003850 }
3851 NEXT;
3852 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
3853 (!ctxt->disableSAX))
3854 ctxt->sax->comment(ctxt->userData, buf);
3855 xmlFree(buf);
3856 }
3857 ctxt->instate = state;
3858}
3859
3860/**
3861 * xmlParsePITarget:
3862 * @ctxt: an XML parser context
3863 *
3864 * parse the name of a PI
3865 *
3866 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
3867 *
3868 * Returns the PITarget name or NULL
3869 */
3870
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003871const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00003872xmlParsePITarget(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003873 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00003874
3875 name = xmlParseName(ctxt);
3876 if ((name != NULL) &&
3877 ((name[0] == 'x') || (name[0] == 'X')) &&
3878 ((name[1] == 'm') || (name[1] == 'M')) &&
3879 ((name[2] == 'l') || (name[2] == 'L'))) {
3880 int i;
3881 if ((name[0] == 'x') && (name[1] == 'm') &&
3882 (name[2] == 'l') && (name[3] == 0)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003883 xmlFatalErrMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
Owen Taylor3473f882001-02-23 17:55:21 +00003884 "XML declaration allowed only at the start of the document\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003885 return(name);
3886 } else if (name[3] == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003887 xmlFatalErr(ctxt, XML_ERR_RESERVED_XML_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003888 return(name);
3889 }
3890 for (i = 0;;i++) {
3891 if (xmlW3CPIs[i] == NULL) break;
3892 if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
3893 return(name);
3894 }
Daniel Veillard24eb9782003-10-04 21:08:09 +00003895 xmlWarningMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
3896 "xmlParsePITarget: invalid name prefix 'xml'\n",
3897 NULL, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003898 }
3899 return(name);
3900}
3901
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00003902#ifdef LIBXML_CATALOG_ENABLED
3903/**
3904 * xmlParseCatalogPI:
3905 * @ctxt: an XML parser context
3906 * @catalog: the PI value string
3907 *
3908 * parse an XML Catalog Processing Instruction.
3909 *
3910 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
3911 *
3912 * Occurs only if allowed by the user and if happening in the Misc
3913 * part of the document before any doctype informations
3914 * This will add the given catalog to the parsing context in order
3915 * to be used if there is a resolution need further down in the document
3916 */
3917
3918static void
3919xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
3920 xmlChar *URL = NULL;
3921 const xmlChar *tmp, *base;
3922 xmlChar marker;
3923
3924 tmp = catalog;
3925 while (IS_BLANK(*tmp)) tmp++;
3926 if (xmlStrncmp(tmp, BAD_CAST"catalog", 7))
3927 goto error;
3928 tmp += 7;
3929 while (IS_BLANK(*tmp)) tmp++;
3930 if (*tmp != '=') {
3931 return;
3932 }
3933 tmp++;
3934 while (IS_BLANK(*tmp)) tmp++;
3935 marker = *tmp;
3936 if ((marker != '\'') && (marker != '"'))
3937 goto error;
3938 tmp++;
3939 base = tmp;
3940 while ((*tmp != 0) && (*tmp != marker)) tmp++;
3941 if (*tmp == 0)
3942 goto error;
3943 URL = xmlStrndup(base, tmp - base);
3944 tmp++;
3945 while (IS_BLANK(*tmp)) tmp++;
3946 if (*tmp != 0)
3947 goto error;
3948
3949 if (URL != NULL) {
3950 ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, URL);
3951 xmlFree(URL);
3952 }
3953 return;
3954
3955error:
Daniel Veillard24eb9782003-10-04 21:08:09 +00003956 xmlWarningMsg(ctxt, XML_WAR_CATALOG_PI,
3957 "Catalog PI syntax error: %s\n",
3958 catalog, NULL);
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00003959 if (URL != NULL)
3960 xmlFree(URL);
3961}
3962#endif
3963
Owen Taylor3473f882001-02-23 17:55:21 +00003964/**
3965 * xmlParsePI:
3966 * @ctxt: an XML parser context
3967 *
3968 * parse an XML Processing Instruction.
3969 *
3970 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
3971 *
3972 * The processing is transfered to SAX once parsed.
3973 */
3974
3975void
3976xmlParsePI(xmlParserCtxtPtr ctxt) {
3977 xmlChar *buf = NULL;
3978 int len = 0;
3979 int size = XML_PARSER_BUFFER_SIZE;
3980 int cur, l;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003981 const xmlChar *target;
Owen Taylor3473f882001-02-23 17:55:21 +00003982 xmlParserInputState state;
3983 int count = 0;
3984
3985 if ((RAW == '<') && (NXT(1) == '?')) {
3986 xmlParserInputPtr input = ctxt->input;
3987 state = ctxt->instate;
3988 ctxt->instate = XML_PARSER_PI;
3989 /*
3990 * this is a Processing Instruction.
3991 */
3992 SKIP(2);
3993 SHRINK;
3994
3995 /*
3996 * Parse the target name and check for special support like
3997 * namespace.
3998 */
3999 target = xmlParsePITarget(ctxt);
4000 if (target != NULL) {
4001 if ((RAW == '?') && (NXT(1) == '>')) {
4002 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004003 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4004 "PI declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004005 }
4006 SKIP(2);
4007
4008 /*
4009 * SAX: PI detected.
4010 */
4011 if ((ctxt->sax) && (!ctxt->disableSAX) &&
4012 (ctxt->sax->processingInstruction != NULL))
4013 ctxt->sax->processingInstruction(ctxt->userData,
4014 target, NULL);
4015 ctxt->instate = state;
Owen Taylor3473f882001-02-23 17:55:21 +00004016 return;
4017 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00004018 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00004019 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004020 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004021 ctxt->instate = state;
4022 return;
4023 }
4024 cur = CUR;
4025 if (!IS_BLANK(cur)) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00004026 xmlFatalErrMsgStr(ctxt, XML_ERR_SPACE_REQUIRED,
4027 "ParsePI: PI %s space expected\n", target);
Owen Taylor3473f882001-02-23 17:55:21 +00004028 }
4029 SKIP_BLANKS;
4030 cur = CUR_CHAR(l);
Daniel Veillard73b013f2003-09-30 12:36:01 +00004031 while (xmlIsChar(cur) && /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00004032 ((cur != '?') || (NXT(1) != '>'))) {
4033 if (len + 5 >= size) {
4034 size *= 2;
4035 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4036 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004037 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004038 ctxt->instate = state;
4039 return;
4040 }
4041 }
4042 count++;
4043 if (count > 50) {
4044 GROW;
4045 count = 0;
4046 }
4047 COPY_BUF(l,buf,len,cur);
4048 NEXTL(l);
4049 cur = CUR_CHAR(l);
4050 if (cur == 0) {
4051 SHRINK;
4052 GROW;
4053 cur = CUR_CHAR(l);
4054 }
4055 }
4056 buf[len] = 0;
4057 if (cur != '?') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00004058 xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
4059 "ParsePI: PI %s never end ...\n", target);
Owen Taylor3473f882001-02-23 17:55:21 +00004060 } else {
4061 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004062 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4063 "PI declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004064 }
4065 SKIP(2);
4066
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00004067#ifdef LIBXML_CATALOG_ENABLED
4068 if (((state == XML_PARSER_MISC) ||
4069 (state == XML_PARSER_START)) &&
4070 (xmlStrEqual(target, XML_CATALOG_PI))) {
4071 xmlCatalogAllow allow = xmlCatalogGetDefaults();
4072 if ((allow == XML_CATA_ALLOW_DOCUMENT) ||
4073 (allow == XML_CATA_ALLOW_ALL))
4074 xmlParseCatalogPI(ctxt, buf);
4075 }
4076#endif
4077
4078
Owen Taylor3473f882001-02-23 17:55:21 +00004079 /*
4080 * SAX: PI detected.
4081 */
4082 if ((ctxt->sax) && (!ctxt->disableSAX) &&
4083 (ctxt->sax->processingInstruction != NULL))
4084 ctxt->sax->processingInstruction(ctxt->userData,
4085 target, buf);
4086 }
4087 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00004088 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004089 xmlFatalErr(ctxt, XML_ERR_PI_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004090 }
4091 ctxt->instate = state;
4092 }
4093}
4094
4095/**
4096 * xmlParseNotationDecl:
4097 * @ctxt: an XML parser context
4098 *
4099 * parse a notation declaration
4100 *
4101 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
4102 *
4103 * Hence there is actually 3 choices:
4104 * 'PUBLIC' S PubidLiteral
4105 * 'PUBLIC' S PubidLiteral S SystemLiteral
4106 * and 'SYSTEM' S SystemLiteral
4107 *
4108 * See the NOTE on xmlParseExternalID().
4109 */
4110
4111void
4112xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004113 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00004114 xmlChar *Pubid;
4115 xmlChar *Systemid;
4116
Daniel Veillard8f597c32003-10-06 08:19:27 +00004117 if (memcmp(CUR_PTR, "<!NOTATION", 10) == 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00004118 xmlParserInputPtr input = ctxt->input;
4119 SHRINK;
4120 SKIP(10);
4121 if (!IS_BLANK(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004122 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4123 "Space required after '<!NOTATION'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004124 return;
4125 }
4126 SKIP_BLANKS;
4127
Daniel Veillard76d66f42001-05-16 21:05:17 +00004128 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004129 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004130 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004131 return;
4132 }
4133 if (!IS_BLANK(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004134 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004135 "Space required after the NOTATION name'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004136 return;
4137 }
4138 SKIP_BLANKS;
4139
4140 /*
4141 * Parse the IDs.
4142 */
4143 Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
4144 SKIP_BLANKS;
4145
4146 if (RAW == '>') {
4147 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004148 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4149 "Notation declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004150 }
4151 NEXT;
4152 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4153 (ctxt->sax->notationDecl != NULL))
4154 ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
4155 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004156 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004157 }
Owen Taylor3473f882001-02-23 17:55:21 +00004158 if (Systemid != NULL) xmlFree(Systemid);
4159 if (Pubid != NULL) xmlFree(Pubid);
4160 }
4161}
4162
4163/**
4164 * xmlParseEntityDecl:
4165 * @ctxt: an XML parser context
4166 *
4167 * parse <!ENTITY declarations
4168 *
4169 * [70] EntityDecl ::= GEDecl | PEDecl
4170 *
4171 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
4172 *
4173 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
4174 *
4175 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
4176 *
4177 * [74] PEDef ::= EntityValue | ExternalID
4178 *
4179 * [76] NDataDecl ::= S 'NDATA' S Name
4180 *
4181 * [ VC: Notation Declared ]
4182 * The Name must match the declared name of a notation.
4183 */
4184
4185void
4186xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004187 const xmlChar *name = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00004188 xmlChar *value = NULL;
4189 xmlChar *URI = NULL, *literal = NULL;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004190 const xmlChar *ndata = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00004191 int isParameter = 0;
4192 xmlChar *orig = NULL;
Daniel Veillardf5582f12002-06-11 10:08:16 +00004193 int skipped;
Owen Taylor3473f882001-02-23 17:55:21 +00004194
4195 GROW;
Daniel Veillard8f597c32003-10-06 08:19:27 +00004196 if (memcmp(CUR_PTR, "<!ENTITY", 8) == 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00004197 xmlParserInputPtr input = ctxt->input;
Owen Taylor3473f882001-02-23 17:55:21 +00004198 SHRINK;
4199 SKIP(8);
Daniel Veillardf5582f12002-06-11 10:08:16 +00004200 skipped = SKIP_BLANKS;
4201 if (skipped == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004202 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4203 "Space required after '<!ENTITY'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004204 }
Owen Taylor3473f882001-02-23 17:55:21 +00004205
4206 if (RAW == '%') {
4207 NEXT;
Daniel Veillardf5582f12002-06-11 10:08:16 +00004208 skipped = SKIP_BLANKS;
4209 if (skipped == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004210 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4211 "Space required after '%'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004212 }
Owen Taylor3473f882001-02-23 17:55:21 +00004213 isParameter = 1;
4214 }
4215
Daniel Veillard76d66f42001-05-16 21:05:17 +00004216 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004217 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004218 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
4219 "xmlParseEntityDecl: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004220 return;
4221 }
Daniel Veillardf5582f12002-06-11 10:08:16 +00004222 skipped = SKIP_BLANKS;
4223 if (skipped == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004224 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4225 "Space required after the entity name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004226 }
Owen Taylor3473f882001-02-23 17:55:21 +00004227
Daniel Veillardf5582f12002-06-11 10:08:16 +00004228 ctxt->instate = XML_PARSER_ENTITY_DECL;
Owen Taylor3473f882001-02-23 17:55:21 +00004229 /*
4230 * handle the various case of definitions...
4231 */
4232 if (isParameter) {
4233 if ((RAW == '"') || (RAW == '\'')) {
4234 value = xmlParseEntityValue(ctxt, &orig);
4235 if (value) {
4236 if ((ctxt->sax != NULL) &&
4237 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
4238 ctxt->sax->entityDecl(ctxt->userData, name,
4239 XML_INTERNAL_PARAMETER_ENTITY,
4240 NULL, NULL, value);
4241 }
4242 } else {
4243 URI = xmlParseExternalID(ctxt, &literal, 1);
4244 if ((URI == NULL) && (literal == NULL)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004245 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004246 }
4247 if (URI) {
4248 xmlURIPtr uri;
4249
4250 uri = xmlParseURI((const char *) URI);
4251 if (uri == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00004252 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
4253 "Invalid URI: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00004254 /*
4255 * This really ought to be a well formedness error
4256 * but the XML Core WG decided otherwise c.f. issue
4257 * E26 of the XML erratas.
4258 */
Owen Taylor3473f882001-02-23 17:55:21 +00004259 } else {
4260 if (uri->fragment != NULL) {
Daniel Veillard4a7ae502002-02-18 19:18:17 +00004261 /*
4262 * Okay this is foolish to block those but not
4263 * invalid URIs.
4264 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004265 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004266 } else {
4267 if ((ctxt->sax != NULL) &&
4268 (!ctxt->disableSAX) &&
4269 (ctxt->sax->entityDecl != NULL))
4270 ctxt->sax->entityDecl(ctxt->userData, name,
4271 XML_EXTERNAL_PARAMETER_ENTITY,
4272 literal, URI, NULL);
4273 }
4274 xmlFreeURI(uri);
4275 }
4276 }
4277 }
4278 } else {
4279 if ((RAW == '"') || (RAW == '\'')) {
4280 value = xmlParseEntityValue(ctxt, &orig);
4281 if ((ctxt->sax != NULL) &&
4282 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
4283 ctxt->sax->entityDecl(ctxt->userData, name,
4284 XML_INTERNAL_GENERAL_ENTITY,
4285 NULL, NULL, value);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004286 /*
4287 * For expat compatibility in SAX mode.
4288 */
4289 if ((ctxt->myDoc == NULL) ||
4290 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
4291 if (ctxt->myDoc == NULL) {
4292 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
4293 }
4294 if (ctxt->myDoc->intSubset == NULL)
4295 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
4296 BAD_CAST "fake", NULL, NULL);
4297
Daniel Veillard1af9a412003-08-20 22:54:39 +00004298 xmlSAX2EntityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY,
4299 NULL, NULL, value);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004300 }
Owen Taylor3473f882001-02-23 17:55:21 +00004301 } else {
4302 URI = xmlParseExternalID(ctxt, &literal, 1);
4303 if ((URI == NULL) && (literal == NULL)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004304 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004305 }
4306 if (URI) {
4307 xmlURIPtr uri;
4308
4309 uri = xmlParseURI((const char *)URI);
4310 if (uri == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00004311 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
4312 "Invalid URI: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00004313 /*
4314 * This really ought to be a well formedness error
4315 * but the XML Core WG decided otherwise c.f. issue
4316 * E26 of the XML erratas.
4317 */
Owen Taylor3473f882001-02-23 17:55:21 +00004318 } else {
4319 if (uri->fragment != NULL) {
Daniel Veillard4a7ae502002-02-18 19:18:17 +00004320 /*
4321 * Okay this is foolish to block those but not
4322 * invalid URIs.
4323 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004324 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004325 }
4326 xmlFreeURI(uri);
4327 }
4328 }
4329 if ((RAW != '>') && (!IS_BLANK(CUR))) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004330 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4331 "Space required before 'NDATA'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004332 }
4333 SKIP_BLANKS;
Daniel Veillard8f597c32003-10-06 08:19:27 +00004334 if (memcmp(CUR_PTR, "NDATA", 5) == 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00004335 SKIP(5);
4336 if (!IS_BLANK(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004337 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4338 "Space required after 'NDATA'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004339 }
4340 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004341 ndata = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004342 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4343 (ctxt->sax->unparsedEntityDecl != NULL))
4344 ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
4345 literal, URI, ndata);
4346 } else {
4347 if ((ctxt->sax != NULL) &&
4348 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
4349 ctxt->sax->entityDecl(ctxt->userData, name,
4350 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
4351 literal, URI, NULL);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004352 /*
4353 * For expat compatibility in SAX mode.
4354 * assuming the entity repalcement was asked for
4355 */
4356 if ((ctxt->replaceEntities != 0) &&
4357 ((ctxt->myDoc == NULL) ||
4358 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) {
4359 if (ctxt->myDoc == NULL) {
4360 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
4361 }
4362
4363 if (ctxt->myDoc->intSubset == NULL)
4364 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
4365 BAD_CAST "fake", NULL, NULL);
Daniel Veillard1af9a412003-08-20 22:54:39 +00004366 xmlSAX2EntityDecl(ctxt, name,
4367 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
4368 literal, URI, NULL);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004369 }
Owen Taylor3473f882001-02-23 17:55:21 +00004370 }
4371 }
4372 }
4373 SKIP_BLANKS;
4374 if (RAW != '>') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00004375 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00004376 "xmlParseEntityDecl: entity %s not terminated\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00004377 } else {
4378 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004379 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4380 "Entity declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004381 }
4382 NEXT;
4383 }
4384 if (orig != NULL) {
4385 /*
4386 * Ugly mechanism to save the raw entity value.
4387 */
4388 xmlEntityPtr cur = NULL;
4389
4390 if (isParameter) {
4391 if ((ctxt->sax != NULL) &&
4392 (ctxt->sax->getParameterEntity != NULL))
4393 cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
4394 } else {
4395 if ((ctxt->sax != NULL) &&
4396 (ctxt->sax->getEntity != NULL))
4397 cur = ctxt->sax->getEntity(ctxt->userData, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004398 if ((cur == NULL) && (ctxt->userData==ctxt)) {
Daniel Veillard1af9a412003-08-20 22:54:39 +00004399 cur = xmlSAX2GetEntity(ctxt, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004400 }
Owen Taylor3473f882001-02-23 17:55:21 +00004401 }
4402 if (cur != NULL) {
4403 if (cur->orig != NULL)
4404 xmlFree(orig);
4405 else
4406 cur->orig = orig;
4407 } else
4408 xmlFree(orig);
4409 }
Owen Taylor3473f882001-02-23 17:55:21 +00004410 if (value != NULL) xmlFree(value);
4411 if (URI != NULL) xmlFree(URI);
4412 if (literal != NULL) xmlFree(literal);
Owen Taylor3473f882001-02-23 17:55:21 +00004413 }
4414}
4415
4416/**
4417 * xmlParseDefaultDecl:
4418 * @ctxt: an XML parser context
4419 * @value: Receive a possible fixed default value for the attribute
4420 *
4421 * Parse an attribute default declaration
4422 *
4423 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
4424 *
4425 * [ VC: Required Attribute ]
4426 * if the default declaration is the keyword #REQUIRED, then the
4427 * attribute must be specified for all elements of the type in the
4428 * attribute-list declaration.
4429 *
4430 * [ VC: Attribute Default Legal ]
4431 * The declared default value must meet the lexical constraints of
4432 * the declared attribute type c.f. xmlValidateAttributeDecl()
4433 *
4434 * [ VC: Fixed Attribute Default ]
4435 * if an attribute has a default value declared with the #FIXED
4436 * keyword, instances of that attribute must match the default value.
4437 *
4438 * [ WFC: No < in Attribute Values ]
4439 * handled in xmlParseAttValue()
4440 *
4441 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
4442 * or XML_ATTRIBUTE_FIXED.
4443 */
4444
4445int
4446xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
4447 int val;
4448 xmlChar *ret;
4449
4450 *value = NULL;
Daniel Veillard8f597c32003-10-06 08:19:27 +00004451 if (memcmp(CUR_PTR, "#REQUIRED", 9) == 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00004452 SKIP(9);
4453 return(XML_ATTRIBUTE_REQUIRED);
4454 }
Daniel Veillard8f597c32003-10-06 08:19:27 +00004455 if (memcmp(CUR_PTR, "#IMPLIED", 8) == 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00004456 SKIP(8);
4457 return(XML_ATTRIBUTE_IMPLIED);
4458 }
4459 val = XML_ATTRIBUTE_NONE;
Daniel Veillard8f597c32003-10-06 08:19:27 +00004460 if (memcmp(CUR_PTR, "#FIXED", 6) == 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00004461 SKIP(6);
4462 val = XML_ATTRIBUTE_FIXED;
4463 if (!IS_BLANK(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004464 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4465 "Space required after '#FIXED'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004466 }
4467 SKIP_BLANKS;
4468 }
4469 ret = xmlParseAttValue(ctxt);
4470 ctxt->instate = XML_PARSER_DTD;
4471 if (ret == NULL) {
William M. Brack7b9154b2003-09-27 19:23:50 +00004472 xmlFatalErrMsg(ctxt, (xmlParserErrors)ctxt->errNo,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004473 "Attribute default value declaration error\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004474 } else
4475 *value = ret;
4476 return(val);
4477}
4478
4479/**
4480 * xmlParseNotationType:
4481 * @ctxt: an XML parser context
4482 *
4483 * parse an Notation attribute type.
4484 *
4485 * Note: the leading 'NOTATION' S part has already being parsed...
4486 *
4487 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
4488 *
4489 * [ VC: Notation Attributes ]
4490 * Values of this type must match one of the notation names included
4491 * in the declaration; all notation names in the declaration must be declared.
4492 *
4493 * Returns: the notation attribute tree built while parsing
4494 */
4495
4496xmlEnumerationPtr
4497xmlParseNotationType(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004498 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00004499 xmlEnumerationPtr ret = NULL, last = NULL, cur;
4500
4501 if (RAW != '(') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004502 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004503 return(NULL);
4504 }
4505 SHRINK;
4506 do {
4507 NEXT;
4508 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004509 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004510 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004511 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
4512 "Name expected in NOTATION declaration\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004513 return(ret);
4514 }
4515 cur = xmlCreateEnumeration(name);
Owen Taylor3473f882001-02-23 17:55:21 +00004516 if (cur == NULL) return(ret);
4517 if (last == NULL) ret = last = cur;
4518 else {
4519 last->next = cur;
4520 last = cur;
4521 }
4522 SKIP_BLANKS;
4523 } while (RAW == '|');
4524 if (RAW != ')') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004525 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004526 if ((last != NULL) && (last != ret))
4527 xmlFreeEnumeration(last);
4528 return(ret);
4529 }
4530 NEXT;
4531 return(ret);
4532}
4533
4534/**
4535 * xmlParseEnumerationType:
4536 * @ctxt: an XML parser context
4537 *
4538 * parse an Enumeration attribute type.
4539 *
4540 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
4541 *
4542 * [ VC: Enumeration ]
4543 * Values of this type must match one of the Nmtoken tokens in
4544 * the declaration
4545 *
4546 * Returns: the enumeration attribute tree built while parsing
4547 */
4548
4549xmlEnumerationPtr
4550xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
4551 xmlChar *name;
4552 xmlEnumerationPtr ret = NULL, last = NULL, cur;
4553
4554 if (RAW != '(') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004555 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004556 return(NULL);
4557 }
4558 SHRINK;
4559 do {
4560 NEXT;
4561 SKIP_BLANKS;
4562 name = xmlParseNmtoken(ctxt);
4563 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004564 xmlFatalErr(ctxt, XML_ERR_NMTOKEN_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004565 return(ret);
4566 }
4567 cur = xmlCreateEnumeration(name);
4568 xmlFree(name);
4569 if (cur == NULL) return(ret);
4570 if (last == NULL) ret = last = cur;
4571 else {
4572 last->next = cur;
4573 last = cur;
4574 }
4575 SKIP_BLANKS;
4576 } while (RAW == '|');
4577 if (RAW != ')') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004578 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004579 return(ret);
4580 }
4581 NEXT;
4582 return(ret);
4583}
4584
4585/**
4586 * xmlParseEnumeratedType:
4587 * @ctxt: an XML parser context
4588 * @tree: the enumeration tree built while parsing
4589 *
4590 * parse an Enumerated attribute type.
4591 *
4592 * [57] EnumeratedType ::= NotationType | Enumeration
4593 *
4594 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
4595 *
4596 *
4597 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
4598 */
4599
4600int
4601xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
Daniel Veillard8f597c32003-10-06 08:19:27 +00004602 if (memcmp(CUR_PTR, "NOTATION", 8) == 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00004603 SKIP(8);
4604 if (!IS_BLANK(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004605 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4606 "Space required after 'NOTATION'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004607 return(0);
4608 }
4609 SKIP_BLANKS;
4610 *tree = xmlParseNotationType(ctxt);
4611 if (*tree == NULL) return(0);
4612 return(XML_ATTRIBUTE_NOTATION);
4613 }
4614 *tree = xmlParseEnumerationType(ctxt);
4615 if (*tree == NULL) return(0);
4616 return(XML_ATTRIBUTE_ENUMERATION);
4617}
4618
4619/**
4620 * xmlParseAttributeType:
4621 * @ctxt: an XML parser context
4622 * @tree: the enumeration tree built while parsing
4623 *
4624 * parse the Attribute list def for an element
4625 *
4626 * [54] AttType ::= StringType | TokenizedType | EnumeratedType
4627 *
4628 * [55] StringType ::= 'CDATA'
4629 *
4630 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
4631 * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
4632 *
4633 * Validity constraints for attribute values syntax are checked in
4634 * xmlValidateAttributeValue()
4635 *
4636 * [ VC: ID ]
4637 * Values of type ID must match the Name production. A name must not
4638 * appear more than once in an XML document as a value of this type;
4639 * i.e., ID values must uniquely identify the elements which bear them.
4640 *
4641 * [ VC: One ID per Element Type ]
4642 * No element type may have more than one ID attribute specified.
4643 *
4644 * [ VC: ID Attribute Default ]
4645 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
4646 *
4647 * [ VC: IDREF ]
4648 * Values of type IDREF must match the Name production, and values
4649 * of type IDREFS must match Names; each IDREF Name must match the value
4650 * of an ID attribute on some element in the XML document; i.e. IDREF
4651 * values must match the value of some ID attribute.
4652 *
4653 * [ VC: Entity Name ]
4654 * Values of type ENTITY must match the Name production, values
4655 * of type ENTITIES must match Names; each Entity Name must match the
4656 * name of an unparsed entity declared in the DTD.
4657 *
4658 * [ VC: Name Token ]
4659 * Values of type NMTOKEN must match the Nmtoken production; values
4660 * of type NMTOKENS must match Nmtokens.
4661 *
4662 * Returns the attribute type
4663 */
4664int
4665xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
4666 SHRINK;
Daniel Veillard8f597c32003-10-06 08:19:27 +00004667 if (memcmp(CUR_PTR, "CDATA", 5) == 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00004668 SKIP(5);
4669 return(XML_ATTRIBUTE_CDATA);
Daniel Veillard8f597c32003-10-06 08:19:27 +00004670 } else if (memcmp(CUR_PTR, "IDREFS", 6) == 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00004671 SKIP(6);
4672 return(XML_ATTRIBUTE_IDREFS);
Daniel Veillard8f597c32003-10-06 08:19:27 +00004673 } else if (memcmp(CUR_PTR, "IDREF", 5) == 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00004674 SKIP(5);
4675 return(XML_ATTRIBUTE_IDREF);
4676 } else if ((RAW == 'I') && (NXT(1) == 'D')) {
4677 SKIP(2);
4678 return(XML_ATTRIBUTE_ID);
Daniel Veillard8f597c32003-10-06 08:19:27 +00004679 } else if (memcmp(CUR_PTR, "ENTITY", 6) == 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00004680 SKIP(6);
4681 return(XML_ATTRIBUTE_ENTITY);
Daniel Veillard8f597c32003-10-06 08:19:27 +00004682 } else if (memcmp(CUR_PTR, "ENTITIES", 8) == 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00004683 SKIP(8);
4684 return(XML_ATTRIBUTE_ENTITIES);
Daniel Veillard8f597c32003-10-06 08:19:27 +00004685 } else if (memcmp(CUR_PTR, "NMTOKENS", 8) == 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00004686 SKIP(8);
4687 return(XML_ATTRIBUTE_NMTOKENS);
Daniel Veillard8f597c32003-10-06 08:19:27 +00004688 } else if (memcmp(CUR_PTR, "NMTOKEN", 7) == 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00004689 SKIP(7);
4690 return(XML_ATTRIBUTE_NMTOKEN);
4691 }
4692 return(xmlParseEnumeratedType(ctxt, tree));
4693}
4694
4695/**
4696 * xmlParseAttributeListDecl:
4697 * @ctxt: an XML parser context
4698 *
4699 * : parse the Attribute list def for an element
4700 *
4701 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
4702 *
4703 * [53] AttDef ::= S Name S AttType S DefaultDecl
4704 *
4705 */
4706void
4707xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004708 const xmlChar *elemName;
4709 const xmlChar *attrName;
Owen Taylor3473f882001-02-23 17:55:21 +00004710 xmlEnumerationPtr tree;
4711
Daniel Veillard8f597c32003-10-06 08:19:27 +00004712 if (memcmp(CUR_PTR, "<!ATTLIST", 9) == 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00004713 xmlParserInputPtr input = ctxt->input;
4714
4715 SKIP(9);
4716 if (!IS_BLANK(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004717 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004718 "Space required after '<!ATTLIST'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004719 }
4720 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004721 elemName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004722 if (elemName == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004723 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
4724 "ATTLIST: no name for Element\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004725 return;
4726 }
4727 SKIP_BLANKS;
4728 GROW;
4729 while (RAW != '>') {
4730 const xmlChar *check = CUR_PTR;
4731 int type;
4732 int def;
4733 xmlChar *defaultValue = NULL;
4734
4735 GROW;
4736 tree = NULL;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004737 attrName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004738 if (attrName == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004739 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
4740 "ATTLIST: no name for Attribute\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004741 break;
4742 }
4743 GROW;
4744 if (!IS_BLANK(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004745 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004746 "Space required after the attribute name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004747 if (defaultValue != NULL)
4748 xmlFree(defaultValue);
4749 break;
4750 }
4751 SKIP_BLANKS;
4752
4753 type = xmlParseAttributeType(ctxt, &tree);
4754 if (type <= 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00004755 if (defaultValue != NULL)
4756 xmlFree(defaultValue);
4757 break;
4758 }
4759
4760 GROW;
4761 if (!IS_BLANK(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004762 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4763 "Space required after the attribute type\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004764 if (defaultValue != NULL)
4765 xmlFree(defaultValue);
4766 if (tree != NULL)
4767 xmlFreeEnumeration(tree);
4768 break;
4769 }
4770 SKIP_BLANKS;
4771
4772 def = xmlParseDefaultDecl(ctxt, &defaultValue);
4773 if (def <= 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00004774 if (defaultValue != NULL)
4775 xmlFree(defaultValue);
4776 if (tree != NULL)
4777 xmlFreeEnumeration(tree);
4778 break;
4779 }
4780
4781 GROW;
4782 if (RAW != '>') {
4783 if (!IS_BLANK(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004784 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004785 "Space required after the attribute default value\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004786 if (defaultValue != NULL)
4787 xmlFree(defaultValue);
4788 if (tree != NULL)
4789 xmlFreeEnumeration(tree);
4790 break;
4791 }
4792 SKIP_BLANKS;
4793 }
4794 if (check == CUR_PTR) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004795 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
4796 "in xmlParseAttributeListDecl\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004797 if (defaultValue != NULL)
4798 xmlFree(defaultValue);
4799 if (tree != NULL)
4800 xmlFreeEnumeration(tree);
4801 break;
4802 }
4803 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4804 (ctxt->sax->attributeDecl != NULL))
4805 ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
4806 type, def, defaultValue, tree);
Daniel Veillarde57ec792003-09-10 10:50:59 +00004807 else if (tree != NULL)
4808 xmlFreeEnumeration(tree);
4809
4810 if ((ctxt->sax2) && (defaultValue != NULL) &&
4811 (def != XML_ATTRIBUTE_IMPLIED) &&
4812 (def != XML_ATTRIBUTE_REQUIRED)) {
4813 xmlAddDefAttrs(ctxt, elemName, attrName, defaultValue);
4814 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004815 if ((ctxt->sax2) && (type != XML_ATTRIBUTE_CDATA)) {
4816 xmlAddSpecialAttr(ctxt, elemName, attrName, type);
4817 }
Owen Taylor3473f882001-02-23 17:55:21 +00004818 if (defaultValue != NULL)
4819 xmlFree(defaultValue);
4820 GROW;
4821 }
4822 if (RAW == '>') {
4823 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004824 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4825 "Attribute list declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004826 }
4827 NEXT;
4828 }
Owen Taylor3473f882001-02-23 17:55:21 +00004829 }
4830}
4831
4832/**
4833 * xmlParseElementMixedContentDecl:
4834 * @ctxt: an XML parser context
Daniel Veillarda9b66d02002-12-11 14:23:49 +00004835 * @inputchk: the input used for the current entity, needed for boundary checks
Owen Taylor3473f882001-02-23 17:55:21 +00004836 *
4837 * parse the declaration for a Mixed Element content
4838 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
4839 *
4840 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
4841 * '(' S? '#PCDATA' S? ')'
4842 *
4843 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
4844 *
4845 * [ VC: No Duplicate Types ]
4846 * The same name must not appear more than once in a single
4847 * mixed-content declaration.
4848 *
4849 * returns: the list of the xmlElementContentPtr describing the element choices
4850 */
4851xmlElementContentPtr
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004852xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
Owen Taylor3473f882001-02-23 17:55:21 +00004853 xmlElementContentPtr ret = NULL, cur = NULL, n;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004854 const xmlChar *elem = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00004855
4856 GROW;
Daniel Veillard8f597c32003-10-06 08:19:27 +00004857 if (memcmp(CUR_PTR, "#PCDATA", 7) == 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00004858 SKIP(7);
4859 SKIP_BLANKS;
4860 SHRINK;
4861 if (RAW == ')') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004862 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00004863 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
4864"Element content declaration doesn't start and stop in the same entity\n",
4865 NULL);
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004866 }
Owen Taylor3473f882001-02-23 17:55:21 +00004867 NEXT;
4868 ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA);
4869 if (RAW == '*') {
4870 ret->ocur = XML_ELEMENT_CONTENT_MULT;
4871 NEXT;
4872 }
4873 return(ret);
4874 }
4875 if ((RAW == '(') || (RAW == '|')) {
4876 ret = cur = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA);
4877 if (ret == NULL) return(NULL);
4878 }
4879 while (RAW == '|') {
4880 NEXT;
4881 if (elem == NULL) {
4882 ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
4883 if (ret == NULL) return(NULL);
4884 ret->c1 = cur;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004885 if (cur != NULL)
4886 cur->parent = ret;
Owen Taylor3473f882001-02-23 17:55:21 +00004887 cur = ret;
4888 } else {
4889 n = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
4890 if (n == NULL) return(NULL);
4891 n->c1 = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004892 if (n->c1 != NULL)
4893 n->c1->parent = n;
Owen Taylor3473f882001-02-23 17:55:21 +00004894 cur->c2 = n;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004895 if (n != NULL)
4896 n->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004897 cur = n;
Owen Taylor3473f882001-02-23 17:55:21 +00004898 }
4899 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004900 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004901 if (elem == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004902 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004903 "xmlParseElementMixedContentDecl : Name expected\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004904 xmlFreeElementContent(cur);
4905 return(NULL);
4906 }
4907 SKIP_BLANKS;
4908 GROW;
4909 }
4910 if ((RAW == ')') && (NXT(1) == '*')) {
4911 if (elem != NULL) {
4912 cur->c2 = xmlNewElementContent(elem,
4913 XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004914 if (cur->c2 != NULL)
4915 cur->c2->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004916 }
4917 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004918 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00004919 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
4920"Element content declaration doesn't start and stop in the same entity\n",
4921 NULL);
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004922 }
Owen Taylor3473f882001-02-23 17:55:21 +00004923 SKIP(2);
4924 } else {
Owen Taylor3473f882001-02-23 17:55:21 +00004925 xmlFreeElementContent(ret);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004926 xmlFatalErr(ctxt, XML_ERR_MIXED_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004927 return(NULL);
4928 }
4929
4930 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004931 xmlFatalErr(ctxt, XML_ERR_PCDATA_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004932 }
4933 return(ret);
4934}
4935
4936/**
4937 * xmlParseElementChildrenContentDecl:
4938 * @ctxt: an XML parser context
Daniel Veillarda9b66d02002-12-11 14:23:49 +00004939 * @inputchk: the input used for the current entity, needed for boundary checks
Owen Taylor3473f882001-02-23 17:55:21 +00004940 *
4941 * parse the declaration for a children Element content
4942 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
4943 *
4944 *
4945 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
4946 *
4947 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
4948 *
4949 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
4950 *
4951 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
4952 *
4953 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
4954 * TODO Parameter-entity replacement text must be properly nested
Daniel Veillardcbaf3992001-12-31 16:16:02 +00004955 * with parenthesized groups. That is to say, if either of the
Owen Taylor3473f882001-02-23 17:55:21 +00004956 * opening or closing parentheses in a choice, seq, or Mixed
4957 * construct is contained in the replacement text for a parameter
4958 * entity, both must be contained in the same replacement text. For
4959 * interoperability, if a parameter-entity reference appears in a
4960 * choice, seq, or Mixed construct, its replacement text should not
4961 * be empty, and neither the first nor last non-blank character of
4962 * the replacement text should be a connector (| or ,).
4963 *
Daniel Veillard5e2dace2001-07-18 19:30:27 +00004964 * Returns the tree of xmlElementContentPtr describing the element
Owen Taylor3473f882001-02-23 17:55:21 +00004965 * hierarchy.
4966 */
4967xmlElementContentPtr
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004968xmlParseElementChildrenContentDecl (xmlParserCtxtPtr ctxt, int inputchk) {
Owen Taylor3473f882001-02-23 17:55:21 +00004969 xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004970 const xmlChar *elem;
Owen Taylor3473f882001-02-23 17:55:21 +00004971 xmlChar type = 0;
4972
4973 SKIP_BLANKS;
4974 GROW;
4975 if (RAW == '(') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004976 int inputid = ctxt->input->id;
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004977
Owen Taylor3473f882001-02-23 17:55:21 +00004978 /* Recurse on first child */
4979 NEXT;
4980 SKIP_BLANKS;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004981 cur = ret = xmlParseElementChildrenContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00004982 SKIP_BLANKS;
4983 GROW;
4984 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00004985 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004986 if (elem == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004987 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004988 return(NULL);
4989 }
4990 cur = ret = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00004991 if (cur == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004992 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00004993 return(NULL);
4994 }
Owen Taylor3473f882001-02-23 17:55:21 +00004995 GROW;
4996 if (RAW == '?') {
4997 cur->ocur = XML_ELEMENT_CONTENT_OPT;
4998 NEXT;
4999 } else if (RAW == '*') {
5000 cur->ocur = XML_ELEMENT_CONTENT_MULT;
5001 NEXT;
5002 } else if (RAW == '+') {
5003 cur->ocur = XML_ELEMENT_CONTENT_PLUS;
5004 NEXT;
5005 } else {
5006 cur->ocur = XML_ELEMENT_CONTENT_ONCE;
5007 }
Owen Taylor3473f882001-02-23 17:55:21 +00005008 GROW;
5009 }
5010 SKIP_BLANKS;
5011 SHRINK;
5012 while (RAW != ')') {
5013 /*
5014 * Each loop we parse one separator and one element.
5015 */
5016 if (RAW == ',') {
5017 if (type == 0) type = CUR;
5018
5019 /*
5020 * Detect "Name | Name , Name" error
5021 */
5022 else if (type != CUR) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005023 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005024 "xmlParseElementChildrenContentDecl : '%c' expected\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005025 type);
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00005026 if ((last != NULL) && (last != ret))
Owen Taylor3473f882001-02-23 17:55:21 +00005027 xmlFreeElementContent(last);
5028 if (ret != NULL)
5029 xmlFreeElementContent(ret);
5030 return(NULL);
5031 }
5032 NEXT;
5033
5034 op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_SEQ);
5035 if (op == NULL) {
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00005036 if ((last != NULL) && (last != ret))
5037 xmlFreeElementContent(last);
Owen Taylor3473f882001-02-23 17:55:21 +00005038 xmlFreeElementContent(ret);
5039 return(NULL);
5040 }
5041 if (last == NULL) {
5042 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005043 if (ret != NULL)
5044 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00005045 ret = cur = op;
5046 } else {
5047 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005048 if (op != NULL)
5049 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00005050 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005051 if (last != NULL)
5052 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00005053 cur =op;
5054 last = NULL;
5055 }
5056 } else if (RAW == '|') {
5057 if (type == 0) type = CUR;
5058
5059 /*
5060 * Detect "Name , Name | Name" error
5061 */
5062 else if (type != CUR) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005063 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005064 "xmlParseElementChildrenContentDecl : '%c' expected\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005065 type);
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00005066 if ((last != NULL) && (last != ret))
Owen Taylor3473f882001-02-23 17:55:21 +00005067 xmlFreeElementContent(last);
5068 if (ret != NULL)
5069 xmlFreeElementContent(ret);
5070 return(NULL);
5071 }
5072 NEXT;
5073
5074 op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
5075 if (op == NULL) {
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00005076 if ((last != NULL) && (last != ret))
Owen Taylor3473f882001-02-23 17:55:21 +00005077 xmlFreeElementContent(last);
5078 if (ret != NULL)
5079 xmlFreeElementContent(ret);
5080 return(NULL);
5081 }
5082 if (last == NULL) {
5083 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005084 if (ret != NULL)
5085 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00005086 ret = cur = op;
5087 } else {
5088 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005089 if (op != NULL)
5090 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00005091 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005092 if (last != NULL)
5093 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00005094 cur =op;
5095 last = NULL;
5096 }
5097 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005098 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005099 if (ret != NULL)
5100 xmlFreeElementContent(ret);
5101 return(NULL);
5102 }
5103 GROW;
5104 SKIP_BLANKS;
5105 GROW;
5106 if (RAW == '(') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005107 int inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00005108 /* Recurse on second child */
5109 NEXT;
5110 SKIP_BLANKS;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005111 last = xmlParseElementChildrenContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00005112 SKIP_BLANKS;
5113 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00005114 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005115 if (elem == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005116 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005117 if (ret != NULL)
5118 xmlFreeElementContent(ret);
5119 return(NULL);
5120 }
5121 last = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
Owen Taylor3473f882001-02-23 17:55:21 +00005122 if (RAW == '?') {
5123 last->ocur = XML_ELEMENT_CONTENT_OPT;
5124 NEXT;
5125 } else if (RAW == '*') {
5126 last->ocur = XML_ELEMENT_CONTENT_MULT;
5127 NEXT;
5128 } else if (RAW == '+') {
5129 last->ocur = XML_ELEMENT_CONTENT_PLUS;
5130 NEXT;
5131 } else {
5132 last->ocur = XML_ELEMENT_CONTENT_ONCE;
5133 }
5134 }
5135 SKIP_BLANKS;
5136 GROW;
5137 }
5138 if ((cur != NULL) && (last != NULL)) {
5139 cur->c2 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005140 if (last != NULL)
5141 last->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00005142 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005143 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00005144 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
5145"Element content declaration doesn't start and stop in the same entity\n",
5146 NULL);
Daniel Veillard8dc16a62002-02-19 21:08:48 +00005147 }
Owen Taylor3473f882001-02-23 17:55:21 +00005148 NEXT;
5149 if (RAW == '?') {
Daniel Veillarde470df72001-04-18 21:41:07 +00005150 if (ret != NULL)
5151 ret->ocur = XML_ELEMENT_CONTENT_OPT;
Owen Taylor3473f882001-02-23 17:55:21 +00005152 NEXT;
5153 } else if (RAW == '*') {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00005154 if (ret != NULL) {
Daniel Veillarde470df72001-04-18 21:41:07 +00005155 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Daniel Veillardce2c2f02001-10-18 14:57:24 +00005156 cur = ret;
5157 /*
5158 * Some normalization:
5159 * (a | b* | c?)* == (a | b | c)*
5160 */
5161 while (cur->type == XML_ELEMENT_CONTENT_OR) {
5162 if ((cur->c1 != NULL) &&
5163 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
5164 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT)))
5165 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
5166 if ((cur->c2 != NULL) &&
5167 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
5168 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT)))
5169 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
5170 cur = cur->c2;
5171 }
5172 }
Owen Taylor3473f882001-02-23 17:55:21 +00005173 NEXT;
5174 } else if (RAW == '+') {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00005175 if (ret != NULL) {
5176 int found = 0;
5177
Daniel Veillarde470df72001-04-18 21:41:07 +00005178 ret->ocur = XML_ELEMENT_CONTENT_PLUS;
Daniel Veillardce2c2f02001-10-18 14:57:24 +00005179 /*
5180 * Some normalization:
5181 * (a | b*)+ == (a | b)*
5182 * (a | b?)+ == (a | b)*
5183 */
5184 while (cur->type == XML_ELEMENT_CONTENT_OR) {
5185 if ((cur->c1 != NULL) &&
5186 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
5187 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) {
5188 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
5189 found = 1;
5190 }
5191 if ((cur->c2 != NULL) &&
5192 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
5193 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) {
5194 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
5195 found = 1;
5196 }
5197 cur = cur->c2;
5198 }
5199 if (found)
5200 ret->ocur = XML_ELEMENT_CONTENT_MULT;
5201 }
Owen Taylor3473f882001-02-23 17:55:21 +00005202 NEXT;
5203 }
5204 return(ret);
5205}
5206
5207/**
5208 * xmlParseElementContentDecl:
5209 * @ctxt: an XML parser context
5210 * @name: the name of the element being defined.
5211 * @result: the Element Content pointer will be stored here if any
5212 *
5213 * parse the declaration for an Element content either Mixed or Children,
5214 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
5215 *
5216 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
5217 *
5218 * returns: the type of element content XML_ELEMENT_TYPE_xxx
5219 */
5220
5221int
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005222xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, const xmlChar *name,
Owen Taylor3473f882001-02-23 17:55:21 +00005223 xmlElementContentPtr *result) {
5224
5225 xmlElementContentPtr tree = NULL;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005226 int inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00005227 int res;
5228
5229 *result = NULL;
5230
5231 if (RAW != '(') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00005232 xmlFatalErrMsgStr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005233 "xmlParseElementContentDecl : %s '(' expected\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00005234 return(-1);
5235 }
5236 NEXT;
5237 GROW;
5238 SKIP_BLANKS;
Daniel Veillard8f597c32003-10-06 08:19:27 +00005239 if (memcmp(CUR_PTR, "#PCDATA", 7) == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005240 tree = xmlParseElementMixedContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00005241 res = XML_ELEMENT_TYPE_MIXED;
5242 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005243 tree = xmlParseElementChildrenContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00005244 res = XML_ELEMENT_TYPE_ELEMENT;
5245 }
Owen Taylor3473f882001-02-23 17:55:21 +00005246 SKIP_BLANKS;
5247 *result = tree;
5248 return(res);
5249}
5250
5251/**
5252 * xmlParseElementDecl:
5253 * @ctxt: an XML parser context
5254 *
5255 * parse an Element declaration.
5256 *
5257 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
5258 *
5259 * [ VC: Unique Element Type Declaration ]
5260 * No element type may be declared more than once
5261 *
5262 * Returns the type of the element, or -1 in case of error
5263 */
5264int
5265xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005266 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00005267 int ret = -1;
5268 xmlElementContentPtr content = NULL;
5269
5270 GROW;
Daniel Veillard8f597c32003-10-06 08:19:27 +00005271 if (memcmp(CUR_PTR, "<!ELEMENT", 9) == 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00005272 xmlParserInputPtr input = ctxt->input;
5273
5274 SKIP(9);
5275 if (!IS_BLANK(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005276 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5277 "Space required after 'ELEMENT'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005278 }
5279 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00005280 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005281 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005282 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5283 "xmlParseElementDecl: no name for Element\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005284 return(-1);
5285 }
5286 while ((RAW == 0) && (ctxt->inputNr > 1))
5287 xmlPopInput(ctxt);
5288 if (!IS_BLANK(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005289 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5290 "Space required after the element name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005291 }
5292 SKIP_BLANKS;
Daniel Veillard8f597c32003-10-06 08:19:27 +00005293 if (memcmp(CUR_PTR, "EMPTY", 5) == 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00005294 SKIP(5);
5295 /*
5296 * Element must always be empty.
5297 */
5298 ret = XML_ELEMENT_TYPE_EMPTY;
5299 } else if ((RAW == 'A') && (NXT(1) == 'N') &&
5300 (NXT(2) == 'Y')) {
5301 SKIP(3);
5302 /*
5303 * Element is a generic container.
5304 */
5305 ret = XML_ELEMENT_TYPE_ANY;
5306 } else if (RAW == '(') {
5307 ret = xmlParseElementContentDecl(ctxt, name, &content);
5308 } else {
5309 /*
5310 * [ WFC: PEs in Internal Subset ] error handling.
5311 */
5312 if ((RAW == '%') && (ctxt->external == 0) &&
5313 (ctxt->inputNr == 1)) {
Daniel Veillardf403d292003-10-05 13:51:35 +00005314 xmlFatalErrMsg(ctxt, XML_ERR_PEREF_IN_INT_SUBSET,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005315 "PEReference: forbidden within markup decl in internal subset\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005316 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00005317 xmlFatalErrMsg(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
Owen Taylor3473f882001-02-23 17:55:21 +00005318 "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
5319 }
Owen Taylor3473f882001-02-23 17:55:21 +00005320 return(-1);
5321 }
5322
5323 SKIP_BLANKS;
5324 /*
5325 * Pop-up of finished entities.
5326 */
5327 while ((RAW == 0) && (ctxt->inputNr > 1))
5328 xmlPopInput(ctxt);
5329 SKIP_BLANKS;
5330
5331 if (RAW != '>') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005332 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005333 } else {
5334 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005335 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5336 "Element declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005337 }
5338
5339 NEXT;
5340 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5341 (ctxt->sax->elementDecl != NULL))
5342 ctxt->sax->elementDecl(ctxt->userData, name, ret,
5343 content);
5344 }
5345 if (content != NULL) {
5346 xmlFreeElementContent(content);
5347 }
Owen Taylor3473f882001-02-23 17:55:21 +00005348 }
5349 return(ret);
5350}
5351
5352/**
Owen Taylor3473f882001-02-23 17:55:21 +00005353 * xmlParseConditionalSections
5354 * @ctxt: an XML parser context
5355 *
5356 * [61] conditionalSect ::= includeSect | ignoreSect
5357 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
5358 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
5359 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
5360 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
5361 */
5362
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005363static void
Owen Taylor3473f882001-02-23 17:55:21 +00005364xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
5365 SKIP(3);
5366 SKIP_BLANKS;
Daniel Veillard8f597c32003-10-06 08:19:27 +00005367 if (memcmp(CUR_PTR, "INCLUDE", 7) == 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00005368 SKIP(7);
5369 SKIP_BLANKS;
5370 if (RAW != '[') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005371 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005372 } else {
5373 NEXT;
5374 }
5375 if (xmlParserDebugEntities) {
5376 if ((ctxt->input != NULL) && (ctxt->input->filename))
5377 xmlGenericError(xmlGenericErrorContext,
5378 "%s(%d): ", ctxt->input->filename,
5379 ctxt->input->line);
5380 xmlGenericError(xmlGenericErrorContext,
5381 "Entering INCLUDE Conditional Section\n");
5382 }
5383
5384 while ((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') ||
5385 (NXT(2) != '>'))) {
5386 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00005387 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00005388
5389 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5390 xmlParseConditionalSections(ctxt);
5391 } else if (IS_BLANK(CUR)) {
5392 NEXT;
5393 } else if (RAW == '%') {
5394 xmlParsePEReference(ctxt);
5395 } else
5396 xmlParseMarkupDecl(ctxt);
5397
5398 /*
5399 * Pop-up of finished entities.
5400 */
5401 while ((RAW == 0) && (ctxt->inputNr > 1))
5402 xmlPopInput(ctxt);
5403
Daniel Veillardfdc91562002-07-01 21:52:03 +00005404 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005405 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005406 break;
5407 }
5408 }
5409 if (xmlParserDebugEntities) {
5410 if ((ctxt->input != NULL) && (ctxt->input->filename))
5411 xmlGenericError(xmlGenericErrorContext,
5412 "%s(%d): ", ctxt->input->filename,
5413 ctxt->input->line);
5414 xmlGenericError(xmlGenericErrorContext,
5415 "Leaving INCLUDE Conditional Section\n");
5416 }
5417
Daniel Veillard8f597c32003-10-06 08:19:27 +00005418 } else if (memcmp(CUR_PTR, "IGNORE", 6) == 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00005419 int state;
William M. Brack78637da2003-07-31 14:47:38 +00005420 xmlParserInputState instate;
Owen Taylor3473f882001-02-23 17:55:21 +00005421 int depth = 0;
5422
5423 SKIP(6);
5424 SKIP_BLANKS;
5425 if (RAW != '[') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005426 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005427 } else {
5428 NEXT;
5429 }
5430 if (xmlParserDebugEntities) {
5431 if ((ctxt->input != NULL) && (ctxt->input->filename))
5432 xmlGenericError(xmlGenericErrorContext,
5433 "%s(%d): ", ctxt->input->filename,
5434 ctxt->input->line);
5435 xmlGenericError(xmlGenericErrorContext,
5436 "Entering IGNORE Conditional Section\n");
5437 }
5438
5439 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005440 * Parse up to the end of the conditional section
Owen Taylor3473f882001-02-23 17:55:21 +00005441 * But disable SAX event generating DTD building in the meantime
5442 */
5443 state = ctxt->disableSAX;
5444 instate = ctxt->instate;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005445 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005446 ctxt->instate = XML_PARSER_IGNORE;
5447
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00005448 while ((depth >= 0) && (RAW != 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005449 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5450 depth++;
5451 SKIP(3);
5452 continue;
5453 }
5454 if ((RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
5455 if (--depth >= 0) SKIP(3);
5456 continue;
5457 }
5458 NEXT;
5459 continue;
5460 }
5461
5462 ctxt->disableSAX = state;
5463 ctxt->instate = instate;
5464
5465 if (xmlParserDebugEntities) {
5466 if ((ctxt->input != NULL) && (ctxt->input->filename))
5467 xmlGenericError(xmlGenericErrorContext,
5468 "%s(%d): ", ctxt->input->filename,
5469 ctxt->input->line);
5470 xmlGenericError(xmlGenericErrorContext,
5471 "Leaving IGNORE Conditional Section\n");
5472 }
5473
5474 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005475 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID_KEYWORD, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005476 }
5477
5478 if (RAW == 0)
5479 SHRINK;
5480
5481 if (RAW == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005482 xmlFatalErr(ctxt, XML_ERR_CONDSEC_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005483 } else {
5484 SKIP(3);
5485 }
5486}
5487
5488/**
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005489 * xmlParseMarkupDecl:
5490 * @ctxt: an XML parser context
5491 *
5492 * parse Markup declarations
5493 *
5494 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
5495 * NotationDecl | PI | Comment
5496 *
5497 * [ VC: Proper Declaration/PE Nesting ]
5498 * Parameter-entity replacement text must be properly nested with
5499 * markup declarations. That is to say, if either the first character
5500 * or the last character of a markup declaration (markupdecl above) is
5501 * contained in the replacement text for a parameter-entity reference,
5502 * both must be contained in the same replacement text.
5503 *
5504 * [ WFC: PEs in Internal Subset ]
5505 * In the internal DTD subset, parameter-entity references can occur
5506 * only where markup declarations can occur, not within markup declarations.
5507 * (This does not apply to references that occur in external parameter
5508 * entities or to the external subset.)
5509 */
5510void
5511xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
5512 GROW;
5513 xmlParseElementDecl(ctxt);
5514 xmlParseAttributeListDecl(ctxt);
5515 xmlParseEntityDecl(ctxt);
5516 xmlParseNotationDecl(ctxt);
5517 xmlParsePI(ctxt);
5518 xmlParseComment(ctxt);
5519 /*
5520 * This is only for internal subset. On external entities,
5521 * the replacement is done before parsing stage
5522 */
5523 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
5524 xmlParsePEReference(ctxt);
5525
5526 /*
5527 * Conditional sections are allowed from entities included
5528 * by PE References in the internal subset.
5529 */
5530 if ((ctxt->external == 0) && (ctxt->inputNr > 1)) {
5531 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5532 xmlParseConditionalSections(ctxt);
5533 }
5534 }
5535
5536 ctxt->instate = XML_PARSER_DTD;
5537}
5538
5539/**
5540 * xmlParseTextDecl:
5541 * @ctxt: an XML parser context
5542 *
5543 * parse an XML declaration header for external entities
5544 *
5545 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
5546 *
5547 * Question: Seems that EncodingDecl is mandatory ? Is that a typo ?
5548 */
5549
5550void
5551xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
5552 xmlChar *version;
5553
5554 /*
5555 * We know that '<?xml' is here.
5556 */
Daniel Veillard8f597c32003-10-06 08:19:27 +00005557 if ((memcmp(CUR_PTR, "<?xml", 5) == 0) && (IS_BLANK(NXT(5)))) {
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005558 SKIP(5);
5559 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005560 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_STARTED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005561 return;
5562 }
5563
5564 if (!IS_BLANK(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005565 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5566 "Space needed after '<?xml'\n");
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005567 }
5568 SKIP_BLANKS;
5569
5570 /*
5571 * We may have the VersionInfo here.
5572 */
5573 version = xmlParseVersionInfo(ctxt);
5574 if (version == NULL)
5575 version = xmlCharStrdup(XML_DEFAULT_VERSION);
Daniel Veillard401c2112002-01-07 16:54:10 +00005576 else {
5577 if (!IS_BLANK(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005578 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5579 "Space needed here\n");
Daniel Veillard401c2112002-01-07 16:54:10 +00005580 }
5581 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005582 ctxt->input->version = version;
5583
5584 /*
5585 * We must have the encoding declaration
5586 */
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005587 xmlParseEncodingDecl(ctxt);
5588 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5589 /*
5590 * The XML REC instructs us to stop parsing right here
5591 */
5592 return;
5593 }
5594
5595 SKIP_BLANKS;
5596 if ((RAW == '?') && (NXT(1) == '>')) {
5597 SKIP(2);
5598 } else if (RAW == '>') {
5599 /* Deprecated old WD ... */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005600 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005601 NEXT;
5602 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005603 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005604 MOVETO_ENDTAG(CUR_PTR);
5605 NEXT;
5606 }
5607}
5608
5609/**
Owen Taylor3473f882001-02-23 17:55:21 +00005610 * xmlParseExternalSubset:
5611 * @ctxt: an XML parser context
5612 * @ExternalID: the external identifier
5613 * @SystemID: the system identifier (or URL)
5614 *
5615 * parse Markup declarations from an external subset
5616 *
5617 * [30] extSubset ::= textDecl? extSubsetDecl
5618 *
5619 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
5620 */
5621void
5622xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
5623 const xmlChar *SystemID) {
Daniel Veillard309f81d2003-09-23 09:02:53 +00005624 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005625 GROW;
Daniel Veillard8f597c32003-10-06 08:19:27 +00005626 if (memcmp(CUR_PTR, "<?xml", 5) == 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00005627 xmlParseTextDecl(ctxt);
5628 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5629 /*
5630 * The XML REC instructs us to stop parsing right here
5631 */
5632 ctxt->instate = XML_PARSER_EOF;
5633 return;
5634 }
5635 }
5636 if (ctxt->myDoc == NULL) {
5637 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
5638 }
5639 if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
5640 xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
5641
5642 ctxt->instate = XML_PARSER_DTD;
5643 ctxt->external = 1;
5644 while (((RAW == '<') && (NXT(1) == '?')) ||
5645 ((RAW == '<') && (NXT(1) == '!')) ||
Daniel Veillard2454ab92001-07-25 21:39:46 +00005646 (RAW == '%') || IS_BLANK(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005647 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00005648 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00005649
5650 GROW;
5651 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5652 xmlParseConditionalSections(ctxt);
5653 } else if (IS_BLANK(CUR)) {
5654 NEXT;
5655 } else if (RAW == '%') {
5656 xmlParsePEReference(ctxt);
5657 } else
5658 xmlParseMarkupDecl(ctxt);
5659
5660 /*
5661 * Pop-up of finished entities.
5662 */
5663 while ((RAW == 0) && (ctxt->inputNr > 1))
5664 xmlPopInput(ctxt);
5665
Daniel Veillardfdc91562002-07-01 21:52:03 +00005666 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005667 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005668 break;
5669 }
5670 }
5671
5672 if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005673 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005674 }
5675
5676}
5677
5678/**
5679 * xmlParseReference:
5680 * @ctxt: an XML parser context
5681 *
5682 * parse and handle entity references in content, depending on the SAX
5683 * interface, this may end-up in a call to character() if this is a
5684 * CharRef, a predefined entity, if there is no reference() callback.
5685 * or if the parser was asked to switch to that mode.
5686 *
5687 * [67] Reference ::= EntityRef | CharRef
5688 */
5689void
5690xmlParseReference(xmlParserCtxtPtr ctxt) {
5691 xmlEntityPtr ent;
5692 xmlChar *val;
5693 if (RAW != '&') return;
5694
5695 if (NXT(1) == '#') {
5696 int i = 0;
5697 xmlChar out[10];
5698 int hex = NXT(2);
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005699 int value = xmlParseCharRef(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005700
5701 if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
5702 /*
5703 * So we are using non-UTF-8 buffers
5704 * Check that the char fit on 8bits, if not
5705 * generate a CharRef.
5706 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005707 if (value <= 0xFF) {
5708 out[0] = value;
Owen Taylor3473f882001-02-23 17:55:21 +00005709 out[1] = 0;
5710 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5711 (!ctxt->disableSAX))
5712 ctxt->sax->characters(ctxt->userData, out, 1);
5713 } else {
5714 if ((hex == 'x') || (hex == 'X'))
Aleksey Sanin49cc9752002-06-14 17:07:10 +00005715 snprintf((char *)out, sizeof(out), "#x%X", value);
Owen Taylor3473f882001-02-23 17:55:21 +00005716 else
Aleksey Sanin49cc9752002-06-14 17:07:10 +00005717 snprintf((char *)out, sizeof(out), "#%d", value);
Owen Taylor3473f882001-02-23 17:55:21 +00005718 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
5719 (!ctxt->disableSAX))
5720 ctxt->sax->reference(ctxt->userData, out);
5721 }
5722 } else {
5723 /*
5724 * Just encode the value in UTF-8
5725 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005726 COPY_BUF(0 ,out, i, value);
Owen Taylor3473f882001-02-23 17:55:21 +00005727 out[i] = 0;
5728 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5729 (!ctxt->disableSAX))
5730 ctxt->sax->characters(ctxt->userData, out, i);
5731 }
5732 } else {
5733 ent = xmlParseEntityRef(ctxt);
5734 if (ent == NULL) return;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00005735 if (!ctxt->wellFormed)
5736 return;
Owen Taylor3473f882001-02-23 17:55:21 +00005737 if ((ent->name != NULL) &&
5738 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
5739 xmlNodePtr list = NULL;
Daniel Veillard7d515752003-09-26 19:12:37 +00005740 xmlParserErrors ret = XML_ERR_OK;
Owen Taylor3473f882001-02-23 17:55:21 +00005741
5742
5743 /*
5744 * The first reference to the entity trigger a parsing phase
5745 * where the ent->children is filled with the result from
5746 * the parsing.
5747 */
5748 if (ent->children == NULL) {
5749 xmlChar *value;
5750 value = ent->content;
5751
5752 /*
5753 * Check that this entity is well formed
5754 */
5755 if ((value != NULL) &&
5756 (value[1] == 0) && (value[0] == '<') &&
5757 (xmlStrEqual(ent->name, BAD_CAST "lt"))) {
5758 /*
5759 * DONE: get definite answer on this !!!
5760 * Lots of entity decls are used to declare a single
5761 * char
5762 * <!ENTITY lt "<">
5763 * Which seems to be valid since
5764 * 2.4: The ampersand character (&) and the left angle
5765 * bracket (<) may appear in their literal form only
5766 * when used ... They are also legal within the literal
5767 * entity value of an internal entity declaration;i
5768 * see "4.3.2 Well-Formed Parsed Entities".
5769 * IMHO 2.4 and 4.3.2 are directly in contradiction.
5770 * Looking at the OASIS test suite and James Clark
5771 * tests, this is broken. However the XML REC uses
5772 * it. Is the XML REC not well-formed ????
5773 * This is a hack to avoid this problem
5774 *
5775 * ANSWER: since lt gt amp .. are already defined,
5776 * this is a redefinition and hence the fact that the
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005777 * content is not well balanced is not a Wf error, this
Owen Taylor3473f882001-02-23 17:55:21 +00005778 * is lousy but acceptable.
5779 */
5780 list = xmlNewDocText(ctxt->myDoc, value);
5781 if (list != NULL) {
5782 if ((ent->etype == XML_INTERNAL_GENERAL_ENTITY) &&
5783 (ent->children == NULL)) {
5784 ent->children = list;
5785 ent->last = list;
Daniel Veillard2d84a892002-12-30 00:01:08 +00005786 ent->owner = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005787 list->parent = (xmlNodePtr) ent;
5788 } else {
5789 xmlFreeNodeList(list);
5790 }
5791 } else if (list != NULL) {
5792 xmlFreeNodeList(list);
5793 }
5794 } else {
5795 /*
5796 * 4.3.2: An internal general parsed entity is well-formed
5797 * if its replacement text matches the production labeled
5798 * content.
5799 */
Daniel Veillardb5a60ec2002-03-18 11:45:56 +00005800
5801 void *user_data;
5802 /*
5803 * This is a bit hackish but this seems the best
5804 * way to make sure both SAX and DOM entity support
5805 * behaves okay.
5806 */
5807 if (ctxt->userData == ctxt)
5808 user_data = NULL;
5809 else
5810 user_data = ctxt->userData;
5811
Owen Taylor3473f882001-02-23 17:55:21 +00005812 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
5813 ctxt->depth++;
Daniel Veillard328f48c2002-11-15 15:24:34 +00005814 ret = xmlParseBalancedChunkMemoryInternal(ctxt,
5815 value, user_data, &list);
Owen Taylor3473f882001-02-23 17:55:21 +00005816 ctxt->depth--;
5817 } else if (ent->etype ==
5818 XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
5819 ctxt->depth++;
Daniel Veillarda97a19b2001-05-20 13:19:52 +00005820 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
Daniel Veillardb5a60ec2002-03-18 11:45:56 +00005821 ctxt->sax, user_data, ctxt->depth,
Daniel Veillarda97a19b2001-05-20 13:19:52 +00005822 ent->URI, ent->ExternalID, &list);
Owen Taylor3473f882001-02-23 17:55:21 +00005823 ctxt->depth--;
5824 } else {
Daniel Veillard7d515752003-09-26 19:12:37 +00005825 ret = XML_ERR_ENTITY_PE_INTERNAL;
Daniel Veillardf403d292003-10-05 13:51:35 +00005826 xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
5827 "invalid entity type found\n", NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005828 }
5829 if (ret == XML_ERR_ENTITY_LOOP) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005830 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00005831 return;
Daniel Veillard7d515752003-09-26 19:12:37 +00005832 } else if ((ret == XML_ERR_OK) && (list != NULL)) {
Daniel Veillard76d66f42001-05-16 21:05:17 +00005833 if (((ent->etype == XML_INTERNAL_GENERAL_ENTITY) ||
5834 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY))&&
Owen Taylor3473f882001-02-23 17:55:21 +00005835 (ent->children == NULL)) {
5836 ent->children = list;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005837 if (ctxt->replaceEntities) {
5838 /*
5839 * Prune it directly in the generated document
5840 * except for single text nodes.
5841 */
5842 if ((list->type == XML_TEXT_NODE) &&
5843 (list->next == NULL)) {
5844 list->parent = (xmlNodePtr) ent;
5845 list = NULL;
Daniel Veillard2d84a892002-12-30 00:01:08 +00005846 ent->owner = 1;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005847 } else {
Daniel Veillard2d84a892002-12-30 00:01:08 +00005848 ent->owner = 0;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005849 while (list != NULL) {
5850 list->parent = (xmlNodePtr) ctxt->node;
Daniel Veillard68e9e742002-11-16 15:35:11 +00005851 list->doc = ctxt->myDoc;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005852 if (list->next == NULL)
5853 ent->last = list;
5854 list = list->next;
Daniel Veillard8107a222002-01-13 14:10:10 +00005855 }
Daniel Veillard62f313b2001-07-04 19:49:14 +00005856 list = ent->children;
Daniel Veillard81273902003-09-30 00:43:48 +00005857#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +00005858 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
5859 xmlAddEntityReference(ent, list, NULL);
Daniel Veillard81273902003-09-30 00:43:48 +00005860#endif /* LIBXML_LEGACY_ENABLED */
Daniel Veillard62f313b2001-07-04 19:49:14 +00005861 }
5862 } else {
Daniel Veillard2d84a892002-12-30 00:01:08 +00005863 ent->owner = 1;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005864 while (list != NULL) {
5865 list->parent = (xmlNodePtr) ent;
5866 if (list->next == NULL)
5867 ent->last = list;
5868 list = list->next;
5869 }
Owen Taylor3473f882001-02-23 17:55:21 +00005870 }
5871 } else {
5872 xmlFreeNodeList(list);
Daniel Veillard62f313b2001-07-04 19:49:14 +00005873 list = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005874 }
William M. Brackb670e2e2003-09-27 01:05:55 +00005875 } else if ((ret != XML_ERR_OK) &&
5876 (ret != XML_WAR_UNDECLARED_ENTITY)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005877 xmlFatalErr(ctxt, ret, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005878 } else if (list != NULL) {
5879 xmlFreeNodeList(list);
Daniel Veillard62f313b2001-07-04 19:49:14 +00005880 list = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005881 }
5882 }
5883 }
5884 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
5885 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
5886 /*
5887 * Create a node.
5888 */
5889 ctxt->sax->reference(ctxt->userData, ent->name);
5890 return;
5891 } else if (ctxt->replaceEntities) {
5892 if ((ctxt->node != NULL) && (ent->children != NULL)) {
5893 /*
5894 * Seems we are generating the DOM content, do
Daniel Veillard62f313b2001-07-04 19:49:14 +00005895 * a simple tree copy for all references except the first
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005896 * In the first occurrence list contains the replacement
Owen Taylor3473f882001-02-23 17:55:21 +00005897 */
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00005898 if ((list == NULL) && (ent->owner == 0)) {
5899 xmlNodePtr nw = NULL, cur, firstChild = NULL;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005900 cur = ent->children;
5901 while (cur != NULL) {
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00005902 nw = xmlCopyNode(cur, 1);
5903 if (nw != NULL) {
5904 nw->_private = cur->_private;
Daniel Veillard8f872442003-01-09 23:19:02 +00005905 if (firstChild == NULL){
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00005906 firstChild = nw;
Daniel Veillard8f872442003-01-09 23:19:02 +00005907 }
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00005908 xmlAddChild(ctxt->node, nw);
Daniel Veillard8107a222002-01-13 14:10:10 +00005909 }
Daniel Veillard62f313b2001-07-04 19:49:14 +00005910 if (cur == ent->last)
5911 break;
5912 cur = cur->next;
5913 }
Daniel Veillard81273902003-09-30 00:43:48 +00005914#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +00005915 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00005916 xmlAddEntityReference(ent, firstChild, nw);
Daniel Veillard81273902003-09-30 00:43:48 +00005917#endif /* LIBXML_LEGACY_ENABLED */
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00005918 } else if (list == NULL) {
5919 xmlNodePtr nw = NULL, cur, next, last,
5920 firstChild = NULL;
5921 /*
5922 * Copy the entity child list and make it the new
5923 * entity child list. The goal is to make sure any
5924 * ID or REF referenced will be the one from the
5925 * document content and not the entity copy.
5926 */
5927 cur = ent->children;
5928 ent->children = NULL;
5929 last = ent->last;
5930 ent->last = NULL;
5931 while (cur != NULL) {
5932 next = cur->next;
5933 cur->next = NULL;
5934 cur->parent = NULL;
5935 nw = xmlCopyNode(cur, 1);
5936 if (nw != NULL) {
5937 nw->_private = cur->_private;
5938 if (firstChild == NULL){
5939 firstChild = cur;
5940 }
5941 xmlAddChild((xmlNodePtr) ent, nw);
5942 xmlAddChild(ctxt->node, cur);
5943 }
5944 if (cur == last)
5945 break;
5946 cur = next;
5947 }
5948 ent->owner = 1;
Daniel Veillard81273902003-09-30 00:43:48 +00005949#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00005950 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
5951 xmlAddEntityReference(ent, firstChild, nw);
Daniel Veillard81273902003-09-30 00:43:48 +00005952#endif /* LIBXML_LEGACY_ENABLED */
Daniel Veillard62f313b2001-07-04 19:49:14 +00005953 } else {
5954 /*
5955 * the name change is to avoid coalescing of the
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005956 * node with a possible previous text one which
5957 * would make ent->children a dangling pointer
Daniel Veillard62f313b2001-07-04 19:49:14 +00005958 */
5959 if (ent->children->type == XML_TEXT_NODE)
5960 ent->children->name = xmlStrdup(BAD_CAST "nbktext");
5961 if ((ent->last != ent->children) &&
5962 (ent->last->type == XML_TEXT_NODE))
5963 ent->last->name = xmlStrdup(BAD_CAST "nbktext");
5964 xmlAddChildList(ctxt->node, ent->children);
5965 }
5966
Owen Taylor3473f882001-02-23 17:55:21 +00005967 /*
5968 * This is to avoid a nasty side effect, see
5969 * characters() in SAX.c
5970 */
5971 ctxt->nodemem = 0;
5972 ctxt->nodelen = 0;
5973 return;
5974 } else {
5975 /*
5976 * Probably running in SAX mode
5977 */
5978 xmlParserInputPtr input;
5979
5980 input = xmlNewEntityInputStream(ctxt, ent);
5981 xmlPushInput(ctxt, input);
5982 if ((ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) &&
Daniel Veillard8f597c32003-10-06 08:19:27 +00005983 (memcmp(CUR_PTR, "<?xml", 5) == 0) && (IS_BLANK(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +00005984 xmlParseTextDecl(ctxt);
5985 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5986 /*
5987 * The XML REC instructs us to stop parsing right here
5988 */
5989 ctxt->instate = XML_PARSER_EOF;
5990 return;
5991 }
5992 if (input->standalone == 1) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005993 xmlFatalErr(ctxt, XML_ERR_EXT_ENTITY_STANDALONE,
5994 NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005995 }
5996 }
5997 return;
5998 }
5999 }
6000 } else {
6001 val = ent->content;
6002 if (val == NULL) return;
6003 /*
6004 * inline the entity.
6005 */
6006 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
6007 (!ctxt->disableSAX))
6008 ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
6009 }
6010 }
6011}
6012
6013/**
6014 * xmlParseEntityRef:
6015 * @ctxt: an XML parser context
6016 *
6017 * parse ENTITY references declarations
6018 *
6019 * [68] EntityRef ::= '&' Name ';'
6020 *
6021 * [ WFC: Entity Declared ]
6022 * In a document without any DTD, a document with only an internal DTD
6023 * subset which contains no parameter entity references, or a document
6024 * with "standalone='yes'", the Name given in the entity reference
6025 * must match that in an entity declaration, except that well-formed
6026 * documents need not declare any of the following entities: amp, lt,
6027 * gt, apos, quot. The declaration of a parameter entity must precede
6028 * any reference to it. Similarly, the declaration of a general entity
6029 * must precede any reference to it which appears in a default value in an
6030 * attribute-list declaration. Note that if entities are declared in the
6031 * external subset or in external parameter entities, a non-validating
6032 * processor is not obligated to read and process their declarations;
6033 * for such documents, the rule that an entity must be declared is a
6034 * well-formedness constraint only if standalone='yes'.
6035 *
6036 * [ WFC: Parsed Entity ]
6037 * An entity reference must not contain the name of an unparsed entity
6038 *
6039 * Returns the xmlEntityPtr if found, or NULL otherwise.
6040 */
6041xmlEntityPtr
6042xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006043 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00006044 xmlEntityPtr ent = NULL;
6045
6046 GROW;
6047
6048 if (RAW == '&') {
6049 NEXT;
6050 name = xmlParseName(ctxt);
6051 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006052 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6053 "xmlParseEntityRef: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006054 } else {
6055 if (RAW == ';') {
6056 NEXT;
6057 /*
6058 * Ask first SAX for entity resolution, otherwise try the
6059 * predefined set.
6060 */
6061 if (ctxt->sax != NULL) {
6062 if (ctxt->sax->getEntity != NULL)
6063 ent = ctxt->sax->getEntity(ctxt->userData, name);
Daniel Veillard39eb88b2003-03-11 11:21:28 +00006064 if ((ctxt->wellFormed == 1 ) && (ent == NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00006065 ent = xmlGetPredefinedEntity(name);
Daniel Veillard39eb88b2003-03-11 11:21:28 +00006066 if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
6067 (ctxt->userData==ctxt)) {
Daniel Veillard1af9a412003-08-20 22:54:39 +00006068 ent = xmlSAX2GetEntity(ctxt, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00006069 }
Owen Taylor3473f882001-02-23 17:55:21 +00006070 }
6071 /*
6072 * [ WFC: Entity Declared ]
6073 * In a document without any DTD, a document with only an
6074 * internal DTD subset which contains no parameter entity
6075 * references, or a document with "standalone='yes'", the
6076 * Name given in the entity reference must match that in an
6077 * entity declaration, except that well-formed documents
6078 * need not declare any of the following entities: amp, lt,
6079 * gt, apos, quot.
6080 * The declaration of a parameter entity must precede any
6081 * reference to it.
6082 * Similarly, the declaration of a general entity must
6083 * precede any reference to it which appears in a default
6084 * value in an attribute-list declaration. Note that if
6085 * entities are declared in the external subset or in
6086 * external parameter entities, a non-validating processor
6087 * is not obligated to read and process their declarations;
6088 * for such documents, the rule that an entity must be
6089 * declared is a well-formedness constraint only if
6090 * standalone='yes'.
6091 */
6092 if (ent == NULL) {
6093 if ((ctxt->standalone == 1) ||
6094 ((ctxt->hasExternalSubset == 0) &&
6095 (ctxt->hasPErefs == 0))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006096 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00006097 "Entity '%s' not defined\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006098 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00006099 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00006100 "Entity '%s' not defined\n", name);
6101 }
Daniel Veillardf403d292003-10-05 13:51:35 +00006102 ctxt->valid = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00006103 }
6104
6105 /*
6106 * [ WFC: Parsed Entity ]
6107 * An entity reference must not contain the name of an
6108 * unparsed entity
6109 */
6110 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006111 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00006112 "Entity reference to unparsed entity %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006113 }
6114
6115 /*
6116 * [ WFC: No External Entity References ]
6117 * Attribute values cannot contain direct or indirect
6118 * entity references to external entities.
6119 */
6120 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
6121 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006122 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
6123 "Attribute references external entity '%s'\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006124 }
6125 /*
6126 * [ WFC: No < in Attribute Values ]
6127 * The replacement text of any entity referred to directly or
6128 * indirectly in an attribute value (other than "&lt;") must
6129 * not contain a <.
6130 */
6131 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
6132 (ent != NULL) &&
6133 (!xmlStrEqual(ent->name, BAD_CAST "lt")) &&
6134 (ent->content != NULL) &&
6135 (xmlStrchr(ent->content, '<'))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006136 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
Owen Taylor3473f882001-02-23 17:55:21 +00006137 "'<' in entity '%s' is not allowed in attributes values\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006138 }
6139
6140 /*
6141 * Internal check, no parameter entities here ...
6142 */
6143 else {
6144 switch (ent->etype) {
6145 case XML_INTERNAL_PARAMETER_ENTITY:
6146 case XML_EXTERNAL_PARAMETER_ENTITY:
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006147 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
6148 "Attempt to reference the parameter entity '%s'\n",
6149 name);
Owen Taylor3473f882001-02-23 17:55:21 +00006150 break;
6151 default:
6152 break;
6153 }
6154 }
6155
6156 /*
6157 * [ WFC: No Recursion ]
6158 * A parsed entity must not contain a recursive reference
6159 * to itself, either directly or indirectly.
6160 * Done somewhere else
6161 */
6162
6163 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006164 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006165 }
Owen Taylor3473f882001-02-23 17:55:21 +00006166 }
6167 }
6168 return(ent);
6169}
6170
6171/**
6172 * xmlParseStringEntityRef:
6173 * @ctxt: an XML parser context
6174 * @str: a pointer to an index in the string
6175 *
6176 * parse ENTITY references declarations, but this version parses it from
6177 * a string value.
6178 *
6179 * [68] EntityRef ::= '&' Name ';'
6180 *
6181 * [ WFC: Entity Declared ]
6182 * In a document without any DTD, a document with only an internal DTD
6183 * subset which contains no parameter entity references, or a document
6184 * with "standalone='yes'", the Name given in the entity reference
6185 * must match that in an entity declaration, except that well-formed
6186 * documents need not declare any of the following entities: amp, lt,
6187 * gt, apos, quot. The declaration of a parameter entity must precede
6188 * any reference to it. Similarly, the declaration of a general entity
6189 * must precede any reference to it which appears in a default value in an
6190 * attribute-list declaration. Note that if entities are declared in the
6191 * external subset or in external parameter entities, a non-validating
6192 * processor is not obligated to read and process their declarations;
6193 * for such documents, the rule that an entity must be declared is a
6194 * well-formedness constraint only if standalone='yes'.
6195 *
6196 * [ WFC: Parsed Entity ]
6197 * An entity reference must not contain the name of an unparsed entity
6198 *
6199 * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
6200 * is updated to the current location in the string.
6201 */
6202xmlEntityPtr
6203xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
6204 xmlChar *name;
6205 const xmlChar *ptr;
6206 xmlChar cur;
6207 xmlEntityPtr ent = NULL;
6208
6209 if ((str == NULL) || (*str == NULL))
6210 return(NULL);
6211 ptr = *str;
6212 cur = *ptr;
6213 if (cur == '&') {
6214 ptr++;
6215 cur = *ptr;
6216 name = xmlParseStringName(ctxt, &ptr);
6217 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006218 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6219 "xmlParseStringEntityRef: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006220 } else {
6221 if (*ptr == ';') {
6222 ptr++;
6223 /*
6224 * Ask first SAX for entity resolution, otherwise try the
6225 * predefined set.
6226 */
6227 if (ctxt->sax != NULL) {
6228 if (ctxt->sax->getEntity != NULL)
6229 ent = ctxt->sax->getEntity(ctxt->userData, name);
6230 if (ent == NULL)
6231 ent = xmlGetPredefinedEntity(name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00006232 if ((ent == NULL) && (ctxt->userData==ctxt)) {
Daniel Veillard1af9a412003-08-20 22:54:39 +00006233 ent = xmlSAX2GetEntity(ctxt, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00006234 }
Owen Taylor3473f882001-02-23 17:55:21 +00006235 }
6236 /*
6237 * [ WFC: Entity Declared ]
6238 * In a document without any DTD, a document with only an
6239 * internal DTD subset which contains no parameter entity
6240 * references, or a document with "standalone='yes'", the
6241 * Name given in the entity reference must match that in an
6242 * entity declaration, except that well-formed documents
6243 * need not declare any of the following entities: amp, lt,
6244 * gt, apos, quot.
6245 * The declaration of a parameter entity must precede any
6246 * reference to it.
6247 * Similarly, the declaration of a general entity must
6248 * precede any reference to it which appears in a default
6249 * value in an attribute-list declaration. Note that if
6250 * entities are declared in the external subset or in
6251 * external parameter entities, a non-validating processor
6252 * is not obligated to read and process their declarations;
6253 * for such documents, the rule that an entity must be
6254 * declared is a well-formedness constraint only if
6255 * standalone='yes'.
6256 */
6257 if (ent == NULL) {
6258 if ((ctxt->standalone == 1) ||
6259 ((ctxt->hasExternalSubset == 0) &&
6260 (ctxt->hasPErefs == 0))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006261 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00006262 "Entity '%s' not defined\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006263 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00006264 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
Daniel Veillard24eb9782003-10-04 21:08:09 +00006265 "Entity '%s' not defined\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00006266 name);
Owen Taylor3473f882001-02-23 17:55:21 +00006267 }
Daniel Veillard24eb9782003-10-04 21:08:09 +00006268 /* TODO ? check regressions ctxt->valid = 0; */
Owen Taylor3473f882001-02-23 17:55:21 +00006269 }
6270
6271 /*
6272 * [ WFC: Parsed Entity ]
6273 * An entity reference must not contain the name of an
6274 * unparsed entity
6275 */
6276 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
Daniel Veillardf403d292003-10-05 13:51:35 +00006277 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00006278 "Entity reference to unparsed entity %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006279 }
6280
6281 /*
6282 * [ WFC: No External Entity References ]
6283 * Attribute values cannot contain direct or indirect
6284 * entity references to external entities.
6285 */
6286 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
6287 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
Daniel Veillardf403d292003-10-05 13:51:35 +00006288 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
Owen Taylor3473f882001-02-23 17:55:21 +00006289 "Attribute references external entity '%s'\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006290 }
6291 /*
6292 * [ WFC: No < in Attribute Values ]
6293 * The replacement text of any entity referred to directly or
6294 * indirectly in an attribute value (other than "&lt;") must
6295 * not contain a <.
6296 */
6297 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
6298 (ent != NULL) &&
6299 (!xmlStrEqual(ent->name, BAD_CAST "lt")) &&
6300 (ent->content != NULL) &&
6301 (xmlStrchr(ent->content, '<'))) {
Daniel Veillardf403d292003-10-05 13:51:35 +00006302 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
6303 "'<' in entity '%s' is not allowed in attributes values\n",
6304 name);
Owen Taylor3473f882001-02-23 17:55:21 +00006305 }
6306
6307 /*
6308 * Internal check, no parameter entities here ...
6309 */
6310 else {
6311 switch (ent->etype) {
6312 case XML_INTERNAL_PARAMETER_ENTITY:
6313 case XML_EXTERNAL_PARAMETER_ENTITY:
Daniel Veillardf403d292003-10-05 13:51:35 +00006314 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
6315 "Attempt to reference the parameter entity '%s'\n",
6316 name);
Owen Taylor3473f882001-02-23 17:55:21 +00006317 break;
6318 default:
6319 break;
6320 }
6321 }
6322
6323 /*
6324 * [ WFC: No Recursion ]
6325 * A parsed entity must not contain a recursive reference
6326 * to itself, either directly or indirectly.
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006327 * Done somewhere else
Owen Taylor3473f882001-02-23 17:55:21 +00006328 */
6329
6330 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006331 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006332 }
6333 xmlFree(name);
6334 }
6335 }
6336 *str = ptr;
6337 return(ent);
6338}
6339
6340/**
6341 * xmlParsePEReference:
6342 * @ctxt: an XML parser context
6343 *
6344 * parse PEReference declarations
6345 * The entity content is handled directly by pushing it's content as
6346 * a new input stream.
6347 *
6348 * [69] PEReference ::= '%' Name ';'
6349 *
6350 * [ WFC: No Recursion ]
6351 * A parsed entity must not contain a recursive
6352 * reference to itself, either directly or indirectly.
6353 *
6354 * [ WFC: Entity Declared ]
6355 * In a document without any DTD, a document with only an internal DTD
6356 * subset which contains no parameter entity references, or a document
6357 * with "standalone='yes'", ... ... The declaration of a parameter
6358 * entity must precede any reference to it...
6359 *
6360 * [ VC: Entity Declared ]
6361 * In a document with an external subset or external parameter entities
6362 * with "standalone='no'", ... ... The declaration of a parameter entity
6363 * must precede any reference to it...
6364 *
6365 * [ WFC: In DTD ]
6366 * Parameter-entity references may only appear in the DTD.
6367 * NOTE: misleading but this is handled.
6368 */
6369void
Daniel Veillard8f597c32003-10-06 08:19:27 +00006370xmlParsePEReference(xmlParserCtxtPtr ctxt)
6371{
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006372 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00006373 xmlEntityPtr entity = NULL;
6374 xmlParserInputPtr input;
6375
6376 if (RAW == '%') {
6377 NEXT;
Daniel Veillard76d66f42001-05-16 21:05:17 +00006378 name = xmlParseName(ctxt);
Daniel Veillard8f597c32003-10-06 08:19:27 +00006379 if (name == NULL) {
6380 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6381 "xmlParsePEReference: no name\n");
6382 } else {
6383 if (RAW == ';') {
6384 NEXT;
6385 if ((ctxt->sax != NULL) &&
6386 (ctxt->sax->getParameterEntity != NULL))
6387 entity = ctxt->sax->getParameterEntity(ctxt->userData,
6388 name);
6389 if (entity == NULL) {
6390 /*
6391 * [ WFC: Entity Declared ]
6392 * In a document without any DTD, a document with only an
6393 * internal DTD subset which contains no parameter entity
6394 * references, or a document with "standalone='yes'", ...
6395 * ... The declaration of a parameter entity must precede
6396 * any reference to it...
6397 */
6398 if ((ctxt->standalone == 1) ||
6399 ((ctxt->hasExternalSubset == 0) &&
6400 (ctxt->hasPErefs == 0))) {
6401 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
6402 "PEReference: %%%s; not found\n",
6403 name);
6404 } else {
6405 /*
6406 * [ VC: Entity Declared ]
6407 * In a document with an external subset or external
6408 * parameter entities with "standalone='no'", ...
6409 * ... The declaration of a parameter entity must
6410 * precede any reference to it...
6411 */
6412 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
6413 "PEReference: %%%s; not found\n",
6414 name, NULL);
6415 ctxt->valid = 0;
6416 }
6417 } else {
6418 /*
6419 * Internal checking in case the entity quest barfed
6420 */
6421 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
6422 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
6423 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
6424 "Internal: %%%s; is not a parameter entity\n",
6425 name, NULL);
6426 } else if (ctxt->input->free != deallocblankswrapper) {
6427 input =
6428 xmlNewBlanksWrapperInputStream(ctxt, entity);
6429 xmlPushInput(ctxt, input);
6430 } else {
6431 /*
6432 * TODO !!!
6433 * handle the extra spaces added before and after
6434 * c.f. http://www.w3.org/TR/REC-xml#as-PE
6435 */
6436 input = xmlNewEntityInputStream(ctxt, entity);
6437 xmlPushInput(ctxt, input);
6438 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
6439 (memcmp(CUR_PTR, "<?xml", 5) == 0) &&
6440 (IS_BLANK(NXT(5)))) {
6441 xmlParseTextDecl(ctxt);
6442 if (ctxt->errNo ==
6443 XML_ERR_UNSUPPORTED_ENCODING) {
6444 /*
6445 * The XML REC instructs us to stop parsing
6446 * right here
6447 */
6448 ctxt->instate = XML_PARSER_EOF;
6449 return;
6450 }
6451 }
6452 }
6453 }
6454 ctxt->hasPErefs = 1;
6455 } else {
6456 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
6457 }
6458 }
Owen Taylor3473f882001-02-23 17:55:21 +00006459 }
6460}
6461
6462/**
6463 * xmlParseStringPEReference:
6464 * @ctxt: an XML parser context
6465 * @str: a pointer to an index in the string
6466 *
6467 * parse PEReference declarations
6468 *
6469 * [69] PEReference ::= '%' Name ';'
6470 *
6471 * [ WFC: No Recursion ]
6472 * A parsed entity must not contain a recursive
6473 * reference to itself, either directly or indirectly.
6474 *
6475 * [ WFC: Entity Declared ]
6476 * In a document without any DTD, a document with only an internal DTD
6477 * subset which contains no parameter entity references, or a document
6478 * with "standalone='yes'", ... ... The declaration of a parameter
6479 * entity must precede any reference to it...
6480 *
6481 * [ VC: Entity Declared ]
6482 * In a document with an external subset or external parameter entities
6483 * with "standalone='no'", ... ... The declaration of a parameter entity
6484 * must precede any reference to it...
6485 *
6486 * [ WFC: In DTD ]
6487 * Parameter-entity references may only appear in the DTD.
6488 * NOTE: misleading but this is handled.
6489 *
6490 * Returns the string of the entity content.
6491 * str is updated to the current value of the index
6492 */
6493xmlEntityPtr
6494xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
6495 const xmlChar *ptr;
6496 xmlChar cur;
6497 xmlChar *name;
6498 xmlEntityPtr entity = NULL;
6499
6500 if ((str == NULL) || (*str == NULL)) return(NULL);
6501 ptr = *str;
6502 cur = *ptr;
6503 if (cur == '%') {
6504 ptr++;
6505 cur = *ptr;
6506 name = xmlParseStringName(ctxt, &ptr);
6507 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006508 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6509 "xmlParseStringPEReference: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006510 } else {
6511 cur = *ptr;
6512 if (cur == ';') {
6513 ptr++;
6514 cur = *ptr;
6515 if ((ctxt->sax != NULL) &&
6516 (ctxt->sax->getParameterEntity != NULL))
6517 entity = ctxt->sax->getParameterEntity(ctxt->userData,
6518 name);
6519 if (entity == NULL) {
6520 /*
6521 * [ WFC: Entity Declared ]
6522 * In a document without any DTD, a document with only an
6523 * internal DTD subset which contains no parameter entity
6524 * references, or a document with "standalone='yes'", ...
6525 * ... The declaration of a parameter entity must precede
6526 * any reference to it...
6527 */
6528 if ((ctxt->standalone == 1) ||
6529 ((ctxt->hasExternalSubset == 0) &&
6530 (ctxt->hasPErefs == 0))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006531 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00006532 "PEReference: %%%s; not found\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006533 } else {
6534 /*
6535 * [ VC: Entity Declared ]
6536 * In a document with an external subset or external
6537 * parameter entities with "standalone='no'", ...
6538 * ... The declaration of a parameter entity must
6539 * precede any reference to it...
6540 */
Daniel Veillard24eb9782003-10-04 21:08:09 +00006541 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
6542 "PEReference: %%%s; not found\n",
6543 name, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006544 ctxt->valid = 0;
6545 }
6546 } else {
6547 /*
6548 * Internal checking in case the entity quest barfed
6549 */
6550 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
6551 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00006552 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
6553 "%%%s; is not a parameter entity\n",
6554 name, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006555 }
6556 }
6557 ctxt->hasPErefs = 1;
6558 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006559 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006560 }
6561 xmlFree(name);
6562 }
6563 }
6564 *str = ptr;
6565 return(entity);
6566}
6567
6568/**
6569 * xmlParseDocTypeDecl:
6570 * @ctxt: an XML parser context
6571 *
6572 * parse a DOCTYPE declaration
6573 *
6574 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
6575 * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
6576 *
6577 * [ VC: Root Element Type ]
6578 * The Name in the document type declaration must match the element
6579 * type of the root element.
6580 */
6581
6582void
6583xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006584 const xmlChar *name = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00006585 xmlChar *ExternalID = NULL;
6586 xmlChar *URI = NULL;
6587
6588 /*
6589 * We know that '<!DOCTYPE' has been detected.
6590 */
6591 SKIP(9);
6592
6593 SKIP_BLANKS;
6594
6595 /*
6596 * Parse the DOCTYPE name.
6597 */
6598 name = xmlParseName(ctxt);
6599 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006600 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6601 "xmlParseDocTypeDecl : no DOCTYPE name !\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006602 }
6603 ctxt->intSubName = name;
6604
6605 SKIP_BLANKS;
6606
6607 /*
6608 * Check for SystemID and ExternalID
6609 */
6610 URI = xmlParseExternalID(ctxt, &ExternalID, 1);
6611
6612 if ((URI != NULL) || (ExternalID != NULL)) {
6613 ctxt->hasExternalSubset = 1;
6614 }
6615 ctxt->extSubURI = URI;
6616 ctxt->extSubSystem = ExternalID;
6617
6618 SKIP_BLANKS;
6619
6620 /*
6621 * Create and update the internal subset.
6622 */
6623 if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
6624 (!ctxt->disableSAX))
6625 ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
6626
6627 /*
6628 * Is there any internal subset declarations ?
6629 * they are handled separately in xmlParseInternalSubset()
6630 */
6631 if (RAW == '[')
6632 return;
6633
6634 /*
6635 * We should be at the end of the DOCTYPE declaration.
6636 */
6637 if (RAW != '>') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006638 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006639 }
6640 NEXT;
6641}
6642
6643/**
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006644 * xmlParseInternalSubset:
Owen Taylor3473f882001-02-23 17:55:21 +00006645 * @ctxt: an XML parser context
6646 *
6647 * parse the internal subset declaration
6648 *
6649 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
6650 */
6651
Daniel Veillard56a4cb82001-03-24 17:00:36 +00006652static void
Owen Taylor3473f882001-02-23 17:55:21 +00006653xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
6654 /*
6655 * Is there any DTD definition ?
6656 */
6657 if (RAW == '[') {
6658 ctxt->instate = XML_PARSER_DTD;
6659 NEXT;
6660 /*
6661 * Parse the succession of Markup declarations and
6662 * PEReferences.
6663 * Subsequence (markupdecl | PEReference | S)*
6664 */
6665 while (RAW != ']') {
6666 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00006667 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00006668
6669 SKIP_BLANKS;
6670 xmlParseMarkupDecl(ctxt);
6671 xmlParsePEReference(ctxt);
6672
6673 /*
6674 * Pop-up of finished entities.
6675 */
6676 while ((RAW == 0) && (ctxt->inputNr > 1))
6677 xmlPopInput(ctxt);
6678
6679 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006680 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
Owen Taylor3473f882001-02-23 17:55:21 +00006681 "xmlParseInternalSubset: error detected in Markup declaration\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006682 break;
6683 }
6684 }
6685 if (RAW == ']') {
6686 NEXT;
6687 SKIP_BLANKS;
6688 }
6689 }
6690
6691 /*
6692 * We should be at the end of the DOCTYPE declaration.
6693 */
6694 if (RAW != '>') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006695 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006696 }
6697 NEXT;
6698}
6699
Daniel Veillard81273902003-09-30 00:43:48 +00006700#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +00006701/**
6702 * xmlParseAttribute:
6703 * @ctxt: an XML parser context
6704 * @value: a xmlChar ** used to store the value of the attribute
6705 *
6706 * parse an attribute
6707 *
6708 * [41] Attribute ::= Name Eq AttValue
6709 *
6710 * [ WFC: No External Entity References ]
6711 * Attribute values cannot contain direct or indirect entity references
6712 * to external entities.
6713 *
6714 * [ WFC: No < in Attribute Values ]
6715 * The replacement text of any entity referred to directly or indirectly in
6716 * an attribute value (other than "&lt;") must not contain a <.
6717 *
6718 * [ VC: Attribute Value Type ]
6719 * The attribute must have been declared; the value must be of the type
6720 * declared for it.
6721 *
6722 * [25] Eq ::= S? '=' S?
6723 *
6724 * With namespace:
6725 *
6726 * [NS 11] Attribute ::= QName Eq AttValue
6727 *
6728 * Also the case QName == xmlns:??? is handled independently as a namespace
6729 * definition.
6730 *
6731 * Returns the attribute name, and the value in *value.
6732 */
6733
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006734const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00006735xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006736 const xmlChar *name;
6737 xmlChar *val;
Owen Taylor3473f882001-02-23 17:55:21 +00006738
6739 *value = NULL;
Daniel Veillard878eab02002-02-19 13:46:09 +00006740 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00006741 name = xmlParseName(ctxt);
6742 if (name == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00006743 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006744 "error parsing attribute name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006745 return(NULL);
6746 }
6747
6748 /*
6749 * read the value
6750 */
6751 SKIP_BLANKS;
6752 if (RAW == '=') {
6753 NEXT;
6754 SKIP_BLANKS;
6755 val = xmlParseAttValue(ctxt);
6756 ctxt->instate = XML_PARSER_CONTENT;
6757 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006758 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
Owen Taylor3473f882001-02-23 17:55:21 +00006759 "Specification mandate value for attribute %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006760 return(NULL);
6761 }
6762
6763 /*
6764 * Check that xml:lang conforms to the specification
6765 * No more registered as an error, just generate a warning now
6766 * since this was deprecated in XML second edition
6767 */
6768 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
6769 if (!xmlCheckLanguageID(val)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00006770 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
6771 "Malformed value for xml:lang : %s\n",
6772 val, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006773 }
6774 }
6775
6776 /*
6777 * Check that xml:space conforms to the specification
6778 */
6779 if (xmlStrEqual(name, BAD_CAST "xml:space")) {
6780 if (xmlStrEqual(val, BAD_CAST "default"))
6781 *(ctxt->space) = 0;
6782 else if (xmlStrEqual(val, BAD_CAST "preserve"))
6783 *(ctxt->space) = 1;
6784 else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006785 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
Daniel Veillard642104e2003-03-26 16:32:05 +00006786"Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
Owen Taylor3473f882001-02-23 17:55:21 +00006787 val);
Owen Taylor3473f882001-02-23 17:55:21 +00006788 }
6789 }
6790
6791 *value = val;
6792 return(name);
6793}
6794
6795/**
6796 * xmlParseStartTag:
6797 * @ctxt: an XML parser context
6798 *
6799 * parse a start of tag either for rule element or
6800 * EmptyElement. In both case we don't parse the tag closing chars.
6801 *
6802 * [40] STag ::= '<' Name (S Attribute)* S? '>'
6803 *
6804 * [ WFC: Unique Att Spec ]
6805 * No attribute name may appear more than once in the same start-tag or
6806 * empty-element tag.
6807 *
6808 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
6809 *
6810 * [ WFC: Unique Att Spec ]
6811 * No attribute name may appear more than once in the same start-tag or
6812 * empty-element tag.
6813 *
6814 * With namespace:
6815 *
6816 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
6817 *
6818 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
6819 *
6820 * Returns the element name parsed
6821 */
6822
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006823const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00006824xmlParseStartTag(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006825 const xmlChar *name;
6826 const xmlChar *attname;
Owen Taylor3473f882001-02-23 17:55:21 +00006827 xmlChar *attvalue;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00006828 const xmlChar **atts = ctxt->atts;
Owen Taylor3473f882001-02-23 17:55:21 +00006829 int nbatts = 0;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00006830 int maxatts = ctxt->maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00006831 int i;
6832
6833 if (RAW != '<') return(NULL);
Daniel Veillard21a0f912001-02-25 19:54:14 +00006834 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00006835
6836 name = xmlParseName(ctxt);
6837 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006838 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00006839 "xmlParseStartTag: invalid element name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006840 return(NULL);
6841 }
6842
6843 /*
6844 * Now parse the attributes, it ends up with the ending
6845 *
6846 * (S Attribute)* S?
6847 */
6848 SKIP_BLANKS;
6849 GROW;
6850
Daniel Veillard21a0f912001-02-25 19:54:14 +00006851 while ((RAW != '>') &&
6852 ((RAW != '/') || (NXT(1) != '>')) &&
Daniel Veillard73b013f2003-09-30 12:36:01 +00006853 (IS_BYTE_CHAR(RAW))) {
Owen Taylor3473f882001-02-23 17:55:21 +00006854 const xmlChar *q = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00006855 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00006856
6857 attname = xmlParseAttribute(ctxt, &attvalue);
6858 if ((attname != NULL) && (attvalue != NULL)) {
6859 /*
6860 * [ WFC: Unique Att Spec ]
6861 * No attribute name may appear more than once in the same
6862 * start-tag or empty-element tag.
6863 */
6864 for (i = 0; i < nbatts;i += 2) {
6865 if (xmlStrEqual(atts[i], attname)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006866 xmlErrAttributeDup(ctxt, NULL, attname);
Owen Taylor3473f882001-02-23 17:55:21 +00006867 xmlFree(attvalue);
6868 goto failed;
6869 }
6870 }
Owen Taylor3473f882001-02-23 17:55:21 +00006871 /*
6872 * Add the pair to atts
6873 */
6874 if (atts == NULL) {
Daniel Veillard6155d8a2003-08-19 15:01:28 +00006875 maxatts = 22; /* allow for 10 attrs by default */
6876 atts = (const xmlChar **)
6877 xmlMalloc(maxatts * sizeof(xmlChar *));
Owen Taylor3473f882001-02-23 17:55:21 +00006878 if (atts == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006879 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006880 if (attvalue != NULL)
6881 xmlFree(attvalue);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006882 goto failed;
Owen Taylor3473f882001-02-23 17:55:21 +00006883 }
Daniel Veillard6155d8a2003-08-19 15:01:28 +00006884 ctxt->atts = atts;
6885 ctxt->maxatts = maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00006886 } else if (nbatts + 4 > maxatts) {
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006887 const xmlChar **n;
6888
Owen Taylor3473f882001-02-23 17:55:21 +00006889 maxatts *= 2;
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006890 n = (const xmlChar **) xmlRealloc((void *) atts,
Daniel Veillard6155d8a2003-08-19 15:01:28 +00006891 maxatts * sizeof(const xmlChar *));
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006892 if (n == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006893 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006894 if (attvalue != NULL)
6895 xmlFree(attvalue);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006896 goto failed;
Owen Taylor3473f882001-02-23 17:55:21 +00006897 }
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006898 atts = n;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00006899 ctxt->atts = atts;
6900 ctxt->maxatts = maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00006901 }
6902 atts[nbatts++] = attname;
6903 atts[nbatts++] = attvalue;
6904 atts[nbatts] = NULL;
6905 atts[nbatts + 1] = NULL;
6906 } else {
Owen Taylor3473f882001-02-23 17:55:21 +00006907 if (attvalue != NULL)
6908 xmlFree(attvalue);
6909 }
6910
6911failed:
6912
Daniel Veillard3772de32002-12-17 10:31:45 +00006913 GROW
Owen Taylor3473f882001-02-23 17:55:21 +00006914 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
6915 break;
6916 if (!IS_BLANK(RAW)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006917 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6918 "attributes construct error\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006919 }
6920 SKIP_BLANKS;
Daniel Veillard02111c12003-02-24 19:14:52 +00006921 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
6922 (attname == NULL) && (attvalue == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006923 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
6924 "xmlParseStartTag: problem parsing attributes\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006925 break;
6926 }
Daniel Veillard6155d8a2003-08-19 15:01:28 +00006927 SHRINK;
Owen Taylor3473f882001-02-23 17:55:21 +00006928 GROW;
6929 }
6930
6931 /*
6932 * SAX: Start of Element !
6933 */
6934 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
Daniel Veillard6155d8a2003-08-19 15:01:28 +00006935 (!ctxt->disableSAX)) {
6936 if (nbatts > 0)
6937 ctxt->sax->startElement(ctxt->userData, name, atts);
6938 else
6939 ctxt->sax->startElement(ctxt->userData, name, NULL);
6940 }
Owen Taylor3473f882001-02-23 17:55:21 +00006941
6942 if (atts != NULL) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006943 /* Free only the content strings */
6944 for (i = 1;i < nbatts;i+=2)
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006945 if (atts[i] != NULL)
6946 xmlFree((xmlChar *) atts[i]);
Owen Taylor3473f882001-02-23 17:55:21 +00006947 }
6948 return(name);
6949}
6950
6951/**
Daniel Veillard0fb18932003-09-07 09:14:37 +00006952 * xmlParseEndTag1:
Owen Taylor3473f882001-02-23 17:55:21 +00006953 * @ctxt: an XML parser context
Daniel Veillard0fb18932003-09-07 09:14:37 +00006954 * @line: line of the start tag
6955 * @nsNr: number of namespaces on the start tag
Owen Taylor3473f882001-02-23 17:55:21 +00006956 *
6957 * parse an end of tag
6958 *
6959 * [42] ETag ::= '</' Name S? '>'
6960 *
6961 * With namespace
6962 *
6963 * [NS 9] ETag ::= '</' QName S? '>'
6964 */
6965
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00006966static void
Daniel Veillard0fb18932003-09-07 09:14:37 +00006967xmlParseEndTag1(xmlParserCtxtPtr ctxt, int line) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006968 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00006969
6970 GROW;
6971 if ((RAW != '<') || (NXT(1) != '/')) {
Daniel Veillardf403d292003-10-05 13:51:35 +00006972 xmlFatalErrMsg(ctxt, XML_ERR_LTSLASH_REQUIRED,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006973 "xmlParseEndTag: '</' not found\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006974 return;
6975 }
6976 SKIP(2);
6977
Daniel Veillard46de64e2002-05-29 08:21:33 +00006978 name = xmlParseNameAndCompare(ctxt,ctxt->name);
Owen Taylor3473f882001-02-23 17:55:21 +00006979
6980 /*
6981 * We should definitely be at the ending "S? '>'" part
6982 */
6983 GROW;
6984 SKIP_BLANKS;
Daniel Veillard73b013f2003-09-30 12:36:01 +00006985 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006986 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006987 } else
Daniel Veillard21a0f912001-02-25 19:54:14 +00006988 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00006989
6990 /*
6991 * [ WFC: Element Type Match ]
6992 * The Name in an element's end-tag must match the element type in the
6993 * start-tag.
6994 *
6995 */
Daniel Veillard46de64e2002-05-29 08:21:33 +00006996 if (name != (xmlChar*)1) {
Daniel Veillardf403d292003-10-05 13:51:35 +00006997 if (name == NULL) name = BAD_CAST "unparseable";
6998 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00006999 "Opening and ending tag mismatch: %s line %d and %s\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00007000 ctxt->name, line, name);
Owen Taylor3473f882001-02-23 17:55:21 +00007001 }
7002
7003 /*
7004 * SAX: End of Tag
7005 */
7006 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
7007 (!ctxt->disableSAX))
Daniel Veillard46de64e2002-05-29 08:21:33 +00007008 ctxt->sax->endElement(ctxt->userData, ctxt->name);
Owen Taylor3473f882001-02-23 17:55:21 +00007009
Daniel Veillarde57ec792003-09-10 10:50:59 +00007010 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00007011 spacePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00007012 return;
7013}
7014
7015/**
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00007016 * xmlParseEndTag:
7017 * @ctxt: an XML parser context
7018 *
7019 * parse an end of tag
7020 *
7021 * [42] ETag ::= '</' Name S? '>'
7022 *
7023 * With namespace
7024 *
7025 * [NS 9] ETag ::= '</' QName S? '>'
7026 */
7027
7028void
7029xmlParseEndTag(xmlParserCtxtPtr ctxt) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007030 xmlParseEndTag1(ctxt, 0);
7031}
Daniel Veillard81273902003-09-30 00:43:48 +00007032#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillard0fb18932003-09-07 09:14:37 +00007033
7034/************************************************************************
7035 * *
7036 * SAX 2 specific operations *
7037 * *
7038 ************************************************************************/
7039
7040static const xmlChar *
7041xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) {
7042 int len = 0, l;
7043 int c;
7044 int count = 0;
7045
7046 /*
7047 * Handler for more complex cases
7048 */
7049 GROW;
7050 c = CUR_CHAR(l);
7051 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007052 (!IS_LETTER(c) && (c != '_'))) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007053 return(NULL);
7054 }
7055
7056 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
Daniel Veillard73b013f2003-09-30 12:36:01 +00007057 ((xmlIsLetter(c)) || (xmlIsDigit(c)) ||
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007058 (c == '.') || (c == '-') || (c == '_') ||
Daniel Veillard73b013f2003-09-30 12:36:01 +00007059 (xmlIsCombining(c)) ||
7060 (xmlIsExtender(c)))) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007061 if (count++ > 100) {
7062 count = 0;
7063 GROW;
7064 }
7065 len += l;
7066 NEXTL(l);
7067 c = CUR_CHAR(l);
7068 }
7069 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
7070}
7071
7072/*
7073 * xmlGetNamespace:
7074 * @ctxt: an XML parser context
7075 * @prefix: the prefix to lookup
7076 *
7077 * Lookup the namespace name for the @prefix (which ca be NULL)
7078 * The prefix must come from the @ctxt->dict dictionnary
7079 *
7080 * Returns the namespace name or NULL if not bound
7081 */
7082static const xmlChar *
7083xmlGetNamespace(xmlParserCtxtPtr ctxt, const xmlChar *prefix) {
7084 int i;
7085
Daniel Veillarde57ec792003-09-10 10:50:59 +00007086 if (prefix == ctxt->str_xml) return(ctxt->str_xml_ns);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007087 for (i = ctxt->nsNr - 2;i >= 0;i-=2)
Daniel Veillarde57ec792003-09-10 10:50:59 +00007088 if (ctxt->nsTab[i] == prefix) {
7089 if ((prefix == NULL) && (*ctxt->nsTab[i + 1] == 0))
7090 return(NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007091 return(ctxt->nsTab[i + 1]);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007092 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007093 return(NULL);
7094}
7095
7096/**
7097 * xmlParseNCName:
7098 * @ctxt: an XML parser context
7099 *
7100 * parse an XML name.
7101 *
7102 * [4NS] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
7103 * CombiningChar | Extender
7104 *
7105 * [5NS] NCName ::= (Letter | '_') (NCNameChar)*
7106 *
7107 * Returns the Name parsed or NULL
7108 */
7109
7110static const xmlChar *
7111xmlParseNCName(xmlParserCtxtPtr ctxt) {
7112 const xmlChar *in;
7113 const xmlChar *ret;
7114 int count = 0;
7115
7116 /*
7117 * Accelerator for simple ASCII names
7118 */
7119 in = ctxt->input->cur;
7120 if (((*in >= 0x61) && (*in <= 0x7A)) ||
7121 ((*in >= 0x41) && (*in <= 0x5A)) ||
7122 (*in == '_')) {
7123 in++;
7124 while (((*in >= 0x61) && (*in <= 0x7A)) ||
7125 ((*in >= 0x41) && (*in <= 0x5A)) ||
7126 ((*in >= 0x30) && (*in <= 0x39)) ||
7127 (*in == '_') || (*in == '-') ||
7128 (*in == '.'))
7129 in++;
7130 if ((*in > 0) && (*in < 0x80)) {
7131 count = in - ctxt->input->cur;
7132 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
7133 ctxt->input->cur = in;
7134 ctxt->nbChars += count;
7135 ctxt->input->col += count;
7136 if (ret == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007137 xmlErrMemory(ctxt, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007138 }
7139 return(ret);
7140 }
7141 }
7142 return(xmlParseNCNameComplex(ctxt));
7143}
7144
7145/**
7146 * xmlParseQName:
7147 * @ctxt: an XML parser context
7148 * @prefix: pointer to store the prefix part
7149 *
7150 * parse an XML Namespace QName
7151 *
7152 * [6] QName ::= (Prefix ':')? LocalPart
7153 * [7] Prefix ::= NCName
7154 * [8] LocalPart ::= NCName
7155 *
7156 * Returns the Name parsed or NULL
7157 */
7158
7159static const xmlChar *
7160xmlParseQName(xmlParserCtxtPtr ctxt, const xmlChar **prefix) {
7161 const xmlChar *l, *p;
7162
7163 GROW;
7164
7165 l = xmlParseNCName(ctxt);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007166 if (l == NULL) {
7167 if (CUR == ':') {
7168 l = xmlParseName(ctxt);
7169 if (l != NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007170 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
7171 "Failed to parse QName '%s'\n", l, NULL, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007172 *prefix = NULL;
7173 return(l);
7174 }
7175 }
7176 return(NULL);
7177 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007178 if (CUR == ':') {
7179 NEXT;
7180 p = l;
7181 l = xmlParseNCName(ctxt);
7182 if (l == NULL) {
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007183 xmlChar *tmp;
7184
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007185 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
7186 "Failed to parse QName '%s:'\n", p, NULL, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007187 tmp = xmlBuildQName(BAD_CAST "", p, NULL, 0);
7188 p = xmlDictLookup(ctxt->dict, tmp, -1);
7189 if (tmp != NULL) xmlFree(tmp);
7190 *prefix = NULL;
7191 return(p);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007192 }
7193 if (CUR == ':') {
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007194 xmlChar *tmp;
7195
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007196 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
7197 "Failed to parse QName '%s:%s:'\n", p, l, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007198 NEXT;
7199 tmp = (xmlChar *) xmlParseName(ctxt);
7200 if (tmp != NULL) {
7201 tmp = xmlBuildQName(tmp, l, NULL, 0);
7202 l = xmlDictLookup(ctxt->dict, tmp, -1);
7203 if (tmp != NULL) xmlFree(tmp);
7204 *prefix = p;
7205 return(l);
7206 }
7207 tmp = xmlBuildQName(BAD_CAST "", l, NULL, 0);
7208 l = xmlDictLookup(ctxt->dict, tmp, -1);
7209 if (tmp != NULL) xmlFree(tmp);
7210 *prefix = p;
7211 return(l);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007212 }
7213 *prefix = p;
7214 } else
7215 *prefix = NULL;
7216 return(l);
7217}
7218
7219/**
7220 * xmlParseQNameAndCompare:
7221 * @ctxt: an XML parser context
7222 * @name: the localname
7223 * @prefix: the prefix, if any.
7224 *
7225 * parse an XML name and compares for match
7226 * (specialized for endtag parsing)
7227 *
7228 * Returns NULL for an illegal name, (xmlChar*) 1 for success
7229 * and the name for mismatch
7230 */
7231
7232static const xmlChar *
7233xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *name,
7234 xmlChar const *prefix) {
7235 const xmlChar *cmp = name;
7236 const xmlChar *in;
7237 const xmlChar *ret;
7238 const xmlChar *prefix2;
7239
7240 if (prefix == NULL) return(xmlParseNameAndCompare(ctxt, name));
7241
7242 GROW;
7243 in = ctxt->input->cur;
7244
7245 cmp = prefix;
7246 while (*in != 0 && *in == *cmp) {
7247 ++in;
7248 ++cmp;
7249 }
7250 if ((*cmp == 0) && (*in == ':')) {
7251 in++;
7252 cmp = name;
7253 while (*in != 0 && *in == *cmp) {
7254 ++in;
7255 ++cmp;
7256 }
7257 if (*cmp == 0 && (*in == '>' || IS_BLANK (*in))) {
7258 /* success */
7259 ctxt->input->cur = in;
7260 return((const xmlChar*) 1);
7261 }
7262 }
7263 /*
7264 * all strings coms from the dictionary, equality can be done directly
7265 */
7266 ret = xmlParseQName (ctxt, &prefix2);
7267 if ((ret == name) && (prefix == prefix2))
7268 return((const xmlChar*) 1);
7269 return ret;
7270}
7271
7272/**
7273 * xmlParseAttValueInternal:
7274 * @ctxt: an XML parser context
7275 * @len: attribute len result
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007276 * @alloc: whether the attribute was reallocated as a new string
7277 * @normalize: if 1 then further non-CDATA normalization must be done
Daniel Veillard0fb18932003-09-07 09:14:37 +00007278 *
7279 * parse a value for an attribute.
7280 * NOTE: if no normalization is needed, the routine will return pointers
7281 * directly from the data buffer.
7282 *
7283 * 3.3.3 Attribute-Value Normalization:
7284 * Before the value of an attribute is passed to the application or
7285 * checked for validity, the XML processor must normalize it as follows:
7286 * - a character reference is processed by appending the referenced
7287 * character to the attribute value
7288 * - an entity reference is processed by recursively processing the
7289 * replacement text of the entity
7290 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
7291 * appending #x20 to the normalized value, except that only a single
7292 * #x20 is appended for a "#xD#xA" sequence that is part of an external
7293 * parsed entity or the literal entity value of an internal parsed entity
7294 * - other characters are processed by appending them to the normalized value
7295 * If the declared value is not CDATA, then the XML processor must further
7296 * process the normalized attribute value by discarding any leading and
7297 * trailing space (#x20) characters, and by replacing sequences of space
7298 * (#x20) characters by a single space (#x20) character.
7299 * All attributes for which no declaration has been read should be treated
7300 * by a non-validating parser as if declared CDATA.
7301 *
7302 * Returns the AttValue parsed or NULL. The value has to be freed by the
7303 * caller if it was copied, this can be detected by val[*len] == 0.
7304 */
7305
7306static xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007307xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *len, int *alloc,
7308 int normalize)
Daniel Veillarde57ec792003-09-10 10:50:59 +00007309{
Daniel Veillard0fb18932003-09-07 09:14:37 +00007310 xmlChar limit = 0;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007311 const xmlChar *in = NULL, *start, *end, *last;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007312 xmlChar *ret = NULL;
7313
7314 GROW;
7315 in = (xmlChar *) CUR_PTR;
7316 if (*in != '"' && *in != '\'') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007317 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007318 return (NULL);
7319 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007320 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007321
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007322 /*
7323 * try to handle in this routine the most common case where no
7324 * allocation of a new string is required and where content is
7325 * pure ASCII.
7326 */
7327 limit = *in++;
7328 end = ctxt->input->end;
7329 start = in;
7330 if (in >= end) {
7331 const xmlChar *oldbase = ctxt->input->base;
7332 GROW;
7333 if (oldbase != ctxt->input->base) {
7334 long delta = ctxt->input->base - oldbase;
7335 start = start + delta;
7336 in = in + delta;
7337 }
7338 end = ctxt->input->end;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007339 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007340 if (normalize) {
7341 /*
7342 * Skip any leading spaces
7343 */
7344 while ((in < end) && (*in != limit) &&
7345 ((*in == 0x20) || (*in == 0x9) ||
7346 (*in == 0xA) || (*in == 0xD))) {
7347 in++;
7348 start = in;
7349 if (in >= end) {
7350 const xmlChar *oldbase = ctxt->input->base;
7351 GROW;
7352 if (oldbase != ctxt->input->base) {
7353 long delta = ctxt->input->base - oldbase;
7354 start = start + delta;
7355 in = in + delta;
7356 }
7357 end = ctxt->input->end;
7358 }
7359 }
7360 while ((in < end) && (*in != limit) && (*in >= 0x20) &&
7361 (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
7362 if ((*in++ == 0x20) && (*in == 0x20)) break;
7363 if (in >= end) {
7364 const xmlChar *oldbase = ctxt->input->base;
7365 GROW;
7366 if (oldbase != ctxt->input->base) {
7367 long delta = ctxt->input->base - oldbase;
7368 start = start + delta;
7369 in = in + delta;
7370 }
7371 end = ctxt->input->end;
7372 }
7373 }
7374 last = in;
7375 /*
7376 * skip the trailing blanks
7377 */
Daniel Veillardc6e20e42003-09-11 16:30:26 +00007378 while ((last[-1] == 0x20) && (last > start)) last--;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007379 while ((in < end) && (*in != limit) &&
7380 ((*in == 0x20) || (*in == 0x9) ||
7381 (*in == 0xA) || (*in == 0xD))) {
7382 in++;
7383 if (in >= end) {
7384 const xmlChar *oldbase = ctxt->input->base;
7385 GROW;
7386 if (oldbase != ctxt->input->base) {
7387 long delta = ctxt->input->base - oldbase;
7388 start = start + delta;
7389 in = in + delta;
7390 last = last + delta;
7391 }
7392 end = ctxt->input->end;
7393 }
7394 }
7395 if (*in != limit) goto need_complex;
7396 } else {
7397 while ((in < end) && (*in != limit) && (*in >= 0x20) &&
7398 (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
7399 in++;
7400 if (in >= end) {
7401 const xmlChar *oldbase = ctxt->input->base;
7402 GROW;
7403 if (oldbase != ctxt->input->base) {
7404 long delta = ctxt->input->base - oldbase;
7405 start = start + delta;
7406 in = in + delta;
7407 }
7408 end = ctxt->input->end;
7409 }
7410 }
7411 last = in;
7412 if (*in != limit) goto need_complex;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007413 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007414 in++;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007415 if (len != NULL) {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007416 *len = last - start;
7417 ret = (xmlChar *) start;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007418 } else {
Daniel Veillarde57ec792003-09-10 10:50:59 +00007419 if (alloc) *alloc = 1;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007420 ret = xmlStrndup(start, last - start);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007421 }
7422 CUR_PTR = in;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007423 if (alloc) *alloc = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007424 return ret;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007425need_complex:
7426 if (alloc) *alloc = 1;
7427 return xmlParseAttValueComplex(ctxt, len, normalize);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007428}
7429
7430/**
7431 * xmlParseAttribute2:
7432 * @ctxt: an XML parser context
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007433 * @pref: the element prefix
7434 * @elem: the element name
7435 * @prefix: a xmlChar ** used to store the value of the attribute prefix
Daniel Veillard0fb18932003-09-07 09:14:37 +00007436 * @value: a xmlChar ** used to store the value of the attribute
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007437 * @len: an int * to save the length of the attribute
7438 * @alloc: an int * to indicate if the attribute was allocated
Daniel Veillard0fb18932003-09-07 09:14:37 +00007439 *
7440 * parse an attribute in the new SAX2 framework.
7441 *
7442 * Returns the attribute name, and the value in *value, .
7443 */
7444
7445static const xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007446xmlParseAttribute2(xmlParserCtxtPtr ctxt,
7447 const xmlChar *pref, const xmlChar *elem,
7448 const xmlChar **prefix, xmlChar **value,
7449 int *len, int *alloc) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007450 const xmlChar *name;
7451 xmlChar *val;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007452 int normalize = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007453
7454 *value = NULL;
7455 GROW;
7456 name = xmlParseQName(ctxt, prefix);
7457 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007458 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7459 "error parsing attribute name\n");
Daniel Veillard0fb18932003-09-07 09:14:37 +00007460 return(NULL);
7461 }
7462
7463 /*
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007464 * get the type if needed
7465 */
7466 if (ctxt->attsSpecial != NULL) {
7467 int type;
7468
7469 type = (int) (long) xmlHashQLookup2(ctxt->attsSpecial,
7470 pref, elem, *prefix, name);
7471 if (type != 0) normalize = 1;
7472 }
7473
7474 /*
Daniel Veillard0fb18932003-09-07 09:14:37 +00007475 * read the value
7476 */
7477 SKIP_BLANKS;
7478 if (RAW == '=') {
7479 NEXT;
7480 SKIP_BLANKS;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007481 val = xmlParseAttValueInternal(ctxt, len, alloc, normalize);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007482 ctxt->instate = XML_PARSER_CONTENT;
7483 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00007484 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
Daniel Veillard0fb18932003-09-07 09:14:37 +00007485 "Specification mandate value for attribute %s\n", name);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007486 return(NULL);
7487 }
7488
7489 /*
7490 * Check that xml:lang conforms to the specification
7491 * No more registered as an error, just generate a warning now
7492 * since this was deprecated in XML second edition
7493 */
7494 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
7495 if (!xmlCheckLanguageID(val)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00007496 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
7497 "Malformed value for xml:lang : %s\n",
7498 val, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007499 }
7500 }
7501
7502 /*
7503 * Check that xml:space conforms to the specification
7504 */
7505 if (xmlStrEqual(name, BAD_CAST "xml:space")) {
7506 if (xmlStrEqual(val, BAD_CAST "default"))
7507 *(ctxt->space) = 0;
7508 else if (xmlStrEqual(val, BAD_CAST "preserve"))
7509 *(ctxt->space) = 1;
7510 else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00007511 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
Daniel Veillard0fb18932003-09-07 09:14:37 +00007512"Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
7513 val);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007514 }
7515 }
7516
7517 *value = val;
7518 return(name);
7519}
7520
7521/**
7522 * xmlParseStartTag2:
7523 * @ctxt: an XML parser context
7524 *
7525 * parse a start of tag either for rule element or
7526 * EmptyElement. In both case we don't parse the tag closing chars.
7527 * This routine is called when running SAX2 parsing
7528 *
7529 * [40] STag ::= '<' Name (S Attribute)* S? '>'
7530 *
7531 * [ WFC: Unique Att Spec ]
7532 * No attribute name may appear more than once in the same start-tag or
7533 * empty-element tag.
7534 *
7535 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
7536 *
7537 * [ WFC: Unique Att Spec ]
7538 * No attribute name may appear more than once in the same start-tag or
7539 * empty-element tag.
7540 *
7541 * With namespace:
7542 *
7543 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
7544 *
7545 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
7546 *
7547 * Returns the element name parsed
7548 */
7549
7550static const xmlChar *
7551xmlParseStartTag2(xmlParserCtxtPtr ctxt, const xmlChar **pref,
7552 const xmlChar **URI) {
7553 const xmlChar *localname;
7554 const xmlChar *prefix;
7555 const xmlChar *attname;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007556 const xmlChar *aprefix;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007557 const xmlChar *nsname;
7558 xmlChar *attvalue;
7559 const xmlChar **atts = ctxt->atts;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007560 int maxatts = ctxt->maxatts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007561 int nratts, nbatts, nbdef;
7562 int i, j, nbNs, attval;
7563 const xmlChar *base;
7564 unsigned long cur;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007565
7566 if (RAW != '<') return(NULL);
7567 NEXT1;
7568
7569 /*
7570 * NOTE: it is crucial with the SAX2 API to never call SHRINK beyond that
7571 * point since the attribute values may be stored as pointers to
7572 * the buffer and calling SHRINK would destroy them !
7573 * The Shrinking is only possible once the full set of attribute
7574 * callbacks have been done.
7575 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00007576reparse:
Daniel Veillard0fb18932003-09-07 09:14:37 +00007577 SHRINK;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007578 base = ctxt->input->base;
7579 cur = ctxt->input->cur - ctxt->input->base;
7580 nbatts = 0;
7581 nratts = 0;
7582 nbdef = 0;
7583 nbNs = 0;
7584 attval = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007585
7586 localname = xmlParseQName(ctxt, &prefix);
7587 if (localname == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007588 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7589 "StartTag: invalid element name\n");
Daniel Veillard0fb18932003-09-07 09:14:37 +00007590 return(NULL);
7591 }
7592
7593 /*
7594 * Now parse the attributes, it ends up with the ending
7595 *
7596 * (S Attribute)* S?
7597 */
7598 SKIP_BLANKS;
7599 GROW;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007600 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007601
7602 while ((RAW != '>') &&
7603 ((RAW != '/') || (NXT(1) != '>')) &&
Daniel Veillard73b013f2003-09-30 12:36:01 +00007604 (IS_BYTE_CHAR(RAW))) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007605 const xmlChar *q = CUR_PTR;
7606 unsigned int cons = ctxt->input->consumed;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007607 int len = -1, alloc = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007608
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007609 attname = xmlParseAttribute2(ctxt, prefix, localname,
7610 &aprefix, &attvalue, &len, &alloc);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007611 if ((attname != NULL) && (attvalue != NULL)) {
7612 if (len < 0) len = xmlStrlen(attvalue);
7613 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00007614 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
7615 xmlURIPtr uri;
7616
7617 if (*URL != 0) {
7618 uri = xmlParseURI((const char *) URL);
7619 if (uri == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00007620 xmlWarningMsg(ctxt, XML_WAR_NS_URI,
7621 "xmlns: %s not a valid URI\n",
7622 URL, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007623 } else {
7624 if (uri->scheme == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00007625 xmlWarningMsg(ctxt, XML_WAR_NS_URI_RELATIVE,
7626 "xmlns: URI %s is not absolute\n",
7627 URL, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007628 }
7629 xmlFreeURI(uri);
7630 }
7631 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007632 /*
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007633 * check that it's not a defined namespace
Daniel Veillard0fb18932003-09-07 09:14:37 +00007634 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007635 for (j = 1;j <= nbNs;j++)
7636 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
7637 break;
7638 if (j <= nbNs)
7639 xmlErrAttributeDup(ctxt, NULL, attname);
7640 else
7641 if (nsPush(ctxt, NULL, URL) > 0) nbNs++;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007642 if (alloc != 0) xmlFree(attvalue);
7643 SKIP_BLANKS;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007644 continue;
7645 }
7646 if (aprefix == ctxt->str_xmlns) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00007647 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
7648 xmlURIPtr uri;
7649
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007650 if (attname == ctxt->str_xml) {
7651 if (URL != ctxt->str_xml_ns) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007652 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
7653 "xml namespace prefix mapped to wrong URI\n",
7654 NULL, NULL, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007655 }
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007656 /*
7657 * Do not keep a namespace definition node
7658 */
7659 if (alloc != 0) xmlFree(attvalue);
7660 SKIP_BLANKS;
7661 continue;
7662 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00007663 uri = xmlParseURI((const char *) URL);
7664 if (uri == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00007665 xmlWarningMsg(ctxt, XML_WAR_NS_URI,
7666 "xmlns:%s: '%s' is not a valid URI\n",
7667 attname, URL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007668 } else {
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007669 if ((ctxt->pedantic) && (uri->scheme == NULL)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00007670 xmlWarningMsg(ctxt, XML_WAR_NS_URI_RELATIVE,
7671 "xmlns:%s: URI %s is not absolute\n",
7672 attname, URL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007673 }
7674 xmlFreeURI(uri);
7675 }
7676
Daniel Veillard0fb18932003-09-07 09:14:37 +00007677 /*
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007678 * check that it's not a defined namespace
Daniel Veillard0fb18932003-09-07 09:14:37 +00007679 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007680 for (j = 1;j <= nbNs;j++)
7681 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
7682 break;
7683 if (j <= nbNs)
7684 xmlErrAttributeDup(ctxt, aprefix, attname);
7685 else
7686 if (nsPush(ctxt, attname, URL) > 0) nbNs++;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007687 if (alloc != 0) xmlFree(attvalue);
7688 SKIP_BLANKS;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007689 continue;
7690 }
7691
7692 /*
7693 * Add the pair to atts
7694 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00007695 if ((atts == NULL) || (nbatts + 5 > maxatts)) {
7696 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007697 if (attvalue[len] == 0)
7698 xmlFree(attvalue);
7699 goto failed;
7700 }
7701 maxatts = ctxt->maxatts;
7702 atts = ctxt->atts;
7703 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00007704 ctxt->attallocs[nratts++] = alloc;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007705 atts[nbatts++] = attname;
7706 atts[nbatts++] = aprefix;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007707 atts[nbatts++] = NULL; /* the URI will be fetched later */
Daniel Veillard0fb18932003-09-07 09:14:37 +00007708 atts[nbatts++] = attvalue;
7709 attvalue += len;
7710 atts[nbatts++] = attvalue;
7711 /*
7712 * tag if some deallocation is needed
7713 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00007714 if (alloc != 0) attval = 1;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007715 } else {
7716 if ((attvalue != NULL) && (attvalue[len] == 0))
7717 xmlFree(attvalue);
7718 }
7719
7720failed:
7721
7722 GROW
Daniel Veillarde57ec792003-09-10 10:50:59 +00007723 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007724 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
7725 break;
7726 if (!IS_BLANK(RAW)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007727 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
7728 "attributes construct error\n");
Daniel Veillard0fb18932003-09-07 09:14:37 +00007729 }
7730 SKIP_BLANKS;
7731 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
7732 (attname == NULL) && (attvalue == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007733 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
Daniel Veillard0fb18932003-09-07 09:14:37 +00007734 "xmlParseStartTag: problem parsing attributes\n");
Daniel Veillard0fb18932003-09-07 09:14:37 +00007735 break;
7736 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007737 GROW;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007738 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007739 }
7740
Daniel Veillard0fb18932003-09-07 09:14:37 +00007741 /*
Daniel Veillarde57ec792003-09-10 10:50:59 +00007742 * The attributes checkings
Daniel Veillard0fb18932003-09-07 09:14:37 +00007743 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00007744 for (i = 0; i < nbatts;i += 5) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007745 nsname = xmlGetNamespace(ctxt, atts[i + 1]);
7746 if ((atts[i + 1] != NULL) && (nsname == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007747 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007748 "Namespace prefix %s for %s on %s is not defined\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007749 atts[i + 1], atts[i], localname);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007750 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00007751 atts[i + 2] = nsname;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007752 /*
7753 * [ WFC: Unique Att Spec ]
7754 * No attribute name may appear more than once in the same
7755 * start-tag or empty-element tag.
7756 * As extended by the Namespace in XML REC.
7757 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00007758 for (j = 0; j < i;j += 5) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007759 if (atts[i] == atts[j]) {
7760 if (atts[i+1] == atts[j+1]) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007761 xmlErrAttributeDup(ctxt, atts[i+1], atts[i]);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007762 break;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007763 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00007764 if ((nsname != NULL) && (atts[j + 2] == nsname)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007765 xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED,
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007766 "Namespaced Attribute %s in '%s' redefined\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007767 atts[i], nsname, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007768 break;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007769 }
7770 }
7771 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00007772 }
7773
7774 /*
7775 * The attributes defaulting
7776 */
7777 if (ctxt->attsDefault != NULL) {
7778 xmlDefAttrsPtr defaults;
7779
7780 defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix);
7781 if (defaults != NULL) {
7782 for (i = 0;i < defaults->nbAttrs;i++) {
7783 attname = defaults->values[4 * i];
7784 aprefix = defaults->values[4 * i + 1];
7785
7786 /*
7787 * special work for namespaces defaulted defs
7788 */
7789 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
7790 /*
7791 * check that it's not a defined namespace
7792 */
7793 for (j = 1;j <= nbNs;j++)
7794 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
7795 break;
7796 if (j <= nbNs) continue;
7797
7798 nsname = xmlGetNamespace(ctxt, NULL);
7799 if (nsname != defaults->values[4 * i + 2]) {
7800 if (nsPush(ctxt, NULL,
7801 defaults->values[4 * i + 2]) > 0)
7802 nbNs++;
7803 }
7804 } else if (aprefix == ctxt->str_xmlns) {
7805 /*
7806 * check that it's not a defined namespace
7807 */
7808 for (j = 1;j <= nbNs;j++)
7809 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
7810 break;
7811 if (j <= nbNs) continue;
7812
7813 nsname = xmlGetNamespace(ctxt, attname);
7814 if (nsname != defaults->values[2]) {
7815 if (nsPush(ctxt, attname,
7816 defaults->values[4 * i + 2]) > 0)
7817 nbNs++;
7818 }
7819 } else {
7820 /*
7821 * check that it's not a defined attribute
7822 */
7823 for (j = 0;j < nbatts;j+=5) {
7824 if ((attname == atts[j]) && (aprefix == atts[j+1]))
7825 break;
7826 }
7827 if (j < nbatts) continue;
7828
7829 if ((atts == NULL) || (nbatts + 5 > maxatts)) {
7830 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
Daniel Veillard9ee35f32003-09-28 00:19:54 +00007831 return(NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007832 }
7833 maxatts = ctxt->maxatts;
7834 atts = ctxt->atts;
7835 }
7836 atts[nbatts++] = attname;
7837 atts[nbatts++] = aprefix;
7838 if (aprefix == NULL)
7839 atts[nbatts++] = NULL;
7840 else
7841 atts[nbatts++] = xmlGetNamespace(ctxt, aprefix);
7842 atts[nbatts++] = defaults->values[4 * i + 2];
7843 atts[nbatts++] = defaults->values[4 * i + 3];
7844 nbdef++;
7845 }
7846 }
7847 }
7848 }
7849
7850 nsname = xmlGetNamespace(ctxt, prefix);
7851 if ((prefix != NULL) && (nsname == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007852 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
7853 "Namespace prefix %s on %s is not defined\n",
7854 prefix, localname, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007855 }
7856 *pref = prefix;
7857 *URI = nsname;
7858
7859 /*
7860 * SAX: Start of Element !
7861 */
7862 if ((ctxt->sax != NULL) && (ctxt->sax->startElementNs != NULL) &&
7863 (!ctxt->disableSAX)) {
7864 if (nbNs > 0)
7865 ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
7866 nsname, nbNs, &ctxt->nsTab[ctxt->nsNr - 2 * nbNs],
7867 nbatts / 5, nbdef, atts);
7868 else
7869 ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
7870 nsname, 0, NULL, nbatts / 5, nbdef, atts);
7871 }
7872
7873 /*
7874 * Free up attribute allocated strings if needed
7875 */
7876 if (attval != 0) {
7877 for (i = 3,j = 0; j < nratts;i += 5,j++)
7878 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
7879 xmlFree((xmlChar *) atts[i]);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007880 }
7881
7882 return(localname);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007883
7884base_changed:
7885 /*
7886 * the attribute strings are valid iif the base didn't changed
7887 */
7888 if (attval != 0) {
7889 for (i = 3,j = 0; j < nratts;i += 5,j++)
7890 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
7891 xmlFree((xmlChar *) atts[i]);
7892 }
7893 ctxt->input->cur = ctxt->input->base + cur;
7894 if (ctxt->wellFormed == 1) {
7895 goto reparse;
7896 }
7897 return(NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007898}
7899
7900/**
7901 * xmlParseEndTag2:
7902 * @ctxt: an XML parser context
7903 * @line: line of the start tag
7904 * @nsNr: number of namespaces on the start tag
7905 *
7906 * parse an end of tag
7907 *
7908 * [42] ETag ::= '</' Name S? '>'
7909 *
7910 * With namespace
7911 *
7912 * [NS 9] ETag ::= '</' QName S? '>'
7913 */
7914
7915static void
7916xmlParseEndTag2(xmlParserCtxtPtr ctxt, const xmlChar *prefix,
7917 const xmlChar *URI, int line, int nsNr) {
7918 const xmlChar *name;
7919
7920 GROW;
7921 if ((RAW != '<') || (NXT(1) != '/')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007922 xmlFatalErr(ctxt, XML_ERR_LTSLASH_REQUIRED, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007923 return;
7924 }
7925 SKIP(2);
7926
7927 name = xmlParseQNameAndCompare(ctxt, ctxt->name, prefix);
7928
7929 /*
7930 * We should definitely be at the ending "S? '>'" part
7931 */
7932 GROW;
7933 SKIP_BLANKS;
Daniel Veillard73b013f2003-09-30 12:36:01 +00007934 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007935 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007936 } else
7937 NEXT1;
7938
7939 /*
7940 * [ WFC: Element Type Match ]
7941 * The Name in an element's end-tag must match the element type in the
7942 * start-tag.
7943 *
7944 */
7945 if (name != (xmlChar*)1) {
Daniel Veillardf403d292003-10-05 13:51:35 +00007946 if (name == NULL) name = BAD_CAST "unparseable";
7947 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
Daniel Veillard0fb18932003-09-07 09:14:37 +00007948 "Opening and ending tag mismatch: %s line %d and %s\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00007949 ctxt->name, line, name);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007950 }
7951
7952 /*
7953 * SAX: End of Tag
7954 */
7955 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
7956 (!ctxt->disableSAX))
7957 ctxt->sax->endElementNs(ctxt->userData, ctxt->name, prefix, URI);
7958
Daniel Veillard0fb18932003-09-07 09:14:37 +00007959 spacePop(ctxt);
7960 if (nsNr != 0)
7961 nsPop(ctxt, nsNr);
7962 return;
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00007963}
7964
7965/**
Owen Taylor3473f882001-02-23 17:55:21 +00007966 * xmlParseCDSect:
7967 * @ctxt: an XML parser context
7968 *
7969 * Parse escaped pure raw content.
7970 *
7971 * [18] CDSect ::= CDStart CData CDEnd
7972 *
7973 * [19] CDStart ::= '<![CDATA['
7974 *
7975 * [20] Data ::= (Char* - (Char* ']]>' Char*))
7976 *
7977 * [21] CDEnd ::= ']]>'
7978 */
7979void
7980xmlParseCDSect(xmlParserCtxtPtr ctxt) {
7981 xmlChar *buf = NULL;
7982 int len = 0;
7983 int size = XML_PARSER_BUFFER_SIZE;
7984 int r, rl;
7985 int s, sl;
7986 int cur, l;
7987 int count = 0;
7988
Daniel Veillard8f597c32003-10-06 08:19:27 +00007989 /* Check 2.6.0 was NXT(0) not RAW */
7990 if (memcmp(CUR_PTR, "<![CDATA[", 9) == 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00007991 SKIP(9);
7992 } else
7993 return;
7994
7995 ctxt->instate = XML_PARSER_CDATA_SECTION;
7996 r = CUR_CHAR(rl);
Daniel Veillard73b013f2003-09-30 12:36:01 +00007997 if (!xmlIsChar(r)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007998 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007999 ctxt->instate = XML_PARSER_CONTENT;
8000 return;
8001 }
8002 NEXTL(rl);
8003 s = CUR_CHAR(sl);
Daniel Veillard73b013f2003-09-30 12:36:01 +00008004 if (!xmlIsChar(s)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008005 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008006 ctxt->instate = XML_PARSER_CONTENT;
8007 return;
8008 }
8009 NEXTL(sl);
8010 cur = CUR_CHAR(l);
Daniel Veillard3c908dc2003-04-19 00:07:51 +00008011 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00008012 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008013 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008014 return;
8015 }
Daniel Veillard73b013f2003-09-30 12:36:01 +00008016 while (xmlIsChar(cur) &&
Owen Taylor3473f882001-02-23 17:55:21 +00008017 ((r != ']') || (s != ']') || (cur != '>'))) {
8018 if (len + 5 >= size) {
8019 size *= 2;
8020 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
8021 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008022 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008023 return;
8024 }
8025 }
8026 COPY_BUF(rl,buf,len,r);
8027 r = s;
8028 rl = sl;
8029 s = cur;
8030 sl = l;
8031 count++;
8032 if (count > 50) {
8033 GROW;
8034 count = 0;
8035 }
8036 NEXTL(l);
8037 cur = CUR_CHAR(l);
8038 }
8039 buf[len] = 0;
8040 ctxt->instate = XML_PARSER_CONTENT;
8041 if (cur != '>') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00008042 xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00008043 "CData section not finished\n%.50s\n", buf);
Owen Taylor3473f882001-02-23 17:55:21 +00008044 xmlFree(buf);
8045 return;
8046 }
8047 NEXTL(l);
8048
8049 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00008050 * OK the buffer is to be consumed as cdata.
Owen Taylor3473f882001-02-23 17:55:21 +00008051 */
8052 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
8053 if (ctxt->sax->cdataBlock != NULL)
8054 ctxt->sax->cdataBlock(ctxt->userData, buf, len);
Daniel Veillard7583a592001-07-08 13:15:55 +00008055 else if (ctxt->sax->characters != NULL)
8056 ctxt->sax->characters(ctxt->userData, buf, len);
Owen Taylor3473f882001-02-23 17:55:21 +00008057 }
8058 xmlFree(buf);
8059}
8060
8061/**
8062 * xmlParseContent:
8063 * @ctxt: an XML parser context
8064 *
8065 * Parse a content:
8066 *
8067 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
8068 */
8069
8070void
8071xmlParseContent(xmlParserCtxtPtr ctxt) {
8072 GROW;
Daniel Veillardfdc91562002-07-01 21:52:03 +00008073 while ((RAW != 0) &&
Owen Taylor3473f882001-02-23 17:55:21 +00008074 ((RAW != '<') || (NXT(1) != '/'))) {
8075 const xmlChar *test = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00008076 unsigned int cons = ctxt->input->consumed;
Daniel Veillard21a0f912001-02-25 19:54:14 +00008077 const xmlChar *cur = ctxt->input->cur;
Owen Taylor3473f882001-02-23 17:55:21 +00008078
8079 /*
Owen Taylor3473f882001-02-23 17:55:21 +00008080 * First case : a Processing Instruction.
8081 */
Daniel Veillardfdc91562002-07-01 21:52:03 +00008082 if ((*cur == '<') && (cur[1] == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008083 xmlParsePI(ctxt);
8084 }
8085
8086 /*
8087 * Second case : a CDSection
8088 */
Daniel Veillard8f597c32003-10-06 08:19:27 +00008089 /* 2.6.0 test was *cur not RAW */
8090 else if (memcmp(CUR_PTR, "<![CDATA[", 9) == 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00008091 xmlParseCDSect(ctxt);
8092 }
8093
8094 /*
8095 * Third case : a comment
8096 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00008097 else if ((*cur == '<') && (NXT(1) == '!') &&
Owen Taylor3473f882001-02-23 17:55:21 +00008098 (NXT(2) == '-') && (NXT(3) == '-')) {
8099 xmlParseComment(ctxt);
8100 ctxt->instate = XML_PARSER_CONTENT;
8101 }
8102
8103 /*
8104 * Fourth case : a sub-element.
8105 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00008106 else if (*cur == '<') {
Owen Taylor3473f882001-02-23 17:55:21 +00008107 xmlParseElement(ctxt);
8108 }
8109
8110 /*
8111 * Fifth case : a reference. If if has not been resolved,
8112 * parsing returns it's Name, create the node
8113 */
8114
Daniel Veillard21a0f912001-02-25 19:54:14 +00008115 else if (*cur == '&') {
Owen Taylor3473f882001-02-23 17:55:21 +00008116 xmlParseReference(ctxt);
8117 }
8118
8119 /*
8120 * Last case, text. Note that References are handled directly.
8121 */
8122 else {
8123 xmlParseCharData(ctxt, 0);
8124 }
8125
8126 GROW;
8127 /*
8128 * Pop-up of finished entities.
8129 */
Daniel Veillard561b7f82002-03-20 21:55:57 +00008130 while ((RAW == 0) && (ctxt->inputNr > 1))
Owen Taylor3473f882001-02-23 17:55:21 +00008131 xmlPopInput(ctxt);
8132 SHRINK;
8133
Daniel Veillardfdc91562002-07-01 21:52:03 +00008134 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008135 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8136 "detected an error in element content\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008137 ctxt->instate = XML_PARSER_EOF;
8138 break;
8139 }
8140 }
8141}
8142
8143/**
8144 * xmlParseElement:
8145 * @ctxt: an XML parser context
8146 *
8147 * parse an XML element, this is highly recursive
8148 *
8149 * [39] element ::= EmptyElemTag | STag content ETag
8150 *
8151 * [ WFC: Element Type Match ]
8152 * The Name in an element's end-tag must match the element type in the
8153 * start-tag.
8154 *
Owen Taylor3473f882001-02-23 17:55:21 +00008155 */
8156
8157void
8158xmlParseElement(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008159 const xmlChar *name;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008160 const xmlChar *prefix;
8161 const xmlChar *URI;
Owen Taylor3473f882001-02-23 17:55:21 +00008162 xmlParserNodeInfo node_info;
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00008163 int line;
Owen Taylor3473f882001-02-23 17:55:21 +00008164 xmlNodePtr ret;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008165 int nsNr = ctxt->nsNr;
Owen Taylor3473f882001-02-23 17:55:21 +00008166
8167 /* Capture start position */
8168 if (ctxt->record_info) {
8169 node_info.begin_pos = ctxt->input->consumed +
8170 (CUR_PTR - ctxt->input->base);
8171 node_info.begin_line = ctxt->input->line;
8172 }
8173
8174 if (ctxt->spaceNr == 0)
8175 spacePush(ctxt, -1);
8176 else
8177 spacePush(ctxt, *ctxt->space);
8178
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00008179 line = ctxt->input->line;
Daniel Veillard81273902003-09-30 00:43:48 +00008180#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +00008181 if (ctxt->sax2)
Daniel Veillard81273902003-09-30 00:43:48 +00008182#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillard0fb18932003-09-07 09:14:37 +00008183 name = xmlParseStartTag2(ctxt, &prefix, &URI);
Daniel Veillard81273902003-09-30 00:43:48 +00008184#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +00008185 else
8186 name = xmlParseStartTag(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +00008187#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00008188 if (name == NULL) {
8189 spacePop(ctxt);
8190 return;
8191 }
8192 namePush(ctxt, name);
8193 ret = ctxt->node;
8194
Daniel Veillard4432df22003-09-28 18:58:27 +00008195#ifdef LIBXML_VALID_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +00008196 /*
8197 * [ VC: Root Element Type ]
8198 * The Name in the document type declaration must match the element
8199 * type of the root element.
8200 */
8201 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
8202 ctxt->node && (ctxt->node == ctxt->myDoc->children))
8203 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
Daniel Veillard4432df22003-09-28 18:58:27 +00008204#endif /* LIBXML_VALID_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00008205
8206 /*
8207 * Check for an Empty Element.
8208 */
8209 if ((RAW == '/') && (NXT(1) == '>')) {
8210 SKIP(2);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008211 if (ctxt->sax2) {
8212 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
8213 (!ctxt->disableSAX))
8214 ctxt->sax->endElementNs(ctxt->userData, name, prefix, URI);
Daniel Veillard81273902003-09-30 00:43:48 +00008215#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +00008216 } else {
8217 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
8218 (!ctxt->disableSAX))
8219 ctxt->sax->endElement(ctxt->userData, name);
Daniel Veillard81273902003-09-30 00:43:48 +00008220#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00008221 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008222 namePop(ctxt);
8223 spacePop(ctxt);
8224 if (nsNr != ctxt->nsNr)
8225 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +00008226 if ( ret != NULL && ctxt->record_info ) {
8227 node_info.end_pos = ctxt->input->consumed +
8228 (CUR_PTR - ctxt->input->base);
8229 node_info.end_line = ctxt->input->line;
8230 node_info.node = ret;
8231 xmlParserAddNodeInfo(ctxt, &node_info);
8232 }
8233 return;
8234 }
8235 if (RAW == '>') {
Daniel Veillard21a0f912001-02-25 19:54:14 +00008236 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00008237 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00008238 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_GT_REQUIRED,
8239 "Couldn't find end of Start Tag %s line %d\n",
8240 name, line, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008241
8242 /*
8243 * end of parsing of this node.
8244 */
8245 nodePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008246 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00008247 spacePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008248 if (nsNr != ctxt->nsNr)
8249 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +00008250
8251 /*
8252 * Capture end position and add node
8253 */
8254 if ( ret != NULL && ctxt->record_info ) {
8255 node_info.end_pos = ctxt->input->consumed +
8256 (CUR_PTR - ctxt->input->base);
8257 node_info.end_line = ctxt->input->line;
8258 node_info.node = ret;
8259 xmlParserAddNodeInfo(ctxt, &node_info);
8260 }
8261 return;
8262 }
8263
8264 /*
8265 * Parse the content of the element:
8266 */
8267 xmlParseContent(ctxt);
Daniel Veillard73b013f2003-09-30 12:36:01 +00008268 if (!IS_BYTE_CHAR(RAW)) {
Daniel Veillardf403d292003-10-05 13:51:35 +00008269 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
8270 "Premature end of data in tag %s line %d\n"
8271 "Couldn't find end of Start Tag %s line %d\n",
8272 name, line, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008273
8274 /*
8275 * end of parsing of this node.
8276 */
8277 nodePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008278 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00008279 spacePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008280 if (nsNr != ctxt->nsNr)
8281 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +00008282 return;
8283 }
8284
8285 /*
8286 * parse the end of tag: '</' should be here.
8287 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00008288 if (ctxt->sax2) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00008289 xmlParseEndTag2(ctxt, prefix, URI, line, ctxt->nsNr - nsNr);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008290 namePop(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +00008291 }
8292#ifdef LIBXML_SAX1_ENABLED
8293 else
Daniel Veillard0fb18932003-09-07 09:14:37 +00008294 xmlParseEndTag1(ctxt, line);
Daniel Veillard81273902003-09-30 00:43:48 +00008295#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00008296
8297 /*
8298 * Capture end position and add node
8299 */
8300 if ( ret != NULL && ctxt->record_info ) {
8301 node_info.end_pos = ctxt->input->consumed +
8302 (CUR_PTR - ctxt->input->base);
8303 node_info.end_line = ctxt->input->line;
8304 node_info.node = ret;
8305 xmlParserAddNodeInfo(ctxt, &node_info);
8306 }
8307}
8308
8309/**
8310 * xmlParseVersionNum:
8311 * @ctxt: an XML parser context
8312 *
8313 * parse the XML version value.
8314 *
8315 * [26] VersionNum ::= ([a-zA-Z0-9_.:] | '-')+
8316 *
8317 * Returns the string giving the XML version number, or NULL
8318 */
8319xmlChar *
8320xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
8321 xmlChar *buf = NULL;
8322 int len = 0;
8323 int size = 10;
8324 xmlChar cur;
8325
Daniel Veillard3c908dc2003-04-19 00:07:51 +00008326 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00008327 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008328 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008329 return(NULL);
8330 }
8331 cur = CUR;
8332 while (((cur >= 'a') && (cur <= 'z')) ||
8333 ((cur >= 'A') && (cur <= 'Z')) ||
8334 ((cur >= '0') && (cur <= '9')) ||
8335 (cur == '_') || (cur == '.') ||
8336 (cur == ':') || (cur == '-')) {
8337 if (len + 1 >= size) {
8338 size *= 2;
8339 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
8340 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008341 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008342 return(NULL);
8343 }
8344 }
8345 buf[len++] = cur;
8346 NEXT;
8347 cur=CUR;
8348 }
8349 buf[len] = 0;
8350 return(buf);
8351}
8352
8353/**
8354 * xmlParseVersionInfo:
8355 * @ctxt: an XML parser context
8356 *
8357 * parse the XML version.
8358 *
8359 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
8360 *
8361 * [25] Eq ::= S? '=' S?
8362 *
8363 * Returns the version string, e.g. "1.0"
8364 */
8365
8366xmlChar *
8367xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
8368 xmlChar *version = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00008369
Daniel Veillard8f597c32003-10-06 08:19:27 +00008370 if (memcmp(CUR_PTR, "version", 7) == 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00008371 SKIP(7);
8372 SKIP_BLANKS;
8373 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008374 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008375 return(NULL);
8376 }
8377 NEXT;
8378 SKIP_BLANKS;
8379 if (RAW == '"') {
8380 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00008381 version = xmlParseVersionNum(ctxt);
8382 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008383 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008384 } else
8385 NEXT;
8386 } else if (RAW == '\''){
8387 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00008388 version = xmlParseVersionNum(ctxt);
8389 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008390 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008391 } else
8392 NEXT;
8393 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008394 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008395 }
8396 }
8397 return(version);
8398}
8399
8400/**
8401 * xmlParseEncName:
8402 * @ctxt: an XML parser context
8403 *
8404 * parse the XML encoding name
8405 *
8406 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
8407 *
8408 * Returns the encoding name value or NULL
8409 */
8410xmlChar *
8411xmlParseEncName(xmlParserCtxtPtr ctxt) {
8412 xmlChar *buf = NULL;
8413 int len = 0;
8414 int size = 10;
8415 xmlChar cur;
8416
8417 cur = CUR;
8418 if (((cur >= 'a') && (cur <= 'z')) ||
8419 ((cur >= 'A') && (cur <= 'Z'))) {
Daniel Veillard3c908dc2003-04-19 00:07:51 +00008420 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00008421 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008422 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008423 return(NULL);
8424 }
8425
8426 buf[len++] = cur;
8427 NEXT;
8428 cur = CUR;
8429 while (((cur >= 'a') && (cur <= 'z')) ||
8430 ((cur >= 'A') && (cur <= 'Z')) ||
8431 ((cur >= '0') && (cur <= '9')) ||
8432 (cur == '.') || (cur == '_') ||
8433 (cur == '-')) {
8434 if (len + 1 >= size) {
8435 size *= 2;
8436 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
8437 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008438 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008439 return(NULL);
8440 }
8441 }
8442 buf[len++] = cur;
8443 NEXT;
8444 cur = CUR;
8445 if (cur == 0) {
8446 SHRINK;
8447 GROW;
8448 cur = CUR;
8449 }
8450 }
8451 buf[len] = 0;
8452 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008453 xmlFatalErr(ctxt, XML_ERR_ENCODING_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008454 }
8455 return(buf);
8456}
8457
8458/**
8459 * xmlParseEncodingDecl:
8460 * @ctxt: an XML parser context
8461 *
8462 * parse the XML encoding declaration
8463 *
8464 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
8465 *
8466 * this setups the conversion filters.
8467 *
8468 * Returns the encoding value or NULL
8469 */
8470
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00008471const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00008472xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
8473 xmlChar *encoding = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00008474
8475 SKIP_BLANKS;
Daniel Veillard8f597c32003-10-06 08:19:27 +00008476 if (memcmp(CUR_PTR, "encoding", 8) == 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00008477 SKIP(8);
8478 SKIP_BLANKS;
8479 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008480 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008481 return(NULL);
8482 }
8483 NEXT;
8484 SKIP_BLANKS;
8485 if (RAW == '"') {
8486 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00008487 encoding = xmlParseEncName(ctxt);
8488 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008489 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008490 } else
8491 NEXT;
8492 } else if (RAW == '\''){
8493 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00008494 encoding = xmlParseEncName(ctxt);
8495 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008496 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008497 } else
8498 NEXT;
Daniel Veillard82ac6b02002-02-17 23:18:55 +00008499 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008500 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008501 }
Daniel Veillard6b621b82003-08-11 15:03:34 +00008502 /*
8503 * UTF-16 encoding stwich has already taken place at this stage,
8504 * more over the little-endian/big-endian selection is already done
8505 */
8506 if ((encoding != NULL) &&
8507 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-16")) ||
8508 (!xmlStrcasecmp(encoding, BAD_CAST "UTF16")))) {
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00008509 if (ctxt->encoding != NULL)
8510 xmlFree((xmlChar *) ctxt->encoding);
8511 ctxt->encoding = encoding;
Daniel Veillardb19ba832003-08-14 00:33:46 +00008512 }
8513 /*
8514 * UTF-8 encoding is handled natively
8515 */
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00008516 else if ((encoding != NULL) &&
Daniel Veillardb19ba832003-08-14 00:33:46 +00008517 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-8")) ||
8518 (!xmlStrcasecmp(encoding, BAD_CAST "UTF8")))) {
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00008519 if (ctxt->encoding != NULL)
8520 xmlFree((xmlChar *) ctxt->encoding);
8521 ctxt->encoding = encoding;
Daniel Veillard6b621b82003-08-11 15:03:34 +00008522 }
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00008523 else if (encoding != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +00008524 xmlCharEncodingHandlerPtr handler;
8525
8526 if (ctxt->input->encoding != NULL)
8527 xmlFree((xmlChar *) ctxt->input->encoding);
8528 ctxt->input->encoding = encoding;
8529
Daniel Veillarda6874ca2003-07-29 16:47:24 +00008530 handler = xmlFindCharEncodingHandler((const char *) encoding);
8531 if (handler != NULL) {
8532 xmlSwitchToEncoding(ctxt, handler);
Owen Taylor3473f882001-02-23 17:55:21 +00008533 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00008534 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
Daniel Veillarda6874ca2003-07-29 16:47:24 +00008535 "Unsupported encoding %s\n", encoding);
8536 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008537 }
8538 }
8539 }
8540 return(encoding);
8541}
8542
8543/**
8544 * xmlParseSDDecl:
8545 * @ctxt: an XML parser context
8546 *
8547 * parse the XML standalone declaration
8548 *
8549 * [32] SDDecl ::= S 'standalone' Eq
8550 * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
8551 *
8552 * [ VC: Standalone Document Declaration ]
8553 * TODO The standalone document declaration must have the value "no"
8554 * if any external markup declarations contain declarations of:
8555 * - attributes with default values, if elements to which these
8556 * attributes apply appear in the document without specifications
8557 * of values for these attributes, or
8558 * - entities (other than amp, lt, gt, apos, quot), if references
8559 * to those entities appear in the document, or
8560 * - attributes with values subject to normalization, where the
8561 * attribute appears in the document with a value which will change
8562 * as a result of normalization, or
8563 * - element types with element content, if white space occurs directly
8564 * within any instance of those types.
8565 *
8566 * Returns 1 if standalone, 0 otherwise
8567 */
8568
8569int
8570xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
8571 int standalone = -1;
8572
8573 SKIP_BLANKS;
Daniel Veillard8f597c32003-10-06 08:19:27 +00008574 if (memcmp(CUR_PTR, "standalone", 10) == 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00008575 SKIP(10);
8576 SKIP_BLANKS;
8577 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008578 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008579 return(standalone);
8580 }
8581 NEXT;
8582 SKIP_BLANKS;
8583 if (RAW == '\''){
8584 NEXT;
8585 if ((RAW == 'n') && (NXT(1) == 'o')) {
8586 standalone = 0;
8587 SKIP(2);
8588 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
8589 (NXT(2) == 's')) {
8590 standalone = 1;
8591 SKIP(3);
8592 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008593 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008594 }
8595 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008596 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008597 } else
8598 NEXT;
8599 } else if (RAW == '"'){
8600 NEXT;
8601 if ((RAW == 'n') && (NXT(1) == 'o')) {
8602 standalone = 0;
8603 SKIP(2);
8604 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
8605 (NXT(2) == 's')) {
8606 standalone = 1;
8607 SKIP(3);
8608 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008609 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008610 }
8611 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008612 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008613 } else
8614 NEXT;
8615 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008616 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008617 }
8618 }
8619 return(standalone);
8620}
8621
8622/**
8623 * xmlParseXMLDecl:
8624 * @ctxt: an XML parser context
8625 *
8626 * parse an XML declaration header
8627 *
8628 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
8629 */
8630
8631void
8632xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
8633 xmlChar *version;
8634
8635 /*
8636 * We know that '<?xml' is here.
8637 */
8638 SKIP(5);
8639
8640 if (!IS_BLANK(RAW)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008641 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
8642 "Blank needed after '<?xml'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008643 }
8644 SKIP_BLANKS;
8645
8646 /*
Daniel Veillard19840942001-11-29 16:11:38 +00008647 * We must have the VersionInfo here.
Owen Taylor3473f882001-02-23 17:55:21 +00008648 */
8649 version = xmlParseVersionInfo(ctxt);
Daniel Veillard19840942001-11-29 16:11:38 +00008650 if (version == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008651 xmlFatalErr(ctxt, XML_ERR_VERSION_MISSING, NULL);
Daniel Veillard19840942001-11-29 16:11:38 +00008652 } else {
8653 if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) {
8654 /*
8655 * TODO: Blueberry should be detected here
8656 */
Daniel Veillard24eb9782003-10-04 21:08:09 +00008657 xmlWarningMsg(ctxt, XML_WAR_UNKNOWN_VERSION,
8658 "Unsupported version '%s'\n",
8659 version, NULL);
Daniel Veillard19840942001-11-29 16:11:38 +00008660 }
8661 if (ctxt->version != NULL)
Daniel Veillardd3b08822001-12-05 12:03:33 +00008662 xmlFree((void *) ctxt->version);
Daniel Veillard19840942001-11-29 16:11:38 +00008663 ctxt->version = version;
Daniel Veillarda050d232001-09-05 15:51:05 +00008664 }
Owen Taylor3473f882001-02-23 17:55:21 +00008665
8666 /*
8667 * We may have the encoding declaration
8668 */
8669 if (!IS_BLANK(RAW)) {
8670 if ((RAW == '?') && (NXT(1) == '>')) {
8671 SKIP(2);
8672 return;
8673 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008674 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008675 }
8676 xmlParseEncodingDecl(ctxt);
8677 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
8678 /*
8679 * The XML REC instructs us to stop parsing right here
8680 */
8681 return;
8682 }
8683
8684 /*
8685 * We may have the standalone status.
8686 */
8687 if ((ctxt->input->encoding != NULL) && (!IS_BLANK(RAW))) {
8688 if ((RAW == '?') && (NXT(1) == '>')) {
8689 SKIP(2);
8690 return;
8691 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008692 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008693 }
8694 SKIP_BLANKS;
8695 ctxt->input->standalone = xmlParseSDDecl(ctxt);
8696
8697 SKIP_BLANKS;
8698 if ((RAW == '?') && (NXT(1) == '>')) {
8699 SKIP(2);
8700 } else if (RAW == '>') {
8701 /* Deprecated old WD ... */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008702 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008703 NEXT;
8704 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008705 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008706 MOVETO_ENDTAG(CUR_PTR);
8707 NEXT;
8708 }
8709}
8710
8711/**
8712 * xmlParseMisc:
8713 * @ctxt: an XML parser context
8714 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00008715 * parse an XML Misc* optional field.
Owen Taylor3473f882001-02-23 17:55:21 +00008716 *
8717 * [27] Misc ::= Comment | PI | S
8718 */
8719
8720void
8721xmlParseMisc(xmlParserCtxtPtr ctxt) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00008722 while (((RAW == '<') && (NXT(1) == '?')) ||
Daniel Veillard8f597c32003-10-06 08:19:27 +00008723 (memcmp(CUR_PTR, "<!--", 4) == 0) ||
Daniel Veillard561b7f82002-03-20 21:55:57 +00008724 IS_BLANK(CUR)) {
8725 if ((RAW == '<') && (NXT(1) == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008726 xmlParsePI(ctxt);
Daniel Veillard561b7f82002-03-20 21:55:57 +00008727 } else if (IS_BLANK(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00008728 NEXT;
8729 } else
8730 xmlParseComment(ctxt);
8731 }
8732}
8733
8734/**
8735 * xmlParseDocument:
8736 * @ctxt: an XML parser context
8737 *
8738 * parse an XML document (and build a tree if using the standard SAX
8739 * interface).
8740 *
8741 * [1] document ::= prolog element Misc*
8742 *
8743 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
8744 *
8745 * Returns 0, -1 in case of error. the parser context is augmented
8746 * as a result of the parsing.
8747 */
8748
8749int
8750xmlParseDocument(xmlParserCtxtPtr ctxt) {
8751 xmlChar start[4];
8752 xmlCharEncoding enc;
8753
8754 xmlInitParser();
8755
8756 GROW;
8757
8758 /*
Daniel Veillard0fb18932003-09-07 09:14:37 +00008759 * SAX: detecting the level.
8760 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00008761 xmlDetectSAX2(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008762
8763 /*
Owen Taylor3473f882001-02-23 17:55:21 +00008764 * SAX: beginning of the document processing.
8765 */
8766 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
8767 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
8768
Daniel Veillard50f34372001-08-03 12:06:36 +00008769 if (ctxt->encoding == (const xmlChar *)XML_CHAR_ENCODING_NONE) {
Daniel Veillard4aafa792001-07-28 17:21:12 +00008770 /*
8771 * Get the 4 first bytes and decode the charset
8772 * if enc != XML_CHAR_ENCODING_NONE
8773 * plug some encoding conversion routines.
8774 */
8775 start[0] = RAW;
8776 start[1] = NXT(1);
8777 start[2] = NXT(2);
8778 start[3] = NXT(3);
8779 enc = xmlDetectCharEncoding(start, 4);
8780 if (enc != XML_CHAR_ENCODING_NONE) {
8781 xmlSwitchEncoding(ctxt, enc);
8782 }
Owen Taylor3473f882001-02-23 17:55:21 +00008783 }
8784
8785
8786 if (CUR == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008787 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008788 }
8789
8790 /*
8791 * Check for the XMLDecl in the Prolog.
8792 */
8793 GROW;
Daniel Veillard8f597c32003-10-06 08:19:27 +00008794 if ((memcmp(CUR_PTR, "<?xml", 5) == 0) && (IS_BLANK(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +00008795
8796 /*
8797 * Note that we will switch encoding on the fly.
8798 */
8799 xmlParseXMLDecl(ctxt);
8800 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
8801 /*
8802 * The XML REC instructs us to stop parsing right here
8803 */
8804 return(-1);
8805 }
8806 ctxt->standalone = ctxt->input->standalone;
8807 SKIP_BLANKS;
8808 } else {
8809 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
8810 }
8811 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
8812 ctxt->sax->startDocument(ctxt->userData);
8813
8814 /*
8815 * The Misc part of the Prolog
8816 */
8817 GROW;
8818 xmlParseMisc(ctxt);
8819
8820 /*
8821 * Then possibly doc type declaration(s) and more Misc
8822 * (doctypedecl Misc*)?
8823 */
8824 GROW;
Daniel Veillard8f597c32003-10-06 08:19:27 +00008825 if (memcmp(CUR_PTR, "<!DOCTYPE", 9) == 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00008826
8827 ctxt->inSubset = 1;
8828 xmlParseDocTypeDecl(ctxt);
8829 if (RAW == '[') {
8830 ctxt->instate = XML_PARSER_DTD;
8831 xmlParseInternalSubset(ctxt);
8832 }
8833
8834 /*
8835 * Create and update the external subset.
8836 */
8837 ctxt->inSubset = 2;
8838 if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
8839 (!ctxt->disableSAX))
8840 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
8841 ctxt->extSubSystem, ctxt->extSubURI);
8842 ctxt->inSubset = 0;
8843
8844
8845 ctxt->instate = XML_PARSER_PROLOG;
8846 xmlParseMisc(ctxt);
8847 }
8848
8849 /*
8850 * Time to start parsing the tree itself
8851 */
8852 GROW;
8853 if (RAW != '<') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008854 xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
8855 "Start tag expected, '<' not found\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008856 } else {
8857 ctxt->instate = XML_PARSER_CONTENT;
8858 xmlParseElement(ctxt);
8859 ctxt->instate = XML_PARSER_EPILOG;
8860
8861
8862 /*
8863 * The Misc part at the end
8864 */
8865 xmlParseMisc(ctxt);
8866
Daniel Veillard561b7f82002-03-20 21:55:57 +00008867 if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008868 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008869 }
8870 ctxt->instate = XML_PARSER_EOF;
8871 }
8872
8873 /*
8874 * SAX: end of the document processing.
8875 */
Daniel Veillard8d24cc12002-03-05 15:41:29 +00008876 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00008877 ctxt->sax->endDocument(ctxt->userData);
8878
Daniel Veillard5997aca2002-03-18 18:36:20 +00008879 /*
8880 * Remove locally kept entity definitions if the tree was not built
8881 */
8882 if ((ctxt->myDoc != NULL) &&
8883 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
8884 xmlFreeDoc(ctxt->myDoc);
8885 ctxt->myDoc = NULL;
8886 }
8887
Daniel Veillardc7612992002-02-17 22:47:37 +00008888 if (! ctxt->wellFormed) {
8889 ctxt->valid = 0;
8890 return(-1);
8891 }
Owen Taylor3473f882001-02-23 17:55:21 +00008892 return(0);
8893}
8894
8895/**
8896 * xmlParseExtParsedEnt:
8897 * @ctxt: an XML parser context
8898 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00008899 * parse a general parsed entity
Owen Taylor3473f882001-02-23 17:55:21 +00008900 * An external general parsed entity is well-formed if it matches the
8901 * production labeled extParsedEnt.
8902 *
8903 * [78] extParsedEnt ::= TextDecl? content
8904 *
8905 * Returns 0, -1 in case of error. the parser context is augmented
8906 * as a result of the parsing.
8907 */
8908
8909int
8910xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
8911 xmlChar start[4];
8912 xmlCharEncoding enc;
8913
8914 xmlDefaultSAXHandlerInit();
8915
Daniel Veillard309f81d2003-09-23 09:02:53 +00008916 xmlDetectSAX2(ctxt);
8917
Owen Taylor3473f882001-02-23 17:55:21 +00008918 GROW;
8919
8920 /*
8921 * SAX: beginning of the document processing.
8922 */
8923 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
8924 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
8925
8926 /*
8927 * Get the 4 first bytes and decode the charset
8928 * if enc != XML_CHAR_ENCODING_NONE
8929 * plug some encoding conversion routines.
8930 */
8931 start[0] = RAW;
8932 start[1] = NXT(1);
8933 start[2] = NXT(2);
8934 start[3] = NXT(3);
8935 enc = xmlDetectCharEncoding(start, 4);
8936 if (enc != XML_CHAR_ENCODING_NONE) {
8937 xmlSwitchEncoding(ctxt, enc);
8938 }
8939
8940
8941 if (CUR == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008942 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008943 }
8944
8945 /*
8946 * Check for the XMLDecl in the Prolog.
8947 */
8948 GROW;
Daniel Veillard8f597c32003-10-06 08:19:27 +00008949 if ((memcmp(CUR_PTR, "<?xml", 5) == 0) && (IS_BLANK(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +00008950
8951 /*
8952 * Note that we will switch encoding on the fly.
8953 */
8954 xmlParseXMLDecl(ctxt);
8955 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
8956 /*
8957 * The XML REC instructs us to stop parsing right here
8958 */
8959 return(-1);
8960 }
8961 SKIP_BLANKS;
8962 } else {
8963 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
8964 }
8965 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
8966 ctxt->sax->startDocument(ctxt->userData);
8967
8968 /*
8969 * Doing validity checking on chunk doesn't make sense
8970 */
8971 ctxt->instate = XML_PARSER_CONTENT;
8972 ctxt->validate = 0;
8973 ctxt->loadsubset = 0;
8974 ctxt->depth = 0;
8975
8976 xmlParseContent(ctxt);
8977
8978 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008979 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008980 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008981 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008982 }
8983
8984 /*
8985 * SAX: end of the document processing.
8986 */
Daniel Veillard8d24cc12002-03-05 15:41:29 +00008987 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00008988 ctxt->sax->endDocument(ctxt->userData);
8989
8990 if (! ctxt->wellFormed) return(-1);
8991 return(0);
8992}
8993
Daniel Veillard73b013f2003-09-30 12:36:01 +00008994#ifdef LIBXML_PUSH_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +00008995/************************************************************************
8996 * *
8997 * Progressive parsing interfaces *
8998 * *
8999 ************************************************************************/
9000
9001/**
9002 * xmlParseLookupSequence:
9003 * @ctxt: an XML parser context
9004 * @first: the first char to lookup
9005 * @next: the next char to lookup or zero
9006 * @third: the next char to lookup or zero
9007 *
9008 * Try to find if a sequence (first, next, third) or just (first next) or
9009 * (first) is available in the input stream.
9010 * This function has a side effect of (possibly) incrementing ctxt->checkIndex
9011 * to avoid rescanning sequences of bytes, it DOES change the state of the
9012 * parser, do not use liberally.
9013 *
9014 * Returns the index to the current parsing point if the full sequence
9015 * is available, -1 otherwise.
9016 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00009017static int
Owen Taylor3473f882001-02-23 17:55:21 +00009018xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
9019 xmlChar next, xmlChar third) {
9020 int base, len;
9021 xmlParserInputPtr in;
9022 const xmlChar *buf;
9023
9024 in = ctxt->input;
9025 if (in == NULL) return(-1);
9026 base = in->cur - in->base;
9027 if (base < 0) return(-1);
9028 if (ctxt->checkIndex > base)
9029 base = ctxt->checkIndex;
9030 if (in->buf == NULL) {
9031 buf = in->base;
9032 len = in->length;
9033 } else {
9034 buf = in->buf->buffer->content;
9035 len = in->buf->buffer->use;
9036 }
9037 /* take into account the sequence length */
9038 if (third) len -= 2;
9039 else if (next) len --;
9040 for (;base < len;base++) {
9041 if (buf[base] == first) {
9042 if (third != 0) {
9043 if ((buf[base + 1] != next) ||
9044 (buf[base + 2] != third)) continue;
9045 } else if (next != 0) {
9046 if (buf[base + 1] != next) continue;
9047 }
9048 ctxt->checkIndex = 0;
9049#ifdef DEBUG_PUSH
9050 if (next == 0)
9051 xmlGenericError(xmlGenericErrorContext,
9052 "PP: lookup '%c' found at %d\n",
9053 first, base);
9054 else if (third == 0)
9055 xmlGenericError(xmlGenericErrorContext,
9056 "PP: lookup '%c%c' found at %d\n",
9057 first, next, base);
9058 else
9059 xmlGenericError(xmlGenericErrorContext,
9060 "PP: lookup '%c%c%c' found at %d\n",
9061 first, next, third, base);
9062#endif
9063 return(base - (in->cur - in->base));
9064 }
9065 }
9066 ctxt->checkIndex = base;
9067#ifdef DEBUG_PUSH
9068 if (next == 0)
9069 xmlGenericError(xmlGenericErrorContext,
9070 "PP: lookup '%c' failed\n", first);
9071 else if (third == 0)
9072 xmlGenericError(xmlGenericErrorContext,
9073 "PP: lookup '%c%c' failed\n", first, next);
9074 else
9075 xmlGenericError(xmlGenericErrorContext,
9076 "PP: lookup '%c%c%c' failed\n", first, next, third);
9077#endif
9078 return(-1);
9079}
9080
9081/**
Daniel Veillarda880b122003-04-21 21:36:41 +00009082 * xmlParseGetLasts:
9083 * @ctxt: an XML parser context
9084 * @lastlt: pointer to store the last '<' from the input
9085 * @lastgt: pointer to store the last '>' from the input
9086 *
9087 * Lookup the last < and > in the current chunk
9088 */
9089static void
9090xmlParseGetLasts(xmlParserCtxtPtr ctxt, const xmlChar **lastlt,
9091 const xmlChar **lastgt) {
9092 const xmlChar *tmp;
9093
9094 if ((ctxt == NULL) || (lastlt == NULL) || (lastgt == NULL)) {
9095 xmlGenericError(xmlGenericErrorContext,
9096 "Internal error: xmlParseGetLasts\n");
9097 return;
9098 }
9099 if ((ctxt->progressive == 1) && (ctxt->inputNr == 1)) {
9100 tmp = ctxt->input->end;
9101 tmp--;
9102 while ((tmp >= ctxt->input->base) && (*tmp != '<') &&
9103 (*tmp != '>')) tmp--;
9104 if (tmp < ctxt->input->base) {
9105 *lastlt = NULL;
9106 *lastgt = NULL;
9107 } else if (*tmp == '<') {
9108 *lastlt = tmp;
9109 tmp--;
9110 while ((tmp >= ctxt->input->base) && (*tmp != '>')) tmp--;
9111 if (tmp < ctxt->input->base)
9112 *lastgt = NULL;
9113 else
9114 *lastgt = tmp;
9115 } else {
9116 *lastgt = tmp;
9117 tmp--;
9118 while ((tmp >= ctxt->input->base) && (*tmp != '<')) tmp--;
9119 if (tmp < ctxt->input->base)
9120 *lastlt = NULL;
9121 else
9122 *lastlt = tmp;
9123 }
9124
9125 } else {
9126 *lastlt = NULL;
9127 *lastgt = NULL;
9128 }
9129}
9130/**
Owen Taylor3473f882001-02-23 17:55:21 +00009131 * xmlParseTryOrFinish:
9132 * @ctxt: an XML parser context
9133 * @terminate: last chunk indicator
9134 *
9135 * Try to progress on parsing
9136 *
9137 * Returns zero if no parsing was possible
9138 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00009139static int
Owen Taylor3473f882001-02-23 17:55:21 +00009140xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
9141 int ret = 0;
9142 int avail;
9143 xmlChar cur, next;
Daniel Veillarda880b122003-04-21 21:36:41 +00009144 const xmlChar *lastlt, *lastgt;
Owen Taylor3473f882001-02-23 17:55:21 +00009145
9146#ifdef DEBUG_PUSH
9147 switch (ctxt->instate) {
9148 case XML_PARSER_EOF:
9149 xmlGenericError(xmlGenericErrorContext,
9150 "PP: try EOF\n"); break;
9151 case XML_PARSER_START:
9152 xmlGenericError(xmlGenericErrorContext,
9153 "PP: try START\n"); break;
9154 case XML_PARSER_MISC:
9155 xmlGenericError(xmlGenericErrorContext,
9156 "PP: try MISC\n");break;
9157 case XML_PARSER_COMMENT:
9158 xmlGenericError(xmlGenericErrorContext,
9159 "PP: try COMMENT\n");break;
9160 case XML_PARSER_PROLOG:
9161 xmlGenericError(xmlGenericErrorContext,
9162 "PP: try PROLOG\n");break;
9163 case XML_PARSER_START_TAG:
9164 xmlGenericError(xmlGenericErrorContext,
9165 "PP: try START_TAG\n");break;
9166 case XML_PARSER_CONTENT:
9167 xmlGenericError(xmlGenericErrorContext,
9168 "PP: try CONTENT\n");break;
9169 case XML_PARSER_CDATA_SECTION:
9170 xmlGenericError(xmlGenericErrorContext,
9171 "PP: try CDATA_SECTION\n");break;
9172 case XML_PARSER_END_TAG:
9173 xmlGenericError(xmlGenericErrorContext,
9174 "PP: try END_TAG\n");break;
9175 case XML_PARSER_ENTITY_DECL:
9176 xmlGenericError(xmlGenericErrorContext,
9177 "PP: try ENTITY_DECL\n");break;
9178 case XML_PARSER_ENTITY_VALUE:
9179 xmlGenericError(xmlGenericErrorContext,
9180 "PP: try ENTITY_VALUE\n");break;
9181 case XML_PARSER_ATTRIBUTE_VALUE:
9182 xmlGenericError(xmlGenericErrorContext,
9183 "PP: try ATTRIBUTE_VALUE\n");break;
9184 case XML_PARSER_DTD:
9185 xmlGenericError(xmlGenericErrorContext,
9186 "PP: try DTD\n");break;
9187 case XML_PARSER_EPILOG:
9188 xmlGenericError(xmlGenericErrorContext,
9189 "PP: try EPILOG\n");break;
9190 case XML_PARSER_PI:
9191 xmlGenericError(xmlGenericErrorContext,
9192 "PP: try PI\n");break;
9193 case XML_PARSER_IGNORE:
9194 xmlGenericError(xmlGenericErrorContext,
9195 "PP: try IGNORE\n");break;
9196 }
9197#endif
9198
Daniel Veillarda880b122003-04-21 21:36:41 +00009199 if (ctxt->input->cur - ctxt->input->base > 4096) {
9200 xmlSHRINK(ctxt);
9201 ctxt->checkIndex = 0;
9202 }
9203 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Aleksey Sanine48a3182002-05-09 18:20:01 +00009204
Daniel Veillarda880b122003-04-21 21:36:41 +00009205 while (1) {
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00009206 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
9207 return(0);
9208
9209
Owen Taylor3473f882001-02-23 17:55:21 +00009210 /*
9211 * Pop-up of finished entities.
9212 */
9213 while ((RAW == 0) && (ctxt->inputNr > 1))
9214 xmlPopInput(ctxt);
9215
9216 if (ctxt->input ==NULL) break;
9217 if (ctxt->input->buf == NULL)
Daniel Veillarda880b122003-04-21 21:36:41 +00009218 avail = ctxt->input->length -
9219 (ctxt->input->cur - ctxt->input->base);
Daniel Veillard158a4d22002-02-20 22:17:58 +00009220 else {
9221 /*
9222 * If we are operating on converted input, try to flush
9223 * remainng chars to avoid them stalling in the non-converted
9224 * buffer.
9225 */
9226 if ((ctxt->input->buf->raw != NULL) &&
9227 (ctxt->input->buf->raw->use > 0)) {
9228 int base = ctxt->input->base -
9229 ctxt->input->buf->buffer->content;
9230 int current = ctxt->input->cur - ctxt->input->base;
9231
9232 xmlParserInputBufferPush(ctxt->input->buf, 0, "");
9233 ctxt->input->base = ctxt->input->buf->buffer->content + base;
9234 ctxt->input->cur = ctxt->input->base + current;
9235 ctxt->input->end =
9236 &ctxt->input->buf->buffer->content[
9237 ctxt->input->buf->buffer->use];
9238 }
9239 avail = ctxt->input->buf->buffer->use -
9240 (ctxt->input->cur - ctxt->input->base);
9241 }
Owen Taylor3473f882001-02-23 17:55:21 +00009242 if (avail < 1)
9243 goto done;
9244 switch (ctxt->instate) {
9245 case XML_PARSER_EOF:
9246 /*
9247 * Document parsing is done !
9248 */
9249 goto done;
9250 case XML_PARSER_START:
Daniel Veillard0e4cd172001-06-28 12:13:56 +00009251 if (ctxt->charset == XML_CHAR_ENCODING_NONE) {
9252 xmlChar start[4];
9253 xmlCharEncoding enc;
9254
9255 /*
9256 * Very first chars read from the document flow.
9257 */
9258 if (avail < 4)
9259 goto done;
9260
9261 /*
9262 * Get the 4 first bytes and decode the charset
9263 * if enc != XML_CHAR_ENCODING_NONE
9264 * plug some encoding conversion routines.
9265 */
9266 start[0] = RAW;
9267 start[1] = NXT(1);
9268 start[2] = NXT(2);
9269 start[3] = NXT(3);
9270 enc = xmlDetectCharEncoding(start, 4);
9271 if (enc != XML_CHAR_ENCODING_NONE) {
9272 xmlSwitchEncoding(ctxt, enc);
9273 }
9274 break;
9275 }
Owen Taylor3473f882001-02-23 17:55:21 +00009276
Daniel Veillard2b8c4a12003-10-02 22:28:19 +00009277 if (avail < 2)
9278 goto done;
Owen Taylor3473f882001-02-23 17:55:21 +00009279 cur = ctxt->input->cur[0];
9280 next = ctxt->input->cur[1];
9281 if (cur == 0) {
9282 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
9283 ctxt->sax->setDocumentLocator(ctxt->userData,
9284 &xmlDefaultSAXLocator);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009285 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009286 ctxt->instate = XML_PARSER_EOF;
9287#ifdef DEBUG_PUSH
9288 xmlGenericError(xmlGenericErrorContext,
9289 "PP: entering EOF\n");
9290#endif
9291 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
9292 ctxt->sax->endDocument(ctxt->userData);
9293 goto done;
9294 }
9295 if ((cur == '<') && (next == '?')) {
9296 /* PI or XML decl */
9297 if (avail < 5) return(ret);
9298 if ((!terminate) &&
9299 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
9300 return(ret);
9301 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
9302 ctxt->sax->setDocumentLocator(ctxt->userData,
9303 &xmlDefaultSAXLocator);
9304 if ((ctxt->input->cur[2] == 'x') &&
9305 (ctxt->input->cur[3] == 'm') &&
9306 (ctxt->input->cur[4] == 'l') &&
9307 (IS_BLANK(ctxt->input->cur[5]))) {
9308 ret += 5;
9309#ifdef DEBUG_PUSH
9310 xmlGenericError(xmlGenericErrorContext,
9311 "PP: Parsing XML Decl\n");
9312#endif
9313 xmlParseXMLDecl(ctxt);
9314 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
9315 /*
9316 * The XML REC instructs us to stop parsing right
9317 * here
9318 */
9319 ctxt->instate = XML_PARSER_EOF;
9320 return(0);
9321 }
9322 ctxt->standalone = ctxt->input->standalone;
9323 if ((ctxt->encoding == NULL) &&
9324 (ctxt->input->encoding != NULL))
9325 ctxt->encoding = xmlStrdup(ctxt->input->encoding);
9326 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
9327 (!ctxt->disableSAX))
9328 ctxt->sax->startDocument(ctxt->userData);
9329 ctxt->instate = XML_PARSER_MISC;
9330#ifdef DEBUG_PUSH
9331 xmlGenericError(xmlGenericErrorContext,
9332 "PP: entering MISC\n");
9333#endif
9334 } else {
9335 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
9336 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
9337 (!ctxt->disableSAX))
9338 ctxt->sax->startDocument(ctxt->userData);
9339 ctxt->instate = XML_PARSER_MISC;
9340#ifdef DEBUG_PUSH
9341 xmlGenericError(xmlGenericErrorContext,
9342 "PP: entering MISC\n");
9343#endif
9344 }
9345 } else {
9346 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
9347 ctxt->sax->setDocumentLocator(ctxt->userData,
9348 &xmlDefaultSAXLocator);
9349 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
9350 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
9351 (!ctxt->disableSAX))
9352 ctxt->sax->startDocument(ctxt->userData);
9353 ctxt->instate = XML_PARSER_MISC;
9354#ifdef DEBUG_PUSH
9355 xmlGenericError(xmlGenericErrorContext,
9356 "PP: entering MISC\n");
9357#endif
9358 }
9359 break;
Daniel Veillarda880b122003-04-21 21:36:41 +00009360 case XML_PARSER_START_TAG: {
Daniel Veillarde57ec792003-09-10 10:50:59 +00009361 const xmlChar *name;
9362 const xmlChar *prefix;
9363 const xmlChar *URI;
9364 int nsNr = ctxt->nsNr;
Daniel Veillarda880b122003-04-21 21:36:41 +00009365
9366 if ((avail < 2) && (ctxt->inputNr == 1))
9367 goto done;
9368 cur = ctxt->input->cur[0];
9369 if (cur != '<') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009370 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Daniel Veillarda880b122003-04-21 21:36:41 +00009371 ctxt->instate = XML_PARSER_EOF;
Daniel Veillarda880b122003-04-21 21:36:41 +00009372 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
9373 ctxt->sax->endDocument(ctxt->userData);
9374 goto done;
9375 }
9376 if (!terminate) {
9377 if (ctxt->progressive) {
9378 if ((lastgt == NULL) || (ctxt->input->cur > lastgt))
9379 goto done;
9380 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
9381 goto done;
9382 }
9383 }
9384 if (ctxt->spaceNr == 0)
9385 spacePush(ctxt, -1);
9386 else
9387 spacePush(ctxt, *ctxt->space);
Daniel Veillard81273902003-09-30 00:43:48 +00009388#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +00009389 if (ctxt->sax2)
Daniel Veillard81273902003-09-30 00:43:48 +00009390#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +00009391 name = xmlParseStartTag2(ctxt, &prefix, &URI);
Daniel Veillard81273902003-09-30 00:43:48 +00009392#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +00009393 else
9394 name = xmlParseStartTag(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +00009395#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarda880b122003-04-21 21:36:41 +00009396 if (name == NULL) {
9397 spacePop(ctxt);
9398 ctxt->instate = XML_PARSER_EOF;
Daniel Veillarda880b122003-04-21 21:36:41 +00009399 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
9400 ctxt->sax->endDocument(ctxt->userData);
9401 goto done;
9402 }
Daniel Veillard4432df22003-09-28 18:58:27 +00009403#ifdef LIBXML_VALID_ENABLED
Daniel Veillarda880b122003-04-21 21:36:41 +00009404 /*
9405 * [ VC: Root Element Type ]
9406 * The Name in the document type declaration must match
9407 * the element type of the root element.
9408 */
9409 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
9410 ctxt->node && (ctxt->node == ctxt->myDoc->children))
9411 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
Daniel Veillard4432df22003-09-28 18:58:27 +00009412#endif /* LIBXML_VALID_ENABLED */
Daniel Veillarda880b122003-04-21 21:36:41 +00009413
9414 /*
9415 * Check for an Empty Element.
9416 */
9417 if ((RAW == '/') && (NXT(1) == '>')) {
9418 SKIP(2);
Daniel Veillarde57ec792003-09-10 10:50:59 +00009419
9420 if (ctxt->sax2) {
9421 if ((ctxt->sax != NULL) &&
9422 (ctxt->sax->endElementNs != NULL) &&
9423 (!ctxt->disableSAX))
9424 ctxt->sax->endElementNs(ctxt->userData, name,
9425 prefix, URI);
Daniel Veillard81273902003-09-30 00:43:48 +00009426#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +00009427 } else {
9428 if ((ctxt->sax != NULL) &&
9429 (ctxt->sax->endElement != NULL) &&
9430 (!ctxt->disableSAX))
9431 ctxt->sax->endElement(ctxt->userData, name);
Daniel Veillard81273902003-09-30 00:43:48 +00009432#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarda880b122003-04-21 21:36:41 +00009433 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00009434 spacePop(ctxt);
9435 if (ctxt->nameNr == 0) {
Daniel Veillarda880b122003-04-21 21:36:41 +00009436 ctxt->instate = XML_PARSER_EPILOG;
Daniel Veillarda880b122003-04-21 21:36:41 +00009437 } else {
9438 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillarda880b122003-04-21 21:36:41 +00009439 }
9440 break;
9441 }
9442 if (RAW == '>') {
9443 NEXT;
9444 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00009445 xmlFatalErrMsgStr(ctxt, XML_ERR_GT_REQUIRED,
Daniel Veillarda880b122003-04-21 21:36:41 +00009446 "Couldn't find end of Start Tag %s\n",
9447 name);
Daniel Veillarda880b122003-04-21 21:36:41 +00009448 nodePop(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +00009449 spacePop(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +00009450 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00009451 if (ctxt->sax2)
9452 nameNsPush(ctxt, name, prefix, URI, ctxt->nsNr - nsNr);
Daniel Veillard81273902003-09-30 00:43:48 +00009453#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +00009454 else
9455 namePush(ctxt, name);
Daniel Veillard81273902003-09-30 00:43:48 +00009456#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +00009457
Daniel Veillarda880b122003-04-21 21:36:41 +00009458 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillarda880b122003-04-21 21:36:41 +00009459 break;
9460 }
9461 case XML_PARSER_CONTENT: {
9462 const xmlChar *test;
9463 unsigned int cons;
9464 if ((avail < 2) && (ctxt->inputNr == 1))
9465 goto done;
9466 cur = ctxt->input->cur[0];
9467 next = ctxt->input->cur[1];
9468
9469 test = CUR_PTR;
9470 cons = ctxt->input->consumed;
9471 if ((cur == '<') && (next == '/')) {
9472 ctxt->instate = XML_PARSER_END_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +00009473 break;
9474 } else if ((cur == '<') && (next == '?')) {
9475 if ((!terminate) &&
9476 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
9477 goto done;
Daniel Veillarda880b122003-04-21 21:36:41 +00009478 xmlParsePI(ctxt);
9479 } else if ((cur == '<') && (next != '!')) {
9480 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +00009481 break;
9482 } else if ((cur == '<') && (next == '!') &&
9483 (ctxt->input->cur[2] == '-') &&
9484 (ctxt->input->cur[3] == '-')) {
9485 if ((!terminate) &&
9486 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
9487 goto done;
Daniel Veillarda880b122003-04-21 21:36:41 +00009488 xmlParseComment(ctxt);
9489 ctxt->instate = XML_PARSER_CONTENT;
9490 } else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
9491 (ctxt->input->cur[2] == '[') &&
9492 (ctxt->input->cur[3] == 'C') &&
9493 (ctxt->input->cur[4] == 'D') &&
9494 (ctxt->input->cur[5] == 'A') &&
9495 (ctxt->input->cur[6] == 'T') &&
9496 (ctxt->input->cur[7] == 'A') &&
9497 (ctxt->input->cur[8] == '[')) {
9498 SKIP(9);
9499 ctxt->instate = XML_PARSER_CDATA_SECTION;
Daniel Veillarda880b122003-04-21 21:36:41 +00009500 break;
9501 } else if ((cur == '<') && (next == '!') &&
9502 (avail < 9)) {
9503 goto done;
9504 } else if (cur == '&') {
9505 if ((!terminate) &&
9506 (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0))
9507 goto done;
Daniel Veillarda880b122003-04-21 21:36:41 +00009508 xmlParseReference(ctxt);
9509 } else {
9510 /* TODO Avoid the extra copy, handle directly !!! */
9511 /*
9512 * Goal of the following test is:
9513 * - minimize calls to the SAX 'character' callback
9514 * when they are mergeable
9515 * - handle an problem for isBlank when we only parse
9516 * a sequence of blank chars and the next one is
9517 * not available to check against '<' presence.
9518 * - tries to homogenize the differences in SAX
9519 * callbacks between the push and pull versions
9520 * of the parser.
9521 */
9522 if ((ctxt->inputNr == 1) &&
9523 (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
9524 if (!terminate) {
9525 if (ctxt->progressive) {
9526 if ((lastlt == NULL) ||
9527 (ctxt->input->cur > lastlt))
9528 goto done;
9529 } else if (xmlParseLookupSequence(ctxt,
9530 '<', 0, 0) < 0) {
9531 goto done;
9532 }
9533 }
9534 }
9535 ctxt->checkIndex = 0;
Daniel Veillarda880b122003-04-21 21:36:41 +00009536 xmlParseCharData(ctxt, 0);
9537 }
9538 /*
9539 * Pop-up of finished entities.
9540 */
9541 while ((RAW == 0) && (ctxt->inputNr > 1))
9542 xmlPopInput(ctxt);
9543 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009544 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
9545 "detected an error in element content\n");
Daniel Veillarda880b122003-04-21 21:36:41 +00009546 ctxt->instate = XML_PARSER_EOF;
9547 break;
9548 }
9549 break;
9550 }
9551 case XML_PARSER_END_TAG:
9552 if (avail < 2)
9553 goto done;
9554 if (!terminate) {
9555 if (ctxt->progressive) {
9556 if ((lastgt == NULL) || (ctxt->input->cur > lastgt))
9557 goto done;
9558 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
9559 goto done;
9560 }
9561 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00009562 if (ctxt->sax2) {
9563 xmlParseEndTag2(ctxt,
9564 (void *) ctxt->pushTab[ctxt->nameNr * 3 - 3],
9565 (void *) ctxt->pushTab[ctxt->nameNr * 3 - 2], 0,
9566 (int) (long) ctxt->pushTab[ctxt->nameNr * 3 - 1]);
9567 nameNsPop(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +00009568 }
9569#ifdef LIBXML_SAX1_ENABLED
9570 else
Daniel Veillarde57ec792003-09-10 10:50:59 +00009571 xmlParseEndTag1(ctxt, 0);
Daniel Veillard81273902003-09-30 00:43:48 +00009572#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +00009573 if (ctxt->nameNr == 0) {
Daniel Veillarda880b122003-04-21 21:36:41 +00009574 ctxt->instate = XML_PARSER_EPILOG;
Daniel Veillarda880b122003-04-21 21:36:41 +00009575 } else {
9576 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillarda880b122003-04-21 21:36:41 +00009577 }
9578 break;
9579 case XML_PARSER_CDATA_SECTION: {
9580 /*
9581 * The Push mode need to have the SAX callback for
9582 * cdataBlock merge back contiguous callbacks.
9583 */
9584 int base;
9585
9586 base = xmlParseLookupSequence(ctxt, ']', ']', '>');
9587 if (base < 0) {
9588 if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) {
9589 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
9590 if (ctxt->sax->cdataBlock != NULL)
Daniel Veillardd9d32ae2003-07-05 20:32:43 +00009591 ctxt->sax->cdataBlock(ctxt->userData,
9592 ctxt->input->cur,
9593 XML_PARSER_BIG_BUFFER_SIZE);
9594 else if (ctxt->sax->characters != NULL)
9595 ctxt->sax->characters(ctxt->userData,
9596 ctxt->input->cur,
Daniel Veillarda880b122003-04-21 21:36:41 +00009597 XML_PARSER_BIG_BUFFER_SIZE);
9598 }
9599 SKIP(XML_PARSER_BIG_BUFFER_SIZE);
9600 ctxt->checkIndex = 0;
9601 }
9602 goto done;
9603 } else {
9604 if ((ctxt->sax != NULL) && (base > 0) &&
9605 (!ctxt->disableSAX)) {
9606 if (ctxt->sax->cdataBlock != NULL)
9607 ctxt->sax->cdataBlock(ctxt->userData,
9608 ctxt->input->cur, base);
Daniel Veillardd9d32ae2003-07-05 20:32:43 +00009609 else if (ctxt->sax->characters != NULL)
9610 ctxt->sax->characters(ctxt->userData,
9611 ctxt->input->cur, base);
Daniel Veillarda880b122003-04-21 21:36:41 +00009612 }
9613 SKIP(base + 3);
9614 ctxt->checkIndex = 0;
9615 ctxt->instate = XML_PARSER_CONTENT;
9616#ifdef DEBUG_PUSH
9617 xmlGenericError(xmlGenericErrorContext,
9618 "PP: entering CONTENT\n");
9619#endif
9620 }
9621 break;
9622 }
Owen Taylor3473f882001-02-23 17:55:21 +00009623 case XML_PARSER_MISC:
9624 SKIP_BLANKS;
9625 if (ctxt->input->buf == NULL)
Daniel Veillarda880b122003-04-21 21:36:41 +00009626 avail = ctxt->input->length -
9627 (ctxt->input->cur - ctxt->input->base);
Owen Taylor3473f882001-02-23 17:55:21 +00009628 else
Daniel Veillarda880b122003-04-21 21:36:41 +00009629 avail = ctxt->input->buf->buffer->use -
9630 (ctxt->input->cur - ctxt->input->base);
Owen Taylor3473f882001-02-23 17:55:21 +00009631 if (avail < 2)
9632 goto done;
9633 cur = ctxt->input->cur[0];
9634 next = ctxt->input->cur[1];
9635 if ((cur == '<') && (next == '?')) {
9636 if ((!terminate) &&
9637 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
9638 goto done;
9639#ifdef DEBUG_PUSH
9640 xmlGenericError(xmlGenericErrorContext,
9641 "PP: Parsing PI\n");
9642#endif
9643 xmlParsePI(ctxt);
9644 } else if ((cur == '<') && (next == '!') &&
Daniel Veillarda880b122003-04-21 21:36:41 +00009645 (ctxt->input->cur[2] == '-') &&
9646 (ctxt->input->cur[3] == '-')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009647 if ((!terminate) &&
9648 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
9649 goto done;
9650#ifdef DEBUG_PUSH
9651 xmlGenericError(xmlGenericErrorContext,
9652 "PP: Parsing Comment\n");
9653#endif
9654 xmlParseComment(ctxt);
9655 ctxt->instate = XML_PARSER_MISC;
9656 } else if ((cur == '<') && (next == '!') &&
Daniel Veillarda880b122003-04-21 21:36:41 +00009657 (ctxt->input->cur[2] == 'D') &&
9658 (ctxt->input->cur[3] == 'O') &&
9659 (ctxt->input->cur[4] == 'C') &&
9660 (ctxt->input->cur[5] == 'T') &&
9661 (ctxt->input->cur[6] == 'Y') &&
9662 (ctxt->input->cur[7] == 'P') &&
Owen Taylor3473f882001-02-23 17:55:21 +00009663 (ctxt->input->cur[8] == 'E')) {
9664 if ((!terminate) &&
9665 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
9666 goto done;
9667#ifdef DEBUG_PUSH
9668 xmlGenericError(xmlGenericErrorContext,
9669 "PP: Parsing internal subset\n");
9670#endif
9671 ctxt->inSubset = 1;
9672 xmlParseDocTypeDecl(ctxt);
9673 if (RAW == '[') {
9674 ctxt->instate = XML_PARSER_DTD;
9675#ifdef DEBUG_PUSH
9676 xmlGenericError(xmlGenericErrorContext,
9677 "PP: entering DTD\n");
9678#endif
9679 } else {
9680 /*
9681 * Create and update the external subset.
9682 */
9683 ctxt->inSubset = 2;
9684 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
9685 (ctxt->sax->externalSubset != NULL))
9686 ctxt->sax->externalSubset(ctxt->userData,
9687 ctxt->intSubName, ctxt->extSubSystem,
9688 ctxt->extSubURI);
9689 ctxt->inSubset = 0;
9690 ctxt->instate = XML_PARSER_PROLOG;
9691#ifdef DEBUG_PUSH
9692 xmlGenericError(xmlGenericErrorContext,
9693 "PP: entering PROLOG\n");
9694#endif
9695 }
9696 } else if ((cur == '<') && (next == '!') &&
9697 (avail < 9)) {
9698 goto done;
9699 } else {
9700 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +00009701 ctxt->progressive = 1;
9702 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Owen Taylor3473f882001-02-23 17:55:21 +00009703#ifdef DEBUG_PUSH
9704 xmlGenericError(xmlGenericErrorContext,
9705 "PP: entering START_TAG\n");
9706#endif
9707 }
9708 break;
Owen Taylor3473f882001-02-23 17:55:21 +00009709 case XML_PARSER_PROLOG:
9710 SKIP_BLANKS;
9711 if (ctxt->input->buf == NULL)
9712 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
9713 else
9714 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
9715 if (avail < 2)
9716 goto done;
9717 cur = ctxt->input->cur[0];
9718 next = ctxt->input->cur[1];
9719 if ((cur == '<') && (next == '?')) {
9720 if ((!terminate) &&
9721 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
9722 goto done;
9723#ifdef DEBUG_PUSH
9724 xmlGenericError(xmlGenericErrorContext,
9725 "PP: Parsing PI\n");
9726#endif
9727 xmlParsePI(ctxt);
9728 } else if ((cur == '<') && (next == '!') &&
9729 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
9730 if ((!terminate) &&
9731 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
9732 goto done;
9733#ifdef DEBUG_PUSH
9734 xmlGenericError(xmlGenericErrorContext,
9735 "PP: Parsing Comment\n");
9736#endif
9737 xmlParseComment(ctxt);
9738 ctxt->instate = XML_PARSER_PROLOG;
9739 } else if ((cur == '<') && (next == '!') &&
9740 (avail < 4)) {
9741 goto done;
9742 } else {
9743 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +00009744 ctxt->progressive = 1;
9745 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Owen Taylor3473f882001-02-23 17:55:21 +00009746#ifdef DEBUG_PUSH
9747 xmlGenericError(xmlGenericErrorContext,
9748 "PP: entering START_TAG\n");
9749#endif
9750 }
9751 break;
9752 case XML_PARSER_EPILOG:
9753 SKIP_BLANKS;
9754 if (ctxt->input->buf == NULL)
9755 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
9756 else
9757 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
9758 if (avail < 2)
9759 goto done;
9760 cur = ctxt->input->cur[0];
9761 next = ctxt->input->cur[1];
9762 if ((cur == '<') && (next == '?')) {
9763 if ((!terminate) &&
9764 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
9765 goto done;
9766#ifdef DEBUG_PUSH
9767 xmlGenericError(xmlGenericErrorContext,
9768 "PP: Parsing PI\n");
9769#endif
9770 xmlParsePI(ctxt);
9771 ctxt->instate = XML_PARSER_EPILOG;
9772 } else if ((cur == '<') && (next == '!') &&
9773 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
9774 if ((!terminate) &&
9775 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
9776 goto done;
9777#ifdef DEBUG_PUSH
9778 xmlGenericError(xmlGenericErrorContext,
9779 "PP: Parsing Comment\n");
9780#endif
9781 xmlParseComment(ctxt);
9782 ctxt->instate = XML_PARSER_EPILOG;
9783 } else if ((cur == '<') && (next == '!') &&
9784 (avail < 4)) {
9785 goto done;
9786 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009787 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009788 ctxt->instate = XML_PARSER_EOF;
9789#ifdef DEBUG_PUSH
9790 xmlGenericError(xmlGenericErrorContext,
9791 "PP: entering EOF\n");
9792#endif
Daniel Veillard8d24cc12002-03-05 15:41:29 +00009793 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00009794 ctxt->sax->endDocument(ctxt->userData);
9795 goto done;
9796 }
9797 break;
Owen Taylor3473f882001-02-23 17:55:21 +00009798 case XML_PARSER_DTD: {
9799 /*
9800 * Sorry but progressive parsing of the internal subset
9801 * is not expected to be supported. We first check that
9802 * the full content of the internal subset is available and
9803 * the parsing is launched only at that point.
9804 * Internal subset ends up with "']' S? '>'" in an unescaped
9805 * section and not in a ']]>' sequence which are conditional
9806 * sections (whoever argued to keep that crap in XML deserve
9807 * a place in hell !).
9808 */
9809 int base, i;
9810 xmlChar *buf;
9811 xmlChar quote = 0;
9812
9813 base = ctxt->input->cur - ctxt->input->base;
9814 if (base < 0) return(0);
9815 if (ctxt->checkIndex > base)
9816 base = ctxt->checkIndex;
9817 buf = ctxt->input->buf->buffer->content;
9818 for (;(unsigned int) base < ctxt->input->buf->buffer->use;
9819 base++) {
9820 if (quote != 0) {
9821 if (buf[base] == quote)
9822 quote = 0;
9823 continue;
9824 }
9825 if (buf[base] == '"') {
9826 quote = '"';
9827 continue;
9828 }
9829 if (buf[base] == '\'') {
9830 quote = '\'';
9831 continue;
9832 }
9833 if (buf[base] == ']') {
9834 if ((unsigned int) base +1 >=
9835 ctxt->input->buf->buffer->use)
9836 break;
9837 if (buf[base + 1] == ']') {
9838 /* conditional crap, skip both ']' ! */
9839 base++;
9840 continue;
9841 }
9842 for (i = 0;
9843 (unsigned int) base + i < ctxt->input->buf->buffer->use;
9844 i++) {
9845 if (buf[base + i] == '>')
9846 goto found_end_int_subset;
9847 }
9848 break;
9849 }
9850 }
9851 /*
9852 * We didn't found the end of the Internal subset
9853 */
9854 if (quote == 0)
9855 ctxt->checkIndex = base;
9856#ifdef DEBUG_PUSH
9857 if (next == 0)
9858 xmlGenericError(xmlGenericErrorContext,
9859 "PP: lookup of int subset end filed\n");
9860#endif
9861 goto done;
9862
9863found_end_int_subset:
9864 xmlParseInternalSubset(ctxt);
9865 ctxt->inSubset = 2;
9866 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
9867 (ctxt->sax->externalSubset != NULL))
9868 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
9869 ctxt->extSubSystem, ctxt->extSubURI);
9870 ctxt->inSubset = 0;
9871 ctxt->instate = XML_PARSER_PROLOG;
9872 ctxt->checkIndex = 0;
9873#ifdef DEBUG_PUSH
9874 xmlGenericError(xmlGenericErrorContext,
9875 "PP: entering PROLOG\n");
9876#endif
9877 break;
9878 }
9879 case XML_PARSER_COMMENT:
9880 xmlGenericError(xmlGenericErrorContext,
9881 "PP: internal error, state == COMMENT\n");
9882 ctxt->instate = XML_PARSER_CONTENT;
9883#ifdef DEBUG_PUSH
9884 xmlGenericError(xmlGenericErrorContext,
9885 "PP: entering CONTENT\n");
9886#endif
9887 break;
Daniel Veillarda880b122003-04-21 21:36:41 +00009888 case XML_PARSER_IGNORE:
9889 xmlGenericError(xmlGenericErrorContext,
9890 "PP: internal error, state == IGNORE");
9891 ctxt->instate = XML_PARSER_DTD;
9892#ifdef DEBUG_PUSH
9893 xmlGenericError(xmlGenericErrorContext,
9894 "PP: entering DTD\n");
9895#endif
9896 break;
Owen Taylor3473f882001-02-23 17:55:21 +00009897 case XML_PARSER_PI:
9898 xmlGenericError(xmlGenericErrorContext,
9899 "PP: internal error, state == PI\n");
9900 ctxt->instate = XML_PARSER_CONTENT;
9901#ifdef DEBUG_PUSH
9902 xmlGenericError(xmlGenericErrorContext,
9903 "PP: entering CONTENT\n");
9904#endif
9905 break;
9906 case XML_PARSER_ENTITY_DECL:
9907 xmlGenericError(xmlGenericErrorContext,
9908 "PP: internal error, state == ENTITY_DECL\n");
9909 ctxt->instate = XML_PARSER_DTD;
9910#ifdef DEBUG_PUSH
9911 xmlGenericError(xmlGenericErrorContext,
9912 "PP: entering DTD\n");
9913#endif
9914 break;
9915 case XML_PARSER_ENTITY_VALUE:
9916 xmlGenericError(xmlGenericErrorContext,
9917 "PP: internal error, state == ENTITY_VALUE\n");
9918 ctxt->instate = XML_PARSER_CONTENT;
9919#ifdef DEBUG_PUSH
9920 xmlGenericError(xmlGenericErrorContext,
9921 "PP: entering DTD\n");
9922#endif
9923 break;
9924 case XML_PARSER_ATTRIBUTE_VALUE:
9925 xmlGenericError(xmlGenericErrorContext,
9926 "PP: internal error, state == ATTRIBUTE_VALUE\n");
9927 ctxt->instate = XML_PARSER_START_TAG;
9928#ifdef DEBUG_PUSH
9929 xmlGenericError(xmlGenericErrorContext,
9930 "PP: entering START_TAG\n");
9931#endif
9932 break;
9933 case XML_PARSER_SYSTEM_LITERAL:
9934 xmlGenericError(xmlGenericErrorContext,
9935 "PP: internal error, state == SYSTEM_LITERAL\n");
9936 ctxt->instate = XML_PARSER_START_TAG;
9937#ifdef DEBUG_PUSH
9938 xmlGenericError(xmlGenericErrorContext,
9939 "PP: entering START_TAG\n");
9940#endif
9941 break;
Daniel Veillard4a7ae502002-02-18 19:18:17 +00009942 case XML_PARSER_PUBLIC_LITERAL:
9943 xmlGenericError(xmlGenericErrorContext,
9944 "PP: internal error, state == PUBLIC_LITERAL\n");
9945 ctxt->instate = XML_PARSER_START_TAG;
9946#ifdef DEBUG_PUSH
9947 xmlGenericError(xmlGenericErrorContext,
9948 "PP: entering START_TAG\n");
9949#endif
9950 break;
Owen Taylor3473f882001-02-23 17:55:21 +00009951 }
9952 }
9953done:
9954#ifdef DEBUG_PUSH
9955 xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret);
9956#endif
9957 return(ret);
9958}
9959
9960/**
Owen Taylor3473f882001-02-23 17:55:21 +00009961 * xmlParseChunk:
9962 * @ctxt: an XML parser context
9963 * @chunk: an char array
9964 * @size: the size in byte of the chunk
9965 * @terminate: last chunk indicator
9966 *
9967 * Parse a Chunk of memory
9968 *
9969 * Returns zero if no error, the xmlParserErrors otherwise.
9970 */
9971int
9972xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
9973 int terminate) {
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00009974 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
9975 return(ctxt->errNo);
Daniel Veillard309f81d2003-09-23 09:02:53 +00009976 if (ctxt->instate == XML_PARSER_START)
9977 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00009978 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
9979 (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) {
9980 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
9981 int cur = ctxt->input->cur - ctxt->input->base;
9982
9983 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
9984 ctxt->input->base = ctxt->input->buf->buffer->content + base;
9985 ctxt->input->cur = ctxt->input->base + cur;
Daniel Veillard48b2f892001-02-25 16:11:03 +00009986 ctxt->input->end =
9987 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00009988#ifdef DEBUG_PUSH
9989 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
9990#endif
9991
Owen Taylor3473f882001-02-23 17:55:21 +00009992 } else if (ctxt->instate != XML_PARSER_EOF) {
9993 if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
9994 xmlParserInputBufferPtr in = ctxt->input->buf;
9995 if ((in->encoder != NULL) && (in->buffer != NULL) &&
9996 (in->raw != NULL)) {
9997 int nbchars;
9998
9999 nbchars = xmlCharEncInFunc(in->encoder, in->buffer, in->raw);
10000 if (nbchars < 0) {
Daniel Veillard24eb9782003-10-04 21:08:09 +000010001 /* TODO 2.6.0 */
Owen Taylor3473f882001-02-23 17:55:21 +000010002 xmlGenericError(xmlGenericErrorContext,
10003 "xmlParseChunk: encoder error\n");
10004 return(XML_ERR_INVALID_ENCODING);
10005 }
10006 }
10007 }
10008 }
10009 xmlParseTryOrFinish(ctxt, terminate);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000010010 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
10011 return(ctxt->errNo);
Owen Taylor3473f882001-02-23 17:55:21 +000010012 if (terminate) {
10013 /*
10014 * Check for termination
10015 */
Daniel Veillard819d5cb2002-10-14 11:15:18 +000010016 int avail = 0;
10017 if (ctxt->input->buf == NULL)
10018 avail = ctxt->input->length -
10019 (ctxt->input->cur - ctxt->input->base);
10020 else
10021 avail = ctxt->input->buf->buffer->use -
10022 (ctxt->input->cur - ctxt->input->base);
10023
Owen Taylor3473f882001-02-23 17:55:21 +000010024 if ((ctxt->instate != XML_PARSER_EOF) &&
10025 (ctxt->instate != XML_PARSER_EPILOG)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010026 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010027 }
Daniel Veillard819d5cb2002-10-14 11:15:18 +000010028 if ((ctxt->instate == XML_PARSER_EPILOG) && (avail > 0)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010029 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Daniel Veillard819d5cb2002-10-14 11:15:18 +000010030 }
Owen Taylor3473f882001-02-23 17:55:21 +000010031 if (ctxt->instate != XML_PARSER_EOF) {
Daniel Veillard8d24cc12002-03-05 15:41:29 +000010032 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +000010033 ctxt->sax->endDocument(ctxt->userData);
10034 }
10035 ctxt->instate = XML_PARSER_EOF;
10036 }
10037 return((xmlParserErrors) ctxt->errNo);
10038}
10039
10040/************************************************************************
10041 * *
10042 * I/O front end functions to the parser *
10043 * *
10044 ************************************************************************/
10045
10046/**
10047 * xmlStopParser:
10048 * @ctxt: an XML parser context
10049 *
10050 * Blocks further parser processing
10051 */
10052void
10053xmlStopParser(xmlParserCtxtPtr ctxt) {
10054 ctxt->instate = XML_PARSER_EOF;
10055 if (ctxt->input != NULL)
10056 ctxt->input->cur = BAD_CAST"";
10057}
10058
10059/**
10060 * xmlCreatePushParserCtxt:
10061 * @sax: a SAX handler
10062 * @user_data: The user data returned on SAX callbacks
10063 * @chunk: a pointer to an array of chars
10064 * @size: number of chars in the array
10065 * @filename: an optional file name or URI
10066 *
Daniel Veillard176d99f2002-07-06 19:22:28 +000010067 * Create a parser context for using the XML parser in push mode.
10068 * If @buffer and @size are non-NULL, the data is used to detect
10069 * the encoding. The remaining characters will be parsed so they
10070 * don't need to be fed in again through xmlParseChunk.
Owen Taylor3473f882001-02-23 17:55:21 +000010071 * To allow content encoding detection, @size should be >= 4
10072 * The value of @filename is used for fetching external entities
10073 * and error/warning reports.
10074 *
10075 * Returns the new parser context or NULL
10076 */
Daniel Veillard176d99f2002-07-06 19:22:28 +000010077
Owen Taylor3473f882001-02-23 17:55:21 +000010078xmlParserCtxtPtr
10079xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
10080 const char *chunk, int size, const char *filename) {
10081 xmlParserCtxtPtr ctxt;
10082 xmlParserInputPtr inputStream;
10083 xmlParserInputBufferPtr buf;
10084 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
10085
10086 /*
10087 * plug some encoding conversion routines
10088 */
10089 if ((chunk != NULL) && (size >= 4))
10090 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
10091
10092 buf = xmlAllocParserInputBuffer(enc);
10093 if (buf == NULL) return(NULL);
10094
10095 ctxt = xmlNewParserCtxt();
10096 if (ctxt == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +000010097 xmlErrMemory(NULL, "creating parser: out of memory\n");
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000010098 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000010099 return(NULL);
10100 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000010101 ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 * sizeof(xmlChar *));
10102 if (ctxt->pushTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010103 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +000010104 xmlFreeParserInputBuffer(buf);
10105 xmlFreeParserCtxt(ctxt);
10106 return(NULL);
10107 }
Owen Taylor3473f882001-02-23 17:55:21 +000010108 if (sax != NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +000010109#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard092643b2003-09-25 14:29:29 +000010110 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Daniel Veillard81273902003-09-30 00:43:48 +000010111#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000010112 xmlFree(ctxt->sax);
10113 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
10114 if (ctxt->sax == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010115 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000010116 xmlFreeParserInputBuffer(buf);
10117 xmlFreeParserCtxt(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010118 return(NULL);
10119 }
10120 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
10121 if (user_data != NULL)
10122 ctxt->userData = user_data;
10123 }
10124 if (filename == NULL) {
10125 ctxt->directory = NULL;
10126 } else {
10127 ctxt->directory = xmlParserGetDirectory(filename);
10128 }
10129
10130 inputStream = xmlNewInputStream(ctxt);
10131 if (inputStream == NULL) {
10132 xmlFreeParserCtxt(ctxt);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000010133 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000010134 return(NULL);
10135 }
10136
10137 if (filename == NULL)
10138 inputStream->filename = NULL;
10139 else
Daniel Veillardf4862f02002-09-10 11:13:43 +000010140 inputStream->filename = (char *)
Igor Zlatkovic5f9fada2003-02-19 14:51:00 +000010141 xmlCanonicPath((const xmlChar *) filename);
Owen Taylor3473f882001-02-23 17:55:21 +000010142 inputStream->buf = buf;
10143 inputStream->base = inputStream->buf->buffer->content;
10144 inputStream->cur = inputStream->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +000010145 inputStream->end =
10146 &inputStream->buf->buffer->content[inputStream->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000010147
10148 inputPush(ctxt, inputStream);
10149
10150 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
10151 (ctxt->input->buf != NULL)) {
Daniel Veillardaa39a0f2002-01-06 12:47:22 +000010152 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
10153 int cur = ctxt->input->cur - ctxt->input->base;
10154
Owen Taylor3473f882001-02-23 17:55:21 +000010155 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
Daniel Veillardaa39a0f2002-01-06 12:47:22 +000010156
10157 ctxt->input->base = ctxt->input->buf->buffer->content + base;
10158 ctxt->input->cur = ctxt->input->base + cur;
10159 ctxt->input->end =
10160 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000010161#ifdef DEBUG_PUSH
10162 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
10163#endif
10164 }
10165
Daniel Veillard0e4cd172001-06-28 12:13:56 +000010166 if (enc != XML_CHAR_ENCODING_NONE) {
10167 xmlSwitchEncoding(ctxt, enc);
10168 }
10169
Owen Taylor3473f882001-02-23 17:55:21 +000010170 return(ctxt);
10171}
Daniel Veillard73b013f2003-09-30 12:36:01 +000010172#endif /* LIBXML_PUSH_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000010173
10174/**
10175 * xmlCreateIOParserCtxt:
10176 * @sax: a SAX handler
10177 * @user_data: The user data returned on SAX callbacks
10178 * @ioread: an I/O read function
10179 * @ioclose: an I/O close function
10180 * @ioctx: an I/O handler
10181 * @enc: the charset encoding if known
10182 *
10183 * Create a parser context for using the XML parser with an existing
10184 * I/O stream
10185 *
10186 * Returns the new parser context or NULL
10187 */
10188xmlParserCtxtPtr
10189xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
10190 xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
10191 void *ioctx, xmlCharEncoding enc) {
10192 xmlParserCtxtPtr ctxt;
10193 xmlParserInputPtr inputStream;
10194 xmlParserInputBufferPtr buf;
10195
10196 buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
10197 if (buf == NULL) return(NULL);
10198
10199 ctxt = xmlNewParserCtxt();
10200 if (ctxt == NULL) {
10201 xmlFree(buf);
10202 return(NULL);
10203 }
10204 if (sax != NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +000010205#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard092643b2003-09-25 14:29:29 +000010206 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Daniel Veillard81273902003-09-30 00:43:48 +000010207#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000010208 xmlFree(ctxt->sax);
10209 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
10210 if (ctxt->sax == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010211 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010212 xmlFree(ctxt);
10213 return(NULL);
10214 }
10215 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
10216 if (user_data != NULL)
10217 ctxt->userData = user_data;
10218 }
10219
10220 inputStream = xmlNewIOInputStream(ctxt, buf, enc);
10221 if (inputStream == NULL) {
10222 xmlFreeParserCtxt(ctxt);
10223 return(NULL);
10224 }
10225 inputPush(ctxt, inputStream);
10226
10227 return(ctxt);
10228}
10229
Daniel Veillard4432df22003-09-28 18:58:27 +000010230#ifdef LIBXML_VALID_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000010231/************************************************************************
10232 * *
Daniel Veillardcbaf3992001-12-31 16:16:02 +000010233 * Front ends when parsing a DTD *
Owen Taylor3473f882001-02-23 17:55:21 +000010234 * *
10235 ************************************************************************/
10236
10237/**
10238 * xmlIOParseDTD:
10239 * @sax: the SAX handler block or NULL
10240 * @input: an Input Buffer
10241 * @enc: the charset encoding if known
10242 *
10243 * Load and parse a DTD
10244 *
10245 * Returns the resulting xmlDtdPtr or NULL in case of error.
10246 * @input will be freed at parsing end.
10247 */
10248
10249xmlDtdPtr
10250xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
10251 xmlCharEncoding enc) {
10252 xmlDtdPtr ret = NULL;
10253 xmlParserCtxtPtr ctxt;
10254 xmlParserInputPtr pinput = NULL;
Daniel Veillard87a764e2001-06-20 17:41:10 +000010255 xmlChar start[4];
Owen Taylor3473f882001-02-23 17:55:21 +000010256
10257 if (input == NULL)
10258 return(NULL);
10259
10260 ctxt = xmlNewParserCtxt();
10261 if (ctxt == NULL) {
10262 return(NULL);
10263 }
10264
10265 /*
10266 * Set-up the SAX context
10267 */
10268 if (sax != NULL) {
10269 if (ctxt->sax != NULL)
10270 xmlFree(ctxt->sax);
10271 ctxt->sax = sax;
10272 ctxt->userData = NULL;
10273 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000010274 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010275
10276 /*
10277 * generate a parser input from the I/O handler
10278 */
10279
10280 pinput = xmlNewIOInputStream(ctxt, input, enc);
10281 if (pinput == NULL) {
10282 if (sax != NULL) ctxt->sax = NULL;
10283 xmlFreeParserCtxt(ctxt);
10284 return(NULL);
10285 }
10286
10287 /*
10288 * plug some encoding conversion routines here.
10289 */
10290 xmlPushInput(ctxt, pinput);
10291
10292 pinput->filename = NULL;
10293 pinput->line = 1;
10294 pinput->col = 1;
10295 pinput->base = ctxt->input->cur;
10296 pinput->cur = ctxt->input->cur;
10297 pinput->free = NULL;
10298
10299 /*
10300 * let's parse that entity knowing it's an external subset.
10301 */
10302 ctxt->inSubset = 2;
10303 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
10304 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
10305 BAD_CAST "none", BAD_CAST "none");
Daniel Veillard87a764e2001-06-20 17:41:10 +000010306
10307 if (enc == XML_CHAR_ENCODING_NONE) {
10308 /*
10309 * Get the 4 first bytes and decode the charset
10310 * if enc != XML_CHAR_ENCODING_NONE
10311 * plug some encoding conversion routines.
10312 */
10313 start[0] = RAW;
10314 start[1] = NXT(1);
10315 start[2] = NXT(2);
10316 start[3] = NXT(3);
10317 enc = xmlDetectCharEncoding(start, 4);
10318 if (enc != XML_CHAR_ENCODING_NONE) {
10319 xmlSwitchEncoding(ctxt, enc);
10320 }
10321 }
10322
Owen Taylor3473f882001-02-23 17:55:21 +000010323 xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
10324
10325 if (ctxt->myDoc != NULL) {
10326 if (ctxt->wellFormed) {
10327 ret = ctxt->myDoc->extSubset;
10328 ctxt->myDoc->extSubset = NULL;
Daniel Veillard329456a2003-04-26 21:21:00 +000010329 if (ret != NULL) {
10330 xmlNodePtr tmp;
10331
10332 ret->doc = NULL;
10333 tmp = ret->children;
10334 while (tmp != NULL) {
10335 tmp->doc = NULL;
10336 tmp = tmp->next;
10337 }
10338 }
Owen Taylor3473f882001-02-23 17:55:21 +000010339 } else {
10340 ret = NULL;
10341 }
10342 xmlFreeDoc(ctxt->myDoc);
10343 ctxt->myDoc = NULL;
10344 }
10345 if (sax != NULL) ctxt->sax = NULL;
10346 xmlFreeParserCtxt(ctxt);
10347
10348 return(ret);
10349}
10350
10351/**
10352 * xmlSAXParseDTD:
10353 * @sax: the SAX handler block
10354 * @ExternalID: a NAME* containing the External ID of the DTD
10355 * @SystemID: a NAME* containing the URL to the DTD
10356 *
10357 * Load and parse an external subset.
10358 *
10359 * Returns the resulting xmlDtdPtr or NULL in case of error.
10360 */
10361
10362xmlDtdPtr
10363xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
10364 const xmlChar *SystemID) {
10365 xmlDtdPtr ret = NULL;
10366 xmlParserCtxtPtr ctxt;
10367 xmlParserInputPtr input = NULL;
10368 xmlCharEncoding enc;
10369
10370 if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
10371
10372 ctxt = xmlNewParserCtxt();
10373 if (ctxt == NULL) {
10374 return(NULL);
10375 }
10376
10377 /*
10378 * Set-up the SAX context
10379 */
10380 if (sax != NULL) {
10381 if (ctxt->sax != NULL)
10382 xmlFree(ctxt->sax);
10383 ctxt->sax = sax;
Daniel Veillardbf1e3d82003-08-14 23:57:26 +000010384 ctxt->userData = ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000010385 }
10386
10387 /*
10388 * Ask the Entity resolver to load the damn thing
10389 */
10390
10391 if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
Daniel Veillardc6abc3d2003-04-26 13:27:30 +000010392 input = ctxt->sax->resolveEntity(ctxt, ExternalID, SystemID);
Owen Taylor3473f882001-02-23 17:55:21 +000010393 if (input == NULL) {
10394 if (sax != NULL) ctxt->sax = NULL;
10395 xmlFreeParserCtxt(ctxt);
10396 return(NULL);
10397 }
10398
10399 /*
10400 * plug some encoding conversion routines here.
10401 */
10402 xmlPushInput(ctxt, input);
10403 enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
10404 xmlSwitchEncoding(ctxt, enc);
10405
10406 if (input->filename == NULL)
Daniel Veillard85095e22003-04-23 13:56:44 +000010407 input->filename = (char *) xmlCanonicPath(SystemID);
Owen Taylor3473f882001-02-23 17:55:21 +000010408 input->line = 1;
10409 input->col = 1;
10410 input->base = ctxt->input->cur;
10411 input->cur = ctxt->input->cur;
10412 input->free = NULL;
10413
10414 /*
10415 * let's parse that entity knowing it's an external subset.
10416 */
10417 ctxt->inSubset = 2;
10418 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
10419 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
10420 ExternalID, SystemID);
10421 xmlParseExternalSubset(ctxt, ExternalID, SystemID);
10422
10423 if (ctxt->myDoc != NULL) {
10424 if (ctxt->wellFormed) {
10425 ret = ctxt->myDoc->extSubset;
10426 ctxt->myDoc->extSubset = NULL;
Daniel Veillardc5573462003-04-25 16:43:49 +000010427 if (ret != NULL) {
10428 xmlNodePtr tmp;
10429
10430 ret->doc = NULL;
10431 tmp = ret->children;
10432 while (tmp != NULL) {
10433 tmp->doc = NULL;
10434 tmp = tmp->next;
10435 }
10436 }
Owen Taylor3473f882001-02-23 17:55:21 +000010437 } else {
10438 ret = NULL;
10439 }
10440 xmlFreeDoc(ctxt->myDoc);
10441 ctxt->myDoc = NULL;
10442 }
10443 if (sax != NULL) ctxt->sax = NULL;
10444 xmlFreeParserCtxt(ctxt);
10445
10446 return(ret);
10447}
10448
Daniel Veillard4432df22003-09-28 18:58:27 +000010449
Owen Taylor3473f882001-02-23 17:55:21 +000010450/**
10451 * xmlParseDTD:
10452 * @ExternalID: a NAME* containing the External ID of the DTD
10453 * @SystemID: a NAME* containing the URL to the DTD
10454 *
10455 * Load and parse an external subset.
10456 *
10457 * Returns the resulting xmlDtdPtr or NULL in case of error.
10458 */
10459
10460xmlDtdPtr
10461xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
10462 return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
10463}
Daniel Veillard4432df22003-09-28 18:58:27 +000010464#endif /* LIBXML_VALID_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000010465
10466/************************************************************************
10467 * *
10468 * Front ends when parsing an Entity *
10469 * *
10470 ************************************************************************/
10471
10472/**
Owen Taylor3473f882001-02-23 17:55:21 +000010473 * xmlParseCtxtExternalEntity:
10474 * @ctx: the existing parsing context
10475 * @URL: the URL for the entity to load
10476 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +000010477 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +000010478 *
10479 * Parse an external general entity within an existing parsing context
10480 * An external general parsed entity is well-formed if it matches the
10481 * production labeled extParsedEnt.
10482 *
10483 * [78] extParsedEnt ::= TextDecl? content
10484 *
10485 * Returns 0 if the entity is well formed, -1 in case of args problem and
10486 * the parser error code otherwise
10487 */
10488
10489int
10490xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
Daniel Veillardcda96922001-08-21 10:56:31 +000010491 const xmlChar *ID, xmlNodePtr *lst) {
Owen Taylor3473f882001-02-23 17:55:21 +000010492 xmlParserCtxtPtr ctxt;
10493 xmlDocPtr newDoc;
10494 xmlSAXHandlerPtr oldsax = NULL;
10495 int ret = 0;
Daniel Veillard87a764e2001-06-20 17:41:10 +000010496 xmlChar start[4];
10497 xmlCharEncoding enc;
Owen Taylor3473f882001-02-23 17:55:21 +000010498
10499 if (ctx->depth > 40) {
10500 return(XML_ERR_ENTITY_LOOP);
10501 }
10502
Daniel Veillardcda96922001-08-21 10:56:31 +000010503 if (lst != NULL)
10504 *lst = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000010505 if ((URL == NULL) && (ID == NULL))
10506 return(-1);
10507 if (ctx->myDoc == NULL) /* @@ relax but check for dereferences */
10508 return(-1);
10509
10510
10511 ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL);
10512 if (ctxt == NULL) return(-1);
10513 ctxt->userData = ctxt;
Daniel Veillarde2830f12003-01-08 17:47:49 +000010514 ctxt->_private = ctx->_private;
Owen Taylor3473f882001-02-23 17:55:21 +000010515 oldsax = ctxt->sax;
10516 ctxt->sax = ctx->sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000010517 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010518 newDoc = xmlNewDoc(BAD_CAST "1.0");
10519 if (newDoc == NULL) {
10520 xmlFreeParserCtxt(ctxt);
10521 return(-1);
10522 }
10523 if (ctx->myDoc != NULL) {
10524 newDoc->intSubset = ctx->myDoc->intSubset;
10525 newDoc->extSubset = ctx->myDoc->extSubset;
10526 }
10527 if (ctx->myDoc->URL != NULL) {
10528 newDoc->URL = xmlStrdup(ctx->myDoc->URL);
10529 }
10530 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
10531 if (newDoc->children == NULL) {
10532 ctxt->sax = oldsax;
10533 xmlFreeParserCtxt(ctxt);
10534 newDoc->intSubset = NULL;
10535 newDoc->extSubset = NULL;
10536 xmlFreeDoc(newDoc);
10537 return(-1);
10538 }
10539 nodePush(ctxt, newDoc->children);
10540 if (ctx->myDoc == NULL) {
10541 ctxt->myDoc = newDoc;
10542 } else {
10543 ctxt->myDoc = ctx->myDoc;
10544 newDoc->children->doc = ctx->myDoc;
10545 }
10546
Daniel Veillard87a764e2001-06-20 17:41:10 +000010547 /*
10548 * Get the 4 first bytes and decode the charset
10549 * if enc != XML_CHAR_ENCODING_NONE
10550 * plug some encoding conversion routines.
10551 */
10552 GROW
10553 start[0] = RAW;
10554 start[1] = NXT(1);
10555 start[2] = NXT(2);
10556 start[3] = NXT(3);
10557 enc = xmlDetectCharEncoding(start, 4);
10558 if (enc != XML_CHAR_ENCODING_NONE) {
10559 xmlSwitchEncoding(ctxt, enc);
10560 }
10561
Owen Taylor3473f882001-02-23 17:55:21 +000010562 /*
10563 * Parse a possible text declaration first
10564 */
Daniel Veillard8f597c32003-10-06 08:19:27 +000010565 if ((memcmp(CUR_PTR, "<?xml", 5) == 0) && (IS_BLANK(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +000010566 xmlParseTextDecl(ctxt);
10567 }
10568
10569 /*
10570 * Doing validity checking on chunk doesn't make sense
10571 */
10572 ctxt->instate = XML_PARSER_CONTENT;
10573 ctxt->validate = ctx->validate;
Daniel Veillard5f8d1a32003-03-23 21:02:00 +000010574 ctxt->valid = ctx->valid;
Owen Taylor3473f882001-02-23 17:55:21 +000010575 ctxt->loadsubset = ctx->loadsubset;
10576 ctxt->depth = ctx->depth + 1;
10577 ctxt->replaceEntities = ctx->replaceEntities;
10578 if (ctxt->validate) {
10579 ctxt->vctxt.error = ctx->vctxt.error;
10580 ctxt->vctxt.warning = ctx->vctxt.warning;
Owen Taylor3473f882001-02-23 17:55:21 +000010581 } else {
10582 ctxt->vctxt.error = NULL;
10583 ctxt->vctxt.warning = NULL;
10584 }
Daniel Veillarda9142e72001-06-19 11:07:54 +000010585 ctxt->vctxt.nodeTab = NULL;
10586 ctxt->vctxt.nodeNr = 0;
10587 ctxt->vctxt.nodeMax = 0;
10588 ctxt->vctxt.node = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000010589
10590 xmlParseContent(ctxt);
10591
Daniel Veillard5f8d1a32003-03-23 21:02:00 +000010592 ctx->validate = ctxt->validate;
10593 ctx->valid = ctxt->valid;
Owen Taylor3473f882001-02-23 17:55:21 +000010594 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010595 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010596 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010597 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010598 }
10599 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010600 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010601 }
10602
10603 if (!ctxt->wellFormed) {
10604 if (ctxt->errNo == 0)
10605 ret = 1;
10606 else
10607 ret = ctxt->errNo;
10608 } else {
Daniel Veillardcda96922001-08-21 10:56:31 +000010609 if (lst != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000010610 xmlNodePtr cur;
10611
10612 /*
10613 * Return the newly created nodeset after unlinking it from
10614 * they pseudo parent.
10615 */
10616 cur = newDoc->children->children;
Daniel Veillardcda96922001-08-21 10:56:31 +000010617 *lst = cur;
Owen Taylor3473f882001-02-23 17:55:21 +000010618 while (cur != NULL) {
10619 cur->parent = NULL;
10620 cur = cur->next;
10621 }
10622 newDoc->children->children = NULL;
10623 }
10624 ret = 0;
10625 }
10626 ctxt->sax = oldsax;
10627 xmlFreeParserCtxt(ctxt);
10628 newDoc->intSubset = NULL;
10629 newDoc->extSubset = NULL;
10630 xmlFreeDoc(newDoc);
10631
10632 return(ret);
10633}
10634
10635/**
Daniel Veillard257d9102001-05-08 10:41:44 +000010636 * xmlParseExternalEntityPrivate:
Owen Taylor3473f882001-02-23 17:55:21 +000010637 * @doc: the document the chunk pertains to
Daniel Veillarda97a19b2001-05-20 13:19:52 +000010638 * @oldctxt: the previous parser context if available
Owen Taylor3473f882001-02-23 17:55:21 +000010639 * @sax: the SAX handler bloc (possibly NULL)
10640 * @user_data: The user data returned on SAX callbacks (possibly NULL)
10641 * @depth: Used for loop detection, use 0
10642 * @URL: the URL for the entity to load
10643 * @ID: the System ID for the entity to load
10644 * @list: the return value for the set of parsed nodes
10645 *
Daniel Veillard257d9102001-05-08 10:41:44 +000010646 * Private version of xmlParseExternalEntity()
Owen Taylor3473f882001-02-23 17:55:21 +000010647 *
10648 * Returns 0 if the entity is well formed, -1 in case of args problem and
10649 * the parser error code otherwise
10650 */
10651
Daniel Veillard7d515752003-09-26 19:12:37 +000010652static xmlParserErrors
Daniel Veillarda97a19b2001-05-20 13:19:52 +000010653xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
10654 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +000010655 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +000010656 const xmlChar *ID, xmlNodePtr *list) {
Owen Taylor3473f882001-02-23 17:55:21 +000010657 xmlParserCtxtPtr ctxt;
10658 xmlDocPtr newDoc;
10659 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillard7d515752003-09-26 19:12:37 +000010660 xmlParserErrors ret = XML_ERR_OK;
Daniel Veillard87a764e2001-06-20 17:41:10 +000010661 xmlChar start[4];
10662 xmlCharEncoding enc;
Owen Taylor3473f882001-02-23 17:55:21 +000010663
10664 if (depth > 40) {
10665 return(XML_ERR_ENTITY_LOOP);
10666 }
10667
10668
10669
10670 if (list != NULL)
10671 *list = NULL;
10672 if ((URL == NULL) && (ID == NULL))
Daniel Veillard7d515752003-09-26 19:12:37 +000010673 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000010674 if (doc == NULL) /* @@ relax but check for dereferences */
Daniel Veillard7d515752003-09-26 19:12:37 +000010675 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000010676
10677
10678 ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL);
William M. Brackb670e2e2003-09-27 01:05:55 +000010679 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
Owen Taylor3473f882001-02-23 17:55:21 +000010680 ctxt->userData = ctxt;
Daniel Veillarda97a19b2001-05-20 13:19:52 +000010681 if (oldctxt != NULL) {
10682 ctxt->_private = oldctxt->_private;
10683 ctxt->loadsubset = oldctxt->loadsubset;
10684 ctxt->validate = oldctxt->validate;
10685 ctxt->external = oldctxt->external;
Daniel Veillard44e1dd02003-02-21 23:23:28 +000010686 ctxt->record_info = oldctxt->record_info;
10687 ctxt->node_seq.maximum = oldctxt->node_seq.maximum;
10688 ctxt->node_seq.length = oldctxt->node_seq.length;
10689 ctxt->node_seq.buffer = oldctxt->node_seq.buffer;
Daniel Veillarda97a19b2001-05-20 13:19:52 +000010690 } else {
10691 /*
10692 * Doing validity checking on chunk without context
10693 * doesn't make sense
10694 */
10695 ctxt->_private = NULL;
10696 ctxt->validate = 0;
10697 ctxt->external = 2;
10698 ctxt->loadsubset = 0;
10699 }
Owen Taylor3473f882001-02-23 17:55:21 +000010700 if (sax != NULL) {
10701 oldsax = ctxt->sax;
10702 ctxt->sax = sax;
10703 if (user_data != NULL)
10704 ctxt->userData = user_data;
10705 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000010706 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010707 newDoc = xmlNewDoc(BAD_CAST "1.0");
10708 if (newDoc == NULL) {
Daniel Veillard44e1dd02003-02-21 23:23:28 +000010709 ctxt->node_seq.maximum = 0;
10710 ctxt->node_seq.length = 0;
10711 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000010712 xmlFreeParserCtxt(ctxt);
Daniel Veillard7d515752003-09-26 19:12:37 +000010713 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000010714 }
10715 if (doc != NULL) {
10716 newDoc->intSubset = doc->intSubset;
10717 newDoc->extSubset = doc->extSubset;
10718 }
10719 if (doc->URL != NULL) {
10720 newDoc->URL = xmlStrdup(doc->URL);
10721 }
10722 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
10723 if (newDoc->children == NULL) {
10724 if (sax != NULL)
10725 ctxt->sax = oldsax;
Daniel Veillard44e1dd02003-02-21 23:23:28 +000010726 ctxt->node_seq.maximum = 0;
10727 ctxt->node_seq.length = 0;
10728 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000010729 xmlFreeParserCtxt(ctxt);
10730 newDoc->intSubset = NULL;
10731 newDoc->extSubset = NULL;
10732 xmlFreeDoc(newDoc);
Daniel Veillard7d515752003-09-26 19:12:37 +000010733 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000010734 }
10735 nodePush(ctxt, newDoc->children);
10736 if (doc == NULL) {
10737 ctxt->myDoc = newDoc;
10738 } else {
10739 ctxt->myDoc = doc;
10740 newDoc->children->doc = doc;
10741 }
10742
Daniel Veillard87a764e2001-06-20 17:41:10 +000010743 /*
10744 * Get the 4 first bytes and decode the charset
10745 * if enc != XML_CHAR_ENCODING_NONE
10746 * plug some encoding conversion routines.
10747 */
10748 GROW;
10749 start[0] = RAW;
10750 start[1] = NXT(1);
10751 start[2] = NXT(2);
10752 start[3] = NXT(3);
10753 enc = xmlDetectCharEncoding(start, 4);
10754 if (enc != XML_CHAR_ENCODING_NONE) {
10755 xmlSwitchEncoding(ctxt, enc);
10756 }
10757
Owen Taylor3473f882001-02-23 17:55:21 +000010758 /*
10759 * Parse a possible text declaration first
10760 */
Daniel Veillard8f597c32003-10-06 08:19:27 +000010761 if ((memcmp(CUR_PTR, "<?xml", 5) == 0) && (IS_BLANK(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +000010762 xmlParseTextDecl(ctxt);
10763 }
10764
Owen Taylor3473f882001-02-23 17:55:21 +000010765 ctxt->instate = XML_PARSER_CONTENT;
Owen Taylor3473f882001-02-23 17:55:21 +000010766 ctxt->depth = depth;
10767
10768 xmlParseContent(ctxt);
10769
Daniel Veillard561b7f82002-03-20 21:55:57 +000010770 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010771 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard561b7f82002-03-20 21:55:57 +000010772 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010773 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010774 }
10775 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010776 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010777 }
10778
10779 if (!ctxt->wellFormed) {
10780 if (ctxt->errNo == 0)
Daniel Veillard7d515752003-09-26 19:12:37 +000010781 ret = XML_ERR_INTERNAL_ERROR;
Owen Taylor3473f882001-02-23 17:55:21 +000010782 else
William M. Brack7b9154b2003-09-27 19:23:50 +000010783 ret = (xmlParserErrors)ctxt->errNo;
Owen Taylor3473f882001-02-23 17:55:21 +000010784 } else {
10785 if (list != NULL) {
10786 xmlNodePtr cur;
10787
10788 /*
10789 * Return the newly created nodeset after unlinking it from
10790 * they pseudo parent.
10791 */
10792 cur = newDoc->children->children;
10793 *list = cur;
10794 while (cur != NULL) {
10795 cur->parent = NULL;
10796 cur = cur->next;
10797 }
10798 newDoc->children->children = NULL;
10799 }
Daniel Veillard7d515752003-09-26 19:12:37 +000010800 ret = XML_ERR_OK;
Owen Taylor3473f882001-02-23 17:55:21 +000010801 }
10802 if (sax != NULL)
10803 ctxt->sax = oldsax;
Daniel Veillard0046c0f2003-02-23 13:52:30 +000010804 oldctxt->node_seq.maximum = ctxt->node_seq.maximum;
10805 oldctxt->node_seq.length = ctxt->node_seq.length;
10806 oldctxt->node_seq.buffer = ctxt->node_seq.buffer;
Daniel Veillard44e1dd02003-02-21 23:23:28 +000010807 ctxt->node_seq.maximum = 0;
10808 ctxt->node_seq.length = 0;
10809 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000010810 xmlFreeParserCtxt(ctxt);
10811 newDoc->intSubset = NULL;
10812 newDoc->extSubset = NULL;
10813 xmlFreeDoc(newDoc);
10814
10815 return(ret);
10816}
10817
Daniel Veillard81273902003-09-30 00:43:48 +000010818#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000010819/**
Daniel Veillard257d9102001-05-08 10:41:44 +000010820 * xmlParseExternalEntity:
10821 * @doc: the document the chunk pertains to
10822 * @sax: the SAX handler bloc (possibly NULL)
10823 * @user_data: The user data returned on SAX callbacks (possibly NULL)
10824 * @depth: Used for loop detection, use 0
10825 * @URL: the URL for the entity to load
10826 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +000010827 * @lst: the return value for the set of parsed nodes
Daniel Veillard257d9102001-05-08 10:41:44 +000010828 *
10829 * Parse an external general entity
10830 * An external general parsed entity is well-formed if it matches the
10831 * production labeled extParsedEnt.
10832 *
10833 * [78] extParsedEnt ::= TextDecl? content
10834 *
10835 * Returns 0 if the entity is well formed, -1 in case of args problem and
10836 * the parser error code otherwise
10837 */
10838
10839int
10840xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardcda96922001-08-21 10:56:31 +000010841 int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *lst) {
Daniel Veillarda97a19b2001-05-20 13:19:52 +000010842 return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL,
Daniel Veillardcda96922001-08-21 10:56:31 +000010843 ID, lst));
Daniel Veillard257d9102001-05-08 10:41:44 +000010844}
10845
10846/**
Daniel Veillarde020c3a2001-03-21 18:06:15 +000010847 * xmlParseBalancedChunkMemory:
Owen Taylor3473f882001-02-23 17:55:21 +000010848 * @doc: the document the chunk pertains to
10849 * @sax: the SAX handler bloc (possibly NULL)
10850 * @user_data: The user data returned on SAX callbacks (possibly NULL)
10851 * @depth: Used for loop detection, use 0
10852 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
Daniel Veillardcda96922001-08-21 10:56:31 +000010853 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +000010854 *
10855 * Parse a well-balanced chunk of an XML document
10856 * called by the parser
10857 * The allowed sequence for the Well Balanced Chunk is the one defined by
10858 * the content production in the XML grammar:
10859 *
10860 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
10861 *
10862 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
10863 * the parser error code otherwise
10864 */
10865
10866int
10867xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
Daniel Veillardcda96922001-08-21 10:56:31 +000010868 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) {
Daniel Veillard58e44c92002-08-02 22:19:49 +000010869 return xmlParseBalancedChunkMemoryRecover( doc, sax, user_data,
10870 depth, string, lst, 0 );
10871}
Daniel Veillard81273902003-09-30 00:43:48 +000010872#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillard58e44c92002-08-02 22:19:49 +000010873
10874/**
Daniel Veillard328f48c2002-11-15 15:24:34 +000010875 * xmlParseBalancedChunkMemoryInternal:
10876 * @oldctxt: the existing parsing context
10877 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
10878 * @user_data: the user data field for the parser context
10879 * @lst: the return value for the set of parsed nodes
10880 *
10881 *
10882 * Parse a well-balanced chunk of an XML document
10883 * called by the parser
10884 * The allowed sequence for the Well Balanced Chunk is the one defined by
10885 * the content production in the XML grammar:
10886 *
10887 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
10888 *
Daniel Veillard7d515752003-09-26 19:12:37 +000010889 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
10890 * error code otherwise
Daniel Veillard328f48c2002-11-15 15:24:34 +000010891 *
10892 * In case recover is set to 1, the nodelist will not be empty even if
10893 * the parsed chunk is not well balanced.
10894 */
Daniel Veillard7d515752003-09-26 19:12:37 +000010895static xmlParserErrors
Daniel Veillard328f48c2002-11-15 15:24:34 +000010896xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
10897 const xmlChar *string, void *user_data, xmlNodePtr *lst) {
10898 xmlParserCtxtPtr ctxt;
Daniel Veillard68e9e742002-11-16 15:35:11 +000010899 xmlDocPtr newDoc = NULL;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010900 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000010901 xmlNodePtr content = NULL;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010902 int size;
Daniel Veillard7d515752003-09-26 19:12:37 +000010903 xmlParserErrors ret = XML_ERR_OK;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010904
10905 if (oldctxt->depth > 40) {
10906 return(XML_ERR_ENTITY_LOOP);
10907 }
10908
10909
10910 if (lst != NULL)
10911 *lst = NULL;
10912 if (string == NULL)
William M. Brack7b9154b2003-09-27 19:23:50 +000010913 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard328f48c2002-11-15 15:24:34 +000010914
10915 size = xmlStrlen(string);
10916
10917 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
William M. Brack7b9154b2003-09-27 19:23:50 +000010918 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
Daniel Veillard328f48c2002-11-15 15:24:34 +000010919 if (user_data != NULL)
10920 ctxt->userData = user_data;
10921 else
10922 ctxt->userData = ctxt;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000010923 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
10924 ctxt->dict = oldctxt->dict;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010925
10926 oldsax = ctxt->sax;
10927 ctxt->sax = oldctxt->sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000010928 xmlDetectSAX2(ctxt);
10929
Daniel Veillarde1ca5032002-12-09 14:13:43 +000010930 ctxt->_private = oldctxt->_private;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010931 if (oldctxt->myDoc == NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000010932 newDoc = xmlNewDoc(BAD_CAST "1.0");
10933 if (newDoc == NULL) {
10934 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000010935 ctxt->dict = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000010936 xmlFreeParserCtxt(ctxt);
William M. Brack7b9154b2003-09-27 19:23:50 +000010937 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard68e9e742002-11-16 15:35:11 +000010938 }
Daniel Veillard328f48c2002-11-15 15:24:34 +000010939 ctxt->myDoc = newDoc;
Daniel Veillard68e9e742002-11-16 15:35:11 +000010940 } else {
10941 ctxt->myDoc = oldctxt->myDoc;
10942 content = ctxt->myDoc->children;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010943 }
Daniel Veillard9bc53102002-11-25 13:20:04 +000010944 ctxt->myDoc->children = xmlNewDocNode(ctxt->myDoc, NULL,
Daniel Veillard68e9e742002-11-16 15:35:11 +000010945 BAD_CAST "pseudoroot", NULL);
10946 if (ctxt->myDoc->children == NULL) {
10947 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000010948 ctxt->dict = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000010949 xmlFreeParserCtxt(ctxt);
10950 if (newDoc != NULL)
10951 xmlFreeDoc(newDoc);
William M. Brack7b9154b2003-09-27 19:23:50 +000010952 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard68e9e742002-11-16 15:35:11 +000010953 }
10954 nodePush(ctxt, ctxt->myDoc->children);
Daniel Veillard328f48c2002-11-15 15:24:34 +000010955 ctxt->instate = XML_PARSER_CONTENT;
10956 ctxt->depth = oldctxt->depth + 1;
10957
Daniel Veillard328f48c2002-11-15 15:24:34 +000010958 ctxt->validate = 0;
10959 ctxt->loadsubset = oldctxt->loadsubset;
Daniel Veillardef8dd7b2003-03-23 12:02:56 +000010960 if ((oldctxt->validate) || (oldctxt->replaceEntities != 0)) {
10961 /*
10962 * ID/IDREF registration will be done in xmlValidateElement below
10963 */
10964 ctxt->loadsubset |= XML_SKIP_IDS;
10965 }
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000010966 ctxt->dictNames = oldctxt->dictNames;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010967
Daniel Veillard68e9e742002-11-16 15:35:11 +000010968 xmlParseContent(ctxt);
Daniel Veillard328f48c2002-11-15 15:24:34 +000010969 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010970 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000010971 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010972 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000010973 }
Daniel Veillard68e9e742002-11-16 15:35:11 +000010974 if (ctxt->node != ctxt->myDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010975 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000010976 }
10977
10978 if (!ctxt->wellFormed) {
10979 if (ctxt->errNo == 0)
Daniel Veillard7d515752003-09-26 19:12:37 +000010980 ret = XML_ERR_INTERNAL_ERROR;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010981 else
William M. Brack7b9154b2003-09-27 19:23:50 +000010982 ret = (xmlParserErrors)ctxt->errNo;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010983 } else {
William M. Brack7b9154b2003-09-27 19:23:50 +000010984 ret = XML_ERR_OK;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010985 }
10986
William M. Brack7b9154b2003-09-27 19:23:50 +000010987 if ((lst != NULL) && (ret == XML_ERR_OK)) {
Daniel Veillard328f48c2002-11-15 15:24:34 +000010988 xmlNodePtr cur;
10989
10990 /*
10991 * Return the newly created nodeset after unlinking it from
10992 * they pseudo parent.
10993 */
Daniel Veillard68e9e742002-11-16 15:35:11 +000010994 cur = ctxt->myDoc->children->children;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010995 *lst = cur;
10996 while (cur != NULL) {
Daniel Veillard4432df22003-09-28 18:58:27 +000010997#ifdef LIBXML_VALID_ENABLED
Daniel Veillard8d589042003-02-04 15:07:21 +000010998 if (oldctxt->validate && oldctxt->wellFormed &&
10999 oldctxt->myDoc && oldctxt->myDoc->intSubset) {
11000 oldctxt->valid &= xmlValidateElement(&oldctxt->vctxt,
11001 oldctxt->myDoc, cur);
11002 }
Daniel Veillard4432df22003-09-28 18:58:27 +000011003#endif /* LIBXML_VALID_ENABLED */
Daniel Veillard328f48c2002-11-15 15:24:34 +000011004 cur->parent = NULL;
11005 cur = cur->next;
11006 }
Daniel Veillard68e9e742002-11-16 15:35:11 +000011007 ctxt->myDoc->children->children = NULL;
11008 }
11009 if (ctxt->myDoc != NULL) {
11010 xmlFreeNode(ctxt->myDoc->children);
11011 ctxt->myDoc->children = content;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011012 }
11013
11014 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000011015 ctxt->dict = NULL;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011016 xmlFreeParserCtxt(ctxt);
Daniel Veillard68e9e742002-11-16 15:35:11 +000011017 if (newDoc != NULL)
11018 xmlFreeDoc(newDoc);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011019
11020 return(ret);
11021}
11022
Daniel Veillard81273902003-09-30 00:43:48 +000011023#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard328f48c2002-11-15 15:24:34 +000011024/**
Daniel Veillard58e44c92002-08-02 22:19:49 +000011025 * xmlParseBalancedChunkMemoryRecover:
11026 * @doc: the document the chunk pertains to
11027 * @sax: the SAX handler bloc (possibly NULL)
11028 * @user_data: The user data returned on SAX callbacks (possibly NULL)
11029 * @depth: Used for loop detection, use 0
11030 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
11031 * @lst: the return value for the set of parsed nodes
11032 * @recover: return nodes even if the data is broken (use 0)
11033 *
11034 *
11035 * Parse a well-balanced chunk of an XML document
11036 * called by the parser
11037 * The allowed sequence for the Well Balanced Chunk is the one defined by
11038 * the content production in the XML grammar:
11039 *
11040 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
11041 *
11042 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
11043 * the parser error code otherwise
11044 *
11045 * In case recover is set to 1, the nodelist will not be empty even if
11046 * the parsed chunk is not well balanced.
11047 */
11048int
11049xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax,
11050 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst,
11051 int recover) {
Owen Taylor3473f882001-02-23 17:55:21 +000011052 xmlParserCtxtPtr ctxt;
11053 xmlDocPtr newDoc;
11054 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillard935494a2002-10-22 14:22:46 +000011055 xmlNodePtr content;
Owen Taylor3473f882001-02-23 17:55:21 +000011056 int size;
11057 int ret = 0;
11058
11059 if (depth > 40) {
11060 return(XML_ERR_ENTITY_LOOP);
11061 }
11062
11063
Daniel Veillardcda96922001-08-21 10:56:31 +000011064 if (lst != NULL)
11065 *lst = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000011066 if (string == NULL)
11067 return(-1);
11068
11069 size = xmlStrlen(string);
11070
11071 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
11072 if (ctxt == NULL) return(-1);
11073 ctxt->userData = ctxt;
11074 if (sax != NULL) {
11075 oldsax = ctxt->sax;
11076 ctxt->sax = sax;
11077 if (user_data != NULL)
11078 ctxt->userData = user_data;
11079 }
11080 newDoc = xmlNewDoc(BAD_CAST "1.0");
11081 if (newDoc == NULL) {
11082 xmlFreeParserCtxt(ctxt);
11083 return(-1);
11084 }
11085 if (doc != NULL) {
11086 newDoc->intSubset = doc->intSubset;
11087 newDoc->extSubset = doc->extSubset;
11088 }
11089 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
11090 if (newDoc->children == NULL) {
11091 if (sax != NULL)
11092 ctxt->sax = oldsax;
11093 xmlFreeParserCtxt(ctxt);
11094 newDoc->intSubset = NULL;
11095 newDoc->extSubset = NULL;
11096 xmlFreeDoc(newDoc);
11097 return(-1);
11098 }
11099 nodePush(ctxt, newDoc->children);
11100 if (doc == NULL) {
11101 ctxt->myDoc = newDoc;
11102 } else {
Daniel Veillard42766c02002-08-22 20:52:17 +000011103 ctxt->myDoc = newDoc;
Owen Taylor3473f882001-02-23 17:55:21 +000011104 newDoc->children->doc = doc;
11105 }
11106 ctxt->instate = XML_PARSER_CONTENT;
11107 ctxt->depth = depth;
11108
11109 /*
11110 * Doing validity checking on chunk doesn't make sense
11111 */
11112 ctxt->validate = 0;
11113 ctxt->loadsubset = 0;
Daniel Veillarde57ec792003-09-10 10:50:59 +000011114 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011115
Daniel Veillardb39bc392002-10-26 19:29:51 +000011116 if ( doc != NULL ){
11117 content = doc->children;
11118 doc->children = NULL;
11119 xmlParseContent(ctxt);
11120 doc->children = content;
11121 }
11122 else {
11123 xmlParseContent(ctxt);
11124 }
Owen Taylor3473f882001-02-23 17:55:21 +000011125 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011126 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011127 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011128 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011129 }
11130 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011131 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011132 }
11133
11134 if (!ctxt->wellFormed) {
11135 if (ctxt->errNo == 0)
11136 ret = 1;
11137 else
11138 ret = ctxt->errNo;
11139 } else {
Daniel Veillard58e44c92002-08-02 22:19:49 +000011140 ret = 0;
11141 }
11142
11143 if (lst != NULL && (ret == 0 || recover == 1)) {
11144 xmlNodePtr cur;
Owen Taylor3473f882001-02-23 17:55:21 +000011145
11146 /*
11147 * Return the newly created nodeset after unlinking it from
11148 * they pseudo parent.
11149 */
11150 cur = newDoc->children->children;
Daniel Veillardcda96922001-08-21 10:56:31 +000011151 *lst = cur;
Owen Taylor3473f882001-02-23 17:55:21 +000011152 while (cur != NULL) {
11153 cur->parent = NULL;
11154 cur = cur->next;
11155 }
11156 newDoc->children->children = NULL;
11157 }
Daniel Veillard58e44c92002-08-02 22:19:49 +000011158
Owen Taylor3473f882001-02-23 17:55:21 +000011159 if (sax != NULL)
11160 ctxt->sax = oldsax;
11161 xmlFreeParserCtxt(ctxt);
11162 newDoc->intSubset = NULL;
11163 newDoc->extSubset = NULL;
11164 xmlFreeDoc(newDoc);
11165
11166 return(ret);
11167}
11168
11169/**
11170 * xmlSAXParseEntity:
11171 * @sax: the SAX handler block
11172 * @filename: the filename
11173 *
11174 * parse an XML external entity out of context and build a tree.
11175 * It use the given SAX function block to handle the parsing callback.
11176 * If sax is NULL, fallback to the default DOM tree building routines.
11177 *
11178 * [78] extParsedEnt ::= TextDecl? content
11179 *
11180 * This correspond to a "Well Balanced" chunk
11181 *
11182 * Returns the resulting document tree
11183 */
11184
11185xmlDocPtr
11186xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
11187 xmlDocPtr ret;
11188 xmlParserCtxtPtr ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000011189
11190 ctxt = xmlCreateFileParserCtxt(filename);
11191 if (ctxt == NULL) {
11192 return(NULL);
11193 }
11194 if (sax != NULL) {
11195 if (ctxt->sax != NULL)
11196 xmlFree(ctxt->sax);
11197 ctxt->sax = sax;
11198 ctxt->userData = NULL;
11199 }
11200
Owen Taylor3473f882001-02-23 17:55:21 +000011201 xmlParseExtParsedEnt(ctxt);
11202
11203 if (ctxt->wellFormed)
11204 ret = ctxt->myDoc;
11205 else {
11206 ret = NULL;
11207 xmlFreeDoc(ctxt->myDoc);
11208 ctxt->myDoc = NULL;
11209 }
11210 if (sax != NULL)
11211 ctxt->sax = NULL;
11212 xmlFreeParserCtxt(ctxt);
11213
11214 return(ret);
11215}
11216
11217/**
11218 * xmlParseEntity:
11219 * @filename: the filename
11220 *
11221 * parse an XML external entity out of context and build a tree.
11222 *
11223 * [78] extParsedEnt ::= TextDecl? content
11224 *
11225 * This correspond to a "Well Balanced" chunk
11226 *
11227 * Returns the resulting document tree
11228 */
11229
11230xmlDocPtr
11231xmlParseEntity(const char *filename) {
11232 return(xmlSAXParseEntity(NULL, filename));
11233}
Daniel Veillard81273902003-09-30 00:43:48 +000011234#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000011235
11236/**
11237 * xmlCreateEntityParserCtxt:
11238 * @URL: the entity URL
11239 * @ID: the entity PUBLIC ID
Daniel Veillardcbaf3992001-12-31 16:16:02 +000011240 * @base: a possible base for the target URI
Owen Taylor3473f882001-02-23 17:55:21 +000011241 *
11242 * Create a parser context for an external entity
11243 * Automatic support for ZLIB/Compress compressed document is provided
11244 * by default if found at compile-time.
11245 *
11246 * Returns the new parser context or NULL
11247 */
11248xmlParserCtxtPtr
11249xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
11250 const xmlChar *base) {
11251 xmlParserCtxtPtr ctxt;
11252 xmlParserInputPtr inputStream;
11253 char *directory = NULL;
11254 xmlChar *uri;
11255
11256 ctxt = xmlNewParserCtxt();
11257 if (ctxt == NULL) {
11258 return(NULL);
11259 }
11260
11261 uri = xmlBuildURI(URL, base);
11262
11263 if (uri == NULL) {
11264 inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
11265 if (inputStream == NULL) {
11266 xmlFreeParserCtxt(ctxt);
11267 return(NULL);
11268 }
11269
11270 inputPush(ctxt, inputStream);
11271
11272 if ((ctxt->directory == NULL) && (directory == NULL))
11273 directory = xmlParserGetDirectory((char *)URL);
11274 if ((ctxt->directory == NULL) && (directory != NULL))
11275 ctxt->directory = directory;
11276 } else {
11277 inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt);
11278 if (inputStream == NULL) {
11279 xmlFree(uri);
11280 xmlFreeParserCtxt(ctxt);
11281 return(NULL);
11282 }
11283
11284 inputPush(ctxt, inputStream);
11285
11286 if ((ctxt->directory == NULL) && (directory == NULL))
11287 directory = xmlParserGetDirectory((char *)uri);
11288 if ((ctxt->directory == NULL) && (directory != NULL))
11289 ctxt->directory = directory;
11290 xmlFree(uri);
11291 }
Owen Taylor3473f882001-02-23 17:55:21 +000011292 return(ctxt);
11293}
11294
11295/************************************************************************
11296 * *
11297 * Front ends when parsing from a file *
11298 * *
11299 ************************************************************************/
11300
11301/**
11302 * xmlCreateFileParserCtxt:
11303 * @filename: the filename
11304 *
11305 * Create a parser context for a file content.
11306 * Automatic support for ZLIB/Compress compressed document is provided
11307 * by default if found at compile-time.
11308 *
11309 * Returns the new parser context or NULL
11310 */
11311xmlParserCtxtPtr
11312xmlCreateFileParserCtxt(const char *filename)
11313{
11314 xmlParserCtxtPtr ctxt;
11315 xmlParserInputPtr inputStream;
Owen Taylor3473f882001-02-23 17:55:21 +000011316 char *directory = NULL;
11317
Owen Taylor3473f882001-02-23 17:55:21 +000011318 ctxt = xmlNewParserCtxt();
11319 if (ctxt == NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +000011320 xmlErrMemory(NULL, "cannot allocate parser context");
Owen Taylor3473f882001-02-23 17:55:21 +000011321 return(NULL);
11322 }
11323
Igor Zlatkovicce076162003-02-23 13:39:39 +000011324
Daniel Veillard4e9b1bc2003-06-09 10:30:33 +000011325 inputStream = xmlLoadExternalEntity(filename, NULL, ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011326 if (inputStream == NULL) {
11327 xmlFreeParserCtxt(ctxt);
11328 return(NULL);
11329 }
11330
Owen Taylor3473f882001-02-23 17:55:21 +000011331 inputPush(ctxt, inputStream);
11332 if ((ctxt->directory == NULL) && (directory == NULL))
Igor Zlatkovic5f9fada2003-02-19 14:51:00 +000011333 directory = xmlParserGetDirectory(filename);
Owen Taylor3473f882001-02-23 17:55:21 +000011334 if ((ctxt->directory == NULL) && (directory != NULL))
11335 ctxt->directory = directory;
11336
11337 return(ctxt);
11338}
11339
Daniel Veillard81273902003-09-30 00:43:48 +000011340#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000011341/**
Daniel Veillarda293c322001-10-02 13:54:14 +000011342 * xmlSAXParseFileWithData:
Owen Taylor3473f882001-02-23 17:55:21 +000011343 * @sax: the SAX handler block
11344 * @filename: the filename
11345 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
11346 * documents
Daniel Veillarda293c322001-10-02 13:54:14 +000011347 * @data: the userdata
Owen Taylor3473f882001-02-23 17:55:21 +000011348 *
11349 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
11350 * compressed document is provided by default if found at compile-time.
11351 * It use the given SAX function block to handle the parsing callback.
11352 * If sax is NULL, fallback to the default DOM tree building routines.
11353 *
Daniel Veillarde19fc232002-04-22 16:01:24 +000011354 * User data (void *) is stored within the parser context in the
11355 * context's _private member, so it is available nearly everywhere in libxml
Daniel Veillarda293c322001-10-02 13:54:14 +000011356 *
Owen Taylor3473f882001-02-23 17:55:21 +000011357 * Returns the resulting document tree
11358 */
11359
11360xmlDocPtr
Daniel Veillarda293c322001-10-02 13:54:14 +000011361xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename,
11362 int recovery, void *data) {
Owen Taylor3473f882001-02-23 17:55:21 +000011363 xmlDocPtr ret;
11364 xmlParserCtxtPtr ctxt;
11365 char *directory = NULL;
11366
Daniel Veillard635ef722001-10-29 11:48:19 +000011367 xmlInitParser();
11368
Owen Taylor3473f882001-02-23 17:55:21 +000011369 ctxt = xmlCreateFileParserCtxt(filename);
11370 if (ctxt == NULL) {
11371 return(NULL);
11372 }
11373 if (sax != NULL) {
11374 if (ctxt->sax != NULL)
11375 xmlFree(ctxt->sax);
11376 ctxt->sax = sax;
Owen Taylor3473f882001-02-23 17:55:21 +000011377 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000011378 xmlDetectSAX2(ctxt);
Daniel Veillarda293c322001-10-02 13:54:14 +000011379 if (data!=NULL) {
11380 ctxt->_private=data;
11381 }
Owen Taylor3473f882001-02-23 17:55:21 +000011382
11383 if ((ctxt->directory == NULL) && (directory == NULL))
11384 directory = xmlParserGetDirectory(filename);
11385 if ((ctxt->directory == NULL) && (directory != NULL))
11386 ctxt->directory = (char *) xmlStrdup((xmlChar *) directory);
11387
Daniel Veillarddad3f682002-11-17 16:47:27 +000011388 ctxt->recovery = recovery;
11389
Owen Taylor3473f882001-02-23 17:55:21 +000011390 xmlParseDocument(ctxt);
11391
William M. Brackc07329e2003-09-08 01:57:30 +000011392 if ((ctxt->wellFormed) || recovery) {
11393 ret = ctxt->myDoc;
11394 if (ctxt->input->buf->compressed > 0)
11395 ret->compression = 9;
11396 else
11397 ret->compression = ctxt->input->buf->compressed;
11398 }
Owen Taylor3473f882001-02-23 17:55:21 +000011399 else {
11400 ret = NULL;
11401 xmlFreeDoc(ctxt->myDoc);
11402 ctxt->myDoc = NULL;
11403 }
11404 if (sax != NULL)
11405 ctxt->sax = NULL;
11406 xmlFreeParserCtxt(ctxt);
11407
11408 return(ret);
11409}
11410
11411/**
Daniel Veillarda293c322001-10-02 13:54:14 +000011412 * xmlSAXParseFile:
11413 * @sax: the SAX handler block
11414 * @filename: the filename
11415 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
11416 * documents
11417 *
11418 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
11419 * compressed document is provided by default if found at compile-time.
11420 * It use the given SAX function block to handle the parsing callback.
11421 * If sax is NULL, fallback to the default DOM tree building routines.
11422 *
11423 * Returns the resulting document tree
11424 */
11425
11426xmlDocPtr
11427xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
11428 int recovery) {
11429 return(xmlSAXParseFileWithData(sax,filename,recovery,NULL));
11430}
11431
11432/**
Owen Taylor3473f882001-02-23 17:55:21 +000011433 * xmlRecoverDoc:
11434 * @cur: a pointer to an array of xmlChar
11435 *
11436 * parse an XML in-memory document and build a tree.
11437 * In the case the document is not Well Formed, a tree is built anyway
11438 *
11439 * Returns the resulting document tree
11440 */
11441
11442xmlDocPtr
11443xmlRecoverDoc(xmlChar *cur) {
11444 return(xmlSAXParseDoc(NULL, cur, 1));
11445}
11446
11447/**
11448 * xmlParseFile:
11449 * @filename: the filename
11450 *
11451 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
11452 * compressed document is provided by default if found at compile-time.
11453 *
Daniel Veillard5d96fff2001-08-31 14:55:30 +000011454 * Returns the resulting document tree if the file was wellformed,
11455 * NULL otherwise.
Owen Taylor3473f882001-02-23 17:55:21 +000011456 */
11457
11458xmlDocPtr
11459xmlParseFile(const char *filename) {
11460 return(xmlSAXParseFile(NULL, filename, 0));
11461}
11462
11463/**
11464 * xmlRecoverFile:
11465 * @filename: the filename
11466 *
11467 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
11468 * compressed document is provided by default if found at compile-time.
11469 * In the case the document is not Well Formed, a tree is built anyway
11470 *
11471 * Returns the resulting document tree
11472 */
11473
11474xmlDocPtr
11475xmlRecoverFile(const char *filename) {
11476 return(xmlSAXParseFile(NULL, filename, 1));
11477}
11478
11479
11480/**
11481 * xmlSetupParserForBuffer:
11482 * @ctxt: an XML parser context
11483 * @buffer: a xmlChar * buffer
11484 * @filename: a file name
11485 *
11486 * Setup the parser context to parse a new buffer; Clears any prior
11487 * contents from the parser context. The buffer parameter must not be
11488 * NULL, but the filename parameter can be
11489 */
11490void
11491xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
11492 const char* filename)
11493{
11494 xmlParserInputPtr input;
11495
11496 input = xmlNewInputStream(ctxt);
11497 if (input == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +000011498 xmlErrMemory(NULL, "parsing new buffer: out of memory\n");
Owen Taylor3473f882001-02-23 17:55:21 +000011499 xmlFree(ctxt);
11500 return;
11501 }
11502
11503 xmlClearParserCtxt(ctxt);
11504 if (filename != NULL)
Daniel Veillardc3ca5ba2003-05-09 22:26:28 +000011505 input->filename = (char *) xmlCanonicPath((const xmlChar *)filename);
Owen Taylor3473f882001-02-23 17:55:21 +000011506 input->base = buffer;
11507 input->cur = buffer;
Daniel Veillard48b2f892001-02-25 16:11:03 +000011508 input->end = &buffer[xmlStrlen(buffer)];
Owen Taylor3473f882001-02-23 17:55:21 +000011509 inputPush(ctxt, input);
11510}
11511
11512/**
11513 * xmlSAXUserParseFile:
11514 * @sax: a SAX handler
11515 * @user_data: The user data returned on SAX callbacks
11516 * @filename: a file name
11517 *
11518 * parse an XML file and call the given SAX handler routines.
11519 * Automatic support for ZLIB/Compress compressed document is provided
11520 *
11521 * Returns 0 in case of success or a error number otherwise
11522 */
11523int
11524xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
11525 const char *filename) {
11526 int ret = 0;
11527 xmlParserCtxtPtr ctxt;
11528
11529 ctxt = xmlCreateFileParserCtxt(filename);
11530 if (ctxt == NULL) return -1;
Daniel Veillard81273902003-09-30 00:43:48 +000011531#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard092643b2003-09-25 14:29:29 +000011532 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Daniel Veillard81273902003-09-30 00:43:48 +000011533#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000011534 xmlFree(ctxt->sax);
11535 ctxt->sax = sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000011536 xmlDetectSAX2(ctxt);
11537
Owen Taylor3473f882001-02-23 17:55:21 +000011538 if (user_data != NULL)
11539 ctxt->userData = user_data;
11540
11541 xmlParseDocument(ctxt);
11542
11543 if (ctxt->wellFormed)
11544 ret = 0;
11545 else {
11546 if (ctxt->errNo != 0)
11547 ret = ctxt->errNo;
11548 else
11549 ret = -1;
11550 }
11551 if (sax != NULL)
11552 ctxt->sax = NULL;
11553 xmlFreeParserCtxt(ctxt);
11554
11555 return ret;
11556}
Daniel Veillard81273902003-09-30 00:43:48 +000011557#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000011558
11559/************************************************************************
11560 * *
11561 * Front ends when parsing from memory *
11562 * *
11563 ************************************************************************/
11564
11565/**
11566 * xmlCreateMemoryParserCtxt:
11567 * @buffer: a pointer to a char array
11568 * @size: the size of the array
11569 *
11570 * Create a parser context for an XML in-memory document.
11571 *
11572 * Returns the new parser context or NULL
11573 */
11574xmlParserCtxtPtr
Daniel Veillardfd7ddca2001-05-16 10:57:35 +000011575xmlCreateMemoryParserCtxt(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000011576 xmlParserCtxtPtr ctxt;
11577 xmlParserInputPtr input;
11578 xmlParserInputBufferPtr buf;
11579
11580 if (buffer == NULL)
11581 return(NULL);
11582 if (size <= 0)
11583 return(NULL);
11584
11585 ctxt = xmlNewParserCtxt();
11586 if (ctxt == NULL)
11587 return(NULL);
11588
Daniel Veillard53350552003-09-18 13:35:51 +000011589 /* TODO: xmlParserInputBufferCreateStatic, requires some serious changes */
Owen Taylor3473f882001-02-23 17:55:21 +000011590 buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
Daniel Veillarda7e05b42002-11-19 08:11:14 +000011591 if (buf == NULL) {
11592 xmlFreeParserCtxt(ctxt);
11593 return(NULL);
11594 }
Owen Taylor3473f882001-02-23 17:55:21 +000011595
11596 input = xmlNewInputStream(ctxt);
11597 if (input == NULL) {
Daniel Veillarda7e05b42002-11-19 08:11:14 +000011598 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000011599 xmlFreeParserCtxt(ctxt);
11600 return(NULL);
11601 }
11602
11603 input->filename = NULL;
11604 input->buf = buf;
11605 input->base = input->buf->buffer->content;
11606 input->cur = input->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +000011607 input->end = &input->buf->buffer->content[input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000011608
11609 inputPush(ctxt, input);
11610 return(ctxt);
11611}
11612
Daniel Veillard81273902003-09-30 00:43:48 +000011613#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000011614/**
Daniel Veillard8606bbb2002-11-12 12:36:52 +000011615 * xmlSAXParseMemoryWithData:
11616 * @sax: the SAX handler block
11617 * @buffer: an pointer to a char array
11618 * @size: the size of the array
11619 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
11620 * documents
11621 * @data: the userdata
11622 *
11623 * parse an XML in-memory block and use the given SAX function block
11624 * to handle the parsing callback. If sax is NULL, fallback to the default
11625 * DOM tree building routines.
11626 *
11627 * User data (void *) is stored within the parser context in the
11628 * context's _private member, so it is available nearly everywhere in libxml
11629 *
11630 * Returns the resulting document tree
11631 */
11632
11633xmlDocPtr
11634xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer,
11635 int size, int recovery, void *data) {
11636 xmlDocPtr ret;
11637 xmlParserCtxtPtr ctxt;
11638
11639 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
11640 if (ctxt == NULL) return(NULL);
11641 if (sax != NULL) {
11642 if (ctxt->sax != NULL)
11643 xmlFree(ctxt->sax);
11644 ctxt->sax = sax;
11645 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000011646 xmlDetectSAX2(ctxt);
Daniel Veillard8606bbb2002-11-12 12:36:52 +000011647 if (data!=NULL) {
11648 ctxt->_private=data;
11649 }
11650
Daniel Veillardadba5f12003-04-04 16:09:01 +000011651 ctxt->recovery = recovery;
11652
Daniel Veillard8606bbb2002-11-12 12:36:52 +000011653 xmlParseDocument(ctxt);
11654
11655 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
11656 else {
11657 ret = NULL;
11658 xmlFreeDoc(ctxt->myDoc);
11659 ctxt->myDoc = NULL;
11660 }
11661 if (sax != NULL)
11662 ctxt->sax = NULL;
11663 xmlFreeParserCtxt(ctxt);
11664
11665 return(ret);
11666}
11667
11668/**
Owen Taylor3473f882001-02-23 17:55:21 +000011669 * xmlSAXParseMemory:
11670 * @sax: the SAX handler block
11671 * @buffer: an pointer to a char array
11672 * @size: the size of the array
11673 * @recovery: work in recovery mode, i.e. tries to read not Well Formed
11674 * documents
11675 *
11676 * parse an XML in-memory block and use the given SAX function block
11677 * to handle the parsing callback. If sax is NULL, fallback to the default
11678 * DOM tree building routines.
11679 *
11680 * Returns the resulting document tree
11681 */
11682xmlDocPtr
Daniel Veillard50822cb2001-07-26 20:05:51 +000011683xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer,
11684 int size, int recovery) {
Daniel Veillard8606bbb2002-11-12 12:36:52 +000011685 return xmlSAXParseMemoryWithData(sax, buffer, size, recovery, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011686}
11687
11688/**
11689 * xmlParseMemory:
11690 * @buffer: an pointer to a char array
11691 * @size: the size of the array
11692 *
11693 * parse an XML in-memory block and build a tree.
11694 *
11695 * Returns the resulting document tree
11696 */
11697
Daniel Veillard50822cb2001-07-26 20:05:51 +000011698xmlDocPtr xmlParseMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000011699 return(xmlSAXParseMemory(NULL, buffer, size, 0));
11700}
11701
11702/**
11703 * xmlRecoverMemory:
11704 * @buffer: an pointer to a char array
11705 * @size: the size of the array
11706 *
11707 * parse an XML in-memory block and build a tree.
11708 * In the case the document is not Well Formed, a tree is built anyway
11709 *
11710 * Returns the resulting document tree
11711 */
11712
Daniel Veillard50822cb2001-07-26 20:05:51 +000011713xmlDocPtr xmlRecoverMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000011714 return(xmlSAXParseMemory(NULL, buffer, size, 1));
11715}
11716
11717/**
11718 * xmlSAXUserParseMemory:
11719 * @sax: a SAX handler
11720 * @user_data: The user data returned on SAX callbacks
11721 * @buffer: an in-memory XML document input
11722 * @size: the length of the XML document in bytes
11723 *
11724 * A better SAX parsing routine.
11725 * parse an XML in-memory buffer and call the given SAX handler routines.
11726 *
11727 * Returns 0 in case of success or a error number otherwise
11728 */
11729int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardfd7ddca2001-05-16 10:57:35 +000011730 const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000011731 int ret = 0;
11732 xmlParserCtxtPtr ctxt;
11733 xmlSAXHandlerPtr oldsax = NULL;
11734
Daniel Veillard9e923512002-08-14 08:48:52 +000011735 if (sax == NULL) return -1;
Owen Taylor3473f882001-02-23 17:55:21 +000011736 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
11737 if (ctxt == NULL) return -1;
Daniel Veillard9e923512002-08-14 08:48:52 +000011738 oldsax = ctxt->sax;
11739 ctxt->sax = sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000011740 xmlDetectSAX2(ctxt);
Daniel Veillard30211a02001-04-26 09:33:18 +000011741 if (user_data != NULL)
11742 ctxt->userData = user_data;
Owen Taylor3473f882001-02-23 17:55:21 +000011743
11744 xmlParseDocument(ctxt);
11745
11746 if (ctxt->wellFormed)
11747 ret = 0;
11748 else {
11749 if (ctxt->errNo != 0)
11750 ret = ctxt->errNo;
11751 else
11752 ret = -1;
11753 }
Daniel Veillard9e923512002-08-14 08:48:52 +000011754 ctxt->sax = oldsax;
Owen Taylor3473f882001-02-23 17:55:21 +000011755 xmlFreeParserCtxt(ctxt);
11756
11757 return ret;
11758}
Daniel Veillard81273902003-09-30 00:43:48 +000011759#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000011760
11761/**
11762 * xmlCreateDocParserCtxt:
11763 * @cur: a pointer to an array of xmlChar
11764 *
11765 * Creates a parser context for an XML in-memory document.
11766 *
11767 * Returns the new parser context or NULL
11768 */
11769xmlParserCtxtPtr
Daniel Veillard16fa96c2003-09-23 21:50:54 +000011770xmlCreateDocParserCtxt(const xmlChar *cur) {
Owen Taylor3473f882001-02-23 17:55:21 +000011771 int len;
11772
11773 if (cur == NULL)
11774 return(NULL);
11775 len = xmlStrlen(cur);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000011776 return(xmlCreateMemoryParserCtxt((const char *)cur, len));
Owen Taylor3473f882001-02-23 17:55:21 +000011777}
11778
Daniel Veillard81273902003-09-30 00:43:48 +000011779#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000011780/**
11781 * xmlSAXParseDoc:
11782 * @sax: the SAX handler block
11783 * @cur: a pointer to an array of xmlChar
11784 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
11785 * documents
11786 *
11787 * parse an XML in-memory document and build a tree.
11788 * It use the given SAX function block to handle the parsing callback.
11789 * If sax is NULL, fallback to the default DOM tree building routines.
11790 *
11791 * Returns the resulting document tree
11792 */
11793
11794xmlDocPtr
11795xmlSAXParseDoc(xmlSAXHandlerPtr sax, xmlChar *cur, int recovery) {
11796 xmlDocPtr ret;
11797 xmlParserCtxtPtr ctxt;
11798
11799 if (cur == NULL) return(NULL);
11800
11801
11802 ctxt = xmlCreateDocParserCtxt(cur);
11803 if (ctxt == NULL) return(NULL);
11804 if (sax != NULL) {
11805 ctxt->sax = sax;
11806 ctxt->userData = NULL;
11807 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000011808 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011809
11810 xmlParseDocument(ctxt);
11811 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
11812 else {
11813 ret = NULL;
11814 xmlFreeDoc(ctxt->myDoc);
11815 ctxt->myDoc = NULL;
11816 }
11817 if (sax != NULL)
11818 ctxt->sax = NULL;
11819 xmlFreeParserCtxt(ctxt);
11820
11821 return(ret);
11822}
11823
11824/**
11825 * xmlParseDoc:
11826 * @cur: a pointer to an array of xmlChar
11827 *
11828 * parse an XML in-memory document and build a tree.
11829 *
11830 * Returns the resulting document tree
11831 */
11832
11833xmlDocPtr
11834xmlParseDoc(xmlChar *cur) {
11835 return(xmlSAXParseDoc(NULL, cur, 0));
11836}
Daniel Veillard81273902003-09-30 00:43:48 +000011837#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000011838
Daniel Veillard81273902003-09-30 00:43:48 +000011839#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +000011840/************************************************************************
11841 * *
11842 * Specific function to keep track of entities references *
11843 * and used by the XSLT debugger *
11844 * *
11845 ************************************************************************/
11846
11847static xmlEntityReferenceFunc xmlEntityRefFunc = NULL;
11848
11849/**
11850 * xmlAddEntityReference:
11851 * @ent : A valid entity
11852 * @firstNode : A valid first node for children of entity
11853 * @lastNode : A valid last node of children entity
11854 *
11855 * Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY
11856 */
11857static void
11858xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
11859 xmlNodePtr lastNode)
11860{
11861 if (xmlEntityRefFunc != NULL) {
11862 (*xmlEntityRefFunc) (ent, firstNode, lastNode);
11863 }
11864}
11865
11866
11867/**
11868 * xmlSetEntityReferenceFunc:
Daniel Veillard01c13b52002-12-10 15:19:08 +000011869 * @func: A valid function
Daniel Veillard8107a222002-01-13 14:10:10 +000011870 *
11871 * Set the function to call call back when a xml reference has been made
11872 */
11873void
11874xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func)
11875{
11876 xmlEntityRefFunc = func;
11877}
Daniel Veillard81273902003-09-30 00:43:48 +000011878#endif /* LIBXML_LEGACY_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000011879
11880/************************************************************************
11881 * *
11882 * Miscellaneous *
11883 * *
11884 ************************************************************************/
11885
11886#ifdef LIBXML_XPATH_ENABLED
11887#include <libxml/xpath.h>
11888#endif
11889
Daniel Veillarddb5850a2002-01-18 11:49:26 +000011890extern void xmlGenericErrorDefaultFunc(void *ctx, const char *msg, ...);
Owen Taylor3473f882001-02-23 17:55:21 +000011891static int xmlParserInitialized = 0;
11892
11893/**
11894 * xmlInitParser:
11895 *
11896 * Initialization function for the XML parser.
11897 * This is not reentrant. Call once before processing in case of
11898 * use in multithreaded programs.
11899 */
11900
11901void
11902xmlInitParser(void) {
Daniel Veillard3c01b1d2001-10-17 15:58:35 +000011903 if (xmlParserInitialized != 0)
11904 return;
Owen Taylor3473f882001-02-23 17:55:21 +000011905
Daniel Veillarddb5850a2002-01-18 11:49:26 +000011906 if ((xmlGenericError == xmlGenericErrorDefaultFunc) ||
11907 (xmlGenericError == NULL))
11908 initGenericErrorDefaultFunc(NULL);
Daniel Veillard781ac8b2003-05-15 22:11:36 +000011909 xmlInitGlobals();
Daniel Veillardd0463562001-10-13 09:15:48 +000011910 xmlInitThreads();
Daniel Veillard6f350292001-10-14 09:56:15 +000011911 xmlInitMemory();
Owen Taylor3473f882001-02-23 17:55:21 +000011912 xmlInitCharEncodingHandlers();
Owen Taylor3473f882001-02-23 17:55:21 +000011913 xmlDefaultSAXHandlerInit();
11914 xmlRegisterDefaultInputCallbacks();
Daniel Veillarda9cce9c2003-09-29 13:20:24 +000011915#ifdef LIBXML_OUTPUT_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000011916 xmlRegisterDefaultOutputCallbacks();
Daniel Veillarda9cce9c2003-09-29 13:20:24 +000011917#endif /* LIBXML_OUTPUT_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000011918#ifdef LIBXML_HTML_ENABLED
11919 htmlInitAutoClose();
11920 htmlDefaultSAXHandlerInit();
11921#endif
11922#ifdef LIBXML_XPATH_ENABLED
11923 xmlXPathInit();
11924#endif
11925 xmlParserInitialized = 1;
11926}
11927
11928/**
11929 * xmlCleanupParser:
11930 *
11931 * Cleanup function for the XML parser. It tries to reclaim all
11932 * parsing related global memory allocated for the parser processing.
11933 * It doesn't deallocate any document related memory. Calling this
11934 * function should not prevent reusing the parser.
Daniel Veillard7424eb62003-01-24 14:14:52 +000011935 * One should call xmlCleanupParser() only when the process has
11936 * finished using the library or XML document built with it.
Owen Taylor3473f882001-02-23 17:55:21 +000011937 */
11938
11939void
11940xmlCleanupParser(void) {
Daniel Veillard7fb801f2003-08-17 21:07:26 +000011941 if (!xmlParserInitialized)
11942 return;
11943
Owen Taylor3473f882001-02-23 17:55:21 +000011944 xmlCleanupCharEncodingHandlers();
Daniel Veillarde2940dd2001-08-22 00:06:49 +000011945#ifdef LIBXML_CATALOG_ENABLED
11946 xmlCatalogCleanup();
11947#endif
Daniel Veillardd0463562001-10-13 09:15:48 +000011948 xmlCleanupThreads();
Daniel Veillard781ac8b2003-05-15 22:11:36 +000011949 xmlCleanupGlobals();
Daniel Veillard2b8c4a12003-10-02 22:28:19 +000011950 xmlResetLastError();
Daniel Veillardd0463562001-10-13 09:15:48 +000011951 xmlParserInitialized = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000011952}
Daniel Veillard16fa96c2003-09-23 21:50:54 +000011953
11954/************************************************************************
11955 * *
11956 * New set (2.6.0) of simpler and more flexible APIs *
11957 * *
11958 ************************************************************************/
11959
/**
 * DICT_FREE:
 * @str:  a string
 *
 * Free a string if it is not owned by the "dict" dictionary in the
 * current scope. A variable named dict must be visible where the
 * macro is used; a NULL dict means the string is always freed.
 * A NULL @str is ignored. Note that @str is evaluated more than once.
 *
 * The body is wrapped in do { } while (0) so that the macro expands to a
 * single statement and can safely be used as the body of an if/else.
 */
#define DICT_FREE(str)						\
    do {							\
	if ((str) && ((!dict) ||				\
	    (xmlDictOwns(dict, (const xmlChar *)(str)) == 0)))	\
	    xmlFree((char *)(str));				\
    } while (0)
11971
11972/**
Daniel Veillard16fa96c2003-09-23 21:50:54 +000011973 * xmlCtxtReset:
11974 * @ctxt: an XML parser context
11975 *
11976 * Reset a parser context
11977 */
11978void
11979xmlCtxtReset(xmlParserCtxtPtr ctxt)
11980{
11981 xmlParserInputPtr input;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000011982 xmlDictPtr dict = ctxt->dict;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000011983
11984 while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
11985 xmlFreeInputStream(input);
11986 }
11987 ctxt->inputNr = 0;
11988 ctxt->input = NULL;
11989
11990 ctxt->spaceNr = 0;
11991 ctxt->spaceTab[0] = -1;
11992 ctxt->space = &ctxt->spaceTab[0];
11993
11994
11995 ctxt->nodeNr = 0;
11996 ctxt->node = NULL;
11997
11998 ctxt->nameNr = 0;
11999 ctxt->name = NULL;
12000
Daniel Veillarde96a2a42003-09-24 21:23:56 +000012001 DICT_FREE(ctxt->version);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012002 ctxt->version = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000012003 DICT_FREE(ctxt->encoding);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012004 ctxt->encoding = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000012005 DICT_FREE(ctxt->directory);
12006 ctxt->directory = NULL;
12007 DICT_FREE(ctxt->extSubURI);
12008 ctxt->extSubURI = NULL;
12009 DICT_FREE(ctxt->extSubSystem);
12010 ctxt->extSubSystem = NULL;
12011 if (ctxt->myDoc != NULL)
12012 xmlFreeDoc(ctxt->myDoc);
12013 ctxt->myDoc = NULL;
12014
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012015 ctxt->standalone = -1;
12016 ctxt->hasExternalSubset = 0;
12017 ctxt->hasPErefs = 0;
12018 ctxt->html = 0;
12019 ctxt->external = 0;
12020 ctxt->instate = XML_PARSER_START;
12021 ctxt->token = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012022
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012023 ctxt->wellFormed = 1;
12024 ctxt->nsWellFormed = 1;
12025 ctxt->valid = 1;
12026 ctxt->vctxt.userData = ctxt;
12027 ctxt->vctxt.error = xmlParserValidityError;
12028 ctxt->vctxt.warning = xmlParserValidityWarning;
12029 ctxt->record_info = 0;
12030 ctxt->nbChars = 0;
12031 ctxt->checkIndex = 0;
12032 ctxt->inSubset = 0;
12033 ctxt->errNo = XML_ERR_OK;
12034 ctxt->depth = 0;
12035 ctxt->charset = XML_CHAR_ENCODING_UTF8;
12036 ctxt->catalogs = NULL;
12037 xmlInitNodeInfoSeq(&ctxt->node_seq);
12038
12039 if (ctxt->attsDefault != NULL) {
12040 xmlHashFree(ctxt->attsDefault, (xmlHashDeallocator) xmlFree);
12041 ctxt->attsDefault = NULL;
12042 }
12043 if (ctxt->attsSpecial != NULL) {
12044 xmlHashFree(ctxt->attsSpecial, NULL);
12045 ctxt->attsSpecial = NULL;
12046 }
12047
Daniel Veillard4432df22003-09-28 18:58:27 +000012048#ifdef LIBXML_CATALOG_ENABLED
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012049 if (ctxt->catalogs != NULL)
12050 xmlCatalogFreeLocal(ctxt->catalogs);
Daniel Veillard4432df22003-09-28 18:58:27 +000012051#endif
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012052}
12053
12054/**
12055 * xmlCtxtUseOptions:
12056 * @ctxt: an XML parser context
12057 * @options: a combination of xmlParserOption(s)
12058 *
12059 * Applies the options to the parser context
12060 *
12061 * Returns 0 in case of success, the set of unknown or unimplemented options
12062 * in case of error.
12063 */
12064int
12065xmlCtxtUseOptions(xmlParserCtxtPtr ctxt, int options)
12066{
12067 if (options & XML_PARSE_RECOVER) {
12068 ctxt->recovery = 1;
12069 options -= XML_PARSE_RECOVER;
12070 } else
12071 ctxt->recovery = 0;
12072 if (options & XML_PARSE_DTDLOAD) {
12073 ctxt->loadsubset = XML_DETECT_IDS;
12074 options -= XML_PARSE_DTDLOAD;
12075 } else
12076 ctxt->loadsubset = 0;
12077 if (options & XML_PARSE_DTDATTR) {
12078 ctxt->loadsubset |= XML_COMPLETE_ATTRS;
12079 options -= XML_PARSE_DTDATTR;
12080 }
12081 if (options & XML_PARSE_NOENT) {
12082 ctxt->replaceEntities = 1;
12083 /* ctxt->loadsubset |= XML_DETECT_IDS; */
12084 options -= XML_PARSE_NOENT;
12085 } else
12086 ctxt->replaceEntities = 0;
12087 if (options & XML_PARSE_NOWARNING) {
12088 ctxt->sax->warning = NULL;
12089 options -= XML_PARSE_NOWARNING;
12090 }
12091 if (options & XML_PARSE_NOERROR) {
12092 ctxt->sax->error = NULL;
12093 ctxt->sax->fatalError = NULL;
12094 options -= XML_PARSE_NOERROR;
12095 }
12096 if (options & XML_PARSE_PEDANTIC) {
12097 ctxt->pedantic = 1;
12098 options -= XML_PARSE_PEDANTIC;
12099 } else
12100 ctxt->pedantic = 0;
12101 if (options & XML_PARSE_NOBLANKS) {
12102 ctxt->keepBlanks = 0;
12103 ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace;
12104 options -= XML_PARSE_NOBLANKS;
12105 } else
12106 ctxt->keepBlanks = 1;
12107 if (options & XML_PARSE_DTDVALID) {
12108 ctxt->validate = 1;
12109 if (options & XML_PARSE_NOWARNING)
12110 ctxt->vctxt.warning = NULL;
12111 if (options & XML_PARSE_NOERROR)
12112 ctxt->vctxt.error = NULL;
12113 options -= XML_PARSE_DTDVALID;
12114 } else
12115 ctxt->validate = 0;
Daniel Veillard81273902003-09-30 00:43:48 +000012116#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012117 if (options & XML_PARSE_SAX1) {
12118 ctxt->sax->startElement = xmlSAX2StartElement;
12119 ctxt->sax->endElement = xmlSAX2EndElement;
12120 ctxt->sax->startElementNs = NULL;
12121 ctxt->sax->endElementNs = NULL;
12122 ctxt->sax->initialized = 1;
12123 options -= XML_PARSE_SAX1;
12124 }
Daniel Veillard81273902003-09-30 00:43:48 +000012125#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde96a2a42003-09-24 21:23:56 +000012126 if (options & XML_PARSE_NODICT) {
12127 ctxt->dictNames = 0;
12128 options -= XML_PARSE_NODICT;
12129 } else {
12130 ctxt->dictNames = 1;
12131 }
Daniel Veillarddca8cc72003-09-26 13:53:14 +000012132 if (options & XML_PARSE_NOCDATA) {
12133 ctxt->sax->cdataBlock = NULL;
12134 options -= XML_PARSE_NOCDATA;
12135 }
12136 if (options & XML_PARSE_NSCLEAN) {
12137 ctxt->options |= XML_PARSE_NSCLEAN;
12138 options -= XML_PARSE_NSCLEAN;
12139 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012140 return (options);
12141}
12142
12143/**
12144 * xmlDoRead:
12145 * @ctxt: an XML parser context
Daniel Veillard60942de2003-09-25 21:05:58 +000012146 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012147 * @encoding: the document encoding, or NULL
12148 * @options: a combination of xmlParserOption(s)
12149 * @reuse: keep the context for reuse
12150 *
12151 * Common front-end for the xmlRead functions
12152 *
12153 * Returns the resulting document tree or NULL
12154 */
12155static xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000012156xmlDoRead(xmlParserCtxtPtr ctxt, const char *URL, const char *encoding,
12157 int options, int reuse)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012158{
12159 xmlDocPtr ret;
12160
12161 xmlCtxtUseOptions(ctxt, options);
12162 if (encoding != NULL) {
12163 xmlCharEncodingHandlerPtr hdlr;
12164
12165 hdlr = xmlFindCharEncodingHandler(encoding);
12166 if (hdlr != NULL)
12167 xmlSwitchToEncoding(ctxt, hdlr);
12168 }
Daniel Veillard60942de2003-09-25 21:05:58 +000012169 if ((URL != NULL) && (ctxt->input != NULL) &&
12170 (ctxt->input->filename == NULL))
12171 ctxt->input->filename = (char *) xmlStrdup((const xmlChar *) URL);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012172 xmlParseDocument(ctxt);
12173 if ((ctxt->wellFormed) || ctxt->recovery)
12174 ret = ctxt->myDoc;
12175 else {
12176 ret = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000012177 if (ctxt->myDoc != NULL) {
Daniel Veillard9d8c1df2003-09-26 23:27:25 +000012178 if ((ctxt->dictNames) &&
12179 (ctxt->myDoc->dict == ctxt->dict))
12180 xmlDictReference(ctxt->dict);
Daniel Veillarde96a2a42003-09-24 21:23:56 +000012181 xmlFreeDoc(ctxt->myDoc);
12182 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012183 }
Daniel Veillarde96a2a42003-09-24 21:23:56 +000012184 ctxt->myDoc = NULL;
12185 if (!reuse) {
12186 if ((ctxt->dictNames) &&
12187 (ret != NULL) &&
12188 (ret->dict == ctxt->dict))
12189 ctxt->dict = NULL;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012190 xmlFreeParserCtxt(ctxt);
Daniel Veillarde96a2a42003-09-24 21:23:56 +000012191 } else {
12192 /* Must duplicate the reference to the dictionary */
12193 if ((ctxt->dictNames) &&
12194 (ret != NULL) &&
12195 (ret->dict == ctxt->dict))
12196 xmlDictReference(ctxt->dict);
12197 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012198
12199 return (ret);
12200}
12201
12202/**
12203 * xmlReadDoc:
12204 * @cur: a pointer to a zero terminated string
Daniel Veillard60942de2003-09-25 21:05:58 +000012205 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012206 * @encoding: the document encoding, or NULL
12207 * @options: a combination of xmlParserOption(s)
12208 *
12209 * parse an XML in-memory document and build a tree.
12210 *
12211 * Returns the resulting document tree
12212 */
12213xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000012214xmlReadDoc(const xmlChar * cur, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012215{
12216 xmlParserCtxtPtr ctxt;
12217
12218 if (cur == NULL)
12219 return (NULL);
12220
12221 ctxt = xmlCreateDocParserCtxt(cur);
12222 if (ctxt == NULL)
12223 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000012224 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012225}
12226
12227/**
12228 * xmlReadFile:
12229 * @filename: a file or URL
12230 * @encoding: the document encoding, or NULL
12231 * @options: a combination of xmlParserOption(s)
12232 *
12233 * parse an XML file from the filesystem or the network.
12234 *
12235 * Returns the resulting document tree
12236 */
12237xmlDocPtr
12238xmlReadFile(const char *filename, const char *encoding, int options)
12239{
12240 xmlParserCtxtPtr ctxt;
12241
12242 ctxt = xmlCreateFileParserCtxt(filename);
12243 if (ctxt == NULL)
12244 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000012245 return (xmlDoRead(ctxt, NULL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012246}
12247
12248/**
12249 * xmlReadMemory:
12250 * @buffer: a pointer to a char array
12251 * @size: the size of the array
Daniel Veillard60942de2003-09-25 21:05:58 +000012252 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012253 * @encoding: the document encoding, or NULL
12254 * @options: a combination of xmlParserOption(s)
12255 *
12256 * parse an XML in-memory document and build a tree.
12257 *
12258 * Returns the resulting document tree
12259 */
12260xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000012261xmlReadMemory(const char *buffer, int size, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012262{
12263 xmlParserCtxtPtr ctxt;
12264
12265 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
12266 if (ctxt == NULL)
12267 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000012268 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012269}
12270
12271/**
12272 * xmlReadFd:
12273 * @fd: an open file descriptor
Daniel Veillard60942de2003-09-25 21:05:58 +000012274 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012275 * @encoding: the document encoding, or NULL
12276 * @options: a combination of xmlParserOption(s)
12277 *
12278 * parse an XML from a file descriptor and build a tree.
12279 *
12280 * Returns the resulting document tree
12281 */
12282xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000012283xmlReadFd(int fd, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012284{
12285 xmlParserCtxtPtr ctxt;
12286 xmlParserInputBufferPtr input;
12287 xmlParserInputPtr stream;
12288
12289 if (fd < 0)
12290 return (NULL);
12291
12292 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
12293 if (input == NULL)
12294 return (NULL);
12295 ctxt = xmlNewParserCtxt();
12296 if (ctxt == NULL) {
12297 xmlFreeParserInputBuffer(input);
12298 return (NULL);
12299 }
12300 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
12301 if (stream == NULL) {
12302 xmlFreeParserInputBuffer(input);
12303 xmlFreeParserCtxt(ctxt);
12304 return (NULL);
12305 }
12306 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000012307 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012308}
12309
12310/**
12311 * xmlReadIO:
12312 * @ioread: an I/O read function
12313 * @ioclose: an I/O close function
12314 * @ioctx: an I/O handler
Daniel Veillard60942de2003-09-25 21:05:58 +000012315 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012316 * @encoding: the document encoding, or NULL
12317 * @options: a combination of xmlParserOption(s)
12318 *
12319 * parse an XML document from I/O functions and source and build a tree.
12320 *
12321 * Returns the resulting document tree
12322 */
12323xmlDocPtr
12324xmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
Daniel Veillard60942de2003-09-25 21:05:58 +000012325 void *ioctx, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012326{
12327 xmlParserCtxtPtr ctxt;
12328 xmlParserInputBufferPtr input;
12329 xmlParserInputPtr stream;
12330
12331 if (ioread == NULL)
12332 return (NULL);
12333
12334 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
12335 XML_CHAR_ENCODING_NONE);
12336 if (input == NULL)
12337 return (NULL);
12338 ctxt = xmlNewParserCtxt();
12339 if (ctxt == NULL) {
12340 xmlFreeParserInputBuffer(input);
12341 return (NULL);
12342 }
12343 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
12344 if (stream == NULL) {
12345 xmlFreeParserInputBuffer(input);
12346 xmlFreeParserCtxt(ctxt);
12347 return (NULL);
12348 }
12349 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000012350 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012351}
12352
12353/**
12354 * xmlCtxtReadDoc:
12355 * @ctxt: an XML parser context
12356 * @cur: a pointer to a zero terminated string
Daniel Veillard60942de2003-09-25 21:05:58 +000012357 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012358 * @encoding: the document encoding, or NULL
12359 * @options: a combination of xmlParserOption(s)
12360 *
12361 * parse an XML in-memory document and build a tree.
12362 * This reuses the existing @ctxt parser context
12363 *
12364 * Returns the resulting document tree
12365 */
12366xmlDocPtr
12367xmlCtxtReadDoc(xmlParserCtxtPtr ctxt, const xmlChar * cur,
Daniel Veillard60942de2003-09-25 21:05:58 +000012368 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012369{
12370 xmlParserInputPtr stream;
12371
12372 if (cur == NULL)
12373 return (NULL);
12374 if (ctxt == NULL)
12375 return (NULL);
12376
12377 xmlCtxtReset(ctxt);
12378
12379 stream = xmlNewStringInputStream(ctxt, cur);
12380 if (stream == NULL) {
12381 return (NULL);
12382 }
12383 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000012384 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012385}
12386
12387/**
12388 * xmlCtxtReadFile:
12389 * @ctxt: an XML parser context
12390 * @filename: a file or URL
12391 * @encoding: the document encoding, or NULL
12392 * @options: a combination of xmlParserOption(s)
12393 *
12394 * parse an XML file from the filesystem or the network.
12395 * This reuses the existing @ctxt parser context
12396 *
12397 * Returns the resulting document tree
12398 */
12399xmlDocPtr
12400xmlCtxtReadFile(xmlParserCtxtPtr ctxt, const char *filename,
12401 const char *encoding, int options)
12402{
12403 xmlParserInputPtr stream;
12404
12405 if (filename == NULL)
12406 return (NULL);
12407 if (ctxt == NULL)
12408 return (NULL);
12409
12410 xmlCtxtReset(ctxt);
12411
12412 stream = xmlNewInputFromFile(ctxt, filename);
12413 if (stream == NULL) {
12414 return (NULL);
12415 }
12416 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000012417 return (xmlDoRead(ctxt, NULL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012418}
12419
12420/**
12421 * xmlCtxtReadMemory:
12422 * @ctxt: an XML parser context
12423 * @buffer: a pointer to a char array
12424 * @size: the size of the array
Daniel Veillard60942de2003-09-25 21:05:58 +000012425 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012426 * @encoding: the document encoding, or NULL
12427 * @options: a combination of xmlParserOption(s)
12428 *
12429 * parse an XML in-memory document and build a tree.
12430 * This reuses the existing @ctxt parser context
12431 *
12432 * Returns the resulting document tree
12433 */
12434xmlDocPtr
12435xmlCtxtReadMemory(xmlParserCtxtPtr ctxt, const char *buffer, int size,
Daniel Veillard60942de2003-09-25 21:05:58 +000012436 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012437{
12438 xmlParserInputBufferPtr input;
12439 xmlParserInputPtr stream;
12440
12441 if (ctxt == NULL)
12442 return (NULL);
12443 if (buffer == NULL)
12444 return (NULL);
12445
12446 xmlCtxtReset(ctxt);
12447
12448 input = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
12449 if (input == NULL) {
12450 return(NULL);
12451 }
12452
12453 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
12454 if (stream == NULL) {
12455 xmlFreeParserInputBuffer(input);
12456 return(NULL);
12457 }
12458
12459 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000012460 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012461}
12462
12463/**
12464 * xmlCtxtReadFd:
12465 * @ctxt: an XML parser context
12466 * @fd: an open file descriptor
Daniel Veillard60942de2003-09-25 21:05:58 +000012467 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012468 * @encoding: the document encoding, or NULL
12469 * @options: a combination of xmlParserOption(s)
12470 *
12471 * parse an XML from a file descriptor and build a tree.
12472 * This reuses the existing @ctxt parser context
12473 *
12474 * Returns the resulting document tree
12475 */
12476xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000012477xmlCtxtReadFd(xmlParserCtxtPtr ctxt, int fd,
12478 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012479{
12480 xmlParserInputBufferPtr input;
12481 xmlParserInputPtr stream;
12482
12483 if (fd < 0)
12484 return (NULL);
12485 if (ctxt == NULL)
12486 return (NULL);
12487
12488 xmlCtxtReset(ctxt);
12489
12490
12491 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
12492 if (input == NULL)
12493 return (NULL);
12494 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
12495 if (stream == NULL) {
12496 xmlFreeParserInputBuffer(input);
12497 return (NULL);
12498 }
12499 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000012500 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012501}
12502
12503/**
12504 * xmlCtxtReadIO:
12505 * @ctxt: an XML parser context
12506 * @ioread: an I/O read function
12507 * @ioclose: an I/O close function
12508 * @ioctx: an I/O handler
Daniel Veillard60942de2003-09-25 21:05:58 +000012509 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012510 * @encoding: the document encoding, or NULL
12511 * @options: a combination of xmlParserOption(s)
12512 *
12513 * parse an XML document from I/O functions and source and build a tree.
12514 * This reuses the existing @ctxt parser context
12515 *
12516 * Returns the resulting document tree
12517 */
12518xmlDocPtr
12519xmlCtxtReadIO(xmlParserCtxtPtr ctxt, xmlInputReadCallback ioread,
12520 xmlInputCloseCallback ioclose, void *ioctx,
Daniel Veillard60942de2003-09-25 21:05:58 +000012521 const char *URL,
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012522 const char *encoding, int options)
12523{
12524 xmlParserInputBufferPtr input;
12525 xmlParserInputPtr stream;
12526
12527 if (ioread == NULL)
12528 return (NULL);
12529 if (ctxt == NULL)
12530 return (NULL);
12531
12532 xmlCtxtReset(ctxt);
12533
12534 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
12535 XML_CHAR_ENCODING_NONE);
12536 if (input == NULL)
12537 return (NULL);
12538 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
12539 if (stream == NULL) {
12540 xmlFreeParserInputBuffer(input);
12541 return (NULL);
12542 }
12543 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000012544 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012545}