blob: 9267a001a71dd5bd1d7cfdb1b2aa35f4d989b084 [file] [log] [blame]
Owen Taylor3473f882001-02-23 17:55:21 +00001/*
2 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
3 * implemented on top of the SAX interfaces
4 *
5 * References:
6 * The XML specification:
7 * http://www.w3.org/TR/REC-xml
8 * Original 1.0 version:
9 * http://www.w3.org/TR/1998/REC-xml-19980210
10 * XML second edition working draft
11 * http://www.w3.org/TR/2000/WD-xml-2e-20000814
12 *
13 * Okay this is a big file, the parser core is around 7000 lines, then it
14 * is followed by the progressive parser top routines, then the various
Daniel Veillardcbaf3992001-12-31 16:16:02 +000015 * high level APIs to call the parser and a few miscellaneous functions.
Owen Taylor3473f882001-02-23 17:55:21 +000016 * A number of helper functions and deprecated ones have been moved to
17 * parserInternals.c to reduce this file size.
18 * As much as possible the functions are associated with their relative
19 * production in the XML specification. A few productions defining the
20 * different ranges of character are actually implemented either in
21 * parserInternals.h or parserInternals.c
22 * The DOM tree build is realized from the default SAX callbacks in
23 * the module SAX.c.
24 * The routines doing the validation checks are in valid.c and called either
Daniel Veillardcbaf3992001-12-31 16:16:02 +000025 * from the SAX callbacks or as standalone functions using a preparsed
Owen Taylor3473f882001-02-23 17:55:21 +000026 * document.
27 *
28 * See Copyright for the status of this software.
29 *
Daniel Veillardc5d64342001-06-24 12:13:24 +000030 * daniel@veillard.com
Owen Taylor3473f882001-02-23 17:55:21 +000031 */
32
Daniel Veillard34ce8be2002-03-18 19:37:11 +000033#define IN_LIBXML
Bjorn Reese70a9da52001-04-21 16:57:29 +000034#include "libxml.h"
35
Daniel Veillard3c5ed912002-01-08 10:36:16 +000036#if defined(WIN32) && !defined (__CYGWIN__)
Owen Taylor3473f882001-02-23 17:55:21 +000037#define XML_DIR_SEP '\\'
38#else
Owen Taylor3473f882001-02-23 17:55:21 +000039#define XML_DIR_SEP '/'
40#endif
41
Owen Taylor3473f882001-02-23 17:55:21 +000042#include <stdlib.h>
43#include <string.h>
Aleksey Sanine7acf432003-10-02 20:05:27 +000044#include <stdarg.h>
Owen Taylor3473f882001-02-23 17:55:21 +000045#include <libxml/xmlmemory.h>
Daniel Veillardd0463562001-10-13 09:15:48 +000046#include <libxml/threads.h>
47#include <libxml/globals.h>
Owen Taylor3473f882001-02-23 17:55:21 +000048#include <libxml/tree.h>
49#include <libxml/parser.h>
50#include <libxml/parserInternals.h>
51#include <libxml/valid.h>
52#include <libxml/entities.h>
53#include <libxml/xmlerror.h>
54#include <libxml/encoding.h>
55#include <libxml/xmlIO.h>
56#include <libxml/uri.h>
Daniel Veillard5d90b6c2001-08-22 14:29:45 +000057#ifdef LIBXML_CATALOG_ENABLED
58#include <libxml/catalog.h>
59#endif
Owen Taylor3473f882001-02-23 17:55:21 +000060
61#ifdef HAVE_CTYPE_H
62#include <ctype.h>
63#endif
64#ifdef HAVE_STDLIB_H
65#include <stdlib.h>
66#endif
67#ifdef HAVE_SYS_STAT_H
68#include <sys/stat.h>
69#endif
70#ifdef HAVE_FCNTL_H
71#include <fcntl.h>
72#endif
73#ifdef HAVE_UNISTD_H
74#include <unistd.h>
75#endif
76#ifdef HAVE_ZLIB_H
77#include <zlib.h>
78#endif
79
Daniel Veillard3b2e4e12003-02-03 08:52:58 +000080/**
81 * MAX_DEPTH:
82 *
83 * arbitrary depth limit for the XML documents that we allow to
84 * process. This is not a limitation of the parser but a safety
85 * boundary feature.
86 */
87#define MAX_DEPTH 1024
Owen Taylor3473f882001-02-23 17:55:21 +000088
Daniel Veillard0fb18932003-09-07 09:14:37 +000089#define SAX2 1
90
Daniel Veillard21a0f912001-02-25 19:54:14 +000091#define XML_PARSER_BIG_BUFFER_SIZE 300
Owen Taylor3473f882001-02-23 17:55:21 +000092#define XML_PARSER_BUFFER_SIZE 100
93
Daniel Veillard5997aca2002-03-18 18:36:20 +000094#define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"
95
/*
 * NULL-terminated list of the "xml"-prefixed processing instruction
 * targets allowed by the W3C specifications.
 */
static const char *xmlW3CPIs[] = { "xml-stylesheet", NULL };
104
105/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
Owen Taylor3473f882001-02-23 17:55:21 +0000106xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
107 const xmlChar **str);
108
Daniel Veillard7d515752003-09-26 19:12:37 +0000109static xmlParserErrors
Daniel Veillarda97a19b2001-05-20 13:19:52 +0000110xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
111 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +0000112 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +0000113 const xmlChar *ID, xmlNodePtr *list);
Owen Taylor3473f882001-02-23 17:55:21 +0000114
Daniel Veillard81273902003-09-30 00:43:48 +0000115#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +0000116static void
117xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
118 xmlNodePtr lastNode);
Daniel Veillard81273902003-09-30 00:43:48 +0000119#endif /* LIBXML_LEGACY_ENABLED */
Daniel Veillard8107a222002-01-13 14:10:10 +0000120
Daniel Veillard7d515752003-09-26 19:12:37 +0000121static xmlParserErrors
Daniel Veillard328f48c2002-11-15 15:24:34 +0000122xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
123 const xmlChar *string, void *user_data, xmlNodePtr *lst);
Daniel Veillarde57ec792003-09-10 10:50:59 +0000124
125/************************************************************************
126 * *
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000127 * Some factorized error routines *
128 * *
129 ************************************************************************/
130
131/**
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000132 * xmlErrAttributeDup:
133 * @ctxt: an XML parser context
134 * @prefix: the attribute prefix
135 * @localname: the attribute localname
136 *
137 * Handle a redefinition of attribute error
138 */
139static void
140xmlErrAttributeDup(xmlParserCtxtPtr ctxt, const xmlChar * prefix,
141 const xmlChar * localname)
142{
143 ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
Daniel Veillard2b8c4a12003-10-02 22:28:19 +0000144 if (prefix == NULL)
Daniel Veillard659e71e2003-10-10 14:10:40 +0000145 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000146 ctxt->errNo, XML_ERR_FATAL, NULL, 0,
147 (const char *) localname, NULL, NULL, 0, 0,
148 "Attribute %s redefined\n", localname);
Daniel Veillard2b8c4a12003-10-02 22:28:19 +0000149 else
Daniel Veillard659e71e2003-10-10 14:10:40 +0000150 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000151 ctxt->errNo, XML_ERR_FATAL, NULL, 0,
152 (const char *) prefix, (const char *) localname,
153 NULL, 0, 0, "Attribute %s:%s redefined\n", prefix,
154 localname);
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000155 ctxt->wellFormed = 0;
156 if (ctxt->recovery == 0)
157 ctxt->disableSAX = 1;
158}
159
160/**
161 * xmlFatalErr:
162 * @ctxt: an XML parser context
163 * @error: the error number
164 * @extra: extra information string
165 *
166 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
167 */
168static void
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000169xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info)
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000170{
171 const char *errmsg;
172
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000173 switch (error) {
174 case XML_ERR_INVALID_HEX_CHARREF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000175 errmsg = "CharRef: invalid hexadecimal value\n";
176 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000177 case XML_ERR_INVALID_DEC_CHARREF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000178 errmsg = "CharRef: invalid decimal value\n";
179 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000180 case XML_ERR_INVALID_CHARREF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000181 errmsg = "CharRef: invalid value\n";
182 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000183 case XML_ERR_INTERNAL_ERROR:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000184 errmsg = "internal error";
185 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000186 case XML_ERR_PEREF_AT_EOF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000187 errmsg = "PEReference at end of document\n";
188 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000189 case XML_ERR_PEREF_IN_PROLOG:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000190 errmsg = "PEReference in prolog\n";
191 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000192 case XML_ERR_PEREF_IN_EPILOG:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000193 errmsg = "PEReference in epilog\n";
194 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000195 case XML_ERR_PEREF_NO_NAME:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000196 errmsg = "PEReference: no name\n";
197 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000198 case XML_ERR_PEREF_SEMICOL_MISSING:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000199 errmsg = "PEReference: expecting ';'\n";
200 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000201 case XML_ERR_ENTITY_LOOP:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000202 errmsg = "Detected an entity reference loop\n";
203 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000204 case XML_ERR_ENTITY_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000205 errmsg = "EntityValue: \" or ' expected\n";
206 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000207 case XML_ERR_ENTITY_PE_INTERNAL:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000208 errmsg = "PEReferences forbidden in internal subset\n";
209 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000210 case XML_ERR_ENTITY_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000211 errmsg = "EntityValue: \" or ' expected\n";
212 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000213 case XML_ERR_ATTRIBUTE_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000214 errmsg = "AttValue: \" or ' expected\n";
215 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000216 case XML_ERR_LT_IN_ATTRIBUTE:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000217 errmsg = "Unescaped '<' not allowed in attributes values\n";
218 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000219 case XML_ERR_LITERAL_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000220 errmsg = "SystemLiteral \" or ' expected\n";
221 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000222 case XML_ERR_LITERAL_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000223 errmsg = "Unfinished System or Public ID \" or ' expected\n";
224 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000225 case XML_ERR_MISPLACED_CDATA_END:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000226 errmsg = "Sequence ']]>' not allowed in content\n";
227 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000228 case XML_ERR_URI_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000229 errmsg = "SYSTEM or PUBLIC, the URI is missing\n";
230 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000231 case XML_ERR_PUBID_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000232 errmsg = "PUBLIC, the Public Identifier is missing\n";
233 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000234 case XML_ERR_HYPHEN_IN_COMMENT:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000235 errmsg = "Comment must not contain '--' (double-hyphen)\n";
236 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000237 case XML_ERR_PI_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000238 errmsg = "xmlParsePI : no target name\n";
239 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000240 case XML_ERR_RESERVED_XML_NAME:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000241 errmsg = "Invalid PI name\n";
242 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000243 case XML_ERR_NOTATION_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000244 errmsg = "NOTATION: Name expected here\n";
245 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000246 case XML_ERR_NOTATION_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000247 errmsg = "'>' required to close NOTATION declaration\n";
248 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000249 case XML_ERR_VALUE_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000250 errmsg = "Entity value required\n";
251 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000252 case XML_ERR_URI_FRAGMENT:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000253 errmsg = "Fragment not allowed";
254 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000255 case XML_ERR_ATTLIST_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000256 errmsg = "'(' required to start ATTLIST enumeration\n";
257 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000258 case XML_ERR_NMTOKEN_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000259 errmsg = "NmToken expected in ATTLIST enumeration\n";
260 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000261 case XML_ERR_ATTLIST_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000262 errmsg = "')' required to finish ATTLIST enumeration\n";
263 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000264 case XML_ERR_MIXED_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000265 errmsg = "MixedContentDecl : '|' or ')*' expected\n";
266 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000267 case XML_ERR_PCDATA_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000268 errmsg = "MixedContentDecl : '#PCDATA' expected\n";
269 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000270 case XML_ERR_ELEMCONTENT_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000271 errmsg = "ContentDecl : Name or '(' expected\n";
272 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000273 case XML_ERR_ELEMCONTENT_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000274 errmsg = "ContentDecl : ',' '|' or ')' expected\n";
275 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000276 case XML_ERR_PEREF_IN_INT_SUBSET:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000277 errmsg =
278 "PEReference: forbidden within markup decl in internal subset\n";
279 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000280 case XML_ERR_GT_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000281 errmsg = "expected '>'\n";
282 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000283 case XML_ERR_CONDSEC_INVALID:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000284 errmsg = "XML conditional section '[' expected\n";
285 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000286 case XML_ERR_EXT_SUBSET_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000287 errmsg = "Content error in the external subset\n";
288 break;
289 case XML_ERR_CONDSEC_INVALID_KEYWORD:
290 errmsg =
291 "conditional section INCLUDE or IGNORE keyword expected\n";
292 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000293 case XML_ERR_CONDSEC_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000294 errmsg = "XML conditional section not closed\n";
295 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000296 case XML_ERR_XMLDECL_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000297 errmsg = "Text declaration '<?xml' required\n";
298 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000299 case XML_ERR_XMLDECL_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000300 errmsg = "parsing XML declaration: '?>' expected\n";
301 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000302 case XML_ERR_EXT_ENTITY_STANDALONE:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000303 errmsg = "external parsed entities cannot be standalone\n";
304 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000305 case XML_ERR_ENTITYREF_SEMICOL_MISSING:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000306 errmsg = "EntityRef: expecting ';'\n";
307 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000308 case XML_ERR_DOCTYPE_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000309 errmsg = "DOCTYPE improperly terminated\n";
310 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000311 case XML_ERR_LTSLASH_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000312 errmsg = "EndTag: '</' not found\n";
313 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000314 case XML_ERR_EQUAL_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000315 errmsg = "expected '='\n";
316 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000317 case XML_ERR_STRING_NOT_CLOSED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000318 errmsg = "String not closed expecting \" or '\n";
319 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000320 case XML_ERR_STRING_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000321 errmsg = "String not started expecting ' or \"\n";
322 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000323 case XML_ERR_ENCODING_NAME:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000324 errmsg = "Invalid XML encoding name\n";
325 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000326 case XML_ERR_STANDALONE_VALUE:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000327 errmsg = "standalone accepts only 'yes' or 'no'\n";
328 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000329 case XML_ERR_DOCUMENT_EMPTY:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000330 errmsg = "Document is empty\n";
331 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000332 case XML_ERR_DOCUMENT_END:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000333 errmsg = "Extra content at the end of the document\n";
334 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000335 case XML_ERR_NOT_WELL_BALANCED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000336 errmsg = "chunk is not well balanced\n";
337 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000338 case XML_ERR_EXTRA_CONTENT:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000339 errmsg = "extra content at the end of well balanced chunk\n";
340 break;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000341 case XML_ERR_VERSION_MISSING:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000342 errmsg = "Malformed declaration expecting version\n";
343 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000344#if 0
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000345 case:
346 errmsg = "\n";
347 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000348#endif
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000349 default:
350 errmsg = "Unregistered error message\n";
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000351 }
352 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000353 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000354 XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, errmsg,
355 info);
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000356 ctxt->wellFormed = 0;
357 if (ctxt->recovery == 0)
358 ctxt->disableSAX = 1;
359}
360
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000361/**
362 * xmlFatalErrMsg:
363 * @ctxt: an XML parser context
364 * @error: the error number
365 * @msg: the error message
366 *
367 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
368 */
369static void
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000370xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
371 const char *msg)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000372{
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000373 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000374 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000375 XML_ERR_FATAL, NULL, 0, NULL, NULL, NULL, 0, 0, msg);
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000376 ctxt->wellFormed = 0;
377 if (ctxt->recovery == 0)
378 ctxt->disableSAX = 1;
379}
380
381/**
Daniel Veillard24eb9782003-10-04 21:08:09 +0000382 * xmlWarningMsg:
383 * @ctxt: an XML parser context
384 * @error: the error number
385 * @msg: the error message
386 * @str1: extra data
387 * @str2: extra data
388 *
389 * Handle a warning.
390 */
391static void
392xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
393 const char *msg, const xmlChar *str1, const xmlChar *str2)
394{
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000395 xmlStructuredErrorFunc schannel = NULL;
Daniel Veillardc790bf42003-10-11 10:50:10 +0000396
Daniel Veillard24eb9782003-10-04 21:08:09 +0000397 ctxt->errNo = error;
Daniel Veillardc790bf42003-10-11 10:50:10 +0000398 if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC))
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000399 schannel = ctxt->sax->serror;
400 __xmlRaiseError(schannel,
Daniel Veillard659e71e2003-10-10 14:10:40 +0000401 (ctxt->sax) ? ctxt->sax->warning : NULL,
402 ctxt->userData,
Daniel Veillard24eb9782003-10-04 21:08:09 +0000403 ctxt, NULL, XML_FROM_PARSER, error,
404 XML_ERR_WARNING, NULL, 0,
405 (const char *) str1, (const char *) str2, NULL, 0, 0,
406 msg, (const char *) str1, (const char *) str2);
407}
408
409/**
410 * xmlValidityError:
411 * @ctxt: an XML parser context
412 * @error: the error number
413 * @msg: the error message
414 * @str1: extra data
415 *
416 * Handle a warning.
417 */
418static void
419xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error,
420 const char *msg, const xmlChar *str1)
421{
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000422 xmlStructuredErrorFunc schannel = NULL;
Daniel Veillard24eb9782003-10-04 21:08:09 +0000423 ctxt->errNo = error;
Daniel Veillardc790bf42003-10-11 10:50:10 +0000424 if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC))
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000425 schannel = ctxt->sax->serror;
Daniel Veillardc790bf42003-10-11 10:50:10 +0000426 __xmlRaiseError(schannel,
Daniel Veillard659e71e2003-10-10 14:10:40 +0000427 ctxt->vctxt.error, ctxt->vctxt.userData,
Daniel Veillard24eb9782003-10-04 21:08:09 +0000428 ctxt, NULL, XML_FROM_DTD, error,
429 XML_ERR_ERROR, NULL, 0, (const char *) str1,
430 NULL, NULL, 0, 0,
431 msg, (const char *) str1);
432 ctxt->valid = 0;
433}
434
435/**
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000436 * xmlFatalErrMsgInt:
437 * @ctxt: an XML parser context
438 * @error: the error number
439 * @msg: the error message
440 * @val: an integer value
441 *
442 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
443 */
444static void
445xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000446 const char *msg, int val)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000447{
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000448 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000449 __xmlRaiseError(NULL, NULL, NULL,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000450 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
451 NULL, 0, NULL, NULL, NULL, val, 0, msg, val);
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000452 ctxt->wellFormed = 0;
453 if (ctxt->recovery == 0)
454 ctxt->disableSAX = 1;
455}
456
457/**
Daniel Veillardf403d292003-10-05 13:51:35 +0000458 * xmlFatalErrMsgStrIntStr:
459 * @ctxt: an XML parser context
460 * @error: the error number
461 * @msg: the error message
462 * @str1: an string info
463 * @val: an integer value
464 * @str2: an string info
465 *
466 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
467 */
468static void
469xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
470 const char *msg, const xmlChar *str1, int val,
471 const xmlChar *str2)
472{
473 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000474 __xmlRaiseError(NULL, NULL, NULL,
Daniel Veillardf403d292003-10-05 13:51:35 +0000475 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
476 NULL, 0, (const char *) str1, (const char *) str2,
477 NULL, val, 0, msg, str1, val, str2);
478 ctxt->wellFormed = 0;
479 if (ctxt->recovery == 0)
480 ctxt->disableSAX = 1;
481}
482
483/**
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000484 * xmlFatalErrMsgStr:
485 * @ctxt: an XML parser context
486 * @error: the error number
487 * @msg: the error message
488 * @val: a string value
489 *
490 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
491 */
492static void
493xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000494 const char *msg, const xmlChar * val)
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000495{
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000496 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000497 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000498 XML_FROM_PARSER, error, XML_ERR_FATAL,
499 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
500 val);
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000501 ctxt->wellFormed = 0;
502 if (ctxt->recovery == 0)
503 ctxt->disableSAX = 1;
504}
505
506/**
Daniel Veillardf403d292003-10-05 13:51:35 +0000507 * xmlErrMsgStr:
508 * @ctxt: an XML parser context
509 * @error: the error number
510 * @msg: the error message
511 * @val: a string value
512 *
513 * Handle a non fatal parser error
514 */
515static void
516xmlErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
517 const char *msg, const xmlChar * val)
518{
519 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000520 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
Daniel Veillardf403d292003-10-05 13:51:35 +0000521 XML_FROM_PARSER, error, XML_ERR_ERROR,
522 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
523 val);
524}
525
526/**
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000527 * xmlNsErr:
528 * @ctxt: an XML parser context
529 * @error: the error number
530 * @msg: the message
531 * @info1: extra information string
532 * @info2: extra information string
533 *
534 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
535 */
536static void
537xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
538 const char *msg,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000539 const xmlChar * info1, const xmlChar * info2,
540 const xmlChar * info3)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000541{
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000542 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000543 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000544 XML_ERR_ERROR, NULL, 0, (const char *) info1,
545 (const char *) info2, (const char *) info3, 0, 0, msg,
546 info1, info2, info3);
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000547 ctxt->nsWellFormed = 0;
548}
549
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000550/************************************************************************
551 * *
Daniel Veillarde57ec792003-09-10 10:50:59 +0000552 * SAX2 defaulted attributes handling *
553 * *
554 ************************************************************************/
555
556/**
557 * xmlDetectSAX2:
558 * @ctxt: an XML parser context
559 *
560 * Do the SAX2 detection and specific intialization
561 */
562static void
563xmlDetectSAX2(xmlParserCtxtPtr ctxt) {
564 if (ctxt == NULL) return;
Daniel Veillard81273902003-09-30 00:43:48 +0000565#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +0000566 if ((ctxt->sax) && (ctxt->sax->initialized == XML_SAX2_MAGIC) &&
567 ((ctxt->sax->startElementNs != NULL) ||
568 (ctxt->sax->endElementNs != NULL))) ctxt->sax2 = 1;
Daniel Veillard81273902003-09-30 00:43:48 +0000569#else
570 ctxt->sax2 = 1;
571#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +0000572
573 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
574 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
575 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
576}
577
Daniel Veillarde57ec792003-09-10 10:50:59 +0000578typedef struct _xmlDefAttrs xmlDefAttrs;
579typedef xmlDefAttrs *xmlDefAttrsPtr;
580struct _xmlDefAttrs {
581 int nbAttrs; /* number of defaulted attributes on that element */
582 int maxAttrs; /* the size of the array */
583 const xmlChar *values[4]; /* array of localname/prefix/values */
584};
Daniel Veillarde57ec792003-09-10 10:50:59 +0000585
586/**
587 * xmlAddDefAttrs:
588 * @ctxt: an XML parser context
589 * @fullname: the element fullname
590 * @fullattr: the attribute fullname
591 * @value: the attribute value
592 *
593 * Add a defaulted attribute for an element
594 */
595static void
596xmlAddDefAttrs(xmlParserCtxtPtr ctxt,
597 const xmlChar *fullname,
598 const xmlChar *fullattr,
599 const xmlChar *value) {
600 xmlDefAttrsPtr defaults;
601 int len;
602 const xmlChar *name;
603 const xmlChar *prefix;
604
605 if (ctxt->attsDefault == NULL) {
606 ctxt->attsDefault = xmlHashCreate(10);
607 if (ctxt->attsDefault == NULL)
608 goto mem_error;
609 }
610
611 /*
612 * plit the element name into prefix:localname , the string found
613 * are within the DTD and hen not associated to namespace names.
614 */
615 name = xmlSplitQName3(fullname, &len);
616 if (name == NULL) {
617 name = xmlDictLookup(ctxt->dict, fullname, -1);
618 prefix = NULL;
619 } else {
620 name = xmlDictLookup(ctxt->dict, name, -1);
621 prefix = xmlDictLookup(ctxt->dict, fullname, len);
622 }
623
624 /*
625 * make sure there is some storage
626 */
627 defaults = xmlHashLookup2(ctxt->attsDefault, name, prefix);
628 if (defaults == NULL) {
629 defaults = (xmlDefAttrsPtr) xmlMalloc(sizeof(xmlDefAttrs) +
630 12 * sizeof(const xmlChar *));
631 if (defaults == NULL)
632 goto mem_error;
633 defaults->maxAttrs = 4;
634 defaults->nbAttrs = 0;
635 xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix, defaults, NULL);
636 } else if (defaults->nbAttrs >= defaults->maxAttrs) {
637 defaults = (xmlDefAttrsPtr) xmlRealloc(defaults, sizeof(xmlDefAttrs) +
638 (2 * defaults->maxAttrs * 4) * sizeof(const xmlChar *));
639 if (defaults == NULL)
640 goto mem_error;
641 defaults->maxAttrs *= 2;
642 xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix, defaults, NULL);
643 }
644
645 /*
646 * plit the element name into prefix:localname , the string found
647 * are within the DTD and hen not associated to namespace names.
648 */
649 name = xmlSplitQName3(fullattr, &len);
650 if (name == NULL) {
651 name = xmlDictLookup(ctxt->dict, fullattr, -1);
652 prefix = NULL;
653 } else {
654 name = xmlDictLookup(ctxt->dict, name, -1);
655 prefix = xmlDictLookup(ctxt->dict, fullattr, len);
656 }
657
658 defaults->values[4 * defaults->nbAttrs] = name;
659 defaults->values[4 * defaults->nbAttrs + 1] = prefix;
660 /* intern the string and precompute the end */
661 len = xmlStrlen(value);
662 value = xmlDictLookup(ctxt->dict, value, len);
663 defaults->values[4 * defaults->nbAttrs + 2] = value;
664 defaults->values[4 * defaults->nbAttrs + 3] = value + len;
665 defaults->nbAttrs++;
666
667 return;
668
669mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000670 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +0000671 return;
672}
673
Daniel Veillard8e36e6a2003-09-10 10:50:59 +0000674/**
675 * xmlAddSpecialAttr:
676 * @ctxt: an XML parser context
677 * @fullname: the element fullname
678 * @fullattr: the attribute fullname
679 * @type: the attribute type
680 *
681 * Register that this attribute is not CDATA
682 */
683static void
684xmlAddSpecialAttr(xmlParserCtxtPtr ctxt,
685 const xmlChar *fullname,
686 const xmlChar *fullattr,
687 int type)
688{
689 if (ctxt->attsSpecial == NULL) {
690 ctxt->attsSpecial = xmlHashCreate(10);
691 if (ctxt->attsSpecial == NULL)
692 goto mem_error;
693 }
694
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +0000695 xmlHashAddEntry2(ctxt->attsSpecial, fullname, fullattr,
696 (void *) (long) type);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +0000697 return;
698
699mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000700 xmlErrMemory(ctxt, NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +0000701 return;
702}
703
Daniel Veillard4432df22003-09-28 18:58:27 +0000704/**
705 * xmlCheckLanguageID:
706 * @lang: pointer to the string value
707 *
708 * Checks that the value conforms to the LanguageID production:
709 *
710 * NOTE: this is somewhat deprecated, those productions were removed from
711 * the XML Second edition.
712 *
713 * [33] LanguageID ::= Langcode ('-' Subcode)*
714 * [34] Langcode ::= ISO639Code | IanaCode | UserCode
715 * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
716 * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
717 * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
718 * [38] Subcode ::= ([a-z] | [A-Z])+
719 *
720 * Returns 1 if correct 0 otherwise
721 **/
722int
723xmlCheckLanguageID(const xmlChar * lang)
724{
725 const xmlChar *cur = lang;
726
727 if (cur == NULL)
728 return (0);
729 if (((cur[0] == 'i') && (cur[1] == '-')) ||
730 ((cur[0] == 'I') && (cur[1] == '-'))) {
731 /*
732 * IANA code
733 */
734 cur += 2;
735 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
736 ((cur[0] >= 'a') && (cur[0] <= 'z')))
737 cur++;
738 } else if (((cur[0] == 'x') && (cur[1] == '-')) ||
739 ((cur[0] == 'X') && (cur[1] == '-'))) {
740 /*
741 * User code
742 */
743 cur += 2;
744 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
745 ((cur[0] >= 'a') && (cur[0] <= 'z')))
746 cur++;
747 } else if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
748 ((cur[0] >= 'a') && (cur[0] <= 'z'))) {
749 /*
750 * ISO639
751 */
752 cur++;
753 if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
754 ((cur[0] >= 'a') && (cur[0] <= 'z')))
755 cur++;
756 else
757 return (0);
758 } else
759 return (0);
760 while (cur[0] != 0) { /* non input consuming */
761 if (cur[0] != '-')
762 return (0);
763 cur++;
764 if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
765 ((cur[0] >= 'a') && (cur[0] <= 'z')))
766 cur++;
767 else
768 return (0);
769 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
770 ((cur[0] >= 'a') && (cur[0] <= 'z')))
771 cur++;
772 }
773 return (1);
774}
775
Owen Taylor3473f882001-02-23 17:55:21 +0000776/************************************************************************
777 * *
778 * Parser stacks related functions and macros *
779 * *
780 ************************************************************************/
781
782xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
783 const xmlChar ** str);
784
Daniel Veillard0fb18932003-09-07 09:14:37 +0000785#ifdef SAX2
786/**
787 * nsPush:
788 * @ctxt: an XML parser context
789 * @prefix: the namespace prefix or NULL
790 * @URL: the namespace name
791 *
792 * Pushes a new parser namespace on top of the ns stack
793 *
William M. Brack7b9154b2003-09-27 19:23:50 +0000794 * Returns -1 in case of error, -2 if the namespace should be discarded
795 * and the index in the stack otherwise.
Daniel Veillard0fb18932003-09-07 09:14:37 +0000796 */
797static int
798nsPush(xmlParserCtxtPtr ctxt, const xmlChar *prefix, const xmlChar *URL)
799{
Daniel Veillarddca8cc72003-09-26 13:53:14 +0000800 if (ctxt->options & XML_PARSE_NSCLEAN) {
801 int i;
802 for (i = 0;i < ctxt->nsNr;i += 2) {
803 if (ctxt->nsTab[i] == prefix) {
804 /* in scope */
805 if (ctxt->nsTab[i + 1] == URL)
806 return(-2);
807 /* out of scope keep it */
808 break;
809 }
810 }
811 }
Daniel Veillard0fb18932003-09-07 09:14:37 +0000812 if ((ctxt->nsMax == 0) || (ctxt->nsTab == NULL)) {
813 ctxt->nsMax = 10;
814 ctxt->nsNr = 0;
815 ctxt->nsTab = (const xmlChar **)
816 xmlMalloc(ctxt->nsMax * sizeof(xmlChar *));
817 if (ctxt->nsTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000818 xmlErrMemory(ctxt, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +0000819 ctxt->nsMax = 0;
820 return (-1);
821 }
822 } else if (ctxt->nsNr >= ctxt->nsMax) {
823 ctxt->nsMax *= 2;
824 ctxt->nsTab = (const xmlChar **)
825 xmlRealloc(ctxt->nsTab,
826 ctxt->nsMax * sizeof(ctxt->nsTab[0]));
827 if (ctxt->nsTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000828 xmlErrMemory(ctxt, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +0000829 ctxt->nsMax /= 2;
830 return (-1);
831 }
832 }
833 ctxt->nsTab[ctxt->nsNr++] = prefix;
834 ctxt->nsTab[ctxt->nsNr++] = URL;
835 return (ctxt->nsNr);
836}
837/**
838 * nsPop:
839 * @ctxt: an XML parser context
840 * @nr: the number to pop
841 *
842 * Pops the top @nr parser prefix/namespace from the ns stack
843 *
844 * Returns the number of namespaces removed
845 */
846static int
847nsPop(xmlParserCtxtPtr ctxt, int nr)
848{
849 int i;
850
851 if (ctxt->nsTab == NULL) return(0);
852 if (ctxt->nsNr < nr) {
853 xmlGenericError(xmlGenericErrorContext, "Pbm popping %d NS\n", nr);
854 nr = ctxt->nsNr;
855 }
856 if (ctxt->nsNr <= 0)
857 return (0);
858
859 for (i = 0;i < nr;i++) {
860 ctxt->nsNr--;
861 ctxt->nsTab[ctxt->nsNr] = NULL;
862 }
863 return(nr);
864}
865#endif
866
867static int
868xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt, int nr) {
869 const xmlChar **atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +0000870 int *attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +0000871 int maxatts;
872
873 if (ctxt->atts == NULL) {
Daniel Veillarde57ec792003-09-10 10:50:59 +0000874 maxatts = 55; /* allow for 10 attrs by default */
Daniel Veillard0fb18932003-09-07 09:14:37 +0000875 atts = (const xmlChar **)
876 xmlMalloc(maxatts * sizeof(xmlChar *));
Daniel Veillarde57ec792003-09-10 10:50:59 +0000877 if (atts == NULL) goto mem_error;
Daniel Veillard0fb18932003-09-07 09:14:37 +0000878 ctxt->atts = atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +0000879 attallocs = (int *) xmlMalloc((maxatts / 5) * sizeof(int));
880 if (attallocs == NULL) goto mem_error;
881 ctxt->attallocs = attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +0000882 ctxt->maxatts = maxatts;
Daniel Veillarde57ec792003-09-10 10:50:59 +0000883 } else if (nr + 5 > ctxt->maxatts) {
884 maxatts = (nr + 5) * 2;
Daniel Veillard0fb18932003-09-07 09:14:37 +0000885 atts = (const xmlChar **) xmlRealloc((void *) ctxt->atts,
886 maxatts * sizeof(const xmlChar *));
Daniel Veillarde57ec792003-09-10 10:50:59 +0000887 if (atts == NULL) goto mem_error;
Daniel Veillard0fb18932003-09-07 09:14:37 +0000888 ctxt->atts = atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +0000889 attallocs = (int *) xmlRealloc((void *) ctxt->attallocs,
890 (maxatts / 5) * sizeof(int));
891 if (attallocs == NULL) goto mem_error;
892 ctxt->attallocs = attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +0000893 ctxt->maxatts = maxatts;
894 }
895 return(ctxt->maxatts);
Daniel Veillarde57ec792003-09-10 10:50:59 +0000896mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000897 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +0000898 return(-1);
Daniel Veillard0fb18932003-09-07 09:14:37 +0000899}
900
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000901/**
902 * inputPush:
903 * @ctxt: an XML parser context
Daniel Veillard9d06d302002-01-22 18:15:52 +0000904 * @value: the parser input
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000905 *
906 * Pushes a new parser input on top of the input stack
Daniel Veillard9d06d302002-01-22 18:15:52 +0000907 *
908 * Returns 0 in case of error, the index in the stack otherwise
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000909 */
Daniel Veillard1c732d22002-11-30 11:22:59 +0000910extern int
911inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value)
912{
913 if (ctxt->inputNr >= ctxt->inputMax) {
914 ctxt->inputMax *= 2;
915 ctxt->inputTab =
916 (xmlParserInputPtr *) xmlRealloc(ctxt->inputTab,
917 ctxt->inputMax *
918 sizeof(ctxt->inputTab[0]));
919 if (ctxt->inputTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000920 xmlErrMemory(ctxt, NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +0000921 return (0);
922 }
923 }
924 ctxt->inputTab[ctxt->inputNr] = value;
925 ctxt->input = value;
926 return (ctxt->inputNr++);
927}
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000928/**
Daniel Veillard1c732d22002-11-30 11:22:59 +0000929 * inputPop:
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000930 * @ctxt: an XML parser context
931 *
Daniel Veillard1c732d22002-11-30 11:22:59 +0000932 * Pops the top parser input from the input stack
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000933 *
Daniel Veillard1c732d22002-11-30 11:22:59 +0000934 * Returns the input just removed
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000935 */
Daniel Veillard1c732d22002-11-30 11:22:59 +0000936extern xmlParserInputPtr
937inputPop(xmlParserCtxtPtr ctxt)
938{
939 xmlParserInputPtr ret;
940
941 if (ctxt->inputNr <= 0)
942 return (0);
943 ctxt->inputNr--;
944 if (ctxt->inputNr > 0)
945 ctxt->input = ctxt->inputTab[ctxt->inputNr - 1];
946 else
947 ctxt->input = NULL;
948 ret = ctxt->inputTab[ctxt->inputNr];
949 ctxt->inputTab[ctxt->inputNr] = 0;
950 return (ret);
951}
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000952/**
953 * nodePush:
954 * @ctxt: an XML parser context
Daniel Veillard9d06d302002-01-22 18:15:52 +0000955 * @value: the element node
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000956 *
957 * Pushes a new element node on top of the node stack
Daniel Veillard9d06d302002-01-22 18:15:52 +0000958 *
959 * Returns 0 in case of error, the index in the stack otherwise
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000960 */
Daniel Veillard1c732d22002-11-30 11:22:59 +0000961extern int
962nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value)
963{
964 if (ctxt->nodeNr >= ctxt->nodeMax) {
965 ctxt->nodeMax *= 2;
966 ctxt->nodeTab =
967 (xmlNodePtr *) xmlRealloc(ctxt->nodeTab,
968 ctxt->nodeMax *
969 sizeof(ctxt->nodeTab[0]));
970 if (ctxt->nodeTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000971 xmlErrMemory(ctxt, NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +0000972 return (0);
973 }
974 }
Daniel Veillard3b2e4e12003-02-03 08:52:58 +0000975#ifdef MAX_DEPTH
976 if (ctxt->nodeNr > MAX_DEPTH) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000977 xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
Daniel Veillard3b2e4e12003-02-03 08:52:58 +0000978 "Excessive depth in document: change MAX_DEPTH = %d\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000979 MAX_DEPTH);
Daniel Veillard3b2e4e12003-02-03 08:52:58 +0000980 ctxt->instate = XML_PARSER_EOF;
Daniel Veillard3b2e4e12003-02-03 08:52:58 +0000981 return(0);
982 }
983#endif
Daniel Veillard1c732d22002-11-30 11:22:59 +0000984 ctxt->nodeTab[ctxt->nodeNr] = value;
985 ctxt->node = value;
986 return (ctxt->nodeNr++);
987}
988/**
989 * nodePop:
990 * @ctxt: an XML parser context
991 *
992 * Pops the top element node from the node stack
993 *
994 * Returns the node just removed
Owen Taylor3473f882001-02-23 17:55:21 +0000995 */
Daniel Veillard1c732d22002-11-30 11:22:59 +0000996extern xmlNodePtr
997nodePop(xmlParserCtxtPtr ctxt)
998{
999 xmlNodePtr ret;
1000
1001 if (ctxt->nodeNr <= 0)
1002 return (0);
1003 ctxt->nodeNr--;
1004 if (ctxt->nodeNr > 0)
1005 ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1];
1006 else
1007 ctxt->node = NULL;
1008 ret = ctxt->nodeTab[ctxt->nodeNr];
1009 ctxt->nodeTab[ctxt->nodeNr] = 0;
1010 return (ret);
1011}
1012/**
Daniel Veillarde57ec792003-09-10 10:50:59 +00001013 * nameNsPush:
1014 * @ctxt: an XML parser context
1015 * @value: the element name
1016 * @prefix: the element prefix
1017 * @URI: the element namespace name
1018 *
1019 * Pushes a new element name/prefix/URL on top of the name stack
1020 *
1021 * Returns -1 in case of error, the index in the stack otherwise
1022 */
1023static int
1024nameNsPush(xmlParserCtxtPtr ctxt, const xmlChar * value,
1025 const xmlChar *prefix, const xmlChar *URI, int nsNr)
1026{
1027 if (ctxt->nameNr >= ctxt->nameMax) {
1028 const xmlChar * *tmp;
1029 void **tmp2;
1030 ctxt->nameMax *= 2;
1031 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1032 ctxt->nameMax *
1033 sizeof(ctxt->nameTab[0]));
1034 if (tmp == NULL) {
1035 ctxt->nameMax /= 2;
1036 goto mem_error;
1037 }
1038 ctxt->nameTab = tmp;
1039 tmp2 = (void **) xmlRealloc((void * *)ctxt->pushTab,
1040 ctxt->nameMax * 3 *
1041 sizeof(ctxt->pushTab[0]));
1042 if (tmp2 == NULL) {
1043 ctxt->nameMax /= 2;
1044 goto mem_error;
1045 }
1046 ctxt->pushTab = tmp2;
1047 }
1048 ctxt->nameTab[ctxt->nameNr] = value;
1049 ctxt->name = value;
1050 ctxt->pushTab[ctxt->nameNr * 3] = (void *) prefix;
1051 ctxt->pushTab[ctxt->nameNr * 3 + 1] = (void *) URI;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +00001052 ctxt->pushTab[ctxt->nameNr * 3 + 2] = (void *) (long) nsNr;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001053 return (ctxt->nameNr++);
1054mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001055 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001056 return (-1);
1057}
1058/**
1059 * nameNsPop:
1060 * @ctxt: an XML parser context
1061 *
1062 * Pops the top element/prefix/URI name from the name stack
1063 *
1064 * Returns the name just removed
1065 */
1066static const xmlChar *
1067nameNsPop(xmlParserCtxtPtr ctxt)
1068{
1069 const xmlChar *ret;
1070
1071 if (ctxt->nameNr <= 0)
1072 return (0);
1073 ctxt->nameNr--;
1074 if (ctxt->nameNr > 0)
1075 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1076 else
1077 ctxt->name = NULL;
1078 ret = ctxt->nameTab[ctxt->nameNr];
1079 ctxt->nameTab[ctxt->nameNr] = NULL;
1080 return (ret);
1081}
1082
1083/**
Daniel Veillard1c732d22002-11-30 11:22:59 +00001084 * namePush:
1085 * @ctxt: an XML parser context
1086 * @value: the element name
1087 *
1088 * Pushes a new element name on top of the name stack
1089 *
Daniel Veillarde57ec792003-09-10 10:50:59 +00001090 * Returns -1 in case of error, the index in the stack otherwise
Daniel Veillard1c732d22002-11-30 11:22:59 +00001091 */
1092extern int
Daniel Veillard2fdbd322003-08-18 12:15:38 +00001093namePush(xmlParserCtxtPtr ctxt, const xmlChar * value)
Daniel Veillard1c732d22002-11-30 11:22:59 +00001094{
1095 if (ctxt->nameNr >= ctxt->nameMax) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00001096 const xmlChar * *tmp;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001097 ctxt->nameMax *= 2;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001098 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
Daniel Veillard1c732d22002-11-30 11:22:59 +00001099 ctxt->nameMax *
1100 sizeof(ctxt->nameTab[0]));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001101 if (tmp == NULL) {
1102 ctxt->nameMax /= 2;
1103 goto mem_error;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001104 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00001105 ctxt->nameTab = tmp;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001106 }
1107 ctxt->nameTab[ctxt->nameNr] = value;
1108 ctxt->name = value;
1109 return (ctxt->nameNr++);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001110mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001111 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001112 return (-1);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001113}
1114/**
1115 * namePop:
1116 * @ctxt: an XML parser context
1117 *
1118 * Pops the top element name from the name stack
1119 *
1120 * Returns the name just removed
1121 */
Daniel Veillard2fdbd322003-08-18 12:15:38 +00001122extern const xmlChar *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001123namePop(xmlParserCtxtPtr ctxt)
1124{
Daniel Veillard2fdbd322003-08-18 12:15:38 +00001125 const xmlChar *ret;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001126
1127 if (ctxt->nameNr <= 0)
1128 return (0);
1129 ctxt->nameNr--;
1130 if (ctxt->nameNr > 0)
1131 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1132 else
1133 ctxt->name = NULL;
1134 ret = ctxt->nameTab[ctxt->nameNr];
1135 ctxt->nameTab[ctxt->nameNr] = 0;
1136 return (ret);
1137}
Owen Taylor3473f882001-02-23 17:55:21 +00001138
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001139static int spacePush(xmlParserCtxtPtr ctxt, int val) {
Owen Taylor3473f882001-02-23 17:55:21 +00001140 if (ctxt->spaceNr >= ctxt->spaceMax) {
1141 ctxt->spaceMax *= 2;
1142 ctxt->spaceTab = (int *) xmlRealloc(ctxt->spaceTab,
1143 ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
1144 if (ctxt->spaceTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001145 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001146 return(0);
1147 }
1148 }
1149 ctxt->spaceTab[ctxt->spaceNr] = val;
1150 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr];
1151 return(ctxt->spaceNr++);
1152}
1153
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001154static int spacePop(xmlParserCtxtPtr ctxt) {
Owen Taylor3473f882001-02-23 17:55:21 +00001155 int ret;
1156 if (ctxt->spaceNr <= 0) return(0);
1157 ctxt->spaceNr--;
1158 if (ctxt->spaceNr > 0)
1159 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1];
1160 else
1161 ctxt->space = NULL;
1162 ret = ctxt->spaceTab[ctxt->spaceNr];
1163 ctxt->spaceTab[ctxt->spaceNr] = -1;
1164 return(ret);
1165}
1166
/*
 * Macros for accessing the content. Those should be used only by the parser,
 * and not exported.
 *
 * Dirty macros, i.e. one often needs to make assumptions about the context
 * to use them
 *
 *   CUR_PTR return the current pointer to the xmlChar to be parsed.
 *           To be used with extreme caution since operations consuming
 *           characters may move the input buffer to a different location !
 *   CUR     returns the current xmlChar value, i.e. an 8 bit value.
 *           This should be used internally by the parser
 *           only to compare to ASCII values otherwise it would break when
 *           running with UTF-8 encoding.
 *   RAW     same as CUR but in the input buffer, bypass any token
 *           extraction that may have been done
 *   NXT(n)  returns the n'th next xmlChar. Same as CUR, it should be used
 *           only to compare on ASCII based substring.
 *   SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
 *           strings without newlines within the parser.
 *   NEXT1   Skip 1 xmlChar, and must also be used only to skip 1 non-newline
 *           ASCII defined char within the parser.
 * Clean macros, not dependent on an ASCII context, expect UTF-8 encoding
 *
 *   NEXT    Skip to the next character, this does the proper decoding
 *           in UTF-8 mode. It also pops up unfinished entities on the fly.
 *   NEXTL(l) Skip the current unicode character of l xmlChars long.
 *   CUR_CHAR(l) returns the current unicode character (int), set l
 *           to the number of xmlChars used for the encoding [0-5].
 *   CUR_SCHAR  same but operate on a string instead of the context
 *   COPY_BUF  copy the current unicode char to the target buffer, increment
 *            the index
 *   GROW, SHRINK  handling of input buffers
 */
1201
/*
 * Raw accessors on the current input. They all expand against a variable
 * named ctxt which must be in scope at the point of use.
 * RAW and CUR have the same expansion.
 */
#define RAW (*ctxt->input->cur)
#define CUR (*ctxt->input->cur)
#define NXT(val) ctxt->input->cur[(val)]
#define CUR_PTR ctxt->input->cur

/*
 * SKIP(val): advance the character counter, the read pointer and the
 * column by val (val is evaluated three times), then hand over to the
 * PE reference handler when landing on '%', and pop the input when it
 * is exhausted and cannot be grown.
 */
#define SKIP(val) do {                                                  \
    ctxt->nbChars += (val);                                             \
    ctxt->input->cur += (val);                                          \
    ctxt->input->col += (val);                                          \
    if (*ctxt->input->cur == '%')                                       \
        xmlParserHandlePEReference(ctxt);                               \
    if ((*ctxt->input->cur == 0) &&                                     \
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))            \
        xmlPopInput(ctxt);                                              \
  } while (0)
1214
Daniel Veillarda880b122003-04-21 21:36:41 +00001215#define SHRINK if ((ctxt->progressive == 0) && \
Daniel Veillard6155d8a2003-08-19 15:01:28 +00001216 (ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \
1217 (ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \
Daniel Veillard46de64e2002-05-29 08:21:33 +00001218 xmlSHRINK (ctxt);
1219
1220static void xmlSHRINK (xmlParserCtxtPtr ctxt) {
1221 xmlParserInputShrink(ctxt->input);
1222 if ((*ctxt->input->cur == 0) &&
1223 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1224 xmlPopInput(ctxt);
Daniel Veillard48b2f892001-02-25 16:11:03 +00001225 }
Owen Taylor3473f882001-02-23 17:55:21 +00001226
Daniel Veillarda880b122003-04-21 21:36:41 +00001227#define GROW if ((ctxt->progressive == 0) && \
1228 (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK)) \
Daniel Veillard46de64e2002-05-29 08:21:33 +00001229 xmlGROW (ctxt);
1230
1231static void xmlGROW (xmlParserCtxtPtr ctxt) {
1232 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1233 if ((*ctxt->input->cur == 0) &&
1234 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1235 xmlPopInput(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +00001236}
Owen Taylor3473f882001-02-23 17:55:21 +00001237
/* Shorthands for the blank skipping and next-character routines. */
#define SKIP_BLANKS xmlSkipBlankChars(ctxt)

#define NEXT xmlNextChar(ctxt)

/*
 * NEXT1: step over exactly one xmlChar, updating the column and the
 * character counter, and try to grow the input when landing on a 0 byte.
 * Expands to a brace-enclosed block.
 */
#define NEXT1 {                                                         \
        ctxt->nbChars++;                                                \
        ctxt->input->cur++;                                             \
        ctxt->input->col++;                                             \
        if (*ctxt->input->cur == 0)                                     \
            xmlParserInputGrow(ctxt->input, INPUT_CHUNK);               \
    }

/*
 * NEXTL(l): step over the current character, which is l xmlChars long,
 * keeping line/col up to date, then hand over to the PE reference
 * handler when landing on '%'.
 */
#define NEXTL(l) do {                                                   \
    if (*(ctxt->input->cur) != '\n') {                                  \
        ctxt->input->col++;                                             \
    } else {                                                            \
        ctxt->input->line++;                                            \
        ctxt->input->col = 1;                                           \
    }                                                                   \
    ctxt->input->cur += l;                                              \
    if (*ctxt->input->cur == '%')                                       \
        xmlParserHandlePEReference(ctxt);                               \
  } while (0)

/* Decode the current character from the context, or from the string s. */
#define CUR_CHAR(l) xmlCurrentChar(ctxt, &l)
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, s, &l)

/*
 * COPY_BUF(l,b,i,v): append character v to buffer b at index i and
 * advance i; a plain store when l is 1, xmlCopyCharMultiByte() otherwise.
 * NOTE: the expansion is an unbraced if/else without trailing ';'.
 */
#define COPY_BUF(l,b,i,v)                                               \
    if (l == 1) b[i++] = (xmlChar) v;                                   \
    else i += xmlCopyCharMultiByte(&b[i],v)
Owen Taylor3473f882001-02-23 17:55:21 +00001264
1265/**
1266 * xmlSkipBlankChars:
1267 * @ctxt: the XML parser context
1268 *
1269 * skip all blanks character found at that point in the input streams.
1270 * It pops up finished entities in the process if allowable at that point.
1271 *
1272 * Returns the number of space chars skipped
1273 */
1274
1275int
1276xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
Daniel Veillard02141ea2001-04-30 11:46:40 +00001277 int res = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001278
1279 /*
1280 * It's Okay to use CUR/NEXT here since all the blanks are on
1281 * the ASCII range.
1282 */
Daniel Veillard02141ea2001-04-30 11:46:40 +00001283 if ((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) {
1284 const xmlChar *cur;
Owen Taylor3473f882001-02-23 17:55:21 +00001285 /*
Daniel Veillard02141ea2001-04-30 11:46:40 +00001286 * if we are in the document content, go really fast
Owen Taylor3473f882001-02-23 17:55:21 +00001287 */
Daniel Veillard02141ea2001-04-30 11:46:40 +00001288 cur = ctxt->input->cur;
1289 while (IS_BLANK(*cur)) {
1290 if (*cur == '\n') {
1291 ctxt->input->line++; ctxt->input->col = 1;
1292 }
1293 cur++;
1294 res++;
1295 if (*cur == 0) {
1296 ctxt->input->cur = cur;
1297 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1298 cur = ctxt->input->cur;
1299 }
1300 }
1301 ctxt->input->cur = cur;
1302 } else {
1303 int cur;
1304 do {
1305 cur = CUR;
1306 while (IS_BLANK(cur)) { /* CHECKED tstblanks.xml */
1307 NEXT;
1308 cur = CUR;
1309 res++;
1310 }
1311 while ((cur == 0) && (ctxt->inputNr > 1) &&
1312 (ctxt->instate != XML_PARSER_COMMENT)) {
1313 xmlPopInput(ctxt);
1314 cur = CUR;
1315 }
1316 /*
1317 * Need to handle support of entities branching here
1318 */
1319 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);
1320 } while (IS_BLANK(cur)); /* CHECKED tstblanks.xml */
1321 }
Owen Taylor3473f882001-02-23 17:55:21 +00001322 return(res);
1323}
1324
1325/************************************************************************
1326 * *
1327 * Commodity functions to handle entities *
1328 * *
1329 ************************************************************************/
1330
1331/**
1332 * xmlPopInput:
1333 * @ctxt: an XML parser context
1334 *
1335 * xmlPopInput: the current input pointed by ctxt->input came to an end
1336 * pop it and return the next char.
1337 *
1338 * Returns the current xmlChar in the parser context
1339 */
1340xmlChar
1341xmlPopInput(xmlParserCtxtPtr ctxt) {
1342 if (ctxt->inputNr == 1) return(0); /* End of main Input */
1343 if (xmlParserDebugEntities)
1344 xmlGenericError(xmlGenericErrorContext,
1345 "Popping input %d\n", ctxt->inputNr);
1346 xmlFreeInputStream(inputPop(ctxt));
Daniel Veillard561b7f82002-03-20 21:55:57 +00001347 if ((*ctxt->input->cur == 0) &&
Owen Taylor3473f882001-02-23 17:55:21 +00001348 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1349 return(xmlPopInput(ctxt));
1350 return(CUR);
1351}
1352
1353/**
1354 * xmlPushInput:
1355 * @ctxt: an XML parser context
1356 * @input: an XML parser input fragment (entity, XML fragment ...).
1357 *
1358 * xmlPushInput: switch to a new input stream which is stacked on top
1359 * of the previous one(s).
1360 */
1361void
1362xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
1363 if (input == NULL) return;
1364
1365 if (xmlParserDebugEntities) {
1366 if ((ctxt->input != NULL) && (ctxt->input->filename))
1367 xmlGenericError(xmlGenericErrorContext,
1368 "%s(%d): ", ctxt->input->filename,
1369 ctxt->input->line);
1370 xmlGenericError(xmlGenericErrorContext,
1371 "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
1372 }
1373 inputPush(ctxt, input);
1374 GROW;
1375}
1376
1377/**
1378 * xmlParseCharRef:
1379 * @ctxt: an XML parser context
1380 *
1381 * parse Reference declarations
1382 *
1383 * [66] CharRef ::= '&#' [0-9]+ ';' |
1384 * '&#x' [0-9a-fA-F]+ ';'
1385 *
1386 * [ WFC: Legal Character ]
1387 * Characters referred to using character references must match the
1388 * production for Char.
1389 *
1390 * Returns the value parsed (as an int), 0 in case of error
1391 */
1392int
1393xmlParseCharRef(xmlParserCtxtPtr ctxt) {
Daniel Veillard50582112001-03-26 22:52:16 +00001394 unsigned int val = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001395 int count = 0;
1396
Owen Taylor3473f882001-02-23 17:55:21 +00001397 /*
1398 * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
1399 */
Daniel Veillard561b7f82002-03-20 21:55:57 +00001400 if ((RAW == '&') && (NXT(1) == '#') &&
Owen Taylor3473f882001-02-23 17:55:21 +00001401 (NXT(2) == 'x')) {
1402 SKIP(3);
1403 GROW;
1404 while (RAW != ';') { /* loop blocked by count */
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00001405 if (count++ > 20) {
1406 count = 0;
1407 GROW;
1408 }
1409 if ((RAW >= '0') && (RAW <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +00001410 val = val * 16 + (CUR - '0');
1411 else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
1412 val = val * 16 + (CUR - 'a') + 10;
1413 else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
1414 val = val * 16 + (CUR - 'A') + 10;
1415 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001416 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001417 val = 0;
1418 break;
1419 }
1420 NEXT;
1421 count++;
1422 }
1423 if (RAW == ';') {
1424 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
Daniel Veillard77a90a72003-03-22 00:04:05 +00001425 ctxt->input->col++;
Owen Taylor3473f882001-02-23 17:55:21 +00001426 ctxt->nbChars ++;
1427 ctxt->input->cur++;
1428 }
Daniel Veillard561b7f82002-03-20 21:55:57 +00001429 } else if ((RAW == '&') && (NXT(1) == '#')) {
Owen Taylor3473f882001-02-23 17:55:21 +00001430 SKIP(2);
1431 GROW;
1432 while (RAW != ';') { /* loop blocked by count */
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00001433 if (count++ > 20) {
1434 count = 0;
1435 GROW;
1436 }
1437 if ((RAW >= '0') && (RAW <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +00001438 val = val * 10 + (CUR - '0');
1439 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001440 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001441 val = 0;
1442 break;
1443 }
1444 NEXT;
1445 count++;
1446 }
1447 if (RAW == ';') {
1448 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
Daniel Veillard77a90a72003-03-22 00:04:05 +00001449 ctxt->input->col++;
Owen Taylor3473f882001-02-23 17:55:21 +00001450 ctxt->nbChars ++;
1451 ctxt->input->cur++;
1452 }
1453 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001454 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001455 }
1456
1457 /*
1458 * [ WFC: Legal Character ]
1459 * Characters referred to using character references must match the
1460 * production for Char.
1461 */
Daniel Veillard73b013f2003-09-30 12:36:01 +00001462 if (xmlIsChar(val)) {
Owen Taylor3473f882001-02-23 17:55:21 +00001463 return(val);
1464 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00001465 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
1466 "xmlParseCharRef: invalid xmlChar value %d\n",
1467 val);
Owen Taylor3473f882001-02-23 17:55:21 +00001468 }
1469 return(0);
1470}
1471
1472/**
1473 * xmlParseStringCharRef:
1474 * @ctxt: an XML parser context
1475 * @str: a pointer to an index in the string
1476 *
1477 * parse Reference declarations, variant parsing from a string rather
1478 * than an an input flow.
1479 *
1480 * [66] CharRef ::= '&#' [0-9]+ ';' |
1481 * '&#x' [0-9a-fA-F]+ ';'
1482 *
1483 * [ WFC: Legal Character ]
1484 * Characters referred to using character references must match the
1485 * production for Char.
1486 *
1487 * Returns the value parsed (as an int), 0 in case of error, str will be
1488 * updated to the current value of the index
1489 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001490static int
Owen Taylor3473f882001-02-23 17:55:21 +00001491xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
1492 const xmlChar *ptr;
1493 xmlChar cur;
1494 int val = 0;
1495
1496 if ((str == NULL) || (*str == NULL)) return(0);
1497 ptr = *str;
1498 cur = *ptr;
1499 if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
1500 ptr += 3;
1501 cur = *ptr;
1502 while (cur != ';') { /* Non input consuming loop */
1503 if ((cur >= '0') && (cur <= '9'))
1504 val = val * 16 + (cur - '0');
1505 else if ((cur >= 'a') && (cur <= 'f'))
1506 val = val * 16 + (cur - 'a') + 10;
1507 else if ((cur >= 'A') && (cur <= 'F'))
1508 val = val * 16 + (cur - 'A') + 10;
1509 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001510 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001511 val = 0;
1512 break;
1513 }
1514 ptr++;
1515 cur = *ptr;
1516 }
1517 if (cur == ';')
1518 ptr++;
1519 } else if ((cur == '&') && (ptr[1] == '#')){
1520 ptr += 2;
1521 cur = *ptr;
1522 while (cur != ';') { /* Non input consuming loops */
1523 if ((cur >= '0') && (cur <= '9'))
1524 val = val * 10 + (cur - '0');
1525 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001526 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001527 val = 0;
1528 break;
1529 }
1530 ptr++;
1531 cur = *ptr;
1532 }
1533 if (cur == ';')
1534 ptr++;
1535 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001536 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001537 return(0);
1538 }
1539 *str = ptr;
1540
1541 /*
1542 * [ WFC: Legal Character ]
1543 * Characters referred to using character references must match the
1544 * production for Char.
1545 */
Daniel Veillard73b013f2003-09-30 12:36:01 +00001546 if (xmlIsChar(val)) {
Owen Taylor3473f882001-02-23 17:55:21 +00001547 return(val);
1548 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00001549 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
1550 "xmlParseStringCharRef: invalid xmlChar value %d\n",
1551 val);
Owen Taylor3473f882001-02-23 17:55:21 +00001552 }
1553 return(0);
1554}
1555
1556/**
Daniel Veillardf5582f12002-06-11 10:08:16 +00001557 * xmlNewBlanksWrapperInputStream:
1558 * @ctxt: an XML parser context
1559 * @entity: an Entity pointer
1560 *
1561 * Create a new input stream for wrapping
1562 * blanks around a PEReference
1563 *
1564 * Returns the new input stream or NULL
1565 */
1566
1567static void deallocblankswrapper (xmlChar *str) {xmlFree(str);}
1568
Daniel Veillardf4862f02002-09-10 11:13:43 +00001569static xmlParserInputPtr
Daniel Veillardf5582f12002-06-11 10:08:16 +00001570xmlNewBlanksWrapperInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
1571 xmlParserInputPtr input;
1572 xmlChar *buffer;
1573 size_t length;
1574 if (entity == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001575 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
1576 "xmlNewBlanksWrapperInputStream entity\n");
Daniel Veillardf5582f12002-06-11 10:08:16 +00001577 return(NULL);
1578 }
1579 if (xmlParserDebugEntities)
1580 xmlGenericError(xmlGenericErrorContext,
1581 "new blanks wrapper for entity: %s\n", entity->name);
1582 input = xmlNewInputStream(ctxt);
1583 if (input == NULL) {
1584 return(NULL);
1585 }
1586 length = xmlStrlen(entity->name) + 5;
Daniel Veillard3c908dc2003-04-19 00:07:51 +00001587 buffer = xmlMallocAtomic(length);
Daniel Veillardf5582f12002-06-11 10:08:16 +00001588 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001589 xmlErrMemory(ctxt, NULL);
Daniel Veillardf5582f12002-06-11 10:08:16 +00001590 return(NULL);
1591 }
1592 buffer [0] = ' ';
1593 buffer [1] = '%';
1594 buffer [length-3] = ';';
1595 buffer [length-2] = ' ';
1596 buffer [length-1] = 0;
1597 memcpy(buffer + 2, entity->name, length - 5);
1598 input->free = deallocblankswrapper;
1599 input->base = buffer;
1600 input->cur = buffer;
1601 input->length = length;
1602 input->end = &buffer[length];
1603 return(input);
1604}
1605
1606/**
Owen Taylor3473f882001-02-23 17:55:21 +00001607 * xmlParserHandlePEReference:
1608 * @ctxt: the parser context
1609 *
1610 * [69] PEReference ::= '%' Name ';'
1611 *
1612 * [ WFC: No Recursion ]
1613 * A parsed entity must not contain a recursive
1614 * reference to itself, either directly or indirectly.
1615 *
1616 * [ WFC: Entity Declared ]
1617 * In a document without any DTD, a document with only an internal DTD
1618 * subset which contains no parameter entity references, or a document
1619 * with "standalone='yes'", ... ... The declaration of a parameter
1620 * entity must precede any reference to it...
1621 *
1622 * [ VC: Entity Declared ]
1623 * In a document with an external subset or external parameter entities
1624 * with "standalone='no'", ... ... The declaration of a parameter entity
1625 * must precede any reference to it...
1626 *
1627 * [ WFC: In DTD ]
1628 * Parameter-entity references may only appear in the DTD.
1629 * NOTE: misleading but this is handled.
1630 *
1631 * A PEReference may have been detected in the current input stream
1632 * the handling is done accordingly to
1633 * http://www.w3.org/TR/REC-xml#entproc
1634 * i.e.
1635 * - Included in literal in entity values
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001636 * - Included as Parameter Entity reference within DTDs
Owen Taylor3473f882001-02-23 17:55:21 +00001637 */
1638void
1639xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00001640 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00001641 xmlEntityPtr entity = NULL;
1642 xmlParserInputPtr input;
1643
Owen Taylor3473f882001-02-23 17:55:21 +00001644 if (RAW != '%') return;
1645 switch(ctxt->instate) {
1646 case XML_PARSER_CDATA_SECTION:
1647 return;
1648 case XML_PARSER_COMMENT:
1649 return;
1650 case XML_PARSER_START_TAG:
1651 return;
1652 case XML_PARSER_END_TAG:
1653 return;
1654 case XML_PARSER_EOF:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001655 xmlFatalErr(ctxt, XML_ERR_PEREF_AT_EOF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001656 return;
1657 case XML_PARSER_PROLOG:
1658 case XML_PARSER_START:
1659 case XML_PARSER_MISC:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001660 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_PROLOG, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001661 return;
1662 case XML_PARSER_ENTITY_DECL:
1663 case XML_PARSER_CONTENT:
1664 case XML_PARSER_ATTRIBUTE_VALUE:
1665 case XML_PARSER_PI:
1666 case XML_PARSER_SYSTEM_LITERAL:
Daniel Veillard4a7ae502002-02-18 19:18:17 +00001667 case XML_PARSER_PUBLIC_LITERAL:
Owen Taylor3473f882001-02-23 17:55:21 +00001668 /* we just ignore it there */
1669 return;
1670 case XML_PARSER_EPILOG:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001671 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_EPILOG, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001672 return;
1673 case XML_PARSER_ENTITY_VALUE:
1674 /*
1675 * NOTE: in the case of entity values, we don't do the
1676 * substitution here since we need the literal
1677 * entity value to be able to save the internal
1678 * subset of the document.
1679 * This will be handled by xmlStringDecodeEntities
1680 */
1681 return;
1682 case XML_PARSER_DTD:
1683 /*
1684 * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
1685 * In the internal DTD subset, parameter-entity references
1686 * can occur only where markup declarations can occur, not
1687 * within markup declarations.
1688 * In that case this is handled in xmlParseMarkupDecl
1689 */
1690 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
1691 return;
Daniel Veillardf5582f12002-06-11 10:08:16 +00001692 if (IS_BLANK(NXT(1)) || NXT(1) == 0)
1693 return;
Owen Taylor3473f882001-02-23 17:55:21 +00001694 break;
1695 case XML_PARSER_IGNORE:
1696 return;
1697 }
1698
1699 NEXT;
1700 name = xmlParseName(ctxt);
1701 if (xmlParserDebugEntities)
1702 xmlGenericError(xmlGenericErrorContext,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001703 "PEReference: %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00001704 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001705 xmlFatalErr(ctxt, XML_ERR_PEREF_NO_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001706 } else {
1707 if (RAW == ';') {
1708 NEXT;
1709 if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL))
1710 entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
1711 if (entity == NULL) {
1712
1713 /*
1714 * [ WFC: Entity Declared ]
1715 * In a document without any DTD, a document with only an
1716 * internal DTD subset which contains no parameter entity
1717 * references, or a document with "standalone='yes'", ...
1718 * ... The declaration of a parameter entity must precede
1719 * any reference to it...
1720 */
1721 if ((ctxt->standalone == 1) ||
1722 ((ctxt->hasExternalSubset == 0) &&
1723 (ctxt->hasPErefs == 0))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00001724 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00001725 "PEReference: %%%s; not found\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00001726 } else {
1727 /*
1728 * [ VC: Entity Declared ]
1729 * In a document with an external subset or external
1730 * parameter entities with "standalone='no'", ...
1731 * ... The declaration of a parameter entity must precede
1732 * any reference to it...
1733 */
Daniel Veillard24eb9782003-10-04 21:08:09 +00001734 if ((ctxt->validate) && (ctxt->vctxt.error != NULL)) {
1735 xmlValidityError(ctxt, XML_WAR_UNDECLARED_ENTITY,
1736 "PEReference: %%%s; not found\n",
1737 name);
1738 } else
1739 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
1740 "PEReference: %%%s; not found\n",
1741 name, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001742 ctxt->valid = 0;
1743 }
Daniel Veillardf5582f12002-06-11 10:08:16 +00001744 } else if (ctxt->input->free != deallocblankswrapper) {
1745 input = xmlNewBlanksWrapperInputStream(ctxt, entity);
1746 xmlPushInput(ctxt, input);
Owen Taylor3473f882001-02-23 17:55:21 +00001747 } else {
1748 if ((entity->etype == XML_INTERNAL_PARAMETER_ENTITY) ||
1749 (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)) {
Daniel Veillard87a764e2001-06-20 17:41:10 +00001750 xmlChar start[4];
1751 xmlCharEncoding enc;
1752
Owen Taylor3473f882001-02-23 17:55:21 +00001753 /*
1754 * handle the extra spaces added before and after
1755 * c.f. http://www.w3.org/TR/REC-xml#as-PE
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001756 * this is done independently.
Owen Taylor3473f882001-02-23 17:55:21 +00001757 */
1758 input = xmlNewEntityInputStream(ctxt, entity);
1759 xmlPushInput(ctxt, input);
Daniel Veillard87a764e2001-06-20 17:41:10 +00001760
1761 /*
1762 * Get the 4 first bytes and decode the charset
1763 * if enc != XML_CHAR_ENCODING_NONE
1764 * plug some encoding conversion routines.
1765 */
1766 GROW
Daniel Veillarde059b892002-06-13 15:32:10 +00001767 if (entity->length >= 4) {
1768 start[0] = RAW;
1769 start[1] = NXT(1);
1770 start[2] = NXT(2);
1771 start[3] = NXT(3);
1772 enc = xmlDetectCharEncoding(start, 4);
1773 if (enc != XML_CHAR_ENCODING_NONE) {
1774 xmlSwitchEncoding(ctxt, enc);
1775 }
Daniel Veillard87a764e2001-06-20 17:41:10 +00001776 }
1777
Owen Taylor3473f882001-02-23 17:55:21 +00001778 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
Daniel Veillard8f597c32003-10-06 08:19:27 +00001779 (memcmp(CUR_PTR, "<?xml", 5) == 0) && (IS_BLANK(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +00001780 xmlParseTextDecl(ctxt);
1781 }
Owen Taylor3473f882001-02-23 17:55:21 +00001782 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00001783 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
1784 "PEReference: %s is not a parameter entity\n",
1785 name);
Owen Taylor3473f882001-02-23 17:55:21 +00001786 }
1787 }
1788 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001789 xmlFatalErr(ctxt, XML_ERR_PEREF_SEMICOL_MISSING, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001790 }
Owen Taylor3473f882001-02-23 17:55:21 +00001791 }
1792}
1793
/*
 * Macro used to grow the current buffer.
 *
 * Doubles buffer##_size and reallocates @buffer to the new size.  The
 * result of xmlRealloc() goes through a temporary so that the old block
 * is not lost on failure: in that case the old block is released,
 * @buffer is set to NULL and control jumps to the mem_error label, which
 * the enclosing function must provide.
 */
#define growBuffer(buffer) {						\
    xmlChar *growBuffer_tmp;						\
    buffer##_size *= 2;							\
    growBuffer_tmp = (xmlChar *)					\
	xmlRealloc(buffer, buffer##_size * sizeof(xmlChar));		\
    if (growBuffer_tmp == NULL) {					\
	xmlFree(buffer);						\
	buffer = NULL;							\
	goto mem_error;							\
    }									\
    buffer = growBuffer_tmp;						\
}
1803
1804/**
Daniel Veillard7a02cfe2003-09-25 12:18:34 +00001805 * xmlStringLenDecodeEntities:
Owen Taylor3473f882001-02-23 17:55:21 +00001806 * @ctxt: the parser context
1807 * @str: the input string
Daniel Veillarde57ec792003-09-10 10:50:59 +00001808 * @len: the string length
Owen Taylor3473f882001-02-23 17:55:21 +00001809 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
1810 * @end: an end marker xmlChar, 0 if none
1811 * @end2: an end marker xmlChar, 0 if none
1812 * @end3: an end marker xmlChar, 0 if none
1813 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001814 * Takes a entity string content and process to do the adequate substitutions.
Owen Taylor3473f882001-02-23 17:55:21 +00001815 *
1816 * [67] Reference ::= EntityRef | CharRef
1817 *
1818 * [69] PEReference ::= '%' Name ';'
1819 *
1820 * Returns A newly allocated string with the substitution done. The caller
1821 * must deallocate it !
1822 */
1823xmlChar *
Daniel Veillarde57ec792003-09-10 10:50:59 +00001824xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
1825 int what, xmlChar end, xmlChar end2, xmlChar end3) {
Owen Taylor3473f882001-02-23 17:55:21 +00001826 xmlChar *buffer = NULL;
1827 int buffer_size = 0;
1828
1829 xmlChar *current = NULL;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001830 const xmlChar *last;
Owen Taylor3473f882001-02-23 17:55:21 +00001831 xmlEntityPtr ent;
1832 int c,l;
1833 int nbchars = 0;
1834
Daniel Veillarde57ec792003-09-10 10:50:59 +00001835 if ((str == NULL) || (len < 0))
Owen Taylor3473f882001-02-23 17:55:21 +00001836 return(NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001837 last = str + len;
Owen Taylor3473f882001-02-23 17:55:21 +00001838
1839 if (ctxt->depth > 40) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001840 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001841 return(NULL);
1842 }
1843
1844 /*
1845 * allocate a translation buffer.
1846 */
1847 buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
Daniel Veillard3c908dc2003-04-19 00:07:51 +00001848 buffer = (xmlChar *) xmlMallocAtomic(buffer_size * sizeof(xmlChar));
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001849 if (buffer == NULL) goto mem_error;
Owen Taylor3473f882001-02-23 17:55:21 +00001850
1851 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001852 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +00001853 * we are operating on already parsed values.
1854 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00001855 if (str < last)
1856 c = CUR_SCHAR(str, l);
1857 else
1858 c = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001859 while ((c != 0) && (c != end) && /* non input consuming loop */
1860 (c != end2) && (c != end3)) {
1861
1862 if (c == 0) break;
1863 if ((c == '&') && (str[1] == '#')) {
1864 int val = xmlParseStringCharRef(ctxt, &str);
1865 if (val != 0) {
1866 COPY_BUF(0,buffer,nbchars,val);
1867 }
1868 } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
1869 if (xmlParserDebugEntities)
1870 xmlGenericError(xmlGenericErrorContext,
1871 "String decoding Entity Reference: %.30s\n",
1872 str);
1873 ent = xmlParseStringEntityRef(ctxt, &str);
1874 if ((ent != NULL) &&
1875 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
1876 if (ent->content != NULL) {
1877 COPY_BUF(0,buffer,nbchars,ent->content[0]);
1878 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00001879 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
1880 "predefined entity has no content\n");
Owen Taylor3473f882001-02-23 17:55:21 +00001881 }
1882 } else if ((ent != NULL) && (ent->content != NULL)) {
1883 xmlChar *rep;
1884
1885 ctxt->depth++;
1886 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
1887 0, 0, 0);
1888 ctxt->depth--;
1889 if (rep != NULL) {
1890 current = rep;
1891 while (*current != 0) { /* non input consuming loop */
1892 buffer[nbchars++] = *current++;
1893 if (nbchars >
1894 buffer_size - XML_PARSER_BUFFER_SIZE) {
1895 growBuffer(buffer);
1896 }
1897 }
1898 xmlFree(rep);
1899 }
1900 } else if (ent != NULL) {
1901 int i = xmlStrlen(ent->name);
1902 const xmlChar *cur = ent->name;
1903
1904 buffer[nbchars++] = '&';
1905 if (nbchars > buffer_size - i - XML_PARSER_BUFFER_SIZE) {
1906 growBuffer(buffer);
1907 }
1908 for (;i > 0;i--)
1909 buffer[nbchars++] = *cur++;
1910 buffer[nbchars++] = ';';
1911 }
1912 } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
1913 if (xmlParserDebugEntities)
1914 xmlGenericError(xmlGenericErrorContext,
1915 "String decoding PE Reference: %.30s\n", str);
1916 ent = xmlParseStringPEReference(ctxt, &str);
1917 if (ent != NULL) {
1918 xmlChar *rep;
1919
1920 ctxt->depth++;
1921 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
1922 0, 0, 0);
1923 ctxt->depth--;
1924 if (rep != NULL) {
1925 current = rep;
1926 while (*current != 0) { /* non input consuming loop */
1927 buffer[nbchars++] = *current++;
1928 if (nbchars >
1929 buffer_size - XML_PARSER_BUFFER_SIZE) {
1930 growBuffer(buffer);
1931 }
1932 }
1933 xmlFree(rep);
1934 }
1935 }
1936 } else {
1937 COPY_BUF(l,buffer,nbchars,c);
1938 str += l;
1939 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
1940 growBuffer(buffer);
1941 }
1942 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00001943 if (str < last)
1944 c = CUR_SCHAR(str, l);
1945 else
1946 c = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001947 }
1948 buffer[nbchars++] = 0;
1949 return(buffer);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001950
1951mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001952 xmlErrMemory(ctxt, NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001953 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001954}
1955
Daniel Veillarde57ec792003-09-10 10:50:59 +00001956/**
1957 * xmlStringDecodeEntities:
1958 * @ctxt: the parser context
1959 * @str: the input string
1960 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
1961 * @end: an end marker xmlChar, 0 if none
1962 * @end2: an end marker xmlChar, 0 if none
1963 * @end3: an end marker xmlChar, 0 if none
1964 *
1965 * Takes a entity string content and process to do the adequate substitutions.
1966 *
1967 * [67] Reference ::= EntityRef | CharRef
1968 *
1969 * [69] PEReference ::= '%' Name ';'
1970 *
1971 * Returns A newly allocated string with the substitution done. The caller
1972 * must deallocate it !
1973 */
1974xmlChar *
1975xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
1976 xmlChar end, xmlChar end2, xmlChar end3) {
1977 return(xmlStringLenDecodeEntities(ctxt, str, xmlStrlen(str), what,
1978 end, end2, end3));
1979}
Owen Taylor3473f882001-02-23 17:55:21 +00001980
1981/************************************************************************
1982 * *
1983 * Commodity functions to handle xmlChars *
1984 * *
1985 ************************************************************************/
1986
1987/**
1988 * xmlStrndup:
1989 * @cur: the input xmlChar *
1990 * @len: the len of @cur
1991 *
1992 * a strndup for array of xmlChar's
1993 *
1994 * Returns a new xmlChar * or NULL
1995 */
1996xmlChar *
1997xmlStrndup(const xmlChar *cur, int len) {
1998 xmlChar *ret;
1999
2000 if ((cur == NULL) || (len < 0)) return(NULL);
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002001 ret = (xmlChar *) xmlMallocAtomic((len + 1) * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002002 if (ret == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002003 xmlErrMemory(NULL, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002004 return(NULL);
2005 }
2006 memcpy(ret, cur, len * sizeof(xmlChar));
2007 ret[len] = 0;
2008 return(ret);
2009}
2010
2011/**
2012 * xmlStrdup:
2013 * @cur: the input xmlChar *
2014 *
2015 * a strdup for array of xmlChar's. Since they are supposed to be
2016 * encoded in UTF-8 or an encoding with 8bit based chars, we assume
2017 * a termination mark of '0'.
2018 *
2019 * Returns a new xmlChar * or NULL
2020 */
2021xmlChar *
2022xmlStrdup(const xmlChar *cur) {
2023 const xmlChar *p = cur;
2024
2025 if (cur == NULL) return(NULL);
2026 while (*p != 0) p++; /* non input consuming */
2027 return(xmlStrndup(cur, p - cur));
2028}
2029
2030/**
2031 * xmlCharStrndup:
2032 * @cur: the input char *
2033 * @len: the len of @cur
2034 *
2035 * a strndup for char's to xmlChar's
2036 *
2037 * Returns a new xmlChar * or NULL
2038 */
2039
2040xmlChar *
2041xmlCharStrndup(const char *cur, int len) {
2042 int i;
2043 xmlChar *ret;
2044
2045 if ((cur == NULL) || (len < 0)) return(NULL);
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002046 ret = (xmlChar *) xmlMallocAtomic((len + 1) * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002047 if (ret == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002048 xmlErrMemory(NULL, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002049 return(NULL);
2050 }
2051 for (i = 0;i < len;i++)
2052 ret[i] = (xmlChar) cur[i];
2053 ret[len] = 0;
2054 return(ret);
2055}
2056
2057/**
2058 * xmlCharStrdup:
2059 * @cur: the input char *
Owen Taylor3473f882001-02-23 17:55:21 +00002060 *
2061 * a strdup for char's to xmlChar's
2062 *
2063 * Returns a new xmlChar * or NULL
2064 */
2065
2066xmlChar *
2067xmlCharStrdup(const char *cur) {
2068 const char *p = cur;
2069
2070 if (cur == NULL) return(NULL);
2071 while (*p != '\0') p++; /* non input consuming */
2072 return(xmlCharStrndup(cur, p - cur));
2073}
2074
2075/**
2076 * xmlStrcmp:
2077 * @str1: the first xmlChar *
2078 * @str2: the second xmlChar *
2079 *
2080 * a strcmp for xmlChar's
2081 *
2082 * Returns the integer result of the comparison
2083 */
2084
2085int
2086xmlStrcmp(const xmlChar *str1, const xmlChar *str2) {
2087 register int tmp;
2088
2089 if (str1 == str2) return(0);
2090 if (str1 == NULL) return(-1);
2091 if (str2 == NULL) return(1);
2092 do {
2093 tmp = *str1++ - *str2;
2094 if (tmp != 0) return(tmp);
2095 } while (*str2++ != 0);
2096 return 0;
2097}
2098
2099/**
2100 * xmlStrEqual:
2101 * @str1: the first xmlChar *
2102 * @str2: the second xmlChar *
2103 *
2104 * Check if both string are equal of have same content
2105 * Should be a bit more readable and faster than xmlStrEqual()
2106 *
2107 * Returns 1 if they are equal, 0 if they are different
2108 */
2109
2110int
2111xmlStrEqual(const xmlChar *str1, const xmlChar *str2) {
2112 if (str1 == str2) return(1);
2113 if (str1 == NULL) return(0);
2114 if (str2 == NULL) return(0);
2115 do {
2116 if (*str1++ != *str2) return(0);
2117 } while (*str2++);
2118 return(1);
2119}
2120
2121/**
Daniel Veillarde57ec792003-09-10 10:50:59 +00002122 * xmlStrQEqual:
2123 * @pref: the prefix of the QName
2124 * @name: the localname of the QName
2125 * @str: the second xmlChar *
2126 *
2127 * Check if a QName is Equal to a given string
2128 *
2129 * Returns 1 if they are equal, 0 if they are different
2130 */
2131
2132int
2133xmlStrQEqual(const xmlChar *pref, const xmlChar *name, const xmlChar *str) {
2134 if (pref == NULL) return(xmlStrEqual(name, str));
2135 if (name == NULL) return(0);
2136 if (str == NULL) return(0);
2137
2138 do {
2139 if (*pref++ != *str) return(0);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002140 } while ((*str++) && (*pref));
Daniel Veillarde57ec792003-09-10 10:50:59 +00002141 if (*str++ != ':') return(0);
2142 do {
2143 if (*name++ != *str) return(0);
2144 } while (*str++);
2145 return(1);
2146}
2147
2148/**
Owen Taylor3473f882001-02-23 17:55:21 +00002149 * xmlStrncmp:
2150 * @str1: the first xmlChar *
2151 * @str2: the second xmlChar *
2152 * @len: the max comparison length
2153 *
2154 * a strncmp for xmlChar's
2155 *
2156 * Returns the integer result of the comparison
2157 */
2158
2159int
2160xmlStrncmp(const xmlChar *str1, const xmlChar *str2, int len) {
2161 register int tmp;
2162
2163 if (len <= 0) return(0);
2164 if (str1 == str2) return(0);
2165 if (str1 == NULL) return(-1);
2166 if (str2 == NULL) return(1);
2167 do {
2168 tmp = *str1++ - *str2;
2169 if (tmp != 0 || --len == 0) return(tmp);
2170 } while (*str2++ != 0);
2171 return 0;
2172}
2173
Daniel Veillardb44025c2001-10-11 22:55:55 +00002174static const xmlChar casemap[256] = {
Owen Taylor3473f882001-02-23 17:55:21 +00002175 0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,
2176 0x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,0x0F,
2177 0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,
2178 0x18,0x19,0x1A,0x1B,0x1C,0x1D,0x1E,0x1F,
2179 0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,
2180 0x28,0x29,0x2A,0x2B,0x2C,0x2D,0x2E,0x2F,
2181 0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,
2182 0x38,0x39,0x3A,0x3B,0x3C,0x3D,0x3E,0x3F,
2183 0x40,0x61,0x62,0x63,0x64,0x65,0x66,0x67,
2184 0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
2185 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,
2186 0x78,0x79,0x7A,0x7B,0x5C,0x5D,0x5E,0x5F,
2187 0x60,0x61,0x62,0x63,0x64,0x65,0x66,0x67,
2188 0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
2189 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,
2190 0x78,0x79,0x7A,0x7B,0x7C,0x7D,0x7E,0x7F,
2191 0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87,
2192 0x88,0x89,0x8A,0x8B,0x8C,0x8D,0x8E,0x8F,
2193 0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97,
2194 0x98,0x99,0x9A,0x9B,0x9C,0x9D,0x9E,0x9F,
2195 0xA0,0xA1,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7,
2196 0xA8,0xA9,0xAA,0xAB,0xAC,0xAD,0xAE,0xAF,
2197 0xB0,0xB1,0xB2,0xB3,0xB4,0xB5,0xB6,0xB7,
2198 0xB8,0xB9,0xBA,0xBB,0xBC,0xBD,0xBE,0xBF,
2199 0xC0,0xC1,0xC2,0xC3,0xC4,0xC5,0xC6,0xC7,
2200 0xC8,0xC9,0xCA,0xCB,0xCC,0xCD,0xCE,0xCF,
2201 0xD0,0xD1,0xD2,0xD3,0xD4,0xD5,0xD6,0xD7,
2202 0xD8,0xD9,0xDA,0xDB,0xDC,0xDD,0xDE,0xDF,
2203 0xE0,0xE1,0xE2,0xE3,0xE4,0xE5,0xE6,0xE7,
2204 0xE8,0xE9,0xEA,0xEB,0xEC,0xED,0xEE,0xEF,
2205 0xF0,0xF1,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7,
2206 0xF8,0xF9,0xFA,0xFB,0xFC,0xFD,0xFE,0xFF
2207};
2208
2209/**
2210 * xmlStrcasecmp:
2211 * @str1: the first xmlChar *
2212 * @str2: the second xmlChar *
2213 *
2214 * a strcasecmp for xmlChar's
2215 *
2216 * Returns the integer result of the comparison
2217 */
2218
2219int
2220xmlStrcasecmp(const xmlChar *str1, const xmlChar *str2) {
2221 register int tmp;
2222
2223 if (str1 == str2) return(0);
2224 if (str1 == NULL) return(-1);
2225 if (str2 == NULL) return(1);
2226 do {
2227 tmp = casemap[*str1++] - casemap[*str2];
2228 if (tmp != 0) return(tmp);
2229 } while (*str2++ != 0);
2230 return 0;
2231}
2232
2233/**
2234 * xmlStrncasecmp:
2235 * @str1: the first xmlChar *
2236 * @str2: the second xmlChar *
2237 * @len: the max comparison length
2238 *
2239 * a strncasecmp for xmlChar's
2240 *
2241 * Returns the integer result of the comparison
2242 */
2243
2244int
2245xmlStrncasecmp(const xmlChar *str1, const xmlChar *str2, int len) {
2246 register int tmp;
2247
2248 if (len <= 0) return(0);
2249 if (str1 == str2) return(0);
2250 if (str1 == NULL) return(-1);
2251 if (str2 == NULL) return(1);
2252 do {
2253 tmp = casemap[*str1++] - casemap[*str2];
2254 if (tmp != 0 || --len == 0) return(tmp);
2255 } while (*str2++ != 0);
2256 return 0;
2257}
2258
2259/**
2260 * xmlStrchr:
2261 * @str: the xmlChar * array
2262 * @val: the xmlChar to search
2263 *
2264 * a strchr for xmlChar's
2265 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002266 * Returns the xmlChar * for the first occurrence or NULL.
Owen Taylor3473f882001-02-23 17:55:21 +00002267 */
2268
2269const xmlChar *
2270xmlStrchr(const xmlChar *str, xmlChar val) {
2271 if (str == NULL) return(NULL);
2272 while (*str != 0) { /* non input consuming */
2273 if (*str == val) return((xmlChar *) str);
2274 str++;
2275 }
2276 return(NULL);
2277}
2278
2279/**
2280 * xmlStrstr:
2281 * @str: the xmlChar * array (haystack)
2282 * @val: the xmlChar to search (needle)
2283 *
2284 * a strstr for xmlChar's
2285 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002286 * Returns the xmlChar * for the first occurrence or NULL.
Owen Taylor3473f882001-02-23 17:55:21 +00002287 */
2288
2289const xmlChar *
Daniel Veillard77044732001-06-29 21:31:07 +00002290xmlStrstr(const xmlChar *str, const xmlChar *val) {
Owen Taylor3473f882001-02-23 17:55:21 +00002291 int n;
2292
2293 if (str == NULL) return(NULL);
2294 if (val == NULL) return(NULL);
2295 n = xmlStrlen(val);
2296
2297 if (n == 0) return(str);
2298 while (*str != 0) { /* non input consuming */
2299 if (*str == *val) {
2300 if (!xmlStrncmp(str, val, n)) return((const xmlChar *) str);
2301 }
2302 str++;
2303 }
2304 return(NULL);
2305}
2306
2307/**
2308 * xmlStrcasestr:
2309 * @str: the xmlChar * array (haystack)
2310 * @val: the xmlChar to search (needle)
2311 *
2312 * a case-ignoring strstr for xmlChar's
2313 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002314 * Returns the xmlChar * for the first occurrence or NULL.
Owen Taylor3473f882001-02-23 17:55:21 +00002315 */
2316
2317const xmlChar *
2318xmlStrcasestr(const xmlChar *str, xmlChar *val) {
2319 int n;
2320
2321 if (str == NULL) return(NULL);
2322 if (val == NULL) return(NULL);
2323 n = xmlStrlen(val);
2324
2325 if (n == 0) return(str);
2326 while (*str != 0) { /* non input consuming */
2327 if (casemap[*str] == casemap[*val])
2328 if (!xmlStrncasecmp(str, val, n)) return(str);
2329 str++;
2330 }
2331 return(NULL);
2332}
2333
2334/**
2335 * xmlStrsub:
2336 * @str: the xmlChar * array (haystack)
2337 * @start: the index of the first char (zero based)
2338 * @len: the length of the substring
2339 *
2340 * Extract a substring of a given string
2341 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002342 * Returns the xmlChar * for the first occurrence or NULL.
Owen Taylor3473f882001-02-23 17:55:21 +00002343 */
2344
2345xmlChar *
2346xmlStrsub(const xmlChar *str, int start, int len) {
2347 int i;
2348
2349 if (str == NULL) return(NULL);
2350 if (start < 0) return(NULL);
2351 if (len < 0) return(NULL);
2352
2353 for (i = 0;i < start;i++) {
2354 if (*str == 0) return(NULL);
2355 str++;
2356 }
2357 if (*str == 0) return(NULL);
2358 return(xmlStrndup(str, len));
2359}
2360
2361/**
2362 * xmlStrlen:
2363 * @str: the xmlChar * array
2364 *
2365 * length of a xmlChar's string
2366 *
2367 * Returns the number of xmlChar contained in the ARRAY.
2368 */
2369
2370int
2371xmlStrlen(const xmlChar *str) {
2372 int len = 0;
2373
2374 if (str == NULL) return(0);
2375 while (*str != 0) { /* non input consuming */
2376 str++;
2377 len++;
2378 }
2379 return(len);
2380}
2381
2382/**
2383 * xmlStrncat:
2384 * @cur: the original xmlChar * array
2385 * @add: the xmlChar * array added
2386 * @len: the length of @add
2387 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002388 * a strncat for array of xmlChar's, it will extend @cur with the len
Owen Taylor3473f882001-02-23 17:55:21 +00002389 * first bytes of @add.
2390 *
2391 * Returns a new xmlChar *, the original @cur is reallocated if needed
2392 * and should not be freed
2393 */
2394
2395xmlChar *
2396xmlStrncat(xmlChar *cur, const xmlChar *add, int len) {
2397 int size;
2398 xmlChar *ret;
2399
2400 if ((add == NULL) || (len == 0))
2401 return(cur);
2402 if (cur == NULL)
2403 return(xmlStrndup(add, len));
2404
2405 size = xmlStrlen(cur);
2406 ret = (xmlChar *) xmlRealloc(cur, (size + len + 1) * sizeof(xmlChar));
2407 if (ret == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002408 xmlErrMemory(NULL, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002409 return(cur);
2410 }
2411 memcpy(&ret[size], add, len * sizeof(xmlChar));
2412 ret[size + len] = 0;
2413 return(ret);
2414}
2415
2416/**
2417 * xmlStrcat:
2418 * @cur: the original xmlChar * array
2419 * @add: the xmlChar * array added
2420 *
2421 * a strcat for array of xmlChar's. Since they are supposed to be
2422 * encoded in UTF-8 or an encoding with 8bit based chars, we assume
2423 * a termination mark of '0'.
2424 *
2425 * Returns a new xmlChar * containing the concatenated string.
2426 */
2427xmlChar *
2428xmlStrcat(xmlChar *cur, const xmlChar *add) {
2429 const xmlChar *p = add;
2430
2431 if (add == NULL) return(cur);
2432 if (cur == NULL)
2433 return(xmlStrdup(add));
2434
2435 while (*p != 0) p++; /* non input consuming */
2436 return(xmlStrncat(cur, add, p - add));
2437}
2438
Aleksey Sanine7acf432003-10-02 20:05:27 +00002439/**
2440 * xmlStrPrintf:
2441 * @buf: the result buffer.
2442 * @len: the result buffer length.
2443 * @msg: the message with printf formatting.
2444 * @...: extra parameters for the message.
2445 *
2446 * Formats @msg and places result into @buf.
2447 *
2448 * Returns the number of characters written to @buf or -1 if an error occurs.
2449 */
2450int
2451xmlStrPrintf(xmlChar *buf, int len, const xmlChar *msg, ...) {
2452 va_list args;
2453 int ret;
2454
2455 if((buf == NULL) || (msg == NULL)) {
2456 return(-1);
2457 }
2458
2459 va_start(args, msg);
Daniel Veillardbb5abab2003-10-03 22:21:51 +00002460 ret = vsnprintf((char *) buf, len, (const char *) msg, args);
Aleksey Sanine7acf432003-10-02 20:05:27 +00002461 va_end(args);
Daniel Veillardd96f6d32003-10-07 21:25:12 +00002462 buf[len - 1] = 0; /* be safe ! */
Aleksey Sanine7acf432003-10-02 20:05:27 +00002463
2464 return(ret);
2465}
2466
Owen Taylor3473f882001-02-23 17:55:21 +00002467/************************************************************************
2468 * *
2469 * Commodity functions, cleanup needed ? *
2470 * *
2471 ************************************************************************/
2472
2473/**
2474 * areBlanks:
2475 * @ctxt: an XML parser context
2476 * @str: a xmlChar *
2477 * @len: the size of @str
2478 *
2479 * Is this a sequence of blank chars that one can ignore ?
2480 *
2481 * Returns 1 if ignorable 0 otherwise.
2482 */
2483
2484static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len) {
2485 int i, ret;
2486 xmlNodePtr lastChild;
2487
Daniel Veillard05c13a22001-09-09 08:38:09 +00002488 /*
2489 * Don't spend time trying to differentiate them, the same callback is
2490 * used !
2491 */
2492 if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters)
Daniel Veillard2f362242001-03-02 17:36:21 +00002493 return(0);
2494
Owen Taylor3473f882001-02-23 17:55:21 +00002495 /*
2496 * Check for xml:space value.
2497 */
2498 if (*(ctxt->space) == 1)
2499 return(0);
2500
2501 /*
2502 * Check that the string is made of blanks
2503 */
2504 for (i = 0;i < len;i++)
2505 if (!(IS_BLANK(str[i]))) return(0);
2506
2507 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002508 * Look if the element is mixed content in the DTD if available
Owen Taylor3473f882001-02-23 17:55:21 +00002509 */
Daniel Veillard6dd398f2001-07-25 22:41:03 +00002510 if (ctxt->node == NULL) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002511 if (ctxt->myDoc != NULL) {
2512 ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
2513 if (ret == 0) return(1);
2514 if (ret == 1) return(0);
2515 }
2516
2517 /*
2518 * Otherwise, heuristic :-\
2519 */
Daniel Veillard561b7f82002-03-20 21:55:57 +00002520 if (RAW != '<') return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002521 if ((ctxt->node->children == NULL) &&
2522 (RAW == '<') && (NXT(1) == '/')) return(0);
2523
2524 lastChild = xmlGetLastChild(ctxt->node);
2525 if (lastChild == NULL) {
Daniel Veillard7db37732001-07-12 01:20:08 +00002526 if ((ctxt->node->type != XML_ELEMENT_NODE) &&
2527 (ctxt->node->content != NULL)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002528 } else if (xmlNodeIsText(lastChild))
2529 return(0);
2530 else if ((ctxt->node->children != NULL) &&
2531 (xmlNodeIsText(ctxt->node->children)))
2532 return(0);
2533 return(1);
2534}
2535
Owen Taylor3473f882001-02-23 17:55:21 +00002536/************************************************************************
2537 * *
2538 * Extra stuff for namespace support *
2539 * Relates to http://www.w3.org/TR/WD-xml-names *
2540 * *
2541 ************************************************************************/
2542
2543/**
2544 * xmlSplitQName:
2545 * @ctxt: an XML parser context
2546 * @name: an XML parser context
2547 * @prefix: a xmlChar **
2548 *
2549 * parse an UTF8 encoded XML qualified name string
2550 *
2551 * [NS 5] QName ::= (Prefix ':')? LocalPart
2552 *
2553 * [NS 6] Prefix ::= NCName
2554 *
2555 * [NS 7] LocalPart ::= NCName
2556 *
2557 * Returns the local part, and prefix is updated
2558 * to get the Prefix if any.
2559 */
2560
2561xmlChar *
2562xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
2563 xmlChar buf[XML_MAX_NAMELEN + 5];
2564 xmlChar *buffer = NULL;
2565 int len = 0;
2566 int max = XML_MAX_NAMELEN;
2567 xmlChar *ret = NULL;
2568 const xmlChar *cur = name;
2569 int c;
2570
2571 *prefix = NULL;
2572
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00002573 if (cur == NULL) return(NULL);
2574
Owen Taylor3473f882001-02-23 17:55:21 +00002575#ifndef XML_XML_NAMESPACE
2576 /* xml: prefix is not really a namespace */
2577 if ((cur[0] == 'x') && (cur[1] == 'm') &&
2578 (cur[2] == 'l') && (cur[3] == ':'))
2579 return(xmlStrdup(name));
2580#endif
2581
Daniel Veillard597bc482003-07-24 16:08:28 +00002582 /* nasty but well=formed */
Owen Taylor3473f882001-02-23 17:55:21 +00002583 if (cur[0] == ':')
2584 return(xmlStrdup(name));
2585
2586 c = *cur++;
2587 while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
2588 buf[len++] = c;
2589 c = *cur++;
2590 }
2591 if (len >= max) {
2592 /*
2593 * Okay someone managed to make a huge name, so he's ready to pay
2594 * for the processing speed.
2595 */
2596 max = len * 2;
2597
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002598 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002599 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002600 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002601 return(NULL);
2602 }
2603 memcpy(buffer, buf, len);
2604 while ((c != 0) && (c != ':')) { /* tested bigname.xml */
2605 if (len + 10 > max) {
2606 max *= 2;
2607 buffer = (xmlChar *) xmlRealloc(buffer,
2608 max * sizeof(xmlChar));
2609 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002610 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002611 return(NULL);
2612 }
2613 }
2614 buffer[len++] = c;
2615 c = *cur++;
2616 }
2617 buffer[len] = 0;
2618 }
2619
Daniel Veillard597bc482003-07-24 16:08:28 +00002620 /* nasty but well=formed
2621 if ((c == ':') && (*cur == 0)) {
2622 return(xmlStrdup(name));
2623 } */
2624
Owen Taylor3473f882001-02-23 17:55:21 +00002625 if (buffer == NULL)
2626 ret = xmlStrndup(buf, len);
2627 else {
2628 ret = buffer;
2629 buffer = NULL;
2630 max = XML_MAX_NAMELEN;
2631 }
2632
2633
2634 if (c == ':') {
Daniel Veillardbb284f42002-10-16 18:02:47 +00002635 c = *cur;
Owen Taylor3473f882001-02-23 17:55:21 +00002636 *prefix = ret;
Daniel Veillard597bc482003-07-24 16:08:28 +00002637 if (c == 0) {
Daniel Veillard8d73bcb2003-08-04 01:06:15 +00002638 return(xmlStrndup(BAD_CAST "", 0));
Daniel Veillard597bc482003-07-24 16:08:28 +00002639 }
Owen Taylor3473f882001-02-23 17:55:21 +00002640 len = 0;
2641
Daniel Veillardbb284f42002-10-16 18:02:47 +00002642 /*
2643 * Check that the first character is proper to start
2644 * a new name
2645 */
2646 if (!(((c >= 0x61) && (c <= 0x7A)) ||
2647 ((c >= 0x41) && (c <= 0x5A)) ||
2648 (c == '_') || (c == ':'))) {
2649 int l;
2650 int first = CUR_SCHAR(cur, l);
2651
2652 if (!IS_LETTER(first) && (first != '_')) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00002653 xmlFatalErrMsgStr(ctxt, XML_NS_ERR_QNAME,
Daniel Veillardbb284f42002-10-16 18:02:47 +00002654 "Name %s is not XML Namespace compliant\n",
Daniel Veillardbc92eca2003-09-15 09:48:06 +00002655 name);
Daniel Veillardbb284f42002-10-16 18:02:47 +00002656 }
2657 }
2658 cur++;
2659
Owen Taylor3473f882001-02-23 17:55:21 +00002660 while ((c != 0) && (len < max)) { /* tested bigname2.xml */
2661 buf[len++] = c;
2662 c = *cur++;
2663 }
2664 if (len >= max) {
2665 /*
2666 * Okay someone managed to make a huge name, so he's ready to pay
2667 * for the processing speed.
2668 */
2669 max = len * 2;
2670
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002671 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002672 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002673 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002674 return(NULL);
2675 }
2676 memcpy(buffer, buf, len);
2677 while (c != 0) { /* tested bigname2.xml */
2678 if (len + 10 > max) {
2679 max *= 2;
2680 buffer = (xmlChar *) xmlRealloc(buffer,
2681 max * sizeof(xmlChar));
2682 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002683 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002684 return(NULL);
2685 }
2686 }
2687 buffer[len++] = c;
2688 c = *cur++;
2689 }
2690 buffer[len] = 0;
2691 }
2692
2693 if (buffer == NULL)
2694 ret = xmlStrndup(buf, len);
2695 else {
2696 ret = buffer;
2697 }
2698 }
2699
2700 return(ret);
2701}
2702
2703/************************************************************************
2704 * *
2705 * The parser itself *
2706 * Relates to http://www.w3.org/TR/REC-xml *
2707 * *
2708 ************************************************************************/
2709
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002710static const xmlChar * xmlParseNameComplex(xmlParserCtxtPtr ctxt);
Daniel Veillarde57ec792003-09-10 10:50:59 +00002711static xmlChar * xmlParseAttValueInternal(xmlParserCtxtPtr ctxt,
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002712 int *len, int *alloc, int normalize);
Daniel Veillard0fb18932003-09-07 09:14:37 +00002713
Owen Taylor3473f882001-02-23 17:55:21 +00002714/**
2715 * xmlParseName:
2716 * @ctxt: an XML parser context
2717 *
2718 * parse an XML name.
2719 *
2720 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
2721 * CombiningChar | Extender
2722 *
2723 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
2724 *
2725 * [6] Names ::= Name (S Name)*
2726 *
2727 * Returns the Name parsed or NULL
2728 */
2729
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002730const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00002731xmlParseName(xmlParserCtxtPtr ctxt) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00002732 const xmlChar *in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002733 const xmlChar *ret;
Owen Taylor3473f882001-02-23 17:55:21 +00002734 int count = 0;
2735
2736 GROW;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002737
2738 /*
2739 * Accelerator for simple ASCII names
2740 */
2741 in = ctxt->input->cur;
2742 if (((*in >= 0x61) && (*in <= 0x7A)) ||
2743 ((*in >= 0x41) && (*in <= 0x5A)) ||
2744 (*in == '_') || (*in == ':')) {
2745 in++;
2746 while (((*in >= 0x61) && (*in <= 0x7A)) ||
2747 ((*in >= 0x41) && (*in <= 0x5A)) ||
2748 ((*in >= 0x30) && (*in <= 0x39)) ||
Daniel Veillard76d66f42001-05-16 21:05:17 +00002749 (*in == '_') || (*in == '-') ||
2750 (*in == ':') || (*in == '.'))
Daniel Veillard48b2f892001-02-25 16:11:03 +00002751 in++;
Daniel Veillard76d66f42001-05-16 21:05:17 +00002752 if ((*in > 0) && (*in < 0x80)) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00002753 count = in - ctxt->input->cur;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002754 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
Daniel Veillard48b2f892001-02-25 16:11:03 +00002755 ctxt->input->cur = in;
Daniel Veillard77a90a72003-03-22 00:04:05 +00002756 ctxt->nbChars += count;
2757 ctxt->input->col += count;
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002758 if (ret == NULL)
2759 xmlErrMemory(ctxt, NULL);
Daniel Veillard48b2f892001-02-25 16:11:03 +00002760 return(ret);
2761 }
2762 }
Daniel Veillard2f362242001-03-02 17:36:21 +00002763 return(xmlParseNameComplex(ctxt));
Daniel Veillard21a0f912001-02-25 19:54:14 +00002764}
Daniel Veillard48b2f892001-02-25 16:11:03 +00002765
Daniel Veillard46de64e2002-05-29 08:21:33 +00002766/**
2767 * xmlParseNameAndCompare:
2768 * @ctxt: an XML parser context
2769 *
2770 * parse an XML name and compares for match
2771 * (specialized for endtag parsing)
2772 *
Daniel Veillard46de64e2002-05-29 08:21:33 +00002773 * Returns NULL for an illegal name, (xmlChar*) 1 for success
2774 * and the name for mismatch
2775 */
2776
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002777static const xmlChar *
Daniel Veillard46de64e2002-05-29 08:21:33 +00002778xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) {
2779 const xmlChar *cmp = other;
2780 const xmlChar *in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002781 const xmlChar *ret;
Daniel Veillard46de64e2002-05-29 08:21:33 +00002782
2783 GROW;
2784
2785 in = ctxt->input->cur;
2786 while (*in != 0 && *in == *cmp) {
2787 ++in;
2788 ++cmp;
2789 }
2790 if (*cmp == 0 && (*in == '>' || IS_BLANK (*in))) {
2791 /* success */
2792 ctxt->input->cur = in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002793 return (const xmlChar*) 1;
Daniel Veillard46de64e2002-05-29 08:21:33 +00002794 }
2795 /* failure (or end of input buffer), check with full function */
2796 ret = xmlParseName (ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00002797 /* strings coming from the dictionnary direct compare possible */
2798 if (ret == other) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002799 return (const xmlChar*) 1;
Daniel Veillard46de64e2002-05-29 08:21:33 +00002800 }
2801 return ret;
2802}
2803
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002804static const xmlChar *
Daniel Veillard21a0f912001-02-25 19:54:14 +00002805xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
Daniel Veillard21a0f912001-02-25 19:54:14 +00002806 int len = 0, l;
2807 int c;
2808 int count = 0;
2809
2810 /*
2811 * Handler for more complex cases
2812 */
2813 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00002814 c = CUR_CHAR(l);
2815 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
2816 (!IS_LETTER(c) && (c != '_') &&
2817 (c != ':'))) {
2818 return(NULL);
2819 }
2820
2821 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
Daniel Veillard73b013f2003-09-30 12:36:01 +00002822 ((xmlIsLetter(c)) || (xmlIsDigit(c)) ||
Owen Taylor3473f882001-02-23 17:55:21 +00002823 (c == '.') || (c == '-') ||
2824 (c == '_') || (c == ':') ||
Daniel Veillard73b013f2003-09-30 12:36:01 +00002825 (xmlIsCombining(c)) ||
2826 (xmlIsExtender(c)))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002827 if (count++ > 100) {
2828 count = 0;
2829 GROW;
2830 }
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002831 len += l;
Owen Taylor3473f882001-02-23 17:55:21 +00002832 NEXTL(l);
2833 c = CUR_CHAR(l);
Owen Taylor3473f882001-02-23 17:55:21 +00002834 }
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002835 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
Owen Taylor3473f882001-02-23 17:55:21 +00002836}
2837
2838/**
2839 * xmlParseStringName:
2840 * @ctxt: an XML parser context
2841 * @str: a pointer to the string pointer (IN/OUT)
2842 *
2843 * parse an XML name.
2844 *
2845 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
2846 * CombiningChar | Extender
2847 *
2848 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
2849 *
2850 * [6] Names ::= Name (S Name)*
2851 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002852 * Returns the Name parsed or NULL. The @str pointer
Owen Taylor3473f882001-02-23 17:55:21 +00002853 * is updated to the current location in the string.
2854 */
2855
Daniel Veillard56a4cb82001-03-24 17:00:36 +00002856static xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00002857xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
2858 xmlChar buf[XML_MAX_NAMELEN + 5];
2859 const xmlChar *cur = *str;
2860 int len = 0, l;
2861 int c;
2862
2863 c = CUR_SCHAR(cur, l);
Daniel Veillard73b013f2003-09-30 12:36:01 +00002864 if (!xmlIsLetter(c) && (c != '_') &&
Owen Taylor3473f882001-02-23 17:55:21 +00002865 (c != ':')) {
2866 return(NULL);
2867 }
2868
Daniel Veillard73b013f2003-09-30 12:36:01 +00002869 while ((xmlIsLetter(c)) || (xmlIsDigit(c)) || /* test bigentname.xml */
Owen Taylor3473f882001-02-23 17:55:21 +00002870 (c == '.') || (c == '-') ||
2871 (c == '_') || (c == ':') ||
Daniel Veillard73b013f2003-09-30 12:36:01 +00002872 (xmlIsCombining(c)) ||
2873 (xmlIsExtender(c))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002874 COPY_BUF(l,buf,len,c);
2875 cur += l;
2876 c = CUR_SCHAR(cur, l);
2877 if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
2878 /*
2879 * Okay someone managed to make a huge name, so he's ready to pay
2880 * for the processing speed.
2881 */
2882 xmlChar *buffer;
2883 int max = len * 2;
2884
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002885 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002886 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002887 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002888 return(NULL);
2889 }
2890 memcpy(buffer, buf, len);
Daniel Veillard73b013f2003-09-30 12:36:01 +00002891 while ((xmlIsLetter(c)) || (xmlIsDigit(c)) ||
2892 /* test bigentname.xml */
Owen Taylor3473f882001-02-23 17:55:21 +00002893 (c == '.') || (c == '-') ||
2894 (c == '_') || (c == ':') ||
Daniel Veillard73b013f2003-09-30 12:36:01 +00002895 (xmlIsCombining(c)) ||
2896 (xmlIsExtender(c))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002897 if (len + 10 > max) {
2898 max *= 2;
2899 buffer = (xmlChar *) xmlRealloc(buffer,
2900 max * sizeof(xmlChar));
2901 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002902 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002903 return(NULL);
2904 }
2905 }
2906 COPY_BUF(l,buffer,len,c);
2907 cur += l;
2908 c = CUR_SCHAR(cur, l);
2909 }
2910 buffer[len] = 0;
2911 *str = cur;
2912 return(buffer);
2913 }
2914 }
2915 *str = cur;
2916 return(xmlStrndup(buf, len));
2917}
2918
2919/**
2920 * xmlParseNmtoken:
2921 * @ctxt: an XML parser context
2922 *
2923 * parse an XML Nmtoken.
2924 *
2925 * [7] Nmtoken ::= (NameChar)+
2926 *
2927 * [8] Nmtokens ::= Nmtoken (S Nmtoken)*
2928 *
2929 * Returns the Nmtoken parsed or NULL
2930 */
2931
2932xmlChar *
2933xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
2934 xmlChar buf[XML_MAX_NAMELEN + 5];
2935 int len = 0, l;
2936 int c;
2937 int count = 0;
2938
2939 GROW;
2940 c = CUR_CHAR(l);
2941
Daniel Veillard73b013f2003-09-30 12:36:01 +00002942 while ((xmlIsLetter(c)) || (xmlIsDigit(c)) || /* test bigtoken.xml */
Owen Taylor3473f882001-02-23 17:55:21 +00002943 (c == '.') || (c == '-') ||
2944 (c == '_') || (c == ':') ||
Daniel Veillard73b013f2003-09-30 12:36:01 +00002945 (xmlIsCombining(c)) ||
2946 (xmlIsExtender(c))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002947 if (count++ > 100) {
2948 count = 0;
2949 GROW;
2950 }
2951 COPY_BUF(l,buf,len,c);
2952 NEXTL(l);
2953 c = CUR_CHAR(l);
2954 if (len >= XML_MAX_NAMELEN) {
2955 /*
2956 * Okay someone managed to make a huge token, so he's ready to pay
2957 * for the processing speed.
2958 */
2959 xmlChar *buffer;
2960 int max = len * 2;
2961
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002962 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002963 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002964 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002965 return(NULL);
2966 }
2967 memcpy(buffer, buf, len);
Daniel Veillard73b013f2003-09-30 12:36:01 +00002968 while ((xmlIsLetter(c)) || (xmlIsDigit(c)) || /* test bigtoken.xml */
Owen Taylor3473f882001-02-23 17:55:21 +00002969 (c == '.') || (c == '-') ||
2970 (c == '_') || (c == ':') ||
Daniel Veillard73b013f2003-09-30 12:36:01 +00002971 (xmlIsCombining(c)) ||
2972 (xmlIsExtender(c))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002973 if (count++ > 100) {
2974 count = 0;
2975 GROW;
2976 }
2977 if (len + 10 > max) {
2978 max *= 2;
2979 buffer = (xmlChar *) xmlRealloc(buffer,
2980 max * sizeof(xmlChar));
2981 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002982 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002983 return(NULL);
2984 }
2985 }
2986 COPY_BUF(l,buffer,len,c);
2987 NEXTL(l);
2988 c = CUR_CHAR(l);
2989 }
2990 buffer[len] = 0;
2991 return(buffer);
2992 }
2993 }
2994 if (len == 0)
2995 return(NULL);
2996 return(xmlStrndup(buf, len));
2997}
2998
2999/**
3000 * xmlParseEntityValue:
3001 * @ctxt: an XML parser context
3002 * @orig: if non-NULL store a copy of the original entity value
3003 *
3004 * parse a value for ENTITY declarations
3005 *
3006 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
3007 * "'" ([^%&'] | PEReference | Reference)* "'"
3008 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003009 * Returns the EntityValue parsed with reference substituted or NULL
Owen Taylor3473f882001-02-23 17:55:21 +00003010 */
3011
3012xmlChar *
3013xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
3014 xmlChar *buf = NULL;
3015 int len = 0;
3016 int size = XML_PARSER_BUFFER_SIZE;
3017 int c, l;
3018 xmlChar stop;
3019 xmlChar *ret = NULL;
3020 const xmlChar *cur = NULL;
3021 xmlParserInputPtr input;
3022
3023 if (RAW == '"') stop = '"';
3024 else if (RAW == '\'') stop = '\'';
3025 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003026 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003027 return(NULL);
3028 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003029 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003030 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003031 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003032 return(NULL);
3033 }
3034
3035 /*
3036 * The content of the entity definition is copied in a buffer.
3037 */
3038
3039 ctxt->instate = XML_PARSER_ENTITY_VALUE;
3040 input = ctxt->input;
3041 GROW;
3042 NEXT;
3043 c = CUR_CHAR(l);
3044 /*
3045 * NOTE: 4.4.5 Included in Literal
3046 * When a parameter entity reference appears in a literal entity
3047 * value, ... a single or double quote character in the replacement
3048 * text is always treated as a normal data character and will not
3049 * terminate the literal.
3050 * In practice it means we stop the loop only when back at parsing
3051 * the initial entity and the quote is found
3052 */
Daniel Veillard73b013f2003-09-30 12:36:01 +00003053 while ((xmlIsChar(c)) && ((c != stop) || /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00003054 (ctxt->input != input))) {
3055 if (len + 5 >= size) {
3056 size *= 2;
3057 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3058 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003059 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003060 return(NULL);
3061 }
3062 }
3063 COPY_BUF(l,buf,len,c);
3064 NEXTL(l);
3065 /*
3066 * Pop-up of finished entities.
3067 */
3068 while ((RAW == 0) && (ctxt->inputNr > 1)) /* non input consuming */
3069 xmlPopInput(ctxt);
3070
3071 GROW;
3072 c = CUR_CHAR(l);
3073 if (c == 0) {
3074 GROW;
3075 c = CUR_CHAR(l);
3076 }
3077 }
3078 buf[len] = 0;
3079
3080 /*
3081 * Raise problem w.r.t. '&' and '%' being used in non-entities
3082 * reference constructs. Note Charref will be handled in
3083 * xmlStringDecodeEntities()
3084 */
3085 cur = buf;
3086 while (*cur != 0) { /* non input consuming */
3087 if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
3088 xmlChar *name;
3089 xmlChar tmp = *cur;
3090
3091 cur++;
3092 name = xmlParseStringName(ctxt, &cur);
3093 if ((name == NULL) || (*cur != ';')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003094 xmlFatalErrMsgInt(ctxt, XML_ERR_ENTITY_CHAR_ERROR,
Owen Taylor3473f882001-02-23 17:55:21 +00003095 "EntityValue: '%c' forbidden except for entities references\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003096 tmp);
Owen Taylor3473f882001-02-23 17:55:21 +00003097 }
Daniel Veillard5151c062001-10-23 13:10:19 +00003098 if ((tmp == '%') && (ctxt->inSubset == 1) &&
3099 (ctxt->inputNr == 1)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003100 xmlFatalErr(ctxt, XML_ERR_ENTITY_PE_INTERNAL, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003101 }
3102 if (name != NULL)
3103 xmlFree(name);
3104 }
3105 cur++;
3106 }
3107
3108 /*
3109 * Then PEReference entities are substituted.
3110 */
3111 if (c != stop) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003112 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003113 xmlFree(buf);
3114 } else {
3115 NEXT;
3116 /*
3117 * NOTE: 4.4.7 Bypassed
3118 * When a general entity reference appears in the EntityValue in
3119 * an entity declaration, it is bypassed and left as is.
3120 * so XML_SUBSTITUTE_REF is not set here.
3121 */
3122 ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF,
3123 0, 0, 0);
3124 if (orig != NULL)
3125 *orig = buf;
3126 else
3127 xmlFree(buf);
3128 }
3129
3130 return(ret);
3131}
3132
3133/**
Daniel Veillard01c13b52002-12-10 15:19:08 +00003134 * xmlParseAttValueComplex:
3135 * @ctxt: an XML parser context
Daniel Veillard0fb18932003-09-07 09:14:37 +00003136 * @len: the resulting attribute len
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003137 * @normalize: wether to apply the inner normalization
Daniel Veillard01c13b52002-12-10 15:19:08 +00003138 *
3139 * parse a value for an attribute, this is the fallback function
3140 * of xmlParseAttValue() when the attribute parsing requires handling
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003141 * of non-ASCII characters, or normalization compaction.
Daniel Veillard01c13b52002-12-10 15:19:08 +00003142 *
3143 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
3144 */
Daniel Veillard0fb18932003-09-07 09:14:37 +00003145static xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003146xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) {
Daniel Veillarde72c7562002-05-31 09:47:30 +00003147 xmlChar limit = 0;
3148 xmlChar *buf = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00003149 int len = 0;
3150 int buf_size = 0;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003151 int c, l, in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003152 xmlChar *current = NULL;
3153 xmlEntityPtr ent;
3154
Owen Taylor3473f882001-02-23 17:55:21 +00003155 if (NXT(0) == '"') {
3156 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3157 limit = '"';
3158 NEXT;
3159 } else if (NXT(0) == '\'') {
3160 limit = '\'';
3161 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3162 NEXT;
3163 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003164 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003165 return(NULL);
3166 }
3167
3168 /*
3169 * allocate a translation buffer.
3170 */
3171 buf_size = XML_PARSER_BUFFER_SIZE;
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003172 buf = (xmlChar *) xmlMallocAtomic(buf_size * sizeof(xmlChar));
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003173 if (buf == NULL) goto mem_error;
Owen Taylor3473f882001-02-23 17:55:21 +00003174
3175 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003176 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +00003177 */
3178 c = CUR_CHAR(l);
Daniel Veillardfdc91562002-07-01 21:52:03 +00003179 while ((NXT(0) != limit) && /* checked */
3180 (c != '<')) {
Owen Taylor3473f882001-02-23 17:55:21 +00003181 if (c == 0) break;
Daniel Veillardfdc91562002-07-01 21:52:03 +00003182 if (c == '&') {
Daniel Veillard62998c02003-09-15 12:56:36 +00003183 in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003184 if (NXT(1) == '#') {
3185 int val = xmlParseCharRef(ctxt);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003186
Owen Taylor3473f882001-02-23 17:55:21 +00003187 if (val == '&') {
Daniel Veillard319a7422001-09-11 09:27:09 +00003188 if (ctxt->replaceEntities) {
3189 if (len > buf_size - 10) {
3190 growBuffer(buf);
3191 }
3192 buf[len++] = '&';
3193 } else {
3194 /*
3195 * The reparsing will be done in xmlStringGetNodeList()
3196 * called by the attribute() function in SAX.c
3197 */
Daniel Veillard319a7422001-09-11 09:27:09 +00003198 if (len > buf_size - 10) {
3199 growBuffer(buf);
3200 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003201 buf[len++] = '&';
3202 buf[len++] = '#';
3203 buf[len++] = '3';
3204 buf[len++] = '8';
3205 buf[len++] = ';';
Owen Taylor3473f882001-02-23 17:55:21 +00003206 }
3207 } else {
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00003208 if (len > buf_size - 10) {
3209 growBuffer(buf);
3210 }
Owen Taylor3473f882001-02-23 17:55:21 +00003211 len += xmlCopyChar(0, &buf[len], val);
3212 }
3213 } else {
3214 ent = xmlParseEntityRef(ctxt);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003215 if ((ent != NULL) &&
3216 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
3217 if (len > buf_size - 10) {
3218 growBuffer(buf);
3219 }
3220 if ((ctxt->replaceEntities == 0) &&
3221 (ent->content[0] == '&')) {
3222 buf[len++] = '&';
3223 buf[len++] = '#';
3224 buf[len++] = '3';
3225 buf[len++] = '8';
3226 buf[len++] = ';';
3227 } else {
3228 buf[len++] = ent->content[0];
3229 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003230 } else if ((ent != NULL) &&
3231 (ctxt->replaceEntities != 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003232 xmlChar *rep;
3233
3234 if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
3235 rep = xmlStringDecodeEntities(ctxt, ent->content,
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003236 XML_SUBSTITUTE_REF,
3237 0, 0, 0);
Owen Taylor3473f882001-02-23 17:55:21 +00003238 if (rep != NULL) {
3239 current = rep;
3240 while (*current != 0) { /* non input consuming */
3241 buf[len++] = *current++;
3242 if (len > buf_size - 10) {
3243 growBuffer(buf);
3244 }
3245 }
3246 xmlFree(rep);
3247 }
3248 } else {
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00003249 if (len > buf_size - 10) {
3250 growBuffer(buf);
3251 }
Owen Taylor3473f882001-02-23 17:55:21 +00003252 if (ent->content != NULL)
3253 buf[len++] = ent->content[0];
3254 }
3255 } else if (ent != NULL) {
3256 int i = xmlStrlen(ent->name);
3257 const xmlChar *cur = ent->name;
3258
3259 /*
3260 * This may look absurd but is needed to detect
3261 * entities problems
3262 */
3263 if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
3264 (ent->content != NULL)) {
3265 xmlChar *rep;
3266 rep = xmlStringDecodeEntities(ctxt, ent->content,
3267 XML_SUBSTITUTE_REF, 0, 0, 0);
3268 if (rep != NULL)
3269 xmlFree(rep);
3270 }
3271
3272 /*
3273 * Just output the reference
3274 */
3275 buf[len++] = '&';
3276 if (len > buf_size - i - 10) {
3277 growBuffer(buf);
3278 }
3279 for (;i > 0;i--)
3280 buf[len++] = *cur++;
3281 buf[len++] = ';';
3282 }
3283 }
3284 } else {
3285 if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003286 if ((len != 0) || (!normalize)) {
3287 if ((!normalize) || (!in_space)) {
3288 COPY_BUF(l,buf,len,0x20);
3289 if (len > buf_size - 10) {
3290 growBuffer(buf);
3291 }
3292 }
3293 in_space = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003294 }
3295 } else {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003296 in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003297 COPY_BUF(l,buf,len,c);
3298 if (len > buf_size - 10) {
3299 growBuffer(buf);
3300 }
3301 }
3302 NEXTL(l);
3303 }
3304 GROW;
3305 c = CUR_CHAR(l);
3306 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003307 if ((in_space) && (normalize)) {
3308 while (buf[len - 1] == 0x20) len--;
3309 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00003310 buf[len] = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003311 if (RAW == '<') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003312 xmlFatalErr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003313 } else if (RAW != limit) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003314 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
3315 "AttValue: ' expected\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003316 } else
3317 NEXT;
Daniel Veillard0fb18932003-09-07 09:14:37 +00003318 if (attlen != NULL) *attlen = len;
Owen Taylor3473f882001-02-23 17:55:21 +00003319 return(buf);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003320
3321mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003322 xmlErrMemory(ctxt, NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003323 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003324}
3325
3326/**
Daniel Veillard0fb18932003-09-07 09:14:37 +00003327 * xmlParseAttValue:
3328 * @ctxt: an XML parser context
3329 *
3330 * parse a value for an attribute
3331 * Note: the parser won't do substitution of entities here, this
3332 * will be handled later in xmlStringGetNodeList
3333 *
3334 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
3335 * "'" ([^<&'] | Reference)* "'"
3336 *
3337 * 3.3.3 Attribute-Value Normalization:
3338 * Before the value of an attribute is passed to the application or
3339 * checked for validity, the XML processor must normalize it as follows:
3340 * - a character reference is processed by appending the referenced
3341 * character to the attribute value
3342 * - an entity reference is processed by recursively processing the
3343 * replacement text of the entity
3344 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
3345 * appending #x20 to the normalized value, except that only a single
3346 * #x20 is appended for a "#xD#xA" sequence that is part of an external
3347 * parsed entity or the literal entity value of an internal parsed entity
3348 * - other characters are processed by appending them to the normalized value
3349 * If the declared value is not CDATA, then the XML processor must further
3350 * process the normalized attribute value by discarding any leading and
3351 * trailing space (#x20) characters, and by replacing sequences of space
3352 * (#x20) characters by a single space (#x20) character.
3353 * All attributes for which no declaration has been read should be treated
3354 * by a non-validating parser as if declared CDATA.
3355 *
3356 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
3357 */
3358
3359
3360xmlChar *
3361xmlParseAttValue(xmlParserCtxtPtr ctxt) {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003362 return(xmlParseAttValueInternal(ctxt, NULL, NULL, 0));
Daniel Veillard0fb18932003-09-07 09:14:37 +00003363}
3364
3365/**
Owen Taylor3473f882001-02-23 17:55:21 +00003366 * xmlParseSystemLiteral:
3367 * @ctxt: an XML parser context
3368 *
3369 * parse an XML Literal
3370 *
3371 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
3372 *
3373 * Returns the SystemLiteral parsed or NULL
3374 */
3375
3376xmlChar *
3377xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
3378 xmlChar *buf = NULL;
3379 int len = 0;
3380 int size = XML_PARSER_BUFFER_SIZE;
3381 int cur, l;
3382 xmlChar stop;
3383 int state = ctxt->instate;
3384 int count = 0;
3385
3386 SHRINK;
3387 if (RAW == '"') {
3388 NEXT;
3389 stop = '"';
3390 } else if (RAW == '\'') {
3391 NEXT;
3392 stop = '\'';
3393 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003394 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003395 return(NULL);
3396 }
3397
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003398 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003399 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003400 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003401 return(NULL);
3402 }
3403 ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
3404 cur = CUR_CHAR(l);
Daniel Veillard73b013f2003-09-30 12:36:01 +00003405 while ((xmlIsChar(cur)) && (cur != stop)) { /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00003406 if (len + 5 >= size) {
3407 size *= 2;
3408 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3409 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003410 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003411 ctxt->instate = (xmlParserInputState) state;
3412 return(NULL);
3413 }
3414 }
3415 count++;
3416 if (count > 50) {
3417 GROW;
3418 count = 0;
3419 }
3420 COPY_BUF(l,buf,len,cur);
3421 NEXTL(l);
3422 cur = CUR_CHAR(l);
3423 if (cur == 0) {
3424 GROW;
3425 SHRINK;
3426 cur = CUR_CHAR(l);
3427 }
3428 }
3429 buf[len] = 0;
3430 ctxt->instate = (xmlParserInputState) state;
Daniel Veillard73b013f2003-09-30 12:36:01 +00003431 if (!xmlIsChar(cur)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003432 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003433 } else {
3434 NEXT;
3435 }
3436 return(buf);
3437}
3438
3439/**
3440 * xmlParsePubidLiteral:
3441 * @ctxt: an XML parser context
3442 *
3443 * parse an XML public literal
3444 *
3445 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
3446 *
3447 * Returns the PubidLiteral parsed or NULL.
3448 */
3449
3450xmlChar *
3451xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
3452 xmlChar *buf = NULL;
3453 int len = 0;
3454 int size = XML_PARSER_BUFFER_SIZE;
3455 xmlChar cur;
3456 xmlChar stop;
3457 int count = 0;
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003458 xmlParserInputState oldstate = ctxt->instate;
Owen Taylor3473f882001-02-23 17:55:21 +00003459
3460 SHRINK;
3461 if (RAW == '"') {
3462 NEXT;
3463 stop = '"';
3464 } else if (RAW == '\'') {
3465 NEXT;
3466 stop = '\'';
3467 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003468 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003469 return(NULL);
3470 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003471 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003472 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003473 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003474 return(NULL);
3475 }
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003476 ctxt->instate = XML_PARSER_PUBLIC_LITERAL;
Owen Taylor3473f882001-02-23 17:55:21 +00003477 cur = CUR;
3478 while ((IS_PUBIDCHAR(cur)) && (cur != stop)) { /* checked */
3479 if (len + 1 >= size) {
3480 size *= 2;
3481 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3482 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003483 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003484 return(NULL);
3485 }
3486 }
3487 buf[len++] = cur;
3488 count++;
3489 if (count > 50) {
3490 GROW;
3491 count = 0;
3492 }
3493 NEXT;
3494 cur = CUR;
3495 if (cur == 0) {
3496 GROW;
3497 SHRINK;
3498 cur = CUR;
3499 }
3500 }
3501 buf[len] = 0;
3502 if (cur != stop) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003503 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003504 } else {
3505 NEXT;
3506 }
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003507 ctxt->instate = oldstate;
Owen Taylor3473f882001-02-23 17:55:21 +00003508 return(buf);
3509}
3510
Daniel Veillard48b2f892001-02-25 16:11:03 +00003511void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata);
Owen Taylor3473f882001-02-23 17:55:21 +00003512/**
3513 * xmlParseCharData:
3514 * @ctxt: an XML parser context
3515 * @cdata: int indicating whether we are within a CDATA section
3516 *
3517 * parse a CharData section.
3518 * if we are within a CDATA section ']]>' marks an end of section.
3519 *
3520 * The right angle bracket (>) may be represented using the string "&gt;",
3521 * and must, for compatibility, be escaped using "&gt;" or a character
3522 * reference when it appears in the string "]]>" in content, when that
3523 * string is not marking the end of a CDATA section.
3524 *
3525 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
3526 */
3527
3528void
3529xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00003530 const xmlChar *in;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003531 int nbchar = 0;
Daniel Veillard50582112001-03-26 22:52:16 +00003532 int line = ctxt->input->line;
3533 int col = ctxt->input->col;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003534
3535 SHRINK;
3536 GROW;
3537 /*
3538 * Accelerated common case where input don't need to be
3539 * modified before passing it to the handler.
3540 */
Daniel Veillardfdc91562002-07-01 21:52:03 +00003541 if (!cdata) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00003542 in = ctxt->input->cur;
3543 do {
Daniel Veillard3ed155f2001-04-29 19:56:59 +00003544get_more:
Daniel Veillard561b7f82002-03-20 21:55:57 +00003545 while (((*in >= 0x20) && (*in != '<') && (*in != ']') &&
3546 (*in != '&') && (*in <= 0x7F)) || (*in == 0x09))
Daniel Veillard48b2f892001-02-25 16:11:03 +00003547 in++;
Daniel Veillard561b7f82002-03-20 21:55:57 +00003548 if (*in == 0xA) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00003549 ctxt->input->line++;
Daniel Veillard3ed155f2001-04-29 19:56:59 +00003550 in++;
Daniel Veillard561b7f82002-03-20 21:55:57 +00003551 while (*in == 0xA) {
Daniel Veillard3ed155f2001-04-29 19:56:59 +00003552 ctxt->input->line++;
3553 in++;
3554 }
3555 goto get_more;
Daniel Veillard561b7f82002-03-20 21:55:57 +00003556 }
3557 if (*in == ']') {
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00003558 if ((in[1] == ']') && (in[2] == '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003559 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00003560 ctxt->input->cur = in;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00003561 return;
3562 }
3563 in++;
3564 goto get_more;
3565 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00003566 nbchar = in - ctxt->input->cur;
Daniel Veillard80f32572001-03-07 19:45:40 +00003567 if (nbchar > 0) {
Daniel Veillard40412cd2003-09-03 13:28:32 +00003568 if ((ctxt->sax->ignorableWhitespace !=
3569 ctxt->sax->characters) &&
3570 (IS_BLANK(*ctxt->input->cur))) {
Daniel Veillarda7374592001-05-10 14:17:55 +00003571 const xmlChar *tmp = ctxt->input->cur;
3572 ctxt->input->cur = in;
Daniel Veillard40412cd2003-09-03 13:28:32 +00003573
Daniel Veillarda7374592001-05-10 14:17:55 +00003574 if (areBlanks(ctxt, tmp, nbchar)) {
Daniel Veillard40412cd2003-09-03 13:28:32 +00003575 ctxt->sax->ignorableWhitespace(ctxt->userData,
3576 tmp, nbchar);
3577 } else if (ctxt->sax->characters != NULL)
3578 ctxt->sax->characters(ctxt->userData,
3579 tmp, nbchar);
Daniel Veillard3ed27bd2001-06-17 17:58:17 +00003580 line = ctxt->input->line;
3581 col = ctxt->input->col;
Daniel Veillard80f32572001-03-07 19:45:40 +00003582 } else {
3583 if (ctxt->sax->characters != NULL)
3584 ctxt->sax->characters(ctxt->userData,
3585 ctxt->input->cur, nbchar);
Daniel Veillard3ed27bd2001-06-17 17:58:17 +00003586 line = ctxt->input->line;
3587 col = ctxt->input->col;
Daniel Veillard80f32572001-03-07 19:45:40 +00003588 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00003589 }
3590 ctxt->input->cur = in;
Daniel Veillard561b7f82002-03-20 21:55:57 +00003591 if (*in == 0xD) {
3592 in++;
3593 if (*in == 0xA) {
3594 ctxt->input->cur = in;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003595 in++;
Daniel Veillard561b7f82002-03-20 21:55:57 +00003596 ctxt->input->line++;
3597 continue; /* while */
Daniel Veillard48b2f892001-02-25 16:11:03 +00003598 }
Daniel Veillard561b7f82002-03-20 21:55:57 +00003599 in--;
3600 }
3601 if (*in == '<') {
3602 return;
3603 }
3604 if (*in == '&') {
3605 return;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003606 }
3607 SHRINK;
3608 GROW;
3609 in = ctxt->input->cur;
William M. Brackc07329e2003-09-08 01:57:30 +00003610 } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
Daniel Veillard48b2f892001-02-25 16:11:03 +00003611 nbchar = 0;
3612 }
Daniel Veillard50582112001-03-26 22:52:16 +00003613 ctxt->input->line = line;
3614 ctxt->input->col = col;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003615 xmlParseCharDataComplex(ctxt, cdata);
3616}
3617
Daniel Veillard01c13b52002-12-10 15:19:08 +00003618/**
3619 * xmlParseCharDataComplex:
3620 * @ctxt: an XML parser context
3621 * @cdata: int indicating whether we are within a CDATA section
3622 *
3623 * parse a CharData section.this is the fallback function
3624 * of xmlParseCharData() when the parsing requires handling
3625 * of non-ASCII characters.
3626 */
Daniel Veillard48b2f892001-02-25 16:11:03 +00003627void
3628xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) {
Owen Taylor3473f882001-02-23 17:55:21 +00003629 xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
3630 int nbchar = 0;
3631 int cur, l;
3632 int count = 0;
3633
3634 SHRINK;
3635 GROW;
3636 cur = CUR_CHAR(l);
Daniel Veillardfdc91562002-07-01 21:52:03 +00003637 while ((cur != '<') && /* checked */
3638 (cur != '&') &&
Daniel Veillard73b013f2003-09-30 12:36:01 +00003639 (xmlIsChar(cur))) /* test also done in xmlCurrentChar() */ {
Owen Taylor3473f882001-02-23 17:55:21 +00003640 if ((cur == ']') && (NXT(1) == ']') &&
3641 (NXT(2) == '>')) {
3642 if (cdata) break;
3643 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003644 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003645 }
3646 }
3647 COPY_BUF(l,buf,nbchar,cur);
3648 if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
Daniel Veillard092643b2003-09-25 14:29:29 +00003649 buf[nbchar] = 0;
3650
Owen Taylor3473f882001-02-23 17:55:21 +00003651 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003652 * OK the segment is to be consumed as chars.
Owen Taylor3473f882001-02-23 17:55:21 +00003653 */
3654 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
3655 if (areBlanks(ctxt, buf, nbchar)) {
3656 if (ctxt->sax->ignorableWhitespace != NULL)
3657 ctxt->sax->ignorableWhitespace(ctxt->userData,
3658 buf, nbchar);
3659 } else {
3660 if (ctxt->sax->characters != NULL)
3661 ctxt->sax->characters(ctxt->userData, buf, nbchar);
3662 }
3663 }
3664 nbchar = 0;
3665 }
3666 count++;
3667 if (count > 50) {
3668 GROW;
3669 count = 0;
3670 }
3671 NEXTL(l);
3672 cur = CUR_CHAR(l);
3673 }
3674 if (nbchar != 0) {
Daniel Veillard092643b2003-09-25 14:29:29 +00003675 buf[nbchar] = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003676 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003677 * OK the segment is to be consumed as chars.
Owen Taylor3473f882001-02-23 17:55:21 +00003678 */
3679 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
3680 if (areBlanks(ctxt, buf, nbchar)) {
3681 if (ctxt->sax->ignorableWhitespace != NULL)
3682 ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
3683 } else {
3684 if (ctxt->sax->characters != NULL)
3685 ctxt->sax->characters(ctxt->userData, buf, nbchar);
3686 }
3687 }
3688 }
3689}
3690
3691/**
3692 * xmlParseExternalID:
3693 * @ctxt: an XML parser context
3694 * @publicID: a xmlChar** receiving PubidLiteral
3695 * @strict: indicate whether we should restrict parsing to only
3696 * production [75], see NOTE below
3697 *
3698 * Parse an External ID or a Public ID
3699 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003700 * NOTE: Productions [75] and [83] interact badly since [75] can generate
Owen Taylor3473f882001-02-23 17:55:21 +00003701 * 'PUBLIC' S PubidLiteral S SystemLiteral
3702 *
3703 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
3704 * | 'PUBLIC' S PubidLiteral S SystemLiteral
3705 *
3706 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
3707 *
3708 * Returns the function returns SystemLiteral and in the second
3709 * case publicID receives PubidLiteral, is strict is off
3710 * it is possible to return NULL and have publicID set.
3711 */
3712
3713xmlChar *
3714xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
3715 xmlChar *URI = NULL;
3716
3717 SHRINK;
Daniel Veillard146c9122001-03-22 15:22:27 +00003718
3719 *publicID = NULL;
Daniel Veillard8f597c32003-10-06 08:19:27 +00003720 if (memcmp(CUR_PTR, "SYSTEM", 6) == 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00003721 SKIP(6);
3722 if (!IS_BLANK(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003723 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
3724 "Space required after 'SYSTEM'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003725 }
3726 SKIP_BLANKS;
3727 URI = xmlParseSystemLiteral(ctxt);
3728 if (URI == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003729 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003730 }
Daniel Veillard8f597c32003-10-06 08:19:27 +00003731 } else if (memcmp(CUR_PTR, "PUBLIC", 6) == 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00003732 SKIP(6);
3733 if (!IS_BLANK(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003734 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00003735 "Space required after 'PUBLIC'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003736 }
3737 SKIP_BLANKS;
3738 *publicID = xmlParsePubidLiteral(ctxt);
3739 if (*publicID == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003740 xmlFatalErr(ctxt, XML_ERR_PUBID_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003741 }
3742 if (strict) {
3743 /*
3744 * We don't handle [83] so "S SystemLiteral" is required.
3745 */
3746 if (!IS_BLANK(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003747 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00003748 "Space required after the Public Identifier\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003749 }
3750 } else {
3751 /*
3752 * We handle [83] so we return immediately, if
3753 * "S SystemLiteral" is not detected. From a purely parsing
3754 * point of view that's a nice mess.
3755 */
3756 const xmlChar *ptr;
3757 GROW;
3758
3759 ptr = CUR_PTR;
3760 if (!IS_BLANK(*ptr)) return(NULL);
3761
3762 while (IS_BLANK(*ptr)) ptr++; /* TODO: dangerous, fix ! */
3763 if ((*ptr != '\'') && (*ptr != '"')) return(NULL);
3764 }
3765 SKIP_BLANKS;
3766 URI = xmlParseSystemLiteral(ctxt);
3767 if (URI == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003768 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003769 }
3770 }
3771 return(URI);
3772}
3773
3774/**
3775 * xmlParseComment:
3776 * @ctxt: an XML parser context
3777 *
3778 * Skip an XML (SGML) comment <!-- .... -->
3779 * The spec says that "For compatibility, the string "--" (double-hyphen)
3780 * must not occur within comments. "
3781 *
3782 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
3783 */
3784void
3785xmlParseComment(xmlParserCtxtPtr ctxt) {
3786 xmlChar *buf = NULL;
3787 int len;
3788 int size = XML_PARSER_BUFFER_SIZE;
3789 int q, ql;
3790 int r, rl;
3791 int cur, l;
3792 xmlParserInputState state;
3793 xmlParserInputPtr input = ctxt->input;
3794 int count = 0;
3795
3796 /*
3797 * Check that there is a comment right here.
3798 */
3799 if ((RAW != '<') || (NXT(1) != '!') ||
3800 (NXT(2) != '-') || (NXT(3) != '-')) return;
3801
3802 state = ctxt->instate;
3803 ctxt->instate = XML_PARSER_COMMENT;
3804 SHRINK;
3805 SKIP(4);
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003806 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003807 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003808 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003809 ctxt->instate = state;
3810 return;
3811 }
3812 q = CUR_CHAR(ql);
3813 NEXTL(ql);
3814 r = CUR_CHAR(rl);
3815 NEXTL(rl);
3816 cur = CUR_CHAR(l);
3817 len = 0;
Daniel Veillard73b013f2003-09-30 12:36:01 +00003818 while (xmlIsChar(cur) && /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00003819 ((cur != '>') ||
3820 (r != '-') || (q != '-'))) {
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00003821 if ((r == '-') && (q == '-')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003822 xmlFatalErr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003823 }
3824 if (len + 5 >= size) {
3825 size *= 2;
3826 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3827 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003828 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003829 ctxt->instate = state;
3830 return;
3831 }
3832 }
3833 COPY_BUF(ql,buf,len,q);
3834 q = r;
3835 ql = rl;
3836 r = cur;
3837 rl = l;
3838
3839 count++;
3840 if (count > 50) {
3841 GROW;
3842 count = 0;
3843 }
3844 NEXTL(l);
3845 cur = CUR_CHAR(l);
3846 if (cur == 0) {
3847 SHRINK;
3848 GROW;
3849 cur = CUR_CHAR(l);
3850 }
3851 }
3852 buf[len] = 0;
Daniel Veillard73b013f2003-09-30 12:36:01 +00003853 if (!xmlIsChar(cur)) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00003854 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00003855 "Comment not terminated \n<!--%.50s\n", buf);
Owen Taylor3473f882001-02-23 17:55:21 +00003856 xmlFree(buf);
3857 } else {
3858 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003859 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
3860 "Comment doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003861 }
3862 NEXT;
3863 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
3864 (!ctxt->disableSAX))
3865 ctxt->sax->comment(ctxt->userData, buf);
3866 xmlFree(buf);
3867 }
3868 ctxt->instate = state;
3869}
3870
3871/**
3872 * xmlParsePITarget:
3873 * @ctxt: an XML parser context
3874 *
3875 * parse the name of a PI
3876 *
3877 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
3878 *
3879 * Returns the PITarget name or NULL
3880 */
3881
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003882const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00003883xmlParsePITarget(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003884 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00003885
3886 name = xmlParseName(ctxt);
3887 if ((name != NULL) &&
3888 ((name[0] == 'x') || (name[0] == 'X')) &&
3889 ((name[1] == 'm') || (name[1] == 'M')) &&
3890 ((name[2] == 'l') || (name[2] == 'L'))) {
3891 int i;
3892 if ((name[0] == 'x') && (name[1] == 'm') &&
3893 (name[2] == 'l') && (name[3] == 0)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003894 xmlFatalErrMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
Owen Taylor3473f882001-02-23 17:55:21 +00003895 "XML declaration allowed only at the start of the document\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003896 return(name);
3897 } else if (name[3] == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003898 xmlFatalErr(ctxt, XML_ERR_RESERVED_XML_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003899 return(name);
3900 }
3901 for (i = 0;;i++) {
3902 if (xmlW3CPIs[i] == NULL) break;
3903 if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
3904 return(name);
3905 }
Daniel Veillard24eb9782003-10-04 21:08:09 +00003906 xmlWarningMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
3907 "xmlParsePITarget: invalid name prefix 'xml'\n",
3908 NULL, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003909 }
3910 return(name);
3911}
3912
#ifdef LIBXML_CATALOG_ENABLED
/**
 * xmlParseCatalogPI:
 * @ctxt:  an XML parser context
 * @catalog:  the PI value string
 *
 * parse an XML Catalog Processing Instruction.
 *
 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
 *
 * Occurs only if allowed by the user and if happening in the Misc
 * part of the document before any doctype informations
 * This will add the given catalog to the parsing context in order
 * to be used if there is a resolution need further down in the document
 *
 * The expected content is: catalog S? '=' S? quoted-value S?
 * A syntax error raises an XML_WAR_CATALOG_PI warning, except for a
 * missing '=' which makes the function return silently.
 */

static void
xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
    const xmlChar *p = catalog;
    const xmlChar *start;
    xmlChar *URL = NULL;
    xmlChar quote;

    /* pseudo-attribute name */
    for (; IS_BLANK(*p); p++)
        ;
    if (xmlStrncmp(p, BAD_CAST"catalog", 7) != 0)
        goto error;
    p += 7;

    /* equal sign */
    for (; IS_BLANK(*p); p++)
        ;
    if (*p != '=')
        return;
    p++;

    /* opening quote, either kind */
    for (; IS_BLANK(*p); p++)
        ;
    quote = *p;
    if ((quote != '\'') && (quote != '"'))
        goto error;
    p++;

    /* value up to the matching quote */
    start = p;
    for (; (*p != 0) && (*p != quote); p++)
        ;
    if (*p == 0)
        goto error;
    URL = xmlStrndup(start, p - start);
    p++;

    /* only blanks are allowed after the value */
    for (; IS_BLANK(*p); p++)
        ;
    if (*p == 0) {
        if (URL != NULL) {
            ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, URL);
            xmlFree(URL);
        }
        return;
    }

error:
    xmlWarningMsg(ctxt, XML_WAR_CATALOG_PI,
                  "Catalog PI syntax error: %s\n",
                  catalog, NULL);
    if (URL != NULL)
        xmlFree(URL);
}
#endif
3974
Owen Taylor3473f882001-02-23 17:55:21 +00003975/**
3976 * xmlParsePI:
3977 * @ctxt: an XML parser context
3978 *
3979 * parse an XML Processing Instruction.
3980 *
3981 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
3982 *
3983 * The processing is transfered to SAX once parsed.
3984 */
3985
3986void
3987xmlParsePI(xmlParserCtxtPtr ctxt) {
3988 xmlChar *buf = NULL;
3989 int len = 0;
3990 int size = XML_PARSER_BUFFER_SIZE;
3991 int cur, l;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003992 const xmlChar *target;
Owen Taylor3473f882001-02-23 17:55:21 +00003993 xmlParserInputState state;
3994 int count = 0;
3995
3996 if ((RAW == '<') && (NXT(1) == '?')) {
3997 xmlParserInputPtr input = ctxt->input;
3998 state = ctxt->instate;
3999 ctxt->instate = XML_PARSER_PI;
4000 /*
4001 * this is a Processing Instruction.
4002 */
4003 SKIP(2);
4004 SHRINK;
4005
4006 /*
4007 * Parse the target name and check for special support like
4008 * namespace.
4009 */
4010 target = xmlParsePITarget(ctxt);
4011 if (target != NULL) {
4012 if ((RAW == '?') && (NXT(1) == '>')) {
4013 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004014 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4015 "PI declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004016 }
4017 SKIP(2);
4018
4019 /*
4020 * SAX: PI detected.
4021 */
4022 if ((ctxt->sax) && (!ctxt->disableSAX) &&
4023 (ctxt->sax->processingInstruction != NULL))
4024 ctxt->sax->processingInstruction(ctxt->userData,
4025 target, NULL);
4026 ctxt->instate = state;
Owen Taylor3473f882001-02-23 17:55:21 +00004027 return;
4028 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00004029 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00004030 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004031 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004032 ctxt->instate = state;
4033 return;
4034 }
4035 cur = CUR;
4036 if (!IS_BLANK(cur)) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00004037 xmlFatalErrMsgStr(ctxt, XML_ERR_SPACE_REQUIRED,
4038 "ParsePI: PI %s space expected\n", target);
Owen Taylor3473f882001-02-23 17:55:21 +00004039 }
4040 SKIP_BLANKS;
4041 cur = CUR_CHAR(l);
Daniel Veillard73b013f2003-09-30 12:36:01 +00004042 while (xmlIsChar(cur) && /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00004043 ((cur != '?') || (NXT(1) != '>'))) {
4044 if (len + 5 >= size) {
4045 size *= 2;
4046 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4047 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004048 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004049 ctxt->instate = state;
4050 return;
4051 }
4052 }
4053 count++;
4054 if (count > 50) {
4055 GROW;
4056 count = 0;
4057 }
4058 COPY_BUF(l,buf,len,cur);
4059 NEXTL(l);
4060 cur = CUR_CHAR(l);
4061 if (cur == 0) {
4062 SHRINK;
4063 GROW;
4064 cur = CUR_CHAR(l);
4065 }
4066 }
4067 buf[len] = 0;
4068 if (cur != '?') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00004069 xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
4070 "ParsePI: PI %s never end ...\n", target);
Owen Taylor3473f882001-02-23 17:55:21 +00004071 } else {
4072 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004073 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4074 "PI declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004075 }
4076 SKIP(2);
4077
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00004078#ifdef LIBXML_CATALOG_ENABLED
4079 if (((state == XML_PARSER_MISC) ||
4080 (state == XML_PARSER_START)) &&
4081 (xmlStrEqual(target, XML_CATALOG_PI))) {
4082 xmlCatalogAllow allow = xmlCatalogGetDefaults();
4083 if ((allow == XML_CATA_ALLOW_DOCUMENT) ||
4084 (allow == XML_CATA_ALLOW_ALL))
4085 xmlParseCatalogPI(ctxt, buf);
4086 }
4087#endif
4088
4089
Owen Taylor3473f882001-02-23 17:55:21 +00004090 /*
4091 * SAX: PI detected.
4092 */
4093 if ((ctxt->sax) && (!ctxt->disableSAX) &&
4094 (ctxt->sax->processingInstruction != NULL))
4095 ctxt->sax->processingInstruction(ctxt->userData,
4096 target, buf);
4097 }
4098 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00004099 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004100 xmlFatalErr(ctxt, XML_ERR_PI_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004101 }
4102 ctxt->instate = state;
4103 }
4104}
4105
4106/**
4107 * xmlParseNotationDecl:
4108 * @ctxt: an XML parser context
4109 *
4110 * parse a notation declaration
4111 *
4112 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
4113 *
4114 * Hence there is actually 3 choices:
4115 * 'PUBLIC' S PubidLiteral
4116 * 'PUBLIC' S PubidLiteral S SystemLiteral
4117 * and 'SYSTEM' S SystemLiteral
4118 *
4119 * See the NOTE on xmlParseExternalID().
4120 */
4121
4122void
4123xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004124 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00004125 xmlChar *Pubid;
4126 xmlChar *Systemid;
4127
Daniel Veillard8f597c32003-10-06 08:19:27 +00004128 if (memcmp(CUR_PTR, "<!NOTATION", 10) == 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00004129 xmlParserInputPtr input = ctxt->input;
4130 SHRINK;
4131 SKIP(10);
4132 if (!IS_BLANK(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004133 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4134 "Space required after '<!NOTATION'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004135 return;
4136 }
4137 SKIP_BLANKS;
4138
Daniel Veillard76d66f42001-05-16 21:05:17 +00004139 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004140 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004141 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004142 return;
4143 }
4144 if (!IS_BLANK(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004145 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004146 "Space required after the NOTATION name'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004147 return;
4148 }
4149 SKIP_BLANKS;
4150
4151 /*
4152 * Parse the IDs.
4153 */
4154 Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
4155 SKIP_BLANKS;
4156
4157 if (RAW == '>') {
4158 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004159 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4160 "Notation declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004161 }
4162 NEXT;
4163 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4164 (ctxt->sax->notationDecl != NULL))
4165 ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
4166 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004167 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004168 }
Owen Taylor3473f882001-02-23 17:55:21 +00004169 if (Systemid != NULL) xmlFree(Systemid);
4170 if (Pubid != NULL) xmlFree(Pubid);
4171 }
4172}
4173
4174/**
4175 * xmlParseEntityDecl:
4176 * @ctxt: an XML parser context
4177 *
4178 * parse <!ENTITY declarations
4179 *
4180 * [70] EntityDecl ::= GEDecl | PEDecl
4181 *
4182 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
4183 *
4184 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
4185 *
4186 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
4187 *
4188 * [74] PEDef ::= EntityValue | ExternalID
4189 *
4190 * [76] NDataDecl ::= S 'NDATA' S Name
4191 *
4192 * [ VC: Notation Declared ]
4193 * The Name must match the declared name of a notation.
4194 */
4195
4196void
4197xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004198 const xmlChar *name = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00004199 xmlChar *value = NULL;
4200 xmlChar *URI = NULL, *literal = NULL;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004201 const xmlChar *ndata = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00004202 int isParameter = 0;
4203 xmlChar *orig = NULL;
Daniel Veillardf5582f12002-06-11 10:08:16 +00004204 int skipped;
Owen Taylor3473f882001-02-23 17:55:21 +00004205
4206 GROW;
Daniel Veillard8f597c32003-10-06 08:19:27 +00004207 if (memcmp(CUR_PTR, "<!ENTITY", 8) == 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00004208 xmlParserInputPtr input = ctxt->input;
Owen Taylor3473f882001-02-23 17:55:21 +00004209 SHRINK;
4210 SKIP(8);
Daniel Veillardf5582f12002-06-11 10:08:16 +00004211 skipped = SKIP_BLANKS;
4212 if (skipped == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004213 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4214 "Space required after '<!ENTITY'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004215 }
Owen Taylor3473f882001-02-23 17:55:21 +00004216
4217 if (RAW == '%') {
4218 NEXT;
Daniel Veillardf5582f12002-06-11 10:08:16 +00004219 skipped = SKIP_BLANKS;
4220 if (skipped == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004221 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4222 "Space required after '%'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004223 }
Owen Taylor3473f882001-02-23 17:55:21 +00004224 isParameter = 1;
4225 }
4226
Daniel Veillard76d66f42001-05-16 21:05:17 +00004227 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004228 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004229 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
4230 "xmlParseEntityDecl: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004231 return;
4232 }
Daniel Veillardf5582f12002-06-11 10:08:16 +00004233 skipped = SKIP_BLANKS;
4234 if (skipped == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004235 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4236 "Space required after the entity name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004237 }
Owen Taylor3473f882001-02-23 17:55:21 +00004238
Daniel Veillardf5582f12002-06-11 10:08:16 +00004239 ctxt->instate = XML_PARSER_ENTITY_DECL;
Owen Taylor3473f882001-02-23 17:55:21 +00004240 /*
4241 * handle the various case of definitions...
4242 */
4243 if (isParameter) {
4244 if ((RAW == '"') || (RAW == '\'')) {
4245 value = xmlParseEntityValue(ctxt, &orig);
4246 if (value) {
4247 if ((ctxt->sax != NULL) &&
4248 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
4249 ctxt->sax->entityDecl(ctxt->userData, name,
4250 XML_INTERNAL_PARAMETER_ENTITY,
4251 NULL, NULL, value);
4252 }
4253 } else {
4254 URI = xmlParseExternalID(ctxt, &literal, 1);
4255 if ((URI == NULL) && (literal == NULL)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004256 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004257 }
4258 if (URI) {
4259 xmlURIPtr uri;
4260
4261 uri = xmlParseURI((const char *) URI);
4262 if (uri == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00004263 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
4264 "Invalid URI: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00004265 /*
4266 * This really ought to be a well formedness error
4267 * but the XML Core WG decided otherwise c.f. issue
4268 * E26 of the XML erratas.
4269 */
Owen Taylor3473f882001-02-23 17:55:21 +00004270 } else {
4271 if (uri->fragment != NULL) {
Daniel Veillard4a7ae502002-02-18 19:18:17 +00004272 /*
4273 * Okay this is foolish to block those but not
4274 * invalid URIs.
4275 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004276 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004277 } else {
4278 if ((ctxt->sax != NULL) &&
4279 (!ctxt->disableSAX) &&
4280 (ctxt->sax->entityDecl != NULL))
4281 ctxt->sax->entityDecl(ctxt->userData, name,
4282 XML_EXTERNAL_PARAMETER_ENTITY,
4283 literal, URI, NULL);
4284 }
4285 xmlFreeURI(uri);
4286 }
4287 }
4288 }
4289 } else {
4290 if ((RAW == '"') || (RAW == '\'')) {
4291 value = xmlParseEntityValue(ctxt, &orig);
4292 if ((ctxt->sax != NULL) &&
4293 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
4294 ctxt->sax->entityDecl(ctxt->userData, name,
4295 XML_INTERNAL_GENERAL_ENTITY,
4296 NULL, NULL, value);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004297 /*
4298 * For expat compatibility in SAX mode.
4299 */
4300 if ((ctxt->myDoc == NULL) ||
4301 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
4302 if (ctxt->myDoc == NULL) {
4303 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
4304 }
4305 if (ctxt->myDoc->intSubset == NULL)
4306 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
4307 BAD_CAST "fake", NULL, NULL);
4308
Daniel Veillard1af9a412003-08-20 22:54:39 +00004309 xmlSAX2EntityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY,
4310 NULL, NULL, value);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004311 }
Owen Taylor3473f882001-02-23 17:55:21 +00004312 } else {
4313 URI = xmlParseExternalID(ctxt, &literal, 1);
4314 if ((URI == NULL) && (literal == NULL)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004315 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004316 }
4317 if (URI) {
4318 xmlURIPtr uri;
4319
4320 uri = xmlParseURI((const char *)URI);
4321 if (uri == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00004322 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
4323 "Invalid URI: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00004324 /*
4325 * This really ought to be a well formedness error
4326 * but the XML Core WG decided otherwise c.f. issue
4327 * E26 of the XML erratas.
4328 */
Owen Taylor3473f882001-02-23 17:55:21 +00004329 } else {
4330 if (uri->fragment != NULL) {
Daniel Veillard4a7ae502002-02-18 19:18:17 +00004331 /*
4332 * Okay this is foolish to block those but not
4333 * invalid URIs.
4334 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004335 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004336 }
4337 xmlFreeURI(uri);
4338 }
4339 }
4340 if ((RAW != '>') && (!IS_BLANK(CUR))) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004341 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4342 "Space required before 'NDATA'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004343 }
4344 SKIP_BLANKS;
Daniel Veillard8f597c32003-10-06 08:19:27 +00004345 if (memcmp(CUR_PTR, "NDATA", 5) == 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00004346 SKIP(5);
4347 if (!IS_BLANK(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004348 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4349 "Space required after 'NDATA'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004350 }
4351 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004352 ndata = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004353 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4354 (ctxt->sax->unparsedEntityDecl != NULL))
4355 ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
4356 literal, URI, ndata);
4357 } else {
4358 if ((ctxt->sax != NULL) &&
4359 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
4360 ctxt->sax->entityDecl(ctxt->userData, name,
4361 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
4362 literal, URI, NULL);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004363 /*
4364 * For expat compatibility in SAX mode.
4365 * assuming the entity repalcement was asked for
4366 */
4367 if ((ctxt->replaceEntities != 0) &&
4368 ((ctxt->myDoc == NULL) ||
4369 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) {
4370 if (ctxt->myDoc == NULL) {
4371 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
4372 }
4373
4374 if (ctxt->myDoc->intSubset == NULL)
4375 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
4376 BAD_CAST "fake", NULL, NULL);
Daniel Veillard1af9a412003-08-20 22:54:39 +00004377 xmlSAX2EntityDecl(ctxt, name,
4378 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
4379 literal, URI, NULL);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004380 }
Owen Taylor3473f882001-02-23 17:55:21 +00004381 }
4382 }
4383 }
4384 SKIP_BLANKS;
4385 if (RAW != '>') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00004386 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00004387 "xmlParseEntityDecl: entity %s not terminated\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00004388 } else {
4389 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004390 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4391 "Entity declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004392 }
4393 NEXT;
4394 }
4395 if (orig != NULL) {
4396 /*
4397 * Ugly mechanism to save the raw entity value.
4398 */
4399 xmlEntityPtr cur = NULL;
4400
4401 if (isParameter) {
4402 if ((ctxt->sax != NULL) &&
4403 (ctxt->sax->getParameterEntity != NULL))
4404 cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
4405 } else {
4406 if ((ctxt->sax != NULL) &&
4407 (ctxt->sax->getEntity != NULL))
4408 cur = ctxt->sax->getEntity(ctxt->userData, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004409 if ((cur == NULL) && (ctxt->userData==ctxt)) {
Daniel Veillard1af9a412003-08-20 22:54:39 +00004410 cur = xmlSAX2GetEntity(ctxt, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004411 }
Owen Taylor3473f882001-02-23 17:55:21 +00004412 }
4413 if (cur != NULL) {
4414 if (cur->orig != NULL)
4415 xmlFree(orig);
4416 else
4417 cur->orig = orig;
4418 } else
4419 xmlFree(orig);
4420 }
Owen Taylor3473f882001-02-23 17:55:21 +00004421 if (value != NULL) xmlFree(value);
4422 if (URI != NULL) xmlFree(URI);
4423 if (literal != NULL) xmlFree(literal);
Owen Taylor3473f882001-02-23 17:55:21 +00004424 }
4425}
4426
4427/**
4428 * xmlParseDefaultDecl:
4429 * @ctxt: an XML parser context
4430 * @value: Receive a possible fixed default value for the attribute
4431 *
4432 * Parse an attribute default declaration
4433 *
4434 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
4435 *
4436 * [ VC: Required Attribute ]
4437 * if the default declaration is the keyword #REQUIRED, then the
4438 * attribute must be specified for all elements of the type in the
4439 * attribute-list declaration.
4440 *
4441 * [ VC: Attribute Default Legal ]
4442 * The declared default value must meet the lexical constraints of
4443 * the declared attribute type c.f. xmlValidateAttributeDecl()
4444 *
4445 * [ VC: Fixed Attribute Default ]
4446 * if an attribute has a default value declared with the #FIXED
4447 * keyword, instances of that attribute must match the default value.
4448 *
4449 * [ WFC: No < in Attribute Values ]
4450 * handled in xmlParseAttValue()
4451 *
4452 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
4453 * or XML_ATTRIBUTE_FIXED.
4454 */
4455
4456int
4457xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
4458 int val;
4459 xmlChar *ret;
4460
4461 *value = NULL;
Daniel Veillard8f597c32003-10-06 08:19:27 +00004462 if (memcmp(CUR_PTR, "#REQUIRED", 9) == 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00004463 SKIP(9);
4464 return(XML_ATTRIBUTE_REQUIRED);
4465 }
Daniel Veillard8f597c32003-10-06 08:19:27 +00004466 if (memcmp(CUR_PTR, "#IMPLIED", 8) == 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00004467 SKIP(8);
4468 return(XML_ATTRIBUTE_IMPLIED);
4469 }
4470 val = XML_ATTRIBUTE_NONE;
Daniel Veillard8f597c32003-10-06 08:19:27 +00004471 if (memcmp(CUR_PTR, "#FIXED", 6) == 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00004472 SKIP(6);
4473 val = XML_ATTRIBUTE_FIXED;
4474 if (!IS_BLANK(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004475 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4476 "Space required after '#FIXED'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004477 }
4478 SKIP_BLANKS;
4479 }
4480 ret = xmlParseAttValue(ctxt);
4481 ctxt->instate = XML_PARSER_DTD;
4482 if (ret == NULL) {
William M. Brack7b9154b2003-09-27 19:23:50 +00004483 xmlFatalErrMsg(ctxt, (xmlParserErrors)ctxt->errNo,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004484 "Attribute default value declaration error\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004485 } else
4486 *value = ret;
4487 return(val);
4488}
4489
4490/**
4491 * xmlParseNotationType:
4492 * @ctxt: an XML parser context
4493 *
4494 * parse an Notation attribute type.
4495 *
4496 * Note: the leading 'NOTATION' S part has already being parsed...
4497 *
4498 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
4499 *
4500 * [ VC: Notation Attributes ]
4501 * Values of this type must match one of the notation names included
4502 * in the declaration; all notation names in the declaration must be declared.
4503 *
4504 * Returns: the notation attribute tree built while parsing
4505 */
4506
4507xmlEnumerationPtr
4508xmlParseNotationType(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004509 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00004510 xmlEnumerationPtr ret = NULL, last = NULL, cur;
4511
4512 if (RAW != '(') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004513 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004514 return(NULL);
4515 }
4516 SHRINK;
4517 do {
4518 NEXT;
4519 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004520 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004521 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004522 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
4523 "Name expected in NOTATION declaration\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004524 return(ret);
4525 }
4526 cur = xmlCreateEnumeration(name);
Owen Taylor3473f882001-02-23 17:55:21 +00004527 if (cur == NULL) return(ret);
4528 if (last == NULL) ret = last = cur;
4529 else {
4530 last->next = cur;
4531 last = cur;
4532 }
4533 SKIP_BLANKS;
4534 } while (RAW == '|');
4535 if (RAW != ')') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004536 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004537 if ((last != NULL) && (last != ret))
4538 xmlFreeEnumeration(last);
4539 return(ret);
4540 }
4541 NEXT;
4542 return(ret);
4543}
4544
4545/**
4546 * xmlParseEnumerationType:
4547 * @ctxt: an XML parser context
4548 *
4549 * parse an Enumeration attribute type.
4550 *
4551 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
4552 *
4553 * [ VC: Enumeration ]
4554 * Values of this type must match one of the Nmtoken tokens in
4555 * the declaration
4556 *
4557 * Returns: the enumeration attribute tree built while parsing
4558 */
4559
4560xmlEnumerationPtr
4561xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
4562 xmlChar *name;
4563 xmlEnumerationPtr ret = NULL, last = NULL, cur;
4564
4565 if (RAW != '(') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004566 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004567 return(NULL);
4568 }
4569 SHRINK;
4570 do {
4571 NEXT;
4572 SKIP_BLANKS;
4573 name = xmlParseNmtoken(ctxt);
4574 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004575 xmlFatalErr(ctxt, XML_ERR_NMTOKEN_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004576 return(ret);
4577 }
4578 cur = xmlCreateEnumeration(name);
4579 xmlFree(name);
4580 if (cur == NULL) return(ret);
4581 if (last == NULL) ret = last = cur;
4582 else {
4583 last->next = cur;
4584 last = cur;
4585 }
4586 SKIP_BLANKS;
4587 } while (RAW == '|');
4588 if (RAW != ')') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004589 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004590 return(ret);
4591 }
4592 NEXT;
4593 return(ret);
4594}
4595
4596/**
4597 * xmlParseEnumeratedType:
4598 * @ctxt: an XML parser context
4599 * @tree: the enumeration tree built while parsing
4600 *
4601 * parse an Enumerated attribute type.
4602 *
4603 * [57] EnumeratedType ::= NotationType | Enumeration
4604 *
4605 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
4606 *
4607 *
4608 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
4609 */
4610
4611int
4612xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
Daniel Veillard8f597c32003-10-06 08:19:27 +00004613 if (memcmp(CUR_PTR, "NOTATION", 8) == 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00004614 SKIP(8);
4615 if (!IS_BLANK(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004616 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4617 "Space required after 'NOTATION'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004618 return(0);
4619 }
4620 SKIP_BLANKS;
4621 *tree = xmlParseNotationType(ctxt);
4622 if (*tree == NULL) return(0);
4623 return(XML_ATTRIBUTE_NOTATION);
4624 }
4625 *tree = xmlParseEnumerationType(ctxt);
4626 if (*tree == NULL) return(0);
4627 return(XML_ATTRIBUTE_ENUMERATION);
4628}
4629
4630/**
4631 * xmlParseAttributeType:
4632 * @ctxt: an XML parser context
4633 * @tree: the enumeration tree built while parsing
4634 *
4635 * parse the Attribute list def for an element
4636 *
4637 * [54] AttType ::= StringType | TokenizedType | EnumeratedType
4638 *
4639 * [55] StringType ::= 'CDATA'
4640 *
4641 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
4642 * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
4643 *
4644 * Validity constraints for attribute values syntax are checked in
4645 * xmlValidateAttributeValue()
4646 *
4647 * [ VC: ID ]
4648 * Values of type ID must match the Name production. A name must not
4649 * appear more than once in an XML document as a value of this type;
4650 * i.e., ID values must uniquely identify the elements which bear them.
4651 *
4652 * [ VC: One ID per Element Type ]
4653 * No element type may have more than one ID attribute specified.
4654 *
4655 * [ VC: ID Attribute Default ]
4656 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
4657 *
4658 * [ VC: IDREF ]
4659 * Values of type IDREF must match the Name production, and values
4660 * of type IDREFS must match Names; each IDREF Name must match the value
4661 * of an ID attribute on some element in the XML document; i.e. IDREF
4662 * values must match the value of some ID attribute.
4663 *
4664 * [ VC: Entity Name ]
4665 * Values of type ENTITY must match the Name production, values
4666 * of type ENTITIES must match Names; each Entity Name must match the
4667 * name of an unparsed entity declared in the DTD.
4668 *
4669 * [ VC: Name Token ]
4670 * Values of type NMTOKEN must match the Nmtoken production; values
4671 * of type NMTOKENS must match Nmtokens.
4672 *
4673 * Returns the attribute type
4674 */
4675int
4676xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
4677 SHRINK;
Daniel Veillard8f597c32003-10-06 08:19:27 +00004678 if (memcmp(CUR_PTR, "CDATA", 5) == 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00004679 SKIP(5);
4680 return(XML_ATTRIBUTE_CDATA);
Daniel Veillard8f597c32003-10-06 08:19:27 +00004681 } else if (memcmp(CUR_PTR, "IDREFS", 6) == 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00004682 SKIP(6);
4683 return(XML_ATTRIBUTE_IDREFS);
Daniel Veillard8f597c32003-10-06 08:19:27 +00004684 } else if (memcmp(CUR_PTR, "IDREF", 5) == 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00004685 SKIP(5);
4686 return(XML_ATTRIBUTE_IDREF);
4687 } else if ((RAW == 'I') && (NXT(1) == 'D')) {
4688 SKIP(2);
4689 return(XML_ATTRIBUTE_ID);
Daniel Veillard8f597c32003-10-06 08:19:27 +00004690 } else if (memcmp(CUR_PTR, "ENTITY", 6) == 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00004691 SKIP(6);
4692 return(XML_ATTRIBUTE_ENTITY);
Daniel Veillard8f597c32003-10-06 08:19:27 +00004693 } else if (memcmp(CUR_PTR, "ENTITIES", 8) == 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00004694 SKIP(8);
4695 return(XML_ATTRIBUTE_ENTITIES);
Daniel Veillard8f597c32003-10-06 08:19:27 +00004696 } else if (memcmp(CUR_PTR, "NMTOKENS", 8) == 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00004697 SKIP(8);
4698 return(XML_ATTRIBUTE_NMTOKENS);
Daniel Veillard8f597c32003-10-06 08:19:27 +00004699 } else if (memcmp(CUR_PTR, "NMTOKEN", 7) == 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00004700 SKIP(7);
4701 return(XML_ATTRIBUTE_NMTOKEN);
4702 }
4703 return(xmlParseEnumeratedType(ctxt, tree));
4704}
4705
4706/**
4707 * xmlParseAttributeListDecl:
4708 * @ctxt: an XML parser context
4709 *
4710 * : parse the Attribute list def for an element
4711 *
4712 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
4713 *
4714 * [53] AttDef ::= S Name S AttType S DefaultDecl
4715 *
4716 */
4717void
4718xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004719 const xmlChar *elemName;
4720 const xmlChar *attrName;
Owen Taylor3473f882001-02-23 17:55:21 +00004721 xmlEnumerationPtr tree;
4722
Daniel Veillard8f597c32003-10-06 08:19:27 +00004723 if (memcmp(CUR_PTR, "<!ATTLIST", 9) == 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00004724 xmlParserInputPtr input = ctxt->input;
4725
4726 SKIP(9);
4727 if (!IS_BLANK(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004728 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004729 "Space required after '<!ATTLIST'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004730 }
4731 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004732 elemName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004733 if (elemName == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004734 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
4735 "ATTLIST: no name for Element\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004736 return;
4737 }
4738 SKIP_BLANKS;
4739 GROW;
4740 while (RAW != '>') {
4741 const xmlChar *check = CUR_PTR;
4742 int type;
4743 int def;
4744 xmlChar *defaultValue = NULL;
4745
4746 GROW;
4747 tree = NULL;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004748 attrName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004749 if (attrName == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004750 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
4751 "ATTLIST: no name for Attribute\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004752 break;
4753 }
4754 GROW;
4755 if (!IS_BLANK(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004756 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004757 "Space required after the attribute name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004758 if (defaultValue != NULL)
4759 xmlFree(defaultValue);
4760 break;
4761 }
4762 SKIP_BLANKS;
4763
4764 type = xmlParseAttributeType(ctxt, &tree);
4765 if (type <= 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00004766 if (defaultValue != NULL)
4767 xmlFree(defaultValue);
4768 break;
4769 }
4770
4771 GROW;
4772 if (!IS_BLANK(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004773 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4774 "Space required after the attribute type\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004775 if (defaultValue != NULL)
4776 xmlFree(defaultValue);
4777 if (tree != NULL)
4778 xmlFreeEnumeration(tree);
4779 break;
4780 }
4781 SKIP_BLANKS;
4782
4783 def = xmlParseDefaultDecl(ctxt, &defaultValue);
4784 if (def <= 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00004785 if (defaultValue != NULL)
4786 xmlFree(defaultValue);
4787 if (tree != NULL)
4788 xmlFreeEnumeration(tree);
4789 break;
4790 }
4791
4792 GROW;
4793 if (RAW != '>') {
4794 if (!IS_BLANK(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004795 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004796 "Space required after the attribute default value\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004797 if (defaultValue != NULL)
4798 xmlFree(defaultValue);
4799 if (tree != NULL)
4800 xmlFreeEnumeration(tree);
4801 break;
4802 }
4803 SKIP_BLANKS;
4804 }
4805 if (check == CUR_PTR) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004806 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
4807 "in xmlParseAttributeListDecl\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004808 if (defaultValue != NULL)
4809 xmlFree(defaultValue);
4810 if (tree != NULL)
4811 xmlFreeEnumeration(tree);
4812 break;
4813 }
4814 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4815 (ctxt->sax->attributeDecl != NULL))
4816 ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
4817 type, def, defaultValue, tree);
Daniel Veillarde57ec792003-09-10 10:50:59 +00004818 else if (tree != NULL)
4819 xmlFreeEnumeration(tree);
4820
4821 if ((ctxt->sax2) && (defaultValue != NULL) &&
4822 (def != XML_ATTRIBUTE_IMPLIED) &&
4823 (def != XML_ATTRIBUTE_REQUIRED)) {
4824 xmlAddDefAttrs(ctxt, elemName, attrName, defaultValue);
4825 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004826 if ((ctxt->sax2) && (type != XML_ATTRIBUTE_CDATA)) {
4827 xmlAddSpecialAttr(ctxt, elemName, attrName, type);
4828 }
Owen Taylor3473f882001-02-23 17:55:21 +00004829 if (defaultValue != NULL)
4830 xmlFree(defaultValue);
4831 GROW;
4832 }
4833 if (RAW == '>') {
4834 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004835 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4836 "Attribute list declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004837 }
4838 NEXT;
4839 }
Owen Taylor3473f882001-02-23 17:55:21 +00004840 }
4841}
4842
4843/**
4844 * xmlParseElementMixedContentDecl:
4845 * @ctxt: an XML parser context
Daniel Veillarda9b66d02002-12-11 14:23:49 +00004846 * @inputchk: the input used for the current entity, needed for boundary checks
Owen Taylor3473f882001-02-23 17:55:21 +00004847 *
4848 * parse the declaration for a Mixed Element content
4849 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
4850 *
4851 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
4852 * '(' S? '#PCDATA' S? ')'
4853 *
4854 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
4855 *
4856 * [ VC: No Duplicate Types ]
4857 * The same name must not appear more than once in a single
4858 * mixed-content declaration.
4859 *
4860 * returns: the list of the xmlElementContentPtr describing the element choices
4861 */
4862xmlElementContentPtr
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004863xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
Owen Taylor3473f882001-02-23 17:55:21 +00004864 xmlElementContentPtr ret = NULL, cur = NULL, n;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004865 const xmlChar *elem = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00004866
4867 GROW;
Daniel Veillard8f597c32003-10-06 08:19:27 +00004868 if (memcmp(CUR_PTR, "#PCDATA", 7) == 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00004869 SKIP(7);
4870 SKIP_BLANKS;
4871 SHRINK;
4872 if (RAW == ')') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004873 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00004874 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
4875"Element content declaration doesn't start and stop in the same entity\n",
4876 NULL);
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004877 }
Owen Taylor3473f882001-02-23 17:55:21 +00004878 NEXT;
4879 ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA);
4880 if (RAW == '*') {
4881 ret->ocur = XML_ELEMENT_CONTENT_MULT;
4882 NEXT;
4883 }
4884 return(ret);
4885 }
4886 if ((RAW == '(') || (RAW == '|')) {
4887 ret = cur = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA);
4888 if (ret == NULL) return(NULL);
4889 }
4890 while (RAW == '|') {
4891 NEXT;
4892 if (elem == NULL) {
4893 ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
4894 if (ret == NULL) return(NULL);
4895 ret->c1 = cur;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004896 if (cur != NULL)
4897 cur->parent = ret;
Owen Taylor3473f882001-02-23 17:55:21 +00004898 cur = ret;
4899 } else {
4900 n = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
4901 if (n == NULL) return(NULL);
4902 n->c1 = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004903 if (n->c1 != NULL)
4904 n->c1->parent = n;
Owen Taylor3473f882001-02-23 17:55:21 +00004905 cur->c2 = n;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004906 if (n != NULL)
4907 n->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004908 cur = n;
Owen Taylor3473f882001-02-23 17:55:21 +00004909 }
4910 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004911 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004912 if (elem == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004913 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004914 "xmlParseElementMixedContentDecl : Name expected\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004915 xmlFreeElementContent(cur);
4916 return(NULL);
4917 }
4918 SKIP_BLANKS;
4919 GROW;
4920 }
4921 if ((RAW == ')') && (NXT(1) == '*')) {
4922 if (elem != NULL) {
4923 cur->c2 = xmlNewElementContent(elem,
4924 XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004925 if (cur->c2 != NULL)
4926 cur->c2->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004927 }
4928 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004929 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00004930 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
4931"Element content declaration doesn't start and stop in the same entity\n",
4932 NULL);
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004933 }
Owen Taylor3473f882001-02-23 17:55:21 +00004934 SKIP(2);
4935 } else {
Owen Taylor3473f882001-02-23 17:55:21 +00004936 xmlFreeElementContent(ret);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004937 xmlFatalErr(ctxt, XML_ERR_MIXED_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004938 return(NULL);
4939 }
4940
4941 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004942 xmlFatalErr(ctxt, XML_ERR_PCDATA_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004943 }
4944 return(ret);
4945}
4946
4947/**
4948 * xmlParseElementChildrenContentDecl:
4949 * @ctxt: an XML parser context
Daniel Veillarda9b66d02002-12-11 14:23:49 +00004950 * @inputchk: the input used for the current entity, needed for boundary checks
Owen Taylor3473f882001-02-23 17:55:21 +00004951 *
 * parse the declaration for an element-children content model
4953 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
4954 *
4955 *
4956 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
4957 *
4958 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
4959 *
4960 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
4961 *
4962 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
4963 *
4964 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
4965 * TODO Parameter-entity replacement text must be properly nested
Daniel Veillardcbaf3992001-12-31 16:16:02 +00004966 * with parenthesized groups. That is to say, if either of the
Owen Taylor3473f882001-02-23 17:55:21 +00004967 * opening or closing parentheses in a choice, seq, or Mixed
4968 * construct is contained in the replacement text for a parameter
4969 * entity, both must be contained in the same replacement text. For
4970 * interoperability, if a parameter-entity reference appears in a
4971 * choice, seq, or Mixed construct, its replacement text should not
4972 * be empty, and neither the first nor last non-blank character of
4973 * the replacement text should be a connector (| or ,).
4974 *
Daniel Veillard5e2dace2001-07-18 19:30:27 +00004975 * Returns the tree of xmlElementContentPtr describing the element
Owen Taylor3473f882001-02-23 17:55:21 +00004976 * hierarchy.
4977 */
4978xmlElementContentPtr
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004979xmlParseElementChildrenContentDecl (xmlParserCtxtPtr ctxt, int inputchk) {
Owen Taylor3473f882001-02-23 17:55:21 +00004980 xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004981 const xmlChar *elem;
Owen Taylor3473f882001-02-23 17:55:21 +00004982 xmlChar type = 0;
4983
4984 SKIP_BLANKS;
4985 GROW;
4986 if (RAW == '(') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004987 int inputid = ctxt->input->id;
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004988
Owen Taylor3473f882001-02-23 17:55:21 +00004989 /* Recurse on first child */
4990 NEXT;
4991 SKIP_BLANKS;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004992 cur = ret = xmlParseElementChildrenContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00004993 SKIP_BLANKS;
4994 GROW;
4995 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00004996 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004997 if (elem == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004998 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004999 return(NULL);
5000 }
5001 cur = ret = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00005002 if (cur == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005003 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00005004 return(NULL);
5005 }
Owen Taylor3473f882001-02-23 17:55:21 +00005006 GROW;
5007 if (RAW == '?') {
5008 cur->ocur = XML_ELEMENT_CONTENT_OPT;
5009 NEXT;
5010 } else if (RAW == '*') {
5011 cur->ocur = XML_ELEMENT_CONTENT_MULT;
5012 NEXT;
5013 } else if (RAW == '+') {
5014 cur->ocur = XML_ELEMENT_CONTENT_PLUS;
5015 NEXT;
5016 } else {
5017 cur->ocur = XML_ELEMENT_CONTENT_ONCE;
5018 }
Owen Taylor3473f882001-02-23 17:55:21 +00005019 GROW;
5020 }
5021 SKIP_BLANKS;
5022 SHRINK;
5023 while (RAW != ')') {
5024 /*
5025 * Each loop we parse one separator and one element.
5026 */
5027 if (RAW == ',') {
5028 if (type == 0) type = CUR;
5029
5030 /*
5031 * Detect "Name | Name , Name" error
5032 */
5033 else if (type != CUR) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005034 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005035 "xmlParseElementChildrenContentDecl : '%c' expected\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005036 type);
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00005037 if ((last != NULL) && (last != ret))
Owen Taylor3473f882001-02-23 17:55:21 +00005038 xmlFreeElementContent(last);
5039 if (ret != NULL)
5040 xmlFreeElementContent(ret);
5041 return(NULL);
5042 }
5043 NEXT;
5044
5045 op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_SEQ);
5046 if (op == NULL) {
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00005047 if ((last != NULL) && (last != ret))
5048 xmlFreeElementContent(last);
Owen Taylor3473f882001-02-23 17:55:21 +00005049 xmlFreeElementContent(ret);
5050 return(NULL);
5051 }
5052 if (last == NULL) {
5053 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005054 if (ret != NULL)
5055 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00005056 ret = cur = op;
5057 } else {
5058 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005059 if (op != NULL)
5060 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00005061 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005062 if (last != NULL)
5063 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00005064 cur =op;
5065 last = NULL;
5066 }
5067 } else if (RAW == '|') {
5068 if (type == 0) type = CUR;
5069
5070 /*
5071 * Detect "Name , Name | Name" error
5072 */
5073 else if (type != CUR) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005074 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005075 "xmlParseElementChildrenContentDecl : '%c' expected\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005076 type);
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00005077 if ((last != NULL) && (last != ret))
Owen Taylor3473f882001-02-23 17:55:21 +00005078 xmlFreeElementContent(last);
5079 if (ret != NULL)
5080 xmlFreeElementContent(ret);
5081 return(NULL);
5082 }
5083 NEXT;
5084
5085 op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
5086 if (op == NULL) {
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00005087 if ((last != NULL) && (last != ret))
Owen Taylor3473f882001-02-23 17:55:21 +00005088 xmlFreeElementContent(last);
5089 if (ret != NULL)
5090 xmlFreeElementContent(ret);
5091 return(NULL);
5092 }
5093 if (last == NULL) {
5094 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005095 if (ret != NULL)
5096 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00005097 ret = cur = op;
5098 } else {
5099 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005100 if (op != NULL)
5101 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00005102 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005103 if (last != NULL)
5104 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00005105 cur =op;
5106 last = NULL;
5107 }
5108 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005109 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005110 if (ret != NULL)
5111 xmlFreeElementContent(ret);
5112 return(NULL);
5113 }
5114 GROW;
5115 SKIP_BLANKS;
5116 GROW;
5117 if (RAW == '(') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005118 int inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00005119 /* Recurse on second child */
5120 NEXT;
5121 SKIP_BLANKS;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005122 last = xmlParseElementChildrenContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00005123 SKIP_BLANKS;
5124 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00005125 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005126 if (elem == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005127 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005128 if (ret != NULL)
5129 xmlFreeElementContent(ret);
5130 return(NULL);
5131 }
5132 last = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
Owen Taylor3473f882001-02-23 17:55:21 +00005133 if (RAW == '?') {
5134 last->ocur = XML_ELEMENT_CONTENT_OPT;
5135 NEXT;
5136 } else if (RAW == '*') {
5137 last->ocur = XML_ELEMENT_CONTENT_MULT;
5138 NEXT;
5139 } else if (RAW == '+') {
5140 last->ocur = XML_ELEMENT_CONTENT_PLUS;
5141 NEXT;
5142 } else {
5143 last->ocur = XML_ELEMENT_CONTENT_ONCE;
5144 }
5145 }
5146 SKIP_BLANKS;
5147 GROW;
5148 }
5149 if ((cur != NULL) && (last != NULL)) {
5150 cur->c2 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005151 if (last != NULL)
5152 last->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00005153 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005154 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00005155 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
5156"Element content declaration doesn't start and stop in the same entity\n",
5157 NULL);
Daniel Veillard8dc16a62002-02-19 21:08:48 +00005158 }
Owen Taylor3473f882001-02-23 17:55:21 +00005159 NEXT;
5160 if (RAW == '?') {
Daniel Veillarde470df72001-04-18 21:41:07 +00005161 if (ret != NULL)
5162 ret->ocur = XML_ELEMENT_CONTENT_OPT;
Owen Taylor3473f882001-02-23 17:55:21 +00005163 NEXT;
5164 } else if (RAW == '*') {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00005165 if (ret != NULL) {
Daniel Veillarde470df72001-04-18 21:41:07 +00005166 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Daniel Veillardce2c2f02001-10-18 14:57:24 +00005167 cur = ret;
5168 /*
5169 * Some normalization:
5170 * (a | b* | c?)* == (a | b | c)*
5171 */
5172 while (cur->type == XML_ELEMENT_CONTENT_OR) {
5173 if ((cur->c1 != NULL) &&
5174 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
5175 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT)))
5176 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
5177 if ((cur->c2 != NULL) &&
5178 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
5179 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT)))
5180 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
5181 cur = cur->c2;
5182 }
5183 }
Owen Taylor3473f882001-02-23 17:55:21 +00005184 NEXT;
5185 } else if (RAW == '+') {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00005186 if (ret != NULL) {
5187 int found = 0;
5188
Daniel Veillarde470df72001-04-18 21:41:07 +00005189 ret->ocur = XML_ELEMENT_CONTENT_PLUS;
Daniel Veillardce2c2f02001-10-18 14:57:24 +00005190 /*
5191 * Some normalization:
5192 * (a | b*)+ == (a | b)*
5193 * (a | b?)+ == (a | b)*
5194 */
5195 while (cur->type == XML_ELEMENT_CONTENT_OR) {
5196 if ((cur->c1 != NULL) &&
5197 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
5198 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) {
5199 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
5200 found = 1;
5201 }
5202 if ((cur->c2 != NULL) &&
5203 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
5204 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) {
5205 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
5206 found = 1;
5207 }
5208 cur = cur->c2;
5209 }
5210 if (found)
5211 ret->ocur = XML_ELEMENT_CONTENT_MULT;
5212 }
Owen Taylor3473f882001-02-23 17:55:21 +00005213 NEXT;
5214 }
5215 return(ret);
5216}
5217
5218/**
5219 * xmlParseElementContentDecl:
5220 * @ctxt: an XML parser context
5221 * @name: the name of the element being defined.
5222 * @result: the Element Content pointer will be stored here if any
5223 *
5224 * parse the declaration for an Element content either Mixed or Children,
5225 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
5226 *
5227 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
5228 *
5229 * returns: the type of element content XML_ELEMENT_TYPE_xxx
5230 */
5231
5232int
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005233xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, const xmlChar *name,
Owen Taylor3473f882001-02-23 17:55:21 +00005234 xmlElementContentPtr *result) {
5235
5236 xmlElementContentPtr tree = NULL;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005237 int inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00005238 int res;
5239
5240 *result = NULL;
5241
5242 if (RAW != '(') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00005243 xmlFatalErrMsgStr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005244 "xmlParseElementContentDecl : %s '(' expected\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00005245 return(-1);
5246 }
5247 NEXT;
5248 GROW;
5249 SKIP_BLANKS;
Daniel Veillard8f597c32003-10-06 08:19:27 +00005250 if (memcmp(CUR_PTR, "#PCDATA", 7) == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005251 tree = xmlParseElementMixedContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00005252 res = XML_ELEMENT_TYPE_MIXED;
5253 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005254 tree = xmlParseElementChildrenContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00005255 res = XML_ELEMENT_TYPE_ELEMENT;
5256 }
Owen Taylor3473f882001-02-23 17:55:21 +00005257 SKIP_BLANKS;
5258 *result = tree;
5259 return(res);
5260}
5261
5262/**
5263 * xmlParseElementDecl:
5264 * @ctxt: an XML parser context
5265 *
5266 * parse an Element declaration.
5267 *
5268 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
5269 *
5270 * [ VC: Unique Element Type Declaration ]
5271 * No element type may be declared more than once
5272 *
5273 * Returns the type of the element, or -1 in case of error
5274 */
5275int
5276xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005277 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00005278 int ret = -1;
5279 xmlElementContentPtr content = NULL;
5280
5281 GROW;
Daniel Veillard8f597c32003-10-06 08:19:27 +00005282 if (memcmp(CUR_PTR, "<!ELEMENT", 9) == 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00005283 xmlParserInputPtr input = ctxt->input;
5284
5285 SKIP(9);
5286 if (!IS_BLANK(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005287 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5288 "Space required after 'ELEMENT'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005289 }
5290 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00005291 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005292 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005293 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5294 "xmlParseElementDecl: no name for Element\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005295 return(-1);
5296 }
5297 while ((RAW == 0) && (ctxt->inputNr > 1))
5298 xmlPopInput(ctxt);
5299 if (!IS_BLANK(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005300 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5301 "Space required after the element name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005302 }
5303 SKIP_BLANKS;
Daniel Veillard8f597c32003-10-06 08:19:27 +00005304 if (memcmp(CUR_PTR, "EMPTY", 5) == 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00005305 SKIP(5);
5306 /*
5307 * Element must always be empty.
5308 */
5309 ret = XML_ELEMENT_TYPE_EMPTY;
5310 } else if ((RAW == 'A') && (NXT(1) == 'N') &&
5311 (NXT(2) == 'Y')) {
5312 SKIP(3);
5313 /*
5314 * Element is a generic container.
5315 */
5316 ret = XML_ELEMENT_TYPE_ANY;
5317 } else if (RAW == '(') {
5318 ret = xmlParseElementContentDecl(ctxt, name, &content);
5319 } else {
5320 /*
5321 * [ WFC: PEs in Internal Subset ] error handling.
5322 */
5323 if ((RAW == '%') && (ctxt->external == 0) &&
5324 (ctxt->inputNr == 1)) {
Daniel Veillardf403d292003-10-05 13:51:35 +00005325 xmlFatalErrMsg(ctxt, XML_ERR_PEREF_IN_INT_SUBSET,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005326 "PEReference: forbidden within markup decl in internal subset\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005327 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00005328 xmlFatalErrMsg(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
Owen Taylor3473f882001-02-23 17:55:21 +00005329 "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
5330 }
Owen Taylor3473f882001-02-23 17:55:21 +00005331 return(-1);
5332 }
5333
5334 SKIP_BLANKS;
5335 /*
5336 * Pop-up of finished entities.
5337 */
5338 while ((RAW == 0) && (ctxt->inputNr > 1))
5339 xmlPopInput(ctxt);
5340 SKIP_BLANKS;
5341
5342 if (RAW != '>') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005343 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005344 } else {
5345 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005346 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5347 "Element declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005348 }
5349
5350 NEXT;
5351 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5352 (ctxt->sax->elementDecl != NULL))
5353 ctxt->sax->elementDecl(ctxt->userData, name, ret,
5354 content);
5355 }
5356 if (content != NULL) {
5357 xmlFreeElementContent(content);
5358 }
Owen Taylor3473f882001-02-23 17:55:21 +00005359 }
5360 return(ret);
5361}
5362
5363/**
Owen Taylor3473f882001-02-23 17:55:21 +00005364 * xmlParseConditionalSections
5365 * @ctxt: an XML parser context
5366 *
5367 * [61] conditionalSect ::= includeSect | ignoreSect
5368 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
5369 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
5370 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
5371 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
5372 */
5373
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005374static void
Owen Taylor3473f882001-02-23 17:55:21 +00005375xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
5376 SKIP(3);
5377 SKIP_BLANKS;
Daniel Veillard8f597c32003-10-06 08:19:27 +00005378 if (memcmp(CUR_PTR, "INCLUDE", 7) == 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00005379 SKIP(7);
5380 SKIP_BLANKS;
5381 if (RAW != '[') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005382 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005383 } else {
5384 NEXT;
5385 }
5386 if (xmlParserDebugEntities) {
5387 if ((ctxt->input != NULL) && (ctxt->input->filename))
5388 xmlGenericError(xmlGenericErrorContext,
5389 "%s(%d): ", ctxt->input->filename,
5390 ctxt->input->line);
5391 xmlGenericError(xmlGenericErrorContext,
5392 "Entering INCLUDE Conditional Section\n");
5393 }
5394
5395 while ((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') ||
5396 (NXT(2) != '>'))) {
5397 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00005398 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00005399
5400 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5401 xmlParseConditionalSections(ctxt);
5402 } else if (IS_BLANK(CUR)) {
5403 NEXT;
5404 } else if (RAW == '%') {
5405 xmlParsePEReference(ctxt);
5406 } else
5407 xmlParseMarkupDecl(ctxt);
5408
5409 /*
5410 * Pop-up of finished entities.
5411 */
5412 while ((RAW == 0) && (ctxt->inputNr > 1))
5413 xmlPopInput(ctxt);
5414
Daniel Veillardfdc91562002-07-01 21:52:03 +00005415 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005416 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005417 break;
5418 }
5419 }
5420 if (xmlParserDebugEntities) {
5421 if ((ctxt->input != NULL) && (ctxt->input->filename))
5422 xmlGenericError(xmlGenericErrorContext,
5423 "%s(%d): ", ctxt->input->filename,
5424 ctxt->input->line);
5425 xmlGenericError(xmlGenericErrorContext,
5426 "Leaving INCLUDE Conditional Section\n");
5427 }
5428
Daniel Veillard8f597c32003-10-06 08:19:27 +00005429 } else if (memcmp(CUR_PTR, "IGNORE", 6) == 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00005430 int state;
William M. Brack78637da2003-07-31 14:47:38 +00005431 xmlParserInputState instate;
Owen Taylor3473f882001-02-23 17:55:21 +00005432 int depth = 0;
5433
5434 SKIP(6);
5435 SKIP_BLANKS;
5436 if (RAW != '[') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005437 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005438 } else {
5439 NEXT;
5440 }
5441 if (xmlParserDebugEntities) {
5442 if ((ctxt->input != NULL) && (ctxt->input->filename))
5443 xmlGenericError(xmlGenericErrorContext,
5444 "%s(%d): ", ctxt->input->filename,
5445 ctxt->input->line);
5446 xmlGenericError(xmlGenericErrorContext,
5447 "Entering IGNORE Conditional Section\n");
5448 }
5449
5450 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005451 * Parse up to the end of the conditional section
Owen Taylor3473f882001-02-23 17:55:21 +00005452 * But disable SAX event generating DTD building in the meantime
5453 */
5454 state = ctxt->disableSAX;
5455 instate = ctxt->instate;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005456 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005457 ctxt->instate = XML_PARSER_IGNORE;
5458
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00005459 while ((depth >= 0) && (RAW != 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005460 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5461 depth++;
5462 SKIP(3);
5463 continue;
5464 }
5465 if ((RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
5466 if (--depth >= 0) SKIP(3);
5467 continue;
5468 }
5469 NEXT;
5470 continue;
5471 }
5472
5473 ctxt->disableSAX = state;
5474 ctxt->instate = instate;
5475
5476 if (xmlParserDebugEntities) {
5477 if ((ctxt->input != NULL) && (ctxt->input->filename))
5478 xmlGenericError(xmlGenericErrorContext,
5479 "%s(%d): ", ctxt->input->filename,
5480 ctxt->input->line);
5481 xmlGenericError(xmlGenericErrorContext,
5482 "Leaving IGNORE Conditional Section\n");
5483 }
5484
5485 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005486 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID_KEYWORD, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005487 }
5488
5489 if (RAW == 0)
5490 SHRINK;
5491
5492 if (RAW == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005493 xmlFatalErr(ctxt, XML_ERR_CONDSEC_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005494 } else {
5495 SKIP(3);
5496 }
5497}
5498
5499/**
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005500 * xmlParseMarkupDecl:
5501 * @ctxt: an XML parser context
5502 *
5503 * parse Markup declarations
5504 *
5505 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
5506 * NotationDecl | PI | Comment
5507 *
5508 * [ VC: Proper Declaration/PE Nesting ]
5509 * Parameter-entity replacement text must be properly nested with
5510 * markup declarations. That is to say, if either the first character
5511 * or the last character of a markup declaration (markupdecl above) is
5512 * contained in the replacement text for a parameter-entity reference,
5513 * both must be contained in the same replacement text.
5514 *
5515 * [ WFC: PEs in Internal Subset ]
5516 * In the internal DTD subset, parameter-entity references can occur
5517 * only where markup declarations can occur, not within markup declarations.
5518 * (This does not apply to references that occur in external parameter
5519 * entities or to the external subset.)
5520 */
5521void
5522xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
5523 GROW;
5524 xmlParseElementDecl(ctxt);
5525 xmlParseAttributeListDecl(ctxt);
5526 xmlParseEntityDecl(ctxt);
5527 xmlParseNotationDecl(ctxt);
5528 xmlParsePI(ctxt);
5529 xmlParseComment(ctxt);
5530 /*
5531 * This is only for internal subset. On external entities,
5532 * the replacement is done before parsing stage
5533 */
5534 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
5535 xmlParsePEReference(ctxt);
5536
5537 /*
5538 * Conditional sections are allowed from entities included
5539 * by PE References in the internal subset.
5540 */
5541 if ((ctxt->external == 0) && (ctxt->inputNr > 1)) {
5542 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5543 xmlParseConditionalSections(ctxt);
5544 }
5545 }
5546
5547 ctxt->instate = XML_PARSER_DTD;
5548}
5549
5550/**
5551 * xmlParseTextDecl:
5552 * @ctxt: an XML parser context
5553 *
5554 * parse an XML declaration header for external entities
5555 *
5556 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
5557 *
5558 * Question: Seems that EncodingDecl is mandatory ? Is that a typo ?
5559 */
5560
5561void
5562xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
5563 xmlChar *version;
5564
5565 /*
5566 * We know that '<?xml' is here.
5567 */
Daniel Veillard8f597c32003-10-06 08:19:27 +00005568 if ((memcmp(CUR_PTR, "<?xml", 5) == 0) && (IS_BLANK(NXT(5)))) {
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005569 SKIP(5);
5570 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005571 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_STARTED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005572 return;
5573 }
5574
5575 if (!IS_BLANK(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005576 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5577 "Space needed after '<?xml'\n");
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005578 }
5579 SKIP_BLANKS;
5580
5581 /*
5582 * We may have the VersionInfo here.
5583 */
5584 version = xmlParseVersionInfo(ctxt);
5585 if (version == NULL)
5586 version = xmlCharStrdup(XML_DEFAULT_VERSION);
Daniel Veillard401c2112002-01-07 16:54:10 +00005587 else {
5588 if (!IS_BLANK(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005589 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5590 "Space needed here\n");
Daniel Veillard401c2112002-01-07 16:54:10 +00005591 }
5592 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005593 ctxt->input->version = version;
5594
5595 /*
5596 * We must have the encoding declaration
5597 */
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005598 xmlParseEncodingDecl(ctxt);
5599 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5600 /*
5601 * The XML REC instructs us to stop parsing right here
5602 */
5603 return;
5604 }
5605
5606 SKIP_BLANKS;
5607 if ((RAW == '?') && (NXT(1) == '>')) {
5608 SKIP(2);
5609 } else if (RAW == '>') {
5610 /* Deprecated old WD ... */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005611 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005612 NEXT;
5613 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005614 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005615 MOVETO_ENDTAG(CUR_PTR);
5616 NEXT;
5617 }
5618}
5619
5620/**
Owen Taylor3473f882001-02-23 17:55:21 +00005621 * xmlParseExternalSubset:
5622 * @ctxt: an XML parser context
5623 * @ExternalID: the external identifier
5624 * @SystemID: the system identifier (or URL)
5625 *
5626 * parse Markup declarations from an external subset
5627 *
5628 * [30] extSubset ::= textDecl? extSubsetDecl
5629 *
5630 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
5631 */
5632void
5633xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
5634 const xmlChar *SystemID) {
Daniel Veillard309f81d2003-09-23 09:02:53 +00005635 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005636 GROW;
Daniel Veillard8f597c32003-10-06 08:19:27 +00005637 if (memcmp(CUR_PTR, "<?xml", 5) == 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00005638 xmlParseTextDecl(ctxt);
5639 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5640 /*
5641 * The XML REC instructs us to stop parsing right here
5642 */
5643 ctxt->instate = XML_PARSER_EOF;
5644 return;
5645 }
5646 }
5647 if (ctxt->myDoc == NULL) {
5648 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
5649 }
5650 if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
5651 xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
5652
5653 ctxt->instate = XML_PARSER_DTD;
5654 ctxt->external = 1;
5655 while (((RAW == '<') && (NXT(1) == '?')) ||
5656 ((RAW == '<') && (NXT(1) == '!')) ||
Daniel Veillard2454ab92001-07-25 21:39:46 +00005657 (RAW == '%') || IS_BLANK(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005658 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00005659 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00005660
5661 GROW;
5662 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5663 xmlParseConditionalSections(ctxt);
5664 } else if (IS_BLANK(CUR)) {
5665 NEXT;
5666 } else if (RAW == '%') {
5667 xmlParsePEReference(ctxt);
5668 } else
5669 xmlParseMarkupDecl(ctxt);
5670
5671 /*
5672 * Pop-up of finished entities.
5673 */
5674 while ((RAW == 0) && (ctxt->inputNr > 1))
5675 xmlPopInput(ctxt);
5676
Daniel Veillardfdc91562002-07-01 21:52:03 +00005677 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005678 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005679 break;
5680 }
5681 }
5682
5683 if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005684 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005685 }
5686
5687}
5688
5689/**
5690 * xmlParseReference:
5691 * @ctxt: an XML parser context
5692 *
5693 * parse and handle entity references in content, depending on the SAX
5694 * interface, this may end-up in a call to character() if this is a
5695 * CharRef, a predefined entity, if there is no reference() callback.
5696 * or if the parser was asked to switch to that mode.
5697 *
5698 * [67] Reference ::= EntityRef | CharRef
5699 */
5700void
5701xmlParseReference(xmlParserCtxtPtr ctxt) {
5702 xmlEntityPtr ent;
5703 xmlChar *val;
5704 if (RAW != '&') return;
5705
5706 if (NXT(1) == '#') {
5707 int i = 0;
5708 xmlChar out[10];
5709 int hex = NXT(2);
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005710 int value = xmlParseCharRef(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005711
5712 if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
5713 /*
5714 * So we are using non-UTF-8 buffers
5715 * Check that the char fit on 8bits, if not
5716 * generate a CharRef.
5717 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005718 if (value <= 0xFF) {
5719 out[0] = value;
Owen Taylor3473f882001-02-23 17:55:21 +00005720 out[1] = 0;
5721 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5722 (!ctxt->disableSAX))
5723 ctxt->sax->characters(ctxt->userData, out, 1);
5724 } else {
5725 if ((hex == 'x') || (hex == 'X'))
Aleksey Sanin49cc9752002-06-14 17:07:10 +00005726 snprintf((char *)out, sizeof(out), "#x%X", value);
Owen Taylor3473f882001-02-23 17:55:21 +00005727 else
Aleksey Sanin49cc9752002-06-14 17:07:10 +00005728 snprintf((char *)out, sizeof(out), "#%d", value);
Owen Taylor3473f882001-02-23 17:55:21 +00005729 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
5730 (!ctxt->disableSAX))
5731 ctxt->sax->reference(ctxt->userData, out);
5732 }
5733 } else {
5734 /*
5735 * Just encode the value in UTF-8
5736 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005737 COPY_BUF(0 ,out, i, value);
Owen Taylor3473f882001-02-23 17:55:21 +00005738 out[i] = 0;
5739 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5740 (!ctxt->disableSAX))
5741 ctxt->sax->characters(ctxt->userData, out, i);
5742 }
5743 } else {
5744 ent = xmlParseEntityRef(ctxt);
5745 if (ent == NULL) return;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00005746 if (!ctxt->wellFormed)
5747 return;
Owen Taylor3473f882001-02-23 17:55:21 +00005748 if ((ent->name != NULL) &&
5749 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
5750 xmlNodePtr list = NULL;
Daniel Veillard7d515752003-09-26 19:12:37 +00005751 xmlParserErrors ret = XML_ERR_OK;
Owen Taylor3473f882001-02-23 17:55:21 +00005752
5753
5754 /*
5755 * The first reference to the entity trigger a parsing phase
5756 * where the ent->children is filled with the result from
5757 * the parsing.
5758 */
5759 if (ent->children == NULL) {
5760 xmlChar *value;
5761 value = ent->content;
5762
5763 /*
5764 * Check that this entity is well formed
5765 */
5766 if ((value != NULL) &&
5767 (value[1] == 0) && (value[0] == '<') &&
5768 (xmlStrEqual(ent->name, BAD_CAST "lt"))) {
5769 /*
5770 * DONE: get definite answer on this !!!
5771 * Lots of entity decls are used to declare a single
5772 * char
5773 * <!ENTITY lt "<">
5774 * Which seems to be valid since
5775 * 2.4: The ampersand character (&) and the left angle
5776 * bracket (<) may appear in their literal form only
5777 * when used ... They are also legal within the literal
5778 * entity value of an internal entity declaration;i
5779 * see "4.3.2 Well-Formed Parsed Entities".
5780 * IMHO 2.4 and 4.3.2 are directly in contradiction.
5781 * Looking at the OASIS test suite and James Clark
5782 * tests, this is broken. However the XML REC uses
5783 * it. Is the XML REC not well-formed ????
5784 * This is a hack to avoid this problem
5785 *
5786 * ANSWER: since lt gt amp .. are already defined,
5787 * this is a redefinition and hence the fact that the
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005788 * content is not well balanced is not a Wf error, this
Owen Taylor3473f882001-02-23 17:55:21 +00005789 * is lousy but acceptable.
5790 */
5791 list = xmlNewDocText(ctxt->myDoc, value);
5792 if (list != NULL) {
5793 if ((ent->etype == XML_INTERNAL_GENERAL_ENTITY) &&
5794 (ent->children == NULL)) {
5795 ent->children = list;
5796 ent->last = list;
Daniel Veillard2d84a892002-12-30 00:01:08 +00005797 ent->owner = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005798 list->parent = (xmlNodePtr) ent;
5799 } else {
5800 xmlFreeNodeList(list);
5801 }
5802 } else if (list != NULL) {
5803 xmlFreeNodeList(list);
5804 }
5805 } else {
5806 /*
5807 * 4.3.2: An internal general parsed entity is well-formed
5808 * if its replacement text matches the production labeled
5809 * content.
5810 */
Daniel Veillardb5a60ec2002-03-18 11:45:56 +00005811
5812 void *user_data;
5813 /*
5814 * This is a bit hackish but this seems the best
5815 * way to make sure both SAX and DOM entity support
5816 * behaves okay.
5817 */
5818 if (ctxt->userData == ctxt)
5819 user_data = NULL;
5820 else
5821 user_data = ctxt->userData;
5822
Owen Taylor3473f882001-02-23 17:55:21 +00005823 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
5824 ctxt->depth++;
Daniel Veillard328f48c2002-11-15 15:24:34 +00005825 ret = xmlParseBalancedChunkMemoryInternal(ctxt,
5826 value, user_data, &list);
Owen Taylor3473f882001-02-23 17:55:21 +00005827 ctxt->depth--;
5828 } else if (ent->etype ==
5829 XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
5830 ctxt->depth++;
Daniel Veillarda97a19b2001-05-20 13:19:52 +00005831 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
Daniel Veillardb5a60ec2002-03-18 11:45:56 +00005832 ctxt->sax, user_data, ctxt->depth,
Daniel Veillarda97a19b2001-05-20 13:19:52 +00005833 ent->URI, ent->ExternalID, &list);
Owen Taylor3473f882001-02-23 17:55:21 +00005834 ctxt->depth--;
5835 } else {
Daniel Veillard7d515752003-09-26 19:12:37 +00005836 ret = XML_ERR_ENTITY_PE_INTERNAL;
Daniel Veillardf403d292003-10-05 13:51:35 +00005837 xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
5838 "invalid entity type found\n", NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005839 }
5840 if (ret == XML_ERR_ENTITY_LOOP) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005841 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00005842 return;
Daniel Veillard7d515752003-09-26 19:12:37 +00005843 } else if ((ret == XML_ERR_OK) && (list != NULL)) {
Daniel Veillard76d66f42001-05-16 21:05:17 +00005844 if (((ent->etype == XML_INTERNAL_GENERAL_ENTITY) ||
5845 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY))&&
Owen Taylor3473f882001-02-23 17:55:21 +00005846 (ent->children == NULL)) {
5847 ent->children = list;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005848 if (ctxt->replaceEntities) {
5849 /*
5850 * Prune it directly in the generated document
5851 * except for single text nodes.
5852 */
5853 if ((list->type == XML_TEXT_NODE) &&
5854 (list->next == NULL)) {
5855 list->parent = (xmlNodePtr) ent;
5856 list = NULL;
Daniel Veillard2d84a892002-12-30 00:01:08 +00005857 ent->owner = 1;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005858 } else {
Daniel Veillard2d84a892002-12-30 00:01:08 +00005859 ent->owner = 0;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005860 while (list != NULL) {
5861 list->parent = (xmlNodePtr) ctxt->node;
Daniel Veillard68e9e742002-11-16 15:35:11 +00005862 list->doc = ctxt->myDoc;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005863 if (list->next == NULL)
5864 ent->last = list;
5865 list = list->next;
Daniel Veillard8107a222002-01-13 14:10:10 +00005866 }
Daniel Veillard62f313b2001-07-04 19:49:14 +00005867 list = ent->children;
Daniel Veillard81273902003-09-30 00:43:48 +00005868#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +00005869 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
5870 xmlAddEntityReference(ent, list, NULL);
Daniel Veillard81273902003-09-30 00:43:48 +00005871#endif /* LIBXML_LEGACY_ENABLED */
Daniel Veillard62f313b2001-07-04 19:49:14 +00005872 }
5873 } else {
Daniel Veillard2d84a892002-12-30 00:01:08 +00005874 ent->owner = 1;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005875 while (list != NULL) {
5876 list->parent = (xmlNodePtr) ent;
5877 if (list->next == NULL)
5878 ent->last = list;
5879 list = list->next;
5880 }
Owen Taylor3473f882001-02-23 17:55:21 +00005881 }
5882 } else {
5883 xmlFreeNodeList(list);
Daniel Veillard62f313b2001-07-04 19:49:14 +00005884 list = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005885 }
William M. Brackb670e2e2003-09-27 01:05:55 +00005886 } else if ((ret != XML_ERR_OK) &&
5887 (ret != XML_WAR_UNDECLARED_ENTITY)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005888 xmlFatalErr(ctxt, ret, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005889 } else if (list != NULL) {
5890 xmlFreeNodeList(list);
Daniel Veillard62f313b2001-07-04 19:49:14 +00005891 list = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005892 }
5893 }
5894 }
5895 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
5896 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
5897 /*
5898 * Create a node.
5899 */
5900 ctxt->sax->reference(ctxt->userData, ent->name);
5901 return;
5902 } else if (ctxt->replaceEntities) {
5903 if ((ctxt->node != NULL) && (ent->children != NULL)) {
5904 /*
5905 * Seems we are generating the DOM content, do
Daniel Veillard62f313b2001-07-04 19:49:14 +00005906 * a simple tree copy for all references except the first
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005907 * In the first occurrence list contains the replacement
Owen Taylor3473f882001-02-23 17:55:21 +00005908 */
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00005909 if ((list == NULL) && (ent->owner == 0)) {
5910 xmlNodePtr nw = NULL, cur, firstChild = NULL;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005911 cur = ent->children;
5912 while (cur != NULL) {
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00005913 nw = xmlCopyNode(cur, 1);
5914 if (nw != NULL) {
5915 nw->_private = cur->_private;
Daniel Veillard8f872442003-01-09 23:19:02 +00005916 if (firstChild == NULL){
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00005917 firstChild = nw;
Daniel Veillard8f872442003-01-09 23:19:02 +00005918 }
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00005919 xmlAddChild(ctxt->node, nw);
Daniel Veillard8107a222002-01-13 14:10:10 +00005920 }
Daniel Veillard62f313b2001-07-04 19:49:14 +00005921 if (cur == ent->last)
5922 break;
5923 cur = cur->next;
5924 }
Daniel Veillard81273902003-09-30 00:43:48 +00005925#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +00005926 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00005927 xmlAddEntityReference(ent, firstChild, nw);
Daniel Veillard81273902003-09-30 00:43:48 +00005928#endif /* LIBXML_LEGACY_ENABLED */
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00005929 } else if (list == NULL) {
5930 xmlNodePtr nw = NULL, cur, next, last,
5931 firstChild = NULL;
5932 /*
5933 * Copy the entity child list and make it the new
5934 * entity child list. The goal is to make sure any
5935 * ID or REF referenced will be the one from the
5936 * document content and not the entity copy.
5937 */
5938 cur = ent->children;
5939 ent->children = NULL;
5940 last = ent->last;
5941 ent->last = NULL;
5942 while (cur != NULL) {
5943 next = cur->next;
5944 cur->next = NULL;
5945 cur->parent = NULL;
5946 nw = xmlCopyNode(cur, 1);
5947 if (nw != NULL) {
5948 nw->_private = cur->_private;
5949 if (firstChild == NULL){
5950 firstChild = cur;
5951 }
5952 xmlAddChild((xmlNodePtr) ent, nw);
5953 xmlAddChild(ctxt->node, cur);
5954 }
5955 if (cur == last)
5956 break;
5957 cur = next;
5958 }
5959 ent->owner = 1;
Daniel Veillard81273902003-09-30 00:43:48 +00005960#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00005961 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
5962 xmlAddEntityReference(ent, firstChild, nw);
Daniel Veillard81273902003-09-30 00:43:48 +00005963#endif /* LIBXML_LEGACY_ENABLED */
Daniel Veillard62f313b2001-07-04 19:49:14 +00005964 } else {
5965 /*
5966 * the name change is to avoid coalescing of the
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005967 * node with a possible previous text one which
5968 * would make ent->children a dangling pointer
Daniel Veillard62f313b2001-07-04 19:49:14 +00005969 */
5970 if (ent->children->type == XML_TEXT_NODE)
5971 ent->children->name = xmlStrdup(BAD_CAST "nbktext");
5972 if ((ent->last != ent->children) &&
5973 (ent->last->type == XML_TEXT_NODE))
5974 ent->last->name = xmlStrdup(BAD_CAST "nbktext");
5975 xmlAddChildList(ctxt->node, ent->children);
5976 }
5977
Owen Taylor3473f882001-02-23 17:55:21 +00005978 /*
5979 * This is to avoid a nasty side effect, see
5980 * characters() in SAX.c
5981 */
5982 ctxt->nodemem = 0;
5983 ctxt->nodelen = 0;
5984 return;
5985 } else {
5986 /*
5987 * Probably running in SAX mode
5988 */
5989 xmlParserInputPtr input;
5990
5991 input = xmlNewEntityInputStream(ctxt, ent);
5992 xmlPushInput(ctxt, input);
5993 if ((ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) &&
Daniel Veillard8f597c32003-10-06 08:19:27 +00005994 (memcmp(CUR_PTR, "<?xml", 5) == 0) && (IS_BLANK(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +00005995 xmlParseTextDecl(ctxt);
5996 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5997 /*
5998 * The XML REC instructs us to stop parsing right here
5999 */
6000 ctxt->instate = XML_PARSER_EOF;
6001 return;
6002 }
6003 if (input->standalone == 1) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006004 xmlFatalErr(ctxt, XML_ERR_EXT_ENTITY_STANDALONE,
6005 NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006006 }
6007 }
6008 return;
6009 }
6010 }
6011 } else {
6012 val = ent->content;
6013 if (val == NULL) return;
6014 /*
6015 * inline the entity.
6016 */
6017 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
6018 (!ctxt->disableSAX))
6019 ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
6020 }
6021 }
6022}
6023
6024/**
6025 * xmlParseEntityRef:
6026 * @ctxt: an XML parser context
6027 *
6028 * parse ENTITY references declarations
6029 *
6030 * [68] EntityRef ::= '&' Name ';'
6031 *
6032 * [ WFC: Entity Declared ]
6033 * In a document without any DTD, a document with only an internal DTD
6034 * subset which contains no parameter entity references, or a document
6035 * with "standalone='yes'", the Name given in the entity reference
6036 * must match that in an entity declaration, except that well-formed
6037 * documents need not declare any of the following entities: amp, lt,
6038 * gt, apos, quot. The declaration of a parameter entity must precede
6039 * any reference to it. Similarly, the declaration of a general entity
6040 * must precede any reference to it which appears in a default value in an
6041 * attribute-list declaration. Note that if entities are declared in the
6042 * external subset or in external parameter entities, a non-validating
6043 * processor is not obligated to read and process their declarations;
6044 * for such documents, the rule that an entity must be declared is a
6045 * well-formedness constraint only if standalone='yes'.
6046 *
6047 * [ WFC: Parsed Entity ]
6048 * An entity reference must not contain the name of an unparsed entity
6049 *
6050 * Returns the xmlEntityPtr if found, or NULL otherwise.
6051 */
6052xmlEntityPtr
6053xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006054 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00006055 xmlEntityPtr ent = NULL;
6056
6057 GROW;
6058
6059 if (RAW == '&') {
6060 NEXT;
6061 name = xmlParseName(ctxt);
6062 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006063 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6064 "xmlParseEntityRef: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006065 } else {
6066 if (RAW == ';') {
6067 NEXT;
6068 /*
6069 * Ask first SAX for entity resolution, otherwise try the
6070 * predefined set.
6071 */
6072 if (ctxt->sax != NULL) {
6073 if (ctxt->sax->getEntity != NULL)
6074 ent = ctxt->sax->getEntity(ctxt->userData, name);
Daniel Veillard39eb88b2003-03-11 11:21:28 +00006075 if ((ctxt->wellFormed == 1 ) && (ent == NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00006076 ent = xmlGetPredefinedEntity(name);
Daniel Veillard39eb88b2003-03-11 11:21:28 +00006077 if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
6078 (ctxt->userData==ctxt)) {
Daniel Veillard1af9a412003-08-20 22:54:39 +00006079 ent = xmlSAX2GetEntity(ctxt, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00006080 }
Owen Taylor3473f882001-02-23 17:55:21 +00006081 }
6082 /*
6083 * [ WFC: Entity Declared ]
6084 * In a document without any DTD, a document with only an
6085 * internal DTD subset which contains no parameter entity
6086 * references, or a document with "standalone='yes'", the
6087 * Name given in the entity reference must match that in an
6088 * entity declaration, except that well-formed documents
6089 * need not declare any of the following entities: amp, lt,
6090 * gt, apos, quot.
6091 * The declaration of a parameter entity must precede any
6092 * reference to it.
6093 * Similarly, the declaration of a general entity must
6094 * precede any reference to it which appears in a default
6095 * value in an attribute-list declaration. Note that if
6096 * entities are declared in the external subset or in
6097 * external parameter entities, a non-validating processor
6098 * is not obligated to read and process their declarations;
6099 * for such documents, the rule that an entity must be
6100 * declared is a well-formedness constraint only if
6101 * standalone='yes'.
6102 */
6103 if (ent == NULL) {
6104 if ((ctxt->standalone == 1) ||
6105 ((ctxt->hasExternalSubset == 0) &&
6106 (ctxt->hasPErefs == 0))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006107 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00006108 "Entity '%s' not defined\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006109 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00006110 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00006111 "Entity '%s' not defined\n", name);
6112 }
Daniel Veillardf403d292003-10-05 13:51:35 +00006113 ctxt->valid = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00006114 }
6115
6116 /*
6117 * [ WFC: Parsed Entity ]
6118 * An entity reference must not contain the name of an
6119 * unparsed entity
6120 */
6121 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006122 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00006123 "Entity reference to unparsed entity %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006124 }
6125
6126 /*
6127 * [ WFC: No External Entity References ]
6128 * Attribute values cannot contain direct or indirect
6129 * entity references to external entities.
6130 */
6131 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
6132 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006133 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
6134 "Attribute references external entity '%s'\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006135 }
6136 /*
6137 * [ WFC: No < in Attribute Values ]
6138 * The replacement text of any entity referred to directly or
6139 * indirectly in an attribute value (other than "&lt;") must
6140 * not contain a <.
6141 */
6142 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
6143 (ent != NULL) &&
6144 (!xmlStrEqual(ent->name, BAD_CAST "lt")) &&
6145 (ent->content != NULL) &&
6146 (xmlStrchr(ent->content, '<'))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006147 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
Owen Taylor3473f882001-02-23 17:55:21 +00006148 "'<' in entity '%s' is not allowed in attributes values\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006149 }
6150
6151 /*
6152 * Internal check, no parameter entities here ...
6153 */
6154 else {
6155 switch (ent->etype) {
6156 case XML_INTERNAL_PARAMETER_ENTITY:
6157 case XML_EXTERNAL_PARAMETER_ENTITY:
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006158 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
6159 "Attempt to reference the parameter entity '%s'\n",
6160 name);
Owen Taylor3473f882001-02-23 17:55:21 +00006161 break;
6162 default:
6163 break;
6164 }
6165 }
6166
6167 /*
6168 * [ WFC: No Recursion ]
6169 * A parsed entity must not contain a recursive reference
6170 * to itself, either directly or indirectly.
6171 * Done somewhere else
6172 */
6173
6174 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006175 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006176 }
Owen Taylor3473f882001-02-23 17:55:21 +00006177 }
6178 }
6179 return(ent);
6180}
6181
6182/**
6183 * xmlParseStringEntityRef:
6184 * @ctxt: an XML parser context
6185 * @str: a pointer to an index in the string
6186 *
6187 * parse ENTITY references declarations, but this version parses it from
6188 * a string value.
6189 *
6190 * [68] EntityRef ::= '&' Name ';'
6191 *
6192 * [ WFC: Entity Declared ]
6193 * In a document without any DTD, a document with only an internal DTD
6194 * subset which contains no parameter entity references, or a document
6195 * with "standalone='yes'", the Name given in the entity reference
6196 * must match that in an entity declaration, except that well-formed
6197 * documents need not declare any of the following entities: amp, lt,
6198 * gt, apos, quot. The declaration of a parameter entity must precede
6199 * any reference to it. Similarly, the declaration of a general entity
6200 * must precede any reference to it which appears in a default value in an
6201 * attribute-list declaration. Note that if entities are declared in the
6202 * external subset or in external parameter entities, a non-validating
6203 * processor is not obligated to read and process their declarations;
6204 * for such documents, the rule that an entity must be declared is a
6205 * well-formedness constraint only if standalone='yes'.
6206 *
6207 * [ WFC: Parsed Entity ]
6208 * An entity reference must not contain the name of an unparsed entity
6209 *
6210 * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
6211 * is updated to the current location in the string.
6212 */
6213xmlEntityPtr
6214xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
6215 xmlChar *name;
6216 const xmlChar *ptr;
6217 xmlChar cur;
6218 xmlEntityPtr ent = NULL;
6219
6220 if ((str == NULL) || (*str == NULL))
6221 return(NULL);
6222 ptr = *str;
6223 cur = *ptr;
6224 if (cur == '&') {
6225 ptr++;
6226 cur = *ptr;
6227 name = xmlParseStringName(ctxt, &ptr);
6228 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006229 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6230 "xmlParseStringEntityRef: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006231 } else {
6232 if (*ptr == ';') {
6233 ptr++;
6234 /*
6235 * Ask first SAX for entity resolution, otherwise try the
6236 * predefined set.
6237 */
6238 if (ctxt->sax != NULL) {
6239 if (ctxt->sax->getEntity != NULL)
6240 ent = ctxt->sax->getEntity(ctxt->userData, name);
6241 if (ent == NULL)
6242 ent = xmlGetPredefinedEntity(name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00006243 if ((ent == NULL) && (ctxt->userData==ctxt)) {
Daniel Veillard1af9a412003-08-20 22:54:39 +00006244 ent = xmlSAX2GetEntity(ctxt, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00006245 }
Owen Taylor3473f882001-02-23 17:55:21 +00006246 }
6247 /*
6248 * [ WFC: Entity Declared ]
6249 * In a document without any DTD, a document with only an
6250 * internal DTD subset which contains no parameter entity
6251 * references, or a document with "standalone='yes'", the
6252 * Name given in the entity reference must match that in an
6253 * entity declaration, except that well-formed documents
6254 * need not declare any of the following entities: amp, lt,
6255 * gt, apos, quot.
6256 * The declaration of a parameter entity must precede any
6257 * reference to it.
6258 * Similarly, the declaration of a general entity must
6259 * precede any reference to it which appears in a default
6260 * value in an attribute-list declaration. Note that if
6261 * entities are declared in the external subset or in
6262 * external parameter entities, a non-validating processor
6263 * is not obligated to read and process their declarations;
6264 * for such documents, the rule that an entity must be
6265 * declared is a well-formedness constraint only if
6266 * standalone='yes'.
6267 */
6268 if (ent == NULL) {
6269 if ((ctxt->standalone == 1) ||
6270 ((ctxt->hasExternalSubset == 0) &&
6271 (ctxt->hasPErefs == 0))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006272 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00006273 "Entity '%s' not defined\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006274 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00006275 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
Daniel Veillard24eb9782003-10-04 21:08:09 +00006276 "Entity '%s' not defined\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00006277 name);
Owen Taylor3473f882001-02-23 17:55:21 +00006278 }
Daniel Veillard24eb9782003-10-04 21:08:09 +00006279 /* TODO ? check regressions ctxt->valid = 0; */
Owen Taylor3473f882001-02-23 17:55:21 +00006280 }
6281
6282 /*
6283 * [ WFC: Parsed Entity ]
6284 * An entity reference must not contain the name of an
6285 * unparsed entity
6286 */
6287 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
Daniel Veillardf403d292003-10-05 13:51:35 +00006288 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00006289 "Entity reference to unparsed entity %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006290 }
6291
6292 /*
6293 * [ WFC: No External Entity References ]
6294 * Attribute values cannot contain direct or indirect
6295 * entity references to external entities.
6296 */
6297 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
6298 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
Daniel Veillardf403d292003-10-05 13:51:35 +00006299 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
Owen Taylor3473f882001-02-23 17:55:21 +00006300 "Attribute references external entity '%s'\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006301 }
6302 /*
6303 * [ WFC: No < in Attribute Values ]
6304 * The replacement text of any entity referred to directly or
6305 * indirectly in an attribute value (other than "&lt;") must
6306 * not contain a <.
6307 */
6308 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
6309 (ent != NULL) &&
6310 (!xmlStrEqual(ent->name, BAD_CAST "lt")) &&
6311 (ent->content != NULL) &&
6312 (xmlStrchr(ent->content, '<'))) {
Daniel Veillardf403d292003-10-05 13:51:35 +00006313 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
6314 "'<' in entity '%s' is not allowed in attributes values\n",
6315 name);
Owen Taylor3473f882001-02-23 17:55:21 +00006316 }
6317
6318 /*
6319 * Internal check, no parameter entities here ...
6320 */
6321 else {
6322 switch (ent->etype) {
6323 case XML_INTERNAL_PARAMETER_ENTITY:
6324 case XML_EXTERNAL_PARAMETER_ENTITY:
Daniel Veillardf403d292003-10-05 13:51:35 +00006325 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
6326 "Attempt to reference the parameter entity '%s'\n",
6327 name);
Owen Taylor3473f882001-02-23 17:55:21 +00006328 break;
6329 default:
6330 break;
6331 }
6332 }
6333
6334 /*
6335 * [ WFC: No Recursion ]
6336 * A parsed entity must not contain a recursive reference
6337 * to itself, either directly or indirectly.
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006338 * Done somewhere else
Owen Taylor3473f882001-02-23 17:55:21 +00006339 */
6340
6341 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006342 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006343 }
6344 xmlFree(name);
6345 }
6346 }
6347 *str = ptr;
6348 return(ent);
6349}
6350
6351/**
6352 * xmlParsePEReference:
6353 * @ctxt: an XML parser context
6354 *
6355 * parse PEReference declarations
6356 * The entity content is handled directly by pushing it's content as
6357 * a new input stream.
6358 *
6359 * [69] PEReference ::= '%' Name ';'
6360 *
6361 * [ WFC: No Recursion ]
6362 * A parsed entity must not contain a recursive
6363 * reference to itself, either directly or indirectly.
6364 *
6365 * [ WFC: Entity Declared ]
6366 * In a document without any DTD, a document with only an internal DTD
6367 * subset which contains no parameter entity references, or a document
6368 * with "standalone='yes'", ... ... The declaration of a parameter
6369 * entity must precede any reference to it...
6370 *
6371 * [ VC: Entity Declared ]
6372 * In a document with an external subset or external parameter entities
6373 * with "standalone='no'", ... ... The declaration of a parameter entity
6374 * must precede any reference to it...
6375 *
6376 * [ WFC: In DTD ]
6377 * Parameter-entity references may only appear in the DTD.
6378 * NOTE: misleading but this is handled.
6379 */
6380void
Daniel Veillard8f597c32003-10-06 08:19:27 +00006381xmlParsePEReference(xmlParserCtxtPtr ctxt)
6382{
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006383 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00006384 xmlEntityPtr entity = NULL;
6385 xmlParserInputPtr input;
6386
6387 if (RAW == '%') {
6388 NEXT;
Daniel Veillard76d66f42001-05-16 21:05:17 +00006389 name = xmlParseName(ctxt);
Daniel Veillard8f597c32003-10-06 08:19:27 +00006390 if (name == NULL) {
6391 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6392 "xmlParsePEReference: no name\n");
6393 } else {
6394 if (RAW == ';') {
6395 NEXT;
6396 if ((ctxt->sax != NULL) &&
6397 (ctxt->sax->getParameterEntity != NULL))
6398 entity = ctxt->sax->getParameterEntity(ctxt->userData,
6399 name);
6400 if (entity == NULL) {
6401 /*
6402 * [ WFC: Entity Declared ]
6403 * In a document without any DTD, a document with only an
6404 * internal DTD subset which contains no parameter entity
6405 * references, or a document with "standalone='yes'", ...
6406 * ... The declaration of a parameter entity must precede
6407 * any reference to it...
6408 */
6409 if ((ctxt->standalone == 1) ||
6410 ((ctxt->hasExternalSubset == 0) &&
6411 (ctxt->hasPErefs == 0))) {
6412 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
6413 "PEReference: %%%s; not found\n",
6414 name);
6415 } else {
6416 /*
6417 * [ VC: Entity Declared ]
6418 * In a document with an external subset or external
6419 * parameter entities with "standalone='no'", ...
6420 * ... The declaration of a parameter entity must
6421 * precede any reference to it...
6422 */
6423 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
6424 "PEReference: %%%s; not found\n",
6425 name, NULL);
6426 ctxt->valid = 0;
6427 }
6428 } else {
6429 /*
6430 * Internal checking in case the entity quest barfed
6431 */
6432 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
6433 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
6434 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
6435 "Internal: %%%s; is not a parameter entity\n",
6436 name, NULL);
6437 } else if (ctxt->input->free != deallocblankswrapper) {
6438 input =
6439 xmlNewBlanksWrapperInputStream(ctxt, entity);
6440 xmlPushInput(ctxt, input);
6441 } else {
6442 /*
6443 * TODO !!!
6444 * handle the extra spaces added before and after
6445 * c.f. http://www.w3.org/TR/REC-xml#as-PE
6446 */
6447 input = xmlNewEntityInputStream(ctxt, entity);
6448 xmlPushInput(ctxt, input);
6449 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
6450 (memcmp(CUR_PTR, "<?xml", 5) == 0) &&
6451 (IS_BLANK(NXT(5)))) {
6452 xmlParseTextDecl(ctxt);
6453 if (ctxt->errNo ==
6454 XML_ERR_UNSUPPORTED_ENCODING) {
6455 /*
6456 * The XML REC instructs us to stop parsing
6457 * right here
6458 */
6459 ctxt->instate = XML_PARSER_EOF;
6460 return;
6461 }
6462 }
6463 }
6464 }
6465 ctxt->hasPErefs = 1;
6466 } else {
6467 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
6468 }
6469 }
Owen Taylor3473f882001-02-23 17:55:21 +00006470 }
6471}
6472
6473/**
6474 * xmlParseStringPEReference:
6475 * @ctxt: an XML parser context
6476 * @str: a pointer to an index in the string
6477 *
6478 * parse PEReference declarations
6479 *
6480 * [69] PEReference ::= '%' Name ';'
6481 *
6482 * [ WFC: No Recursion ]
6483 * A parsed entity must not contain a recursive
6484 * reference to itself, either directly or indirectly.
6485 *
6486 * [ WFC: Entity Declared ]
6487 * In a document without any DTD, a document with only an internal DTD
6488 * subset which contains no parameter entity references, or a document
6489 * with "standalone='yes'", ... ... The declaration of a parameter
6490 * entity must precede any reference to it...
6491 *
6492 * [ VC: Entity Declared ]
6493 * In a document with an external subset or external parameter entities
6494 * with "standalone='no'", ... ... The declaration of a parameter entity
6495 * must precede any reference to it...
6496 *
6497 * [ WFC: In DTD ]
6498 * Parameter-entity references may only appear in the DTD.
6499 * NOTE: misleading but this is handled.
6500 *
6501 * Returns the string of the entity content.
6502 * str is updated to the current value of the index
6503 */
6504xmlEntityPtr
6505xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
6506 const xmlChar *ptr;
6507 xmlChar cur;
6508 xmlChar *name;
6509 xmlEntityPtr entity = NULL;
6510
6511 if ((str == NULL) || (*str == NULL)) return(NULL);
6512 ptr = *str;
6513 cur = *ptr;
6514 if (cur == '%') {
6515 ptr++;
6516 cur = *ptr;
6517 name = xmlParseStringName(ctxt, &ptr);
6518 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006519 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6520 "xmlParseStringPEReference: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006521 } else {
6522 cur = *ptr;
6523 if (cur == ';') {
6524 ptr++;
6525 cur = *ptr;
6526 if ((ctxt->sax != NULL) &&
6527 (ctxt->sax->getParameterEntity != NULL))
6528 entity = ctxt->sax->getParameterEntity(ctxt->userData,
6529 name);
6530 if (entity == NULL) {
6531 /*
6532 * [ WFC: Entity Declared ]
6533 * In a document without any DTD, a document with only an
6534 * internal DTD subset which contains no parameter entity
6535 * references, or a document with "standalone='yes'", ...
6536 * ... The declaration of a parameter entity must precede
6537 * any reference to it...
6538 */
6539 if ((ctxt->standalone == 1) ||
6540 ((ctxt->hasExternalSubset == 0) &&
6541 (ctxt->hasPErefs == 0))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006542 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00006543 "PEReference: %%%s; not found\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006544 } else {
6545 /*
6546 * [ VC: Entity Declared ]
6547 * In a document with an external subset or external
6548 * parameter entities with "standalone='no'", ...
6549 * ... The declaration of a parameter entity must
6550 * precede any reference to it...
6551 */
Daniel Veillard24eb9782003-10-04 21:08:09 +00006552 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
6553 "PEReference: %%%s; not found\n",
6554 name, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006555 ctxt->valid = 0;
6556 }
6557 } else {
6558 /*
6559 * Internal checking in case the entity quest barfed
6560 */
6561 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
6562 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00006563 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
6564 "%%%s; is not a parameter entity\n",
6565 name, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006566 }
6567 }
6568 ctxt->hasPErefs = 1;
6569 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006570 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006571 }
6572 xmlFree(name);
6573 }
6574 }
6575 *str = ptr;
6576 return(entity);
6577}
6578
6579/**
6580 * xmlParseDocTypeDecl:
6581 * @ctxt: an XML parser context
6582 *
6583 * parse a DOCTYPE declaration
6584 *
6585 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
6586 * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
6587 *
6588 * [ VC: Root Element Type ]
6589 * The Name in the document type declaration must match the element
6590 * type of the root element.
6591 */
6592
6593void
6594xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006595 const xmlChar *name = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00006596 xmlChar *ExternalID = NULL;
6597 xmlChar *URI = NULL;
6598
6599 /*
6600 * We know that '<!DOCTYPE' has been detected.
6601 */
6602 SKIP(9);
6603
6604 SKIP_BLANKS;
6605
6606 /*
6607 * Parse the DOCTYPE name.
6608 */
6609 name = xmlParseName(ctxt);
6610 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006611 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6612 "xmlParseDocTypeDecl : no DOCTYPE name !\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006613 }
6614 ctxt->intSubName = name;
6615
6616 SKIP_BLANKS;
6617
6618 /*
6619 * Check for SystemID and ExternalID
6620 */
6621 URI = xmlParseExternalID(ctxt, &ExternalID, 1);
6622
6623 if ((URI != NULL) || (ExternalID != NULL)) {
6624 ctxt->hasExternalSubset = 1;
6625 }
6626 ctxt->extSubURI = URI;
6627 ctxt->extSubSystem = ExternalID;
6628
6629 SKIP_BLANKS;
6630
6631 /*
6632 * Create and update the internal subset.
6633 */
6634 if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
6635 (!ctxt->disableSAX))
6636 ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
6637
6638 /*
6639 * Is there any internal subset declarations ?
6640 * they are handled separately in xmlParseInternalSubset()
6641 */
6642 if (RAW == '[')
6643 return;
6644
6645 /*
6646 * We should be at the end of the DOCTYPE declaration.
6647 */
6648 if (RAW != '>') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006649 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006650 }
6651 NEXT;
6652}
6653
6654/**
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006655 * xmlParseInternalSubset:
Owen Taylor3473f882001-02-23 17:55:21 +00006656 * @ctxt: an XML parser context
6657 *
6658 * parse the internal subset declaration
6659 *
6660 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
6661 */
6662
Daniel Veillard56a4cb82001-03-24 17:00:36 +00006663static void
Owen Taylor3473f882001-02-23 17:55:21 +00006664xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
6665 /*
6666 * Is there any DTD definition ?
6667 */
6668 if (RAW == '[') {
6669 ctxt->instate = XML_PARSER_DTD;
6670 NEXT;
6671 /*
6672 * Parse the succession of Markup declarations and
6673 * PEReferences.
6674 * Subsequence (markupdecl | PEReference | S)*
6675 */
6676 while (RAW != ']') {
6677 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00006678 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00006679
6680 SKIP_BLANKS;
6681 xmlParseMarkupDecl(ctxt);
6682 xmlParsePEReference(ctxt);
6683
6684 /*
6685 * Pop-up of finished entities.
6686 */
6687 while ((RAW == 0) && (ctxt->inputNr > 1))
6688 xmlPopInput(ctxt);
6689
6690 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006691 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
Owen Taylor3473f882001-02-23 17:55:21 +00006692 "xmlParseInternalSubset: error detected in Markup declaration\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006693 break;
6694 }
6695 }
6696 if (RAW == ']') {
6697 NEXT;
6698 SKIP_BLANKS;
6699 }
6700 }
6701
6702 /*
6703 * We should be at the end of the DOCTYPE declaration.
6704 */
6705 if (RAW != '>') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006706 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006707 }
6708 NEXT;
6709}
6710
Daniel Veillard81273902003-09-30 00:43:48 +00006711#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +00006712/**
6713 * xmlParseAttribute:
6714 * @ctxt: an XML parser context
6715 * @value: a xmlChar ** used to store the value of the attribute
6716 *
6717 * parse an attribute
6718 *
6719 * [41] Attribute ::= Name Eq AttValue
6720 *
6721 * [ WFC: No External Entity References ]
6722 * Attribute values cannot contain direct or indirect entity references
6723 * to external entities.
6724 *
6725 * [ WFC: No < in Attribute Values ]
6726 * The replacement text of any entity referred to directly or indirectly in
6727 * an attribute value (other than "&lt;") must not contain a <.
6728 *
6729 * [ VC: Attribute Value Type ]
6730 * The attribute must have been declared; the value must be of the type
6731 * declared for it.
6732 *
6733 * [25] Eq ::= S? '=' S?
6734 *
6735 * With namespace:
6736 *
6737 * [NS 11] Attribute ::= QName Eq AttValue
6738 *
6739 * Also the case QName == xmlns:??? is handled independently as a namespace
6740 * definition.
6741 *
6742 * Returns the attribute name, and the value in *value.
6743 */
6744
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006745const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00006746xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006747 const xmlChar *name;
6748 xmlChar *val;
Owen Taylor3473f882001-02-23 17:55:21 +00006749
6750 *value = NULL;
Daniel Veillard878eab02002-02-19 13:46:09 +00006751 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00006752 name = xmlParseName(ctxt);
6753 if (name == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00006754 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006755 "error parsing attribute name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006756 return(NULL);
6757 }
6758
6759 /*
6760 * read the value
6761 */
6762 SKIP_BLANKS;
6763 if (RAW == '=') {
6764 NEXT;
6765 SKIP_BLANKS;
6766 val = xmlParseAttValue(ctxt);
6767 ctxt->instate = XML_PARSER_CONTENT;
6768 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006769 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
Owen Taylor3473f882001-02-23 17:55:21 +00006770 "Specification mandate value for attribute %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006771 return(NULL);
6772 }
6773
6774 /*
6775 * Check that xml:lang conforms to the specification
6776 * No more registered as an error, just generate a warning now
6777 * since this was deprecated in XML second edition
6778 */
6779 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
6780 if (!xmlCheckLanguageID(val)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00006781 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
6782 "Malformed value for xml:lang : %s\n",
6783 val, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006784 }
6785 }
6786
6787 /*
6788 * Check that xml:space conforms to the specification
6789 */
6790 if (xmlStrEqual(name, BAD_CAST "xml:space")) {
6791 if (xmlStrEqual(val, BAD_CAST "default"))
6792 *(ctxt->space) = 0;
6793 else if (xmlStrEqual(val, BAD_CAST "preserve"))
6794 *(ctxt->space) = 1;
6795 else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006796 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
Daniel Veillard642104e2003-03-26 16:32:05 +00006797"Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
Owen Taylor3473f882001-02-23 17:55:21 +00006798 val);
Owen Taylor3473f882001-02-23 17:55:21 +00006799 }
6800 }
6801
6802 *value = val;
6803 return(name);
6804}
6805
6806/**
6807 * xmlParseStartTag:
6808 * @ctxt: an XML parser context
6809 *
6810 * parse a start of tag either for rule element or
6811 * EmptyElement. In both case we don't parse the tag closing chars.
6812 *
6813 * [40] STag ::= '<' Name (S Attribute)* S? '>'
6814 *
6815 * [ WFC: Unique Att Spec ]
6816 * No attribute name may appear more than once in the same start-tag or
6817 * empty-element tag.
6818 *
6819 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
6820 *
6821 * [ WFC: Unique Att Spec ]
6822 * No attribute name may appear more than once in the same start-tag or
6823 * empty-element tag.
6824 *
6825 * With namespace:
6826 *
6827 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
6828 *
6829 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
6830 *
6831 * Returns the element name parsed
6832 */
6833
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006834const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00006835xmlParseStartTag(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006836 const xmlChar *name;
6837 const xmlChar *attname;
Owen Taylor3473f882001-02-23 17:55:21 +00006838 xmlChar *attvalue;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00006839 const xmlChar **atts = ctxt->atts;
Owen Taylor3473f882001-02-23 17:55:21 +00006840 int nbatts = 0;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00006841 int maxatts = ctxt->maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00006842 int i;
6843
6844 if (RAW != '<') return(NULL);
Daniel Veillard21a0f912001-02-25 19:54:14 +00006845 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00006846
6847 name = xmlParseName(ctxt);
6848 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006849 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00006850 "xmlParseStartTag: invalid element name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006851 return(NULL);
6852 }
6853
6854 /*
6855 * Now parse the attributes, it ends up with the ending
6856 *
6857 * (S Attribute)* S?
6858 */
6859 SKIP_BLANKS;
6860 GROW;
6861
Daniel Veillard21a0f912001-02-25 19:54:14 +00006862 while ((RAW != '>') &&
6863 ((RAW != '/') || (NXT(1) != '>')) &&
Daniel Veillard73b013f2003-09-30 12:36:01 +00006864 (IS_BYTE_CHAR(RAW))) {
Owen Taylor3473f882001-02-23 17:55:21 +00006865 const xmlChar *q = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00006866 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00006867
6868 attname = xmlParseAttribute(ctxt, &attvalue);
6869 if ((attname != NULL) && (attvalue != NULL)) {
6870 /*
6871 * [ WFC: Unique Att Spec ]
6872 * No attribute name may appear more than once in the same
6873 * start-tag or empty-element tag.
6874 */
6875 for (i = 0; i < nbatts;i += 2) {
6876 if (xmlStrEqual(atts[i], attname)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006877 xmlErrAttributeDup(ctxt, NULL, attname);
Owen Taylor3473f882001-02-23 17:55:21 +00006878 xmlFree(attvalue);
6879 goto failed;
6880 }
6881 }
Owen Taylor3473f882001-02-23 17:55:21 +00006882 /*
6883 * Add the pair to atts
6884 */
6885 if (atts == NULL) {
Daniel Veillard6155d8a2003-08-19 15:01:28 +00006886 maxatts = 22; /* allow for 10 attrs by default */
6887 atts = (const xmlChar **)
6888 xmlMalloc(maxatts * sizeof(xmlChar *));
Owen Taylor3473f882001-02-23 17:55:21 +00006889 if (atts == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006890 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006891 if (attvalue != NULL)
6892 xmlFree(attvalue);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006893 goto failed;
Owen Taylor3473f882001-02-23 17:55:21 +00006894 }
Daniel Veillard6155d8a2003-08-19 15:01:28 +00006895 ctxt->atts = atts;
6896 ctxt->maxatts = maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00006897 } else if (nbatts + 4 > maxatts) {
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006898 const xmlChar **n;
6899
Owen Taylor3473f882001-02-23 17:55:21 +00006900 maxatts *= 2;
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006901 n = (const xmlChar **) xmlRealloc((void *) atts,
Daniel Veillard6155d8a2003-08-19 15:01:28 +00006902 maxatts * sizeof(const xmlChar *));
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006903 if (n == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006904 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006905 if (attvalue != NULL)
6906 xmlFree(attvalue);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006907 goto failed;
Owen Taylor3473f882001-02-23 17:55:21 +00006908 }
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006909 atts = n;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00006910 ctxt->atts = atts;
6911 ctxt->maxatts = maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00006912 }
6913 atts[nbatts++] = attname;
6914 atts[nbatts++] = attvalue;
6915 atts[nbatts] = NULL;
6916 atts[nbatts + 1] = NULL;
6917 } else {
Owen Taylor3473f882001-02-23 17:55:21 +00006918 if (attvalue != NULL)
6919 xmlFree(attvalue);
6920 }
6921
6922failed:
6923
Daniel Veillard3772de32002-12-17 10:31:45 +00006924 GROW
Owen Taylor3473f882001-02-23 17:55:21 +00006925 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
6926 break;
6927 if (!IS_BLANK(RAW)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006928 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6929 "attributes construct error\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006930 }
6931 SKIP_BLANKS;
Daniel Veillard02111c12003-02-24 19:14:52 +00006932 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
6933 (attname == NULL) && (attvalue == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006934 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
6935 "xmlParseStartTag: problem parsing attributes\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006936 break;
6937 }
Daniel Veillard6155d8a2003-08-19 15:01:28 +00006938 SHRINK;
Owen Taylor3473f882001-02-23 17:55:21 +00006939 GROW;
6940 }
6941
6942 /*
6943 * SAX: Start of Element !
6944 */
6945 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
Daniel Veillard6155d8a2003-08-19 15:01:28 +00006946 (!ctxt->disableSAX)) {
6947 if (nbatts > 0)
6948 ctxt->sax->startElement(ctxt->userData, name, atts);
6949 else
6950 ctxt->sax->startElement(ctxt->userData, name, NULL);
6951 }
Owen Taylor3473f882001-02-23 17:55:21 +00006952
6953 if (atts != NULL) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006954 /* Free only the content strings */
6955 for (i = 1;i < nbatts;i+=2)
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006956 if (atts[i] != NULL)
6957 xmlFree((xmlChar *) atts[i]);
Owen Taylor3473f882001-02-23 17:55:21 +00006958 }
6959 return(name);
6960}
6961
6962/**
Daniel Veillard0fb18932003-09-07 09:14:37 +00006963 * xmlParseEndTag1:
Owen Taylor3473f882001-02-23 17:55:21 +00006964 * @ctxt: an XML parser context
Daniel Veillard0fb18932003-09-07 09:14:37 +00006965 * @line: line of the start tag
6966 * @nsNr: number of namespaces on the start tag
Owen Taylor3473f882001-02-23 17:55:21 +00006967 *
6968 * parse an end of tag
6969 *
6970 * [42] ETag ::= '</' Name S? '>'
6971 *
6972 * With namespace
6973 *
6974 * [NS 9] ETag ::= '</' QName S? '>'
6975 */
6976
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00006977static void
Daniel Veillard0fb18932003-09-07 09:14:37 +00006978xmlParseEndTag1(xmlParserCtxtPtr ctxt, int line) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006979 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00006980
6981 GROW;
6982 if ((RAW != '<') || (NXT(1) != '/')) {
Daniel Veillardf403d292003-10-05 13:51:35 +00006983 xmlFatalErrMsg(ctxt, XML_ERR_LTSLASH_REQUIRED,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006984 "xmlParseEndTag: '</' not found\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006985 return;
6986 }
6987 SKIP(2);
6988
Daniel Veillard46de64e2002-05-29 08:21:33 +00006989 name = xmlParseNameAndCompare(ctxt,ctxt->name);
Owen Taylor3473f882001-02-23 17:55:21 +00006990
6991 /*
6992 * We should definitely be at the ending "S? '>'" part
6993 */
6994 GROW;
6995 SKIP_BLANKS;
Daniel Veillard73b013f2003-09-30 12:36:01 +00006996 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006997 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006998 } else
Daniel Veillard21a0f912001-02-25 19:54:14 +00006999 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00007000
7001 /*
7002 * [ WFC: Element Type Match ]
7003 * The Name in an element's end-tag must match the element type in the
7004 * start-tag.
7005 *
7006 */
Daniel Veillard46de64e2002-05-29 08:21:33 +00007007 if (name != (xmlChar*)1) {
Daniel Veillardf403d292003-10-05 13:51:35 +00007008 if (name == NULL) name = BAD_CAST "unparseable";
7009 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00007010 "Opening and ending tag mismatch: %s line %d and %s\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00007011 ctxt->name, line, name);
Owen Taylor3473f882001-02-23 17:55:21 +00007012 }
7013
7014 /*
7015 * SAX: End of Tag
7016 */
7017 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
7018 (!ctxt->disableSAX))
Daniel Veillard46de64e2002-05-29 08:21:33 +00007019 ctxt->sax->endElement(ctxt->userData, ctxt->name);
Owen Taylor3473f882001-02-23 17:55:21 +00007020
Daniel Veillarde57ec792003-09-10 10:50:59 +00007021 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00007022 spacePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00007023 return;
7024}
7025
7026/**
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00007027 * xmlParseEndTag:
7028 * @ctxt: an XML parser context
7029 *
7030 * parse an end of tag
7031 *
7032 * [42] ETag ::= '</' Name S? '>'
7033 *
7034 * With namespace
7035 *
7036 * [NS 9] ETag ::= '</' QName S? '>'
7037 */
7038
7039void
7040xmlParseEndTag(xmlParserCtxtPtr ctxt) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007041 xmlParseEndTag1(ctxt, 0);
7042}
Daniel Veillard81273902003-09-30 00:43:48 +00007043#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillard0fb18932003-09-07 09:14:37 +00007044
7045/************************************************************************
7046 * *
7047 * SAX 2 specific operations *
7048 * *
7049 ************************************************************************/
7050
7051static const xmlChar *
7052xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) {
7053 int len = 0, l;
7054 int c;
7055 int count = 0;
7056
7057 /*
7058 * Handler for more complex cases
7059 */
7060 GROW;
7061 c = CUR_CHAR(l);
7062 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007063 (!IS_LETTER(c) && (c != '_'))) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007064 return(NULL);
7065 }
7066
7067 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
Daniel Veillard73b013f2003-09-30 12:36:01 +00007068 ((xmlIsLetter(c)) || (xmlIsDigit(c)) ||
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007069 (c == '.') || (c == '-') || (c == '_') ||
Daniel Veillard73b013f2003-09-30 12:36:01 +00007070 (xmlIsCombining(c)) ||
7071 (xmlIsExtender(c)))) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007072 if (count++ > 100) {
7073 count = 0;
7074 GROW;
7075 }
7076 len += l;
7077 NEXTL(l);
7078 c = CUR_CHAR(l);
7079 }
7080 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
7081}
7082
7083/*
7084 * xmlGetNamespace:
7085 * @ctxt: an XML parser context
7086 * @prefix: the prefix to lookup
7087 *
7088 * Lookup the namespace name for the @prefix (which ca be NULL)
7089 * The prefix must come from the @ctxt->dict dictionnary
7090 *
7091 * Returns the namespace name or NULL if not bound
7092 */
7093static const xmlChar *
7094xmlGetNamespace(xmlParserCtxtPtr ctxt, const xmlChar *prefix) {
7095 int i;
7096
Daniel Veillarde57ec792003-09-10 10:50:59 +00007097 if (prefix == ctxt->str_xml) return(ctxt->str_xml_ns);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007098 for (i = ctxt->nsNr - 2;i >= 0;i-=2)
Daniel Veillarde57ec792003-09-10 10:50:59 +00007099 if (ctxt->nsTab[i] == prefix) {
7100 if ((prefix == NULL) && (*ctxt->nsTab[i + 1] == 0))
7101 return(NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007102 return(ctxt->nsTab[i + 1]);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007103 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007104 return(NULL);
7105}
7106
7107/**
7108 * xmlParseNCName:
7109 * @ctxt: an XML parser context
7110 *
7111 * parse an XML name.
7112 *
7113 * [4NS] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
7114 * CombiningChar | Extender
7115 *
7116 * [5NS] NCName ::= (Letter | '_') (NCNameChar)*
7117 *
7118 * Returns the Name parsed or NULL
7119 */
7120
7121static const xmlChar *
7122xmlParseNCName(xmlParserCtxtPtr ctxt) {
7123 const xmlChar *in;
7124 const xmlChar *ret;
7125 int count = 0;
7126
7127 /*
7128 * Accelerator for simple ASCII names
7129 */
7130 in = ctxt->input->cur;
7131 if (((*in >= 0x61) && (*in <= 0x7A)) ||
7132 ((*in >= 0x41) && (*in <= 0x5A)) ||
7133 (*in == '_')) {
7134 in++;
7135 while (((*in >= 0x61) && (*in <= 0x7A)) ||
7136 ((*in >= 0x41) && (*in <= 0x5A)) ||
7137 ((*in >= 0x30) && (*in <= 0x39)) ||
7138 (*in == '_') || (*in == '-') ||
7139 (*in == '.'))
7140 in++;
7141 if ((*in > 0) && (*in < 0x80)) {
7142 count = in - ctxt->input->cur;
7143 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
7144 ctxt->input->cur = in;
7145 ctxt->nbChars += count;
7146 ctxt->input->col += count;
7147 if (ret == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007148 xmlErrMemory(ctxt, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007149 }
7150 return(ret);
7151 }
7152 }
7153 return(xmlParseNCNameComplex(ctxt));
7154}
7155
7156/**
7157 * xmlParseQName:
7158 * @ctxt: an XML parser context
7159 * @prefix: pointer to store the prefix part
7160 *
7161 * parse an XML Namespace QName
7162 *
7163 * [6] QName ::= (Prefix ':')? LocalPart
7164 * [7] Prefix ::= NCName
7165 * [8] LocalPart ::= NCName
7166 *
7167 * Returns the Name parsed or NULL
7168 */
7169
7170static const xmlChar *
7171xmlParseQName(xmlParserCtxtPtr ctxt, const xmlChar **prefix) {
7172 const xmlChar *l, *p;
7173
7174 GROW;
7175
7176 l = xmlParseNCName(ctxt);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007177 if (l == NULL) {
7178 if (CUR == ':') {
7179 l = xmlParseName(ctxt);
7180 if (l != NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007181 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
7182 "Failed to parse QName '%s'\n", l, NULL, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007183 *prefix = NULL;
7184 return(l);
7185 }
7186 }
7187 return(NULL);
7188 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007189 if (CUR == ':') {
7190 NEXT;
7191 p = l;
7192 l = xmlParseNCName(ctxt);
7193 if (l == NULL) {
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007194 xmlChar *tmp;
7195
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007196 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
7197 "Failed to parse QName '%s:'\n", p, NULL, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007198 tmp = xmlBuildQName(BAD_CAST "", p, NULL, 0);
7199 p = xmlDictLookup(ctxt->dict, tmp, -1);
7200 if (tmp != NULL) xmlFree(tmp);
7201 *prefix = NULL;
7202 return(p);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007203 }
7204 if (CUR == ':') {
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007205 xmlChar *tmp;
7206
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007207 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
7208 "Failed to parse QName '%s:%s:'\n", p, l, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007209 NEXT;
7210 tmp = (xmlChar *) xmlParseName(ctxt);
7211 if (tmp != NULL) {
7212 tmp = xmlBuildQName(tmp, l, NULL, 0);
7213 l = xmlDictLookup(ctxt->dict, tmp, -1);
7214 if (tmp != NULL) xmlFree(tmp);
7215 *prefix = p;
7216 return(l);
7217 }
7218 tmp = xmlBuildQName(BAD_CAST "", l, NULL, 0);
7219 l = xmlDictLookup(ctxt->dict, tmp, -1);
7220 if (tmp != NULL) xmlFree(tmp);
7221 *prefix = p;
7222 return(l);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007223 }
7224 *prefix = p;
7225 } else
7226 *prefix = NULL;
7227 return(l);
7228}
7229
7230/**
7231 * xmlParseQNameAndCompare:
7232 * @ctxt: an XML parser context
7233 * @name: the localname
7234 * @prefix: the prefix, if any.
7235 *
7236 * parse an XML name and compares for match
7237 * (specialized for endtag parsing)
7238 *
7239 * Returns NULL for an illegal name, (xmlChar*) 1 for success
7240 * and the name for mismatch
7241 */
7242
7243static const xmlChar *
7244xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *name,
7245 xmlChar const *prefix) {
7246 const xmlChar *cmp = name;
7247 const xmlChar *in;
7248 const xmlChar *ret;
7249 const xmlChar *prefix2;
7250
7251 if (prefix == NULL) return(xmlParseNameAndCompare(ctxt, name));
7252
7253 GROW;
7254 in = ctxt->input->cur;
7255
7256 cmp = prefix;
7257 while (*in != 0 && *in == *cmp) {
7258 ++in;
7259 ++cmp;
7260 }
7261 if ((*cmp == 0) && (*in == ':')) {
7262 in++;
7263 cmp = name;
7264 while (*in != 0 && *in == *cmp) {
7265 ++in;
7266 ++cmp;
7267 }
7268 if (*cmp == 0 && (*in == '>' || IS_BLANK (*in))) {
7269 /* success */
7270 ctxt->input->cur = in;
7271 return((const xmlChar*) 1);
7272 }
7273 }
7274 /*
7275 * all strings coms from the dictionary, equality can be done directly
7276 */
7277 ret = xmlParseQName (ctxt, &prefix2);
7278 if ((ret == name) && (prefix == prefix2))
7279 return((const xmlChar*) 1);
7280 return ret;
7281}
7282
7283/**
7284 * xmlParseAttValueInternal:
7285 * @ctxt: an XML parser context
7286 * @len: attribute len result
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007287 * @alloc: whether the attribute was reallocated as a new string
7288 * @normalize: if 1 then further non-CDATA normalization must be done
Daniel Veillard0fb18932003-09-07 09:14:37 +00007289 *
7290 * parse a value for an attribute.
7291 * NOTE: if no normalization is needed, the routine will return pointers
7292 * directly from the data buffer.
7293 *
7294 * 3.3.3 Attribute-Value Normalization:
7295 * Before the value of an attribute is passed to the application or
7296 * checked for validity, the XML processor must normalize it as follows:
7297 * - a character reference is processed by appending the referenced
7298 * character to the attribute value
7299 * - an entity reference is processed by recursively processing the
7300 * replacement text of the entity
7301 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
7302 * appending #x20 to the normalized value, except that only a single
7303 * #x20 is appended for a "#xD#xA" sequence that is part of an external
7304 * parsed entity or the literal entity value of an internal parsed entity
7305 * - other characters are processed by appending them to the normalized value
7306 * If the declared value is not CDATA, then the XML processor must further
7307 * process the normalized attribute value by discarding any leading and
7308 * trailing space (#x20) characters, and by replacing sequences of space
7309 * (#x20) characters by a single space (#x20) character.
7310 * All attributes for which no declaration has been read should be treated
7311 * by a non-validating parser as if declared CDATA.
7312 *
7313 * Returns the AttValue parsed or NULL. The value has to be freed by the
7314 * caller if it was copied, this can be detected by val[*len] == 0.
7315 */
7316
7317static xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007318xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *len, int *alloc,
7319 int normalize)
Daniel Veillarde57ec792003-09-10 10:50:59 +00007320{
Daniel Veillard0fb18932003-09-07 09:14:37 +00007321 xmlChar limit = 0;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007322 const xmlChar *in = NULL, *start, *end, *last;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007323 xmlChar *ret = NULL;
7324
7325 GROW;
7326 in = (xmlChar *) CUR_PTR;
7327 if (*in != '"' && *in != '\'') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007328 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007329 return (NULL);
7330 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007331 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007332
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007333 /*
7334 * try to handle in this routine the most common case where no
7335 * allocation of a new string is required and where content is
7336 * pure ASCII.
7337 */
7338 limit = *in++;
7339 end = ctxt->input->end;
7340 start = in;
7341 if (in >= end) {
7342 const xmlChar *oldbase = ctxt->input->base;
7343 GROW;
7344 if (oldbase != ctxt->input->base) {
7345 long delta = ctxt->input->base - oldbase;
7346 start = start + delta;
7347 in = in + delta;
7348 }
7349 end = ctxt->input->end;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007350 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007351 if (normalize) {
7352 /*
7353 * Skip any leading spaces
7354 */
7355 while ((in < end) && (*in != limit) &&
7356 ((*in == 0x20) || (*in == 0x9) ||
7357 (*in == 0xA) || (*in == 0xD))) {
7358 in++;
7359 start = in;
7360 if (in >= end) {
7361 const xmlChar *oldbase = ctxt->input->base;
7362 GROW;
7363 if (oldbase != ctxt->input->base) {
7364 long delta = ctxt->input->base - oldbase;
7365 start = start + delta;
7366 in = in + delta;
7367 }
7368 end = ctxt->input->end;
7369 }
7370 }
7371 while ((in < end) && (*in != limit) && (*in >= 0x20) &&
7372 (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
7373 if ((*in++ == 0x20) && (*in == 0x20)) break;
7374 if (in >= end) {
7375 const xmlChar *oldbase = ctxt->input->base;
7376 GROW;
7377 if (oldbase != ctxt->input->base) {
7378 long delta = ctxt->input->base - oldbase;
7379 start = start + delta;
7380 in = in + delta;
7381 }
7382 end = ctxt->input->end;
7383 }
7384 }
7385 last = in;
7386 /*
7387 * skip the trailing blanks
7388 */
Daniel Veillardc6e20e42003-09-11 16:30:26 +00007389 while ((last[-1] == 0x20) && (last > start)) last--;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007390 while ((in < end) && (*in != limit) &&
7391 ((*in == 0x20) || (*in == 0x9) ||
7392 (*in == 0xA) || (*in == 0xD))) {
7393 in++;
7394 if (in >= end) {
7395 const xmlChar *oldbase = ctxt->input->base;
7396 GROW;
7397 if (oldbase != ctxt->input->base) {
7398 long delta = ctxt->input->base - oldbase;
7399 start = start + delta;
7400 in = in + delta;
7401 last = last + delta;
7402 }
7403 end = ctxt->input->end;
7404 }
7405 }
7406 if (*in != limit) goto need_complex;
7407 } else {
7408 while ((in < end) && (*in != limit) && (*in >= 0x20) &&
7409 (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
7410 in++;
7411 if (in >= end) {
7412 const xmlChar *oldbase = ctxt->input->base;
7413 GROW;
7414 if (oldbase != ctxt->input->base) {
7415 long delta = ctxt->input->base - oldbase;
7416 start = start + delta;
7417 in = in + delta;
7418 }
7419 end = ctxt->input->end;
7420 }
7421 }
7422 last = in;
7423 if (*in != limit) goto need_complex;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007424 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007425 in++;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007426 if (len != NULL) {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007427 *len = last - start;
7428 ret = (xmlChar *) start;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007429 } else {
Daniel Veillarde57ec792003-09-10 10:50:59 +00007430 if (alloc) *alloc = 1;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007431 ret = xmlStrndup(start, last - start);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007432 }
7433 CUR_PTR = in;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007434 if (alloc) *alloc = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007435 return ret;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007436need_complex:
7437 if (alloc) *alloc = 1;
7438 return xmlParseAttValueComplex(ctxt, len, normalize);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007439}
7440
7441/**
7442 * xmlParseAttribute2:
7443 * @ctxt: an XML parser context
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007444 * @pref: the element prefix
7445 * @elem: the element name
7446 * @prefix: a xmlChar ** used to store the value of the attribute prefix
Daniel Veillard0fb18932003-09-07 09:14:37 +00007447 * @value: a xmlChar ** used to store the value of the attribute
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007448 * @len: an int * to save the length of the attribute
7449 * @alloc: an int * to indicate if the attribute was allocated
Daniel Veillard0fb18932003-09-07 09:14:37 +00007450 *
7451 * parse an attribute in the new SAX2 framework.
7452 *
7453 * Returns the attribute name, and the value in *value, .
7454 */
7455
7456static const xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007457xmlParseAttribute2(xmlParserCtxtPtr ctxt,
7458 const xmlChar *pref, const xmlChar *elem,
7459 const xmlChar **prefix, xmlChar **value,
7460 int *len, int *alloc) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007461 const xmlChar *name;
7462 xmlChar *val;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007463 int normalize = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007464
7465 *value = NULL;
7466 GROW;
7467 name = xmlParseQName(ctxt, prefix);
7468 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007469 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7470 "error parsing attribute name\n");
Daniel Veillard0fb18932003-09-07 09:14:37 +00007471 return(NULL);
7472 }
7473
7474 /*
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007475 * get the type if needed
7476 */
7477 if (ctxt->attsSpecial != NULL) {
7478 int type;
7479
7480 type = (int) (long) xmlHashQLookup2(ctxt->attsSpecial,
7481 pref, elem, *prefix, name);
7482 if (type != 0) normalize = 1;
7483 }
7484
7485 /*
Daniel Veillard0fb18932003-09-07 09:14:37 +00007486 * read the value
7487 */
7488 SKIP_BLANKS;
7489 if (RAW == '=') {
7490 NEXT;
7491 SKIP_BLANKS;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007492 val = xmlParseAttValueInternal(ctxt, len, alloc, normalize);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007493 ctxt->instate = XML_PARSER_CONTENT;
7494 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00007495 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
Daniel Veillard0fb18932003-09-07 09:14:37 +00007496 "Specification mandate value for attribute %s\n", name);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007497 return(NULL);
7498 }
7499
7500 /*
7501 * Check that xml:lang conforms to the specification
7502 * No more registered as an error, just generate a warning now
7503 * since this was deprecated in XML second edition
7504 */
7505 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
7506 if (!xmlCheckLanguageID(val)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00007507 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
7508 "Malformed value for xml:lang : %s\n",
7509 val, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007510 }
7511 }
7512
7513 /*
7514 * Check that xml:space conforms to the specification
7515 */
7516 if (xmlStrEqual(name, BAD_CAST "xml:space")) {
7517 if (xmlStrEqual(val, BAD_CAST "default"))
7518 *(ctxt->space) = 0;
7519 else if (xmlStrEqual(val, BAD_CAST "preserve"))
7520 *(ctxt->space) = 1;
7521 else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00007522 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
Daniel Veillard0fb18932003-09-07 09:14:37 +00007523"Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
7524 val);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007525 }
7526 }
7527
7528 *value = val;
7529 return(name);
7530}
7531
7532/**
7533 * xmlParseStartTag2:
7534 * @ctxt: an XML parser context
7535 *
7536 * parse a start of tag either for rule element or
7537 * EmptyElement. In both case we don't parse the tag closing chars.
7538 * This routine is called when running SAX2 parsing
7539 *
7540 * [40] STag ::= '<' Name (S Attribute)* S? '>'
7541 *
7542 * [ WFC: Unique Att Spec ]
7543 * No attribute name may appear more than once in the same start-tag or
7544 * empty-element tag.
7545 *
7546 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
7547 *
7548 * [ WFC: Unique Att Spec ]
7549 * No attribute name may appear more than once in the same start-tag or
7550 * empty-element tag.
7551 *
7552 * With namespace:
7553 *
7554 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
7555 *
7556 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
7557 *
7558 * Returns the element name parsed
7559 */
7560
7561static const xmlChar *
7562xmlParseStartTag2(xmlParserCtxtPtr ctxt, const xmlChar **pref,
7563 const xmlChar **URI) {
7564 const xmlChar *localname;
7565 const xmlChar *prefix;
7566 const xmlChar *attname;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007567 const xmlChar *aprefix;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007568 const xmlChar *nsname;
7569 xmlChar *attvalue;
7570 const xmlChar **atts = ctxt->atts;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007571 int maxatts = ctxt->maxatts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007572 int nratts, nbatts, nbdef;
7573 int i, j, nbNs, attval;
7574 const xmlChar *base;
7575 unsigned long cur;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007576
7577 if (RAW != '<') return(NULL);
7578 NEXT1;
7579
7580 /*
7581 * NOTE: it is crucial with the SAX2 API to never call SHRINK beyond that
7582 * point since the attribute values may be stored as pointers to
7583 * the buffer and calling SHRINK would destroy them !
7584 * The Shrinking is only possible once the full set of attribute
7585 * callbacks have been done.
7586 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00007587reparse:
Daniel Veillard0fb18932003-09-07 09:14:37 +00007588 SHRINK;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007589 base = ctxt->input->base;
7590 cur = ctxt->input->cur - ctxt->input->base;
7591 nbatts = 0;
7592 nratts = 0;
7593 nbdef = 0;
7594 nbNs = 0;
7595 attval = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007596
7597 localname = xmlParseQName(ctxt, &prefix);
7598 if (localname == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007599 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7600 "StartTag: invalid element name\n");
Daniel Veillard0fb18932003-09-07 09:14:37 +00007601 return(NULL);
7602 }
7603
7604 /*
7605 * Now parse the attributes, it ends up with the ending
7606 *
7607 * (S Attribute)* S?
7608 */
7609 SKIP_BLANKS;
7610 GROW;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007611 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007612
7613 while ((RAW != '>') &&
7614 ((RAW != '/') || (NXT(1) != '>')) &&
Daniel Veillard73b013f2003-09-30 12:36:01 +00007615 (IS_BYTE_CHAR(RAW))) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007616 const xmlChar *q = CUR_PTR;
7617 unsigned int cons = ctxt->input->consumed;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007618 int len = -1, alloc = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007619
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007620 attname = xmlParseAttribute2(ctxt, prefix, localname,
7621 &aprefix, &attvalue, &len, &alloc);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007622 if ((attname != NULL) && (attvalue != NULL)) {
7623 if (len < 0) len = xmlStrlen(attvalue);
7624 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00007625 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
7626 xmlURIPtr uri;
7627
7628 if (*URL != 0) {
7629 uri = xmlParseURI((const char *) URL);
7630 if (uri == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00007631 xmlWarningMsg(ctxt, XML_WAR_NS_URI,
7632 "xmlns: %s not a valid URI\n",
7633 URL, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007634 } else {
7635 if (uri->scheme == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00007636 xmlWarningMsg(ctxt, XML_WAR_NS_URI_RELATIVE,
7637 "xmlns: URI %s is not absolute\n",
7638 URL, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007639 }
7640 xmlFreeURI(uri);
7641 }
7642 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007643 /*
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007644 * check that it's not a defined namespace
Daniel Veillard0fb18932003-09-07 09:14:37 +00007645 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007646 for (j = 1;j <= nbNs;j++)
7647 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
7648 break;
7649 if (j <= nbNs)
7650 xmlErrAttributeDup(ctxt, NULL, attname);
7651 else
7652 if (nsPush(ctxt, NULL, URL) > 0) nbNs++;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007653 if (alloc != 0) xmlFree(attvalue);
7654 SKIP_BLANKS;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007655 continue;
7656 }
7657 if (aprefix == ctxt->str_xmlns) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00007658 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
7659 xmlURIPtr uri;
7660
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007661 if (attname == ctxt->str_xml) {
7662 if (URL != ctxt->str_xml_ns) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007663 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
7664 "xml namespace prefix mapped to wrong URI\n",
7665 NULL, NULL, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007666 }
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007667 /*
7668 * Do not keep a namespace definition node
7669 */
7670 if (alloc != 0) xmlFree(attvalue);
7671 SKIP_BLANKS;
7672 continue;
7673 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00007674 uri = xmlParseURI((const char *) URL);
7675 if (uri == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00007676 xmlWarningMsg(ctxt, XML_WAR_NS_URI,
7677 "xmlns:%s: '%s' is not a valid URI\n",
7678 attname, URL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007679 } else {
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007680 if ((ctxt->pedantic) && (uri->scheme == NULL)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00007681 xmlWarningMsg(ctxt, XML_WAR_NS_URI_RELATIVE,
7682 "xmlns:%s: URI %s is not absolute\n",
7683 attname, URL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007684 }
7685 xmlFreeURI(uri);
7686 }
7687
Daniel Veillard0fb18932003-09-07 09:14:37 +00007688 /*
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007689 * check that it's not a defined namespace
Daniel Veillard0fb18932003-09-07 09:14:37 +00007690 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007691 for (j = 1;j <= nbNs;j++)
7692 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
7693 break;
7694 if (j <= nbNs)
7695 xmlErrAttributeDup(ctxt, aprefix, attname);
7696 else
7697 if (nsPush(ctxt, attname, URL) > 0) nbNs++;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007698 if (alloc != 0) xmlFree(attvalue);
7699 SKIP_BLANKS;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007700 continue;
7701 }
7702
7703 /*
7704 * Add the pair to atts
7705 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00007706 if ((atts == NULL) || (nbatts + 5 > maxatts)) {
7707 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007708 if (attvalue[len] == 0)
7709 xmlFree(attvalue);
7710 goto failed;
7711 }
7712 maxatts = ctxt->maxatts;
7713 atts = ctxt->atts;
7714 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00007715 ctxt->attallocs[nratts++] = alloc;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007716 atts[nbatts++] = attname;
7717 atts[nbatts++] = aprefix;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007718 atts[nbatts++] = NULL; /* the URI will be fetched later */
Daniel Veillard0fb18932003-09-07 09:14:37 +00007719 atts[nbatts++] = attvalue;
7720 attvalue += len;
7721 atts[nbatts++] = attvalue;
7722 /*
7723 * tag if some deallocation is needed
7724 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00007725 if (alloc != 0) attval = 1;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007726 } else {
7727 if ((attvalue != NULL) && (attvalue[len] == 0))
7728 xmlFree(attvalue);
7729 }
7730
7731failed:
7732
7733 GROW
Daniel Veillarde57ec792003-09-10 10:50:59 +00007734 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007735 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
7736 break;
7737 if (!IS_BLANK(RAW)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007738 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
7739 "attributes construct error\n");
Daniel Veillard0fb18932003-09-07 09:14:37 +00007740 }
7741 SKIP_BLANKS;
7742 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
7743 (attname == NULL) && (attvalue == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007744 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
Daniel Veillard0fb18932003-09-07 09:14:37 +00007745 "xmlParseStartTag: problem parsing attributes\n");
Daniel Veillard0fb18932003-09-07 09:14:37 +00007746 break;
7747 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007748 GROW;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007749 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007750 }
7751
Daniel Veillard0fb18932003-09-07 09:14:37 +00007752 /*
Daniel Veillarde57ec792003-09-10 10:50:59 +00007753 * The attributes checkings
Daniel Veillard0fb18932003-09-07 09:14:37 +00007754 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00007755 for (i = 0; i < nbatts;i += 5) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007756 nsname = xmlGetNamespace(ctxt, atts[i + 1]);
7757 if ((atts[i + 1] != NULL) && (nsname == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007758 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007759 "Namespace prefix %s for %s on %s is not defined\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007760 atts[i + 1], atts[i], localname);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007761 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00007762 atts[i + 2] = nsname;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007763 /*
7764 * [ WFC: Unique Att Spec ]
7765 * No attribute name may appear more than once in the same
7766 * start-tag or empty-element tag.
7767 * As extended by the Namespace in XML REC.
7768 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00007769 for (j = 0; j < i;j += 5) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007770 if (atts[i] == atts[j]) {
7771 if (atts[i+1] == atts[j+1]) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007772 xmlErrAttributeDup(ctxt, atts[i+1], atts[i]);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007773 break;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007774 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00007775 if ((nsname != NULL) && (atts[j + 2] == nsname)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007776 xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED,
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007777 "Namespaced Attribute %s in '%s' redefined\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007778 atts[i], nsname, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007779 break;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007780 }
7781 }
7782 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00007783 }
7784
7785 /*
7786 * The attributes defaulting
7787 */
7788 if (ctxt->attsDefault != NULL) {
7789 xmlDefAttrsPtr defaults;
7790
7791 defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix);
7792 if (defaults != NULL) {
7793 for (i = 0;i < defaults->nbAttrs;i++) {
7794 attname = defaults->values[4 * i];
7795 aprefix = defaults->values[4 * i + 1];
7796
7797 /*
7798 * special work for namespaces defaulted defs
7799 */
7800 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
7801 /*
7802 * check that it's not a defined namespace
7803 */
7804 for (j = 1;j <= nbNs;j++)
7805 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
7806 break;
7807 if (j <= nbNs) continue;
7808
7809 nsname = xmlGetNamespace(ctxt, NULL);
7810 if (nsname != defaults->values[4 * i + 2]) {
7811 if (nsPush(ctxt, NULL,
7812 defaults->values[4 * i + 2]) > 0)
7813 nbNs++;
7814 }
7815 } else if (aprefix == ctxt->str_xmlns) {
7816 /*
7817 * check that it's not a defined namespace
7818 */
7819 for (j = 1;j <= nbNs;j++)
7820 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
7821 break;
7822 if (j <= nbNs) continue;
7823
7824 nsname = xmlGetNamespace(ctxt, attname);
7825 if (nsname != defaults->values[2]) {
7826 if (nsPush(ctxt, attname,
7827 defaults->values[4 * i + 2]) > 0)
7828 nbNs++;
7829 }
7830 } else {
7831 /*
7832 * check that it's not a defined attribute
7833 */
7834 for (j = 0;j < nbatts;j+=5) {
7835 if ((attname == atts[j]) && (aprefix == atts[j+1]))
7836 break;
7837 }
7838 if (j < nbatts) continue;
7839
7840 if ((atts == NULL) || (nbatts + 5 > maxatts)) {
7841 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
Daniel Veillard9ee35f32003-09-28 00:19:54 +00007842 return(NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007843 }
7844 maxatts = ctxt->maxatts;
7845 atts = ctxt->atts;
7846 }
7847 atts[nbatts++] = attname;
7848 atts[nbatts++] = aprefix;
7849 if (aprefix == NULL)
7850 atts[nbatts++] = NULL;
7851 else
7852 atts[nbatts++] = xmlGetNamespace(ctxt, aprefix);
7853 atts[nbatts++] = defaults->values[4 * i + 2];
7854 atts[nbatts++] = defaults->values[4 * i + 3];
7855 nbdef++;
7856 }
7857 }
7858 }
7859 }
7860
7861 nsname = xmlGetNamespace(ctxt, prefix);
7862 if ((prefix != NULL) && (nsname == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007863 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
7864 "Namespace prefix %s on %s is not defined\n",
7865 prefix, localname, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007866 }
7867 *pref = prefix;
7868 *URI = nsname;
7869
7870 /*
7871 * SAX: Start of Element !
7872 */
7873 if ((ctxt->sax != NULL) && (ctxt->sax->startElementNs != NULL) &&
7874 (!ctxt->disableSAX)) {
7875 if (nbNs > 0)
7876 ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
7877 nsname, nbNs, &ctxt->nsTab[ctxt->nsNr - 2 * nbNs],
7878 nbatts / 5, nbdef, atts);
7879 else
7880 ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
7881 nsname, 0, NULL, nbatts / 5, nbdef, atts);
7882 }
7883
7884 /*
7885 * Free up attribute allocated strings if needed
7886 */
7887 if (attval != 0) {
7888 for (i = 3,j = 0; j < nratts;i += 5,j++)
7889 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
7890 xmlFree((xmlChar *) atts[i]);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007891 }
7892
7893 return(localname);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007894
7895base_changed:
7896 /*
7897 * the attribute strings are valid iif the base didn't changed
7898 */
7899 if (attval != 0) {
7900 for (i = 3,j = 0; j < nratts;i += 5,j++)
7901 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
7902 xmlFree((xmlChar *) atts[i]);
7903 }
7904 ctxt->input->cur = ctxt->input->base + cur;
7905 if (ctxt->wellFormed == 1) {
7906 goto reparse;
7907 }
7908 return(NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007909}
7910
7911/**
7912 * xmlParseEndTag2:
7913 * @ctxt: an XML parser context
7914 * @line: line of the start tag
7915 * @nsNr: number of namespaces on the start tag
7916 *
7917 * parse an end of tag
7918 *
7919 * [42] ETag ::= '</' Name S? '>'
7920 *
7921 * With namespace
7922 *
7923 * [NS 9] ETag ::= '</' QName S? '>'
7924 */
7925
7926static void
7927xmlParseEndTag2(xmlParserCtxtPtr ctxt, const xmlChar *prefix,
7928 const xmlChar *URI, int line, int nsNr) {
7929 const xmlChar *name;
7930
7931 GROW;
7932 if ((RAW != '<') || (NXT(1) != '/')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007933 xmlFatalErr(ctxt, XML_ERR_LTSLASH_REQUIRED, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007934 return;
7935 }
7936 SKIP(2);
7937
7938 name = xmlParseQNameAndCompare(ctxt, ctxt->name, prefix);
7939
7940 /*
7941 * We should definitely be at the ending "S? '>'" part
7942 */
7943 GROW;
7944 SKIP_BLANKS;
Daniel Veillard73b013f2003-09-30 12:36:01 +00007945 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007946 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007947 } else
7948 NEXT1;
7949
7950 /*
7951 * [ WFC: Element Type Match ]
7952 * The Name in an element's end-tag must match the element type in the
7953 * start-tag.
7954 *
7955 */
7956 if (name != (xmlChar*)1) {
Daniel Veillardf403d292003-10-05 13:51:35 +00007957 if (name == NULL) name = BAD_CAST "unparseable";
7958 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
Daniel Veillard0fb18932003-09-07 09:14:37 +00007959 "Opening and ending tag mismatch: %s line %d and %s\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00007960 ctxt->name, line, name);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007961 }
7962
7963 /*
7964 * SAX: End of Tag
7965 */
7966 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
7967 (!ctxt->disableSAX))
7968 ctxt->sax->endElementNs(ctxt->userData, ctxt->name, prefix, URI);
7969
Daniel Veillard0fb18932003-09-07 09:14:37 +00007970 spacePop(ctxt);
7971 if (nsNr != 0)
7972 nsPop(ctxt, nsNr);
7973 return;
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00007974}
7975
7976/**
Owen Taylor3473f882001-02-23 17:55:21 +00007977 * xmlParseCDSect:
7978 * @ctxt: an XML parser context
7979 *
7980 * Parse escaped pure raw content.
7981 *
7982 * [18] CDSect ::= CDStart CData CDEnd
7983 *
7984 * [19] CDStart ::= '<![CDATA['
7985 *
7986 * [20] Data ::= (Char* - (Char* ']]>' Char*))
7987 *
7988 * [21] CDEnd ::= ']]>'
7989 */
7990void
7991xmlParseCDSect(xmlParserCtxtPtr ctxt) {
7992 xmlChar *buf = NULL;
7993 int len = 0;
7994 int size = XML_PARSER_BUFFER_SIZE;
7995 int r, rl;
7996 int s, sl;
7997 int cur, l;
7998 int count = 0;
7999
Daniel Veillard8f597c32003-10-06 08:19:27 +00008000 /* Check 2.6.0 was NXT(0) not RAW */
8001 if (memcmp(CUR_PTR, "<![CDATA[", 9) == 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00008002 SKIP(9);
8003 } else
8004 return;
8005
8006 ctxt->instate = XML_PARSER_CDATA_SECTION;
8007 r = CUR_CHAR(rl);
Daniel Veillard73b013f2003-09-30 12:36:01 +00008008 if (!xmlIsChar(r)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008009 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008010 ctxt->instate = XML_PARSER_CONTENT;
8011 return;
8012 }
8013 NEXTL(rl);
8014 s = CUR_CHAR(sl);
Daniel Veillard73b013f2003-09-30 12:36:01 +00008015 if (!xmlIsChar(s)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008016 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008017 ctxt->instate = XML_PARSER_CONTENT;
8018 return;
8019 }
8020 NEXTL(sl);
8021 cur = CUR_CHAR(l);
Daniel Veillard3c908dc2003-04-19 00:07:51 +00008022 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00008023 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008024 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008025 return;
8026 }
Daniel Veillard73b013f2003-09-30 12:36:01 +00008027 while (xmlIsChar(cur) &&
Owen Taylor3473f882001-02-23 17:55:21 +00008028 ((r != ']') || (s != ']') || (cur != '>'))) {
8029 if (len + 5 >= size) {
8030 size *= 2;
8031 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
8032 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008033 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008034 return;
8035 }
8036 }
8037 COPY_BUF(rl,buf,len,r);
8038 r = s;
8039 rl = sl;
8040 s = cur;
8041 sl = l;
8042 count++;
8043 if (count > 50) {
8044 GROW;
8045 count = 0;
8046 }
8047 NEXTL(l);
8048 cur = CUR_CHAR(l);
8049 }
8050 buf[len] = 0;
8051 ctxt->instate = XML_PARSER_CONTENT;
8052 if (cur != '>') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00008053 xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00008054 "CData section not finished\n%.50s\n", buf);
Owen Taylor3473f882001-02-23 17:55:21 +00008055 xmlFree(buf);
8056 return;
8057 }
8058 NEXTL(l);
8059
8060 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00008061 * OK the buffer is to be consumed as cdata.
Owen Taylor3473f882001-02-23 17:55:21 +00008062 */
8063 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
8064 if (ctxt->sax->cdataBlock != NULL)
8065 ctxt->sax->cdataBlock(ctxt->userData, buf, len);
Daniel Veillard7583a592001-07-08 13:15:55 +00008066 else if (ctxt->sax->characters != NULL)
8067 ctxt->sax->characters(ctxt->userData, buf, len);
Owen Taylor3473f882001-02-23 17:55:21 +00008068 }
8069 xmlFree(buf);
8070}
8071
8072/**
8073 * xmlParseContent:
8074 * @ctxt: an XML parser context
8075 *
8076 * Parse a content:
8077 *
8078 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
8079 */
8080
8081void
8082xmlParseContent(xmlParserCtxtPtr ctxt) {
8083 GROW;
Daniel Veillardfdc91562002-07-01 21:52:03 +00008084 while ((RAW != 0) &&
Owen Taylor3473f882001-02-23 17:55:21 +00008085 ((RAW != '<') || (NXT(1) != '/'))) {
8086 const xmlChar *test = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00008087 unsigned int cons = ctxt->input->consumed;
Daniel Veillard21a0f912001-02-25 19:54:14 +00008088 const xmlChar *cur = ctxt->input->cur;
Owen Taylor3473f882001-02-23 17:55:21 +00008089
8090 /*
Owen Taylor3473f882001-02-23 17:55:21 +00008091 * First case : a Processing Instruction.
8092 */
Daniel Veillardfdc91562002-07-01 21:52:03 +00008093 if ((*cur == '<') && (cur[1] == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008094 xmlParsePI(ctxt);
8095 }
8096
8097 /*
8098 * Second case : a CDSection
8099 */
Daniel Veillard8f597c32003-10-06 08:19:27 +00008100 /* 2.6.0 test was *cur not RAW */
8101 else if (memcmp(CUR_PTR, "<![CDATA[", 9) == 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00008102 xmlParseCDSect(ctxt);
8103 }
8104
8105 /*
8106 * Third case : a comment
8107 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00008108 else if ((*cur == '<') && (NXT(1) == '!') &&
Owen Taylor3473f882001-02-23 17:55:21 +00008109 (NXT(2) == '-') && (NXT(3) == '-')) {
8110 xmlParseComment(ctxt);
8111 ctxt->instate = XML_PARSER_CONTENT;
8112 }
8113
8114 /*
8115 * Fourth case : a sub-element.
8116 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00008117 else if (*cur == '<') {
Owen Taylor3473f882001-02-23 17:55:21 +00008118 xmlParseElement(ctxt);
8119 }
8120
8121 /*
8122 * Fifth case : a reference. If if has not been resolved,
8123 * parsing returns it's Name, create the node
8124 */
8125
Daniel Veillard21a0f912001-02-25 19:54:14 +00008126 else if (*cur == '&') {
Owen Taylor3473f882001-02-23 17:55:21 +00008127 xmlParseReference(ctxt);
8128 }
8129
8130 /*
8131 * Last case, text. Note that References are handled directly.
8132 */
8133 else {
8134 xmlParseCharData(ctxt, 0);
8135 }
8136
8137 GROW;
8138 /*
8139 * Pop-up of finished entities.
8140 */
Daniel Veillard561b7f82002-03-20 21:55:57 +00008141 while ((RAW == 0) && (ctxt->inputNr > 1))
Owen Taylor3473f882001-02-23 17:55:21 +00008142 xmlPopInput(ctxt);
8143 SHRINK;
8144
Daniel Veillardfdc91562002-07-01 21:52:03 +00008145 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008146 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8147 "detected an error in element content\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008148 ctxt->instate = XML_PARSER_EOF;
8149 break;
8150 }
8151 }
8152}
8153
8154/**
8155 * xmlParseElement:
8156 * @ctxt: an XML parser context
8157 *
8158 * parse an XML element, this is highly recursive
8159 *
8160 * [39] element ::= EmptyElemTag | STag content ETag
8161 *
8162 * [ WFC: Element Type Match ]
8163 * The Name in an element's end-tag must match the element type in the
8164 * start-tag.
8165 *
Owen Taylor3473f882001-02-23 17:55:21 +00008166 */
8167
8168void
8169xmlParseElement(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008170 const xmlChar *name;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008171 const xmlChar *prefix;
8172 const xmlChar *URI;
Owen Taylor3473f882001-02-23 17:55:21 +00008173 xmlParserNodeInfo node_info;
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00008174 int line;
Owen Taylor3473f882001-02-23 17:55:21 +00008175 xmlNodePtr ret;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008176 int nsNr = ctxt->nsNr;
Owen Taylor3473f882001-02-23 17:55:21 +00008177
8178 /* Capture start position */
8179 if (ctxt->record_info) {
8180 node_info.begin_pos = ctxt->input->consumed +
8181 (CUR_PTR - ctxt->input->base);
8182 node_info.begin_line = ctxt->input->line;
8183 }
8184
8185 if (ctxt->spaceNr == 0)
8186 spacePush(ctxt, -1);
8187 else
8188 spacePush(ctxt, *ctxt->space);
8189
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00008190 line = ctxt->input->line;
Daniel Veillard81273902003-09-30 00:43:48 +00008191#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +00008192 if (ctxt->sax2)
Daniel Veillard81273902003-09-30 00:43:48 +00008193#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillard0fb18932003-09-07 09:14:37 +00008194 name = xmlParseStartTag2(ctxt, &prefix, &URI);
Daniel Veillard81273902003-09-30 00:43:48 +00008195#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +00008196 else
8197 name = xmlParseStartTag(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +00008198#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00008199 if (name == NULL) {
8200 spacePop(ctxt);
8201 return;
8202 }
8203 namePush(ctxt, name);
8204 ret = ctxt->node;
8205
Daniel Veillard4432df22003-09-28 18:58:27 +00008206#ifdef LIBXML_VALID_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +00008207 /*
8208 * [ VC: Root Element Type ]
8209 * The Name in the document type declaration must match the element
8210 * type of the root element.
8211 */
8212 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
8213 ctxt->node && (ctxt->node == ctxt->myDoc->children))
8214 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
Daniel Veillard4432df22003-09-28 18:58:27 +00008215#endif /* LIBXML_VALID_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00008216
8217 /*
8218 * Check for an Empty Element.
8219 */
8220 if ((RAW == '/') && (NXT(1) == '>')) {
8221 SKIP(2);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008222 if (ctxt->sax2) {
8223 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
8224 (!ctxt->disableSAX))
8225 ctxt->sax->endElementNs(ctxt->userData, name, prefix, URI);
Daniel Veillard81273902003-09-30 00:43:48 +00008226#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +00008227 } else {
8228 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
8229 (!ctxt->disableSAX))
8230 ctxt->sax->endElement(ctxt->userData, name);
Daniel Veillard81273902003-09-30 00:43:48 +00008231#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00008232 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008233 namePop(ctxt);
8234 spacePop(ctxt);
8235 if (nsNr != ctxt->nsNr)
8236 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +00008237 if ( ret != NULL && ctxt->record_info ) {
8238 node_info.end_pos = ctxt->input->consumed +
8239 (CUR_PTR - ctxt->input->base);
8240 node_info.end_line = ctxt->input->line;
8241 node_info.node = ret;
8242 xmlParserAddNodeInfo(ctxt, &node_info);
8243 }
8244 return;
8245 }
8246 if (RAW == '>') {
Daniel Veillard21a0f912001-02-25 19:54:14 +00008247 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00008248 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00008249 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_GT_REQUIRED,
8250 "Couldn't find end of Start Tag %s line %d\n",
8251 name, line, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008252
8253 /*
8254 * end of parsing of this node.
8255 */
8256 nodePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008257 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00008258 spacePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008259 if (nsNr != ctxt->nsNr)
8260 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +00008261
8262 /*
8263 * Capture end position and add node
8264 */
8265 if ( ret != NULL && ctxt->record_info ) {
8266 node_info.end_pos = ctxt->input->consumed +
8267 (CUR_PTR - ctxt->input->base);
8268 node_info.end_line = ctxt->input->line;
8269 node_info.node = ret;
8270 xmlParserAddNodeInfo(ctxt, &node_info);
8271 }
8272 return;
8273 }
8274
8275 /*
8276 * Parse the content of the element:
8277 */
8278 xmlParseContent(ctxt);
Daniel Veillard73b013f2003-09-30 12:36:01 +00008279 if (!IS_BYTE_CHAR(RAW)) {
Daniel Veillardf403d292003-10-05 13:51:35 +00008280 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
Daniel Veillard2b0f8792003-10-10 19:36:36 +00008281 "Premature end of data in tag %s line %d\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00008282 name, line, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008283
8284 /*
8285 * end of parsing of this node.
8286 */
8287 nodePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008288 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00008289 spacePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008290 if (nsNr != ctxt->nsNr)
8291 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +00008292 return;
8293 }
8294
8295 /*
8296 * parse the end of tag: '</' should be here.
8297 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00008298 if (ctxt->sax2) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00008299 xmlParseEndTag2(ctxt, prefix, URI, line, ctxt->nsNr - nsNr);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008300 namePop(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +00008301 }
8302#ifdef LIBXML_SAX1_ENABLED
8303 else
Daniel Veillard0fb18932003-09-07 09:14:37 +00008304 xmlParseEndTag1(ctxt, line);
Daniel Veillard81273902003-09-30 00:43:48 +00008305#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00008306
8307 /*
8308 * Capture end position and add node
8309 */
8310 if ( ret != NULL && ctxt->record_info ) {
8311 node_info.end_pos = ctxt->input->consumed +
8312 (CUR_PTR - ctxt->input->base);
8313 node_info.end_line = ctxt->input->line;
8314 node_info.node = ret;
8315 xmlParserAddNodeInfo(ctxt, &node_info);
8316 }
8317}
8318
8319/**
8320 * xmlParseVersionNum:
8321 * @ctxt: an XML parser context
8322 *
8323 * parse the XML version value.
8324 *
8325 * [26] VersionNum ::= ([a-zA-Z0-9_.:] | '-')+
8326 *
8327 * Returns the string giving the XML version number, or NULL
8328 */
8329xmlChar *
8330xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
8331 xmlChar *buf = NULL;
8332 int len = 0;
8333 int size = 10;
8334 xmlChar cur;
8335
Daniel Veillard3c908dc2003-04-19 00:07:51 +00008336 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00008337 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008338 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008339 return(NULL);
8340 }
8341 cur = CUR;
8342 while (((cur >= 'a') && (cur <= 'z')) ||
8343 ((cur >= 'A') && (cur <= 'Z')) ||
8344 ((cur >= '0') && (cur <= '9')) ||
8345 (cur == '_') || (cur == '.') ||
8346 (cur == ':') || (cur == '-')) {
8347 if (len + 1 >= size) {
8348 size *= 2;
8349 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
8350 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008351 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008352 return(NULL);
8353 }
8354 }
8355 buf[len++] = cur;
8356 NEXT;
8357 cur=CUR;
8358 }
8359 buf[len] = 0;
8360 return(buf);
8361}
8362
8363/**
8364 * xmlParseVersionInfo:
8365 * @ctxt: an XML parser context
8366 *
8367 * parse the XML version.
8368 *
8369 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
8370 *
8371 * [25] Eq ::= S? '=' S?
8372 *
8373 * Returns the version string, e.g. "1.0"
8374 */
8375
8376xmlChar *
8377xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
8378 xmlChar *version = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00008379
Daniel Veillard8f597c32003-10-06 08:19:27 +00008380 if (memcmp(CUR_PTR, "version", 7) == 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00008381 SKIP(7);
8382 SKIP_BLANKS;
8383 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008384 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008385 return(NULL);
8386 }
8387 NEXT;
8388 SKIP_BLANKS;
8389 if (RAW == '"') {
8390 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00008391 version = xmlParseVersionNum(ctxt);
8392 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008393 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008394 } else
8395 NEXT;
8396 } else if (RAW == '\''){
8397 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00008398 version = xmlParseVersionNum(ctxt);
8399 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008400 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008401 } else
8402 NEXT;
8403 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008404 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008405 }
8406 }
8407 return(version);
8408}
8409
8410/**
8411 * xmlParseEncName:
8412 * @ctxt: an XML parser context
8413 *
8414 * parse the XML encoding name
8415 *
8416 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
8417 *
8418 * Returns the encoding name value or NULL
8419 */
8420xmlChar *
8421xmlParseEncName(xmlParserCtxtPtr ctxt) {
8422 xmlChar *buf = NULL;
8423 int len = 0;
8424 int size = 10;
8425 xmlChar cur;
8426
8427 cur = CUR;
8428 if (((cur >= 'a') && (cur <= 'z')) ||
8429 ((cur >= 'A') && (cur <= 'Z'))) {
Daniel Veillard3c908dc2003-04-19 00:07:51 +00008430 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00008431 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008432 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008433 return(NULL);
8434 }
8435
8436 buf[len++] = cur;
8437 NEXT;
8438 cur = CUR;
8439 while (((cur >= 'a') && (cur <= 'z')) ||
8440 ((cur >= 'A') && (cur <= 'Z')) ||
8441 ((cur >= '0') && (cur <= '9')) ||
8442 (cur == '.') || (cur == '_') ||
8443 (cur == '-')) {
8444 if (len + 1 >= size) {
8445 size *= 2;
8446 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
8447 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008448 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008449 return(NULL);
8450 }
8451 }
8452 buf[len++] = cur;
8453 NEXT;
8454 cur = CUR;
8455 if (cur == 0) {
8456 SHRINK;
8457 GROW;
8458 cur = CUR;
8459 }
8460 }
8461 buf[len] = 0;
8462 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008463 xmlFatalErr(ctxt, XML_ERR_ENCODING_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008464 }
8465 return(buf);
8466}
8467
8468/**
8469 * xmlParseEncodingDecl:
8470 * @ctxt: an XML parser context
8471 *
8472 * parse the XML encoding declaration
8473 *
8474 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
8475 *
8476 * this setups the conversion filters.
8477 *
8478 * Returns the encoding value or NULL
8479 */
8480
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00008481const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00008482xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
8483 xmlChar *encoding = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00008484
8485 SKIP_BLANKS;
Daniel Veillard8f597c32003-10-06 08:19:27 +00008486 if (memcmp(CUR_PTR, "encoding", 8) == 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00008487 SKIP(8);
8488 SKIP_BLANKS;
8489 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008490 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008491 return(NULL);
8492 }
8493 NEXT;
8494 SKIP_BLANKS;
8495 if (RAW == '"') {
8496 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00008497 encoding = xmlParseEncName(ctxt);
8498 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008499 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008500 } else
8501 NEXT;
8502 } else if (RAW == '\''){
8503 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00008504 encoding = xmlParseEncName(ctxt);
8505 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008506 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008507 } else
8508 NEXT;
Daniel Veillard82ac6b02002-02-17 23:18:55 +00008509 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008510 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008511 }
Daniel Veillard6b621b82003-08-11 15:03:34 +00008512 /*
8513 * UTF-16 encoding stwich has already taken place at this stage,
8514 * more over the little-endian/big-endian selection is already done
8515 */
8516 if ((encoding != NULL) &&
8517 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-16")) ||
8518 (!xmlStrcasecmp(encoding, BAD_CAST "UTF16")))) {
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00008519 if (ctxt->encoding != NULL)
8520 xmlFree((xmlChar *) ctxt->encoding);
8521 ctxt->encoding = encoding;
Daniel Veillardb19ba832003-08-14 00:33:46 +00008522 }
8523 /*
8524 * UTF-8 encoding is handled natively
8525 */
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00008526 else if ((encoding != NULL) &&
Daniel Veillardb19ba832003-08-14 00:33:46 +00008527 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-8")) ||
8528 (!xmlStrcasecmp(encoding, BAD_CAST "UTF8")))) {
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00008529 if (ctxt->encoding != NULL)
8530 xmlFree((xmlChar *) ctxt->encoding);
8531 ctxt->encoding = encoding;
Daniel Veillard6b621b82003-08-11 15:03:34 +00008532 }
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00008533 else if (encoding != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +00008534 xmlCharEncodingHandlerPtr handler;
8535
8536 if (ctxt->input->encoding != NULL)
8537 xmlFree((xmlChar *) ctxt->input->encoding);
8538 ctxt->input->encoding = encoding;
8539
Daniel Veillarda6874ca2003-07-29 16:47:24 +00008540 handler = xmlFindCharEncodingHandler((const char *) encoding);
8541 if (handler != NULL) {
8542 xmlSwitchToEncoding(ctxt, handler);
Owen Taylor3473f882001-02-23 17:55:21 +00008543 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00008544 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
Daniel Veillarda6874ca2003-07-29 16:47:24 +00008545 "Unsupported encoding %s\n", encoding);
8546 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008547 }
8548 }
8549 }
8550 return(encoding);
8551}
8552
8553/**
8554 * xmlParseSDDecl:
8555 * @ctxt: an XML parser context
8556 *
8557 * parse the XML standalone declaration
8558 *
8559 * [32] SDDecl ::= S 'standalone' Eq
8560 * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
8561 *
8562 * [ VC: Standalone Document Declaration ]
8563 * TODO The standalone document declaration must have the value "no"
8564 * if any external markup declarations contain declarations of:
8565 * - attributes with default values, if elements to which these
8566 * attributes apply appear in the document without specifications
8567 * of values for these attributes, or
8568 * - entities (other than amp, lt, gt, apos, quot), if references
8569 * to those entities appear in the document, or
8570 * - attributes with values subject to normalization, where the
8571 * attribute appears in the document with a value which will change
8572 * as a result of normalization, or
8573 * - element types with element content, if white space occurs directly
8574 * within any instance of those types.
8575 *
8576 * Returns 1 if standalone, 0 otherwise
8577 */
8578
8579int
8580xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
8581 int standalone = -1;
8582
8583 SKIP_BLANKS;
Daniel Veillard8f597c32003-10-06 08:19:27 +00008584 if (memcmp(CUR_PTR, "standalone", 10) == 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00008585 SKIP(10);
8586 SKIP_BLANKS;
8587 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008588 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008589 return(standalone);
8590 }
8591 NEXT;
8592 SKIP_BLANKS;
8593 if (RAW == '\''){
8594 NEXT;
8595 if ((RAW == 'n') && (NXT(1) == 'o')) {
8596 standalone = 0;
8597 SKIP(2);
8598 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
8599 (NXT(2) == 's')) {
8600 standalone = 1;
8601 SKIP(3);
8602 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008603 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008604 }
8605 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008606 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008607 } else
8608 NEXT;
8609 } else if (RAW == '"'){
8610 NEXT;
8611 if ((RAW == 'n') && (NXT(1) == 'o')) {
8612 standalone = 0;
8613 SKIP(2);
8614 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
8615 (NXT(2) == 's')) {
8616 standalone = 1;
8617 SKIP(3);
8618 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008619 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008620 }
8621 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008622 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008623 } else
8624 NEXT;
8625 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008626 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008627 }
8628 }
8629 return(standalone);
8630}
8631
8632/**
8633 * xmlParseXMLDecl:
8634 * @ctxt: an XML parser context
8635 *
8636 * parse an XML declaration header
8637 *
8638 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
8639 */
8640
8641void
8642xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
8643 xmlChar *version;
8644
8645 /*
8646 * We know that '<?xml' is here.
8647 */
8648 SKIP(5);
8649
8650 if (!IS_BLANK(RAW)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008651 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
8652 "Blank needed after '<?xml'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008653 }
8654 SKIP_BLANKS;
8655
8656 /*
Daniel Veillard19840942001-11-29 16:11:38 +00008657 * We must have the VersionInfo here.
Owen Taylor3473f882001-02-23 17:55:21 +00008658 */
8659 version = xmlParseVersionInfo(ctxt);
Daniel Veillard19840942001-11-29 16:11:38 +00008660 if (version == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008661 xmlFatalErr(ctxt, XML_ERR_VERSION_MISSING, NULL);
Daniel Veillard19840942001-11-29 16:11:38 +00008662 } else {
8663 if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) {
8664 /*
8665 * TODO: Blueberry should be detected here
8666 */
Daniel Veillard24eb9782003-10-04 21:08:09 +00008667 xmlWarningMsg(ctxt, XML_WAR_UNKNOWN_VERSION,
8668 "Unsupported version '%s'\n",
8669 version, NULL);
Daniel Veillard19840942001-11-29 16:11:38 +00008670 }
8671 if (ctxt->version != NULL)
Daniel Veillardd3b08822001-12-05 12:03:33 +00008672 xmlFree((void *) ctxt->version);
Daniel Veillard19840942001-11-29 16:11:38 +00008673 ctxt->version = version;
Daniel Veillarda050d232001-09-05 15:51:05 +00008674 }
Owen Taylor3473f882001-02-23 17:55:21 +00008675
8676 /*
8677 * We may have the encoding declaration
8678 */
8679 if (!IS_BLANK(RAW)) {
8680 if ((RAW == '?') && (NXT(1) == '>')) {
8681 SKIP(2);
8682 return;
8683 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008684 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008685 }
8686 xmlParseEncodingDecl(ctxt);
8687 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
8688 /*
8689 * The XML REC instructs us to stop parsing right here
8690 */
8691 return;
8692 }
8693
8694 /*
8695 * We may have the standalone status.
8696 */
8697 if ((ctxt->input->encoding != NULL) && (!IS_BLANK(RAW))) {
8698 if ((RAW == '?') && (NXT(1) == '>')) {
8699 SKIP(2);
8700 return;
8701 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008702 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008703 }
8704 SKIP_BLANKS;
8705 ctxt->input->standalone = xmlParseSDDecl(ctxt);
8706
8707 SKIP_BLANKS;
8708 if ((RAW == '?') && (NXT(1) == '>')) {
8709 SKIP(2);
8710 } else if (RAW == '>') {
8711 /* Deprecated old WD ... */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008712 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008713 NEXT;
8714 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008715 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008716 MOVETO_ENDTAG(CUR_PTR);
8717 NEXT;
8718 }
8719}
8720
8721/**
8722 * xmlParseMisc:
8723 * @ctxt: an XML parser context
8724 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00008725 * parse an XML Misc* optional field.
Owen Taylor3473f882001-02-23 17:55:21 +00008726 *
8727 * [27] Misc ::= Comment | PI | S
8728 */
8729
8730void
8731xmlParseMisc(xmlParserCtxtPtr ctxt) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00008732 while (((RAW == '<') && (NXT(1) == '?')) ||
Daniel Veillard8f597c32003-10-06 08:19:27 +00008733 (memcmp(CUR_PTR, "<!--", 4) == 0) ||
Daniel Veillard561b7f82002-03-20 21:55:57 +00008734 IS_BLANK(CUR)) {
8735 if ((RAW == '<') && (NXT(1) == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008736 xmlParsePI(ctxt);
Daniel Veillard561b7f82002-03-20 21:55:57 +00008737 } else if (IS_BLANK(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00008738 NEXT;
8739 } else
8740 xmlParseComment(ctxt);
8741 }
8742}
8743
8744/**
8745 * xmlParseDocument:
8746 * @ctxt: an XML parser context
8747 *
8748 * parse an XML document (and build a tree if using the standard SAX
8749 * interface).
8750 *
8751 * [1] document ::= prolog element Misc*
8752 *
8753 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
8754 *
8755 * Returns 0, -1 in case of error. the parser context is augmented
8756 * as a result of the parsing.
8757 */
8758
8759int
8760xmlParseDocument(xmlParserCtxtPtr ctxt) {
8761 xmlChar start[4];
8762 xmlCharEncoding enc;
8763
8764 xmlInitParser();
8765
8766 GROW;
8767
8768 /*
Daniel Veillard0fb18932003-09-07 09:14:37 +00008769 * SAX: detecting the level.
8770 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00008771 xmlDetectSAX2(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008772
8773 /*
Owen Taylor3473f882001-02-23 17:55:21 +00008774 * SAX: beginning of the document processing.
8775 */
8776 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
8777 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
8778
Daniel Veillard50f34372001-08-03 12:06:36 +00008779 if (ctxt->encoding == (const xmlChar *)XML_CHAR_ENCODING_NONE) {
Daniel Veillard4aafa792001-07-28 17:21:12 +00008780 /*
8781 * Get the 4 first bytes and decode the charset
8782 * if enc != XML_CHAR_ENCODING_NONE
8783 * plug some encoding conversion routines.
8784 */
8785 start[0] = RAW;
8786 start[1] = NXT(1);
8787 start[2] = NXT(2);
8788 start[3] = NXT(3);
8789 enc = xmlDetectCharEncoding(start, 4);
8790 if (enc != XML_CHAR_ENCODING_NONE) {
8791 xmlSwitchEncoding(ctxt, enc);
8792 }
Owen Taylor3473f882001-02-23 17:55:21 +00008793 }
8794
8795
8796 if (CUR == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008797 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008798 }
8799
8800 /*
8801 * Check for the XMLDecl in the Prolog.
8802 */
8803 GROW;
Daniel Veillard8f597c32003-10-06 08:19:27 +00008804 if ((memcmp(CUR_PTR, "<?xml", 5) == 0) && (IS_BLANK(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +00008805
8806 /*
8807 * Note that we will switch encoding on the fly.
8808 */
8809 xmlParseXMLDecl(ctxt);
8810 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
8811 /*
8812 * The XML REC instructs us to stop parsing right here
8813 */
8814 return(-1);
8815 }
8816 ctxt->standalone = ctxt->input->standalone;
8817 SKIP_BLANKS;
8818 } else {
8819 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
8820 }
8821 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
8822 ctxt->sax->startDocument(ctxt->userData);
8823
8824 /*
8825 * The Misc part of the Prolog
8826 */
8827 GROW;
8828 xmlParseMisc(ctxt);
8829
8830 /*
8831 * Then possibly doc type declaration(s) and more Misc
8832 * (doctypedecl Misc*)?
8833 */
8834 GROW;
Daniel Veillard8f597c32003-10-06 08:19:27 +00008835 if (memcmp(CUR_PTR, "<!DOCTYPE", 9) == 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00008836
8837 ctxt->inSubset = 1;
8838 xmlParseDocTypeDecl(ctxt);
8839 if (RAW == '[') {
8840 ctxt->instate = XML_PARSER_DTD;
8841 xmlParseInternalSubset(ctxt);
8842 }
8843
8844 /*
8845 * Create and update the external subset.
8846 */
8847 ctxt->inSubset = 2;
8848 if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
8849 (!ctxt->disableSAX))
8850 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
8851 ctxt->extSubSystem, ctxt->extSubURI);
8852 ctxt->inSubset = 0;
8853
8854
8855 ctxt->instate = XML_PARSER_PROLOG;
8856 xmlParseMisc(ctxt);
8857 }
8858
8859 /*
8860 * Time to start parsing the tree itself
8861 */
8862 GROW;
8863 if (RAW != '<') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008864 xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
8865 "Start tag expected, '<' not found\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008866 } else {
8867 ctxt->instate = XML_PARSER_CONTENT;
8868 xmlParseElement(ctxt);
8869 ctxt->instate = XML_PARSER_EPILOG;
8870
8871
8872 /*
8873 * The Misc part at the end
8874 */
8875 xmlParseMisc(ctxt);
8876
Daniel Veillard561b7f82002-03-20 21:55:57 +00008877 if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008878 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008879 }
8880 ctxt->instate = XML_PARSER_EOF;
8881 }
8882
8883 /*
8884 * SAX: end of the document processing.
8885 */
Daniel Veillard8d24cc12002-03-05 15:41:29 +00008886 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00008887 ctxt->sax->endDocument(ctxt->userData);
8888
Daniel Veillard5997aca2002-03-18 18:36:20 +00008889 /*
8890 * Remove locally kept entity definitions if the tree was not built
8891 */
8892 if ((ctxt->myDoc != NULL) &&
8893 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
8894 xmlFreeDoc(ctxt->myDoc);
8895 ctxt->myDoc = NULL;
8896 }
8897
Daniel Veillardc7612992002-02-17 22:47:37 +00008898 if (! ctxt->wellFormed) {
8899 ctxt->valid = 0;
8900 return(-1);
8901 }
Owen Taylor3473f882001-02-23 17:55:21 +00008902 return(0);
8903}
8904
8905/**
8906 * xmlParseExtParsedEnt:
8907 * @ctxt: an XML parser context
8908 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00008909 * parse a general parsed entity
Owen Taylor3473f882001-02-23 17:55:21 +00008910 * An external general parsed entity is well-formed if it matches the
8911 * production labeled extParsedEnt.
8912 *
8913 * [78] extParsedEnt ::= TextDecl? content
8914 *
8915 * Returns 0, -1 in case of error. the parser context is augmented
8916 * as a result of the parsing.
8917 */
8918
8919int
8920xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
8921 xmlChar start[4];
8922 xmlCharEncoding enc;
8923
8924 xmlDefaultSAXHandlerInit();
8925
Daniel Veillard309f81d2003-09-23 09:02:53 +00008926 xmlDetectSAX2(ctxt);
8927
Owen Taylor3473f882001-02-23 17:55:21 +00008928 GROW;
8929
8930 /*
8931 * SAX: beginning of the document processing.
8932 */
8933 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
8934 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
8935
8936 /*
8937 * Get the 4 first bytes and decode the charset
8938 * if enc != XML_CHAR_ENCODING_NONE
8939 * plug some encoding conversion routines.
8940 */
8941 start[0] = RAW;
8942 start[1] = NXT(1);
8943 start[2] = NXT(2);
8944 start[3] = NXT(3);
8945 enc = xmlDetectCharEncoding(start, 4);
8946 if (enc != XML_CHAR_ENCODING_NONE) {
8947 xmlSwitchEncoding(ctxt, enc);
8948 }
8949
8950
8951 if (CUR == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008952 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008953 }
8954
8955 /*
8956 * Check for the XMLDecl in the Prolog.
8957 */
8958 GROW;
Daniel Veillard8f597c32003-10-06 08:19:27 +00008959 if ((memcmp(CUR_PTR, "<?xml", 5) == 0) && (IS_BLANK(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +00008960
8961 /*
8962 * Note that we will switch encoding on the fly.
8963 */
8964 xmlParseXMLDecl(ctxt);
8965 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
8966 /*
8967 * The XML REC instructs us to stop parsing right here
8968 */
8969 return(-1);
8970 }
8971 SKIP_BLANKS;
8972 } else {
8973 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
8974 }
8975 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
8976 ctxt->sax->startDocument(ctxt->userData);
8977
8978 /*
8979 * Doing validity checking on chunk doesn't make sense
8980 */
8981 ctxt->instate = XML_PARSER_CONTENT;
8982 ctxt->validate = 0;
8983 ctxt->loadsubset = 0;
8984 ctxt->depth = 0;
8985
8986 xmlParseContent(ctxt);
8987
8988 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008989 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008990 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008991 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008992 }
8993
8994 /*
8995 * SAX: end of the document processing.
8996 */
Daniel Veillard8d24cc12002-03-05 15:41:29 +00008997 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00008998 ctxt->sax->endDocument(ctxt->userData);
8999
9000 if (! ctxt->wellFormed) return(-1);
9001 return(0);
9002}
9003
Daniel Veillard73b013f2003-09-30 12:36:01 +00009004#ifdef LIBXML_PUSH_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +00009005/************************************************************************
9006 * *
9007 * Progressive parsing interfaces *
9008 * *
9009 ************************************************************************/
9010
9011/**
9012 * xmlParseLookupSequence:
9013 * @ctxt: an XML parser context
9014 * @first: the first char to lookup
9015 * @next: the next char to lookup or zero
9016 * @third: the next char to lookup or zero
9017 *
9018 * Try to find if a sequence (first, next, third) or just (first next) or
9019 * (first) is available in the input stream.
9020 * This function has a side effect of (possibly) incrementing ctxt->checkIndex
9021 * to avoid rescanning sequences of bytes, it DOES change the state of the
9022 * parser, do not use liberally.
9023 *
9024 * Returns the index to the current parsing point if the full sequence
9025 * is available, -1 otherwise.
9026 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00009027static int
Owen Taylor3473f882001-02-23 17:55:21 +00009028xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
9029 xmlChar next, xmlChar third) {
9030 int base, len;
9031 xmlParserInputPtr in;
9032 const xmlChar *buf;
9033
9034 in = ctxt->input;
9035 if (in == NULL) return(-1);
9036 base = in->cur - in->base;
9037 if (base < 0) return(-1);
9038 if (ctxt->checkIndex > base)
9039 base = ctxt->checkIndex;
9040 if (in->buf == NULL) {
9041 buf = in->base;
9042 len = in->length;
9043 } else {
9044 buf = in->buf->buffer->content;
9045 len = in->buf->buffer->use;
9046 }
9047 /* take into account the sequence length */
9048 if (third) len -= 2;
9049 else if (next) len --;
9050 for (;base < len;base++) {
9051 if (buf[base] == first) {
9052 if (third != 0) {
9053 if ((buf[base + 1] != next) ||
9054 (buf[base + 2] != third)) continue;
9055 } else if (next != 0) {
9056 if (buf[base + 1] != next) continue;
9057 }
9058 ctxt->checkIndex = 0;
9059#ifdef DEBUG_PUSH
9060 if (next == 0)
9061 xmlGenericError(xmlGenericErrorContext,
9062 "PP: lookup '%c' found at %d\n",
9063 first, base);
9064 else if (third == 0)
9065 xmlGenericError(xmlGenericErrorContext,
9066 "PP: lookup '%c%c' found at %d\n",
9067 first, next, base);
9068 else
9069 xmlGenericError(xmlGenericErrorContext,
9070 "PP: lookup '%c%c%c' found at %d\n",
9071 first, next, third, base);
9072#endif
9073 return(base - (in->cur - in->base));
9074 }
9075 }
9076 ctxt->checkIndex = base;
9077#ifdef DEBUG_PUSH
9078 if (next == 0)
9079 xmlGenericError(xmlGenericErrorContext,
9080 "PP: lookup '%c' failed\n", first);
9081 else if (third == 0)
9082 xmlGenericError(xmlGenericErrorContext,
9083 "PP: lookup '%c%c' failed\n", first, next);
9084 else
9085 xmlGenericError(xmlGenericErrorContext,
9086 "PP: lookup '%c%c%c' failed\n", first, next, third);
9087#endif
9088 return(-1);
9089}
9090
9091/**
Daniel Veillarda880b122003-04-21 21:36:41 +00009092 * xmlParseGetLasts:
9093 * @ctxt: an XML parser context
9094 * @lastlt: pointer to store the last '<' from the input
9095 * @lastgt: pointer to store the last '>' from the input
9096 *
9097 * Lookup the last < and > in the current chunk
9098 */
9099static void
9100xmlParseGetLasts(xmlParserCtxtPtr ctxt, const xmlChar **lastlt,
9101 const xmlChar **lastgt) {
9102 const xmlChar *tmp;
9103
9104 if ((ctxt == NULL) || (lastlt == NULL) || (lastgt == NULL)) {
9105 xmlGenericError(xmlGenericErrorContext,
9106 "Internal error: xmlParseGetLasts\n");
9107 return;
9108 }
9109 if ((ctxt->progressive == 1) && (ctxt->inputNr == 1)) {
9110 tmp = ctxt->input->end;
9111 tmp--;
9112 while ((tmp >= ctxt->input->base) && (*tmp != '<') &&
9113 (*tmp != '>')) tmp--;
9114 if (tmp < ctxt->input->base) {
9115 *lastlt = NULL;
9116 *lastgt = NULL;
9117 } else if (*tmp == '<') {
9118 *lastlt = tmp;
9119 tmp--;
9120 while ((tmp >= ctxt->input->base) && (*tmp != '>')) tmp--;
9121 if (tmp < ctxt->input->base)
9122 *lastgt = NULL;
9123 else
9124 *lastgt = tmp;
9125 } else {
9126 *lastgt = tmp;
9127 tmp--;
9128 while ((tmp >= ctxt->input->base) && (*tmp != '<')) tmp--;
9129 if (tmp < ctxt->input->base)
9130 *lastlt = NULL;
9131 else
9132 *lastlt = tmp;
9133 }
9134
9135 } else {
9136 *lastlt = NULL;
9137 *lastgt = NULL;
9138 }
9139}
9140/**
Owen Taylor3473f882001-02-23 17:55:21 +00009141 * xmlParseTryOrFinish:
9142 * @ctxt: an XML parser context
9143 * @terminate: last chunk indicator
9144 *
9145 * Try to progress on parsing
9146 *
9147 * Returns zero if no parsing was possible
9148 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00009149static int
Owen Taylor3473f882001-02-23 17:55:21 +00009150xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
9151 int ret = 0;
9152 int avail;
9153 xmlChar cur, next;
Daniel Veillarda880b122003-04-21 21:36:41 +00009154 const xmlChar *lastlt, *lastgt;
Owen Taylor3473f882001-02-23 17:55:21 +00009155
9156#ifdef DEBUG_PUSH
9157 switch (ctxt->instate) {
9158 case XML_PARSER_EOF:
9159 xmlGenericError(xmlGenericErrorContext,
9160 "PP: try EOF\n"); break;
9161 case XML_PARSER_START:
9162 xmlGenericError(xmlGenericErrorContext,
9163 "PP: try START\n"); break;
9164 case XML_PARSER_MISC:
9165 xmlGenericError(xmlGenericErrorContext,
9166 "PP: try MISC\n");break;
9167 case XML_PARSER_COMMENT:
9168 xmlGenericError(xmlGenericErrorContext,
9169 "PP: try COMMENT\n");break;
9170 case XML_PARSER_PROLOG:
9171 xmlGenericError(xmlGenericErrorContext,
9172 "PP: try PROLOG\n");break;
9173 case XML_PARSER_START_TAG:
9174 xmlGenericError(xmlGenericErrorContext,
9175 "PP: try START_TAG\n");break;
9176 case XML_PARSER_CONTENT:
9177 xmlGenericError(xmlGenericErrorContext,
9178 "PP: try CONTENT\n");break;
9179 case XML_PARSER_CDATA_SECTION:
9180 xmlGenericError(xmlGenericErrorContext,
9181 "PP: try CDATA_SECTION\n");break;
9182 case XML_PARSER_END_TAG:
9183 xmlGenericError(xmlGenericErrorContext,
9184 "PP: try END_TAG\n");break;
9185 case XML_PARSER_ENTITY_DECL:
9186 xmlGenericError(xmlGenericErrorContext,
9187 "PP: try ENTITY_DECL\n");break;
9188 case XML_PARSER_ENTITY_VALUE:
9189 xmlGenericError(xmlGenericErrorContext,
9190 "PP: try ENTITY_VALUE\n");break;
9191 case XML_PARSER_ATTRIBUTE_VALUE:
9192 xmlGenericError(xmlGenericErrorContext,
9193 "PP: try ATTRIBUTE_VALUE\n");break;
9194 case XML_PARSER_DTD:
9195 xmlGenericError(xmlGenericErrorContext,
9196 "PP: try DTD\n");break;
9197 case XML_PARSER_EPILOG:
9198 xmlGenericError(xmlGenericErrorContext,
9199 "PP: try EPILOG\n");break;
9200 case XML_PARSER_PI:
9201 xmlGenericError(xmlGenericErrorContext,
9202 "PP: try PI\n");break;
9203 case XML_PARSER_IGNORE:
9204 xmlGenericError(xmlGenericErrorContext,
9205 "PP: try IGNORE\n");break;
9206 }
9207#endif
9208
Daniel Veillarda880b122003-04-21 21:36:41 +00009209 if (ctxt->input->cur - ctxt->input->base > 4096) {
9210 xmlSHRINK(ctxt);
9211 ctxt->checkIndex = 0;
9212 }
9213 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Aleksey Sanine48a3182002-05-09 18:20:01 +00009214
Daniel Veillarda880b122003-04-21 21:36:41 +00009215 while (1) {
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00009216 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
9217 return(0);
9218
9219
Owen Taylor3473f882001-02-23 17:55:21 +00009220 /*
9221 * Pop-up of finished entities.
9222 */
9223 while ((RAW == 0) && (ctxt->inputNr > 1))
9224 xmlPopInput(ctxt);
9225
9226 if (ctxt->input ==NULL) break;
9227 if (ctxt->input->buf == NULL)
Daniel Veillarda880b122003-04-21 21:36:41 +00009228 avail = ctxt->input->length -
9229 (ctxt->input->cur - ctxt->input->base);
Daniel Veillard158a4d22002-02-20 22:17:58 +00009230 else {
9231 /*
9232 * If we are operating on converted input, try to flush
9233 * remainng chars to avoid them stalling in the non-converted
9234 * buffer.
9235 */
9236 if ((ctxt->input->buf->raw != NULL) &&
9237 (ctxt->input->buf->raw->use > 0)) {
9238 int base = ctxt->input->base -
9239 ctxt->input->buf->buffer->content;
9240 int current = ctxt->input->cur - ctxt->input->base;
9241
9242 xmlParserInputBufferPush(ctxt->input->buf, 0, "");
9243 ctxt->input->base = ctxt->input->buf->buffer->content + base;
9244 ctxt->input->cur = ctxt->input->base + current;
9245 ctxt->input->end =
9246 &ctxt->input->buf->buffer->content[
9247 ctxt->input->buf->buffer->use];
9248 }
9249 avail = ctxt->input->buf->buffer->use -
9250 (ctxt->input->cur - ctxt->input->base);
9251 }
Owen Taylor3473f882001-02-23 17:55:21 +00009252 if (avail < 1)
9253 goto done;
9254 switch (ctxt->instate) {
9255 case XML_PARSER_EOF:
9256 /*
9257 * Document parsing is done !
9258 */
9259 goto done;
9260 case XML_PARSER_START:
Daniel Veillard0e4cd172001-06-28 12:13:56 +00009261 if (ctxt->charset == XML_CHAR_ENCODING_NONE) {
9262 xmlChar start[4];
9263 xmlCharEncoding enc;
9264
9265 /*
9266 * Very first chars read from the document flow.
9267 */
9268 if (avail < 4)
9269 goto done;
9270
9271 /*
9272 * Get the 4 first bytes and decode the charset
9273 * if enc != XML_CHAR_ENCODING_NONE
9274 * plug some encoding conversion routines.
9275 */
9276 start[0] = RAW;
9277 start[1] = NXT(1);
9278 start[2] = NXT(2);
9279 start[3] = NXT(3);
9280 enc = xmlDetectCharEncoding(start, 4);
9281 if (enc != XML_CHAR_ENCODING_NONE) {
9282 xmlSwitchEncoding(ctxt, enc);
9283 }
9284 break;
9285 }
Owen Taylor3473f882001-02-23 17:55:21 +00009286
Daniel Veillard2b8c4a12003-10-02 22:28:19 +00009287 if (avail < 2)
9288 goto done;
Owen Taylor3473f882001-02-23 17:55:21 +00009289 cur = ctxt->input->cur[0];
9290 next = ctxt->input->cur[1];
9291 if (cur == 0) {
9292 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
9293 ctxt->sax->setDocumentLocator(ctxt->userData,
9294 &xmlDefaultSAXLocator);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009295 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009296 ctxt->instate = XML_PARSER_EOF;
9297#ifdef DEBUG_PUSH
9298 xmlGenericError(xmlGenericErrorContext,
9299 "PP: entering EOF\n");
9300#endif
9301 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
9302 ctxt->sax->endDocument(ctxt->userData);
9303 goto done;
9304 }
9305 if ((cur == '<') && (next == '?')) {
9306 /* PI or XML decl */
9307 if (avail < 5) return(ret);
9308 if ((!terminate) &&
9309 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
9310 return(ret);
9311 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
9312 ctxt->sax->setDocumentLocator(ctxt->userData,
9313 &xmlDefaultSAXLocator);
9314 if ((ctxt->input->cur[2] == 'x') &&
9315 (ctxt->input->cur[3] == 'm') &&
9316 (ctxt->input->cur[4] == 'l') &&
9317 (IS_BLANK(ctxt->input->cur[5]))) {
9318 ret += 5;
9319#ifdef DEBUG_PUSH
9320 xmlGenericError(xmlGenericErrorContext,
9321 "PP: Parsing XML Decl\n");
9322#endif
9323 xmlParseXMLDecl(ctxt);
9324 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
9325 /*
9326 * The XML REC instructs us to stop parsing right
9327 * here
9328 */
9329 ctxt->instate = XML_PARSER_EOF;
9330 return(0);
9331 }
9332 ctxt->standalone = ctxt->input->standalone;
9333 if ((ctxt->encoding == NULL) &&
9334 (ctxt->input->encoding != NULL))
9335 ctxt->encoding = xmlStrdup(ctxt->input->encoding);
9336 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
9337 (!ctxt->disableSAX))
9338 ctxt->sax->startDocument(ctxt->userData);
9339 ctxt->instate = XML_PARSER_MISC;
9340#ifdef DEBUG_PUSH
9341 xmlGenericError(xmlGenericErrorContext,
9342 "PP: entering MISC\n");
9343#endif
9344 } else {
9345 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
9346 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
9347 (!ctxt->disableSAX))
9348 ctxt->sax->startDocument(ctxt->userData);
9349 ctxt->instate = XML_PARSER_MISC;
9350#ifdef DEBUG_PUSH
9351 xmlGenericError(xmlGenericErrorContext,
9352 "PP: entering MISC\n");
9353#endif
9354 }
9355 } else {
9356 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
9357 ctxt->sax->setDocumentLocator(ctxt->userData,
9358 &xmlDefaultSAXLocator);
9359 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
9360 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
9361 (!ctxt->disableSAX))
9362 ctxt->sax->startDocument(ctxt->userData);
9363 ctxt->instate = XML_PARSER_MISC;
9364#ifdef DEBUG_PUSH
9365 xmlGenericError(xmlGenericErrorContext,
9366 "PP: entering MISC\n");
9367#endif
9368 }
9369 break;
Daniel Veillarda880b122003-04-21 21:36:41 +00009370 case XML_PARSER_START_TAG: {
Daniel Veillarde57ec792003-09-10 10:50:59 +00009371 const xmlChar *name;
9372 const xmlChar *prefix;
9373 const xmlChar *URI;
9374 int nsNr = ctxt->nsNr;
Daniel Veillarda880b122003-04-21 21:36:41 +00009375
9376 if ((avail < 2) && (ctxt->inputNr == 1))
9377 goto done;
9378 cur = ctxt->input->cur[0];
9379 if (cur != '<') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009380 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Daniel Veillarda880b122003-04-21 21:36:41 +00009381 ctxt->instate = XML_PARSER_EOF;
Daniel Veillarda880b122003-04-21 21:36:41 +00009382 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
9383 ctxt->sax->endDocument(ctxt->userData);
9384 goto done;
9385 }
9386 if (!terminate) {
9387 if (ctxt->progressive) {
9388 if ((lastgt == NULL) || (ctxt->input->cur > lastgt))
9389 goto done;
9390 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
9391 goto done;
9392 }
9393 }
9394 if (ctxt->spaceNr == 0)
9395 spacePush(ctxt, -1);
9396 else
9397 spacePush(ctxt, *ctxt->space);
Daniel Veillard81273902003-09-30 00:43:48 +00009398#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +00009399 if (ctxt->sax2)
Daniel Veillard81273902003-09-30 00:43:48 +00009400#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +00009401 name = xmlParseStartTag2(ctxt, &prefix, &URI);
Daniel Veillard81273902003-09-30 00:43:48 +00009402#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +00009403 else
9404 name = xmlParseStartTag(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +00009405#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarda880b122003-04-21 21:36:41 +00009406 if (name == NULL) {
9407 spacePop(ctxt);
9408 ctxt->instate = XML_PARSER_EOF;
Daniel Veillarda880b122003-04-21 21:36:41 +00009409 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
9410 ctxt->sax->endDocument(ctxt->userData);
9411 goto done;
9412 }
Daniel Veillard4432df22003-09-28 18:58:27 +00009413#ifdef LIBXML_VALID_ENABLED
Daniel Veillarda880b122003-04-21 21:36:41 +00009414 /*
9415 * [ VC: Root Element Type ]
9416 * The Name in the document type declaration must match
9417 * the element type of the root element.
9418 */
9419 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
9420 ctxt->node && (ctxt->node == ctxt->myDoc->children))
9421 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
Daniel Veillard4432df22003-09-28 18:58:27 +00009422#endif /* LIBXML_VALID_ENABLED */
Daniel Veillarda880b122003-04-21 21:36:41 +00009423
9424 /*
9425 * Check for an Empty Element.
9426 */
9427 if ((RAW == '/') && (NXT(1) == '>')) {
9428 SKIP(2);
Daniel Veillarde57ec792003-09-10 10:50:59 +00009429
9430 if (ctxt->sax2) {
9431 if ((ctxt->sax != NULL) &&
9432 (ctxt->sax->endElementNs != NULL) &&
9433 (!ctxt->disableSAX))
9434 ctxt->sax->endElementNs(ctxt->userData, name,
9435 prefix, URI);
Daniel Veillard81273902003-09-30 00:43:48 +00009436#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +00009437 } else {
9438 if ((ctxt->sax != NULL) &&
9439 (ctxt->sax->endElement != NULL) &&
9440 (!ctxt->disableSAX))
9441 ctxt->sax->endElement(ctxt->userData, name);
Daniel Veillard81273902003-09-30 00:43:48 +00009442#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarda880b122003-04-21 21:36:41 +00009443 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00009444 spacePop(ctxt);
9445 if (ctxt->nameNr == 0) {
Daniel Veillarda880b122003-04-21 21:36:41 +00009446 ctxt->instate = XML_PARSER_EPILOG;
Daniel Veillarda880b122003-04-21 21:36:41 +00009447 } else {
9448 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillarda880b122003-04-21 21:36:41 +00009449 }
9450 break;
9451 }
9452 if (RAW == '>') {
9453 NEXT;
9454 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00009455 xmlFatalErrMsgStr(ctxt, XML_ERR_GT_REQUIRED,
Daniel Veillarda880b122003-04-21 21:36:41 +00009456 "Couldn't find end of Start Tag %s\n",
9457 name);
Daniel Veillarda880b122003-04-21 21:36:41 +00009458 nodePop(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +00009459 spacePop(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +00009460 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00009461 if (ctxt->sax2)
9462 nameNsPush(ctxt, name, prefix, URI, ctxt->nsNr - nsNr);
Daniel Veillard81273902003-09-30 00:43:48 +00009463#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +00009464 else
9465 namePush(ctxt, name);
Daniel Veillard81273902003-09-30 00:43:48 +00009466#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +00009467
Daniel Veillarda880b122003-04-21 21:36:41 +00009468 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillarda880b122003-04-21 21:36:41 +00009469 break;
9470 }
9471 case XML_PARSER_CONTENT: {
9472 const xmlChar *test;
9473 unsigned int cons;
9474 if ((avail < 2) && (ctxt->inputNr == 1))
9475 goto done;
9476 cur = ctxt->input->cur[0];
9477 next = ctxt->input->cur[1];
9478
9479 test = CUR_PTR;
9480 cons = ctxt->input->consumed;
9481 if ((cur == '<') && (next == '/')) {
9482 ctxt->instate = XML_PARSER_END_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +00009483 break;
9484 } else if ((cur == '<') && (next == '?')) {
9485 if ((!terminate) &&
9486 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
9487 goto done;
Daniel Veillarda880b122003-04-21 21:36:41 +00009488 xmlParsePI(ctxt);
9489 } else if ((cur == '<') && (next != '!')) {
9490 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +00009491 break;
9492 } else if ((cur == '<') && (next == '!') &&
9493 (ctxt->input->cur[2] == '-') &&
9494 (ctxt->input->cur[3] == '-')) {
9495 if ((!terminate) &&
9496 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
9497 goto done;
Daniel Veillarda880b122003-04-21 21:36:41 +00009498 xmlParseComment(ctxt);
9499 ctxt->instate = XML_PARSER_CONTENT;
9500 } else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
9501 (ctxt->input->cur[2] == '[') &&
9502 (ctxt->input->cur[3] == 'C') &&
9503 (ctxt->input->cur[4] == 'D') &&
9504 (ctxt->input->cur[5] == 'A') &&
9505 (ctxt->input->cur[6] == 'T') &&
9506 (ctxt->input->cur[7] == 'A') &&
9507 (ctxt->input->cur[8] == '[')) {
9508 SKIP(9);
9509 ctxt->instate = XML_PARSER_CDATA_SECTION;
Daniel Veillarda880b122003-04-21 21:36:41 +00009510 break;
9511 } else if ((cur == '<') && (next == '!') &&
9512 (avail < 9)) {
9513 goto done;
9514 } else if (cur == '&') {
9515 if ((!terminate) &&
9516 (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0))
9517 goto done;
Daniel Veillarda880b122003-04-21 21:36:41 +00009518 xmlParseReference(ctxt);
9519 } else {
9520 /* TODO Avoid the extra copy, handle directly !!! */
9521 /*
9522 * Goal of the following test is:
9523 * - minimize calls to the SAX 'character' callback
9524 * when they are mergeable
9525 * - handle an problem for isBlank when we only parse
9526 * a sequence of blank chars and the next one is
9527 * not available to check against '<' presence.
9528 * - tries to homogenize the differences in SAX
9529 * callbacks between the push and pull versions
9530 * of the parser.
9531 */
9532 if ((ctxt->inputNr == 1) &&
9533 (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
9534 if (!terminate) {
9535 if (ctxt->progressive) {
9536 if ((lastlt == NULL) ||
9537 (ctxt->input->cur > lastlt))
9538 goto done;
9539 } else if (xmlParseLookupSequence(ctxt,
9540 '<', 0, 0) < 0) {
9541 goto done;
9542 }
9543 }
9544 }
9545 ctxt->checkIndex = 0;
Daniel Veillarda880b122003-04-21 21:36:41 +00009546 xmlParseCharData(ctxt, 0);
9547 }
9548 /*
9549 * Pop-up of finished entities.
9550 */
9551 while ((RAW == 0) && (ctxt->inputNr > 1))
9552 xmlPopInput(ctxt);
9553 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009554 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
9555 "detected an error in element content\n");
Daniel Veillarda880b122003-04-21 21:36:41 +00009556 ctxt->instate = XML_PARSER_EOF;
9557 break;
9558 }
9559 break;
9560 }
9561 case XML_PARSER_END_TAG:
9562 if (avail < 2)
9563 goto done;
9564 if (!terminate) {
9565 if (ctxt->progressive) {
9566 if ((lastgt == NULL) || (ctxt->input->cur > lastgt))
9567 goto done;
9568 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
9569 goto done;
9570 }
9571 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00009572 if (ctxt->sax2) {
9573 xmlParseEndTag2(ctxt,
9574 (void *) ctxt->pushTab[ctxt->nameNr * 3 - 3],
9575 (void *) ctxt->pushTab[ctxt->nameNr * 3 - 2], 0,
9576 (int) (long) ctxt->pushTab[ctxt->nameNr * 3 - 1]);
9577 nameNsPop(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +00009578 }
9579#ifdef LIBXML_SAX1_ENABLED
9580 else
Daniel Veillarde57ec792003-09-10 10:50:59 +00009581 xmlParseEndTag1(ctxt, 0);
Daniel Veillard81273902003-09-30 00:43:48 +00009582#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +00009583 if (ctxt->nameNr == 0) {
Daniel Veillarda880b122003-04-21 21:36:41 +00009584 ctxt->instate = XML_PARSER_EPILOG;
Daniel Veillarda880b122003-04-21 21:36:41 +00009585 } else {
9586 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillarda880b122003-04-21 21:36:41 +00009587 }
9588 break;
9589 case XML_PARSER_CDATA_SECTION: {
9590 /*
9591 * The Push mode need to have the SAX callback for
9592 * cdataBlock merge back contiguous callbacks.
9593 */
9594 int base;
9595
9596 base = xmlParseLookupSequence(ctxt, ']', ']', '>');
9597 if (base < 0) {
9598 if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) {
9599 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
9600 if (ctxt->sax->cdataBlock != NULL)
Daniel Veillardd9d32ae2003-07-05 20:32:43 +00009601 ctxt->sax->cdataBlock(ctxt->userData,
9602 ctxt->input->cur,
9603 XML_PARSER_BIG_BUFFER_SIZE);
9604 else if (ctxt->sax->characters != NULL)
9605 ctxt->sax->characters(ctxt->userData,
9606 ctxt->input->cur,
Daniel Veillarda880b122003-04-21 21:36:41 +00009607 XML_PARSER_BIG_BUFFER_SIZE);
9608 }
9609 SKIP(XML_PARSER_BIG_BUFFER_SIZE);
9610 ctxt->checkIndex = 0;
9611 }
9612 goto done;
9613 } else {
9614 if ((ctxt->sax != NULL) && (base > 0) &&
9615 (!ctxt->disableSAX)) {
9616 if (ctxt->sax->cdataBlock != NULL)
9617 ctxt->sax->cdataBlock(ctxt->userData,
9618 ctxt->input->cur, base);
Daniel Veillardd9d32ae2003-07-05 20:32:43 +00009619 else if (ctxt->sax->characters != NULL)
9620 ctxt->sax->characters(ctxt->userData,
9621 ctxt->input->cur, base);
Daniel Veillarda880b122003-04-21 21:36:41 +00009622 }
9623 SKIP(base + 3);
9624 ctxt->checkIndex = 0;
9625 ctxt->instate = XML_PARSER_CONTENT;
9626#ifdef DEBUG_PUSH
9627 xmlGenericError(xmlGenericErrorContext,
9628 "PP: entering CONTENT\n");
9629#endif
9630 }
9631 break;
9632 }
Owen Taylor3473f882001-02-23 17:55:21 +00009633 case XML_PARSER_MISC:
9634 SKIP_BLANKS;
9635 if (ctxt->input->buf == NULL)
Daniel Veillarda880b122003-04-21 21:36:41 +00009636 avail = ctxt->input->length -
9637 (ctxt->input->cur - ctxt->input->base);
Owen Taylor3473f882001-02-23 17:55:21 +00009638 else
Daniel Veillarda880b122003-04-21 21:36:41 +00009639 avail = ctxt->input->buf->buffer->use -
9640 (ctxt->input->cur - ctxt->input->base);
Owen Taylor3473f882001-02-23 17:55:21 +00009641 if (avail < 2)
9642 goto done;
9643 cur = ctxt->input->cur[0];
9644 next = ctxt->input->cur[1];
9645 if ((cur == '<') && (next == '?')) {
9646 if ((!terminate) &&
9647 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
9648 goto done;
9649#ifdef DEBUG_PUSH
9650 xmlGenericError(xmlGenericErrorContext,
9651 "PP: Parsing PI\n");
9652#endif
9653 xmlParsePI(ctxt);
9654 } else if ((cur == '<') && (next == '!') &&
Daniel Veillarda880b122003-04-21 21:36:41 +00009655 (ctxt->input->cur[2] == '-') &&
9656 (ctxt->input->cur[3] == '-')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009657 if ((!terminate) &&
9658 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
9659 goto done;
9660#ifdef DEBUG_PUSH
9661 xmlGenericError(xmlGenericErrorContext,
9662 "PP: Parsing Comment\n");
9663#endif
9664 xmlParseComment(ctxt);
9665 ctxt->instate = XML_PARSER_MISC;
9666 } else if ((cur == '<') && (next == '!') &&
Daniel Veillarda880b122003-04-21 21:36:41 +00009667 (ctxt->input->cur[2] == 'D') &&
9668 (ctxt->input->cur[3] == 'O') &&
9669 (ctxt->input->cur[4] == 'C') &&
9670 (ctxt->input->cur[5] == 'T') &&
9671 (ctxt->input->cur[6] == 'Y') &&
9672 (ctxt->input->cur[7] == 'P') &&
Owen Taylor3473f882001-02-23 17:55:21 +00009673 (ctxt->input->cur[8] == 'E')) {
9674 if ((!terminate) &&
9675 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
9676 goto done;
9677#ifdef DEBUG_PUSH
9678 xmlGenericError(xmlGenericErrorContext,
9679 "PP: Parsing internal subset\n");
9680#endif
9681 ctxt->inSubset = 1;
9682 xmlParseDocTypeDecl(ctxt);
9683 if (RAW == '[') {
9684 ctxt->instate = XML_PARSER_DTD;
9685#ifdef DEBUG_PUSH
9686 xmlGenericError(xmlGenericErrorContext,
9687 "PP: entering DTD\n");
9688#endif
9689 } else {
9690 /*
9691 * Create and update the external subset.
9692 */
9693 ctxt->inSubset = 2;
9694 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
9695 (ctxt->sax->externalSubset != NULL))
9696 ctxt->sax->externalSubset(ctxt->userData,
9697 ctxt->intSubName, ctxt->extSubSystem,
9698 ctxt->extSubURI);
9699 ctxt->inSubset = 0;
9700 ctxt->instate = XML_PARSER_PROLOG;
9701#ifdef DEBUG_PUSH
9702 xmlGenericError(xmlGenericErrorContext,
9703 "PP: entering PROLOG\n");
9704#endif
9705 }
9706 } else if ((cur == '<') && (next == '!') &&
9707 (avail < 9)) {
9708 goto done;
9709 } else {
9710 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +00009711 ctxt->progressive = 1;
9712 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Owen Taylor3473f882001-02-23 17:55:21 +00009713#ifdef DEBUG_PUSH
9714 xmlGenericError(xmlGenericErrorContext,
9715 "PP: entering START_TAG\n");
9716#endif
9717 }
9718 break;
Owen Taylor3473f882001-02-23 17:55:21 +00009719 case XML_PARSER_PROLOG:
9720 SKIP_BLANKS;
9721 if (ctxt->input->buf == NULL)
9722 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
9723 else
9724 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
9725 if (avail < 2)
9726 goto done;
9727 cur = ctxt->input->cur[0];
9728 next = ctxt->input->cur[1];
9729 if ((cur == '<') && (next == '?')) {
9730 if ((!terminate) &&
9731 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
9732 goto done;
9733#ifdef DEBUG_PUSH
9734 xmlGenericError(xmlGenericErrorContext,
9735 "PP: Parsing PI\n");
9736#endif
9737 xmlParsePI(ctxt);
9738 } else if ((cur == '<') && (next == '!') &&
9739 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
9740 if ((!terminate) &&
9741 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
9742 goto done;
9743#ifdef DEBUG_PUSH
9744 xmlGenericError(xmlGenericErrorContext,
9745 "PP: Parsing Comment\n");
9746#endif
9747 xmlParseComment(ctxt);
9748 ctxt->instate = XML_PARSER_PROLOG;
9749 } else if ((cur == '<') && (next == '!') &&
9750 (avail < 4)) {
9751 goto done;
9752 } else {
9753 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +00009754 ctxt->progressive = 1;
9755 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Owen Taylor3473f882001-02-23 17:55:21 +00009756#ifdef DEBUG_PUSH
9757 xmlGenericError(xmlGenericErrorContext,
9758 "PP: entering START_TAG\n");
9759#endif
9760 }
9761 break;
9762 case XML_PARSER_EPILOG:
9763 SKIP_BLANKS;
9764 if (ctxt->input->buf == NULL)
9765 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
9766 else
9767 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
9768 if (avail < 2)
9769 goto done;
9770 cur = ctxt->input->cur[0];
9771 next = ctxt->input->cur[1];
9772 if ((cur == '<') && (next == '?')) {
9773 if ((!terminate) &&
9774 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
9775 goto done;
9776#ifdef DEBUG_PUSH
9777 xmlGenericError(xmlGenericErrorContext,
9778 "PP: Parsing PI\n");
9779#endif
9780 xmlParsePI(ctxt);
9781 ctxt->instate = XML_PARSER_EPILOG;
9782 } else if ((cur == '<') && (next == '!') &&
9783 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
9784 if ((!terminate) &&
9785 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
9786 goto done;
9787#ifdef DEBUG_PUSH
9788 xmlGenericError(xmlGenericErrorContext,
9789 "PP: Parsing Comment\n");
9790#endif
9791 xmlParseComment(ctxt);
9792 ctxt->instate = XML_PARSER_EPILOG;
9793 } else if ((cur == '<') && (next == '!') &&
9794 (avail < 4)) {
9795 goto done;
9796 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009797 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009798 ctxt->instate = XML_PARSER_EOF;
9799#ifdef DEBUG_PUSH
9800 xmlGenericError(xmlGenericErrorContext,
9801 "PP: entering EOF\n");
9802#endif
Daniel Veillard8d24cc12002-03-05 15:41:29 +00009803 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00009804 ctxt->sax->endDocument(ctxt->userData);
9805 goto done;
9806 }
9807 break;
Owen Taylor3473f882001-02-23 17:55:21 +00009808 case XML_PARSER_DTD: {
9809 /*
9810 * Sorry but progressive parsing of the internal subset
9811 * is not expected to be supported. We first check that
9812 * the full content of the internal subset is available and
9813 * the parsing is launched only at that point.
9814 * Internal subset ends up with "']' S? '>'" in an unescaped
9815 * section and not in a ']]>' sequence which are conditional
9816 * sections (whoever argued to keep that crap in XML deserve
9817 * a place in hell !).
9818 */
9819 int base, i;
9820 xmlChar *buf;
9821 xmlChar quote = 0;
9822
9823 base = ctxt->input->cur - ctxt->input->base;
9824 if (base < 0) return(0);
9825 if (ctxt->checkIndex > base)
9826 base = ctxt->checkIndex;
9827 buf = ctxt->input->buf->buffer->content;
9828 for (;(unsigned int) base < ctxt->input->buf->buffer->use;
9829 base++) {
9830 if (quote != 0) {
9831 if (buf[base] == quote)
9832 quote = 0;
9833 continue;
9834 }
9835 if (buf[base] == '"') {
9836 quote = '"';
9837 continue;
9838 }
9839 if (buf[base] == '\'') {
9840 quote = '\'';
9841 continue;
9842 }
9843 if (buf[base] == ']') {
9844 if ((unsigned int) base +1 >=
9845 ctxt->input->buf->buffer->use)
9846 break;
9847 if (buf[base + 1] == ']') {
9848 /* conditional crap, skip both ']' ! */
9849 base++;
9850 continue;
9851 }
9852 for (i = 0;
9853 (unsigned int) base + i < ctxt->input->buf->buffer->use;
9854 i++) {
9855 if (buf[base + i] == '>')
9856 goto found_end_int_subset;
9857 }
9858 break;
9859 }
9860 }
9861 /*
9862 * We didn't found the end of the Internal subset
9863 */
9864 if (quote == 0)
9865 ctxt->checkIndex = base;
9866#ifdef DEBUG_PUSH
9867 if (next == 0)
9868 xmlGenericError(xmlGenericErrorContext,
9869 "PP: lookup of int subset end filed\n");
9870#endif
9871 goto done;
9872
9873found_end_int_subset:
9874 xmlParseInternalSubset(ctxt);
9875 ctxt->inSubset = 2;
9876 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
9877 (ctxt->sax->externalSubset != NULL))
9878 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
9879 ctxt->extSubSystem, ctxt->extSubURI);
9880 ctxt->inSubset = 0;
9881 ctxt->instate = XML_PARSER_PROLOG;
9882 ctxt->checkIndex = 0;
9883#ifdef DEBUG_PUSH
9884 xmlGenericError(xmlGenericErrorContext,
9885 "PP: entering PROLOG\n");
9886#endif
9887 break;
9888 }
9889 case XML_PARSER_COMMENT:
9890 xmlGenericError(xmlGenericErrorContext,
9891 "PP: internal error, state == COMMENT\n");
9892 ctxt->instate = XML_PARSER_CONTENT;
9893#ifdef DEBUG_PUSH
9894 xmlGenericError(xmlGenericErrorContext,
9895 "PP: entering CONTENT\n");
9896#endif
9897 break;
Daniel Veillarda880b122003-04-21 21:36:41 +00009898 case XML_PARSER_IGNORE:
9899 xmlGenericError(xmlGenericErrorContext,
9900 "PP: internal error, state == IGNORE");
9901 ctxt->instate = XML_PARSER_DTD;
9902#ifdef DEBUG_PUSH
9903 xmlGenericError(xmlGenericErrorContext,
9904 "PP: entering DTD\n");
9905#endif
9906 break;
Owen Taylor3473f882001-02-23 17:55:21 +00009907 case XML_PARSER_PI:
9908 xmlGenericError(xmlGenericErrorContext,
9909 "PP: internal error, state == PI\n");
9910 ctxt->instate = XML_PARSER_CONTENT;
9911#ifdef DEBUG_PUSH
9912 xmlGenericError(xmlGenericErrorContext,
9913 "PP: entering CONTENT\n");
9914#endif
9915 break;
9916 case XML_PARSER_ENTITY_DECL:
9917 xmlGenericError(xmlGenericErrorContext,
9918 "PP: internal error, state == ENTITY_DECL\n");
9919 ctxt->instate = XML_PARSER_DTD;
9920#ifdef DEBUG_PUSH
9921 xmlGenericError(xmlGenericErrorContext,
9922 "PP: entering DTD\n");
9923#endif
9924 break;
9925 case XML_PARSER_ENTITY_VALUE:
9926 xmlGenericError(xmlGenericErrorContext,
9927 "PP: internal error, state == ENTITY_VALUE\n");
9928 ctxt->instate = XML_PARSER_CONTENT;
9929#ifdef DEBUG_PUSH
9930 xmlGenericError(xmlGenericErrorContext,
9931 "PP: entering DTD\n");
9932#endif
9933 break;
9934 case XML_PARSER_ATTRIBUTE_VALUE:
9935 xmlGenericError(xmlGenericErrorContext,
9936 "PP: internal error, state == ATTRIBUTE_VALUE\n");
9937 ctxt->instate = XML_PARSER_START_TAG;
9938#ifdef DEBUG_PUSH
9939 xmlGenericError(xmlGenericErrorContext,
9940 "PP: entering START_TAG\n");
9941#endif
9942 break;
9943 case XML_PARSER_SYSTEM_LITERAL:
9944 xmlGenericError(xmlGenericErrorContext,
9945 "PP: internal error, state == SYSTEM_LITERAL\n");
9946 ctxt->instate = XML_PARSER_START_TAG;
9947#ifdef DEBUG_PUSH
9948 xmlGenericError(xmlGenericErrorContext,
9949 "PP: entering START_TAG\n");
9950#endif
9951 break;
Daniel Veillard4a7ae502002-02-18 19:18:17 +00009952 case XML_PARSER_PUBLIC_LITERAL:
9953 xmlGenericError(xmlGenericErrorContext,
9954 "PP: internal error, state == PUBLIC_LITERAL\n");
9955 ctxt->instate = XML_PARSER_START_TAG;
9956#ifdef DEBUG_PUSH
9957 xmlGenericError(xmlGenericErrorContext,
9958 "PP: entering START_TAG\n");
9959#endif
9960 break;
Owen Taylor3473f882001-02-23 17:55:21 +00009961 }
9962 }
9963done:
9964#ifdef DEBUG_PUSH
9965 xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret);
9966#endif
9967 return(ret);
9968}
9969
9970/**
Owen Taylor3473f882001-02-23 17:55:21 +00009971 * xmlParseChunk:
9972 * @ctxt: an XML parser context
9973 * @chunk: an char array
9974 * @size: the size in byte of the chunk
9975 * @terminate: last chunk indicator
9976 *
9977 * Parse a Chunk of memory
9978 *
9979 * Returns zero if no error, the xmlParserErrors otherwise.
9980 */
9981int
9982xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
9983 int terminate) {
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00009984 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
9985 return(ctxt->errNo);
Daniel Veillard309f81d2003-09-23 09:02:53 +00009986 if (ctxt->instate == XML_PARSER_START)
9987 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00009988 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
9989 (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) {
9990 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
9991 int cur = ctxt->input->cur - ctxt->input->base;
9992
9993 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
9994 ctxt->input->base = ctxt->input->buf->buffer->content + base;
9995 ctxt->input->cur = ctxt->input->base + cur;
Daniel Veillard48b2f892001-02-25 16:11:03 +00009996 ctxt->input->end =
9997 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00009998#ifdef DEBUG_PUSH
9999 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
10000#endif
10001
Owen Taylor3473f882001-02-23 17:55:21 +000010002 } else if (ctxt->instate != XML_PARSER_EOF) {
10003 if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
10004 xmlParserInputBufferPtr in = ctxt->input->buf;
10005 if ((in->encoder != NULL) && (in->buffer != NULL) &&
10006 (in->raw != NULL)) {
10007 int nbchars;
10008
10009 nbchars = xmlCharEncInFunc(in->encoder, in->buffer, in->raw);
10010 if (nbchars < 0) {
Daniel Veillard24eb9782003-10-04 21:08:09 +000010011 /* TODO 2.6.0 */
Owen Taylor3473f882001-02-23 17:55:21 +000010012 xmlGenericError(xmlGenericErrorContext,
10013 "xmlParseChunk: encoder error\n");
10014 return(XML_ERR_INVALID_ENCODING);
10015 }
10016 }
10017 }
10018 }
10019 xmlParseTryOrFinish(ctxt, terminate);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000010020 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
10021 return(ctxt->errNo);
Owen Taylor3473f882001-02-23 17:55:21 +000010022 if (terminate) {
10023 /*
10024 * Check for termination
10025 */
Daniel Veillard819d5cb2002-10-14 11:15:18 +000010026 int avail = 0;
10027 if (ctxt->input->buf == NULL)
10028 avail = ctxt->input->length -
10029 (ctxt->input->cur - ctxt->input->base);
10030 else
10031 avail = ctxt->input->buf->buffer->use -
10032 (ctxt->input->cur - ctxt->input->base);
10033
Owen Taylor3473f882001-02-23 17:55:21 +000010034 if ((ctxt->instate != XML_PARSER_EOF) &&
10035 (ctxt->instate != XML_PARSER_EPILOG)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010036 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010037 }
Daniel Veillard819d5cb2002-10-14 11:15:18 +000010038 if ((ctxt->instate == XML_PARSER_EPILOG) && (avail > 0)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010039 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Daniel Veillard819d5cb2002-10-14 11:15:18 +000010040 }
Owen Taylor3473f882001-02-23 17:55:21 +000010041 if (ctxt->instate != XML_PARSER_EOF) {
Daniel Veillard8d24cc12002-03-05 15:41:29 +000010042 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +000010043 ctxt->sax->endDocument(ctxt->userData);
10044 }
10045 ctxt->instate = XML_PARSER_EOF;
10046 }
10047 return((xmlParserErrors) ctxt->errNo);
10048}
10049
10050/************************************************************************
10051 * *
10052 * I/O front end functions to the parser *
10053 * *
10054 ************************************************************************/
10055
10056/**
10057 * xmlStopParser:
10058 * @ctxt: an XML parser context
10059 *
10060 * Blocks further parser processing
10061 */
10062void
10063xmlStopParser(xmlParserCtxtPtr ctxt) {
10064 ctxt->instate = XML_PARSER_EOF;
10065 if (ctxt->input != NULL)
10066 ctxt->input->cur = BAD_CAST"";
10067}
10068
10069/**
10070 * xmlCreatePushParserCtxt:
10071 * @sax: a SAX handler
10072 * @user_data: The user data returned on SAX callbacks
10073 * @chunk: a pointer to an array of chars
10074 * @size: number of chars in the array
10075 * @filename: an optional file name or URI
10076 *
Daniel Veillard176d99f2002-07-06 19:22:28 +000010077 * Create a parser context for using the XML parser in push mode.
10078 * If @buffer and @size are non-NULL, the data is used to detect
10079 * the encoding. The remaining characters will be parsed so they
10080 * don't need to be fed in again through xmlParseChunk.
Owen Taylor3473f882001-02-23 17:55:21 +000010081 * To allow content encoding detection, @size should be >= 4
10082 * The value of @filename is used for fetching external entities
10083 * and error/warning reports.
10084 *
10085 * Returns the new parser context or NULL
10086 */
Daniel Veillard176d99f2002-07-06 19:22:28 +000010087
Owen Taylor3473f882001-02-23 17:55:21 +000010088xmlParserCtxtPtr
10089xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
10090 const char *chunk, int size, const char *filename) {
10091 xmlParserCtxtPtr ctxt;
10092 xmlParserInputPtr inputStream;
10093 xmlParserInputBufferPtr buf;
10094 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
10095
10096 /*
10097 * plug some encoding conversion routines
10098 */
10099 if ((chunk != NULL) && (size >= 4))
10100 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
10101
10102 buf = xmlAllocParserInputBuffer(enc);
10103 if (buf == NULL) return(NULL);
10104
10105 ctxt = xmlNewParserCtxt();
10106 if (ctxt == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +000010107 xmlErrMemory(NULL, "creating parser: out of memory\n");
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000010108 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000010109 return(NULL);
10110 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000010111 ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 * sizeof(xmlChar *));
10112 if (ctxt->pushTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010113 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +000010114 xmlFreeParserInputBuffer(buf);
10115 xmlFreeParserCtxt(ctxt);
10116 return(NULL);
10117 }
Owen Taylor3473f882001-02-23 17:55:21 +000010118 if (sax != NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +000010119#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard092643b2003-09-25 14:29:29 +000010120 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Daniel Veillard81273902003-09-30 00:43:48 +000010121#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000010122 xmlFree(ctxt->sax);
10123 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
10124 if (ctxt->sax == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010125 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000010126 xmlFreeParserInputBuffer(buf);
10127 xmlFreeParserCtxt(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010128 return(NULL);
10129 }
10130 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
10131 if (user_data != NULL)
10132 ctxt->userData = user_data;
10133 }
10134 if (filename == NULL) {
10135 ctxt->directory = NULL;
10136 } else {
10137 ctxt->directory = xmlParserGetDirectory(filename);
10138 }
10139
10140 inputStream = xmlNewInputStream(ctxt);
10141 if (inputStream == NULL) {
10142 xmlFreeParserCtxt(ctxt);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000010143 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000010144 return(NULL);
10145 }
10146
10147 if (filename == NULL)
10148 inputStream->filename = NULL;
10149 else
Daniel Veillardf4862f02002-09-10 11:13:43 +000010150 inputStream->filename = (char *)
Igor Zlatkovic5f9fada2003-02-19 14:51:00 +000010151 xmlCanonicPath((const xmlChar *) filename);
Owen Taylor3473f882001-02-23 17:55:21 +000010152 inputStream->buf = buf;
10153 inputStream->base = inputStream->buf->buffer->content;
10154 inputStream->cur = inputStream->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +000010155 inputStream->end =
10156 &inputStream->buf->buffer->content[inputStream->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000010157
10158 inputPush(ctxt, inputStream);
10159
10160 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
10161 (ctxt->input->buf != NULL)) {
Daniel Veillardaa39a0f2002-01-06 12:47:22 +000010162 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
10163 int cur = ctxt->input->cur - ctxt->input->base;
10164
Owen Taylor3473f882001-02-23 17:55:21 +000010165 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
Daniel Veillardaa39a0f2002-01-06 12:47:22 +000010166
10167 ctxt->input->base = ctxt->input->buf->buffer->content + base;
10168 ctxt->input->cur = ctxt->input->base + cur;
10169 ctxt->input->end =
10170 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000010171#ifdef DEBUG_PUSH
10172 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
10173#endif
10174 }
10175
Daniel Veillard0e4cd172001-06-28 12:13:56 +000010176 if (enc != XML_CHAR_ENCODING_NONE) {
10177 xmlSwitchEncoding(ctxt, enc);
10178 }
10179
Owen Taylor3473f882001-02-23 17:55:21 +000010180 return(ctxt);
10181}
Daniel Veillard73b013f2003-09-30 12:36:01 +000010182#endif /* LIBXML_PUSH_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000010183
10184/**
10185 * xmlCreateIOParserCtxt:
10186 * @sax: a SAX handler
10187 * @user_data: The user data returned on SAX callbacks
10188 * @ioread: an I/O read function
10189 * @ioclose: an I/O close function
10190 * @ioctx: an I/O handler
10191 * @enc: the charset encoding if known
10192 *
10193 * Create a parser context for using the XML parser with an existing
10194 * I/O stream
10195 *
10196 * Returns the new parser context or NULL
10197 */
10198xmlParserCtxtPtr
10199xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
10200 xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
10201 void *ioctx, xmlCharEncoding enc) {
10202 xmlParserCtxtPtr ctxt;
10203 xmlParserInputPtr inputStream;
10204 xmlParserInputBufferPtr buf;
10205
10206 buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
10207 if (buf == NULL) return(NULL);
10208
10209 ctxt = xmlNewParserCtxt();
10210 if (ctxt == NULL) {
10211 xmlFree(buf);
10212 return(NULL);
10213 }
10214 if (sax != NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +000010215#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard092643b2003-09-25 14:29:29 +000010216 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Daniel Veillard81273902003-09-30 00:43:48 +000010217#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000010218 xmlFree(ctxt->sax);
10219 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
10220 if (ctxt->sax == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010221 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010222 xmlFree(ctxt);
10223 return(NULL);
10224 }
10225 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
10226 if (user_data != NULL)
10227 ctxt->userData = user_data;
10228 }
10229
10230 inputStream = xmlNewIOInputStream(ctxt, buf, enc);
10231 if (inputStream == NULL) {
10232 xmlFreeParserCtxt(ctxt);
10233 return(NULL);
10234 }
10235 inputPush(ctxt, inputStream);
10236
10237 return(ctxt);
10238}
10239
Daniel Veillard4432df22003-09-28 18:58:27 +000010240#ifdef LIBXML_VALID_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000010241/************************************************************************
10242 * *
Daniel Veillardcbaf3992001-12-31 16:16:02 +000010243 * Front ends when parsing a DTD *
Owen Taylor3473f882001-02-23 17:55:21 +000010244 * *
10245 ************************************************************************/
10246
10247/**
10248 * xmlIOParseDTD:
10249 * @sax: the SAX handler block or NULL
10250 * @input: an Input Buffer
10251 * @enc: the charset encoding if known
10252 *
10253 * Load and parse a DTD
10254 *
10255 * Returns the resulting xmlDtdPtr or NULL in case of error.
10256 * @input will be freed at parsing end.
10257 */
10258
10259xmlDtdPtr
10260xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
10261 xmlCharEncoding enc) {
10262 xmlDtdPtr ret = NULL;
10263 xmlParserCtxtPtr ctxt;
10264 xmlParserInputPtr pinput = NULL;
Daniel Veillard87a764e2001-06-20 17:41:10 +000010265 xmlChar start[4];
Owen Taylor3473f882001-02-23 17:55:21 +000010266
10267 if (input == NULL)
10268 return(NULL);
10269
10270 ctxt = xmlNewParserCtxt();
10271 if (ctxt == NULL) {
10272 return(NULL);
10273 }
10274
10275 /*
10276 * Set-up the SAX context
10277 */
10278 if (sax != NULL) {
10279 if (ctxt->sax != NULL)
10280 xmlFree(ctxt->sax);
10281 ctxt->sax = sax;
10282 ctxt->userData = NULL;
10283 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000010284 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010285
10286 /*
10287 * generate a parser input from the I/O handler
10288 */
10289
10290 pinput = xmlNewIOInputStream(ctxt, input, enc);
10291 if (pinput == NULL) {
10292 if (sax != NULL) ctxt->sax = NULL;
10293 xmlFreeParserCtxt(ctxt);
10294 return(NULL);
10295 }
10296
10297 /*
10298 * plug some encoding conversion routines here.
10299 */
10300 xmlPushInput(ctxt, pinput);
10301
10302 pinput->filename = NULL;
10303 pinput->line = 1;
10304 pinput->col = 1;
10305 pinput->base = ctxt->input->cur;
10306 pinput->cur = ctxt->input->cur;
10307 pinput->free = NULL;
10308
10309 /*
10310 * let's parse that entity knowing it's an external subset.
10311 */
10312 ctxt->inSubset = 2;
10313 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
10314 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
10315 BAD_CAST "none", BAD_CAST "none");
Daniel Veillard87a764e2001-06-20 17:41:10 +000010316
10317 if (enc == XML_CHAR_ENCODING_NONE) {
10318 /*
10319 * Get the 4 first bytes and decode the charset
10320 * if enc != XML_CHAR_ENCODING_NONE
10321 * plug some encoding conversion routines.
10322 */
10323 start[0] = RAW;
10324 start[1] = NXT(1);
10325 start[2] = NXT(2);
10326 start[3] = NXT(3);
10327 enc = xmlDetectCharEncoding(start, 4);
10328 if (enc != XML_CHAR_ENCODING_NONE) {
10329 xmlSwitchEncoding(ctxt, enc);
10330 }
10331 }
10332
Owen Taylor3473f882001-02-23 17:55:21 +000010333 xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
10334
10335 if (ctxt->myDoc != NULL) {
10336 if (ctxt->wellFormed) {
10337 ret = ctxt->myDoc->extSubset;
10338 ctxt->myDoc->extSubset = NULL;
Daniel Veillard329456a2003-04-26 21:21:00 +000010339 if (ret != NULL) {
10340 xmlNodePtr tmp;
10341
10342 ret->doc = NULL;
10343 tmp = ret->children;
10344 while (tmp != NULL) {
10345 tmp->doc = NULL;
10346 tmp = tmp->next;
10347 }
10348 }
Owen Taylor3473f882001-02-23 17:55:21 +000010349 } else {
10350 ret = NULL;
10351 }
10352 xmlFreeDoc(ctxt->myDoc);
10353 ctxt->myDoc = NULL;
10354 }
10355 if (sax != NULL) ctxt->sax = NULL;
10356 xmlFreeParserCtxt(ctxt);
10357
10358 return(ret);
10359}
10360
10361/**
10362 * xmlSAXParseDTD:
10363 * @sax: the SAX handler block
10364 * @ExternalID: a NAME* containing the External ID of the DTD
10365 * @SystemID: a NAME* containing the URL to the DTD
10366 *
10367 * Load and parse an external subset.
10368 *
10369 * Returns the resulting xmlDtdPtr or NULL in case of error.
10370 */
10371
10372xmlDtdPtr
10373xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
10374 const xmlChar *SystemID) {
10375 xmlDtdPtr ret = NULL;
10376 xmlParserCtxtPtr ctxt;
10377 xmlParserInputPtr input = NULL;
10378 xmlCharEncoding enc;
10379
10380 if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
10381
10382 ctxt = xmlNewParserCtxt();
10383 if (ctxt == NULL) {
10384 return(NULL);
10385 }
10386
10387 /*
10388 * Set-up the SAX context
10389 */
10390 if (sax != NULL) {
10391 if (ctxt->sax != NULL)
10392 xmlFree(ctxt->sax);
10393 ctxt->sax = sax;
Daniel Veillardbf1e3d82003-08-14 23:57:26 +000010394 ctxt->userData = ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000010395 }
10396
10397 /*
10398 * Ask the Entity resolver to load the damn thing
10399 */
10400
10401 if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
Daniel Veillardc6abc3d2003-04-26 13:27:30 +000010402 input = ctxt->sax->resolveEntity(ctxt, ExternalID, SystemID);
Owen Taylor3473f882001-02-23 17:55:21 +000010403 if (input == NULL) {
10404 if (sax != NULL) ctxt->sax = NULL;
10405 xmlFreeParserCtxt(ctxt);
10406 return(NULL);
10407 }
10408
10409 /*
10410 * plug some encoding conversion routines here.
10411 */
10412 xmlPushInput(ctxt, input);
10413 enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
10414 xmlSwitchEncoding(ctxt, enc);
10415
10416 if (input->filename == NULL)
Daniel Veillard85095e22003-04-23 13:56:44 +000010417 input->filename = (char *) xmlCanonicPath(SystemID);
Owen Taylor3473f882001-02-23 17:55:21 +000010418 input->line = 1;
10419 input->col = 1;
10420 input->base = ctxt->input->cur;
10421 input->cur = ctxt->input->cur;
10422 input->free = NULL;
10423
10424 /*
10425 * let's parse that entity knowing it's an external subset.
10426 */
10427 ctxt->inSubset = 2;
10428 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
10429 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
10430 ExternalID, SystemID);
10431 xmlParseExternalSubset(ctxt, ExternalID, SystemID);
10432
10433 if (ctxt->myDoc != NULL) {
10434 if (ctxt->wellFormed) {
10435 ret = ctxt->myDoc->extSubset;
10436 ctxt->myDoc->extSubset = NULL;
Daniel Veillardc5573462003-04-25 16:43:49 +000010437 if (ret != NULL) {
10438 xmlNodePtr tmp;
10439
10440 ret->doc = NULL;
10441 tmp = ret->children;
10442 while (tmp != NULL) {
10443 tmp->doc = NULL;
10444 tmp = tmp->next;
10445 }
10446 }
Owen Taylor3473f882001-02-23 17:55:21 +000010447 } else {
10448 ret = NULL;
10449 }
10450 xmlFreeDoc(ctxt->myDoc);
10451 ctxt->myDoc = NULL;
10452 }
10453 if (sax != NULL) ctxt->sax = NULL;
10454 xmlFreeParserCtxt(ctxt);
10455
10456 return(ret);
10457}
10458
Daniel Veillard4432df22003-09-28 18:58:27 +000010459
Owen Taylor3473f882001-02-23 17:55:21 +000010460/**
10461 * xmlParseDTD:
10462 * @ExternalID: a NAME* containing the External ID of the DTD
10463 * @SystemID: a NAME* containing the URL to the DTD
10464 *
10465 * Load and parse an external subset.
10466 *
10467 * Returns the resulting xmlDtdPtr or NULL in case of error.
10468 */
10469
10470xmlDtdPtr
10471xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
10472 return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
10473}
Daniel Veillard4432df22003-09-28 18:58:27 +000010474#endif /* LIBXML_VALID_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000010475
10476/************************************************************************
10477 * *
10478 * Front ends when parsing an Entity *
10479 * *
10480 ************************************************************************/
10481
10482/**
Owen Taylor3473f882001-02-23 17:55:21 +000010483 * xmlParseCtxtExternalEntity:
10484 * @ctx: the existing parsing context
10485 * @URL: the URL for the entity to load
10486 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +000010487 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +000010488 *
10489 * Parse an external general entity within an existing parsing context
10490 * An external general parsed entity is well-formed if it matches the
10491 * production labeled extParsedEnt.
10492 *
10493 * [78] extParsedEnt ::= TextDecl? content
10494 *
10495 * Returns 0 if the entity is well formed, -1 in case of args problem and
10496 * the parser error code otherwise
10497 */
10498
10499int
10500xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
Daniel Veillardcda96922001-08-21 10:56:31 +000010501 const xmlChar *ID, xmlNodePtr *lst) {
Owen Taylor3473f882001-02-23 17:55:21 +000010502 xmlParserCtxtPtr ctxt;
10503 xmlDocPtr newDoc;
10504 xmlSAXHandlerPtr oldsax = NULL;
10505 int ret = 0;
Daniel Veillard87a764e2001-06-20 17:41:10 +000010506 xmlChar start[4];
10507 xmlCharEncoding enc;
Owen Taylor3473f882001-02-23 17:55:21 +000010508
10509 if (ctx->depth > 40) {
10510 return(XML_ERR_ENTITY_LOOP);
10511 }
10512
Daniel Veillardcda96922001-08-21 10:56:31 +000010513 if (lst != NULL)
10514 *lst = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000010515 if ((URL == NULL) && (ID == NULL))
10516 return(-1);
10517 if (ctx->myDoc == NULL) /* @@ relax but check for dereferences */
10518 return(-1);
10519
10520
10521 ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL);
10522 if (ctxt == NULL) return(-1);
10523 ctxt->userData = ctxt;
Daniel Veillarde2830f12003-01-08 17:47:49 +000010524 ctxt->_private = ctx->_private;
Owen Taylor3473f882001-02-23 17:55:21 +000010525 oldsax = ctxt->sax;
10526 ctxt->sax = ctx->sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000010527 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010528 newDoc = xmlNewDoc(BAD_CAST "1.0");
10529 if (newDoc == NULL) {
10530 xmlFreeParserCtxt(ctxt);
10531 return(-1);
10532 }
10533 if (ctx->myDoc != NULL) {
10534 newDoc->intSubset = ctx->myDoc->intSubset;
10535 newDoc->extSubset = ctx->myDoc->extSubset;
10536 }
10537 if (ctx->myDoc->URL != NULL) {
10538 newDoc->URL = xmlStrdup(ctx->myDoc->URL);
10539 }
10540 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
10541 if (newDoc->children == NULL) {
10542 ctxt->sax = oldsax;
10543 xmlFreeParserCtxt(ctxt);
10544 newDoc->intSubset = NULL;
10545 newDoc->extSubset = NULL;
10546 xmlFreeDoc(newDoc);
10547 return(-1);
10548 }
10549 nodePush(ctxt, newDoc->children);
10550 if (ctx->myDoc == NULL) {
10551 ctxt->myDoc = newDoc;
10552 } else {
10553 ctxt->myDoc = ctx->myDoc;
10554 newDoc->children->doc = ctx->myDoc;
10555 }
10556
Daniel Veillard87a764e2001-06-20 17:41:10 +000010557 /*
10558 * Get the 4 first bytes and decode the charset
10559 * if enc != XML_CHAR_ENCODING_NONE
10560 * plug some encoding conversion routines.
10561 */
10562 GROW
10563 start[0] = RAW;
10564 start[1] = NXT(1);
10565 start[2] = NXT(2);
10566 start[3] = NXT(3);
10567 enc = xmlDetectCharEncoding(start, 4);
10568 if (enc != XML_CHAR_ENCODING_NONE) {
10569 xmlSwitchEncoding(ctxt, enc);
10570 }
10571
Owen Taylor3473f882001-02-23 17:55:21 +000010572 /*
10573 * Parse a possible text declaration first
10574 */
Daniel Veillard8f597c32003-10-06 08:19:27 +000010575 if ((memcmp(CUR_PTR, "<?xml", 5) == 0) && (IS_BLANK(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +000010576 xmlParseTextDecl(ctxt);
10577 }
10578
10579 /*
10580 * Doing validity checking on chunk doesn't make sense
10581 */
10582 ctxt->instate = XML_PARSER_CONTENT;
10583 ctxt->validate = ctx->validate;
Daniel Veillard5f8d1a32003-03-23 21:02:00 +000010584 ctxt->valid = ctx->valid;
Owen Taylor3473f882001-02-23 17:55:21 +000010585 ctxt->loadsubset = ctx->loadsubset;
10586 ctxt->depth = ctx->depth + 1;
10587 ctxt->replaceEntities = ctx->replaceEntities;
10588 if (ctxt->validate) {
10589 ctxt->vctxt.error = ctx->vctxt.error;
10590 ctxt->vctxt.warning = ctx->vctxt.warning;
Owen Taylor3473f882001-02-23 17:55:21 +000010591 } else {
10592 ctxt->vctxt.error = NULL;
10593 ctxt->vctxt.warning = NULL;
10594 }
Daniel Veillarda9142e72001-06-19 11:07:54 +000010595 ctxt->vctxt.nodeTab = NULL;
10596 ctxt->vctxt.nodeNr = 0;
10597 ctxt->vctxt.nodeMax = 0;
10598 ctxt->vctxt.node = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000010599
10600 xmlParseContent(ctxt);
10601
Daniel Veillard5f8d1a32003-03-23 21:02:00 +000010602 ctx->validate = ctxt->validate;
10603 ctx->valid = ctxt->valid;
Owen Taylor3473f882001-02-23 17:55:21 +000010604 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010605 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010606 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010607 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010608 }
10609 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010610 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010611 }
10612
10613 if (!ctxt->wellFormed) {
10614 if (ctxt->errNo == 0)
10615 ret = 1;
10616 else
10617 ret = ctxt->errNo;
10618 } else {
Daniel Veillardcda96922001-08-21 10:56:31 +000010619 if (lst != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000010620 xmlNodePtr cur;
10621
10622 /*
10623 * Return the newly created nodeset after unlinking it from
10624 * they pseudo parent.
10625 */
10626 cur = newDoc->children->children;
Daniel Veillardcda96922001-08-21 10:56:31 +000010627 *lst = cur;
Owen Taylor3473f882001-02-23 17:55:21 +000010628 while (cur != NULL) {
10629 cur->parent = NULL;
10630 cur = cur->next;
10631 }
10632 newDoc->children->children = NULL;
10633 }
10634 ret = 0;
10635 }
10636 ctxt->sax = oldsax;
10637 xmlFreeParserCtxt(ctxt);
10638 newDoc->intSubset = NULL;
10639 newDoc->extSubset = NULL;
10640 xmlFreeDoc(newDoc);
10641
10642 return(ret);
10643}
10644
10645/**
Daniel Veillard257d9102001-05-08 10:41:44 +000010646 * xmlParseExternalEntityPrivate:
Owen Taylor3473f882001-02-23 17:55:21 +000010647 * @doc: the document the chunk pertains to
Daniel Veillarda97a19b2001-05-20 13:19:52 +000010648 * @oldctxt: the previous parser context if available
Owen Taylor3473f882001-02-23 17:55:21 +000010649 * @sax: the SAX handler bloc (possibly NULL)
10650 * @user_data: The user data returned on SAX callbacks (possibly NULL)
10651 * @depth: Used for loop detection, use 0
10652 * @URL: the URL for the entity to load
10653 * @ID: the System ID for the entity to load
10654 * @list: the return value for the set of parsed nodes
10655 *
Daniel Veillard257d9102001-05-08 10:41:44 +000010656 * Private version of xmlParseExternalEntity()
Owen Taylor3473f882001-02-23 17:55:21 +000010657 *
10658 * Returns 0 if the entity is well formed, -1 in case of args problem and
10659 * the parser error code otherwise
10660 */
10661
Daniel Veillard7d515752003-09-26 19:12:37 +000010662static xmlParserErrors
Daniel Veillarda97a19b2001-05-20 13:19:52 +000010663xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
10664 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +000010665 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +000010666 const xmlChar *ID, xmlNodePtr *list) {
Owen Taylor3473f882001-02-23 17:55:21 +000010667 xmlParserCtxtPtr ctxt;
10668 xmlDocPtr newDoc;
10669 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillard7d515752003-09-26 19:12:37 +000010670 xmlParserErrors ret = XML_ERR_OK;
Daniel Veillard87a764e2001-06-20 17:41:10 +000010671 xmlChar start[4];
10672 xmlCharEncoding enc;
Owen Taylor3473f882001-02-23 17:55:21 +000010673
10674 if (depth > 40) {
10675 return(XML_ERR_ENTITY_LOOP);
10676 }
10677
10678
10679
10680 if (list != NULL)
10681 *list = NULL;
10682 if ((URL == NULL) && (ID == NULL))
Daniel Veillard7d515752003-09-26 19:12:37 +000010683 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000010684 if (doc == NULL) /* @@ relax but check for dereferences */
Daniel Veillard7d515752003-09-26 19:12:37 +000010685 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000010686
10687
10688 ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL);
William M. Brackb670e2e2003-09-27 01:05:55 +000010689 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
Owen Taylor3473f882001-02-23 17:55:21 +000010690 ctxt->userData = ctxt;
Daniel Veillarda97a19b2001-05-20 13:19:52 +000010691 if (oldctxt != NULL) {
10692 ctxt->_private = oldctxt->_private;
10693 ctxt->loadsubset = oldctxt->loadsubset;
10694 ctxt->validate = oldctxt->validate;
10695 ctxt->external = oldctxt->external;
Daniel Veillard44e1dd02003-02-21 23:23:28 +000010696 ctxt->record_info = oldctxt->record_info;
10697 ctxt->node_seq.maximum = oldctxt->node_seq.maximum;
10698 ctxt->node_seq.length = oldctxt->node_seq.length;
10699 ctxt->node_seq.buffer = oldctxt->node_seq.buffer;
Daniel Veillarda97a19b2001-05-20 13:19:52 +000010700 } else {
10701 /*
10702 * Doing validity checking on chunk without context
10703 * doesn't make sense
10704 */
10705 ctxt->_private = NULL;
10706 ctxt->validate = 0;
10707 ctxt->external = 2;
10708 ctxt->loadsubset = 0;
10709 }
Owen Taylor3473f882001-02-23 17:55:21 +000010710 if (sax != NULL) {
10711 oldsax = ctxt->sax;
10712 ctxt->sax = sax;
10713 if (user_data != NULL)
10714 ctxt->userData = user_data;
10715 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000010716 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010717 newDoc = xmlNewDoc(BAD_CAST "1.0");
10718 if (newDoc == NULL) {
Daniel Veillard44e1dd02003-02-21 23:23:28 +000010719 ctxt->node_seq.maximum = 0;
10720 ctxt->node_seq.length = 0;
10721 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000010722 xmlFreeParserCtxt(ctxt);
Daniel Veillard7d515752003-09-26 19:12:37 +000010723 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000010724 }
10725 if (doc != NULL) {
10726 newDoc->intSubset = doc->intSubset;
10727 newDoc->extSubset = doc->extSubset;
10728 }
10729 if (doc->URL != NULL) {
10730 newDoc->URL = xmlStrdup(doc->URL);
10731 }
10732 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
10733 if (newDoc->children == NULL) {
10734 if (sax != NULL)
10735 ctxt->sax = oldsax;
Daniel Veillard44e1dd02003-02-21 23:23:28 +000010736 ctxt->node_seq.maximum = 0;
10737 ctxt->node_seq.length = 0;
10738 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000010739 xmlFreeParserCtxt(ctxt);
10740 newDoc->intSubset = NULL;
10741 newDoc->extSubset = NULL;
10742 xmlFreeDoc(newDoc);
Daniel Veillard7d515752003-09-26 19:12:37 +000010743 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000010744 }
10745 nodePush(ctxt, newDoc->children);
10746 if (doc == NULL) {
10747 ctxt->myDoc = newDoc;
10748 } else {
10749 ctxt->myDoc = doc;
10750 newDoc->children->doc = doc;
10751 }
10752
Daniel Veillard87a764e2001-06-20 17:41:10 +000010753 /*
10754 * Get the 4 first bytes and decode the charset
10755 * if enc != XML_CHAR_ENCODING_NONE
10756 * plug some encoding conversion routines.
10757 */
10758 GROW;
10759 start[0] = RAW;
10760 start[1] = NXT(1);
10761 start[2] = NXT(2);
10762 start[3] = NXT(3);
10763 enc = xmlDetectCharEncoding(start, 4);
10764 if (enc != XML_CHAR_ENCODING_NONE) {
10765 xmlSwitchEncoding(ctxt, enc);
10766 }
10767
Owen Taylor3473f882001-02-23 17:55:21 +000010768 /*
10769 * Parse a possible text declaration first
10770 */
Daniel Veillard8f597c32003-10-06 08:19:27 +000010771 if ((memcmp(CUR_PTR, "<?xml", 5) == 0) && (IS_BLANK(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +000010772 xmlParseTextDecl(ctxt);
10773 }
10774
Owen Taylor3473f882001-02-23 17:55:21 +000010775 ctxt->instate = XML_PARSER_CONTENT;
Owen Taylor3473f882001-02-23 17:55:21 +000010776 ctxt->depth = depth;
10777
10778 xmlParseContent(ctxt);
10779
Daniel Veillard561b7f82002-03-20 21:55:57 +000010780 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010781 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard561b7f82002-03-20 21:55:57 +000010782 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010783 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010784 }
10785 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010786 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010787 }
10788
10789 if (!ctxt->wellFormed) {
10790 if (ctxt->errNo == 0)
Daniel Veillard7d515752003-09-26 19:12:37 +000010791 ret = XML_ERR_INTERNAL_ERROR;
Owen Taylor3473f882001-02-23 17:55:21 +000010792 else
William M. Brack7b9154b2003-09-27 19:23:50 +000010793 ret = (xmlParserErrors)ctxt->errNo;
Owen Taylor3473f882001-02-23 17:55:21 +000010794 } else {
10795 if (list != NULL) {
10796 xmlNodePtr cur;
10797
10798 /*
10799 * Return the newly created nodeset after unlinking it from
10800 * they pseudo parent.
10801 */
10802 cur = newDoc->children->children;
10803 *list = cur;
10804 while (cur != NULL) {
10805 cur->parent = NULL;
10806 cur = cur->next;
10807 }
10808 newDoc->children->children = NULL;
10809 }
Daniel Veillard7d515752003-09-26 19:12:37 +000010810 ret = XML_ERR_OK;
Owen Taylor3473f882001-02-23 17:55:21 +000010811 }
10812 if (sax != NULL)
10813 ctxt->sax = oldsax;
Daniel Veillard0046c0f2003-02-23 13:52:30 +000010814 oldctxt->node_seq.maximum = ctxt->node_seq.maximum;
10815 oldctxt->node_seq.length = ctxt->node_seq.length;
10816 oldctxt->node_seq.buffer = ctxt->node_seq.buffer;
Daniel Veillard44e1dd02003-02-21 23:23:28 +000010817 ctxt->node_seq.maximum = 0;
10818 ctxt->node_seq.length = 0;
10819 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000010820 xmlFreeParserCtxt(ctxt);
10821 newDoc->intSubset = NULL;
10822 newDoc->extSubset = NULL;
10823 xmlFreeDoc(newDoc);
10824
10825 return(ret);
10826}
10827
Daniel Veillard81273902003-09-30 00:43:48 +000010828#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000010829/**
Daniel Veillard257d9102001-05-08 10:41:44 +000010830 * xmlParseExternalEntity:
10831 * @doc: the document the chunk pertains to
10832 * @sax: the SAX handler bloc (possibly NULL)
10833 * @user_data: The user data returned on SAX callbacks (possibly NULL)
10834 * @depth: Used for loop detection, use 0
10835 * @URL: the URL for the entity to load
10836 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +000010837 * @lst: the return value for the set of parsed nodes
Daniel Veillard257d9102001-05-08 10:41:44 +000010838 *
10839 * Parse an external general entity
10840 * An external general parsed entity is well-formed if it matches the
10841 * production labeled extParsedEnt.
10842 *
10843 * [78] extParsedEnt ::= TextDecl? content
10844 *
10845 * Returns 0 if the entity is well formed, -1 in case of args problem and
10846 * the parser error code otherwise
10847 */
10848
10849int
10850xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardcda96922001-08-21 10:56:31 +000010851 int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *lst) {
Daniel Veillarda97a19b2001-05-20 13:19:52 +000010852 return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL,
Daniel Veillardcda96922001-08-21 10:56:31 +000010853 ID, lst));
Daniel Veillard257d9102001-05-08 10:41:44 +000010854}
10855
10856/**
Daniel Veillarde020c3a2001-03-21 18:06:15 +000010857 * xmlParseBalancedChunkMemory:
Owen Taylor3473f882001-02-23 17:55:21 +000010858 * @doc: the document the chunk pertains to
10859 * @sax: the SAX handler bloc (possibly NULL)
10860 * @user_data: The user data returned on SAX callbacks (possibly NULL)
10861 * @depth: Used for loop detection, use 0
10862 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
Daniel Veillardcda96922001-08-21 10:56:31 +000010863 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +000010864 *
10865 * Parse a well-balanced chunk of an XML document
10866 * called by the parser
10867 * The allowed sequence for the Well Balanced Chunk is the one defined by
10868 * the content production in the XML grammar:
10869 *
10870 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
10871 *
10872 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
10873 * the parser error code otherwise
10874 */
10875
10876int
10877xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
Daniel Veillardcda96922001-08-21 10:56:31 +000010878 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) {
Daniel Veillard58e44c92002-08-02 22:19:49 +000010879 return xmlParseBalancedChunkMemoryRecover( doc, sax, user_data,
10880 depth, string, lst, 0 );
10881}
Daniel Veillard81273902003-09-30 00:43:48 +000010882#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillard58e44c92002-08-02 22:19:49 +000010883
10884/**
Daniel Veillard328f48c2002-11-15 15:24:34 +000010885 * xmlParseBalancedChunkMemoryInternal:
10886 * @oldctxt: the existing parsing context
10887 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
10888 * @user_data: the user data field for the parser context
10889 * @lst: the return value for the set of parsed nodes
10890 *
10891 *
10892 * Parse a well-balanced chunk of an XML document
10893 * called by the parser
10894 * The allowed sequence for the Well Balanced Chunk is the one defined by
10895 * the content production in the XML grammar:
10896 *
10897 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
10898 *
Daniel Veillard7d515752003-09-26 19:12:37 +000010899 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
10900 * error code otherwise
Daniel Veillard328f48c2002-11-15 15:24:34 +000010901 *
10902 * In case recover is set to 1, the nodelist will not be empty even if
10903 * the parsed chunk is not well balanced.
10904 */
Daniel Veillard7d515752003-09-26 19:12:37 +000010905static xmlParserErrors
Daniel Veillard328f48c2002-11-15 15:24:34 +000010906xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
10907 const xmlChar *string, void *user_data, xmlNodePtr *lst) {
10908 xmlParserCtxtPtr ctxt;
Daniel Veillard68e9e742002-11-16 15:35:11 +000010909 xmlDocPtr newDoc = NULL;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010910 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000010911 xmlNodePtr content = NULL;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010912 int size;
Daniel Veillard7d515752003-09-26 19:12:37 +000010913 xmlParserErrors ret = XML_ERR_OK;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010914
10915 if (oldctxt->depth > 40) {
10916 return(XML_ERR_ENTITY_LOOP);
10917 }
10918
10919
10920 if (lst != NULL)
10921 *lst = NULL;
10922 if (string == NULL)
William M. Brack7b9154b2003-09-27 19:23:50 +000010923 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard328f48c2002-11-15 15:24:34 +000010924
10925 size = xmlStrlen(string);
10926
10927 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
William M. Brack7b9154b2003-09-27 19:23:50 +000010928 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
Daniel Veillard328f48c2002-11-15 15:24:34 +000010929 if (user_data != NULL)
10930 ctxt->userData = user_data;
10931 else
10932 ctxt->userData = ctxt;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000010933 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
10934 ctxt->dict = oldctxt->dict;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010935
10936 oldsax = ctxt->sax;
10937 ctxt->sax = oldctxt->sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000010938 xmlDetectSAX2(ctxt);
10939
Daniel Veillarde1ca5032002-12-09 14:13:43 +000010940 ctxt->_private = oldctxt->_private;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010941 if (oldctxt->myDoc == NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000010942 newDoc = xmlNewDoc(BAD_CAST "1.0");
10943 if (newDoc == NULL) {
10944 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000010945 ctxt->dict = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000010946 xmlFreeParserCtxt(ctxt);
William M. Brack7b9154b2003-09-27 19:23:50 +000010947 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard68e9e742002-11-16 15:35:11 +000010948 }
Daniel Veillard328f48c2002-11-15 15:24:34 +000010949 ctxt->myDoc = newDoc;
Daniel Veillard68e9e742002-11-16 15:35:11 +000010950 } else {
10951 ctxt->myDoc = oldctxt->myDoc;
10952 content = ctxt->myDoc->children;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010953 }
Daniel Veillard9bc53102002-11-25 13:20:04 +000010954 ctxt->myDoc->children = xmlNewDocNode(ctxt->myDoc, NULL,
Daniel Veillard68e9e742002-11-16 15:35:11 +000010955 BAD_CAST "pseudoroot", NULL);
10956 if (ctxt->myDoc->children == NULL) {
10957 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000010958 ctxt->dict = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000010959 xmlFreeParserCtxt(ctxt);
10960 if (newDoc != NULL)
10961 xmlFreeDoc(newDoc);
William M. Brack7b9154b2003-09-27 19:23:50 +000010962 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard68e9e742002-11-16 15:35:11 +000010963 }
10964 nodePush(ctxt, ctxt->myDoc->children);
Daniel Veillard328f48c2002-11-15 15:24:34 +000010965 ctxt->instate = XML_PARSER_CONTENT;
10966 ctxt->depth = oldctxt->depth + 1;
10967
Daniel Veillard328f48c2002-11-15 15:24:34 +000010968 ctxt->validate = 0;
10969 ctxt->loadsubset = oldctxt->loadsubset;
Daniel Veillardef8dd7b2003-03-23 12:02:56 +000010970 if ((oldctxt->validate) || (oldctxt->replaceEntities != 0)) {
10971 /*
10972 * ID/IDREF registration will be done in xmlValidateElement below
10973 */
10974 ctxt->loadsubset |= XML_SKIP_IDS;
10975 }
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000010976 ctxt->dictNames = oldctxt->dictNames;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010977
Daniel Veillard68e9e742002-11-16 15:35:11 +000010978 xmlParseContent(ctxt);
Daniel Veillard328f48c2002-11-15 15:24:34 +000010979 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010980 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000010981 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010982 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000010983 }
Daniel Veillard68e9e742002-11-16 15:35:11 +000010984 if (ctxt->node != ctxt->myDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010985 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000010986 }
10987
10988 if (!ctxt->wellFormed) {
10989 if (ctxt->errNo == 0)
Daniel Veillard7d515752003-09-26 19:12:37 +000010990 ret = XML_ERR_INTERNAL_ERROR;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010991 else
William M. Brack7b9154b2003-09-27 19:23:50 +000010992 ret = (xmlParserErrors)ctxt->errNo;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010993 } else {
William M. Brack7b9154b2003-09-27 19:23:50 +000010994 ret = XML_ERR_OK;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010995 }
10996
William M. Brack7b9154b2003-09-27 19:23:50 +000010997 if ((lst != NULL) && (ret == XML_ERR_OK)) {
Daniel Veillard328f48c2002-11-15 15:24:34 +000010998 xmlNodePtr cur;
10999
11000 /*
11001 * Return the newly created nodeset after unlinking it from
11002 * they pseudo parent.
11003 */
Daniel Veillard68e9e742002-11-16 15:35:11 +000011004 cur = ctxt->myDoc->children->children;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011005 *lst = cur;
11006 while (cur != NULL) {
Daniel Veillard4432df22003-09-28 18:58:27 +000011007#ifdef LIBXML_VALID_ENABLED
Daniel Veillard8d589042003-02-04 15:07:21 +000011008 if (oldctxt->validate && oldctxt->wellFormed &&
11009 oldctxt->myDoc && oldctxt->myDoc->intSubset) {
11010 oldctxt->valid &= xmlValidateElement(&oldctxt->vctxt,
11011 oldctxt->myDoc, cur);
11012 }
Daniel Veillard4432df22003-09-28 18:58:27 +000011013#endif /* LIBXML_VALID_ENABLED */
Daniel Veillard328f48c2002-11-15 15:24:34 +000011014 cur->parent = NULL;
11015 cur = cur->next;
11016 }
Daniel Veillard68e9e742002-11-16 15:35:11 +000011017 ctxt->myDoc->children->children = NULL;
11018 }
11019 if (ctxt->myDoc != NULL) {
11020 xmlFreeNode(ctxt->myDoc->children);
11021 ctxt->myDoc->children = content;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011022 }
11023
11024 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000011025 ctxt->dict = NULL;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011026 xmlFreeParserCtxt(ctxt);
Daniel Veillard68e9e742002-11-16 15:35:11 +000011027 if (newDoc != NULL)
11028 xmlFreeDoc(newDoc);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011029
11030 return(ret);
11031}
11032
Daniel Veillard81273902003-09-30 00:43:48 +000011033#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard328f48c2002-11-15 15:24:34 +000011034/**
Daniel Veillard58e44c92002-08-02 22:19:49 +000011035 * xmlParseBalancedChunkMemoryRecover:
11036 * @doc: the document the chunk pertains to
11037 * @sax: the SAX handler bloc (possibly NULL)
11038 * @user_data: The user data returned on SAX callbacks (possibly NULL)
11039 * @depth: Used for loop detection, use 0
11040 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
11041 * @lst: the return value for the set of parsed nodes
11042 * @recover: return nodes even if the data is broken (use 0)
11043 *
11044 *
11045 * Parse a well-balanced chunk of an XML document
11046 * called by the parser
11047 * The allowed sequence for the Well Balanced Chunk is the one defined by
11048 * the content production in the XML grammar:
11049 *
11050 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
11051 *
11052 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
11053 * the parser error code otherwise
11054 *
11055 * In case recover is set to 1, the nodelist will not be empty even if
11056 * the parsed chunk is not well balanced.
11057 */
11058int
11059xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax,
11060 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst,
11061 int recover) {
Owen Taylor3473f882001-02-23 17:55:21 +000011062 xmlParserCtxtPtr ctxt;
11063 xmlDocPtr newDoc;
11064 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillard935494a2002-10-22 14:22:46 +000011065 xmlNodePtr content;
Owen Taylor3473f882001-02-23 17:55:21 +000011066 int size;
11067 int ret = 0;
11068
11069 if (depth > 40) {
11070 return(XML_ERR_ENTITY_LOOP);
11071 }
11072
11073
Daniel Veillardcda96922001-08-21 10:56:31 +000011074 if (lst != NULL)
11075 *lst = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000011076 if (string == NULL)
11077 return(-1);
11078
11079 size = xmlStrlen(string);
11080
11081 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
11082 if (ctxt == NULL) return(-1);
11083 ctxt->userData = ctxt;
11084 if (sax != NULL) {
11085 oldsax = ctxt->sax;
11086 ctxt->sax = sax;
11087 if (user_data != NULL)
11088 ctxt->userData = user_data;
11089 }
11090 newDoc = xmlNewDoc(BAD_CAST "1.0");
11091 if (newDoc == NULL) {
11092 xmlFreeParserCtxt(ctxt);
11093 return(-1);
11094 }
11095 if (doc != NULL) {
11096 newDoc->intSubset = doc->intSubset;
11097 newDoc->extSubset = doc->extSubset;
11098 }
11099 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
11100 if (newDoc->children == NULL) {
11101 if (sax != NULL)
11102 ctxt->sax = oldsax;
11103 xmlFreeParserCtxt(ctxt);
11104 newDoc->intSubset = NULL;
11105 newDoc->extSubset = NULL;
11106 xmlFreeDoc(newDoc);
11107 return(-1);
11108 }
11109 nodePush(ctxt, newDoc->children);
11110 if (doc == NULL) {
11111 ctxt->myDoc = newDoc;
11112 } else {
Daniel Veillard42766c02002-08-22 20:52:17 +000011113 ctxt->myDoc = newDoc;
Owen Taylor3473f882001-02-23 17:55:21 +000011114 newDoc->children->doc = doc;
11115 }
11116 ctxt->instate = XML_PARSER_CONTENT;
11117 ctxt->depth = depth;
11118
11119 /*
11120 * Doing validity checking on chunk doesn't make sense
11121 */
11122 ctxt->validate = 0;
11123 ctxt->loadsubset = 0;
Daniel Veillarde57ec792003-09-10 10:50:59 +000011124 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011125
Daniel Veillardb39bc392002-10-26 19:29:51 +000011126 if ( doc != NULL ){
11127 content = doc->children;
11128 doc->children = NULL;
11129 xmlParseContent(ctxt);
11130 doc->children = content;
11131 }
11132 else {
11133 xmlParseContent(ctxt);
11134 }
Owen Taylor3473f882001-02-23 17:55:21 +000011135 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011136 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011137 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011138 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011139 }
11140 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011141 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011142 }
11143
11144 if (!ctxt->wellFormed) {
11145 if (ctxt->errNo == 0)
11146 ret = 1;
11147 else
11148 ret = ctxt->errNo;
11149 } else {
Daniel Veillard58e44c92002-08-02 22:19:49 +000011150 ret = 0;
11151 }
11152
11153 if (lst != NULL && (ret == 0 || recover == 1)) {
11154 xmlNodePtr cur;
Owen Taylor3473f882001-02-23 17:55:21 +000011155
11156 /*
11157 * Return the newly created nodeset after unlinking it from
11158 * they pseudo parent.
11159 */
11160 cur = newDoc->children->children;
Daniel Veillardcda96922001-08-21 10:56:31 +000011161 *lst = cur;
Owen Taylor3473f882001-02-23 17:55:21 +000011162 while (cur != NULL) {
11163 cur->parent = NULL;
11164 cur = cur->next;
11165 }
11166 newDoc->children->children = NULL;
11167 }
Daniel Veillard58e44c92002-08-02 22:19:49 +000011168
Owen Taylor3473f882001-02-23 17:55:21 +000011169 if (sax != NULL)
11170 ctxt->sax = oldsax;
11171 xmlFreeParserCtxt(ctxt);
11172 newDoc->intSubset = NULL;
11173 newDoc->extSubset = NULL;
11174 xmlFreeDoc(newDoc);
11175
11176 return(ret);
11177}
11178
11179/**
11180 * xmlSAXParseEntity:
11181 * @sax: the SAX handler block
11182 * @filename: the filename
11183 *
11184 * parse an XML external entity out of context and build a tree.
11185 * It use the given SAX function block to handle the parsing callback.
11186 * If sax is NULL, fallback to the default DOM tree building routines.
11187 *
11188 * [78] extParsedEnt ::= TextDecl? content
11189 *
11190 * This correspond to a "Well Balanced" chunk
11191 *
11192 * Returns the resulting document tree
11193 */
11194
11195xmlDocPtr
11196xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
11197 xmlDocPtr ret;
11198 xmlParserCtxtPtr ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000011199
11200 ctxt = xmlCreateFileParserCtxt(filename);
11201 if (ctxt == NULL) {
11202 return(NULL);
11203 }
11204 if (sax != NULL) {
11205 if (ctxt->sax != NULL)
11206 xmlFree(ctxt->sax);
11207 ctxt->sax = sax;
11208 ctxt->userData = NULL;
11209 }
11210
Owen Taylor3473f882001-02-23 17:55:21 +000011211 xmlParseExtParsedEnt(ctxt);
11212
11213 if (ctxt->wellFormed)
11214 ret = ctxt->myDoc;
11215 else {
11216 ret = NULL;
11217 xmlFreeDoc(ctxt->myDoc);
11218 ctxt->myDoc = NULL;
11219 }
11220 if (sax != NULL)
11221 ctxt->sax = NULL;
11222 xmlFreeParserCtxt(ctxt);
11223
11224 return(ret);
11225}
11226
11227/**
11228 * xmlParseEntity:
11229 * @filename: the filename
11230 *
11231 * parse an XML external entity out of context and build a tree.
11232 *
11233 * [78] extParsedEnt ::= TextDecl? content
11234 *
11235 * This correspond to a "Well Balanced" chunk
11236 *
11237 * Returns the resulting document tree
11238 */
11239
11240xmlDocPtr
11241xmlParseEntity(const char *filename) {
11242 return(xmlSAXParseEntity(NULL, filename));
11243}
Daniel Veillard81273902003-09-30 00:43:48 +000011244#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000011245
11246/**
11247 * xmlCreateEntityParserCtxt:
11248 * @URL: the entity URL
11249 * @ID: the entity PUBLIC ID
Daniel Veillardcbaf3992001-12-31 16:16:02 +000011250 * @base: a possible base for the target URI
Owen Taylor3473f882001-02-23 17:55:21 +000011251 *
11252 * Create a parser context for an external entity
11253 * Automatic support for ZLIB/Compress compressed document is provided
11254 * by default if found at compile-time.
11255 *
11256 * Returns the new parser context or NULL
11257 */
11258xmlParserCtxtPtr
11259xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
11260 const xmlChar *base) {
11261 xmlParserCtxtPtr ctxt;
11262 xmlParserInputPtr inputStream;
11263 char *directory = NULL;
11264 xmlChar *uri;
11265
11266 ctxt = xmlNewParserCtxt();
11267 if (ctxt == NULL) {
11268 return(NULL);
11269 }
11270
11271 uri = xmlBuildURI(URL, base);
11272
11273 if (uri == NULL) {
11274 inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
11275 if (inputStream == NULL) {
11276 xmlFreeParserCtxt(ctxt);
11277 return(NULL);
11278 }
11279
11280 inputPush(ctxt, inputStream);
11281
11282 if ((ctxt->directory == NULL) && (directory == NULL))
11283 directory = xmlParserGetDirectory((char *)URL);
11284 if ((ctxt->directory == NULL) && (directory != NULL))
11285 ctxt->directory = directory;
11286 } else {
11287 inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt);
11288 if (inputStream == NULL) {
11289 xmlFree(uri);
11290 xmlFreeParserCtxt(ctxt);
11291 return(NULL);
11292 }
11293
11294 inputPush(ctxt, inputStream);
11295
11296 if ((ctxt->directory == NULL) && (directory == NULL))
11297 directory = xmlParserGetDirectory((char *)uri);
11298 if ((ctxt->directory == NULL) && (directory != NULL))
11299 ctxt->directory = directory;
11300 xmlFree(uri);
11301 }
Owen Taylor3473f882001-02-23 17:55:21 +000011302 return(ctxt);
11303}
11304
11305/************************************************************************
11306 * *
11307 * Front ends when parsing from a file *
11308 * *
11309 ************************************************************************/
11310
11311/**
11312 * xmlCreateFileParserCtxt:
11313 * @filename: the filename
11314 *
11315 * Create a parser context for a file content.
11316 * Automatic support for ZLIB/Compress compressed document is provided
11317 * by default if found at compile-time.
11318 *
11319 * Returns the new parser context or NULL
11320 */
11321xmlParserCtxtPtr
11322xmlCreateFileParserCtxt(const char *filename)
11323{
11324 xmlParserCtxtPtr ctxt;
11325 xmlParserInputPtr inputStream;
Owen Taylor3473f882001-02-23 17:55:21 +000011326 char *directory = NULL;
11327
Owen Taylor3473f882001-02-23 17:55:21 +000011328 ctxt = xmlNewParserCtxt();
11329 if (ctxt == NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +000011330 xmlErrMemory(NULL, "cannot allocate parser context");
Owen Taylor3473f882001-02-23 17:55:21 +000011331 return(NULL);
11332 }
11333
Igor Zlatkovicce076162003-02-23 13:39:39 +000011334
Daniel Veillard4e9b1bc2003-06-09 10:30:33 +000011335 inputStream = xmlLoadExternalEntity(filename, NULL, ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011336 if (inputStream == NULL) {
11337 xmlFreeParserCtxt(ctxt);
11338 return(NULL);
11339 }
11340
Owen Taylor3473f882001-02-23 17:55:21 +000011341 inputPush(ctxt, inputStream);
11342 if ((ctxt->directory == NULL) && (directory == NULL))
Igor Zlatkovic5f9fada2003-02-19 14:51:00 +000011343 directory = xmlParserGetDirectory(filename);
Owen Taylor3473f882001-02-23 17:55:21 +000011344 if ((ctxt->directory == NULL) && (directory != NULL))
11345 ctxt->directory = directory;
11346
11347 return(ctxt);
11348}
11349
Daniel Veillard81273902003-09-30 00:43:48 +000011350#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000011351/**
Daniel Veillarda293c322001-10-02 13:54:14 +000011352 * xmlSAXParseFileWithData:
Owen Taylor3473f882001-02-23 17:55:21 +000011353 * @sax: the SAX handler block
11354 * @filename: the filename
11355 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
11356 * documents
Daniel Veillarda293c322001-10-02 13:54:14 +000011357 * @data: the userdata
Owen Taylor3473f882001-02-23 17:55:21 +000011358 *
11359 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
11360 * compressed document is provided by default if found at compile-time.
11361 * It use the given SAX function block to handle the parsing callback.
11362 * If sax is NULL, fallback to the default DOM tree building routines.
11363 *
Daniel Veillarde19fc232002-04-22 16:01:24 +000011364 * User data (void *) is stored within the parser context in the
11365 * context's _private member, so it is available nearly everywhere in libxml
Daniel Veillarda293c322001-10-02 13:54:14 +000011366 *
Owen Taylor3473f882001-02-23 17:55:21 +000011367 * Returns the resulting document tree
11368 */
11369
11370xmlDocPtr
Daniel Veillarda293c322001-10-02 13:54:14 +000011371xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename,
11372 int recovery, void *data) {
Owen Taylor3473f882001-02-23 17:55:21 +000011373 xmlDocPtr ret;
11374 xmlParserCtxtPtr ctxt;
11375 char *directory = NULL;
11376
Daniel Veillard635ef722001-10-29 11:48:19 +000011377 xmlInitParser();
11378
Owen Taylor3473f882001-02-23 17:55:21 +000011379 ctxt = xmlCreateFileParserCtxt(filename);
11380 if (ctxt == NULL) {
11381 return(NULL);
11382 }
11383 if (sax != NULL) {
11384 if (ctxt->sax != NULL)
11385 xmlFree(ctxt->sax);
11386 ctxt->sax = sax;
Owen Taylor3473f882001-02-23 17:55:21 +000011387 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000011388 xmlDetectSAX2(ctxt);
Daniel Veillarda293c322001-10-02 13:54:14 +000011389 if (data!=NULL) {
Daniel Veillardc790bf42003-10-11 10:50:10 +000011390 ctxt->_private = data;
Daniel Veillarda293c322001-10-02 13:54:14 +000011391 }
Owen Taylor3473f882001-02-23 17:55:21 +000011392
11393 if ((ctxt->directory == NULL) && (directory == NULL))
11394 directory = xmlParserGetDirectory(filename);
11395 if ((ctxt->directory == NULL) && (directory != NULL))
11396 ctxt->directory = (char *) xmlStrdup((xmlChar *) directory);
11397
Daniel Veillarddad3f682002-11-17 16:47:27 +000011398 ctxt->recovery = recovery;
11399
Owen Taylor3473f882001-02-23 17:55:21 +000011400 xmlParseDocument(ctxt);
11401
William M. Brackc07329e2003-09-08 01:57:30 +000011402 if ((ctxt->wellFormed) || recovery) {
11403 ret = ctxt->myDoc;
Daniel Veillardb65e12e2003-10-08 21:33:28 +000011404 if (ret != NULL) {
11405 if (ctxt->input->buf->compressed > 0)
11406 ret->compression = 9;
11407 else
11408 ret->compression = ctxt->input->buf->compressed;
11409 }
William M. Brackc07329e2003-09-08 01:57:30 +000011410 }
Owen Taylor3473f882001-02-23 17:55:21 +000011411 else {
11412 ret = NULL;
11413 xmlFreeDoc(ctxt->myDoc);
11414 ctxt->myDoc = NULL;
11415 }
11416 if (sax != NULL)
11417 ctxt->sax = NULL;
11418 xmlFreeParserCtxt(ctxt);
11419
11420 return(ret);
11421}
11422
11423/**
Daniel Veillarda293c322001-10-02 13:54:14 +000011424 * xmlSAXParseFile:
11425 * @sax: the SAX handler block
11426 * @filename: the filename
11427 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
11428 * documents
11429 *
11430 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
11431 * compressed document is provided by default if found at compile-time.
11432 * It use the given SAX function block to handle the parsing callback.
11433 * If sax is NULL, fallback to the default DOM tree building routines.
11434 *
11435 * Returns the resulting document tree
11436 */
11437
11438xmlDocPtr
11439xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
11440 int recovery) {
11441 return(xmlSAXParseFileWithData(sax,filename,recovery,NULL));
11442}
11443
11444/**
Owen Taylor3473f882001-02-23 17:55:21 +000011445 * xmlRecoverDoc:
11446 * @cur: a pointer to an array of xmlChar
11447 *
11448 * parse an XML in-memory document and build a tree.
11449 * In the case the document is not Well Formed, a tree is built anyway
11450 *
11451 * Returns the resulting document tree
11452 */
11453
11454xmlDocPtr
11455xmlRecoverDoc(xmlChar *cur) {
11456 return(xmlSAXParseDoc(NULL, cur, 1));
11457}
11458
11459/**
11460 * xmlParseFile:
11461 * @filename: the filename
11462 *
11463 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
11464 * compressed document is provided by default if found at compile-time.
11465 *
Daniel Veillard5d96fff2001-08-31 14:55:30 +000011466 * Returns the resulting document tree if the file was wellformed,
11467 * NULL otherwise.
Owen Taylor3473f882001-02-23 17:55:21 +000011468 */
11469
11470xmlDocPtr
11471xmlParseFile(const char *filename) {
11472 return(xmlSAXParseFile(NULL, filename, 0));
11473}
11474
11475/**
11476 * xmlRecoverFile:
11477 * @filename: the filename
11478 *
11479 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
11480 * compressed document is provided by default if found at compile-time.
11481 * In the case the document is not Well Formed, a tree is built anyway
11482 *
11483 * Returns the resulting document tree
11484 */
11485
11486xmlDocPtr
11487xmlRecoverFile(const char *filename) {
11488 return(xmlSAXParseFile(NULL, filename, 1));
11489}
11490
11491
11492/**
11493 * xmlSetupParserForBuffer:
11494 * @ctxt: an XML parser context
11495 * @buffer: a xmlChar * buffer
11496 * @filename: a file name
11497 *
11498 * Setup the parser context to parse a new buffer; Clears any prior
11499 * contents from the parser context. The buffer parameter must not be
11500 * NULL, but the filename parameter can be
11501 */
11502void
11503xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
11504 const char* filename)
11505{
11506 xmlParserInputPtr input;
11507
11508 input = xmlNewInputStream(ctxt);
11509 if (input == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +000011510 xmlErrMemory(NULL, "parsing new buffer: out of memory\n");
Owen Taylor3473f882001-02-23 17:55:21 +000011511 xmlFree(ctxt);
11512 return;
11513 }
11514
11515 xmlClearParserCtxt(ctxt);
11516 if (filename != NULL)
Daniel Veillardc3ca5ba2003-05-09 22:26:28 +000011517 input->filename = (char *) xmlCanonicPath((const xmlChar *)filename);
Owen Taylor3473f882001-02-23 17:55:21 +000011518 input->base = buffer;
11519 input->cur = buffer;
Daniel Veillard48b2f892001-02-25 16:11:03 +000011520 input->end = &buffer[xmlStrlen(buffer)];
Owen Taylor3473f882001-02-23 17:55:21 +000011521 inputPush(ctxt, input);
11522}
11523
11524/**
11525 * xmlSAXUserParseFile:
11526 * @sax: a SAX handler
11527 * @user_data: The user data returned on SAX callbacks
11528 * @filename: a file name
11529 *
11530 * parse an XML file and call the given SAX handler routines.
11531 * Automatic support for ZLIB/Compress compressed document is provided
11532 *
11533 * Returns 0 in case of success or a error number otherwise
11534 */
11535int
11536xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
11537 const char *filename) {
11538 int ret = 0;
11539 xmlParserCtxtPtr ctxt;
11540
11541 ctxt = xmlCreateFileParserCtxt(filename);
11542 if (ctxt == NULL) return -1;
Daniel Veillard81273902003-09-30 00:43:48 +000011543#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard092643b2003-09-25 14:29:29 +000011544 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Daniel Veillard81273902003-09-30 00:43:48 +000011545#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000011546 xmlFree(ctxt->sax);
11547 ctxt->sax = sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000011548 xmlDetectSAX2(ctxt);
11549
Owen Taylor3473f882001-02-23 17:55:21 +000011550 if (user_data != NULL)
11551 ctxt->userData = user_data;
11552
11553 xmlParseDocument(ctxt);
11554
11555 if (ctxt->wellFormed)
11556 ret = 0;
11557 else {
11558 if (ctxt->errNo != 0)
11559 ret = ctxt->errNo;
11560 else
11561 ret = -1;
11562 }
11563 if (sax != NULL)
11564 ctxt->sax = NULL;
11565 xmlFreeParserCtxt(ctxt);
11566
11567 return ret;
11568}
Daniel Veillard81273902003-09-30 00:43:48 +000011569#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000011570
11571/************************************************************************
11572 * *
11573 * Front ends when parsing from memory *
11574 * *
11575 ************************************************************************/
11576
11577/**
11578 * xmlCreateMemoryParserCtxt:
11579 * @buffer: a pointer to a char array
11580 * @size: the size of the array
11581 *
11582 * Create a parser context for an XML in-memory document.
11583 *
11584 * Returns the new parser context or NULL
11585 */
11586xmlParserCtxtPtr
Daniel Veillardfd7ddca2001-05-16 10:57:35 +000011587xmlCreateMemoryParserCtxt(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000011588 xmlParserCtxtPtr ctxt;
11589 xmlParserInputPtr input;
11590 xmlParserInputBufferPtr buf;
11591
11592 if (buffer == NULL)
11593 return(NULL);
11594 if (size <= 0)
11595 return(NULL);
11596
11597 ctxt = xmlNewParserCtxt();
11598 if (ctxt == NULL)
11599 return(NULL);
11600
Daniel Veillard53350552003-09-18 13:35:51 +000011601 /* TODO: xmlParserInputBufferCreateStatic, requires some serious changes */
Owen Taylor3473f882001-02-23 17:55:21 +000011602 buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
Daniel Veillarda7e05b42002-11-19 08:11:14 +000011603 if (buf == NULL) {
11604 xmlFreeParserCtxt(ctxt);
11605 return(NULL);
11606 }
Owen Taylor3473f882001-02-23 17:55:21 +000011607
11608 input = xmlNewInputStream(ctxt);
11609 if (input == NULL) {
Daniel Veillarda7e05b42002-11-19 08:11:14 +000011610 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000011611 xmlFreeParserCtxt(ctxt);
11612 return(NULL);
11613 }
11614
11615 input->filename = NULL;
11616 input->buf = buf;
11617 input->base = input->buf->buffer->content;
11618 input->cur = input->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +000011619 input->end = &input->buf->buffer->content[input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000011620
11621 inputPush(ctxt, input);
11622 return(ctxt);
11623}
11624
Daniel Veillard81273902003-09-30 00:43:48 +000011625#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000011626/**
Daniel Veillard8606bbb2002-11-12 12:36:52 +000011627 * xmlSAXParseMemoryWithData:
11628 * @sax: the SAX handler block
11629 * @buffer: an pointer to a char array
11630 * @size: the size of the array
11631 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
11632 * documents
11633 * @data: the userdata
11634 *
11635 * parse an XML in-memory block and use the given SAX function block
11636 * to handle the parsing callback. If sax is NULL, fallback to the default
11637 * DOM tree building routines.
11638 *
11639 * User data (void *) is stored within the parser context in the
11640 * context's _private member, so it is available nearly everywhere in libxml
11641 *
11642 * Returns the resulting document tree
11643 */
11644
11645xmlDocPtr
11646xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer,
11647 int size, int recovery, void *data) {
11648 xmlDocPtr ret;
11649 xmlParserCtxtPtr ctxt;
11650
11651 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
11652 if (ctxt == NULL) return(NULL);
11653 if (sax != NULL) {
11654 if (ctxt->sax != NULL)
11655 xmlFree(ctxt->sax);
11656 ctxt->sax = sax;
11657 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000011658 xmlDetectSAX2(ctxt);
Daniel Veillard8606bbb2002-11-12 12:36:52 +000011659 if (data!=NULL) {
11660 ctxt->_private=data;
11661 }
11662
Daniel Veillardadba5f12003-04-04 16:09:01 +000011663 ctxt->recovery = recovery;
11664
Daniel Veillard8606bbb2002-11-12 12:36:52 +000011665 xmlParseDocument(ctxt);
11666
11667 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
11668 else {
11669 ret = NULL;
11670 xmlFreeDoc(ctxt->myDoc);
11671 ctxt->myDoc = NULL;
11672 }
11673 if (sax != NULL)
11674 ctxt->sax = NULL;
11675 xmlFreeParserCtxt(ctxt);
11676
11677 return(ret);
11678}
11679
11680/**
Owen Taylor3473f882001-02-23 17:55:21 +000011681 * xmlSAXParseMemory:
11682 * @sax: the SAX handler block
11683 * @buffer: an pointer to a char array
11684 * @size: the size of the array
11685 * @recovery: work in recovery mode, i.e. tries to read not Well Formed
11686 * documents
11687 *
11688 * parse an XML in-memory block and use the given SAX function block
11689 * to handle the parsing callback. If sax is NULL, fallback to the default
11690 * DOM tree building routines.
11691 *
11692 * Returns the resulting document tree
11693 */
11694xmlDocPtr
Daniel Veillard50822cb2001-07-26 20:05:51 +000011695xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer,
11696 int size, int recovery) {
Daniel Veillard8606bbb2002-11-12 12:36:52 +000011697 return xmlSAXParseMemoryWithData(sax, buffer, size, recovery, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011698}
11699
11700/**
11701 * xmlParseMemory:
11702 * @buffer: an pointer to a char array
11703 * @size: the size of the array
11704 *
11705 * parse an XML in-memory block and build a tree.
11706 *
11707 * Returns the resulting document tree
11708 */
11709
Daniel Veillard50822cb2001-07-26 20:05:51 +000011710xmlDocPtr xmlParseMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000011711 return(xmlSAXParseMemory(NULL, buffer, size, 0));
11712}
11713
11714/**
11715 * xmlRecoverMemory:
11716 * @buffer: an pointer to a char array
11717 * @size: the size of the array
11718 *
11719 * parse an XML in-memory block and build a tree.
11720 * In the case the document is not Well Formed, a tree is built anyway
11721 *
11722 * Returns the resulting document tree
11723 */
11724
Daniel Veillard50822cb2001-07-26 20:05:51 +000011725xmlDocPtr xmlRecoverMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000011726 return(xmlSAXParseMemory(NULL, buffer, size, 1));
11727}
11728
11729/**
11730 * xmlSAXUserParseMemory:
11731 * @sax: a SAX handler
11732 * @user_data: The user data returned on SAX callbacks
11733 * @buffer: an in-memory XML document input
11734 * @size: the length of the XML document in bytes
11735 *
11736 * A better SAX parsing routine.
11737 * parse an XML in-memory buffer and call the given SAX handler routines.
11738 *
11739 * Returns 0 in case of success or a error number otherwise
11740 */
11741int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardfd7ddca2001-05-16 10:57:35 +000011742 const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000011743 int ret = 0;
11744 xmlParserCtxtPtr ctxt;
11745 xmlSAXHandlerPtr oldsax = NULL;
11746
Daniel Veillard9e923512002-08-14 08:48:52 +000011747 if (sax == NULL) return -1;
Owen Taylor3473f882001-02-23 17:55:21 +000011748 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
11749 if (ctxt == NULL) return -1;
Daniel Veillard9e923512002-08-14 08:48:52 +000011750 oldsax = ctxt->sax;
11751 ctxt->sax = sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000011752 xmlDetectSAX2(ctxt);
Daniel Veillard30211a02001-04-26 09:33:18 +000011753 if (user_data != NULL)
11754 ctxt->userData = user_data;
Owen Taylor3473f882001-02-23 17:55:21 +000011755
11756 xmlParseDocument(ctxt);
11757
11758 if (ctxt->wellFormed)
11759 ret = 0;
11760 else {
11761 if (ctxt->errNo != 0)
11762 ret = ctxt->errNo;
11763 else
11764 ret = -1;
11765 }
Daniel Veillard9e923512002-08-14 08:48:52 +000011766 ctxt->sax = oldsax;
Owen Taylor3473f882001-02-23 17:55:21 +000011767 xmlFreeParserCtxt(ctxt);
11768
11769 return ret;
11770}
Daniel Veillard81273902003-09-30 00:43:48 +000011771#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000011772
11773/**
11774 * xmlCreateDocParserCtxt:
11775 * @cur: a pointer to an array of xmlChar
11776 *
11777 * Creates a parser context for an XML in-memory document.
11778 *
11779 * Returns the new parser context or NULL
11780 */
11781xmlParserCtxtPtr
Daniel Veillard16fa96c2003-09-23 21:50:54 +000011782xmlCreateDocParserCtxt(const xmlChar *cur) {
Owen Taylor3473f882001-02-23 17:55:21 +000011783 int len;
11784
11785 if (cur == NULL)
11786 return(NULL);
11787 len = xmlStrlen(cur);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000011788 return(xmlCreateMemoryParserCtxt((const char *)cur, len));
Owen Taylor3473f882001-02-23 17:55:21 +000011789}
11790
Daniel Veillard81273902003-09-30 00:43:48 +000011791#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000011792/**
11793 * xmlSAXParseDoc:
11794 * @sax: the SAX handler block
11795 * @cur: a pointer to an array of xmlChar
11796 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
11797 * documents
11798 *
11799 * parse an XML in-memory document and build a tree.
11800 * It use the given SAX function block to handle the parsing callback.
11801 * If sax is NULL, fallback to the default DOM tree building routines.
11802 *
11803 * Returns the resulting document tree
11804 */
11805
11806xmlDocPtr
11807xmlSAXParseDoc(xmlSAXHandlerPtr sax, xmlChar *cur, int recovery) {
11808 xmlDocPtr ret;
11809 xmlParserCtxtPtr ctxt;
11810
11811 if (cur == NULL) return(NULL);
11812
11813
11814 ctxt = xmlCreateDocParserCtxt(cur);
11815 if (ctxt == NULL) return(NULL);
11816 if (sax != NULL) {
11817 ctxt->sax = sax;
11818 ctxt->userData = NULL;
11819 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000011820 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011821
11822 xmlParseDocument(ctxt);
11823 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
11824 else {
11825 ret = NULL;
11826 xmlFreeDoc(ctxt->myDoc);
11827 ctxt->myDoc = NULL;
11828 }
11829 if (sax != NULL)
11830 ctxt->sax = NULL;
11831 xmlFreeParserCtxt(ctxt);
11832
11833 return(ret);
11834}
11835
11836/**
11837 * xmlParseDoc:
11838 * @cur: a pointer to an array of xmlChar
11839 *
11840 * parse an XML in-memory document and build a tree.
11841 *
11842 * Returns the resulting document tree
11843 */
11844
11845xmlDocPtr
11846xmlParseDoc(xmlChar *cur) {
11847 return(xmlSAXParseDoc(NULL, cur, 0));
11848}
Daniel Veillard81273902003-09-30 00:43:48 +000011849#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000011850
Daniel Veillard81273902003-09-30 00:43:48 +000011851#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +000011852/************************************************************************
11853 * *
11854 * Specific function to keep track of entities references *
11855 * and used by the XSLT debugger *
11856 * *
11857 ************************************************************************/
11858
/*
 * Callback installed by xmlSetEntityReferenceFunc() and invoked by
 * xmlAddEntityReference(); NULL (the initial value) means that no
 * callback is installed.
 */
static xmlEntityReferenceFunc xmlEntityRefFunc = NULL;
11860
11861/**
11862 * xmlAddEntityReference:
11863 * @ent : A valid entity
11864 * @firstNode : A valid first node for children of entity
11865 * @lastNode : A valid last node of children entity
11866 *
11867 * Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY
11868 */
11869static void
11870xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
11871 xmlNodePtr lastNode)
11872{
11873 if (xmlEntityRefFunc != NULL) {
11874 (*xmlEntityRefFunc) (ent, firstNode, lastNode);
11875 }
11876}
11877
11878
11879/**
11880 * xmlSetEntityReferenceFunc:
Daniel Veillard01c13b52002-12-10 15:19:08 +000011881 * @func: A valid function
Daniel Veillard8107a222002-01-13 14:10:10 +000011882 *
11883 * Set the function to call call back when a xml reference has been made
11884 */
11885void
11886xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func)
11887{
11888 xmlEntityRefFunc = func;
11889}
Daniel Veillard81273902003-09-30 00:43:48 +000011890#endif /* LIBXML_LEGACY_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000011891
11892/************************************************************************
11893 * *
11894 * Miscellaneous *
11895 * *
11896 ************************************************************************/
11897
11898#ifdef LIBXML_XPATH_ENABLED
11899#include <libxml/xpath.h>
11900#endif
11901
/*
 * Prototype of the default generic error routine; xmlInitParser() compares
 * xmlGenericError against it.
 */
extern void xmlGenericErrorDefaultFunc(void *ctx, const char *msg, ...);
/* Set to 1 at the end of xmlInitParser(), back to 0 by xmlCleanupParser(). */
static int xmlParserInitialized = 0;
11904
11905/**
11906 * xmlInitParser:
11907 *
11908 * Initialization function for the XML parser.
11909 * This is not reentrant. Call once before processing in case of
11910 * use in multithreaded programs.
11911 */
11912
11913void
11914xmlInitParser(void) {
Daniel Veillard3c01b1d2001-10-17 15:58:35 +000011915 if (xmlParserInitialized != 0)
11916 return;
Owen Taylor3473f882001-02-23 17:55:21 +000011917
Daniel Veillarddb5850a2002-01-18 11:49:26 +000011918 if ((xmlGenericError == xmlGenericErrorDefaultFunc) ||
11919 (xmlGenericError == NULL))
11920 initGenericErrorDefaultFunc(NULL);
Daniel Veillard781ac8b2003-05-15 22:11:36 +000011921 xmlInitGlobals();
Daniel Veillardd0463562001-10-13 09:15:48 +000011922 xmlInitThreads();
Daniel Veillard6f350292001-10-14 09:56:15 +000011923 xmlInitMemory();
Owen Taylor3473f882001-02-23 17:55:21 +000011924 xmlInitCharEncodingHandlers();
Owen Taylor3473f882001-02-23 17:55:21 +000011925 xmlDefaultSAXHandlerInit();
11926 xmlRegisterDefaultInputCallbacks();
Daniel Veillarda9cce9c2003-09-29 13:20:24 +000011927#ifdef LIBXML_OUTPUT_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000011928 xmlRegisterDefaultOutputCallbacks();
Daniel Veillarda9cce9c2003-09-29 13:20:24 +000011929#endif /* LIBXML_OUTPUT_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000011930#ifdef LIBXML_HTML_ENABLED
11931 htmlInitAutoClose();
11932 htmlDefaultSAXHandlerInit();
11933#endif
11934#ifdef LIBXML_XPATH_ENABLED
11935 xmlXPathInit();
11936#endif
11937 xmlParserInitialized = 1;
11938}
11939
11940/**
11941 * xmlCleanupParser:
11942 *
11943 * Cleanup function for the XML parser. It tries to reclaim all
11944 * parsing related global memory allocated for the parser processing.
11945 * It doesn't deallocate any document related memory. Calling this
11946 * function should not prevent reusing the parser.
Daniel Veillard7424eb62003-01-24 14:14:52 +000011947 * One should call xmlCleanupParser() only when the process has
11948 * finished using the library or XML document built with it.
Owen Taylor3473f882001-02-23 17:55:21 +000011949 */
11950
11951void
11952xmlCleanupParser(void) {
Daniel Veillard7fb801f2003-08-17 21:07:26 +000011953 if (!xmlParserInitialized)
11954 return;
11955
Owen Taylor3473f882001-02-23 17:55:21 +000011956 xmlCleanupCharEncodingHandlers();
Daniel Veillarde2940dd2001-08-22 00:06:49 +000011957#ifdef LIBXML_CATALOG_ENABLED
11958 xmlCatalogCleanup();
11959#endif
Daniel Veillardd0463562001-10-13 09:15:48 +000011960 xmlCleanupThreads();
Daniel Veillard781ac8b2003-05-15 22:11:36 +000011961 xmlCleanupGlobals();
Daniel Veillard2b8c4a12003-10-02 22:28:19 +000011962 xmlResetLastError();
Daniel Veillardd0463562001-10-13 09:15:48 +000011963 xmlParserInitialized = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000011964}
Daniel Veillard16fa96c2003-09-23 21:50:54 +000011965
11966/************************************************************************
11967 * *
11968 * New set (2.6.0) of simpler and more flexible APIs *
11969 * *
11970 ************************************************************************/
11971
11972/**
Daniel Veillarde96a2a42003-09-24 21:23:56 +000011973 * DICT_FREE:
11974 * @str: a string
11975 *
11976 * Free a string if it is not owned by the "dict" dictionnary in the
11977 * current scope
11978 */
11979#define DICT_FREE(str) \
11980 if ((str) && ((!dict) || \
11981 (xmlDictOwns(dict, (const xmlChar *)(str)) == 0))) \
11982 xmlFree((char *)(str));
11983
11984/**
Daniel Veillard16fa96c2003-09-23 21:50:54 +000011985 * xmlCtxtReset:
11986 * @ctxt: an XML parser context
11987 *
11988 * Reset a parser context
11989 */
11990void
11991xmlCtxtReset(xmlParserCtxtPtr ctxt)
11992{
11993 xmlParserInputPtr input;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000011994 xmlDictPtr dict = ctxt->dict;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000011995
11996 while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
11997 xmlFreeInputStream(input);
11998 }
11999 ctxt->inputNr = 0;
12000 ctxt->input = NULL;
12001
12002 ctxt->spaceNr = 0;
12003 ctxt->spaceTab[0] = -1;
12004 ctxt->space = &ctxt->spaceTab[0];
12005
12006
12007 ctxt->nodeNr = 0;
12008 ctxt->node = NULL;
12009
12010 ctxt->nameNr = 0;
12011 ctxt->name = NULL;
12012
Daniel Veillarde96a2a42003-09-24 21:23:56 +000012013 DICT_FREE(ctxt->version);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012014 ctxt->version = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000012015 DICT_FREE(ctxt->encoding);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012016 ctxt->encoding = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000012017 DICT_FREE(ctxt->directory);
12018 ctxt->directory = NULL;
12019 DICT_FREE(ctxt->extSubURI);
12020 ctxt->extSubURI = NULL;
12021 DICT_FREE(ctxt->extSubSystem);
12022 ctxt->extSubSystem = NULL;
12023 if (ctxt->myDoc != NULL)
12024 xmlFreeDoc(ctxt->myDoc);
12025 ctxt->myDoc = NULL;
12026
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012027 ctxt->standalone = -1;
12028 ctxt->hasExternalSubset = 0;
12029 ctxt->hasPErefs = 0;
12030 ctxt->html = 0;
12031 ctxt->external = 0;
12032 ctxt->instate = XML_PARSER_START;
12033 ctxt->token = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012034
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012035 ctxt->wellFormed = 1;
12036 ctxt->nsWellFormed = 1;
12037 ctxt->valid = 1;
12038 ctxt->vctxt.userData = ctxt;
12039 ctxt->vctxt.error = xmlParserValidityError;
12040 ctxt->vctxt.warning = xmlParserValidityWarning;
12041 ctxt->record_info = 0;
12042 ctxt->nbChars = 0;
12043 ctxt->checkIndex = 0;
12044 ctxt->inSubset = 0;
12045 ctxt->errNo = XML_ERR_OK;
12046 ctxt->depth = 0;
12047 ctxt->charset = XML_CHAR_ENCODING_UTF8;
12048 ctxt->catalogs = NULL;
12049 xmlInitNodeInfoSeq(&ctxt->node_seq);
12050
12051 if (ctxt->attsDefault != NULL) {
12052 xmlHashFree(ctxt->attsDefault, (xmlHashDeallocator) xmlFree);
12053 ctxt->attsDefault = NULL;
12054 }
12055 if (ctxt->attsSpecial != NULL) {
12056 xmlHashFree(ctxt->attsSpecial, NULL);
12057 ctxt->attsSpecial = NULL;
12058 }
12059
Daniel Veillard4432df22003-09-28 18:58:27 +000012060#ifdef LIBXML_CATALOG_ENABLED
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012061 if (ctxt->catalogs != NULL)
12062 xmlCatalogFreeLocal(ctxt->catalogs);
Daniel Veillard4432df22003-09-28 18:58:27 +000012063#endif
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012064}
12065
12066/**
12067 * xmlCtxtUseOptions:
12068 * @ctxt: an XML parser context
12069 * @options: a combination of xmlParserOption(s)
12070 *
12071 * Applies the options to the parser context
12072 *
12073 * Returns 0 in case of success, the set of unknown or unimplemented options
12074 * in case of error.
12075 */
12076int
12077xmlCtxtUseOptions(xmlParserCtxtPtr ctxt, int options)
12078{
12079 if (options & XML_PARSE_RECOVER) {
12080 ctxt->recovery = 1;
12081 options -= XML_PARSE_RECOVER;
12082 } else
12083 ctxt->recovery = 0;
12084 if (options & XML_PARSE_DTDLOAD) {
12085 ctxt->loadsubset = XML_DETECT_IDS;
12086 options -= XML_PARSE_DTDLOAD;
12087 } else
12088 ctxt->loadsubset = 0;
12089 if (options & XML_PARSE_DTDATTR) {
12090 ctxt->loadsubset |= XML_COMPLETE_ATTRS;
12091 options -= XML_PARSE_DTDATTR;
12092 }
12093 if (options & XML_PARSE_NOENT) {
12094 ctxt->replaceEntities = 1;
12095 /* ctxt->loadsubset |= XML_DETECT_IDS; */
12096 options -= XML_PARSE_NOENT;
12097 } else
12098 ctxt->replaceEntities = 0;
12099 if (options & XML_PARSE_NOWARNING) {
12100 ctxt->sax->warning = NULL;
12101 options -= XML_PARSE_NOWARNING;
12102 }
12103 if (options & XML_PARSE_NOERROR) {
12104 ctxt->sax->error = NULL;
12105 ctxt->sax->fatalError = NULL;
12106 options -= XML_PARSE_NOERROR;
12107 }
12108 if (options & XML_PARSE_PEDANTIC) {
12109 ctxt->pedantic = 1;
12110 options -= XML_PARSE_PEDANTIC;
12111 } else
12112 ctxt->pedantic = 0;
12113 if (options & XML_PARSE_NOBLANKS) {
12114 ctxt->keepBlanks = 0;
12115 ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace;
12116 options -= XML_PARSE_NOBLANKS;
12117 } else
12118 ctxt->keepBlanks = 1;
12119 if (options & XML_PARSE_DTDVALID) {
12120 ctxt->validate = 1;
12121 if (options & XML_PARSE_NOWARNING)
12122 ctxt->vctxt.warning = NULL;
12123 if (options & XML_PARSE_NOERROR)
12124 ctxt->vctxt.error = NULL;
12125 options -= XML_PARSE_DTDVALID;
12126 } else
12127 ctxt->validate = 0;
Daniel Veillard81273902003-09-30 00:43:48 +000012128#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012129 if (options & XML_PARSE_SAX1) {
12130 ctxt->sax->startElement = xmlSAX2StartElement;
12131 ctxt->sax->endElement = xmlSAX2EndElement;
12132 ctxt->sax->startElementNs = NULL;
12133 ctxt->sax->endElementNs = NULL;
12134 ctxt->sax->initialized = 1;
12135 options -= XML_PARSE_SAX1;
12136 }
Daniel Veillard81273902003-09-30 00:43:48 +000012137#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde96a2a42003-09-24 21:23:56 +000012138 if (options & XML_PARSE_NODICT) {
12139 ctxt->dictNames = 0;
12140 options -= XML_PARSE_NODICT;
12141 } else {
12142 ctxt->dictNames = 1;
12143 }
Daniel Veillarddca8cc72003-09-26 13:53:14 +000012144 if (options & XML_PARSE_NOCDATA) {
12145 ctxt->sax->cdataBlock = NULL;
12146 options -= XML_PARSE_NOCDATA;
12147 }
12148 if (options & XML_PARSE_NSCLEAN) {
12149 ctxt->options |= XML_PARSE_NSCLEAN;
12150 options -= XML_PARSE_NSCLEAN;
12151 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012152 return (options);
12153}
12154
12155/**
12156 * xmlDoRead:
12157 * @ctxt: an XML parser context
Daniel Veillard60942de2003-09-25 21:05:58 +000012158 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012159 * @encoding: the document encoding, or NULL
12160 * @options: a combination of xmlParserOption(s)
12161 * @reuse: keep the context for reuse
12162 *
12163 * Common front-end for the xmlRead functions
12164 *
12165 * Returns the resulting document tree or NULL
12166 */
12167static xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000012168xmlDoRead(xmlParserCtxtPtr ctxt, const char *URL, const char *encoding,
12169 int options, int reuse)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012170{
12171 xmlDocPtr ret;
12172
12173 xmlCtxtUseOptions(ctxt, options);
12174 if (encoding != NULL) {
12175 xmlCharEncodingHandlerPtr hdlr;
12176
12177 hdlr = xmlFindCharEncodingHandler(encoding);
12178 if (hdlr != NULL)
12179 xmlSwitchToEncoding(ctxt, hdlr);
12180 }
Daniel Veillard60942de2003-09-25 21:05:58 +000012181 if ((URL != NULL) && (ctxt->input != NULL) &&
12182 (ctxt->input->filename == NULL))
12183 ctxt->input->filename = (char *) xmlStrdup((const xmlChar *) URL);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012184 xmlParseDocument(ctxt);
12185 if ((ctxt->wellFormed) || ctxt->recovery)
12186 ret = ctxt->myDoc;
12187 else {
12188 ret = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000012189 if (ctxt->myDoc != NULL) {
Daniel Veillard9d8c1df2003-09-26 23:27:25 +000012190 if ((ctxt->dictNames) &&
12191 (ctxt->myDoc->dict == ctxt->dict))
12192 xmlDictReference(ctxt->dict);
Daniel Veillarde96a2a42003-09-24 21:23:56 +000012193 xmlFreeDoc(ctxt->myDoc);
12194 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012195 }
Daniel Veillarde96a2a42003-09-24 21:23:56 +000012196 ctxt->myDoc = NULL;
12197 if (!reuse) {
12198 if ((ctxt->dictNames) &&
12199 (ret != NULL) &&
12200 (ret->dict == ctxt->dict))
12201 ctxt->dict = NULL;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012202 xmlFreeParserCtxt(ctxt);
Daniel Veillarde96a2a42003-09-24 21:23:56 +000012203 } else {
12204 /* Must duplicate the reference to the dictionary */
12205 if ((ctxt->dictNames) &&
12206 (ret != NULL) &&
12207 (ret->dict == ctxt->dict))
12208 xmlDictReference(ctxt->dict);
12209 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012210
12211 return (ret);
12212}
12213
12214/**
12215 * xmlReadDoc:
12216 * @cur: a pointer to a zero terminated string
Daniel Veillard60942de2003-09-25 21:05:58 +000012217 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012218 * @encoding: the document encoding, or NULL
12219 * @options: a combination of xmlParserOption(s)
12220 *
12221 * parse an XML in-memory document and build a tree.
12222 *
12223 * Returns the resulting document tree
12224 */
12225xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000012226xmlReadDoc(const xmlChar * cur, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012227{
12228 xmlParserCtxtPtr ctxt;
12229
12230 if (cur == NULL)
12231 return (NULL);
12232
12233 ctxt = xmlCreateDocParserCtxt(cur);
12234 if (ctxt == NULL)
12235 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000012236 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012237}
12238
12239/**
12240 * xmlReadFile:
12241 * @filename: a file or URL
12242 * @encoding: the document encoding, or NULL
12243 * @options: a combination of xmlParserOption(s)
12244 *
12245 * parse an XML file from the filesystem or the network.
12246 *
12247 * Returns the resulting document tree
12248 */
12249xmlDocPtr
12250xmlReadFile(const char *filename, const char *encoding, int options)
12251{
12252 xmlParserCtxtPtr ctxt;
12253
12254 ctxt = xmlCreateFileParserCtxt(filename);
12255 if (ctxt == NULL)
12256 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000012257 return (xmlDoRead(ctxt, NULL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012258}
12259
12260/**
12261 * xmlReadMemory:
12262 * @buffer: a pointer to a char array
12263 * @size: the size of the array
Daniel Veillard60942de2003-09-25 21:05:58 +000012264 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012265 * @encoding: the document encoding, or NULL
12266 * @options: a combination of xmlParserOption(s)
12267 *
12268 * parse an XML in-memory document and build a tree.
12269 *
12270 * Returns the resulting document tree
12271 */
12272xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000012273xmlReadMemory(const char *buffer, int size, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012274{
12275 xmlParserCtxtPtr ctxt;
12276
12277 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
12278 if (ctxt == NULL)
12279 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000012280 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012281}
12282
12283/**
12284 * xmlReadFd:
12285 * @fd: an open file descriptor
Daniel Veillard60942de2003-09-25 21:05:58 +000012286 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012287 * @encoding: the document encoding, or NULL
12288 * @options: a combination of xmlParserOption(s)
12289 *
12290 * parse an XML from a file descriptor and build a tree.
12291 *
12292 * Returns the resulting document tree
12293 */
12294xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000012295xmlReadFd(int fd, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012296{
12297 xmlParserCtxtPtr ctxt;
12298 xmlParserInputBufferPtr input;
12299 xmlParserInputPtr stream;
12300
12301 if (fd < 0)
12302 return (NULL);
12303
12304 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
12305 if (input == NULL)
12306 return (NULL);
12307 ctxt = xmlNewParserCtxt();
12308 if (ctxt == NULL) {
12309 xmlFreeParserInputBuffer(input);
12310 return (NULL);
12311 }
12312 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
12313 if (stream == NULL) {
12314 xmlFreeParserInputBuffer(input);
12315 xmlFreeParserCtxt(ctxt);
12316 return (NULL);
12317 }
12318 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000012319 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012320}
12321
12322/**
12323 * xmlReadIO:
12324 * @ioread: an I/O read function
12325 * @ioclose: an I/O close function
12326 * @ioctx: an I/O handler
Daniel Veillard60942de2003-09-25 21:05:58 +000012327 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012328 * @encoding: the document encoding, or NULL
12329 * @options: a combination of xmlParserOption(s)
12330 *
12331 * parse an XML document from I/O functions and source and build a tree.
12332 *
12333 * Returns the resulting document tree
12334 */
12335xmlDocPtr
12336xmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
Daniel Veillard60942de2003-09-25 21:05:58 +000012337 void *ioctx, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012338{
12339 xmlParserCtxtPtr ctxt;
12340 xmlParserInputBufferPtr input;
12341 xmlParserInputPtr stream;
12342
12343 if (ioread == NULL)
12344 return (NULL);
12345
12346 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
12347 XML_CHAR_ENCODING_NONE);
12348 if (input == NULL)
12349 return (NULL);
12350 ctxt = xmlNewParserCtxt();
12351 if (ctxt == NULL) {
12352 xmlFreeParserInputBuffer(input);
12353 return (NULL);
12354 }
12355 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
12356 if (stream == NULL) {
12357 xmlFreeParserInputBuffer(input);
12358 xmlFreeParserCtxt(ctxt);
12359 return (NULL);
12360 }
12361 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000012362 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012363}
12364
12365/**
12366 * xmlCtxtReadDoc:
12367 * @ctxt: an XML parser context
12368 * @cur: a pointer to a zero terminated string
Daniel Veillard60942de2003-09-25 21:05:58 +000012369 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012370 * @encoding: the document encoding, or NULL
12371 * @options: a combination of xmlParserOption(s)
12372 *
12373 * parse an XML in-memory document and build a tree.
12374 * This reuses the existing @ctxt parser context
12375 *
12376 * Returns the resulting document tree
12377 */
12378xmlDocPtr
12379xmlCtxtReadDoc(xmlParserCtxtPtr ctxt, const xmlChar * cur,
Daniel Veillard60942de2003-09-25 21:05:58 +000012380 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012381{
12382 xmlParserInputPtr stream;
12383
12384 if (cur == NULL)
12385 return (NULL);
12386 if (ctxt == NULL)
12387 return (NULL);
12388
12389 xmlCtxtReset(ctxt);
12390
12391 stream = xmlNewStringInputStream(ctxt, cur);
12392 if (stream == NULL) {
12393 return (NULL);
12394 }
12395 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000012396 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012397}
12398
12399/**
12400 * xmlCtxtReadFile:
12401 * @ctxt: an XML parser context
12402 * @filename: a file or URL
12403 * @encoding: the document encoding, or NULL
12404 * @options: a combination of xmlParserOption(s)
12405 *
12406 * parse an XML file from the filesystem or the network.
12407 * This reuses the existing @ctxt parser context
12408 *
12409 * Returns the resulting document tree
12410 */
12411xmlDocPtr
12412xmlCtxtReadFile(xmlParserCtxtPtr ctxt, const char *filename,
12413 const char *encoding, int options)
12414{
12415 xmlParserInputPtr stream;
12416
12417 if (filename == NULL)
12418 return (NULL);
12419 if (ctxt == NULL)
12420 return (NULL);
12421
12422 xmlCtxtReset(ctxt);
12423
12424 stream = xmlNewInputFromFile(ctxt, filename);
12425 if (stream == NULL) {
12426 return (NULL);
12427 }
12428 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000012429 return (xmlDoRead(ctxt, NULL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012430}
12431
12432/**
12433 * xmlCtxtReadMemory:
12434 * @ctxt: an XML parser context
12435 * @buffer: a pointer to a char array
12436 * @size: the size of the array
Daniel Veillard60942de2003-09-25 21:05:58 +000012437 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012438 * @encoding: the document encoding, or NULL
12439 * @options: a combination of xmlParserOption(s)
12440 *
12441 * parse an XML in-memory document and build a tree.
12442 * This reuses the existing @ctxt parser context
12443 *
12444 * Returns the resulting document tree
12445 */
12446xmlDocPtr
12447xmlCtxtReadMemory(xmlParserCtxtPtr ctxt, const char *buffer, int size,
Daniel Veillard60942de2003-09-25 21:05:58 +000012448 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012449{
12450 xmlParserInputBufferPtr input;
12451 xmlParserInputPtr stream;
12452
12453 if (ctxt == NULL)
12454 return (NULL);
12455 if (buffer == NULL)
12456 return (NULL);
12457
12458 xmlCtxtReset(ctxt);
12459
12460 input = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
12461 if (input == NULL) {
12462 return(NULL);
12463 }
12464
12465 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
12466 if (stream == NULL) {
12467 xmlFreeParserInputBuffer(input);
12468 return(NULL);
12469 }
12470
12471 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000012472 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012473}
12474
12475/**
12476 * xmlCtxtReadFd:
12477 * @ctxt: an XML parser context
12478 * @fd: an open file descriptor
Daniel Veillard60942de2003-09-25 21:05:58 +000012479 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012480 * @encoding: the document encoding, or NULL
12481 * @options: a combination of xmlParserOption(s)
12482 *
12483 * parse an XML from a file descriptor and build a tree.
12484 * This reuses the existing @ctxt parser context
12485 *
12486 * Returns the resulting document tree
12487 */
12488xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000012489xmlCtxtReadFd(xmlParserCtxtPtr ctxt, int fd,
12490 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012491{
12492 xmlParserInputBufferPtr input;
12493 xmlParserInputPtr stream;
12494
12495 if (fd < 0)
12496 return (NULL);
12497 if (ctxt == NULL)
12498 return (NULL);
12499
12500 xmlCtxtReset(ctxt);
12501
12502
12503 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
12504 if (input == NULL)
12505 return (NULL);
12506 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
12507 if (stream == NULL) {
12508 xmlFreeParserInputBuffer(input);
12509 return (NULL);
12510 }
12511 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000012512 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012513}
12514
12515/**
12516 * xmlCtxtReadIO:
12517 * @ctxt: an XML parser context
12518 * @ioread: an I/O read function
12519 * @ioclose: an I/O close function
12520 * @ioctx: an I/O handler
Daniel Veillard60942de2003-09-25 21:05:58 +000012521 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012522 * @encoding: the document encoding, or NULL
12523 * @options: a combination of xmlParserOption(s)
12524 *
12525 * parse an XML document from I/O functions and source and build a tree.
12526 * This reuses the existing @ctxt parser context
12527 *
12528 * Returns the resulting document tree
12529 */
12530xmlDocPtr
12531xmlCtxtReadIO(xmlParserCtxtPtr ctxt, xmlInputReadCallback ioread,
12532 xmlInputCloseCallback ioclose, void *ioctx,
Daniel Veillard60942de2003-09-25 21:05:58 +000012533 const char *URL,
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012534 const char *encoding, int options)
12535{
12536 xmlParserInputBufferPtr input;
12537 xmlParserInputPtr stream;
12538
12539 if (ioread == NULL)
12540 return (NULL);
12541 if (ctxt == NULL)
12542 return (NULL);
12543
12544 xmlCtxtReset(ctxt);
12545
12546 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
12547 XML_CHAR_ENCODING_NONE);
12548 if (input == NULL)
12549 return (NULL);
12550 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
12551 if (stream == NULL) {
12552 xmlFreeParserInputBuffer(input);
12553 return (NULL);
12554 }
12555 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000012556 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012557}