blob: 9db664fd7255307109b45714a6fe15d5afc4d186 [file] [log] [blame]
Owen Taylor3473f882001-02-23 17:55:21 +00001/*
2 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
3 * implemented on top of the SAX interfaces
4 *
5 * References:
6 * The XML specification:
7 * http://www.w3.org/TR/REC-xml
8 * Original 1.0 version:
9 * http://www.w3.org/TR/1998/REC-xml-19980210
10 * XML second edition working draft
11 * http://www.w3.org/TR/2000/WD-xml-2e-20000814
12 *
13 * Okay this is a big file, the parser core is around 7000 lines, then it
14 * is followed by the progressive parser top routines, then the various
Daniel Veillardcbaf3992001-12-31 16:16:02 +000015 * high level APIs to call the parser and a few miscellaneous functions.
Owen Taylor3473f882001-02-23 17:55:21 +000016 * A number of helper functions and deprecated ones have been moved to
17 * parserInternals.c to reduce this file size.
 * As much as possible the functions are associated with their relative
 * production in the XML specification. A few productions defining the
 * different ranges of characters are actually implemented either in
 * parserInternals.h or parserInternals.c
22 * The DOM tree build is realized from the default SAX callbacks in
23 * the module SAX.c.
24 * The routines doing the validation checks are in valid.c and called either
Daniel Veillardcbaf3992001-12-31 16:16:02 +000025 * from the SAX callbacks or as standalone functions using a preparsed
Owen Taylor3473f882001-02-23 17:55:21 +000026 * document.
27 *
28 * See Copyright for the status of this software.
29 *
Daniel Veillardc5d64342001-06-24 12:13:24 +000030 * daniel@veillard.com
Owen Taylor3473f882001-02-23 17:55:21 +000031 */
32
Daniel Veillard34ce8be2002-03-18 19:37:11 +000033#define IN_LIBXML
Bjorn Reese70a9da52001-04-21 16:57:29 +000034#include "libxml.h"
35
Daniel Veillard3c5ed912002-01-08 10:36:16 +000036#if defined(WIN32) && !defined (__CYGWIN__)
Owen Taylor3473f882001-02-23 17:55:21 +000037#define XML_DIR_SEP '\\'
38#else
Owen Taylor3473f882001-02-23 17:55:21 +000039#define XML_DIR_SEP '/'
40#endif
41
Owen Taylor3473f882001-02-23 17:55:21 +000042#include <stdlib.h>
43#include <string.h>
Aleksey Sanine7acf432003-10-02 20:05:27 +000044#include <stdarg.h>
Owen Taylor3473f882001-02-23 17:55:21 +000045#include <libxml/xmlmemory.h>
Daniel Veillardd0463562001-10-13 09:15:48 +000046#include <libxml/threads.h>
47#include <libxml/globals.h>
Owen Taylor3473f882001-02-23 17:55:21 +000048#include <libxml/tree.h>
49#include <libxml/parser.h>
50#include <libxml/parserInternals.h>
51#include <libxml/valid.h>
52#include <libxml/entities.h>
53#include <libxml/xmlerror.h>
54#include <libxml/encoding.h>
55#include <libxml/xmlIO.h>
56#include <libxml/uri.h>
Daniel Veillard5d90b6c2001-08-22 14:29:45 +000057#ifdef LIBXML_CATALOG_ENABLED
58#include <libxml/catalog.h>
59#endif
William M. Brack1d8c9b22004-12-25 10:14:57 +000060#ifdef LIBXML_SCHEMAS_ENABLED
61#include <libxml/xmlschemastypes.h>
62#include <libxml/relaxng.h>
63#endif
Owen Taylor3473f882001-02-23 17:55:21 +000064#ifdef HAVE_CTYPE_H
65#include <ctype.h>
66#endif
67#ifdef HAVE_STDLIB_H
68#include <stdlib.h>
69#endif
70#ifdef HAVE_SYS_STAT_H
71#include <sys/stat.h>
72#endif
73#ifdef HAVE_FCNTL_H
74#include <fcntl.h>
75#endif
76#ifdef HAVE_UNISTD_H
77#include <unistd.h>
78#endif
79#ifdef HAVE_ZLIB_H
80#include <zlib.h>
81#endif
82
Daniel Veillard0161e632008-08-28 15:36:32 +000083static void
84xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info);
85
86/************************************************************************
87 * *
88 * Arbitrary limits set in the parser. See XML_PARSE_HUGE *
89 * *
90 ************************************************************************/
91
92#define XML_PARSER_BIG_ENTITY 1000
93#define XML_PARSER_LOT_ENTITY 5000
94
/*
 * XML_PARSER_NON_LINEAR is the threshold where the ratio of parsed entity
 * replacement over the size in bytes of the input indicates that you have
 * an exponential behaviour. A value of 10 corresponds to at least 3 entity
 * replacements per byte of input.
 */
101#define XML_PARSER_NON_LINEAR 10
102
103/*
104 * xmlParserEntityCheck
105 *
106 * Function to check non-linear entity expansion behaviour
107 * This is here to detect and stop exponential linear entity expansion
108 * This is not a limitation of the parser but a safety
109 * boundary feature. It can be disabled with the XML_PARSE_HUGE
110 * parser option.
111 */
112static int
113xmlParserEntityCheck(xmlParserCtxtPtr ctxt, unsigned long size,
114 xmlEntityPtr ent)
115{
Daniel Veillardcba68392008-08-29 12:43:40 +0000116 unsigned long consumed = 0;
Daniel Veillard0161e632008-08-28 15:36:32 +0000117
118 if ((ctxt == NULL) || (ctxt->options & XML_PARSE_HUGE))
119 return (0);
120 if (ctxt->lastError.code == XML_ERR_ENTITY_LOOP)
121 return (1);
122 if (size != 0) {
123 /*
124 * Do the check based on the replacement size of the entity
125 */
126 if (size < XML_PARSER_BIG_ENTITY)
127 return(0);
128
129 /*
130 * A limit on the amount of text data reasonably used
131 */
132 if (ctxt->input != NULL) {
133 consumed = ctxt->input->consumed +
134 (ctxt->input->cur - ctxt->input->base);
135 }
136 consumed += ctxt->sizeentities;
137
138 if ((size < XML_PARSER_NON_LINEAR * consumed) &&
139 (ctxt->nbentities * 3 < XML_PARSER_NON_LINEAR * consumed))
140 return (0);
141 } else if (ent != NULL) {
142 /*
143 * use the number of parsed entities in the replacement
144 */
145 size = ent->checked;
146
147 /*
148 * The amount of data parsed counting entities size only once
149 */
150 if (ctxt->input != NULL) {
151 consumed = ctxt->input->consumed +
152 (ctxt->input->cur - ctxt->input->base);
153 }
154 consumed += ctxt->sizeentities;
155
156 /*
157 * Check the density of entities for the amount of data
158 * knowing an entity reference will take at least 3 bytes
159 */
160 if (size * 3 < consumed * XML_PARSER_NON_LINEAR)
161 return (0);
162 } else {
163 /*
164 * strange we got no data for checking just return
165 */
166 return (0);
167 }
168
169 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
170 return (1);
171}
172
Daniel Veillard3b2e4e12003-02-03 08:52:58 +0000173/**
Daniel Veillard4aede2e2003-10-17 12:43:59 +0000174 * xmlParserMaxDepth:
Daniel Veillard3b2e4e12003-02-03 08:52:58 +0000175 *
Daniel Veillard8915c152008-08-26 13:05:34 +0000176 * arbitrary depth limit for the XML documents that we allow to
177 * process. This is not a limitation of the parser but a safety
178 * boundary feature. It can be disabled with the XML_PARSE_HUGE
179 * parser option.
Daniel Veillard3b2e4e12003-02-03 08:52:58 +0000180 */
Daniel Veillard8915c152008-08-26 13:05:34 +0000181unsigned int xmlParserMaxDepth = 256;
Owen Taylor3473f882001-02-23 17:55:21 +0000182
Daniel Veillard0fb18932003-09-07 09:14:37 +0000183
Daniel Veillard0161e632008-08-28 15:36:32 +0000184
185#define SAX2 1
Daniel Veillard21a0f912001-02-25 19:54:14 +0000186#define XML_PARSER_BIG_BUFFER_SIZE 300
Owen Taylor3473f882001-02-23 17:55:21 +0000187#define XML_PARSER_BUFFER_SIZE 100
Daniel Veillard5997aca2002-03-18 18:36:20 +0000188#define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"
189
/*
 * List of XML prefixed PI allowed by W3C specs.
 * The table is NULL terminated and fully read-only: both the strings and
 * the pointer slots are const.
 */

static const char * const xmlW3CPIs[] = {
    "xml-stylesheet",
    NULL
};
198
Daniel Veillarda07050d2003-10-19 14:46:32 +0000199
Owen Taylor3473f882001-02-23 17:55:21 +0000200/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
Owen Taylor3473f882001-02-23 17:55:21 +0000201xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
202 const xmlChar **str);
203
Daniel Veillard7d515752003-09-26 19:12:37 +0000204static xmlParserErrors
Daniel Veillarda97a19b2001-05-20 13:19:52 +0000205xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
206 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +0000207 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +0000208 const xmlChar *ID, xmlNodePtr *list);
Owen Taylor3473f882001-02-23 17:55:21 +0000209
Daniel Veillard37334572008-07-31 08:20:02 +0000210static int
211xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options,
212 const char *encoding);
Daniel Veillard81273902003-09-30 00:43:48 +0000213#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +0000214static void
215xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
216 xmlNodePtr lastNode);
Daniel Veillard81273902003-09-30 00:43:48 +0000217#endif /* LIBXML_LEGACY_ENABLED */
Daniel Veillard8107a222002-01-13 14:10:10 +0000218
Daniel Veillard7d515752003-09-26 19:12:37 +0000219static xmlParserErrors
Daniel Veillard328f48c2002-11-15 15:24:34 +0000220xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
221 const xmlChar *string, void *user_data, xmlNodePtr *lst);
Daniel Veillarde57ec792003-09-10 10:50:59 +0000222
Daniel Veillard8bf64ae2008-03-24 20:45:21 +0000223static int
224xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity);
225
Daniel Veillarde57ec792003-09-10 10:50:59 +0000226/************************************************************************
227 * *
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000228 * Some factorized error routines *
229 * *
230 ************************************************************************/
231
232/**
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000233 * xmlErrAttributeDup:
234 * @ctxt: an XML parser context
235 * @prefix: the attribute prefix
236 * @localname: the attribute localname
237 *
238 * Handle a redefinition of attribute error
239 */
240static void
241xmlErrAttributeDup(xmlParserCtxtPtr ctxt, const xmlChar * prefix,
242 const xmlChar * localname)
243{
Daniel Veillard157fee02003-10-31 10:36:03 +0000244 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
245 (ctxt->instate == XML_PARSER_EOF))
246 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000247 if (ctxt != NULL)
248 ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
Daniel Veillard2b8c4a12003-10-02 22:28:19 +0000249 if (prefix == NULL)
Daniel Veillard659e71e2003-10-10 14:10:40 +0000250 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000251 ctxt->errNo, XML_ERR_FATAL, NULL, 0,
252 (const char *) localname, NULL, NULL, 0, 0,
253 "Attribute %s redefined\n", localname);
Daniel Veillard2b8c4a12003-10-02 22:28:19 +0000254 else
Daniel Veillard659e71e2003-10-10 14:10:40 +0000255 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000256 ctxt->errNo, XML_ERR_FATAL, NULL, 0,
257 (const char *) prefix, (const char *) localname,
258 NULL, 0, 0, "Attribute %s:%s redefined\n", prefix,
259 localname);
Daniel Veillard30e76072006-03-09 14:13:55 +0000260 if (ctxt != NULL) {
261 ctxt->wellFormed = 0;
262 if (ctxt->recovery == 0)
263 ctxt->disableSAX = 1;
264 }
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000265}
266
267/**
268 * xmlFatalErr:
269 * @ctxt: an XML parser context
270 * @error: the error number
271 * @extra: extra information string
272 *
273 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
274 */
275static void
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000276xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info)
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000277{
278 const char *errmsg;
279
Daniel Veillard157fee02003-10-31 10:36:03 +0000280 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
281 (ctxt->instate == XML_PARSER_EOF))
282 return;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000283 switch (error) {
284 case XML_ERR_INVALID_HEX_CHARREF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000285 errmsg = "CharRef: invalid hexadecimal value\n";
286 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000287 case XML_ERR_INVALID_DEC_CHARREF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000288 errmsg = "CharRef: invalid decimal value\n";
289 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000290 case XML_ERR_INVALID_CHARREF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000291 errmsg = "CharRef: invalid value\n";
292 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000293 case XML_ERR_INTERNAL_ERROR:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000294 errmsg = "internal error";
295 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000296 case XML_ERR_PEREF_AT_EOF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000297 errmsg = "PEReference at end of document\n";
298 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000299 case XML_ERR_PEREF_IN_PROLOG:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000300 errmsg = "PEReference in prolog\n";
301 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000302 case XML_ERR_PEREF_IN_EPILOG:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000303 errmsg = "PEReference in epilog\n";
304 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000305 case XML_ERR_PEREF_NO_NAME:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000306 errmsg = "PEReference: no name\n";
307 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000308 case XML_ERR_PEREF_SEMICOL_MISSING:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000309 errmsg = "PEReference: expecting ';'\n";
310 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000311 case XML_ERR_ENTITY_LOOP:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000312 errmsg = "Detected an entity reference loop\n";
313 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000314 case XML_ERR_ENTITY_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000315 errmsg = "EntityValue: \" or ' expected\n";
316 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000317 case XML_ERR_ENTITY_PE_INTERNAL:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000318 errmsg = "PEReferences forbidden in internal subset\n";
319 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000320 case XML_ERR_ENTITY_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000321 errmsg = "EntityValue: \" or ' expected\n";
322 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000323 case XML_ERR_ATTRIBUTE_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000324 errmsg = "AttValue: \" or ' expected\n";
325 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000326 case XML_ERR_LT_IN_ATTRIBUTE:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000327 errmsg = "Unescaped '<' not allowed in attributes values\n";
328 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000329 case XML_ERR_LITERAL_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000330 errmsg = "SystemLiteral \" or ' expected\n";
331 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000332 case XML_ERR_LITERAL_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000333 errmsg = "Unfinished System or Public ID \" or ' expected\n";
334 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000335 case XML_ERR_MISPLACED_CDATA_END:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000336 errmsg = "Sequence ']]>' not allowed in content\n";
337 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000338 case XML_ERR_URI_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000339 errmsg = "SYSTEM or PUBLIC, the URI is missing\n";
340 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000341 case XML_ERR_PUBID_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000342 errmsg = "PUBLIC, the Public Identifier is missing\n";
343 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000344 case XML_ERR_HYPHEN_IN_COMMENT:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000345 errmsg = "Comment must not contain '--' (double-hyphen)\n";
346 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000347 case XML_ERR_PI_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000348 errmsg = "xmlParsePI : no target name\n";
349 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000350 case XML_ERR_RESERVED_XML_NAME:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000351 errmsg = "Invalid PI name\n";
352 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000353 case XML_ERR_NOTATION_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000354 errmsg = "NOTATION: Name expected here\n";
355 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000356 case XML_ERR_NOTATION_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000357 errmsg = "'>' required to close NOTATION declaration\n";
358 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000359 case XML_ERR_VALUE_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000360 errmsg = "Entity value required\n";
361 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000362 case XML_ERR_URI_FRAGMENT:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000363 errmsg = "Fragment not allowed";
364 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000365 case XML_ERR_ATTLIST_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000366 errmsg = "'(' required to start ATTLIST enumeration\n";
367 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000368 case XML_ERR_NMTOKEN_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000369 errmsg = "NmToken expected in ATTLIST enumeration\n";
370 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000371 case XML_ERR_ATTLIST_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000372 errmsg = "')' required to finish ATTLIST enumeration\n";
373 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000374 case XML_ERR_MIXED_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000375 errmsg = "MixedContentDecl : '|' or ')*' expected\n";
376 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000377 case XML_ERR_PCDATA_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000378 errmsg = "MixedContentDecl : '#PCDATA' expected\n";
379 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000380 case XML_ERR_ELEMCONTENT_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000381 errmsg = "ContentDecl : Name or '(' expected\n";
382 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000383 case XML_ERR_ELEMCONTENT_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000384 errmsg = "ContentDecl : ',' '|' or ')' expected\n";
385 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000386 case XML_ERR_PEREF_IN_INT_SUBSET:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000387 errmsg =
388 "PEReference: forbidden within markup decl in internal subset\n";
389 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000390 case XML_ERR_GT_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000391 errmsg = "expected '>'\n";
392 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000393 case XML_ERR_CONDSEC_INVALID:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000394 errmsg = "XML conditional section '[' expected\n";
395 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000396 case XML_ERR_EXT_SUBSET_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000397 errmsg = "Content error in the external subset\n";
398 break;
399 case XML_ERR_CONDSEC_INVALID_KEYWORD:
400 errmsg =
401 "conditional section INCLUDE or IGNORE keyword expected\n";
402 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000403 case XML_ERR_CONDSEC_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000404 errmsg = "XML conditional section not closed\n";
405 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000406 case XML_ERR_XMLDECL_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000407 errmsg = "Text declaration '<?xml' required\n";
408 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000409 case XML_ERR_XMLDECL_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000410 errmsg = "parsing XML declaration: '?>' expected\n";
411 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000412 case XML_ERR_EXT_ENTITY_STANDALONE:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000413 errmsg = "external parsed entities cannot be standalone\n";
414 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000415 case XML_ERR_ENTITYREF_SEMICOL_MISSING:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000416 errmsg = "EntityRef: expecting ';'\n";
417 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000418 case XML_ERR_DOCTYPE_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000419 errmsg = "DOCTYPE improperly terminated\n";
420 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000421 case XML_ERR_LTSLASH_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000422 errmsg = "EndTag: '</' not found\n";
423 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000424 case XML_ERR_EQUAL_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000425 errmsg = "expected '='\n";
426 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000427 case XML_ERR_STRING_NOT_CLOSED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000428 errmsg = "String not closed expecting \" or '\n";
429 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000430 case XML_ERR_STRING_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000431 errmsg = "String not started expecting ' or \"\n";
432 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000433 case XML_ERR_ENCODING_NAME:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000434 errmsg = "Invalid XML encoding name\n";
435 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000436 case XML_ERR_STANDALONE_VALUE:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000437 errmsg = "standalone accepts only 'yes' or 'no'\n";
438 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000439 case XML_ERR_DOCUMENT_EMPTY:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000440 errmsg = "Document is empty\n";
441 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000442 case XML_ERR_DOCUMENT_END:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000443 errmsg = "Extra content at the end of the document\n";
444 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000445 case XML_ERR_NOT_WELL_BALANCED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000446 errmsg = "chunk is not well balanced\n";
447 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000448 case XML_ERR_EXTRA_CONTENT:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000449 errmsg = "extra content at the end of well balanced chunk\n";
450 break;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000451 case XML_ERR_VERSION_MISSING:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000452 errmsg = "Malformed declaration expecting version\n";
453 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000454#if 0
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000455 case:
456 errmsg = "\n";
457 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000458#endif
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000459 default:
460 errmsg = "Unregistered error message\n";
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000461 }
Daniel Veillard30e76072006-03-09 14:13:55 +0000462 if (ctxt != NULL)
463 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000464 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000465 XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, errmsg,
466 info);
Daniel Veillard30e76072006-03-09 14:13:55 +0000467 if (ctxt != NULL) {
468 ctxt->wellFormed = 0;
469 if (ctxt->recovery == 0)
470 ctxt->disableSAX = 1;
471 }
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000472}
473
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000474/**
475 * xmlFatalErrMsg:
476 * @ctxt: an XML parser context
477 * @error: the error number
478 * @msg: the error message
479 *
480 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
481 */
482static void
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000483xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
484 const char *msg)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000485{
Daniel Veillard157fee02003-10-31 10:36:03 +0000486 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
487 (ctxt->instate == XML_PARSER_EOF))
488 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000489 if (ctxt != NULL)
490 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000491 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000492 XML_ERR_FATAL, NULL, 0, NULL, NULL, NULL, 0, 0, msg);
Daniel Veillard30e76072006-03-09 14:13:55 +0000493 if (ctxt != NULL) {
494 ctxt->wellFormed = 0;
495 if (ctxt->recovery == 0)
496 ctxt->disableSAX = 1;
497 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000498}
499
500/**
Daniel Veillard24eb9782003-10-04 21:08:09 +0000501 * xmlWarningMsg:
502 * @ctxt: an XML parser context
503 * @error: the error number
504 * @msg: the error message
505 * @str1: extra data
506 * @str2: extra data
507 *
508 * Handle a warning.
509 */
510static void
511xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
512 const char *msg, const xmlChar *str1, const xmlChar *str2)
513{
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000514 xmlStructuredErrorFunc schannel = NULL;
Daniel Veillard34e3f642008-07-29 09:02:27 +0000515
Daniel Veillard157fee02003-10-31 10:36:03 +0000516 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
517 (ctxt->instate == XML_PARSER_EOF))
518 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000519 if ((ctxt != NULL) && (ctxt->sax != NULL) &&
520 (ctxt->sax->initialized == XML_SAX2_MAGIC))
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000521 schannel = ctxt->sax->serror;
522 __xmlRaiseError(schannel,
Daniel Veillard659e71e2003-10-10 14:10:40 +0000523 (ctxt->sax) ? ctxt->sax->warning : NULL,
524 ctxt->userData,
Daniel Veillard24eb9782003-10-04 21:08:09 +0000525 ctxt, NULL, XML_FROM_PARSER, error,
526 XML_ERR_WARNING, NULL, 0,
527 (const char *) str1, (const char *) str2, NULL, 0, 0,
528 msg, (const char *) str1, (const char *) str2);
529}
530
531/**
532 * xmlValidityError:
533 * @ctxt: an XML parser context
534 * @error: the error number
535 * @msg: the error message
536 * @str1: extra data
537 *
Daniel Veillardf88d8cf2003-12-08 10:25:02 +0000538 * Handle a validity error.
Daniel Veillard24eb9782003-10-04 21:08:09 +0000539 */
540static void
541xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error,
Daniel Veillardae0765b2008-07-31 19:54:59 +0000542 const char *msg, const xmlChar *str1, const xmlChar *str2)
Daniel Veillard24eb9782003-10-04 21:08:09 +0000543{
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000544 xmlStructuredErrorFunc schannel = NULL;
Daniel Veillard157fee02003-10-31 10:36:03 +0000545
546 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
547 (ctxt->instate == XML_PARSER_EOF))
548 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000549 if (ctxt != NULL) {
550 ctxt->errNo = error;
551 if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC))
552 schannel = ctxt->sax->serror;
553 }
Daniel Veillardc790bf42003-10-11 10:50:10 +0000554 __xmlRaiseError(schannel,
Daniel Veillard659e71e2003-10-10 14:10:40 +0000555 ctxt->vctxt.error, ctxt->vctxt.userData,
Daniel Veillard24eb9782003-10-04 21:08:09 +0000556 ctxt, NULL, XML_FROM_DTD, error,
557 XML_ERR_ERROR, NULL, 0, (const char *) str1,
Daniel Veillardae0765b2008-07-31 19:54:59 +0000558 (const char *) str2, NULL, 0, 0,
559 msg, (const char *) str1, (const char *) str2);
Daniel Veillard30e76072006-03-09 14:13:55 +0000560 if (ctxt != NULL) {
561 ctxt->valid = 0;
562 }
Daniel Veillard24eb9782003-10-04 21:08:09 +0000563}
564
565/**
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000566 * xmlFatalErrMsgInt:
567 * @ctxt: an XML parser context
568 * @error: the error number
569 * @msg: the error message
570 * @val: an integer value
571 *
572 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
573 */
574static void
575xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000576 const char *msg, int val)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000577{
Daniel Veillard157fee02003-10-31 10:36:03 +0000578 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
579 (ctxt->instate == XML_PARSER_EOF))
580 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000581 if (ctxt != NULL)
582 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000583 __xmlRaiseError(NULL, NULL, NULL,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000584 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
585 NULL, 0, NULL, NULL, NULL, val, 0, msg, val);
Daniel Veillard30e76072006-03-09 14:13:55 +0000586 if (ctxt != NULL) {
587 ctxt->wellFormed = 0;
588 if (ctxt->recovery == 0)
589 ctxt->disableSAX = 1;
590 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000591}
592
593/**
Daniel Veillardf403d292003-10-05 13:51:35 +0000594 * xmlFatalErrMsgStrIntStr:
595 * @ctxt: an XML parser context
596 * @error: the error number
597 * @msg: the error message
598 * @str1: an string info
599 * @val: an integer value
600 * @str2: an string info
601 *
602 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
603 */
604static void
605xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
606 const char *msg, const xmlChar *str1, int val,
607 const xmlChar *str2)
608{
Daniel Veillard157fee02003-10-31 10:36:03 +0000609 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
610 (ctxt->instate == XML_PARSER_EOF))
611 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000612 if (ctxt != NULL)
613 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000614 __xmlRaiseError(NULL, NULL, NULL,
Daniel Veillardf403d292003-10-05 13:51:35 +0000615 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
616 NULL, 0, (const char *) str1, (const char *) str2,
617 NULL, val, 0, msg, str1, val, str2);
Daniel Veillard30e76072006-03-09 14:13:55 +0000618 if (ctxt != NULL) {
619 ctxt->wellFormed = 0;
620 if (ctxt->recovery == 0)
621 ctxt->disableSAX = 1;
622 }
Daniel Veillardf403d292003-10-05 13:51:35 +0000623}
624
625/**
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000626 * xmlFatalErrMsgStr:
627 * @ctxt: an XML parser context
628 * @error: the error number
629 * @msg: the error message
630 * @val: a string value
631 *
632 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
633 */
634static void
635xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000636 const char *msg, const xmlChar * val)
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000637{
Daniel Veillard157fee02003-10-31 10:36:03 +0000638 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
639 (ctxt->instate == XML_PARSER_EOF))
640 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000641 if (ctxt != NULL)
642 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000643 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000644 XML_FROM_PARSER, error, XML_ERR_FATAL,
645 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
646 val);
Daniel Veillard30e76072006-03-09 14:13:55 +0000647 if (ctxt != NULL) {
648 ctxt->wellFormed = 0;
649 if (ctxt->recovery == 0)
650 ctxt->disableSAX = 1;
651 }
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000652}
653
654/**
Daniel Veillardf403d292003-10-05 13:51:35 +0000655 * xmlErrMsgStr:
656 * @ctxt: an XML parser context
657 * @error: the error number
658 * @msg: the error message
659 * @val: a string value
660 *
661 * Handle a non fatal parser error
662 */
663static void
664xmlErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
665 const char *msg, const xmlChar * val)
666{
Daniel Veillard157fee02003-10-31 10:36:03 +0000667 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
668 (ctxt->instate == XML_PARSER_EOF))
669 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000670 if (ctxt != NULL)
671 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000672 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
Daniel Veillardf403d292003-10-05 13:51:35 +0000673 XML_FROM_PARSER, error, XML_ERR_ERROR,
674 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
675 val);
676}
677
678/**
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000679 * xmlNsErr:
680 * @ctxt: an XML parser context
681 * @error: the error number
682 * @msg: the message
683 * @info1: extra information string
684 * @info2: extra information string
685 *
686 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
687 */
688static void
689xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
690 const char *msg,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000691 const xmlChar * info1, const xmlChar * info2,
692 const xmlChar * info3)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000693{
Daniel Veillard157fee02003-10-31 10:36:03 +0000694 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
695 (ctxt->instate == XML_PARSER_EOF))
696 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000697 if (ctxt != NULL)
698 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000699 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000700 XML_ERR_ERROR, NULL, 0, (const char *) info1,
701 (const char *) info2, (const char *) info3, 0, 0, msg,
702 info1, info2, info3);
Daniel Veillard30e76072006-03-09 14:13:55 +0000703 if (ctxt != NULL)
704 ctxt->nsWellFormed = 0;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000705}
706
Daniel Veillard37334572008-07-31 08:20:02 +0000707/**
708 * xmlNsWarn
709 * @ctxt: an XML parser context
710 * @error: the error number
711 * @msg: the message
712 * @info1: extra information string
713 * @info2: extra information string
714 *
715 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
716 */
717static void
718xmlNsWarn(xmlParserCtxtPtr ctxt, xmlParserErrors error,
719 const char *msg,
720 const xmlChar * info1, const xmlChar * info2,
721 const xmlChar * info3)
722{
723 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
724 (ctxt->instate == XML_PARSER_EOF))
725 return;
726 if (ctxt != NULL)
727 ctxt->errNo = error;
728 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
729 XML_ERR_WARNING, NULL, 0, (const char *) info1,
730 (const char *) info2, (const char *) info3, 0, 0, msg,
731 info1, info2, info3);
732}
733
/************************************************************************
 *                                                                      *
 *                      Library wide options                            *
 *                                                                      *
 ************************************************************************/
739
740/**
741 * xmlHasFeature:
742 * @feature: the feature to be examined
743 *
744 * Examines if the library has been compiled with a given feature.
745 *
746 * Returns a non-zero value if the feature exist, otherwise zero.
747 * Returns zero (0) if the feature does not exist or an unknown
748 * unknown feature is requested, non-zero otherwise.
749 */
750int
751xmlHasFeature(xmlFeature feature)
752{
753 switch (feature) {
Daniel Veillard602434d2005-09-12 09:20:31 +0000754 case XML_WITH_THREAD:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000755#ifdef LIBXML_THREAD_ENABLED
756 return(1);
757#else
758 return(0);
759#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000760 case XML_WITH_TREE:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000761#ifdef LIBXML_TREE_ENABLED
762 return(1);
763#else
764 return(0);
765#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000766 case XML_WITH_OUTPUT:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000767#ifdef LIBXML_OUTPUT_ENABLED
768 return(1);
769#else
770 return(0);
771#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000772 case XML_WITH_PUSH:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000773#ifdef LIBXML_PUSH_ENABLED
774 return(1);
775#else
776 return(0);
777#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000778 case XML_WITH_READER:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000779#ifdef LIBXML_READER_ENABLED
780 return(1);
781#else
782 return(0);
783#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000784 case XML_WITH_PATTERN:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000785#ifdef LIBXML_PATTERN_ENABLED
786 return(1);
787#else
788 return(0);
789#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000790 case XML_WITH_WRITER:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000791#ifdef LIBXML_WRITER_ENABLED
792 return(1);
793#else
794 return(0);
795#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000796 case XML_WITH_SAX1:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000797#ifdef LIBXML_SAX1_ENABLED
798 return(1);
799#else
800 return(0);
801#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000802 case XML_WITH_FTP:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000803#ifdef LIBXML_FTP_ENABLED
804 return(1);
805#else
806 return(0);
807#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000808 case XML_WITH_HTTP:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000809#ifdef LIBXML_HTTP_ENABLED
810 return(1);
811#else
812 return(0);
813#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000814 case XML_WITH_VALID:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000815#ifdef LIBXML_VALID_ENABLED
816 return(1);
817#else
818 return(0);
819#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000820 case XML_WITH_HTML:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000821#ifdef LIBXML_HTML_ENABLED
822 return(1);
823#else
824 return(0);
825#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000826 case XML_WITH_LEGACY:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000827#ifdef LIBXML_LEGACY_ENABLED
828 return(1);
829#else
830 return(0);
831#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000832 case XML_WITH_C14N:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000833#ifdef LIBXML_C14N_ENABLED
834 return(1);
835#else
836 return(0);
837#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000838 case XML_WITH_CATALOG:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000839#ifdef LIBXML_CATALOG_ENABLED
840 return(1);
841#else
842 return(0);
843#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000844 case XML_WITH_XPATH:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000845#ifdef LIBXML_XPATH_ENABLED
846 return(1);
847#else
848 return(0);
849#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000850 case XML_WITH_XPTR:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000851#ifdef LIBXML_XPTR_ENABLED
852 return(1);
853#else
854 return(0);
855#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000856 case XML_WITH_XINCLUDE:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000857#ifdef LIBXML_XINCLUDE_ENABLED
858 return(1);
859#else
860 return(0);
861#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000862 case XML_WITH_ICONV:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000863#ifdef LIBXML_ICONV_ENABLED
864 return(1);
865#else
866 return(0);
867#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000868 case XML_WITH_ISO8859X:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000869#ifdef LIBXML_ISO8859X_ENABLED
870 return(1);
871#else
872 return(0);
873#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000874 case XML_WITH_UNICODE:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000875#ifdef LIBXML_UNICODE_ENABLED
876 return(1);
877#else
878 return(0);
879#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000880 case XML_WITH_REGEXP:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000881#ifdef LIBXML_REGEXP_ENABLED
882 return(1);
883#else
884 return(0);
885#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000886 case XML_WITH_AUTOMATA:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000887#ifdef LIBXML_AUTOMATA_ENABLED
888 return(1);
889#else
890 return(0);
891#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000892 case XML_WITH_EXPR:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000893#ifdef LIBXML_EXPR_ENABLED
894 return(1);
895#else
896 return(0);
897#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000898 case XML_WITH_SCHEMAS:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000899#ifdef LIBXML_SCHEMAS_ENABLED
900 return(1);
901#else
902 return(0);
903#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000904 case XML_WITH_SCHEMATRON:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000905#ifdef LIBXML_SCHEMATRON_ENABLED
906 return(1);
907#else
908 return(0);
909#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000910 case XML_WITH_MODULES:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000911#ifdef LIBXML_MODULES_ENABLED
912 return(1);
913#else
914 return(0);
915#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000916 case XML_WITH_DEBUG:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000917#ifdef LIBXML_DEBUG_ENABLED
918 return(1);
919#else
920 return(0);
921#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000922 case XML_WITH_DEBUG_MEM:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000923#ifdef DEBUG_MEMORY_LOCATION
924 return(1);
925#else
926 return(0);
927#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000928 case XML_WITH_DEBUG_RUN:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000929#ifdef LIBXML_DEBUG_RUNTIME
930 return(1);
931#else
932 return(0);
Daniel Veillard34e3f642008-07-29 09:02:27 +0000933#endif
Daniel Veillard75acfee2006-07-13 06:29:56 +0000934 case XML_WITH_ZLIB:
935#ifdef LIBXML_ZLIB_ENABLED
936 return(1);
937#else
938 return(0);
939#endif
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000940 default:
941 break;
942 }
943 return(0);
944}
945
/************************************************************************
 *                                                                      *
 *              SAX2 defaulted attributes handling                      *
 *                                                                      *
 ************************************************************************/
951
952/**
953 * xmlDetectSAX2:
954 * @ctxt: an XML parser context
955 *
956 * Do the SAX2 detection and specific intialization
957 */
958static void
959xmlDetectSAX2(xmlParserCtxtPtr ctxt) {
960 if (ctxt == NULL) return;
Daniel Veillard81273902003-09-30 00:43:48 +0000961#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +0000962 if ((ctxt->sax) && (ctxt->sax->initialized == XML_SAX2_MAGIC) &&
963 ((ctxt->sax->startElementNs != NULL) ||
964 (ctxt->sax->endElementNs != NULL))) ctxt->sax2 = 1;
Daniel Veillard81273902003-09-30 00:43:48 +0000965#else
966 ctxt->sax2 = 1;
967#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +0000968
969 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
970 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
971 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
William M. Brack9f797ab2004-07-28 07:40:12 +0000972 if ((ctxt->str_xml==NULL) || (ctxt->str_xmlns==NULL) ||
973 (ctxt->str_xml_ns == NULL)) {
974 xmlErrMemory(ctxt, NULL);
975 }
Daniel Veillarde57ec792003-09-10 10:50:59 +0000976}
977
Daniel Veillarde57ec792003-09-10 10:50:59 +0000978typedef struct _xmlDefAttrs xmlDefAttrs;
979typedef xmlDefAttrs *xmlDefAttrsPtr;
980struct _xmlDefAttrs {
981 int nbAttrs; /* number of defaulted attributes on that element */
982 int maxAttrs; /* the size of the array */
Daniel Veillardae0765b2008-07-31 19:54:59 +0000983 const xmlChar *values[5]; /* array of localname/prefix/values/external */
Daniel Veillarde57ec792003-09-10 10:50:59 +0000984};
Daniel Veillarde57ec792003-09-10 10:50:59 +0000985
986/**
Daniel Veillard97c9ce22008-03-25 16:52:41 +0000987 * xmlAttrNormalizeSpace:
988 * @src: the source string
989 * @dst: the target string
990 *
991 * Normalize the space in non CDATA attribute values:
992 * If the attribute type is not CDATA, then the XML processor MUST further
993 * process the normalized attribute value by discarding any leading and
994 * trailing space (#x20) characters, and by replacing sequences of space
995 * (#x20) characters by a single space (#x20) character.
996 * Note that the size of dst need to be at least src, and if one doesn't need
997 * to preserve dst (and it doesn't come from a dictionary or read-only) then
998 * passing src as dst is just fine.
999 *
1000 * Returns a pointer to the normalized value (dst) or NULL if no conversion
1001 * is needed.
1002 */
1003static xmlChar *
1004xmlAttrNormalizeSpace(const xmlChar *src, xmlChar *dst)
1005{
1006 if ((src == NULL) || (dst == NULL))
1007 return(NULL);
1008
1009 while (*src == 0x20) src++;
1010 while (*src != 0) {
1011 if (*src == 0x20) {
1012 while (*src == 0x20) src++;
1013 if (*src != 0)
1014 *dst++ = 0x20;
1015 } else {
1016 *dst++ = *src++;
1017 }
1018 }
1019 *dst = 0;
1020 if (dst == src)
1021 return(NULL);
1022 return(dst);
1023}
1024
1025/**
1026 * xmlAttrNormalizeSpace2:
1027 * @src: the source string
1028 *
1029 * Normalize the space in non CDATA attribute values, a slightly more complex
1030 * front end to avoid allocation problems when running on attribute values
1031 * coming from the input.
1032 *
1033 * Returns a pointer to the normalized value (dst) or NULL if no conversion
1034 * is needed.
1035 */
1036static const xmlChar *
Daniel Veillardae0765b2008-07-31 19:54:59 +00001037xmlAttrNormalizeSpace2(xmlParserCtxtPtr ctxt, xmlChar *src, int *len)
Daniel Veillard97c9ce22008-03-25 16:52:41 +00001038{
1039 int i;
1040 int remove_head = 0;
1041 int need_realloc = 0;
1042 const xmlChar *cur;
1043
1044 if ((ctxt == NULL) || (src == NULL) || (len == NULL))
1045 return(NULL);
1046 i = *len;
1047 if (i <= 0)
1048 return(NULL);
1049
1050 cur = src;
1051 while (*cur == 0x20) {
1052 cur++;
1053 remove_head++;
1054 }
1055 while (*cur != 0) {
1056 if (*cur == 0x20) {
1057 cur++;
1058 if ((*cur == 0x20) || (*cur == 0)) {
1059 need_realloc = 1;
1060 break;
1061 }
1062 } else
1063 cur++;
1064 }
1065 if (need_realloc) {
1066 xmlChar *ret;
1067
1068 ret = xmlStrndup(src + remove_head, i - remove_head + 1);
1069 if (ret == NULL) {
1070 xmlErrMemory(ctxt, NULL);
1071 return(NULL);
1072 }
1073 xmlAttrNormalizeSpace(ret, ret);
1074 *len = (int) strlen((const char *)ret);
1075 return(ret);
1076 } else if (remove_head) {
1077 *len -= remove_head;
Daniel Veillardae0765b2008-07-31 19:54:59 +00001078 memmove(src, src + remove_head, 1 + *len);
1079 return(src);
Daniel Veillard97c9ce22008-03-25 16:52:41 +00001080 }
1081 return(NULL);
1082}
1083
1084/**
Daniel Veillarde57ec792003-09-10 10:50:59 +00001085 * xmlAddDefAttrs:
1086 * @ctxt: an XML parser context
1087 * @fullname: the element fullname
1088 * @fullattr: the attribute fullname
1089 * @value: the attribute value
1090 *
1091 * Add a defaulted attribute for an element
1092 */
1093static void
1094xmlAddDefAttrs(xmlParserCtxtPtr ctxt,
1095 const xmlChar *fullname,
1096 const xmlChar *fullattr,
1097 const xmlChar *value) {
1098 xmlDefAttrsPtr defaults;
1099 int len;
1100 const xmlChar *name;
1101 const xmlChar *prefix;
1102
Daniel Veillard6a31b832008-03-26 14:06:44 +00001103 /*
1104 * Allows to detect attribute redefinitions
1105 */
1106 if (ctxt->attsSpecial != NULL) {
1107 if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1108 return;
1109 }
1110
Daniel Veillarde57ec792003-09-10 10:50:59 +00001111 if (ctxt->attsDefault == NULL) {
Daniel Veillard316a5c32005-01-23 22:56:39 +00001112 ctxt->attsDefault = xmlHashCreateDict(10, ctxt->dict);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001113 if (ctxt->attsDefault == NULL)
1114 goto mem_error;
1115 }
1116
1117 /*
Daniel Veillard079f6a72004-09-23 13:15:03 +00001118 * split the element name into prefix:localname , the string found
1119 * are within the DTD and then not associated to namespace names.
Daniel Veillarde57ec792003-09-10 10:50:59 +00001120 */
1121 name = xmlSplitQName3(fullname, &len);
1122 if (name == NULL) {
1123 name = xmlDictLookup(ctxt->dict, fullname, -1);
1124 prefix = NULL;
1125 } else {
1126 name = xmlDictLookup(ctxt->dict, name, -1);
1127 prefix = xmlDictLookup(ctxt->dict, fullname, len);
1128 }
1129
1130 /*
1131 * make sure there is some storage
1132 */
1133 defaults = xmlHashLookup2(ctxt->attsDefault, name, prefix);
1134 if (defaults == NULL) {
1135 defaults = (xmlDefAttrsPtr) xmlMalloc(sizeof(xmlDefAttrs) +
Daniel Veillardae0765b2008-07-31 19:54:59 +00001136 (4 * 5) * sizeof(const xmlChar *));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001137 if (defaults == NULL)
1138 goto mem_error;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001139 defaults->nbAttrs = 0;
Daniel Veillard079f6a72004-09-23 13:15:03 +00001140 defaults->maxAttrs = 4;
Daniel Veillard68b6e022008-03-31 09:26:00 +00001141 if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1142 defaults, NULL) < 0) {
1143 xmlFree(defaults);
1144 goto mem_error;
1145 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00001146 } else if (defaults->nbAttrs >= defaults->maxAttrs) {
Daniel Veillard079f6a72004-09-23 13:15:03 +00001147 xmlDefAttrsPtr temp;
1148
1149 temp = (xmlDefAttrsPtr) xmlRealloc(defaults, sizeof(xmlDefAttrs) +
Daniel Veillardae0765b2008-07-31 19:54:59 +00001150 (2 * defaults->maxAttrs * 5) * sizeof(const xmlChar *));
Daniel Veillard079f6a72004-09-23 13:15:03 +00001151 if (temp == NULL)
Daniel Veillarde57ec792003-09-10 10:50:59 +00001152 goto mem_error;
Daniel Veillard079f6a72004-09-23 13:15:03 +00001153 defaults = temp;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001154 defaults->maxAttrs *= 2;
Daniel Veillard68b6e022008-03-31 09:26:00 +00001155 if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1156 defaults, NULL) < 0) {
1157 xmlFree(defaults);
1158 goto mem_error;
1159 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00001160 }
1161
1162 /*
Daniel Veillard8874b942005-08-25 13:19:21 +00001163 * Split the element name into prefix:localname , the string found
Daniel Veillarde57ec792003-09-10 10:50:59 +00001164 * are within the DTD and hen not associated to namespace names.
1165 */
1166 name = xmlSplitQName3(fullattr, &len);
1167 if (name == NULL) {
1168 name = xmlDictLookup(ctxt->dict, fullattr, -1);
1169 prefix = NULL;
1170 } else {
1171 name = xmlDictLookup(ctxt->dict, name, -1);
1172 prefix = xmlDictLookup(ctxt->dict, fullattr, len);
1173 }
1174
Daniel Veillardae0765b2008-07-31 19:54:59 +00001175 defaults->values[5 * defaults->nbAttrs] = name;
1176 defaults->values[5 * defaults->nbAttrs + 1] = prefix;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001177 /* intern the string and precompute the end */
1178 len = xmlStrlen(value);
1179 value = xmlDictLookup(ctxt->dict, value, len);
Daniel Veillardae0765b2008-07-31 19:54:59 +00001180 defaults->values[5 * defaults->nbAttrs + 2] = value;
1181 defaults->values[5 * defaults->nbAttrs + 3] = value + len;
1182 if (ctxt->external)
1183 defaults->values[5 * defaults->nbAttrs + 4] = BAD_CAST "external";
1184 else
1185 defaults->values[5 * defaults->nbAttrs + 4] = NULL;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001186 defaults->nbAttrs++;
1187
1188 return;
1189
1190mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001191 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001192 return;
1193}
1194
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001195/**
1196 * xmlAddSpecialAttr:
1197 * @ctxt: an XML parser context
1198 * @fullname: the element fullname
1199 * @fullattr: the attribute fullname
1200 * @type: the attribute type
1201 *
Daniel Veillardac4118d2008-01-11 05:27:32 +00001202 * Register this attribute type
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001203 */
1204static void
1205xmlAddSpecialAttr(xmlParserCtxtPtr ctxt,
1206 const xmlChar *fullname,
1207 const xmlChar *fullattr,
1208 int type)
1209{
1210 if (ctxt->attsSpecial == NULL) {
Daniel Veillard316a5c32005-01-23 22:56:39 +00001211 ctxt->attsSpecial = xmlHashCreateDict(10, ctxt->dict);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001212 if (ctxt->attsSpecial == NULL)
1213 goto mem_error;
1214 }
1215
Daniel Veillardac4118d2008-01-11 05:27:32 +00001216 if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1217 return;
1218
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +00001219 xmlHashAddEntry2(ctxt->attsSpecial, fullname, fullattr,
1220 (void *) (long) type);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001221 return;
1222
1223mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001224 xmlErrMemory(ctxt, NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001225 return;
1226}
1227
Daniel Veillard4432df22003-09-28 18:58:27 +00001228/**
Daniel Veillardac4118d2008-01-11 05:27:32 +00001229 * xmlCleanSpecialAttrCallback:
1230 *
1231 * Removes CDATA attributes from the special attribute table
1232 */
1233static void
1234xmlCleanSpecialAttrCallback(void *payload, void *data,
1235 const xmlChar *fullname, const xmlChar *fullattr,
1236 const xmlChar *unused ATTRIBUTE_UNUSED) {
1237 xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) data;
1238
Daniel Veillardb3edafd2008-01-11 08:00:57 +00001239 if (((long) payload) == XML_ATTRIBUTE_CDATA) {
Daniel Veillardac4118d2008-01-11 05:27:32 +00001240 xmlHashRemoveEntry2(ctxt->attsSpecial, fullname, fullattr, NULL);
1241 }
1242}
1243
1244/**
1245 * xmlCleanSpecialAttr:
1246 * @ctxt: an XML parser context
1247 *
1248 * Trim the list of attributes defined to remove all those of type
1249 * CDATA as they are not special. This call should be done when finishing
1250 * to parse the DTD and before starting to parse the document root.
1251 */
1252static void
1253xmlCleanSpecialAttr(xmlParserCtxtPtr ctxt)
1254{
1255 if (ctxt->attsSpecial == NULL)
1256 return;
1257
1258 xmlHashScanFull(ctxt->attsSpecial, xmlCleanSpecialAttrCallback, ctxt);
1259
1260 if (xmlHashSize(ctxt->attsSpecial) == 0) {
1261 xmlHashFree(ctxt->attsSpecial, NULL);
1262 ctxt->attsSpecial = NULL;
1263 }
1264 return;
1265}
1266
1267/**
Daniel Veillard4432df22003-09-28 18:58:27 +00001268 * xmlCheckLanguageID:
1269 * @lang: pointer to the string value
1270 *
1271 * Checks that the value conforms to the LanguageID production:
1272 *
1273 * NOTE: this is somewhat deprecated, those productions were removed from
1274 * the XML Second edition.
1275 *
1276 * [33] LanguageID ::= Langcode ('-' Subcode)*
1277 * [34] Langcode ::= ISO639Code | IanaCode | UserCode
1278 * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
1279 * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
1280 * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
1281 * [38] Subcode ::= ([a-z] | [A-Z])+
1282 *
1283 * Returns 1 if correct 0 otherwise
1284 **/
1285int
1286xmlCheckLanguageID(const xmlChar * lang)
1287{
1288 const xmlChar *cur = lang;
1289
1290 if (cur == NULL)
1291 return (0);
1292 if (((cur[0] == 'i') && (cur[1] == '-')) ||
1293 ((cur[0] == 'I') && (cur[1] == '-'))) {
1294 /*
1295 * IANA code
1296 */
1297 cur += 2;
1298 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
1299 ((cur[0] >= 'a') && (cur[0] <= 'z')))
1300 cur++;
1301 } else if (((cur[0] == 'x') && (cur[1] == '-')) ||
1302 ((cur[0] == 'X') && (cur[1] == '-'))) {
1303 /*
1304 * User code
1305 */
1306 cur += 2;
1307 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
1308 ((cur[0] >= 'a') && (cur[0] <= 'z')))
1309 cur++;
1310 } else if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
1311 ((cur[0] >= 'a') && (cur[0] <= 'z'))) {
1312 /*
1313 * ISO639
1314 */
1315 cur++;
1316 if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
1317 ((cur[0] >= 'a') && (cur[0] <= 'z')))
1318 cur++;
1319 else
1320 return (0);
1321 } else
1322 return (0);
1323 while (cur[0] != 0) { /* non input consuming */
1324 if (cur[0] != '-')
1325 return (0);
1326 cur++;
1327 if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
1328 ((cur[0] >= 'a') && (cur[0] <= 'z')))
1329 cur++;
1330 else
1331 return (0);
1332 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
1333 ((cur[0] >= 'a') && (cur[0] <= 'z')))
1334 cur++;
1335 }
1336 return (1);
1337}
1338
/************************************************************************
 *                                                                      *
 *              Parser stacks related functions and macros              *
 *                                                                      *
 ************************************************************************/
1344
1345xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
1346 const xmlChar ** str);
1347
Daniel Veillard0fb18932003-09-07 09:14:37 +00001348#ifdef SAX2
1349/**
1350 * nsPush:
1351 * @ctxt: an XML parser context
1352 * @prefix: the namespace prefix or NULL
1353 * @URL: the namespace name
1354 *
1355 * Pushes a new parser namespace on top of the ns stack
1356 *
William M. Brack7b9154b2003-09-27 19:23:50 +00001357 * Returns -1 in case of error, -2 if the namespace should be discarded
1358 * and the index in the stack otherwise.
Daniel Veillard0fb18932003-09-07 09:14:37 +00001359 */
1360static int
1361nsPush(xmlParserCtxtPtr ctxt, const xmlChar *prefix, const xmlChar *URL)
1362{
Daniel Veillarddca8cc72003-09-26 13:53:14 +00001363 if (ctxt->options & XML_PARSE_NSCLEAN) {
1364 int i;
1365 for (i = 0;i < ctxt->nsNr;i += 2) {
1366 if (ctxt->nsTab[i] == prefix) {
1367 /* in scope */
1368 if (ctxt->nsTab[i + 1] == URL)
1369 return(-2);
1370 /* out of scope keep it */
1371 break;
1372 }
1373 }
1374 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00001375 if ((ctxt->nsMax == 0) || (ctxt->nsTab == NULL)) {
1376 ctxt->nsMax = 10;
1377 ctxt->nsNr = 0;
1378 ctxt->nsTab = (const xmlChar **)
1379 xmlMalloc(ctxt->nsMax * sizeof(xmlChar *));
1380 if (ctxt->nsTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001381 xmlErrMemory(ctxt, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00001382 ctxt->nsMax = 0;
1383 return (-1);
1384 }
1385 } else if (ctxt->nsNr >= ctxt->nsMax) {
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00001386 const xmlChar ** tmp;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001387 ctxt->nsMax *= 2;
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00001388 tmp = (const xmlChar **) xmlRealloc((char *) ctxt->nsTab,
1389 ctxt->nsMax * sizeof(ctxt->nsTab[0]));
1390 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001391 xmlErrMemory(ctxt, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00001392 ctxt->nsMax /= 2;
1393 return (-1);
1394 }
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00001395 ctxt->nsTab = tmp;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001396 }
1397 ctxt->nsTab[ctxt->nsNr++] = prefix;
1398 ctxt->nsTab[ctxt->nsNr++] = URL;
1399 return (ctxt->nsNr);
1400}
1401/**
1402 * nsPop:
1403 * @ctxt: an XML parser context
1404 * @nr: the number to pop
1405 *
1406 * Pops the top @nr parser prefix/namespace from the ns stack
1407 *
1408 * Returns the number of namespaces removed
1409 */
1410static int
1411nsPop(xmlParserCtxtPtr ctxt, int nr)
1412{
1413 int i;
1414
1415 if (ctxt->nsTab == NULL) return(0);
1416 if (ctxt->nsNr < nr) {
1417 xmlGenericError(xmlGenericErrorContext, "Pbm popping %d NS\n", nr);
1418 nr = ctxt->nsNr;
1419 }
1420 if (ctxt->nsNr <= 0)
1421 return (0);
Daniel Veillard34e3f642008-07-29 09:02:27 +00001422
Daniel Veillard0fb18932003-09-07 09:14:37 +00001423 for (i = 0;i < nr;i++) {
1424 ctxt->nsNr--;
1425 ctxt->nsTab[ctxt->nsNr] = NULL;
1426 }
1427 return(nr);
1428}
1429#endif
1430
1431static int
1432xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt, int nr) {
1433 const xmlChar **atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001434 int *attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001435 int maxatts;
1436
1437 if (ctxt->atts == NULL) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00001438 maxatts = 55; /* allow for 10 attrs by default */
Daniel Veillard0fb18932003-09-07 09:14:37 +00001439 atts = (const xmlChar **)
1440 xmlMalloc(maxatts * sizeof(xmlChar *));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001441 if (atts == NULL) goto mem_error;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001442 ctxt->atts = atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001443 attallocs = (int *) xmlMalloc((maxatts / 5) * sizeof(int));
1444 if (attallocs == NULL) goto mem_error;
1445 ctxt->attallocs = attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001446 ctxt->maxatts = maxatts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001447 } else if (nr + 5 > ctxt->maxatts) {
1448 maxatts = (nr + 5) * 2;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001449 atts = (const xmlChar **) xmlRealloc((void *) ctxt->atts,
1450 maxatts * sizeof(const xmlChar *));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001451 if (atts == NULL) goto mem_error;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001452 ctxt->atts = atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001453 attallocs = (int *) xmlRealloc((void *) ctxt->attallocs,
1454 (maxatts / 5) * sizeof(int));
1455 if (attallocs == NULL) goto mem_error;
1456 ctxt->attallocs = attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001457 ctxt->maxatts = maxatts;
1458 }
1459 return(ctxt->maxatts);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001460mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001461 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001462 return(-1);
Daniel Veillard0fb18932003-09-07 09:14:37 +00001463}
1464
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001465/**
1466 * inputPush:
1467 * @ctxt: an XML parser context
Daniel Veillard9d06d302002-01-22 18:15:52 +00001468 * @value: the parser input
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001469 *
1470 * Pushes a new parser input on top of the input stack
Daniel Veillard9d06d302002-01-22 18:15:52 +00001471 *
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001472 * Returns -1 in case of error, the index in the stack otherwise
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001473 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001474int
Daniel Veillard1c732d22002-11-30 11:22:59 +00001475inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value)
1476{
Daniel Veillard36e5cd52004-11-02 14:52:23 +00001477 if ((ctxt == NULL) || (value == NULL))
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001478 return(-1);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001479 if (ctxt->inputNr >= ctxt->inputMax) {
1480 ctxt->inputMax *= 2;
1481 ctxt->inputTab =
1482 (xmlParserInputPtr *) xmlRealloc(ctxt->inputTab,
1483 ctxt->inputMax *
1484 sizeof(ctxt->inputTab[0]));
1485 if (ctxt->inputTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001486 xmlErrMemory(ctxt, NULL);
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001487 xmlFreeInputStream(value);
1488 ctxt->inputMax /= 2;
1489 value = NULL;
1490 return (-1);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001491 }
1492 }
1493 ctxt->inputTab[ctxt->inputNr] = value;
1494 ctxt->input = value;
1495 return (ctxt->inputNr++);
1496}
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001497/**
Daniel Veillard1c732d22002-11-30 11:22:59 +00001498 * inputPop:
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001499 * @ctxt: an XML parser context
1500 *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001501 * Pops the top parser input from the input stack
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001502 *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001503 * Returns the input just removed
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001504 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001505xmlParserInputPtr
Daniel Veillard1c732d22002-11-30 11:22:59 +00001506inputPop(xmlParserCtxtPtr ctxt)
1507{
1508 xmlParserInputPtr ret;
1509
Daniel Veillard36e5cd52004-11-02 14:52:23 +00001510 if (ctxt == NULL)
1511 return(NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001512 if (ctxt->inputNr <= 0)
Daniel Veillard24505b02005-07-28 23:49:35 +00001513 return (NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001514 ctxt->inputNr--;
1515 if (ctxt->inputNr > 0)
1516 ctxt->input = ctxt->inputTab[ctxt->inputNr - 1];
1517 else
1518 ctxt->input = NULL;
1519 ret = ctxt->inputTab[ctxt->inputNr];
Daniel Veillard24505b02005-07-28 23:49:35 +00001520 ctxt->inputTab[ctxt->inputNr] = NULL;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001521 return (ret);
1522}
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001523/**
1524 * nodePush:
1525 * @ctxt: an XML parser context
Daniel Veillard9d06d302002-01-22 18:15:52 +00001526 * @value: the element node
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001527 *
1528 * Pushes a new element node on top of the node stack
Daniel Veillard9d06d302002-01-22 18:15:52 +00001529 *
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001530 * Returns -1 in case of error, the index in the stack otherwise
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001531 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001532int
Daniel Veillard1c732d22002-11-30 11:22:59 +00001533nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value)
1534{
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001535 if (ctxt == NULL) return(0);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001536 if (ctxt->nodeNr >= ctxt->nodeMax) {
Daniel Veillardd0cf7f62004-11-09 16:17:02 +00001537 xmlNodePtr *tmp;
1538
1539 tmp = (xmlNodePtr *) xmlRealloc(ctxt->nodeTab,
1540 ctxt->nodeMax * 2 *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001541 sizeof(ctxt->nodeTab[0]));
Daniel Veillardd0cf7f62004-11-09 16:17:02 +00001542 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001543 xmlErrMemory(ctxt, NULL);
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001544 return (-1);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001545 }
Daniel Veillardd0cf7f62004-11-09 16:17:02 +00001546 ctxt->nodeTab = tmp;
1547 ctxt->nodeMax *= 2;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001548 }
Daniel Veillard8915c152008-08-26 13:05:34 +00001549 if ((((unsigned int) ctxt->nodeNr) > xmlParserMaxDepth) &&
1550 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00001551 xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
Daniel Veillard8915c152008-08-26 13:05:34 +00001552 "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
Daniel Veillard4aede2e2003-10-17 12:43:59 +00001553 xmlParserMaxDepth);
Daniel Veillard3b2e4e12003-02-03 08:52:58 +00001554 ctxt->instate = XML_PARSER_EOF;
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001555 return(-1);
Daniel Veillard3b2e4e12003-02-03 08:52:58 +00001556 }
Daniel Veillard1c732d22002-11-30 11:22:59 +00001557 ctxt->nodeTab[ctxt->nodeNr] = value;
1558 ctxt->node = value;
1559 return (ctxt->nodeNr++);
1560}
Daniel Veillard8915c152008-08-26 13:05:34 +00001561
Daniel Veillard1c732d22002-11-30 11:22:59 +00001562/**
1563 * nodePop:
1564 * @ctxt: an XML parser context
1565 *
1566 * Pops the top element node from the node stack
1567 *
1568 * Returns the node just removed
Owen Taylor3473f882001-02-23 17:55:21 +00001569 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001570xmlNodePtr
Daniel Veillard1c732d22002-11-30 11:22:59 +00001571nodePop(xmlParserCtxtPtr ctxt)
1572{
1573 xmlNodePtr ret;
1574
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001575 if (ctxt == NULL) return(NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001576 if (ctxt->nodeNr <= 0)
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001577 return (NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001578 ctxt->nodeNr--;
1579 if (ctxt->nodeNr > 0)
1580 ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1];
1581 else
1582 ctxt->node = NULL;
1583 ret = ctxt->nodeTab[ctxt->nodeNr];
Daniel Veillard24505b02005-07-28 23:49:35 +00001584 ctxt->nodeTab[ctxt->nodeNr] = NULL;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001585 return (ret);
1586}
Daniel Veillarda2351322004-06-27 12:08:10 +00001587
1588#ifdef LIBXML_PUSH_ENABLED
Daniel Veillard1c732d22002-11-30 11:22:59 +00001589/**
Daniel Veillarde57ec792003-09-10 10:50:59 +00001590 * nameNsPush:
1591 * @ctxt: an XML parser context
1592 * @value: the element name
1593 * @prefix: the element prefix
1594 * @URI: the element namespace name
1595 *
1596 * Pushes a new element name/prefix/URL on top of the name stack
1597 *
1598 * Returns -1 in case of error, the index in the stack otherwise
1599 */
1600static int
1601nameNsPush(xmlParserCtxtPtr ctxt, const xmlChar * value,
1602 const xmlChar *prefix, const xmlChar *URI, int nsNr)
1603{
1604 if (ctxt->nameNr >= ctxt->nameMax) {
1605 const xmlChar * *tmp;
1606 void **tmp2;
1607 ctxt->nameMax *= 2;
1608 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1609 ctxt->nameMax *
1610 sizeof(ctxt->nameTab[0]));
1611 if (tmp == NULL) {
1612 ctxt->nameMax /= 2;
1613 goto mem_error;
1614 }
1615 ctxt->nameTab = tmp;
1616 tmp2 = (void **) xmlRealloc((void * *)ctxt->pushTab,
1617 ctxt->nameMax * 3 *
1618 sizeof(ctxt->pushTab[0]));
1619 if (tmp2 == NULL) {
1620 ctxt->nameMax /= 2;
1621 goto mem_error;
1622 }
1623 ctxt->pushTab = tmp2;
1624 }
1625 ctxt->nameTab[ctxt->nameNr] = value;
1626 ctxt->name = value;
1627 ctxt->pushTab[ctxt->nameNr * 3] = (void *) prefix;
1628 ctxt->pushTab[ctxt->nameNr * 3 + 1] = (void *) URI;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +00001629 ctxt->pushTab[ctxt->nameNr * 3 + 2] = (void *) (long) nsNr;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001630 return (ctxt->nameNr++);
1631mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001632 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001633 return (-1);
1634}
1635/**
1636 * nameNsPop:
1637 * @ctxt: an XML parser context
1638 *
1639 * Pops the top element/prefix/URI name from the name stack
1640 *
1641 * Returns the name just removed
1642 */
1643static const xmlChar *
1644nameNsPop(xmlParserCtxtPtr ctxt)
1645{
1646 const xmlChar *ret;
1647
1648 if (ctxt->nameNr <= 0)
Daniel Veillard24505b02005-07-28 23:49:35 +00001649 return (NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001650 ctxt->nameNr--;
1651 if (ctxt->nameNr > 0)
1652 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1653 else
1654 ctxt->name = NULL;
1655 ret = ctxt->nameTab[ctxt->nameNr];
1656 ctxt->nameTab[ctxt->nameNr] = NULL;
1657 return (ret);
1658}
Daniel Veillarda2351322004-06-27 12:08:10 +00001659#endif /* LIBXML_PUSH_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +00001660
1661/**
Daniel Veillard1c732d22002-11-30 11:22:59 +00001662 * namePush:
1663 * @ctxt: an XML parser context
1664 * @value: the element name
1665 *
1666 * Pushes a new element name on top of the name stack
1667 *
Daniel Veillarde57ec792003-09-10 10:50:59 +00001668 * Returns -1 in case of error, the index in the stack otherwise
Daniel Veillard1c732d22002-11-30 11:22:59 +00001669 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001670int
Daniel Veillard2fdbd322003-08-18 12:15:38 +00001671namePush(xmlParserCtxtPtr ctxt, const xmlChar * value)
Daniel Veillard1c732d22002-11-30 11:22:59 +00001672{
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001673 if (ctxt == NULL) return (-1);
1674
Daniel Veillard1c732d22002-11-30 11:22:59 +00001675 if (ctxt->nameNr >= ctxt->nameMax) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00001676 const xmlChar * *tmp;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001677 ctxt->nameMax *= 2;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001678 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
Daniel Veillard1c732d22002-11-30 11:22:59 +00001679 ctxt->nameMax *
1680 sizeof(ctxt->nameTab[0]));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001681 if (tmp == NULL) {
1682 ctxt->nameMax /= 2;
1683 goto mem_error;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001684 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00001685 ctxt->nameTab = tmp;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001686 }
1687 ctxt->nameTab[ctxt->nameNr] = value;
1688 ctxt->name = value;
1689 return (ctxt->nameNr++);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001690mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001691 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001692 return (-1);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001693}
1694/**
1695 * namePop:
1696 * @ctxt: an XML parser context
1697 *
1698 * Pops the top element name from the name stack
1699 *
1700 * Returns the name just removed
1701 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001702const xmlChar *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001703namePop(xmlParserCtxtPtr ctxt)
1704{
Daniel Veillard2fdbd322003-08-18 12:15:38 +00001705 const xmlChar *ret;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001706
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001707 if ((ctxt == NULL) || (ctxt->nameNr <= 0))
1708 return (NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001709 ctxt->nameNr--;
1710 if (ctxt->nameNr > 0)
1711 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1712 else
1713 ctxt->name = NULL;
1714 ret = ctxt->nameTab[ctxt->nameNr];
Daniel Veillard24505b02005-07-28 23:49:35 +00001715 ctxt->nameTab[ctxt->nameNr] = NULL;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001716 return (ret);
1717}
Owen Taylor3473f882001-02-23 17:55:21 +00001718
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001719static int spacePush(xmlParserCtxtPtr ctxt, int val) {
Owen Taylor3473f882001-02-23 17:55:21 +00001720 if (ctxt->spaceNr >= ctxt->spaceMax) {
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00001721 int *tmp;
1722
Owen Taylor3473f882001-02-23 17:55:21 +00001723 ctxt->spaceMax *= 2;
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00001724 tmp = (int *) xmlRealloc(ctxt->spaceTab,
1725 ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
1726 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001727 xmlErrMemory(ctxt, NULL);
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001728 ctxt->spaceMax /=2;
1729 return(-1);
Owen Taylor3473f882001-02-23 17:55:21 +00001730 }
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00001731 ctxt->spaceTab = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00001732 }
1733 ctxt->spaceTab[ctxt->spaceNr] = val;
1734 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr];
1735 return(ctxt->spaceNr++);
1736}
1737
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001738static int spacePop(xmlParserCtxtPtr ctxt) {
Owen Taylor3473f882001-02-23 17:55:21 +00001739 int ret;
1740 if (ctxt->spaceNr <= 0) return(0);
1741 ctxt->spaceNr--;
1742 if (ctxt->spaceNr > 0)
1743 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1];
1744 else
Daniel Veillarddbcbbd22006-06-18 19:55:20 +00001745 ctxt->space = &ctxt->spaceTab[0];
Owen Taylor3473f882001-02-23 17:55:21 +00001746 ret = ctxt->spaceTab[ctxt->spaceNr];
1747 ctxt->spaceTab[ctxt->spaceNr] = -1;
1748 return(ret);
1749}
1750
/*
 * Macros for accessing the content. Those should be used only by the parser,
 * and not exported.
 *
 * Dirty macros, i.e. one often needs to make assumptions on the context to
 * use them
 *
 *   CUR_PTR return the current pointer to the xmlChar to be parsed.
 *           To be used with extreme caution since operations consuming
 *           characters may move the input buffer to a different location !
 *   CUR     returns the current xmlChar value, i.e. an 8 bit value.
 *           This should be used internally by the parser
 *           only to compare to ASCII values otherwise it would break when
 *           running with UTF-8 encoding.
 *   RAW     same as CUR but in the input buffer, bypass any token
 *           extraction that may have been done
 *   NXT(n)  returns the n'th next xmlChar. Same as CUR it should be used only
 *           to compare on ASCII based substring.
 *   SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
 *           strings without newlines within the parser.
 *   NEXT1   Skip 1 xmlChar, and must also be used only to skip 1 non-newline
 *           ASCII defined char within the parser.
 * Clean macros, not dependent on an ASCII context, expect UTF-8 encoding
 *
 *   NEXT    Skip to the next character, this does the proper decoding
 *           in UTF-8 mode. It also pops up unfinished entities on the fly.
 *   NEXTL(l) Skip the current unicode character of l xmlChars long.
 *   CUR_CHAR(l) returns the current unicode character (int), set l
 *           to the number of xmlChars used for the encoding [0-5].
 *   CUR_SCHAR  same but operate on a string instead of the context
 *   COPY_BUF  copy the current unicode char to the target buffer, increment
 *            the index
 *   GROW, SHRINK  handling of input buffers
 */
1785
/*
 * Direct access to the current input of the parser context. All four
 * expect a variable named ctxt to be in scope at the point of use.
 */
#define RAW (*ctxt->input->cur)
#define CUR (*ctxt->input->cur)
#define NXT(val) ctxt->input->cur[(val)]
#define CUR_PTR ctxt->input->cur

/*
 * CMPn(s, c1, ..., cn) is true when the first n bytes found at s are
 * c1 ... cn. Bytes are read as unsigned char and compared left to right,
 * so the comparison stops at the first mismatch.
 * Every macro argument is parenthesized so that an expression such as
 * "cond ? a : b" or "p + 1" is cast and indexed as a whole; s is
 * evaluated once per byte compared, so it must not have side effects.
 */
#define CMP4( s, c1, c2, c3, c4 ) \
  ( ((const unsigned char *) (s))[ 0 ] == (c1) && \
    ((const unsigned char *) (s))[ 1 ] == (c2) && \
    ((const unsigned char *) (s))[ 2 ] == (c3) && \
    ((const unsigned char *) (s))[ 3 ] == (c4) )
#define CMP5( s, c1, c2, c3, c4, c5 ) \
  ( CMP4( s, c1, c2, c3, c4 ) && \
    ((const unsigned char *) (s))[ 4 ] == (c5) )
#define CMP6( s, c1, c2, c3, c4, c5, c6 ) \
  ( CMP5( s, c1, c2, c3, c4, c5 ) && \
    ((const unsigned char *) (s))[ 5 ] == (c6) )
#define CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) \
  ( CMP6( s, c1, c2, c3, c4, c5, c6 ) && \
    ((const unsigned char *) (s))[ 6 ] == (c7) )
#define CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) \
  ( CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) && \
    ((const unsigned char *) (s))[ 7 ] == (c8) )
#define CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) \
  ( CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) && \
    ((const unsigned char *) (s))[ 8 ] == (c9) )
#define CMP10( s, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 ) \
  ( CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) && \
    ((const unsigned char *) (s))[ 9 ] == (c10) )
1808
/*
 * SKIP(val): advance the current input by val xmlChars, adding val to
 * the column and to ctxt->nbChars. The line counter is not touched, so
 * the skipped characters must not contain a newline. val is evaluated
 * three times and must not have side effects.
 * Afterwards a '%' at the new position is handed to
 * xmlParserHandlePEReference(), and the input is popped when it is at a
 * 0 byte and cannot be grown.
 */
#define SKIP(val) do {							\
    ctxt->nbChars += (val),ctxt->input->cur += (val),ctxt->input->col+=(val); \
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
    if ((*ctxt->input->cur == 0) &&					\
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))		\
	    xmlPopInput(ctxt);						\
  } while (0)

/*
 * SKIPL(val): same as SKIP but walks the input one xmlChar at a time so
 * that newlines update the line counter and reset the column.
 * val is parenthesized in the loop condition so that an expression
 * argument (e.g. "a ? b : c") is compared as a whole; it is evaluated on
 * every iteration and must not have side effects.
 */
#define SKIPL(val) do {							\
    int skipl;								\
    for(skipl=0; skipl<(val); skipl++) {				\
	if (*(ctxt->input->cur) == '\n') {				\
	ctxt->input->line++; ctxt->input->col = 1;			\
	} else ctxt->input->col++;					\
	ctxt->nbChars++;						\
	ctxt->input->cur++;						\
    }									\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
    if ((*ctxt->input->cur == 0) &&					\
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))		\
	    xmlPopInput(ctxt);						\
  } while (0)
1831
Daniel Veillarda880b122003-04-21 21:36:41 +00001832#define SHRINK if ((ctxt->progressive == 0) && \
Daniel Veillard6155d8a2003-08-19 15:01:28 +00001833 (ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \
1834 (ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \
Daniel Veillard46de64e2002-05-29 08:21:33 +00001835 xmlSHRINK (ctxt);
1836
1837static void xmlSHRINK (xmlParserCtxtPtr ctxt) {
1838 xmlParserInputShrink(ctxt->input);
1839 if ((*ctxt->input->cur == 0) &&
1840 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1841 xmlPopInput(ctxt);
Daniel Veillard48b2f892001-02-25 16:11:03 +00001842 }
Owen Taylor3473f882001-02-23 17:55:21 +00001843
Daniel Veillarda880b122003-04-21 21:36:41 +00001844#define GROW if ((ctxt->progressive == 0) && \
1845 (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK)) \
Daniel Veillard46de64e2002-05-29 08:21:33 +00001846 xmlGROW (ctxt);
1847
1848static void xmlGROW (xmlParserCtxtPtr ctxt) {
1849 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1850 if ((*ctxt->input->cur == 0) &&
1851 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1852 xmlPopInput(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +00001853}
Owen Taylor3473f882001-02-23 17:55:21 +00001854
/* Shorthands for the blank skipping and next character routines. */
#define SKIP_BLANKS xmlSkipBlankChars(ctxt)

#define NEXT xmlNextChar(ctxt)

/*
 * NEXT1: step over one xmlChar, bumping the column and ctxt->nbChars,
 * and try to grow the input when a 0 byte is reached. The line counter
 * is not touched, so the character skipped must not be a newline.
 */
#define NEXT1 {								\
	ctxt->input->col++;						\
	ctxt->input->cur++;						\
	ctxt->nbChars++;						\
	if (*ctxt->input->cur == 0)					\
	    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);		\
    }

/*
 * NEXTL(l): step over the current character, which is l xmlChars long,
 * keeping line and column up to date, then hand a '%' found at the new
 * position to xmlParserHandlePEReference().
 */
#define NEXTL(l) do {							\
    if (*(ctxt->input->cur) == '\n') {					\
	ctxt->input->line++; ctxt->input->col = 1;			\
    } else ctxt->input->col++;						\
    ctxt->input->cur += (l);						\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
  } while (0)

/* l must be an lvalue: its address is passed to receive the length. */
#define CUR_CHAR(l) xmlCurrentChar(ctxt, &(l))
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, (s), &(l))

/*
 * COPY_BUF(l,b,i,v): append character v to buffer b at index i and
 * advance i; a length l of 1 stores a single xmlChar, anything else goes
 * through xmlCopyCharMultiByte().
 * All arguments are parenthesized so that expression arguments are
 * tested and cast as a whole. The expansion is a bare if/else statement
 * without its final semicolon.
 */
#define COPY_BUF(l,b,i,v)						\
    if ((l) == 1) (b)[(i)++] = (xmlChar) (v);				\
    else (i) += xmlCopyCharMultiByte(&(b)[(i)],(v))
Owen Taylor3473f882001-02-23 17:55:21 +00001881
1882/**
1883 * xmlSkipBlankChars:
1884 * @ctxt: the XML parser context
1885 *
1886 * skip all blanks character found at that point in the input streams.
1887 * It pops up finished entities in the process if allowable at that point.
1888 *
1889 * Returns the number of space chars skipped
1890 */
1891
1892int
1893xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
Daniel Veillard02141ea2001-04-30 11:46:40 +00001894 int res = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001895
1896 /*
1897 * It's Okay to use CUR/NEXT here since all the blanks are on
1898 * the ASCII range.
1899 */
Daniel Veillard02141ea2001-04-30 11:46:40 +00001900 if ((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) {
1901 const xmlChar *cur;
Owen Taylor3473f882001-02-23 17:55:21 +00001902 /*
Daniel Veillard02141ea2001-04-30 11:46:40 +00001903 * if we are in the document content, go really fast
Owen Taylor3473f882001-02-23 17:55:21 +00001904 */
Daniel Veillard02141ea2001-04-30 11:46:40 +00001905 cur = ctxt->input->cur;
William M. Brack76e95df2003-10-18 16:20:14 +00001906 while (IS_BLANK_CH(*cur)) {
Daniel Veillard02141ea2001-04-30 11:46:40 +00001907 if (*cur == '\n') {
1908 ctxt->input->line++; ctxt->input->col = 1;
1909 }
1910 cur++;
1911 res++;
1912 if (*cur == 0) {
1913 ctxt->input->cur = cur;
1914 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1915 cur = ctxt->input->cur;
1916 }
1917 }
1918 ctxt->input->cur = cur;
1919 } else {
1920 int cur;
1921 do {
1922 cur = CUR;
Daniel Veillard7da92702005-01-23 20:15:53 +00001923 while (IS_BLANK_CH(cur)) { /* CHECKED tstblanks.xml */
Daniel Veillard02141ea2001-04-30 11:46:40 +00001924 NEXT;
1925 cur = CUR;
1926 res++;
1927 }
1928 while ((cur == 0) && (ctxt->inputNr > 1) &&
1929 (ctxt->instate != XML_PARSER_COMMENT)) {
1930 xmlPopInput(ctxt);
1931 cur = CUR;
1932 }
1933 /*
1934 * Need to handle support of entities branching here
1935 */
1936 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);
1937 } while (IS_BLANK(cur)); /* CHECKED tstblanks.xml */
1938 }
Owen Taylor3473f882001-02-23 17:55:21 +00001939 return(res);
1940}
1941
1942/************************************************************************
1943 * *
1944 * Commodity functions to handle entities *
1945 * *
1946 ************************************************************************/
1947
1948/**
1949 * xmlPopInput:
1950 * @ctxt: an XML parser context
1951 *
1952 * xmlPopInput: the current input pointed by ctxt->input came to an end
1953 * pop it and return the next char.
1954 *
1955 * Returns the current xmlChar in the parser context
1956 */
1957xmlChar
1958xmlPopInput(xmlParserCtxtPtr ctxt) {
Daniel Veillarda82b1822004-11-08 16:24:57 +00001959 if ((ctxt == NULL) || (ctxt->inputNr <= 1)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00001960 if (xmlParserDebugEntities)
1961 xmlGenericError(xmlGenericErrorContext,
1962 "Popping input %d\n", ctxt->inputNr);
1963 xmlFreeInputStream(inputPop(ctxt));
Daniel Veillard561b7f82002-03-20 21:55:57 +00001964 if ((*ctxt->input->cur == 0) &&
Owen Taylor3473f882001-02-23 17:55:21 +00001965 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1966 return(xmlPopInput(ctxt));
1967 return(CUR);
1968}
1969
1970/**
1971 * xmlPushInput:
1972 * @ctxt: an XML parser context
1973 * @input: an XML parser input fragment (entity, XML fragment ...).
1974 *
1975 * xmlPushInput: switch to a new input stream which is stacked on top
1976 * of the previous one(s).
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001977 * Returns -1 in case of error or the index in the input stack
Owen Taylor3473f882001-02-23 17:55:21 +00001978 */
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001979int
Owen Taylor3473f882001-02-23 17:55:21 +00001980xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001981 int ret;
1982 if (input == NULL) return(-1);
Owen Taylor3473f882001-02-23 17:55:21 +00001983
1984 if (xmlParserDebugEntities) {
1985 if ((ctxt->input != NULL) && (ctxt->input->filename))
1986 xmlGenericError(xmlGenericErrorContext,
1987 "%s(%d): ", ctxt->input->filename,
1988 ctxt->input->line);
1989 xmlGenericError(xmlGenericErrorContext,
1990 "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
1991 }
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001992 ret = inputPush(ctxt, input);
Owen Taylor3473f882001-02-23 17:55:21 +00001993 GROW;
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001994 return(ret);
Owen Taylor3473f882001-02-23 17:55:21 +00001995}
1996
1997/**
1998 * xmlParseCharRef:
1999 * @ctxt: an XML parser context
2000 *
2001 * parse Reference declarations
2002 *
2003 * [66] CharRef ::= '&#' [0-9]+ ';' |
2004 * '&#x' [0-9a-fA-F]+ ';'
2005 *
2006 * [ WFC: Legal Character ]
2007 * Characters referred to using character references must match the
2008 * production for Char.
2009 *
2010 * Returns the value parsed (as an int), 0 in case of error
2011 */
2012int
2013xmlParseCharRef(xmlParserCtxtPtr ctxt) {
Daniel Veillard50582112001-03-26 22:52:16 +00002014 unsigned int val = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002015 int count = 0;
Daniel Veillard37fd3072004-06-03 11:22:31 +00002016 unsigned int outofrange = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002017
Owen Taylor3473f882001-02-23 17:55:21 +00002018 /*
2019 * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
2020 */
Daniel Veillard561b7f82002-03-20 21:55:57 +00002021 if ((RAW == '&') && (NXT(1) == '#') &&
Owen Taylor3473f882001-02-23 17:55:21 +00002022 (NXT(2) == 'x')) {
2023 SKIP(3);
2024 GROW;
2025 while (RAW != ';') { /* loop blocked by count */
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00002026 if (count++ > 20) {
2027 count = 0;
2028 GROW;
2029 }
2030 if ((RAW >= '0') && (RAW <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +00002031 val = val * 16 + (CUR - '0');
2032 else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
2033 val = val * 16 + (CUR - 'a') + 10;
2034 else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
2035 val = val * 16 + (CUR - 'A') + 10;
2036 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002037 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002038 val = 0;
2039 break;
2040 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00002041 if (val > 0x10FFFF)
2042 outofrange = val;
2043
Owen Taylor3473f882001-02-23 17:55:21 +00002044 NEXT;
2045 count++;
2046 }
2047 if (RAW == ';') {
2048 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
Daniel Veillard77a90a72003-03-22 00:04:05 +00002049 ctxt->input->col++;
Owen Taylor3473f882001-02-23 17:55:21 +00002050 ctxt->nbChars ++;
2051 ctxt->input->cur++;
2052 }
Daniel Veillard561b7f82002-03-20 21:55:57 +00002053 } else if ((RAW == '&') && (NXT(1) == '#')) {
Owen Taylor3473f882001-02-23 17:55:21 +00002054 SKIP(2);
2055 GROW;
2056 while (RAW != ';') { /* loop blocked by count */
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00002057 if (count++ > 20) {
2058 count = 0;
2059 GROW;
2060 }
2061 if ((RAW >= '0') && (RAW <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +00002062 val = val * 10 + (CUR - '0');
2063 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002064 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002065 val = 0;
2066 break;
2067 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00002068 if (val > 0x10FFFF)
2069 outofrange = val;
2070
Owen Taylor3473f882001-02-23 17:55:21 +00002071 NEXT;
2072 count++;
2073 }
2074 if (RAW == ';') {
2075 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
Daniel Veillard77a90a72003-03-22 00:04:05 +00002076 ctxt->input->col++;
Owen Taylor3473f882001-02-23 17:55:21 +00002077 ctxt->nbChars ++;
2078 ctxt->input->cur++;
2079 }
2080 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002081 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002082 }
2083
2084 /*
2085 * [ WFC: Legal Character ]
2086 * Characters referred to using character references must match the
2087 * production for Char.
2088 */
Daniel Veillard37fd3072004-06-03 11:22:31 +00002089 if ((IS_CHAR(val) && (outofrange == 0))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002090 return(val);
2091 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00002092 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2093 "xmlParseCharRef: invalid xmlChar value %d\n",
2094 val);
Owen Taylor3473f882001-02-23 17:55:21 +00002095 }
2096 return(0);
2097}
2098
2099/**
2100 * xmlParseStringCharRef:
2101 * @ctxt: an XML parser context
2102 * @str: a pointer to an index in the string
2103 *
2104 * parse Reference declarations, variant parsing from a string rather
2105 * than an an input flow.
2106 *
2107 * [66] CharRef ::= '&#' [0-9]+ ';' |
2108 * '&#x' [0-9a-fA-F]+ ';'
2109 *
2110 * [ WFC: Legal Character ]
2111 * Characters referred to using character references must match the
2112 * production for Char.
2113 *
2114 * Returns the value parsed (as an int), 0 in case of error, str will be
2115 * updated to the current value of the index
2116 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00002117static int
Owen Taylor3473f882001-02-23 17:55:21 +00002118xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
2119 const xmlChar *ptr;
2120 xmlChar cur;
Daniel Veillard37fd3072004-06-03 11:22:31 +00002121 unsigned int val = 0;
2122 unsigned int outofrange = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002123
2124 if ((str == NULL) || (*str == NULL)) return(0);
2125 ptr = *str;
2126 cur = *ptr;
2127 if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
2128 ptr += 3;
2129 cur = *ptr;
2130 while (cur != ';') { /* Non input consuming loop */
2131 if ((cur >= '0') && (cur <= '9'))
2132 val = val * 16 + (cur - '0');
2133 else if ((cur >= 'a') && (cur <= 'f'))
2134 val = val * 16 + (cur - 'a') + 10;
2135 else if ((cur >= 'A') && (cur <= 'F'))
2136 val = val * 16 + (cur - 'A') + 10;
2137 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002138 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002139 val = 0;
2140 break;
2141 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00002142 if (val > 0x10FFFF)
2143 outofrange = val;
2144
Owen Taylor3473f882001-02-23 17:55:21 +00002145 ptr++;
2146 cur = *ptr;
2147 }
2148 if (cur == ';')
2149 ptr++;
2150 } else if ((cur == '&') && (ptr[1] == '#')){
2151 ptr += 2;
2152 cur = *ptr;
2153 while (cur != ';') { /* Non input consuming loops */
2154 if ((cur >= '0') && (cur <= '9'))
2155 val = val * 10 + (cur - '0');
2156 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002157 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002158 val = 0;
2159 break;
2160 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00002161 if (val > 0x10FFFF)
2162 outofrange = val;
2163
Owen Taylor3473f882001-02-23 17:55:21 +00002164 ptr++;
2165 cur = *ptr;
2166 }
2167 if (cur == ';')
2168 ptr++;
2169 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002170 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002171 return(0);
2172 }
2173 *str = ptr;
2174
2175 /*
2176 * [ WFC: Legal Character ]
2177 * Characters referred to using character references must match the
2178 * production for Char.
2179 */
Daniel Veillard37fd3072004-06-03 11:22:31 +00002180 if ((IS_CHAR(val) && (outofrange == 0))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002181 return(val);
2182 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00002183 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2184 "xmlParseStringCharRef: invalid xmlChar value %d\n",
2185 val);
Owen Taylor3473f882001-02-23 17:55:21 +00002186 }
2187 return(0);
2188}
2189
2190/**
Daniel Veillardf5582f12002-06-11 10:08:16 +00002191 * xmlNewBlanksWrapperInputStream:
2192 * @ctxt: an XML parser context
2193 * @entity: an Entity pointer
2194 *
2195 * Create a new input stream for wrapping
2196 * blanks around a PEReference
2197 *
2198 * Returns the new input stream or NULL
2199 */
2200
2201static void deallocblankswrapper (xmlChar *str) {xmlFree(str);}
2202
Daniel Veillardf4862f02002-09-10 11:13:43 +00002203static xmlParserInputPtr
Daniel Veillardf5582f12002-06-11 10:08:16 +00002204xmlNewBlanksWrapperInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
2205 xmlParserInputPtr input;
2206 xmlChar *buffer;
2207 size_t length;
2208 if (entity == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002209 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
2210 "xmlNewBlanksWrapperInputStream entity\n");
Daniel Veillardf5582f12002-06-11 10:08:16 +00002211 return(NULL);
2212 }
2213 if (xmlParserDebugEntities)
2214 xmlGenericError(xmlGenericErrorContext,
2215 "new blanks wrapper for entity: %s\n", entity->name);
2216 input = xmlNewInputStream(ctxt);
2217 if (input == NULL) {
2218 return(NULL);
2219 }
2220 length = xmlStrlen(entity->name) + 5;
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002221 buffer = xmlMallocAtomic(length);
Daniel Veillardf5582f12002-06-11 10:08:16 +00002222 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002223 xmlErrMemory(ctxt, NULL);
Daniel Veillard68b6e022008-03-31 09:26:00 +00002224 xmlFree(input);
Daniel Veillardf5582f12002-06-11 10:08:16 +00002225 return(NULL);
2226 }
2227 buffer [0] = ' ';
2228 buffer [1] = '%';
2229 buffer [length-3] = ';';
2230 buffer [length-2] = ' ';
2231 buffer [length-1] = 0;
2232 memcpy(buffer + 2, entity->name, length - 5);
2233 input->free = deallocblankswrapper;
2234 input->base = buffer;
2235 input->cur = buffer;
2236 input->length = length;
2237 input->end = &buffer[length];
2238 return(input);
2239}
2240
2241/**
Owen Taylor3473f882001-02-23 17:55:21 +00002242 * xmlParserHandlePEReference:
2243 * @ctxt: the parser context
2244 *
2245 * [69] PEReference ::= '%' Name ';'
2246 *
2247 * [ WFC: No Recursion ]
2248 * A parsed entity must not contain a recursive
2249 * reference to itself, either directly or indirectly.
2250 *
2251 * [ WFC: Entity Declared ]
2252 * In a document without any DTD, a document with only an internal DTD
2253 * subset which contains no parameter entity references, or a document
2254 * with "standalone='yes'", ... ... The declaration of a parameter
2255 * entity must precede any reference to it...
2256 *
2257 * [ VC: Entity Declared ]
2258 * In a document with an external subset or external parameter entities
2259 * with "standalone='no'", ... ... The declaration of a parameter entity
2260 * must precede any reference to it...
2261 *
2262 * [ WFC: In DTD ]
2263 * Parameter-entity references may only appear in the DTD.
2264 * NOTE: misleading but this is handled.
2265 *
2266 * A PEReference may have been detected in the current input stream
2267 * the handling is done accordingly to
2268 * http://www.w3.org/TR/REC-xml#entproc
2269 * i.e.
2270 * - Included in literal in entity values
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002271 * - Included as Parameter Entity reference within DTDs
Owen Taylor3473f882001-02-23 17:55:21 +00002272 */
2273void
2274xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002275 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00002276 xmlEntityPtr entity = NULL;
2277 xmlParserInputPtr input;
2278
Owen Taylor3473f882001-02-23 17:55:21 +00002279 if (RAW != '%') return;
2280 switch(ctxt->instate) {
2281 case XML_PARSER_CDATA_SECTION:
2282 return;
2283 case XML_PARSER_COMMENT:
2284 return;
2285 case XML_PARSER_START_TAG:
2286 return;
2287 case XML_PARSER_END_TAG:
2288 return;
2289 case XML_PARSER_EOF:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002290 xmlFatalErr(ctxt, XML_ERR_PEREF_AT_EOF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002291 return;
2292 case XML_PARSER_PROLOG:
2293 case XML_PARSER_START:
2294 case XML_PARSER_MISC:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002295 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_PROLOG, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002296 return;
2297 case XML_PARSER_ENTITY_DECL:
2298 case XML_PARSER_CONTENT:
2299 case XML_PARSER_ATTRIBUTE_VALUE:
2300 case XML_PARSER_PI:
2301 case XML_PARSER_SYSTEM_LITERAL:
Daniel Veillard4a7ae502002-02-18 19:18:17 +00002302 case XML_PARSER_PUBLIC_LITERAL:
Owen Taylor3473f882001-02-23 17:55:21 +00002303 /* we just ignore it there */
2304 return;
2305 case XML_PARSER_EPILOG:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002306 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_EPILOG, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002307 return;
2308 case XML_PARSER_ENTITY_VALUE:
2309 /*
2310 * NOTE: in the case of entity values, we don't do the
2311 * substitution here since we need the literal
2312 * entity value to be able to save the internal
2313 * subset of the document.
2314 * This will be handled by xmlStringDecodeEntities
2315 */
2316 return;
2317 case XML_PARSER_DTD:
2318 /*
2319 * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
2320 * In the internal DTD subset, parameter-entity references
2321 * can occur only where markup declarations can occur, not
2322 * within markup declarations.
2323 * In that case this is handled in xmlParseMarkupDecl
2324 */
2325 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
2326 return;
William M. Brack76e95df2003-10-18 16:20:14 +00002327 if (IS_BLANK_CH(NXT(1)) || NXT(1) == 0)
Daniel Veillardf5582f12002-06-11 10:08:16 +00002328 return;
Owen Taylor3473f882001-02-23 17:55:21 +00002329 break;
2330 case XML_PARSER_IGNORE:
2331 return;
2332 }
2333
2334 NEXT;
2335 name = xmlParseName(ctxt);
2336 if (xmlParserDebugEntities)
2337 xmlGenericError(xmlGenericErrorContext,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002338 "PEReference: %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00002339 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002340 xmlFatalErr(ctxt, XML_ERR_PEREF_NO_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002341 } else {
2342 if (RAW == ';') {
2343 NEXT;
2344 if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL))
2345 entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
2346 if (entity == NULL) {
2347
2348 /*
2349 * [ WFC: Entity Declared ]
2350 * In a document without any DTD, a document with only an
2351 * internal DTD subset which contains no parameter entity
2352 * references, or a document with "standalone='yes'", ...
2353 * ... The declaration of a parameter entity must precede
2354 * any reference to it...
2355 */
2356 if ((ctxt->standalone == 1) ||
2357 ((ctxt->hasExternalSubset == 0) &&
2358 (ctxt->hasPErefs == 0))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00002359 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00002360 "PEReference: %%%s; not found\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00002361 } else {
2362 /*
2363 * [ VC: Entity Declared ]
2364 * In a document with an external subset or external
2365 * parameter entities with "standalone='no'", ...
2366 * ... The declaration of a parameter entity must precede
2367 * any reference to it...
2368 */
Daniel Veillard24eb9782003-10-04 21:08:09 +00002369 if ((ctxt->validate) && (ctxt->vctxt.error != NULL)) {
2370 xmlValidityError(ctxt, XML_WAR_UNDECLARED_ENTITY,
2371 "PEReference: %%%s; not found\n",
Daniel Veillardae0765b2008-07-31 19:54:59 +00002372 name, NULL);
Daniel Veillard24eb9782003-10-04 21:08:09 +00002373 } else
2374 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
2375 "PEReference: %%%s; not found\n",
2376 name, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002377 ctxt->valid = 0;
2378 }
Daniel Veillardf5582f12002-06-11 10:08:16 +00002379 } else if (ctxt->input->free != deallocblankswrapper) {
2380 input = xmlNewBlanksWrapperInputStream(ctxt, entity);
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00002381 if (xmlPushInput(ctxt, input) < 0)
2382 return;
Owen Taylor3473f882001-02-23 17:55:21 +00002383 } else {
2384 if ((entity->etype == XML_INTERNAL_PARAMETER_ENTITY) ||
2385 (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)) {
Daniel Veillard87a764e2001-06-20 17:41:10 +00002386 xmlChar start[4];
2387 xmlCharEncoding enc;
2388
Owen Taylor3473f882001-02-23 17:55:21 +00002389 /*
2390 * handle the extra spaces added before and after
2391 * c.f. http://www.w3.org/TR/REC-xml#as-PE
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002392 * this is done independently.
Owen Taylor3473f882001-02-23 17:55:21 +00002393 */
2394 input = xmlNewEntityInputStream(ctxt, entity);
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00002395 if (xmlPushInput(ctxt, input) < 0)
2396 return;
Daniel Veillard87a764e2001-06-20 17:41:10 +00002397
2398 /*
2399 * Get the 4 first bytes and decode the charset
2400 * if enc != XML_CHAR_ENCODING_NONE
2401 * plug some encoding conversion routines.
William M. Bracka0c48ad2004-04-16 15:58:29 +00002402 * Note that, since we may have some non-UTF8
2403 * encoding (like UTF16, bug 135229), the 'length'
2404 * is not known, but we can calculate based upon
2405 * the amount of data in the buffer.
Daniel Veillard87a764e2001-06-20 17:41:10 +00002406 */
2407 GROW
William M. Bracka0c48ad2004-04-16 15:58:29 +00002408 if ((ctxt->input->end - ctxt->input->cur)>=4) {
Daniel Veillarde059b892002-06-13 15:32:10 +00002409 start[0] = RAW;
2410 start[1] = NXT(1);
2411 start[2] = NXT(2);
2412 start[3] = NXT(3);
2413 enc = xmlDetectCharEncoding(start, 4);
2414 if (enc != XML_CHAR_ENCODING_NONE) {
2415 xmlSwitchEncoding(ctxt, enc);
2416 }
Daniel Veillard87a764e2001-06-20 17:41:10 +00002417 }
2418
Owen Taylor3473f882001-02-23 17:55:21 +00002419 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
Daniel Veillarda07050d2003-10-19 14:46:32 +00002420 (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l' )) &&
2421 (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002422 xmlParseTextDecl(ctxt);
2423 }
Owen Taylor3473f882001-02-23 17:55:21 +00002424 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00002425 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
2426 "PEReference: %s is not a parameter entity\n",
2427 name);
Owen Taylor3473f882001-02-23 17:55:21 +00002428 }
2429 }
2430 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002431 xmlFatalErr(ctxt, XML_ERR_PEREF_SEMICOL_MISSING, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002432 }
Owen Taylor3473f882001-02-23 17:55:21 +00002433 }
2434}
2435
/*
 * growBuffer:
 * @buffer:  name of the xmlChar * variable to enlarge
 * @n:  extra number of xmlChar to add on top of the doubling
 *
 * Macro used to grow the current buffer.
 *
 * The enclosing function must provide an integer variable named
 * <buffer>_size and a label named mem_error.  The size variable is
 * doubled and then increased by @n before the reallocation is tried;
 * on failure control jumps to mem_error and @buffer still points to
 * the old (unfreed) allocation.
 */
#define growBuffer(buffer, n) {                                         \
    xmlChar *tmp;                                                       \
    buffer##_size = buffer##_size * 2;                                  \
    buffer##_size += (n);                                               \
    tmp = (xmlChar *) xmlRealloc((buffer),                              \
                                 buffer##_size * sizeof(xmlChar));      \
    if (tmp == NULL)                                                    \
        goto mem_error;                                                 \
    buffer = tmp;                                                       \
}
2448
2449/**
Daniel Veillard7a02cfe2003-09-25 12:18:34 +00002450 * xmlStringLenDecodeEntities:
Owen Taylor3473f882001-02-23 17:55:21 +00002451 * @ctxt: the parser context
2452 * @str: the input string
Daniel Veillarde57ec792003-09-10 10:50:59 +00002453 * @len: the string length
Owen Taylor3473f882001-02-23 17:55:21 +00002454 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2455 * @end: an end marker xmlChar, 0 if none
2456 * @end2: an end marker xmlChar, 0 if none
2457 * @end3: an end marker xmlChar, 0 if none
2458 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002459 * Takes a entity string content and process to do the adequate substitutions.
Owen Taylor3473f882001-02-23 17:55:21 +00002460 *
2461 * [67] Reference ::= EntityRef | CharRef
2462 *
2463 * [69] PEReference ::= '%' Name ';'
2464 *
2465 * Returns A newly allocated string with the substitution done. The caller
2466 * must deallocate it !
2467 */
2468xmlChar *
Daniel Veillarde57ec792003-09-10 10:50:59 +00002469xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2470 int what, xmlChar end, xmlChar end2, xmlChar end3) {
Owen Taylor3473f882001-02-23 17:55:21 +00002471 xmlChar *buffer = NULL;
2472 int buffer_size = 0;
2473
2474 xmlChar *current = NULL;
Daniel Veillard68b6e022008-03-31 09:26:00 +00002475 xmlChar *rep = NULL;
Daniel Veillarde57ec792003-09-10 10:50:59 +00002476 const xmlChar *last;
Owen Taylor3473f882001-02-23 17:55:21 +00002477 xmlEntityPtr ent;
2478 int c,l;
2479 int nbchars = 0;
2480
Daniel Veillarda82b1822004-11-08 16:24:57 +00002481 if ((ctxt == NULL) || (str == NULL) || (len < 0))
Owen Taylor3473f882001-02-23 17:55:21 +00002482 return(NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00002483 last = str + len;
Owen Taylor3473f882001-02-23 17:55:21 +00002484
Daniel Veillard0161e632008-08-28 15:36:32 +00002485 if (((ctxt->depth > 40) &&
2486 ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
2487 (ctxt->depth > 1024)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002488 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002489 return(NULL);
2490 }
2491
2492 /*
2493 * allocate a translation buffer.
2494 */
2495 buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002496 buffer = (xmlChar *) xmlMallocAtomic(buffer_size * sizeof(xmlChar));
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002497 if (buffer == NULL) goto mem_error;
Owen Taylor3473f882001-02-23 17:55:21 +00002498
2499 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002500 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +00002501 * we are operating on already parsed values.
2502 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00002503 if (str < last)
2504 c = CUR_SCHAR(str, l);
2505 else
2506 c = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002507 while ((c != 0) && (c != end) && /* non input consuming loop */
2508 (c != end2) && (c != end3)) {
2509
2510 if (c == 0) break;
2511 if ((c == '&') && (str[1] == '#')) {
2512 int val = xmlParseStringCharRef(ctxt, &str);
2513 if (val != 0) {
2514 COPY_BUF(0,buffer,nbchars,val);
2515 }
Daniel Veillardbedc9772005-09-28 21:42:15 +00002516 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
Daniel Veillard0161e632008-08-28 15:36:32 +00002517 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
Daniel Veillardbedc9772005-09-28 21:42:15 +00002518 }
Owen Taylor3473f882001-02-23 17:55:21 +00002519 } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
2520 if (xmlParserDebugEntities)
2521 xmlGenericError(xmlGenericErrorContext,
2522 "String decoding Entity Reference: %.30s\n",
2523 str);
2524 ent = xmlParseStringEntityRef(ctxt, &str);
Daniel Veillard8915c152008-08-26 13:05:34 +00002525 if ((ctxt->lastError.code == XML_ERR_ENTITY_LOOP) ||
2526 (ctxt->lastError.code == XML_ERR_INTERNAL_ERROR))
Daniel Veillard4bf899b2008-08-20 17:04:30 +00002527 goto int_error;
Daniel Veillard4bf899b2008-08-20 17:04:30 +00002528 if (ent != NULL)
Daniel Veillardf4f4e482008-08-25 08:57:48 +00002529 ctxt->nbentities += ent->checked;
Owen Taylor3473f882001-02-23 17:55:21 +00002530 if ((ent != NULL) &&
2531 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
2532 if (ent->content != NULL) {
2533 COPY_BUF(0,buffer,nbchars,ent->content[0]);
Daniel Veillardbedc9772005-09-28 21:42:15 +00002534 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
Daniel Veillard0161e632008-08-28 15:36:32 +00002535 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
Daniel Veillardbedc9772005-09-28 21:42:15 +00002536 }
Owen Taylor3473f882001-02-23 17:55:21 +00002537 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00002538 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
2539 "predefined entity has no content\n");
Owen Taylor3473f882001-02-23 17:55:21 +00002540 }
2541 } else if ((ent != NULL) && (ent->content != NULL)) {
Owen Taylor3473f882001-02-23 17:55:21 +00002542 ctxt->depth++;
2543 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2544 0, 0, 0);
2545 ctxt->depth--;
Daniel Veillard0161e632008-08-28 15:36:32 +00002546
Owen Taylor3473f882001-02-23 17:55:21 +00002547 if (rep != NULL) {
2548 current = rep;
2549 while (*current != 0) { /* non input consuming loop */
2550 buffer[nbchars++] = *current++;
2551 if (nbchars >
2552 buffer_size - XML_PARSER_BUFFER_SIZE) {
Daniel Veillard0161e632008-08-28 15:36:32 +00002553 if (xmlParserEntityCheck(ctxt, nbchars, ent))
2554 goto int_error;
2555 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
Owen Taylor3473f882001-02-23 17:55:21 +00002556 }
2557 }
2558 xmlFree(rep);
Daniel Veillard68b6e022008-03-31 09:26:00 +00002559 rep = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00002560 }
2561 } else if (ent != NULL) {
2562 int i = xmlStrlen(ent->name);
2563 const xmlChar *cur = ent->name;
2564
2565 buffer[nbchars++] = '&';
2566 if (nbchars > buffer_size - i - XML_PARSER_BUFFER_SIZE) {
Daniel Veillard0161e632008-08-28 15:36:32 +00002567 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
Owen Taylor3473f882001-02-23 17:55:21 +00002568 }
2569 for (;i > 0;i--)
2570 buffer[nbchars++] = *cur++;
2571 buffer[nbchars++] = ';';
2572 }
2573 } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
2574 if (xmlParserDebugEntities)
2575 xmlGenericError(xmlGenericErrorContext,
2576 "String decoding PE Reference: %.30s\n", str);
2577 ent = xmlParseStringPEReference(ctxt, &str);
Daniel Veillard4bf899b2008-08-20 17:04:30 +00002578 if (ctxt->lastError.code == XML_ERR_ENTITY_LOOP)
2579 goto int_error;
Daniel Veillard4bf899b2008-08-20 17:04:30 +00002580 if (ent != NULL)
Daniel Veillardf4f4e482008-08-25 08:57:48 +00002581 ctxt->nbentities += ent->checked;
Owen Taylor3473f882001-02-23 17:55:21 +00002582 if (ent != NULL) {
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00002583 if (ent->content == NULL) {
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00002584 xmlLoadEntityContent(ctxt, ent);
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00002585 }
Owen Taylor3473f882001-02-23 17:55:21 +00002586 ctxt->depth++;
2587 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2588 0, 0, 0);
2589 ctxt->depth--;
2590 if (rep != NULL) {
2591 current = rep;
2592 while (*current != 0) { /* non input consuming loop */
2593 buffer[nbchars++] = *current++;
2594 if (nbchars >
2595 buffer_size - XML_PARSER_BUFFER_SIZE) {
Daniel Veillard0161e632008-08-28 15:36:32 +00002596 if (xmlParserEntityCheck(ctxt, nbchars, ent))
2597 goto int_error;
2598 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
Owen Taylor3473f882001-02-23 17:55:21 +00002599 }
2600 }
2601 xmlFree(rep);
Daniel Veillard68b6e022008-03-31 09:26:00 +00002602 rep = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00002603 }
2604 }
2605 } else {
2606 COPY_BUF(l,buffer,nbchars,c);
2607 str += l;
2608 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
Daniel Veillard0161e632008-08-28 15:36:32 +00002609 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
Owen Taylor3473f882001-02-23 17:55:21 +00002610 }
2611 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00002612 if (str < last)
2613 c = CUR_SCHAR(str, l);
2614 else
2615 c = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002616 }
2617 buffer[nbchars++] = 0;
2618 return(buffer);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002619
2620mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002621 xmlErrMemory(ctxt, NULL);
Daniel Veillard4bf899b2008-08-20 17:04:30 +00002622int_error:
Daniel Veillard68b6e022008-03-31 09:26:00 +00002623 if (rep != NULL)
2624 xmlFree(rep);
2625 if (buffer != NULL)
2626 xmlFree(buffer);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002627 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002628}
2629
Daniel Veillarde57ec792003-09-10 10:50:59 +00002630/**
2631 * xmlStringDecodeEntities:
2632 * @ctxt: the parser context
2633 * @str: the input string
2634 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2635 * @end: an end marker xmlChar, 0 if none
2636 * @end2: an end marker xmlChar, 0 if none
2637 * @end3: an end marker xmlChar, 0 if none
2638 *
2639 * Takes a entity string content and process to do the adequate substitutions.
2640 *
2641 * [67] Reference ::= EntityRef | CharRef
2642 *
2643 * [69] PEReference ::= '%' Name ';'
2644 *
2645 * Returns A newly allocated string with the substitution done. The caller
2646 * must deallocate it !
2647 */
2648xmlChar *
2649xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
2650 xmlChar end, xmlChar end2, xmlChar end3) {
Daniel Veillarda82b1822004-11-08 16:24:57 +00002651 if ((ctxt == NULL) || (str == NULL)) return(NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00002652 return(xmlStringLenDecodeEntities(ctxt, str, xmlStrlen(str), what,
2653 end, end2, end3));
2654}
Owen Taylor3473f882001-02-23 17:55:21 +00002655
2656/************************************************************************
2657 * *
Owen Taylor3473f882001-02-23 17:55:21 +00002658 * Commodity functions, cleanup needed ? *
2659 * *
2660 ************************************************************************/
2661
2662/**
2663 * areBlanks:
2664 * @ctxt: an XML parser context
2665 * @str: a xmlChar *
2666 * @len: the size of @str
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002667 * @blank_chars: we know the chars are blanks
Owen Taylor3473f882001-02-23 17:55:21 +00002668 *
2669 * Is this a sequence of blank chars that one can ignore ?
2670 *
2671 * Returns 1 if ignorable 0 otherwise.
2672 */
2673
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002674static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2675 int blank_chars) {
Owen Taylor3473f882001-02-23 17:55:21 +00002676 int i, ret;
2677 xmlNodePtr lastChild;
2678
Daniel Veillard05c13a22001-09-09 08:38:09 +00002679 /*
2680 * Don't spend time trying to differentiate them, the same callback is
2681 * used !
2682 */
2683 if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters)
Daniel Veillard2f362242001-03-02 17:36:21 +00002684 return(0);
2685
Owen Taylor3473f882001-02-23 17:55:21 +00002686 /*
2687 * Check for xml:space value.
2688 */
Daniel Veillard1114d002006-10-12 16:24:35 +00002689 if ((ctxt->space == NULL) || (*(ctxt->space) == 1) ||
2690 (*(ctxt->space) == -2))
Owen Taylor3473f882001-02-23 17:55:21 +00002691 return(0);
2692
2693 /*
2694 * Check that the string is made of blanks
2695 */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002696 if (blank_chars == 0) {
2697 for (i = 0;i < len;i++)
2698 if (!(IS_BLANK_CH(str[i]))) return(0);
2699 }
Owen Taylor3473f882001-02-23 17:55:21 +00002700
2701 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002702 * Look if the element is mixed content in the DTD if available
Owen Taylor3473f882001-02-23 17:55:21 +00002703 */
Daniel Veillard6dd398f2001-07-25 22:41:03 +00002704 if (ctxt->node == NULL) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002705 if (ctxt->myDoc != NULL) {
2706 ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
2707 if (ret == 0) return(1);
2708 if (ret == 1) return(0);
2709 }
2710
2711 /*
2712 * Otherwise, heuristic :-\
2713 */
Daniel Veillardabac41e2005-07-06 15:17:38 +00002714 if ((RAW != '<') && (RAW != 0xD)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002715 if ((ctxt->node->children == NULL) &&
2716 (RAW == '<') && (NXT(1) == '/')) return(0);
2717
2718 lastChild = xmlGetLastChild(ctxt->node);
2719 if (lastChild == NULL) {
Daniel Veillard7db37732001-07-12 01:20:08 +00002720 if ((ctxt->node->type != XML_ELEMENT_NODE) &&
2721 (ctxt->node->content != NULL)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002722 } else if (xmlNodeIsText(lastChild))
2723 return(0);
2724 else if ((ctxt->node->children != NULL) &&
2725 (xmlNodeIsText(ctxt->node->children)))
2726 return(0);
2727 return(1);
2728}
2729
Owen Taylor3473f882001-02-23 17:55:21 +00002730/************************************************************************
2731 * *
2732 * Extra stuff for namespace support *
2733 * Relates to http://www.w3.org/TR/WD-xml-names *
2734 * *
2735 ************************************************************************/
2736
2737/**
2738 * xmlSplitQName:
2739 * @ctxt: an XML parser context
2740 * @name: an XML parser context
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00002741 * @prefix: a xmlChar **
Owen Taylor3473f882001-02-23 17:55:21 +00002742 *
2743 * parse an UTF8 encoded XML qualified name string
2744 *
2745 * [NS 5] QName ::= (Prefix ':')? LocalPart
2746 *
2747 * [NS 6] Prefix ::= NCName
2748 *
2749 * [NS 7] LocalPart ::= NCName
2750 *
2751 * Returns the local part, and prefix is updated
2752 * to get the Prefix if any.
2753 */
2754
2755xmlChar *
2756xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
2757 xmlChar buf[XML_MAX_NAMELEN + 5];
2758 xmlChar *buffer = NULL;
2759 int len = 0;
2760 int max = XML_MAX_NAMELEN;
2761 xmlChar *ret = NULL;
2762 const xmlChar *cur = name;
2763 int c;
2764
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00002765 if (prefix == NULL) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002766 *prefix = NULL;
2767
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00002768 if (cur == NULL) return(NULL);
2769
Owen Taylor3473f882001-02-23 17:55:21 +00002770#ifndef XML_XML_NAMESPACE
2771 /* xml: prefix is not really a namespace */
2772 if ((cur[0] == 'x') && (cur[1] == 'm') &&
2773 (cur[2] == 'l') && (cur[3] == ':'))
2774 return(xmlStrdup(name));
2775#endif
2776
Daniel Veillard597bc482003-07-24 16:08:28 +00002777 /* nasty but well=formed */
Owen Taylor3473f882001-02-23 17:55:21 +00002778 if (cur[0] == ':')
2779 return(xmlStrdup(name));
2780
2781 c = *cur++;
2782 while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
2783 buf[len++] = c;
2784 c = *cur++;
2785 }
2786 if (len >= max) {
2787 /*
2788 * Okay someone managed to make a huge name, so he's ready to pay
2789 * for the processing speed.
2790 */
2791 max = len * 2;
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00002792
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002793 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002794 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002795 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002796 return(NULL);
2797 }
2798 memcpy(buffer, buf, len);
2799 while ((c != 0) && (c != ':')) { /* tested bigname.xml */
2800 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00002801 xmlChar *tmp;
2802
Owen Taylor3473f882001-02-23 17:55:21 +00002803 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00002804 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00002805 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00002806 if (tmp == NULL) {
Daniel Veillard68b6e022008-03-31 09:26:00 +00002807 xmlFree(buffer);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002808 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002809 return(NULL);
2810 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00002811 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00002812 }
2813 buffer[len++] = c;
2814 c = *cur++;
2815 }
2816 buffer[len] = 0;
2817 }
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00002818
Daniel Veillard597bc482003-07-24 16:08:28 +00002819 if ((c == ':') && (*cur == 0)) {
Daniel Veillard02a49632006-10-13 12:42:31 +00002820 if (buffer != NULL)
2821 xmlFree(buffer);
2822 *prefix = NULL;
Daniel Veillard597bc482003-07-24 16:08:28 +00002823 return(xmlStrdup(name));
Daniel Veillard02a49632006-10-13 12:42:31 +00002824 }
Daniel Veillard597bc482003-07-24 16:08:28 +00002825
Owen Taylor3473f882001-02-23 17:55:21 +00002826 if (buffer == NULL)
2827 ret = xmlStrndup(buf, len);
2828 else {
2829 ret = buffer;
2830 buffer = NULL;
2831 max = XML_MAX_NAMELEN;
2832 }
2833
2834
2835 if (c == ':') {
Daniel Veillardbb284f42002-10-16 18:02:47 +00002836 c = *cur;
Owen Taylor3473f882001-02-23 17:55:21 +00002837 *prefix = ret;
Daniel Veillard597bc482003-07-24 16:08:28 +00002838 if (c == 0) {
Daniel Veillard8d73bcb2003-08-04 01:06:15 +00002839 return(xmlStrndup(BAD_CAST "", 0));
Daniel Veillard597bc482003-07-24 16:08:28 +00002840 }
Owen Taylor3473f882001-02-23 17:55:21 +00002841 len = 0;
2842
Daniel Veillardbb284f42002-10-16 18:02:47 +00002843 /*
2844 * Check that the first character is proper to start
2845 * a new name
2846 */
2847 if (!(((c >= 0x61) && (c <= 0x7A)) ||
2848 ((c >= 0x41) && (c <= 0x5A)) ||
2849 (c == '_') || (c == ':'))) {
2850 int l;
2851 int first = CUR_SCHAR(cur, l);
2852
2853 if (!IS_LETTER(first) && (first != '_')) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00002854 xmlFatalErrMsgStr(ctxt, XML_NS_ERR_QNAME,
Daniel Veillardbb284f42002-10-16 18:02:47 +00002855 "Name %s is not XML Namespace compliant\n",
Daniel Veillardbc92eca2003-09-15 09:48:06 +00002856 name);
Daniel Veillardbb284f42002-10-16 18:02:47 +00002857 }
2858 }
2859 cur++;
2860
Owen Taylor3473f882001-02-23 17:55:21 +00002861 while ((c != 0) && (len < max)) { /* tested bigname2.xml */
2862 buf[len++] = c;
2863 c = *cur++;
2864 }
2865 if (len >= max) {
2866 /*
2867 * Okay someone managed to make a huge name, so he's ready to pay
2868 * for the processing speed.
2869 */
2870 max = len * 2;
Daniel Veillard68b6e022008-03-31 09:26:00 +00002871
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002872 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002873 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002874 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002875 return(NULL);
2876 }
2877 memcpy(buffer, buf, len);
2878 while (c != 0) { /* tested bigname2.xml */
2879 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00002880 xmlChar *tmp;
2881
Owen Taylor3473f882001-02-23 17:55:21 +00002882 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00002883 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00002884 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00002885 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002886 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00002887 xmlFree(buffer);
Owen Taylor3473f882001-02-23 17:55:21 +00002888 return(NULL);
2889 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00002890 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00002891 }
2892 buffer[len++] = c;
2893 c = *cur++;
2894 }
2895 buffer[len] = 0;
2896 }
Daniel Veillard68b6e022008-03-31 09:26:00 +00002897
Owen Taylor3473f882001-02-23 17:55:21 +00002898 if (buffer == NULL)
2899 ret = xmlStrndup(buf, len);
2900 else {
2901 ret = buffer;
2902 }
2903 }
2904
2905 return(ret);
2906}
2907
2908/************************************************************************
2909 * *
2910 * The parser itself *
2911 * Relates to http://www.w3.org/TR/REC-xml *
2912 * *
2913 ************************************************************************/
2914
Daniel Veillard34e3f642008-07-29 09:02:27 +00002915/************************************************************************
2916 * *
2917 * Routines to parse Name, NCName and NmToken *
2918 * *
2919 ************************************************************************/
/*
 * Global statistics counters, all starting at zero.
 * NOTE(review): presumably each one is incremented by the parsing
 * routine it is named after - confirm against the callers.
 */
unsigned long nbParseName = 0;
unsigned long nbParseNameComplex = 0;
unsigned long nbParseNCName = 0;
unsigned long nbParseNCNameComplex = 0;
unsigned long nbParseNmToken = 0;
unsigned long nbParseStringName = 0;
2926/*
2927 * The two following functions are related to the change of accepted
2928 * characters for Name and NmToken in the Revision 5 of XML-1.0
2929 * They correspond to the modified production [4] and the new production [4a]
2930 * changes in that revision. Also note that the macros used for the
2931 * productions Letter, Digit, CombiningChar and Extender are not needed
2932 * anymore.
2933 * We still keep compatibility to pre-revision5 parsing semantic if the
2934 * new XML_PARSE_OLD10 option is given to the parser.
2935 */
2936static int
2937xmlIsNameStartChar(xmlParserCtxtPtr ctxt, int c) {
2938 if ((ctxt->options & XML_PARSE_OLD10) == 0) {
2939 /*
2940 * Use the new checks of production [4] [4a] amd [5] of the
2941 * Update 5 of XML-1.0
2942 */
2943 if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
2944 (((c >= 'a') && (c <= 'z')) ||
2945 ((c >= 'A') && (c <= 'Z')) ||
2946 (c == '_') || (c == ':') ||
2947 ((c >= 0xC0) && (c <= 0xD6)) ||
2948 ((c >= 0xD8) && (c <= 0xF6)) ||
2949 ((c >= 0xF8) && (c <= 0x2FF)) ||
2950 ((c >= 0x370) && (c <= 0x37D)) ||
2951 ((c >= 0x37F) && (c <= 0x1FFF)) ||
2952 ((c >= 0x200C) && (c <= 0x200D)) ||
2953 ((c >= 0x2070) && (c <= 0x218F)) ||
2954 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
2955 ((c >= 0x3001) && (c <= 0xD7FF)) ||
2956 ((c >= 0xF900) && (c <= 0xFDCF)) ||
2957 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
2958 ((c >= 0x10000) && (c <= 0xEFFFF))))
2959 return(1);
2960 } else {
2961 if (IS_LETTER(c) || (c == '_') || (c == ':'))
2962 return(1);
2963 }
2964 return(0);
2965}
2966
2967static int
2968xmlIsNameChar(xmlParserCtxtPtr ctxt, int c) {
2969 if ((ctxt->options & XML_PARSE_OLD10) == 0) {
2970 /*
2971 * Use the new checks of production [4] [4a] amd [5] of the
2972 * Update 5 of XML-1.0
2973 */
2974 if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
2975 (((c >= 'a') && (c <= 'z')) ||
2976 ((c >= 'A') && (c <= 'Z')) ||
2977 ((c >= '0') && (c <= '9')) || /* !start */
2978 (c == '_') || (c == ':') ||
2979 (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
2980 ((c >= 0xC0) && (c <= 0xD6)) ||
2981 ((c >= 0xD8) && (c <= 0xF6)) ||
2982 ((c >= 0xF8) && (c <= 0x2FF)) ||
2983 ((c >= 0x300) && (c <= 0x36F)) || /* !start */
2984 ((c >= 0x370) && (c <= 0x37D)) ||
2985 ((c >= 0x37F) && (c <= 0x1FFF)) ||
2986 ((c >= 0x200C) && (c <= 0x200D)) ||
2987 ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
2988 ((c >= 0x2070) && (c <= 0x218F)) ||
2989 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
2990 ((c >= 0x3001) && (c <= 0xD7FF)) ||
2991 ((c >= 0xF900) && (c <= 0xFDCF)) ||
2992 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
2993 ((c >= 0x10000) && (c <= 0xEFFFF))))
2994 return(1);
2995 } else {
2996 if ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
2997 (c == '.') || (c == '-') ||
2998 (c == '_') || (c == ':') ||
2999 (IS_COMBINING(c)) ||
3000 (IS_EXTENDER(c)))
3001 return(1);
3002 }
3003 return(0);
3004}
3005
Daniel Veillarde57ec792003-09-10 10:50:59 +00003006static xmlChar * xmlParseAttValueInternal(xmlParserCtxtPtr ctxt,
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003007 int *len, int *alloc, int normalize);
Daniel Veillard0fb18932003-09-07 09:14:37 +00003008
Daniel Veillard34e3f642008-07-29 09:02:27 +00003009static const xmlChar *
3010xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
3011 int len = 0, l;
3012 int c;
3013 int count = 0;
3014
3015 nbParseNameComplex++;
3016
3017 /*
3018 * Handler for more complex cases
3019 */
3020 GROW;
3021 c = CUR_CHAR(l);
3022 if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3023 /*
3024 * Use the new checks of production [4] [4a] amd [5] of the
3025 * Update 5 of XML-1.0
3026 */
3027 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3028 (!(((c >= 'a') && (c <= 'z')) ||
3029 ((c >= 'A') && (c <= 'Z')) ||
3030 (c == '_') || (c == ':') ||
3031 ((c >= 0xC0) && (c <= 0xD6)) ||
3032 ((c >= 0xD8) && (c <= 0xF6)) ||
3033 ((c >= 0xF8) && (c <= 0x2FF)) ||
3034 ((c >= 0x370) && (c <= 0x37D)) ||
3035 ((c >= 0x37F) && (c <= 0x1FFF)) ||
3036 ((c >= 0x200C) && (c <= 0x200D)) ||
3037 ((c >= 0x2070) && (c <= 0x218F)) ||
3038 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3039 ((c >= 0x3001) && (c <= 0xD7FF)) ||
3040 ((c >= 0xF900) && (c <= 0xFDCF)) ||
3041 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3042 ((c >= 0x10000) && (c <= 0xEFFFF))))) {
3043 return(NULL);
3044 }
3045 len += l;
3046 NEXTL(l);
3047 c = CUR_CHAR(l);
3048 while ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3049 (((c >= 'a') && (c <= 'z')) ||
3050 ((c >= 'A') && (c <= 'Z')) ||
3051 ((c >= '0') && (c <= '9')) || /* !start */
3052 (c == '_') || (c == ':') ||
3053 (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3054 ((c >= 0xC0) && (c <= 0xD6)) ||
3055 ((c >= 0xD8) && (c <= 0xF6)) ||
3056 ((c >= 0xF8) && (c <= 0x2FF)) ||
3057 ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3058 ((c >= 0x370) && (c <= 0x37D)) ||
3059 ((c >= 0x37F) && (c <= 0x1FFF)) ||
3060 ((c >= 0x200C) && (c <= 0x200D)) ||
3061 ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3062 ((c >= 0x2070) && (c <= 0x218F)) ||
3063 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3064 ((c >= 0x3001) && (c <= 0xD7FF)) ||
3065 ((c >= 0xF900) && (c <= 0xFDCF)) ||
3066 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3067 ((c >= 0x10000) && (c <= 0xEFFFF))
3068 )) {
3069 if (count++ > 100) {
3070 count = 0;
3071 GROW;
3072 }
3073 len += l;
3074 NEXTL(l);
3075 c = CUR_CHAR(l);
3076 }
3077 } else {
3078 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3079 (!IS_LETTER(c) && (c != '_') &&
3080 (c != ':'))) {
3081 return(NULL);
3082 }
3083 len += l;
3084 NEXTL(l);
3085 c = CUR_CHAR(l);
3086
3087 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3088 ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3089 (c == '.') || (c == '-') ||
3090 (c == '_') || (c == ':') ||
3091 (IS_COMBINING(c)) ||
3092 (IS_EXTENDER(c)))) {
3093 if (count++ > 100) {
3094 count = 0;
3095 GROW;
3096 }
3097 len += l;
3098 NEXTL(l);
3099 c = CUR_CHAR(l);
3100 }
3101 }
3102 if ((*ctxt->input->cur == '\n') && (ctxt->input->cur[-1] == '\r'))
3103 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - (len + 1), len));
3104 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
3105}
3106
Owen Taylor3473f882001-02-23 17:55:21 +00003107/**
3108 * xmlParseName:
3109 * @ctxt: an XML parser context
3110 *
3111 * parse an XML name.
3112 *
3113 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3114 * CombiningChar | Extender
3115 *
3116 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3117 *
Daniel Veillard807b4de2004-09-26 14:42:56 +00003118 * [6] Names ::= Name (#x20 Name)*
Owen Taylor3473f882001-02-23 17:55:21 +00003119 *
3120 * Returns the Name parsed or NULL
3121 */
3122
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003123const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00003124xmlParseName(xmlParserCtxtPtr ctxt) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00003125 const xmlChar *in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003126 const xmlChar *ret;
Owen Taylor3473f882001-02-23 17:55:21 +00003127 int count = 0;
3128
3129 GROW;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003130
Daniel Veillard34e3f642008-07-29 09:02:27 +00003131 nbParseName++;
3132
Daniel Veillard48b2f892001-02-25 16:11:03 +00003133 /*
3134 * Accelerator for simple ASCII names
3135 */
3136 in = ctxt->input->cur;
3137 if (((*in >= 0x61) && (*in <= 0x7A)) ||
3138 ((*in >= 0x41) && (*in <= 0x5A)) ||
3139 (*in == '_') || (*in == ':')) {
3140 in++;
3141 while (((*in >= 0x61) && (*in <= 0x7A)) ||
3142 ((*in >= 0x41) && (*in <= 0x5A)) ||
3143 ((*in >= 0x30) && (*in <= 0x39)) ||
Daniel Veillard76d66f42001-05-16 21:05:17 +00003144 (*in == '_') || (*in == '-') ||
3145 (*in == ':') || (*in == '.'))
Daniel Veillard48b2f892001-02-25 16:11:03 +00003146 in++;
Daniel Veillard76d66f42001-05-16 21:05:17 +00003147 if ((*in > 0) && (*in < 0x80)) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00003148 count = in - ctxt->input->cur;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003149 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
Daniel Veillard48b2f892001-02-25 16:11:03 +00003150 ctxt->input->cur = in;
Daniel Veillard77a90a72003-03-22 00:04:05 +00003151 ctxt->nbChars += count;
3152 ctxt->input->col += count;
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003153 if (ret == NULL)
3154 xmlErrMemory(ctxt, NULL);
Daniel Veillard48b2f892001-02-25 16:11:03 +00003155 return(ret);
3156 }
3157 }
Daniel Veillard34e3f642008-07-29 09:02:27 +00003158 /* accelerator for special cases */
Daniel Veillard2f362242001-03-02 17:36:21 +00003159 return(xmlParseNameComplex(ctxt));
Daniel Veillard21a0f912001-02-25 19:54:14 +00003160}
Daniel Veillard48b2f892001-02-25 16:11:03 +00003161
Daniel Veillard34e3f642008-07-29 09:02:27 +00003162static const xmlChar *
3163xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) {
3164 int len = 0, l;
3165 int c;
3166 int count = 0;
3167
3168 nbParseNCNameComplex++;
3169
3170 /*
3171 * Handler for more complex cases
3172 */
3173 GROW;
3174 c = CUR_CHAR(l);
3175 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3176 (!xmlIsNameStartChar(ctxt, c) || (c == ':'))) {
3177 return(NULL);
3178 }
3179
3180 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3181 (xmlIsNameChar(ctxt, c) && (c != ':'))) {
3182 if (count++ > 100) {
3183 count = 0;
3184 GROW;
3185 }
3186 len += l;
3187 NEXTL(l);
3188 c = CUR_CHAR(l);
3189 }
3190 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
3191}
3192
3193/**
3194 * xmlParseNCName:
3195 * @ctxt: an XML parser context
3196 * @len: lenght of the string parsed
3197 *
3198 * parse an XML name.
3199 *
3200 * [4NS] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
3201 * CombiningChar | Extender
3202 *
3203 * [5NS] NCName ::= (Letter | '_') (NCNameChar)*
3204 *
3205 * Returns the Name parsed or NULL
3206 */
3207
3208static const xmlChar *
3209xmlParseNCName(xmlParserCtxtPtr ctxt) {
3210 const xmlChar *in;
3211 const xmlChar *ret;
3212 int count = 0;
3213
3214 nbParseNCName++;
3215
3216 /*
3217 * Accelerator for simple ASCII names
3218 */
3219 in = ctxt->input->cur;
3220 if (((*in >= 0x61) && (*in <= 0x7A)) ||
3221 ((*in >= 0x41) && (*in <= 0x5A)) ||
3222 (*in == '_')) {
3223 in++;
3224 while (((*in >= 0x61) && (*in <= 0x7A)) ||
3225 ((*in >= 0x41) && (*in <= 0x5A)) ||
3226 ((*in >= 0x30) && (*in <= 0x39)) ||
3227 (*in == '_') || (*in == '-') ||
3228 (*in == '.'))
3229 in++;
3230 if ((*in > 0) && (*in < 0x80)) {
3231 count = in - ctxt->input->cur;
3232 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
3233 ctxt->input->cur = in;
3234 ctxt->nbChars += count;
3235 ctxt->input->col += count;
3236 if (ret == NULL) {
3237 xmlErrMemory(ctxt, NULL);
3238 }
3239 return(ret);
3240 }
3241 }
3242 return(xmlParseNCNameComplex(ctxt));
3243}
3244
Daniel Veillard46de64e2002-05-29 08:21:33 +00003245/**
3246 * xmlParseNameAndCompare:
3247 * @ctxt: an XML parser context
3248 *
3249 * parse an XML name and compares for match
3250 * (specialized for endtag parsing)
3251 *
Daniel Veillard46de64e2002-05-29 08:21:33 +00003252 * Returns NULL for an illegal name, (xmlChar*) 1 for success
3253 * and the name for mismatch
3254 */
3255
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003256static const xmlChar *
Daniel Veillard46de64e2002-05-29 08:21:33 +00003257xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003258 register const xmlChar *cmp = other;
3259 register const xmlChar *in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003260 const xmlChar *ret;
Daniel Veillard46de64e2002-05-29 08:21:33 +00003261
3262 GROW;
Daniel Veillard34e3f642008-07-29 09:02:27 +00003263
Daniel Veillard46de64e2002-05-29 08:21:33 +00003264 in = ctxt->input->cur;
3265 while (*in != 0 && *in == *cmp) {
Daniel Veillard34e3f642008-07-29 09:02:27 +00003266 ++in;
Daniel Veillard46de64e2002-05-29 08:21:33 +00003267 ++cmp;
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00003268 ctxt->input->col++;
Daniel Veillard46de64e2002-05-29 08:21:33 +00003269 }
William M. Brack76e95df2003-10-18 16:20:14 +00003270 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
Daniel Veillard34e3f642008-07-29 09:02:27 +00003271 /* success */
Daniel Veillard46de64e2002-05-29 08:21:33 +00003272 ctxt->input->cur = in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003273 return (const xmlChar*) 1;
Daniel Veillard46de64e2002-05-29 08:21:33 +00003274 }
3275 /* failure (or end of input buffer), check with full function */
3276 ret = xmlParseName (ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00003277 /* strings coming from the dictionnary direct compare possible */
3278 if (ret == other) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003279 return (const xmlChar*) 1;
Daniel Veillard46de64e2002-05-29 08:21:33 +00003280 }
3281 return ret;
3282}
3283
Owen Taylor3473f882001-02-23 17:55:21 +00003284/**
3285 * xmlParseStringName:
3286 * @ctxt: an XML parser context
3287 * @str: a pointer to the string pointer (IN/OUT)
3288 *
3289 * parse an XML name.
3290 *
3291 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3292 * CombiningChar | Extender
3293 *
3294 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3295 *
Daniel Veillard807b4de2004-09-26 14:42:56 +00003296 * [6] Names ::= Name (#x20 Name)*
Owen Taylor3473f882001-02-23 17:55:21 +00003297 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003298 * Returns the Name parsed or NULL. The @str pointer
Owen Taylor3473f882001-02-23 17:55:21 +00003299 * is updated to the current location in the string.
3300 */
3301
Daniel Veillard56a4cb82001-03-24 17:00:36 +00003302static xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00003303xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
3304 xmlChar buf[XML_MAX_NAMELEN + 5];
3305 const xmlChar *cur = *str;
3306 int len = 0, l;
3307 int c;
3308
Daniel Veillard34e3f642008-07-29 09:02:27 +00003309 nbParseStringName++;
3310
Owen Taylor3473f882001-02-23 17:55:21 +00003311 c = CUR_SCHAR(cur, l);
Daniel Veillard34e3f642008-07-29 09:02:27 +00003312 if (!xmlIsNameStartChar(ctxt, c)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003313 return(NULL);
3314 }
3315
Daniel Veillard34e3f642008-07-29 09:02:27 +00003316 COPY_BUF(l,buf,len,c);
3317 cur += l;
3318 c = CUR_SCHAR(cur, l);
3319 while (xmlIsNameChar(ctxt, c)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003320 COPY_BUF(l,buf,len,c);
3321 cur += l;
3322 c = CUR_SCHAR(cur, l);
3323 if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
3324 /*
3325 * Okay someone managed to make a huge name, so he's ready to pay
3326 * for the processing speed.
3327 */
3328 xmlChar *buffer;
3329 int max = len * 2;
Daniel Veillard34e3f642008-07-29 09:02:27 +00003330
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003331 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003332 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003333 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003334 return(NULL);
3335 }
3336 memcpy(buffer, buf, len);
Daniel Veillard34e3f642008-07-29 09:02:27 +00003337 while (xmlIsNameChar(ctxt, c)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003338 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003339 xmlChar *tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003340 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003341 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00003342 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00003343 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003344 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00003345 xmlFree(buffer);
Owen Taylor3473f882001-02-23 17:55:21 +00003346 return(NULL);
3347 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003348 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003349 }
3350 COPY_BUF(l,buffer,len,c);
3351 cur += l;
3352 c = CUR_SCHAR(cur, l);
3353 }
3354 buffer[len] = 0;
3355 *str = cur;
3356 return(buffer);
3357 }
3358 }
3359 *str = cur;
3360 return(xmlStrndup(buf, len));
3361}
3362
3363/**
3364 * xmlParseNmtoken:
3365 * @ctxt: an XML parser context
Daniel Veillard34e3f642008-07-29 09:02:27 +00003366 *
Owen Taylor3473f882001-02-23 17:55:21 +00003367 * parse an XML Nmtoken.
3368 *
3369 * [7] Nmtoken ::= (NameChar)+
3370 *
Daniel Veillard807b4de2004-09-26 14:42:56 +00003371 * [8] Nmtokens ::= Nmtoken (#x20 Nmtoken)*
Owen Taylor3473f882001-02-23 17:55:21 +00003372 *
3373 * Returns the Nmtoken parsed or NULL
3374 */
3375
3376xmlChar *
3377xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
3378 xmlChar buf[XML_MAX_NAMELEN + 5];
3379 int len = 0, l;
3380 int c;
3381 int count = 0;
3382
Daniel Veillard34e3f642008-07-29 09:02:27 +00003383 nbParseNmToken++;
3384
Owen Taylor3473f882001-02-23 17:55:21 +00003385 GROW;
3386 c = CUR_CHAR(l);
3387
Daniel Veillard34e3f642008-07-29 09:02:27 +00003388 while (xmlIsNameChar(ctxt, c)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003389 if (count++ > 100) {
3390 count = 0;
3391 GROW;
3392 }
3393 COPY_BUF(l,buf,len,c);
3394 NEXTL(l);
3395 c = CUR_CHAR(l);
3396 if (len >= XML_MAX_NAMELEN) {
3397 /*
3398 * Okay someone managed to make a huge token, so he's ready to pay
3399 * for the processing speed.
3400 */
3401 xmlChar *buffer;
3402 int max = len * 2;
Daniel Veillard34e3f642008-07-29 09:02:27 +00003403
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003404 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003405 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003406 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003407 return(NULL);
3408 }
3409 memcpy(buffer, buf, len);
Daniel Veillard34e3f642008-07-29 09:02:27 +00003410 while (xmlIsNameChar(ctxt, c)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003411 if (count++ > 100) {
3412 count = 0;
3413 GROW;
3414 }
3415 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003416 xmlChar *tmp;
3417
Owen Taylor3473f882001-02-23 17:55:21 +00003418 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003419 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00003420 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00003421 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003422 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00003423 xmlFree(buffer);
Owen Taylor3473f882001-02-23 17:55:21 +00003424 return(NULL);
3425 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003426 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003427 }
3428 COPY_BUF(l,buffer,len,c);
3429 NEXTL(l);
3430 c = CUR_CHAR(l);
3431 }
3432 buffer[len] = 0;
3433 return(buffer);
3434 }
3435 }
3436 if (len == 0)
3437 return(NULL);
3438 return(xmlStrndup(buf, len));
3439}
3440
3441/**
3442 * xmlParseEntityValue:
3443 * @ctxt: an XML parser context
3444 * @orig: if non-NULL store a copy of the original entity value
3445 *
3446 * parse a value for ENTITY declarations
3447 *
3448 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
3449 * "'" ([^%&'] | PEReference | Reference)* "'"
3450 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003451 * Returns the EntityValue parsed with reference substituted or NULL
Owen Taylor3473f882001-02-23 17:55:21 +00003452 */
3453
3454xmlChar *
3455xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
3456 xmlChar *buf = NULL;
3457 int len = 0;
3458 int size = XML_PARSER_BUFFER_SIZE;
3459 int c, l;
3460 xmlChar stop;
3461 xmlChar *ret = NULL;
3462 const xmlChar *cur = NULL;
3463 xmlParserInputPtr input;
3464
3465 if (RAW == '"') stop = '"';
3466 else if (RAW == '\'') stop = '\'';
3467 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003468 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003469 return(NULL);
3470 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003471 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003472 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003473 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003474 return(NULL);
3475 }
3476
3477 /*
3478 * The content of the entity definition is copied in a buffer.
3479 */
3480
3481 ctxt->instate = XML_PARSER_ENTITY_VALUE;
3482 input = ctxt->input;
3483 GROW;
3484 NEXT;
3485 c = CUR_CHAR(l);
3486 /*
3487 * NOTE: 4.4.5 Included in Literal
3488 * When a parameter entity reference appears in a literal entity
3489 * value, ... a single or double quote character in the replacement
3490 * text is always treated as a normal data character and will not
3491 * terminate the literal.
3492 * In practice it means we stop the loop only when back at parsing
3493 * the initial entity and the quote is found
3494 */
William M. Brack871611b2003-10-18 04:53:14 +00003495 while ((IS_CHAR(c)) && ((c != stop) || /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00003496 (ctxt->input != input))) {
3497 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003498 xmlChar *tmp;
3499
Owen Taylor3473f882001-02-23 17:55:21 +00003500 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003501 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3502 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003503 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00003504 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00003505 return(NULL);
3506 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003507 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003508 }
3509 COPY_BUF(l,buf,len,c);
3510 NEXTL(l);
3511 /*
3512 * Pop-up of finished entities.
3513 */
3514 while ((RAW == 0) && (ctxt->inputNr > 1)) /* non input consuming */
3515 xmlPopInput(ctxt);
3516
3517 GROW;
3518 c = CUR_CHAR(l);
3519 if (c == 0) {
3520 GROW;
3521 c = CUR_CHAR(l);
3522 }
3523 }
3524 buf[len] = 0;
3525
3526 /*
3527 * Raise problem w.r.t. '&' and '%' being used in non-entities
3528 * reference constructs. Note Charref will be handled in
3529 * xmlStringDecodeEntities()
3530 */
3531 cur = buf;
3532 while (*cur != 0) { /* non input consuming */
3533 if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
3534 xmlChar *name;
3535 xmlChar tmp = *cur;
3536
3537 cur++;
3538 name = xmlParseStringName(ctxt, &cur);
3539 if ((name == NULL) || (*cur != ';')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003540 xmlFatalErrMsgInt(ctxt, XML_ERR_ENTITY_CHAR_ERROR,
Owen Taylor3473f882001-02-23 17:55:21 +00003541 "EntityValue: '%c' forbidden except for entities references\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003542 tmp);
Owen Taylor3473f882001-02-23 17:55:21 +00003543 }
Daniel Veillard5151c062001-10-23 13:10:19 +00003544 if ((tmp == '%') && (ctxt->inSubset == 1) &&
3545 (ctxt->inputNr == 1)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003546 xmlFatalErr(ctxt, XML_ERR_ENTITY_PE_INTERNAL, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003547 }
3548 if (name != NULL)
3549 xmlFree(name);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00003550 if (*cur == 0)
3551 break;
Owen Taylor3473f882001-02-23 17:55:21 +00003552 }
3553 cur++;
3554 }
3555
3556 /*
3557 * Then PEReference entities are substituted.
3558 */
3559 if (c != stop) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003560 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003561 xmlFree(buf);
3562 } else {
3563 NEXT;
3564 /*
3565 * NOTE: 4.4.7 Bypassed
3566 * When a general entity reference appears in the EntityValue in
3567 * an entity declaration, it is bypassed and left as is.
3568 * so XML_SUBSTITUTE_REF is not set here.
3569 */
3570 ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF,
3571 0, 0, 0);
3572 if (orig != NULL)
3573 *orig = buf;
3574 else
3575 xmlFree(buf);
3576 }
3577
3578 return(ret);
3579}
3580
3581/**
Daniel Veillard01c13b52002-12-10 15:19:08 +00003582 * xmlParseAttValueComplex:
3583 * @ctxt: an XML parser context
Daniel Veillard0fb18932003-09-07 09:14:37 +00003584 * @len: the resulting attribute len
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003585 * @normalize: wether to apply the inner normalization
Daniel Veillard01c13b52002-12-10 15:19:08 +00003586 *
3587 * parse a value for an attribute, this is the fallback function
3588 * of xmlParseAttValue() when the attribute parsing requires handling
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003589 * of non-ASCII characters, or normalization compaction.
Daniel Veillard01c13b52002-12-10 15:19:08 +00003590 *
3591 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
3592 */
Daniel Veillard0fb18932003-09-07 09:14:37 +00003593static xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003594xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) {
Daniel Veillarde72c7562002-05-31 09:47:30 +00003595 xmlChar limit = 0;
3596 xmlChar *buf = NULL;
Daniel Veillard68b6e022008-03-31 09:26:00 +00003597 xmlChar *rep = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00003598 int len = 0;
3599 int buf_size = 0;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003600 int c, l, in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003601 xmlChar *current = NULL;
3602 xmlEntityPtr ent;
3603
Owen Taylor3473f882001-02-23 17:55:21 +00003604 if (NXT(0) == '"') {
3605 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3606 limit = '"';
3607 NEXT;
3608 } else if (NXT(0) == '\'') {
3609 limit = '\'';
3610 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3611 NEXT;
3612 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003613 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003614 return(NULL);
3615 }
Daniel Veillard68b6e022008-03-31 09:26:00 +00003616
Owen Taylor3473f882001-02-23 17:55:21 +00003617 /*
3618 * allocate a translation buffer.
3619 */
3620 buf_size = XML_PARSER_BUFFER_SIZE;
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003621 buf = (xmlChar *) xmlMallocAtomic(buf_size * sizeof(xmlChar));
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003622 if (buf == NULL) goto mem_error;
Owen Taylor3473f882001-02-23 17:55:21 +00003623
3624 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003625 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +00003626 */
3627 c = CUR_CHAR(l);
Daniel Veillardfdc91562002-07-01 21:52:03 +00003628 while ((NXT(0) != limit) && /* checked */
Daniel Veillardb9e5acc2007-06-12 13:43:00 +00003629 (IS_CHAR(c)) && (c != '<')) {
Owen Taylor3473f882001-02-23 17:55:21 +00003630 if (c == 0) break;
Daniel Veillardfdc91562002-07-01 21:52:03 +00003631 if (c == '&') {
Daniel Veillard62998c02003-09-15 12:56:36 +00003632 in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003633 if (NXT(1) == '#') {
3634 int val = xmlParseCharRef(ctxt);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003635
Owen Taylor3473f882001-02-23 17:55:21 +00003636 if (val == '&') {
Daniel Veillard319a7422001-09-11 09:27:09 +00003637 if (ctxt->replaceEntities) {
3638 if (len > buf_size - 10) {
Daniel Veillard0161e632008-08-28 15:36:32 +00003639 growBuffer(buf, 10);
Daniel Veillard319a7422001-09-11 09:27:09 +00003640 }
3641 buf[len++] = '&';
3642 } else {
3643 /*
3644 * The reparsing will be done in xmlStringGetNodeList()
3645 * called by the attribute() function in SAX.c
3646 */
Daniel Veillard319a7422001-09-11 09:27:09 +00003647 if (len > buf_size - 10) {
Daniel Veillard0161e632008-08-28 15:36:32 +00003648 growBuffer(buf, 10);
Daniel Veillard319a7422001-09-11 09:27:09 +00003649 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003650 buf[len++] = '&';
3651 buf[len++] = '#';
3652 buf[len++] = '3';
3653 buf[len++] = '8';
3654 buf[len++] = ';';
Owen Taylor3473f882001-02-23 17:55:21 +00003655 }
Daniel Veillarddc171602008-03-26 17:41:38 +00003656 } else if (val != 0) {
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00003657 if (len > buf_size - 10) {
Daniel Veillard0161e632008-08-28 15:36:32 +00003658 growBuffer(buf, 10);
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00003659 }
Owen Taylor3473f882001-02-23 17:55:21 +00003660 len += xmlCopyChar(0, &buf[len], val);
3661 }
3662 } else {
3663 ent = xmlParseEntityRef(ctxt);
Daniel Veillardcba68392008-08-29 12:43:40 +00003664 ctxt->nbentities++;
3665 if (ent != NULL)
3666 ctxt->nbentities += ent->owner;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003667 if ((ent != NULL) &&
3668 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
3669 if (len > buf_size - 10) {
Daniel Veillard0161e632008-08-28 15:36:32 +00003670 growBuffer(buf, 10);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003671 }
3672 if ((ctxt->replaceEntities == 0) &&
3673 (ent->content[0] == '&')) {
3674 buf[len++] = '&';
3675 buf[len++] = '#';
3676 buf[len++] = '3';
3677 buf[len++] = '8';
3678 buf[len++] = ';';
3679 } else {
3680 buf[len++] = ent->content[0];
3681 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003682 } else if ((ent != NULL) &&
3683 (ctxt->replaceEntities != 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003684 if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
3685 rep = xmlStringDecodeEntities(ctxt, ent->content,
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003686 XML_SUBSTITUTE_REF,
3687 0, 0, 0);
Owen Taylor3473f882001-02-23 17:55:21 +00003688 if (rep != NULL) {
3689 current = rep;
3690 while (*current != 0) { /* non input consuming */
3691 buf[len++] = *current++;
3692 if (len > buf_size - 10) {
Daniel Veillard0161e632008-08-28 15:36:32 +00003693 growBuffer(buf, 10);
Owen Taylor3473f882001-02-23 17:55:21 +00003694 }
3695 }
3696 xmlFree(rep);
Daniel Veillard68b6e022008-03-31 09:26:00 +00003697 rep = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00003698 }
3699 } else {
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00003700 if (len > buf_size - 10) {
Daniel Veillard0161e632008-08-28 15:36:32 +00003701 growBuffer(buf, 10);
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00003702 }
Owen Taylor3473f882001-02-23 17:55:21 +00003703 if (ent->content != NULL)
3704 buf[len++] = ent->content[0];
3705 }
3706 } else if (ent != NULL) {
3707 int i = xmlStrlen(ent->name);
3708 const xmlChar *cur = ent->name;
3709
3710 /*
3711 * This may look absurd but is needed to detect
3712 * entities problems
3713 */
3714 if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
3715 (ent->content != NULL)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003716 rep = xmlStringDecodeEntities(ctxt, ent->content,
Daniel Veillard68b6e022008-03-31 09:26:00 +00003717 XML_SUBSTITUTE_REF, 0, 0, 0);
3718 if (rep != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +00003719 xmlFree(rep);
Daniel Veillard68b6e022008-03-31 09:26:00 +00003720 rep = NULL;
3721 }
Owen Taylor3473f882001-02-23 17:55:21 +00003722 }
3723
3724 /*
3725 * Just output the reference
3726 */
3727 buf[len++] = '&';
Daniel Veillard0161e632008-08-28 15:36:32 +00003728 while (len > buf_size - i - 10) {
3729 growBuffer(buf, i + 10);
Owen Taylor3473f882001-02-23 17:55:21 +00003730 }
3731 for (;i > 0;i--)
3732 buf[len++] = *cur++;
3733 buf[len++] = ';';
3734 }
3735 }
3736 } else {
3737 if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003738 if ((len != 0) || (!normalize)) {
3739 if ((!normalize) || (!in_space)) {
3740 COPY_BUF(l,buf,len,0x20);
Daniel Veillard0161e632008-08-28 15:36:32 +00003741 while (len > buf_size - 10) {
3742 growBuffer(buf, 10);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003743 }
3744 }
3745 in_space = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003746 }
3747 } else {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003748 in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003749 COPY_BUF(l,buf,len,c);
3750 if (len > buf_size - 10) {
Daniel Veillard0161e632008-08-28 15:36:32 +00003751 growBuffer(buf, 10);
Owen Taylor3473f882001-02-23 17:55:21 +00003752 }
3753 }
3754 NEXTL(l);
3755 }
3756 GROW;
3757 c = CUR_CHAR(l);
3758 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003759 if ((in_space) && (normalize)) {
3760 while (buf[len - 1] == 0x20) len--;
3761 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00003762 buf[len] = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003763 if (RAW == '<') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003764 xmlFatalErr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003765 } else if (RAW != limit) {
Daniel Veillardb9e5acc2007-06-12 13:43:00 +00003766 if ((c != 0) && (!IS_CHAR(c))) {
3767 xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
3768 "invalid character in attribute value\n");
3769 } else {
3770 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
3771 "AttValue: ' expected\n");
3772 }
Owen Taylor3473f882001-02-23 17:55:21 +00003773 } else
3774 NEXT;
Daniel Veillard0fb18932003-09-07 09:14:37 +00003775 if (attlen != NULL) *attlen = len;
Owen Taylor3473f882001-02-23 17:55:21 +00003776 return(buf);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003777
3778mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003779 xmlErrMemory(ctxt, NULL);
Daniel Veillard68b6e022008-03-31 09:26:00 +00003780 if (buf != NULL)
3781 xmlFree(buf);
3782 if (rep != NULL)
3783 xmlFree(rep);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003784 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003785}
3786
3787/**
Daniel Veillard0fb18932003-09-07 09:14:37 +00003788 * xmlParseAttValue:
3789 * @ctxt: an XML parser context
3790 *
3791 * parse a value for an attribute
3792 * Note: the parser won't do substitution of entities here, this
3793 * will be handled later in xmlStringGetNodeList
3794 *
3795 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
3796 * "'" ([^<&'] | Reference)* "'"
3797 *
3798 * 3.3.3 Attribute-Value Normalization:
3799 * Before the value of an attribute is passed to the application or
3800 * checked for validity, the XML processor must normalize it as follows:
3801 * - a character reference is processed by appending the referenced
3802 * character to the attribute value
3803 * - an entity reference is processed by recursively processing the
3804 * replacement text of the entity
3805 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
3806 * appending #x20 to the normalized value, except that only a single
3807 * #x20 is appended for a "#xD#xA" sequence that is part of an external
3808 * parsed entity or the literal entity value of an internal parsed entity
3809 * - other characters are processed by appending them to the normalized value
3810 * If the declared value is not CDATA, then the XML processor must further
3811 * process the normalized attribute value by discarding any leading and
3812 * trailing space (#x20) characters, and by replacing sequences of space
3813 * (#x20) characters by a single space (#x20) character.
3814 * All attributes for which no declaration has been read should be treated
3815 * by a non-validating parser as if declared CDATA.
3816 *
3817 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
3818 */
3819
3820
3821xmlChar *
3822xmlParseAttValue(xmlParserCtxtPtr ctxt) {
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00003823 if ((ctxt == NULL) || (ctxt->input == NULL)) return(NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003824 return(xmlParseAttValueInternal(ctxt, NULL, NULL, 0));
Daniel Veillard0fb18932003-09-07 09:14:37 +00003825}
3826
3827/**
Owen Taylor3473f882001-02-23 17:55:21 +00003828 * xmlParseSystemLiteral:
3829 * @ctxt: an XML parser context
3830 *
3831 * parse an XML Literal
3832 *
3833 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
3834 *
3835 * Returns the SystemLiteral parsed or NULL
3836 */
3837
3838xmlChar *
3839xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
3840 xmlChar *buf = NULL;
3841 int len = 0;
3842 int size = XML_PARSER_BUFFER_SIZE;
3843 int cur, l;
3844 xmlChar stop;
3845 int state = ctxt->instate;
3846 int count = 0;
3847
3848 SHRINK;
3849 if (RAW == '"') {
3850 NEXT;
3851 stop = '"';
3852 } else if (RAW == '\'') {
3853 NEXT;
3854 stop = '\'';
3855 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003856 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003857 return(NULL);
3858 }
3859
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003860 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003861 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003862 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003863 return(NULL);
3864 }
3865 ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
3866 cur = CUR_CHAR(l);
William M. Brack871611b2003-10-18 04:53:14 +00003867 while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00003868 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003869 xmlChar *tmp;
3870
Owen Taylor3473f882001-02-23 17:55:21 +00003871 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003872 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3873 if (tmp == NULL) {
3874 xmlFree(buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003875 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003876 ctxt->instate = (xmlParserInputState) state;
3877 return(NULL);
3878 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003879 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003880 }
3881 count++;
3882 if (count > 50) {
3883 GROW;
3884 count = 0;
3885 }
3886 COPY_BUF(l,buf,len,cur);
3887 NEXTL(l);
3888 cur = CUR_CHAR(l);
3889 if (cur == 0) {
3890 GROW;
3891 SHRINK;
3892 cur = CUR_CHAR(l);
3893 }
3894 }
3895 buf[len] = 0;
3896 ctxt->instate = (xmlParserInputState) state;
William M. Brack871611b2003-10-18 04:53:14 +00003897 if (!IS_CHAR(cur)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003898 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003899 } else {
3900 NEXT;
3901 }
3902 return(buf);
3903}
3904
3905/**
3906 * xmlParsePubidLiteral:
3907 * @ctxt: an XML parser context
3908 *
3909 * parse an XML public literal
3910 *
3911 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
3912 *
3913 * Returns the PubidLiteral parsed or NULL.
3914 */
3915
3916xmlChar *
3917xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
3918 xmlChar *buf = NULL;
3919 int len = 0;
3920 int size = XML_PARSER_BUFFER_SIZE;
3921 xmlChar cur;
3922 xmlChar stop;
3923 int count = 0;
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003924 xmlParserInputState oldstate = ctxt->instate;
Owen Taylor3473f882001-02-23 17:55:21 +00003925
3926 SHRINK;
3927 if (RAW == '"') {
3928 NEXT;
3929 stop = '"';
3930 } else if (RAW == '\'') {
3931 NEXT;
3932 stop = '\'';
3933 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003934 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003935 return(NULL);
3936 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003937 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003938 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003939 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003940 return(NULL);
3941 }
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003942 ctxt->instate = XML_PARSER_PUBLIC_LITERAL;
Owen Taylor3473f882001-02-23 17:55:21 +00003943 cur = CUR;
William M. Brack76e95df2003-10-18 16:20:14 +00003944 while ((IS_PUBIDCHAR_CH(cur)) && (cur != stop)) { /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00003945 if (len + 1 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003946 xmlChar *tmp;
3947
Owen Taylor3473f882001-02-23 17:55:21 +00003948 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003949 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3950 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003951 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00003952 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00003953 return(NULL);
3954 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003955 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003956 }
3957 buf[len++] = cur;
3958 count++;
3959 if (count > 50) {
3960 GROW;
3961 count = 0;
3962 }
3963 NEXT;
3964 cur = CUR;
3965 if (cur == 0) {
3966 GROW;
3967 SHRINK;
3968 cur = CUR;
3969 }
3970 }
3971 buf[len] = 0;
3972 if (cur != stop) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003973 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003974 } else {
3975 NEXT;
3976 }
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003977 ctxt->instate = oldstate;
Owen Taylor3473f882001-02-23 17:55:21 +00003978 return(buf);
3979}
3980
/*
 * Forward declaration: xmlParseCharData() calls this routine, whose
 * definition only comes after it in this file.
 */
void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata);
Daniel Veillard0a119eb2005-07-20 13:46:00 +00003982
/*
 * Lookup table used by the inner loop of the character data scan: the
 * scan keeps advancing for as long as the entry of the current byte is
 * non-zero.
 *
 * Non-zero entries (each holding its own byte value): TAB (0x09) and the
 * range 0x20-0x7F, with the exception of '&', '<' and ']'.
 * Zero entries: every other control byte (LF 0x0A and CR 0x0D included,
 * which the caller handles separately), '&', '<', ']' and every byte
 * from 0x80 upward.
 */
static const unsigned char test_char_data[256] = {
    /* 0x00 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x08 */ 0x00, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* TAB only */
    /* 0x10 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x18 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x20 */ 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x00, 0x27, /* no '&' */
    /* 0x28 */ 0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
    /* 0x30 */ 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
    /* 0x38 */ 0x38, 0x39, 0x3A, 0x3B, 0x00, 0x3D, 0x3E, 0x3F, /* no '<' */
    /* 0x40 */ 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
    /* 0x48 */ 0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F,
    /* 0x50 */ 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
    /* 0x58 */ 0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x00, 0x5E, 0x5F, /* no ']' */
    /* 0x60 */ 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
    /* 0x68 */ 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
    /* 0x70 */ 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
    /* 0x78 */ 0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F,
    /* 0x80 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* non-ascii */
    /* 0x88 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x90 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x98 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0xA0 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0xA8 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0xB0 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0xB8 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0xC0 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0xC8 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0xD0 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0xD8 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0xE0 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0xE8 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0xF0 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0xF8 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
};
4020
Owen Taylor3473f882001-02-23 17:55:21 +00004021/**
4022 * xmlParseCharData:
4023 * @ctxt: an XML parser context
4024 * @cdata: int indicating whether we are within a CDATA section
4025 *
4026 * parse a CharData section.
4027 * if we are within a CDATA section ']]>' marks an end of section.
4028 *
4029 * The right angle bracket (>) may be represented using the string "&gt;",
4030 * and must, for compatibility, be escaped using "&gt;" or a character
4031 * reference when it appears in the string "]]>" in content, when that
4032 * string is not marking the end of a CDATA section.
4033 *
4034 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
4035 */
4036
4037void
4038xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00004039 const xmlChar *in;
Daniel Veillard48b2f892001-02-25 16:11:03 +00004040 int nbchar = 0;
Daniel Veillard50582112001-03-26 22:52:16 +00004041 int line = ctxt->input->line;
4042 int col = ctxt->input->col;
Daniel Veillard0714c5b2005-01-23 00:01:01 +00004043 int ccol;
Daniel Veillard48b2f892001-02-25 16:11:03 +00004044
4045 SHRINK;
4046 GROW;
4047 /*
4048 * Accelerated common case where input don't need to be
4049 * modified before passing it to the handler.
4050 */
Daniel Veillardfdc91562002-07-01 21:52:03 +00004051 if (!cdata) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00004052 in = ctxt->input->cur;
4053 do {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004054get_more_space:
Daniel Veillard9e264ad2008-01-11 06:10:16 +00004055 while (*in == 0x20) { in++; ctxt->input->col++; }
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004056 if (*in == 0xA) {
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004057 do {
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00004058 ctxt->input->line++; ctxt->input->col = 1;
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004059 in++;
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004060 } while (*in == 0xA);
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004061 goto get_more_space;
4062 }
4063 if (*in == '<') {
4064 nbchar = in - ctxt->input->cur;
4065 if (nbchar > 0) {
4066 const xmlChar *tmp = ctxt->input->cur;
4067 ctxt->input->cur = in;
4068
Daniel Veillard34099b42004-11-04 17:34:35 +00004069 if ((ctxt->sax != NULL) &&
4070 (ctxt->sax->ignorableWhitespace !=
4071 ctxt->sax->characters)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004072 if (areBlanks(ctxt, tmp, nbchar, 1)) {
Daniel Veillard32acf0c2005-03-31 14:12:37 +00004073 if (ctxt->sax->ignorableWhitespace != NULL)
4074 ctxt->sax->ignorableWhitespace(ctxt->userData,
4075 tmp, nbchar);
Daniel Veillard1114d002006-10-12 16:24:35 +00004076 } else {
4077 if (ctxt->sax->characters != NULL)
4078 ctxt->sax->characters(ctxt->userData,
4079 tmp, nbchar);
4080 if (*ctxt->space == -1)
4081 *ctxt->space = -2;
4082 }
Daniel Veillard34099b42004-11-04 17:34:35 +00004083 } else if ((ctxt->sax != NULL) &&
4084 (ctxt->sax->characters != NULL)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004085 ctxt->sax->characters(ctxt->userData,
4086 tmp, nbchar);
4087 }
4088 }
4089 return;
4090 }
Daniel Veillard0714c5b2005-01-23 00:01:01 +00004091
Daniel Veillard3ed155f2001-04-29 19:56:59 +00004092get_more:
Daniel Veillard0714c5b2005-01-23 00:01:01 +00004093 ccol = ctxt->input->col;
Daniel Veillard0a119eb2005-07-20 13:46:00 +00004094 while (test_char_data[*in]) {
4095 in++;
4096 ccol++;
4097 }
Daniel Veillard0714c5b2005-01-23 00:01:01 +00004098 ctxt->input->col = ccol;
Daniel Veillard561b7f82002-03-20 21:55:57 +00004099 if (*in == 0xA) {
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004100 do {
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00004101 ctxt->input->line++; ctxt->input->col = 1;
Daniel Veillard3ed155f2001-04-29 19:56:59 +00004102 in++;
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004103 } while (*in == 0xA);
Daniel Veillard3ed155f2001-04-29 19:56:59 +00004104 goto get_more;
Daniel Veillard561b7f82002-03-20 21:55:57 +00004105 }
4106 if (*in == ']') {
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00004107 if ((in[1] == ']') && (in[2] == '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004108 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00004109 ctxt->input->cur = in;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00004110 return;
4111 }
4112 in++;
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00004113 ctxt->input->col++;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00004114 goto get_more;
4115 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00004116 nbchar = in - ctxt->input->cur;
Daniel Veillard80f32572001-03-07 19:45:40 +00004117 if (nbchar > 0) {
Daniel Veillard34099b42004-11-04 17:34:35 +00004118 if ((ctxt->sax != NULL) &&
4119 (ctxt->sax->ignorableWhitespace !=
Daniel Veillard40412cd2003-09-03 13:28:32 +00004120 ctxt->sax->characters) &&
William M. Brack76e95df2003-10-18 16:20:14 +00004121 (IS_BLANK_CH(*ctxt->input->cur))) {
Daniel Veillarda7374592001-05-10 14:17:55 +00004122 const xmlChar *tmp = ctxt->input->cur;
4123 ctxt->input->cur = in;
Daniel Veillard40412cd2003-09-03 13:28:32 +00004124
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004125 if (areBlanks(ctxt, tmp, nbchar, 0)) {
Daniel Veillard32acf0c2005-03-31 14:12:37 +00004126 if (ctxt->sax->ignorableWhitespace != NULL)
4127 ctxt->sax->ignorableWhitespace(ctxt->userData,
4128 tmp, nbchar);
Daniel Veillard1114d002006-10-12 16:24:35 +00004129 } else {
4130 if (ctxt->sax->characters != NULL)
4131 ctxt->sax->characters(ctxt->userData,
4132 tmp, nbchar);
4133 if (*ctxt->space == -1)
4134 *ctxt->space = -2;
4135 }
Daniel Veillard3ed27bd2001-06-17 17:58:17 +00004136 line = ctxt->input->line;
4137 col = ctxt->input->col;
Daniel Veillard34099b42004-11-04 17:34:35 +00004138 } else if (ctxt->sax != NULL) {
Daniel Veillard80f32572001-03-07 19:45:40 +00004139 if (ctxt->sax->characters != NULL)
4140 ctxt->sax->characters(ctxt->userData,
4141 ctxt->input->cur, nbchar);
Daniel Veillard3ed27bd2001-06-17 17:58:17 +00004142 line = ctxt->input->line;
4143 col = ctxt->input->col;
Daniel Veillard80f32572001-03-07 19:45:40 +00004144 }
Daniel Veillard1dc9feb2008-11-17 15:59:21 +00004145 /* something really bad happened in the SAX callback */
4146 if (ctxt->instate != XML_PARSER_CONTENT)
4147 return;
Daniel Veillard48b2f892001-02-25 16:11:03 +00004148 }
4149 ctxt->input->cur = in;
Daniel Veillard561b7f82002-03-20 21:55:57 +00004150 if (*in == 0xD) {
4151 in++;
4152 if (*in == 0xA) {
4153 ctxt->input->cur = in;
Daniel Veillard48b2f892001-02-25 16:11:03 +00004154 in++;
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00004155 ctxt->input->line++; ctxt->input->col = 1;
Daniel Veillard561b7f82002-03-20 21:55:57 +00004156 continue; /* while */
Daniel Veillard48b2f892001-02-25 16:11:03 +00004157 }
Daniel Veillard561b7f82002-03-20 21:55:57 +00004158 in--;
4159 }
4160 if (*in == '<') {
4161 return;
4162 }
4163 if (*in == '&') {
4164 return;
Daniel Veillard48b2f892001-02-25 16:11:03 +00004165 }
4166 SHRINK;
4167 GROW;
4168 in = ctxt->input->cur;
William M. Brackc07329e2003-09-08 01:57:30 +00004169 } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
Daniel Veillard48b2f892001-02-25 16:11:03 +00004170 nbchar = 0;
4171 }
Daniel Veillard50582112001-03-26 22:52:16 +00004172 ctxt->input->line = line;
4173 ctxt->input->col = col;
Daniel Veillard48b2f892001-02-25 16:11:03 +00004174 xmlParseCharDataComplex(ctxt, cdata);
4175}
4176
Daniel Veillard01c13b52002-12-10 15:19:08 +00004177/**
4178 * xmlParseCharDataComplex:
4179 * @ctxt: an XML parser context
4180 * @cdata: int indicating whether we are within a CDATA section
4181 *
4182 * parse a CharData section.this is the fallback function
4183 * of xmlParseCharData() when the parsing requires handling
4184 * of non-ASCII characters.
4185 */
Daniel Veillard48b2f892001-02-25 16:11:03 +00004186void
4187xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) {
Owen Taylor3473f882001-02-23 17:55:21 +00004188 xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
4189 int nbchar = 0;
4190 int cur, l;
4191 int count = 0;
4192
4193 SHRINK;
4194 GROW;
4195 cur = CUR_CHAR(l);
Daniel Veillardfdc91562002-07-01 21:52:03 +00004196 while ((cur != '<') && /* checked */
4197 (cur != '&') &&
William M. Brack871611b2003-10-18 04:53:14 +00004198 (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
Owen Taylor3473f882001-02-23 17:55:21 +00004199 if ((cur == ']') && (NXT(1) == ']') &&
4200 (NXT(2) == '>')) {
4201 if (cdata) break;
4202 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004203 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004204 }
4205 }
4206 COPY_BUF(l,buf,nbchar,cur);
4207 if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
Daniel Veillard092643b2003-09-25 14:29:29 +00004208 buf[nbchar] = 0;
4209
Owen Taylor3473f882001-02-23 17:55:21 +00004210 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00004211 * OK the segment is to be consumed as chars.
Owen Taylor3473f882001-02-23 17:55:21 +00004212 */
4213 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004214 if (areBlanks(ctxt, buf, nbchar, 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00004215 if (ctxt->sax->ignorableWhitespace != NULL)
4216 ctxt->sax->ignorableWhitespace(ctxt->userData,
4217 buf, nbchar);
4218 } else {
4219 if (ctxt->sax->characters != NULL)
4220 ctxt->sax->characters(ctxt->userData, buf, nbchar);
Daniel Veillard1114d002006-10-12 16:24:35 +00004221 if ((ctxt->sax->characters !=
4222 ctxt->sax->ignorableWhitespace) &&
4223 (*ctxt->space == -1))
4224 *ctxt->space = -2;
Owen Taylor3473f882001-02-23 17:55:21 +00004225 }
4226 }
4227 nbchar = 0;
Daniel Veillard1dc9feb2008-11-17 15:59:21 +00004228 /* something really bad happened in the SAX callback */
4229 if (ctxt->instate != XML_PARSER_CONTENT)
4230 return;
Owen Taylor3473f882001-02-23 17:55:21 +00004231 }
4232 count++;
4233 if (count > 50) {
4234 GROW;
4235 count = 0;
4236 }
4237 NEXTL(l);
4238 cur = CUR_CHAR(l);
4239 }
4240 if (nbchar != 0) {
Daniel Veillard092643b2003-09-25 14:29:29 +00004241 buf[nbchar] = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00004242 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00004243 * OK the segment is to be consumed as chars.
Owen Taylor3473f882001-02-23 17:55:21 +00004244 */
4245 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004246 if (areBlanks(ctxt, buf, nbchar, 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00004247 if (ctxt->sax->ignorableWhitespace != NULL)
4248 ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
4249 } else {
4250 if (ctxt->sax->characters != NULL)
4251 ctxt->sax->characters(ctxt->userData, buf, nbchar);
Daniel Veillard1114d002006-10-12 16:24:35 +00004252 if ((ctxt->sax->characters != ctxt->sax->ignorableWhitespace) &&
4253 (*ctxt->space == -1))
4254 *ctxt->space = -2;
Owen Taylor3473f882001-02-23 17:55:21 +00004255 }
4256 }
4257 }
Daniel Veillard3b1478b2006-04-24 08:50:10 +00004258 if ((cur != 0) && (!IS_CHAR(cur))) {
4259 /* Generate the error and skip the offending character */
4260 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4261 "PCDATA invalid Char value %d\n",
4262 cur);
4263 NEXTL(l);
4264 }
Owen Taylor3473f882001-02-23 17:55:21 +00004265}
4266
4267/**
4268 * xmlParseExternalID:
4269 * @ctxt: an XML parser context
4270 * @publicID: a xmlChar** receiving PubidLiteral
4271 * @strict: indicate whether we should restrict parsing to only
4272 * production [75], see NOTE below
4273 *
4274 * Parse an External ID or a Public ID
4275 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00004276 * NOTE: Productions [75] and [83] interact badly since [75] can generate
Owen Taylor3473f882001-02-23 17:55:21 +00004277 * 'PUBLIC' S PubidLiteral S SystemLiteral
4278 *
4279 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
4280 * | 'PUBLIC' S PubidLiteral S SystemLiteral
4281 *
4282 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
4283 *
4284 * Returns the function returns SystemLiteral and in the second
4285 * case publicID receives PubidLiteral, is strict is off
4286 * it is possible to return NULL and have publicID set.
4287 */
4288
4289xmlChar *
4290xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
4291 xmlChar *URI = NULL;
4292
4293 SHRINK;
Daniel Veillard146c9122001-03-22 15:22:27 +00004294
4295 *publicID = NULL;
Daniel Veillarda07050d2003-10-19 14:46:32 +00004296 if (CMP6(CUR_PTR, 'S', 'Y', 'S', 'T', 'E', 'M')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004297 SKIP(6);
William M. Brack76e95df2003-10-18 16:20:14 +00004298 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004299 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4300 "Space required after 'SYSTEM'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004301 }
4302 SKIP_BLANKS;
4303 URI = xmlParseSystemLiteral(ctxt);
4304 if (URI == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004305 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004306 }
Daniel Veillarda07050d2003-10-19 14:46:32 +00004307 } else if (CMP6(CUR_PTR, 'P', 'U', 'B', 'L', 'I', 'C')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004308 SKIP(6);
William M. Brack76e95df2003-10-18 16:20:14 +00004309 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004310 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004311 "Space required after 'PUBLIC'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004312 }
4313 SKIP_BLANKS;
4314 *publicID = xmlParsePubidLiteral(ctxt);
4315 if (*publicID == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004316 xmlFatalErr(ctxt, XML_ERR_PUBID_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004317 }
4318 if (strict) {
4319 /*
4320 * We don't handle [83] so "S SystemLiteral" is required.
4321 */
William M. Brack76e95df2003-10-18 16:20:14 +00004322 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004323 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004324 "Space required after the Public Identifier\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004325 }
4326 } else {
4327 /*
4328 * We handle [83] so we return immediately, if
4329 * "S SystemLiteral" is not detected. From a purely parsing
4330 * point of view that's a nice mess.
4331 */
4332 const xmlChar *ptr;
4333 GROW;
4334
4335 ptr = CUR_PTR;
William M. Brack76e95df2003-10-18 16:20:14 +00004336 if (!IS_BLANK_CH(*ptr)) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004337
William M. Brack76e95df2003-10-18 16:20:14 +00004338 while (IS_BLANK_CH(*ptr)) ptr++; /* TODO: dangerous, fix ! */
Owen Taylor3473f882001-02-23 17:55:21 +00004339 if ((*ptr != '\'') && (*ptr != '"')) return(NULL);
4340 }
4341 SKIP_BLANKS;
4342 URI = xmlParseSystemLiteral(ctxt);
4343 if (URI == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004344 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004345 }
4346 }
4347 return(URI);
4348}
4349
4350/**
Daniel Veillard4c778d82005-01-23 17:37:44 +00004351 * xmlParseCommentComplex:
Owen Taylor3473f882001-02-23 17:55:21 +00004352 * @ctxt: an XML parser context
Daniel Veillard4c778d82005-01-23 17:37:44 +00004353 * @buf: the already parsed part of the buffer
4354 * @len: number of bytes filles in the buffer
4355 * @size: allocated size of the buffer
Owen Taylor3473f882001-02-23 17:55:21 +00004356 *
4357 * Skip an XML (SGML) comment <!-- .... -->
4358 * The spec says that "For compatibility, the string "--" (double-hyphen)
4359 * must not occur within comments. "
Daniel Veillard4c778d82005-01-23 17:37:44 +00004360 * This is the slow routine in case the accelerator for ascii didn't work
Owen Taylor3473f882001-02-23 17:55:21 +00004361 *
4362 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4363 */
Daniel Veillard4c778d82005-01-23 17:37:44 +00004364static void
4365xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar *buf, int len, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +00004366 int q, ql;
4367 int r, rl;
4368 int cur, l;
Owen Taylor3473f882001-02-23 17:55:21 +00004369 int count = 0;
Daniel Veillard051d52c2008-07-29 16:44:59 +00004370 int inputid;
4371
4372 inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00004373
Owen Taylor3473f882001-02-23 17:55:21 +00004374 if (buf == NULL) {
Daniel Veillard4c778d82005-01-23 17:37:44 +00004375 len = 0;
4376 size = XML_PARSER_BUFFER_SIZE;
4377 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4378 if (buf == NULL) {
4379 xmlErrMemory(ctxt, NULL);
4380 return;
4381 }
Owen Taylor3473f882001-02-23 17:55:21 +00004382 }
William M. Brackbf9a73d2007-02-09 00:07:07 +00004383 GROW; /* Assure there's enough input data */
Owen Taylor3473f882001-02-23 17:55:21 +00004384 q = CUR_CHAR(ql);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00004385 if (q == 0)
4386 goto not_terminated;
Daniel Veillardda629342007-08-01 07:49:06 +00004387 if (!IS_CHAR(q)) {
4388 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4389 "xmlParseComment: invalid xmlChar value %d\n",
4390 q);
4391 xmlFree (buf);
4392 return;
4393 }
Owen Taylor3473f882001-02-23 17:55:21 +00004394 NEXTL(ql);
4395 r = CUR_CHAR(rl);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00004396 if (r == 0)
4397 goto not_terminated;
Daniel Veillardda629342007-08-01 07:49:06 +00004398 if (!IS_CHAR(r)) {
4399 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4400 "xmlParseComment: invalid xmlChar value %d\n",
4401 q);
4402 xmlFree (buf);
4403 return;
4404 }
Owen Taylor3473f882001-02-23 17:55:21 +00004405 NEXTL(rl);
4406 cur = CUR_CHAR(l);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00004407 if (cur == 0)
4408 goto not_terminated;
William M. Brack871611b2003-10-18 04:53:14 +00004409 while (IS_CHAR(cur) && /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00004410 ((cur != '>') ||
4411 (r != '-') || (q != '-'))) {
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00004412 if ((r == '-') && (q == '-')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004413 xmlFatalErr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004414 }
4415 if (len + 5 >= size) {
William M. Bracka3215c72004-07-31 16:24:01 +00004416 xmlChar *new_buf;
Owen Taylor3473f882001-02-23 17:55:21 +00004417 size *= 2;
William M. Bracka3215c72004-07-31 16:24:01 +00004418 new_buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4419 if (new_buf == NULL) {
4420 xmlFree (buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004421 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004422 return;
4423 }
William M. Bracka3215c72004-07-31 16:24:01 +00004424 buf = new_buf;
Owen Taylor3473f882001-02-23 17:55:21 +00004425 }
4426 COPY_BUF(ql,buf,len,q);
4427 q = r;
4428 ql = rl;
4429 r = cur;
4430 rl = l;
4431
4432 count++;
4433 if (count > 50) {
4434 GROW;
4435 count = 0;
4436 }
4437 NEXTL(l);
4438 cur = CUR_CHAR(l);
4439 if (cur == 0) {
4440 SHRINK;
4441 GROW;
4442 cur = CUR_CHAR(l);
4443 }
4444 }
4445 buf[len] = 0;
Daniel Veillardda629342007-08-01 07:49:06 +00004446 if (cur == 0) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00004447 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00004448 "Comment not terminated \n<!--%.50s\n", buf);
Daniel Veillardda629342007-08-01 07:49:06 +00004449 } else if (!IS_CHAR(cur)) {
4450 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4451 "xmlParseComment: invalid xmlChar value %d\n",
4452 cur);
Owen Taylor3473f882001-02-23 17:55:21 +00004453 } else {
Daniel Veillard051d52c2008-07-29 16:44:59 +00004454 if (inputid != ctxt->input->id) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004455 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4456 "Comment doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004457 }
4458 NEXT;
4459 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
4460 (!ctxt->disableSAX))
4461 ctxt->sax->comment(ctxt->userData, buf);
Owen Taylor3473f882001-02-23 17:55:21 +00004462 }
Daniel Veillardda629342007-08-01 07:49:06 +00004463 xmlFree(buf);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00004464 return;
4465not_terminated:
4466 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4467 "Comment not terminated\n", NULL);
4468 xmlFree(buf);
Daniel Veillardda629342007-08-01 07:49:06 +00004469 return;
Owen Taylor3473f882001-02-23 17:55:21 +00004470}
Daniel Veillardda629342007-08-01 07:49:06 +00004471
Daniel Veillard4c778d82005-01-23 17:37:44 +00004472/**
4473 * xmlParseComment:
4474 * @ctxt: an XML parser context
4475 *
4476 * Skip an XML (SGML) comment <!-- .... -->
4477 * The spec says that "For compatibility, the string "--" (double-hyphen)
4478 * must not occur within comments. "
4479 *
4480 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4481 */
4482void
4483xmlParseComment(xmlParserCtxtPtr ctxt) {
4484 xmlChar *buf = NULL;
4485 int size = XML_PARSER_BUFFER_SIZE;
Daniel Veillard316a5c32005-01-23 22:56:39 +00004486 int len = 0;
Daniel Veillard4c778d82005-01-23 17:37:44 +00004487 xmlParserInputState state;
4488 const xmlChar *in;
4489 int nbchar = 0, ccol;
Daniel Veillard051d52c2008-07-29 16:44:59 +00004490 int inputid;
Daniel Veillard4c778d82005-01-23 17:37:44 +00004491
4492 /*
4493 * Check that there is a comment right here.
4494 */
4495 if ((RAW != '<') || (NXT(1) != '!') ||
4496 (NXT(2) != '-') || (NXT(3) != '-')) return;
Daniel Veillard4c778d82005-01-23 17:37:44 +00004497 state = ctxt->instate;
4498 ctxt->instate = XML_PARSER_COMMENT;
Daniel Veillard051d52c2008-07-29 16:44:59 +00004499 inputid = ctxt->input->id;
Daniel Veillard4c778d82005-01-23 17:37:44 +00004500 SKIP(4);
4501 SHRINK;
4502 GROW;
4503
4504 /*
4505 * Accelerated common case where input don't need to be
4506 * modified before passing it to the handler.
4507 */
4508 in = ctxt->input->cur;
4509 do {
4510 if (*in == 0xA) {
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004511 do {
Daniel Veillard4c778d82005-01-23 17:37:44 +00004512 ctxt->input->line++; ctxt->input->col = 1;
4513 in++;
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004514 } while (*in == 0xA);
Daniel Veillard4c778d82005-01-23 17:37:44 +00004515 }
4516get_more:
4517 ccol = ctxt->input->col;
4518 while (((*in > '-') && (*in <= 0x7F)) ||
4519 ((*in >= 0x20) && (*in < '-')) ||
4520 (*in == 0x09)) {
4521 in++;
4522 ccol++;
4523 }
4524 ctxt->input->col = ccol;
4525 if (*in == 0xA) {
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004526 do {
Daniel Veillard4c778d82005-01-23 17:37:44 +00004527 ctxt->input->line++; ctxt->input->col = 1;
4528 in++;
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004529 } while (*in == 0xA);
Daniel Veillard4c778d82005-01-23 17:37:44 +00004530 goto get_more;
4531 }
4532 nbchar = in - ctxt->input->cur;
4533 /*
4534 * save current set of data
4535 */
4536 if (nbchar > 0) {
4537 if ((ctxt->sax != NULL) &&
4538 (ctxt->sax->comment != NULL)) {
4539 if (buf == NULL) {
4540 if ((*in == '-') && (in[1] == '-'))
4541 size = nbchar + 1;
4542 else
4543 size = XML_PARSER_BUFFER_SIZE + nbchar;
4544 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4545 if (buf == NULL) {
4546 xmlErrMemory(ctxt, NULL);
4547 ctxt->instate = state;
4548 return;
4549 }
4550 len = 0;
4551 } else if (len + nbchar + 1 >= size) {
4552 xmlChar *new_buf;
4553 size += len + nbchar + XML_PARSER_BUFFER_SIZE;
4554 new_buf = (xmlChar *) xmlRealloc(buf,
4555 size * sizeof(xmlChar));
4556 if (new_buf == NULL) {
4557 xmlFree (buf);
4558 xmlErrMemory(ctxt, NULL);
4559 ctxt->instate = state;
4560 return;
4561 }
4562 buf = new_buf;
4563 }
4564 memcpy(&buf[len], ctxt->input->cur, nbchar);
4565 len += nbchar;
4566 buf[len] = 0;
4567 }
4568 }
4569 ctxt->input->cur = in;
Daniel Veillard9b528c72006-02-05 03:06:15 +00004570 if (*in == 0xA) {
4571 in++;
4572 ctxt->input->line++; ctxt->input->col = 1;
4573 }
Daniel Veillard4c778d82005-01-23 17:37:44 +00004574 if (*in == 0xD) {
4575 in++;
4576 if (*in == 0xA) {
4577 ctxt->input->cur = in;
4578 in++;
4579 ctxt->input->line++; ctxt->input->col = 1;
4580 continue; /* while */
4581 }
4582 in--;
4583 }
4584 SHRINK;
4585 GROW;
4586 in = ctxt->input->cur;
4587 if (*in == '-') {
4588 if (in[1] == '-') {
4589 if (in[2] == '>') {
Daniel Veillard051d52c2008-07-29 16:44:59 +00004590 if (ctxt->input->id != inputid) {
4591 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4592 "comment doesn't start and stop in the same entity\n");
4593 }
Daniel Veillard4c778d82005-01-23 17:37:44 +00004594 SKIP(3);
4595 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
4596 (!ctxt->disableSAX)) {
4597 if (buf != NULL)
4598 ctxt->sax->comment(ctxt->userData, buf);
4599 else
4600 ctxt->sax->comment(ctxt->userData, BAD_CAST "");
4601 }
4602 if (buf != NULL)
4603 xmlFree(buf);
4604 ctxt->instate = state;
4605 return;
4606 }
4607 if (buf != NULL)
4608 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4609 "Comment not terminated \n<!--%.50s\n",
4610 buf);
4611 else
4612 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4613 "Comment not terminated \n", NULL);
4614 in++;
4615 ctxt->input->col++;
4616 }
4617 in++;
4618 ctxt->input->col++;
4619 goto get_more;
4620 }
4621 } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
4622 xmlParseCommentComplex(ctxt, buf, len, size);
4623 ctxt->instate = state;
4624 return;
4625}
4626
Owen Taylor3473f882001-02-23 17:55:21 +00004627
4628/**
4629 * xmlParsePITarget:
4630 * @ctxt: an XML parser context
4631 *
4632 * parse the name of a PI
4633 *
4634 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
4635 *
4636 * Returns the PITarget name or NULL
4637 */
4638
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004639const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00004640xmlParsePITarget(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004641 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00004642
4643 name = xmlParseName(ctxt);
4644 if ((name != NULL) &&
4645 ((name[0] == 'x') || (name[0] == 'X')) &&
4646 ((name[1] == 'm') || (name[1] == 'M')) &&
4647 ((name[2] == 'l') || (name[2] == 'L'))) {
4648 int i;
4649 if ((name[0] == 'x') && (name[1] == 'm') &&
4650 (name[2] == 'l') && (name[3] == 0)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004651 xmlFatalErrMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
Owen Taylor3473f882001-02-23 17:55:21 +00004652 "XML declaration allowed only at the start of the document\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004653 return(name);
4654 } else if (name[3] == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004655 xmlFatalErr(ctxt, XML_ERR_RESERVED_XML_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004656 return(name);
4657 }
4658 for (i = 0;;i++) {
4659 if (xmlW3CPIs[i] == NULL) break;
4660 if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
4661 return(name);
4662 }
Daniel Veillard24eb9782003-10-04 21:08:09 +00004663 xmlWarningMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
4664 "xmlParsePITarget: invalid name prefix 'xml'\n",
4665 NULL, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004666 }
Daniel Veillard37334572008-07-31 08:20:02 +00004667 if ((name != NULL) && (xmlStrchr(name, ':') != NULL)) {
4668 xmlNsErr(ctxt, XML_NS_ERR_COLON,
4669 "colon are forbidden from PI names '%s'\n", name, NULL, NULL);
4670 }
Owen Taylor3473f882001-02-23 17:55:21 +00004671 return(name);
4672}
4673
#ifdef LIBXML_CATALOG_ENABLED
/**
 * xmlParseCatalogPI:
 * @ctxt:  an XML parser context
 * @catalog:  the PI value string
 *
 * Parse the content of an XML Catalog Processing Instruction.
 *
 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
 *
 * The value must be of the form  catalog = "URL"  (single or double
 * quotes, optional blanks around the tokens, nothing after the closing
 * quote). On success the URL is added to the catalogs of the parsing
 * context through xmlCatalogAddLocal(). A malformed value raises an
 * XML_WAR_CATALOG_PI warning, except when the '=' sign is missing, in
 * which case the function returns without any report.
 * NOTE(review): that silent return differs from every other syntax
 * failure here - confirm whether it is intentional.
 */

static void
xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
    const xmlChar *p = catalog;
    const xmlChar *start;
    xmlChar *uri = NULL;
    xmlChar quote;

    /* pseudo-attribute name */
    while (IS_BLANK_CH(*p))
        p++;
    if (xmlStrncmp(p, BAD_CAST"catalog", 7) != 0)
        goto syntax_error;
    p += 7;

    /* '=' sign; its absence is not reported */
    while (IS_BLANK_CH(*p))
        p++;
    if (*p != '=')
        return;
    p++;

    /* quoted URL */
    while (IS_BLANK_CH(*p))
        p++;
    quote = *p;
    if ((quote != '\'') && (quote != '"'))
        goto syntax_error;
    p++;
    start = p;
    while ((*p != 0) && (*p != quote))
        p++;
    if (*p == 0)
        goto syntax_error;
    uri = xmlStrndup(start, p - start);
    p++;

    /* only blanks may follow the closing quote */
    while (IS_BLANK_CH(*p))
        p++;
    if (*p != 0)
        goto syntax_error;

    if (uri != NULL) {
        ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, uri);
        xmlFree(uri);
    }
    return;

syntax_error:
    xmlWarningMsg(ctxt, XML_WAR_CATALOG_PI,
                  "Catalog PI syntax error: %s\n",
                  catalog, NULL);
    if (uri != NULL)
        xmlFree(uri);
}
#endif
4735
Owen Taylor3473f882001-02-23 17:55:21 +00004736/**
4737 * xmlParsePI:
4738 * @ctxt: an XML parser context
4739 *
4740 * parse an XML Processing Instruction.
4741 *
4742 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
4743 *
4744 * The processing is transfered to SAX once parsed.
4745 */
4746
4747void
4748xmlParsePI(xmlParserCtxtPtr ctxt) {
4749 xmlChar *buf = NULL;
4750 int len = 0;
4751 int size = XML_PARSER_BUFFER_SIZE;
4752 int cur, l;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004753 const xmlChar *target;
Owen Taylor3473f882001-02-23 17:55:21 +00004754 xmlParserInputState state;
4755 int count = 0;
4756
4757 if ((RAW == '<') && (NXT(1) == '?')) {
4758 xmlParserInputPtr input = ctxt->input;
4759 state = ctxt->instate;
4760 ctxt->instate = XML_PARSER_PI;
4761 /*
4762 * this is a Processing Instruction.
4763 */
4764 SKIP(2);
4765 SHRINK;
4766
4767 /*
4768 * Parse the target name and check for special support like
4769 * namespace.
4770 */
4771 target = xmlParsePITarget(ctxt);
4772 if (target != NULL) {
4773 if ((RAW == '?') && (NXT(1) == '>')) {
4774 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004775 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4776 "PI declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004777 }
4778 SKIP(2);
4779
4780 /*
4781 * SAX: PI detected.
4782 */
4783 if ((ctxt->sax) && (!ctxt->disableSAX) &&
4784 (ctxt->sax->processingInstruction != NULL))
4785 ctxt->sax->processingInstruction(ctxt->userData,
4786 target, NULL);
4787 ctxt->instate = state;
Owen Taylor3473f882001-02-23 17:55:21 +00004788 return;
4789 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00004790 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00004791 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004792 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004793 ctxt->instate = state;
4794 return;
4795 }
4796 cur = CUR;
4797 if (!IS_BLANK(cur)) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00004798 xmlFatalErrMsgStr(ctxt, XML_ERR_SPACE_REQUIRED,
4799 "ParsePI: PI %s space expected\n", target);
Owen Taylor3473f882001-02-23 17:55:21 +00004800 }
4801 SKIP_BLANKS;
4802 cur = CUR_CHAR(l);
William M. Brack871611b2003-10-18 04:53:14 +00004803 while (IS_CHAR(cur) && /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00004804 ((cur != '?') || (NXT(1) != '>'))) {
4805 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00004806 xmlChar *tmp;
4807
Owen Taylor3473f882001-02-23 17:55:21 +00004808 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00004809 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4810 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004811 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00004812 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00004813 ctxt->instate = state;
4814 return;
4815 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00004816 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00004817 }
4818 count++;
4819 if (count > 50) {
4820 GROW;
4821 count = 0;
4822 }
4823 COPY_BUF(l,buf,len,cur);
4824 NEXTL(l);
4825 cur = CUR_CHAR(l);
4826 if (cur == 0) {
4827 SHRINK;
4828 GROW;
4829 cur = CUR_CHAR(l);
4830 }
4831 }
4832 buf[len] = 0;
4833 if (cur != '?') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00004834 xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
4835 "ParsePI: PI %s never end ...\n", target);
Owen Taylor3473f882001-02-23 17:55:21 +00004836 } else {
4837 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004838 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4839 "PI declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004840 }
4841 SKIP(2);
4842
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00004843#ifdef LIBXML_CATALOG_ENABLED
4844 if (((state == XML_PARSER_MISC) ||
4845 (state == XML_PARSER_START)) &&
4846 (xmlStrEqual(target, XML_CATALOG_PI))) {
4847 xmlCatalogAllow allow = xmlCatalogGetDefaults();
4848 if ((allow == XML_CATA_ALLOW_DOCUMENT) ||
4849 (allow == XML_CATA_ALLOW_ALL))
4850 xmlParseCatalogPI(ctxt, buf);
4851 }
4852#endif
4853
4854
Owen Taylor3473f882001-02-23 17:55:21 +00004855 /*
4856 * SAX: PI detected.
4857 */
4858 if ((ctxt->sax) && (!ctxt->disableSAX) &&
4859 (ctxt->sax->processingInstruction != NULL))
4860 ctxt->sax->processingInstruction(ctxt->userData,
4861 target, buf);
4862 }
4863 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00004864 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004865 xmlFatalErr(ctxt, XML_ERR_PI_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004866 }
4867 ctxt->instate = state;
4868 }
4869}
4870
4871/**
4872 * xmlParseNotationDecl:
4873 * @ctxt: an XML parser context
4874 *
4875 * parse a notation declaration
4876 *
4877 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
4878 *
4879 * Hence there is actually 3 choices:
4880 * 'PUBLIC' S PubidLiteral
4881 * 'PUBLIC' S PubidLiteral S SystemLiteral
4882 * and 'SYSTEM' S SystemLiteral
4883 *
4884 * See the NOTE on xmlParseExternalID().
4885 */
4886
4887void
4888xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004889 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00004890 xmlChar *Pubid;
4891 xmlChar *Systemid;
4892
Daniel Veillarda07050d2003-10-19 14:46:32 +00004893 if (CMP10(CUR_PTR, '<', '!', 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004894 xmlParserInputPtr input = ctxt->input;
4895 SHRINK;
4896 SKIP(10);
William M. Brack76e95df2003-10-18 16:20:14 +00004897 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004898 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4899 "Space required after '<!NOTATION'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004900 return;
4901 }
4902 SKIP_BLANKS;
4903
Daniel Veillard76d66f42001-05-16 21:05:17 +00004904 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004905 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004906 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004907 return;
4908 }
William M. Brack76e95df2003-10-18 16:20:14 +00004909 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004910 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004911 "Space required after the NOTATION name'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004912 return;
4913 }
Daniel Veillard37334572008-07-31 08:20:02 +00004914 if (xmlStrchr(name, ':') != NULL) {
4915 xmlNsErr(ctxt, XML_NS_ERR_COLON,
4916 "colon are forbidden from notation names '%s'\n",
4917 name, NULL, NULL);
4918 }
Owen Taylor3473f882001-02-23 17:55:21 +00004919 SKIP_BLANKS;
4920
4921 /*
4922 * Parse the IDs.
4923 */
4924 Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
4925 SKIP_BLANKS;
4926
4927 if (RAW == '>') {
4928 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004929 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4930 "Notation declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004931 }
4932 NEXT;
4933 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4934 (ctxt->sax->notationDecl != NULL))
4935 ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
4936 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004937 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004938 }
Owen Taylor3473f882001-02-23 17:55:21 +00004939 if (Systemid != NULL) xmlFree(Systemid);
4940 if (Pubid != NULL) xmlFree(Pubid);
4941 }
4942}
4943
4944/**
4945 * xmlParseEntityDecl:
4946 * @ctxt: an XML parser context
4947 *
4948 * parse <!ENTITY declarations
4949 *
4950 * [70] EntityDecl ::= GEDecl | PEDecl
4951 *
4952 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
4953 *
4954 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
4955 *
4956 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
4957 *
4958 * [74] PEDef ::= EntityValue | ExternalID
4959 *
4960 * [76] NDataDecl ::= S 'NDATA' S Name
4961 *
4962 * [ VC: Notation Declared ]
4963 * The Name must match the declared name of a notation.
4964 */
4965
4966void
4967xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004968 const xmlChar *name = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00004969 xmlChar *value = NULL;
4970 xmlChar *URI = NULL, *literal = NULL;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004971 const xmlChar *ndata = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00004972 int isParameter = 0;
4973 xmlChar *orig = NULL;
Daniel Veillardf5582f12002-06-11 10:08:16 +00004974 int skipped;
Owen Taylor3473f882001-02-23 17:55:21 +00004975
Daniel Veillard4c778d82005-01-23 17:37:44 +00004976 /* GROW; done in the caller */
Daniel Veillarda07050d2003-10-19 14:46:32 +00004977 if (CMP8(CUR_PTR, '<', '!', 'E', 'N', 'T', 'I', 'T', 'Y')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004978 xmlParserInputPtr input = ctxt->input;
Owen Taylor3473f882001-02-23 17:55:21 +00004979 SHRINK;
4980 SKIP(8);
Daniel Veillardf5582f12002-06-11 10:08:16 +00004981 skipped = SKIP_BLANKS;
4982 if (skipped == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004983 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4984 "Space required after '<!ENTITY'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004985 }
Owen Taylor3473f882001-02-23 17:55:21 +00004986
4987 if (RAW == '%') {
4988 NEXT;
Daniel Veillardf5582f12002-06-11 10:08:16 +00004989 skipped = SKIP_BLANKS;
4990 if (skipped == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004991 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4992 "Space required after '%'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004993 }
Owen Taylor3473f882001-02-23 17:55:21 +00004994 isParameter = 1;
4995 }
4996
Daniel Veillard76d66f42001-05-16 21:05:17 +00004997 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004998 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004999 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5000 "xmlParseEntityDecl: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005001 return;
5002 }
Daniel Veillard37334572008-07-31 08:20:02 +00005003 if (xmlStrchr(name, ':') != NULL) {
5004 xmlNsErr(ctxt, XML_NS_ERR_COLON,
5005 "colon are forbidden from entities names '%s'\n",
5006 name, NULL, NULL);
5007 }
Daniel Veillardf5582f12002-06-11 10:08:16 +00005008 skipped = SKIP_BLANKS;
5009 if (skipped == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005010 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5011 "Space required after the entity name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005012 }
Owen Taylor3473f882001-02-23 17:55:21 +00005013
Daniel Veillardf5582f12002-06-11 10:08:16 +00005014 ctxt->instate = XML_PARSER_ENTITY_DECL;
Owen Taylor3473f882001-02-23 17:55:21 +00005015 /*
5016 * handle the various case of definitions...
5017 */
5018 if (isParameter) {
5019 if ((RAW == '"') || (RAW == '\'')) {
5020 value = xmlParseEntityValue(ctxt, &orig);
5021 if (value) {
5022 if ((ctxt->sax != NULL) &&
5023 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5024 ctxt->sax->entityDecl(ctxt->userData, name,
5025 XML_INTERNAL_PARAMETER_ENTITY,
5026 NULL, NULL, value);
5027 }
5028 } else {
5029 URI = xmlParseExternalID(ctxt, &literal, 1);
5030 if ((URI == NULL) && (literal == NULL)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005031 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005032 }
5033 if (URI) {
5034 xmlURIPtr uri;
5035
5036 uri = xmlParseURI((const char *) URI);
5037 if (uri == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00005038 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
5039 "Invalid URI: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00005040 /*
5041 * This really ought to be a well formedness error
5042 * but the XML Core WG decided otherwise c.f. issue
5043 * E26 of the XML erratas.
5044 */
Owen Taylor3473f882001-02-23 17:55:21 +00005045 } else {
5046 if (uri->fragment != NULL) {
Daniel Veillard4a7ae502002-02-18 19:18:17 +00005047 /*
5048 * Okay this is foolish to block those but not
5049 * invalid URIs.
5050 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005051 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005052 } else {
5053 if ((ctxt->sax != NULL) &&
5054 (!ctxt->disableSAX) &&
5055 (ctxt->sax->entityDecl != NULL))
5056 ctxt->sax->entityDecl(ctxt->userData, name,
5057 XML_EXTERNAL_PARAMETER_ENTITY,
5058 literal, URI, NULL);
5059 }
5060 xmlFreeURI(uri);
5061 }
5062 }
5063 }
5064 } else {
5065 if ((RAW == '"') || (RAW == '\'')) {
5066 value = xmlParseEntityValue(ctxt, &orig);
5067 if ((ctxt->sax != NULL) &&
5068 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5069 ctxt->sax->entityDecl(ctxt->userData, name,
5070 XML_INTERNAL_GENERAL_ENTITY,
5071 NULL, NULL, value);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005072 /*
5073 * For expat compatibility in SAX mode.
5074 */
5075 if ((ctxt->myDoc == NULL) ||
5076 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
5077 if (ctxt->myDoc == NULL) {
5078 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
Daniel Veillard68b6e022008-03-31 09:26:00 +00005079 if (ctxt->myDoc == NULL) {
5080 xmlErrMemory(ctxt, "New Doc failed");
5081 return;
5082 }
Daniel Veillardae0765b2008-07-31 19:54:59 +00005083 ctxt->myDoc->properties = XML_DOC_INTERNAL;
Daniel Veillard5997aca2002-03-18 18:36:20 +00005084 }
5085 if (ctxt->myDoc->intSubset == NULL)
5086 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5087 BAD_CAST "fake", NULL, NULL);
5088
Daniel Veillard1af9a412003-08-20 22:54:39 +00005089 xmlSAX2EntityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY,
5090 NULL, NULL, value);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005091 }
Owen Taylor3473f882001-02-23 17:55:21 +00005092 } else {
5093 URI = xmlParseExternalID(ctxt, &literal, 1);
5094 if ((URI == NULL) && (literal == NULL)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005095 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005096 }
5097 if (URI) {
5098 xmlURIPtr uri;
5099
5100 uri = xmlParseURI((const char *)URI);
5101 if (uri == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00005102 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
5103 "Invalid URI: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00005104 /*
5105 * This really ought to be a well formedness error
5106 * but the XML Core WG decided otherwise c.f. issue
5107 * E26 of the XML erratas.
5108 */
Owen Taylor3473f882001-02-23 17:55:21 +00005109 } else {
5110 if (uri->fragment != NULL) {
Daniel Veillard4a7ae502002-02-18 19:18:17 +00005111 /*
5112 * Okay this is foolish to block those but not
5113 * invalid URIs.
5114 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005115 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005116 }
5117 xmlFreeURI(uri);
5118 }
5119 }
William M. Brack76e95df2003-10-18 16:20:14 +00005120 if ((RAW != '>') && (!IS_BLANK_CH(CUR))) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005121 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5122 "Space required before 'NDATA'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005123 }
5124 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005125 if (CMP5(CUR_PTR, 'N', 'D', 'A', 'T', 'A')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005126 SKIP(5);
William M. Brack76e95df2003-10-18 16:20:14 +00005127 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005128 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5129 "Space required after 'NDATA'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005130 }
5131 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00005132 ndata = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005133 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5134 (ctxt->sax->unparsedEntityDecl != NULL))
5135 ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
5136 literal, URI, ndata);
5137 } else {
5138 if ((ctxt->sax != NULL) &&
5139 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5140 ctxt->sax->entityDecl(ctxt->userData, name,
5141 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5142 literal, URI, NULL);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005143 /*
5144 * For expat compatibility in SAX mode.
5145 * assuming the entity repalcement was asked for
5146 */
5147 if ((ctxt->replaceEntities != 0) &&
5148 ((ctxt->myDoc == NULL) ||
5149 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) {
5150 if (ctxt->myDoc == NULL) {
5151 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
Daniel Veillard68b6e022008-03-31 09:26:00 +00005152 if (ctxt->myDoc == NULL) {
5153 xmlErrMemory(ctxt, "New Doc failed");
5154 return;
5155 }
Daniel Veillardae0765b2008-07-31 19:54:59 +00005156 ctxt->myDoc->properties = XML_DOC_INTERNAL;
Daniel Veillard5997aca2002-03-18 18:36:20 +00005157 }
5158
5159 if (ctxt->myDoc->intSubset == NULL)
5160 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5161 BAD_CAST "fake", NULL, NULL);
Daniel Veillard1af9a412003-08-20 22:54:39 +00005162 xmlSAX2EntityDecl(ctxt, name,
5163 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5164 literal, URI, NULL);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005165 }
Owen Taylor3473f882001-02-23 17:55:21 +00005166 }
5167 }
5168 }
5169 SKIP_BLANKS;
5170 if (RAW != '>') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00005171 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00005172 "xmlParseEntityDecl: entity %s not terminated\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00005173 } else {
5174 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005175 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5176 "Entity declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005177 }
5178 NEXT;
5179 }
5180 if (orig != NULL) {
5181 /*
5182 * Ugly mechanism to save the raw entity value.
5183 */
5184 xmlEntityPtr cur = NULL;
5185
5186 if (isParameter) {
5187 if ((ctxt->sax != NULL) &&
5188 (ctxt->sax->getParameterEntity != NULL))
5189 cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
5190 } else {
5191 if ((ctxt->sax != NULL) &&
5192 (ctxt->sax->getEntity != NULL))
5193 cur = ctxt->sax->getEntity(ctxt->userData, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005194 if ((cur == NULL) && (ctxt->userData==ctxt)) {
Daniel Veillard1af9a412003-08-20 22:54:39 +00005195 cur = xmlSAX2GetEntity(ctxt, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005196 }
Owen Taylor3473f882001-02-23 17:55:21 +00005197 }
5198 if (cur != NULL) {
5199 if (cur->orig != NULL)
5200 xmlFree(orig);
5201 else
5202 cur->orig = orig;
5203 } else
5204 xmlFree(orig);
5205 }
Owen Taylor3473f882001-02-23 17:55:21 +00005206 if (value != NULL) xmlFree(value);
5207 if (URI != NULL) xmlFree(URI);
5208 if (literal != NULL) xmlFree(literal);
Owen Taylor3473f882001-02-23 17:55:21 +00005209 }
5210}
5211
5212/**
5213 * xmlParseDefaultDecl:
5214 * @ctxt: an XML parser context
5215 * @value: Receive a possible fixed default value for the attribute
5216 *
5217 * Parse an attribute default declaration
5218 *
5219 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
5220 *
5221 * [ VC: Required Attribute ]
5222 * if the default declaration is the keyword #REQUIRED, then the
5223 * attribute must be specified for all elements of the type in the
5224 * attribute-list declaration.
5225 *
5226 * [ VC: Attribute Default Legal ]
5227 * The declared default value must meet the lexical constraints of
5228 * the declared attribute type c.f. xmlValidateAttributeDecl()
5229 *
5230 * [ VC: Fixed Attribute Default ]
5231 * if an attribute has a default value declared with the #FIXED
5232 * keyword, instances of that attribute must match the default value.
5233 *
5234 * [ WFC: No < in Attribute Values ]
5235 * handled in xmlParseAttValue()
5236 *
5237 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
5238 * or XML_ATTRIBUTE_FIXED.
5239 */
5240
5241int
5242xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
5243 int val;
5244 xmlChar *ret;
5245
5246 *value = NULL;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005247 if (CMP9(CUR_PTR, '#', 'R', 'E', 'Q', 'U', 'I', 'R', 'E', 'D')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005248 SKIP(9);
5249 return(XML_ATTRIBUTE_REQUIRED);
5250 }
Daniel Veillarda07050d2003-10-19 14:46:32 +00005251 if (CMP8(CUR_PTR, '#', 'I', 'M', 'P', 'L', 'I', 'E', 'D')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005252 SKIP(8);
5253 return(XML_ATTRIBUTE_IMPLIED);
5254 }
5255 val = XML_ATTRIBUTE_NONE;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005256 if (CMP6(CUR_PTR, '#', 'F', 'I', 'X', 'E', 'D')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005257 SKIP(6);
5258 val = XML_ATTRIBUTE_FIXED;
William M. Brack76e95df2003-10-18 16:20:14 +00005259 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005260 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5261 "Space required after '#FIXED'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005262 }
5263 SKIP_BLANKS;
5264 }
5265 ret = xmlParseAttValue(ctxt);
5266 ctxt->instate = XML_PARSER_DTD;
5267 if (ret == NULL) {
William M. Brack7b9154b2003-09-27 19:23:50 +00005268 xmlFatalErrMsg(ctxt, (xmlParserErrors)ctxt->errNo,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005269 "Attribute default value declaration error\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005270 } else
5271 *value = ret;
5272 return(val);
5273}
5274
5275/**
5276 * xmlParseNotationType:
5277 * @ctxt: an XML parser context
5278 *
5279 * parse an Notation attribute type.
5280 *
5281 * Note: the leading 'NOTATION' S part has already being parsed...
5282 *
5283 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5284 *
5285 * [ VC: Notation Attributes ]
5286 * Values of this type must match one of the notation names included
5287 * in the declaration; all notation names in the declaration must be declared.
5288 *
5289 * Returns: the notation attribute tree built while parsing
5290 */
5291
5292xmlEnumerationPtr
5293xmlParseNotationType(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005294 const xmlChar *name;
Daniel Veillard49d44052008-08-27 19:57:06 +00005295 xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00005296
5297 if (RAW != '(') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005298 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005299 return(NULL);
5300 }
5301 SHRINK;
5302 do {
5303 NEXT;
5304 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00005305 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005306 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005307 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5308 "Name expected in NOTATION declaration\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005309 return(ret);
5310 }
Daniel Veillard49d44052008-08-27 19:57:06 +00005311 tmp = ret;
5312 while (tmp != NULL) {
5313 if (xmlStrEqual(name, tmp->name)) {
5314 xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5315 "standalone: attribute notation value token %s duplicated\n",
5316 name, NULL);
5317 if (!xmlDictOwns(ctxt->dict, name))
5318 xmlFree((xmlChar *) name);
5319 break;
5320 }
5321 tmp = tmp->next;
5322 }
5323 if (tmp == NULL) {
5324 cur = xmlCreateEnumeration(name);
5325 if (cur == NULL) return(ret);
5326 if (last == NULL) ret = last = cur;
5327 else {
5328 last->next = cur;
5329 last = cur;
5330 }
Owen Taylor3473f882001-02-23 17:55:21 +00005331 }
5332 SKIP_BLANKS;
5333 } while (RAW == '|');
5334 if (RAW != ')') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005335 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005336 if ((last != NULL) && (last != ret))
5337 xmlFreeEnumeration(last);
5338 return(ret);
5339 }
5340 NEXT;
5341 return(ret);
5342}
5343
5344/**
5345 * xmlParseEnumerationType:
5346 * @ctxt: an XML parser context
5347 *
5348 * parse an Enumeration attribute type.
5349 *
5350 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
5351 *
5352 * [ VC: Enumeration ]
5353 * Values of this type must match one of the Nmtoken tokens in
5354 * the declaration
5355 *
5356 * Returns: the enumeration attribute tree built while parsing
5357 */
5358
5359xmlEnumerationPtr
5360xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
5361 xmlChar *name;
Daniel Veillard49d44052008-08-27 19:57:06 +00005362 xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00005363
5364 if (RAW != '(') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005365 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005366 return(NULL);
5367 }
5368 SHRINK;
5369 do {
5370 NEXT;
5371 SKIP_BLANKS;
5372 name = xmlParseNmtoken(ctxt);
5373 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005374 xmlFatalErr(ctxt, XML_ERR_NMTOKEN_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005375 return(ret);
5376 }
Daniel Veillard49d44052008-08-27 19:57:06 +00005377 tmp = ret;
5378 while (tmp != NULL) {
5379 if (xmlStrEqual(name, tmp->name)) {
5380 xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5381 "standalone: attribute enumeration value token %s duplicated\n",
5382 name, NULL);
5383 if (!xmlDictOwns(ctxt->dict, name))
5384 xmlFree(name);
5385 break;
5386 }
5387 tmp = tmp->next;
5388 }
5389 if (tmp == NULL) {
5390 cur = xmlCreateEnumeration(name);
5391 if (!xmlDictOwns(ctxt->dict, name))
5392 xmlFree(name);
5393 if (cur == NULL) return(ret);
5394 if (last == NULL) ret = last = cur;
5395 else {
5396 last->next = cur;
5397 last = cur;
5398 }
Owen Taylor3473f882001-02-23 17:55:21 +00005399 }
5400 SKIP_BLANKS;
5401 } while (RAW == '|');
5402 if (RAW != ')') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005403 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005404 return(ret);
5405 }
5406 NEXT;
5407 return(ret);
5408}
5409
5410/**
5411 * xmlParseEnumeratedType:
5412 * @ctxt: an XML parser context
5413 * @tree: the enumeration tree built while parsing
5414 *
5415 * parse an Enumerated attribute type.
5416 *
5417 * [57] EnumeratedType ::= NotationType | Enumeration
5418 *
5419 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5420 *
5421 *
5422 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
5423 */
5424
5425int
5426xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
Daniel Veillarda07050d2003-10-19 14:46:32 +00005427 if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005428 SKIP(8);
William M. Brack76e95df2003-10-18 16:20:14 +00005429 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005430 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5431 "Space required after 'NOTATION'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005432 return(0);
5433 }
5434 SKIP_BLANKS;
5435 *tree = xmlParseNotationType(ctxt);
5436 if (*tree == NULL) return(0);
5437 return(XML_ATTRIBUTE_NOTATION);
5438 }
5439 *tree = xmlParseEnumerationType(ctxt);
5440 if (*tree == NULL) return(0);
5441 return(XML_ATTRIBUTE_ENUMERATION);
5442}
5443
5444/**
5445 * xmlParseAttributeType:
5446 * @ctxt: an XML parser context
5447 * @tree: the enumeration tree built while parsing
5448 *
5449 * parse the Attribute list def for an element
5450 *
5451 * [54] AttType ::= StringType | TokenizedType | EnumeratedType
5452 *
5453 * [55] StringType ::= 'CDATA'
5454 *
5455 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
5456 * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
5457 *
5458 * Validity constraints for attribute values syntax are checked in
5459 * xmlValidateAttributeValue()
5460 *
5461 * [ VC: ID ]
5462 * Values of type ID must match the Name production. A name must not
5463 * appear more than once in an XML document as a value of this type;
5464 * i.e., ID values must uniquely identify the elements which bear them.
5465 *
5466 * [ VC: One ID per Element Type ]
5467 * No element type may have more than one ID attribute specified.
5468 *
5469 * [ VC: ID Attribute Default ]
5470 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
5471 *
5472 * [ VC: IDREF ]
5473 * Values of type IDREF must match the Name production, and values
5474 * of type IDREFS must match Names; each IDREF Name must match the value
5475 * of an ID attribute on some element in the XML document; i.e. IDREF
5476 * values must match the value of some ID attribute.
5477 *
5478 * [ VC: Entity Name ]
5479 * Values of type ENTITY must match the Name production, values
5480 * of type ENTITIES must match Names; each Entity Name must match the
5481 * name of an unparsed entity declared in the DTD.
5482 *
5483 * [ VC: Name Token ]
5484 * Values of type NMTOKEN must match the Nmtoken production; values
5485 * of type NMTOKENS must match Nmtokens.
5486 *
5487 * Returns the attribute type
5488 */
5489int
5490xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
5491 SHRINK;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005492 if (CMP5(CUR_PTR, 'C', 'D', 'A', 'T', 'A')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005493 SKIP(5);
5494 return(XML_ATTRIBUTE_CDATA);
Daniel Veillarda07050d2003-10-19 14:46:32 +00005495 } else if (CMP6(CUR_PTR, 'I', 'D', 'R', 'E', 'F', 'S')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005496 SKIP(6);
5497 return(XML_ATTRIBUTE_IDREFS);
Daniel Veillarda07050d2003-10-19 14:46:32 +00005498 } else if (CMP5(CUR_PTR, 'I', 'D', 'R', 'E', 'F')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005499 SKIP(5);
5500 return(XML_ATTRIBUTE_IDREF);
5501 } else if ((RAW == 'I') && (NXT(1) == 'D')) {
5502 SKIP(2);
5503 return(XML_ATTRIBUTE_ID);
Daniel Veillarda07050d2003-10-19 14:46:32 +00005504 } else if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005505 SKIP(6);
5506 return(XML_ATTRIBUTE_ENTITY);
Daniel Veillarda07050d2003-10-19 14:46:32 +00005507 } else if (CMP8(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'I', 'E', 'S')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005508 SKIP(8);
5509 return(XML_ATTRIBUTE_ENTITIES);
Daniel Veillarda07050d2003-10-19 14:46:32 +00005510 } else if (CMP8(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N', 'S')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005511 SKIP(8);
5512 return(XML_ATTRIBUTE_NMTOKENS);
Daniel Veillarda07050d2003-10-19 14:46:32 +00005513 } else if (CMP7(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005514 SKIP(7);
5515 return(XML_ATTRIBUTE_NMTOKEN);
5516 }
5517 return(xmlParseEnumeratedType(ctxt, tree));
5518}
5519
5520/**
5521 * xmlParseAttributeListDecl:
5522 * @ctxt: an XML parser context
5523 *
5524 * : parse the Attribute list def for an element
5525 *
5526 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
5527 *
5528 * [53] AttDef ::= S Name S AttType S DefaultDecl
5529 *
5530 */
5531void
5532xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005533 const xmlChar *elemName;
5534 const xmlChar *attrName;
Owen Taylor3473f882001-02-23 17:55:21 +00005535 xmlEnumerationPtr tree;
5536
Daniel Veillarda07050d2003-10-19 14:46:32 +00005537 if (CMP9(CUR_PTR, '<', '!', 'A', 'T', 'T', 'L', 'I', 'S', 'T')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005538 xmlParserInputPtr input = ctxt->input;
5539
5540 SKIP(9);
William M. Brack76e95df2003-10-18 16:20:14 +00005541 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005542 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005543 "Space required after '<!ATTLIST'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005544 }
5545 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00005546 elemName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005547 if (elemName == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005548 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5549 "ATTLIST: no name for Element\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005550 return;
5551 }
5552 SKIP_BLANKS;
5553 GROW;
5554 while (RAW != '>') {
5555 const xmlChar *check = CUR_PTR;
5556 int type;
5557 int def;
5558 xmlChar *defaultValue = NULL;
5559
5560 GROW;
5561 tree = NULL;
Daniel Veillard76d66f42001-05-16 21:05:17 +00005562 attrName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005563 if (attrName == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005564 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5565 "ATTLIST: no name for Attribute\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005566 break;
5567 }
5568 GROW;
William M. Brack76e95df2003-10-18 16:20:14 +00005569 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005570 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005571 "Space required after the attribute name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005572 break;
5573 }
5574 SKIP_BLANKS;
5575
5576 type = xmlParseAttributeType(ctxt, &tree);
5577 if (type <= 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00005578 break;
5579 }
5580
5581 GROW;
William M. Brack76e95df2003-10-18 16:20:14 +00005582 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005583 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5584 "Space required after the attribute type\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005585 if (tree != NULL)
5586 xmlFreeEnumeration(tree);
5587 break;
5588 }
5589 SKIP_BLANKS;
5590
5591 def = xmlParseDefaultDecl(ctxt, &defaultValue);
5592 if (def <= 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00005593 if (defaultValue != NULL)
5594 xmlFree(defaultValue);
5595 if (tree != NULL)
5596 xmlFreeEnumeration(tree);
5597 break;
5598 }
Daniel Veillard97c9ce22008-03-25 16:52:41 +00005599 if ((type != XML_ATTRIBUTE_CDATA) && (defaultValue != NULL))
5600 xmlAttrNormalizeSpace(defaultValue, defaultValue);
Owen Taylor3473f882001-02-23 17:55:21 +00005601
5602 GROW;
5603 if (RAW != '>') {
William M. Brack76e95df2003-10-18 16:20:14 +00005604 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005605 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005606 "Space required after the attribute default value\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005607 if (defaultValue != NULL)
5608 xmlFree(defaultValue);
5609 if (tree != NULL)
5610 xmlFreeEnumeration(tree);
5611 break;
5612 }
5613 SKIP_BLANKS;
5614 }
5615 if (check == CUR_PTR) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005616 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
5617 "in xmlParseAttributeListDecl\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005618 if (defaultValue != NULL)
5619 xmlFree(defaultValue);
5620 if (tree != NULL)
5621 xmlFreeEnumeration(tree);
5622 break;
5623 }
5624 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5625 (ctxt->sax->attributeDecl != NULL))
5626 ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
5627 type, def, defaultValue, tree);
Daniel Veillarde57ec792003-09-10 10:50:59 +00005628 else if (tree != NULL)
5629 xmlFreeEnumeration(tree);
5630
5631 if ((ctxt->sax2) && (defaultValue != NULL) &&
5632 (def != XML_ATTRIBUTE_IMPLIED) &&
5633 (def != XML_ATTRIBUTE_REQUIRED)) {
5634 xmlAddDefAttrs(ctxt, elemName, attrName, defaultValue);
5635 }
Daniel Veillardac4118d2008-01-11 05:27:32 +00005636 if (ctxt->sax2) {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00005637 xmlAddSpecialAttr(ctxt, elemName, attrName, type);
5638 }
Owen Taylor3473f882001-02-23 17:55:21 +00005639 if (defaultValue != NULL)
5640 xmlFree(defaultValue);
5641 GROW;
5642 }
5643 if (RAW == '>') {
5644 if (input != ctxt->input) {
Daniel Veillard49d44052008-08-27 19:57:06 +00005645 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
5646 "Attribute list declaration doesn't start and stop in the same entity\n",
5647 NULL, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005648 }
5649 NEXT;
5650 }
Owen Taylor3473f882001-02-23 17:55:21 +00005651 }
5652}
5653
5654/**
5655 * xmlParseElementMixedContentDecl:
5656 * @ctxt: an XML parser context
Daniel Veillarda9b66d02002-12-11 14:23:49 +00005657 * @inputchk: the input used for the current entity, needed for boundary checks
Owen Taylor3473f882001-02-23 17:55:21 +00005658 *
5659 * parse the declaration for a Mixed Element content
5660 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
5661 *
5662 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
5663 * '(' S? '#PCDATA' S? ')'
5664 *
5665 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
5666 *
5667 * [ VC: No Duplicate Types ]
5668 * The same name must not appear more than once in a single
5669 * mixed-content declaration.
5670 *
5671 * returns: the list of the xmlElementContentPtr describing the element choices
5672 */
5673xmlElementContentPtr
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005674xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
Owen Taylor3473f882001-02-23 17:55:21 +00005675 xmlElementContentPtr ret = NULL, cur = NULL, n;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005676 const xmlChar *elem = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005677
5678 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005679 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005680 SKIP(7);
5681 SKIP_BLANKS;
5682 SHRINK;
5683 if (RAW == ')') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005684 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00005685 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
5686"Element content declaration doesn't start and stop in the same entity\n",
Daniel Veillardae0765b2008-07-31 19:54:59 +00005687 NULL, NULL);
Daniel Veillard8dc16a62002-02-19 21:08:48 +00005688 }
Owen Taylor3473f882001-02-23 17:55:21 +00005689 NEXT;
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005690 ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
Daniel Veillard68b6e022008-03-31 09:26:00 +00005691 if (ret == NULL)
5692 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005693 if (RAW == '*') {
5694 ret->ocur = XML_ELEMENT_CONTENT_MULT;
5695 NEXT;
5696 }
5697 return(ret);
5698 }
5699 if ((RAW == '(') || (RAW == '|')) {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005700 ret = cur = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
Owen Taylor3473f882001-02-23 17:55:21 +00005701 if (ret == NULL) return(NULL);
5702 }
5703 while (RAW == '|') {
5704 NEXT;
5705 if (elem == NULL) {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005706 ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
Owen Taylor3473f882001-02-23 17:55:21 +00005707 if (ret == NULL) return(NULL);
5708 ret->c1 = cur;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005709 if (cur != NULL)
5710 cur->parent = ret;
Owen Taylor3473f882001-02-23 17:55:21 +00005711 cur = ret;
5712 } else {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005713 n = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
Owen Taylor3473f882001-02-23 17:55:21 +00005714 if (n == NULL) return(NULL);
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005715 n->c1 = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005716 if (n->c1 != NULL)
5717 n->c1->parent = n;
Owen Taylor3473f882001-02-23 17:55:21 +00005718 cur->c2 = n;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005719 if (n != NULL)
5720 n->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00005721 cur = n;
Owen Taylor3473f882001-02-23 17:55:21 +00005722 }
5723 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00005724 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005725 if (elem == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005726 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005727 "xmlParseElementMixedContentDecl : Name expected\n");
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005728 xmlFreeDocElementContent(ctxt->myDoc, cur);
Owen Taylor3473f882001-02-23 17:55:21 +00005729 return(NULL);
5730 }
5731 SKIP_BLANKS;
5732 GROW;
5733 }
5734 if ((RAW == ')') && (NXT(1) == '*')) {
5735 if (elem != NULL) {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005736 cur->c2 = xmlNewDocElementContent(ctxt->myDoc, elem,
Owen Taylor3473f882001-02-23 17:55:21 +00005737 XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005738 if (cur->c2 != NULL)
5739 cur->c2->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00005740 }
5741 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005742 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00005743 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
5744"Element content declaration doesn't start and stop in the same entity\n",
Daniel Veillardae0765b2008-07-31 19:54:59 +00005745 NULL, NULL);
Daniel Veillard8dc16a62002-02-19 21:08:48 +00005746 }
Owen Taylor3473f882001-02-23 17:55:21 +00005747 SKIP(2);
5748 } else {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005749 xmlFreeDocElementContent(ctxt->myDoc, ret);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005750 xmlFatalErr(ctxt, XML_ERR_MIXED_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005751 return(NULL);
5752 }
5753
5754 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005755 xmlFatalErr(ctxt, XML_ERR_PCDATA_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005756 }
5757 return(ret);
5758}
5759
5760/**
5761 * xmlParseElementChildrenContentDecl:
5762 * @ctxt: an XML parser context
Daniel Veillarda9b66d02002-12-11 14:23:49 +00005763 * @inputchk: the input used for the current entity, needed for boundary checks
Owen Taylor3473f882001-02-23 17:55:21 +00005764 *
5765 * parse the declaration for a Mixed Element content
5766 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
5767 *
5768 *
5769 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
5770 *
5771 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
5772 *
5773 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
5774 *
5775 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
5776 *
5777 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
5778 * TODO Parameter-entity replacement text must be properly nested
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005779 * with parenthesized groups. That is to say, if either of the
Owen Taylor3473f882001-02-23 17:55:21 +00005780 * opening or closing parentheses in a choice, seq, or Mixed
5781 * construct is contained in the replacement text for a parameter
5782 * entity, both must be contained in the same replacement text. For
5783 * interoperability, if a parameter-entity reference appears in a
5784 * choice, seq, or Mixed construct, its replacement text should not
5785 * be empty, and neither the first nor last non-blank character of
5786 * the replacement text should be a connector (| or ,).
5787 *
Daniel Veillard5e2dace2001-07-18 19:30:27 +00005788 * Returns the tree of xmlElementContentPtr describing the element
Owen Taylor3473f882001-02-23 17:55:21 +00005789 * hierarchy.
5790 */
5791xmlElementContentPtr
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005792xmlParseElementChildrenContentDecl (xmlParserCtxtPtr ctxt, int inputchk) {
Owen Taylor3473f882001-02-23 17:55:21 +00005793 xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005794 const xmlChar *elem;
Owen Taylor3473f882001-02-23 17:55:21 +00005795 xmlChar type = 0;
5796
5797 SKIP_BLANKS;
5798 GROW;
5799 if (RAW == '(') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005800 int inputid = ctxt->input->id;
Daniel Veillard8dc16a62002-02-19 21:08:48 +00005801
Owen Taylor3473f882001-02-23 17:55:21 +00005802 /* Recurse on first child */
5803 NEXT;
5804 SKIP_BLANKS;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005805 cur = ret = xmlParseElementChildrenContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00005806 SKIP_BLANKS;
5807 GROW;
5808 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00005809 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005810 if (elem == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005811 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005812 return(NULL);
5813 }
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005814 cur = ret = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00005815 if (cur == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005816 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00005817 return(NULL);
5818 }
Owen Taylor3473f882001-02-23 17:55:21 +00005819 GROW;
5820 if (RAW == '?') {
5821 cur->ocur = XML_ELEMENT_CONTENT_OPT;
5822 NEXT;
5823 } else if (RAW == '*') {
5824 cur->ocur = XML_ELEMENT_CONTENT_MULT;
5825 NEXT;
5826 } else if (RAW == '+') {
5827 cur->ocur = XML_ELEMENT_CONTENT_PLUS;
5828 NEXT;
5829 } else {
5830 cur->ocur = XML_ELEMENT_CONTENT_ONCE;
5831 }
Owen Taylor3473f882001-02-23 17:55:21 +00005832 GROW;
5833 }
5834 SKIP_BLANKS;
5835 SHRINK;
5836 while (RAW != ')') {
5837 /*
5838 * Each loop we parse one separator and one element.
5839 */
5840 if (RAW == ',') {
5841 if (type == 0) type = CUR;
5842
5843 /*
5844 * Detect "Name | Name , Name" error
5845 */
5846 else if (type != CUR) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005847 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005848 "xmlParseElementChildrenContentDecl : '%c' expected\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005849 type);
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00005850 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005851 xmlFreeDocElementContent(ctxt->myDoc, last);
Owen Taylor3473f882001-02-23 17:55:21 +00005852 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005853 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00005854 return(NULL);
5855 }
5856 NEXT;
5857
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005858 op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_SEQ);
Owen Taylor3473f882001-02-23 17:55:21 +00005859 if (op == NULL) {
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00005860 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005861 xmlFreeDocElementContent(ctxt->myDoc, last);
5862 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00005863 return(NULL);
5864 }
5865 if (last == NULL) {
5866 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005867 if (ret != NULL)
5868 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00005869 ret = cur = op;
5870 } else {
5871 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005872 if (op != NULL)
5873 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00005874 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005875 if (last != NULL)
5876 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00005877 cur =op;
5878 last = NULL;
5879 }
5880 } else if (RAW == '|') {
5881 if (type == 0) type = CUR;
5882
5883 /*
5884 * Detect "Name , Name | Name" error
5885 */
5886 else if (type != CUR) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005887 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005888 "xmlParseElementChildrenContentDecl : '%c' expected\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005889 type);
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00005890 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005891 xmlFreeDocElementContent(ctxt->myDoc, last);
Owen Taylor3473f882001-02-23 17:55:21 +00005892 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005893 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00005894 return(NULL);
5895 }
5896 NEXT;
5897
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005898 op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
Owen Taylor3473f882001-02-23 17:55:21 +00005899 if (op == NULL) {
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00005900 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005901 xmlFreeDocElementContent(ctxt->myDoc, last);
Owen Taylor3473f882001-02-23 17:55:21 +00005902 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005903 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00005904 return(NULL);
5905 }
5906 if (last == NULL) {
5907 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005908 if (ret != NULL)
5909 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00005910 ret = cur = op;
5911 } else {
5912 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005913 if (op != NULL)
5914 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00005915 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005916 if (last != NULL)
5917 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00005918 cur =op;
5919 last = NULL;
5920 }
5921 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005922 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED, NULL);
Daniel Veillardc707d0b2008-01-24 14:48:54 +00005923 if ((last != NULL) && (last != ret))
5924 xmlFreeDocElementContent(ctxt->myDoc, last);
Owen Taylor3473f882001-02-23 17:55:21 +00005925 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005926 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00005927 return(NULL);
5928 }
5929 GROW;
5930 SKIP_BLANKS;
5931 GROW;
5932 if (RAW == '(') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005933 int inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00005934 /* Recurse on second child */
5935 NEXT;
5936 SKIP_BLANKS;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005937 last = xmlParseElementChildrenContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00005938 SKIP_BLANKS;
5939 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00005940 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005941 if (elem == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005942 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005943 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005944 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00005945 return(NULL);
5946 }
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005947 last = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillard68b6e022008-03-31 09:26:00 +00005948 if (last == NULL) {
5949 if (ret != NULL)
5950 xmlFreeDocElementContent(ctxt->myDoc, ret);
5951 return(NULL);
5952 }
Owen Taylor3473f882001-02-23 17:55:21 +00005953 if (RAW == '?') {
5954 last->ocur = XML_ELEMENT_CONTENT_OPT;
5955 NEXT;
5956 } else if (RAW == '*') {
5957 last->ocur = XML_ELEMENT_CONTENT_MULT;
5958 NEXT;
5959 } else if (RAW == '+') {
5960 last->ocur = XML_ELEMENT_CONTENT_PLUS;
5961 NEXT;
5962 } else {
5963 last->ocur = XML_ELEMENT_CONTENT_ONCE;
5964 }
5965 }
5966 SKIP_BLANKS;
5967 GROW;
5968 }
5969 if ((cur != NULL) && (last != NULL)) {
5970 cur->c2 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005971 if (last != NULL)
5972 last->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00005973 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005974 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00005975 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
5976"Element content declaration doesn't start and stop in the same entity\n",
Daniel Veillardae0765b2008-07-31 19:54:59 +00005977 NULL, NULL);
Daniel Veillard8dc16a62002-02-19 21:08:48 +00005978 }
Owen Taylor3473f882001-02-23 17:55:21 +00005979 NEXT;
5980 if (RAW == '?') {
William M. Brackf8f2e8f2004-05-14 04:37:41 +00005981 if (ret != NULL) {
5982 if ((ret->ocur == XML_ELEMENT_CONTENT_PLUS) ||
5983 (ret->ocur == XML_ELEMENT_CONTENT_MULT))
5984 ret->ocur = XML_ELEMENT_CONTENT_MULT;
5985 else
5986 ret->ocur = XML_ELEMENT_CONTENT_OPT;
5987 }
Owen Taylor3473f882001-02-23 17:55:21 +00005988 NEXT;
5989 } else if (RAW == '*') {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00005990 if (ret != NULL) {
Daniel Veillarde470df72001-04-18 21:41:07 +00005991 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Daniel Veillardce2c2f02001-10-18 14:57:24 +00005992 cur = ret;
5993 /*
5994 * Some normalization:
5995 * (a | b* | c?)* == (a | b | c)*
5996 */
Daniel Veillard30e76072006-03-09 14:13:55 +00005997 while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00005998 if ((cur->c1 != NULL) &&
5999 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6000 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT)))
6001 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6002 if ((cur->c2 != NULL) &&
6003 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6004 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT)))
6005 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6006 cur = cur->c2;
6007 }
6008 }
Owen Taylor3473f882001-02-23 17:55:21 +00006009 NEXT;
6010 } else if (RAW == '+') {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00006011 if (ret != NULL) {
6012 int found = 0;
6013
William M. Brackf8f2e8f2004-05-14 04:37:41 +00006014 if ((ret->ocur == XML_ELEMENT_CONTENT_OPT) ||
6015 (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6016 ret->ocur = XML_ELEMENT_CONTENT_MULT;
William M. Brackeb8509c2004-05-14 03:48:02 +00006017 else
6018 ret->ocur = XML_ELEMENT_CONTENT_PLUS;
Daniel Veillardce2c2f02001-10-18 14:57:24 +00006019 /*
6020 * Some normalization:
6021 * (a | b*)+ == (a | b)*
6022 * (a | b?)+ == (a | b)*
6023 */
Daniel Veillard30e76072006-03-09 14:13:55 +00006024 while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00006025 if ((cur->c1 != NULL) &&
6026 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6027 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) {
6028 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6029 found = 1;
6030 }
6031 if ((cur->c2 != NULL) &&
6032 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6033 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) {
6034 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6035 found = 1;
6036 }
6037 cur = cur->c2;
6038 }
6039 if (found)
6040 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6041 }
Owen Taylor3473f882001-02-23 17:55:21 +00006042 NEXT;
6043 }
6044 return(ret);
6045}
6046
6047/**
6048 * xmlParseElementContentDecl:
6049 * @ctxt: an XML parser context
6050 * @name: the name of the element being defined.
6051 * @result: the Element Content pointer will be stored here if any
6052 *
6053 * parse the declaration for an Element content either Mixed or Children,
6054 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
6055 *
6056 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
6057 *
6058 * returns: the type of element content XML_ELEMENT_TYPE_xxx
6059 */
6060
6061int
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006062xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, const xmlChar *name,
Owen Taylor3473f882001-02-23 17:55:21 +00006063 xmlElementContentPtr *result) {
6064
6065 xmlElementContentPtr tree = NULL;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006066 int inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00006067 int res;
6068
6069 *result = NULL;
6070
6071 if (RAW != '(') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006072 xmlFatalErrMsgStr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
Daniel Veillard56a4cb82001-03-24 17:00:36 +00006073 "xmlParseElementContentDecl : %s '(' expected\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006074 return(-1);
6075 }
6076 NEXT;
6077 GROW;
6078 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00006079 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006080 tree = xmlParseElementMixedContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00006081 res = XML_ELEMENT_TYPE_MIXED;
6082 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006083 tree = xmlParseElementChildrenContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00006084 res = XML_ELEMENT_TYPE_ELEMENT;
6085 }
Owen Taylor3473f882001-02-23 17:55:21 +00006086 SKIP_BLANKS;
6087 *result = tree;
6088 return(res);
6089}
6090
6091/**
6092 * xmlParseElementDecl:
6093 * @ctxt: an XML parser context
6094 *
6095 * parse an Element declaration.
6096 *
6097 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
6098 *
6099 * [ VC: Unique Element Type Declaration ]
6100 * No element type may be declared more than once
6101 *
6102 * Returns the type of the element, or -1 in case of error
6103 */
6104int
6105xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006106 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00006107 int ret = -1;
6108 xmlElementContentPtr content = NULL;
6109
Daniel Veillard4c778d82005-01-23 17:37:44 +00006110 /* GROW; done in the caller */
Daniel Veillarda07050d2003-10-19 14:46:32 +00006111 if (CMP9(CUR_PTR, '<', '!', 'E', 'L', 'E', 'M', 'E', 'N', 'T')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006112 xmlParserInputPtr input = ctxt->input;
6113
6114 SKIP(9);
William M. Brack76e95df2003-10-18 16:20:14 +00006115 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006116 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6117 "Space required after 'ELEMENT'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006118 }
6119 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00006120 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006121 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006122 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6123 "xmlParseElementDecl: no name for Element\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006124 return(-1);
6125 }
6126 while ((RAW == 0) && (ctxt->inputNr > 1))
6127 xmlPopInput(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00006128 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006129 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6130 "Space required after the element name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006131 }
6132 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00006133 if (CMP5(CUR_PTR, 'E', 'M', 'P', 'T', 'Y')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006134 SKIP(5);
6135 /*
6136 * Element must always be empty.
6137 */
6138 ret = XML_ELEMENT_TYPE_EMPTY;
6139 } else if ((RAW == 'A') && (NXT(1) == 'N') &&
6140 (NXT(2) == 'Y')) {
6141 SKIP(3);
6142 /*
6143 * Element is a generic container.
6144 */
6145 ret = XML_ELEMENT_TYPE_ANY;
6146 } else if (RAW == '(') {
6147 ret = xmlParseElementContentDecl(ctxt, name, &content);
6148 } else {
6149 /*
6150 * [ WFC: PEs in Internal Subset ] error handling.
6151 */
6152 if ((RAW == '%') && (ctxt->external == 0) &&
6153 (ctxt->inputNr == 1)) {
Daniel Veillardf403d292003-10-05 13:51:35 +00006154 xmlFatalErrMsg(ctxt, XML_ERR_PEREF_IN_INT_SUBSET,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006155 "PEReference: forbidden within markup decl in internal subset\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006156 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00006157 xmlFatalErrMsg(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
Owen Taylor3473f882001-02-23 17:55:21 +00006158 "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
6159 }
Owen Taylor3473f882001-02-23 17:55:21 +00006160 return(-1);
6161 }
6162
6163 SKIP_BLANKS;
6164 /*
6165 * Pop-up of finished entities.
6166 */
6167 while ((RAW == 0) && (ctxt->inputNr > 1))
6168 xmlPopInput(ctxt);
6169 SKIP_BLANKS;
6170
6171 if (RAW != '>') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006172 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006173 if (content != NULL) {
6174 xmlFreeDocElementContent(ctxt->myDoc, content);
6175 }
Owen Taylor3473f882001-02-23 17:55:21 +00006176 } else {
6177 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006178 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6179 "Element declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006180 }
6181
6182 NEXT;
6183 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006184 (ctxt->sax->elementDecl != NULL)) {
6185 if (content != NULL)
6186 content->parent = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00006187 ctxt->sax->elementDecl(ctxt->userData, name, ret,
6188 content);
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006189 if ((content != NULL) && (content->parent == NULL)) {
6190 /*
6191 * this is a trick: if xmlAddElementDecl is called,
6192 * instead of copying the full tree it is plugged directly
6193 * if called from the parser. Avoid duplicating the
6194 * interfaces or change the API/ABI
6195 */
6196 xmlFreeDocElementContent(ctxt->myDoc, content);
6197 }
6198 } else if (content != NULL) {
6199 xmlFreeDocElementContent(ctxt->myDoc, content);
6200 }
Owen Taylor3473f882001-02-23 17:55:21 +00006201 }
Owen Taylor3473f882001-02-23 17:55:21 +00006202 }
6203 return(ret);
6204}
6205
6206/**
Owen Taylor3473f882001-02-23 17:55:21 +00006207 * xmlParseConditionalSections
6208 * @ctxt: an XML parser context
6209 *
6210 * [61] conditionalSect ::= includeSect | ignoreSect
6211 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
6212 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
6213 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
6214 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
6215 */
6216
Daniel Veillard56a4cb82001-03-24 17:00:36 +00006217static void
Owen Taylor3473f882001-02-23 17:55:21 +00006218xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
Daniel Veillard49d44052008-08-27 19:57:06 +00006219 int id = ctxt->input->id;
6220
Owen Taylor3473f882001-02-23 17:55:21 +00006221 SKIP(3);
6222 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00006223 if (CMP7(CUR_PTR, 'I', 'N', 'C', 'L', 'U', 'D', 'E')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006224 SKIP(7);
6225 SKIP_BLANKS;
6226 if (RAW != '[') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006227 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006228 } else {
Daniel Veillard49d44052008-08-27 19:57:06 +00006229 if (ctxt->input->id != id) {
6230 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6231 "All markup of the conditional section is not in the same entity\n",
6232 NULL, NULL);
6233 }
Owen Taylor3473f882001-02-23 17:55:21 +00006234 NEXT;
6235 }
6236 if (xmlParserDebugEntities) {
6237 if ((ctxt->input != NULL) && (ctxt->input->filename))
6238 xmlGenericError(xmlGenericErrorContext,
6239 "%s(%d): ", ctxt->input->filename,
6240 ctxt->input->line);
6241 xmlGenericError(xmlGenericErrorContext,
6242 "Entering INCLUDE Conditional Section\n");
6243 }
6244
6245 while ((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') ||
6246 (NXT(2) != '>'))) {
6247 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00006248 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00006249
6250 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6251 xmlParseConditionalSections(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00006252 } else if (IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00006253 NEXT;
6254 } else if (RAW == '%') {
6255 xmlParsePEReference(ctxt);
6256 } else
6257 xmlParseMarkupDecl(ctxt);
6258
6259 /*
6260 * Pop-up of finished entities.
6261 */
6262 while ((RAW == 0) && (ctxt->inputNr > 1))
6263 xmlPopInput(ctxt);
6264
Daniel Veillardfdc91562002-07-01 21:52:03 +00006265 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006266 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006267 break;
6268 }
6269 }
6270 if (xmlParserDebugEntities) {
6271 if ((ctxt->input != NULL) && (ctxt->input->filename))
6272 xmlGenericError(xmlGenericErrorContext,
6273 "%s(%d): ", ctxt->input->filename,
6274 ctxt->input->line);
6275 xmlGenericError(xmlGenericErrorContext,
6276 "Leaving INCLUDE Conditional Section\n");
6277 }
6278
Daniel Veillarda07050d2003-10-19 14:46:32 +00006279 } else if (CMP6(CUR_PTR, 'I', 'G', 'N', 'O', 'R', 'E')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006280 int state;
William M. Brack78637da2003-07-31 14:47:38 +00006281 xmlParserInputState instate;
Owen Taylor3473f882001-02-23 17:55:21 +00006282 int depth = 0;
6283
6284 SKIP(6);
6285 SKIP_BLANKS;
6286 if (RAW != '[') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006287 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006288 } else {
Daniel Veillard49d44052008-08-27 19:57:06 +00006289 if (ctxt->input->id != id) {
6290 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6291 "All markup of the conditional section is not in the same entity\n",
6292 NULL, NULL);
6293 }
Owen Taylor3473f882001-02-23 17:55:21 +00006294 NEXT;
6295 }
6296 if (xmlParserDebugEntities) {
6297 if ((ctxt->input != NULL) && (ctxt->input->filename))
6298 xmlGenericError(xmlGenericErrorContext,
6299 "%s(%d): ", ctxt->input->filename,
6300 ctxt->input->line);
6301 xmlGenericError(xmlGenericErrorContext,
6302 "Entering IGNORE Conditional Section\n");
6303 }
6304
6305 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006306 * Parse up to the end of the conditional section
Owen Taylor3473f882001-02-23 17:55:21 +00006307 * But disable SAX event generating DTD building in the meantime
6308 */
6309 state = ctxt->disableSAX;
6310 instate = ctxt->instate;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006311 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006312 ctxt->instate = XML_PARSER_IGNORE;
6313
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00006314 while ((depth >= 0) && (RAW != 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00006315 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6316 depth++;
6317 SKIP(3);
6318 continue;
6319 }
6320 if ((RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
6321 if (--depth >= 0) SKIP(3);
6322 continue;
6323 }
6324 NEXT;
6325 continue;
6326 }
6327
6328 ctxt->disableSAX = state;
6329 ctxt->instate = instate;
6330
6331 if (xmlParserDebugEntities) {
6332 if ((ctxt->input != NULL) && (ctxt->input->filename))
6333 xmlGenericError(xmlGenericErrorContext,
6334 "%s(%d): ", ctxt->input->filename,
6335 ctxt->input->line);
6336 xmlGenericError(xmlGenericErrorContext,
6337 "Leaving IGNORE Conditional Section\n");
6338 }
6339
6340 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006341 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID_KEYWORD, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006342 }
6343
6344 if (RAW == 0)
6345 SHRINK;
6346
6347 if (RAW == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006348 xmlFatalErr(ctxt, XML_ERR_CONDSEC_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006349 } else {
Daniel Veillard49d44052008-08-27 19:57:06 +00006350 if (ctxt->input->id != id) {
6351 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6352 "All markup of the conditional section is not in the same entity\n",
6353 NULL, NULL);
6354 }
Owen Taylor3473f882001-02-23 17:55:21 +00006355 SKIP(3);
6356 }
6357}
6358
6359/**
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006360 * xmlParseMarkupDecl:
6361 * @ctxt: an XML parser context
6362 *
6363 * parse Markup declarations
6364 *
6365 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
6366 * NotationDecl | PI | Comment
6367 *
6368 * [ VC: Proper Declaration/PE Nesting ]
6369 * Parameter-entity replacement text must be properly nested with
6370 * markup declarations. That is to say, if either the first character
6371 * or the last character of a markup declaration (markupdecl above) is
6372 * contained in the replacement text for a parameter-entity reference,
6373 * both must be contained in the same replacement text.
6374 *
6375 * [ WFC: PEs in Internal Subset ]
6376 * In the internal DTD subset, parameter-entity references can occur
6377 * only where markup declarations can occur, not within markup declarations.
6378 * (This does not apply to references that occur in external parameter
6379 * entities or to the external subset.)
6380 */
6381void
6382xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
6383 GROW;
Daniel Veillard4c778d82005-01-23 17:37:44 +00006384 if (CUR == '<') {
6385 if (NXT(1) == '!') {
6386 switch (NXT(2)) {
6387 case 'E':
6388 if (NXT(3) == 'L')
6389 xmlParseElementDecl(ctxt);
6390 else if (NXT(3) == 'N')
6391 xmlParseEntityDecl(ctxt);
6392 break;
6393 case 'A':
6394 xmlParseAttributeListDecl(ctxt);
6395 break;
6396 case 'N':
6397 xmlParseNotationDecl(ctxt);
6398 break;
6399 case '-':
6400 xmlParseComment(ctxt);
6401 break;
6402 default:
6403 /* there is an error but it will be detected later */
6404 break;
6405 }
6406 } else if (NXT(1) == '?') {
6407 xmlParsePI(ctxt);
6408 }
6409 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006410 /*
6411 * This is only for internal subset. On external entities,
6412 * the replacement is done before parsing stage
6413 */
6414 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
6415 xmlParsePEReference(ctxt);
6416
6417 /*
6418 * Conditional sections are allowed from entities included
6419 * by PE References in the internal subset.
6420 */
6421 if ((ctxt->external == 0) && (ctxt->inputNr > 1)) {
6422 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6423 xmlParseConditionalSections(ctxt);
6424 }
6425 }
6426
6427 ctxt->instate = XML_PARSER_DTD;
6428}
6429
6430/**
6431 * xmlParseTextDecl:
6432 * @ctxt: an XML parser context
Daniel Veillard40ec29a2008-07-30 12:35:40 +00006433 *
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006434 * parse an XML declaration header for external entities
6435 *
6436 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006437 */
6438
6439void
6440xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
6441 xmlChar *version;
Daniel Veillardf5cb3cd2003-10-28 13:58:13 +00006442 const xmlChar *encoding;
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006443
6444 /*
6445 * We know that '<?xml' is here.
6446 */
Daniel Veillarda07050d2003-10-19 14:46:32 +00006447 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006448 SKIP(5);
6449 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006450 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_STARTED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006451 return;
6452 }
6453
William M. Brack76e95df2003-10-18 16:20:14 +00006454 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006455 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6456 "Space needed after '<?xml'\n");
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006457 }
6458 SKIP_BLANKS;
6459
6460 /*
6461 * We may have the VersionInfo here.
6462 */
6463 version = xmlParseVersionInfo(ctxt);
6464 if (version == NULL)
6465 version = xmlCharStrdup(XML_DEFAULT_VERSION);
Daniel Veillard401c2112002-01-07 16:54:10 +00006466 else {
William M. Brack76e95df2003-10-18 16:20:14 +00006467 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006468 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6469 "Space needed here\n");
Daniel Veillard401c2112002-01-07 16:54:10 +00006470 }
6471 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006472 ctxt->input->version = version;
6473
6474 /*
6475 * We must have the encoding declaration
6476 */
Daniel Veillardf5cb3cd2003-10-28 13:58:13 +00006477 encoding = xmlParseEncodingDecl(ctxt);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006478 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
6479 /*
6480 * The XML REC instructs us to stop parsing right here
6481 */
6482 return;
6483 }
Daniel Veillardf5cb3cd2003-10-28 13:58:13 +00006484 if ((encoding == NULL) && (ctxt->errNo == XML_ERR_OK)) {
6485 xmlFatalErrMsg(ctxt, XML_ERR_MISSING_ENCODING,
6486 "Missing encoding in text declaration\n");
6487 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006488
6489 SKIP_BLANKS;
6490 if ((RAW == '?') && (NXT(1) == '>')) {
6491 SKIP(2);
6492 } else if (RAW == '>') {
6493 /* Deprecated old WD ... */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006494 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006495 NEXT;
6496 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006497 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006498 MOVETO_ENDTAG(CUR_PTR);
6499 NEXT;
6500 }
6501}
6502
6503/**
Owen Taylor3473f882001-02-23 17:55:21 +00006504 * xmlParseExternalSubset:
6505 * @ctxt: an XML parser context
6506 * @ExternalID: the external identifier
6507 * @SystemID: the system identifier (or URL)
6508 *
6509 * parse Markup declarations from an external subset
6510 *
6511 * [30] extSubset ::= textDecl? extSubsetDecl
6512 *
6513 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
6514 */
6515void
6516xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
6517 const xmlChar *SystemID) {
Daniel Veillard309f81d2003-09-23 09:02:53 +00006518 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006519 GROW;
Daniel Veillard6ccc56d2008-04-03 12:59:06 +00006520
6521 if ((ctxt->encoding == (const xmlChar *)XML_CHAR_ENCODING_NONE) &&
6522 (ctxt->input->end - ctxt->input->cur >= 4)) {
6523 xmlChar start[4];
6524 xmlCharEncoding enc;
6525
6526 start[0] = RAW;
6527 start[1] = NXT(1);
6528 start[2] = NXT(2);
6529 start[3] = NXT(3);
6530 enc = xmlDetectCharEncoding(start, 4);
6531 if (enc != XML_CHAR_ENCODING_NONE)
6532 xmlSwitchEncoding(ctxt, enc);
6533 }
6534
Daniel Veillarda07050d2003-10-19 14:46:32 +00006535 if (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006536 xmlParseTextDecl(ctxt);
6537 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
6538 /*
6539 * The XML REC instructs us to stop parsing right here
6540 */
6541 ctxt->instate = XML_PARSER_EOF;
6542 return;
6543 }
6544 }
6545 if (ctxt->myDoc == NULL) {
6546 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
Daniel Veillardae0765b2008-07-31 19:54:59 +00006547 if (ctxt->myDoc == NULL) {
6548 xmlErrMemory(ctxt, "New Doc failed");
6549 return;
6550 }
6551 ctxt->myDoc->properties = XML_DOC_INTERNAL;
Owen Taylor3473f882001-02-23 17:55:21 +00006552 }
6553 if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
6554 xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
6555
6556 ctxt->instate = XML_PARSER_DTD;
6557 ctxt->external = 1;
6558 while (((RAW == '<') && (NXT(1) == '?')) ||
6559 ((RAW == '<') && (NXT(1) == '!')) ||
William M. Brack76e95df2003-10-18 16:20:14 +00006560 (RAW == '%') || IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00006561 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00006562 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00006563
6564 GROW;
6565 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6566 xmlParseConditionalSections(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00006567 } else if (IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00006568 NEXT;
6569 } else if (RAW == '%') {
6570 xmlParsePEReference(ctxt);
6571 } else
6572 xmlParseMarkupDecl(ctxt);
6573
6574 /*
6575 * Pop-up of finished entities.
6576 */
6577 while ((RAW == 0) && (ctxt->inputNr > 1))
6578 xmlPopInput(ctxt);
6579
Daniel Veillardfdc91562002-07-01 21:52:03 +00006580 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006581 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006582 break;
6583 }
6584 }
6585
6586 if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006587 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006588 }
6589
6590}
6591
6592/**
6593 * xmlParseReference:
6594 * @ctxt: an XML parser context
Daniel Veillard0161e632008-08-28 15:36:32 +00006595 *
Owen Taylor3473f882001-02-23 17:55:21 +00006596 * parse and handle entity references in content, depending on the SAX
6597 * interface, this may end-up in a call to character() if this is a
6598 * CharRef, a predefined entity, if there is no reference() callback.
6599 * or if the parser was asked to switch to that mode.
6600 *
6601 * [67] Reference ::= EntityRef | CharRef
6602 */
6603void
6604xmlParseReference(xmlParserCtxtPtr ctxt) {
6605 xmlEntityPtr ent;
6606 xmlChar *val;
Daniel Veillard0161e632008-08-28 15:36:32 +00006607 int was_checked;
6608 xmlNodePtr list = NULL;
6609 xmlParserErrors ret = XML_ERR_OK;
Owen Taylor3473f882001-02-23 17:55:21 +00006610
Daniel Veillard0161e632008-08-28 15:36:32 +00006611
6612 if (RAW != '&')
6613 return;
6614
6615 /*
6616 * Simple case of a CharRef
6617 */
Owen Taylor3473f882001-02-23 17:55:21 +00006618 if (NXT(1) == '#') {
6619 int i = 0;
6620 xmlChar out[10];
6621 int hex = NXT(2);
Daniel Veillard56a4cb82001-03-24 17:00:36 +00006622 int value = xmlParseCharRef(ctxt);
Daniel Veillard0161e632008-08-28 15:36:32 +00006623
Daniel Veillarddc171602008-03-26 17:41:38 +00006624 if (value == 0)
6625 return;
Owen Taylor3473f882001-02-23 17:55:21 +00006626 if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
6627 /*
6628 * So we are using non-UTF-8 buffers
6629 * Check that the char fit on 8bits, if not
6630 * generate a CharRef.
6631 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00006632 if (value <= 0xFF) {
6633 out[0] = value;
Owen Taylor3473f882001-02-23 17:55:21 +00006634 out[1] = 0;
6635 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
6636 (!ctxt->disableSAX))
6637 ctxt->sax->characters(ctxt->userData, out, 1);
6638 } else {
6639 if ((hex == 'x') || (hex == 'X'))
Aleksey Sanin49cc9752002-06-14 17:07:10 +00006640 snprintf((char *)out, sizeof(out), "#x%X", value);
Owen Taylor3473f882001-02-23 17:55:21 +00006641 else
Aleksey Sanin49cc9752002-06-14 17:07:10 +00006642 snprintf((char *)out, sizeof(out), "#%d", value);
Owen Taylor3473f882001-02-23 17:55:21 +00006643 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
6644 (!ctxt->disableSAX))
6645 ctxt->sax->reference(ctxt->userData, out);
6646 }
6647 } else {
6648 /*
6649 * Just encode the value in UTF-8
6650 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00006651 COPY_BUF(0 ,out, i, value);
Owen Taylor3473f882001-02-23 17:55:21 +00006652 out[i] = 0;
6653 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
6654 (!ctxt->disableSAX))
6655 ctxt->sax->characters(ctxt->userData, out, i);
6656 }
Daniel Veillard0161e632008-08-28 15:36:32 +00006657 return;
6658 }
Daniel Veillarda37a6ad2006-10-10 20:05:45 +00006659
Daniel Veillard0161e632008-08-28 15:36:32 +00006660 /*
6661 * We are seeing an entity reference
6662 */
6663 ent = xmlParseEntityRef(ctxt);
6664 if (ent == NULL) return;
6665 if (!ctxt->wellFormed)
6666 return;
6667 was_checked = ent->checked;
6668
6669 /* special case of predefined entities */
6670 if ((ent->name == NULL) ||
6671 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
6672 val = ent->content;
6673 if (val == NULL) return;
6674 /*
6675 * inline the entity.
6676 */
6677 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
6678 (!ctxt->disableSAX))
6679 ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
6680 return;
6681 }
6682
6683 /*
6684 * The first reference to the entity trigger a parsing phase
6685 * where the ent->children is filled with the result from
6686 * the parsing.
6687 */
6688 if (ent->checked == 0) {
6689 unsigned long oldnbent = ctxt->nbentities;
6690
6691 /*
6692 * This is a bit hackish but this seems the best
6693 * way to make sure both SAX and DOM entity support
6694 * behaves okay.
6695 */
6696 void *user_data;
6697 if (ctxt->userData == ctxt)
6698 user_data = NULL;
6699 else
6700 user_data = ctxt->userData;
6701
6702 /*
6703 * Check that this entity is well formed
6704 * 4.3.2: An internal general parsed entity is well-formed
6705 * if its replacement text matches the production labeled
6706 * content.
6707 */
6708 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
6709 ctxt->depth++;
6710 ret = xmlParseBalancedChunkMemoryInternal(ctxt, ent->content,
6711 user_data, &list);
6712 ctxt->depth--;
6713
6714 } else if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
6715 ctxt->depth++;
6716 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt, ctxt->sax,
6717 user_data, ctxt->depth, ent->URI,
6718 ent->ExternalID, &list);
6719 ctxt->depth--;
6720 } else {
6721 ret = XML_ERR_ENTITY_PE_INTERNAL;
6722 xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
6723 "invalid entity type found\n", NULL);
6724 }
6725
6726 /*
6727 * Store the number of entities needing parsing for this entity
6728 * content and do checkings
6729 */
6730 ent->checked = ctxt->nbentities - oldnbent;
6731 if (ret == XML_ERR_ENTITY_LOOP) {
Daniel Veillard4bf899b2008-08-20 17:04:30 +00006732 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
Daniel Veillard0161e632008-08-28 15:36:32 +00006733 xmlFreeNodeList(list);
Daniel Veillard4bf899b2008-08-20 17:04:30 +00006734 return;
6735 }
Daniel Veillard0161e632008-08-28 15:36:32 +00006736 if (xmlParserEntityCheck(ctxt, 0, ent)) {
6737 xmlFreeNodeList(list);
6738 return;
6739 }
Owen Taylor3473f882001-02-23 17:55:21 +00006740
Daniel Veillard0161e632008-08-28 15:36:32 +00006741 if ((ret == XML_ERR_OK) && (list != NULL)) {
6742 if (((ent->etype == XML_INTERNAL_GENERAL_ENTITY) ||
6743 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY))&&
6744 (ent->children == NULL)) {
6745 ent->children = list;
6746 if (ctxt->replaceEntities) {
Owen Taylor3473f882001-02-23 17:55:21 +00006747 /*
Daniel Veillard0161e632008-08-28 15:36:32 +00006748 * Prune it directly in the generated document
6749 * except for single text nodes.
Owen Taylor3473f882001-02-23 17:55:21 +00006750 */
Daniel Veillard0161e632008-08-28 15:36:32 +00006751 if (((list->type == XML_TEXT_NODE) &&
6752 (list->next == NULL)) ||
6753 (ctxt->parseMode == XML_PARSE_READER)) {
6754 list->parent = (xmlNodePtr) ent;
6755 list = NULL;
6756 ent->owner = 1;
6757 } else {
6758 ent->owner = 0;
6759 while (list != NULL) {
6760 list->parent = (xmlNodePtr) ctxt->node;
6761 list->doc = ctxt->myDoc;
6762 if (list->next == NULL)
6763 ent->last = list;
6764 list = list->next;
Owen Taylor3473f882001-02-23 17:55:21 +00006765 }
Daniel Veillard0161e632008-08-28 15:36:32 +00006766 list = ent->children;
6767#ifdef LIBXML_LEGACY_ENABLED
6768 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
6769 xmlAddEntityReference(ent, list, NULL);
6770#endif /* LIBXML_LEGACY_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00006771 }
6772 } else {
Daniel Veillard0161e632008-08-28 15:36:32 +00006773 ent->owner = 1;
6774 while (list != NULL) {
6775 list->parent = (xmlNodePtr) ent;
6776 if (list->next == NULL)
6777 ent->last = list;
6778 list = list->next;
Owen Taylor3473f882001-02-23 17:55:21 +00006779 }
6780 }
Daniel Veillard0161e632008-08-28 15:36:32 +00006781 } else {
6782 xmlFreeNodeList(list);
6783 list = NULL;
Daniel Veillarda37a6ad2006-10-10 20:05:45 +00006784 }
Daniel Veillard0161e632008-08-28 15:36:32 +00006785 } else if ((ret != XML_ERR_OK) &&
6786 (ret != XML_WAR_UNDECLARED_ENTITY)) {
6787 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
6788 "Entity '%s' failed to parse\n", ent->name);
6789 } else if (list != NULL) {
6790 xmlFreeNodeList(list);
6791 list = NULL;
6792 }
6793 if (ent->checked == 0)
6794 ent->checked = 1;
6795 } else if (ent->checked != 1) {
6796 ctxt->nbentities += ent->checked;
6797 }
Daniel Veillarda37a6ad2006-10-10 20:05:45 +00006798
Daniel Veillard0161e632008-08-28 15:36:32 +00006799 /*
6800 * Now that the entity content has been gathered
6801 * provide it to the application, this can take different forms based
6802 * on the parsing modes.
6803 */
6804 if (ent->children == NULL) {
6805 /*
6806 * Probably running in SAX mode and the callbacks don't
6807 * build the entity content. So unless we already went
6808 * though parsing for first checking go though the entity
6809 * content to generate callbacks associated to the entity
6810 */
6811 if (was_checked != 0) {
6812 void *user_data;
Owen Taylor3473f882001-02-23 17:55:21 +00006813 /*
Daniel Veillard0161e632008-08-28 15:36:32 +00006814 * This is a bit hackish but this seems the best
6815 * way to make sure both SAX and DOM entity support
6816 * behaves okay.
Owen Taylor3473f882001-02-23 17:55:21 +00006817 */
Daniel Veillard0161e632008-08-28 15:36:32 +00006818 if (ctxt->userData == ctxt)
6819 user_data = NULL;
6820 else
6821 user_data = ctxt->userData;
6822
6823 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
6824 ctxt->depth++;
6825 ret = xmlParseBalancedChunkMemoryInternal(ctxt,
6826 ent->content, user_data, NULL);
6827 ctxt->depth--;
6828 } else if (ent->etype ==
6829 XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
6830 ctxt->depth++;
6831 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
6832 ctxt->sax, user_data, ctxt->depth,
6833 ent->URI, ent->ExternalID, NULL);
6834 ctxt->depth--;
6835 } else {
6836 ret = XML_ERR_ENTITY_PE_INTERNAL;
6837 xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
6838 "invalid entity type found\n", NULL);
6839 }
6840 if (ret == XML_ERR_ENTITY_LOOP) {
6841 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
6842 return;
6843 }
6844 }
6845 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
6846 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
6847 /*
6848 * Entity reference callback comes second, it's somewhat
6849 * superfluous but a compatibility to historical behaviour
6850 */
6851 ctxt->sax->reference(ctxt->userData, ent->name);
6852 }
6853 return;
6854 }
6855
6856 /*
6857 * If we didn't get any children for the entity being built
6858 */
6859 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
6860 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
6861 /*
6862 * Create a node.
6863 */
6864 ctxt->sax->reference(ctxt->userData, ent->name);
6865 return;
6866 }
6867
6868 if ((ctxt->replaceEntities) || (ent->children == NULL)) {
6869 /*
6870 * There is a problem on the handling of _private for entities
6871 * (bug 155816): Should we copy the content of the field from
6872 * the entity (possibly overwriting some value set by the user
6873 * when a copy is created), should we leave it alone, or should
6874 * we try to take care of different situations? The problem
6875 * is exacerbated by the usage of this field by the xmlReader.
6876 * To fix this bug, we look at _private on the created node
6877 * and, if it's NULL, we copy in whatever was in the entity.
6878 * If it's not NULL we leave it alone. This is somewhat of a
6879 * hack - maybe we should have further tests to determine
6880 * what to do.
6881 */
6882 if ((ctxt->node != NULL) && (ent->children != NULL)) {
6883 /*
6884 * Seems we are generating the DOM content, do
6885 * a simple tree copy for all references except the first
6886 * In the first occurrence list contains the replacement.
6887 * progressive == 2 means we are operating on the Reader
6888 * and since nodes are discarded we must copy all the time.
6889 */
6890 if (((list == NULL) && (ent->owner == 0)) ||
6891 (ctxt->parseMode == XML_PARSE_READER)) {
6892 xmlNodePtr nw = NULL, cur, firstChild = NULL;
6893
6894 /*
6895 * when operating on a reader, the entities definitions
6896 * are always owning the entities subtree.
6897 if (ctxt->parseMode == XML_PARSE_READER)
6898 ent->owner = 1;
6899 */
6900
6901 cur = ent->children;
6902 while (cur != NULL) {
6903 nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
6904 if (nw != NULL) {
6905 if (nw->_private == NULL)
6906 nw->_private = cur->_private;
6907 if (firstChild == NULL){
6908 firstChild = nw;
6909 }
6910 nw = xmlAddChild(ctxt->node, nw);
6911 }
6912 if (cur == ent->last) {
6913 /*
6914 * needed to detect some strange empty
6915 * node cases in the reader tests
6916 */
6917 if ((ctxt->parseMode == XML_PARSE_READER) &&
6918 (nw != NULL) &&
6919 (nw->type == XML_ELEMENT_NODE) &&
6920 (nw->children == NULL))
6921 nw->extra = 1;
6922
6923 break;
6924 }
6925 cur = cur->next;
6926 }
6927#ifdef LIBXML_LEGACY_ENABLED
6928 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
6929 xmlAddEntityReference(ent, firstChild, nw);
6930#endif /* LIBXML_LEGACY_ENABLED */
6931 } else if (list == NULL) {
6932 xmlNodePtr nw = NULL, cur, next, last,
6933 firstChild = NULL;
6934 /*
6935 * Copy the entity child list and make it the new
6936 * entity child list. The goal is to make sure any
6937 * ID or REF referenced will be the one from the
6938 * document content and not the entity copy.
6939 */
6940 cur = ent->children;
6941 ent->children = NULL;
6942 last = ent->last;
6943 ent->last = NULL;
6944 while (cur != NULL) {
6945 next = cur->next;
6946 cur->next = NULL;
6947 cur->parent = NULL;
6948 nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
6949 if (nw != NULL) {
6950 if (nw->_private == NULL)
6951 nw->_private = cur->_private;
6952 if (firstChild == NULL){
6953 firstChild = cur;
6954 }
6955 xmlAddChild((xmlNodePtr) ent, nw);
6956 xmlAddChild(ctxt->node, cur);
6957 }
6958 if (cur == last)
6959 break;
6960 cur = next;
6961 }
Daniel Veillardcba68392008-08-29 12:43:40 +00006962 if (ent->owner == 0)
6963 ent->owner = 1;
Daniel Veillard0161e632008-08-28 15:36:32 +00006964#ifdef LIBXML_LEGACY_ENABLED
6965 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
6966 xmlAddEntityReference(ent, firstChild, nw);
6967#endif /* LIBXML_LEGACY_ENABLED */
6968 } else {
6969 const xmlChar *nbktext;
6970
6971 /*
6972 * the name change is to avoid coalescing of the
6973 * node with a possible previous text one which
6974 * would make ent->children a dangling pointer
6975 */
6976 nbktext = xmlDictLookup(ctxt->dict, BAD_CAST "nbktext",
6977 -1);
6978 if (ent->children->type == XML_TEXT_NODE)
6979 ent->children->name = nbktext;
6980 if ((ent->last != ent->children) &&
6981 (ent->last->type == XML_TEXT_NODE))
6982 ent->last->name = nbktext;
6983 xmlAddChildList(ctxt->node, ent->children);
6984 }
6985
6986 /*
6987 * This is to avoid a nasty side effect, see
6988 * characters() in SAX.c
6989 */
6990 ctxt->nodemem = 0;
6991 ctxt->nodelen = 0;
6992 return;
Owen Taylor3473f882001-02-23 17:55:21 +00006993 }
6994 }
6995}
6996
6997/**
6998 * xmlParseEntityRef:
6999 * @ctxt: an XML parser context
7000 *
7001 * parse ENTITY references declarations
7002 *
7003 * [68] EntityRef ::= '&' Name ';'
7004 *
7005 * [ WFC: Entity Declared ]
7006 * In a document without any DTD, a document with only an internal DTD
7007 * subset which contains no parameter entity references, or a document
7008 * with "standalone='yes'", the Name given in the entity reference
7009 * must match that in an entity declaration, except that well-formed
7010 * documents need not declare any of the following entities: amp, lt,
7011 * gt, apos, quot. The declaration of a parameter entity must precede
7012 * any reference to it. Similarly, the declaration of a general entity
7013 * must precede any reference to it which appears in a default value in an
7014 * attribute-list declaration. Note that if entities are declared in the
7015 * external subset or in external parameter entities, a non-validating
7016 * processor is not obligated to read and process their declarations;
7017 * for such documents, the rule that an entity must be declared is a
7018 * well-formedness constraint only if standalone='yes'.
7019 *
7020 * [ WFC: Parsed Entity ]
7021 * An entity reference must not contain the name of an unparsed entity
7022 *
7023 * Returns the xmlEntityPtr if found, or NULL otherwise.
7024 */
7025xmlEntityPtr
7026xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007027 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00007028 xmlEntityPtr ent = NULL;
7029
7030 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00007031
Daniel Veillard0161e632008-08-28 15:36:32 +00007032 if (RAW != '&')
7033 return(NULL);
7034 NEXT;
7035 name = xmlParseName(ctxt);
7036 if (name == NULL) {
7037 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7038 "xmlParseEntityRef: no name\n");
7039 return(NULL);
7040 }
7041 if (RAW != ';') {
7042 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7043 return(NULL);
7044 }
7045 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00007046
Daniel Veillard0161e632008-08-28 15:36:32 +00007047 /*
7048 * Predefined entites override any extra definition
7049 */
Rob Richardsb9ed0172009-01-05 17:28:50 +00007050 if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7051 ent = xmlGetPredefinedEntity(name);
7052 if (ent != NULL)
7053 return(ent);
7054 }
Owen Taylor3473f882001-02-23 17:55:21 +00007055
Daniel Veillard0161e632008-08-28 15:36:32 +00007056 /*
7057 * Increate the number of entity references parsed
7058 */
7059 ctxt->nbentities++;
Owen Taylor3473f882001-02-23 17:55:21 +00007060
Daniel Veillard0161e632008-08-28 15:36:32 +00007061 /*
7062 * Ask first SAX for entity resolution, otherwise try the
7063 * entities which may have stored in the parser context.
7064 */
7065 if (ctxt->sax != NULL) {
7066 if (ctxt->sax->getEntity != NULL)
7067 ent = ctxt->sax->getEntity(ctxt->userData, name);
Rob Richardsb9ed0172009-01-05 17:28:50 +00007068 if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7069 (ctxt->options & XML_PARSE_OLDSAX))
7070 ent = xmlGetPredefinedEntity(name);
Daniel Veillard0161e632008-08-28 15:36:32 +00007071 if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7072 (ctxt->userData==ctxt)) {
7073 ent = xmlSAX2GetEntity(ctxt, name);
Owen Taylor3473f882001-02-23 17:55:21 +00007074 }
7075 }
Daniel Veillard0161e632008-08-28 15:36:32 +00007076 /*
7077 * [ WFC: Entity Declared ]
7078 * In a document without any DTD, a document with only an
7079 * internal DTD subset which contains no parameter entity
7080 * references, or a document with "standalone='yes'", the
7081 * Name given in the entity reference must match that in an
7082 * entity declaration, except that well-formed documents
7083 * need not declare any of the following entities: amp, lt,
7084 * gt, apos, quot.
7085 * The declaration of a parameter entity must precede any
7086 * reference to it.
7087 * Similarly, the declaration of a general entity must
7088 * precede any reference to it which appears in a default
7089 * value in an attribute-list declaration. Note that if
7090 * entities are declared in the external subset or in
7091 * external parameter entities, a non-validating processor
7092 * is not obligated to read and process their declarations;
7093 * for such documents, the rule that an entity must be
7094 * declared is a well-formedness constraint only if
7095 * standalone='yes'.
7096 */
7097 if (ent == NULL) {
7098 if ((ctxt->standalone == 1) ||
7099 ((ctxt->hasExternalSubset == 0) &&
7100 (ctxt->hasPErefs == 0))) {
7101 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7102 "Entity '%s' not defined\n", name);
7103 } else {
7104 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7105 "Entity '%s' not defined\n", name);
7106 if ((ctxt->inSubset == 0) &&
7107 (ctxt->sax != NULL) &&
7108 (ctxt->sax->reference != NULL)) {
7109 ctxt->sax->reference(ctxt->userData, name);
7110 }
7111 }
7112 ctxt->valid = 0;
7113 }
7114
7115 /*
7116 * [ WFC: Parsed Entity ]
7117 * An entity reference must not contain the name of an
7118 * unparsed entity
7119 */
7120 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7121 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7122 "Entity reference to unparsed entity %s\n", name);
7123 }
7124
7125 /*
7126 * [ WFC: No External Entity References ]
7127 * Attribute values cannot contain direct or indirect
7128 * entity references to external entities.
7129 */
7130 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7131 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
7132 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7133 "Attribute references external entity '%s'\n", name);
7134 }
7135 /*
7136 * [ WFC: No < in Attribute Values ]
7137 * The replacement text of any entity referred to directly or
7138 * indirectly in an attribute value (other than "&lt;") must
7139 * not contain a <.
7140 */
7141 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7142 (ent != NULL) && (ent->content != NULL) &&
Rob Richardsb9ed0172009-01-05 17:28:50 +00007143 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
Daniel Veillard0161e632008-08-28 15:36:32 +00007144 (xmlStrchr(ent->content, '<'))) {
7145 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
7146 "'<' in entity '%s' is not allowed in attributes values\n", name);
7147 }
7148
7149 /*
7150 * Internal check, no parameter entities here ...
7151 */
7152 else {
7153 switch (ent->etype) {
7154 case XML_INTERNAL_PARAMETER_ENTITY:
7155 case XML_EXTERNAL_PARAMETER_ENTITY:
7156 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
7157 "Attempt to reference the parameter entity '%s'\n",
7158 name);
7159 break;
7160 default:
7161 break;
7162 }
7163 }
7164
7165 /*
7166 * [ WFC: No Recursion ]
7167 * A parsed entity must not contain a recursive reference
7168 * to itself, either directly or indirectly.
7169 * Done somewhere else
7170 */
Owen Taylor3473f882001-02-23 17:55:21 +00007171 return(ent);
7172}
7173
7174/**
7175 * xmlParseStringEntityRef:
7176 * @ctxt: an XML parser context
7177 * @str: a pointer to an index in the string
7178 *
7179 * parse ENTITY references declarations, but this version parses it from
7180 * a string value.
7181 *
7182 * [68] EntityRef ::= '&' Name ';'
7183 *
7184 * [ WFC: Entity Declared ]
7185 * In a document without any DTD, a document with only an internal DTD
7186 * subset which contains no parameter entity references, or a document
7187 * with "standalone='yes'", the Name given in the entity reference
7188 * must match that in an entity declaration, except that well-formed
7189 * documents need not declare any of the following entities: amp, lt,
7190 * gt, apos, quot. The declaration of a parameter entity must precede
7191 * any reference to it. Similarly, the declaration of a general entity
7192 * must precede any reference to it which appears in a default value in an
7193 * attribute-list declaration. Note that if entities are declared in the
7194 * external subset or in external parameter entities, a non-validating
7195 * processor is not obligated to read and process their declarations;
7196 * for such documents, the rule that an entity must be declared is a
7197 * well-formedness constraint only if standalone='yes'.
7198 *
7199 * [ WFC: Parsed Entity ]
7200 * An entity reference must not contain the name of an unparsed entity
7201 *
7202 * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
7203 * is updated to the current location in the string.
7204 */
7205xmlEntityPtr
7206xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
7207 xmlChar *name;
7208 const xmlChar *ptr;
7209 xmlChar cur;
7210 xmlEntityPtr ent = NULL;
7211
7212 if ((str == NULL) || (*str == NULL))
7213 return(NULL);
7214 ptr = *str;
7215 cur = *ptr;
Daniel Veillard0161e632008-08-28 15:36:32 +00007216 if (cur != '&')
7217 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007218
Daniel Veillard0161e632008-08-28 15:36:32 +00007219 ptr++;
7220 cur = *ptr;
7221 name = xmlParseStringName(ctxt, &ptr);
7222 if (name == NULL) {
7223 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7224 "xmlParseStringEntityRef: no name\n");
7225 *str = ptr;
7226 return(NULL);
7227 }
7228 if (*ptr != ';') {
7229 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
Daniel Veillard7f4547c2008-10-03 07:58:23 +00007230 xmlFree(name);
Daniel Veillard0161e632008-08-28 15:36:32 +00007231 *str = ptr;
7232 return(NULL);
7233 }
7234 ptr++;
Owen Taylor3473f882001-02-23 17:55:21 +00007235
Owen Taylor3473f882001-02-23 17:55:21 +00007236
Daniel Veillard0161e632008-08-28 15:36:32 +00007237 /*
7238 * Predefined entites override any extra definition
7239 */
Rob Richardsb9ed0172009-01-05 17:28:50 +00007240 if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7241 ent = xmlGetPredefinedEntity(name);
7242 if (ent != NULL) {
7243 xmlFree(name);
7244 *str = ptr;
7245 return(ent);
7246 }
Daniel Veillard34a7fc32008-10-02 20:55:10 +00007247 }
Owen Taylor3473f882001-02-23 17:55:21 +00007248
Daniel Veillard0161e632008-08-28 15:36:32 +00007249 /*
7250 * Increate the number of entity references parsed
7251 */
7252 ctxt->nbentities++;
Owen Taylor3473f882001-02-23 17:55:21 +00007253
Daniel Veillard0161e632008-08-28 15:36:32 +00007254 /*
7255 * Ask first SAX for entity resolution, otherwise try the
7256 * entities which may have stored in the parser context.
7257 */
7258 if (ctxt->sax != NULL) {
7259 if (ctxt->sax->getEntity != NULL)
7260 ent = ctxt->sax->getEntity(ctxt->userData, name);
Rob Richardsb9ed0172009-01-05 17:28:50 +00007261 if ((ent == NULL) && (ctxt->options & XML_PARSE_OLDSAX))
7262 ent = xmlGetPredefinedEntity(name);
Daniel Veillard0161e632008-08-28 15:36:32 +00007263 if ((ent == NULL) && (ctxt->userData==ctxt)) {
7264 ent = xmlSAX2GetEntity(ctxt, name);
Owen Taylor3473f882001-02-23 17:55:21 +00007265 }
7266 }
Daniel Veillard0161e632008-08-28 15:36:32 +00007267
7268 /*
7269 * [ WFC: Entity Declared ]
7270 * In a document without any DTD, a document with only an
7271 * internal DTD subset which contains no parameter entity
7272 * references, or a document with "standalone='yes'", the
7273 * Name given in the entity reference must match that in an
7274 * entity declaration, except that well-formed documents
7275 * need not declare any of the following entities: amp, lt,
7276 * gt, apos, quot.
7277 * The declaration of a parameter entity must precede any
7278 * reference to it.
7279 * Similarly, the declaration of a general entity must
7280 * precede any reference to it which appears in a default
7281 * value in an attribute-list declaration. Note that if
7282 * entities are declared in the external subset or in
7283 * external parameter entities, a non-validating processor
7284 * is not obligated to read and process their declarations;
7285 * for such documents, the rule that an entity must be
7286 * declared is a well-formedness constraint only if
7287 * standalone='yes'.
7288 */
7289 if (ent == NULL) {
7290 if ((ctxt->standalone == 1) ||
7291 ((ctxt->hasExternalSubset == 0) &&
7292 (ctxt->hasPErefs == 0))) {
7293 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7294 "Entity '%s' not defined\n", name);
7295 } else {
7296 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7297 "Entity '%s' not defined\n",
7298 name);
7299 }
7300 /* TODO ? check regressions ctxt->valid = 0; */
7301 }
7302
7303 /*
7304 * [ WFC: Parsed Entity ]
7305 * An entity reference must not contain the name of an
7306 * unparsed entity
7307 */
7308 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7309 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7310 "Entity reference to unparsed entity %s\n", name);
7311 }
7312
7313 /*
7314 * [ WFC: No External Entity References ]
7315 * Attribute values cannot contain direct or indirect
7316 * entity references to external entities.
7317 */
7318 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7319 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
7320 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7321 "Attribute references external entity '%s'\n", name);
7322 }
7323 /*
7324 * [ WFC: No < in Attribute Values ]
7325 * The replacement text of any entity referred to directly or
7326 * indirectly in an attribute value (other than "&lt;") must
7327 * not contain a <.
7328 */
7329 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7330 (ent != NULL) && (ent->content != NULL) &&
Rob Richardsb9ed0172009-01-05 17:28:50 +00007331 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
Daniel Veillard0161e632008-08-28 15:36:32 +00007332 (xmlStrchr(ent->content, '<'))) {
7333 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
7334 "'<' in entity '%s' is not allowed in attributes values\n",
7335 name);
7336 }
7337
7338 /*
7339 * Internal check, no parameter entities here ...
7340 */
7341 else {
7342 switch (ent->etype) {
7343 case XML_INTERNAL_PARAMETER_ENTITY:
7344 case XML_EXTERNAL_PARAMETER_ENTITY:
7345 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
7346 "Attempt to reference the parameter entity '%s'\n",
7347 name);
7348 break;
7349 default:
7350 break;
7351 }
7352 }
7353
7354 /*
7355 * [ WFC: No Recursion ]
7356 * A parsed entity must not contain a recursive reference
7357 * to itself, either directly or indirectly.
7358 * Done somewhere else
7359 */
7360
7361 xmlFree(name);
Owen Taylor3473f882001-02-23 17:55:21 +00007362 *str = ptr;
7363 return(ent);
7364}
7365
7366/**
7367 * xmlParsePEReference:
7368 * @ctxt: an XML parser context
7369 *
7370 * parse PEReference declarations
7371 * The entity content is handled directly by pushing it's content as
7372 * a new input stream.
7373 *
7374 * [69] PEReference ::= '%' Name ';'
7375 *
7376 * [ WFC: No Recursion ]
7377 * A parsed entity must not contain a recursive
7378 * reference to itself, either directly or indirectly.
7379 *
7380 * [ WFC: Entity Declared ]
7381 * In a document without any DTD, a document with only an internal DTD
7382 * subset which contains no parameter entity references, or a document
7383 * with "standalone='yes'", ... ... The declaration of a parameter
7384 * entity must precede any reference to it...
7385 *
7386 * [ VC: Entity Declared ]
7387 * In a document with an external subset or external parameter entities
7388 * with "standalone='no'", ... ... The declaration of a parameter entity
7389 * must precede any reference to it...
7390 *
7391 * [ WFC: In DTD ]
7392 * Parameter-entity references may only appear in the DTD.
7393 * NOTE: misleading but this is handled.
7394 */
7395void
Daniel Veillard8f597c32003-10-06 08:19:27 +00007396xmlParsePEReference(xmlParserCtxtPtr ctxt)
7397{
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007398 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00007399 xmlEntityPtr entity = NULL;
7400 xmlParserInputPtr input;
7401
Daniel Veillard0161e632008-08-28 15:36:32 +00007402 if (RAW != '%')
7403 return;
7404 NEXT;
7405 name = xmlParseName(ctxt);
7406 if (name == NULL) {
7407 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7408 "xmlParsePEReference: no name\n");
7409 return;
Owen Taylor3473f882001-02-23 17:55:21 +00007410 }
Daniel Veillard0161e632008-08-28 15:36:32 +00007411 if (RAW != ';') {
7412 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7413 return;
7414 }
7415
7416 NEXT;
7417
7418 /*
7419 * Increate the number of entity references parsed
7420 */
7421 ctxt->nbentities++;
7422
7423 /*
7424 * Request the entity from SAX
7425 */
7426 if ((ctxt->sax != NULL) &&
7427 (ctxt->sax->getParameterEntity != NULL))
7428 entity = ctxt->sax->getParameterEntity(ctxt->userData,
7429 name);
7430 if (entity == NULL) {
7431 /*
7432 * [ WFC: Entity Declared ]
7433 * In a document without any DTD, a document with only an
7434 * internal DTD subset which contains no parameter entity
7435 * references, or a document with "standalone='yes'", ...
7436 * ... The declaration of a parameter entity must precede
7437 * any reference to it...
7438 */
7439 if ((ctxt->standalone == 1) ||
7440 ((ctxt->hasExternalSubset == 0) &&
7441 (ctxt->hasPErefs == 0))) {
7442 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7443 "PEReference: %%%s; not found\n",
7444 name);
7445 } else {
7446 /*
7447 * [ VC: Entity Declared ]
7448 * In a document with an external subset or external
7449 * parameter entities with "standalone='no'", ...
7450 * ... The declaration of a parameter entity must
7451 * precede any reference to it...
7452 */
7453 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7454 "PEReference: %%%s; not found\n",
7455 name, NULL);
7456 ctxt->valid = 0;
7457 }
7458 } else {
7459 /*
7460 * Internal checking in case the entity quest barfed
7461 */
7462 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
7463 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
7464 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7465 "Internal: %%%s; is not a parameter entity\n",
7466 name, NULL);
7467 } else if (ctxt->input->free != deallocblankswrapper) {
7468 input = xmlNewBlanksWrapperInputStream(ctxt, entity);
7469 if (xmlPushInput(ctxt, input) < 0)
7470 return;
7471 } else {
7472 /*
7473 * TODO !!!
7474 * handle the extra spaces added before and after
7475 * c.f. http://www.w3.org/TR/REC-xml#as-PE
7476 */
7477 input = xmlNewEntityInputStream(ctxt, entity);
7478 if (xmlPushInput(ctxt, input) < 0)
7479 return;
7480 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
7481 (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
7482 (IS_BLANK_CH(NXT(5)))) {
7483 xmlParseTextDecl(ctxt);
7484 if (ctxt->errNo ==
7485 XML_ERR_UNSUPPORTED_ENCODING) {
7486 /*
7487 * The XML REC instructs us to stop parsing
7488 * right here
7489 */
7490 ctxt->instate = XML_PARSER_EOF;
7491 return;
7492 }
7493 }
7494 }
7495 }
7496 ctxt->hasPErefs = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007497}
7498
7499/**
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00007500 * xmlLoadEntityContent:
7501 * @ctxt: an XML parser context
7502 * @entity: an unloaded system entity
7503 *
7504 * Load the original content of the given system entity from the
7505 * ExternalID/SystemID given. This is to be used for Included in Literal
7506 * http://www.w3.org/TR/REC-xml/#inliteral processing of entities references
7507 *
7508 * Returns 0 in case of success and -1 in case of failure
7509 */
7510static int
7511xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
7512 xmlParserInputPtr input;
7513 xmlBufferPtr buf;
7514 int l, c;
7515 int count = 0;
7516
7517 if ((ctxt == NULL) || (entity == NULL) ||
7518 ((entity->etype != XML_EXTERNAL_PARAMETER_ENTITY) &&
7519 (entity->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY)) ||
7520 (entity->content != NULL)) {
7521 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
7522 "xmlLoadEntityContent parameter error");
7523 return(-1);
7524 }
7525
7526 if (xmlParserDebugEntities)
7527 xmlGenericError(xmlGenericErrorContext,
7528 "Reading %s entity content input\n", entity->name);
7529
7530 buf = xmlBufferCreate();
7531 if (buf == NULL) {
7532 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
7533 "xmlLoadEntityContent parameter error");
7534 return(-1);
7535 }
7536
7537 input = xmlNewEntityInputStream(ctxt, entity);
7538 if (input == NULL) {
7539 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
7540 "xmlLoadEntityContent input error");
7541 xmlBufferFree(buf);
7542 return(-1);
7543 }
7544
7545 /*
7546 * Push the entity as the current input, read char by char
7547 * saving to the buffer until the end of the entity or an error
7548 */
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00007549 if (xmlPushInput(ctxt, input) < 0) {
7550 xmlBufferFree(buf);
7551 return(-1);
7552 }
7553
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00007554 GROW;
7555 c = CUR_CHAR(l);
7556 while ((ctxt->input == input) && (ctxt->input->cur < ctxt->input->end) &&
7557 (IS_CHAR(c))) {
7558 xmlBufferAdd(buf, ctxt->input->cur, l);
7559 if (count++ > 100) {
7560 count = 0;
7561 GROW;
7562 }
7563 NEXTL(l);
7564 c = CUR_CHAR(l);
7565 }
7566
7567 if ((ctxt->input == input) && (ctxt->input->cur >= ctxt->input->end)) {
7568 xmlPopInput(ctxt);
7569 } else if (!IS_CHAR(c)) {
7570 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
7571 "xmlLoadEntityContent: invalid char value %d\n",
7572 c);
7573 xmlBufferFree(buf);
7574 return(-1);
7575 }
7576 entity->content = buf->content;
7577 buf->content = NULL;
7578 xmlBufferFree(buf);
7579
7580 return(0);
7581}
7582
7583/**
Owen Taylor3473f882001-02-23 17:55:21 +00007584 * xmlParseStringPEReference:
7585 * @ctxt: an XML parser context
7586 * @str: a pointer to an index in the string
7587 *
7588 * parse PEReference declarations
7589 *
7590 * [69] PEReference ::= '%' Name ';'
7591 *
7592 * [ WFC: No Recursion ]
7593 * A parsed entity must not contain a recursive
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00007594 * reference to itself, either directly or indirectly.
Owen Taylor3473f882001-02-23 17:55:21 +00007595 *
7596 * [ WFC: Entity Declared ]
7597 * In a document without any DTD, a document with only an internal DTD
7598 * subset which contains no parameter entity references, or a document
7599 * with "standalone='yes'", ... ... The declaration of a parameter
7600 * entity must precede any reference to it...
7601 *
7602 * [ VC: Entity Declared ]
7603 * In a document with an external subset or external parameter entities
7604 * with "standalone='no'", ... ... The declaration of a parameter entity
7605 * must precede any reference to it...
7606 *
7607 * [ WFC: In DTD ]
7608 * Parameter-entity references may only appear in the DTD.
7609 * NOTE: misleading but this is handled.
7610 *
7611 * Returns the string of the entity content.
7612 * str is updated to the current value of the index
7613 */
7614xmlEntityPtr
7615xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
7616 const xmlChar *ptr;
7617 xmlChar cur;
7618 xmlChar *name;
7619 xmlEntityPtr entity = NULL;
7620
7621 if ((str == NULL) || (*str == NULL)) return(NULL);
7622 ptr = *str;
7623 cur = *ptr;
Daniel Veillard0161e632008-08-28 15:36:32 +00007624 if (cur != '%')
7625 return(NULL);
7626 ptr++;
7627 cur = *ptr;
7628 name = xmlParseStringName(ctxt, &ptr);
7629 if (name == NULL) {
7630 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7631 "xmlParseStringPEReference: no name\n");
7632 *str = ptr;
7633 return(NULL);
7634 }
7635 cur = *ptr;
7636 if (cur != ';') {
7637 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7638 xmlFree(name);
7639 *str = ptr;
7640 return(NULL);
7641 }
7642 ptr++;
7643
7644 /*
7645 * Increate the number of entity references parsed
7646 */
7647 ctxt->nbentities++;
7648
7649 /*
7650 * Request the entity from SAX
7651 */
7652 if ((ctxt->sax != NULL) &&
7653 (ctxt->sax->getParameterEntity != NULL))
7654 entity = ctxt->sax->getParameterEntity(ctxt->userData,
7655 name);
7656 if (entity == NULL) {
7657 /*
7658 * [ WFC: Entity Declared ]
7659 * In a document without any DTD, a document with only an
7660 * internal DTD subset which contains no parameter entity
7661 * references, or a document with "standalone='yes'", ...
7662 * ... The declaration of a parameter entity must precede
7663 * any reference to it...
7664 */
7665 if ((ctxt->standalone == 1) ||
7666 ((ctxt->hasExternalSubset == 0) && (ctxt->hasPErefs == 0))) {
7667 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7668 "PEReference: %%%s; not found\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00007669 } else {
Daniel Veillard0161e632008-08-28 15:36:32 +00007670 /*
7671 * [ VC: Entity Declared ]
7672 * In a document with an external subset or external
7673 * parameter entities with "standalone='no'", ...
7674 * ... The declaration of a parameter entity must
7675 * precede any reference to it...
7676 */
7677 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7678 "PEReference: %%%s; not found\n",
7679 name, NULL);
7680 ctxt->valid = 0;
7681 }
7682 } else {
7683 /*
7684 * Internal checking in case the entity quest barfed
7685 */
7686 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
7687 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
7688 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7689 "%%%s; is not a parameter entity\n",
7690 name, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007691 }
7692 }
Daniel Veillard0161e632008-08-28 15:36:32 +00007693 ctxt->hasPErefs = 1;
7694 xmlFree(name);
Owen Taylor3473f882001-02-23 17:55:21 +00007695 *str = ptr;
7696 return(entity);
7697}
7698
7699/**
7700 * xmlParseDocTypeDecl:
7701 * @ctxt: an XML parser context
7702 *
7703 * parse a DOCTYPE declaration
7704 *
7705 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
7706 * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
7707 *
7708 * [ VC: Root Element Type ]
7709 * The Name in the document type declaration must match the element
7710 * type of the root element.
7711 */
7712
7713void
7714xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007715 const xmlChar *name = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00007716 xmlChar *ExternalID = NULL;
7717 xmlChar *URI = NULL;
7718
7719 /*
7720 * We know that '<!DOCTYPE' has been detected.
7721 */
7722 SKIP(9);
7723
7724 SKIP_BLANKS;
7725
7726 /*
7727 * Parse the DOCTYPE name.
7728 */
7729 name = xmlParseName(ctxt);
7730 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007731 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7732 "xmlParseDocTypeDecl : no DOCTYPE name !\n");
Owen Taylor3473f882001-02-23 17:55:21 +00007733 }
7734 ctxt->intSubName = name;
7735
7736 SKIP_BLANKS;
7737
7738 /*
7739 * Check for SystemID and ExternalID
7740 */
7741 URI = xmlParseExternalID(ctxt, &ExternalID, 1);
7742
7743 if ((URI != NULL) || (ExternalID != NULL)) {
7744 ctxt->hasExternalSubset = 1;
7745 }
7746 ctxt->extSubURI = URI;
7747 ctxt->extSubSystem = ExternalID;
7748
7749 SKIP_BLANKS;
7750
7751 /*
7752 * Create and update the internal subset.
7753 */
7754 if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
7755 (!ctxt->disableSAX))
7756 ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
7757
7758 /*
7759 * Is there any internal subset declarations ?
7760 * they are handled separately in xmlParseInternalSubset()
7761 */
7762 if (RAW == '[')
7763 return;
7764
7765 /*
7766 * We should be at the end of the DOCTYPE declaration.
7767 */
7768 if (RAW != '>') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007769 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007770 }
7771 NEXT;
7772}
7773
7774/**
Daniel Veillardcbaf3992001-12-31 16:16:02 +00007775 * xmlParseInternalSubset:
Owen Taylor3473f882001-02-23 17:55:21 +00007776 * @ctxt: an XML parser context
7777 *
7778 * parse the internal subset declaration
7779 *
7780 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
7781 */
7782
Daniel Veillard56a4cb82001-03-24 17:00:36 +00007783static void
Owen Taylor3473f882001-02-23 17:55:21 +00007784xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
7785 /*
7786 * Is there any DTD definition ?
7787 */
7788 if (RAW == '[') {
7789 ctxt->instate = XML_PARSER_DTD;
7790 NEXT;
7791 /*
7792 * Parse the succession of Markup declarations and
7793 * PEReferences.
7794 * Subsequence (markupdecl | PEReference | S)*
7795 */
7796 while (RAW != ']') {
7797 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00007798 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00007799
7800 SKIP_BLANKS;
7801 xmlParseMarkupDecl(ctxt);
7802 xmlParsePEReference(ctxt);
7803
7804 /*
7805 * Pop-up of finished entities.
7806 */
7807 while ((RAW == 0) && (ctxt->inputNr > 1))
7808 xmlPopInput(ctxt);
7809
7810 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007811 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
Owen Taylor3473f882001-02-23 17:55:21 +00007812 "xmlParseInternalSubset: error detected in Markup declaration\n");
Owen Taylor3473f882001-02-23 17:55:21 +00007813 break;
7814 }
7815 }
7816 if (RAW == ']') {
7817 NEXT;
7818 SKIP_BLANKS;
7819 }
7820 }
7821
7822 /*
7823 * We should be at the end of the DOCTYPE declaration.
7824 */
7825 if (RAW != '>') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007826 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007827 }
7828 NEXT;
7829}
7830
Daniel Veillard81273902003-09-30 00:43:48 +00007831#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +00007832/**
7833 * xmlParseAttribute:
7834 * @ctxt: an XML parser context
7835 * @value: a xmlChar ** used to store the value of the attribute
7836 *
7837 * parse an attribute
7838 *
7839 * [41] Attribute ::= Name Eq AttValue
7840 *
7841 * [ WFC: No External Entity References ]
7842 * Attribute values cannot contain direct or indirect entity references
7843 * to external entities.
7844 *
7845 * [ WFC: No < in Attribute Values ]
7846 * The replacement text of any entity referred to directly or indirectly in
7847 * an attribute value (other than "&lt;") must not contain a <.
7848 *
7849 * [ VC: Attribute Value Type ]
7850 * The attribute must have been declared; the value must be of the type
7851 * declared for it.
7852 *
7853 * [25] Eq ::= S? '=' S?
7854 *
7855 * With namespace:
7856 *
7857 * [NS 11] Attribute ::= QName Eq AttValue
7858 *
7859 * Also the case QName == xmlns:??? is handled independently as a namespace
7860 * definition.
7861 *
7862 * Returns the attribute name, and the value in *value.
7863 */
7864
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007865const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00007866xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007867 const xmlChar *name;
7868 xmlChar *val;
Owen Taylor3473f882001-02-23 17:55:21 +00007869
7870 *value = NULL;
Daniel Veillard878eab02002-02-19 13:46:09 +00007871 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00007872 name = xmlParseName(ctxt);
7873 if (name == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00007874 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007875 "error parsing attribute name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00007876 return(NULL);
7877 }
7878
7879 /*
7880 * read the value
7881 */
7882 SKIP_BLANKS;
7883 if (RAW == '=') {
7884 NEXT;
7885 SKIP_BLANKS;
7886 val = xmlParseAttValue(ctxt);
7887 ctxt->instate = XML_PARSER_CONTENT;
7888 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00007889 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
Owen Taylor3473f882001-02-23 17:55:21 +00007890 "Specification mandate value for attribute %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00007891 return(NULL);
7892 }
7893
7894 /*
7895 * Check that xml:lang conforms to the specification
7896 * No more registered as an error, just generate a warning now
7897 * since this was deprecated in XML second edition
7898 */
7899 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
7900 if (!xmlCheckLanguageID(val)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00007901 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
7902 "Malformed value for xml:lang : %s\n",
7903 val, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007904 }
7905 }
7906
7907 /*
7908 * Check that xml:space conforms to the specification
7909 */
7910 if (xmlStrEqual(name, BAD_CAST "xml:space")) {
7911 if (xmlStrEqual(val, BAD_CAST "default"))
7912 *(ctxt->space) = 0;
7913 else if (xmlStrEqual(val, BAD_CAST "preserve"))
7914 *(ctxt->space) = 1;
7915 else {
Daniel Veillardd8925572005-06-08 22:34:55 +00007916 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
Daniel Veillard642104e2003-03-26 16:32:05 +00007917"Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
Daniel Veillardd8925572005-06-08 22:34:55 +00007918 val, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007919 }
7920 }
7921
7922 *value = val;
7923 return(name);
7924}
7925
7926/**
7927 * xmlParseStartTag:
7928 * @ctxt: an XML parser context
7929 *
7930 * parse a start of tag either for rule element or
7931 * EmptyElement. In both case we don't parse the tag closing chars.
7932 *
7933 * [40] STag ::= '<' Name (S Attribute)* S? '>'
7934 *
7935 * [ WFC: Unique Att Spec ]
7936 * No attribute name may appear more than once in the same start-tag or
7937 * empty-element tag.
7938 *
7939 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
7940 *
7941 * [ WFC: Unique Att Spec ]
7942 * No attribute name may appear more than once in the same start-tag or
7943 * empty-element tag.
7944 *
7945 * With namespace:
7946 *
7947 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
7948 *
7949 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
7950 *
7951 * Returns the element name parsed
7952 */
7953
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007954const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00007955xmlParseStartTag(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007956 const xmlChar *name;
7957 const xmlChar *attname;
Owen Taylor3473f882001-02-23 17:55:21 +00007958 xmlChar *attvalue;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00007959 const xmlChar **atts = ctxt->atts;
Owen Taylor3473f882001-02-23 17:55:21 +00007960 int nbatts = 0;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00007961 int maxatts = ctxt->maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00007962 int i;
7963
7964 if (RAW != '<') return(NULL);
Daniel Veillard21a0f912001-02-25 19:54:14 +00007965 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00007966
7967 name = xmlParseName(ctxt);
7968 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007969 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00007970 "xmlParseStartTag: invalid element name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00007971 return(NULL);
7972 }
7973
7974 /*
7975 * Now parse the attributes, it ends up with the ending
7976 *
7977 * (S Attribute)* S?
7978 */
7979 SKIP_BLANKS;
7980 GROW;
7981
Daniel Veillard21a0f912001-02-25 19:54:14 +00007982 while ((RAW != '>') &&
7983 ((RAW != '/') || (NXT(1) != '>')) &&
Daniel Veillard73b013f2003-09-30 12:36:01 +00007984 (IS_BYTE_CHAR(RAW))) {
Owen Taylor3473f882001-02-23 17:55:21 +00007985 const xmlChar *q = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00007986 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00007987
7988 attname = xmlParseAttribute(ctxt, &attvalue);
7989 if ((attname != NULL) && (attvalue != NULL)) {
7990 /*
7991 * [ WFC: Unique Att Spec ]
7992 * No attribute name may appear more than once in the same
7993 * start-tag or empty-element tag.
7994 */
7995 for (i = 0; i < nbatts;i += 2) {
7996 if (xmlStrEqual(atts[i], attname)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007997 xmlErrAttributeDup(ctxt, NULL, attname);
Owen Taylor3473f882001-02-23 17:55:21 +00007998 xmlFree(attvalue);
7999 goto failed;
8000 }
8001 }
Owen Taylor3473f882001-02-23 17:55:21 +00008002 /*
8003 * Add the pair to atts
8004 */
8005 if (atts == NULL) {
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008006 maxatts = 22; /* allow for 10 attrs by default */
8007 atts = (const xmlChar **)
8008 xmlMalloc(maxatts * sizeof(xmlChar *));
Owen Taylor3473f882001-02-23 17:55:21 +00008009 if (atts == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008010 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008011 if (attvalue != NULL)
8012 xmlFree(attvalue);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008013 goto failed;
Owen Taylor3473f882001-02-23 17:55:21 +00008014 }
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008015 ctxt->atts = atts;
8016 ctxt->maxatts = maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00008017 } else if (nbatts + 4 > maxatts) {
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008018 const xmlChar **n;
8019
Owen Taylor3473f882001-02-23 17:55:21 +00008020 maxatts *= 2;
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008021 n = (const xmlChar **) xmlRealloc((void *) atts,
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008022 maxatts * sizeof(const xmlChar *));
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008023 if (n == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008024 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008025 if (attvalue != NULL)
8026 xmlFree(attvalue);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008027 goto failed;
Owen Taylor3473f882001-02-23 17:55:21 +00008028 }
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008029 atts = n;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008030 ctxt->atts = atts;
8031 ctxt->maxatts = maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00008032 }
8033 atts[nbatts++] = attname;
8034 atts[nbatts++] = attvalue;
8035 atts[nbatts] = NULL;
8036 atts[nbatts + 1] = NULL;
8037 } else {
Owen Taylor3473f882001-02-23 17:55:21 +00008038 if (attvalue != NULL)
8039 xmlFree(attvalue);
8040 }
8041
8042failed:
8043
Daniel Veillard3772de32002-12-17 10:31:45 +00008044 GROW
Owen Taylor3473f882001-02-23 17:55:21 +00008045 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
8046 break;
William M. Brack76e95df2003-10-18 16:20:14 +00008047 if (!IS_BLANK_CH(RAW)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008048 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
8049 "attributes construct error\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008050 }
8051 SKIP_BLANKS;
Daniel Veillard02111c12003-02-24 19:14:52 +00008052 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
8053 (attname == NULL) && (attvalue == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008054 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
8055 "xmlParseStartTag: problem parsing attributes\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008056 break;
8057 }
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008058 SHRINK;
Owen Taylor3473f882001-02-23 17:55:21 +00008059 GROW;
8060 }
8061
8062 /*
8063 * SAX: Start of Element !
8064 */
8065 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008066 (!ctxt->disableSAX)) {
8067 if (nbatts > 0)
8068 ctxt->sax->startElement(ctxt->userData, name, atts);
8069 else
8070 ctxt->sax->startElement(ctxt->userData, name, NULL);
8071 }
Owen Taylor3473f882001-02-23 17:55:21 +00008072
8073 if (atts != NULL) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008074 /* Free only the content strings */
8075 for (i = 1;i < nbatts;i+=2)
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008076 if (atts[i] != NULL)
8077 xmlFree((xmlChar *) atts[i]);
Owen Taylor3473f882001-02-23 17:55:21 +00008078 }
8079 return(name);
8080}
8081
8082/**
Daniel Veillard0fb18932003-09-07 09:14:37 +00008083 * xmlParseEndTag1:
Owen Taylor3473f882001-02-23 17:55:21 +00008084 * @ctxt: an XML parser context
Daniel Veillard0fb18932003-09-07 09:14:37 +00008085 * @line: line of the start tag
8086 * @nsNr: number of namespaces on the start tag
Owen Taylor3473f882001-02-23 17:55:21 +00008087 *
8088 * parse an end of tag
8089 *
8090 * [42] ETag ::= '</' Name S? '>'
8091 *
8092 * With namespace
8093 *
8094 * [NS 9] ETag ::= '</' QName S? '>'
8095 */
8096
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00008097static void
Daniel Veillard0fb18932003-09-07 09:14:37 +00008098xmlParseEndTag1(xmlParserCtxtPtr ctxt, int line) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008099 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00008100
8101 GROW;
8102 if ((RAW != '<') || (NXT(1) != '/')) {
Daniel Veillardf403d292003-10-05 13:51:35 +00008103 xmlFatalErrMsg(ctxt, XML_ERR_LTSLASH_REQUIRED,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008104 "xmlParseEndTag: '</' not found\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008105 return;
8106 }
8107 SKIP(2);
8108
Daniel Veillard46de64e2002-05-29 08:21:33 +00008109 name = xmlParseNameAndCompare(ctxt,ctxt->name);
Owen Taylor3473f882001-02-23 17:55:21 +00008110
8111 /*
8112 * We should definitely be at the ending "S? '>'" part
8113 */
8114 GROW;
8115 SKIP_BLANKS;
Daniel Veillard73b013f2003-09-30 12:36:01 +00008116 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008117 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008118 } else
Daniel Veillard21a0f912001-02-25 19:54:14 +00008119 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00008120
8121 /*
8122 * [ WFC: Element Type Match ]
8123 * The Name in an element's end-tag must match the element type in the
8124 * start-tag.
8125 *
8126 */
Daniel Veillard46de64e2002-05-29 08:21:33 +00008127 if (name != (xmlChar*)1) {
Daniel Veillardf403d292003-10-05 13:51:35 +00008128 if (name == NULL) name = BAD_CAST "unparseable";
8129 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00008130 "Opening and ending tag mismatch: %s line %d and %s\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00008131 ctxt->name, line, name);
Owen Taylor3473f882001-02-23 17:55:21 +00008132 }
8133
8134 /*
8135 * SAX: End of Tag
8136 */
8137 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
8138 (!ctxt->disableSAX))
Daniel Veillard46de64e2002-05-29 08:21:33 +00008139 ctxt->sax->endElement(ctxt->userData, ctxt->name);
Owen Taylor3473f882001-02-23 17:55:21 +00008140
Daniel Veillarde57ec792003-09-10 10:50:59 +00008141 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00008142 spacePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00008143 return;
8144}
8145
8146/**
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00008147 * xmlParseEndTag:
8148 * @ctxt: an XML parser context
8149 *
8150 * parse an end of tag
8151 *
8152 * [42] ETag ::= '</' Name S? '>'
8153 *
8154 * With namespace
8155 *
8156 * [NS 9] ETag ::= '</' QName S? '>'
8157 */
8158
8159void
8160xmlParseEndTag(xmlParserCtxtPtr ctxt) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00008161 xmlParseEndTag1(ctxt, 0);
8162}
Daniel Veillard81273902003-09-30 00:43:48 +00008163#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillard0fb18932003-09-07 09:14:37 +00008164
8165/************************************************************************
8166 * *
8167 * SAX 2 specific operations *
8168 * *
8169 ************************************************************************/
8170
Daniel Veillard0fb18932003-09-07 09:14:37 +00008171/*
8172 * xmlGetNamespace:
8173 * @ctxt: an XML parser context
8174 * @prefix: the prefix to lookup
8175 *
8176 * Lookup the namespace name for the @prefix (which ca be NULL)
8177 * The prefix must come from the @ctxt->dict dictionnary
8178 *
8179 * Returns the namespace name or NULL if not bound
8180 */
8181static const xmlChar *
8182xmlGetNamespace(xmlParserCtxtPtr ctxt, const xmlChar *prefix) {
8183 int i;
8184
Daniel Veillarde57ec792003-09-10 10:50:59 +00008185 if (prefix == ctxt->str_xml) return(ctxt->str_xml_ns);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008186 for (i = ctxt->nsNr - 2;i >= 0;i-=2)
Daniel Veillarde57ec792003-09-10 10:50:59 +00008187 if (ctxt->nsTab[i] == prefix) {
8188 if ((prefix == NULL) && (*ctxt->nsTab[i + 1] == 0))
8189 return(NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008190 return(ctxt->nsTab[i + 1]);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008191 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008192 return(NULL);
8193}
8194
8195/**
Daniel Veillard0fb18932003-09-07 09:14:37 +00008196 * xmlParseQName:
8197 * @ctxt: an XML parser context
8198 * @prefix: pointer to store the prefix part
8199 *
8200 * parse an XML Namespace QName
8201 *
8202 * [6] QName ::= (Prefix ':')? LocalPart
8203 * [7] Prefix ::= NCName
8204 * [8] LocalPart ::= NCName
8205 *
8206 * Returns the Name parsed or NULL
8207 */
8208
8209static const xmlChar *
8210xmlParseQName(xmlParserCtxtPtr ctxt, const xmlChar **prefix) {
8211 const xmlChar *l, *p;
8212
8213 GROW;
8214
8215 l = xmlParseNCName(ctxt);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008216 if (l == NULL) {
8217 if (CUR == ':') {
8218 l = xmlParseName(ctxt);
8219 if (l != NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008220 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8221 "Failed to parse QName '%s'\n", l, NULL, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008222 *prefix = NULL;
8223 return(l);
8224 }
8225 }
8226 return(NULL);
8227 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008228 if (CUR == ':') {
8229 NEXT;
8230 p = l;
8231 l = xmlParseNCName(ctxt);
8232 if (l == NULL) {
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008233 xmlChar *tmp;
8234
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008235 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8236 "Failed to parse QName '%s:'\n", p, NULL, NULL);
Daniel Veillardae0765b2008-07-31 19:54:59 +00008237 l = xmlParseNmtoken(ctxt);
8238 if (l == NULL)
8239 tmp = xmlBuildQName(BAD_CAST "", p, NULL, 0);
8240 else {
8241 tmp = xmlBuildQName(l, p, NULL, 0);
8242 xmlFree((char *)l);
8243 }
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008244 p = xmlDictLookup(ctxt->dict, tmp, -1);
8245 if (tmp != NULL) xmlFree(tmp);
8246 *prefix = NULL;
8247 return(p);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008248 }
8249 if (CUR == ':') {
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008250 xmlChar *tmp;
8251
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008252 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8253 "Failed to parse QName '%s:%s:'\n", p, l, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008254 NEXT;
8255 tmp = (xmlChar *) xmlParseName(ctxt);
8256 if (tmp != NULL) {
8257 tmp = xmlBuildQName(tmp, l, NULL, 0);
8258 l = xmlDictLookup(ctxt->dict, tmp, -1);
8259 if (tmp != NULL) xmlFree(tmp);
8260 *prefix = p;
8261 return(l);
8262 }
8263 tmp = xmlBuildQName(BAD_CAST "", l, NULL, 0);
8264 l = xmlDictLookup(ctxt->dict, tmp, -1);
8265 if (tmp != NULL) xmlFree(tmp);
8266 *prefix = p;
8267 return(l);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008268 }
8269 *prefix = p;
8270 } else
8271 *prefix = NULL;
8272 return(l);
8273}
8274
8275/**
8276 * xmlParseQNameAndCompare:
8277 * @ctxt: an XML parser context
8278 * @name: the localname
8279 * @prefix: the prefix, if any.
8280 *
8281 * parse an XML name and compares for match
8282 * (specialized for endtag parsing)
8283 *
8284 * Returns NULL for an illegal name, (xmlChar*) 1 for success
8285 * and the name for mismatch
8286 */
8287
8288static const xmlChar *
8289xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *name,
8290 xmlChar const *prefix) {
8291 const xmlChar *cmp = name;
8292 const xmlChar *in;
8293 const xmlChar *ret;
8294 const xmlChar *prefix2;
8295
8296 if (prefix == NULL) return(xmlParseNameAndCompare(ctxt, name));
8297
8298 GROW;
8299 in = ctxt->input->cur;
8300
8301 cmp = prefix;
8302 while (*in != 0 && *in == *cmp) {
8303 ++in;
8304 ++cmp;
8305 }
8306 if ((*cmp == 0) && (*in == ':')) {
8307 in++;
8308 cmp = name;
8309 while (*in != 0 && *in == *cmp) {
8310 ++in;
8311 ++cmp;
8312 }
William M. Brack76e95df2003-10-18 16:20:14 +00008313 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00008314 /* success */
8315 ctxt->input->cur = in;
8316 return((const xmlChar*) 1);
8317 }
8318 }
8319 /*
8320 * all strings coms from the dictionary, equality can be done directly
8321 */
8322 ret = xmlParseQName (ctxt, &prefix2);
8323 if ((ret == name) && (prefix == prefix2))
8324 return((const xmlChar*) 1);
8325 return ret;
8326}
8327
8328/**
8329 * xmlParseAttValueInternal:
8330 * @ctxt: an XML parser context
8331 * @len: attribute len result
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008332 * @alloc: whether the attribute was reallocated as a new string
8333 * @normalize: if 1 then further non-CDATA normalization must be done
Daniel Veillard0fb18932003-09-07 09:14:37 +00008334 *
8335 * parse a value for an attribute.
8336 * NOTE: if no normalization is needed, the routine will return pointers
8337 * directly from the data buffer.
8338 *
8339 * 3.3.3 Attribute-Value Normalization:
8340 * Before the value of an attribute is passed to the application or
8341 * checked for validity, the XML processor must normalize it as follows:
8342 * - a character reference is processed by appending the referenced
8343 * character to the attribute value
8344 * - an entity reference is processed by recursively processing the
8345 * replacement text of the entity
8346 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
8347 * appending #x20 to the normalized value, except that only a single
8348 * #x20 is appended for a "#xD#xA" sequence that is part of an external
8349 * parsed entity or the literal entity value of an internal parsed entity
8350 * - other characters are processed by appending them to the normalized value
8351 * If the declared value is not CDATA, then the XML processor must further
8352 * process the normalized attribute value by discarding any leading and
8353 * trailing space (#x20) characters, and by replacing sequences of space
8354 * (#x20) characters by a single space (#x20) character.
8355 * All attributes for which no declaration has been read should be treated
8356 * by a non-validating parser as if declared CDATA.
8357 *
8358 * Returns the AttValue parsed or NULL. The value has to be freed by the
8359 * caller if it was copied, this can be detected by val[*len] == 0.
8360 */
8361
8362static xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008363xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *len, int *alloc,
8364 int normalize)
Daniel Veillarde57ec792003-09-10 10:50:59 +00008365{
Daniel Veillard0fb18932003-09-07 09:14:37 +00008366 xmlChar limit = 0;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008367 const xmlChar *in = NULL, *start, *end, *last;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008368 xmlChar *ret = NULL;
8369
8370 GROW;
8371 in = (xmlChar *) CUR_PTR;
8372 if (*in != '"' && *in != '\'') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008373 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008374 return (NULL);
8375 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008376 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008377
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008378 /*
8379 * try to handle in this routine the most common case where no
8380 * allocation of a new string is required and where content is
8381 * pure ASCII.
8382 */
8383 limit = *in++;
8384 end = ctxt->input->end;
8385 start = in;
8386 if (in >= end) {
8387 const xmlChar *oldbase = ctxt->input->base;
8388 GROW;
8389 if (oldbase != ctxt->input->base) {
8390 long delta = ctxt->input->base - oldbase;
8391 start = start + delta;
8392 in = in + delta;
8393 }
8394 end = ctxt->input->end;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008395 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008396 if (normalize) {
8397 /*
8398 * Skip any leading spaces
8399 */
8400 while ((in < end) && (*in != limit) &&
8401 ((*in == 0x20) || (*in == 0x9) ||
8402 (*in == 0xA) || (*in == 0xD))) {
8403 in++;
8404 start = in;
8405 if (in >= end) {
8406 const xmlChar *oldbase = ctxt->input->base;
8407 GROW;
8408 if (oldbase != ctxt->input->base) {
8409 long delta = ctxt->input->base - oldbase;
8410 start = start + delta;
8411 in = in + delta;
8412 }
8413 end = ctxt->input->end;
8414 }
8415 }
8416 while ((in < end) && (*in != limit) && (*in >= 0x20) &&
8417 (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
8418 if ((*in++ == 0x20) && (*in == 0x20)) break;
8419 if (in >= end) {
8420 const xmlChar *oldbase = ctxt->input->base;
8421 GROW;
8422 if (oldbase != ctxt->input->base) {
8423 long delta = ctxt->input->base - oldbase;
8424 start = start + delta;
8425 in = in + delta;
8426 }
8427 end = ctxt->input->end;
8428 }
8429 }
8430 last = in;
8431 /*
8432 * skip the trailing blanks
8433 */
Daniel Veillardc6e20e42003-09-11 16:30:26 +00008434 while ((last[-1] == 0x20) && (last > start)) last--;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008435 while ((in < end) && (*in != limit) &&
8436 ((*in == 0x20) || (*in == 0x9) ||
8437 (*in == 0xA) || (*in == 0xD))) {
8438 in++;
8439 if (in >= end) {
8440 const xmlChar *oldbase = ctxt->input->base;
8441 GROW;
8442 if (oldbase != ctxt->input->base) {
8443 long delta = ctxt->input->base - oldbase;
8444 start = start + delta;
8445 in = in + delta;
8446 last = last + delta;
8447 }
8448 end = ctxt->input->end;
8449 }
8450 }
8451 if (*in != limit) goto need_complex;
8452 } else {
8453 while ((in < end) && (*in != limit) && (*in >= 0x20) &&
8454 (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
8455 in++;
8456 if (in >= end) {
8457 const xmlChar *oldbase = ctxt->input->base;
8458 GROW;
8459 if (oldbase != ctxt->input->base) {
8460 long delta = ctxt->input->base - oldbase;
8461 start = start + delta;
8462 in = in + delta;
8463 }
8464 end = ctxt->input->end;
8465 }
8466 }
8467 last = in;
8468 if (*in != limit) goto need_complex;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008469 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008470 in++;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008471 if (len != NULL) {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008472 *len = last - start;
8473 ret = (xmlChar *) start;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008474 } else {
Daniel Veillarde57ec792003-09-10 10:50:59 +00008475 if (alloc) *alloc = 1;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008476 ret = xmlStrndup(start, last - start);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008477 }
8478 CUR_PTR = in;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008479 if (alloc) *alloc = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008480 return ret;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008481need_complex:
8482 if (alloc) *alloc = 1;
8483 return xmlParseAttValueComplex(ctxt, len, normalize);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008484}
8485
8486/**
8487 * xmlParseAttribute2:
8488 * @ctxt: an XML parser context
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008489 * @pref: the element prefix
8490 * @elem: the element name
8491 * @prefix: a xmlChar ** used to store the value of the attribute prefix
Daniel Veillard0fb18932003-09-07 09:14:37 +00008492 * @value: a xmlChar ** used to store the value of the attribute
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008493 * @len: an int * to save the length of the attribute
8494 * @alloc: an int * to indicate if the attribute was allocated
Daniel Veillard0fb18932003-09-07 09:14:37 +00008495 *
8496 * parse an attribute in the new SAX2 framework.
8497 *
8498 * Returns the attribute name, and the value in *value, .
8499 */
8500
8501static const xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008502xmlParseAttribute2(xmlParserCtxtPtr ctxt,
Daniel Veillard97c9ce22008-03-25 16:52:41 +00008503 const xmlChar * pref, const xmlChar * elem,
8504 const xmlChar ** prefix, xmlChar ** value,
8505 int *len, int *alloc)
8506{
Daniel Veillard0fb18932003-09-07 09:14:37 +00008507 const xmlChar *name;
Daniel Veillardd8925572005-06-08 22:34:55 +00008508 xmlChar *val, *internal_val = NULL;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008509 int normalize = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008510
8511 *value = NULL;
8512 GROW;
8513 name = xmlParseQName(ctxt, prefix);
8514 if (name == NULL) {
Daniel Veillard97c9ce22008-03-25 16:52:41 +00008515 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8516 "error parsing attribute name\n");
8517 return (NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008518 }
8519
8520 /*
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008521 * get the type if needed
8522 */
8523 if (ctxt->attsSpecial != NULL) {
8524 int type;
8525
8526 type = (int) (long) xmlHashQLookup2(ctxt->attsSpecial,
Daniel Veillard97c9ce22008-03-25 16:52:41 +00008527 pref, elem, *prefix, name);
8528 if (type != 0)
8529 normalize = 1;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008530 }
8531
8532 /*
Daniel Veillard0fb18932003-09-07 09:14:37 +00008533 * read the value
8534 */
8535 SKIP_BLANKS;
8536 if (RAW == '=') {
8537 NEXT;
Daniel Veillard97c9ce22008-03-25 16:52:41 +00008538 SKIP_BLANKS;
8539 val = xmlParseAttValueInternal(ctxt, len, alloc, normalize);
8540 if (normalize) {
8541 /*
8542 * Sometimes a second normalisation pass for spaces is needed
8543 * but that only happens if charrefs or entities refernces
8544 * have been used in the attribute value, i.e. the attribute
8545 * value have been extracted in an allocated string already.
8546 */
8547 if (*alloc) {
8548 const xmlChar *val2;
8549
8550 val2 = xmlAttrNormalizeSpace2(ctxt, val, len);
Daniel Veillardae0765b2008-07-31 19:54:59 +00008551 if ((val2 != NULL) && (val2 != val)) {
Daniel Veillard97c9ce22008-03-25 16:52:41 +00008552 xmlFree(val);
Daniel Veillard6a31b832008-03-26 14:06:44 +00008553 val = (xmlChar *) val2;
Daniel Veillard97c9ce22008-03-25 16:52:41 +00008554 }
8555 }
8556 }
8557 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008558 } else {
Daniel Veillard97c9ce22008-03-25 16:52:41 +00008559 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
8560 "Specification mandate value for attribute %s\n",
8561 name);
8562 return (NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008563 }
8564
Daniel Veillard97c9ce22008-03-25 16:52:41 +00008565 if (*prefix == ctxt->str_xml) {
8566 /*
8567 * Check that xml:lang conforms to the specification
8568 * No more registered as an error, just generate a warning now
8569 * since this was deprecated in XML second edition
8570 */
8571 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "lang"))) {
8572 internal_val = xmlStrndup(val, *len);
8573 if (!xmlCheckLanguageID(internal_val)) {
8574 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
8575 "Malformed value for xml:lang : %s\n",
8576 internal_val, NULL);
8577 }
8578 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008579
Daniel Veillard97c9ce22008-03-25 16:52:41 +00008580 /*
8581 * Check that xml:space conforms to the specification
8582 */
8583 if (xmlStrEqual(name, BAD_CAST "space")) {
8584 internal_val = xmlStrndup(val, *len);
8585 if (xmlStrEqual(internal_val, BAD_CAST "default"))
8586 *(ctxt->space) = 0;
8587 else if (xmlStrEqual(internal_val, BAD_CAST "preserve"))
8588 *(ctxt->space) = 1;
8589 else {
8590 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
8591 "Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
8592 internal_val, NULL);
8593 }
8594 }
8595 if (internal_val) {
8596 xmlFree(internal_val);
8597 }
8598 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008599
8600 *value = val;
Daniel Veillard97c9ce22008-03-25 16:52:41 +00008601 return (name);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008602}
Daniel Veillard0fb18932003-09-07 09:14:37 +00008603/**
8604 * xmlParseStartTag2:
8605 * @ctxt: an XML parser context
8606 *
8607 * parse a start of tag either for rule element or
8608 * EmptyElement. In both case we don't parse the tag closing chars.
8609 * This routine is called when running SAX2 parsing
8610 *
8611 * [40] STag ::= '<' Name (S Attribute)* S? '>'
8612 *
8613 * [ WFC: Unique Att Spec ]
8614 * No attribute name may appear more than once in the same start-tag or
8615 * empty-element tag.
8616 *
8617 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
8618 *
8619 * [ WFC: Unique Att Spec ]
8620 * No attribute name may appear more than once in the same start-tag or
8621 * empty-element tag.
8622 *
8623 * With namespace:
8624 *
8625 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
8626 *
8627 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
8628 *
8629 * Returns the element name parsed
8630 */
8631
8632static const xmlChar *
8633xmlParseStartTag2(xmlParserCtxtPtr ctxt, const xmlChar **pref,
Daniel Veillardc82c57e2004-01-12 16:24:34 +00008634 const xmlChar **URI, int *tlen) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00008635 const xmlChar *localname;
8636 const xmlChar *prefix;
8637 const xmlChar *attname;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008638 const xmlChar *aprefix;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008639 const xmlChar *nsname;
8640 xmlChar *attvalue;
8641 const xmlChar **atts = ctxt->atts;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008642 int maxatts = ctxt->maxatts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008643 int nratts, nbatts, nbdef;
Daniel Veillarddcec6722006-10-15 20:32:53 +00008644 int i, j, nbNs, attval, oldline, oldcol;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008645 const xmlChar *base;
8646 unsigned long cur;
Daniel Veillard365cf672005-06-09 08:18:24 +00008647 int nsNr = ctxt->nsNr;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008648
8649 if (RAW != '<') return(NULL);
8650 NEXT1;
8651
8652 /*
8653 * NOTE: it is crucial with the SAX2 API to never call SHRINK beyond that
8654 * point since the attribute values may be stored as pointers to
8655 * the buffer and calling SHRINK would destroy them !
8656 * The Shrinking is only possible once the full set of attribute
8657 * callbacks have been done.
8658 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00008659reparse:
Daniel Veillard0fb18932003-09-07 09:14:37 +00008660 SHRINK;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008661 base = ctxt->input->base;
8662 cur = ctxt->input->cur - ctxt->input->base;
Daniel Veillarddcec6722006-10-15 20:32:53 +00008663 oldline = ctxt->input->line;
8664 oldcol = ctxt->input->col;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008665 nbatts = 0;
8666 nratts = 0;
8667 nbdef = 0;
8668 nbNs = 0;
8669 attval = 0;
Daniel Veillard365cf672005-06-09 08:18:24 +00008670 /* Forget any namespaces added during an earlier parse of this element. */
8671 ctxt->nsNr = nsNr;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008672
8673 localname = xmlParseQName(ctxt, &prefix);
8674 if (localname == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008675 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8676 "StartTag: invalid element name\n");
Daniel Veillard0fb18932003-09-07 09:14:37 +00008677 return(NULL);
8678 }
Daniel Veillardc82c57e2004-01-12 16:24:34 +00008679 *tlen = ctxt->input->cur - ctxt->input->base - cur;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008680
8681 /*
8682 * Now parse the attributes, it ends up with the ending
8683 *
8684 * (S Attribute)* S?
8685 */
8686 SKIP_BLANKS;
8687 GROW;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008688 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008689
8690 while ((RAW != '>') &&
8691 ((RAW != '/') || (NXT(1) != '>')) &&
Daniel Veillard73b013f2003-09-30 12:36:01 +00008692 (IS_BYTE_CHAR(RAW))) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00008693 const xmlChar *q = CUR_PTR;
8694 unsigned int cons = ctxt->input->consumed;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008695 int len = -1, alloc = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008696
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008697 attname = xmlParseAttribute2(ctxt, prefix, localname,
8698 &aprefix, &attvalue, &len, &alloc);
Daniel Veillarddcec6722006-10-15 20:32:53 +00008699 if (ctxt->input->base != base) {
8700 if ((attvalue != NULL) && (alloc != 0))
8701 xmlFree(attvalue);
8702 attvalue = NULL;
8703 goto base_changed;
8704 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008705 if ((attname != NULL) && (attvalue != NULL)) {
8706 if (len < 0) len = xmlStrlen(attvalue);
8707 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00008708 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
8709 xmlURIPtr uri;
8710
8711 if (*URL != 0) {
8712 uri = xmlParseURI((const char *) URL);
8713 if (uri == NULL) {
Daniel Veillard37334572008-07-31 08:20:02 +00008714 xmlNsErr(ctxt, XML_WAR_NS_URI,
8715 "xmlns: '%s' is not a valid URI\n",
8716 URL, NULL, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008717 } else {
Daniel Veillardda3fee42008-09-01 13:08:57 +00008718 if (uri->scheme == NULL) {
Daniel Veillard37334572008-07-31 08:20:02 +00008719 xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
8720 "xmlns: URI %s is not absolute\n",
8721 URL, NULL, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008722 }
8723 xmlFreeURI(uri);
8724 }
Daniel Veillard37334572008-07-31 08:20:02 +00008725 if (URL == ctxt->str_xml_ns) {
8726 if (attname != ctxt->str_xml) {
8727 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8728 "xml namespace URI cannot be the default namespace\n",
8729 NULL, NULL, NULL);
8730 }
8731 goto skip_default_ns;
8732 }
8733 if ((len == 29) &&
8734 (xmlStrEqual(URL,
8735 BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
8736 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8737 "reuse of the xmlns namespace name is forbidden\n",
8738 NULL, NULL, NULL);
8739 goto skip_default_ns;
8740 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00008741 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008742 /*
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008743 * check that it's not a defined namespace
Daniel Veillard0fb18932003-09-07 09:14:37 +00008744 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008745 for (j = 1;j <= nbNs;j++)
8746 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
8747 break;
8748 if (j <= nbNs)
8749 xmlErrAttributeDup(ctxt, NULL, attname);
8750 else
8751 if (nsPush(ctxt, NULL, URL) > 0) nbNs++;
Daniel Veillard37334572008-07-31 08:20:02 +00008752skip_default_ns:
Daniel Veillarde57ec792003-09-10 10:50:59 +00008753 if (alloc != 0) xmlFree(attvalue);
8754 SKIP_BLANKS;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008755 continue;
8756 }
8757 if (aprefix == ctxt->str_xmlns) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00008758 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
8759 xmlURIPtr uri;
8760
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008761 if (attname == ctxt->str_xml) {
8762 if (URL != ctxt->str_xml_ns) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008763 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8764 "xml namespace prefix mapped to wrong URI\n",
8765 NULL, NULL, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008766 }
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008767 /*
8768 * Do not keep a namespace definition node
8769 */
Daniel Veillard37334572008-07-31 08:20:02 +00008770 goto skip_ns;
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008771 }
Daniel Veillard37334572008-07-31 08:20:02 +00008772 if (URL == ctxt->str_xml_ns) {
8773 if (attname != ctxt->str_xml) {
8774 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8775 "xml namespace URI mapped to wrong prefix\n",
8776 NULL, NULL, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008777 }
Daniel Veillard37334572008-07-31 08:20:02 +00008778 goto skip_ns;
8779 }
8780 if (attname == ctxt->str_xmlns) {
8781 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8782 "redefinition of the xmlns prefix is forbidden\n",
8783 NULL, NULL, NULL);
8784 goto skip_ns;
8785 }
8786 if ((len == 29) &&
8787 (xmlStrEqual(URL,
8788 BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
8789 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8790 "reuse of the xmlns namespace name is forbidden\n",
8791 NULL, NULL, NULL);
8792 goto skip_ns;
8793 }
8794 if ((URL == NULL) || (URL[0] == 0)) {
8795 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8796 "xmlns:%s: Empty XML namespace is not allowed\n",
8797 attname, NULL, NULL);
8798 goto skip_ns;
8799 } else {
8800 uri = xmlParseURI((const char *) URL);
8801 if (uri == NULL) {
8802 xmlNsErr(ctxt, XML_WAR_NS_URI,
8803 "xmlns:%s: '%s' is not a valid URI\n",
8804 attname, URL, NULL);
8805 } else {
8806 if ((ctxt->pedantic) && (uri->scheme == NULL)) {
8807 xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
8808 "xmlns:%s: URI %s is not absolute\n",
8809 attname, URL, NULL);
8810 }
8811 xmlFreeURI(uri);
8812 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00008813 }
8814
Daniel Veillard0fb18932003-09-07 09:14:37 +00008815 /*
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008816 * check that it's not a defined namespace
Daniel Veillard0fb18932003-09-07 09:14:37 +00008817 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008818 for (j = 1;j <= nbNs;j++)
8819 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
8820 break;
8821 if (j <= nbNs)
8822 xmlErrAttributeDup(ctxt, aprefix, attname);
8823 else
8824 if (nsPush(ctxt, attname, URL) > 0) nbNs++;
Daniel Veillard37334572008-07-31 08:20:02 +00008825skip_ns:
Daniel Veillarde57ec792003-09-10 10:50:59 +00008826 if (alloc != 0) xmlFree(attvalue);
8827 SKIP_BLANKS;
Daniel Veillardd3999c72004-03-10 16:27:03 +00008828 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008829 continue;
8830 }
8831
8832 /*
8833 * Add the pair to atts
8834 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00008835 if ((atts == NULL) || (nbatts + 5 > maxatts)) {
8836 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00008837 if (attvalue[len] == 0)
8838 xmlFree(attvalue);
8839 goto failed;
8840 }
8841 maxatts = ctxt->maxatts;
8842 atts = ctxt->atts;
8843 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00008844 ctxt->attallocs[nratts++] = alloc;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008845 atts[nbatts++] = attname;
8846 atts[nbatts++] = aprefix;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008847 atts[nbatts++] = NULL; /* the URI will be fetched later */
Daniel Veillard0fb18932003-09-07 09:14:37 +00008848 atts[nbatts++] = attvalue;
8849 attvalue += len;
8850 atts[nbatts++] = attvalue;
8851 /*
8852 * tag if some deallocation is needed
8853 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00008854 if (alloc != 0) attval = 1;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008855 } else {
8856 if ((attvalue != NULL) && (attvalue[len] == 0))
8857 xmlFree(attvalue);
8858 }
8859
Daniel Veillard37334572008-07-31 08:20:02 +00008860failed:
Daniel Veillard0fb18932003-09-07 09:14:37 +00008861
8862 GROW
Daniel Veillarde57ec792003-09-10 10:50:59 +00008863 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008864 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
8865 break;
William M. Brack76e95df2003-10-18 16:20:14 +00008866 if (!IS_BLANK_CH(RAW)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008867 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
8868 "attributes construct error\n");
William M. Brack13dfa872004-09-18 04:52:08 +00008869 break;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008870 }
8871 SKIP_BLANKS;
8872 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
8873 (attname == NULL) && (attvalue == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008874 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
Daniel Veillard0fb18932003-09-07 09:14:37 +00008875 "xmlParseStartTag: problem parsing attributes\n");
Daniel Veillard0fb18932003-09-07 09:14:37 +00008876 break;
8877 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008878 GROW;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008879 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008880 }
8881
Daniel Veillard0fb18932003-09-07 09:14:37 +00008882 /*
Daniel Veillarde57ec792003-09-10 10:50:59 +00008883 * The attributes defaulting
8884 */
8885 if (ctxt->attsDefault != NULL) {
8886 xmlDefAttrsPtr defaults;
8887
8888 defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix);
8889 if (defaults != NULL) {
8890 for (i = 0;i < defaults->nbAttrs;i++) {
Daniel Veillardae0765b2008-07-31 19:54:59 +00008891 attname = defaults->values[5 * i];
8892 aprefix = defaults->values[5 * i + 1];
Daniel Veillarde57ec792003-09-10 10:50:59 +00008893
8894 /*
8895 * special work for namespaces defaulted defs
8896 */
8897 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
8898 /*
8899 * check that it's not a defined namespace
8900 */
8901 for (j = 1;j <= nbNs;j++)
8902 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
8903 break;
8904 if (j <= nbNs) continue;
8905
8906 nsname = xmlGetNamespace(ctxt, NULL);
Daniel Veillardae0765b2008-07-31 19:54:59 +00008907 if (nsname != defaults->values[5 * i + 2]) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00008908 if (nsPush(ctxt, NULL,
Daniel Veillardae0765b2008-07-31 19:54:59 +00008909 defaults->values[5 * i + 2]) > 0)
Daniel Veillarde57ec792003-09-10 10:50:59 +00008910 nbNs++;
8911 }
8912 } else if (aprefix == ctxt->str_xmlns) {
8913 /*
8914 * check that it's not a defined namespace
8915 */
8916 for (j = 1;j <= nbNs;j++)
8917 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
8918 break;
8919 if (j <= nbNs) continue;
8920
8921 nsname = xmlGetNamespace(ctxt, attname);
8922 if (nsname != defaults->values[2]) {
8923 if (nsPush(ctxt, attname,
Daniel Veillardae0765b2008-07-31 19:54:59 +00008924 defaults->values[5 * i + 2]) > 0)
Daniel Veillarde57ec792003-09-10 10:50:59 +00008925 nbNs++;
8926 }
8927 } else {
8928 /*
8929 * check that it's not a defined attribute
8930 */
8931 for (j = 0;j < nbatts;j+=5) {
8932 if ((attname == atts[j]) && (aprefix == atts[j+1]))
8933 break;
8934 }
8935 if (j < nbatts) continue;
8936
8937 if ((atts == NULL) || (nbatts + 5 > maxatts)) {
8938 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
Daniel Veillard9ee35f32003-09-28 00:19:54 +00008939 return(NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008940 }
8941 maxatts = ctxt->maxatts;
8942 atts = ctxt->atts;
8943 }
8944 atts[nbatts++] = attname;
8945 atts[nbatts++] = aprefix;
8946 if (aprefix == NULL)
8947 atts[nbatts++] = NULL;
8948 else
8949 atts[nbatts++] = xmlGetNamespace(ctxt, aprefix);
Daniel Veillardae0765b2008-07-31 19:54:59 +00008950 atts[nbatts++] = defaults->values[5 * i + 2];
8951 atts[nbatts++] = defaults->values[5 * i + 3];
8952 if ((ctxt->standalone == 1) &&
8953 (defaults->values[5 * i + 4] != NULL)) {
8954 xmlValidityError(ctxt, XML_DTD_STANDALONE_DEFAULTED,
8955 "standalone: attribute %s on %s defaulted from external subset\n",
8956 attname, localname);
8957 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00008958 nbdef++;
8959 }
8960 }
8961 }
8962 }
8963
Daniel Veillarde70c8772003-11-25 07:21:18 +00008964 /*
8965 * The attributes checkings
8966 */
8967 for (i = 0; i < nbatts;i += 5) {
Kasimier T. Buchcik455472f2005-04-29 10:04:43 +00008968 /*
8969 * The default namespace does not apply to attribute names.
8970 */
8971 if (atts[i + 1] != NULL) {
8972 nsname = xmlGetNamespace(ctxt, atts[i + 1]);
8973 if (nsname == NULL) {
8974 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
8975 "Namespace prefix %s for %s on %s is not defined\n",
8976 atts[i + 1], atts[i], localname);
8977 }
8978 atts[i + 2] = nsname;
8979 } else
8980 nsname = NULL;
Daniel Veillarde70c8772003-11-25 07:21:18 +00008981 /*
8982 * [ WFC: Unique Att Spec ]
8983 * No attribute name may appear more than once in the same
8984 * start-tag or empty-element tag.
8985 * As extended by the Namespace in XML REC.
8986 */
8987 for (j = 0; j < i;j += 5) {
8988 if (atts[i] == atts[j]) {
8989 if (atts[i+1] == atts[j+1]) {
8990 xmlErrAttributeDup(ctxt, atts[i+1], atts[i]);
8991 break;
8992 }
8993 if ((nsname != NULL) && (atts[j + 2] == nsname)) {
8994 xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED,
8995 "Namespaced Attribute %s in '%s' redefined\n",
8996 atts[i], nsname, NULL);
8997 break;
8998 }
8999 }
9000 }
9001 }
9002
Daniel Veillarde57ec792003-09-10 10:50:59 +00009003 nsname = xmlGetNamespace(ctxt, prefix);
9004 if ((prefix != NULL) && (nsname == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009005 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9006 "Namespace prefix %s on %s is not defined\n",
9007 prefix, localname, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00009008 }
9009 *pref = prefix;
9010 *URI = nsname;
9011
9012 /*
9013 * SAX: Start of Element !
9014 */
9015 if ((ctxt->sax != NULL) && (ctxt->sax->startElementNs != NULL) &&
9016 (!ctxt->disableSAX)) {
9017 if (nbNs > 0)
9018 ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
9019 nsname, nbNs, &ctxt->nsTab[ctxt->nsNr - 2 * nbNs],
9020 nbatts / 5, nbdef, atts);
9021 else
9022 ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
9023 nsname, 0, NULL, nbatts / 5, nbdef, atts);
9024 }
9025
9026 /*
9027 * Free up attribute allocated strings if needed
9028 */
9029 if (attval != 0) {
9030 for (i = 3,j = 0; j < nratts;i += 5,j++)
9031 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
9032 xmlFree((xmlChar *) atts[i]);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009033 }
9034
9035 return(localname);
Daniel Veillarde57ec792003-09-10 10:50:59 +00009036
9037base_changed:
9038 /*
9039 * the attribute strings are valid iif the base didn't changed
9040 */
9041 if (attval != 0) {
9042 for (i = 3,j = 0; j < nratts;i += 5,j++)
9043 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
9044 xmlFree((xmlChar *) atts[i]);
9045 }
9046 ctxt->input->cur = ctxt->input->base + cur;
Daniel Veillarddcec6722006-10-15 20:32:53 +00009047 ctxt->input->line = oldline;
9048 ctxt->input->col = oldcol;
Daniel Veillarde57ec792003-09-10 10:50:59 +00009049 if (ctxt->wellFormed == 1) {
9050 goto reparse;
9051 }
9052 return(NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009053}
9054
9055/**
9056 * xmlParseEndTag2:
9057 * @ctxt: an XML parser context
9058 * @line: line of the start tag
9059 * @nsNr: number of namespaces on the start tag
9060 *
9061 * parse an end of tag
9062 *
9063 * [42] ETag ::= '</' Name S? '>'
9064 *
9065 * With namespace
9066 *
9067 * [NS 9] ETag ::= '</' QName S? '>'
9068 */
9069
9070static void
9071xmlParseEndTag2(xmlParserCtxtPtr ctxt, const xmlChar *prefix,
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009072 const xmlChar *URI, int line, int nsNr, int tlen) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00009073 const xmlChar *name;
9074
9075 GROW;
9076 if ((RAW != '<') || (NXT(1) != '/')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009077 xmlFatalErr(ctxt, XML_ERR_LTSLASH_REQUIRED, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009078 return;
9079 }
9080 SKIP(2);
9081
William M. Brack13dfa872004-09-18 04:52:08 +00009082 if ((tlen > 0) && (xmlStrncmp(ctxt->input->cur, ctxt->name, tlen) == 0)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009083 if (ctxt->input->cur[tlen] == '>') {
9084 ctxt->input->cur += tlen + 1;
9085 goto done;
9086 }
9087 ctxt->input->cur += tlen;
9088 name = (xmlChar*)1;
9089 } else {
9090 if (prefix == NULL)
9091 name = xmlParseNameAndCompare(ctxt, ctxt->name);
9092 else
9093 name = xmlParseQNameAndCompare(ctxt, ctxt->name, prefix);
9094 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00009095
9096 /*
9097 * We should definitely be at the ending "S? '>'" part
9098 */
9099 GROW;
9100 SKIP_BLANKS;
Daniel Veillard73b013f2003-09-30 12:36:01 +00009101 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009102 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009103 } else
9104 NEXT1;
9105
9106 /*
9107 * [ WFC: Element Type Match ]
9108 * The Name in an element's end-tag must match the element type in the
9109 * start-tag.
9110 *
9111 */
9112 if (name != (xmlChar*)1) {
Daniel Veillardf403d292003-10-05 13:51:35 +00009113 if (name == NULL) name = BAD_CAST "unparseable";
9114 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
Daniel Veillard0fb18932003-09-07 09:14:37 +00009115 "Opening and ending tag mismatch: %s line %d and %s\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00009116 ctxt->name, line, name);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009117 }
9118
9119 /*
9120 * SAX: End of Tag
9121 */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009122done:
Daniel Veillard0fb18932003-09-07 09:14:37 +00009123 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
9124 (!ctxt->disableSAX))
9125 ctxt->sax->endElementNs(ctxt->userData, ctxt->name, prefix, URI);
9126
Daniel Veillard0fb18932003-09-07 09:14:37 +00009127 spacePop(ctxt);
9128 if (nsNr != 0)
9129 nsPop(ctxt, nsNr);
9130 return;
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00009131}
9132
9133/**
Owen Taylor3473f882001-02-23 17:55:21 +00009134 * xmlParseCDSect:
9135 * @ctxt: an XML parser context
9136 *
9137 * Parse escaped pure raw content.
9138 *
9139 * [18] CDSect ::= CDStart CData CDEnd
9140 *
9141 * [19] CDStart ::= '<![CDATA['
9142 *
9143 * [20] Data ::= (Char* - (Char* ']]>' Char*))
9144 *
9145 * [21] CDEnd ::= ']]>'
9146 */
9147void
9148xmlParseCDSect(xmlParserCtxtPtr ctxt) {
9149 xmlChar *buf = NULL;
9150 int len = 0;
9151 int size = XML_PARSER_BUFFER_SIZE;
9152 int r, rl;
9153 int s, sl;
9154 int cur, l;
9155 int count = 0;
9156
Daniel Veillard8f597c32003-10-06 08:19:27 +00009157 /* Check 2.6.0 was NXT(0) not RAW */
Daniel Veillarda07050d2003-10-19 14:46:32 +00009158 if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009159 SKIP(9);
9160 } else
9161 return;
9162
9163 ctxt->instate = XML_PARSER_CDATA_SECTION;
9164 r = CUR_CHAR(rl);
William M. Brack871611b2003-10-18 04:53:14 +00009165 if (!IS_CHAR(r)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009166 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009167 ctxt->instate = XML_PARSER_CONTENT;
9168 return;
9169 }
9170 NEXTL(rl);
9171 s = CUR_CHAR(sl);
William M. Brack871611b2003-10-18 04:53:14 +00009172 if (!IS_CHAR(s)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009173 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009174 ctxt->instate = XML_PARSER_CONTENT;
9175 return;
9176 }
9177 NEXTL(sl);
9178 cur = CUR_CHAR(l);
Daniel Veillard3c908dc2003-04-19 00:07:51 +00009179 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00009180 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009181 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009182 return;
9183 }
William M. Brack871611b2003-10-18 04:53:14 +00009184 while (IS_CHAR(cur) &&
Owen Taylor3473f882001-02-23 17:55:21 +00009185 ((r != ']') || (s != ']') || (cur != '>'))) {
9186 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00009187 xmlChar *tmp;
9188
Owen Taylor3473f882001-02-23 17:55:21 +00009189 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00009190 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
9191 if (tmp == NULL) {
9192 xmlFree(buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009193 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009194 return;
9195 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00009196 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00009197 }
9198 COPY_BUF(rl,buf,len,r);
9199 r = s;
9200 rl = sl;
9201 s = cur;
9202 sl = l;
9203 count++;
9204 if (count > 50) {
9205 GROW;
9206 count = 0;
9207 }
9208 NEXTL(l);
9209 cur = CUR_CHAR(l);
9210 }
9211 buf[len] = 0;
9212 ctxt->instate = XML_PARSER_CONTENT;
9213 if (cur != '>') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00009214 xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00009215 "CData section not finished\n%.50s\n", buf);
Owen Taylor3473f882001-02-23 17:55:21 +00009216 xmlFree(buf);
9217 return;
9218 }
9219 NEXTL(l);
9220
9221 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00009222 * OK the buffer is to be consumed as cdata.
Owen Taylor3473f882001-02-23 17:55:21 +00009223 */
9224 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
9225 if (ctxt->sax->cdataBlock != NULL)
9226 ctxt->sax->cdataBlock(ctxt->userData, buf, len);
Daniel Veillard7583a592001-07-08 13:15:55 +00009227 else if (ctxt->sax->characters != NULL)
9228 ctxt->sax->characters(ctxt->userData, buf, len);
Owen Taylor3473f882001-02-23 17:55:21 +00009229 }
9230 xmlFree(buf);
9231}
9232
9233/**
9234 * xmlParseContent:
9235 * @ctxt: an XML parser context
9236 *
9237 * Parse a content:
9238 *
9239 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
9240 */
9241
9242void
9243xmlParseContent(xmlParserCtxtPtr ctxt) {
9244 GROW;
Daniel Veillardfdc91562002-07-01 21:52:03 +00009245 while ((RAW != 0) &&
Daniel Veillard4a9fe382006-09-19 12:44:35 +00009246 ((RAW != '<') || (NXT(1) != '/')) &&
9247 (ctxt->instate != XML_PARSER_EOF)) {
Owen Taylor3473f882001-02-23 17:55:21 +00009248 const xmlChar *test = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00009249 unsigned int cons = ctxt->input->consumed;
Daniel Veillard21a0f912001-02-25 19:54:14 +00009250 const xmlChar *cur = ctxt->input->cur;
Owen Taylor3473f882001-02-23 17:55:21 +00009251
9252 /*
Owen Taylor3473f882001-02-23 17:55:21 +00009253 * First case : a Processing Instruction.
9254 */
Daniel Veillardfdc91562002-07-01 21:52:03 +00009255 if ((*cur == '<') && (cur[1] == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009256 xmlParsePI(ctxt);
9257 }
9258
9259 /*
9260 * Second case : a CDSection
9261 */
Daniel Veillard8f597c32003-10-06 08:19:27 +00009262 /* 2.6.0 test was *cur not RAW */
Daniel Veillarda07050d2003-10-19 14:46:32 +00009263 else if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009264 xmlParseCDSect(ctxt);
9265 }
9266
9267 /*
9268 * Third case : a comment
9269 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00009270 else if ((*cur == '<') && (NXT(1) == '!') &&
Owen Taylor3473f882001-02-23 17:55:21 +00009271 (NXT(2) == '-') && (NXT(3) == '-')) {
9272 xmlParseComment(ctxt);
9273 ctxt->instate = XML_PARSER_CONTENT;
9274 }
9275
9276 /*
9277 * Fourth case : a sub-element.
9278 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00009279 else if (*cur == '<') {
Owen Taylor3473f882001-02-23 17:55:21 +00009280 xmlParseElement(ctxt);
9281 }
9282
9283 /*
9284 * Fifth case : a reference. If if has not been resolved,
9285 * parsing returns it's Name, create the node
9286 */
9287
Daniel Veillard21a0f912001-02-25 19:54:14 +00009288 else if (*cur == '&') {
Owen Taylor3473f882001-02-23 17:55:21 +00009289 xmlParseReference(ctxt);
9290 }
9291
9292 /*
9293 * Last case, text. Note that References are handled directly.
9294 */
9295 else {
9296 xmlParseCharData(ctxt, 0);
9297 }
9298
9299 GROW;
9300 /*
9301 * Pop-up of finished entities.
9302 */
Daniel Veillard561b7f82002-03-20 21:55:57 +00009303 while ((RAW == 0) && (ctxt->inputNr > 1))
Owen Taylor3473f882001-02-23 17:55:21 +00009304 xmlPopInput(ctxt);
9305 SHRINK;
9306
Daniel Veillardfdc91562002-07-01 21:52:03 +00009307 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009308 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
9309 "detected an error in element content\n");
Owen Taylor3473f882001-02-23 17:55:21 +00009310 ctxt->instate = XML_PARSER_EOF;
9311 break;
9312 }
9313 }
9314}
9315
9316/**
9317 * xmlParseElement:
9318 * @ctxt: an XML parser context
9319 *
9320 * parse an XML element, this is highly recursive
9321 *
9322 * [39] element ::= EmptyElemTag | STag content ETag
9323 *
9324 * [ WFC: Element Type Match ]
9325 * The Name in an element's end-tag must match the element type in the
9326 * start-tag.
9327 *
Owen Taylor3473f882001-02-23 17:55:21 +00009328 */
9329
9330void
9331xmlParseElement(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00009332 const xmlChar *name;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009333 const xmlChar *prefix;
9334 const xmlChar *URI;
Owen Taylor3473f882001-02-23 17:55:21 +00009335 xmlParserNodeInfo node_info;
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009336 int line, tlen;
Owen Taylor3473f882001-02-23 17:55:21 +00009337 xmlNodePtr ret;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009338 int nsNr = ctxt->nsNr;
Owen Taylor3473f882001-02-23 17:55:21 +00009339
Daniel Veillard8915c152008-08-26 13:05:34 +00009340 if (((unsigned int) ctxt->nameNr > xmlParserMaxDepth) &&
9341 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9342 xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
9343 "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
9344 xmlParserMaxDepth);
Daniel Veillard4a9fe382006-09-19 12:44:35 +00009345 ctxt->instate = XML_PARSER_EOF;
9346 return;
9347 }
9348
Owen Taylor3473f882001-02-23 17:55:21 +00009349 /* Capture start position */
9350 if (ctxt->record_info) {
9351 node_info.begin_pos = ctxt->input->consumed +
9352 (CUR_PTR - ctxt->input->base);
9353 node_info.begin_line = ctxt->input->line;
9354 }
9355
9356 if (ctxt->spaceNr == 0)
9357 spacePush(ctxt, -1);
Daniel Veillard1114d002006-10-12 16:24:35 +00009358 else if (*ctxt->space == -2)
9359 spacePush(ctxt, -1);
Owen Taylor3473f882001-02-23 17:55:21 +00009360 else
9361 spacePush(ctxt, *ctxt->space);
9362
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00009363 line = ctxt->input->line;
Daniel Veillard81273902003-09-30 00:43:48 +00009364#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +00009365 if (ctxt->sax2)
Daniel Veillard81273902003-09-30 00:43:48 +00009366#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009367 name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
Daniel Veillard81273902003-09-30 00:43:48 +00009368#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +00009369 else
9370 name = xmlParseStartTag(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +00009371#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00009372 if (name == NULL) {
9373 spacePop(ctxt);
9374 return;
9375 }
9376 namePush(ctxt, name);
9377 ret = ctxt->node;
9378
Daniel Veillard4432df22003-09-28 18:58:27 +00009379#ifdef LIBXML_VALID_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +00009380 /*
9381 * [ VC: Root Element Type ]
9382 * The Name in the document type declaration must match the element
9383 * type of the root element.
9384 */
9385 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
9386 ctxt->node && (ctxt->node == ctxt->myDoc->children))
9387 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
Daniel Veillard4432df22003-09-28 18:58:27 +00009388#endif /* LIBXML_VALID_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00009389
9390 /*
9391 * Check for an Empty Element.
9392 */
9393 if ((RAW == '/') && (NXT(1) == '>')) {
9394 SKIP(2);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009395 if (ctxt->sax2) {
9396 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
9397 (!ctxt->disableSAX))
9398 ctxt->sax->endElementNs(ctxt->userData, name, prefix, URI);
Daniel Veillard81273902003-09-30 00:43:48 +00009399#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +00009400 } else {
9401 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
9402 (!ctxt->disableSAX))
9403 ctxt->sax->endElement(ctxt->userData, name);
Daniel Veillard81273902003-09-30 00:43:48 +00009404#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00009405 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00009406 namePop(ctxt);
9407 spacePop(ctxt);
9408 if (nsNr != ctxt->nsNr)
9409 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +00009410 if ( ret != NULL && ctxt->record_info ) {
9411 node_info.end_pos = ctxt->input->consumed +
9412 (CUR_PTR - ctxt->input->base);
9413 node_info.end_line = ctxt->input->line;
9414 node_info.node = ret;
9415 xmlParserAddNodeInfo(ctxt, &node_info);
9416 }
9417 return;
9418 }
9419 if (RAW == '>') {
Daniel Veillard21a0f912001-02-25 19:54:14 +00009420 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00009421 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00009422 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_GT_REQUIRED,
9423 "Couldn't find end of Start Tag %s line %d\n",
9424 name, line, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009425
9426 /*
9427 * end of parsing of this node.
9428 */
9429 nodePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009430 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00009431 spacePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009432 if (nsNr != ctxt->nsNr)
9433 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +00009434
9435 /*
9436 * Capture end position and add node
9437 */
9438 if ( ret != NULL && ctxt->record_info ) {
9439 node_info.end_pos = ctxt->input->consumed +
9440 (CUR_PTR - ctxt->input->base);
9441 node_info.end_line = ctxt->input->line;
9442 node_info.node = ret;
9443 xmlParserAddNodeInfo(ctxt, &node_info);
9444 }
9445 return;
9446 }
9447
9448 /*
9449 * Parse the content of the element:
9450 */
9451 xmlParseContent(ctxt);
Daniel Veillard73b013f2003-09-30 12:36:01 +00009452 if (!IS_BYTE_CHAR(RAW)) {
Daniel Veillardf403d292003-10-05 13:51:35 +00009453 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
Daniel Veillard2b0f8792003-10-10 19:36:36 +00009454 "Premature end of data in tag %s line %d\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00009455 name, line, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009456
9457 /*
9458 * end of parsing of this node.
9459 */
9460 nodePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009461 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00009462 spacePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009463 if (nsNr != ctxt->nsNr)
9464 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +00009465 return;
9466 }
9467
9468 /*
9469 * parse the end of tag: '</' should be here.
9470 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00009471 if (ctxt->sax2) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009472 xmlParseEndTag2(ctxt, prefix, URI, line, ctxt->nsNr - nsNr, tlen);
Daniel Veillarde57ec792003-09-10 10:50:59 +00009473 namePop(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +00009474 }
9475#ifdef LIBXML_SAX1_ENABLED
9476 else
Daniel Veillard0fb18932003-09-07 09:14:37 +00009477 xmlParseEndTag1(ctxt, line);
Daniel Veillard81273902003-09-30 00:43:48 +00009478#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00009479
9480 /*
9481 * Capture end position and add node
9482 */
9483 if ( ret != NULL && ctxt->record_info ) {
9484 node_info.end_pos = ctxt->input->consumed +
9485 (CUR_PTR - ctxt->input->base);
9486 node_info.end_line = ctxt->input->line;
9487 node_info.node = ret;
9488 xmlParserAddNodeInfo(ctxt, &node_info);
9489 }
9490}
9491
9492/**
9493 * xmlParseVersionNum:
9494 * @ctxt: an XML parser context
9495 *
9496 * parse the XML version value.
9497 *
Daniel Veillard34e3f642008-07-29 09:02:27 +00009498 * [26] VersionNum ::= '1.' [0-9]+
9499 *
9500 * In practice allow [0-9].[0-9]+ at that level
Owen Taylor3473f882001-02-23 17:55:21 +00009501 *
9502 * Returns the string giving the XML version number, or NULL
9503 */
9504xmlChar *
9505xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
9506 xmlChar *buf = NULL;
9507 int len = 0;
9508 int size = 10;
9509 xmlChar cur;
9510
Daniel Veillard3c908dc2003-04-19 00:07:51 +00009511 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00009512 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009513 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009514 return(NULL);
9515 }
9516 cur = CUR;
Daniel Veillard34e3f642008-07-29 09:02:27 +00009517 if (!((cur >= '0') && (cur <= '9'))) {
Daniel Veillard7e5c3f42008-07-29 16:12:31 +00009518 xmlFree(buf);
Daniel Veillard34e3f642008-07-29 09:02:27 +00009519 return(NULL);
9520 }
9521 buf[len++] = cur;
9522 NEXT;
9523 cur=CUR;
9524 if (cur != '.') {
Daniel Veillard7e5c3f42008-07-29 16:12:31 +00009525 xmlFree(buf);
Daniel Veillard34e3f642008-07-29 09:02:27 +00009526 return(NULL);
9527 }
9528 buf[len++] = cur;
9529 NEXT;
9530 cur=CUR;
9531 while ((cur >= '0') && (cur <= '9')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009532 if (len + 1 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00009533 xmlChar *tmp;
9534
Owen Taylor3473f882001-02-23 17:55:21 +00009535 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00009536 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
9537 if (tmp == NULL) {
Daniel Veillard68b6e022008-03-31 09:26:00 +00009538 xmlFree(buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009539 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009540 return(NULL);
9541 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00009542 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00009543 }
9544 buf[len++] = cur;
9545 NEXT;
9546 cur=CUR;
9547 }
9548 buf[len] = 0;
9549 return(buf);
9550}
9551
9552/**
9553 * xmlParseVersionInfo:
9554 * @ctxt: an XML parser context
Daniel Veillard68b6e022008-03-31 09:26:00 +00009555 *
Owen Taylor3473f882001-02-23 17:55:21 +00009556 * parse the XML version.
9557 *
9558 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
Daniel Veillard68b6e022008-03-31 09:26:00 +00009559 *
Owen Taylor3473f882001-02-23 17:55:21 +00009560 * [25] Eq ::= S? '=' S?
9561 *
9562 * Returns the version string, e.g. "1.0"
9563 */
9564
9565xmlChar *
9566xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
9567 xmlChar *version = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00009568
Daniel Veillarda07050d2003-10-19 14:46:32 +00009569 if (CMP7(CUR_PTR, 'v', 'e', 'r', 's', 'i', 'o', 'n')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009570 SKIP(7);
9571 SKIP_BLANKS;
9572 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009573 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009574 return(NULL);
9575 }
9576 NEXT;
9577 SKIP_BLANKS;
9578 if (RAW == '"') {
9579 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00009580 version = xmlParseVersionNum(ctxt);
9581 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009582 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009583 } else
9584 NEXT;
9585 } else if (RAW == '\''){
9586 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00009587 version = xmlParseVersionNum(ctxt);
9588 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009589 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009590 } else
9591 NEXT;
9592 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009593 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009594 }
9595 }
9596 return(version);
9597}
9598
9599/**
9600 * xmlParseEncName:
9601 * @ctxt: an XML parser context
9602 *
9603 * parse the XML encoding name
9604 *
9605 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
9606 *
9607 * Returns the encoding name value or NULL
9608 */
9609xmlChar *
9610xmlParseEncName(xmlParserCtxtPtr ctxt) {
9611 xmlChar *buf = NULL;
9612 int len = 0;
9613 int size = 10;
9614 xmlChar cur;
9615
9616 cur = CUR;
9617 if (((cur >= 'a') && (cur <= 'z')) ||
9618 ((cur >= 'A') && (cur <= 'Z'))) {
Daniel Veillard3c908dc2003-04-19 00:07:51 +00009619 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00009620 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009621 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009622 return(NULL);
9623 }
Daniel Veillard34e3f642008-07-29 09:02:27 +00009624
Owen Taylor3473f882001-02-23 17:55:21 +00009625 buf[len++] = cur;
9626 NEXT;
9627 cur = CUR;
9628 while (((cur >= 'a') && (cur <= 'z')) ||
9629 ((cur >= 'A') && (cur <= 'Z')) ||
9630 ((cur >= '0') && (cur <= '9')) ||
9631 (cur == '.') || (cur == '_') ||
9632 (cur == '-')) {
9633 if (len + 1 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00009634 xmlChar *tmp;
9635
Owen Taylor3473f882001-02-23 17:55:21 +00009636 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00009637 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
9638 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009639 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00009640 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00009641 return(NULL);
9642 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00009643 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00009644 }
9645 buf[len++] = cur;
9646 NEXT;
9647 cur = CUR;
9648 if (cur == 0) {
9649 SHRINK;
9650 GROW;
9651 cur = CUR;
9652 }
9653 }
9654 buf[len] = 0;
9655 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009656 xmlFatalErr(ctxt, XML_ERR_ENCODING_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009657 }
9658 return(buf);
9659}
9660
9661/**
9662 * xmlParseEncodingDecl:
9663 * @ctxt: an XML parser context
9664 *
9665 * parse the XML encoding declaration
9666 *
9667 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
9668 *
9669 * this setups the conversion filters.
9670 *
9671 * Returns the encoding value or NULL
9672 */
9673
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00009674const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00009675xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
9676 xmlChar *encoding = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00009677
9678 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00009679 if (CMP8(CUR_PTR, 'e', 'n', 'c', 'o', 'd', 'i', 'n', 'g')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009680 SKIP(8);
9681 SKIP_BLANKS;
9682 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009683 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009684 return(NULL);
9685 }
9686 NEXT;
9687 SKIP_BLANKS;
9688 if (RAW == '"') {
9689 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00009690 encoding = xmlParseEncName(ctxt);
9691 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009692 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009693 } else
9694 NEXT;
9695 } else if (RAW == '\''){
9696 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00009697 encoding = xmlParseEncName(ctxt);
9698 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009699 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009700 } else
9701 NEXT;
Daniel Veillard82ac6b02002-02-17 23:18:55 +00009702 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009703 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009704 }
Daniel Veillard6b621b82003-08-11 15:03:34 +00009705 /*
9706 * UTF-16 encoding stwich has already taken place at this stage,
9707 * more over the little-endian/big-endian selection is already done
9708 */
9709 if ((encoding != NULL) &&
9710 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-16")) ||
9711 (!xmlStrcasecmp(encoding, BAD_CAST "UTF16")))) {
Daniel Veillard37334572008-07-31 08:20:02 +00009712 /*
9713 * If no encoding was passed to the parser, that we are
9714 * using UTF-16 and no decoder is present i.e. the
9715 * document is apparently UTF-8 compatible, then raise an
9716 * encoding mismatch fatal error
9717 */
9718 if ((ctxt->encoding == NULL) &&
9719 (ctxt->input->buf != NULL) &&
9720 (ctxt->input->buf->encoder == NULL)) {
9721 xmlFatalErrMsg(ctxt, XML_ERR_INVALID_ENCODING,
9722 "Document labelled UTF-16 but has UTF-8 content\n");
9723 }
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00009724 if (ctxt->encoding != NULL)
9725 xmlFree((xmlChar *) ctxt->encoding);
9726 ctxt->encoding = encoding;
Daniel Veillardb19ba832003-08-14 00:33:46 +00009727 }
9728 /*
9729 * UTF-8 encoding is handled natively
9730 */
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00009731 else if ((encoding != NULL) &&
Daniel Veillardb19ba832003-08-14 00:33:46 +00009732 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-8")) ||
9733 (!xmlStrcasecmp(encoding, BAD_CAST "UTF8")))) {
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00009734 if (ctxt->encoding != NULL)
9735 xmlFree((xmlChar *) ctxt->encoding);
9736 ctxt->encoding = encoding;
Daniel Veillard6b621b82003-08-11 15:03:34 +00009737 }
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00009738 else if (encoding != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +00009739 xmlCharEncodingHandlerPtr handler;
9740
9741 if (ctxt->input->encoding != NULL)
9742 xmlFree((xmlChar *) ctxt->input->encoding);
9743 ctxt->input->encoding = encoding;
9744
Daniel Veillarda6874ca2003-07-29 16:47:24 +00009745 handler = xmlFindCharEncodingHandler((const char *) encoding);
9746 if (handler != NULL) {
9747 xmlSwitchToEncoding(ctxt, handler);
Owen Taylor3473f882001-02-23 17:55:21 +00009748 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00009749 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
Daniel Veillarda6874ca2003-07-29 16:47:24 +00009750 "Unsupported encoding %s\n", encoding);
9751 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009752 }
9753 }
9754 }
9755 return(encoding);
9756}
9757
9758/**
9759 * xmlParseSDDecl:
9760 * @ctxt: an XML parser context
9761 *
9762 * parse the XML standalone declaration
9763 *
9764 * [32] SDDecl ::= S 'standalone' Eq
9765 * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
9766 *
9767 * [ VC: Standalone Document Declaration ]
9768 * TODO The standalone document declaration must have the value "no"
9769 * if any external markup declarations contain declarations of:
9770 * - attributes with default values, if elements to which these
9771 * attributes apply appear in the document without specifications
9772 * of values for these attributes, or
9773 * - entities (other than amp, lt, gt, apos, quot), if references
9774 * to those entities appear in the document, or
9775 * - attributes with values subject to normalization, where the
9776 * attribute appears in the document with a value which will change
9777 * as a result of normalization, or
9778 * - element types with element content, if white space occurs directly
9779 * within any instance of those types.
9780 *
Daniel Veillard602f2bd2006-12-04 09:26:04 +00009781 * Returns:
9782 * 1 if standalone="yes"
9783 * 0 if standalone="no"
9784 * -2 if standalone attribute is missing or invalid
9785 * (A standalone value of -2 means that the XML declaration was found,
9786 * but no value was specified for the standalone attribute).
Owen Taylor3473f882001-02-23 17:55:21 +00009787 */
9788
9789int
9790xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard602f2bd2006-12-04 09:26:04 +00009791 int standalone = -2;
Owen Taylor3473f882001-02-23 17:55:21 +00009792
9793 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00009794 if (CMP10(CUR_PTR, 's', 't', 'a', 'n', 'd', 'a', 'l', 'o', 'n', 'e')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009795 SKIP(10);
9796 SKIP_BLANKS;
9797 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009798 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009799 return(standalone);
9800 }
9801 NEXT;
9802 SKIP_BLANKS;
9803 if (RAW == '\''){
9804 NEXT;
9805 if ((RAW == 'n') && (NXT(1) == 'o')) {
9806 standalone = 0;
9807 SKIP(2);
9808 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
9809 (NXT(2) == 's')) {
9810 standalone = 1;
9811 SKIP(3);
9812 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009813 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009814 }
9815 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009816 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009817 } else
9818 NEXT;
9819 } else if (RAW == '"'){
9820 NEXT;
9821 if ((RAW == 'n') && (NXT(1) == 'o')) {
9822 standalone = 0;
9823 SKIP(2);
9824 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
9825 (NXT(2) == 's')) {
9826 standalone = 1;
9827 SKIP(3);
9828 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009829 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009830 }
9831 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009832 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009833 } else
9834 NEXT;
9835 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009836 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009837 }
9838 }
9839 return(standalone);
9840}
9841
9842/**
9843 * xmlParseXMLDecl:
9844 * @ctxt: an XML parser context
9845 *
9846 * parse an XML declaration header
9847 *
9848 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
9849 */
9850
9851void
9852xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
9853 xmlChar *version;
9854
9855 /*
Daniel Veillardae487ba2005-11-17 07:25:52 +00009856 * This value for standalone indicates that the document has an
9857 * XML declaration but it does not have a standalone attribute.
9858 * It will be overwritten later if a standalone attribute is found.
9859 */
9860 ctxt->input->standalone = -2;
9861
9862 /*
Owen Taylor3473f882001-02-23 17:55:21 +00009863 * We know that '<?xml' is here.
9864 */
9865 SKIP(5);
9866
William M. Brack76e95df2003-10-18 16:20:14 +00009867 if (!IS_BLANK_CH(RAW)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009868 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
9869 "Blank needed after '<?xml'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00009870 }
9871 SKIP_BLANKS;
9872
9873 /*
Daniel Veillard19840942001-11-29 16:11:38 +00009874 * We must have the VersionInfo here.
Owen Taylor3473f882001-02-23 17:55:21 +00009875 */
9876 version = xmlParseVersionInfo(ctxt);
Daniel Veillard19840942001-11-29 16:11:38 +00009877 if (version == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009878 xmlFatalErr(ctxt, XML_ERR_VERSION_MISSING, NULL);
Daniel Veillard19840942001-11-29 16:11:38 +00009879 } else {
9880 if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) {
9881 /*
Daniel Veillard34e3f642008-07-29 09:02:27 +00009882 * Changed here for XML-1.0 5th edition
Daniel Veillard19840942001-11-29 16:11:38 +00009883 */
Daniel Veillard34e3f642008-07-29 09:02:27 +00009884 if (ctxt->options & XML_PARSE_OLD10) {
9885 xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
9886 "Unsupported version '%s'\n",
9887 version);
9888 } else {
9889 if ((version[0] == '1') && ((version[1] == '.'))) {
9890 xmlWarningMsg(ctxt, XML_WAR_UNKNOWN_VERSION,
9891 "Unsupported version '%s'\n",
9892 version, NULL);
9893 } else {
9894 xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
9895 "Unsupported version '%s'\n",
9896 version);
9897 }
9898 }
Daniel Veillard19840942001-11-29 16:11:38 +00009899 }
9900 if (ctxt->version != NULL)
Daniel Veillardd3b08822001-12-05 12:03:33 +00009901 xmlFree((void *) ctxt->version);
Daniel Veillard19840942001-11-29 16:11:38 +00009902 ctxt->version = version;
Daniel Veillarda050d232001-09-05 15:51:05 +00009903 }
Owen Taylor3473f882001-02-23 17:55:21 +00009904
9905 /*
9906 * We may have the encoding declaration
9907 */
William M. Brack76e95df2003-10-18 16:20:14 +00009908 if (!IS_BLANK_CH(RAW)) {
Owen Taylor3473f882001-02-23 17:55:21 +00009909 if ((RAW == '?') && (NXT(1) == '>')) {
9910 SKIP(2);
9911 return;
9912 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009913 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
Owen Taylor3473f882001-02-23 17:55:21 +00009914 }
9915 xmlParseEncodingDecl(ctxt);
9916 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
9917 /*
9918 * The XML REC instructs us to stop parsing right here
9919 */
9920 return;
9921 }
9922
9923 /*
9924 * We may have the standalone status.
9925 */
William M. Brack76e95df2003-10-18 16:20:14 +00009926 if ((ctxt->input->encoding != NULL) && (!IS_BLANK_CH(RAW))) {
Owen Taylor3473f882001-02-23 17:55:21 +00009927 if ((RAW == '?') && (NXT(1) == '>')) {
9928 SKIP(2);
9929 return;
9930 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009931 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
Owen Taylor3473f882001-02-23 17:55:21 +00009932 }
9933 SKIP_BLANKS;
9934 ctxt->input->standalone = xmlParseSDDecl(ctxt);
9935
9936 SKIP_BLANKS;
9937 if ((RAW == '?') && (NXT(1) == '>')) {
9938 SKIP(2);
9939 } else if (RAW == '>') {
9940 /* Deprecated old WD ... */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009941 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009942 NEXT;
9943 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009944 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009945 MOVETO_ENDTAG(CUR_PTR);
9946 NEXT;
9947 }
9948}
9949
9950/**
9951 * xmlParseMisc:
9952 * @ctxt: an XML parser context
9953 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00009954 * parse an XML Misc* optional field.
Owen Taylor3473f882001-02-23 17:55:21 +00009955 *
9956 * [27] Misc ::= Comment | PI | S
9957 */
9958
9959void
9960xmlParseMisc(xmlParserCtxtPtr ctxt) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00009961 while (((RAW == '<') && (NXT(1) == '?')) ||
Daniel Veillarda07050d2003-10-19 14:46:32 +00009962 (CMP4(CUR_PTR, '<', '!', '-', '-')) ||
William M. Brack76e95df2003-10-18 16:20:14 +00009963 IS_BLANK_CH(CUR)) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00009964 if ((RAW == '<') && (NXT(1) == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009965 xmlParsePI(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00009966 } else if (IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00009967 NEXT;
9968 } else
9969 xmlParseComment(ctxt);
9970 }
9971}
9972
9973/**
9974 * xmlParseDocument:
9975 * @ctxt: an XML parser context
9976 *
9977 * parse an XML document (and build a tree if using the standard SAX
9978 * interface).
9979 *
9980 * [1] document ::= prolog element Misc*
9981 *
9982 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
9983 *
9984 * Returns 0, -1 in case of error. the parser context is augmented
9985 * as a result of the parsing.
9986 */
9987
9988int
9989xmlParseDocument(xmlParserCtxtPtr ctxt) {
9990 xmlChar start[4];
9991 xmlCharEncoding enc;
9992
9993 xmlInitParser();
9994
Daniel Veillard36e5cd52004-11-02 14:52:23 +00009995 if ((ctxt == NULL) || (ctxt->input == NULL))
9996 return(-1);
9997
Owen Taylor3473f882001-02-23 17:55:21 +00009998 GROW;
9999
10000 /*
Daniel Veillard0fb18932003-09-07 09:14:37 +000010001 * SAX: detecting the level.
10002 */
Daniel Veillarde57ec792003-09-10 10:50:59 +000010003 xmlDetectSAX2(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +000010004
10005 /*
Owen Taylor3473f882001-02-23 17:55:21 +000010006 * SAX: beginning of the document processing.
10007 */
10008 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10009 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
10010
Daniel Veillard4aede2e2003-10-17 12:43:59 +000010011 if ((ctxt->encoding == (const xmlChar *)XML_CHAR_ENCODING_NONE) &&
10012 ((ctxt->input->end - ctxt->input->cur) >= 4)) {
Daniel Veillard4aafa792001-07-28 17:21:12 +000010013 /*
10014 * Get the 4 first bytes and decode the charset
10015 * if enc != XML_CHAR_ENCODING_NONE
10016 * plug some encoding conversion routines.
10017 */
10018 start[0] = RAW;
10019 start[1] = NXT(1);
10020 start[2] = NXT(2);
10021 start[3] = NXT(3);
Daniel Veillard4aede2e2003-10-17 12:43:59 +000010022 enc = xmlDetectCharEncoding(&start[0], 4);
Daniel Veillard4aafa792001-07-28 17:21:12 +000010023 if (enc != XML_CHAR_ENCODING_NONE) {
10024 xmlSwitchEncoding(ctxt, enc);
10025 }
Owen Taylor3473f882001-02-23 17:55:21 +000010026 }
10027
10028
10029 if (CUR == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010030 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010031 }
10032
10033 /*
10034 * Check for the XMLDecl in the Prolog.
10035 */
10036 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +000010037 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +000010038
10039 /*
10040 * Note that we will switch encoding on the fly.
10041 */
10042 xmlParseXMLDecl(ctxt);
10043 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
10044 /*
10045 * The XML REC instructs us to stop parsing right here
10046 */
10047 return(-1);
10048 }
10049 ctxt->standalone = ctxt->input->standalone;
10050 SKIP_BLANKS;
10051 } else {
10052 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10053 }
10054 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
10055 ctxt->sax->startDocument(ctxt->userData);
10056
10057 /*
10058 * The Misc part of the Prolog
10059 */
10060 GROW;
10061 xmlParseMisc(ctxt);
10062
10063 /*
10064 * Then possibly doc type declaration(s) and more Misc
10065 * (doctypedecl Misc*)?
10066 */
10067 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +000010068 if (CMP9(CUR_PTR, '<', '!', 'D', 'O', 'C', 'T', 'Y', 'P', 'E')) {
Owen Taylor3473f882001-02-23 17:55:21 +000010069
10070 ctxt->inSubset = 1;
10071 xmlParseDocTypeDecl(ctxt);
10072 if (RAW == '[') {
10073 ctxt->instate = XML_PARSER_DTD;
10074 xmlParseInternalSubset(ctxt);
10075 }
10076
10077 /*
10078 * Create and update the external subset.
10079 */
10080 ctxt->inSubset = 2;
10081 if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
10082 (!ctxt->disableSAX))
10083 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
10084 ctxt->extSubSystem, ctxt->extSubURI);
10085 ctxt->inSubset = 0;
10086
Daniel Veillardac4118d2008-01-11 05:27:32 +000010087 xmlCleanSpecialAttr(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010088
10089 ctxt->instate = XML_PARSER_PROLOG;
10090 xmlParseMisc(ctxt);
10091 }
10092
10093 /*
10094 * Time to start parsing the tree itself
10095 */
10096 GROW;
10097 if (RAW != '<') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +000010098 xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
10099 "Start tag expected, '<' not found\n");
Owen Taylor3473f882001-02-23 17:55:21 +000010100 } else {
10101 ctxt->instate = XML_PARSER_CONTENT;
10102 xmlParseElement(ctxt);
10103 ctxt->instate = XML_PARSER_EPILOG;
10104
10105
10106 /*
10107 * The Misc part at the end
10108 */
10109 xmlParseMisc(ctxt);
10110
Daniel Veillard561b7f82002-03-20 21:55:57 +000010111 if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010112 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010113 }
10114 ctxt->instate = XML_PARSER_EOF;
10115 }
10116
10117 /*
10118 * SAX: end of the document processing.
10119 */
Daniel Veillard8d24cc12002-03-05 15:41:29 +000010120 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +000010121 ctxt->sax->endDocument(ctxt->userData);
10122
Daniel Veillard5997aca2002-03-18 18:36:20 +000010123 /*
10124 * Remove locally kept entity definitions if the tree was not built
10125 */
10126 if ((ctxt->myDoc != NULL) &&
10127 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
10128 xmlFreeDoc(ctxt->myDoc);
10129 ctxt->myDoc = NULL;
10130 }
10131
Daniel Veillardae0765b2008-07-31 19:54:59 +000010132 if ((ctxt->wellFormed) && (ctxt->myDoc != NULL)) {
10133 ctxt->myDoc->properties |= XML_DOC_WELLFORMED;
10134 if (ctxt->valid)
10135 ctxt->myDoc->properties |= XML_DOC_DTDVALID;
10136 if (ctxt->nsWellFormed)
10137 ctxt->myDoc->properties |= XML_DOC_NSVALID;
10138 if (ctxt->options & XML_PARSE_OLD10)
10139 ctxt->myDoc->properties |= XML_DOC_OLD10;
10140 }
Daniel Veillardc7612992002-02-17 22:47:37 +000010141 if (! ctxt->wellFormed) {
10142 ctxt->valid = 0;
10143 return(-1);
10144 }
Owen Taylor3473f882001-02-23 17:55:21 +000010145 return(0);
10146}
10147
10148/**
10149 * xmlParseExtParsedEnt:
10150 * @ctxt: an XML parser context
10151 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +000010152 * parse a general parsed entity
Owen Taylor3473f882001-02-23 17:55:21 +000010153 * An external general parsed entity is well-formed if it matches the
10154 * production labeled extParsedEnt.
10155 *
10156 * [78] extParsedEnt ::= TextDecl? content
10157 *
10158 * Returns 0, -1 in case of error. the parser context is augmented
10159 * as a result of the parsing.
10160 */
10161
10162int
10163xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
10164 xmlChar start[4];
10165 xmlCharEncoding enc;
10166
Daniel Veillard36e5cd52004-11-02 14:52:23 +000010167 if ((ctxt == NULL) || (ctxt->input == NULL))
10168 return(-1);
10169
Owen Taylor3473f882001-02-23 17:55:21 +000010170 xmlDefaultSAXHandlerInit();
10171
Daniel Veillard309f81d2003-09-23 09:02:53 +000010172 xmlDetectSAX2(ctxt);
10173
Owen Taylor3473f882001-02-23 17:55:21 +000010174 GROW;
10175
10176 /*
10177 * SAX: beginning of the document processing.
10178 */
10179 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10180 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
10181
10182 /*
10183 * Get the 4 first bytes and decode the charset
10184 * if enc != XML_CHAR_ENCODING_NONE
10185 * plug some encoding conversion routines.
10186 */
Daniel Veillard4aede2e2003-10-17 12:43:59 +000010187 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
10188 start[0] = RAW;
10189 start[1] = NXT(1);
10190 start[2] = NXT(2);
10191 start[3] = NXT(3);
10192 enc = xmlDetectCharEncoding(start, 4);
10193 if (enc != XML_CHAR_ENCODING_NONE) {
10194 xmlSwitchEncoding(ctxt, enc);
10195 }
Owen Taylor3473f882001-02-23 17:55:21 +000010196 }
10197
10198
10199 if (CUR == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010200 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010201 }
10202
10203 /*
10204 * Check for the XMLDecl in the Prolog.
10205 */
10206 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +000010207 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +000010208
10209 /*
10210 * Note that we will switch encoding on the fly.
10211 */
10212 xmlParseXMLDecl(ctxt);
10213 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
10214 /*
10215 * The XML REC instructs us to stop parsing right here
10216 */
10217 return(-1);
10218 }
10219 SKIP_BLANKS;
10220 } else {
10221 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10222 }
10223 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
10224 ctxt->sax->startDocument(ctxt->userData);
10225
10226 /*
10227 * Doing validity checking on chunk doesn't make sense
10228 */
10229 ctxt->instate = XML_PARSER_CONTENT;
10230 ctxt->validate = 0;
10231 ctxt->loadsubset = 0;
10232 ctxt->depth = 0;
10233
10234 xmlParseContent(ctxt);
10235
10236 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010237 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010238 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010239 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010240 }
10241
10242 /*
10243 * SAX: end of the document processing.
10244 */
Daniel Veillard8d24cc12002-03-05 15:41:29 +000010245 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +000010246 ctxt->sax->endDocument(ctxt->userData);
10247
10248 if (! ctxt->wellFormed) return(-1);
10249 return(0);
10250}
10251
Daniel Veillard73b013f2003-09-30 12:36:01 +000010252#ifdef LIBXML_PUSH_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000010253/************************************************************************
10254 * *
10255 * Progressive parsing interfaces *
10256 * *
10257 ************************************************************************/
10258
10259/**
10260 * xmlParseLookupSequence:
10261 * @ctxt: an XML parser context
10262 * @first: the first char to lookup
10263 * @next: the next char to lookup or zero
10264 * @third: the next char to lookup or zero
10265 *
10266 * Try to find if a sequence (first, next, third) or just (first next) or
10267 * (first) is available in the input stream.
10268 * This function has a side effect of (possibly) incrementing ctxt->checkIndex
10269 * to avoid rescanning sequences of bytes, it DOES change the state of the
10270 * parser, do not use liberally.
10271 *
10272 * Returns the index to the current parsing point if the full sequence
10273 * is available, -1 otherwise.
10274 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +000010275static int
Owen Taylor3473f882001-02-23 17:55:21 +000010276xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
10277 xmlChar next, xmlChar third) {
10278 int base, len;
10279 xmlParserInputPtr in;
10280 const xmlChar *buf;
10281
10282 in = ctxt->input;
10283 if (in == NULL) return(-1);
10284 base = in->cur - in->base;
10285 if (base < 0) return(-1);
10286 if (ctxt->checkIndex > base)
10287 base = ctxt->checkIndex;
10288 if (in->buf == NULL) {
10289 buf = in->base;
10290 len = in->length;
10291 } else {
10292 buf = in->buf->buffer->content;
10293 len = in->buf->buffer->use;
10294 }
10295 /* take into account the sequence length */
10296 if (third) len -= 2;
10297 else if (next) len --;
10298 for (;base < len;base++) {
10299 if (buf[base] == first) {
10300 if (third != 0) {
10301 if ((buf[base + 1] != next) ||
10302 (buf[base + 2] != third)) continue;
10303 } else if (next != 0) {
10304 if (buf[base + 1] != next) continue;
10305 }
10306 ctxt->checkIndex = 0;
10307#ifdef DEBUG_PUSH
10308 if (next == 0)
10309 xmlGenericError(xmlGenericErrorContext,
10310 "PP: lookup '%c' found at %d\n",
10311 first, base);
10312 else if (third == 0)
10313 xmlGenericError(xmlGenericErrorContext,
10314 "PP: lookup '%c%c' found at %d\n",
10315 first, next, base);
10316 else
10317 xmlGenericError(xmlGenericErrorContext,
10318 "PP: lookup '%c%c%c' found at %d\n",
10319 first, next, third, base);
10320#endif
10321 return(base - (in->cur - in->base));
10322 }
10323 }
10324 ctxt->checkIndex = base;
10325#ifdef DEBUG_PUSH
10326 if (next == 0)
10327 xmlGenericError(xmlGenericErrorContext,
10328 "PP: lookup '%c' failed\n", first);
10329 else if (third == 0)
10330 xmlGenericError(xmlGenericErrorContext,
10331 "PP: lookup '%c%c' failed\n", first, next);
10332 else
10333 xmlGenericError(xmlGenericErrorContext,
10334 "PP: lookup '%c%c%c' failed\n", first, next, third);
10335#endif
10336 return(-1);
10337}
10338
10339/**
Daniel Veillarda880b122003-04-21 21:36:41 +000010340 * xmlParseGetLasts:
10341 * @ctxt: an XML parser context
10342 * @lastlt: pointer to store the last '<' from the input
10343 * @lastgt: pointer to store the last '>' from the input
10344 *
10345 * Lookup the last < and > in the current chunk
10346 */
10347static void
10348xmlParseGetLasts(xmlParserCtxtPtr ctxt, const xmlChar **lastlt,
10349 const xmlChar **lastgt) {
10350 const xmlChar *tmp;
10351
10352 if ((ctxt == NULL) || (lastlt == NULL) || (lastgt == NULL)) {
10353 xmlGenericError(xmlGenericErrorContext,
10354 "Internal error: xmlParseGetLasts\n");
10355 return;
10356 }
Daniel Veillard0df3bc32004-06-08 12:03:41 +000010357 if ((ctxt->progressive != 0) && (ctxt->inputNr == 1)) {
Daniel Veillarda880b122003-04-21 21:36:41 +000010358 tmp = ctxt->input->end;
10359 tmp--;
Daniel Veillardeb70f932004-07-05 16:46:09 +000010360 while ((tmp >= ctxt->input->base) && (*tmp != '<')) tmp--;
Daniel Veillarda880b122003-04-21 21:36:41 +000010361 if (tmp < ctxt->input->base) {
10362 *lastlt = NULL;
10363 *lastgt = NULL;
Daniel Veillarda880b122003-04-21 21:36:41 +000010364 } else {
Daniel Veillardeb70f932004-07-05 16:46:09 +000010365 *lastlt = tmp;
10366 tmp++;
10367 while ((tmp < ctxt->input->end) && (*tmp != '>')) {
10368 if (*tmp == '\'') {
10369 tmp++;
10370 while ((tmp < ctxt->input->end) && (*tmp != '\'')) tmp++;
10371 if (tmp < ctxt->input->end) tmp++;
10372 } else if (*tmp == '"') {
10373 tmp++;
10374 while ((tmp < ctxt->input->end) && (*tmp != '"')) tmp++;
10375 if (tmp < ctxt->input->end) tmp++;
10376 } else
10377 tmp++;
10378 }
10379 if (tmp < ctxt->input->end)
10380 *lastgt = tmp;
10381 else {
10382 tmp = *lastlt;
10383 tmp--;
10384 while ((tmp >= ctxt->input->base) && (*tmp != '>')) tmp--;
10385 if (tmp >= ctxt->input->base)
10386 *lastgt = tmp;
10387 else
10388 *lastgt = NULL;
10389 }
Daniel Veillarda880b122003-04-21 21:36:41 +000010390 }
Daniel Veillarda880b122003-04-21 21:36:41 +000010391 } else {
10392 *lastlt = NULL;
10393 *lastgt = NULL;
10394 }
10395}
10396/**
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000010397 * xmlCheckCdataPush:
10398 * @cur: pointer to the bock of characters
10399 * @len: length of the block in bytes
10400 *
10401 * Check that the block of characters is okay as SCdata content [20]
10402 *
10403 * Returns the number of bytes to pass if okay, a negative index where an
10404 * UTF-8 error occured otherwise
10405 */
10406static int
10407xmlCheckCdataPush(const xmlChar *utf, int len) {
10408 int ix;
10409 unsigned char c;
10410 int codepoint;
10411
10412 if ((utf == NULL) || (len <= 0))
10413 return(0);
10414
10415 for (ix = 0; ix < len;) { /* string is 0-terminated */
10416 c = utf[ix];
10417 if ((c & 0x80) == 0x00) { /* 1-byte code, starts with 10 */
10418 if (c >= 0x20)
10419 ix++;
10420 else if ((c == 0xA) || (c == 0xD) || (c == 0x9))
10421 ix++;
10422 else
10423 return(-ix);
10424 } else if ((c & 0xe0) == 0xc0) {/* 2-byte code, starts with 110 */
10425 if (ix + 2 > len) return(ix);
10426 if ((utf[ix+1] & 0xc0 ) != 0x80)
10427 return(-ix);
Daniel Veillardeca59a22005-09-09 10:56:28 +000010428 codepoint = (utf[ix] & 0x1f) << 6;
10429 codepoint |= utf[ix+1] & 0x3f;
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000010430 if (!xmlIsCharQ(codepoint))
10431 return(-ix);
10432 ix += 2;
10433 } else if ((c & 0xf0) == 0xe0) {/* 3-byte code, starts with 1110 */
10434 if (ix + 3 > len) return(ix);
10435 if (((utf[ix+1] & 0xc0) != 0x80) ||
10436 ((utf[ix+2] & 0xc0) != 0x80))
10437 return(-ix);
Daniel Veillardeca59a22005-09-09 10:56:28 +000010438 codepoint = (utf[ix] & 0xf) << 12;
10439 codepoint |= (utf[ix+1] & 0x3f) << 6;
10440 codepoint |= utf[ix+2] & 0x3f;
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000010441 if (!xmlIsCharQ(codepoint))
10442 return(-ix);
10443 ix += 3;
10444 } else if ((c & 0xf8) == 0xf0) {/* 4-byte code, starts with 11110 */
10445 if (ix + 4 > len) return(ix);
10446 if (((utf[ix+1] & 0xc0) != 0x80) ||
10447 ((utf[ix+2] & 0xc0) != 0x80) ||
10448 ((utf[ix+3] & 0xc0) != 0x80))
10449 return(-ix);
Daniel Veillardeca59a22005-09-09 10:56:28 +000010450 codepoint = (utf[ix] & 0x7) << 18;
10451 codepoint |= (utf[ix+1] & 0x3f) << 12;
10452 codepoint |= (utf[ix+2] & 0x3f) << 6;
10453 codepoint |= utf[ix+3] & 0x3f;
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000010454 if (!xmlIsCharQ(codepoint))
10455 return(-ix);
10456 ix += 4;
10457 } else /* unknown encoding */
10458 return(-ix);
10459 }
10460 return(ix);
10461}
10462
10463/**
Owen Taylor3473f882001-02-23 17:55:21 +000010464 * xmlParseTryOrFinish:
10465 * @ctxt: an XML parser context
10466 * @terminate: last chunk indicator
10467 *
10468 * Try to progress on parsing
10469 *
10470 * Returns zero if no parsing was possible
10471 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +000010472static int
Owen Taylor3473f882001-02-23 17:55:21 +000010473xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
10474 int ret = 0;
Daniel Veillardc82c57e2004-01-12 16:24:34 +000010475 int avail, tlen;
Owen Taylor3473f882001-02-23 17:55:21 +000010476 xmlChar cur, next;
Daniel Veillarda880b122003-04-21 21:36:41 +000010477 const xmlChar *lastlt, *lastgt;
Owen Taylor3473f882001-02-23 17:55:21 +000010478
Daniel Veillard36e5cd52004-11-02 14:52:23 +000010479 if (ctxt->input == NULL)
10480 return(0);
10481
Owen Taylor3473f882001-02-23 17:55:21 +000010482#ifdef DEBUG_PUSH
10483 switch (ctxt->instate) {
10484 case XML_PARSER_EOF:
10485 xmlGenericError(xmlGenericErrorContext,
10486 "PP: try EOF\n"); break;
10487 case XML_PARSER_START:
10488 xmlGenericError(xmlGenericErrorContext,
10489 "PP: try START\n"); break;
10490 case XML_PARSER_MISC:
10491 xmlGenericError(xmlGenericErrorContext,
10492 "PP: try MISC\n");break;
10493 case XML_PARSER_COMMENT:
10494 xmlGenericError(xmlGenericErrorContext,
10495 "PP: try COMMENT\n");break;
10496 case XML_PARSER_PROLOG:
10497 xmlGenericError(xmlGenericErrorContext,
10498 "PP: try PROLOG\n");break;
10499 case XML_PARSER_START_TAG:
10500 xmlGenericError(xmlGenericErrorContext,
10501 "PP: try START_TAG\n");break;
10502 case XML_PARSER_CONTENT:
10503 xmlGenericError(xmlGenericErrorContext,
10504 "PP: try CONTENT\n");break;
10505 case XML_PARSER_CDATA_SECTION:
10506 xmlGenericError(xmlGenericErrorContext,
10507 "PP: try CDATA_SECTION\n");break;
10508 case XML_PARSER_END_TAG:
10509 xmlGenericError(xmlGenericErrorContext,
10510 "PP: try END_TAG\n");break;
10511 case XML_PARSER_ENTITY_DECL:
10512 xmlGenericError(xmlGenericErrorContext,
10513 "PP: try ENTITY_DECL\n");break;
10514 case XML_PARSER_ENTITY_VALUE:
10515 xmlGenericError(xmlGenericErrorContext,
10516 "PP: try ENTITY_VALUE\n");break;
10517 case XML_PARSER_ATTRIBUTE_VALUE:
10518 xmlGenericError(xmlGenericErrorContext,
10519 "PP: try ATTRIBUTE_VALUE\n");break;
10520 case XML_PARSER_DTD:
10521 xmlGenericError(xmlGenericErrorContext,
10522 "PP: try DTD\n");break;
10523 case XML_PARSER_EPILOG:
10524 xmlGenericError(xmlGenericErrorContext,
10525 "PP: try EPILOG\n");break;
10526 case XML_PARSER_PI:
10527 xmlGenericError(xmlGenericErrorContext,
10528 "PP: try PI\n");break;
10529 case XML_PARSER_IGNORE:
10530 xmlGenericError(xmlGenericErrorContext,
10531 "PP: try IGNORE\n");break;
10532 }
10533#endif
10534
Daniel Veillard198c1bf2003-10-20 17:07:41 +000010535 if ((ctxt->input != NULL) &&
10536 (ctxt->input->cur - ctxt->input->base > 4096)) {
Daniel Veillarda880b122003-04-21 21:36:41 +000010537 xmlSHRINK(ctxt);
10538 ctxt->checkIndex = 0;
10539 }
10540 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Aleksey Sanine48a3182002-05-09 18:20:01 +000010541
Daniel Veillarda880b122003-04-21 21:36:41 +000010542 while (1) {
Daniel Veillard14412512005-01-21 23:53:26 +000010543 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000010544 return(0);
10545
10546
Owen Taylor3473f882001-02-23 17:55:21 +000010547 /*
10548 * Pop-up of finished entities.
10549 */
10550 while ((RAW == 0) && (ctxt->inputNr > 1))
10551 xmlPopInput(ctxt);
10552
Daniel Veillard198c1bf2003-10-20 17:07:41 +000010553 if (ctxt->input == NULL) break;
Owen Taylor3473f882001-02-23 17:55:21 +000010554 if (ctxt->input->buf == NULL)
Daniel Veillarda880b122003-04-21 21:36:41 +000010555 avail = ctxt->input->length -
10556 (ctxt->input->cur - ctxt->input->base);
Daniel Veillard158a4d22002-02-20 22:17:58 +000010557 else {
10558 /*
10559 * If we are operating on converted input, try to flush
10560 * remainng chars to avoid them stalling in the non-converted
10561 * buffer.
10562 */
10563 if ((ctxt->input->buf->raw != NULL) &&
10564 (ctxt->input->buf->raw->use > 0)) {
10565 int base = ctxt->input->base -
10566 ctxt->input->buf->buffer->content;
10567 int current = ctxt->input->cur - ctxt->input->base;
10568
10569 xmlParserInputBufferPush(ctxt->input->buf, 0, "");
10570 ctxt->input->base = ctxt->input->buf->buffer->content + base;
10571 ctxt->input->cur = ctxt->input->base + current;
10572 ctxt->input->end =
10573 &ctxt->input->buf->buffer->content[
10574 ctxt->input->buf->buffer->use];
10575 }
10576 avail = ctxt->input->buf->buffer->use -
10577 (ctxt->input->cur - ctxt->input->base);
10578 }
Owen Taylor3473f882001-02-23 17:55:21 +000010579 if (avail < 1)
10580 goto done;
10581 switch (ctxt->instate) {
10582 case XML_PARSER_EOF:
10583 /*
10584 * Document parsing is done !
10585 */
10586 goto done;
10587 case XML_PARSER_START:
Daniel Veillard0e4cd172001-06-28 12:13:56 +000010588 if (ctxt->charset == XML_CHAR_ENCODING_NONE) {
10589 xmlChar start[4];
10590 xmlCharEncoding enc;
10591
10592 /*
10593 * Very first chars read from the document flow.
10594 */
10595 if (avail < 4)
10596 goto done;
10597
10598 /*
10599 * Get the 4 first bytes and decode the charset
10600 * if enc != XML_CHAR_ENCODING_NONE
William M. Brack3a1cd212005-02-11 14:35:54 +000010601 * plug some encoding conversion routines,
10602 * else xmlSwitchEncoding will set to (default)
10603 * UTF8.
Daniel Veillard0e4cd172001-06-28 12:13:56 +000010604 */
10605 start[0] = RAW;
10606 start[1] = NXT(1);
10607 start[2] = NXT(2);
10608 start[3] = NXT(3);
10609 enc = xmlDetectCharEncoding(start, 4);
William M. Brack3a1cd212005-02-11 14:35:54 +000010610 xmlSwitchEncoding(ctxt, enc);
Daniel Veillard0e4cd172001-06-28 12:13:56 +000010611 break;
10612 }
Owen Taylor3473f882001-02-23 17:55:21 +000010613
Daniel Veillard2b8c4a12003-10-02 22:28:19 +000010614 if (avail < 2)
10615 goto done;
Owen Taylor3473f882001-02-23 17:55:21 +000010616 cur = ctxt->input->cur[0];
10617 next = ctxt->input->cur[1];
10618 if (cur == 0) {
10619 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10620 ctxt->sax->setDocumentLocator(ctxt->userData,
10621 &xmlDefaultSAXLocator);
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010622 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010623 ctxt->instate = XML_PARSER_EOF;
10624#ifdef DEBUG_PUSH
10625 xmlGenericError(xmlGenericErrorContext,
10626 "PP: entering EOF\n");
10627#endif
10628 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10629 ctxt->sax->endDocument(ctxt->userData);
10630 goto done;
10631 }
10632 if ((cur == '<') && (next == '?')) {
10633 /* PI or XML decl */
10634 if (avail < 5) return(ret);
10635 if ((!terminate) &&
10636 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
10637 return(ret);
10638 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10639 ctxt->sax->setDocumentLocator(ctxt->userData,
10640 &xmlDefaultSAXLocator);
10641 if ((ctxt->input->cur[2] == 'x') &&
10642 (ctxt->input->cur[3] == 'm') &&
10643 (ctxt->input->cur[4] == 'l') &&
William M. Brack76e95df2003-10-18 16:20:14 +000010644 (IS_BLANK_CH(ctxt->input->cur[5]))) {
Owen Taylor3473f882001-02-23 17:55:21 +000010645 ret += 5;
10646#ifdef DEBUG_PUSH
10647 xmlGenericError(xmlGenericErrorContext,
10648 "PP: Parsing XML Decl\n");
10649#endif
10650 xmlParseXMLDecl(ctxt);
10651 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
10652 /*
10653 * The XML REC instructs us to stop parsing right
10654 * here
10655 */
10656 ctxt->instate = XML_PARSER_EOF;
10657 return(0);
10658 }
10659 ctxt->standalone = ctxt->input->standalone;
10660 if ((ctxt->encoding == NULL) &&
10661 (ctxt->input->encoding != NULL))
10662 ctxt->encoding = xmlStrdup(ctxt->input->encoding);
10663 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
10664 (!ctxt->disableSAX))
10665 ctxt->sax->startDocument(ctxt->userData);
10666 ctxt->instate = XML_PARSER_MISC;
10667#ifdef DEBUG_PUSH
10668 xmlGenericError(xmlGenericErrorContext,
10669 "PP: entering MISC\n");
10670#endif
10671 } else {
10672 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10673 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
10674 (!ctxt->disableSAX))
10675 ctxt->sax->startDocument(ctxt->userData);
10676 ctxt->instate = XML_PARSER_MISC;
10677#ifdef DEBUG_PUSH
10678 xmlGenericError(xmlGenericErrorContext,
10679 "PP: entering MISC\n");
10680#endif
10681 }
10682 } else {
10683 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10684 ctxt->sax->setDocumentLocator(ctxt->userData,
10685 &xmlDefaultSAXLocator);
10686 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
William M. Bracka3215c72004-07-31 16:24:01 +000010687 if (ctxt->version == NULL) {
10688 xmlErrMemory(ctxt, NULL);
10689 break;
10690 }
Owen Taylor3473f882001-02-23 17:55:21 +000010691 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
10692 (!ctxt->disableSAX))
10693 ctxt->sax->startDocument(ctxt->userData);
10694 ctxt->instate = XML_PARSER_MISC;
10695#ifdef DEBUG_PUSH
10696 xmlGenericError(xmlGenericErrorContext,
10697 "PP: entering MISC\n");
10698#endif
10699 }
10700 break;
Daniel Veillarda880b122003-04-21 21:36:41 +000010701 case XML_PARSER_START_TAG: {
Daniel Veillarde57ec792003-09-10 10:50:59 +000010702 const xmlChar *name;
10703 const xmlChar *prefix;
10704 const xmlChar *URI;
10705 int nsNr = ctxt->nsNr;
Daniel Veillarda880b122003-04-21 21:36:41 +000010706
10707 if ((avail < 2) && (ctxt->inputNr == 1))
10708 goto done;
10709 cur = ctxt->input->cur[0];
10710 if (cur != '<') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010711 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Daniel Veillarda880b122003-04-21 21:36:41 +000010712 ctxt->instate = XML_PARSER_EOF;
Daniel Veillarda880b122003-04-21 21:36:41 +000010713 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10714 ctxt->sax->endDocument(ctxt->userData);
10715 goto done;
10716 }
10717 if (!terminate) {
10718 if (ctxt->progressive) {
Daniel Veillardb3744002004-02-18 14:28:22 +000010719 /* > can be found unescaped in attribute values */
Daniel Veillardeb70f932004-07-05 16:46:09 +000010720 if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
Daniel Veillarda880b122003-04-21 21:36:41 +000010721 goto done;
10722 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
10723 goto done;
10724 }
10725 }
10726 if (ctxt->spaceNr == 0)
10727 spacePush(ctxt, -1);
Daniel Veillard1114d002006-10-12 16:24:35 +000010728 else if (*ctxt->space == -2)
10729 spacePush(ctxt, -1);
Daniel Veillarda880b122003-04-21 21:36:41 +000010730 else
10731 spacePush(ctxt, *ctxt->space);
Daniel Veillard81273902003-09-30 00:43:48 +000010732#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +000010733 if (ctxt->sax2)
Daniel Veillard81273902003-09-30 00:43:48 +000010734#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillardc82c57e2004-01-12 16:24:34 +000010735 name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
Daniel Veillard81273902003-09-30 00:43:48 +000010736#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +000010737 else
10738 name = xmlParseStartTag(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +000010739#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarda880b122003-04-21 21:36:41 +000010740 if (name == NULL) {
10741 spacePop(ctxt);
10742 ctxt->instate = XML_PARSER_EOF;
Daniel Veillarda880b122003-04-21 21:36:41 +000010743 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10744 ctxt->sax->endDocument(ctxt->userData);
10745 goto done;
10746 }
Daniel Veillard4432df22003-09-28 18:58:27 +000010747#ifdef LIBXML_VALID_ENABLED
Daniel Veillarda880b122003-04-21 21:36:41 +000010748 /*
10749 * [ VC: Root Element Type ]
10750 * The Name in the document type declaration must match
10751 * the element type of the root element.
10752 */
10753 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
10754 ctxt->node && (ctxt->node == ctxt->myDoc->children))
10755 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
Daniel Veillard4432df22003-09-28 18:58:27 +000010756#endif /* LIBXML_VALID_ENABLED */
Daniel Veillarda880b122003-04-21 21:36:41 +000010757
10758 /*
10759 * Check for an Empty Element.
10760 */
10761 if ((RAW == '/') && (NXT(1) == '>')) {
10762 SKIP(2);
Daniel Veillarde57ec792003-09-10 10:50:59 +000010763
10764 if (ctxt->sax2) {
10765 if ((ctxt->sax != NULL) &&
10766 (ctxt->sax->endElementNs != NULL) &&
10767 (!ctxt->disableSAX))
10768 ctxt->sax->endElementNs(ctxt->userData, name,
10769 prefix, URI);
Daniel Veillard48df9612005-01-04 21:50:05 +000010770 if (ctxt->nsNr - nsNr > 0)
10771 nsPop(ctxt, ctxt->nsNr - nsNr);
Daniel Veillard81273902003-09-30 00:43:48 +000010772#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +000010773 } else {
10774 if ((ctxt->sax != NULL) &&
10775 (ctxt->sax->endElement != NULL) &&
10776 (!ctxt->disableSAX))
10777 ctxt->sax->endElement(ctxt->userData, name);
Daniel Veillard81273902003-09-30 00:43:48 +000010778#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarda880b122003-04-21 21:36:41 +000010779 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000010780 spacePop(ctxt);
10781 if (ctxt->nameNr == 0) {
Daniel Veillarda880b122003-04-21 21:36:41 +000010782 ctxt->instate = XML_PARSER_EPILOG;
Daniel Veillarda880b122003-04-21 21:36:41 +000010783 } else {
10784 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillarda880b122003-04-21 21:36:41 +000010785 }
10786 break;
10787 }
10788 if (RAW == '>') {
10789 NEXT;
10790 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +000010791 xmlFatalErrMsgStr(ctxt, XML_ERR_GT_REQUIRED,
Daniel Veillarda880b122003-04-21 21:36:41 +000010792 "Couldn't find end of Start Tag %s\n",
10793 name);
Daniel Veillarda880b122003-04-21 21:36:41 +000010794 nodePop(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +000010795 spacePop(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +000010796 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000010797 if (ctxt->sax2)
10798 nameNsPush(ctxt, name, prefix, URI, ctxt->nsNr - nsNr);
Daniel Veillard81273902003-09-30 00:43:48 +000010799#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +000010800 else
10801 namePush(ctxt, name);
Daniel Veillard81273902003-09-30 00:43:48 +000010802#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +000010803
Daniel Veillarda880b122003-04-21 21:36:41 +000010804 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillarda880b122003-04-21 21:36:41 +000010805 break;
10806 }
10807 case XML_PARSER_CONTENT: {
10808 const xmlChar *test;
10809 unsigned int cons;
10810 if ((avail < 2) && (ctxt->inputNr == 1))
10811 goto done;
10812 cur = ctxt->input->cur[0];
10813 next = ctxt->input->cur[1];
10814
10815 test = CUR_PTR;
10816 cons = ctxt->input->consumed;
10817 if ((cur == '<') && (next == '/')) {
10818 ctxt->instate = XML_PARSER_END_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +000010819 break;
10820 } else if ((cur == '<') && (next == '?')) {
10821 if ((!terminate) &&
10822 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
10823 goto done;
Daniel Veillarda880b122003-04-21 21:36:41 +000010824 xmlParsePI(ctxt);
10825 } else if ((cur == '<') && (next != '!')) {
10826 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +000010827 break;
10828 } else if ((cur == '<') && (next == '!') &&
10829 (ctxt->input->cur[2] == '-') &&
10830 (ctxt->input->cur[3] == '-')) {
Daniel Veillard6974feb2006-02-05 02:43:36 +000010831 int term;
10832
10833 if (avail < 4)
10834 goto done;
10835 ctxt->input->cur += 4;
10836 term = xmlParseLookupSequence(ctxt, '-', '-', '>');
10837 ctxt->input->cur -= 4;
10838 if ((!terminate) && (term < 0))
Daniel Veillarda880b122003-04-21 21:36:41 +000010839 goto done;
Daniel Veillarda880b122003-04-21 21:36:41 +000010840 xmlParseComment(ctxt);
10841 ctxt->instate = XML_PARSER_CONTENT;
10842 } else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
10843 (ctxt->input->cur[2] == '[') &&
10844 (ctxt->input->cur[3] == 'C') &&
10845 (ctxt->input->cur[4] == 'D') &&
10846 (ctxt->input->cur[5] == 'A') &&
10847 (ctxt->input->cur[6] == 'T') &&
10848 (ctxt->input->cur[7] == 'A') &&
10849 (ctxt->input->cur[8] == '[')) {
10850 SKIP(9);
10851 ctxt->instate = XML_PARSER_CDATA_SECTION;
Daniel Veillarda880b122003-04-21 21:36:41 +000010852 break;
10853 } else if ((cur == '<') && (next == '!') &&
10854 (avail < 9)) {
10855 goto done;
10856 } else if (cur == '&') {
10857 if ((!terminate) &&
10858 (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0))
10859 goto done;
Daniel Veillarda880b122003-04-21 21:36:41 +000010860 xmlParseReference(ctxt);
10861 } else {
10862 /* TODO Avoid the extra copy, handle directly !!! */
10863 /*
10864 * Goal of the following test is:
10865 * - minimize calls to the SAX 'character' callback
10866 * when they are mergeable
10867 * - handle an problem for isBlank when we only parse
10868 * a sequence of blank chars and the next one is
10869 * not available to check against '<' presence.
10870 * - tries to homogenize the differences in SAX
10871 * callbacks between the push and pull versions
10872 * of the parser.
10873 */
10874 if ((ctxt->inputNr == 1) &&
10875 (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
10876 if (!terminate) {
10877 if (ctxt->progressive) {
10878 if ((lastlt == NULL) ||
10879 (ctxt->input->cur > lastlt))
10880 goto done;
10881 } else if (xmlParseLookupSequence(ctxt,
10882 '<', 0, 0) < 0) {
10883 goto done;
10884 }
10885 }
10886 }
10887 ctxt->checkIndex = 0;
Daniel Veillarda880b122003-04-21 21:36:41 +000010888 xmlParseCharData(ctxt, 0);
10889 }
10890 /*
10891 * Pop-up of finished entities.
10892 */
10893 while ((RAW == 0) && (ctxt->inputNr > 1))
10894 xmlPopInput(ctxt);
10895 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010896 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
10897 "detected an error in element content\n");
Daniel Veillarda880b122003-04-21 21:36:41 +000010898 ctxt->instate = XML_PARSER_EOF;
10899 break;
10900 }
10901 break;
10902 }
10903 case XML_PARSER_END_TAG:
10904 if (avail < 2)
10905 goto done;
10906 if (!terminate) {
10907 if (ctxt->progressive) {
Daniel Veillardeb70f932004-07-05 16:46:09 +000010908 /* > can be found unescaped in attribute values */
10909 if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
Daniel Veillarda880b122003-04-21 21:36:41 +000010910 goto done;
10911 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
10912 goto done;
10913 }
10914 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000010915 if (ctxt->sax2) {
10916 xmlParseEndTag2(ctxt,
10917 (void *) ctxt->pushTab[ctxt->nameNr * 3 - 3],
10918 (void *) ctxt->pushTab[ctxt->nameNr * 3 - 2], 0,
Daniel Veillardc82c57e2004-01-12 16:24:34 +000010919 (int) (long) ctxt->pushTab[ctxt->nameNr * 3 - 1], 0);
Daniel Veillarde57ec792003-09-10 10:50:59 +000010920 nameNsPop(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +000010921 }
10922#ifdef LIBXML_SAX1_ENABLED
10923 else
Daniel Veillarde57ec792003-09-10 10:50:59 +000010924 xmlParseEndTag1(ctxt, 0);
Daniel Veillard81273902003-09-30 00:43:48 +000010925#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +000010926 if (ctxt->nameNr == 0) {
Daniel Veillarda880b122003-04-21 21:36:41 +000010927 ctxt->instate = XML_PARSER_EPILOG;
Daniel Veillarda880b122003-04-21 21:36:41 +000010928 } else {
10929 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillarda880b122003-04-21 21:36:41 +000010930 }
10931 break;
10932 case XML_PARSER_CDATA_SECTION: {
10933 /*
10934 * The Push mode need to have the SAX callback for
10935 * cdataBlock merge back contiguous callbacks.
10936 */
10937 int base;
10938
10939 base = xmlParseLookupSequence(ctxt, ']', ']', '>');
10940 if (base < 0) {
10941 if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) {
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000010942 int tmp;
10943
10944 tmp = xmlCheckCdataPush(ctxt->input->cur,
10945 XML_PARSER_BIG_BUFFER_SIZE);
10946 if (tmp < 0) {
10947 tmp = -tmp;
10948 ctxt->input->cur += tmp;
10949 goto encoding_error;
10950 }
Daniel Veillarda880b122003-04-21 21:36:41 +000010951 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
10952 if (ctxt->sax->cdataBlock != NULL)
Daniel Veillardd9d32ae2003-07-05 20:32:43 +000010953 ctxt->sax->cdataBlock(ctxt->userData,
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000010954 ctxt->input->cur, tmp);
Daniel Veillardd9d32ae2003-07-05 20:32:43 +000010955 else if (ctxt->sax->characters != NULL)
10956 ctxt->sax->characters(ctxt->userData,
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000010957 ctxt->input->cur, tmp);
Daniel Veillarda880b122003-04-21 21:36:41 +000010958 }
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000010959 SKIPL(tmp);
Daniel Veillarda880b122003-04-21 21:36:41 +000010960 ctxt->checkIndex = 0;
10961 }
10962 goto done;
10963 } else {
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000010964 int tmp;
10965
10966 tmp = xmlCheckCdataPush(ctxt->input->cur, base);
10967 if ((tmp < 0) || (tmp != base)) {
10968 tmp = -tmp;
10969 ctxt->input->cur += tmp;
10970 goto encoding_error;
10971 }
Daniel Veillardd0d2f092008-03-07 16:50:21 +000010972 if ((ctxt->sax != NULL) && (base == 0) &&
10973 (ctxt->sax->cdataBlock != NULL) &&
10974 (!ctxt->disableSAX)) {
10975 /*
10976 * Special case to provide identical behaviour
10977 * between pull and push parsers on enpty CDATA
10978 * sections
10979 */
10980 if ((ctxt->input->cur - ctxt->input->base >= 9) &&
10981 (!strncmp((const char *)&ctxt->input->cur[-9],
10982 "<![CDATA[", 9)))
10983 ctxt->sax->cdataBlock(ctxt->userData,
10984 BAD_CAST "", 0);
10985 } else if ((ctxt->sax != NULL) && (base > 0) &&
Daniel Veillarda880b122003-04-21 21:36:41 +000010986 (!ctxt->disableSAX)) {
10987 if (ctxt->sax->cdataBlock != NULL)
10988 ctxt->sax->cdataBlock(ctxt->userData,
10989 ctxt->input->cur, base);
Daniel Veillardd9d32ae2003-07-05 20:32:43 +000010990 else if (ctxt->sax->characters != NULL)
10991 ctxt->sax->characters(ctxt->userData,
10992 ctxt->input->cur, base);
Daniel Veillarda880b122003-04-21 21:36:41 +000010993 }
Daniel Veillard0b787f32004-03-26 17:29:53 +000010994 SKIPL(base + 3);
Daniel Veillarda880b122003-04-21 21:36:41 +000010995 ctxt->checkIndex = 0;
10996 ctxt->instate = XML_PARSER_CONTENT;
10997#ifdef DEBUG_PUSH
10998 xmlGenericError(xmlGenericErrorContext,
10999 "PP: entering CONTENT\n");
11000#endif
11001 }
11002 break;
11003 }
Owen Taylor3473f882001-02-23 17:55:21 +000011004 case XML_PARSER_MISC:
11005 SKIP_BLANKS;
11006 if (ctxt->input->buf == NULL)
Daniel Veillarda880b122003-04-21 21:36:41 +000011007 avail = ctxt->input->length -
11008 (ctxt->input->cur - ctxt->input->base);
Owen Taylor3473f882001-02-23 17:55:21 +000011009 else
Daniel Veillarda880b122003-04-21 21:36:41 +000011010 avail = ctxt->input->buf->buffer->use -
11011 (ctxt->input->cur - ctxt->input->base);
Owen Taylor3473f882001-02-23 17:55:21 +000011012 if (avail < 2)
11013 goto done;
11014 cur = ctxt->input->cur[0];
11015 next = ctxt->input->cur[1];
11016 if ((cur == '<') && (next == '?')) {
11017 if ((!terminate) &&
11018 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
11019 goto done;
11020#ifdef DEBUG_PUSH
11021 xmlGenericError(xmlGenericErrorContext,
11022 "PP: Parsing PI\n");
11023#endif
11024 xmlParsePI(ctxt);
Daniel Veillard40e4b212007-06-12 14:46:40 +000011025 ctxt->checkIndex = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000011026 } else if ((cur == '<') && (next == '!') &&
Daniel Veillarda880b122003-04-21 21:36:41 +000011027 (ctxt->input->cur[2] == '-') &&
11028 (ctxt->input->cur[3] == '-')) {
Owen Taylor3473f882001-02-23 17:55:21 +000011029 if ((!terminate) &&
11030 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
11031 goto done;
11032#ifdef DEBUG_PUSH
11033 xmlGenericError(xmlGenericErrorContext,
11034 "PP: Parsing Comment\n");
11035#endif
11036 xmlParseComment(ctxt);
11037 ctxt->instate = XML_PARSER_MISC;
Daniel Veillarddfac9462007-06-12 14:44:32 +000011038 ctxt->checkIndex = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000011039 } else if ((cur == '<') && (next == '!') &&
Daniel Veillarda880b122003-04-21 21:36:41 +000011040 (ctxt->input->cur[2] == 'D') &&
11041 (ctxt->input->cur[3] == 'O') &&
11042 (ctxt->input->cur[4] == 'C') &&
11043 (ctxt->input->cur[5] == 'T') &&
11044 (ctxt->input->cur[6] == 'Y') &&
11045 (ctxt->input->cur[7] == 'P') &&
Owen Taylor3473f882001-02-23 17:55:21 +000011046 (ctxt->input->cur[8] == 'E')) {
11047 if ((!terminate) &&
11048 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
11049 goto done;
11050#ifdef DEBUG_PUSH
11051 xmlGenericError(xmlGenericErrorContext,
11052 "PP: Parsing internal subset\n");
11053#endif
11054 ctxt->inSubset = 1;
11055 xmlParseDocTypeDecl(ctxt);
11056 if (RAW == '[') {
11057 ctxt->instate = XML_PARSER_DTD;
11058#ifdef DEBUG_PUSH
11059 xmlGenericError(xmlGenericErrorContext,
11060 "PP: entering DTD\n");
11061#endif
11062 } else {
11063 /*
11064 * Create and update the external subset.
11065 */
11066 ctxt->inSubset = 2;
11067 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
11068 (ctxt->sax->externalSubset != NULL))
11069 ctxt->sax->externalSubset(ctxt->userData,
11070 ctxt->intSubName, ctxt->extSubSystem,
11071 ctxt->extSubURI);
11072 ctxt->inSubset = 0;
Daniel Veillardac4118d2008-01-11 05:27:32 +000011073 xmlCleanSpecialAttr(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011074 ctxt->instate = XML_PARSER_PROLOG;
11075#ifdef DEBUG_PUSH
11076 xmlGenericError(xmlGenericErrorContext,
11077 "PP: entering PROLOG\n");
11078#endif
11079 }
11080 } else if ((cur == '<') && (next == '!') &&
11081 (avail < 9)) {
11082 goto done;
11083 } else {
11084 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +000011085 ctxt->progressive = 1;
11086 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Owen Taylor3473f882001-02-23 17:55:21 +000011087#ifdef DEBUG_PUSH
11088 xmlGenericError(xmlGenericErrorContext,
11089 "PP: entering START_TAG\n");
11090#endif
11091 }
11092 break;
Owen Taylor3473f882001-02-23 17:55:21 +000011093 case XML_PARSER_PROLOG:
11094 SKIP_BLANKS;
11095 if (ctxt->input->buf == NULL)
11096 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
11097 else
11098 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
11099 if (avail < 2)
11100 goto done;
11101 cur = ctxt->input->cur[0];
11102 next = ctxt->input->cur[1];
11103 if ((cur == '<') && (next == '?')) {
11104 if ((!terminate) &&
11105 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
11106 goto done;
11107#ifdef DEBUG_PUSH
11108 xmlGenericError(xmlGenericErrorContext,
11109 "PP: Parsing PI\n");
11110#endif
11111 xmlParsePI(ctxt);
11112 } else if ((cur == '<') && (next == '!') &&
11113 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
11114 if ((!terminate) &&
11115 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
11116 goto done;
11117#ifdef DEBUG_PUSH
11118 xmlGenericError(xmlGenericErrorContext,
11119 "PP: Parsing Comment\n");
11120#endif
11121 xmlParseComment(ctxt);
11122 ctxt->instate = XML_PARSER_PROLOG;
11123 } else if ((cur == '<') && (next == '!') &&
11124 (avail < 4)) {
11125 goto done;
11126 } else {
11127 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillard0df3bc32004-06-08 12:03:41 +000011128 if (ctxt->progressive == 0)
11129 ctxt->progressive = 1;
Daniel Veillarda880b122003-04-21 21:36:41 +000011130 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Owen Taylor3473f882001-02-23 17:55:21 +000011131#ifdef DEBUG_PUSH
11132 xmlGenericError(xmlGenericErrorContext,
11133 "PP: entering START_TAG\n");
11134#endif
11135 }
11136 break;
11137 case XML_PARSER_EPILOG:
11138 SKIP_BLANKS;
11139 if (ctxt->input->buf == NULL)
11140 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
11141 else
11142 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
11143 if (avail < 2)
11144 goto done;
11145 cur = ctxt->input->cur[0];
11146 next = ctxt->input->cur[1];
11147 if ((cur == '<') && (next == '?')) {
11148 if ((!terminate) &&
11149 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
11150 goto done;
11151#ifdef DEBUG_PUSH
11152 xmlGenericError(xmlGenericErrorContext,
11153 "PP: Parsing PI\n");
11154#endif
11155 xmlParsePI(ctxt);
11156 ctxt->instate = XML_PARSER_EPILOG;
11157 } else if ((cur == '<') && (next == '!') &&
11158 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
11159 if ((!terminate) &&
11160 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
11161 goto done;
11162#ifdef DEBUG_PUSH
11163 xmlGenericError(xmlGenericErrorContext,
11164 "PP: Parsing Comment\n");
11165#endif
11166 xmlParseComment(ctxt);
11167 ctxt->instate = XML_PARSER_EPILOG;
11168 } else if ((cur == '<') && (next == '!') &&
11169 (avail < 4)) {
11170 goto done;
11171 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011172 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011173 ctxt->instate = XML_PARSER_EOF;
11174#ifdef DEBUG_PUSH
11175 xmlGenericError(xmlGenericErrorContext,
11176 "PP: entering EOF\n");
11177#endif
Daniel Veillard8d24cc12002-03-05 15:41:29 +000011178 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +000011179 ctxt->sax->endDocument(ctxt->userData);
11180 goto done;
11181 }
11182 break;
Owen Taylor3473f882001-02-23 17:55:21 +000011183 case XML_PARSER_DTD: {
11184 /*
11185 * Sorry but progressive parsing of the internal subset
11186 * is not expected to be supported. We first check that
11187 * the full content of the internal subset is available and
11188 * the parsing is launched only at that point.
11189 * Internal subset ends up with "']' S? '>'" in an unescaped
11190 * section and not in a ']]>' sequence which are conditional
11191 * sections (whoever argued to keep that crap in XML deserve
11192 * a place in hell !).
11193 */
11194 int base, i;
11195 xmlChar *buf;
11196 xmlChar quote = 0;
11197
11198 base = ctxt->input->cur - ctxt->input->base;
11199 if (base < 0) return(0);
11200 if (ctxt->checkIndex > base)
11201 base = ctxt->checkIndex;
11202 buf = ctxt->input->buf->buffer->content;
11203 for (;(unsigned int) base < ctxt->input->buf->buffer->use;
11204 base++) {
11205 if (quote != 0) {
11206 if (buf[base] == quote)
11207 quote = 0;
11208 continue;
11209 }
Daniel Veillard036143b2004-02-12 11:57:52 +000011210 if ((quote == 0) && (buf[base] == '<')) {
11211 int found = 0;
11212 /* special handling of comments */
11213 if (((unsigned int) base + 4 <
11214 ctxt->input->buf->buffer->use) &&
11215 (buf[base + 1] == '!') &&
11216 (buf[base + 2] == '-') &&
11217 (buf[base + 3] == '-')) {
11218 for (;(unsigned int) base + 3 <
11219 ctxt->input->buf->buffer->use; base++) {
11220 if ((buf[base] == '-') &&
11221 (buf[base + 1] == '-') &&
11222 (buf[base + 2] == '>')) {
11223 found = 1;
11224 base += 2;
11225 break;
11226 }
11227 }
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000011228 if (!found) {
11229#if 0
11230 fprintf(stderr, "unfinished comment\n");
11231#endif
11232 break; /* for */
11233 }
Daniel Veillard036143b2004-02-12 11:57:52 +000011234 continue;
11235 }
11236 }
Owen Taylor3473f882001-02-23 17:55:21 +000011237 if (buf[base] == '"') {
11238 quote = '"';
11239 continue;
11240 }
11241 if (buf[base] == '\'') {
11242 quote = '\'';
11243 continue;
11244 }
11245 if (buf[base] == ']') {
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000011246#if 0
11247 fprintf(stderr, "%c%c%c%c: ", buf[base],
11248 buf[base + 1], buf[base + 2], buf[base + 3]);
11249#endif
Owen Taylor3473f882001-02-23 17:55:21 +000011250 if ((unsigned int) base +1 >=
11251 ctxt->input->buf->buffer->use)
11252 break;
11253 if (buf[base + 1] == ']') {
11254 /* conditional crap, skip both ']' ! */
11255 base++;
11256 continue;
11257 }
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000011258 for (i = 1;
Owen Taylor3473f882001-02-23 17:55:21 +000011259 (unsigned int) base + i < ctxt->input->buf->buffer->use;
11260 i++) {
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000011261 if (buf[base + i] == '>') {
11262#if 0
11263 fprintf(stderr, "found\n");
11264#endif
Owen Taylor3473f882001-02-23 17:55:21 +000011265 goto found_end_int_subset;
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000011266 }
11267 if (!IS_BLANK_CH(buf[base + i])) {
11268#if 0
11269 fprintf(stderr, "not found\n");
11270#endif
11271 goto not_end_of_int_subset;
11272 }
Owen Taylor3473f882001-02-23 17:55:21 +000011273 }
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000011274#if 0
11275 fprintf(stderr, "end of stream\n");
11276#endif
Owen Taylor3473f882001-02-23 17:55:21 +000011277 break;
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000011278
Owen Taylor3473f882001-02-23 17:55:21 +000011279 }
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000011280not_end_of_int_subset:
11281 continue; /* for */
Owen Taylor3473f882001-02-23 17:55:21 +000011282 }
11283 /*
11284 * We didn't found the end of the Internal subset
11285 */
Owen Taylor3473f882001-02-23 17:55:21 +000011286#ifdef DEBUG_PUSH
11287 if (next == 0)
11288 xmlGenericError(xmlGenericErrorContext,
11289 "PP: lookup of int subset end filed\n");
11290#endif
11291 goto done;
11292
11293found_end_int_subset:
11294 xmlParseInternalSubset(ctxt);
11295 ctxt->inSubset = 2;
11296 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
11297 (ctxt->sax->externalSubset != NULL))
11298 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
11299 ctxt->extSubSystem, ctxt->extSubURI);
11300 ctxt->inSubset = 0;
Daniel Veillardac4118d2008-01-11 05:27:32 +000011301 xmlCleanSpecialAttr(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011302 ctxt->instate = XML_PARSER_PROLOG;
11303 ctxt->checkIndex = 0;
11304#ifdef DEBUG_PUSH
11305 xmlGenericError(xmlGenericErrorContext,
11306 "PP: entering PROLOG\n");
11307#endif
11308 break;
11309 }
11310 case XML_PARSER_COMMENT:
11311 xmlGenericError(xmlGenericErrorContext,
11312 "PP: internal error, state == COMMENT\n");
11313 ctxt->instate = XML_PARSER_CONTENT;
11314#ifdef DEBUG_PUSH
11315 xmlGenericError(xmlGenericErrorContext,
11316 "PP: entering CONTENT\n");
11317#endif
11318 break;
Daniel Veillarda880b122003-04-21 21:36:41 +000011319 case XML_PARSER_IGNORE:
11320 xmlGenericError(xmlGenericErrorContext,
11321 "PP: internal error, state == IGNORE");
11322 ctxt->instate = XML_PARSER_DTD;
11323#ifdef DEBUG_PUSH
11324 xmlGenericError(xmlGenericErrorContext,
11325 "PP: entering DTD\n");
11326#endif
11327 break;
Owen Taylor3473f882001-02-23 17:55:21 +000011328 case XML_PARSER_PI:
11329 xmlGenericError(xmlGenericErrorContext,
11330 "PP: internal error, state == PI\n");
11331 ctxt->instate = XML_PARSER_CONTENT;
11332#ifdef DEBUG_PUSH
11333 xmlGenericError(xmlGenericErrorContext,
11334 "PP: entering CONTENT\n");
11335#endif
11336 break;
11337 case XML_PARSER_ENTITY_DECL:
11338 xmlGenericError(xmlGenericErrorContext,
11339 "PP: internal error, state == ENTITY_DECL\n");
11340 ctxt->instate = XML_PARSER_DTD;
11341#ifdef DEBUG_PUSH
11342 xmlGenericError(xmlGenericErrorContext,
11343 "PP: entering DTD\n");
11344#endif
11345 break;
11346 case XML_PARSER_ENTITY_VALUE:
11347 xmlGenericError(xmlGenericErrorContext,
11348 "PP: internal error, state == ENTITY_VALUE\n");
11349 ctxt->instate = XML_PARSER_CONTENT;
11350#ifdef DEBUG_PUSH
11351 xmlGenericError(xmlGenericErrorContext,
11352 "PP: entering DTD\n");
11353#endif
11354 break;
11355 case XML_PARSER_ATTRIBUTE_VALUE:
11356 xmlGenericError(xmlGenericErrorContext,
11357 "PP: internal error, state == ATTRIBUTE_VALUE\n");
11358 ctxt->instate = XML_PARSER_START_TAG;
11359#ifdef DEBUG_PUSH
11360 xmlGenericError(xmlGenericErrorContext,
11361 "PP: entering START_TAG\n");
11362#endif
11363 break;
11364 case XML_PARSER_SYSTEM_LITERAL:
11365 xmlGenericError(xmlGenericErrorContext,
11366 "PP: internal error, state == SYSTEM_LITERAL\n");
11367 ctxt->instate = XML_PARSER_START_TAG;
11368#ifdef DEBUG_PUSH
11369 xmlGenericError(xmlGenericErrorContext,
11370 "PP: entering START_TAG\n");
11371#endif
11372 break;
Daniel Veillard4a7ae502002-02-18 19:18:17 +000011373 case XML_PARSER_PUBLIC_LITERAL:
11374 xmlGenericError(xmlGenericErrorContext,
11375 "PP: internal error, state == PUBLIC_LITERAL\n");
11376 ctxt->instate = XML_PARSER_START_TAG;
11377#ifdef DEBUG_PUSH
11378 xmlGenericError(xmlGenericErrorContext,
11379 "PP: entering START_TAG\n");
11380#endif
11381 break;
Owen Taylor3473f882001-02-23 17:55:21 +000011382 }
11383 }
11384done:
11385#ifdef DEBUG_PUSH
11386 xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret);
11387#endif
11388 return(ret);
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011389encoding_error:
11390 {
11391 char buffer[150];
11392
11393 snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
11394 ctxt->input->cur[0], ctxt->input->cur[1],
11395 ctxt->input->cur[2], ctxt->input->cur[3]);
11396 __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
11397 "Input is not proper UTF-8, indicate encoding !\n%s",
11398 BAD_CAST buffer, NULL);
11399 }
11400 return(0);
Owen Taylor3473f882001-02-23 17:55:21 +000011401}
11402
11403/**
Owen Taylor3473f882001-02-23 17:55:21 +000011404 * xmlParseChunk:
11405 * @ctxt: an XML parser context
11406 * @chunk: an char array
11407 * @size: the size in byte of the chunk
11408 * @terminate: last chunk indicator
11409 *
11410 * Parse a Chunk of memory
11411 *
11412 * Returns zero if no error, the xmlParserErrors otherwise.
11413 */
11414int
11415xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
11416 int terminate) {
Daniel Veillarda617e242006-01-09 14:38:44 +000011417 int end_in_lf = 0;
11418
Daniel Veillard36e5cd52004-11-02 14:52:23 +000011419 if (ctxt == NULL)
11420 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard14412512005-01-21 23:53:26 +000011421 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000011422 return(ctxt->errNo);
Daniel Veillard309f81d2003-09-23 09:02:53 +000011423 if (ctxt->instate == XML_PARSER_START)
11424 xmlDetectSAX2(ctxt);
Daniel Veillarda617e242006-01-09 14:38:44 +000011425 if ((size > 0) && (chunk != NULL) && (!terminate) &&
11426 (chunk[size - 1] == '\r')) {
11427 end_in_lf = 1;
11428 size--;
11429 }
Owen Taylor3473f882001-02-23 17:55:21 +000011430 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
11431 (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) {
11432 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
11433 int cur = ctxt->input->cur - ctxt->input->base;
William M. Bracka3215c72004-07-31 16:24:01 +000011434 int res;
Owen Taylor3473f882001-02-23 17:55:21 +000011435
William M. Bracka3215c72004-07-31 16:24:01 +000011436 res =xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
11437 if (res < 0) {
11438 ctxt->errNo = XML_PARSER_EOF;
11439 ctxt->disableSAX = 1;
11440 return (XML_PARSER_EOF);
11441 }
Owen Taylor3473f882001-02-23 17:55:21 +000011442 ctxt->input->base = ctxt->input->buf->buffer->content + base;
11443 ctxt->input->cur = ctxt->input->base + cur;
Daniel Veillard48b2f892001-02-25 16:11:03 +000011444 ctxt->input->end =
11445 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000011446#ifdef DEBUG_PUSH
11447 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
11448#endif
11449
Owen Taylor3473f882001-02-23 17:55:21 +000011450 } else if (ctxt->instate != XML_PARSER_EOF) {
11451 if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
11452 xmlParserInputBufferPtr in = ctxt->input->buf;
11453 if ((in->encoder != NULL) && (in->buffer != NULL) &&
11454 (in->raw != NULL)) {
11455 int nbchars;
11456
11457 nbchars = xmlCharEncInFunc(in->encoder, in->buffer, in->raw);
11458 if (nbchars < 0) {
Daniel Veillard24eb9782003-10-04 21:08:09 +000011459 /* TODO 2.6.0 */
Owen Taylor3473f882001-02-23 17:55:21 +000011460 xmlGenericError(xmlGenericErrorContext,
11461 "xmlParseChunk: encoder error\n");
11462 return(XML_ERR_INVALID_ENCODING);
11463 }
11464 }
11465 }
11466 }
11467 xmlParseTryOrFinish(ctxt, terminate);
Daniel Veillarda617e242006-01-09 14:38:44 +000011468 if ((end_in_lf == 1) && (ctxt->input != NULL) &&
11469 (ctxt->input->buf != NULL)) {
11470 xmlParserInputBufferPush(ctxt->input->buf, 1, "\r");
11471 }
Daniel Veillard14412512005-01-21 23:53:26 +000011472 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000011473 return(ctxt->errNo);
Owen Taylor3473f882001-02-23 17:55:21 +000011474 if (terminate) {
11475 /*
11476 * Check for termination
11477 */
Daniel Veillard36e5cd52004-11-02 14:52:23 +000011478 int avail = 0;
11479
11480 if (ctxt->input != NULL) {
Daniel Veillard819d5cb2002-10-14 11:15:18 +000011481 if (ctxt->input->buf == NULL)
Daniel Veillard36e5cd52004-11-02 14:52:23 +000011482 avail = ctxt->input->length -
11483 (ctxt->input->cur - ctxt->input->base);
11484 else
11485 avail = ctxt->input->buf->buffer->use -
11486 (ctxt->input->cur - ctxt->input->base);
11487 }
Daniel Veillard819d5cb2002-10-14 11:15:18 +000011488
Owen Taylor3473f882001-02-23 17:55:21 +000011489 if ((ctxt->instate != XML_PARSER_EOF) &&
11490 (ctxt->instate != XML_PARSER_EPILOG)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011491 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011492 }
Daniel Veillard819d5cb2002-10-14 11:15:18 +000011493 if ((ctxt->instate == XML_PARSER_EPILOG) && (avail > 0)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011494 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Daniel Veillard819d5cb2002-10-14 11:15:18 +000011495 }
Owen Taylor3473f882001-02-23 17:55:21 +000011496 if (ctxt->instate != XML_PARSER_EOF) {
Daniel Veillard8d24cc12002-03-05 15:41:29 +000011497 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +000011498 ctxt->sax->endDocument(ctxt->userData);
11499 }
11500 ctxt->instate = XML_PARSER_EOF;
11501 }
11502 return((xmlParserErrors) ctxt->errNo);
11503}
11504
/************************************************************************
 *									*
 *		I/O front end functions to the parser			*
 *									*
 ************************************************************************/
11510
11511/**
Owen Taylor3473f882001-02-23 17:55:21 +000011512 * xmlCreatePushParserCtxt:
11513 * @sax: a SAX handler
11514 * @user_data: The user data returned on SAX callbacks
11515 * @chunk: a pointer to an array of chars
11516 * @size: number of chars in the array
11517 * @filename: an optional file name or URI
11518 *
Daniel Veillard176d99f2002-07-06 19:22:28 +000011519 * Create a parser context for using the XML parser in push mode.
11520 * If @buffer and @size are non-NULL, the data is used to detect
11521 * the encoding. The remaining characters will be parsed so they
11522 * don't need to be fed in again through xmlParseChunk.
Owen Taylor3473f882001-02-23 17:55:21 +000011523 * To allow content encoding detection, @size should be >= 4
11524 * The value of @filename is used for fetching external entities
11525 * and error/warning reports.
11526 *
11527 * Returns the new parser context or NULL
11528 */
Daniel Veillard176d99f2002-07-06 19:22:28 +000011529
Owen Taylor3473f882001-02-23 17:55:21 +000011530xmlParserCtxtPtr
11531xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
11532 const char *chunk, int size, const char *filename) {
11533 xmlParserCtxtPtr ctxt;
11534 xmlParserInputPtr inputStream;
11535 xmlParserInputBufferPtr buf;
11536 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
11537
11538 /*
11539 * plug some encoding conversion routines
11540 */
11541 if ((chunk != NULL) && (size >= 4))
11542 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
11543
11544 buf = xmlAllocParserInputBuffer(enc);
11545 if (buf == NULL) return(NULL);
11546
11547 ctxt = xmlNewParserCtxt();
11548 if (ctxt == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +000011549 xmlErrMemory(NULL, "creating parser: out of memory\n");
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000011550 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000011551 return(NULL);
11552 }
Daniel Veillard03a53c32004-10-26 16:06:51 +000011553 ctxt->dictNames = 1;
Daniel Veillarde57ec792003-09-10 10:50:59 +000011554 ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 * sizeof(xmlChar *));
11555 if (ctxt->pushTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011556 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +000011557 xmlFreeParserInputBuffer(buf);
11558 xmlFreeParserCtxt(ctxt);
11559 return(NULL);
11560 }
Owen Taylor3473f882001-02-23 17:55:21 +000011561 if (sax != NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +000011562#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard092643b2003-09-25 14:29:29 +000011563 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Daniel Veillard81273902003-09-30 00:43:48 +000011564#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000011565 xmlFree(ctxt->sax);
11566 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
11567 if (ctxt->sax == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011568 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000011569 xmlFreeParserInputBuffer(buf);
11570 xmlFreeParserCtxt(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011571 return(NULL);
11572 }
Daniel Veillard5ea30d72004-11-08 11:54:28 +000011573 memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
11574 if (sax->initialized == XML_SAX2_MAGIC)
11575 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
11576 else
11577 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
Owen Taylor3473f882001-02-23 17:55:21 +000011578 if (user_data != NULL)
11579 ctxt->userData = user_data;
11580 }
11581 if (filename == NULL) {
11582 ctxt->directory = NULL;
11583 } else {
11584 ctxt->directory = xmlParserGetDirectory(filename);
11585 }
11586
11587 inputStream = xmlNewInputStream(ctxt);
11588 if (inputStream == NULL) {
11589 xmlFreeParserCtxt(ctxt);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000011590 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000011591 return(NULL);
11592 }
11593
11594 if (filename == NULL)
11595 inputStream->filename = NULL;
William M. Bracka3215c72004-07-31 16:24:01 +000011596 else {
Daniel Veillardf4862f02002-09-10 11:13:43 +000011597 inputStream->filename = (char *)
Igor Zlatkovic5f9fada2003-02-19 14:51:00 +000011598 xmlCanonicPath((const xmlChar *) filename);
William M. Bracka3215c72004-07-31 16:24:01 +000011599 if (inputStream->filename == NULL) {
11600 xmlFreeParserCtxt(ctxt);
11601 xmlFreeParserInputBuffer(buf);
11602 return(NULL);
11603 }
11604 }
Owen Taylor3473f882001-02-23 17:55:21 +000011605 inputStream->buf = buf;
11606 inputStream->base = inputStream->buf->buffer->content;
11607 inputStream->cur = inputStream->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +000011608 inputStream->end =
11609 &inputStream->buf->buffer->content[inputStream->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000011610
11611 inputPush(ctxt, inputStream);
11612
William M. Brack3a1cd212005-02-11 14:35:54 +000011613 /*
11614 * If the caller didn't provide an initial 'chunk' for determining
11615 * the encoding, we set the context to XML_CHAR_ENCODING_NONE so
11616 * that it can be automatically determined later
11617 */
11618 if ((size == 0) || (chunk == NULL)) {
11619 ctxt->charset = XML_CHAR_ENCODING_NONE;
11620 } else if ((ctxt->input != NULL) && (ctxt->input->buf != NULL)) {
Daniel Veillardaa39a0f2002-01-06 12:47:22 +000011621 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
11622 int cur = ctxt->input->cur - ctxt->input->base;
11623
Owen Taylor3473f882001-02-23 17:55:21 +000011624 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
Daniel Veillardaa39a0f2002-01-06 12:47:22 +000011625
11626 ctxt->input->base = ctxt->input->buf->buffer->content + base;
11627 ctxt->input->cur = ctxt->input->base + cur;
11628 ctxt->input->end =
11629 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000011630#ifdef DEBUG_PUSH
11631 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
11632#endif
11633 }
11634
Daniel Veillard0e4cd172001-06-28 12:13:56 +000011635 if (enc != XML_CHAR_ENCODING_NONE) {
11636 xmlSwitchEncoding(ctxt, enc);
11637 }
11638
Owen Taylor3473f882001-02-23 17:55:21 +000011639 return(ctxt);
11640}
Daniel Veillard73b013f2003-09-30 12:36:01 +000011641#endif /* LIBXML_PUSH_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000011642
11643/**
Daniel Veillard39e5c892005-07-03 22:48:50 +000011644 * xmlStopParser:
11645 * @ctxt: an XML parser context
11646 *
11647 * Blocks further parser processing
11648 */
11649void
11650xmlStopParser(xmlParserCtxtPtr ctxt) {
11651 if (ctxt == NULL)
11652 return;
11653 ctxt->instate = XML_PARSER_EOF;
11654 ctxt->disableSAX = 1;
11655 if (ctxt->input != NULL) {
11656 ctxt->input->cur = BAD_CAST"";
11657 ctxt->input->base = ctxt->input->cur;
11658 }
11659}
11660
11661/**
Owen Taylor3473f882001-02-23 17:55:21 +000011662 * xmlCreateIOParserCtxt:
11663 * @sax: a SAX handler
11664 * @user_data: The user data returned on SAX callbacks
11665 * @ioread: an I/O read function
11666 * @ioclose: an I/O close function
11667 * @ioctx: an I/O handler
11668 * @enc: the charset encoding if known
11669 *
11670 * Create a parser context for using the XML parser with an existing
11671 * I/O stream
11672 *
11673 * Returns the new parser context or NULL
11674 */
11675xmlParserCtxtPtr
11676xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
11677 xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
11678 void *ioctx, xmlCharEncoding enc) {
11679 xmlParserCtxtPtr ctxt;
11680 xmlParserInputPtr inputStream;
11681 xmlParserInputBufferPtr buf;
Daniel Veillard42595322004-11-08 10:52:06 +000011682
11683 if (ioread == NULL) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011684
11685 buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
11686 if (buf == NULL) return(NULL);
11687
11688 ctxt = xmlNewParserCtxt();
11689 if (ctxt == NULL) {
Daniel Veillardf0af8ec2005-07-08 17:27:33 +000011690 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000011691 return(NULL);
11692 }
11693 if (sax != NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +000011694#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard092643b2003-09-25 14:29:29 +000011695 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Daniel Veillard81273902003-09-30 00:43:48 +000011696#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000011697 xmlFree(ctxt->sax);
11698 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
11699 if (ctxt->sax == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011700 xmlErrMemory(ctxt, NULL);
Daniel Veillardf0af8ec2005-07-08 17:27:33 +000011701 xmlFreeParserCtxt(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011702 return(NULL);
11703 }
Daniel Veillard5ea30d72004-11-08 11:54:28 +000011704 memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
11705 if (sax->initialized == XML_SAX2_MAGIC)
11706 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
11707 else
11708 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
Owen Taylor3473f882001-02-23 17:55:21 +000011709 if (user_data != NULL)
11710 ctxt->userData = user_data;
11711 }
11712
11713 inputStream = xmlNewIOInputStream(ctxt, buf, enc);
11714 if (inputStream == NULL) {
11715 xmlFreeParserCtxt(ctxt);
11716 return(NULL);
11717 }
11718 inputPush(ctxt, inputStream);
11719
11720 return(ctxt);
11721}
11722
Daniel Veillard4432df22003-09-28 18:58:27 +000011723#ifdef LIBXML_VALID_ENABLED
/************************************************************************
 *									*
 *		Front ends when parsing a DTD				*
 *									*
 ************************************************************************/
11729
11730/**
11731 * xmlIOParseDTD:
11732 * @sax: the SAX handler block or NULL
11733 * @input: an Input Buffer
11734 * @enc: the charset encoding if known
11735 *
11736 * Load and parse a DTD
11737 *
11738 * Returns the resulting xmlDtdPtr or NULL in case of error.
Daniel Veillard402b3442006-10-13 10:28:21 +000011739 * @input will be freed by the function in any case.
Owen Taylor3473f882001-02-23 17:55:21 +000011740 */
11741
11742xmlDtdPtr
11743xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
11744 xmlCharEncoding enc) {
11745 xmlDtdPtr ret = NULL;
11746 xmlParserCtxtPtr ctxt;
11747 xmlParserInputPtr pinput = NULL;
Daniel Veillard87a764e2001-06-20 17:41:10 +000011748 xmlChar start[4];
Owen Taylor3473f882001-02-23 17:55:21 +000011749
11750 if (input == NULL)
11751 return(NULL);
11752
11753 ctxt = xmlNewParserCtxt();
11754 if (ctxt == NULL) {
Daniel Veillard402b3442006-10-13 10:28:21 +000011755 xmlFreeParserInputBuffer(input);
Owen Taylor3473f882001-02-23 17:55:21 +000011756 return(NULL);
11757 }
11758
11759 /*
11760 * Set-up the SAX context
11761 */
11762 if (sax != NULL) {
11763 if (ctxt->sax != NULL)
11764 xmlFree(ctxt->sax);
11765 ctxt->sax = sax;
Daniel Veillard500a1de2004-03-22 15:22:58 +000011766 ctxt->userData = ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000011767 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000011768 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011769
11770 /*
11771 * generate a parser input from the I/O handler
11772 */
11773
Daniel Veillard43caefb2003-12-07 19:32:22 +000011774 pinput = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
Owen Taylor3473f882001-02-23 17:55:21 +000011775 if (pinput == NULL) {
11776 if (sax != NULL) ctxt->sax = NULL;
Daniel Veillard402b3442006-10-13 10:28:21 +000011777 xmlFreeParserInputBuffer(input);
Owen Taylor3473f882001-02-23 17:55:21 +000011778 xmlFreeParserCtxt(ctxt);
11779 return(NULL);
11780 }
11781
11782 /*
11783 * plug some encoding conversion routines here.
11784 */
Daniel Veillarda8f09ce2008-08-27 13:02:01 +000011785 if (xmlPushInput(ctxt, pinput) < 0) {
11786 if (sax != NULL) ctxt->sax = NULL;
11787 xmlFreeParserCtxt(ctxt);
11788 return(NULL);
11789 }
Daniel Veillard43caefb2003-12-07 19:32:22 +000011790 if (enc != XML_CHAR_ENCODING_NONE) {
11791 xmlSwitchEncoding(ctxt, enc);
11792 }
Owen Taylor3473f882001-02-23 17:55:21 +000011793
11794 pinput->filename = NULL;
11795 pinput->line = 1;
11796 pinput->col = 1;
11797 pinput->base = ctxt->input->cur;
11798 pinput->cur = ctxt->input->cur;
11799 pinput->free = NULL;
11800
11801 /*
11802 * let's parse that entity knowing it's an external subset.
11803 */
11804 ctxt->inSubset = 2;
11805 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
Daniel Veillardae0765b2008-07-31 19:54:59 +000011806 if (ctxt->myDoc == NULL) {
11807 xmlErrMemory(ctxt, "New Doc failed");
11808 return(NULL);
11809 }
11810 ctxt->myDoc->properties = XML_DOC_INTERNAL;
Owen Taylor3473f882001-02-23 17:55:21 +000011811 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
11812 BAD_CAST "none", BAD_CAST "none");
Daniel Veillard87a764e2001-06-20 17:41:10 +000011813
Daniel Veillard4aede2e2003-10-17 12:43:59 +000011814 if ((enc == XML_CHAR_ENCODING_NONE) &&
11815 ((ctxt->input->end - ctxt->input->cur) >= 4)) {
Daniel Veillard87a764e2001-06-20 17:41:10 +000011816 /*
11817 * Get the 4 first bytes and decode the charset
11818 * if enc != XML_CHAR_ENCODING_NONE
11819 * plug some encoding conversion routines.
11820 */
11821 start[0] = RAW;
11822 start[1] = NXT(1);
11823 start[2] = NXT(2);
11824 start[3] = NXT(3);
11825 enc = xmlDetectCharEncoding(start, 4);
11826 if (enc != XML_CHAR_ENCODING_NONE) {
11827 xmlSwitchEncoding(ctxt, enc);
11828 }
11829 }
11830
Owen Taylor3473f882001-02-23 17:55:21 +000011831 xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
11832
11833 if (ctxt->myDoc != NULL) {
11834 if (ctxt->wellFormed) {
11835 ret = ctxt->myDoc->extSubset;
11836 ctxt->myDoc->extSubset = NULL;
Daniel Veillard329456a2003-04-26 21:21:00 +000011837 if (ret != NULL) {
11838 xmlNodePtr tmp;
11839
11840 ret->doc = NULL;
11841 tmp = ret->children;
11842 while (tmp != NULL) {
11843 tmp->doc = NULL;
11844 tmp = tmp->next;
11845 }
11846 }
Owen Taylor3473f882001-02-23 17:55:21 +000011847 } else {
11848 ret = NULL;
11849 }
11850 xmlFreeDoc(ctxt->myDoc);
11851 ctxt->myDoc = NULL;
11852 }
11853 if (sax != NULL) ctxt->sax = NULL;
11854 xmlFreeParserCtxt(ctxt);
11855
11856 return(ret);
11857}
11858
11859/**
11860 * xmlSAXParseDTD:
11861 * @sax: the SAX handler block
11862 * @ExternalID: a NAME* containing the External ID of the DTD
11863 * @SystemID: a NAME* containing the URL to the DTD
11864 *
11865 * Load and parse an external subset.
11866 *
11867 * Returns the resulting xmlDtdPtr or NULL in case of error.
11868 */
11869
11870xmlDtdPtr
11871xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
11872 const xmlChar *SystemID) {
11873 xmlDtdPtr ret = NULL;
11874 xmlParserCtxtPtr ctxt;
11875 xmlParserInputPtr input = NULL;
11876 xmlCharEncoding enc;
Igor Zlatkovic07d59762004-08-24 19:12:51 +000011877 xmlChar* systemIdCanonic;
Owen Taylor3473f882001-02-23 17:55:21 +000011878
11879 if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
11880
11881 ctxt = xmlNewParserCtxt();
11882 if (ctxt == NULL) {
11883 return(NULL);
11884 }
11885
11886 /*
11887 * Set-up the SAX context
11888 */
11889 if (sax != NULL) {
11890 if (ctxt->sax != NULL)
11891 xmlFree(ctxt->sax);
11892 ctxt->sax = sax;
Daniel Veillardbf1e3d82003-08-14 23:57:26 +000011893 ctxt->userData = ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000011894 }
Igor Zlatkovic07d59762004-08-24 19:12:51 +000011895
11896 /*
11897 * Canonicalise the system ID
11898 */
11899 systemIdCanonic = xmlCanonicPath(SystemID);
Daniel Veillardc93a19f2004-10-04 11:53:20 +000011900 if ((SystemID != NULL) && (systemIdCanonic == NULL)) {
Igor Zlatkovic07d59762004-08-24 19:12:51 +000011901 xmlFreeParserCtxt(ctxt);
11902 return(NULL);
11903 }
Owen Taylor3473f882001-02-23 17:55:21 +000011904
11905 /*
11906 * Ask the Entity resolver to load the damn thing
11907 */
11908
11909 if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
Daniel Veillarda9557952006-10-12 12:53:15 +000011910 input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID,
11911 systemIdCanonic);
Owen Taylor3473f882001-02-23 17:55:21 +000011912 if (input == NULL) {
11913 if (sax != NULL) ctxt->sax = NULL;
11914 xmlFreeParserCtxt(ctxt);
Daniel Veillard34099b42004-11-04 17:34:35 +000011915 if (systemIdCanonic != NULL)
11916 xmlFree(systemIdCanonic);
Owen Taylor3473f882001-02-23 17:55:21 +000011917 return(NULL);
11918 }
11919
11920 /*
11921 * plug some encoding conversion routines here.
11922 */
Daniel Veillarda8f09ce2008-08-27 13:02:01 +000011923 if (xmlPushInput(ctxt, input) < 0) {
11924 if (sax != NULL) ctxt->sax = NULL;
11925 xmlFreeParserCtxt(ctxt);
11926 if (systemIdCanonic != NULL)
11927 xmlFree(systemIdCanonic);
11928 return(NULL);
11929 }
Daniel Veillard4aede2e2003-10-17 12:43:59 +000011930 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
11931 enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
11932 xmlSwitchEncoding(ctxt, enc);
11933 }
Owen Taylor3473f882001-02-23 17:55:21 +000011934
11935 if (input->filename == NULL)
Igor Zlatkovic07d59762004-08-24 19:12:51 +000011936 input->filename = (char *) systemIdCanonic;
11937 else
11938 xmlFree(systemIdCanonic);
Owen Taylor3473f882001-02-23 17:55:21 +000011939 input->line = 1;
11940 input->col = 1;
11941 input->base = ctxt->input->cur;
11942 input->cur = ctxt->input->cur;
11943 input->free = NULL;
11944
11945 /*
11946 * let's parse that entity knowing it's an external subset.
11947 */
11948 ctxt->inSubset = 2;
11949 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
Daniel Veillardae0765b2008-07-31 19:54:59 +000011950 if (ctxt->myDoc == NULL) {
11951 xmlErrMemory(ctxt, "New Doc failed");
11952 if (sax != NULL) ctxt->sax = NULL;
11953 xmlFreeParserCtxt(ctxt);
11954 return(NULL);
11955 }
11956 ctxt->myDoc->properties = XML_DOC_INTERNAL;
Owen Taylor3473f882001-02-23 17:55:21 +000011957 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
11958 ExternalID, SystemID);
11959 xmlParseExternalSubset(ctxt, ExternalID, SystemID);
11960
11961 if (ctxt->myDoc != NULL) {
11962 if (ctxt->wellFormed) {
11963 ret = ctxt->myDoc->extSubset;
11964 ctxt->myDoc->extSubset = NULL;
Daniel Veillardc5573462003-04-25 16:43:49 +000011965 if (ret != NULL) {
11966 xmlNodePtr tmp;
11967
11968 ret->doc = NULL;
11969 tmp = ret->children;
11970 while (tmp != NULL) {
11971 tmp->doc = NULL;
11972 tmp = tmp->next;
11973 }
11974 }
Owen Taylor3473f882001-02-23 17:55:21 +000011975 } else {
11976 ret = NULL;
11977 }
11978 xmlFreeDoc(ctxt->myDoc);
11979 ctxt->myDoc = NULL;
11980 }
11981 if (sax != NULL) ctxt->sax = NULL;
11982 xmlFreeParserCtxt(ctxt);
Daniel Veillard0161e632008-08-28 15:36:32 +000011983
Owen Taylor3473f882001-02-23 17:55:21 +000011984 return(ret);
11985}
11986
Daniel Veillard4432df22003-09-28 18:58:27 +000011987
Owen Taylor3473f882001-02-23 17:55:21 +000011988/**
11989 * xmlParseDTD:
11990 * @ExternalID: a NAME* containing the External ID of the DTD
11991 * @SystemID: a NAME* containing the URL to the DTD
11992 *
11993 * Load and parse an external subset.
Daniel Veillard0161e632008-08-28 15:36:32 +000011994 *
Owen Taylor3473f882001-02-23 17:55:21 +000011995 * Returns the resulting xmlDtdPtr or NULL in case of error.
11996 */
11997
11998xmlDtdPtr
11999xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
12000 return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
12001}
Daniel Veillard4432df22003-09-28 18:58:27 +000012002#endif /* LIBXML_VALID_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012003
/************************************************************************
 *									*
 *		Front ends when parsing an Entity			*
 *									*
 ************************************************************************/
12009
12010/**
Owen Taylor3473f882001-02-23 17:55:21 +000012011 * xmlParseCtxtExternalEntity:
12012 * @ctx: the existing parsing context
12013 * @URL: the URL for the entity to load
12014 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +000012015 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +000012016 *
12017 * Parse an external general entity within an existing parsing context
12018 * An external general parsed entity is well-formed if it matches the
12019 * production labeled extParsedEnt.
12020 *
12021 * [78] extParsedEnt ::= TextDecl? content
12022 *
12023 * Returns 0 if the entity is well formed, -1 in case of args problem and
12024 * the parser error code otherwise
12025 */
12026
12027int
12028xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
Daniel Veillardcda96922001-08-21 10:56:31 +000012029 const xmlChar *ID, xmlNodePtr *lst) {
Owen Taylor3473f882001-02-23 17:55:21 +000012030 xmlParserCtxtPtr ctxt;
12031 xmlDocPtr newDoc;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012032 xmlNodePtr newRoot;
Owen Taylor3473f882001-02-23 17:55:21 +000012033 xmlSAXHandlerPtr oldsax = NULL;
12034 int ret = 0;
Daniel Veillard87a764e2001-06-20 17:41:10 +000012035 xmlChar start[4];
12036 xmlCharEncoding enc;
Daniel Veillard2937b3a2006-10-10 08:52:34 +000012037 xmlParserInputPtr inputStream;
12038 char *directory = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000012039
Daniel Veillardce682bc2004-11-05 17:22:25 +000012040 if (ctx == NULL) return(-1);
12041
Daniel Veillard0161e632008-08-28 15:36:32 +000012042 if (((ctx->depth > 40) && ((ctx->options & XML_PARSE_HUGE) == 0)) ||
12043 (ctx->depth > 1024)) {
Owen Taylor3473f882001-02-23 17:55:21 +000012044 return(XML_ERR_ENTITY_LOOP);
12045 }
12046
Daniel Veillardcda96922001-08-21 10:56:31 +000012047 if (lst != NULL)
12048 *lst = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000012049 if ((URL == NULL) && (ID == NULL))
12050 return(-1);
12051 if (ctx->myDoc == NULL) /* @@ relax but check for dereferences */
12052 return(-1);
12053
Daniel Veillard2937b3a2006-10-10 08:52:34 +000012054 ctxt = xmlNewParserCtxt();
12055 if (ctxt == NULL) {
12056 return(-1);
12057 }
Daniel Veillard40ec29a2008-07-30 12:35:40 +000012058
Owen Taylor3473f882001-02-23 17:55:21 +000012059 ctxt->userData = ctxt;
Daniel Veillarde2830f12003-01-08 17:47:49 +000012060 ctxt->_private = ctx->_private;
Daniel Veillard40ec29a2008-07-30 12:35:40 +000012061
Daniel Veillard2937b3a2006-10-10 08:52:34 +000012062 inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
12063 if (inputStream == NULL) {
12064 xmlFreeParserCtxt(ctxt);
12065 return(-1);
12066 }
12067
12068 inputPush(ctxt, inputStream);
12069
12070 if ((ctxt->directory == NULL) && (directory == NULL))
12071 directory = xmlParserGetDirectory((char *)URL);
12072 if ((ctxt->directory == NULL) && (directory != NULL))
12073 ctxt->directory = directory;
Daniel Veillard40ec29a2008-07-30 12:35:40 +000012074
Owen Taylor3473f882001-02-23 17:55:21 +000012075 oldsax = ctxt->sax;
12076 ctxt->sax = ctx->sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000012077 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000012078 newDoc = xmlNewDoc(BAD_CAST "1.0");
12079 if (newDoc == NULL) {
12080 xmlFreeParserCtxt(ctxt);
12081 return(-1);
12082 }
Daniel Veillardae0765b2008-07-31 19:54:59 +000012083 newDoc->properties = XML_DOC_INTERNAL;
Daniel Veillard03a53c32004-10-26 16:06:51 +000012084 if (ctx->myDoc->dict) {
12085 newDoc->dict = ctx->myDoc->dict;
12086 xmlDictReference(newDoc->dict);
12087 }
Owen Taylor3473f882001-02-23 17:55:21 +000012088 if (ctx->myDoc != NULL) {
12089 newDoc->intSubset = ctx->myDoc->intSubset;
12090 newDoc->extSubset = ctx->myDoc->extSubset;
12091 }
12092 if (ctx->myDoc->URL != NULL) {
12093 newDoc->URL = xmlStrdup(ctx->myDoc->URL);
12094 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012095 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
12096 if (newRoot == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000012097 ctxt->sax = oldsax;
12098 xmlFreeParserCtxt(ctxt);
12099 newDoc->intSubset = NULL;
12100 newDoc->extSubset = NULL;
12101 xmlFreeDoc(newDoc);
12102 return(-1);
12103 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012104 xmlAddChild((xmlNodePtr) newDoc, newRoot);
Owen Taylor3473f882001-02-23 17:55:21 +000012105 nodePush(ctxt, newDoc->children);
12106 if (ctx->myDoc == NULL) {
12107 ctxt->myDoc = newDoc;
12108 } else {
12109 ctxt->myDoc = ctx->myDoc;
12110 newDoc->children->doc = ctx->myDoc;
12111 }
12112
Daniel Veillard40ec29a2008-07-30 12:35:40 +000012113 /*
Daniel Veillard87a764e2001-06-20 17:41:10 +000012114 * Get the 4 first bytes and decode the charset
12115 * if enc != XML_CHAR_ENCODING_NONE
12116 * plug some encoding conversion routines.
12117 */
12118 GROW
Daniel Veillard4aede2e2003-10-17 12:43:59 +000012119 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
12120 start[0] = RAW;
12121 start[1] = NXT(1);
12122 start[2] = NXT(2);
12123 start[3] = NXT(3);
12124 enc = xmlDetectCharEncoding(start, 4);
12125 if (enc != XML_CHAR_ENCODING_NONE) {
12126 xmlSwitchEncoding(ctxt, enc);
12127 }
Daniel Veillard87a764e2001-06-20 17:41:10 +000012128 }
12129
Owen Taylor3473f882001-02-23 17:55:21 +000012130 /*
12131 * Parse a possible text declaration first
12132 */
Daniel Veillarda07050d2003-10-19 14:46:32 +000012133 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +000012134 xmlParseTextDecl(ctxt);
Daniel Veillard40ec29a2008-07-30 12:35:40 +000012135 /*
12136 * An XML-1.0 document can't reference an entity not XML-1.0
12137 */
12138 if ((xmlStrEqual(ctx->version, BAD_CAST "1.0")) &&
12139 (!xmlStrEqual(ctxt->input->version, BAD_CAST "1.0"))) {
12140 xmlFatalErrMsg(ctxt, XML_ERR_VERSION_MISMATCH,
12141 "Version mismatch between document and entity\n");
12142 }
Owen Taylor3473f882001-02-23 17:55:21 +000012143 }
12144
12145 /*
12146 * Doing validity checking on chunk doesn't make sense
12147 */
12148 ctxt->instate = XML_PARSER_CONTENT;
12149 ctxt->validate = ctx->validate;
Daniel Veillard5f8d1a32003-03-23 21:02:00 +000012150 ctxt->valid = ctx->valid;
Owen Taylor3473f882001-02-23 17:55:21 +000012151 ctxt->loadsubset = ctx->loadsubset;
12152 ctxt->depth = ctx->depth + 1;
12153 ctxt->replaceEntities = ctx->replaceEntities;
12154 if (ctxt->validate) {
12155 ctxt->vctxt.error = ctx->vctxt.error;
12156 ctxt->vctxt.warning = ctx->vctxt.warning;
Owen Taylor3473f882001-02-23 17:55:21 +000012157 } else {
12158 ctxt->vctxt.error = NULL;
12159 ctxt->vctxt.warning = NULL;
12160 }
Daniel Veillarda9142e72001-06-19 11:07:54 +000012161 ctxt->vctxt.nodeTab = NULL;
12162 ctxt->vctxt.nodeNr = 0;
12163 ctxt->vctxt.nodeMax = 0;
12164 ctxt->vctxt.node = NULL;
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000012165 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
12166 ctxt->dict = ctx->dict;
Daniel Veillardfd343dc2003-10-31 10:55:22 +000012167 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
12168 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
12169 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000012170 ctxt->dictNames = ctx->dictNames;
12171 ctxt->attsDefault = ctx->attsDefault;
12172 ctxt->attsSpecial = ctx->attsSpecial;
William M. Brack503b6102004-08-19 02:17:27 +000012173 ctxt->linenumbers = ctx->linenumbers;
Owen Taylor3473f882001-02-23 17:55:21 +000012174
12175 xmlParseContent(ctxt);
Daniel Veillard40ec29a2008-07-30 12:35:40 +000012176
Daniel Veillard5f8d1a32003-03-23 21:02:00 +000012177 ctx->validate = ctxt->validate;
12178 ctx->valid = ctxt->valid;
Owen Taylor3473f882001-02-23 17:55:21 +000012179 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012180 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000012181 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012182 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000012183 }
12184 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012185 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000012186 }
12187
12188 if (!ctxt->wellFormed) {
12189 if (ctxt->errNo == 0)
12190 ret = 1;
12191 else
12192 ret = ctxt->errNo;
12193 } else {
Daniel Veillardcda96922001-08-21 10:56:31 +000012194 if (lst != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000012195 xmlNodePtr cur;
12196
12197 /*
12198 * Return the newly created nodeset after unlinking it from
12199 * they pseudo parent.
12200 */
12201 cur = newDoc->children->children;
Daniel Veillardcda96922001-08-21 10:56:31 +000012202 *lst = cur;
Owen Taylor3473f882001-02-23 17:55:21 +000012203 while (cur != NULL) {
12204 cur->parent = NULL;
12205 cur = cur->next;
12206 }
12207 newDoc->children->children = NULL;
12208 }
12209 ret = 0;
12210 }
12211 ctxt->sax = oldsax;
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000012212 ctxt->dict = NULL;
12213 ctxt->attsDefault = NULL;
12214 ctxt->attsSpecial = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000012215 xmlFreeParserCtxt(ctxt);
12216 newDoc->intSubset = NULL;
12217 newDoc->extSubset = NULL;
12218 xmlFreeDoc(newDoc);
Daniel Veillard40ec29a2008-07-30 12:35:40 +000012219
Owen Taylor3473f882001-02-23 17:55:21 +000012220 return(ret);
12221}
12222
12223/**
Daniel Veillard257d9102001-05-08 10:41:44 +000012224 * xmlParseExternalEntityPrivate:
Owen Taylor3473f882001-02-23 17:55:21 +000012225 * @doc: the document the chunk pertains to
Daniel Veillarda97a19b2001-05-20 13:19:52 +000012226 * @oldctxt: the previous parser context if available
Owen Taylor3473f882001-02-23 17:55:21 +000012227 * @sax: the SAX handler bloc (possibly NULL)
12228 * @user_data: The user data returned on SAX callbacks (possibly NULL)
12229 * @depth: Used for loop detection, use 0
12230 * @URL: the URL for the entity to load
12231 * @ID: the System ID for the entity to load
12232 * @list: the return value for the set of parsed nodes
12233 *
Daniel Veillard257d9102001-05-08 10:41:44 +000012234 * Private version of xmlParseExternalEntity()
Owen Taylor3473f882001-02-23 17:55:21 +000012235 *
12236 * Returns 0 if the entity is well formed, -1 in case of args problem and
12237 * the parser error code otherwise
12238 */
12239
Daniel Veillard7d515752003-09-26 19:12:37 +000012240static xmlParserErrors
Daniel Veillarda97a19b2001-05-20 13:19:52 +000012241xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
12242 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +000012243 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +000012244 const xmlChar *ID, xmlNodePtr *list) {
Owen Taylor3473f882001-02-23 17:55:21 +000012245 xmlParserCtxtPtr ctxt;
12246 xmlDocPtr newDoc;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012247 xmlNodePtr newRoot;
Owen Taylor3473f882001-02-23 17:55:21 +000012248 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillard7d515752003-09-26 19:12:37 +000012249 xmlParserErrors ret = XML_ERR_OK;
Daniel Veillard87a764e2001-06-20 17:41:10 +000012250 xmlChar start[4];
12251 xmlCharEncoding enc;
Owen Taylor3473f882001-02-23 17:55:21 +000012252
Daniel Veillard0161e632008-08-28 15:36:32 +000012253 if (((depth > 40) &&
12254 ((oldctxt == NULL) || (oldctxt->options & XML_PARSE_HUGE) == 0)) ||
12255 (depth > 1024)) {
Owen Taylor3473f882001-02-23 17:55:21 +000012256 return(XML_ERR_ENTITY_LOOP);
12257 }
12258
Owen Taylor3473f882001-02-23 17:55:21 +000012259 if (list != NULL)
12260 *list = NULL;
12261 if ((URL == NULL) && (ID == NULL))
Daniel Veillard7d515752003-09-26 19:12:37 +000012262 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard30e76072006-03-09 14:13:55 +000012263 if (doc == NULL)
Daniel Veillard7d515752003-09-26 19:12:37 +000012264 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000012265
12266
12267 ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL);
William M. Brackb670e2e2003-09-27 01:05:55 +000012268 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
Owen Taylor3473f882001-02-23 17:55:21 +000012269 ctxt->userData = ctxt;
Daniel Veillarda97a19b2001-05-20 13:19:52 +000012270 if (oldctxt != NULL) {
12271 ctxt->_private = oldctxt->_private;
12272 ctxt->loadsubset = oldctxt->loadsubset;
12273 ctxt->validate = oldctxt->validate;
12274 ctxt->external = oldctxt->external;
Daniel Veillard44e1dd02003-02-21 23:23:28 +000012275 ctxt->record_info = oldctxt->record_info;
12276 ctxt->node_seq.maximum = oldctxt->node_seq.maximum;
12277 ctxt->node_seq.length = oldctxt->node_seq.length;
12278 ctxt->node_seq.buffer = oldctxt->node_seq.buffer;
Daniel Veillarda97a19b2001-05-20 13:19:52 +000012279 } else {
12280 /*
12281 * Doing validity checking on chunk without context
12282 * doesn't make sense
12283 */
12284 ctxt->_private = NULL;
12285 ctxt->validate = 0;
12286 ctxt->external = 2;
12287 ctxt->loadsubset = 0;
12288 }
Owen Taylor3473f882001-02-23 17:55:21 +000012289 if (sax != NULL) {
12290 oldsax = ctxt->sax;
12291 ctxt->sax = sax;
12292 if (user_data != NULL)
12293 ctxt->userData = user_data;
12294 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000012295 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000012296 newDoc = xmlNewDoc(BAD_CAST "1.0");
12297 if (newDoc == NULL) {
Daniel Veillard44e1dd02003-02-21 23:23:28 +000012298 ctxt->node_seq.maximum = 0;
12299 ctxt->node_seq.length = 0;
12300 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000012301 xmlFreeParserCtxt(ctxt);
Daniel Veillard7d515752003-09-26 19:12:37 +000012302 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000012303 }
Daniel Veillardae0765b2008-07-31 19:54:59 +000012304 newDoc->properties = XML_DOC_INTERNAL;
Daniel Veillard30e76072006-03-09 14:13:55 +000012305 newDoc->intSubset = doc->intSubset;
12306 newDoc->extSubset = doc->extSubset;
12307 newDoc->dict = doc->dict;
Daniel Veillard03a53c32004-10-26 16:06:51 +000012308 xmlDictReference(newDoc->dict);
12309
Owen Taylor3473f882001-02-23 17:55:21 +000012310 if (doc->URL != NULL) {
12311 newDoc->URL = xmlStrdup(doc->URL);
12312 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012313 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
12314 if (newRoot == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000012315 if (sax != NULL)
12316 ctxt->sax = oldsax;
Daniel Veillard44e1dd02003-02-21 23:23:28 +000012317 ctxt->node_seq.maximum = 0;
12318 ctxt->node_seq.length = 0;
12319 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000012320 xmlFreeParserCtxt(ctxt);
12321 newDoc->intSubset = NULL;
12322 newDoc->extSubset = NULL;
12323 xmlFreeDoc(newDoc);
Daniel Veillard7d515752003-09-26 19:12:37 +000012324 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000012325 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012326 xmlAddChild((xmlNodePtr) newDoc, newRoot);
Owen Taylor3473f882001-02-23 17:55:21 +000012327 nodePush(ctxt, newDoc->children);
Daniel Veillard30e76072006-03-09 14:13:55 +000012328 ctxt->myDoc = doc;
12329 newRoot->doc = doc;
Owen Taylor3473f882001-02-23 17:55:21 +000012330
Daniel Veillard0161e632008-08-28 15:36:32 +000012331 /*
Daniel Veillard87a764e2001-06-20 17:41:10 +000012332 * Get the 4 first bytes and decode the charset
12333 * if enc != XML_CHAR_ENCODING_NONE
12334 * plug some encoding conversion routines.
12335 */
12336 GROW;
Daniel Veillard4aede2e2003-10-17 12:43:59 +000012337 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
12338 start[0] = RAW;
12339 start[1] = NXT(1);
12340 start[2] = NXT(2);
12341 start[3] = NXT(3);
12342 enc = xmlDetectCharEncoding(start, 4);
12343 if (enc != XML_CHAR_ENCODING_NONE) {
12344 xmlSwitchEncoding(ctxt, enc);
12345 }
Daniel Veillard87a764e2001-06-20 17:41:10 +000012346 }
12347
Owen Taylor3473f882001-02-23 17:55:21 +000012348 /*
12349 * Parse a possible text declaration first
12350 */
Daniel Veillarda07050d2003-10-19 14:46:32 +000012351 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +000012352 xmlParseTextDecl(ctxt);
12353 }
12354
Owen Taylor3473f882001-02-23 17:55:21 +000012355 ctxt->instate = XML_PARSER_CONTENT;
Owen Taylor3473f882001-02-23 17:55:21 +000012356 ctxt->depth = depth;
12357
12358 xmlParseContent(ctxt);
Daniel Veillard0161e632008-08-28 15:36:32 +000012359
Daniel Veillard561b7f82002-03-20 21:55:57 +000012360 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012361 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard561b7f82002-03-20 21:55:57 +000012362 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012363 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000012364 }
12365 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012366 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000012367 }
12368
12369 if (!ctxt->wellFormed) {
12370 if (ctxt->errNo == 0)
Daniel Veillard7d515752003-09-26 19:12:37 +000012371 ret = XML_ERR_INTERNAL_ERROR;
Owen Taylor3473f882001-02-23 17:55:21 +000012372 else
William M. Brack7b9154b2003-09-27 19:23:50 +000012373 ret = (xmlParserErrors)ctxt->errNo;
Owen Taylor3473f882001-02-23 17:55:21 +000012374 } else {
12375 if (list != NULL) {
12376 xmlNodePtr cur;
12377
12378 /*
12379 * Return the newly created nodeset after unlinking it from
12380 * they pseudo parent.
12381 */
12382 cur = newDoc->children->children;
12383 *list = cur;
12384 while (cur != NULL) {
12385 cur->parent = NULL;
12386 cur = cur->next;
12387 }
12388 newDoc->children->children = NULL;
12389 }
Daniel Veillard7d515752003-09-26 19:12:37 +000012390 ret = XML_ERR_OK;
Owen Taylor3473f882001-02-23 17:55:21 +000012391 }
Daniel Veillard0161e632008-08-28 15:36:32 +000012392
12393 /*
12394 * Record in the parent context the number of entities replacement
12395 * done when parsing that reference.
12396 */
12397 oldctxt->nbentities += ctxt->nbentities;
12398 /*
12399 * Also record the size of the entity parsed
12400 */
12401 if (ctxt->input != NULL) {
12402 oldctxt->sizeentities += ctxt->input->consumed;
12403 oldctxt->sizeentities += (ctxt->input->cur - ctxt->input->base);
12404 }
12405 /*
12406 * And record the last error if any
12407 */
12408 if (ctxt->lastError.code != XML_ERR_OK)
12409 xmlCopyError(&ctxt->lastError, &oldctxt->lastError);
12410
Owen Taylor3473f882001-02-23 17:55:21 +000012411 if (sax != NULL)
12412 ctxt->sax = oldsax;
Daniel Veillard0046c0f2003-02-23 13:52:30 +000012413 oldctxt->node_seq.maximum = ctxt->node_seq.maximum;
12414 oldctxt->node_seq.length = ctxt->node_seq.length;
12415 oldctxt->node_seq.buffer = ctxt->node_seq.buffer;
Daniel Veillard44e1dd02003-02-21 23:23:28 +000012416 ctxt->node_seq.maximum = 0;
12417 ctxt->node_seq.length = 0;
12418 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000012419 xmlFreeParserCtxt(ctxt);
12420 newDoc->intSubset = NULL;
12421 newDoc->extSubset = NULL;
12422 xmlFreeDoc(newDoc);
Daniel Veillard0161e632008-08-28 15:36:32 +000012423
Owen Taylor3473f882001-02-23 17:55:21 +000012424 return(ret);
12425}
12426
Daniel Veillard81273902003-09-30 00:43:48 +000012427#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000012428/**
Daniel Veillard257d9102001-05-08 10:41:44 +000012429 * xmlParseExternalEntity:
12430 * @doc: the document the chunk pertains to
12431 * @sax: the SAX handler bloc (possibly NULL)
12432 * @user_data: The user data returned on SAX callbacks (possibly NULL)
12433 * @depth: Used for loop detection, use 0
12434 * @URL: the URL for the entity to load
12435 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +000012436 * @lst: the return value for the set of parsed nodes
Daniel Veillard257d9102001-05-08 10:41:44 +000012437 *
12438 * Parse an external general entity
12439 * An external general parsed entity is well-formed if it matches the
12440 * production labeled extParsedEnt.
12441 *
12442 * [78] extParsedEnt ::= TextDecl? content
12443 *
12444 * Returns 0 if the entity is well formed, -1 in case of args problem and
12445 * the parser error code otherwise
12446 */
12447
12448int
12449xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardcda96922001-08-21 10:56:31 +000012450 int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *lst) {
Daniel Veillarda97a19b2001-05-20 13:19:52 +000012451 return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL,
Daniel Veillardcda96922001-08-21 10:56:31 +000012452 ID, lst));
Daniel Veillard257d9102001-05-08 10:41:44 +000012453}
12454
12455/**
Daniel Veillarde020c3a2001-03-21 18:06:15 +000012456 * xmlParseBalancedChunkMemory:
Owen Taylor3473f882001-02-23 17:55:21 +000012457 * @doc: the document the chunk pertains to
12458 * @sax: the SAX handler bloc (possibly NULL)
12459 * @user_data: The user data returned on SAX callbacks (possibly NULL)
12460 * @depth: Used for loop detection, use 0
12461 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
Daniel Veillardcda96922001-08-21 10:56:31 +000012462 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +000012463 *
12464 * Parse a well-balanced chunk of an XML document
12465 * called by the parser
12466 * The allowed sequence for the Well Balanced Chunk is the one defined by
12467 * the content production in the XML grammar:
12468 *
12469 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
12470 *
12471 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
12472 * the parser error code otherwise
12473 */
12474
12475int
12476xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
Daniel Veillardcda96922001-08-21 10:56:31 +000012477 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) {
Daniel Veillard58e44c92002-08-02 22:19:49 +000012478 return xmlParseBalancedChunkMemoryRecover( doc, sax, user_data,
12479 depth, string, lst, 0 );
12480}
Daniel Veillard81273902003-09-30 00:43:48 +000012481#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillard58e44c92002-08-02 22:19:49 +000012482
12483/**
Daniel Veillard328f48c2002-11-15 15:24:34 +000012484 * xmlParseBalancedChunkMemoryInternal:
12485 * @oldctxt: the existing parsing context
12486 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
12487 * @user_data: the user data field for the parser context
12488 * @lst: the return value for the set of parsed nodes
12489 *
12490 *
12491 * Parse a well-balanced chunk of an XML document
12492 * called by the parser
12493 * The allowed sequence for the Well Balanced Chunk is the one defined by
12494 * the content production in the XML grammar:
12495 *
12496 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
12497 *
Daniel Veillard7d515752003-09-26 19:12:37 +000012498 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
12499 * error code otherwise
Daniel Veillard0161e632008-08-28 15:36:32 +000012500 *
Daniel Veillard328f48c2002-11-15 15:24:34 +000012501 * In case recover is set to 1, the nodelist will not be empty even if
Daniel Veillard0161e632008-08-28 15:36:32 +000012502 * the parsed chunk is not well balanced.
Daniel Veillard328f48c2002-11-15 15:24:34 +000012503 */
Daniel Veillard7d515752003-09-26 19:12:37 +000012504static xmlParserErrors
Daniel Veillard328f48c2002-11-15 15:24:34 +000012505xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
12506 const xmlChar *string, void *user_data, xmlNodePtr *lst) {
12507 xmlParserCtxtPtr ctxt;
Daniel Veillard68e9e742002-11-16 15:35:11 +000012508 xmlDocPtr newDoc = NULL;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012509 xmlNodePtr newRoot;
Daniel Veillard328f48c2002-11-15 15:24:34 +000012510 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000012511 xmlNodePtr content = NULL;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012512 xmlNodePtr last = NULL;
Daniel Veillard328f48c2002-11-15 15:24:34 +000012513 int size;
Daniel Veillard7d515752003-09-26 19:12:37 +000012514 xmlParserErrors ret = XML_ERR_OK;
Daniel Veillard328f48c2002-11-15 15:24:34 +000012515
Daniel Veillard0161e632008-08-28 15:36:32 +000012516 if (((oldctxt->depth > 40) && ((oldctxt->options & XML_PARSE_HUGE) == 0)) ||
12517 (oldctxt->depth > 1024)) {
Daniel Veillard328f48c2002-11-15 15:24:34 +000012518 return(XML_ERR_ENTITY_LOOP);
12519 }
12520
12521
12522 if (lst != NULL)
12523 *lst = NULL;
12524 if (string == NULL)
William M. Brack7b9154b2003-09-27 19:23:50 +000012525 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard328f48c2002-11-15 15:24:34 +000012526
12527 size = xmlStrlen(string);
12528
12529 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
William M. Brack7b9154b2003-09-27 19:23:50 +000012530 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
Daniel Veillard328f48c2002-11-15 15:24:34 +000012531 if (user_data != NULL)
12532 ctxt->userData = user_data;
12533 else
12534 ctxt->userData = ctxt;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000012535 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
12536 ctxt->dict = oldctxt->dict;
Daniel Veillardfd343dc2003-10-31 10:55:22 +000012537 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
12538 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
12539 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
Daniel Veillard328f48c2002-11-15 15:24:34 +000012540
12541 oldsax = ctxt->sax;
12542 ctxt->sax = oldctxt->sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000012543 xmlDetectSAX2(ctxt);
Daniel Veillard87ab1c12003-12-21 13:01:56 +000012544 ctxt->replaceEntities = oldctxt->replaceEntities;
12545 ctxt->options = oldctxt->options;
Daniel Veillard0161e632008-08-28 15:36:32 +000012546
Daniel Veillarde1ca5032002-12-09 14:13:43 +000012547 ctxt->_private = oldctxt->_private;
Daniel Veillard328f48c2002-11-15 15:24:34 +000012548 if (oldctxt->myDoc == NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000012549 newDoc = xmlNewDoc(BAD_CAST "1.0");
12550 if (newDoc == NULL) {
12551 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000012552 ctxt->dict = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000012553 xmlFreeParserCtxt(ctxt);
William M. Brack7b9154b2003-09-27 19:23:50 +000012554 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard68e9e742002-11-16 15:35:11 +000012555 }
Daniel Veillardae0765b2008-07-31 19:54:59 +000012556 newDoc->properties = XML_DOC_INTERNAL;
Daniel Veillard03a53c32004-10-26 16:06:51 +000012557 newDoc->dict = ctxt->dict;
12558 xmlDictReference(newDoc->dict);
Daniel Veillard328f48c2002-11-15 15:24:34 +000012559 ctxt->myDoc = newDoc;
Daniel Veillard68e9e742002-11-16 15:35:11 +000012560 } else {
12561 ctxt->myDoc = oldctxt->myDoc;
12562 content = ctxt->myDoc->children;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012563 last = ctxt->myDoc->last;
Daniel Veillard328f48c2002-11-15 15:24:34 +000012564 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012565 newRoot = xmlNewDocNode(ctxt->myDoc, NULL, BAD_CAST "pseudoroot", NULL);
12566 if (newRoot == NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000012567 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000012568 ctxt->dict = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000012569 xmlFreeParserCtxt(ctxt);
Daniel Veillard03a53c32004-10-26 16:06:51 +000012570 if (newDoc != NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000012571 xmlFreeDoc(newDoc);
Daniel Veillard03a53c32004-10-26 16:06:51 +000012572 }
William M. Brack7b9154b2003-09-27 19:23:50 +000012573 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard68e9e742002-11-16 15:35:11 +000012574 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012575 ctxt->myDoc->children = NULL;
12576 ctxt->myDoc->last = NULL;
12577 xmlAddChild((xmlNodePtr) ctxt->myDoc, newRoot);
Daniel Veillard68e9e742002-11-16 15:35:11 +000012578 nodePush(ctxt, ctxt->myDoc->children);
Daniel Veillard328f48c2002-11-15 15:24:34 +000012579 ctxt->instate = XML_PARSER_CONTENT;
12580 ctxt->depth = oldctxt->depth + 1;
12581
Daniel Veillard328f48c2002-11-15 15:24:34 +000012582 ctxt->validate = 0;
12583 ctxt->loadsubset = oldctxt->loadsubset;
Daniel Veillardef8dd7b2003-03-23 12:02:56 +000012584 if ((oldctxt->validate) || (oldctxt->replaceEntities != 0)) {
12585 /*
12586 * ID/IDREF registration will be done in xmlValidateElement below
12587 */
12588 ctxt->loadsubset |= XML_SKIP_IDS;
12589 }
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000012590 ctxt->dictNames = oldctxt->dictNames;
Daniel Veillard95d2d5b2003-10-27 14:54:49 +000012591 ctxt->attsDefault = oldctxt->attsDefault;
12592 ctxt->attsSpecial = oldctxt->attsSpecial;
Daniel Veillard328f48c2002-11-15 15:24:34 +000012593
Daniel Veillard68e9e742002-11-16 15:35:11 +000012594 xmlParseContent(ctxt);
Daniel Veillard328f48c2002-11-15 15:24:34 +000012595 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012596 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000012597 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012598 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000012599 }
Daniel Veillard68e9e742002-11-16 15:35:11 +000012600 if (ctxt->node != ctxt->myDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012601 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000012602 }
12603
12604 if (!ctxt->wellFormed) {
12605 if (ctxt->errNo == 0)
Daniel Veillard7d515752003-09-26 19:12:37 +000012606 ret = XML_ERR_INTERNAL_ERROR;
Daniel Veillard328f48c2002-11-15 15:24:34 +000012607 else
William M. Brack7b9154b2003-09-27 19:23:50 +000012608 ret = (xmlParserErrors)ctxt->errNo;
Daniel Veillard328f48c2002-11-15 15:24:34 +000012609 } else {
William M. Brack7b9154b2003-09-27 19:23:50 +000012610 ret = XML_ERR_OK;
Daniel Veillard328f48c2002-11-15 15:24:34 +000012611 }
Daniel Veillard0161e632008-08-28 15:36:32 +000012612
William M. Brack7b9154b2003-09-27 19:23:50 +000012613 if ((lst != NULL) && (ret == XML_ERR_OK)) {
Daniel Veillard328f48c2002-11-15 15:24:34 +000012614 xmlNodePtr cur;
12615
12616 /*
12617 * Return the newly created nodeset after unlinking it from
12618 * they pseudo parent.
12619 */
Daniel Veillard68e9e742002-11-16 15:35:11 +000012620 cur = ctxt->myDoc->children->children;
Daniel Veillard328f48c2002-11-15 15:24:34 +000012621 *lst = cur;
12622 while (cur != NULL) {
Daniel Veillard4432df22003-09-28 18:58:27 +000012623#ifdef LIBXML_VALID_ENABLED
Daniel Veillard8874b942005-08-25 13:19:21 +000012624 if ((oldctxt->validate) && (oldctxt->wellFormed) &&
12625 (oldctxt->myDoc) && (oldctxt->myDoc->intSubset) &&
12626 (cur->type == XML_ELEMENT_NODE)) {
Daniel Veillard8d589042003-02-04 15:07:21 +000012627 oldctxt->valid &= xmlValidateElement(&oldctxt->vctxt,
12628 oldctxt->myDoc, cur);
12629 }
Daniel Veillard4432df22003-09-28 18:58:27 +000012630#endif /* LIBXML_VALID_ENABLED */
Daniel Veillard328f48c2002-11-15 15:24:34 +000012631 cur->parent = NULL;
12632 cur = cur->next;
12633 }
Daniel Veillard68e9e742002-11-16 15:35:11 +000012634 ctxt->myDoc->children->children = NULL;
12635 }
12636 if (ctxt->myDoc != NULL) {
12637 xmlFreeNode(ctxt->myDoc->children);
12638 ctxt->myDoc->children = content;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012639 ctxt->myDoc->last = last;
Daniel Veillard328f48c2002-11-15 15:24:34 +000012640 }
Daniel Veillard0161e632008-08-28 15:36:32 +000012641
12642 /*
12643 * Record in the parent context the number of entities replacement
12644 * done when parsing that reference.
12645 */
Daniel Veillard4bf899b2008-08-20 17:04:30 +000012646 oldctxt->nbentities += ctxt->nbentities;
Daniel Veillard0161e632008-08-28 15:36:32 +000012647 /*
12648 * Also record the last error if any
12649 */
12650 if (ctxt->lastError.code != XML_ERR_OK)
12651 xmlCopyError(&ctxt->lastError, &oldctxt->lastError);
12652
Daniel Veillard328f48c2002-11-15 15:24:34 +000012653 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000012654 ctxt->dict = NULL;
Daniel Veillard95d2d5b2003-10-27 14:54:49 +000012655 ctxt->attsDefault = NULL;
12656 ctxt->attsSpecial = NULL;
Daniel Veillard328f48c2002-11-15 15:24:34 +000012657 xmlFreeParserCtxt(ctxt);
Daniel Veillard03a53c32004-10-26 16:06:51 +000012658 if (newDoc != NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000012659 xmlFreeDoc(newDoc);
Daniel Veillard03a53c32004-10-26 16:06:51 +000012660 }
Daniel Veillard0161e632008-08-28 15:36:32 +000012661
Daniel Veillard328f48c2002-11-15 15:24:34 +000012662 return(ret);
12663}
12664
Daniel Veillard29b17482004-08-16 00:39:03 +000012665/**
12666 * xmlParseInNodeContext:
12667 * @node: the context node
12668 * @data: the input string
12669 * @datalen: the input string length in bytes
12670 * @options: a combination of xmlParserOption
12671 * @lst: the return value for the set of parsed nodes
12672 *
12673 * Parse a well-balanced chunk of an XML document
12674 * within the context (DTD, namespaces, etc ...) of the given node.
12675 *
12676 * The allowed sequence for the data is a Well Balanced Chunk defined by
12677 * the content production in the XML grammar:
12678 *
12679 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
12680 *
12681 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
12682 * error code otherwise
12683 */
12684xmlParserErrors
12685xmlParseInNodeContext(xmlNodePtr node, const char *data, int datalen,
12686 int options, xmlNodePtr *lst) {
12687#ifdef SAX2
12688 xmlParserCtxtPtr ctxt;
12689 xmlDocPtr doc = NULL;
12690 xmlNodePtr fake, cur;
12691 int nsnr = 0;
12692
12693 xmlParserErrors ret = XML_ERR_OK;
12694
12695 /*
12696 * check all input parameters, grab the document
12697 */
12698 if ((lst == NULL) || (node == NULL) || (data == NULL) || (datalen < 0))
12699 return(XML_ERR_INTERNAL_ERROR);
12700 switch (node->type) {
12701 case XML_ELEMENT_NODE:
12702 case XML_ATTRIBUTE_NODE:
12703 case XML_TEXT_NODE:
12704 case XML_CDATA_SECTION_NODE:
12705 case XML_ENTITY_REF_NODE:
12706 case XML_PI_NODE:
12707 case XML_COMMENT_NODE:
12708 case XML_DOCUMENT_NODE:
12709 case XML_HTML_DOCUMENT_NODE:
12710 break;
12711 default:
12712 return(XML_ERR_INTERNAL_ERROR);
12713
12714 }
12715 while ((node != NULL) && (node->type != XML_ELEMENT_NODE) &&
12716 (node->type != XML_DOCUMENT_NODE) &&
12717 (node->type != XML_HTML_DOCUMENT_NODE))
12718 node = node->parent;
12719 if (node == NULL)
12720 return(XML_ERR_INTERNAL_ERROR);
12721 if (node->type == XML_ELEMENT_NODE)
12722 doc = node->doc;
12723 else
12724 doc = (xmlDocPtr) node;
12725 if (doc == NULL)
12726 return(XML_ERR_INTERNAL_ERROR);
12727
12728 /*
12729 * allocate a context and set-up everything not related to the
12730 * node position in the tree
12731 */
12732 if (doc->type == XML_DOCUMENT_NODE)
12733 ctxt = xmlCreateMemoryParserCtxt((char *) data, datalen);
12734#ifdef LIBXML_HTML_ENABLED
12735 else if (doc->type == XML_HTML_DOCUMENT_NODE)
12736 ctxt = htmlCreateMemoryParserCtxt((char *) data, datalen);
12737#endif
12738 else
12739 return(XML_ERR_INTERNAL_ERROR);
12740
12741 if (ctxt == NULL)
12742 return(XML_ERR_NO_MEMORY);
12743 fake = xmlNewComment(NULL);
12744 if (fake == NULL) {
12745 xmlFreeParserCtxt(ctxt);
12746 return(XML_ERR_NO_MEMORY);
12747 }
12748 xmlAddChild(node, fake);
William M. Brackc3f81342004-10-03 01:22:44 +000012749
12750 /*
12751 * Use input doc's dict if present, else assure XML_PARSE_NODICT is set.
12752 * We need a dictionary for xmlDetectSAX2, so if there's no doc dict
12753 * we must wait until the last moment to free the original one.
12754 */
Daniel Veillard29b17482004-08-16 00:39:03 +000012755 if (doc->dict != NULL) {
William M. Brackc3f81342004-10-03 01:22:44 +000012756 if (ctxt->dict != NULL)
Daniel Veillard29b17482004-08-16 00:39:03 +000012757 xmlDictFree(ctxt->dict);
12758 ctxt->dict = doc->dict;
William M. Brackc3f81342004-10-03 01:22:44 +000012759 } else
12760 options |= XML_PARSE_NODICT;
12761
Daniel Veillard37334572008-07-31 08:20:02 +000012762 xmlCtxtUseOptionsInternal(ctxt, options, NULL);
Daniel Veillard29b17482004-08-16 00:39:03 +000012763 xmlDetectSAX2(ctxt);
12764 ctxt->myDoc = doc;
12765
12766 if (node->type == XML_ELEMENT_NODE) {
12767 nodePush(ctxt, node);
12768 /*
12769 * initialize the SAX2 namespaces stack
12770 */
12771 cur = node;
12772 while ((cur != NULL) && (cur->type == XML_ELEMENT_NODE)) {
12773 xmlNsPtr ns = cur->nsDef;
12774 const xmlChar *iprefix, *ihref;
12775
12776 while (ns != NULL) {
12777 if (ctxt->dict) {
12778 iprefix = xmlDictLookup(ctxt->dict, ns->prefix, -1);
12779 ihref = xmlDictLookup(ctxt->dict, ns->href, -1);
12780 } else {
12781 iprefix = ns->prefix;
12782 ihref = ns->href;
12783 }
12784
12785 if (xmlGetNamespace(ctxt, iprefix) == NULL) {
12786 nsPush(ctxt, iprefix, ihref);
12787 nsnr++;
12788 }
12789 ns = ns->next;
12790 }
12791 cur = cur->parent;
12792 }
12793 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillard0161e632008-08-28 15:36:32 +000012794 }
Daniel Veillard29b17482004-08-16 00:39:03 +000012795
12796 if ((ctxt->validate) || (ctxt->replaceEntities != 0)) {
12797 /*
12798 * ID/IDREF registration will be done in xmlValidateElement below
12799 */
12800 ctxt->loadsubset |= XML_SKIP_IDS;
12801 }
12802
Daniel Veillard499cc922006-01-18 17:22:35 +000012803#ifdef LIBXML_HTML_ENABLED
12804 if (doc->type == XML_HTML_DOCUMENT_NODE)
12805 __htmlParseContent(ctxt);
12806 else
12807#endif
12808 xmlParseContent(ctxt);
12809
Daniel Veillard29b17482004-08-16 00:39:03 +000012810 nsPop(ctxt, nsnr);
12811 if ((RAW == '<') && (NXT(1) == '/')) {
12812 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
12813 } else if (RAW != 0) {
12814 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
12815 }
12816 if ((ctxt->node != NULL) && (ctxt->node != node)) {
12817 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
12818 ctxt->wellFormed = 0;
12819 }
12820
12821 if (!ctxt->wellFormed) {
12822 if (ctxt->errNo == 0)
12823 ret = XML_ERR_INTERNAL_ERROR;
12824 else
12825 ret = (xmlParserErrors)ctxt->errNo;
12826 } else {
12827 ret = XML_ERR_OK;
12828 }
Daniel Veillard0161e632008-08-28 15:36:32 +000012829
Daniel Veillard29b17482004-08-16 00:39:03 +000012830 /*
12831 * Return the newly created nodeset after unlinking it from
12832 * the pseudo sibling.
12833 */
Daniel Veillard0161e632008-08-28 15:36:32 +000012834
Daniel Veillard29b17482004-08-16 00:39:03 +000012835 cur = fake->next;
12836 fake->next = NULL;
12837 node->last = fake;
12838
12839 if (cur != NULL) {
12840 cur->prev = NULL;
12841 }
12842
12843 *lst = cur;
12844
12845 while (cur != NULL) {
12846 cur->parent = NULL;
12847 cur = cur->next;
12848 }
12849
12850 xmlUnlinkNode(fake);
12851 xmlFreeNode(fake);
12852
12853
12854 if (ret != XML_ERR_OK) {
12855 xmlFreeNodeList(*lst);
12856 *lst = NULL;
12857 }
William M. Brackc3f81342004-10-03 01:22:44 +000012858
William M. Brackb7b54de2004-10-06 16:38:01 +000012859 if (doc->dict != NULL)
12860 ctxt->dict = NULL;
Daniel Veillard29b17482004-08-16 00:39:03 +000012861 xmlFreeParserCtxt(ctxt);
Daniel Veillard0161e632008-08-28 15:36:32 +000012862
Daniel Veillard29b17482004-08-16 00:39:03 +000012863 return(ret);
12864#else /* !SAX2 */
12865 return(XML_ERR_INTERNAL_ERROR);
12866#endif
12867}
12868
Daniel Veillard81273902003-09-30 00:43:48 +000012869#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard328f48c2002-11-15 15:24:34 +000012870/**
Daniel Veillard58e44c92002-08-02 22:19:49 +000012871 * xmlParseBalancedChunkMemoryRecover:
12872 * @doc: the document the chunk pertains to
12873 * @sax: the SAX handler bloc (possibly NULL)
12874 * @user_data: The user data returned on SAX callbacks (possibly NULL)
12875 * @depth: Used for loop detection, use 0
12876 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
12877 * @lst: the return value for the set of parsed nodes
12878 * @recover: return nodes even if the data is broken (use 0)
12879 *
12880 *
12881 * Parse a well-balanced chunk of an XML document
12882 * called by the parser
12883 * The allowed sequence for the Well Balanced Chunk is the one defined by
12884 * the content production in the XML grammar:
12885 *
12886 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
12887 *
12888 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
12889 * the parser error code otherwise
Daniel Veillard2135fc22008-04-04 16:10:51 +000012890 *
Daniel Veillard58e44c92002-08-02 22:19:49 +000012891 * In case recover is set to 1, the nodelist will not be empty even if
Daniel Veillard2135fc22008-04-04 16:10:51 +000012892 * the parsed chunk is not well balanced, assuming the parsing succeeded to
12893 * some extent.
Daniel Veillard58e44c92002-08-02 22:19:49 +000012894 */
12895int
12896xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax,
Daniel Veillard2135fc22008-04-04 16:10:51 +000012897 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst,
Daniel Veillard58e44c92002-08-02 22:19:49 +000012898 int recover) {
Owen Taylor3473f882001-02-23 17:55:21 +000012899 xmlParserCtxtPtr ctxt;
12900 xmlDocPtr newDoc;
12901 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000012902 xmlNodePtr content, newRoot;
Owen Taylor3473f882001-02-23 17:55:21 +000012903 int size;
12904 int ret = 0;
12905
Daniel Veillard0161e632008-08-28 15:36:32 +000012906 if (depth > 40) {
Owen Taylor3473f882001-02-23 17:55:21 +000012907 return(XML_ERR_ENTITY_LOOP);
12908 }
12909
12910
Daniel Veillardcda96922001-08-21 10:56:31 +000012911 if (lst != NULL)
12912 *lst = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000012913 if (string == NULL)
12914 return(-1);
12915
12916 size = xmlStrlen(string);
12917
12918 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
12919 if (ctxt == NULL) return(-1);
12920 ctxt->userData = ctxt;
12921 if (sax != NULL) {
12922 oldsax = ctxt->sax;
12923 ctxt->sax = sax;
12924 if (user_data != NULL)
12925 ctxt->userData = user_data;
12926 }
12927 newDoc = xmlNewDoc(BAD_CAST "1.0");
12928 if (newDoc == NULL) {
12929 xmlFreeParserCtxt(ctxt);
12930 return(-1);
12931 }
Daniel Veillardae0765b2008-07-31 19:54:59 +000012932 newDoc->properties = XML_DOC_INTERNAL;
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000012933 if ((doc != NULL) && (doc->dict != NULL)) {
12934 xmlDictFree(ctxt->dict);
12935 ctxt->dict = doc->dict;
12936 xmlDictReference(ctxt->dict);
12937 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
12938 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
12939 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
12940 ctxt->dictNames = 1;
12941 } else {
Daniel Veillard37334572008-07-31 08:20:02 +000012942 xmlCtxtUseOptionsInternal(ctxt, XML_PARSE_NODICT, NULL);
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000012943 }
Owen Taylor3473f882001-02-23 17:55:21 +000012944 if (doc != NULL) {
12945 newDoc->intSubset = doc->intSubset;
12946 newDoc->extSubset = doc->extSubset;
12947 }
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000012948 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
12949 if (newRoot == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000012950 if (sax != NULL)
12951 ctxt->sax = oldsax;
12952 xmlFreeParserCtxt(ctxt);
12953 newDoc->intSubset = NULL;
12954 newDoc->extSubset = NULL;
12955 xmlFreeDoc(newDoc);
12956 return(-1);
12957 }
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000012958 xmlAddChild((xmlNodePtr) newDoc, newRoot);
12959 nodePush(ctxt, newRoot);
Owen Taylor3473f882001-02-23 17:55:21 +000012960 if (doc == NULL) {
12961 ctxt->myDoc = newDoc;
12962 } else {
Daniel Veillard42766c02002-08-22 20:52:17 +000012963 ctxt->myDoc = newDoc;
Owen Taylor3473f882001-02-23 17:55:21 +000012964 newDoc->children->doc = doc;
Rob Richardsa02f1992006-09-16 14:04:26 +000012965 /* Ensure that doc has XML spec namespace */
12966 xmlSearchNsByHref(doc, (xmlNodePtr)doc, XML_XML_NAMESPACE);
12967 newDoc->oldNs = doc->oldNs;
Owen Taylor3473f882001-02-23 17:55:21 +000012968 }
12969 ctxt->instate = XML_PARSER_CONTENT;
12970 ctxt->depth = depth;
12971
12972 /*
12973 * Doing validity checking on chunk doesn't make sense
12974 */
12975 ctxt->validate = 0;
12976 ctxt->loadsubset = 0;
Daniel Veillarde57ec792003-09-10 10:50:59 +000012977 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000012978
Daniel Veillardb39bc392002-10-26 19:29:51 +000012979 if ( doc != NULL ){
12980 content = doc->children;
12981 doc->children = NULL;
12982 xmlParseContent(ctxt);
12983 doc->children = content;
12984 }
12985 else {
12986 xmlParseContent(ctxt);
12987 }
Owen Taylor3473f882001-02-23 17:55:21 +000012988 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012989 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000012990 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012991 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000012992 }
12993 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012994 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000012995 }
12996
12997 if (!ctxt->wellFormed) {
12998 if (ctxt->errNo == 0)
12999 ret = 1;
13000 else
13001 ret = ctxt->errNo;
13002 } else {
Daniel Veillard58e44c92002-08-02 22:19:49 +000013003 ret = 0;
13004 }
Daniel Veillard0161e632008-08-28 15:36:32 +000013005
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000013006 if ((lst != NULL) && ((ret == 0) || (recover == 1))) {
13007 xmlNodePtr cur;
Owen Taylor3473f882001-02-23 17:55:21 +000013008
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000013009 /*
13010 * Return the newly created nodeset after unlinking it from
13011 * they pseudo parent.
13012 */
13013 cur = newDoc->children->children;
13014 *lst = cur;
13015 while (cur != NULL) {
13016 xmlSetTreeDoc(cur, doc);
13017 cur->parent = NULL;
13018 cur = cur->next;
Owen Taylor3473f882001-02-23 17:55:21 +000013019 }
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000013020 newDoc->children->children = NULL;
13021 }
Daniel Veillard0161e632008-08-28 15:36:32 +000013022
13023 if (sax != NULL)
Owen Taylor3473f882001-02-23 17:55:21 +000013024 ctxt->sax = oldsax;
13025 xmlFreeParserCtxt(ctxt);
13026 newDoc->intSubset = NULL;
13027 newDoc->extSubset = NULL;
Rob Richardsa02f1992006-09-16 14:04:26 +000013028 newDoc->oldNs = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000013029 xmlFreeDoc(newDoc);
Daniel Veillard0161e632008-08-28 15:36:32 +000013030
Owen Taylor3473f882001-02-23 17:55:21 +000013031 return(ret);
13032}
13033
13034/**
13035 * xmlSAXParseEntity:
13036 * @sax: the SAX handler block
13037 * @filename: the filename
13038 *
13039 * parse an XML external entity out of context and build a tree.
13040 * It use the given SAX function block to handle the parsing callback.
13041 * If sax is NULL, fallback to the default DOM tree building routines.
13042 *
13043 * [78] extParsedEnt ::= TextDecl? content
13044 *
13045 * This correspond to a "Well Balanced" chunk
13046 *
13047 * Returns the resulting document tree
13048 */
13049
13050xmlDocPtr
13051xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
13052 xmlDocPtr ret;
13053 xmlParserCtxtPtr ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000013054
13055 ctxt = xmlCreateFileParserCtxt(filename);
13056 if (ctxt == NULL) {
13057 return(NULL);
13058 }
13059 if (sax != NULL) {
13060 if (ctxt->sax != NULL)
13061 xmlFree(ctxt->sax);
13062 ctxt->sax = sax;
13063 ctxt->userData = NULL;
13064 }
13065
Owen Taylor3473f882001-02-23 17:55:21 +000013066 xmlParseExtParsedEnt(ctxt);
13067
13068 if (ctxt->wellFormed)
13069 ret = ctxt->myDoc;
13070 else {
13071 ret = NULL;
13072 xmlFreeDoc(ctxt->myDoc);
13073 ctxt->myDoc = NULL;
13074 }
13075 if (sax != NULL)
13076 ctxt->sax = NULL;
13077 xmlFreeParserCtxt(ctxt);
Daniel Veillard0161e632008-08-28 15:36:32 +000013078
Owen Taylor3473f882001-02-23 17:55:21 +000013079 return(ret);
13080}
13081
13082/**
13083 * xmlParseEntity:
13084 * @filename: the filename
13085 *
13086 * parse an XML external entity out of context and build a tree.
13087 *
13088 * [78] extParsedEnt ::= TextDecl? content
13089 *
13090 * This correspond to a "Well Balanced" chunk
13091 *
13092 * Returns the resulting document tree
13093 */
13094
13095xmlDocPtr
13096xmlParseEntity(const char *filename) {
13097 return(xmlSAXParseEntity(NULL, filename));
13098}
Daniel Veillard81273902003-09-30 00:43:48 +000013099#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000013100
13101/**
13102 * xmlCreateEntityParserCtxt:
13103 * @URL: the entity URL
13104 * @ID: the entity PUBLIC ID
Daniel Veillardcbaf3992001-12-31 16:16:02 +000013105 * @base: a possible base for the target URI
Owen Taylor3473f882001-02-23 17:55:21 +000013106 *
13107 * Create a parser context for an external entity
13108 * Automatic support for ZLIB/Compress compressed document is provided
13109 * by default if found at compile-time.
13110 *
13111 * Returns the new parser context or NULL
13112 */
13113xmlParserCtxtPtr
13114xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
13115 const xmlChar *base) {
13116 xmlParserCtxtPtr ctxt;
13117 xmlParserInputPtr inputStream;
13118 char *directory = NULL;
13119 xmlChar *uri;
Daniel Veillard0161e632008-08-28 15:36:32 +000013120
Owen Taylor3473f882001-02-23 17:55:21 +000013121 ctxt = xmlNewParserCtxt();
13122 if (ctxt == NULL) {
13123 return(NULL);
13124 }
13125
13126 uri = xmlBuildURI(URL, base);
13127
13128 if (uri == NULL) {
13129 inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
13130 if (inputStream == NULL) {
13131 xmlFreeParserCtxt(ctxt);
13132 return(NULL);
13133 }
13134
13135 inputPush(ctxt, inputStream);
13136
13137 if ((ctxt->directory == NULL) && (directory == NULL))
13138 directory = xmlParserGetDirectory((char *)URL);
13139 if ((ctxt->directory == NULL) && (directory != NULL))
13140 ctxt->directory = directory;
13141 } else {
13142 inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt);
13143 if (inputStream == NULL) {
13144 xmlFree(uri);
13145 xmlFreeParserCtxt(ctxt);
13146 return(NULL);
13147 }
13148
13149 inputPush(ctxt, inputStream);
13150
13151 if ((ctxt->directory == NULL) && (directory == NULL))
13152 directory = xmlParserGetDirectory((char *)uri);
13153 if ((ctxt->directory == NULL) && (directory != NULL))
13154 ctxt->directory = directory;
13155 xmlFree(uri);
13156 }
Owen Taylor3473f882001-02-23 17:55:21 +000013157 return(ctxt);
13158}
13159
13160/************************************************************************
13161 * *
Daniel Veillard0161e632008-08-28 15:36:32 +000013162 * Front ends when parsing from a file *
Owen Taylor3473f882001-02-23 17:55:21 +000013163 * *
13164 ************************************************************************/
13165
13166/**
Daniel Veillard61b93382003-11-03 14:28:31 +000013167 * xmlCreateURLParserCtxt:
13168 * @filename: the filename or URL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013169 * @options: a combination of xmlParserOption
Owen Taylor3473f882001-02-23 17:55:21 +000013170 *
Daniel Veillard61b93382003-11-03 14:28:31 +000013171 * Create a parser context for a file or URL content.
Owen Taylor3473f882001-02-23 17:55:21 +000013172 * Automatic support for ZLIB/Compress compressed document is provided
Daniel Veillard61b93382003-11-03 14:28:31 +000013173 * by default if found at compile-time and for file accesses
Owen Taylor3473f882001-02-23 17:55:21 +000013174 *
13175 * Returns the new parser context or NULL
13176 */
13177xmlParserCtxtPtr
Daniel Veillard61b93382003-11-03 14:28:31 +000013178xmlCreateURLParserCtxt(const char *filename, int options)
Owen Taylor3473f882001-02-23 17:55:21 +000013179{
13180 xmlParserCtxtPtr ctxt;
13181 xmlParserInputPtr inputStream;
Owen Taylor3473f882001-02-23 17:55:21 +000013182 char *directory = NULL;
13183
Owen Taylor3473f882001-02-23 17:55:21 +000013184 ctxt = xmlNewParserCtxt();
13185 if (ctxt == NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +000013186 xmlErrMemory(NULL, "cannot allocate parser context");
Owen Taylor3473f882001-02-23 17:55:21 +000013187 return(NULL);
13188 }
13189
Daniel Veillarddf292f72005-01-16 19:00:15 +000013190 if (options)
Daniel Veillard37334572008-07-31 08:20:02 +000013191 xmlCtxtUseOptionsInternal(ctxt, options, NULL);
Daniel Veillarddf292f72005-01-16 19:00:15 +000013192 ctxt->linenumbers = 1;
Daniel Veillard37334572008-07-31 08:20:02 +000013193
Daniel Veillard4e9b1bc2003-06-09 10:30:33 +000013194 inputStream = xmlLoadExternalEntity(filename, NULL, ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000013195 if (inputStream == NULL) {
13196 xmlFreeParserCtxt(ctxt);
13197 return(NULL);
13198 }
13199
Owen Taylor3473f882001-02-23 17:55:21 +000013200 inputPush(ctxt, inputStream);
13201 if ((ctxt->directory == NULL) && (directory == NULL))
Igor Zlatkovic5f9fada2003-02-19 14:51:00 +000013202 directory = xmlParserGetDirectory(filename);
Owen Taylor3473f882001-02-23 17:55:21 +000013203 if ((ctxt->directory == NULL) && (directory != NULL))
13204 ctxt->directory = directory;
13205
13206 return(ctxt);
13207}
13208
Daniel Veillard61b93382003-11-03 14:28:31 +000013209/**
13210 * xmlCreateFileParserCtxt:
13211 * @filename: the filename
13212 *
13213 * Create a parser context for a file content.
13214 * Automatic support for ZLIB/Compress compressed document is provided
13215 * by default if found at compile-time.
13216 *
13217 * Returns the new parser context or NULL
13218 */
13219xmlParserCtxtPtr
13220xmlCreateFileParserCtxt(const char *filename)
13221{
13222 return(xmlCreateURLParserCtxt(filename, 0));
13223}
13224
Daniel Veillard81273902003-09-30 00:43:48 +000013225#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000013226/**
Daniel Veillarda293c322001-10-02 13:54:14 +000013227 * xmlSAXParseFileWithData:
Owen Taylor3473f882001-02-23 17:55:21 +000013228 * @sax: the SAX handler block
13229 * @filename: the filename
13230 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
13231 * documents
Daniel Veillarda293c322001-10-02 13:54:14 +000013232 * @data: the userdata
Owen Taylor3473f882001-02-23 17:55:21 +000013233 *
13234 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
13235 * compressed document is provided by default if found at compile-time.
13236 * It use the given SAX function block to handle the parsing callback.
13237 * If sax is NULL, fallback to the default DOM tree building routines.
13238 *
Daniel Veillarde19fc232002-04-22 16:01:24 +000013239 * User data (void *) is stored within the parser context in the
13240 * context's _private member, so it is available nearly everywhere in libxml
Daniel Veillarda293c322001-10-02 13:54:14 +000013241 *
Owen Taylor3473f882001-02-23 17:55:21 +000013242 * Returns the resulting document tree
13243 */
13244
13245xmlDocPtr
Daniel Veillarda293c322001-10-02 13:54:14 +000013246xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename,
13247 int recovery, void *data) {
Owen Taylor3473f882001-02-23 17:55:21 +000013248 xmlDocPtr ret;
13249 xmlParserCtxtPtr ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000013250
Daniel Veillard635ef722001-10-29 11:48:19 +000013251 xmlInitParser();
13252
Owen Taylor3473f882001-02-23 17:55:21 +000013253 ctxt = xmlCreateFileParserCtxt(filename);
13254 if (ctxt == NULL) {
13255 return(NULL);
13256 }
13257 if (sax != NULL) {
13258 if (ctxt->sax != NULL)
13259 xmlFree(ctxt->sax);
13260 ctxt->sax = sax;
Owen Taylor3473f882001-02-23 17:55:21 +000013261 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000013262 xmlDetectSAX2(ctxt);
Daniel Veillarda293c322001-10-02 13:54:14 +000013263 if (data!=NULL) {
Daniel Veillardc790bf42003-10-11 10:50:10 +000013264 ctxt->_private = data;
Daniel Veillarda293c322001-10-02 13:54:14 +000013265 }
Owen Taylor3473f882001-02-23 17:55:21 +000013266
Daniel Veillard37d2d162008-03-14 10:54:00 +000013267 if (ctxt->directory == NULL)
13268 ctxt->directory = xmlParserGetDirectory(filename);
Owen Taylor3473f882001-02-23 17:55:21 +000013269
Daniel Veillarddad3f682002-11-17 16:47:27 +000013270 ctxt->recovery = recovery;
13271
Owen Taylor3473f882001-02-23 17:55:21 +000013272 xmlParseDocument(ctxt);
13273
William M. Brackc07329e2003-09-08 01:57:30 +000013274 if ((ctxt->wellFormed) || recovery) {
13275 ret = ctxt->myDoc;
Daniel Veillardb65e12e2003-10-08 21:33:28 +000013276 if (ret != NULL) {
13277 if (ctxt->input->buf->compressed > 0)
13278 ret->compression = 9;
13279 else
13280 ret->compression = ctxt->input->buf->compressed;
13281 }
William M. Brackc07329e2003-09-08 01:57:30 +000013282 }
Owen Taylor3473f882001-02-23 17:55:21 +000013283 else {
13284 ret = NULL;
13285 xmlFreeDoc(ctxt->myDoc);
13286 ctxt->myDoc = NULL;
13287 }
13288 if (sax != NULL)
13289 ctxt->sax = NULL;
13290 xmlFreeParserCtxt(ctxt);
13291
13292 return(ret);
13293}
13294
13295/**
Daniel Veillarda293c322001-10-02 13:54:14 +000013296 * xmlSAXParseFile:
13297 * @sax: the SAX handler block
13298 * @filename: the filename
13299 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
13300 * documents
13301 *
13302 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
13303 * compressed document is provided by default if found at compile-time.
13304 * It use the given SAX function block to handle the parsing callback.
13305 * If sax is NULL, fallback to the default DOM tree building routines.
13306 *
13307 * Returns the resulting document tree
13308 */
13309
13310xmlDocPtr
13311xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
13312 int recovery) {
13313 return(xmlSAXParseFileWithData(sax,filename,recovery,NULL));
13314}
13315
13316/**
Owen Taylor3473f882001-02-23 17:55:21 +000013317 * xmlRecoverDoc:
13318 * @cur: a pointer to an array of xmlChar
13319 *
13320 * parse an XML in-memory document and build a tree.
Daniel Veillard2135fc22008-04-04 16:10:51 +000013321 * In the case the document is not Well Formed, a attempt to build a
13322 * tree is tried anyway
13323 *
13324 * Returns the resulting document tree or NULL in case of failure
Owen Taylor3473f882001-02-23 17:55:21 +000013325 */
13326
13327xmlDocPtr
13328xmlRecoverDoc(xmlChar *cur) {
13329 return(xmlSAXParseDoc(NULL, cur, 1));
13330}
13331
13332/**
13333 * xmlParseFile:
13334 * @filename: the filename
13335 *
13336 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
13337 * compressed document is provided by default if found at compile-time.
13338 *
Daniel Veillard5d96fff2001-08-31 14:55:30 +000013339 * Returns the resulting document tree if the file was wellformed,
13340 * NULL otherwise.
Owen Taylor3473f882001-02-23 17:55:21 +000013341 */
13342
13343xmlDocPtr
13344xmlParseFile(const char *filename) {
13345 return(xmlSAXParseFile(NULL, filename, 0));
13346}
13347
13348/**
13349 * xmlRecoverFile:
13350 * @filename: the filename
13351 *
13352 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
13353 * compressed document is provided by default if found at compile-time.
Daniel Veillard2135fc22008-04-04 16:10:51 +000013354 * In the case the document is not Well Formed, it attempts to build
13355 * a tree anyway
Owen Taylor3473f882001-02-23 17:55:21 +000013356 *
Daniel Veillard2135fc22008-04-04 16:10:51 +000013357 * Returns the resulting document tree or NULL in case of failure
Owen Taylor3473f882001-02-23 17:55:21 +000013358 */
13359
13360xmlDocPtr
13361xmlRecoverFile(const char *filename) {
13362 return(xmlSAXParseFile(NULL, filename, 1));
13363}
13364
13365
13366/**
13367 * xmlSetupParserForBuffer:
13368 * @ctxt: an XML parser context
13369 * @buffer: a xmlChar * buffer
13370 * @filename: a file name
13371 *
13372 * Setup the parser context to parse a new buffer; Clears any prior
13373 * contents from the parser context. The buffer parameter must not be
13374 * NULL, but the filename parameter can be
13375 */
13376void
13377xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
13378 const char* filename)
13379{
13380 xmlParserInputPtr input;
13381
Daniel Veillard36e5cd52004-11-02 14:52:23 +000013382 if ((ctxt == NULL) || (buffer == NULL))
13383 return;
13384
Owen Taylor3473f882001-02-23 17:55:21 +000013385 input = xmlNewInputStream(ctxt);
13386 if (input == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +000013387 xmlErrMemory(NULL, "parsing new buffer: out of memory\n");
Daniel Veillard36e5cd52004-11-02 14:52:23 +000013388 xmlClearParserCtxt(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000013389 return;
13390 }
13391
13392 xmlClearParserCtxt(ctxt);
13393 if (filename != NULL)
Daniel Veillardc3ca5ba2003-05-09 22:26:28 +000013394 input->filename = (char *) xmlCanonicPath((const xmlChar *)filename);
Owen Taylor3473f882001-02-23 17:55:21 +000013395 input->base = buffer;
13396 input->cur = buffer;
Daniel Veillard48b2f892001-02-25 16:11:03 +000013397 input->end = &buffer[xmlStrlen(buffer)];
Owen Taylor3473f882001-02-23 17:55:21 +000013398 inputPush(ctxt, input);
13399}
13400
13401/**
13402 * xmlSAXUserParseFile:
13403 * @sax: a SAX handler
13404 * @user_data: The user data returned on SAX callbacks
13405 * @filename: a file name
13406 *
13407 * parse an XML file and call the given SAX handler routines.
13408 * Automatic support for ZLIB/Compress compressed document is provided
13409 *
13410 * Returns 0 in case of success or a error number otherwise
13411 */
13412int
13413xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
13414 const char *filename) {
13415 int ret = 0;
13416 xmlParserCtxtPtr ctxt;
13417
13418 ctxt = xmlCreateFileParserCtxt(filename);
13419 if (ctxt == NULL) return -1;
Daniel Veillard092643b2003-09-25 14:29:29 +000013420 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Owen Taylor3473f882001-02-23 17:55:21 +000013421 xmlFree(ctxt->sax);
13422 ctxt->sax = sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000013423 xmlDetectSAX2(ctxt);
13424
Owen Taylor3473f882001-02-23 17:55:21 +000013425 if (user_data != NULL)
13426 ctxt->userData = user_data;
13427
13428 xmlParseDocument(ctxt);
13429
13430 if (ctxt->wellFormed)
13431 ret = 0;
13432 else {
13433 if (ctxt->errNo != 0)
13434 ret = ctxt->errNo;
13435 else
13436 ret = -1;
13437 }
13438 if (sax != NULL)
13439 ctxt->sax = NULL;
Daniel Veillard34099b42004-11-04 17:34:35 +000013440 if (ctxt->myDoc != NULL) {
13441 xmlFreeDoc(ctxt->myDoc);
13442 ctxt->myDoc = NULL;
13443 }
Owen Taylor3473f882001-02-23 17:55:21 +000013444 xmlFreeParserCtxt(ctxt);
13445
13446 return ret;
13447}
Daniel Veillard81273902003-09-30 00:43:48 +000013448#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000013449
13450/************************************************************************
13451 * *
13452 * Front ends when parsing from memory *
13453 * *
13454 ************************************************************************/
13455
13456/**
13457 * xmlCreateMemoryParserCtxt:
13458 * @buffer: a pointer to a char array
13459 * @size: the size of the array
13460 *
13461 * Create a parser context for an XML in-memory document.
13462 *
13463 * Returns the new parser context or NULL
13464 */
13465xmlParserCtxtPtr
Daniel Veillardfd7ddca2001-05-16 10:57:35 +000013466xmlCreateMemoryParserCtxt(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000013467 xmlParserCtxtPtr ctxt;
13468 xmlParserInputPtr input;
13469 xmlParserInputBufferPtr buf;
13470
13471 if (buffer == NULL)
13472 return(NULL);
13473 if (size <= 0)
13474 return(NULL);
13475
13476 ctxt = xmlNewParserCtxt();
13477 if (ctxt == NULL)
13478 return(NULL);
13479
Daniel Veillard53350552003-09-18 13:35:51 +000013480 /* TODO: xmlParserInputBufferCreateStatic, requires some serious changes */
Owen Taylor3473f882001-02-23 17:55:21 +000013481 buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
Daniel Veillarda7e05b42002-11-19 08:11:14 +000013482 if (buf == NULL) {
13483 xmlFreeParserCtxt(ctxt);
13484 return(NULL);
13485 }
Owen Taylor3473f882001-02-23 17:55:21 +000013486
13487 input = xmlNewInputStream(ctxt);
13488 if (input == NULL) {
Daniel Veillarda7e05b42002-11-19 08:11:14 +000013489 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000013490 xmlFreeParserCtxt(ctxt);
13491 return(NULL);
13492 }
13493
13494 input->filename = NULL;
13495 input->buf = buf;
13496 input->base = input->buf->buffer->content;
13497 input->cur = input->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +000013498 input->end = &input->buf->buffer->content[input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000013499
13500 inputPush(ctxt, input);
13501 return(ctxt);
13502}
13503
Daniel Veillard81273902003-09-30 00:43:48 +000013504#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000013505/**
Daniel Veillard8606bbb2002-11-12 12:36:52 +000013506 * xmlSAXParseMemoryWithData:
13507 * @sax: the SAX handler block
13508 * @buffer: an pointer to a char array
13509 * @size: the size of the array
13510 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
13511 * documents
13512 * @data: the userdata
13513 *
13514 * parse an XML in-memory block and use the given SAX function block
13515 * to handle the parsing callback. If sax is NULL, fallback to the default
13516 * DOM tree building routines.
13517 *
13518 * User data (void *) is stored within the parser context in the
13519 * context's _private member, so it is available nearly everywhere in libxml
13520 *
13521 * Returns the resulting document tree
13522 */
13523
13524xmlDocPtr
13525xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer,
13526 int size, int recovery, void *data) {
13527 xmlDocPtr ret;
13528 xmlParserCtxtPtr ctxt;
13529
13530 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
13531 if (ctxt == NULL) return(NULL);
13532 if (sax != NULL) {
13533 if (ctxt->sax != NULL)
13534 xmlFree(ctxt->sax);
13535 ctxt->sax = sax;
13536 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000013537 xmlDetectSAX2(ctxt);
Daniel Veillard8606bbb2002-11-12 12:36:52 +000013538 if (data!=NULL) {
13539 ctxt->_private=data;
13540 }
13541
Daniel Veillardadba5f12003-04-04 16:09:01 +000013542 ctxt->recovery = recovery;
13543
Daniel Veillard8606bbb2002-11-12 12:36:52 +000013544 xmlParseDocument(ctxt);
13545
13546 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
13547 else {
13548 ret = NULL;
13549 xmlFreeDoc(ctxt->myDoc);
13550 ctxt->myDoc = NULL;
13551 }
13552 if (sax != NULL)
13553 ctxt->sax = NULL;
13554 xmlFreeParserCtxt(ctxt);
13555
13556 return(ret);
13557}
13558
13559/**
Owen Taylor3473f882001-02-23 17:55:21 +000013560 * xmlSAXParseMemory:
13561 * @sax: the SAX handler block
13562 * @buffer: an pointer to a char array
13563 * @size: the size of the array
13564 * @recovery: work in recovery mode, i.e. tries to read not Well Formed
13565 * documents
13566 *
13567 * parse an XML in-memory block and use the given SAX function block
13568 * to handle the parsing callback. If sax is NULL, fallback to the default
13569 * DOM tree building routines.
13570 *
13571 * Returns the resulting document tree
13572 */
13573xmlDocPtr
Daniel Veillard50822cb2001-07-26 20:05:51 +000013574xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer,
13575 int size, int recovery) {
Daniel Veillard8606bbb2002-11-12 12:36:52 +000013576 return xmlSAXParseMemoryWithData(sax, buffer, size, recovery, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000013577}
13578
13579/**
13580 * xmlParseMemory:
13581 * @buffer: an pointer to a char array
13582 * @size: the size of the array
13583 *
13584 * parse an XML in-memory block and build a tree.
13585 *
13586 * Returns the resulting document tree
13587 */
13588
Daniel Veillard50822cb2001-07-26 20:05:51 +000013589xmlDocPtr xmlParseMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000013590 return(xmlSAXParseMemory(NULL, buffer, size, 0));
13591}
13592
13593/**
13594 * xmlRecoverMemory:
13595 * @buffer: an pointer to a char array
13596 * @size: the size of the array
13597 *
13598 * parse an XML in-memory block and build a tree.
Daniel Veillard2135fc22008-04-04 16:10:51 +000013599 * In the case the document is not Well Formed, an attempt to
13600 * build a tree is tried anyway
13601 *
13602 * Returns the resulting document tree or NULL in case of error
Owen Taylor3473f882001-02-23 17:55:21 +000013603 */
13604
Daniel Veillard50822cb2001-07-26 20:05:51 +000013605xmlDocPtr xmlRecoverMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000013606 return(xmlSAXParseMemory(NULL, buffer, size, 1));
13607}
13608
13609/**
13610 * xmlSAXUserParseMemory:
13611 * @sax: a SAX handler
13612 * @user_data: The user data returned on SAX callbacks
13613 * @buffer: an in-memory XML document input
13614 * @size: the length of the XML document in bytes
13615 *
13616 * A better SAX parsing routine.
13617 * parse an XML in-memory buffer and call the given SAX handler routines.
13618 *
13619 * Returns 0 in case of success or a error number otherwise
13620 */
13621int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardfd7ddca2001-05-16 10:57:35 +000013622 const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000013623 int ret = 0;
13624 xmlParserCtxtPtr ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000013625
13626 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
13627 if (ctxt == NULL) return -1;
Daniel Veillard3dcd3192007-08-14 13:46:54 +000013628 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
13629 xmlFree(ctxt->sax);
Daniel Veillard9e923512002-08-14 08:48:52 +000013630 ctxt->sax = sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000013631 xmlDetectSAX2(ctxt);
Daniel Veillard3dcd3192007-08-14 13:46:54 +000013632
Daniel Veillard30211a02001-04-26 09:33:18 +000013633 if (user_data != NULL)
13634 ctxt->userData = user_data;
Owen Taylor3473f882001-02-23 17:55:21 +000013635
13636 xmlParseDocument(ctxt);
13637
13638 if (ctxt->wellFormed)
13639 ret = 0;
13640 else {
13641 if (ctxt->errNo != 0)
13642 ret = ctxt->errNo;
13643 else
13644 ret = -1;
13645 }
Daniel Veillard3dcd3192007-08-14 13:46:54 +000013646 if (sax != NULL)
13647 ctxt->sax = NULL;
Daniel Veillard34099b42004-11-04 17:34:35 +000013648 if (ctxt->myDoc != NULL) {
13649 xmlFreeDoc(ctxt->myDoc);
13650 ctxt->myDoc = NULL;
13651 }
Owen Taylor3473f882001-02-23 17:55:21 +000013652 xmlFreeParserCtxt(ctxt);
13653
13654 return ret;
13655}
Daniel Veillard81273902003-09-30 00:43:48 +000013656#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000013657
13658/**
13659 * xmlCreateDocParserCtxt:
13660 * @cur: a pointer to an array of xmlChar
13661 *
13662 * Creates a parser context for an XML in-memory document.
13663 *
13664 * Returns the new parser context or NULL
13665 */
13666xmlParserCtxtPtr
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013667xmlCreateDocParserCtxt(const xmlChar *cur) {
Owen Taylor3473f882001-02-23 17:55:21 +000013668 int len;
13669
13670 if (cur == NULL)
13671 return(NULL);
13672 len = xmlStrlen(cur);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013673 return(xmlCreateMemoryParserCtxt((const char *)cur, len));
Owen Taylor3473f882001-02-23 17:55:21 +000013674}
13675
Daniel Veillard81273902003-09-30 00:43:48 +000013676#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000013677/**
13678 * xmlSAXParseDoc:
13679 * @sax: the SAX handler block
13680 * @cur: a pointer to an array of xmlChar
13681 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
13682 * documents
13683 *
13684 * parse an XML in-memory document and build a tree.
13685 * It use the given SAX function block to handle the parsing callback.
13686 * If sax is NULL, fallback to the default DOM tree building routines.
13687 *
13688 * Returns the resulting document tree
13689 */
13690
13691xmlDocPtr
Daniel Veillard7331e5c2005-03-31 14:59:00 +000013692xmlSAXParseDoc(xmlSAXHandlerPtr sax, const xmlChar *cur, int recovery) {
Owen Taylor3473f882001-02-23 17:55:21 +000013693 xmlDocPtr ret;
13694 xmlParserCtxtPtr ctxt;
Daniel Veillard38936062004-11-04 17:45:11 +000013695 xmlSAXHandlerPtr oldsax = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000013696
Daniel Veillard38936062004-11-04 17:45:11 +000013697 if (cur == NULL) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000013698
13699
13700 ctxt = xmlCreateDocParserCtxt(cur);
13701 if (ctxt == NULL) return(NULL);
13702 if (sax != NULL) {
Daniel Veillard38936062004-11-04 17:45:11 +000013703 oldsax = ctxt->sax;
Owen Taylor3473f882001-02-23 17:55:21 +000013704 ctxt->sax = sax;
13705 ctxt->userData = NULL;
13706 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000013707 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000013708
13709 xmlParseDocument(ctxt);
13710 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
13711 else {
13712 ret = NULL;
13713 xmlFreeDoc(ctxt->myDoc);
13714 ctxt->myDoc = NULL;
13715 }
Daniel Veillard34099b42004-11-04 17:34:35 +000013716 if (sax != NULL)
Daniel Veillard38936062004-11-04 17:45:11 +000013717 ctxt->sax = oldsax;
Owen Taylor3473f882001-02-23 17:55:21 +000013718 xmlFreeParserCtxt(ctxt);
13719
13720 return(ret);
13721}
13722
13723/**
13724 * xmlParseDoc:
13725 * @cur: a pointer to an array of xmlChar
13726 *
13727 * parse an XML in-memory document and build a tree.
13728 *
13729 * Returns the resulting document tree
13730 */
13731
13732xmlDocPtr
Daniel Veillard7331e5c2005-03-31 14:59:00 +000013733xmlParseDoc(const xmlChar *cur) {
Owen Taylor3473f882001-02-23 17:55:21 +000013734 return(xmlSAXParseDoc(NULL, cur, 0));
13735}
Daniel Veillard81273902003-09-30 00:43:48 +000013736#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000013737
Daniel Veillard81273902003-09-30 00:43:48 +000013738#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +000013739/************************************************************************
13740 * *
13741 * Specific function to keep track of entities references *
13742 * and used by the XSLT debugger *
13743 * *
13744 ************************************************************************/
13745
13746static xmlEntityReferenceFunc xmlEntityRefFunc = NULL;
13747
13748/**
13749 * xmlAddEntityReference:
13750 * @ent : A valid entity
13751 * @firstNode : A valid first node for children of entity
13752 * @lastNode : A valid last node of children entity
13753 *
13754 * Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY
13755 */
13756static void
13757xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
13758 xmlNodePtr lastNode)
13759{
13760 if (xmlEntityRefFunc != NULL) {
13761 (*xmlEntityRefFunc) (ent, firstNode, lastNode);
13762 }
13763}
13764
13765
13766/**
13767 * xmlSetEntityReferenceFunc:
Daniel Veillard01c13b52002-12-10 15:19:08 +000013768 * @func: A valid function
Daniel Veillard8107a222002-01-13 14:10:10 +000013769 *
13770 * Set the function to call call back when a xml reference has been made
13771 */
13772void
13773xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func)
13774{
13775 xmlEntityRefFunc = func;
13776}
Daniel Veillard81273902003-09-30 00:43:48 +000013777#endif /* LIBXML_LEGACY_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000013778
13779/************************************************************************
13780 * *
13781 * Miscellaneous *
13782 * *
13783 ************************************************************************/
13784
13785#ifdef LIBXML_XPATH_ENABLED
13786#include <libxml/xpath.h>
13787#endif
13788
Daniel Veillardffa3c742005-07-21 13:24:09 +000013789extern void XMLCDECL xmlGenericErrorDefaultFunc(void *ctx, const char *msg, ...);
Owen Taylor3473f882001-02-23 17:55:21 +000013790static int xmlParserInitialized = 0;
13791
13792/**
13793 * xmlInitParser:
13794 *
13795 * Initialization function for the XML parser.
13796 * This is not reentrant. Call once before processing in case of
13797 * use in multithreaded programs.
13798 */
13799
13800void
13801xmlInitParser(void) {
Daniel Veillard3c01b1d2001-10-17 15:58:35 +000013802 if (xmlParserInitialized != 0)
13803 return;
Owen Taylor3473f882001-02-23 17:55:21 +000013804
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000013805#ifdef LIBXML_THREAD_ENABLED
13806 __xmlGlobalInitMutexLock();
13807 if (xmlParserInitialized == 0) {
13808#endif
13809 if ((xmlGenericError == xmlGenericErrorDefaultFunc) ||
13810 (xmlGenericError == NULL))
13811 initGenericErrorDefaultFunc(NULL);
13812 xmlInitGlobals();
13813 xmlInitThreads();
13814 xmlInitMemory();
13815 xmlInitCharEncodingHandlers();
13816 xmlDefaultSAXHandlerInit();
13817 xmlRegisterDefaultInputCallbacks();
Daniel Veillarda9cce9c2003-09-29 13:20:24 +000013818#ifdef LIBXML_OUTPUT_ENABLED
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000013819 xmlRegisterDefaultOutputCallbacks();
Daniel Veillarda9cce9c2003-09-29 13:20:24 +000013820#endif /* LIBXML_OUTPUT_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000013821#ifdef LIBXML_HTML_ENABLED
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000013822 htmlInitAutoClose();
13823 htmlDefaultSAXHandlerInit();
Owen Taylor3473f882001-02-23 17:55:21 +000013824#endif
13825#ifdef LIBXML_XPATH_ENABLED
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000013826 xmlXPathInit();
Owen Taylor3473f882001-02-23 17:55:21 +000013827#endif
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000013828 xmlParserInitialized = 1;
13829#ifdef LIBXML_THREAD_ENABLED
13830 }
13831 __xmlGlobalInitMutexUnlock();
13832#endif
Owen Taylor3473f882001-02-23 17:55:21 +000013833}
13834
13835/**
13836 * xmlCleanupParser:
13837 *
Daniel Veillard05b37c62008-03-31 08:27:07 +000013838 * This function name is somewhat misleading. It does not clean up
13839 * parser state, it cleans up memory allocated by the library itself.
13840 * It is a cleanup function for the XML library. It tries to reclaim all
13841 * related global memory allocated for the library processing.
13842 * It doesn't deallocate any document related memory. One should
13843 * call xmlCleanupParser() only when the process has finished using
13844 * the library and all XML/HTML documents built with it.
13845 * See also xmlInitParser() which has the opposite function of preparing
13846 * the library for operations.
Owen Taylor3473f882001-02-23 17:55:21 +000013847 */
13848
13849void
13850xmlCleanupParser(void) {
Daniel Veillard7fb801f2003-08-17 21:07:26 +000013851 if (!xmlParserInitialized)
13852 return;
13853
Owen Taylor3473f882001-02-23 17:55:21 +000013854 xmlCleanupCharEncodingHandlers();
Daniel Veillarde2940dd2001-08-22 00:06:49 +000013855#ifdef LIBXML_CATALOG_ENABLED
13856 xmlCatalogCleanup();
13857#endif
Daniel Veillard14412512005-01-21 23:53:26 +000013858 xmlDictCleanup();
Daniel Veillard04054be2003-10-15 10:48:54 +000013859 xmlCleanupInputCallbacks();
13860#ifdef LIBXML_OUTPUT_ENABLED
13861 xmlCleanupOutputCallbacks();
13862#endif
Daniel Veillard36e5cd52004-11-02 14:52:23 +000013863#ifdef LIBXML_SCHEMAS_ENABLED
13864 xmlSchemaCleanupTypes();
Daniel Veillarddd6d3002004-11-03 14:20:29 +000013865 xmlRelaxNGCleanupTypes();
Daniel Veillard36e5cd52004-11-02 14:52:23 +000013866#endif
Daniel Veillard781ac8b2003-05-15 22:11:36 +000013867 xmlCleanupGlobals();
Daniel Veillard2b8c4a12003-10-02 22:28:19 +000013868 xmlResetLastError();
Daniel Veillard74c0e592003-11-25 07:01:38 +000013869 xmlCleanupThreads(); /* must be last if called not from the main thread */
William M. Brack72ee48d2003-12-30 08:30:19 +000013870 xmlCleanupMemory();
Daniel Veillardd0463562001-10-13 09:15:48 +000013871 xmlParserInitialized = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000013872}
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013873
13874/************************************************************************
13875 * *
13876 * New set (2.6.0) of simpler and more flexible APIs *
13877 * *
13878 ************************************************************************/
13879
/**
 * DICT_FREE:
 * @str:  a string
 *
 * Free a string if it is not owned by the "dict" dictionary in the
 * current scope. A NULL @str is ignored, and when "dict" is NULL the
 * string is always freed.
 *
 * The expansion is a single statement wrapped in do { } while (0), so
 * the macro must be followed by a semicolon and can safely be used as
 * the body of an if/else. @str is evaluated more than once.
 */
#define DICT_FREE(str)                                              \
    do {                                                            \
        if ((str) && ((!dict) ||                                    \
            (xmlDictOwns(dict, (const xmlChar *)(str)) == 0)))      \
            xmlFree((char *)(str));                                 \
    } while (0)
13891
13892/**
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013893 * xmlCtxtReset:
13894 * @ctxt: an XML parser context
13895 *
13896 * Reset a parser context
13897 */
13898void
13899xmlCtxtReset(xmlParserCtxtPtr ctxt)
13900{
13901 xmlParserInputPtr input;
Daniel Veillard36e5cd52004-11-02 14:52:23 +000013902 xmlDictPtr dict;
13903
13904 if (ctxt == NULL)
13905 return;
13906
13907 dict = ctxt->dict;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013908
13909 while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
13910 xmlFreeInputStream(input);
13911 }
13912 ctxt->inputNr = 0;
13913 ctxt->input = NULL;
13914
13915 ctxt->spaceNr = 0;
Daniel Veillard2e620862007-06-12 08:18:21 +000013916 if (ctxt->spaceTab != NULL) {
13917 ctxt->spaceTab[0] = -1;
13918 ctxt->space = &ctxt->spaceTab[0];
13919 } else {
13920 ctxt->space = NULL;
13921 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013922
13923
13924 ctxt->nodeNr = 0;
13925 ctxt->node = NULL;
13926
13927 ctxt->nameNr = 0;
13928 ctxt->name = NULL;
13929
Daniel Veillarde96a2a42003-09-24 21:23:56 +000013930 DICT_FREE(ctxt->version);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013931 ctxt->version = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000013932 DICT_FREE(ctxt->encoding);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013933 ctxt->encoding = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000013934 DICT_FREE(ctxt->directory);
13935 ctxt->directory = NULL;
13936 DICT_FREE(ctxt->extSubURI);
13937 ctxt->extSubURI = NULL;
13938 DICT_FREE(ctxt->extSubSystem);
13939 ctxt->extSubSystem = NULL;
13940 if (ctxt->myDoc != NULL)
13941 xmlFreeDoc(ctxt->myDoc);
13942 ctxt->myDoc = NULL;
13943
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013944 ctxt->standalone = -1;
13945 ctxt->hasExternalSubset = 0;
13946 ctxt->hasPErefs = 0;
13947 ctxt->html = 0;
13948 ctxt->external = 0;
13949 ctxt->instate = XML_PARSER_START;
13950 ctxt->token = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013951
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013952 ctxt->wellFormed = 1;
13953 ctxt->nsWellFormed = 1;
Daniel Veillardae289182004-01-21 16:00:43 +000013954 ctxt->disableSAX = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013955 ctxt->valid = 1;
Daniel Veillard766c4f92004-03-26 10:48:29 +000013956#if 0
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013957 ctxt->vctxt.userData = ctxt;
13958 ctxt->vctxt.error = xmlParserValidityError;
13959 ctxt->vctxt.warning = xmlParserValidityWarning;
Daniel Veillard766c4f92004-03-26 10:48:29 +000013960#endif
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013961 ctxt->record_info = 0;
13962 ctxt->nbChars = 0;
13963 ctxt->checkIndex = 0;
13964 ctxt->inSubset = 0;
13965 ctxt->errNo = XML_ERR_OK;
13966 ctxt->depth = 0;
13967 ctxt->charset = XML_CHAR_ENCODING_UTF8;
13968 ctxt->catalogs = NULL;
Daniel Veillard4bf899b2008-08-20 17:04:30 +000013969 ctxt->nbentities = 0;
Daniel Veillard0161e632008-08-28 15:36:32 +000013970 ctxt->sizeentities = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013971 xmlInitNodeInfoSeq(&ctxt->node_seq);
13972
13973 if (ctxt->attsDefault != NULL) {
13974 xmlHashFree(ctxt->attsDefault, (xmlHashDeallocator) xmlFree);
13975 ctxt->attsDefault = NULL;
13976 }
13977 if (ctxt->attsSpecial != NULL) {
13978 xmlHashFree(ctxt->attsSpecial, NULL);
13979 ctxt->attsSpecial = NULL;
13980 }
13981
Daniel Veillard4432df22003-09-28 18:58:27 +000013982#ifdef LIBXML_CATALOG_ENABLED
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013983 if (ctxt->catalogs != NULL)
13984 xmlCatalogFreeLocal(ctxt->catalogs);
Daniel Veillard4432df22003-09-28 18:58:27 +000013985#endif
Daniel Veillardcc199e02003-10-24 21:11:48 +000013986 if (ctxt->lastError.code != XML_ERR_OK)
13987 xmlResetError(&ctxt->lastError);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013988}
13989
13990/**
Daniel Veillard9ba8e382003-10-28 21:31:45 +000013991 * xmlCtxtResetPush:
13992 * @ctxt: an XML parser context
13993 * @chunk: a pointer to an array of chars
13994 * @size: number of chars in the array
13995 * @filename: an optional file name or URI
13996 * @encoding: the document encoding, or NULL
13997 *
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000013998 * Reset a push parser context
13999 *
14000 * Returns 0 in case of success and 1 in case of error
Daniel Veillard9ba8e382003-10-28 21:31:45 +000014001 */
14002int
14003xmlCtxtResetPush(xmlParserCtxtPtr ctxt, const char *chunk,
14004 int size, const char *filename, const char *encoding)
14005{
14006 xmlParserInputPtr inputStream;
14007 xmlParserInputBufferPtr buf;
14008 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
14009
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000014010 if (ctxt == NULL)
14011 return(1);
14012
Daniel Veillard9ba8e382003-10-28 21:31:45 +000014013 if ((encoding == NULL) && (chunk != NULL) && (size >= 4))
14014 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
14015
14016 buf = xmlAllocParserInputBuffer(enc);
14017 if (buf == NULL)
14018 return(1);
14019
14020 if (ctxt == NULL) {
14021 xmlFreeParserInputBuffer(buf);
14022 return(1);
14023 }
14024
14025 xmlCtxtReset(ctxt);
14026
14027 if (ctxt->pushTab == NULL) {
14028 ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 *
14029 sizeof(xmlChar *));
14030 if (ctxt->pushTab == NULL) {
14031 xmlErrMemory(ctxt, NULL);
14032 xmlFreeParserInputBuffer(buf);
14033 return(1);
14034 }
14035 }
14036
14037 if (filename == NULL) {
14038 ctxt->directory = NULL;
14039 } else {
14040 ctxt->directory = xmlParserGetDirectory(filename);
14041 }
14042
14043 inputStream = xmlNewInputStream(ctxt);
14044 if (inputStream == NULL) {
14045 xmlFreeParserInputBuffer(buf);
14046 return(1);
14047 }
14048
14049 if (filename == NULL)
14050 inputStream->filename = NULL;
14051 else
14052 inputStream->filename = (char *)
14053 xmlCanonicPath((const xmlChar *) filename);
14054 inputStream->buf = buf;
14055 inputStream->base = inputStream->buf->buffer->content;
14056 inputStream->cur = inputStream->buf->buffer->content;
14057 inputStream->end =
14058 &inputStream->buf->buffer->content[inputStream->buf->buffer->use];
14059
14060 inputPush(ctxt, inputStream);
14061
14062 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
14063 (ctxt->input->buf != NULL)) {
14064 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
14065 int cur = ctxt->input->cur - ctxt->input->base;
14066
14067 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
14068
14069 ctxt->input->base = ctxt->input->buf->buffer->content + base;
14070 ctxt->input->cur = ctxt->input->base + cur;
14071 ctxt->input->end =
14072 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->
14073 use];
14074#ifdef DEBUG_PUSH
14075 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
14076#endif
14077 }
14078
14079 if (encoding != NULL) {
14080 xmlCharEncodingHandlerPtr hdlr;
14081
Daniel Veillard37334572008-07-31 08:20:02 +000014082 if (ctxt->encoding != NULL)
14083 xmlFree((xmlChar *) ctxt->encoding);
14084 ctxt->encoding = xmlStrdup((const xmlChar *) encoding);
14085
Daniel Veillard9ba8e382003-10-28 21:31:45 +000014086 hdlr = xmlFindCharEncodingHandler(encoding);
14087 if (hdlr != NULL) {
14088 xmlSwitchToEncoding(ctxt, hdlr);
14089 } else {
14090 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
14091 "Unsupported encoding %s\n", BAD_CAST encoding);
14092 }
14093 } else if (enc != XML_CHAR_ENCODING_NONE) {
14094 xmlSwitchEncoding(ctxt, enc);
14095 }
14096
14097 return(0);
14098}
14099
Daniel Veillard37334572008-07-31 08:20:02 +000014100
Daniel Veillard9ba8e382003-10-28 21:31:45 +000014101/**
Daniel Veillard37334572008-07-31 08:20:02 +000014102 * xmlCtxtUseOptionsInternal:
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014103 * @ctxt: an XML parser context
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014104 * @options: a combination of xmlParserOption
Daniel Veillard37334572008-07-31 08:20:02 +000014105 * @encoding: the user provided encoding to use
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014106 *
14107 * Applies the options to the parser context
14108 *
14109 * Returns 0 in case of success, the set of unknown or unimplemented options
14110 * in case of error.
14111 */
Daniel Veillard37334572008-07-31 08:20:02 +000014112static int
14113xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options, const char *encoding)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014114{
Daniel Veillard36e5cd52004-11-02 14:52:23 +000014115 if (ctxt == NULL)
14116 return(-1);
Daniel Veillard37334572008-07-31 08:20:02 +000014117 if (encoding != NULL) {
14118 if (ctxt->encoding != NULL)
14119 xmlFree((xmlChar *) ctxt->encoding);
14120 ctxt->encoding = xmlStrdup((const xmlChar *) encoding);
14121 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014122 if (options & XML_PARSE_RECOVER) {
14123 ctxt->recovery = 1;
14124 options -= XML_PARSE_RECOVER;
Daniel Veillardae0765b2008-07-31 19:54:59 +000014125 ctxt->options |= XML_PARSE_RECOVER;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014126 } else
14127 ctxt->recovery = 0;
14128 if (options & XML_PARSE_DTDLOAD) {
14129 ctxt->loadsubset = XML_DETECT_IDS;
14130 options -= XML_PARSE_DTDLOAD;
Daniel Veillardae0765b2008-07-31 19:54:59 +000014131 ctxt->options |= XML_PARSE_DTDLOAD;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014132 } else
14133 ctxt->loadsubset = 0;
14134 if (options & XML_PARSE_DTDATTR) {
14135 ctxt->loadsubset |= XML_COMPLETE_ATTRS;
14136 options -= XML_PARSE_DTDATTR;
Daniel Veillardae0765b2008-07-31 19:54:59 +000014137 ctxt->options |= XML_PARSE_DTDATTR;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014138 }
14139 if (options & XML_PARSE_NOENT) {
14140 ctxt->replaceEntities = 1;
14141 /* ctxt->loadsubset |= XML_DETECT_IDS; */
14142 options -= XML_PARSE_NOENT;
Daniel Veillardae0765b2008-07-31 19:54:59 +000014143 ctxt->options |= XML_PARSE_NOENT;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014144 } else
14145 ctxt->replaceEntities = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014146 if (options & XML_PARSE_PEDANTIC) {
14147 ctxt->pedantic = 1;
14148 options -= XML_PARSE_PEDANTIC;
Daniel Veillardae0765b2008-07-31 19:54:59 +000014149 ctxt->options |= XML_PARSE_PEDANTIC;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014150 } else
14151 ctxt->pedantic = 0;
14152 if (options & XML_PARSE_NOBLANKS) {
14153 ctxt->keepBlanks = 0;
14154 ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace;
14155 options -= XML_PARSE_NOBLANKS;
Daniel Veillardae0765b2008-07-31 19:54:59 +000014156 ctxt->options |= XML_PARSE_NOBLANKS;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014157 } else
14158 ctxt->keepBlanks = 1;
14159 if (options & XML_PARSE_DTDVALID) {
14160 ctxt->validate = 1;
14161 if (options & XML_PARSE_NOWARNING)
14162 ctxt->vctxt.warning = NULL;
14163 if (options & XML_PARSE_NOERROR)
14164 ctxt->vctxt.error = NULL;
14165 options -= XML_PARSE_DTDVALID;
Daniel Veillardae0765b2008-07-31 19:54:59 +000014166 ctxt->options |= XML_PARSE_DTDVALID;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014167 } else
14168 ctxt->validate = 0;
Daniel Veillard971771e2005-07-09 17:32:57 +000014169 if (options & XML_PARSE_NOWARNING) {
14170 ctxt->sax->warning = NULL;
14171 options -= XML_PARSE_NOWARNING;
14172 }
14173 if (options & XML_PARSE_NOERROR) {
14174 ctxt->sax->error = NULL;
14175 ctxt->sax->fatalError = NULL;
14176 options -= XML_PARSE_NOERROR;
14177 }
Daniel Veillard81273902003-09-30 00:43:48 +000014178#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014179 if (options & XML_PARSE_SAX1) {
14180 ctxt->sax->startElement = xmlSAX2StartElement;
14181 ctxt->sax->endElement = xmlSAX2EndElement;
14182 ctxt->sax->startElementNs = NULL;
14183 ctxt->sax->endElementNs = NULL;
14184 ctxt->sax->initialized = 1;
14185 options -= XML_PARSE_SAX1;
Daniel Veillardae0765b2008-07-31 19:54:59 +000014186 ctxt->options |= XML_PARSE_SAX1;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014187 }
Daniel Veillard81273902003-09-30 00:43:48 +000014188#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde96a2a42003-09-24 21:23:56 +000014189 if (options & XML_PARSE_NODICT) {
14190 ctxt->dictNames = 0;
14191 options -= XML_PARSE_NODICT;
Daniel Veillardae0765b2008-07-31 19:54:59 +000014192 ctxt->options |= XML_PARSE_NODICT;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000014193 } else {
14194 ctxt->dictNames = 1;
14195 }
Daniel Veillarddca8cc72003-09-26 13:53:14 +000014196 if (options & XML_PARSE_NOCDATA) {
14197 ctxt->sax->cdataBlock = NULL;
14198 options -= XML_PARSE_NOCDATA;
Daniel Veillardae0765b2008-07-31 19:54:59 +000014199 ctxt->options |= XML_PARSE_NOCDATA;
Daniel Veillarddca8cc72003-09-26 13:53:14 +000014200 }
14201 if (options & XML_PARSE_NSCLEAN) {
14202 ctxt->options |= XML_PARSE_NSCLEAN;
14203 options -= XML_PARSE_NSCLEAN;
14204 }
Daniel Veillard61b93382003-11-03 14:28:31 +000014205 if (options & XML_PARSE_NONET) {
14206 ctxt->options |= XML_PARSE_NONET;
14207 options -= XML_PARSE_NONET;
14208 }
Daniel Veillard8874b942005-08-25 13:19:21 +000014209 if (options & XML_PARSE_COMPACT) {
14210 ctxt->options |= XML_PARSE_COMPACT;
14211 options -= XML_PARSE_COMPACT;
14212 }
Daniel Veillard7e5c3f42008-07-29 16:12:31 +000014213 if (options & XML_PARSE_OLD10) {
14214 ctxt->options |= XML_PARSE_OLD10;
14215 options -= XML_PARSE_OLD10;
14216 }
Daniel Veillard8915c152008-08-26 13:05:34 +000014217 if (options & XML_PARSE_NOBASEFIX) {
14218 ctxt->options |= XML_PARSE_NOBASEFIX;
14219 options -= XML_PARSE_NOBASEFIX;
14220 }
14221 if (options & XML_PARSE_HUGE) {
14222 ctxt->options |= XML_PARSE_HUGE;
14223 options -= XML_PARSE_HUGE;
14224 }
Rob Richardsb9ed0172009-01-05 17:28:50 +000014225 if (options & XML_PARSE_OLDSAX) {
14226 ctxt->options |= XML_PARSE_OLDSAX;
14227 options -= XML_PARSE_OLDSAX;
14228 }
Daniel Veillard7ec29972003-10-31 14:36:36 +000014229 ctxt->linenumbers = 1;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014230 return (options);
14231}
14232
14233/**
Daniel Veillard37334572008-07-31 08:20:02 +000014234 * xmlCtxtUseOptions:
14235 * @ctxt: an XML parser context
14236 * @options: a combination of xmlParserOption
14237 *
14238 * Applies the options to the parser context
14239 *
14240 * Returns 0 in case of success, the set of unknown or unimplemented options
14241 * in case of error.
14242 */
14243int
14244xmlCtxtUseOptions(xmlParserCtxtPtr ctxt, int options)
14245{
14246 return(xmlCtxtUseOptionsInternal(ctxt, options, NULL));
14247}
14248
14249/**
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014250 * xmlDoRead:
14251 * @ctxt: an XML parser context
Daniel Veillard60942de2003-09-25 21:05:58 +000014252 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014253 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014254 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014255 * @reuse: keep the context for reuse
14256 *
14257 * Common front-end for the xmlRead functions
Daniel Veillard37334572008-07-31 08:20:02 +000014258 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014259 * Returns the resulting document tree or NULL
14260 */
14261static xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000014262xmlDoRead(xmlParserCtxtPtr ctxt, const char *URL, const char *encoding,
14263 int options, int reuse)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014264{
14265 xmlDocPtr ret;
Daniel Veillard37334572008-07-31 08:20:02 +000014266
14267 xmlCtxtUseOptionsInternal(ctxt, options, encoding);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014268 if (encoding != NULL) {
14269 xmlCharEncodingHandlerPtr hdlr;
14270
14271 hdlr = xmlFindCharEncodingHandler(encoding);
14272 if (hdlr != NULL)
14273 xmlSwitchToEncoding(ctxt, hdlr);
14274 }
Daniel Veillard60942de2003-09-25 21:05:58 +000014275 if ((URL != NULL) && (ctxt->input != NULL) &&
14276 (ctxt->input->filename == NULL))
14277 ctxt->input->filename = (char *) xmlStrdup((const xmlChar *) URL);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014278 xmlParseDocument(ctxt);
14279 if ((ctxt->wellFormed) || ctxt->recovery)
14280 ret = ctxt->myDoc;
14281 else {
14282 ret = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000014283 if (ctxt->myDoc != NULL) {
Daniel Veillarde96a2a42003-09-24 21:23:56 +000014284 xmlFreeDoc(ctxt->myDoc);
14285 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014286 }
Daniel Veillarde96a2a42003-09-24 21:23:56 +000014287 ctxt->myDoc = NULL;
14288 if (!reuse) {
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014289 xmlFreeParserCtxt(ctxt);
Daniel Veillarde96a2a42003-09-24 21:23:56 +000014290 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014291
14292 return (ret);
14293}
14294
14295/**
14296 * xmlReadDoc:
14297 * @cur: a pointer to a zero terminated string
Daniel Veillard60942de2003-09-25 21:05:58 +000014298 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014299 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014300 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014301 *
14302 * parse an XML in-memory document and build a tree.
14303 *
14304 * Returns the resulting document tree
14305 */
14306xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000014307xmlReadDoc(const xmlChar * cur, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014308{
14309 xmlParserCtxtPtr ctxt;
14310
14311 if (cur == NULL)
14312 return (NULL);
14313
14314 ctxt = xmlCreateDocParserCtxt(cur);
14315 if (ctxt == NULL)
14316 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000014317 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014318}
14319
14320/**
14321 * xmlReadFile:
14322 * @filename: a file or URL
14323 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014324 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014325 *
14326 * parse an XML file from the filesystem or the network.
14327 *
14328 * Returns the resulting document tree
14329 */
14330xmlDocPtr
14331xmlReadFile(const char *filename, const char *encoding, int options)
14332{
14333 xmlParserCtxtPtr ctxt;
14334
Daniel Veillard61b93382003-11-03 14:28:31 +000014335 ctxt = xmlCreateURLParserCtxt(filename, options);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014336 if (ctxt == NULL)
14337 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000014338 return (xmlDoRead(ctxt, NULL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014339}
14340
14341/**
14342 * xmlReadMemory:
14343 * @buffer: a pointer to a char array
14344 * @size: the size of the array
Daniel Veillard60942de2003-09-25 21:05:58 +000014345 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014346 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014347 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014348 *
14349 * parse an XML in-memory document and build a tree.
14350 *
14351 * Returns the resulting document tree
14352 */
14353xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000014354xmlReadMemory(const char *buffer, int size, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014355{
14356 xmlParserCtxtPtr ctxt;
14357
14358 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
14359 if (ctxt == NULL)
14360 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000014361 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014362}
14363
14364/**
14365 * xmlReadFd:
14366 * @fd: an open file descriptor
Daniel Veillard60942de2003-09-25 21:05:58 +000014367 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014368 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014369 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014370 *
14371 * parse an XML from a file descriptor and build a tree.
Daniel Veillard4bc5f432003-12-22 18:13:12 +000014372 * NOTE that the file descriptor will not be closed when the
14373 * reader is closed or reset.
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014374 *
14375 * Returns the resulting document tree
14376 */
14377xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000014378xmlReadFd(int fd, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014379{
14380 xmlParserCtxtPtr ctxt;
14381 xmlParserInputBufferPtr input;
14382 xmlParserInputPtr stream;
14383
14384 if (fd < 0)
14385 return (NULL);
14386
14387 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
14388 if (input == NULL)
14389 return (NULL);
Daniel Veillard4bc5f432003-12-22 18:13:12 +000014390 input->closecallback = NULL;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014391 ctxt = xmlNewParserCtxt();
14392 if (ctxt == NULL) {
14393 xmlFreeParserInputBuffer(input);
14394 return (NULL);
14395 }
14396 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
14397 if (stream == NULL) {
14398 xmlFreeParserInputBuffer(input);
14399 xmlFreeParserCtxt(ctxt);
14400 return (NULL);
14401 }
14402 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000014403 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014404}
14405
14406/**
14407 * xmlReadIO:
14408 * @ioread: an I/O read function
14409 * @ioclose: an I/O close function
14410 * @ioctx: an I/O handler
Daniel Veillard60942de2003-09-25 21:05:58 +000014411 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014412 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014413 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014414 *
14415 * parse an XML document from I/O functions and source and build a tree.
14416 *
14417 * Returns the resulting document tree
14418 */
14419xmlDocPtr
14420xmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
Daniel Veillard60942de2003-09-25 21:05:58 +000014421 void *ioctx, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014422{
14423 xmlParserCtxtPtr ctxt;
14424 xmlParserInputBufferPtr input;
14425 xmlParserInputPtr stream;
14426
14427 if (ioread == NULL)
14428 return (NULL);
14429
14430 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
14431 XML_CHAR_ENCODING_NONE);
14432 if (input == NULL)
14433 return (NULL);
14434 ctxt = xmlNewParserCtxt();
14435 if (ctxt == NULL) {
14436 xmlFreeParserInputBuffer(input);
14437 return (NULL);
14438 }
14439 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
14440 if (stream == NULL) {
14441 xmlFreeParserInputBuffer(input);
14442 xmlFreeParserCtxt(ctxt);
14443 return (NULL);
14444 }
14445 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000014446 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014447}
14448
14449/**
14450 * xmlCtxtReadDoc:
14451 * @ctxt: an XML parser context
14452 * @cur: a pointer to a zero terminated string
Daniel Veillard60942de2003-09-25 21:05:58 +000014453 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014454 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014455 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014456 *
14457 * parse an XML in-memory document and build a tree.
14458 * This reuses the existing @ctxt parser context
14459 *
14460 * Returns the resulting document tree
14461 */
14462xmlDocPtr
14463xmlCtxtReadDoc(xmlParserCtxtPtr ctxt, const xmlChar * cur,
Daniel Veillard60942de2003-09-25 21:05:58 +000014464 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014465{
14466 xmlParserInputPtr stream;
14467
14468 if (cur == NULL)
14469 return (NULL);
14470 if (ctxt == NULL)
14471 return (NULL);
14472
14473 xmlCtxtReset(ctxt);
14474
14475 stream = xmlNewStringInputStream(ctxt, cur);
14476 if (stream == NULL) {
14477 return (NULL);
14478 }
14479 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000014480 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014481}
14482
14483/**
14484 * xmlCtxtReadFile:
14485 * @ctxt: an XML parser context
14486 * @filename: a file or URL
14487 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014488 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014489 *
14490 * parse an XML file from the filesystem or the network.
14491 * This reuses the existing @ctxt parser context
14492 *
14493 * Returns the resulting document tree
14494 */
14495xmlDocPtr
14496xmlCtxtReadFile(xmlParserCtxtPtr ctxt, const char *filename,
14497 const char *encoding, int options)
14498{
14499 xmlParserInputPtr stream;
14500
14501 if (filename == NULL)
14502 return (NULL);
14503 if (ctxt == NULL)
14504 return (NULL);
14505
14506 xmlCtxtReset(ctxt);
14507
Daniel Veillard29614c72004-11-26 10:47:26 +000014508 stream = xmlLoadExternalEntity(filename, NULL, ctxt);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014509 if (stream == NULL) {
14510 return (NULL);
14511 }
14512 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000014513 return (xmlDoRead(ctxt, NULL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014514}
14515
14516/**
14517 * xmlCtxtReadMemory:
14518 * @ctxt: an XML parser context
14519 * @buffer: a pointer to a char array
14520 * @size: the size of the array
Daniel Veillard60942de2003-09-25 21:05:58 +000014521 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014522 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014523 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014524 *
14525 * parse an XML in-memory document and build a tree.
14526 * This reuses the existing @ctxt parser context
14527 *
14528 * Returns the resulting document tree
14529 */
14530xmlDocPtr
14531xmlCtxtReadMemory(xmlParserCtxtPtr ctxt, const char *buffer, int size,
Daniel Veillard60942de2003-09-25 21:05:58 +000014532 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014533{
14534 xmlParserInputBufferPtr input;
14535 xmlParserInputPtr stream;
14536
14537 if (ctxt == NULL)
14538 return (NULL);
14539 if (buffer == NULL)
14540 return (NULL);
14541
14542 xmlCtxtReset(ctxt);
14543
14544 input = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
14545 if (input == NULL) {
14546 return(NULL);
14547 }
14548
14549 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
14550 if (stream == NULL) {
14551 xmlFreeParserInputBuffer(input);
14552 return(NULL);
14553 }
14554
14555 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000014556 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014557}
14558
14559/**
14560 * xmlCtxtReadFd:
14561 * @ctxt: an XML parser context
14562 * @fd: an open file descriptor
Daniel Veillard60942de2003-09-25 21:05:58 +000014563 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014564 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014565 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014566 *
14567 * parse an XML from a file descriptor and build a tree.
14568 * This reuses the existing @ctxt parser context
Daniel Veillard4bc5f432003-12-22 18:13:12 +000014569 * NOTE that the file descriptor will not be closed when the
14570 * reader is closed or reset.
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014571 *
14572 * Returns the resulting document tree
14573 */
14574xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000014575xmlCtxtReadFd(xmlParserCtxtPtr ctxt, int fd,
14576 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014577{
14578 xmlParserInputBufferPtr input;
14579 xmlParserInputPtr stream;
14580
14581 if (fd < 0)
14582 return (NULL);
14583 if (ctxt == NULL)
14584 return (NULL);
14585
14586 xmlCtxtReset(ctxt);
14587
14588
14589 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
14590 if (input == NULL)
14591 return (NULL);
Daniel Veillard4bc5f432003-12-22 18:13:12 +000014592 input->closecallback = NULL;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014593 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
14594 if (stream == NULL) {
14595 xmlFreeParserInputBuffer(input);
14596 return (NULL);
14597 }
14598 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000014599 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014600}
14601
14602/**
14603 * xmlCtxtReadIO:
14604 * @ctxt: an XML parser context
14605 * @ioread: an I/O read function
14606 * @ioclose: an I/O close function
14607 * @ioctx: an I/O handler
Daniel Veillard60942de2003-09-25 21:05:58 +000014608 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014609 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014610 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014611 *
14612 * parse an XML document from I/O functions and source and build a tree.
14613 * This reuses the existing @ctxt parser context
14614 *
14615 * Returns the resulting document tree
14616 */
14617xmlDocPtr
14618xmlCtxtReadIO(xmlParserCtxtPtr ctxt, xmlInputReadCallback ioread,
14619 xmlInputCloseCallback ioclose, void *ioctx,
Daniel Veillard60942de2003-09-25 21:05:58 +000014620 const char *URL,
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014621 const char *encoding, int options)
14622{
14623 xmlParserInputBufferPtr input;
14624 xmlParserInputPtr stream;
14625
14626 if (ioread == NULL)
14627 return (NULL);
14628 if (ctxt == NULL)
14629 return (NULL);
14630
14631 xmlCtxtReset(ctxt);
14632
14633 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
14634 XML_CHAR_ENCODING_NONE);
14635 if (input == NULL)
14636 return (NULL);
14637 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
14638 if (stream == NULL) {
14639 xmlFreeParserInputBuffer(input);
14640 return (NULL);
14641 }
14642 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000014643 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014644}
Daniel Veillard5d4644e2005-04-01 13:11:58 +000014645
14646#define bottom_parser
14647#include "elfgcchack.h"