blob: 3501b4e3221d4a0d200d0f96c09b5ca21a59cef0 [file] [log] [blame]
Owen Taylor3473f882001-02-23 17:55:21 +00001/*
2 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
3 * implemented on top of the SAX interfaces
4 *
5 * References:
6 * The XML specification:
7 * http://www.w3.org/TR/REC-xml
8 * Original 1.0 version:
9 * http://www.w3.org/TR/1998/REC-xml-19980210
10 * XML second edition working draft
11 * http://www.w3.org/TR/2000/WD-xml-2e-20000814
12 *
13 * Okay this is a big file, the parser core is around 7000 lines, then it
14 * is followed by the progressive parser top routines, then the various
Daniel Veillardcbaf3992001-12-31 16:16:02 +000015 * high level APIs to call the parser and a few miscellaneous functions.
Owen Taylor3473f882001-02-23 17:55:21 +000016 * A number of helper functions and deprecated ones have been moved to
17 * parserInternals.c to reduce this file size.
18 * As much as possible the functions are associated with their relative
19 * production in the XML specification. A few productions defining the
 * different ranges of character are actually implemented either in
21 * parserInternals.h or parserInternals.c
22 * The DOM tree build is realized from the default SAX callbacks in
23 * the module SAX.c.
24 * The routines doing the validation checks are in valid.c and called either
Daniel Veillardcbaf3992001-12-31 16:16:02 +000025 * from the SAX callbacks or as standalone functions using a preparsed
Owen Taylor3473f882001-02-23 17:55:21 +000026 * document.
27 *
28 * See Copyright for the status of this software.
29 *
Daniel Veillardc5d64342001-06-24 12:13:24 +000030 * daniel@veillard.com
Owen Taylor3473f882001-02-23 17:55:21 +000031 */
32
Daniel Veillard34ce8be2002-03-18 19:37:11 +000033#define IN_LIBXML
Bjorn Reese70a9da52001-04-21 16:57:29 +000034#include "libxml.h"
35
Daniel Veillard3c5ed912002-01-08 10:36:16 +000036#if defined(WIN32) && !defined (__CYGWIN__)
Owen Taylor3473f882001-02-23 17:55:21 +000037#define XML_DIR_SEP '\\'
38#else
Owen Taylor3473f882001-02-23 17:55:21 +000039#define XML_DIR_SEP '/'
40#endif
41
Owen Taylor3473f882001-02-23 17:55:21 +000042#include <stdlib.h>
43#include <string.h>
Aleksey Sanine7acf432003-10-02 20:05:27 +000044#include <stdarg.h>
Owen Taylor3473f882001-02-23 17:55:21 +000045#include <libxml/xmlmemory.h>
Daniel Veillardd0463562001-10-13 09:15:48 +000046#include <libxml/threads.h>
47#include <libxml/globals.h>
Owen Taylor3473f882001-02-23 17:55:21 +000048#include <libxml/tree.h>
49#include <libxml/parser.h>
50#include <libxml/parserInternals.h>
51#include <libxml/valid.h>
52#include <libxml/entities.h>
53#include <libxml/xmlerror.h>
54#include <libxml/encoding.h>
55#include <libxml/xmlIO.h>
56#include <libxml/uri.h>
Daniel Veillard5d90b6c2001-08-22 14:29:45 +000057#ifdef LIBXML_CATALOG_ENABLED
58#include <libxml/catalog.h>
59#endif
William M. Brack1d8c9b22004-12-25 10:14:57 +000060#ifdef LIBXML_SCHEMAS_ENABLED
61#include <libxml/xmlschemastypes.h>
62#include <libxml/relaxng.h>
63#endif
Owen Taylor3473f882001-02-23 17:55:21 +000064#ifdef HAVE_CTYPE_H
65#include <ctype.h>
66#endif
67#ifdef HAVE_STDLIB_H
68#include <stdlib.h>
69#endif
70#ifdef HAVE_SYS_STAT_H
71#include <sys/stat.h>
72#endif
73#ifdef HAVE_FCNTL_H
74#include <fcntl.h>
75#endif
76#ifdef HAVE_UNISTD_H
77#include <unistd.h>
78#endif
79#ifdef HAVE_ZLIB_H
80#include <zlib.h>
81#endif
82
Daniel Veillard0161e632008-08-28 15:36:32 +000083static void
84xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info);
85
86/************************************************************************
87 * *
88 * Arbitrary limits set in the parser. See XML_PARSE_HUGE *
89 * *
90 ************************************************************************/
91
92#define XML_PARSER_BIG_ENTITY 1000
93#define XML_PARSER_LOT_ENTITY 5000
94
/*
 * XML_PARSER_NON_LINEAR is the threshold where the ratio of parsed entity
 * replacement over the size in bytes of the input indicates that you have
 * an exponential behaviour. A value of 10 corresponds to at least 3 entity
 * replacements per byte of input.
 */
101#define XML_PARSER_NON_LINEAR 10
102
103/*
104 * xmlParserEntityCheck
105 *
106 * Function to check non-linear entity expansion behaviour
107 * This is here to detect and stop exponential linear entity expansion
108 * This is not a limitation of the parser but a safety
109 * boundary feature. It can be disabled with the XML_PARSE_HUGE
110 * parser option.
111 */
112static int
113xmlParserEntityCheck(xmlParserCtxtPtr ctxt, unsigned long size,
114 xmlEntityPtr ent)
115{
Daniel Veillardcba68392008-08-29 12:43:40 +0000116 unsigned long consumed = 0;
Daniel Veillard0161e632008-08-28 15:36:32 +0000117
118 if ((ctxt == NULL) || (ctxt->options & XML_PARSE_HUGE))
119 return (0);
120 if (ctxt->lastError.code == XML_ERR_ENTITY_LOOP)
121 return (1);
122 if (size != 0) {
123 /*
124 * Do the check based on the replacement size of the entity
125 */
126 if (size < XML_PARSER_BIG_ENTITY)
127 return(0);
128
129 /*
130 * A limit on the amount of text data reasonably used
131 */
132 if (ctxt->input != NULL) {
133 consumed = ctxt->input->consumed +
134 (ctxt->input->cur - ctxt->input->base);
135 }
136 consumed += ctxt->sizeentities;
137
138 if ((size < XML_PARSER_NON_LINEAR * consumed) &&
139 (ctxt->nbentities * 3 < XML_PARSER_NON_LINEAR * consumed))
140 return (0);
141 } else if (ent != NULL) {
142 /*
143 * use the number of parsed entities in the replacement
144 */
145 size = ent->checked;
146
147 /*
148 * The amount of data parsed counting entities size only once
149 */
150 if (ctxt->input != NULL) {
151 consumed = ctxt->input->consumed +
152 (ctxt->input->cur - ctxt->input->base);
153 }
154 consumed += ctxt->sizeentities;
155
156 /*
157 * Check the density of entities for the amount of data
158 * knowing an entity reference will take at least 3 bytes
159 */
160 if (size * 3 < consumed * XML_PARSER_NON_LINEAR)
161 return (0);
162 } else {
163 /*
164 * strange we got no data for checking just return
165 */
166 return (0);
167 }
168
169 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
170 return (1);
171}
172
Daniel Veillard3b2e4e12003-02-03 08:52:58 +0000173/**
Daniel Veillard4aede2e2003-10-17 12:43:59 +0000174 * xmlParserMaxDepth:
Daniel Veillard3b2e4e12003-02-03 08:52:58 +0000175 *
Daniel Veillard8915c152008-08-26 13:05:34 +0000176 * arbitrary depth limit for the XML documents that we allow to
177 * process. This is not a limitation of the parser but a safety
178 * boundary feature. It can be disabled with the XML_PARSE_HUGE
179 * parser option.
Daniel Veillard3b2e4e12003-02-03 08:52:58 +0000180 */
Daniel Veillard8915c152008-08-26 13:05:34 +0000181unsigned int xmlParserMaxDepth = 256;
Owen Taylor3473f882001-02-23 17:55:21 +0000182
Daniel Veillard0fb18932003-09-07 09:14:37 +0000183
Daniel Veillard0161e632008-08-28 15:36:32 +0000184
185#define SAX2 1
Daniel Veillard21a0f912001-02-25 19:54:14 +0000186#define XML_PARSER_BIG_BUFFER_SIZE 300
Owen Taylor3473f882001-02-23 17:55:21 +0000187#define XML_PARSER_BUFFER_SIZE 100
Daniel Veillard5997aca2002-03-18 18:36:20 +0000188#define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"
189
/*
 * List of XML prefixed PI allowed by W3C specs.
 *
 * The table is terminated by a NULL entry. Both the strings and the
 * table slots are constant: the list is only meant to be read.
 */

static const char * const xmlW3CPIs[] = {
    "xml-stylesheet",
    NULL
};
198
Daniel Veillarda07050d2003-10-19 14:46:32 +0000199
Owen Taylor3473f882001-02-23 17:55:21 +0000200/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
Owen Taylor3473f882001-02-23 17:55:21 +0000201xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
202 const xmlChar **str);
203
Daniel Veillard7d515752003-09-26 19:12:37 +0000204static xmlParserErrors
Daniel Veillarda97a19b2001-05-20 13:19:52 +0000205xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
206 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +0000207 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +0000208 const xmlChar *ID, xmlNodePtr *list);
Owen Taylor3473f882001-02-23 17:55:21 +0000209
Daniel Veillard37334572008-07-31 08:20:02 +0000210static int
211xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options,
212 const char *encoding);
Daniel Veillard81273902003-09-30 00:43:48 +0000213#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +0000214static void
215xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
216 xmlNodePtr lastNode);
Daniel Veillard81273902003-09-30 00:43:48 +0000217#endif /* LIBXML_LEGACY_ENABLED */
Daniel Veillard8107a222002-01-13 14:10:10 +0000218
Daniel Veillard7d515752003-09-26 19:12:37 +0000219static xmlParserErrors
Daniel Veillard328f48c2002-11-15 15:24:34 +0000220xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
221 const xmlChar *string, void *user_data, xmlNodePtr *lst);
Daniel Veillarde57ec792003-09-10 10:50:59 +0000222
Daniel Veillard8bf64ae2008-03-24 20:45:21 +0000223static int
224xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity);
225
Daniel Veillarde57ec792003-09-10 10:50:59 +0000226/************************************************************************
227 * *
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000228 * Some factorized error routines *
229 * *
230 ************************************************************************/
231
232/**
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000233 * xmlErrAttributeDup:
234 * @ctxt: an XML parser context
235 * @prefix: the attribute prefix
236 * @localname: the attribute localname
237 *
238 * Handle a redefinition of attribute error
239 */
240static void
241xmlErrAttributeDup(xmlParserCtxtPtr ctxt, const xmlChar * prefix,
242 const xmlChar * localname)
243{
Daniel Veillard157fee02003-10-31 10:36:03 +0000244 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
245 (ctxt->instate == XML_PARSER_EOF))
246 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000247 if (ctxt != NULL)
248 ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
Daniel Veillard2b8c4a12003-10-02 22:28:19 +0000249 if (prefix == NULL)
Daniel Veillard659e71e2003-10-10 14:10:40 +0000250 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000251 ctxt->errNo, XML_ERR_FATAL, NULL, 0,
252 (const char *) localname, NULL, NULL, 0, 0,
253 "Attribute %s redefined\n", localname);
Daniel Veillard2b8c4a12003-10-02 22:28:19 +0000254 else
Daniel Veillard659e71e2003-10-10 14:10:40 +0000255 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000256 ctxt->errNo, XML_ERR_FATAL, NULL, 0,
257 (const char *) prefix, (const char *) localname,
258 NULL, 0, 0, "Attribute %s:%s redefined\n", prefix,
259 localname);
Daniel Veillard30e76072006-03-09 14:13:55 +0000260 if (ctxt != NULL) {
261 ctxt->wellFormed = 0;
262 if (ctxt->recovery == 0)
263 ctxt->disableSAX = 1;
264 }
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000265}
266
267/**
268 * xmlFatalErr:
269 * @ctxt: an XML parser context
270 * @error: the error number
271 * @extra: extra information string
272 *
273 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
274 */
275static void
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000276xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info)
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000277{
278 const char *errmsg;
279
Daniel Veillard157fee02003-10-31 10:36:03 +0000280 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
281 (ctxt->instate == XML_PARSER_EOF))
282 return;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000283 switch (error) {
284 case XML_ERR_INVALID_HEX_CHARREF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000285 errmsg = "CharRef: invalid hexadecimal value\n";
286 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000287 case XML_ERR_INVALID_DEC_CHARREF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000288 errmsg = "CharRef: invalid decimal value\n";
289 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000290 case XML_ERR_INVALID_CHARREF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000291 errmsg = "CharRef: invalid value\n";
292 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000293 case XML_ERR_INTERNAL_ERROR:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000294 errmsg = "internal error";
295 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000296 case XML_ERR_PEREF_AT_EOF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000297 errmsg = "PEReference at end of document\n";
298 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000299 case XML_ERR_PEREF_IN_PROLOG:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000300 errmsg = "PEReference in prolog\n";
301 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000302 case XML_ERR_PEREF_IN_EPILOG:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000303 errmsg = "PEReference in epilog\n";
304 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000305 case XML_ERR_PEREF_NO_NAME:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000306 errmsg = "PEReference: no name\n";
307 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000308 case XML_ERR_PEREF_SEMICOL_MISSING:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000309 errmsg = "PEReference: expecting ';'\n";
310 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000311 case XML_ERR_ENTITY_LOOP:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000312 errmsg = "Detected an entity reference loop\n";
313 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000314 case XML_ERR_ENTITY_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000315 errmsg = "EntityValue: \" or ' expected\n";
316 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000317 case XML_ERR_ENTITY_PE_INTERNAL:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000318 errmsg = "PEReferences forbidden in internal subset\n";
319 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000320 case XML_ERR_ENTITY_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000321 errmsg = "EntityValue: \" or ' expected\n";
322 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000323 case XML_ERR_ATTRIBUTE_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000324 errmsg = "AttValue: \" or ' expected\n";
325 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000326 case XML_ERR_LT_IN_ATTRIBUTE:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000327 errmsg = "Unescaped '<' not allowed in attributes values\n";
328 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000329 case XML_ERR_LITERAL_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000330 errmsg = "SystemLiteral \" or ' expected\n";
331 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000332 case XML_ERR_LITERAL_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000333 errmsg = "Unfinished System or Public ID \" or ' expected\n";
334 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000335 case XML_ERR_MISPLACED_CDATA_END:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000336 errmsg = "Sequence ']]>' not allowed in content\n";
337 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000338 case XML_ERR_URI_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000339 errmsg = "SYSTEM or PUBLIC, the URI is missing\n";
340 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000341 case XML_ERR_PUBID_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000342 errmsg = "PUBLIC, the Public Identifier is missing\n";
343 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000344 case XML_ERR_HYPHEN_IN_COMMENT:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000345 errmsg = "Comment must not contain '--' (double-hyphen)\n";
346 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000347 case XML_ERR_PI_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000348 errmsg = "xmlParsePI : no target name\n";
349 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000350 case XML_ERR_RESERVED_XML_NAME:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000351 errmsg = "Invalid PI name\n";
352 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000353 case XML_ERR_NOTATION_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000354 errmsg = "NOTATION: Name expected here\n";
355 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000356 case XML_ERR_NOTATION_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000357 errmsg = "'>' required to close NOTATION declaration\n";
358 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000359 case XML_ERR_VALUE_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000360 errmsg = "Entity value required\n";
361 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000362 case XML_ERR_URI_FRAGMENT:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000363 errmsg = "Fragment not allowed";
364 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000365 case XML_ERR_ATTLIST_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000366 errmsg = "'(' required to start ATTLIST enumeration\n";
367 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000368 case XML_ERR_NMTOKEN_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000369 errmsg = "NmToken expected in ATTLIST enumeration\n";
370 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000371 case XML_ERR_ATTLIST_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000372 errmsg = "')' required to finish ATTLIST enumeration\n";
373 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000374 case XML_ERR_MIXED_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000375 errmsg = "MixedContentDecl : '|' or ')*' expected\n";
376 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000377 case XML_ERR_PCDATA_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000378 errmsg = "MixedContentDecl : '#PCDATA' expected\n";
379 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000380 case XML_ERR_ELEMCONTENT_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000381 errmsg = "ContentDecl : Name or '(' expected\n";
382 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000383 case XML_ERR_ELEMCONTENT_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000384 errmsg = "ContentDecl : ',' '|' or ')' expected\n";
385 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000386 case XML_ERR_PEREF_IN_INT_SUBSET:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000387 errmsg =
388 "PEReference: forbidden within markup decl in internal subset\n";
389 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000390 case XML_ERR_GT_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000391 errmsg = "expected '>'\n";
392 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000393 case XML_ERR_CONDSEC_INVALID:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000394 errmsg = "XML conditional section '[' expected\n";
395 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000396 case XML_ERR_EXT_SUBSET_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000397 errmsg = "Content error in the external subset\n";
398 break;
399 case XML_ERR_CONDSEC_INVALID_KEYWORD:
400 errmsg =
401 "conditional section INCLUDE or IGNORE keyword expected\n";
402 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000403 case XML_ERR_CONDSEC_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000404 errmsg = "XML conditional section not closed\n";
405 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000406 case XML_ERR_XMLDECL_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000407 errmsg = "Text declaration '<?xml' required\n";
408 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000409 case XML_ERR_XMLDECL_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000410 errmsg = "parsing XML declaration: '?>' expected\n";
411 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000412 case XML_ERR_EXT_ENTITY_STANDALONE:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000413 errmsg = "external parsed entities cannot be standalone\n";
414 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000415 case XML_ERR_ENTITYREF_SEMICOL_MISSING:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000416 errmsg = "EntityRef: expecting ';'\n";
417 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000418 case XML_ERR_DOCTYPE_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000419 errmsg = "DOCTYPE improperly terminated\n";
420 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000421 case XML_ERR_LTSLASH_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000422 errmsg = "EndTag: '</' not found\n";
423 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000424 case XML_ERR_EQUAL_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000425 errmsg = "expected '='\n";
426 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000427 case XML_ERR_STRING_NOT_CLOSED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000428 errmsg = "String not closed expecting \" or '\n";
429 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000430 case XML_ERR_STRING_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000431 errmsg = "String not started expecting ' or \"\n";
432 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000433 case XML_ERR_ENCODING_NAME:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000434 errmsg = "Invalid XML encoding name\n";
435 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000436 case XML_ERR_STANDALONE_VALUE:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000437 errmsg = "standalone accepts only 'yes' or 'no'\n";
438 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000439 case XML_ERR_DOCUMENT_EMPTY:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000440 errmsg = "Document is empty\n";
441 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000442 case XML_ERR_DOCUMENT_END:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000443 errmsg = "Extra content at the end of the document\n";
444 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000445 case XML_ERR_NOT_WELL_BALANCED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000446 errmsg = "chunk is not well balanced\n";
447 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000448 case XML_ERR_EXTRA_CONTENT:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000449 errmsg = "extra content at the end of well balanced chunk\n";
450 break;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000451 case XML_ERR_VERSION_MISSING:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000452 errmsg = "Malformed declaration expecting version\n";
453 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000454#if 0
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000455 case:
456 errmsg = "\n";
457 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000458#endif
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000459 default:
460 errmsg = "Unregistered error message\n";
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000461 }
Daniel Veillard30e76072006-03-09 14:13:55 +0000462 if (ctxt != NULL)
463 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000464 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000465 XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, errmsg,
466 info);
Daniel Veillard30e76072006-03-09 14:13:55 +0000467 if (ctxt != NULL) {
468 ctxt->wellFormed = 0;
469 if (ctxt->recovery == 0)
470 ctxt->disableSAX = 1;
471 }
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000472}
473
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000474/**
475 * xmlFatalErrMsg:
476 * @ctxt: an XML parser context
477 * @error: the error number
478 * @msg: the error message
479 *
480 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
481 */
482static void
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000483xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
484 const char *msg)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000485{
Daniel Veillard157fee02003-10-31 10:36:03 +0000486 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
487 (ctxt->instate == XML_PARSER_EOF))
488 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000489 if (ctxt != NULL)
490 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000491 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000492 XML_ERR_FATAL, NULL, 0, NULL, NULL, NULL, 0, 0, msg);
Daniel Veillard30e76072006-03-09 14:13:55 +0000493 if (ctxt != NULL) {
494 ctxt->wellFormed = 0;
495 if (ctxt->recovery == 0)
496 ctxt->disableSAX = 1;
497 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000498}
499
500/**
Daniel Veillard24eb9782003-10-04 21:08:09 +0000501 * xmlWarningMsg:
502 * @ctxt: an XML parser context
503 * @error: the error number
504 * @msg: the error message
505 * @str1: extra data
506 * @str2: extra data
507 *
508 * Handle a warning.
509 */
510static void
511xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
512 const char *msg, const xmlChar *str1, const xmlChar *str2)
513{
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000514 xmlStructuredErrorFunc schannel = NULL;
Daniel Veillard34e3f642008-07-29 09:02:27 +0000515
Daniel Veillard157fee02003-10-31 10:36:03 +0000516 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
517 (ctxt->instate == XML_PARSER_EOF))
518 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000519 if ((ctxt != NULL) && (ctxt->sax != NULL) &&
520 (ctxt->sax->initialized == XML_SAX2_MAGIC))
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000521 schannel = ctxt->sax->serror;
522 __xmlRaiseError(schannel,
Daniel Veillard659e71e2003-10-10 14:10:40 +0000523 (ctxt->sax) ? ctxt->sax->warning : NULL,
524 ctxt->userData,
Daniel Veillard24eb9782003-10-04 21:08:09 +0000525 ctxt, NULL, XML_FROM_PARSER, error,
526 XML_ERR_WARNING, NULL, 0,
527 (const char *) str1, (const char *) str2, NULL, 0, 0,
528 msg, (const char *) str1, (const char *) str2);
529}
530
531/**
532 * xmlValidityError:
533 * @ctxt: an XML parser context
534 * @error: the error number
535 * @msg: the error message
536 * @str1: extra data
537 *
Daniel Veillardf88d8cf2003-12-08 10:25:02 +0000538 * Handle a validity error.
Daniel Veillard24eb9782003-10-04 21:08:09 +0000539 */
540static void
541xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error,
Daniel Veillardae0765b2008-07-31 19:54:59 +0000542 const char *msg, const xmlChar *str1, const xmlChar *str2)
Daniel Veillard24eb9782003-10-04 21:08:09 +0000543{
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000544 xmlStructuredErrorFunc schannel = NULL;
Daniel Veillard157fee02003-10-31 10:36:03 +0000545
546 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
547 (ctxt->instate == XML_PARSER_EOF))
548 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000549 if (ctxt != NULL) {
550 ctxt->errNo = error;
551 if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC))
552 schannel = ctxt->sax->serror;
553 }
Daniel Veillardc790bf42003-10-11 10:50:10 +0000554 __xmlRaiseError(schannel,
Daniel Veillard659e71e2003-10-10 14:10:40 +0000555 ctxt->vctxt.error, ctxt->vctxt.userData,
Daniel Veillard24eb9782003-10-04 21:08:09 +0000556 ctxt, NULL, XML_FROM_DTD, error,
557 XML_ERR_ERROR, NULL, 0, (const char *) str1,
Daniel Veillardae0765b2008-07-31 19:54:59 +0000558 (const char *) str2, NULL, 0, 0,
559 msg, (const char *) str1, (const char *) str2);
Daniel Veillard30e76072006-03-09 14:13:55 +0000560 if (ctxt != NULL) {
561 ctxt->valid = 0;
562 }
Daniel Veillard24eb9782003-10-04 21:08:09 +0000563}
564
565/**
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000566 * xmlFatalErrMsgInt:
567 * @ctxt: an XML parser context
568 * @error: the error number
569 * @msg: the error message
570 * @val: an integer value
571 *
572 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
573 */
574static void
575xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000576 const char *msg, int val)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000577{
Daniel Veillard157fee02003-10-31 10:36:03 +0000578 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
579 (ctxt->instate == XML_PARSER_EOF))
580 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000581 if (ctxt != NULL)
582 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000583 __xmlRaiseError(NULL, NULL, NULL,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000584 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
585 NULL, 0, NULL, NULL, NULL, val, 0, msg, val);
Daniel Veillard30e76072006-03-09 14:13:55 +0000586 if (ctxt != NULL) {
587 ctxt->wellFormed = 0;
588 if (ctxt->recovery == 0)
589 ctxt->disableSAX = 1;
590 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000591}
592
593/**
Daniel Veillardf403d292003-10-05 13:51:35 +0000594 * xmlFatalErrMsgStrIntStr:
595 * @ctxt: an XML parser context
596 * @error: the error number
597 * @msg: the error message
598 * @str1: an string info
599 * @val: an integer value
600 * @str2: an string info
601 *
602 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
603 */
604static void
605xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
606 const char *msg, const xmlChar *str1, int val,
607 const xmlChar *str2)
608{
Daniel Veillard157fee02003-10-31 10:36:03 +0000609 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
610 (ctxt->instate == XML_PARSER_EOF))
611 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000612 if (ctxt != NULL)
613 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000614 __xmlRaiseError(NULL, NULL, NULL,
Daniel Veillardf403d292003-10-05 13:51:35 +0000615 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
616 NULL, 0, (const char *) str1, (const char *) str2,
617 NULL, val, 0, msg, str1, val, str2);
Daniel Veillard30e76072006-03-09 14:13:55 +0000618 if (ctxt != NULL) {
619 ctxt->wellFormed = 0;
620 if (ctxt->recovery == 0)
621 ctxt->disableSAX = 1;
622 }
Daniel Veillardf403d292003-10-05 13:51:35 +0000623}
624
625/**
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000626 * xmlFatalErrMsgStr:
627 * @ctxt: an XML parser context
628 * @error: the error number
629 * @msg: the error message
630 * @val: a string value
631 *
632 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
633 */
634static void
635xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000636 const char *msg, const xmlChar * val)
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000637{
Daniel Veillard157fee02003-10-31 10:36:03 +0000638 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
639 (ctxt->instate == XML_PARSER_EOF))
640 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000641 if (ctxt != NULL)
642 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000643 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000644 XML_FROM_PARSER, error, XML_ERR_FATAL,
645 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
646 val);
Daniel Veillard30e76072006-03-09 14:13:55 +0000647 if (ctxt != NULL) {
648 ctxt->wellFormed = 0;
649 if (ctxt->recovery == 0)
650 ctxt->disableSAX = 1;
651 }
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000652}
653
654/**
Daniel Veillardf403d292003-10-05 13:51:35 +0000655 * xmlErrMsgStr:
656 * @ctxt: an XML parser context
657 * @error: the error number
658 * @msg: the error message
659 * @val: a string value
660 *
661 * Handle a non fatal parser error
662 */
663static void
664xmlErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
665 const char *msg, const xmlChar * val)
666{
Daniel Veillard157fee02003-10-31 10:36:03 +0000667 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
668 (ctxt->instate == XML_PARSER_EOF))
669 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000670 if (ctxt != NULL)
671 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000672 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
Daniel Veillardf403d292003-10-05 13:51:35 +0000673 XML_FROM_PARSER, error, XML_ERR_ERROR,
674 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
675 val);
676}
677
678/**
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000679 * xmlNsErr:
680 * @ctxt: an XML parser context
681 * @error: the error number
682 * @msg: the message
683 * @info1: extra information string
684 * @info2: extra information string
685 *
686 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
687 */
688static void
689xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
690 const char *msg,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000691 const xmlChar * info1, const xmlChar * info2,
692 const xmlChar * info3)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000693{
Daniel Veillard157fee02003-10-31 10:36:03 +0000694 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
695 (ctxt->instate == XML_PARSER_EOF))
696 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000697 if (ctxt != NULL)
698 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000699 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000700 XML_ERR_ERROR, NULL, 0, (const char *) info1,
701 (const char *) info2, (const char *) info3, 0, 0, msg,
702 info1, info2, info3);
Daniel Veillard30e76072006-03-09 14:13:55 +0000703 if (ctxt != NULL)
704 ctxt->nsWellFormed = 0;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000705}
706
Daniel Veillard37334572008-07-31 08:20:02 +0000707/**
708 * xmlNsWarn
709 * @ctxt: an XML parser context
710 * @error: the error number
711 * @msg: the message
712 * @info1: extra information string
713 * @info2: extra information string
714 *
715 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
716 */
717static void
718xmlNsWarn(xmlParserCtxtPtr ctxt, xmlParserErrors error,
719 const char *msg,
720 const xmlChar * info1, const xmlChar * info2,
721 const xmlChar * info3)
722{
723 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
724 (ctxt->instate == XML_PARSER_EOF))
725 return;
726 if (ctxt != NULL)
727 ctxt->errNo = error;
728 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
729 XML_ERR_WARNING, NULL, 0, (const char *) info1,
730 (const char *) info2, (const char *) info3, 0, 0, msg,
731 info1, info2, info3);
732}
733
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000734/************************************************************************
735 * *
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000736 * Library wide options *
737 * *
738 ************************************************************************/
739
740/**
741 * xmlHasFeature:
742 * @feature: the feature to be examined
743 *
744 * Examines if the library has been compiled with a given feature.
745 *
746 * Returns a non-zero value if the feature exist, otherwise zero.
747 * Returns zero (0) if the feature does not exist or an unknown
748 * unknown feature is requested, non-zero otherwise.
749 */
750int
751xmlHasFeature(xmlFeature feature)
752{
753 switch (feature) {
Daniel Veillard602434d2005-09-12 09:20:31 +0000754 case XML_WITH_THREAD:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000755#ifdef LIBXML_THREAD_ENABLED
756 return(1);
757#else
758 return(0);
759#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000760 case XML_WITH_TREE:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000761#ifdef LIBXML_TREE_ENABLED
762 return(1);
763#else
764 return(0);
765#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000766 case XML_WITH_OUTPUT:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000767#ifdef LIBXML_OUTPUT_ENABLED
768 return(1);
769#else
770 return(0);
771#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000772 case XML_WITH_PUSH:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000773#ifdef LIBXML_PUSH_ENABLED
774 return(1);
775#else
776 return(0);
777#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000778 case XML_WITH_READER:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000779#ifdef LIBXML_READER_ENABLED
780 return(1);
781#else
782 return(0);
783#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000784 case XML_WITH_PATTERN:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000785#ifdef LIBXML_PATTERN_ENABLED
786 return(1);
787#else
788 return(0);
789#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000790 case XML_WITH_WRITER:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000791#ifdef LIBXML_WRITER_ENABLED
792 return(1);
793#else
794 return(0);
795#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000796 case XML_WITH_SAX1:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000797#ifdef LIBXML_SAX1_ENABLED
798 return(1);
799#else
800 return(0);
801#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000802 case XML_WITH_FTP:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000803#ifdef LIBXML_FTP_ENABLED
804 return(1);
805#else
806 return(0);
807#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000808 case XML_WITH_HTTP:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000809#ifdef LIBXML_HTTP_ENABLED
810 return(1);
811#else
812 return(0);
813#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000814 case XML_WITH_VALID:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000815#ifdef LIBXML_VALID_ENABLED
816 return(1);
817#else
818 return(0);
819#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000820 case XML_WITH_HTML:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000821#ifdef LIBXML_HTML_ENABLED
822 return(1);
823#else
824 return(0);
825#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000826 case XML_WITH_LEGACY:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000827#ifdef LIBXML_LEGACY_ENABLED
828 return(1);
829#else
830 return(0);
831#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000832 case XML_WITH_C14N:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000833#ifdef LIBXML_C14N_ENABLED
834 return(1);
835#else
836 return(0);
837#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000838 case XML_WITH_CATALOG:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000839#ifdef LIBXML_CATALOG_ENABLED
840 return(1);
841#else
842 return(0);
843#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000844 case XML_WITH_XPATH:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000845#ifdef LIBXML_XPATH_ENABLED
846 return(1);
847#else
848 return(0);
849#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000850 case XML_WITH_XPTR:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000851#ifdef LIBXML_XPTR_ENABLED
852 return(1);
853#else
854 return(0);
855#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000856 case XML_WITH_XINCLUDE:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000857#ifdef LIBXML_XINCLUDE_ENABLED
858 return(1);
859#else
860 return(0);
861#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000862 case XML_WITH_ICONV:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000863#ifdef LIBXML_ICONV_ENABLED
864 return(1);
865#else
866 return(0);
867#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000868 case XML_WITH_ISO8859X:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000869#ifdef LIBXML_ISO8859X_ENABLED
870 return(1);
871#else
872 return(0);
873#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000874 case XML_WITH_UNICODE:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000875#ifdef LIBXML_UNICODE_ENABLED
876 return(1);
877#else
878 return(0);
879#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000880 case XML_WITH_REGEXP:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000881#ifdef LIBXML_REGEXP_ENABLED
882 return(1);
883#else
884 return(0);
885#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000886 case XML_WITH_AUTOMATA:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000887#ifdef LIBXML_AUTOMATA_ENABLED
888 return(1);
889#else
890 return(0);
891#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000892 case XML_WITH_EXPR:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000893#ifdef LIBXML_EXPR_ENABLED
894 return(1);
895#else
896 return(0);
897#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000898 case XML_WITH_SCHEMAS:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000899#ifdef LIBXML_SCHEMAS_ENABLED
900 return(1);
901#else
902 return(0);
903#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000904 case XML_WITH_SCHEMATRON:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000905#ifdef LIBXML_SCHEMATRON_ENABLED
906 return(1);
907#else
908 return(0);
909#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000910 case XML_WITH_MODULES:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000911#ifdef LIBXML_MODULES_ENABLED
912 return(1);
913#else
914 return(0);
915#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000916 case XML_WITH_DEBUG:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000917#ifdef LIBXML_DEBUG_ENABLED
918 return(1);
919#else
920 return(0);
921#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000922 case XML_WITH_DEBUG_MEM:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000923#ifdef DEBUG_MEMORY_LOCATION
924 return(1);
925#else
926 return(0);
927#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000928 case XML_WITH_DEBUG_RUN:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000929#ifdef LIBXML_DEBUG_RUNTIME
930 return(1);
931#else
932 return(0);
Daniel Veillard34e3f642008-07-29 09:02:27 +0000933#endif
Daniel Veillard75acfee2006-07-13 06:29:56 +0000934 case XML_WITH_ZLIB:
935#ifdef LIBXML_ZLIB_ENABLED
936 return(1);
937#else
938 return(0);
939#endif
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000940 default:
941 break;
942 }
943 return(0);
944}
945
946/************************************************************************
947 * *
Daniel Veillarde57ec792003-09-10 10:50:59 +0000948 * SAX2 defaulted attributes handling *
949 * *
950 ************************************************************************/
951
952/**
953 * xmlDetectSAX2:
954 * @ctxt: an XML parser context
955 *
956 * Do the SAX2 detection and specific intialization
957 */
958static void
959xmlDetectSAX2(xmlParserCtxtPtr ctxt) {
960 if (ctxt == NULL) return;
Daniel Veillard81273902003-09-30 00:43:48 +0000961#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +0000962 if ((ctxt->sax) && (ctxt->sax->initialized == XML_SAX2_MAGIC) &&
963 ((ctxt->sax->startElementNs != NULL) ||
964 (ctxt->sax->endElementNs != NULL))) ctxt->sax2 = 1;
Daniel Veillard81273902003-09-30 00:43:48 +0000965#else
966 ctxt->sax2 = 1;
967#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +0000968
969 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
970 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
971 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
William M. Brack9f797ab2004-07-28 07:40:12 +0000972 if ((ctxt->str_xml==NULL) || (ctxt->str_xmlns==NULL) ||
973 (ctxt->str_xml_ns == NULL)) {
974 xmlErrMemory(ctxt, NULL);
975 }
Daniel Veillarde57ec792003-09-10 10:50:59 +0000976}
977
Daniel Veillarde57ec792003-09-10 10:50:59 +0000978typedef struct _xmlDefAttrs xmlDefAttrs;
979typedef xmlDefAttrs *xmlDefAttrsPtr;
980struct _xmlDefAttrs {
981 int nbAttrs; /* number of defaulted attributes on that element */
982 int maxAttrs; /* the size of the array */
Daniel Veillardae0765b2008-07-31 19:54:59 +0000983 const xmlChar *values[5]; /* array of localname/prefix/values/external */
Daniel Veillarde57ec792003-09-10 10:50:59 +0000984};
Daniel Veillarde57ec792003-09-10 10:50:59 +0000985
986/**
Daniel Veillard97c9ce22008-03-25 16:52:41 +0000987 * xmlAttrNormalizeSpace:
988 * @src: the source string
989 * @dst: the target string
990 *
991 * Normalize the space in non CDATA attribute values:
992 * If the attribute type is not CDATA, then the XML processor MUST further
993 * process the normalized attribute value by discarding any leading and
994 * trailing space (#x20) characters, and by replacing sequences of space
995 * (#x20) characters by a single space (#x20) character.
996 * Note that the size of dst need to be at least src, and if one doesn't need
997 * to preserve dst (and it doesn't come from a dictionary or read-only) then
998 * passing src as dst is just fine.
999 *
1000 * Returns a pointer to the normalized value (dst) or NULL if no conversion
1001 * is needed.
1002 */
1003static xmlChar *
1004xmlAttrNormalizeSpace(const xmlChar *src, xmlChar *dst)
1005{
1006 if ((src == NULL) || (dst == NULL))
1007 return(NULL);
1008
1009 while (*src == 0x20) src++;
1010 while (*src != 0) {
1011 if (*src == 0x20) {
1012 while (*src == 0x20) src++;
1013 if (*src != 0)
1014 *dst++ = 0x20;
1015 } else {
1016 *dst++ = *src++;
1017 }
1018 }
1019 *dst = 0;
1020 if (dst == src)
1021 return(NULL);
1022 return(dst);
1023}
1024
1025/**
1026 * xmlAttrNormalizeSpace2:
1027 * @src: the source string
1028 *
1029 * Normalize the space in non CDATA attribute values, a slightly more complex
1030 * front end to avoid allocation problems when running on attribute values
1031 * coming from the input.
1032 *
1033 * Returns a pointer to the normalized value (dst) or NULL if no conversion
1034 * is needed.
1035 */
1036static const xmlChar *
Daniel Veillardae0765b2008-07-31 19:54:59 +00001037xmlAttrNormalizeSpace2(xmlParserCtxtPtr ctxt, xmlChar *src, int *len)
Daniel Veillard97c9ce22008-03-25 16:52:41 +00001038{
1039 int i;
1040 int remove_head = 0;
1041 int need_realloc = 0;
1042 const xmlChar *cur;
1043
1044 if ((ctxt == NULL) || (src == NULL) || (len == NULL))
1045 return(NULL);
1046 i = *len;
1047 if (i <= 0)
1048 return(NULL);
1049
1050 cur = src;
1051 while (*cur == 0x20) {
1052 cur++;
1053 remove_head++;
1054 }
1055 while (*cur != 0) {
1056 if (*cur == 0x20) {
1057 cur++;
1058 if ((*cur == 0x20) || (*cur == 0)) {
1059 need_realloc = 1;
1060 break;
1061 }
1062 } else
1063 cur++;
1064 }
1065 if (need_realloc) {
1066 xmlChar *ret;
1067
1068 ret = xmlStrndup(src + remove_head, i - remove_head + 1);
1069 if (ret == NULL) {
1070 xmlErrMemory(ctxt, NULL);
1071 return(NULL);
1072 }
1073 xmlAttrNormalizeSpace(ret, ret);
1074 *len = (int) strlen((const char *)ret);
1075 return(ret);
1076 } else if (remove_head) {
1077 *len -= remove_head;
Daniel Veillardae0765b2008-07-31 19:54:59 +00001078 memmove(src, src + remove_head, 1 + *len);
1079 return(src);
Daniel Veillard97c9ce22008-03-25 16:52:41 +00001080 }
1081 return(NULL);
1082}
1083
1084/**
Daniel Veillarde57ec792003-09-10 10:50:59 +00001085 * xmlAddDefAttrs:
1086 * @ctxt: an XML parser context
1087 * @fullname: the element fullname
1088 * @fullattr: the attribute fullname
1089 * @value: the attribute value
1090 *
1091 * Add a defaulted attribute for an element
1092 */
1093static void
1094xmlAddDefAttrs(xmlParserCtxtPtr ctxt,
1095 const xmlChar *fullname,
1096 const xmlChar *fullattr,
1097 const xmlChar *value) {
1098 xmlDefAttrsPtr defaults;
1099 int len;
1100 const xmlChar *name;
1101 const xmlChar *prefix;
1102
Daniel Veillard6a31b832008-03-26 14:06:44 +00001103 /*
1104 * Allows to detect attribute redefinitions
1105 */
1106 if (ctxt->attsSpecial != NULL) {
1107 if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1108 return;
1109 }
1110
Daniel Veillarde57ec792003-09-10 10:50:59 +00001111 if (ctxt->attsDefault == NULL) {
Daniel Veillard316a5c32005-01-23 22:56:39 +00001112 ctxt->attsDefault = xmlHashCreateDict(10, ctxt->dict);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001113 if (ctxt->attsDefault == NULL)
1114 goto mem_error;
1115 }
1116
1117 /*
Daniel Veillard079f6a72004-09-23 13:15:03 +00001118 * split the element name into prefix:localname , the string found
1119 * are within the DTD and then not associated to namespace names.
Daniel Veillarde57ec792003-09-10 10:50:59 +00001120 */
1121 name = xmlSplitQName3(fullname, &len);
1122 if (name == NULL) {
1123 name = xmlDictLookup(ctxt->dict, fullname, -1);
1124 prefix = NULL;
1125 } else {
1126 name = xmlDictLookup(ctxt->dict, name, -1);
1127 prefix = xmlDictLookup(ctxt->dict, fullname, len);
1128 }
1129
1130 /*
1131 * make sure there is some storage
1132 */
1133 defaults = xmlHashLookup2(ctxt->attsDefault, name, prefix);
1134 if (defaults == NULL) {
1135 defaults = (xmlDefAttrsPtr) xmlMalloc(sizeof(xmlDefAttrs) +
Daniel Veillardae0765b2008-07-31 19:54:59 +00001136 (4 * 5) * sizeof(const xmlChar *));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001137 if (defaults == NULL)
1138 goto mem_error;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001139 defaults->nbAttrs = 0;
Daniel Veillard079f6a72004-09-23 13:15:03 +00001140 defaults->maxAttrs = 4;
Daniel Veillard68b6e022008-03-31 09:26:00 +00001141 if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1142 defaults, NULL) < 0) {
1143 xmlFree(defaults);
1144 goto mem_error;
1145 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00001146 } else if (defaults->nbAttrs >= defaults->maxAttrs) {
Daniel Veillard079f6a72004-09-23 13:15:03 +00001147 xmlDefAttrsPtr temp;
1148
1149 temp = (xmlDefAttrsPtr) xmlRealloc(defaults, sizeof(xmlDefAttrs) +
Daniel Veillardae0765b2008-07-31 19:54:59 +00001150 (2 * defaults->maxAttrs * 5) * sizeof(const xmlChar *));
Daniel Veillard079f6a72004-09-23 13:15:03 +00001151 if (temp == NULL)
Daniel Veillarde57ec792003-09-10 10:50:59 +00001152 goto mem_error;
Daniel Veillard079f6a72004-09-23 13:15:03 +00001153 defaults = temp;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001154 defaults->maxAttrs *= 2;
Daniel Veillard68b6e022008-03-31 09:26:00 +00001155 if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1156 defaults, NULL) < 0) {
1157 xmlFree(defaults);
1158 goto mem_error;
1159 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00001160 }
1161
1162 /*
Daniel Veillard8874b942005-08-25 13:19:21 +00001163 * Split the element name into prefix:localname , the string found
Daniel Veillarde57ec792003-09-10 10:50:59 +00001164 * are within the DTD and hen not associated to namespace names.
1165 */
1166 name = xmlSplitQName3(fullattr, &len);
1167 if (name == NULL) {
1168 name = xmlDictLookup(ctxt->dict, fullattr, -1);
1169 prefix = NULL;
1170 } else {
1171 name = xmlDictLookup(ctxt->dict, name, -1);
1172 prefix = xmlDictLookup(ctxt->dict, fullattr, len);
1173 }
1174
Daniel Veillardae0765b2008-07-31 19:54:59 +00001175 defaults->values[5 * defaults->nbAttrs] = name;
1176 defaults->values[5 * defaults->nbAttrs + 1] = prefix;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001177 /* intern the string and precompute the end */
1178 len = xmlStrlen(value);
1179 value = xmlDictLookup(ctxt->dict, value, len);
Daniel Veillardae0765b2008-07-31 19:54:59 +00001180 defaults->values[5 * defaults->nbAttrs + 2] = value;
1181 defaults->values[5 * defaults->nbAttrs + 3] = value + len;
1182 if (ctxt->external)
1183 defaults->values[5 * defaults->nbAttrs + 4] = BAD_CAST "external";
1184 else
1185 defaults->values[5 * defaults->nbAttrs + 4] = NULL;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001186 defaults->nbAttrs++;
1187
1188 return;
1189
1190mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001191 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001192 return;
1193}
1194
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001195/**
1196 * xmlAddSpecialAttr:
1197 * @ctxt: an XML parser context
1198 * @fullname: the element fullname
1199 * @fullattr: the attribute fullname
1200 * @type: the attribute type
1201 *
Daniel Veillardac4118d2008-01-11 05:27:32 +00001202 * Register this attribute type
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001203 */
1204static void
1205xmlAddSpecialAttr(xmlParserCtxtPtr ctxt,
1206 const xmlChar *fullname,
1207 const xmlChar *fullattr,
1208 int type)
1209{
1210 if (ctxt->attsSpecial == NULL) {
Daniel Veillard316a5c32005-01-23 22:56:39 +00001211 ctxt->attsSpecial = xmlHashCreateDict(10, ctxt->dict);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001212 if (ctxt->attsSpecial == NULL)
1213 goto mem_error;
1214 }
1215
Daniel Veillardac4118d2008-01-11 05:27:32 +00001216 if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1217 return;
1218
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +00001219 xmlHashAddEntry2(ctxt->attsSpecial, fullname, fullattr,
1220 (void *) (long) type);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001221 return;
1222
1223mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001224 xmlErrMemory(ctxt, NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001225 return;
1226}
1227
Daniel Veillard4432df22003-09-28 18:58:27 +00001228/**
Daniel Veillardac4118d2008-01-11 05:27:32 +00001229 * xmlCleanSpecialAttrCallback:
1230 *
1231 * Removes CDATA attributes from the special attribute table
1232 */
1233static void
1234xmlCleanSpecialAttrCallback(void *payload, void *data,
1235 const xmlChar *fullname, const xmlChar *fullattr,
1236 const xmlChar *unused ATTRIBUTE_UNUSED) {
1237 xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) data;
1238
Daniel Veillardb3edafd2008-01-11 08:00:57 +00001239 if (((long) payload) == XML_ATTRIBUTE_CDATA) {
Daniel Veillardac4118d2008-01-11 05:27:32 +00001240 xmlHashRemoveEntry2(ctxt->attsSpecial, fullname, fullattr, NULL);
1241 }
1242}
1243
1244/**
1245 * xmlCleanSpecialAttr:
1246 * @ctxt: an XML parser context
1247 *
1248 * Trim the list of attributes defined to remove all those of type
1249 * CDATA as they are not special. This call should be done when finishing
1250 * to parse the DTD and before starting to parse the document root.
1251 */
1252static void
1253xmlCleanSpecialAttr(xmlParserCtxtPtr ctxt)
1254{
1255 if (ctxt->attsSpecial == NULL)
1256 return;
1257
1258 xmlHashScanFull(ctxt->attsSpecial, xmlCleanSpecialAttrCallback, ctxt);
1259
1260 if (xmlHashSize(ctxt->attsSpecial) == 0) {
1261 xmlHashFree(ctxt->attsSpecial, NULL);
1262 ctxt->attsSpecial = NULL;
1263 }
1264 return;
1265}
1266
1267/**
Daniel Veillard4432df22003-09-28 18:58:27 +00001268 * xmlCheckLanguageID:
1269 * @lang: pointer to the string value
1270 *
1271 * Checks that the value conforms to the LanguageID production:
1272 *
1273 * NOTE: this is somewhat deprecated, those productions were removed from
1274 * the XML Second edition.
1275 *
1276 * [33] LanguageID ::= Langcode ('-' Subcode)*
1277 * [34] Langcode ::= ISO639Code | IanaCode | UserCode
1278 * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
1279 * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
1280 * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
1281 * [38] Subcode ::= ([a-z] | [A-Z])+
1282 *
1283 * Returns 1 if correct 0 otherwise
1284 **/
1285int
1286xmlCheckLanguageID(const xmlChar * lang)
1287{
1288 const xmlChar *cur = lang;
1289
1290 if (cur == NULL)
1291 return (0);
1292 if (((cur[0] == 'i') && (cur[1] == '-')) ||
1293 ((cur[0] == 'I') && (cur[1] == '-'))) {
1294 /*
1295 * IANA code
1296 */
1297 cur += 2;
1298 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
1299 ((cur[0] >= 'a') && (cur[0] <= 'z')))
1300 cur++;
1301 } else if (((cur[0] == 'x') && (cur[1] == '-')) ||
1302 ((cur[0] == 'X') && (cur[1] == '-'))) {
1303 /*
1304 * User code
1305 */
1306 cur += 2;
1307 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
1308 ((cur[0] >= 'a') && (cur[0] <= 'z')))
1309 cur++;
1310 } else if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
1311 ((cur[0] >= 'a') && (cur[0] <= 'z'))) {
1312 /*
1313 * ISO639
1314 */
1315 cur++;
1316 if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
1317 ((cur[0] >= 'a') && (cur[0] <= 'z')))
1318 cur++;
1319 else
1320 return (0);
1321 } else
1322 return (0);
1323 while (cur[0] != 0) { /* non input consuming */
1324 if (cur[0] != '-')
1325 return (0);
1326 cur++;
1327 if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
1328 ((cur[0] >= 'a') && (cur[0] <= 'z')))
1329 cur++;
1330 else
1331 return (0);
1332 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
1333 ((cur[0] >= 'a') && (cur[0] <= 'z')))
1334 cur++;
1335 }
1336 return (1);
1337}
1338
Owen Taylor3473f882001-02-23 17:55:21 +00001339/************************************************************************
1340 * *
Daniel Veillard0161e632008-08-28 15:36:32 +00001341 * Parser stacks related functions and macros *
Owen Taylor3473f882001-02-23 17:55:21 +00001342 * *
1343 ************************************************************************/
1344
1345xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
1346 const xmlChar ** str);
1347
Daniel Veillard0fb18932003-09-07 09:14:37 +00001348#ifdef SAX2
1349/**
1350 * nsPush:
1351 * @ctxt: an XML parser context
1352 * @prefix: the namespace prefix or NULL
1353 * @URL: the namespace name
1354 *
1355 * Pushes a new parser namespace on top of the ns stack
1356 *
William M. Brack7b9154b2003-09-27 19:23:50 +00001357 * Returns -1 in case of error, -2 if the namespace should be discarded
1358 * and the index in the stack otherwise.
Daniel Veillard0fb18932003-09-07 09:14:37 +00001359 */
1360static int
1361nsPush(xmlParserCtxtPtr ctxt, const xmlChar *prefix, const xmlChar *URL)
1362{
Daniel Veillarddca8cc72003-09-26 13:53:14 +00001363 if (ctxt->options & XML_PARSE_NSCLEAN) {
1364 int i;
1365 for (i = 0;i < ctxt->nsNr;i += 2) {
1366 if (ctxt->nsTab[i] == prefix) {
1367 /* in scope */
1368 if (ctxt->nsTab[i + 1] == URL)
1369 return(-2);
1370 /* out of scope keep it */
1371 break;
1372 }
1373 }
1374 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00001375 if ((ctxt->nsMax == 0) || (ctxt->nsTab == NULL)) {
1376 ctxt->nsMax = 10;
1377 ctxt->nsNr = 0;
1378 ctxt->nsTab = (const xmlChar **)
1379 xmlMalloc(ctxt->nsMax * sizeof(xmlChar *));
1380 if (ctxt->nsTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001381 xmlErrMemory(ctxt, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00001382 ctxt->nsMax = 0;
1383 return (-1);
1384 }
1385 } else if (ctxt->nsNr >= ctxt->nsMax) {
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00001386 const xmlChar ** tmp;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001387 ctxt->nsMax *= 2;
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00001388 tmp = (const xmlChar **) xmlRealloc((char *) ctxt->nsTab,
1389 ctxt->nsMax * sizeof(ctxt->nsTab[0]));
1390 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001391 xmlErrMemory(ctxt, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00001392 ctxt->nsMax /= 2;
1393 return (-1);
1394 }
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00001395 ctxt->nsTab = tmp;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001396 }
1397 ctxt->nsTab[ctxt->nsNr++] = prefix;
1398 ctxt->nsTab[ctxt->nsNr++] = URL;
1399 return (ctxt->nsNr);
1400}
1401/**
1402 * nsPop:
1403 * @ctxt: an XML parser context
1404 * @nr: the number to pop
1405 *
1406 * Pops the top @nr parser prefix/namespace from the ns stack
1407 *
1408 * Returns the number of namespaces removed
1409 */
1410static int
1411nsPop(xmlParserCtxtPtr ctxt, int nr)
1412{
1413 int i;
1414
1415 if (ctxt->nsTab == NULL) return(0);
1416 if (ctxt->nsNr < nr) {
1417 xmlGenericError(xmlGenericErrorContext, "Pbm popping %d NS\n", nr);
1418 nr = ctxt->nsNr;
1419 }
1420 if (ctxt->nsNr <= 0)
1421 return (0);
Daniel Veillard34e3f642008-07-29 09:02:27 +00001422
Daniel Veillard0fb18932003-09-07 09:14:37 +00001423 for (i = 0;i < nr;i++) {
1424 ctxt->nsNr--;
1425 ctxt->nsTab[ctxt->nsNr] = NULL;
1426 }
1427 return(nr);
1428}
1429#endif
1430
1431static int
1432xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt, int nr) {
1433 const xmlChar **atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001434 int *attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001435 int maxatts;
1436
1437 if (ctxt->atts == NULL) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00001438 maxatts = 55; /* allow for 10 attrs by default */
Daniel Veillard0fb18932003-09-07 09:14:37 +00001439 atts = (const xmlChar **)
1440 xmlMalloc(maxatts * sizeof(xmlChar *));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001441 if (atts == NULL) goto mem_error;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001442 ctxt->atts = atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001443 attallocs = (int *) xmlMalloc((maxatts / 5) * sizeof(int));
1444 if (attallocs == NULL) goto mem_error;
1445 ctxt->attallocs = attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001446 ctxt->maxatts = maxatts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001447 } else if (nr + 5 > ctxt->maxatts) {
1448 maxatts = (nr + 5) * 2;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001449 atts = (const xmlChar **) xmlRealloc((void *) ctxt->atts,
1450 maxatts * sizeof(const xmlChar *));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001451 if (atts == NULL) goto mem_error;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001452 ctxt->atts = atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001453 attallocs = (int *) xmlRealloc((void *) ctxt->attallocs,
1454 (maxatts / 5) * sizeof(int));
1455 if (attallocs == NULL) goto mem_error;
1456 ctxt->attallocs = attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001457 ctxt->maxatts = maxatts;
1458 }
1459 return(ctxt->maxatts);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001460mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001461 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001462 return(-1);
Daniel Veillard0fb18932003-09-07 09:14:37 +00001463}
1464
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001465/**
1466 * inputPush:
1467 * @ctxt: an XML parser context
Daniel Veillard9d06d302002-01-22 18:15:52 +00001468 * @value: the parser input
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001469 *
1470 * Pushes a new parser input on top of the input stack
Daniel Veillard9d06d302002-01-22 18:15:52 +00001471 *
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001472 * Returns -1 in case of error, the index in the stack otherwise
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001473 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001474int
Daniel Veillard1c732d22002-11-30 11:22:59 +00001475inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value)
1476{
Daniel Veillard36e5cd52004-11-02 14:52:23 +00001477 if ((ctxt == NULL) || (value == NULL))
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001478 return(-1);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001479 if (ctxt->inputNr >= ctxt->inputMax) {
1480 ctxt->inputMax *= 2;
1481 ctxt->inputTab =
1482 (xmlParserInputPtr *) xmlRealloc(ctxt->inputTab,
1483 ctxt->inputMax *
1484 sizeof(ctxt->inputTab[0]));
1485 if (ctxt->inputTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001486 xmlErrMemory(ctxt, NULL);
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001487 xmlFreeInputStream(value);
1488 ctxt->inputMax /= 2;
1489 value = NULL;
1490 return (-1);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001491 }
1492 }
1493 ctxt->inputTab[ctxt->inputNr] = value;
1494 ctxt->input = value;
1495 return (ctxt->inputNr++);
1496}
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001497/**
Daniel Veillard1c732d22002-11-30 11:22:59 +00001498 * inputPop:
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001499 * @ctxt: an XML parser context
1500 *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001501 * Pops the top parser input from the input stack
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001502 *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001503 * Returns the input just removed
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001504 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001505xmlParserInputPtr
Daniel Veillard1c732d22002-11-30 11:22:59 +00001506inputPop(xmlParserCtxtPtr ctxt)
1507{
1508 xmlParserInputPtr ret;
1509
Daniel Veillard36e5cd52004-11-02 14:52:23 +00001510 if (ctxt == NULL)
1511 return(NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001512 if (ctxt->inputNr <= 0)
Daniel Veillard24505b02005-07-28 23:49:35 +00001513 return (NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001514 ctxt->inputNr--;
1515 if (ctxt->inputNr > 0)
1516 ctxt->input = ctxt->inputTab[ctxt->inputNr - 1];
1517 else
1518 ctxt->input = NULL;
1519 ret = ctxt->inputTab[ctxt->inputNr];
Daniel Veillard24505b02005-07-28 23:49:35 +00001520 ctxt->inputTab[ctxt->inputNr] = NULL;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001521 return (ret);
1522}
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001523/**
1524 * nodePush:
1525 * @ctxt: an XML parser context
Daniel Veillard9d06d302002-01-22 18:15:52 +00001526 * @value: the element node
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001527 *
1528 * Pushes a new element node on top of the node stack
Daniel Veillard9d06d302002-01-22 18:15:52 +00001529 *
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001530 * Returns -1 in case of error, the index in the stack otherwise
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001531 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001532int
Daniel Veillard1c732d22002-11-30 11:22:59 +00001533nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value)
1534{
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001535 if (ctxt == NULL) return(0);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001536 if (ctxt->nodeNr >= ctxt->nodeMax) {
Daniel Veillardd0cf7f62004-11-09 16:17:02 +00001537 xmlNodePtr *tmp;
1538
1539 tmp = (xmlNodePtr *) xmlRealloc(ctxt->nodeTab,
1540 ctxt->nodeMax * 2 *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001541 sizeof(ctxt->nodeTab[0]));
Daniel Veillardd0cf7f62004-11-09 16:17:02 +00001542 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001543 xmlErrMemory(ctxt, NULL);
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001544 return (-1);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001545 }
Daniel Veillardd0cf7f62004-11-09 16:17:02 +00001546 ctxt->nodeTab = tmp;
1547 ctxt->nodeMax *= 2;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001548 }
Daniel Veillard8915c152008-08-26 13:05:34 +00001549 if ((((unsigned int) ctxt->nodeNr) > xmlParserMaxDepth) &&
1550 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00001551 xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
Daniel Veillard8915c152008-08-26 13:05:34 +00001552 "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
Daniel Veillard4aede2e2003-10-17 12:43:59 +00001553 xmlParserMaxDepth);
Daniel Veillard3b2e4e12003-02-03 08:52:58 +00001554 ctxt->instate = XML_PARSER_EOF;
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001555 return(-1);
Daniel Veillard3b2e4e12003-02-03 08:52:58 +00001556 }
Daniel Veillard1c732d22002-11-30 11:22:59 +00001557 ctxt->nodeTab[ctxt->nodeNr] = value;
1558 ctxt->node = value;
1559 return (ctxt->nodeNr++);
1560}
Daniel Veillard8915c152008-08-26 13:05:34 +00001561
Daniel Veillard1c732d22002-11-30 11:22:59 +00001562/**
1563 * nodePop:
1564 * @ctxt: an XML parser context
1565 *
1566 * Pops the top element node from the node stack
1567 *
1568 * Returns the node just removed
Owen Taylor3473f882001-02-23 17:55:21 +00001569 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001570xmlNodePtr
Daniel Veillard1c732d22002-11-30 11:22:59 +00001571nodePop(xmlParserCtxtPtr ctxt)
1572{
1573 xmlNodePtr ret;
1574
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001575 if (ctxt == NULL) return(NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001576 if (ctxt->nodeNr <= 0)
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001577 return (NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001578 ctxt->nodeNr--;
1579 if (ctxt->nodeNr > 0)
1580 ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1];
1581 else
1582 ctxt->node = NULL;
1583 ret = ctxt->nodeTab[ctxt->nodeNr];
Daniel Veillard24505b02005-07-28 23:49:35 +00001584 ctxt->nodeTab[ctxt->nodeNr] = NULL;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001585 return (ret);
1586}
Daniel Veillarda2351322004-06-27 12:08:10 +00001587
1588#ifdef LIBXML_PUSH_ENABLED
Daniel Veillard1c732d22002-11-30 11:22:59 +00001589/**
Daniel Veillarde57ec792003-09-10 10:50:59 +00001590 * nameNsPush:
1591 * @ctxt: an XML parser context
1592 * @value: the element name
1593 * @prefix: the element prefix
1594 * @URI: the element namespace name
1595 *
1596 * Pushes a new element name/prefix/URL on top of the name stack
1597 *
1598 * Returns -1 in case of error, the index in the stack otherwise
1599 */
1600static int
1601nameNsPush(xmlParserCtxtPtr ctxt, const xmlChar * value,
1602 const xmlChar *prefix, const xmlChar *URI, int nsNr)
1603{
1604 if (ctxt->nameNr >= ctxt->nameMax) {
1605 const xmlChar * *tmp;
1606 void **tmp2;
1607 ctxt->nameMax *= 2;
1608 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1609 ctxt->nameMax *
1610 sizeof(ctxt->nameTab[0]));
1611 if (tmp == NULL) {
1612 ctxt->nameMax /= 2;
1613 goto mem_error;
1614 }
1615 ctxt->nameTab = tmp;
1616 tmp2 = (void **) xmlRealloc((void * *)ctxt->pushTab,
1617 ctxt->nameMax * 3 *
1618 sizeof(ctxt->pushTab[0]));
1619 if (tmp2 == NULL) {
1620 ctxt->nameMax /= 2;
1621 goto mem_error;
1622 }
1623 ctxt->pushTab = tmp2;
1624 }
1625 ctxt->nameTab[ctxt->nameNr] = value;
1626 ctxt->name = value;
1627 ctxt->pushTab[ctxt->nameNr * 3] = (void *) prefix;
1628 ctxt->pushTab[ctxt->nameNr * 3 + 1] = (void *) URI;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +00001629 ctxt->pushTab[ctxt->nameNr * 3 + 2] = (void *) (long) nsNr;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001630 return (ctxt->nameNr++);
1631mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001632 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001633 return (-1);
1634}
1635/**
1636 * nameNsPop:
1637 * @ctxt: an XML parser context
1638 *
1639 * Pops the top element/prefix/URI name from the name stack
1640 *
1641 * Returns the name just removed
1642 */
1643static const xmlChar *
1644nameNsPop(xmlParserCtxtPtr ctxt)
1645{
1646 const xmlChar *ret;
1647
1648 if (ctxt->nameNr <= 0)
Daniel Veillard24505b02005-07-28 23:49:35 +00001649 return (NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001650 ctxt->nameNr--;
1651 if (ctxt->nameNr > 0)
1652 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1653 else
1654 ctxt->name = NULL;
1655 ret = ctxt->nameTab[ctxt->nameNr];
1656 ctxt->nameTab[ctxt->nameNr] = NULL;
1657 return (ret);
1658}
Daniel Veillarda2351322004-06-27 12:08:10 +00001659#endif /* LIBXML_PUSH_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +00001660
1661/**
Daniel Veillard1c732d22002-11-30 11:22:59 +00001662 * namePush:
1663 * @ctxt: an XML parser context
1664 * @value: the element name
1665 *
1666 * Pushes a new element name on top of the name stack
1667 *
Daniel Veillarde57ec792003-09-10 10:50:59 +00001668 * Returns -1 in case of error, the index in the stack otherwise
Daniel Veillard1c732d22002-11-30 11:22:59 +00001669 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001670int
Daniel Veillard2fdbd322003-08-18 12:15:38 +00001671namePush(xmlParserCtxtPtr ctxt, const xmlChar * value)
Daniel Veillard1c732d22002-11-30 11:22:59 +00001672{
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001673 if (ctxt == NULL) return (-1);
1674
Daniel Veillard1c732d22002-11-30 11:22:59 +00001675 if (ctxt->nameNr >= ctxt->nameMax) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00001676 const xmlChar * *tmp;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001677 ctxt->nameMax *= 2;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001678 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
Daniel Veillard1c732d22002-11-30 11:22:59 +00001679 ctxt->nameMax *
1680 sizeof(ctxt->nameTab[0]));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001681 if (tmp == NULL) {
1682 ctxt->nameMax /= 2;
1683 goto mem_error;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001684 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00001685 ctxt->nameTab = tmp;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001686 }
1687 ctxt->nameTab[ctxt->nameNr] = value;
1688 ctxt->name = value;
1689 return (ctxt->nameNr++);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001690mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001691 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001692 return (-1);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001693}
1694/**
1695 * namePop:
1696 * @ctxt: an XML parser context
1697 *
1698 * Pops the top element name from the name stack
1699 *
1700 * Returns the name just removed
1701 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001702const xmlChar *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001703namePop(xmlParserCtxtPtr ctxt)
1704{
Daniel Veillard2fdbd322003-08-18 12:15:38 +00001705 const xmlChar *ret;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001706
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001707 if ((ctxt == NULL) || (ctxt->nameNr <= 0))
1708 return (NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001709 ctxt->nameNr--;
1710 if (ctxt->nameNr > 0)
1711 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1712 else
1713 ctxt->name = NULL;
1714 ret = ctxt->nameTab[ctxt->nameNr];
Daniel Veillard24505b02005-07-28 23:49:35 +00001715 ctxt->nameTab[ctxt->nameNr] = NULL;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001716 return (ret);
1717}
Owen Taylor3473f882001-02-23 17:55:21 +00001718
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001719static int spacePush(xmlParserCtxtPtr ctxt, int val) {
Owen Taylor3473f882001-02-23 17:55:21 +00001720 if (ctxt->spaceNr >= ctxt->spaceMax) {
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00001721 int *tmp;
1722
Owen Taylor3473f882001-02-23 17:55:21 +00001723 ctxt->spaceMax *= 2;
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00001724 tmp = (int *) xmlRealloc(ctxt->spaceTab,
1725 ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
1726 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001727 xmlErrMemory(ctxt, NULL);
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001728 ctxt->spaceMax /=2;
1729 return(-1);
Owen Taylor3473f882001-02-23 17:55:21 +00001730 }
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00001731 ctxt->spaceTab = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00001732 }
1733 ctxt->spaceTab[ctxt->spaceNr] = val;
1734 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr];
1735 return(ctxt->spaceNr++);
1736}
1737
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001738static int spacePop(xmlParserCtxtPtr ctxt) {
Owen Taylor3473f882001-02-23 17:55:21 +00001739 int ret;
1740 if (ctxt->spaceNr <= 0) return(0);
1741 ctxt->spaceNr--;
1742 if (ctxt->spaceNr > 0)
1743 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1];
1744 else
Daniel Veillarddbcbbd22006-06-18 19:55:20 +00001745 ctxt->space = &ctxt->spaceTab[0];
Owen Taylor3473f882001-02-23 17:55:21 +00001746 ret = ctxt->spaceTab[ctxt->spaceNr];
1747 ctxt->spaceTab[ctxt->spaceNr] = -1;
1748 return(ret);
1749}
1750
/*
 * Macros for accessing the content. Those should be used only by the parser,
 * and not exported. They all expect a variable named ctxt to be in scope.
 *
 * Dirty macros, i.e. one often needs to make assumptions on the context to
 * use them
 *
 *   CUR_PTR return the current pointer to the xmlChar to be parsed.
 *           To be used with extreme caution since operations consuming
 *           characters may move the input buffer to a different location !
 *   CUR     returns the current xmlChar value, i.e. an 8 bit value.
 *           This should be used internally by the parser
 *           only to compare to ASCII values otherwise it would break when
 *           running with UTF-8 encoding.
 *   RAW     same as CUR but in the input buffer, bypass any token
 *           extraction that may have been done
 *   NXT(n)  returns the n'th next xmlChar. Same as CUR it should be used only
 *           to compare on ASCII based substring.
 *   SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
 *           strings without newlines within the parser.
 *   NEXT1   Skip 1 xmlChar, and must also be used only to skip 1 non-newline
 *           ASCII defined char within the parser.
 *
 * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
 *
 *   NEXT    Skip to the next character, this does the proper decoding
 *           in UTF-8 mode. It also pop-up unfinished entities on the fly.
 *   NEXTL(l) Skip the current unicode character of l xmlChars long.
 *   CUR_CHAR(l) returns the current unicode character (int), set l
 *           to the number of xmlChars used for the encoding [0-5].
 *   CUR_SCHAR  same but operate on a string instead of the context
 *   COPY_BUF  copy the current unicode char to the target buffer, increment
 *            the index
 *   GROW, SHRINK  handling of input buffers
 */

#define RAW (*ctxt->input->cur)
#define CUR (*ctxt->input->cur)
#define NXT(val) ctxt->input->cur[(val)]
#define CUR_PTR ctxt->input->cur

/*
 * CMPn(s, c1 .. cn): true when the first n bytes found at s are exactly
 * c1 .. cn. No bound check is done, the caller must guarantee that n
 * bytes are readable at s. Every parameter is parenthesized so that
 * any expression can be passed safely.
 */
#define CMP4( s, c1, c2, c3, c4 ) \
  ( ((unsigned char *) (s))[ 0 ] == (c1) && \
    ((unsigned char *) (s))[ 1 ] == (c2) && \
    ((unsigned char *) (s))[ 2 ] == (c3) && \
    ((unsigned char *) (s))[ 3 ] == (c4) )
#define CMP5( s, c1, c2, c3, c4, c5 ) \
  ( CMP4( s, c1, c2, c3, c4 ) && ((unsigned char *) (s))[ 4 ] == (c5) )
#define CMP6( s, c1, c2, c3, c4, c5, c6 ) \
  ( CMP5( s, c1, c2, c3, c4, c5 ) && ((unsigned char *) (s))[ 5 ] == (c6) )
#define CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) \
  ( CMP6( s, c1, c2, c3, c4, c5, c6 ) && ((unsigned char *) (s))[ 6 ] == (c7) )
#define CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) \
  ( CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) && ((unsigned char *) (s))[ 7 ] == (c8) )
#define CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) \
  ( CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) && \
    ((unsigned char *) (s))[ 8 ] == (c9) )
#define CMP10( s, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 ) \
  ( CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) && \
    ((unsigned char *) (s))[ 9 ] == (c10) )

/*
 * SKIP(val): advance by val xmlChars without looking at them (the line
 * counter is not updated), then handle a PE reference found at the new
 * position and pop the input if it is exhausted.
 */
#define SKIP(val) do {							\
    ctxt->nbChars += (val),ctxt->input->cur += (val),ctxt->input->col+=(val);			\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
    if ((*ctxt->input->cur == 0) &&					\
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))		\
	    xmlPopInput(ctxt);						\
  } while (0)

/*
 * SKIPL(val): same as SKIP but walks the xmlChars one by one so that
 * the line and column counters stay correct across newlines.
 */
#define SKIPL(val) do {							\
    int skipl;								\
    for(skipl=0; skipl<(val); skipl++) {				\
	if (*(ctxt->input->cur) == '\n') {				\
	ctxt->input->line++; ctxt->input->col = 1;			\
	} else ctxt->input->col++;					\
	ctxt->nbChars++;						\
	ctxt->input->cur++;						\
    }									\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
    if ((*ctxt->input->cur == 0) &&					\
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))		\
	    xmlPopInput(ctxt);						\
  } while (0)
1831
Daniel Veillarda880b122003-04-21 21:36:41 +00001832#define SHRINK if ((ctxt->progressive == 0) && \
Daniel Veillard6155d8a2003-08-19 15:01:28 +00001833 (ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \
1834 (ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \
Daniel Veillard46de64e2002-05-29 08:21:33 +00001835 xmlSHRINK (ctxt);
1836
1837static void xmlSHRINK (xmlParserCtxtPtr ctxt) {
1838 xmlParserInputShrink(ctxt->input);
1839 if ((*ctxt->input->cur == 0) &&
1840 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1841 xmlPopInput(ctxt);
Daniel Veillard48b2f892001-02-25 16:11:03 +00001842 }
Owen Taylor3473f882001-02-23 17:55:21 +00001843
Daniel Veillarda880b122003-04-21 21:36:41 +00001844#define GROW if ((ctxt->progressive == 0) && \
1845 (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK)) \
Daniel Veillard46de64e2002-05-29 08:21:33 +00001846 xmlGROW (ctxt);
1847
1848static void xmlGROW (xmlParserCtxtPtr ctxt) {
1849 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1850 if ((*ctxt->input->cur == 0) &&
1851 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1852 xmlPopInput(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +00001853}
Owen Taylor3473f882001-02-23 17:55:21 +00001854
/* Skip the blank characters found at the current position. */
#define SKIP_BLANKS xmlSkipBlankChars(ctxt)

/* Skip to the next character through xmlNextChar(). */
#define NEXT xmlNextChar(ctxt)

/*
 * Skip exactly one xmlChar without any decoding; only the column and
 * character counters are updated, so it must not be used on a newline.
 * NOTE: expands to a plain { } block, not a do { } while (0).
 */
#define NEXT1 {								\
	ctxt->input->col++;						\
	ctxt->input->cur++;						\
	ctxt->nbChars++;						\
	if (*ctxt->input->cur == 0)					\
	    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);		\
    }

/*
 * Skip the current character, which is l xmlChars long, keeping the
 * line and column counters up to date, then handle a PE reference
 * found at the new position.
 */
#define NEXTL(l) do {							\
    if (*(ctxt->input->cur) == '\n') {					\
	ctxt->input->line++; ctxt->input->col = 1;			\
    } else ctxt->input->col++;						\
    ctxt->input->cur += l;				\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
  } while (0)

/* Current character of the context input; its length is stored in l. */
#define CUR_CHAR(l) xmlCurrentChar(ctxt, &l)
/* Same, reading from the string s instead of the context input. */
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, s, &l)

/*
 * Append the character v, encoded on l xmlChars, to buffer b at index i
 * and advance i accordingly.
 * NOTE: expands to an unbraced if/else, the caller supplies the final ';'.
 */
#define COPY_BUF(l,b,i,v)						\
    if (l == 1) b[i++] = (xmlChar) v;					\
    else i += xmlCopyCharMultiByte(&b[i],v)
Owen Taylor3473f882001-02-23 17:55:21 +00001881
1882/**
1883 * xmlSkipBlankChars:
1884 * @ctxt: the XML parser context
1885 *
1886 * skip all blanks character found at that point in the input streams.
1887 * It pops up finished entities in the process if allowable at that point.
1888 *
1889 * Returns the number of space chars skipped
1890 */
1891
1892int
1893xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
Daniel Veillard02141ea2001-04-30 11:46:40 +00001894 int res = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001895
1896 /*
1897 * It's Okay to use CUR/NEXT here since all the blanks are on
1898 * the ASCII range.
1899 */
Daniel Veillard02141ea2001-04-30 11:46:40 +00001900 if ((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) {
1901 const xmlChar *cur;
Owen Taylor3473f882001-02-23 17:55:21 +00001902 /*
Daniel Veillard02141ea2001-04-30 11:46:40 +00001903 * if we are in the document content, go really fast
Owen Taylor3473f882001-02-23 17:55:21 +00001904 */
Daniel Veillard02141ea2001-04-30 11:46:40 +00001905 cur = ctxt->input->cur;
William M. Brack76e95df2003-10-18 16:20:14 +00001906 while (IS_BLANK_CH(*cur)) {
Daniel Veillard02141ea2001-04-30 11:46:40 +00001907 if (*cur == '\n') {
1908 ctxt->input->line++; ctxt->input->col = 1;
1909 }
1910 cur++;
1911 res++;
1912 if (*cur == 0) {
1913 ctxt->input->cur = cur;
1914 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1915 cur = ctxt->input->cur;
1916 }
1917 }
1918 ctxt->input->cur = cur;
1919 } else {
1920 int cur;
1921 do {
1922 cur = CUR;
Daniel Veillard7da92702005-01-23 20:15:53 +00001923 while (IS_BLANK_CH(cur)) { /* CHECKED tstblanks.xml */
Daniel Veillard02141ea2001-04-30 11:46:40 +00001924 NEXT;
1925 cur = CUR;
1926 res++;
1927 }
1928 while ((cur == 0) && (ctxt->inputNr > 1) &&
1929 (ctxt->instate != XML_PARSER_COMMENT)) {
1930 xmlPopInput(ctxt);
1931 cur = CUR;
1932 }
1933 /*
1934 * Need to handle support of entities branching here
1935 */
1936 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);
1937 } while (IS_BLANK(cur)); /* CHECKED tstblanks.xml */
1938 }
Owen Taylor3473f882001-02-23 17:55:21 +00001939 return(res);
1940}
1941
1942/************************************************************************
1943 * *
1944 * Commodity functions to handle entities *
1945 * *
1946 ************************************************************************/
1947
1948/**
1949 * xmlPopInput:
1950 * @ctxt: an XML parser context
1951 *
1952 * xmlPopInput: the current input pointed by ctxt->input came to an end
1953 * pop it and return the next char.
1954 *
1955 * Returns the current xmlChar in the parser context
1956 */
1957xmlChar
1958xmlPopInput(xmlParserCtxtPtr ctxt) {
Daniel Veillarda82b1822004-11-08 16:24:57 +00001959 if ((ctxt == NULL) || (ctxt->inputNr <= 1)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00001960 if (xmlParserDebugEntities)
1961 xmlGenericError(xmlGenericErrorContext,
1962 "Popping input %d\n", ctxt->inputNr);
1963 xmlFreeInputStream(inputPop(ctxt));
Daniel Veillard561b7f82002-03-20 21:55:57 +00001964 if ((*ctxt->input->cur == 0) &&
Owen Taylor3473f882001-02-23 17:55:21 +00001965 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1966 return(xmlPopInput(ctxt));
1967 return(CUR);
1968}
1969
1970/**
1971 * xmlPushInput:
1972 * @ctxt: an XML parser context
1973 * @input: an XML parser input fragment (entity, XML fragment ...).
1974 *
1975 * xmlPushInput: switch to a new input stream which is stacked on top
1976 * of the previous one(s).
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001977 * Returns -1 in case of error or the index in the input stack
Owen Taylor3473f882001-02-23 17:55:21 +00001978 */
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001979int
Owen Taylor3473f882001-02-23 17:55:21 +00001980xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001981 int ret;
1982 if (input == NULL) return(-1);
Owen Taylor3473f882001-02-23 17:55:21 +00001983
1984 if (xmlParserDebugEntities) {
1985 if ((ctxt->input != NULL) && (ctxt->input->filename))
1986 xmlGenericError(xmlGenericErrorContext,
1987 "%s(%d): ", ctxt->input->filename,
1988 ctxt->input->line);
1989 xmlGenericError(xmlGenericErrorContext,
1990 "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
1991 }
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001992 ret = inputPush(ctxt, input);
Owen Taylor3473f882001-02-23 17:55:21 +00001993 GROW;
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001994 return(ret);
Owen Taylor3473f882001-02-23 17:55:21 +00001995}
1996
1997/**
1998 * xmlParseCharRef:
1999 * @ctxt: an XML parser context
2000 *
2001 * parse Reference declarations
2002 *
2003 * [66] CharRef ::= '&#' [0-9]+ ';' |
2004 * '&#x' [0-9a-fA-F]+ ';'
2005 *
2006 * [ WFC: Legal Character ]
2007 * Characters referred to using character references must match the
2008 * production for Char.
2009 *
2010 * Returns the value parsed (as an int), 0 in case of error
2011 */
2012int
2013xmlParseCharRef(xmlParserCtxtPtr ctxt) {
Daniel Veillard50582112001-03-26 22:52:16 +00002014 unsigned int val = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002015 int count = 0;
Daniel Veillard37fd3072004-06-03 11:22:31 +00002016 unsigned int outofrange = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002017
Owen Taylor3473f882001-02-23 17:55:21 +00002018 /*
2019 * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
2020 */
Daniel Veillard561b7f82002-03-20 21:55:57 +00002021 if ((RAW == '&') && (NXT(1) == '#') &&
Owen Taylor3473f882001-02-23 17:55:21 +00002022 (NXT(2) == 'x')) {
2023 SKIP(3);
2024 GROW;
2025 while (RAW != ';') { /* loop blocked by count */
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00002026 if (count++ > 20) {
2027 count = 0;
2028 GROW;
2029 }
2030 if ((RAW >= '0') && (RAW <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +00002031 val = val * 16 + (CUR - '0');
2032 else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
2033 val = val * 16 + (CUR - 'a') + 10;
2034 else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
2035 val = val * 16 + (CUR - 'A') + 10;
2036 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002037 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002038 val = 0;
2039 break;
2040 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00002041 if (val > 0x10FFFF)
2042 outofrange = val;
2043
Owen Taylor3473f882001-02-23 17:55:21 +00002044 NEXT;
2045 count++;
2046 }
2047 if (RAW == ';') {
2048 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
Daniel Veillard77a90a72003-03-22 00:04:05 +00002049 ctxt->input->col++;
Owen Taylor3473f882001-02-23 17:55:21 +00002050 ctxt->nbChars ++;
2051 ctxt->input->cur++;
2052 }
Daniel Veillard561b7f82002-03-20 21:55:57 +00002053 } else if ((RAW == '&') && (NXT(1) == '#')) {
Owen Taylor3473f882001-02-23 17:55:21 +00002054 SKIP(2);
2055 GROW;
2056 while (RAW != ';') { /* loop blocked by count */
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00002057 if (count++ > 20) {
2058 count = 0;
2059 GROW;
2060 }
2061 if ((RAW >= '0') && (RAW <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +00002062 val = val * 10 + (CUR - '0');
2063 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002064 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002065 val = 0;
2066 break;
2067 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00002068 if (val > 0x10FFFF)
2069 outofrange = val;
2070
Owen Taylor3473f882001-02-23 17:55:21 +00002071 NEXT;
2072 count++;
2073 }
2074 if (RAW == ';') {
2075 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
Daniel Veillard77a90a72003-03-22 00:04:05 +00002076 ctxt->input->col++;
Owen Taylor3473f882001-02-23 17:55:21 +00002077 ctxt->nbChars ++;
2078 ctxt->input->cur++;
2079 }
2080 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002081 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002082 }
2083
2084 /*
2085 * [ WFC: Legal Character ]
2086 * Characters referred to using character references must match the
2087 * production for Char.
2088 */
Daniel Veillard37fd3072004-06-03 11:22:31 +00002089 if ((IS_CHAR(val) && (outofrange == 0))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002090 return(val);
2091 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00002092 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2093 "xmlParseCharRef: invalid xmlChar value %d\n",
2094 val);
Owen Taylor3473f882001-02-23 17:55:21 +00002095 }
2096 return(0);
2097}
2098
2099/**
2100 * xmlParseStringCharRef:
2101 * @ctxt: an XML parser context
2102 * @str: a pointer to an index in the string
2103 *
2104 * parse Reference declarations, variant parsing from a string rather
2105 * than an an input flow.
2106 *
2107 * [66] CharRef ::= '&#' [0-9]+ ';' |
2108 * '&#x' [0-9a-fA-F]+ ';'
2109 *
2110 * [ WFC: Legal Character ]
2111 * Characters referred to using character references must match the
2112 * production for Char.
2113 *
2114 * Returns the value parsed (as an int), 0 in case of error, str will be
2115 * updated to the current value of the index
2116 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00002117static int
Owen Taylor3473f882001-02-23 17:55:21 +00002118xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
2119 const xmlChar *ptr;
2120 xmlChar cur;
Daniel Veillard37fd3072004-06-03 11:22:31 +00002121 unsigned int val = 0;
2122 unsigned int outofrange = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002123
2124 if ((str == NULL) || (*str == NULL)) return(0);
2125 ptr = *str;
2126 cur = *ptr;
2127 if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
2128 ptr += 3;
2129 cur = *ptr;
2130 while (cur != ';') { /* Non input consuming loop */
2131 if ((cur >= '0') && (cur <= '9'))
2132 val = val * 16 + (cur - '0');
2133 else if ((cur >= 'a') && (cur <= 'f'))
2134 val = val * 16 + (cur - 'a') + 10;
2135 else if ((cur >= 'A') && (cur <= 'F'))
2136 val = val * 16 + (cur - 'A') + 10;
2137 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002138 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002139 val = 0;
2140 break;
2141 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00002142 if (val > 0x10FFFF)
2143 outofrange = val;
2144
Owen Taylor3473f882001-02-23 17:55:21 +00002145 ptr++;
2146 cur = *ptr;
2147 }
2148 if (cur == ';')
2149 ptr++;
2150 } else if ((cur == '&') && (ptr[1] == '#')){
2151 ptr += 2;
2152 cur = *ptr;
2153 while (cur != ';') { /* Non input consuming loops */
2154 if ((cur >= '0') && (cur <= '9'))
2155 val = val * 10 + (cur - '0');
2156 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002157 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002158 val = 0;
2159 break;
2160 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00002161 if (val > 0x10FFFF)
2162 outofrange = val;
2163
Owen Taylor3473f882001-02-23 17:55:21 +00002164 ptr++;
2165 cur = *ptr;
2166 }
2167 if (cur == ';')
2168 ptr++;
2169 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002170 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002171 return(0);
2172 }
2173 *str = ptr;
2174
2175 /*
2176 * [ WFC: Legal Character ]
2177 * Characters referred to using character references must match the
2178 * production for Char.
2179 */
Daniel Veillard37fd3072004-06-03 11:22:31 +00002180 if ((IS_CHAR(val) && (outofrange == 0))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002181 return(val);
2182 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00002183 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2184 "xmlParseStringCharRef: invalid xmlChar value %d\n",
2185 val);
Owen Taylor3473f882001-02-23 17:55:21 +00002186 }
2187 return(0);
2188}
2189
2190/**
Daniel Veillardf5582f12002-06-11 10:08:16 +00002191 * xmlNewBlanksWrapperInputStream:
2192 * @ctxt: an XML parser context
2193 * @entity: an Entity pointer
2194 *
2195 * Create a new input stream for wrapping
2196 * blanks around a PEReference
2197 *
2198 * Returns the new input stream or NULL
2199 */
2200
2201static void deallocblankswrapper (xmlChar *str) {xmlFree(str);}
2202
Daniel Veillardf4862f02002-09-10 11:13:43 +00002203static xmlParserInputPtr
Daniel Veillardf5582f12002-06-11 10:08:16 +00002204xmlNewBlanksWrapperInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
2205 xmlParserInputPtr input;
2206 xmlChar *buffer;
2207 size_t length;
2208 if (entity == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002209 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
2210 "xmlNewBlanksWrapperInputStream entity\n");
Daniel Veillardf5582f12002-06-11 10:08:16 +00002211 return(NULL);
2212 }
2213 if (xmlParserDebugEntities)
2214 xmlGenericError(xmlGenericErrorContext,
2215 "new blanks wrapper for entity: %s\n", entity->name);
2216 input = xmlNewInputStream(ctxt);
2217 if (input == NULL) {
2218 return(NULL);
2219 }
2220 length = xmlStrlen(entity->name) + 5;
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002221 buffer = xmlMallocAtomic(length);
Daniel Veillardf5582f12002-06-11 10:08:16 +00002222 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002223 xmlErrMemory(ctxt, NULL);
Daniel Veillard68b6e022008-03-31 09:26:00 +00002224 xmlFree(input);
Daniel Veillardf5582f12002-06-11 10:08:16 +00002225 return(NULL);
2226 }
2227 buffer [0] = ' ';
2228 buffer [1] = '%';
2229 buffer [length-3] = ';';
2230 buffer [length-2] = ' ';
2231 buffer [length-1] = 0;
2232 memcpy(buffer + 2, entity->name, length - 5);
2233 input->free = deallocblankswrapper;
2234 input->base = buffer;
2235 input->cur = buffer;
2236 input->length = length;
2237 input->end = &buffer[length];
2238 return(input);
2239}
2240
2241/**
Owen Taylor3473f882001-02-23 17:55:21 +00002242 * xmlParserHandlePEReference:
2243 * @ctxt: the parser context
2244 *
2245 * [69] PEReference ::= '%' Name ';'
2246 *
2247 * [ WFC: No Recursion ]
2248 * A parsed entity must not contain a recursive
2249 * reference to itself, either directly or indirectly.
2250 *
2251 * [ WFC: Entity Declared ]
2252 * In a document without any DTD, a document with only an internal DTD
2253 * subset which contains no parameter entity references, or a document
2254 * with "standalone='yes'", ... ... The declaration of a parameter
2255 * entity must precede any reference to it...
2256 *
2257 * [ VC: Entity Declared ]
2258 * In a document with an external subset or external parameter entities
2259 * with "standalone='no'", ... ... The declaration of a parameter entity
2260 * must precede any reference to it...
2261 *
2262 * [ WFC: In DTD ]
2263 * Parameter-entity references may only appear in the DTD.
2264 * NOTE: misleading but this is handled.
2265 *
2266 * A PEReference may have been detected in the current input stream
2267 * the handling is done accordingly to
2268 * http://www.w3.org/TR/REC-xml#entproc
2269 * i.e.
2270 * - Included in literal in entity values
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002271 * - Included as Parameter Entity reference within DTDs
Owen Taylor3473f882001-02-23 17:55:21 +00002272 */
2273void
2274xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002275 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00002276 xmlEntityPtr entity = NULL;
2277 xmlParserInputPtr input;
2278
Owen Taylor3473f882001-02-23 17:55:21 +00002279 if (RAW != '%') return;
2280 switch(ctxt->instate) {
2281 case XML_PARSER_CDATA_SECTION:
2282 return;
2283 case XML_PARSER_COMMENT:
2284 return;
2285 case XML_PARSER_START_TAG:
2286 return;
2287 case XML_PARSER_END_TAG:
2288 return;
2289 case XML_PARSER_EOF:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002290 xmlFatalErr(ctxt, XML_ERR_PEREF_AT_EOF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002291 return;
2292 case XML_PARSER_PROLOG:
2293 case XML_PARSER_START:
2294 case XML_PARSER_MISC:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002295 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_PROLOG, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002296 return;
2297 case XML_PARSER_ENTITY_DECL:
2298 case XML_PARSER_CONTENT:
2299 case XML_PARSER_ATTRIBUTE_VALUE:
2300 case XML_PARSER_PI:
2301 case XML_PARSER_SYSTEM_LITERAL:
Daniel Veillard4a7ae502002-02-18 19:18:17 +00002302 case XML_PARSER_PUBLIC_LITERAL:
Owen Taylor3473f882001-02-23 17:55:21 +00002303 /* we just ignore it there */
2304 return;
2305 case XML_PARSER_EPILOG:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002306 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_EPILOG, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002307 return;
2308 case XML_PARSER_ENTITY_VALUE:
2309 /*
2310 * NOTE: in the case of entity values, we don't do the
2311 * substitution here since we need the literal
2312 * entity value to be able to save the internal
2313 * subset of the document.
2314 * This will be handled by xmlStringDecodeEntities
2315 */
2316 return;
2317 case XML_PARSER_DTD:
2318 /*
2319 * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
2320 * In the internal DTD subset, parameter-entity references
2321 * can occur only where markup declarations can occur, not
2322 * within markup declarations.
2323 * In that case this is handled in xmlParseMarkupDecl
2324 */
2325 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
2326 return;
William M. Brack76e95df2003-10-18 16:20:14 +00002327 if (IS_BLANK_CH(NXT(1)) || NXT(1) == 0)
Daniel Veillardf5582f12002-06-11 10:08:16 +00002328 return;
Owen Taylor3473f882001-02-23 17:55:21 +00002329 break;
2330 case XML_PARSER_IGNORE:
2331 return;
2332 }
2333
2334 NEXT;
2335 name = xmlParseName(ctxt);
2336 if (xmlParserDebugEntities)
2337 xmlGenericError(xmlGenericErrorContext,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002338 "PEReference: %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00002339 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002340 xmlFatalErr(ctxt, XML_ERR_PEREF_NO_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002341 } else {
2342 if (RAW == ';') {
2343 NEXT;
2344 if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL))
2345 entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
2346 if (entity == NULL) {
2347
2348 /*
2349 * [ WFC: Entity Declared ]
2350 * In a document without any DTD, a document with only an
2351 * internal DTD subset which contains no parameter entity
2352 * references, or a document with "standalone='yes'", ...
2353 * ... The declaration of a parameter entity must precede
2354 * any reference to it...
2355 */
2356 if ((ctxt->standalone == 1) ||
2357 ((ctxt->hasExternalSubset == 0) &&
2358 (ctxt->hasPErefs == 0))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00002359 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00002360 "PEReference: %%%s; not found\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00002361 } else {
2362 /*
2363 * [ VC: Entity Declared ]
2364 * In a document with an external subset or external
2365 * parameter entities with "standalone='no'", ...
2366 * ... The declaration of a parameter entity must precede
2367 * any reference to it...
2368 */
Daniel Veillard24eb9782003-10-04 21:08:09 +00002369 if ((ctxt->validate) && (ctxt->vctxt.error != NULL)) {
2370 xmlValidityError(ctxt, XML_WAR_UNDECLARED_ENTITY,
2371 "PEReference: %%%s; not found\n",
Daniel Veillardae0765b2008-07-31 19:54:59 +00002372 name, NULL);
Daniel Veillard24eb9782003-10-04 21:08:09 +00002373 } else
2374 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
2375 "PEReference: %%%s; not found\n",
2376 name, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002377 ctxt->valid = 0;
2378 }
Daniel Veillardf5582f12002-06-11 10:08:16 +00002379 } else if (ctxt->input->free != deallocblankswrapper) {
2380 input = xmlNewBlanksWrapperInputStream(ctxt, entity);
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00002381 if (xmlPushInput(ctxt, input) < 0)
2382 return;
Owen Taylor3473f882001-02-23 17:55:21 +00002383 } else {
2384 if ((entity->etype == XML_INTERNAL_PARAMETER_ENTITY) ||
2385 (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)) {
Daniel Veillard87a764e2001-06-20 17:41:10 +00002386 xmlChar start[4];
2387 xmlCharEncoding enc;
2388
Owen Taylor3473f882001-02-23 17:55:21 +00002389 /*
2390 * handle the extra spaces added before and after
2391 * c.f. http://www.w3.org/TR/REC-xml#as-PE
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002392 * this is done independently.
Owen Taylor3473f882001-02-23 17:55:21 +00002393 */
2394 input = xmlNewEntityInputStream(ctxt, entity);
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00002395 if (xmlPushInput(ctxt, input) < 0)
2396 return;
Daniel Veillard87a764e2001-06-20 17:41:10 +00002397
2398 /*
2399 * Get the 4 first bytes and decode the charset
2400 * if enc != XML_CHAR_ENCODING_NONE
2401 * plug some encoding conversion routines.
William M. Bracka0c48ad2004-04-16 15:58:29 +00002402 * Note that, since we may have some non-UTF8
2403 * encoding (like UTF16, bug 135229), the 'length'
2404 * is not known, but we can calculate based upon
2405 * the amount of data in the buffer.
Daniel Veillard87a764e2001-06-20 17:41:10 +00002406 */
2407 GROW
William M. Bracka0c48ad2004-04-16 15:58:29 +00002408 if ((ctxt->input->end - ctxt->input->cur)>=4) {
Daniel Veillarde059b892002-06-13 15:32:10 +00002409 start[0] = RAW;
2410 start[1] = NXT(1);
2411 start[2] = NXT(2);
2412 start[3] = NXT(3);
2413 enc = xmlDetectCharEncoding(start, 4);
2414 if (enc != XML_CHAR_ENCODING_NONE) {
2415 xmlSwitchEncoding(ctxt, enc);
2416 }
Daniel Veillard87a764e2001-06-20 17:41:10 +00002417 }
2418
Owen Taylor3473f882001-02-23 17:55:21 +00002419 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
Daniel Veillarda07050d2003-10-19 14:46:32 +00002420 (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l' )) &&
2421 (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002422 xmlParseTextDecl(ctxt);
2423 }
Owen Taylor3473f882001-02-23 17:55:21 +00002424 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00002425 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
2426 "PEReference: %s is not a parameter entity\n",
2427 name);
Owen Taylor3473f882001-02-23 17:55:21 +00002428 }
2429 }
2430 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002431 xmlFatalErr(ctxt, XML_ERR_PEREF_SEMICOL_MISSING, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002432 }
Owen Taylor3473f882001-02-23 17:55:21 +00002433 }
2434}
2435
/*
 * Macro used to grow the current buffer.
 *
 * @buffer must be an xmlChar * lvalue with a companion int variable named
 * <buffer>_size in scope, and the enclosing function must provide a
 * "mem_error" label.  The new size is twice the old one plus @n.
 *
 * The size is a signed int: if doubling it would leave the int range the
 * macro jumps to mem_error without touching @buffer or its size, instead
 * of overflowing.  ((int) (~0U >> 1)) is the largest int value, spelled
 * without relying on <limits.h> being included.
 */
#define growBuffer(buffer, n) {						\
    xmlChar *tmp;							\
    if (buffer##_size > (((int) (~0U >> 1)) - (n)) / 2)			\
        goto mem_error;							\
    tmp = (xmlChar *)							\
          xmlRealloc(buffer,						\
                     (buffer##_size * 2 + (n)) * sizeof(xmlChar));	\
    if (tmp == NULL) goto mem_error;					\
    buffer = tmp;							\
    buffer##_size = buffer##_size * 2 + (n);				\
}
2448
2449/**
Daniel Veillard7a02cfe2003-09-25 12:18:34 +00002450 * xmlStringLenDecodeEntities:
Owen Taylor3473f882001-02-23 17:55:21 +00002451 * @ctxt: the parser context
2452 * @str: the input string
Daniel Veillarde57ec792003-09-10 10:50:59 +00002453 * @len: the string length
Owen Taylor3473f882001-02-23 17:55:21 +00002454 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2455 * @end: an end marker xmlChar, 0 if none
2456 * @end2: an end marker xmlChar, 0 if none
2457 * @end3: an end marker xmlChar, 0 if none
2458 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002459 * Takes a entity string content and process to do the adequate substitutions.
Owen Taylor3473f882001-02-23 17:55:21 +00002460 *
2461 * [67] Reference ::= EntityRef | CharRef
2462 *
2463 * [69] PEReference ::= '%' Name ';'
2464 *
2465 * Returns A newly allocated string with the substitution done. The caller
2466 * must deallocate it !
2467 */
2468xmlChar *
Daniel Veillarde57ec792003-09-10 10:50:59 +00002469xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2470 int what, xmlChar end, xmlChar end2, xmlChar end3) {
Owen Taylor3473f882001-02-23 17:55:21 +00002471 xmlChar *buffer = NULL;
2472 int buffer_size = 0;
2473
2474 xmlChar *current = NULL;
Daniel Veillard68b6e022008-03-31 09:26:00 +00002475 xmlChar *rep = NULL;
Daniel Veillarde57ec792003-09-10 10:50:59 +00002476 const xmlChar *last;
Owen Taylor3473f882001-02-23 17:55:21 +00002477 xmlEntityPtr ent;
2478 int c,l;
2479 int nbchars = 0;
2480
Daniel Veillarda82b1822004-11-08 16:24:57 +00002481 if ((ctxt == NULL) || (str == NULL) || (len < 0))
Owen Taylor3473f882001-02-23 17:55:21 +00002482 return(NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00002483 last = str + len;
Owen Taylor3473f882001-02-23 17:55:21 +00002484
Daniel Veillard0161e632008-08-28 15:36:32 +00002485 if (((ctxt->depth > 40) &&
2486 ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
2487 (ctxt->depth > 1024)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002488 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002489 return(NULL);
2490 }
2491
2492 /*
2493 * allocate a translation buffer.
2494 */
2495 buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002496 buffer = (xmlChar *) xmlMallocAtomic(buffer_size * sizeof(xmlChar));
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002497 if (buffer == NULL) goto mem_error;
Owen Taylor3473f882001-02-23 17:55:21 +00002498
2499 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002500 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +00002501 * we are operating on already parsed values.
2502 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00002503 if (str < last)
2504 c = CUR_SCHAR(str, l);
2505 else
2506 c = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002507 while ((c != 0) && (c != end) && /* non input consuming loop */
2508 (c != end2) && (c != end3)) {
2509
2510 if (c == 0) break;
2511 if ((c == '&') && (str[1] == '#')) {
2512 int val = xmlParseStringCharRef(ctxt, &str);
2513 if (val != 0) {
2514 COPY_BUF(0,buffer,nbchars,val);
2515 }
Daniel Veillardbedc9772005-09-28 21:42:15 +00002516 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
Daniel Veillard0161e632008-08-28 15:36:32 +00002517 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
Daniel Veillardbedc9772005-09-28 21:42:15 +00002518 }
Owen Taylor3473f882001-02-23 17:55:21 +00002519 } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
2520 if (xmlParserDebugEntities)
2521 xmlGenericError(xmlGenericErrorContext,
2522 "String decoding Entity Reference: %.30s\n",
2523 str);
2524 ent = xmlParseStringEntityRef(ctxt, &str);
Daniel Veillard8915c152008-08-26 13:05:34 +00002525 if ((ctxt->lastError.code == XML_ERR_ENTITY_LOOP) ||
2526 (ctxt->lastError.code == XML_ERR_INTERNAL_ERROR))
Daniel Veillard4bf899b2008-08-20 17:04:30 +00002527 goto int_error;
Daniel Veillard4bf899b2008-08-20 17:04:30 +00002528 if (ent != NULL)
Daniel Veillardf4f4e482008-08-25 08:57:48 +00002529 ctxt->nbentities += ent->checked;
Owen Taylor3473f882001-02-23 17:55:21 +00002530 if ((ent != NULL) &&
2531 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
2532 if (ent->content != NULL) {
2533 COPY_BUF(0,buffer,nbchars,ent->content[0]);
Daniel Veillardbedc9772005-09-28 21:42:15 +00002534 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
Daniel Veillard0161e632008-08-28 15:36:32 +00002535 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
Daniel Veillardbedc9772005-09-28 21:42:15 +00002536 }
Owen Taylor3473f882001-02-23 17:55:21 +00002537 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00002538 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
2539 "predefined entity has no content\n");
Owen Taylor3473f882001-02-23 17:55:21 +00002540 }
2541 } else if ((ent != NULL) && (ent->content != NULL)) {
Owen Taylor3473f882001-02-23 17:55:21 +00002542 ctxt->depth++;
2543 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2544 0, 0, 0);
2545 ctxt->depth--;
Daniel Veillard0161e632008-08-28 15:36:32 +00002546
Owen Taylor3473f882001-02-23 17:55:21 +00002547 if (rep != NULL) {
2548 current = rep;
2549 while (*current != 0) { /* non input consuming loop */
2550 buffer[nbchars++] = *current++;
2551 if (nbchars >
2552 buffer_size - XML_PARSER_BUFFER_SIZE) {
Daniel Veillard0161e632008-08-28 15:36:32 +00002553 if (xmlParserEntityCheck(ctxt, nbchars, ent))
2554 goto int_error;
2555 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
Owen Taylor3473f882001-02-23 17:55:21 +00002556 }
2557 }
2558 xmlFree(rep);
Daniel Veillard68b6e022008-03-31 09:26:00 +00002559 rep = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00002560 }
2561 } else if (ent != NULL) {
2562 int i = xmlStrlen(ent->name);
2563 const xmlChar *cur = ent->name;
2564
2565 buffer[nbchars++] = '&';
2566 if (nbchars > buffer_size - i - XML_PARSER_BUFFER_SIZE) {
Daniel Veillard0161e632008-08-28 15:36:32 +00002567 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
Owen Taylor3473f882001-02-23 17:55:21 +00002568 }
2569 for (;i > 0;i--)
2570 buffer[nbchars++] = *cur++;
2571 buffer[nbchars++] = ';';
2572 }
2573 } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
2574 if (xmlParserDebugEntities)
2575 xmlGenericError(xmlGenericErrorContext,
2576 "String decoding PE Reference: %.30s\n", str);
2577 ent = xmlParseStringPEReference(ctxt, &str);
Daniel Veillard4bf899b2008-08-20 17:04:30 +00002578 if (ctxt->lastError.code == XML_ERR_ENTITY_LOOP)
2579 goto int_error;
Daniel Veillard4bf899b2008-08-20 17:04:30 +00002580 if (ent != NULL)
Daniel Veillardf4f4e482008-08-25 08:57:48 +00002581 ctxt->nbentities += ent->checked;
Owen Taylor3473f882001-02-23 17:55:21 +00002582 if (ent != NULL) {
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00002583 if (ent->content == NULL) {
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00002584 xmlLoadEntityContent(ctxt, ent);
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00002585 }
Owen Taylor3473f882001-02-23 17:55:21 +00002586 ctxt->depth++;
2587 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2588 0, 0, 0);
2589 ctxt->depth--;
2590 if (rep != NULL) {
2591 current = rep;
2592 while (*current != 0) { /* non input consuming loop */
2593 buffer[nbchars++] = *current++;
2594 if (nbchars >
2595 buffer_size - XML_PARSER_BUFFER_SIZE) {
Daniel Veillard0161e632008-08-28 15:36:32 +00002596 if (xmlParserEntityCheck(ctxt, nbchars, ent))
2597 goto int_error;
2598 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
Owen Taylor3473f882001-02-23 17:55:21 +00002599 }
2600 }
2601 xmlFree(rep);
Daniel Veillard68b6e022008-03-31 09:26:00 +00002602 rep = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00002603 }
2604 }
2605 } else {
2606 COPY_BUF(l,buffer,nbchars,c);
2607 str += l;
2608 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
Daniel Veillard0161e632008-08-28 15:36:32 +00002609 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
Owen Taylor3473f882001-02-23 17:55:21 +00002610 }
2611 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00002612 if (str < last)
2613 c = CUR_SCHAR(str, l);
2614 else
2615 c = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002616 }
2617 buffer[nbchars++] = 0;
2618 return(buffer);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002619
2620mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002621 xmlErrMemory(ctxt, NULL);
Daniel Veillard4bf899b2008-08-20 17:04:30 +00002622int_error:
Daniel Veillard68b6e022008-03-31 09:26:00 +00002623 if (rep != NULL)
2624 xmlFree(rep);
2625 if (buffer != NULL)
2626 xmlFree(buffer);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002627 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002628}
2629
Daniel Veillarde57ec792003-09-10 10:50:59 +00002630/**
2631 * xmlStringDecodeEntities:
2632 * @ctxt: the parser context
2633 * @str: the input string
2634 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2635 * @end: an end marker xmlChar, 0 if none
2636 * @end2: an end marker xmlChar, 0 if none
2637 * @end3: an end marker xmlChar, 0 if none
2638 *
2639 * Takes a entity string content and process to do the adequate substitutions.
2640 *
2641 * [67] Reference ::= EntityRef | CharRef
2642 *
2643 * [69] PEReference ::= '%' Name ';'
2644 *
2645 * Returns A newly allocated string with the substitution done. The caller
2646 * must deallocate it !
2647 */
2648xmlChar *
2649xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
2650 xmlChar end, xmlChar end2, xmlChar end3) {
Daniel Veillarda82b1822004-11-08 16:24:57 +00002651 if ((ctxt == NULL) || (str == NULL)) return(NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00002652 return(xmlStringLenDecodeEntities(ctxt, str, xmlStrlen(str), what,
2653 end, end2, end3));
2654}
Owen Taylor3473f882001-02-23 17:55:21 +00002655
2656/************************************************************************
2657 * *
Owen Taylor3473f882001-02-23 17:55:21 +00002658 * Commodity functions, cleanup needed ? *
2659 * *
2660 ************************************************************************/
2661
2662/**
2663 * areBlanks:
2664 * @ctxt: an XML parser context
2665 * @str: a xmlChar *
2666 * @len: the size of @str
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002667 * @blank_chars: we know the chars are blanks
Owen Taylor3473f882001-02-23 17:55:21 +00002668 *
2669 * Is this a sequence of blank chars that one can ignore ?
2670 *
2671 * Returns 1 if ignorable 0 otherwise.
2672 */
2673
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002674static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2675 int blank_chars) {
Owen Taylor3473f882001-02-23 17:55:21 +00002676 int i, ret;
2677 xmlNodePtr lastChild;
2678
Daniel Veillard05c13a22001-09-09 08:38:09 +00002679 /*
2680 * Don't spend time trying to differentiate them, the same callback is
2681 * used !
2682 */
2683 if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters)
Daniel Veillard2f362242001-03-02 17:36:21 +00002684 return(0);
2685
Owen Taylor3473f882001-02-23 17:55:21 +00002686 /*
2687 * Check for xml:space value.
2688 */
Daniel Veillard1114d002006-10-12 16:24:35 +00002689 if ((ctxt->space == NULL) || (*(ctxt->space) == 1) ||
2690 (*(ctxt->space) == -2))
Owen Taylor3473f882001-02-23 17:55:21 +00002691 return(0);
2692
2693 /*
2694 * Check that the string is made of blanks
2695 */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002696 if (blank_chars == 0) {
2697 for (i = 0;i < len;i++)
2698 if (!(IS_BLANK_CH(str[i]))) return(0);
2699 }
Owen Taylor3473f882001-02-23 17:55:21 +00002700
2701 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002702 * Look if the element is mixed content in the DTD if available
Owen Taylor3473f882001-02-23 17:55:21 +00002703 */
Daniel Veillard6dd398f2001-07-25 22:41:03 +00002704 if (ctxt->node == NULL) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002705 if (ctxt->myDoc != NULL) {
2706 ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
2707 if (ret == 0) return(1);
2708 if (ret == 1) return(0);
2709 }
2710
2711 /*
2712 * Otherwise, heuristic :-\
2713 */
Daniel Veillardabac41e2005-07-06 15:17:38 +00002714 if ((RAW != '<') && (RAW != 0xD)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002715 if ((ctxt->node->children == NULL) &&
2716 (RAW == '<') && (NXT(1) == '/')) return(0);
2717
2718 lastChild = xmlGetLastChild(ctxt->node);
2719 if (lastChild == NULL) {
Daniel Veillard7db37732001-07-12 01:20:08 +00002720 if ((ctxt->node->type != XML_ELEMENT_NODE) &&
2721 (ctxt->node->content != NULL)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002722 } else if (xmlNodeIsText(lastChild))
2723 return(0);
2724 else if ((ctxt->node->children != NULL) &&
2725 (xmlNodeIsText(ctxt->node->children)))
2726 return(0);
2727 return(1);
2728}
2729
Owen Taylor3473f882001-02-23 17:55:21 +00002730/************************************************************************
2731 * *
2732 * Extra stuff for namespace support *
2733 * Relates to http://www.w3.org/TR/WD-xml-names *
2734 * *
2735 ************************************************************************/
2736
2737/**
2738 * xmlSplitQName:
2739 * @ctxt: an XML parser context
2740 * @name: an XML parser context
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00002741 * @prefix: a xmlChar **
Owen Taylor3473f882001-02-23 17:55:21 +00002742 *
2743 * parse an UTF8 encoded XML qualified name string
2744 *
2745 * [NS 5] QName ::= (Prefix ':')? LocalPart
2746 *
2747 * [NS 6] Prefix ::= NCName
2748 *
2749 * [NS 7] LocalPart ::= NCName
2750 *
2751 * Returns the local part, and prefix is updated
2752 * to get the Prefix if any.
2753 */
2754
2755xmlChar *
2756xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
2757 xmlChar buf[XML_MAX_NAMELEN + 5];
2758 xmlChar *buffer = NULL;
2759 int len = 0;
2760 int max = XML_MAX_NAMELEN;
2761 xmlChar *ret = NULL;
2762 const xmlChar *cur = name;
2763 int c;
2764
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00002765 if (prefix == NULL) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002766 *prefix = NULL;
2767
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00002768 if (cur == NULL) return(NULL);
2769
Owen Taylor3473f882001-02-23 17:55:21 +00002770#ifndef XML_XML_NAMESPACE
2771 /* xml: prefix is not really a namespace */
2772 if ((cur[0] == 'x') && (cur[1] == 'm') &&
2773 (cur[2] == 'l') && (cur[3] == ':'))
2774 return(xmlStrdup(name));
2775#endif
2776
Daniel Veillard597bc482003-07-24 16:08:28 +00002777 /* nasty but well=formed */
Owen Taylor3473f882001-02-23 17:55:21 +00002778 if (cur[0] == ':')
2779 return(xmlStrdup(name));
2780
2781 c = *cur++;
2782 while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
2783 buf[len++] = c;
2784 c = *cur++;
2785 }
2786 if (len >= max) {
2787 /*
2788 * Okay someone managed to make a huge name, so he's ready to pay
2789 * for the processing speed.
2790 */
2791 max = len * 2;
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00002792
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002793 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002794 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002795 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002796 return(NULL);
2797 }
2798 memcpy(buffer, buf, len);
2799 while ((c != 0) && (c != ':')) { /* tested bigname.xml */
2800 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00002801 xmlChar *tmp;
2802
Owen Taylor3473f882001-02-23 17:55:21 +00002803 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00002804 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00002805 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00002806 if (tmp == NULL) {
Daniel Veillard68b6e022008-03-31 09:26:00 +00002807 xmlFree(buffer);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002808 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002809 return(NULL);
2810 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00002811 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00002812 }
2813 buffer[len++] = c;
2814 c = *cur++;
2815 }
2816 buffer[len] = 0;
2817 }
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00002818
Daniel Veillard597bc482003-07-24 16:08:28 +00002819 if ((c == ':') && (*cur == 0)) {
Daniel Veillard02a49632006-10-13 12:42:31 +00002820 if (buffer != NULL)
2821 xmlFree(buffer);
2822 *prefix = NULL;
Daniel Veillard597bc482003-07-24 16:08:28 +00002823 return(xmlStrdup(name));
Daniel Veillard02a49632006-10-13 12:42:31 +00002824 }
Daniel Veillard597bc482003-07-24 16:08:28 +00002825
Owen Taylor3473f882001-02-23 17:55:21 +00002826 if (buffer == NULL)
2827 ret = xmlStrndup(buf, len);
2828 else {
2829 ret = buffer;
2830 buffer = NULL;
2831 max = XML_MAX_NAMELEN;
2832 }
2833
2834
2835 if (c == ':') {
Daniel Veillardbb284f42002-10-16 18:02:47 +00002836 c = *cur;
Owen Taylor3473f882001-02-23 17:55:21 +00002837 *prefix = ret;
Daniel Veillard597bc482003-07-24 16:08:28 +00002838 if (c == 0) {
Daniel Veillard8d73bcb2003-08-04 01:06:15 +00002839 return(xmlStrndup(BAD_CAST "", 0));
Daniel Veillard597bc482003-07-24 16:08:28 +00002840 }
Owen Taylor3473f882001-02-23 17:55:21 +00002841 len = 0;
2842
Daniel Veillardbb284f42002-10-16 18:02:47 +00002843 /*
2844 * Check that the first character is proper to start
2845 * a new name
2846 */
2847 if (!(((c >= 0x61) && (c <= 0x7A)) ||
2848 ((c >= 0x41) && (c <= 0x5A)) ||
2849 (c == '_') || (c == ':'))) {
2850 int l;
2851 int first = CUR_SCHAR(cur, l);
2852
2853 if (!IS_LETTER(first) && (first != '_')) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00002854 xmlFatalErrMsgStr(ctxt, XML_NS_ERR_QNAME,
Daniel Veillardbb284f42002-10-16 18:02:47 +00002855 "Name %s is not XML Namespace compliant\n",
Daniel Veillardbc92eca2003-09-15 09:48:06 +00002856 name);
Daniel Veillardbb284f42002-10-16 18:02:47 +00002857 }
2858 }
2859 cur++;
2860
Owen Taylor3473f882001-02-23 17:55:21 +00002861 while ((c != 0) && (len < max)) { /* tested bigname2.xml */
2862 buf[len++] = c;
2863 c = *cur++;
2864 }
2865 if (len >= max) {
2866 /*
2867 * Okay someone managed to make a huge name, so he's ready to pay
2868 * for the processing speed.
2869 */
2870 max = len * 2;
Daniel Veillard68b6e022008-03-31 09:26:00 +00002871
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002872 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002873 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002874 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002875 return(NULL);
2876 }
2877 memcpy(buffer, buf, len);
2878 while (c != 0) { /* tested bigname2.xml */
2879 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00002880 xmlChar *tmp;
2881
Owen Taylor3473f882001-02-23 17:55:21 +00002882 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00002883 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00002884 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00002885 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002886 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00002887 xmlFree(buffer);
Owen Taylor3473f882001-02-23 17:55:21 +00002888 return(NULL);
2889 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00002890 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00002891 }
2892 buffer[len++] = c;
2893 c = *cur++;
2894 }
2895 buffer[len] = 0;
2896 }
Daniel Veillard68b6e022008-03-31 09:26:00 +00002897
Owen Taylor3473f882001-02-23 17:55:21 +00002898 if (buffer == NULL)
2899 ret = xmlStrndup(buf, len);
2900 else {
2901 ret = buffer;
2902 }
2903 }
2904
2905 return(ret);
2906}
2907
2908/************************************************************************
2909 * *
2910 * The parser itself *
2911 * Relates to http://www.w3.org/TR/REC-xml *
2912 * *
2913 ************************************************************************/
2914
Daniel Veillard34e3f642008-07-29 09:02:27 +00002915/************************************************************************
2916 * *
2917 * Routines to parse Name, NCName and NmToken *
2918 * *
2919 ************************************************************************/
#ifdef DEBUG
/*
 * Call counters for the name parsing routines, only compiled in debug
 * builds.  Objects with static storage duration start at zero.
 */
static unsigned long nbParseName;
static unsigned long nbParseNmToken;
static unsigned long nbParseNCName;
static unsigned long nbParseNCNameComplex;
static unsigned long nbParseNameComplex;
static unsigned long nbParseStringName;
#endif
2928
Daniel Veillard34e3f642008-07-29 09:02:27 +00002929/*
2930 * The two following functions are related to the change of accepted
2931 * characters for Name and NmToken in the Revision 5 of XML-1.0
2932 * They correspond to the modified production [4] and the new production [4a]
2933 * changes in that revision. Also note that the macros used for the
2934 * productions Letter, Digit, CombiningChar and Extender are not needed
2935 * anymore.
2936 * We still keep compatibility to pre-revision5 parsing semantic if the
2937 * new XML_PARSE_OLD10 option is given to the parser.
2938 */
2939static int
2940xmlIsNameStartChar(xmlParserCtxtPtr ctxt, int c) {
2941 if ((ctxt->options & XML_PARSE_OLD10) == 0) {
2942 /*
2943 * Use the new checks of production [4] [4a] amd [5] of the
2944 * Update 5 of XML-1.0
2945 */
2946 if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
2947 (((c >= 'a') && (c <= 'z')) ||
2948 ((c >= 'A') && (c <= 'Z')) ||
2949 (c == '_') || (c == ':') ||
2950 ((c >= 0xC0) && (c <= 0xD6)) ||
2951 ((c >= 0xD8) && (c <= 0xF6)) ||
2952 ((c >= 0xF8) && (c <= 0x2FF)) ||
2953 ((c >= 0x370) && (c <= 0x37D)) ||
2954 ((c >= 0x37F) && (c <= 0x1FFF)) ||
2955 ((c >= 0x200C) && (c <= 0x200D)) ||
2956 ((c >= 0x2070) && (c <= 0x218F)) ||
2957 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
2958 ((c >= 0x3001) && (c <= 0xD7FF)) ||
2959 ((c >= 0xF900) && (c <= 0xFDCF)) ||
2960 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
2961 ((c >= 0x10000) && (c <= 0xEFFFF))))
2962 return(1);
2963 } else {
2964 if (IS_LETTER(c) || (c == '_') || (c == ':'))
2965 return(1);
2966 }
2967 return(0);
2968}
2969
2970static int
2971xmlIsNameChar(xmlParserCtxtPtr ctxt, int c) {
2972 if ((ctxt->options & XML_PARSE_OLD10) == 0) {
2973 /*
2974 * Use the new checks of production [4] [4a] amd [5] of the
2975 * Update 5 of XML-1.0
2976 */
2977 if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
2978 (((c >= 'a') && (c <= 'z')) ||
2979 ((c >= 'A') && (c <= 'Z')) ||
2980 ((c >= '0') && (c <= '9')) || /* !start */
2981 (c == '_') || (c == ':') ||
2982 (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
2983 ((c >= 0xC0) && (c <= 0xD6)) ||
2984 ((c >= 0xD8) && (c <= 0xF6)) ||
2985 ((c >= 0xF8) && (c <= 0x2FF)) ||
2986 ((c >= 0x300) && (c <= 0x36F)) || /* !start */
2987 ((c >= 0x370) && (c <= 0x37D)) ||
2988 ((c >= 0x37F) && (c <= 0x1FFF)) ||
2989 ((c >= 0x200C) && (c <= 0x200D)) ||
2990 ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
2991 ((c >= 0x2070) && (c <= 0x218F)) ||
2992 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
2993 ((c >= 0x3001) && (c <= 0xD7FF)) ||
2994 ((c >= 0xF900) && (c <= 0xFDCF)) ||
2995 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
2996 ((c >= 0x10000) && (c <= 0xEFFFF))))
2997 return(1);
2998 } else {
2999 if ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3000 (c == '.') || (c == '-') ||
3001 (c == '_') || (c == ':') ||
3002 (IS_COMBINING(c)) ||
3003 (IS_EXTENDER(c)))
3004 return(1);
3005 }
3006 return(0);
3007}
3008
Daniel Veillarde57ec792003-09-10 10:50:59 +00003009static xmlChar * xmlParseAttValueInternal(xmlParserCtxtPtr ctxt,
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003010 int *len, int *alloc, int normalize);
Daniel Veillard0fb18932003-09-07 09:14:37 +00003011
/**
 * xmlParseNameComplex:
 * @ctxt:  an XML parser context
 *
 * Slow path used by xmlParseName(): scans a Name from the current input
 * position one decoded character at a time (CUR_CHAR/NEXTL), so that
 * non-ASCII characters are handled. Depending on XML_PARSE_OLD10 the
 * character classes are either the explicit ranges below or the
 * IS_LETTER/IS_DIGIT/IS_COMBINING/IS_EXTENDER macros.
 *
 * Returns the result of xmlDictLookup() on the bytes consumed, or NULL
 * when the current character cannot start a Name (nothing is consumed
 * in that case).
 */
static const xmlChar *
xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
    int len = 0, l;     /* len: bytes consumed so far; l: byte width of c */
    int c;              /* current decoded character */
    int count = 0;      /* characters consumed since the last GROW */

#ifdef DEBUG
    nbParseNameComplex++;
#endif

    /*
     * Handler for more complex cases
     */
    GROW;
    c = CUR_CHAR(l);
    if ((ctxt->options & XML_PARSE_OLD10) == 0) {
        /*
         * Use the new checks of production [4] [4a] and [5] of the
         * Update 5 of XML-1.0
         */
        if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
            (!(((c >= 'a') && (c <= 'z')) ||
               ((c >= 'A') && (c <= 'Z')) ||
               (c == '_') || (c == ':') ||
               ((c >= 0xC0) && (c <= 0xD6)) ||
               ((c >= 0xD8) && (c <= 0xF6)) ||
               ((c >= 0xF8) && (c <= 0x2FF)) ||
               ((c >= 0x370) && (c <= 0x37D)) ||
               ((c >= 0x37F) && (c <= 0x1FFF)) ||
               ((c >= 0x200C) && (c <= 0x200D)) ||
               ((c >= 0x2070) && (c <= 0x218F)) ||
               ((c >= 0x2C00) && (c <= 0x2FEF)) ||
               ((c >= 0x3001) && (c <= 0xD7FF)) ||
               ((c >= 0xF900) && (c <= 0xFDCF)) ||
               ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
               ((c >= 0x10000) && (c <= 0xEFFFF))))) {
            return(NULL);
        }
        len += l;
        NEXTL(l);
        c = CUR_CHAR(l);
        /* Same ranges as above plus the ones tagged "!start", which are
         * only accepted after the first character. */
        while ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
               (((c >= 'a') && (c <= 'z')) ||
                ((c >= 'A') && (c <= 'Z')) ||
                ((c >= '0') && (c <= '9')) || /* !start */
                (c == '_') || (c == ':') ||
                (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
                ((c >= 0xC0) && (c <= 0xD6)) ||
                ((c >= 0xD8) && (c <= 0xF6)) ||
                ((c >= 0xF8) && (c <= 0x2FF)) ||
                ((c >= 0x300) && (c <= 0x36F)) || /* !start */
                ((c >= 0x370) && (c <= 0x37D)) ||
                ((c >= 0x37F) && (c <= 0x1FFF)) ||
                ((c >= 0x200C) && (c <= 0x200D)) ||
                ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
                ((c >= 0x2070) && (c <= 0x218F)) ||
                ((c >= 0x2C00) && (c <= 0x2FEF)) ||
                ((c >= 0x3001) && (c <= 0xD7FF)) ||
                ((c >= 0xF900) && (c <= 0xFDCF)) ||
                ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
                ((c >= 0x10000) && (c <= 0xEFFFF))
                )) {
            /* Only invoke GROW once every hundred or so characters. */
            if (count++ > 100) {
                count = 0;
                GROW;
            }
            len += l;
            NEXTL(l);
            c = CUR_CHAR(l);
        }
    } else {
        /* XML_PARSE_OLD10 is set: use the macro based character classes. */
        if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
            (!IS_LETTER(c) && (c != '_') &&
             (c != ':'))) {
            return(NULL);
        }
        len += l;
        NEXTL(l);
        c = CUR_CHAR(l);

        while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
               ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
                (c == '.') || (c == '-') ||
                (c == '_') || (c == ':') ||
                (IS_COMBINING(c)) ||
                (IS_EXTENDER(c)))) {
            if (count++ > 100) {
                count = 0;
                GROW;
            }
            len += l;
            NEXTL(l);
            c = CUR_CHAR(l);
        }
    }
    /*
     * When the scan stopped on the '\n' of a "\r\n" pair the name is taken
     * from one byte further back.
     * NOTE(review): presumably CUR_CHAR stepped over the '\r' while
     * decoding the pair, leaving cur one byte past the end of the name -
     * confirm against the CUR_CHAR implementation.
     */
    if ((*ctxt->input->cur == '\n') && (ctxt->input->cur[-1] == '\r'))
        return(xmlDictLookup(ctxt->dict, ctxt->input->cur - (len + 1), len));
    return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
}
3111
Owen Taylor3473f882001-02-23 17:55:21 +00003112/**
3113 * xmlParseName:
3114 * @ctxt: an XML parser context
3115 *
3116 * parse an XML name.
3117 *
3118 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3119 * CombiningChar | Extender
3120 *
3121 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3122 *
Daniel Veillard807b4de2004-09-26 14:42:56 +00003123 * [6] Names ::= Name (#x20 Name)*
Owen Taylor3473f882001-02-23 17:55:21 +00003124 *
3125 * Returns the Name parsed or NULL
3126 */
3127
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003128const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00003129xmlParseName(xmlParserCtxtPtr ctxt) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00003130 const xmlChar *in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003131 const xmlChar *ret;
Owen Taylor3473f882001-02-23 17:55:21 +00003132 int count = 0;
3133
3134 GROW;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003135
Daniel Veillardc6561462009-03-25 10:22:31 +00003136#ifdef DEBUG
Daniel Veillard34e3f642008-07-29 09:02:27 +00003137 nbParseName++;
Daniel Veillardc6561462009-03-25 10:22:31 +00003138#endif
Daniel Veillard34e3f642008-07-29 09:02:27 +00003139
Daniel Veillard48b2f892001-02-25 16:11:03 +00003140 /*
3141 * Accelerator for simple ASCII names
3142 */
3143 in = ctxt->input->cur;
3144 if (((*in >= 0x61) && (*in <= 0x7A)) ||
3145 ((*in >= 0x41) && (*in <= 0x5A)) ||
3146 (*in == '_') || (*in == ':')) {
3147 in++;
3148 while (((*in >= 0x61) && (*in <= 0x7A)) ||
3149 ((*in >= 0x41) && (*in <= 0x5A)) ||
3150 ((*in >= 0x30) && (*in <= 0x39)) ||
Daniel Veillard76d66f42001-05-16 21:05:17 +00003151 (*in == '_') || (*in == '-') ||
3152 (*in == ':') || (*in == '.'))
Daniel Veillard48b2f892001-02-25 16:11:03 +00003153 in++;
Daniel Veillard76d66f42001-05-16 21:05:17 +00003154 if ((*in > 0) && (*in < 0x80)) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00003155 count = in - ctxt->input->cur;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003156 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
Daniel Veillard48b2f892001-02-25 16:11:03 +00003157 ctxt->input->cur = in;
Daniel Veillard77a90a72003-03-22 00:04:05 +00003158 ctxt->nbChars += count;
3159 ctxt->input->col += count;
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003160 if (ret == NULL)
3161 xmlErrMemory(ctxt, NULL);
Daniel Veillard48b2f892001-02-25 16:11:03 +00003162 return(ret);
3163 }
3164 }
Daniel Veillard34e3f642008-07-29 09:02:27 +00003165 /* accelerator for special cases */
Daniel Veillard2f362242001-03-02 17:36:21 +00003166 return(xmlParseNameComplex(ctxt));
Daniel Veillard21a0f912001-02-25 19:54:14 +00003167}
Daniel Veillard48b2f892001-02-25 16:11:03 +00003168
Daniel Veillard34e3f642008-07-29 09:02:27 +00003169static const xmlChar *
3170xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) {
3171 int len = 0, l;
3172 int c;
3173 int count = 0;
3174
Daniel Veillardc6561462009-03-25 10:22:31 +00003175#ifdef DEBUG
Daniel Veillard34e3f642008-07-29 09:02:27 +00003176 nbParseNCNameComplex++;
Daniel Veillardc6561462009-03-25 10:22:31 +00003177#endif
Daniel Veillard34e3f642008-07-29 09:02:27 +00003178
3179 /*
3180 * Handler for more complex cases
3181 */
3182 GROW;
3183 c = CUR_CHAR(l);
3184 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3185 (!xmlIsNameStartChar(ctxt, c) || (c == ':'))) {
3186 return(NULL);
3187 }
3188
3189 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3190 (xmlIsNameChar(ctxt, c) && (c != ':'))) {
3191 if (count++ > 100) {
3192 count = 0;
3193 GROW;
3194 }
3195 len += l;
3196 NEXTL(l);
3197 c = CUR_CHAR(l);
3198 }
3199 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
3200}
3201
3202/**
3203 * xmlParseNCName:
3204 * @ctxt: an XML parser context
3205 * @len: lenght of the string parsed
3206 *
3207 * parse an XML name.
3208 *
3209 * [4NS] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
3210 * CombiningChar | Extender
3211 *
3212 * [5NS] NCName ::= (Letter | '_') (NCNameChar)*
3213 *
3214 * Returns the Name parsed or NULL
3215 */
3216
3217static const xmlChar *
3218xmlParseNCName(xmlParserCtxtPtr ctxt) {
3219 const xmlChar *in;
3220 const xmlChar *ret;
3221 int count = 0;
3222
Daniel Veillardc6561462009-03-25 10:22:31 +00003223#ifdef DEBUG
Daniel Veillard34e3f642008-07-29 09:02:27 +00003224 nbParseNCName++;
Daniel Veillardc6561462009-03-25 10:22:31 +00003225#endif
Daniel Veillard34e3f642008-07-29 09:02:27 +00003226
3227 /*
3228 * Accelerator for simple ASCII names
3229 */
3230 in = ctxt->input->cur;
3231 if (((*in >= 0x61) && (*in <= 0x7A)) ||
3232 ((*in >= 0x41) && (*in <= 0x5A)) ||
3233 (*in == '_')) {
3234 in++;
3235 while (((*in >= 0x61) && (*in <= 0x7A)) ||
3236 ((*in >= 0x41) && (*in <= 0x5A)) ||
3237 ((*in >= 0x30) && (*in <= 0x39)) ||
3238 (*in == '_') || (*in == '-') ||
3239 (*in == '.'))
3240 in++;
3241 if ((*in > 0) && (*in < 0x80)) {
3242 count = in - ctxt->input->cur;
3243 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
3244 ctxt->input->cur = in;
3245 ctxt->nbChars += count;
3246 ctxt->input->col += count;
3247 if (ret == NULL) {
3248 xmlErrMemory(ctxt, NULL);
3249 }
3250 return(ret);
3251 }
3252 }
3253 return(xmlParseNCNameComplex(ctxt));
3254}
3255
Daniel Veillard46de64e2002-05-29 08:21:33 +00003256/**
3257 * xmlParseNameAndCompare:
3258 * @ctxt: an XML parser context
3259 *
3260 * parse an XML name and compares for match
3261 * (specialized for endtag parsing)
3262 *
Daniel Veillard46de64e2002-05-29 08:21:33 +00003263 * Returns NULL for an illegal name, (xmlChar*) 1 for success
3264 * and the name for mismatch
3265 */
3266
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003267static const xmlChar *
Daniel Veillard46de64e2002-05-29 08:21:33 +00003268xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003269 register const xmlChar *cmp = other;
3270 register const xmlChar *in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003271 const xmlChar *ret;
Daniel Veillard46de64e2002-05-29 08:21:33 +00003272
3273 GROW;
Daniel Veillard34e3f642008-07-29 09:02:27 +00003274
Daniel Veillard46de64e2002-05-29 08:21:33 +00003275 in = ctxt->input->cur;
3276 while (*in != 0 && *in == *cmp) {
Daniel Veillard34e3f642008-07-29 09:02:27 +00003277 ++in;
Daniel Veillard46de64e2002-05-29 08:21:33 +00003278 ++cmp;
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00003279 ctxt->input->col++;
Daniel Veillard46de64e2002-05-29 08:21:33 +00003280 }
William M. Brack76e95df2003-10-18 16:20:14 +00003281 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
Daniel Veillard34e3f642008-07-29 09:02:27 +00003282 /* success */
Daniel Veillard46de64e2002-05-29 08:21:33 +00003283 ctxt->input->cur = in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003284 return (const xmlChar*) 1;
Daniel Veillard46de64e2002-05-29 08:21:33 +00003285 }
3286 /* failure (or end of input buffer), check with full function */
3287 ret = xmlParseName (ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00003288 /* strings coming from the dictionnary direct compare possible */
3289 if (ret == other) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003290 return (const xmlChar*) 1;
Daniel Veillard46de64e2002-05-29 08:21:33 +00003291 }
3292 return ret;
3293}
3294
Owen Taylor3473f882001-02-23 17:55:21 +00003295/**
3296 * xmlParseStringName:
3297 * @ctxt: an XML parser context
3298 * @str: a pointer to the string pointer (IN/OUT)
3299 *
3300 * parse an XML name.
3301 *
3302 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3303 * CombiningChar | Extender
3304 *
3305 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3306 *
Daniel Veillard807b4de2004-09-26 14:42:56 +00003307 * [6] Names ::= Name (#x20 Name)*
Owen Taylor3473f882001-02-23 17:55:21 +00003308 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003309 * Returns the Name parsed or NULL. The @str pointer
Owen Taylor3473f882001-02-23 17:55:21 +00003310 * is updated to the current location in the string.
3311 */
3312
Daniel Veillard56a4cb82001-03-24 17:00:36 +00003313static xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00003314xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
3315 xmlChar buf[XML_MAX_NAMELEN + 5];
3316 const xmlChar *cur = *str;
3317 int len = 0, l;
3318 int c;
3319
Daniel Veillardc6561462009-03-25 10:22:31 +00003320#ifdef DEBUG
Daniel Veillard34e3f642008-07-29 09:02:27 +00003321 nbParseStringName++;
Daniel Veillardc6561462009-03-25 10:22:31 +00003322#endif
Daniel Veillard34e3f642008-07-29 09:02:27 +00003323
Owen Taylor3473f882001-02-23 17:55:21 +00003324 c = CUR_SCHAR(cur, l);
Daniel Veillard34e3f642008-07-29 09:02:27 +00003325 if (!xmlIsNameStartChar(ctxt, c)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003326 return(NULL);
3327 }
3328
Daniel Veillard34e3f642008-07-29 09:02:27 +00003329 COPY_BUF(l,buf,len,c);
3330 cur += l;
3331 c = CUR_SCHAR(cur, l);
3332 while (xmlIsNameChar(ctxt, c)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003333 COPY_BUF(l,buf,len,c);
3334 cur += l;
3335 c = CUR_SCHAR(cur, l);
3336 if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
3337 /*
3338 * Okay someone managed to make a huge name, so he's ready to pay
3339 * for the processing speed.
3340 */
3341 xmlChar *buffer;
3342 int max = len * 2;
Daniel Veillard34e3f642008-07-29 09:02:27 +00003343
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003344 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003345 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003346 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003347 return(NULL);
3348 }
3349 memcpy(buffer, buf, len);
Daniel Veillard34e3f642008-07-29 09:02:27 +00003350 while (xmlIsNameChar(ctxt, c)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003351 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003352 xmlChar *tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003353 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003354 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00003355 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00003356 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003357 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00003358 xmlFree(buffer);
Owen Taylor3473f882001-02-23 17:55:21 +00003359 return(NULL);
3360 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003361 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003362 }
3363 COPY_BUF(l,buffer,len,c);
3364 cur += l;
3365 c = CUR_SCHAR(cur, l);
3366 }
3367 buffer[len] = 0;
3368 *str = cur;
3369 return(buffer);
3370 }
3371 }
3372 *str = cur;
3373 return(xmlStrndup(buf, len));
3374}
3375
3376/**
3377 * xmlParseNmtoken:
3378 * @ctxt: an XML parser context
Daniel Veillard34e3f642008-07-29 09:02:27 +00003379 *
Owen Taylor3473f882001-02-23 17:55:21 +00003380 * parse an XML Nmtoken.
3381 *
3382 * [7] Nmtoken ::= (NameChar)+
3383 *
Daniel Veillard807b4de2004-09-26 14:42:56 +00003384 * [8] Nmtokens ::= Nmtoken (#x20 Nmtoken)*
Owen Taylor3473f882001-02-23 17:55:21 +00003385 *
3386 * Returns the Nmtoken parsed or NULL
3387 */
3388
3389xmlChar *
3390xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
3391 xmlChar buf[XML_MAX_NAMELEN + 5];
3392 int len = 0, l;
3393 int c;
3394 int count = 0;
3395
Daniel Veillardc6561462009-03-25 10:22:31 +00003396#ifdef DEBUG
Daniel Veillard34e3f642008-07-29 09:02:27 +00003397 nbParseNmToken++;
Daniel Veillardc6561462009-03-25 10:22:31 +00003398#endif
Daniel Veillard34e3f642008-07-29 09:02:27 +00003399
Owen Taylor3473f882001-02-23 17:55:21 +00003400 GROW;
3401 c = CUR_CHAR(l);
3402
Daniel Veillard34e3f642008-07-29 09:02:27 +00003403 while (xmlIsNameChar(ctxt, c)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003404 if (count++ > 100) {
3405 count = 0;
3406 GROW;
3407 }
3408 COPY_BUF(l,buf,len,c);
3409 NEXTL(l);
3410 c = CUR_CHAR(l);
3411 if (len >= XML_MAX_NAMELEN) {
3412 /*
3413 * Okay someone managed to make a huge token, so he's ready to pay
3414 * for the processing speed.
3415 */
3416 xmlChar *buffer;
3417 int max = len * 2;
Daniel Veillard34e3f642008-07-29 09:02:27 +00003418
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003419 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003420 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003421 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003422 return(NULL);
3423 }
3424 memcpy(buffer, buf, len);
Daniel Veillard34e3f642008-07-29 09:02:27 +00003425 while (xmlIsNameChar(ctxt, c)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003426 if (count++ > 100) {
3427 count = 0;
3428 GROW;
3429 }
3430 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003431 xmlChar *tmp;
3432
Owen Taylor3473f882001-02-23 17:55:21 +00003433 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003434 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00003435 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00003436 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003437 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00003438 xmlFree(buffer);
Owen Taylor3473f882001-02-23 17:55:21 +00003439 return(NULL);
3440 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003441 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003442 }
3443 COPY_BUF(l,buffer,len,c);
3444 NEXTL(l);
3445 c = CUR_CHAR(l);
3446 }
3447 buffer[len] = 0;
3448 return(buffer);
3449 }
3450 }
3451 if (len == 0)
3452 return(NULL);
3453 return(xmlStrndup(buf, len));
3454}
3455
/**
 * xmlParseEntityValue:
 * @ctxt:  an XML parser context
 * @orig:  if non-NULL store a copy of the original entity value; it is
 *         only written when the closing quote was found, and in that case
 *         the raw buffer is handed over instead of being freed here
 *
 * parse a value for ENTITY declarations
 *
 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
 *                     "'" ([^%&'] | PEReference | Reference)* "'"
 *
 * The literal is first copied verbatim into a buffer, then checked for
 * stray '%' / '&' characters, and finally passed to
 * xmlStringDecodeEntities() with XML_SUBSTITUTE_PEREF.
 *
 * Returns the EntityValue parsed with reference substituted or NULL
 */

xmlChar *
xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
    xmlChar *buf = NULL;            /* raw copy of the literal */
    int len = 0;                    /* bytes used in buf */
    int size = XML_PARSER_BUFFER_SIZE; /* bytes allocated for buf */
    int c, l;                       /* current char and its byte width */
    xmlChar stop;                   /* quote character closing the literal */
    xmlChar *ret = NULL;
    const xmlChar *cur = NULL;
    xmlParserInputPtr input;        /* input the literal started in */

    if (RAW == '"') stop = '"';
    else if (RAW == '\'') stop = '\'';
    else {
        xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_STARTED, NULL);
        return(NULL);
    }
    buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
    if (buf == NULL) {
        xmlErrMemory(ctxt, NULL);
        return(NULL);
    }

    /*
     * The content of the entity definition is copied in a buffer.
     */

    ctxt->instate = XML_PARSER_ENTITY_VALUE;
    input = ctxt->input;
    GROW;
    NEXT;
    c = CUR_CHAR(l);
    /*
     * NOTE: 4.4.5 Included in Literal
     * When a parameter entity reference appears in a literal entity
     * value, ... a single or double quote character in the replacement
     * text is always treated as a normal data character and will not
     * terminate the literal.
     * In practice it means we stop the loop only when back at parsing
     * the initial entity and the quote is found
     */
    while ((IS_CHAR(c)) && ((c != stop) || /* checked */
           (ctxt->input != input))) {
        /* keep at least 5 spare bytes before copying the next character */
        if (len + 5 >= size) {
            xmlChar *tmp;

            size *= 2;
            tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
            if (tmp == NULL) {
                xmlErrMemory(ctxt, NULL);
                xmlFree(buf);
                return(NULL);
            }
            buf = tmp;
        }
        COPY_BUF(l,buf,len,c);
        NEXTL(l);
        /*
         * Pop-up of finished entities.
         */
        while ((RAW == 0) && (ctxt->inputNr > 1)) /* non input consuming */
            xmlPopInput(ctxt);

        GROW;
        c = CUR_CHAR(l);
        /* a 0 here gets one more GROW and read before the loop test */
        if (c == 0) {
            GROW;
            c = CUR_CHAR(l);
        }
    }
    buf[len] = 0;

    /*
     * Raise problem w.r.t. '&' and '%' being used in non-entities
     * reference constructs. Note Charref will be handled in
     * xmlStringDecodeEntities()
     */
    cur = buf;
    while (*cur != 0) { /* non input consuming */
        if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
            xmlChar *name;
            xmlChar tmp = *cur;     /* which of '%' / '&' introduced it */

            cur++;
            /* advances cur past the name when one is found */
            name = xmlParseStringName(ctxt, &cur);
            if ((name == NULL) || (*cur != ';')) {
                xmlFatalErrMsgInt(ctxt, XML_ERR_ENTITY_CHAR_ERROR,
            "EntityValue: '%c' forbidden except for entities references\n",
                                  tmp);
            }
            if ((tmp == '%') && (ctxt->inSubset == 1) &&
                (ctxt->inputNr == 1)) {
                xmlFatalErr(ctxt, XML_ERR_ENTITY_PE_INTERNAL, NULL);
            }
            if (name != NULL)
                xmlFree(name);
            /* do not step past the terminating 0 with the cur++ below */
            if (*cur == 0)
                break;
        }
        cur++;
    }

    /*
     * Then PEReference entities are substituted.
     */
    if (c != stop) {
        /* the literal was never closed: drop the buffer, ret stays NULL */
        xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_FINISHED, NULL);
        xmlFree(buf);
    } else {
        NEXT;
        /*
         * NOTE: 4.4.7 Bypassed
         * When a general entity reference appears in the EntityValue in
         * an entity declaration, it is bypassed and left as is.
         * so XML_SUBSTITUTE_REF is not set here.
         */
        ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF,
                                      0, 0, 0);
        if (orig != NULL)
            *orig = buf;
        else
            xmlFree(buf);
    }

    return(ret);
}
3595
3596/**
Daniel Veillard01c13b52002-12-10 15:19:08 +00003597 * xmlParseAttValueComplex:
3598 * @ctxt: an XML parser context
Daniel Veillard0fb18932003-09-07 09:14:37 +00003599 * @len: the resulting attribute len
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003600 * @normalize: wether to apply the inner normalization
Daniel Veillard01c13b52002-12-10 15:19:08 +00003601 *
3602 * parse a value for an attribute, this is the fallback function
3603 * of xmlParseAttValue() when the attribute parsing requires handling
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003604 * of non-ASCII characters, or normalization compaction.
Daniel Veillard01c13b52002-12-10 15:19:08 +00003605 *
3606 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
3607 */
Daniel Veillard0fb18932003-09-07 09:14:37 +00003608static xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003609xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) {
Daniel Veillarde72c7562002-05-31 09:47:30 +00003610 xmlChar limit = 0;
3611 xmlChar *buf = NULL;
Daniel Veillard68b6e022008-03-31 09:26:00 +00003612 xmlChar *rep = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00003613 int len = 0;
3614 int buf_size = 0;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003615 int c, l, in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003616 xmlChar *current = NULL;
3617 xmlEntityPtr ent;
3618
Owen Taylor3473f882001-02-23 17:55:21 +00003619 if (NXT(0) == '"') {
3620 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3621 limit = '"';
3622 NEXT;
3623 } else if (NXT(0) == '\'') {
3624 limit = '\'';
3625 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3626 NEXT;
3627 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003628 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003629 return(NULL);
3630 }
Daniel Veillard68b6e022008-03-31 09:26:00 +00003631
Owen Taylor3473f882001-02-23 17:55:21 +00003632 /*
3633 * allocate a translation buffer.
3634 */
3635 buf_size = XML_PARSER_BUFFER_SIZE;
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003636 buf = (xmlChar *) xmlMallocAtomic(buf_size * sizeof(xmlChar));
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003637 if (buf == NULL) goto mem_error;
Owen Taylor3473f882001-02-23 17:55:21 +00003638
3639 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003640 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +00003641 */
3642 c = CUR_CHAR(l);
Daniel Veillardfdc91562002-07-01 21:52:03 +00003643 while ((NXT(0) != limit) && /* checked */
Daniel Veillardb9e5acc2007-06-12 13:43:00 +00003644 (IS_CHAR(c)) && (c != '<')) {
Owen Taylor3473f882001-02-23 17:55:21 +00003645 if (c == 0) break;
Daniel Veillardfdc91562002-07-01 21:52:03 +00003646 if (c == '&') {
Daniel Veillard62998c02003-09-15 12:56:36 +00003647 in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003648 if (NXT(1) == '#') {
3649 int val = xmlParseCharRef(ctxt);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003650
Owen Taylor3473f882001-02-23 17:55:21 +00003651 if (val == '&') {
Daniel Veillard319a7422001-09-11 09:27:09 +00003652 if (ctxt->replaceEntities) {
3653 if (len > buf_size - 10) {
Daniel Veillard0161e632008-08-28 15:36:32 +00003654 growBuffer(buf, 10);
Daniel Veillard319a7422001-09-11 09:27:09 +00003655 }
3656 buf[len++] = '&';
3657 } else {
3658 /*
3659 * The reparsing will be done in xmlStringGetNodeList()
3660 * called by the attribute() function in SAX.c
3661 */
Daniel Veillard319a7422001-09-11 09:27:09 +00003662 if (len > buf_size - 10) {
Daniel Veillard0161e632008-08-28 15:36:32 +00003663 growBuffer(buf, 10);
Daniel Veillard319a7422001-09-11 09:27:09 +00003664 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003665 buf[len++] = '&';
3666 buf[len++] = '#';
3667 buf[len++] = '3';
3668 buf[len++] = '8';
3669 buf[len++] = ';';
Owen Taylor3473f882001-02-23 17:55:21 +00003670 }
Daniel Veillarddc171602008-03-26 17:41:38 +00003671 } else if (val != 0) {
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00003672 if (len > buf_size - 10) {
Daniel Veillard0161e632008-08-28 15:36:32 +00003673 growBuffer(buf, 10);
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00003674 }
Owen Taylor3473f882001-02-23 17:55:21 +00003675 len += xmlCopyChar(0, &buf[len], val);
3676 }
3677 } else {
3678 ent = xmlParseEntityRef(ctxt);
Daniel Veillardcba68392008-08-29 12:43:40 +00003679 ctxt->nbentities++;
3680 if (ent != NULL)
3681 ctxt->nbentities += ent->owner;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003682 if ((ent != NULL) &&
3683 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
3684 if (len > buf_size - 10) {
Daniel Veillard0161e632008-08-28 15:36:32 +00003685 growBuffer(buf, 10);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003686 }
3687 if ((ctxt->replaceEntities == 0) &&
3688 (ent->content[0] == '&')) {
3689 buf[len++] = '&';
3690 buf[len++] = '#';
3691 buf[len++] = '3';
3692 buf[len++] = '8';
3693 buf[len++] = ';';
3694 } else {
3695 buf[len++] = ent->content[0];
3696 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003697 } else if ((ent != NULL) &&
3698 (ctxt->replaceEntities != 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003699 if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
3700 rep = xmlStringDecodeEntities(ctxt, ent->content,
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003701 XML_SUBSTITUTE_REF,
3702 0, 0, 0);
Owen Taylor3473f882001-02-23 17:55:21 +00003703 if (rep != NULL) {
3704 current = rep;
3705 while (*current != 0) { /* non input consuming */
3706 buf[len++] = *current++;
3707 if (len > buf_size - 10) {
Daniel Veillard0161e632008-08-28 15:36:32 +00003708 growBuffer(buf, 10);
Owen Taylor3473f882001-02-23 17:55:21 +00003709 }
3710 }
3711 xmlFree(rep);
Daniel Veillard68b6e022008-03-31 09:26:00 +00003712 rep = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00003713 }
3714 } else {
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00003715 if (len > buf_size - 10) {
Daniel Veillard0161e632008-08-28 15:36:32 +00003716 growBuffer(buf, 10);
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00003717 }
Owen Taylor3473f882001-02-23 17:55:21 +00003718 if (ent->content != NULL)
3719 buf[len++] = ent->content[0];
3720 }
3721 } else if (ent != NULL) {
3722 int i = xmlStrlen(ent->name);
3723 const xmlChar *cur = ent->name;
3724
3725 /*
3726 * This may look absurd but is needed to detect
3727 * entities problems
3728 */
3729 if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
3730 (ent->content != NULL)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003731 rep = xmlStringDecodeEntities(ctxt, ent->content,
Daniel Veillard68b6e022008-03-31 09:26:00 +00003732 XML_SUBSTITUTE_REF, 0, 0, 0);
3733 if (rep != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +00003734 xmlFree(rep);
Daniel Veillard68b6e022008-03-31 09:26:00 +00003735 rep = NULL;
3736 }
Owen Taylor3473f882001-02-23 17:55:21 +00003737 }
3738
3739 /*
3740 * Just output the reference
3741 */
3742 buf[len++] = '&';
Daniel Veillard0161e632008-08-28 15:36:32 +00003743 while (len > buf_size - i - 10) {
3744 growBuffer(buf, i + 10);
Owen Taylor3473f882001-02-23 17:55:21 +00003745 }
3746 for (;i > 0;i--)
3747 buf[len++] = *cur++;
3748 buf[len++] = ';';
3749 }
3750 }
3751 } else {
3752 if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003753 if ((len != 0) || (!normalize)) {
3754 if ((!normalize) || (!in_space)) {
3755 COPY_BUF(l,buf,len,0x20);
Daniel Veillard0161e632008-08-28 15:36:32 +00003756 while (len > buf_size - 10) {
3757 growBuffer(buf, 10);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003758 }
3759 }
3760 in_space = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003761 }
3762 } else {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003763 in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003764 COPY_BUF(l,buf,len,c);
3765 if (len > buf_size - 10) {
Daniel Veillard0161e632008-08-28 15:36:32 +00003766 growBuffer(buf, 10);
Owen Taylor3473f882001-02-23 17:55:21 +00003767 }
3768 }
3769 NEXTL(l);
3770 }
3771 GROW;
3772 c = CUR_CHAR(l);
3773 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003774 if ((in_space) && (normalize)) {
3775 while (buf[len - 1] == 0x20) len--;
3776 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00003777 buf[len] = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003778 if (RAW == '<') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003779 xmlFatalErr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003780 } else if (RAW != limit) {
Daniel Veillardb9e5acc2007-06-12 13:43:00 +00003781 if ((c != 0) && (!IS_CHAR(c))) {
3782 xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
3783 "invalid character in attribute value\n");
3784 } else {
3785 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
3786 "AttValue: ' expected\n");
3787 }
Owen Taylor3473f882001-02-23 17:55:21 +00003788 } else
3789 NEXT;
Daniel Veillard0fb18932003-09-07 09:14:37 +00003790 if (attlen != NULL) *attlen = len;
Owen Taylor3473f882001-02-23 17:55:21 +00003791 return(buf);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003792
3793mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003794 xmlErrMemory(ctxt, NULL);
Daniel Veillard68b6e022008-03-31 09:26:00 +00003795 if (buf != NULL)
3796 xmlFree(buf);
3797 if (rep != NULL)
3798 xmlFree(rep);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003799 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003800}
3801
3802/**
Daniel Veillard0fb18932003-09-07 09:14:37 +00003803 * xmlParseAttValue:
3804 * @ctxt: an XML parser context
3805 *
3806 * parse a value for an attribute
3807 * Note: the parser won't do substitution of entities here, this
3808 * will be handled later in xmlStringGetNodeList
3809 *
3810 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
3811 * "'" ([^<&'] | Reference)* "'"
3812 *
3813 * 3.3.3 Attribute-Value Normalization:
3814 * Before the value of an attribute is passed to the application or
3815 * checked for validity, the XML processor must normalize it as follows:
3816 * - a character reference is processed by appending the referenced
3817 * character to the attribute value
3818 * - an entity reference is processed by recursively processing the
3819 * replacement text of the entity
3820 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
3821 * appending #x20 to the normalized value, except that only a single
3822 * #x20 is appended for a "#xD#xA" sequence that is part of an external
3823 * parsed entity or the literal entity value of an internal parsed entity
3824 * - other characters are processed by appending them to the normalized value
3825 * If the declared value is not CDATA, then the XML processor must further
3826 * process the normalized attribute value by discarding any leading and
3827 * trailing space (#x20) characters, and by replacing sequences of space
3828 * (#x20) characters by a single space (#x20) character.
3829 * All attributes for which no declaration has been read should be treated
3830 * by a non-validating parser as if declared CDATA.
3831 *
3832 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
3833 */
3834
3835
3836xmlChar *
3837xmlParseAttValue(xmlParserCtxtPtr ctxt) {
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00003838 if ((ctxt == NULL) || (ctxt->input == NULL)) return(NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003839 return(xmlParseAttValueInternal(ctxt, NULL, NULL, 0));
Daniel Veillard0fb18932003-09-07 09:14:37 +00003840}
3841
3842/**
Owen Taylor3473f882001-02-23 17:55:21 +00003843 * xmlParseSystemLiteral:
3844 * @ctxt: an XML parser context
3845 *
3846 * parse an XML Literal
3847 *
3848 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
3849 *
3850 * Returns the SystemLiteral parsed or NULL
3851 */
3852
3853xmlChar *
3854xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
3855 xmlChar *buf = NULL;
3856 int len = 0;
3857 int size = XML_PARSER_BUFFER_SIZE;
3858 int cur, l;
3859 xmlChar stop;
3860 int state = ctxt->instate;
3861 int count = 0;
3862
3863 SHRINK;
3864 if (RAW == '"') {
3865 NEXT;
3866 stop = '"';
3867 } else if (RAW == '\'') {
3868 NEXT;
3869 stop = '\'';
3870 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003871 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003872 return(NULL);
3873 }
3874
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003875 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003876 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003877 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003878 return(NULL);
3879 }
3880 ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
3881 cur = CUR_CHAR(l);
William M. Brack871611b2003-10-18 04:53:14 +00003882 while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00003883 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003884 xmlChar *tmp;
3885
Owen Taylor3473f882001-02-23 17:55:21 +00003886 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003887 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3888 if (tmp == NULL) {
3889 xmlFree(buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003890 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003891 ctxt->instate = (xmlParserInputState) state;
3892 return(NULL);
3893 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003894 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003895 }
3896 count++;
3897 if (count > 50) {
3898 GROW;
3899 count = 0;
3900 }
3901 COPY_BUF(l,buf,len,cur);
3902 NEXTL(l);
3903 cur = CUR_CHAR(l);
3904 if (cur == 0) {
3905 GROW;
3906 SHRINK;
3907 cur = CUR_CHAR(l);
3908 }
3909 }
3910 buf[len] = 0;
3911 ctxt->instate = (xmlParserInputState) state;
William M. Brack871611b2003-10-18 04:53:14 +00003912 if (!IS_CHAR(cur)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003913 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003914 } else {
3915 NEXT;
3916 }
3917 return(buf);
3918}
3919
3920/**
3921 * xmlParsePubidLiteral:
3922 * @ctxt: an XML parser context
3923 *
3924 * parse an XML public literal
3925 *
3926 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
3927 *
3928 * Returns the PubidLiteral parsed or NULL.
3929 */
3930
3931xmlChar *
3932xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
3933 xmlChar *buf = NULL;
3934 int len = 0;
3935 int size = XML_PARSER_BUFFER_SIZE;
3936 xmlChar cur;
3937 xmlChar stop;
3938 int count = 0;
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003939 xmlParserInputState oldstate = ctxt->instate;
Owen Taylor3473f882001-02-23 17:55:21 +00003940
3941 SHRINK;
3942 if (RAW == '"') {
3943 NEXT;
3944 stop = '"';
3945 } else if (RAW == '\'') {
3946 NEXT;
3947 stop = '\'';
3948 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003949 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003950 return(NULL);
3951 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003952 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003953 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003954 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003955 return(NULL);
3956 }
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003957 ctxt->instate = XML_PARSER_PUBLIC_LITERAL;
Owen Taylor3473f882001-02-23 17:55:21 +00003958 cur = CUR;
William M. Brack76e95df2003-10-18 16:20:14 +00003959 while ((IS_PUBIDCHAR_CH(cur)) && (cur != stop)) { /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00003960 if (len + 1 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003961 xmlChar *tmp;
3962
Owen Taylor3473f882001-02-23 17:55:21 +00003963 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003964 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3965 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003966 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00003967 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00003968 return(NULL);
3969 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003970 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003971 }
3972 buf[len++] = cur;
3973 count++;
3974 if (count > 50) {
3975 GROW;
3976 count = 0;
3977 }
3978 NEXT;
3979 cur = CUR;
3980 if (cur == 0) {
3981 GROW;
3982 SHRINK;
3983 cur = CUR;
3984 }
3985 }
3986 buf[len] = 0;
3987 if (cur != stop) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003988 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003989 } else {
3990 NEXT;
3991 }
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003992 ctxt->instate = oldstate;
Owen Taylor3473f882001-02-23 17:55:21 +00003993 return(buf);
3994}
3995
Daniel Veillard48b2f892001-02-25 16:11:03 +00003996void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata);
Daniel Veillard0a119eb2005-07-20 13:46:00 +00003997
/*
 * used for the test in the inner loop of the char data testing
 *
 * Indexed by byte value. A non-zero entry (equal to its own index) means
 * the byte can be skipped over by the fast character-data scan; a zero
 * entry stops the scan. Zero entries are: all control characters except
 * 0x09, the characters '&', '<' and ']', and every byte >= 0x80.
 */
static const unsigned char test_char_data[256] = {
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x9, CR/LF separated */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x00, 0x27, /* & */
    0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
    0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
    0x38, 0x39, 0x3A, 0x3B, 0x00, 0x3D, 0x3E, 0x3F, /* < */
    0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
    0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F,
    0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
    0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x00, 0x5E, 0x5F, /* ] */
    0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
    0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
    0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* non-ascii */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
};

Owen Taylor3473f882001-02-23 17:55:21 +00004036/**
4037 * xmlParseCharData:
4038 * @ctxt: an XML parser context
4039 * @cdata: int indicating whether we are within a CDATA section
4040 *
4041 * parse a CharData section.
4042 * if we are within a CDATA section ']]>' marks an end of section.
4043 *
4044 * The right angle bracket (>) may be represented using the string "&gt;",
4045 * and must, for compatibility, be escaped using "&gt;" or a character
4046 * reference when it appears in the string "]]>" in content, when that
4047 * string is not marking the end of a CDATA section.
4048 *
4049 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
4050 */
4051
4052void
4053xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00004054 const xmlChar *in;
Daniel Veillard48b2f892001-02-25 16:11:03 +00004055 int nbchar = 0;
Daniel Veillard50582112001-03-26 22:52:16 +00004056 int line = ctxt->input->line;
4057 int col = ctxt->input->col;
Daniel Veillard0714c5b2005-01-23 00:01:01 +00004058 int ccol;
Daniel Veillard48b2f892001-02-25 16:11:03 +00004059
4060 SHRINK;
4061 GROW;
4062 /*
4063 * Accelerated common case where input don't need to be
4064 * modified before passing it to the handler.
4065 */
Daniel Veillardfdc91562002-07-01 21:52:03 +00004066 if (!cdata) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00004067 in = ctxt->input->cur;
4068 do {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004069get_more_space:
Daniel Veillard9e264ad2008-01-11 06:10:16 +00004070 while (*in == 0x20) { in++; ctxt->input->col++; }
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004071 if (*in == 0xA) {
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004072 do {
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00004073 ctxt->input->line++; ctxt->input->col = 1;
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004074 in++;
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004075 } while (*in == 0xA);
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004076 goto get_more_space;
4077 }
4078 if (*in == '<') {
4079 nbchar = in - ctxt->input->cur;
4080 if (nbchar > 0) {
4081 const xmlChar *tmp = ctxt->input->cur;
4082 ctxt->input->cur = in;
4083
Daniel Veillard34099b42004-11-04 17:34:35 +00004084 if ((ctxt->sax != NULL) &&
4085 (ctxt->sax->ignorableWhitespace !=
4086 ctxt->sax->characters)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004087 if (areBlanks(ctxt, tmp, nbchar, 1)) {
Daniel Veillard32acf0c2005-03-31 14:12:37 +00004088 if (ctxt->sax->ignorableWhitespace != NULL)
4089 ctxt->sax->ignorableWhitespace(ctxt->userData,
4090 tmp, nbchar);
Daniel Veillard1114d002006-10-12 16:24:35 +00004091 } else {
4092 if (ctxt->sax->characters != NULL)
4093 ctxt->sax->characters(ctxt->userData,
4094 tmp, nbchar);
4095 if (*ctxt->space == -1)
4096 *ctxt->space = -2;
4097 }
Daniel Veillard34099b42004-11-04 17:34:35 +00004098 } else if ((ctxt->sax != NULL) &&
4099 (ctxt->sax->characters != NULL)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004100 ctxt->sax->characters(ctxt->userData,
4101 tmp, nbchar);
4102 }
4103 }
4104 return;
4105 }
Daniel Veillard0714c5b2005-01-23 00:01:01 +00004106
Daniel Veillard3ed155f2001-04-29 19:56:59 +00004107get_more:
Daniel Veillard0714c5b2005-01-23 00:01:01 +00004108 ccol = ctxt->input->col;
Daniel Veillard0a119eb2005-07-20 13:46:00 +00004109 while (test_char_data[*in]) {
4110 in++;
4111 ccol++;
4112 }
Daniel Veillard0714c5b2005-01-23 00:01:01 +00004113 ctxt->input->col = ccol;
Daniel Veillard561b7f82002-03-20 21:55:57 +00004114 if (*in == 0xA) {
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004115 do {
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00004116 ctxt->input->line++; ctxt->input->col = 1;
Daniel Veillard3ed155f2001-04-29 19:56:59 +00004117 in++;
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004118 } while (*in == 0xA);
Daniel Veillard3ed155f2001-04-29 19:56:59 +00004119 goto get_more;
Daniel Veillard561b7f82002-03-20 21:55:57 +00004120 }
4121 if (*in == ']') {
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00004122 if ((in[1] == ']') && (in[2] == '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004123 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00004124 ctxt->input->cur = in;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00004125 return;
4126 }
4127 in++;
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00004128 ctxt->input->col++;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00004129 goto get_more;
4130 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00004131 nbchar = in - ctxt->input->cur;
Daniel Veillard80f32572001-03-07 19:45:40 +00004132 if (nbchar > 0) {
Daniel Veillard34099b42004-11-04 17:34:35 +00004133 if ((ctxt->sax != NULL) &&
4134 (ctxt->sax->ignorableWhitespace !=
Daniel Veillard40412cd2003-09-03 13:28:32 +00004135 ctxt->sax->characters) &&
William M. Brack76e95df2003-10-18 16:20:14 +00004136 (IS_BLANK_CH(*ctxt->input->cur))) {
Daniel Veillarda7374592001-05-10 14:17:55 +00004137 const xmlChar *tmp = ctxt->input->cur;
4138 ctxt->input->cur = in;
Daniel Veillard40412cd2003-09-03 13:28:32 +00004139
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004140 if (areBlanks(ctxt, tmp, nbchar, 0)) {
Daniel Veillard32acf0c2005-03-31 14:12:37 +00004141 if (ctxt->sax->ignorableWhitespace != NULL)
4142 ctxt->sax->ignorableWhitespace(ctxt->userData,
4143 tmp, nbchar);
Daniel Veillard1114d002006-10-12 16:24:35 +00004144 } else {
4145 if (ctxt->sax->characters != NULL)
4146 ctxt->sax->characters(ctxt->userData,
4147 tmp, nbchar);
4148 if (*ctxt->space == -1)
4149 *ctxt->space = -2;
4150 }
Daniel Veillard3ed27bd2001-06-17 17:58:17 +00004151 line = ctxt->input->line;
4152 col = ctxt->input->col;
Daniel Veillard34099b42004-11-04 17:34:35 +00004153 } else if (ctxt->sax != NULL) {
Daniel Veillard80f32572001-03-07 19:45:40 +00004154 if (ctxt->sax->characters != NULL)
4155 ctxt->sax->characters(ctxt->userData,
4156 ctxt->input->cur, nbchar);
Daniel Veillard3ed27bd2001-06-17 17:58:17 +00004157 line = ctxt->input->line;
4158 col = ctxt->input->col;
Daniel Veillard80f32572001-03-07 19:45:40 +00004159 }
Daniel Veillard1dc9feb2008-11-17 15:59:21 +00004160 /* something really bad happened in the SAX callback */
4161 if (ctxt->instate != XML_PARSER_CONTENT)
4162 return;
Daniel Veillard48b2f892001-02-25 16:11:03 +00004163 }
4164 ctxt->input->cur = in;
Daniel Veillard561b7f82002-03-20 21:55:57 +00004165 if (*in == 0xD) {
4166 in++;
4167 if (*in == 0xA) {
4168 ctxt->input->cur = in;
Daniel Veillard48b2f892001-02-25 16:11:03 +00004169 in++;
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00004170 ctxt->input->line++; ctxt->input->col = 1;
Daniel Veillard561b7f82002-03-20 21:55:57 +00004171 continue; /* while */
Daniel Veillard48b2f892001-02-25 16:11:03 +00004172 }
Daniel Veillard561b7f82002-03-20 21:55:57 +00004173 in--;
4174 }
4175 if (*in == '<') {
4176 return;
4177 }
4178 if (*in == '&') {
4179 return;
Daniel Veillard48b2f892001-02-25 16:11:03 +00004180 }
4181 SHRINK;
4182 GROW;
4183 in = ctxt->input->cur;
William M. Brackc07329e2003-09-08 01:57:30 +00004184 } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
Daniel Veillard48b2f892001-02-25 16:11:03 +00004185 nbchar = 0;
4186 }
Daniel Veillard50582112001-03-26 22:52:16 +00004187 ctxt->input->line = line;
4188 ctxt->input->col = col;
Daniel Veillard48b2f892001-02-25 16:11:03 +00004189 xmlParseCharDataComplex(ctxt, cdata);
4190}
4191
Daniel Veillard01c13b52002-12-10 15:19:08 +00004192/**
4193 * xmlParseCharDataComplex:
4194 * @ctxt: an XML parser context
4195 * @cdata: int indicating whether we are within a CDATA section
4196 *
4197 * parse a CharData section.this is the fallback function
4198 * of xmlParseCharData() when the parsing requires handling
4199 * of non-ASCII characters.
4200 */
Daniel Veillard48b2f892001-02-25 16:11:03 +00004201void
4202xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) {
Owen Taylor3473f882001-02-23 17:55:21 +00004203 xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
4204 int nbchar = 0;
4205 int cur, l;
4206 int count = 0;
4207
4208 SHRINK;
4209 GROW;
4210 cur = CUR_CHAR(l);
Daniel Veillardfdc91562002-07-01 21:52:03 +00004211 while ((cur != '<') && /* checked */
4212 (cur != '&') &&
William M. Brack871611b2003-10-18 04:53:14 +00004213 (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
Owen Taylor3473f882001-02-23 17:55:21 +00004214 if ((cur == ']') && (NXT(1) == ']') &&
4215 (NXT(2) == '>')) {
4216 if (cdata) break;
4217 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004218 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004219 }
4220 }
4221 COPY_BUF(l,buf,nbchar,cur);
4222 if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
Daniel Veillard092643b2003-09-25 14:29:29 +00004223 buf[nbchar] = 0;
4224
Owen Taylor3473f882001-02-23 17:55:21 +00004225 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00004226 * OK the segment is to be consumed as chars.
Owen Taylor3473f882001-02-23 17:55:21 +00004227 */
4228 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004229 if (areBlanks(ctxt, buf, nbchar, 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00004230 if (ctxt->sax->ignorableWhitespace != NULL)
4231 ctxt->sax->ignorableWhitespace(ctxt->userData,
4232 buf, nbchar);
4233 } else {
4234 if (ctxt->sax->characters != NULL)
4235 ctxt->sax->characters(ctxt->userData, buf, nbchar);
Daniel Veillard1114d002006-10-12 16:24:35 +00004236 if ((ctxt->sax->characters !=
4237 ctxt->sax->ignorableWhitespace) &&
4238 (*ctxt->space == -1))
4239 *ctxt->space = -2;
Owen Taylor3473f882001-02-23 17:55:21 +00004240 }
4241 }
4242 nbchar = 0;
Daniel Veillard1dc9feb2008-11-17 15:59:21 +00004243 /* something really bad happened in the SAX callback */
4244 if (ctxt->instate != XML_PARSER_CONTENT)
4245 return;
Owen Taylor3473f882001-02-23 17:55:21 +00004246 }
4247 count++;
4248 if (count > 50) {
4249 GROW;
4250 count = 0;
4251 }
4252 NEXTL(l);
4253 cur = CUR_CHAR(l);
4254 }
4255 if (nbchar != 0) {
Daniel Veillard092643b2003-09-25 14:29:29 +00004256 buf[nbchar] = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00004257 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00004258 * OK the segment is to be consumed as chars.
Owen Taylor3473f882001-02-23 17:55:21 +00004259 */
4260 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004261 if (areBlanks(ctxt, buf, nbchar, 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00004262 if (ctxt->sax->ignorableWhitespace != NULL)
4263 ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
4264 } else {
4265 if (ctxt->sax->characters != NULL)
4266 ctxt->sax->characters(ctxt->userData, buf, nbchar);
Daniel Veillard1114d002006-10-12 16:24:35 +00004267 if ((ctxt->sax->characters != ctxt->sax->ignorableWhitespace) &&
4268 (*ctxt->space == -1))
4269 *ctxt->space = -2;
Owen Taylor3473f882001-02-23 17:55:21 +00004270 }
4271 }
4272 }
Daniel Veillard3b1478b2006-04-24 08:50:10 +00004273 if ((cur != 0) && (!IS_CHAR(cur))) {
4274 /* Generate the error and skip the offending character */
4275 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4276 "PCDATA invalid Char value %d\n",
4277 cur);
4278 NEXTL(l);
4279 }
Owen Taylor3473f882001-02-23 17:55:21 +00004280}
4281
4282/**
4283 * xmlParseExternalID:
4284 * @ctxt: an XML parser context
4285 * @publicID: a xmlChar** receiving PubidLiteral
4286 * @strict: indicate whether we should restrict parsing to only
4287 * production [75], see NOTE below
4288 *
4289 * Parse an External ID or a Public ID
4290 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00004291 * NOTE: Productions [75] and [83] interact badly since [75] can generate
Owen Taylor3473f882001-02-23 17:55:21 +00004292 * 'PUBLIC' S PubidLiteral S SystemLiteral
4293 *
4294 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
4295 * | 'PUBLIC' S PubidLiteral S SystemLiteral
4296 *
4297 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
4298 *
4299 * Returns the function returns SystemLiteral and in the second
4300 * case publicID receives PubidLiteral, is strict is off
4301 * it is possible to return NULL and have publicID set.
4302 */
4303
4304xmlChar *
4305xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
4306 xmlChar *URI = NULL;
4307
4308 SHRINK;
Daniel Veillard146c9122001-03-22 15:22:27 +00004309
4310 *publicID = NULL;
Daniel Veillarda07050d2003-10-19 14:46:32 +00004311 if (CMP6(CUR_PTR, 'S', 'Y', 'S', 'T', 'E', 'M')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004312 SKIP(6);
William M. Brack76e95df2003-10-18 16:20:14 +00004313 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004314 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4315 "Space required after 'SYSTEM'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004316 }
4317 SKIP_BLANKS;
4318 URI = xmlParseSystemLiteral(ctxt);
4319 if (URI == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004320 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004321 }
Daniel Veillarda07050d2003-10-19 14:46:32 +00004322 } else if (CMP6(CUR_PTR, 'P', 'U', 'B', 'L', 'I', 'C')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004323 SKIP(6);
William M. Brack76e95df2003-10-18 16:20:14 +00004324 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004325 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004326 "Space required after 'PUBLIC'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004327 }
4328 SKIP_BLANKS;
4329 *publicID = xmlParsePubidLiteral(ctxt);
4330 if (*publicID == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004331 xmlFatalErr(ctxt, XML_ERR_PUBID_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004332 }
4333 if (strict) {
4334 /*
4335 * We don't handle [83] so "S SystemLiteral" is required.
4336 */
William M. Brack76e95df2003-10-18 16:20:14 +00004337 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004338 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004339 "Space required after the Public Identifier\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004340 }
4341 } else {
4342 /*
4343 * We handle [83] so we return immediately, if
4344 * "S SystemLiteral" is not detected. From a purely parsing
4345 * point of view that's a nice mess.
4346 */
4347 const xmlChar *ptr;
4348 GROW;
4349
4350 ptr = CUR_PTR;
William M. Brack76e95df2003-10-18 16:20:14 +00004351 if (!IS_BLANK_CH(*ptr)) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004352
William M. Brack76e95df2003-10-18 16:20:14 +00004353 while (IS_BLANK_CH(*ptr)) ptr++; /* TODO: dangerous, fix ! */
Owen Taylor3473f882001-02-23 17:55:21 +00004354 if ((*ptr != '\'') && (*ptr != '"')) return(NULL);
4355 }
4356 SKIP_BLANKS;
4357 URI = xmlParseSystemLiteral(ctxt);
4358 if (URI == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004359 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004360 }
4361 }
4362 return(URI);
4363}
4364
4365/**
Daniel Veillard4c778d82005-01-23 17:37:44 +00004366 * xmlParseCommentComplex:
Owen Taylor3473f882001-02-23 17:55:21 +00004367 * @ctxt: an XML parser context
Daniel Veillard4c778d82005-01-23 17:37:44 +00004368 * @buf: the already parsed part of the buffer
4369 * @len: number of bytes filles in the buffer
4370 * @size: allocated size of the buffer
Owen Taylor3473f882001-02-23 17:55:21 +00004371 *
4372 * Skip an XML (SGML) comment <!-- .... -->
4373 * The spec says that "For compatibility, the string "--" (double-hyphen)
4374 * must not occur within comments. "
Daniel Veillard4c778d82005-01-23 17:37:44 +00004375 * This is the slow routine in case the accelerator for ascii didn't work
Owen Taylor3473f882001-02-23 17:55:21 +00004376 *
4377 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4378 */
Daniel Veillard4c778d82005-01-23 17:37:44 +00004379static void
4380xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar *buf, int len, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +00004381 int q, ql;
4382 int r, rl;
4383 int cur, l;
Owen Taylor3473f882001-02-23 17:55:21 +00004384 int count = 0;
Daniel Veillard051d52c2008-07-29 16:44:59 +00004385 int inputid;
4386
4387 inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00004388
Owen Taylor3473f882001-02-23 17:55:21 +00004389 if (buf == NULL) {
Daniel Veillard4c778d82005-01-23 17:37:44 +00004390 len = 0;
4391 size = XML_PARSER_BUFFER_SIZE;
4392 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4393 if (buf == NULL) {
4394 xmlErrMemory(ctxt, NULL);
4395 return;
4396 }
Owen Taylor3473f882001-02-23 17:55:21 +00004397 }
William M. Brackbf9a73d2007-02-09 00:07:07 +00004398 GROW; /* Assure there's enough input data */
Owen Taylor3473f882001-02-23 17:55:21 +00004399 q = CUR_CHAR(ql);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00004400 if (q == 0)
4401 goto not_terminated;
Daniel Veillardda629342007-08-01 07:49:06 +00004402 if (!IS_CHAR(q)) {
4403 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4404 "xmlParseComment: invalid xmlChar value %d\n",
4405 q);
4406 xmlFree (buf);
4407 return;
4408 }
Owen Taylor3473f882001-02-23 17:55:21 +00004409 NEXTL(ql);
4410 r = CUR_CHAR(rl);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00004411 if (r == 0)
4412 goto not_terminated;
Daniel Veillardda629342007-08-01 07:49:06 +00004413 if (!IS_CHAR(r)) {
4414 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4415 "xmlParseComment: invalid xmlChar value %d\n",
4416 q);
4417 xmlFree (buf);
4418 return;
4419 }
Owen Taylor3473f882001-02-23 17:55:21 +00004420 NEXTL(rl);
4421 cur = CUR_CHAR(l);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00004422 if (cur == 0)
4423 goto not_terminated;
William M. Brack871611b2003-10-18 04:53:14 +00004424 while (IS_CHAR(cur) && /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00004425 ((cur != '>') ||
4426 (r != '-') || (q != '-'))) {
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00004427 if ((r == '-') && (q == '-')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004428 xmlFatalErr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004429 }
4430 if (len + 5 >= size) {
William M. Bracka3215c72004-07-31 16:24:01 +00004431 xmlChar *new_buf;
Owen Taylor3473f882001-02-23 17:55:21 +00004432 size *= 2;
William M. Bracka3215c72004-07-31 16:24:01 +00004433 new_buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4434 if (new_buf == NULL) {
4435 xmlFree (buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004436 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004437 return;
4438 }
William M. Bracka3215c72004-07-31 16:24:01 +00004439 buf = new_buf;
Owen Taylor3473f882001-02-23 17:55:21 +00004440 }
4441 COPY_BUF(ql,buf,len,q);
4442 q = r;
4443 ql = rl;
4444 r = cur;
4445 rl = l;
4446
4447 count++;
4448 if (count > 50) {
4449 GROW;
4450 count = 0;
4451 }
4452 NEXTL(l);
4453 cur = CUR_CHAR(l);
4454 if (cur == 0) {
4455 SHRINK;
4456 GROW;
4457 cur = CUR_CHAR(l);
4458 }
4459 }
4460 buf[len] = 0;
Daniel Veillardda629342007-08-01 07:49:06 +00004461 if (cur == 0) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00004462 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00004463 "Comment not terminated \n<!--%.50s\n", buf);
Daniel Veillardda629342007-08-01 07:49:06 +00004464 } else if (!IS_CHAR(cur)) {
4465 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4466 "xmlParseComment: invalid xmlChar value %d\n",
4467 cur);
Owen Taylor3473f882001-02-23 17:55:21 +00004468 } else {
Daniel Veillard051d52c2008-07-29 16:44:59 +00004469 if (inputid != ctxt->input->id) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004470 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4471 "Comment doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004472 }
4473 NEXT;
4474 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
4475 (!ctxt->disableSAX))
4476 ctxt->sax->comment(ctxt->userData, buf);
Owen Taylor3473f882001-02-23 17:55:21 +00004477 }
Daniel Veillardda629342007-08-01 07:49:06 +00004478 xmlFree(buf);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00004479 return;
4480not_terminated:
4481 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4482 "Comment not terminated\n", NULL);
4483 xmlFree(buf);
Daniel Veillardda629342007-08-01 07:49:06 +00004484 return;
Owen Taylor3473f882001-02-23 17:55:21 +00004485}
Daniel Veillardda629342007-08-01 07:49:06 +00004486
Daniel Veillard4c778d82005-01-23 17:37:44 +00004487/**
4488 * xmlParseComment:
4489 * @ctxt: an XML parser context
4490 *
4491 * Skip an XML (SGML) comment <!-- .... -->
4492 * The spec says that "For compatibility, the string "--" (double-hyphen)
4493 * must not occur within comments. "
4494 *
4495 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4496 */
4497void
4498xmlParseComment(xmlParserCtxtPtr ctxt) {
4499 xmlChar *buf = NULL;
4500 int size = XML_PARSER_BUFFER_SIZE;
Daniel Veillard316a5c32005-01-23 22:56:39 +00004501 int len = 0;
Daniel Veillard4c778d82005-01-23 17:37:44 +00004502 xmlParserInputState state;
4503 const xmlChar *in;
4504 int nbchar = 0, ccol;
Daniel Veillard051d52c2008-07-29 16:44:59 +00004505 int inputid;
Daniel Veillard4c778d82005-01-23 17:37:44 +00004506
4507 /*
4508 * Check that there is a comment right here.
4509 */
4510 if ((RAW != '<') || (NXT(1) != '!') ||
4511 (NXT(2) != '-') || (NXT(3) != '-')) return;
Daniel Veillard4c778d82005-01-23 17:37:44 +00004512 state = ctxt->instate;
4513 ctxt->instate = XML_PARSER_COMMENT;
Daniel Veillard051d52c2008-07-29 16:44:59 +00004514 inputid = ctxt->input->id;
Daniel Veillard4c778d82005-01-23 17:37:44 +00004515 SKIP(4);
4516 SHRINK;
4517 GROW;
4518
4519 /*
4520 * Accelerated common case where input don't need to be
4521 * modified before passing it to the handler.
4522 */
4523 in = ctxt->input->cur;
4524 do {
4525 if (*in == 0xA) {
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004526 do {
Daniel Veillard4c778d82005-01-23 17:37:44 +00004527 ctxt->input->line++; ctxt->input->col = 1;
4528 in++;
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004529 } while (*in == 0xA);
Daniel Veillard4c778d82005-01-23 17:37:44 +00004530 }
4531get_more:
4532 ccol = ctxt->input->col;
4533 while (((*in > '-') && (*in <= 0x7F)) ||
4534 ((*in >= 0x20) && (*in < '-')) ||
4535 (*in == 0x09)) {
4536 in++;
4537 ccol++;
4538 }
4539 ctxt->input->col = ccol;
4540 if (*in == 0xA) {
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004541 do {
Daniel Veillard4c778d82005-01-23 17:37:44 +00004542 ctxt->input->line++; ctxt->input->col = 1;
4543 in++;
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004544 } while (*in == 0xA);
Daniel Veillard4c778d82005-01-23 17:37:44 +00004545 goto get_more;
4546 }
4547 nbchar = in - ctxt->input->cur;
4548 /*
4549 * save current set of data
4550 */
4551 if (nbchar > 0) {
4552 if ((ctxt->sax != NULL) &&
4553 (ctxt->sax->comment != NULL)) {
4554 if (buf == NULL) {
4555 if ((*in == '-') && (in[1] == '-'))
4556 size = nbchar + 1;
4557 else
4558 size = XML_PARSER_BUFFER_SIZE + nbchar;
4559 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4560 if (buf == NULL) {
4561 xmlErrMemory(ctxt, NULL);
4562 ctxt->instate = state;
4563 return;
4564 }
4565 len = 0;
4566 } else if (len + nbchar + 1 >= size) {
4567 xmlChar *new_buf;
4568 size += len + nbchar + XML_PARSER_BUFFER_SIZE;
4569 new_buf = (xmlChar *) xmlRealloc(buf,
4570 size * sizeof(xmlChar));
4571 if (new_buf == NULL) {
4572 xmlFree (buf);
4573 xmlErrMemory(ctxt, NULL);
4574 ctxt->instate = state;
4575 return;
4576 }
4577 buf = new_buf;
4578 }
4579 memcpy(&buf[len], ctxt->input->cur, nbchar);
4580 len += nbchar;
4581 buf[len] = 0;
4582 }
4583 }
4584 ctxt->input->cur = in;
Daniel Veillard9b528c72006-02-05 03:06:15 +00004585 if (*in == 0xA) {
4586 in++;
4587 ctxt->input->line++; ctxt->input->col = 1;
4588 }
Daniel Veillard4c778d82005-01-23 17:37:44 +00004589 if (*in == 0xD) {
4590 in++;
4591 if (*in == 0xA) {
4592 ctxt->input->cur = in;
4593 in++;
4594 ctxt->input->line++; ctxt->input->col = 1;
4595 continue; /* while */
4596 }
4597 in--;
4598 }
4599 SHRINK;
4600 GROW;
4601 in = ctxt->input->cur;
4602 if (*in == '-') {
4603 if (in[1] == '-') {
4604 if (in[2] == '>') {
Daniel Veillard051d52c2008-07-29 16:44:59 +00004605 if (ctxt->input->id != inputid) {
4606 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4607 "comment doesn't start and stop in the same entity\n");
4608 }
Daniel Veillard4c778d82005-01-23 17:37:44 +00004609 SKIP(3);
4610 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
4611 (!ctxt->disableSAX)) {
4612 if (buf != NULL)
4613 ctxt->sax->comment(ctxt->userData, buf);
4614 else
4615 ctxt->sax->comment(ctxt->userData, BAD_CAST "");
4616 }
4617 if (buf != NULL)
4618 xmlFree(buf);
4619 ctxt->instate = state;
4620 return;
4621 }
4622 if (buf != NULL)
4623 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4624 "Comment not terminated \n<!--%.50s\n",
4625 buf);
4626 else
4627 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4628 "Comment not terminated \n", NULL);
4629 in++;
4630 ctxt->input->col++;
4631 }
4632 in++;
4633 ctxt->input->col++;
4634 goto get_more;
4635 }
4636 } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
4637 xmlParseCommentComplex(ctxt, buf, len, size);
4638 ctxt->instate = state;
4639 return;
4640}
4641
Owen Taylor3473f882001-02-23 17:55:21 +00004642
4643/**
4644 * xmlParsePITarget:
4645 * @ctxt: an XML parser context
4646 *
4647 * parse the name of a PI
4648 *
4649 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
4650 *
4651 * Returns the PITarget name or NULL
4652 */
4653
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004654const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00004655xmlParsePITarget(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004656 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00004657
4658 name = xmlParseName(ctxt);
4659 if ((name != NULL) &&
4660 ((name[0] == 'x') || (name[0] == 'X')) &&
4661 ((name[1] == 'm') || (name[1] == 'M')) &&
4662 ((name[2] == 'l') || (name[2] == 'L'))) {
4663 int i;
4664 if ((name[0] == 'x') && (name[1] == 'm') &&
4665 (name[2] == 'l') && (name[3] == 0)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004666 xmlFatalErrMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
Owen Taylor3473f882001-02-23 17:55:21 +00004667 "XML declaration allowed only at the start of the document\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004668 return(name);
4669 } else if (name[3] == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004670 xmlFatalErr(ctxt, XML_ERR_RESERVED_XML_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004671 return(name);
4672 }
4673 for (i = 0;;i++) {
4674 if (xmlW3CPIs[i] == NULL) break;
4675 if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
4676 return(name);
4677 }
Daniel Veillard24eb9782003-10-04 21:08:09 +00004678 xmlWarningMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
4679 "xmlParsePITarget: invalid name prefix 'xml'\n",
4680 NULL, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004681 }
Daniel Veillard37334572008-07-31 08:20:02 +00004682 if ((name != NULL) && (xmlStrchr(name, ':') != NULL)) {
4683 xmlNsErr(ctxt, XML_NS_ERR_COLON,
4684 "colon are forbidden from PI names '%s'\n", name, NULL, NULL);
4685 }
Owen Taylor3473f882001-02-23 17:55:21 +00004686 return(name);
4687}
4688
#ifdef LIBXML_CATALOG_ENABLED
/**
 * xmlParseCatalogPI:
 * @ctxt:  an XML parser context
 * @catalog:  the PI value string
 *
 * Parse an XML Catalog Processing Instruction.
 *
 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
 *
 * Occurs only if allowed by the user and if it appears in the Misc
 * part of the document before any doctype information.
 * The quoted catalog URL is extracted from @catalog and added to the
 * parsing context's local catalogs, so that it can be used if a
 * resolution is needed further down in the document.
 */

static void
xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
    const xmlChar *p = catalog;
    const xmlChar *start;
    xmlChar *href = NULL;
    xmlChar quote;

    /* Expect: S? 'catalog' S? '=' S? quoted-URL S? end-of-string */
    while (IS_BLANK_CH(*p))
        p++;
    if (xmlStrncmp(p, BAD_CAST"catalog", 7) != 0)
        goto error;
    p += 7;

    while (IS_BLANK_CH(*p))
        p++;
    if (*p != '=') {
        /*
         * NOTE(review): unlike the other syntax failures this one is
         * silent (no warning is emitted) - confirm this is intended.
         */
        return;
    }
    p++;

    while (IS_BLANK_CH(*p))
        p++;
    quote = *p;
    if ((quote != '\'') && (quote != '"'))
        goto error;
    p++;

    /* Scan up to the matching closing quote. */
    start = p;
    for (; (*p != 0) && (*p != quote); p++)
        ;
    if (*p == 0)
        goto error;
    href = xmlStrndup(start, p - start);
    p++;

    /* Only trailing blanks may follow the quoted value. */
    while (IS_BLANK_CH(*p))
        p++;
    if (*p != 0)
        goto error;

    if (href != NULL) {
        ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, href);
        xmlFree(href);
    }
    return;

error:
    xmlWarningMsg(ctxt, XML_WAR_CATALOG_PI,
                  "Catalog PI syntax error: %s\n",
                  catalog, NULL);
    if (href != NULL)
        xmlFree(href);
}
#endif
4750
Owen Taylor3473f882001-02-23 17:55:21 +00004751/**
4752 * xmlParsePI:
4753 * @ctxt: an XML parser context
4754 *
4755 * parse an XML Processing Instruction.
4756 *
4757 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
4758 *
4759 * The processing is transfered to SAX once parsed.
4760 */
4761
4762void
4763xmlParsePI(xmlParserCtxtPtr ctxt) {
4764 xmlChar *buf = NULL;
4765 int len = 0;
4766 int size = XML_PARSER_BUFFER_SIZE;
4767 int cur, l;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004768 const xmlChar *target;
Owen Taylor3473f882001-02-23 17:55:21 +00004769 xmlParserInputState state;
4770 int count = 0;
4771
4772 if ((RAW == '<') && (NXT(1) == '?')) {
4773 xmlParserInputPtr input = ctxt->input;
4774 state = ctxt->instate;
4775 ctxt->instate = XML_PARSER_PI;
4776 /*
4777 * this is a Processing Instruction.
4778 */
4779 SKIP(2);
4780 SHRINK;
4781
4782 /*
4783 * Parse the target name and check for special support like
4784 * namespace.
4785 */
4786 target = xmlParsePITarget(ctxt);
4787 if (target != NULL) {
4788 if ((RAW == '?') && (NXT(1) == '>')) {
4789 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004790 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4791 "PI declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004792 }
4793 SKIP(2);
4794
4795 /*
4796 * SAX: PI detected.
4797 */
4798 if ((ctxt->sax) && (!ctxt->disableSAX) &&
4799 (ctxt->sax->processingInstruction != NULL))
4800 ctxt->sax->processingInstruction(ctxt->userData,
4801 target, NULL);
4802 ctxt->instate = state;
Owen Taylor3473f882001-02-23 17:55:21 +00004803 return;
4804 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00004805 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00004806 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004807 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004808 ctxt->instate = state;
4809 return;
4810 }
4811 cur = CUR;
4812 if (!IS_BLANK(cur)) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00004813 xmlFatalErrMsgStr(ctxt, XML_ERR_SPACE_REQUIRED,
4814 "ParsePI: PI %s space expected\n", target);
Owen Taylor3473f882001-02-23 17:55:21 +00004815 }
4816 SKIP_BLANKS;
4817 cur = CUR_CHAR(l);
William M. Brack871611b2003-10-18 04:53:14 +00004818 while (IS_CHAR(cur) && /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00004819 ((cur != '?') || (NXT(1) != '>'))) {
4820 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00004821 xmlChar *tmp;
4822
Owen Taylor3473f882001-02-23 17:55:21 +00004823 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00004824 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4825 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004826 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00004827 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00004828 ctxt->instate = state;
4829 return;
4830 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00004831 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00004832 }
4833 count++;
4834 if (count > 50) {
4835 GROW;
4836 count = 0;
4837 }
4838 COPY_BUF(l,buf,len,cur);
4839 NEXTL(l);
4840 cur = CUR_CHAR(l);
4841 if (cur == 0) {
4842 SHRINK;
4843 GROW;
4844 cur = CUR_CHAR(l);
4845 }
4846 }
4847 buf[len] = 0;
4848 if (cur != '?') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00004849 xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
4850 "ParsePI: PI %s never end ...\n", target);
Owen Taylor3473f882001-02-23 17:55:21 +00004851 } else {
4852 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004853 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4854 "PI declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004855 }
4856 SKIP(2);
4857
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00004858#ifdef LIBXML_CATALOG_ENABLED
4859 if (((state == XML_PARSER_MISC) ||
4860 (state == XML_PARSER_START)) &&
4861 (xmlStrEqual(target, XML_CATALOG_PI))) {
4862 xmlCatalogAllow allow = xmlCatalogGetDefaults();
4863 if ((allow == XML_CATA_ALLOW_DOCUMENT) ||
4864 (allow == XML_CATA_ALLOW_ALL))
4865 xmlParseCatalogPI(ctxt, buf);
4866 }
4867#endif
4868
4869
Owen Taylor3473f882001-02-23 17:55:21 +00004870 /*
4871 * SAX: PI detected.
4872 */
4873 if ((ctxt->sax) && (!ctxt->disableSAX) &&
4874 (ctxt->sax->processingInstruction != NULL))
4875 ctxt->sax->processingInstruction(ctxt->userData,
4876 target, buf);
4877 }
4878 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00004879 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004880 xmlFatalErr(ctxt, XML_ERR_PI_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004881 }
4882 ctxt->instate = state;
4883 }
4884}
4885
4886/**
4887 * xmlParseNotationDecl:
4888 * @ctxt: an XML parser context
4889 *
4890 * parse a notation declaration
4891 *
4892 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
4893 *
4894 * Hence there is actually 3 choices:
4895 * 'PUBLIC' S PubidLiteral
4896 * 'PUBLIC' S PubidLiteral S SystemLiteral
4897 * and 'SYSTEM' S SystemLiteral
4898 *
4899 * See the NOTE on xmlParseExternalID().
4900 */
4901
4902void
4903xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004904 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00004905 xmlChar *Pubid;
4906 xmlChar *Systemid;
4907
Daniel Veillarda07050d2003-10-19 14:46:32 +00004908 if (CMP10(CUR_PTR, '<', '!', 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004909 xmlParserInputPtr input = ctxt->input;
4910 SHRINK;
4911 SKIP(10);
William M. Brack76e95df2003-10-18 16:20:14 +00004912 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004913 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4914 "Space required after '<!NOTATION'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004915 return;
4916 }
4917 SKIP_BLANKS;
4918
Daniel Veillard76d66f42001-05-16 21:05:17 +00004919 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004920 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004921 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004922 return;
4923 }
William M. Brack76e95df2003-10-18 16:20:14 +00004924 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004925 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004926 "Space required after the NOTATION name'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004927 return;
4928 }
Daniel Veillard37334572008-07-31 08:20:02 +00004929 if (xmlStrchr(name, ':') != NULL) {
4930 xmlNsErr(ctxt, XML_NS_ERR_COLON,
4931 "colon are forbidden from notation names '%s'\n",
4932 name, NULL, NULL);
4933 }
Owen Taylor3473f882001-02-23 17:55:21 +00004934 SKIP_BLANKS;
4935
4936 /*
4937 * Parse the IDs.
4938 */
4939 Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
4940 SKIP_BLANKS;
4941
4942 if (RAW == '>') {
4943 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004944 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4945 "Notation declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004946 }
4947 NEXT;
4948 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4949 (ctxt->sax->notationDecl != NULL))
4950 ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
4951 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004952 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004953 }
Owen Taylor3473f882001-02-23 17:55:21 +00004954 if (Systemid != NULL) xmlFree(Systemid);
4955 if (Pubid != NULL) xmlFree(Pubid);
4956 }
4957}
4958
4959/**
4960 * xmlParseEntityDecl:
4961 * @ctxt: an XML parser context
4962 *
4963 * parse <!ENTITY declarations
4964 *
4965 * [70] EntityDecl ::= GEDecl | PEDecl
4966 *
4967 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
4968 *
4969 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
4970 *
4971 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
4972 *
4973 * [74] PEDef ::= EntityValue | ExternalID
4974 *
4975 * [76] NDataDecl ::= S 'NDATA' S Name
4976 *
4977 * [ VC: Notation Declared ]
4978 * The Name must match the declared name of a notation.
4979 */
4980
4981void
4982xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004983 const xmlChar *name = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00004984 xmlChar *value = NULL;
4985 xmlChar *URI = NULL, *literal = NULL;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004986 const xmlChar *ndata = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00004987 int isParameter = 0;
4988 xmlChar *orig = NULL;
Daniel Veillardf5582f12002-06-11 10:08:16 +00004989 int skipped;
Owen Taylor3473f882001-02-23 17:55:21 +00004990
Daniel Veillard4c778d82005-01-23 17:37:44 +00004991 /* GROW; done in the caller */
Daniel Veillarda07050d2003-10-19 14:46:32 +00004992 if (CMP8(CUR_PTR, '<', '!', 'E', 'N', 'T', 'I', 'T', 'Y')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004993 xmlParserInputPtr input = ctxt->input;
Owen Taylor3473f882001-02-23 17:55:21 +00004994 SHRINK;
4995 SKIP(8);
Daniel Veillardf5582f12002-06-11 10:08:16 +00004996 skipped = SKIP_BLANKS;
4997 if (skipped == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004998 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4999 "Space required after '<!ENTITY'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005000 }
Owen Taylor3473f882001-02-23 17:55:21 +00005001
5002 if (RAW == '%') {
5003 NEXT;
Daniel Veillardf5582f12002-06-11 10:08:16 +00005004 skipped = SKIP_BLANKS;
5005 if (skipped == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005006 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5007 "Space required after '%'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005008 }
Owen Taylor3473f882001-02-23 17:55:21 +00005009 isParameter = 1;
5010 }
5011
Daniel Veillard76d66f42001-05-16 21:05:17 +00005012 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005013 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005014 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5015 "xmlParseEntityDecl: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005016 return;
5017 }
Daniel Veillard37334572008-07-31 08:20:02 +00005018 if (xmlStrchr(name, ':') != NULL) {
5019 xmlNsErr(ctxt, XML_NS_ERR_COLON,
5020 "colon are forbidden from entities names '%s'\n",
5021 name, NULL, NULL);
5022 }
Daniel Veillardf5582f12002-06-11 10:08:16 +00005023 skipped = SKIP_BLANKS;
5024 if (skipped == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005025 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5026 "Space required after the entity name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005027 }
Owen Taylor3473f882001-02-23 17:55:21 +00005028
Daniel Veillardf5582f12002-06-11 10:08:16 +00005029 ctxt->instate = XML_PARSER_ENTITY_DECL;
Owen Taylor3473f882001-02-23 17:55:21 +00005030 /*
5031 * handle the various case of definitions...
5032 */
5033 if (isParameter) {
5034 if ((RAW == '"') || (RAW == '\'')) {
5035 value = xmlParseEntityValue(ctxt, &orig);
5036 if (value) {
5037 if ((ctxt->sax != NULL) &&
5038 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5039 ctxt->sax->entityDecl(ctxt->userData, name,
5040 XML_INTERNAL_PARAMETER_ENTITY,
5041 NULL, NULL, value);
5042 }
5043 } else {
5044 URI = xmlParseExternalID(ctxt, &literal, 1);
5045 if ((URI == NULL) && (literal == NULL)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005046 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005047 }
5048 if (URI) {
5049 xmlURIPtr uri;
5050
5051 uri = xmlParseURI((const char *) URI);
5052 if (uri == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00005053 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
5054 "Invalid URI: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00005055 /*
5056 * This really ought to be a well formedness error
5057 * but the XML Core WG decided otherwise c.f. issue
5058 * E26 of the XML erratas.
5059 */
Owen Taylor3473f882001-02-23 17:55:21 +00005060 } else {
5061 if (uri->fragment != NULL) {
Daniel Veillard4a7ae502002-02-18 19:18:17 +00005062 /*
5063 * Okay this is foolish to block those but not
5064 * invalid URIs.
5065 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005066 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005067 } else {
5068 if ((ctxt->sax != NULL) &&
5069 (!ctxt->disableSAX) &&
5070 (ctxt->sax->entityDecl != NULL))
5071 ctxt->sax->entityDecl(ctxt->userData, name,
5072 XML_EXTERNAL_PARAMETER_ENTITY,
5073 literal, URI, NULL);
5074 }
5075 xmlFreeURI(uri);
5076 }
5077 }
5078 }
5079 } else {
5080 if ((RAW == '"') || (RAW == '\'')) {
5081 value = xmlParseEntityValue(ctxt, &orig);
5082 if ((ctxt->sax != NULL) &&
5083 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5084 ctxt->sax->entityDecl(ctxt->userData, name,
5085 XML_INTERNAL_GENERAL_ENTITY,
5086 NULL, NULL, value);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005087 /*
5088 * For expat compatibility in SAX mode.
5089 */
5090 if ((ctxt->myDoc == NULL) ||
5091 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
5092 if (ctxt->myDoc == NULL) {
5093 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
Daniel Veillard68b6e022008-03-31 09:26:00 +00005094 if (ctxt->myDoc == NULL) {
5095 xmlErrMemory(ctxt, "New Doc failed");
5096 return;
5097 }
Daniel Veillardae0765b2008-07-31 19:54:59 +00005098 ctxt->myDoc->properties = XML_DOC_INTERNAL;
Daniel Veillard5997aca2002-03-18 18:36:20 +00005099 }
5100 if (ctxt->myDoc->intSubset == NULL)
5101 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5102 BAD_CAST "fake", NULL, NULL);
5103
Daniel Veillard1af9a412003-08-20 22:54:39 +00005104 xmlSAX2EntityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY,
5105 NULL, NULL, value);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005106 }
Owen Taylor3473f882001-02-23 17:55:21 +00005107 } else {
5108 URI = xmlParseExternalID(ctxt, &literal, 1);
5109 if ((URI == NULL) && (literal == NULL)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005110 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005111 }
5112 if (URI) {
5113 xmlURIPtr uri;
5114
5115 uri = xmlParseURI((const char *)URI);
5116 if (uri == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00005117 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
5118 "Invalid URI: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00005119 /*
5120 * This really ought to be a well formedness error
5121 * but the XML Core WG decided otherwise c.f. issue
5122 * E26 of the XML erratas.
5123 */
Owen Taylor3473f882001-02-23 17:55:21 +00005124 } else {
5125 if (uri->fragment != NULL) {
Daniel Veillard4a7ae502002-02-18 19:18:17 +00005126 /*
5127 * Okay this is foolish to block those but not
5128 * invalid URIs.
5129 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005130 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005131 }
5132 xmlFreeURI(uri);
5133 }
5134 }
William M. Brack76e95df2003-10-18 16:20:14 +00005135 if ((RAW != '>') && (!IS_BLANK_CH(CUR))) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005136 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5137 "Space required before 'NDATA'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005138 }
5139 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005140 if (CMP5(CUR_PTR, 'N', 'D', 'A', 'T', 'A')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005141 SKIP(5);
William M. Brack76e95df2003-10-18 16:20:14 +00005142 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005143 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5144 "Space required after 'NDATA'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005145 }
5146 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00005147 ndata = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005148 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5149 (ctxt->sax->unparsedEntityDecl != NULL))
5150 ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
5151 literal, URI, ndata);
5152 } else {
5153 if ((ctxt->sax != NULL) &&
5154 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5155 ctxt->sax->entityDecl(ctxt->userData, name,
5156 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5157 literal, URI, NULL);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005158 /*
5159 * For expat compatibility in SAX mode.
5160 * assuming the entity repalcement was asked for
5161 */
5162 if ((ctxt->replaceEntities != 0) &&
5163 ((ctxt->myDoc == NULL) ||
5164 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) {
5165 if (ctxt->myDoc == NULL) {
5166 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
Daniel Veillard68b6e022008-03-31 09:26:00 +00005167 if (ctxt->myDoc == NULL) {
5168 xmlErrMemory(ctxt, "New Doc failed");
5169 return;
5170 }
Daniel Veillardae0765b2008-07-31 19:54:59 +00005171 ctxt->myDoc->properties = XML_DOC_INTERNAL;
Daniel Veillard5997aca2002-03-18 18:36:20 +00005172 }
5173
5174 if (ctxt->myDoc->intSubset == NULL)
5175 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5176 BAD_CAST "fake", NULL, NULL);
Daniel Veillard1af9a412003-08-20 22:54:39 +00005177 xmlSAX2EntityDecl(ctxt, name,
5178 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5179 literal, URI, NULL);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005180 }
Owen Taylor3473f882001-02-23 17:55:21 +00005181 }
5182 }
5183 }
5184 SKIP_BLANKS;
5185 if (RAW != '>') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00005186 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00005187 "xmlParseEntityDecl: entity %s not terminated\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00005188 } else {
5189 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005190 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5191 "Entity declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005192 }
5193 NEXT;
5194 }
5195 if (orig != NULL) {
5196 /*
5197 * Ugly mechanism to save the raw entity value.
5198 */
5199 xmlEntityPtr cur = NULL;
5200
5201 if (isParameter) {
5202 if ((ctxt->sax != NULL) &&
5203 (ctxt->sax->getParameterEntity != NULL))
5204 cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
5205 } else {
5206 if ((ctxt->sax != NULL) &&
5207 (ctxt->sax->getEntity != NULL))
5208 cur = ctxt->sax->getEntity(ctxt->userData, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005209 if ((cur == NULL) && (ctxt->userData==ctxt)) {
Daniel Veillard1af9a412003-08-20 22:54:39 +00005210 cur = xmlSAX2GetEntity(ctxt, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005211 }
Owen Taylor3473f882001-02-23 17:55:21 +00005212 }
5213 if (cur != NULL) {
5214 if (cur->orig != NULL)
5215 xmlFree(orig);
5216 else
5217 cur->orig = orig;
5218 } else
5219 xmlFree(orig);
5220 }
Owen Taylor3473f882001-02-23 17:55:21 +00005221 if (value != NULL) xmlFree(value);
5222 if (URI != NULL) xmlFree(URI);
5223 if (literal != NULL) xmlFree(literal);
Owen Taylor3473f882001-02-23 17:55:21 +00005224 }
5225}
5226
5227/**
5228 * xmlParseDefaultDecl:
5229 * @ctxt: an XML parser context
5230 * @value: Receive a possible fixed default value for the attribute
5231 *
5232 * Parse an attribute default declaration
5233 *
5234 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
5235 *
5236 * [ VC: Required Attribute ]
5237 * if the default declaration is the keyword #REQUIRED, then the
5238 * attribute must be specified for all elements of the type in the
5239 * attribute-list declaration.
5240 *
5241 * [ VC: Attribute Default Legal ]
5242 * The declared default value must meet the lexical constraints of
5243 * the declared attribute type c.f. xmlValidateAttributeDecl()
5244 *
5245 * [ VC: Fixed Attribute Default ]
5246 * if an attribute has a default value declared with the #FIXED
5247 * keyword, instances of that attribute must match the default value.
5248 *
5249 * [ WFC: No < in Attribute Values ]
5250 * handled in xmlParseAttValue()
5251 *
5252 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
5253 * or XML_ATTRIBUTE_FIXED.
5254 */
5255
5256int
5257xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
5258 int val;
5259 xmlChar *ret;
5260
5261 *value = NULL;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005262 if (CMP9(CUR_PTR, '#', 'R', 'E', 'Q', 'U', 'I', 'R', 'E', 'D')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005263 SKIP(9);
5264 return(XML_ATTRIBUTE_REQUIRED);
5265 }
Daniel Veillarda07050d2003-10-19 14:46:32 +00005266 if (CMP8(CUR_PTR, '#', 'I', 'M', 'P', 'L', 'I', 'E', 'D')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005267 SKIP(8);
5268 return(XML_ATTRIBUTE_IMPLIED);
5269 }
5270 val = XML_ATTRIBUTE_NONE;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005271 if (CMP6(CUR_PTR, '#', 'F', 'I', 'X', 'E', 'D')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005272 SKIP(6);
5273 val = XML_ATTRIBUTE_FIXED;
William M. Brack76e95df2003-10-18 16:20:14 +00005274 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005275 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5276 "Space required after '#FIXED'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005277 }
5278 SKIP_BLANKS;
5279 }
5280 ret = xmlParseAttValue(ctxt);
5281 ctxt->instate = XML_PARSER_DTD;
5282 if (ret == NULL) {
William M. Brack7b9154b2003-09-27 19:23:50 +00005283 xmlFatalErrMsg(ctxt, (xmlParserErrors)ctxt->errNo,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005284 "Attribute default value declaration error\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005285 } else
5286 *value = ret;
5287 return(val);
5288}
5289
5290/**
5291 * xmlParseNotationType:
5292 * @ctxt: an XML parser context
5293 *
5294 * parse an Notation attribute type.
5295 *
5296 * Note: the leading 'NOTATION' S part has already being parsed...
5297 *
5298 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5299 *
5300 * [ VC: Notation Attributes ]
5301 * Values of this type must match one of the notation names included
5302 * in the declaration; all notation names in the declaration must be declared.
5303 *
5304 * Returns: the notation attribute tree built while parsing
5305 */
5306
5307xmlEnumerationPtr
5308xmlParseNotationType(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005309 const xmlChar *name;
Daniel Veillard49d44052008-08-27 19:57:06 +00005310 xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00005311
5312 if (RAW != '(') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005313 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005314 return(NULL);
5315 }
5316 SHRINK;
5317 do {
5318 NEXT;
5319 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00005320 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005321 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005322 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5323 "Name expected in NOTATION declaration\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005324 return(ret);
5325 }
Daniel Veillard49d44052008-08-27 19:57:06 +00005326 tmp = ret;
5327 while (tmp != NULL) {
5328 if (xmlStrEqual(name, tmp->name)) {
5329 xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5330 "standalone: attribute notation value token %s duplicated\n",
5331 name, NULL);
5332 if (!xmlDictOwns(ctxt->dict, name))
5333 xmlFree((xmlChar *) name);
5334 break;
5335 }
5336 tmp = tmp->next;
5337 }
5338 if (tmp == NULL) {
5339 cur = xmlCreateEnumeration(name);
5340 if (cur == NULL) return(ret);
5341 if (last == NULL) ret = last = cur;
5342 else {
5343 last->next = cur;
5344 last = cur;
5345 }
Owen Taylor3473f882001-02-23 17:55:21 +00005346 }
5347 SKIP_BLANKS;
5348 } while (RAW == '|');
5349 if (RAW != ')') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005350 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005351 if ((last != NULL) && (last != ret))
5352 xmlFreeEnumeration(last);
5353 return(ret);
5354 }
5355 NEXT;
5356 return(ret);
5357}
5358
5359/**
5360 * xmlParseEnumerationType:
5361 * @ctxt: an XML parser context
5362 *
5363 * parse an Enumeration attribute type.
5364 *
5365 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
5366 *
5367 * [ VC: Enumeration ]
5368 * Values of this type must match one of the Nmtoken tokens in
5369 * the declaration
5370 *
5371 * Returns: the enumeration attribute tree built while parsing
5372 */
5373
5374xmlEnumerationPtr
5375xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
5376 xmlChar *name;
Daniel Veillard49d44052008-08-27 19:57:06 +00005377 xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00005378
5379 if (RAW != '(') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005380 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005381 return(NULL);
5382 }
5383 SHRINK;
5384 do {
5385 NEXT;
5386 SKIP_BLANKS;
5387 name = xmlParseNmtoken(ctxt);
5388 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005389 xmlFatalErr(ctxt, XML_ERR_NMTOKEN_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005390 return(ret);
5391 }
Daniel Veillard49d44052008-08-27 19:57:06 +00005392 tmp = ret;
5393 while (tmp != NULL) {
5394 if (xmlStrEqual(name, tmp->name)) {
5395 xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5396 "standalone: attribute enumeration value token %s duplicated\n",
5397 name, NULL);
5398 if (!xmlDictOwns(ctxt->dict, name))
5399 xmlFree(name);
5400 break;
5401 }
5402 tmp = tmp->next;
5403 }
5404 if (tmp == NULL) {
5405 cur = xmlCreateEnumeration(name);
5406 if (!xmlDictOwns(ctxt->dict, name))
5407 xmlFree(name);
5408 if (cur == NULL) return(ret);
5409 if (last == NULL) ret = last = cur;
5410 else {
5411 last->next = cur;
5412 last = cur;
5413 }
Owen Taylor3473f882001-02-23 17:55:21 +00005414 }
5415 SKIP_BLANKS;
5416 } while (RAW == '|');
5417 if (RAW != ')') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005418 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005419 return(ret);
5420 }
5421 NEXT;
5422 return(ret);
5423}
5424
5425/**
5426 * xmlParseEnumeratedType:
5427 * @ctxt: an XML parser context
5428 * @tree: the enumeration tree built while parsing
5429 *
5430 * parse an Enumerated attribute type.
5431 *
5432 * [57] EnumeratedType ::= NotationType | Enumeration
5433 *
5434 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5435 *
5436 *
5437 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
5438 */
5439
5440int
5441xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
Daniel Veillarda07050d2003-10-19 14:46:32 +00005442 if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005443 SKIP(8);
William M. Brack76e95df2003-10-18 16:20:14 +00005444 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005445 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5446 "Space required after 'NOTATION'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005447 return(0);
5448 }
5449 SKIP_BLANKS;
5450 *tree = xmlParseNotationType(ctxt);
5451 if (*tree == NULL) return(0);
5452 return(XML_ATTRIBUTE_NOTATION);
5453 }
5454 *tree = xmlParseEnumerationType(ctxt);
5455 if (*tree == NULL) return(0);
5456 return(XML_ATTRIBUTE_ENUMERATION);
5457}
5458
5459/**
5460 * xmlParseAttributeType:
5461 * @ctxt: an XML parser context
5462 * @tree: the enumeration tree built while parsing
5463 *
5464 * parse the Attribute list def for an element
5465 *
5466 * [54] AttType ::= StringType | TokenizedType | EnumeratedType
5467 *
5468 * [55] StringType ::= 'CDATA'
5469 *
5470 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
5471 * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
5472 *
5473 * Validity constraints for attribute values syntax are checked in
5474 * xmlValidateAttributeValue()
5475 *
5476 * [ VC: ID ]
5477 * Values of type ID must match the Name production. A name must not
5478 * appear more than once in an XML document as a value of this type;
5479 * i.e., ID values must uniquely identify the elements which bear them.
5480 *
5481 * [ VC: One ID per Element Type ]
5482 * No element type may have more than one ID attribute specified.
5483 *
5484 * [ VC: ID Attribute Default ]
5485 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
5486 *
5487 * [ VC: IDREF ]
5488 * Values of type IDREF must match the Name production, and values
5489 * of type IDREFS must match Names; each IDREF Name must match the value
5490 * of an ID attribute on some element in the XML document; i.e. IDREF
5491 * values must match the value of some ID attribute.
5492 *
5493 * [ VC: Entity Name ]
5494 * Values of type ENTITY must match the Name production, values
5495 * of type ENTITIES must match Names; each Entity Name must match the
5496 * name of an unparsed entity declared in the DTD.
5497 *
5498 * [ VC: Name Token ]
5499 * Values of type NMTOKEN must match the Nmtoken production; values
5500 * of type NMTOKENS must match Nmtokens.
5501 *
5502 * Returns the attribute type
5503 */
5504int
5505xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
5506 SHRINK;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005507 if (CMP5(CUR_PTR, 'C', 'D', 'A', 'T', 'A')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005508 SKIP(5);
5509 return(XML_ATTRIBUTE_CDATA);
Daniel Veillarda07050d2003-10-19 14:46:32 +00005510 } else if (CMP6(CUR_PTR, 'I', 'D', 'R', 'E', 'F', 'S')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005511 SKIP(6);
5512 return(XML_ATTRIBUTE_IDREFS);
Daniel Veillarda07050d2003-10-19 14:46:32 +00005513 } else if (CMP5(CUR_PTR, 'I', 'D', 'R', 'E', 'F')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005514 SKIP(5);
5515 return(XML_ATTRIBUTE_IDREF);
5516 } else if ((RAW == 'I') && (NXT(1) == 'D')) {
5517 SKIP(2);
5518 return(XML_ATTRIBUTE_ID);
Daniel Veillarda07050d2003-10-19 14:46:32 +00005519 } else if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005520 SKIP(6);
5521 return(XML_ATTRIBUTE_ENTITY);
Daniel Veillarda07050d2003-10-19 14:46:32 +00005522 } else if (CMP8(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'I', 'E', 'S')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005523 SKIP(8);
5524 return(XML_ATTRIBUTE_ENTITIES);
Daniel Veillarda07050d2003-10-19 14:46:32 +00005525 } else if (CMP8(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N', 'S')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005526 SKIP(8);
5527 return(XML_ATTRIBUTE_NMTOKENS);
Daniel Veillarda07050d2003-10-19 14:46:32 +00005528 } else if (CMP7(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005529 SKIP(7);
5530 return(XML_ATTRIBUTE_NMTOKEN);
5531 }
5532 return(xmlParseEnumeratedType(ctxt, tree));
5533}
5534
5535/**
5536 * xmlParseAttributeListDecl:
5537 * @ctxt: an XML parser context
5538 *
5539 * : parse the Attribute list def for an element
5540 *
5541 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
5542 *
5543 * [53] AttDef ::= S Name S AttType S DefaultDecl
5544 *
5545 */
5546void
5547xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005548 const xmlChar *elemName;
5549 const xmlChar *attrName;
Owen Taylor3473f882001-02-23 17:55:21 +00005550 xmlEnumerationPtr tree;
5551
Daniel Veillarda07050d2003-10-19 14:46:32 +00005552 if (CMP9(CUR_PTR, '<', '!', 'A', 'T', 'T', 'L', 'I', 'S', 'T')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005553 xmlParserInputPtr input = ctxt->input;
5554
5555 SKIP(9);
William M. Brack76e95df2003-10-18 16:20:14 +00005556 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005557 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005558 "Space required after '<!ATTLIST'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005559 }
5560 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00005561 elemName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005562 if (elemName == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005563 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5564 "ATTLIST: no name for Element\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005565 return;
5566 }
5567 SKIP_BLANKS;
5568 GROW;
5569 while (RAW != '>') {
5570 const xmlChar *check = CUR_PTR;
5571 int type;
5572 int def;
5573 xmlChar *defaultValue = NULL;
5574
5575 GROW;
5576 tree = NULL;
Daniel Veillard76d66f42001-05-16 21:05:17 +00005577 attrName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005578 if (attrName == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005579 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5580 "ATTLIST: no name for Attribute\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005581 break;
5582 }
5583 GROW;
William M. Brack76e95df2003-10-18 16:20:14 +00005584 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005585 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005586 "Space required after the attribute name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005587 break;
5588 }
5589 SKIP_BLANKS;
5590
5591 type = xmlParseAttributeType(ctxt, &tree);
5592 if (type <= 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00005593 break;
5594 }
5595
5596 GROW;
William M. Brack76e95df2003-10-18 16:20:14 +00005597 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005598 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5599 "Space required after the attribute type\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005600 if (tree != NULL)
5601 xmlFreeEnumeration(tree);
5602 break;
5603 }
5604 SKIP_BLANKS;
5605
5606 def = xmlParseDefaultDecl(ctxt, &defaultValue);
5607 if (def <= 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00005608 if (defaultValue != NULL)
5609 xmlFree(defaultValue);
5610 if (tree != NULL)
5611 xmlFreeEnumeration(tree);
5612 break;
5613 }
Daniel Veillard97c9ce22008-03-25 16:52:41 +00005614 if ((type != XML_ATTRIBUTE_CDATA) && (defaultValue != NULL))
5615 xmlAttrNormalizeSpace(defaultValue, defaultValue);
Owen Taylor3473f882001-02-23 17:55:21 +00005616
5617 GROW;
5618 if (RAW != '>') {
William M. Brack76e95df2003-10-18 16:20:14 +00005619 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005620 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005621 "Space required after the attribute default value\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005622 if (defaultValue != NULL)
5623 xmlFree(defaultValue);
5624 if (tree != NULL)
5625 xmlFreeEnumeration(tree);
5626 break;
5627 }
5628 SKIP_BLANKS;
5629 }
5630 if (check == CUR_PTR) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005631 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
5632 "in xmlParseAttributeListDecl\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005633 if (defaultValue != NULL)
5634 xmlFree(defaultValue);
5635 if (tree != NULL)
5636 xmlFreeEnumeration(tree);
5637 break;
5638 }
5639 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5640 (ctxt->sax->attributeDecl != NULL))
5641 ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
5642 type, def, defaultValue, tree);
Daniel Veillarde57ec792003-09-10 10:50:59 +00005643 else if (tree != NULL)
5644 xmlFreeEnumeration(tree);
5645
5646 if ((ctxt->sax2) && (defaultValue != NULL) &&
5647 (def != XML_ATTRIBUTE_IMPLIED) &&
5648 (def != XML_ATTRIBUTE_REQUIRED)) {
5649 xmlAddDefAttrs(ctxt, elemName, attrName, defaultValue);
5650 }
Daniel Veillardac4118d2008-01-11 05:27:32 +00005651 if (ctxt->sax2) {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00005652 xmlAddSpecialAttr(ctxt, elemName, attrName, type);
5653 }
Owen Taylor3473f882001-02-23 17:55:21 +00005654 if (defaultValue != NULL)
5655 xmlFree(defaultValue);
5656 GROW;
5657 }
5658 if (RAW == '>') {
5659 if (input != ctxt->input) {
Daniel Veillard49d44052008-08-27 19:57:06 +00005660 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
5661 "Attribute list declaration doesn't start and stop in the same entity\n",
5662 NULL, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005663 }
5664 NEXT;
5665 }
Owen Taylor3473f882001-02-23 17:55:21 +00005666 }
5667}
5668
5669/**
5670 * xmlParseElementMixedContentDecl:
5671 * @ctxt: an XML parser context
Daniel Veillarda9b66d02002-12-11 14:23:49 +00005672 * @inputchk: the input used for the current entity, needed for boundary checks
Owen Taylor3473f882001-02-23 17:55:21 +00005673 *
5674 * parse the declaration for a Mixed Element content
5675 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
5676 *
5677 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
5678 * '(' S? '#PCDATA' S? ')'
5679 *
5680 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
5681 *
5682 * [ VC: No Duplicate Types ]
5683 * The same name must not appear more than once in a single
5684 * mixed-content declaration.
5685 *
5686 * returns: the list of the xmlElementContentPtr describing the element choices
5687 */
5688xmlElementContentPtr
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005689xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
Owen Taylor3473f882001-02-23 17:55:21 +00005690 xmlElementContentPtr ret = NULL, cur = NULL, n;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005691 const xmlChar *elem = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005692
5693 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005694 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005695 SKIP(7);
5696 SKIP_BLANKS;
5697 SHRINK;
5698 if (RAW == ')') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005699 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00005700 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
5701"Element content declaration doesn't start and stop in the same entity\n",
Daniel Veillardae0765b2008-07-31 19:54:59 +00005702 NULL, NULL);
Daniel Veillard8dc16a62002-02-19 21:08:48 +00005703 }
Owen Taylor3473f882001-02-23 17:55:21 +00005704 NEXT;
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005705 ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
Daniel Veillard68b6e022008-03-31 09:26:00 +00005706 if (ret == NULL)
5707 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005708 if (RAW == '*') {
5709 ret->ocur = XML_ELEMENT_CONTENT_MULT;
5710 NEXT;
5711 }
5712 return(ret);
5713 }
5714 if ((RAW == '(') || (RAW == '|')) {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005715 ret = cur = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
Owen Taylor3473f882001-02-23 17:55:21 +00005716 if (ret == NULL) return(NULL);
5717 }
5718 while (RAW == '|') {
5719 NEXT;
5720 if (elem == NULL) {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005721 ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
Owen Taylor3473f882001-02-23 17:55:21 +00005722 if (ret == NULL) return(NULL);
5723 ret->c1 = cur;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005724 if (cur != NULL)
5725 cur->parent = ret;
Owen Taylor3473f882001-02-23 17:55:21 +00005726 cur = ret;
5727 } else {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005728 n = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
Owen Taylor3473f882001-02-23 17:55:21 +00005729 if (n == NULL) return(NULL);
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005730 n->c1 = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005731 if (n->c1 != NULL)
5732 n->c1->parent = n;
Owen Taylor3473f882001-02-23 17:55:21 +00005733 cur->c2 = n;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005734 if (n != NULL)
5735 n->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00005736 cur = n;
Owen Taylor3473f882001-02-23 17:55:21 +00005737 }
5738 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00005739 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005740 if (elem == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005741 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005742 "xmlParseElementMixedContentDecl : Name expected\n");
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005743 xmlFreeDocElementContent(ctxt->myDoc, cur);
Owen Taylor3473f882001-02-23 17:55:21 +00005744 return(NULL);
5745 }
5746 SKIP_BLANKS;
5747 GROW;
5748 }
5749 if ((RAW == ')') && (NXT(1) == '*')) {
5750 if (elem != NULL) {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005751 cur->c2 = xmlNewDocElementContent(ctxt->myDoc, elem,
Owen Taylor3473f882001-02-23 17:55:21 +00005752 XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005753 if (cur->c2 != NULL)
5754 cur->c2->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00005755 }
5756 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005757 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00005758 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
5759"Element content declaration doesn't start and stop in the same entity\n",
Daniel Veillardae0765b2008-07-31 19:54:59 +00005760 NULL, NULL);
Daniel Veillard8dc16a62002-02-19 21:08:48 +00005761 }
Owen Taylor3473f882001-02-23 17:55:21 +00005762 SKIP(2);
5763 } else {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005764 xmlFreeDocElementContent(ctxt->myDoc, ret);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005765 xmlFatalErr(ctxt, XML_ERR_MIXED_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005766 return(NULL);
5767 }
5768
5769 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005770 xmlFatalErr(ctxt, XML_ERR_PCDATA_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005771 }
5772 return(ret);
5773}
5774
5775/**
5776 * xmlParseElementChildrenContentDecl:
5777 * @ctxt: an XML parser context
Daniel Veillarda9b66d02002-12-11 14:23:49 +00005778 * @inputchk: the input used for the current entity, needed for boundary checks
Owen Taylor3473f882001-02-23 17:55:21 +00005779 *
5780 * parse the declaration for a Mixed Element content
5781 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
5782 *
5783 *
5784 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
5785 *
5786 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
5787 *
5788 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
5789 *
5790 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
5791 *
5792 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
5793 * TODO Parameter-entity replacement text must be properly nested
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005794 * with parenthesized groups. That is to say, if either of the
Owen Taylor3473f882001-02-23 17:55:21 +00005795 * opening or closing parentheses in a choice, seq, or Mixed
5796 * construct is contained in the replacement text for a parameter
5797 * entity, both must be contained in the same replacement text. For
5798 * interoperability, if a parameter-entity reference appears in a
5799 * choice, seq, or Mixed construct, its replacement text should not
5800 * be empty, and neither the first nor last non-blank character of
5801 * the replacement text should be a connector (| or ,).
5802 *
Daniel Veillard5e2dace2001-07-18 19:30:27 +00005803 * Returns the tree of xmlElementContentPtr describing the element
Owen Taylor3473f882001-02-23 17:55:21 +00005804 * hierarchy.
5805 */
5806xmlElementContentPtr
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005807xmlParseElementChildrenContentDecl (xmlParserCtxtPtr ctxt, int inputchk) {
Owen Taylor3473f882001-02-23 17:55:21 +00005808 xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005809 const xmlChar *elem;
Owen Taylor3473f882001-02-23 17:55:21 +00005810 xmlChar type = 0;
5811
5812 SKIP_BLANKS;
5813 GROW;
5814 if (RAW == '(') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005815 int inputid = ctxt->input->id;
Daniel Veillard8dc16a62002-02-19 21:08:48 +00005816
Owen Taylor3473f882001-02-23 17:55:21 +00005817 /* Recurse on first child */
5818 NEXT;
5819 SKIP_BLANKS;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005820 cur = ret = xmlParseElementChildrenContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00005821 SKIP_BLANKS;
5822 GROW;
5823 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00005824 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005825 if (elem == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005826 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005827 return(NULL);
5828 }
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005829 cur = ret = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00005830 if (cur == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005831 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00005832 return(NULL);
5833 }
Owen Taylor3473f882001-02-23 17:55:21 +00005834 GROW;
5835 if (RAW == '?') {
5836 cur->ocur = XML_ELEMENT_CONTENT_OPT;
5837 NEXT;
5838 } else if (RAW == '*') {
5839 cur->ocur = XML_ELEMENT_CONTENT_MULT;
5840 NEXT;
5841 } else if (RAW == '+') {
5842 cur->ocur = XML_ELEMENT_CONTENT_PLUS;
5843 NEXT;
5844 } else {
5845 cur->ocur = XML_ELEMENT_CONTENT_ONCE;
5846 }
Owen Taylor3473f882001-02-23 17:55:21 +00005847 GROW;
5848 }
5849 SKIP_BLANKS;
5850 SHRINK;
5851 while (RAW != ')') {
5852 /*
5853 * Each loop we parse one separator and one element.
5854 */
5855 if (RAW == ',') {
5856 if (type == 0) type = CUR;
5857
5858 /*
5859 * Detect "Name | Name , Name" error
5860 */
5861 else if (type != CUR) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005862 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005863 "xmlParseElementChildrenContentDecl : '%c' expected\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005864 type);
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00005865 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005866 xmlFreeDocElementContent(ctxt->myDoc, last);
Owen Taylor3473f882001-02-23 17:55:21 +00005867 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005868 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00005869 return(NULL);
5870 }
5871 NEXT;
5872
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005873 op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_SEQ);
Owen Taylor3473f882001-02-23 17:55:21 +00005874 if (op == NULL) {
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00005875 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005876 xmlFreeDocElementContent(ctxt->myDoc, last);
5877 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00005878 return(NULL);
5879 }
5880 if (last == NULL) {
5881 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005882 if (ret != NULL)
5883 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00005884 ret = cur = op;
5885 } else {
5886 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005887 if (op != NULL)
5888 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00005889 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005890 if (last != NULL)
5891 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00005892 cur =op;
5893 last = NULL;
5894 }
5895 } else if (RAW == '|') {
5896 if (type == 0) type = CUR;
5897
5898 /*
5899 * Detect "Name , Name | Name" error
5900 */
5901 else if (type != CUR) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005902 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005903 "xmlParseElementChildrenContentDecl : '%c' expected\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005904 type);
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00005905 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005906 xmlFreeDocElementContent(ctxt->myDoc, last);
Owen Taylor3473f882001-02-23 17:55:21 +00005907 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005908 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00005909 return(NULL);
5910 }
5911 NEXT;
5912
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005913 op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
Owen Taylor3473f882001-02-23 17:55:21 +00005914 if (op == NULL) {
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00005915 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005916 xmlFreeDocElementContent(ctxt->myDoc, last);
Owen Taylor3473f882001-02-23 17:55:21 +00005917 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005918 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00005919 return(NULL);
5920 }
5921 if (last == NULL) {
5922 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005923 if (ret != NULL)
5924 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00005925 ret = cur = op;
5926 } else {
5927 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005928 if (op != NULL)
5929 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00005930 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005931 if (last != NULL)
5932 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00005933 cur =op;
5934 last = NULL;
5935 }
5936 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005937 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED, NULL);
Daniel Veillardc707d0b2008-01-24 14:48:54 +00005938 if ((last != NULL) && (last != ret))
5939 xmlFreeDocElementContent(ctxt->myDoc, last);
Owen Taylor3473f882001-02-23 17:55:21 +00005940 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005941 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00005942 return(NULL);
5943 }
5944 GROW;
5945 SKIP_BLANKS;
5946 GROW;
5947 if (RAW == '(') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005948 int inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00005949 /* Recurse on second child */
5950 NEXT;
5951 SKIP_BLANKS;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005952 last = xmlParseElementChildrenContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00005953 SKIP_BLANKS;
5954 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00005955 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005956 if (elem == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005957 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005958 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005959 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00005960 return(NULL);
5961 }
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005962 last = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillard68b6e022008-03-31 09:26:00 +00005963 if (last == NULL) {
5964 if (ret != NULL)
5965 xmlFreeDocElementContent(ctxt->myDoc, ret);
5966 return(NULL);
5967 }
Owen Taylor3473f882001-02-23 17:55:21 +00005968 if (RAW == '?') {
5969 last->ocur = XML_ELEMENT_CONTENT_OPT;
5970 NEXT;
5971 } else if (RAW == '*') {
5972 last->ocur = XML_ELEMENT_CONTENT_MULT;
5973 NEXT;
5974 } else if (RAW == '+') {
5975 last->ocur = XML_ELEMENT_CONTENT_PLUS;
5976 NEXT;
5977 } else {
5978 last->ocur = XML_ELEMENT_CONTENT_ONCE;
5979 }
5980 }
5981 SKIP_BLANKS;
5982 GROW;
5983 }
5984 if ((cur != NULL) && (last != NULL)) {
5985 cur->c2 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005986 if (last != NULL)
5987 last->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00005988 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005989 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00005990 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
5991"Element content declaration doesn't start and stop in the same entity\n",
Daniel Veillardae0765b2008-07-31 19:54:59 +00005992 NULL, NULL);
Daniel Veillard8dc16a62002-02-19 21:08:48 +00005993 }
Owen Taylor3473f882001-02-23 17:55:21 +00005994 NEXT;
5995 if (RAW == '?') {
William M. Brackf8f2e8f2004-05-14 04:37:41 +00005996 if (ret != NULL) {
5997 if ((ret->ocur == XML_ELEMENT_CONTENT_PLUS) ||
5998 (ret->ocur == XML_ELEMENT_CONTENT_MULT))
5999 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6000 else
6001 ret->ocur = XML_ELEMENT_CONTENT_OPT;
6002 }
Owen Taylor3473f882001-02-23 17:55:21 +00006003 NEXT;
6004 } else if (RAW == '*') {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00006005 if (ret != NULL) {
Daniel Veillarde470df72001-04-18 21:41:07 +00006006 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Daniel Veillardce2c2f02001-10-18 14:57:24 +00006007 cur = ret;
6008 /*
6009 * Some normalization:
6010 * (a | b* | c?)* == (a | b | c)*
6011 */
Daniel Veillard30e76072006-03-09 14:13:55 +00006012 while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00006013 if ((cur->c1 != NULL) &&
6014 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6015 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT)))
6016 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6017 if ((cur->c2 != NULL) &&
6018 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6019 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT)))
6020 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6021 cur = cur->c2;
6022 }
6023 }
Owen Taylor3473f882001-02-23 17:55:21 +00006024 NEXT;
6025 } else if (RAW == '+') {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00006026 if (ret != NULL) {
6027 int found = 0;
6028
William M. Brackf8f2e8f2004-05-14 04:37:41 +00006029 if ((ret->ocur == XML_ELEMENT_CONTENT_OPT) ||
6030 (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6031 ret->ocur = XML_ELEMENT_CONTENT_MULT;
William M. Brackeb8509c2004-05-14 03:48:02 +00006032 else
6033 ret->ocur = XML_ELEMENT_CONTENT_PLUS;
Daniel Veillardce2c2f02001-10-18 14:57:24 +00006034 /*
6035 * Some normalization:
6036 * (a | b*)+ == (a | b)*
6037 * (a | b?)+ == (a | b)*
6038 */
Daniel Veillard30e76072006-03-09 14:13:55 +00006039 while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00006040 if ((cur->c1 != NULL) &&
6041 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6042 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) {
6043 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6044 found = 1;
6045 }
6046 if ((cur->c2 != NULL) &&
6047 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6048 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) {
6049 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6050 found = 1;
6051 }
6052 cur = cur->c2;
6053 }
6054 if (found)
6055 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6056 }
Owen Taylor3473f882001-02-23 17:55:21 +00006057 NEXT;
6058 }
6059 return(ret);
6060}
6061
6062/**
6063 * xmlParseElementContentDecl:
6064 * @ctxt: an XML parser context
6065 * @name: the name of the element being defined.
6066 * @result: the Element Content pointer will be stored here if any
6067 *
6068 * parse the declaration for an Element content either Mixed or Children,
6069 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
6070 *
6071 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
6072 *
6073 * returns: the type of element content XML_ELEMENT_TYPE_xxx
6074 */
6075
6076int
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006077xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, const xmlChar *name,
Owen Taylor3473f882001-02-23 17:55:21 +00006078 xmlElementContentPtr *result) {
6079
6080 xmlElementContentPtr tree = NULL;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006081 int inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00006082 int res;
6083
6084 *result = NULL;
6085
6086 if (RAW != '(') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006087 xmlFatalErrMsgStr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
Daniel Veillard56a4cb82001-03-24 17:00:36 +00006088 "xmlParseElementContentDecl : %s '(' expected\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006089 return(-1);
6090 }
6091 NEXT;
6092 GROW;
6093 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00006094 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006095 tree = xmlParseElementMixedContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00006096 res = XML_ELEMENT_TYPE_MIXED;
6097 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006098 tree = xmlParseElementChildrenContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00006099 res = XML_ELEMENT_TYPE_ELEMENT;
6100 }
Owen Taylor3473f882001-02-23 17:55:21 +00006101 SKIP_BLANKS;
6102 *result = tree;
6103 return(res);
6104}
6105
6106/**
6107 * xmlParseElementDecl:
6108 * @ctxt: an XML parser context
6109 *
6110 * parse an Element declaration.
6111 *
6112 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
6113 *
6114 * [ VC: Unique Element Type Declaration ]
6115 * No element type may be declared more than once
6116 *
6117 * Returns the type of the element, or -1 in case of error
6118 */
6119int
6120xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006121 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00006122 int ret = -1;
6123 xmlElementContentPtr content = NULL;
6124
Daniel Veillard4c778d82005-01-23 17:37:44 +00006125 /* GROW; done in the caller */
Daniel Veillarda07050d2003-10-19 14:46:32 +00006126 if (CMP9(CUR_PTR, '<', '!', 'E', 'L', 'E', 'M', 'E', 'N', 'T')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006127 xmlParserInputPtr input = ctxt->input;
6128
6129 SKIP(9);
William M. Brack76e95df2003-10-18 16:20:14 +00006130 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006131 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6132 "Space required after 'ELEMENT'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006133 }
6134 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00006135 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006136 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006137 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6138 "xmlParseElementDecl: no name for Element\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006139 return(-1);
6140 }
6141 while ((RAW == 0) && (ctxt->inputNr > 1))
6142 xmlPopInput(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00006143 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006144 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6145 "Space required after the element name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006146 }
6147 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00006148 if (CMP5(CUR_PTR, 'E', 'M', 'P', 'T', 'Y')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006149 SKIP(5);
6150 /*
6151 * Element must always be empty.
6152 */
6153 ret = XML_ELEMENT_TYPE_EMPTY;
6154 } else if ((RAW == 'A') && (NXT(1) == 'N') &&
6155 (NXT(2) == 'Y')) {
6156 SKIP(3);
6157 /*
6158 * Element is a generic container.
6159 */
6160 ret = XML_ELEMENT_TYPE_ANY;
6161 } else if (RAW == '(') {
6162 ret = xmlParseElementContentDecl(ctxt, name, &content);
6163 } else {
6164 /*
6165 * [ WFC: PEs in Internal Subset ] error handling.
6166 */
6167 if ((RAW == '%') && (ctxt->external == 0) &&
6168 (ctxt->inputNr == 1)) {
Daniel Veillardf403d292003-10-05 13:51:35 +00006169 xmlFatalErrMsg(ctxt, XML_ERR_PEREF_IN_INT_SUBSET,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006170 "PEReference: forbidden within markup decl in internal subset\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006171 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00006172 xmlFatalErrMsg(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
Owen Taylor3473f882001-02-23 17:55:21 +00006173 "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
6174 }
Owen Taylor3473f882001-02-23 17:55:21 +00006175 return(-1);
6176 }
6177
6178 SKIP_BLANKS;
6179 /*
6180 * Pop-up of finished entities.
6181 */
6182 while ((RAW == 0) && (ctxt->inputNr > 1))
6183 xmlPopInput(ctxt);
6184 SKIP_BLANKS;
6185
6186 if (RAW != '>') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006187 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006188 if (content != NULL) {
6189 xmlFreeDocElementContent(ctxt->myDoc, content);
6190 }
Owen Taylor3473f882001-02-23 17:55:21 +00006191 } else {
6192 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006193 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6194 "Element declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006195 }
6196
6197 NEXT;
6198 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006199 (ctxt->sax->elementDecl != NULL)) {
6200 if (content != NULL)
6201 content->parent = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00006202 ctxt->sax->elementDecl(ctxt->userData, name, ret,
6203 content);
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006204 if ((content != NULL) && (content->parent == NULL)) {
6205 /*
6206 * this is a trick: if xmlAddElementDecl is called,
6207 * instead of copying the full tree it is plugged directly
6208 * if called from the parser. Avoid duplicating the
6209 * interfaces or change the API/ABI
6210 */
6211 xmlFreeDocElementContent(ctxt->myDoc, content);
6212 }
6213 } else if (content != NULL) {
6214 xmlFreeDocElementContent(ctxt->myDoc, content);
6215 }
Owen Taylor3473f882001-02-23 17:55:21 +00006216 }
Owen Taylor3473f882001-02-23 17:55:21 +00006217 }
6218 return(ret);
6219}
6220
6221/**
Owen Taylor3473f882001-02-23 17:55:21 +00006222 * xmlParseConditionalSections
6223 * @ctxt: an XML parser context
6224 *
6225 * [61] conditionalSect ::= includeSect | ignoreSect
6226 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
6227 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
6228 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
6229 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
6230 */
6231
Daniel Veillard56a4cb82001-03-24 17:00:36 +00006232static void
Owen Taylor3473f882001-02-23 17:55:21 +00006233xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
Daniel Veillard49d44052008-08-27 19:57:06 +00006234 int id = ctxt->input->id;
6235
Owen Taylor3473f882001-02-23 17:55:21 +00006236 SKIP(3);
6237 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00006238 if (CMP7(CUR_PTR, 'I', 'N', 'C', 'L', 'U', 'D', 'E')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006239 SKIP(7);
6240 SKIP_BLANKS;
6241 if (RAW != '[') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006242 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006243 } else {
Daniel Veillard49d44052008-08-27 19:57:06 +00006244 if (ctxt->input->id != id) {
6245 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6246 "All markup of the conditional section is not in the same entity\n",
6247 NULL, NULL);
6248 }
Owen Taylor3473f882001-02-23 17:55:21 +00006249 NEXT;
6250 }
6251 if (xmlParserDebugEntities) {
6252 if ((ctxt->input != NULL) && (ctxt->input->filename))
6253 xmlGenericError(xmlGenericErrorContext,
6254 "%s(%d): ", ctxt->input->filename,
6255 ctxt->input->line);
6256 xmlGenericError(xmlGenericErrorContext,
6257 "Entering INCLUDE Conditional Section\n");
6258 }
6259
6260 while ((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') ||
6261 (NXT(2) != '>'))) {
6262 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00006263 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00006264
6265 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6266 xmlParseConditionalSections(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00006267 } else if (IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00006268 NEXT;
6269 } else if (RAW == '%') {
6270 xmlParsePEReference(ctxt);
6271 } else
6272 xmlParseMarkupDecl(ctxt);
6273
6274 /*
6275 * Pop-up of finished entities.
6276 */
6277 while ((RAW == 0) && (ctxt->inputNr > 1))
6278 xmlPopInput(ctxt);
6279
Daniel Veillardfdc91562002-07-01 21:52:03 +00006280 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006281 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006282 break;
6283 }
6284 }
6285 if (xmlParserDebugEntities) {
6286 if ((ctxt->input != NULL) && (ctxt->input->filename))
6287 xmlGenericError(xmlGenericErrorContext,
6288 "%s(%d): ", ctxt->input->filename,
6289 ctxt->input->line);
6290 xmlGenericError(xmlGenericErrorContext,
6291 "Leaving INCLUDE Conditional Section\n");
6292 }
6293
Daniel Veillarda07050d2003-10-19 14:46:32 +00006294 } else if (CMP6(CUR_PTR, 'I', 'G', 'N', 'O', 'R', 'E')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006295 int state;
William M. Brack78637da2003-07-31 14:47:38 +00006296 xmlParserInputState instate;
Owen Taylor3473f882001-02-23 17:55:21 +00006297 int depth = 0;
6298
6299 SKIP(6);
6300 SKIP_BLANKS;
6301 if (RAW != '[') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006302 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006303 } else {
Daniel Veillard49d44052008-08-27 19:57:06 +00006304 if (ctxt->input->id != id) {
6305 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6306 "All markup of the conditional section is not in the same entity\n",
6307 NULL, NULL);
6308 }
Owen Taylor3473f882001-02-23 17:55:21 +00006309 NEXT;
6310 }
6311 if (xmlParserDebugEntities) {
6312 if ((ctxt->input != NULL) && (ctxt->input->filename))
6313 xmlGenericError(xmlGenericErrorContext,
6314 "%s(%d): ", ctxt->input->filename,
6315 ctxt->input->line);
6316 xmlGenericError(xmlGenericErrorContext,
6317 "Entering IGNORE Conditional Section\n");
6318 }
6319
6320 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006321 * Parse up to the end of the conditional section
Owen Taylor3473f882001-02-23 17:55:21 +00006322 * But disable SAX event generating DTD building in the meantime
6323 */
6324 state = ctxt->disableSAX;
6325 instate = ctxt->instate;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006326 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006327 ctxt->instate = XML_PARSER_IGNORE;
6328
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00006329 while ((depth >= 0) && (RAW != 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00006330 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6331 depth++;
6332 SKIP(3);
6333 continue;
6334 }
6335 if ((RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
6336 if (--depth >= 0) SKIP(3);
6337 continue;
6338 }
6339 NEXT;
6340 continue;
6341 }
6342
6343 ctxt->disableSAX = state;
6344 ctxt->instate = instate;
6345
6346 if (xmlParserDebugEntities) {
6347 if ((ctxt->input != NULL) && (ctxt->input->filename))
6348 xmlGenericError(xmlGenericErrorContext,
6349 "%s(%d): ", ctxt->input->filename,
6350 ctxt->input->line);
6351 xmlGenericError(xmlGenericErrorContext,
6352 "Leaving IGNORE Conditional Section\n");
6353 }
6354
6355 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006356 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID_KEYWORD, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006357 }
6358
6359 if (RAW == 0)
6360 SHRINK;
6361
6362 if (RAW == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006363 xmlFatalErr(ctxt, XML_ERR_CONDSEC_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006364 } else {
Daniel Veillard49d44052008-08-27 19:57:06 +00006365 if (ctxt->input->id != id) {
6366 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6367 "All markup of the conditional section is not in the same entity\n",
6368 NULL, NULL);
6369 }
Owen Taylor3473f882001-02-23 17:55:21 +00006370 SKIP(3);
6371 }
6372}
6373
6374/**
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006375 * xmlParseMarkupDecl:
6376 * @ctxt: an XML parser context
6377 *
6378 * parse Markup declarations
6379 *
6380 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
6381 * NotationDecl | PI | Comment
6382 *
6383 * [ VC: Proper Declaration/PE Nesting ]
6384 * Parameter-entity replacement text must be properly nested with
6385 * markup declarations. That is to say, if either the first character
6386 * or the last character of a markup declaration (markupdecl above) is
6387 * contained in the replacement text for a parameter-entity reference,
6388 * both must be contained in the same replacement text.
6389 *
6390 * [ WFC: PEs in Internal Subset ]
6391 * In the internal DTD subset, parameter-entity references can occur
6392 * only where markup declarations can occur, not within markup declarations.
6393 * (This does not apply to references that occur in external parameter
6394 * entities or to the external subset.)
6395 */
6396void
6397xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
6398 GROW;
Daniel Veillard4c778d82005-01-23 17:37:44 +00006399 if (CUR == '<') {
6400 if (NXT(1) == '!') {
6401 switch (NXT(2)) {
6402 case 'E':
6403 if (NXT(3) == 'L')
6404 xmlParseElementDecl(ctxt);
6405 else if (NXT(3) == 'N')
6406 xmlParseEntityDecl(ctxt);
6407 break;
6408 case 'A':
6409 xmlParseAttributeListDecl(ctxt);
6410 break;
6411 case 'N':
6412 xmlParseNotationDecl(ctxt);
6413 break;
6414 case '-':
6415 xmlParseComment(ctxt);
6416 break;
6417 default:
6418 /* there is an error but it will be detected later */
6419 break;
6420 }
6421 } else if (NXT(1) == '?') {
6422 xmlParsePI(ctxt);
6423 }
6424 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006425 /*
6426 * This is only for internal subset. On external entities,
6427 * the replacement is done before parsing stage
6428 */
6429 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
6430 xmlParsePEReference(ctxt);
6431
6432 /*
6433 * Conditional sections are allowed from entities included
6434 * by PE References in the internal subset.
6435 */
6436 if ((ctxt->external == 0) && (ctxt->inputNr > 1)) {
6437 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6438 xmlParseConditionalSections(ctxt);
6439 }
6440 }
6441
6442 ctxt->instate = XML_PARSER_DTD;
6443}
6444
6445/**
6446 * xmlParseTextDecl:
6447 * @ctxt: an XML parser context
Daniel Veillard40ec29a2008-07-30 12:35:40 +00006448 *
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006449 * parse an XML declaration header for external entities
6450 *
6451 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006452 */
6453
6454void
6455xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
6456 xmlChar *version;
Daniel Veillardf5cb3cd2003-10-28 13:58:13 +00006457 const xmlChar *encoding;
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006458
6459 /*
6460 * We know that '<?xml' is here.
6461 */
Daniel Veillarda07050d2003-10-19 14:46:32 +00006462 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006463 SKIP(5);
6464 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006465 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_STARTED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006466 return;
6467 }
6468
William M. Brack76e95df2003-10-18 16:20:14 +00006469 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006470 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6471 "Space needed after '<?xml'\n");
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006472 }
6473 SKIP_BLANKS;
6474
6475 /*
6476 * We may have the VersionInfo here.
6477 */
6478 version = xmlParseVersionInfo(ctxt);
6479 if (version == NULL)
6480 version = xmlCharStrdup(XML_DEFAULT_VERSION);
Daniel Veillard401c2112002-01-07 16:54:10 +00006481 else {
William M. Brack76e95df2003-10-18 16:20:14 +00006482 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006483 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6484 "Space needed here\n");
Daniel Veillard401c2112002-01-07 16:54:10 +00006485 }
6486 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006487 ctxt->input->version = version;
6488
6489 /*
6490 * We must have the encoding declaration
6491 */
Daniel Veillardf5cb3cd2003-10-28 13:58:13 +00006492 encoding = xmlParseEncodingDecl(ctxt);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006493 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
6494 /*
6495 * The XML REC instructs us to stop parsing right here
6496 */
6497 return;
6498 }
Daniel Veillardf5cb3cd2003-10-28 13:58:13 +00006499 if ((encoding == NULL) && (ctxt->errNo == XML_ERR_OK)) {
6500 xmlFatalErrMsg(ctxt, XML_ERR_MISSING_ENCODING,
6501 "Missing encoding in text declaration\n");
6502 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006503
6504 SKIP_BLANKS;
6505 if ((RAW == '?') && (NXT(1) == '>')) {
6506 SKIP(2);
6507 } else if (RAW == '>') {
6508 /* Deprecated old WD ... */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006509 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006510 NEXT;
6511 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006512 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006513 MOVETO_ENDTAG(CUR_PTR);
6514 NEXT;
6515 }
6516}
6517
6518/**
Owen Taylor3473f882001-02-23 17:55:21 +00006519 * xmlParseExternalSubset:
6520 * @ctxt: an XML parser context
6521 * @ExternalID: the external identifier
6522 * @SystemID: the system identifier (or URL)
6523 *
6524 * parse Markup declarations from an external subset
6525 *
6526 * [30] extSubset ::= textDecl? extSubsetDecl
6527 *
6528 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
6529 */
6530void
6531xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
6532 const xmlChar *SystemID) {
Daniel Veillard309f81d2003-09-23 09:02:53 +00006533 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006534 GROW;
Daniel Veillard6ccc56d2008-04-03 12:59:06 +00006535
6536 if ((ctxt->encoding == (const xmlChar *)XML_CHAR_ENCODING_NONE) &&
6537 (ctxt->input->end - ctxt->input->cur >= 4)) {
6538 xmlChar start[4];
6539 xmlCharEncoding enc;
6540
6541 start[0] = RAW;
6542 start[1] = NXT(1);
6543 start[2] = NXT(2);
6544 start[3] = NXT(3);
6545 enc = xmlDetectCharEncoding(start, 4);
6546 if (enc != XML_CHAR_ENCODING_NONE)
6547 xmlSwitchEncoding(ctxt, enc);
6548 }
6549
Daniel Veillarda07050d2003-10-19 14:46:32 +00006550 if (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006551 xmlParseTextDecl(ctxt);
6552 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
6553 /*
6554 * The XML REC instructs us to stop parsing right here
6555 */
6556 ctxt->instate = XML_PARSER_EOF;
6557 return;
6558 }
6559 }
6560 if (ctxt->myDoc == NULL) {
6561 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
Daniel Veillardae0765b2008-07-31 19:54:59 +00006562 if (ctxt->myDoc == NULL) {
6563 xmlErrMemory(ctxt, "New Doc failed");
6564 return;
6565 }
6566 ctxt->myDoc->properties = XML_DOC_INTERNAL;
Owen Taylor3473f882001-02-23 17:55:21 +00006567 }
6568 if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
6569 xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
6570
6571 ctxt->instate = XML_PARSER_DTD;
6572 ctxt->external = 1;
6573 while (((RAW == '<') && (NXT(1) == '?')) ||
6574 ((RAW == '<') && (NXT(1) == '!')) ||
William M. Brack76e95df2003-10-18 16:20:14 +00006575 (RAW == '%') || IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00006576 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00006577 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00006578
6579 GROW;
6580 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6581 xmlParseConditionalSections(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00006582 } else if (IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00006583 NEXT;
6584 } else if (RAW == '%') {
6585 xmlParsePEReference(ctxt);
6586 } else
6587 xmlParseMarkupDecl(ctxt);
6588
6589 /*
6590 * Pop-up of finished entities.
6591 */
6592 while ((RAW == 0) && (ctxt->inputNr > 1))
6593 xmlPopInput(ctxt);
6594
Daniel Veillardfdc91562002-07-01 21:52:03 +00006595 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006596 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006597 break;
6598 }
6599 }
6600
6601 if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006602 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006603 }
6604
6605}
6606
6607/**
 * xmlParseReference:
 * @ctxt:  an XML parser context
 *
 * Parse a reference found in content and hand its value over to the
 * application. Nothing is done unless the current character is '&'.
 *
 * [67] Reference ::= EntityRef | CharRef
 *
 * Character references are decoded with xmlParseCharRef(). With UTF-8
 * buffers the value is encoded and passed to the characters() callback.
 * With non UTF-8 buffers a value that fits in 8 bits is passed to
 * characters() as a single byte, anything larger is re-serialized as
 * "#x..." or "#..." and passed to the reference() callback.
 *
 * Entity references are resolved with xmlParseEntityRef(); processing
 * stops there if no entity is returned or if the document is no longer
 * well-formed. The content of a predefined entity goes straight to
 * characters(). For any other entity the first reference
 * (ent->checked == 0) parses the replacement text. Afterwards, depending
 * on ctxt->replaceEntities and on whether ent->children was filled, either
 * the reference() callback is invoked with the entity name or the entity
 * nodes are attached (directly or as copies) under ctxt->node.
 */
6618void
6619xmlParseReference(xmlParserCtxtPtr ctxt) {
6620 xmlEntityPtr ent;
6621 xmlChar *val;
Daniel Veillard0161e632008-08-28 15:36:32 +00006622 int was_checked;
6623 xmlNodePtr list = NULL;
6624 xmlParserErrors ret = XML_ERR_OK;
Owen Taylor3473f882001-02-23 17:55:21 +00006625
Daniel Veillard0161e632008-08-28 15:36:32 +00006626
6627 if (RAW != '&')
6628 return;
6629
6630 /*
6631 * Simple case of a CharRef
6632 */
Owen Taylor3473f882001-02-23 17:55:21 +00006633 if (NXT(1) == '#') {
6634 int i = 0;
6635 xmlChar out[10];
/*
 * hex is sampled before xmlParseCharRef() runs; it is only used below to
 * choose between the "#x%X" and "#%d" serializations.
 */
6636 int hex = NXT(2);
Daniel Veillard56a4cb82001-03-24 17:00:36 +00006637 int value = xmlParseCharRef(ctxt);
Daniel Veillard0161e632008-08-28 15:36:32 +00006638
/* A zero value yields no callback (presumably the error case - confirm) */
Daniel Veillarddc171602008-03-26 17:41:38 +00006639 if (value == 0)
6640 return;
Owen Taylor3473f882001-02-23 17:55:21 +00006641 if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
6642 /*
6643 * So we are using non-UTF-8 buffers
6644 * Check that the char fit on 8bits, if not
6645 * generate a CharRef.
6646 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00006647 if (value <= 0xFF) {
6648 out[0] = value;
Owen Taylor3473f882001-02-23 17:55:21 +00006649 out[1] = 0;
6650 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
6651 (!ctxt->disableSAX))
6652 ctxt->sax->characters(ctxt->userData, out, 1);
6653 } else {
6654 if ((hex == 'x') || (hex == 'X'))
Aleksey Sanin49cc9752002-06-14 17:07:10 +00006655 snprintf((char *)out, sizeof(out), "#x%X", value);
Owen Taylor3473f882001-02-23 17:55:21 +00006656 else
Aleksey Sanin49cc9752002-06-14 17:07:10 +00006657 snprintf((char *)out, sizeof(out), "#%d", value);
Owen Taylor3473f882001-02-23 17:55:21 +00006658 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
6659 (!ctxt->disableSAX))
6660 ctxt->sax->reference(ctxt->userData, out);
6661 }
6662 } else {
6663 /*
6664 * Just encode the value in UTF-8
6665 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00006666 COPY_BUF(0 ,out, i, value);
Owen Taylor3473f882001-02-23 17:55:21 +00006667 out[i] = 0;
6668 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
6669 (!ctxt->disableSAX))
6670 ctxt->sax->characters(ctxt->userData, out, i);
6671 }
Daniel Veillard0161e632008-08-28 15:36:32 +00006672 return;
6673 }
Daniel Veillarda37a6ad2006-10-10 20:05:45 +00006674
Daniel Veillard0161e632008-08-28 15:36:32 +00006675 /*
6676 * We are seeing an entity reference
6677 */
6678 ent = xmlParseEntityRef(ctxt);
6679 if (ent == NULL) return;
6680 if (!ctxt->wellFormed)
6681 return;
/*
 * Snapshot taken before the first-reference parsing below updates
 * ent->checked; it later tells whether this call did that parsing.
 */
6682 was_checked = ent->checked;
6683
6684 /* special case of predefined entities */
6685 if ((ent->name == NULL) ||
6686 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
6687 val = ent->content;
6688 if (val == NULL) return;
6689 /*
6690 * inline the entity.
6691 */
6692 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
6693 (!ctxt->disableSAX))
6694 ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
6695 return;
6696 }
6697
6698 /*
6699 * The first reference to the entity trigger a parsing phase
6700 * where the ent->children is filled with the result from
6701 * the parsing.
6702 */
6703 if (ent->checked == 0) {
/* Baseline used to count the entities met while parsing the content */
6704 unsigned long oldnbent = ctxt->nbentities;
6705
6706 /*
6707 * This is a bit hackish but this seems the best
6708 * way to make sure both SAX and DOM entity support
6709 * behaves okay.
6710 */
6711 void *user_data;
6712 if (ctxt->userData == ctxt)
6713 user_data = NULL;
6714 else
6715 user_data = ctxt->userData;
6716
6717 /*
6718 * Check that this entity is well formed
6719 * 4.3.2: An internal general parsed entity is well-formed
6720 * if its replacement text matches the production labeled
6721 * content.
6722 */
6723 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
6724 ctxt->depth++;
6725 ret = xmlParseBalancedChunkMemoryInternal(ctxt, ent->content,
6726 user_data, &list);
6727 ctxt->depth--;
6728
6729 } else if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
6730 ctxt->depth++;
6731 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt, ctxt->sax,
6732 user_data, ctxt->depth, ent->URI,
6733 ent->ExternalID, &list);
6734 ctxt->depth--;
6735 } else {
6736 ret = XML_ERR_ENTITY_PE_INTERNAL;
6737 xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
6738 "invalid entity type found\n", NULL);
6739 }
6740
6741 /*
6742 * Store the number of entities needing parsing for this entity
6743 * content and do checkings
6744 */
6745 ent->checked = ctxt->nbentities - oldnbent;
6746 if (ret == XML_ERR_ENTITY_LOOP) {
Daniel Veillard4bf899b2008-08-20 17:04:30 +00006747 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
Daniel Veillard0161e632008-08-28 15:36:32 +00006748 xmlFreeNodeList(list);
Daniel Veillard4bf899b2008-08-20 17:04:30 +00006749 return;
6750 }
Daniel Veillard0161e632008-08-28 15:36:32 +00006751 if (xmlParserEntityCheck(ctxt, 0, ent)) {
6752 xmlFreeNodeList(list);
6753 return;
6754 }
Owen Taylor3473f882001-02-23 17:55:21 +00006755
/*
 * Decide who owns the freshly parsed node list: it is stored on the
 * entity, then either left in 'list' to be plugged into the document
 * further down (ent->owner = 0) or kept by the entity (ent->owner = 1).
 */
Daniel Veillard0161e632008-08-28 15:36:32 +00006756 if ((ret == XML_ERR_OK) && (list != NULL)) {
6757 if (((ent->etype == XML_INTERNAL_GENERAL_ENTITY) ||
6758 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY))&&
6759 (ent->children == NULL)) {
6760 ent->children = list;
6761 if (ctxt->replaceEntities) {
Owen Taylor3473f882001-02-23 17:55:21 +00006762 /*
Daniel Veillard0161e632008-08-28 15:36:32 +00006763 * Prune it directly in the generated document
6764 * except for single text nodes.
Owen Taylor3473f882001-02-23 17:55:21 +00006765 */
Daniel Veillard0161e632008-08-28 15:36:32 +00006766 if (((list->type == XML_TEXT_NODE) &&
6767 (list->next == NULL)) ||
6768 (ctxt->parseMode == XML_PARSE_READER)) {
6769 list->parent = (xmlNodePtr) ent;
6770 list = NULL;
6771 ent->owner = 1;
6772 } else {
6773 ent->owner = 0;
6774 while (list != NULL) {
6775 list->parent = (xmlNodePtr) ctxt->node;
6776 list->doc = ctxt->myDoc;
6777 if (list->next == NULL)
6778 ent->last = list;
6779 list = list->next;
Owen Taylor3473f882001-02-23 17:55:21 +00006780 }
Daniel Veillard0161e632008-08-28 15:36:32 +00006781 list = ent->children;
6782#ifdef LIBXML_LEGACY_ENABLED
6783 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
6784 xmlAddEntityReference(ent, list, NULL);
6785#endif /* LIBXML_LEGACY_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00006786 }
6787 } else {
Daniel Veillard0161e632008-08-28 15:36:32 +00006788 ent->owner = 1;
6789 while (list != NULL) {
6790 list->parent = (xmlNodePtr) ent;
6791 if (list->next == NULL)
6792 ent->last = list;
6793 list = list->next;
Owen Taylor3473f882001-02-23 17:55:21 +00006794 }
6795 }
Daniel Veillard0161e632008-08-28 15:36:32 +00006796 } else {
6797 xmlFreeNodeList(list);
6798 list = NULL;
Daniel Veillarda37a6ad2006-10-10 20:05:45 +00006799 }
Daniel Veillard0161e632008-08-28 15:36:32 +00006800 } else if ((ret != XML_ERR_OK) &&
6801 (ret != XML_WAR_UNDECLARED_ENTITY)) {
6802 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
6803 "Entity '%s' failed to parse\n", ent->name);
6804 } else if (list != NULL) {
6805 xmlFreeNodeList(list);
6806 list = NULL;
6807 }
/* Never leave checked at 0, so that the parsing phase is not redone */
6808 if (ent->checked == 0)
6809 ent->checked = 1;
6810 } else if (ent->checked != 1) {
/* Already parsed once: add the entity count recorded at that time */
6811 ctxt->nbentities += ent->checked;
6812 }
Daniel Veillarda37a6ad2006-10-10 20:05:45 +00006813
Daniel Veillard0161e632008-08-28 15:36:32 +00006814 /*
6815 * Now that the entity content has been gathered
6816 * provide it to the application, this can take different forms based
6817 * on the parsing modes.
6818 */
6819 if (ent->children == NULL) {
6820 /*
6821 * Probably running in SAX mode and the callbacks don't
6822 * build the entity content. So unless we already went
6823 * though parsing for first checking go though the entity
6824 * content to generate callbacks associated to the entity
6825 */
6826 if (was_checked != 0) {
6827 void *user_data;
Owen Taylor3473f882001-02-23 17:55:21 +00006828 /*
Daniel Veillard0161e632008-08-28 15:36:32 +00006829 * This is a bit hackish but this seems the best
6830 * way to make sure both SAX and DOM entity support
6831 * behaves okay.
Owen Taylor3473f882001-02-23 17:55:21 +00006832 */
Daniel Veillard0161e632008-08-28 15:36:32 +00006833 if (ctxt->userData == ctxt)
6834 user_data = NULL;
6835 else
6836 user_data = ctxt->userData;
6837
6838 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
6839 ctxt->depth++;
6840 ret = xmlParseBalancedChunkMemoryInternal(ctxt,
6841 ent->content, user_data, NULL);
6842 ctxt->depth--;
6843 } else if (ent->etype ==
6844 XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
6845 ctxt->depth++;
6846 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
6847 ctxt->sax, user_data, ctxt->depth,
6848 ent->URI, ent->ExternalID, NULL);
6849 ctxt->depth--;
6850 } else {
6851 ret = XML_ERR_ENTITY_PE_INTERNAL;
6852 xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
6853 "invalid entity type found\n", NULL);
6854 }
6855 if (ret == XML_ERR_ENTITY_LOOP) {
6856 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
6857 return;
6858 }
6859 }
6860 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
6861 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
6862 /*
6863 * Entity reference callback comes second, it's somewhat
6864 * superfluous but a compatibility to historical behaviour
6865 */
6866 ctxt->sax->reference(ctxt->userData, ent->name);
6867 }
6868 return;
6869 }
6870
6871 /*
6872 * The entity has children here. When entities are not substituted,
6873 * only the reference() callback is invoked with the entity name.
6874 */
6874 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
6875 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
6876 /*
6877 * Create a node.
6878 */
6879 ctxt->sax->reference(ctxt->userData, ent->name);
6880 return;
6881 }
6882
6883 if ((ctxt->replaceEntities) || (ent->children == NULL)) {
6884 /*
6885 * There is a problem on the handling of _private for entities
6886 * (bug 155816): Should we copy the content of the field from
6887 * the entity (possibly overwriting some value set by the user
6888 * when a copy is created), should we leave it alone, or should
6889 * we try to take care of different situations? The problem
6890 * is exacerbated by the usage of this field by the xmlReader.
6891 * To fix this bug, we look at _private on the created node
6892 * and, if it's NULL, we copy in whatever was in the entity.
6893 * If it's not NULL we leave it alone. This is somewhat of a
6894 * hack - maybe we should have further tests to determine
6895 * what to do.
6896 */
6897 if ((ctxt->node != NULL) && (ent->children != NULL)) {
6898 /*
6899 * Seems we are generating the DOM content, do
6900 * a simple tree copy for all references except the first
6901 * In the first occurrence list contains the replacement.
6902 * The reader mode test below is on ctxt->parseMode:
6903 * since nodes are discarded we must copy all the time.
6904 */
6905 if (((list == NULL) && (ent->owner == 0)) ||
6906 (ctxt->parseMode == XML_PARSE_READER)) {
6907 xmlNodePtr nw = NULL, cur, firstChild = NULL;
6908
6909 /*
6910 * when operating on a reader, the entities definitions
6911 * are always owning the entities subtree.
6912 if (ctxt->parseMode == XML_PARSE_READER)
6913 ent->owner = 1;
6914 */
6915
/* Attach a copy of each entity child, up to ent->last, to ctxt->node */
6916 cur = ent->children;
6917 while (cur != NULL) {
6918 nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
6919 if (nw != NULL) {
6920 if (nw->_private == NULL)
6921 nw->_private = cur->_private;
6922 if (firstChild == NULL){
6923 firstChild = nw;
6924 }
6925 nw = xmlAddChild(ctxt->node, nw);
6926 }
6927 if (cur == ent->last) {
6928 /*
6929 * needed to detect some strange empty
6930 * node cases in the reader tests
6931 */
6932 if ((ctxt->parseMode == XML_PARSE_READER) &&
6933 (nw != NULL) &&
6934 (nw->type == XML_ELEMENT_NODE) &&
6935 (nw->children == NULL))
6936 nw->extra = 1;
6937
6938 break;
6939 }
6940 cur = cur->next;
6941 }
6942#ifdef LIBXML_LEGACY_ENABLED
6943 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
6944 xmlAddEntityReference(ent, firstChild, nw);
6945#endif /* LIBXML_LEGACY_ENABLED */
6946 } else if (list == NULL) {
6947 xmlNodePtr nw = NULL, cur, next, last,
6948 firstChild = NULL;
6949 /*
6950 * Copy the entity child list and make it the new
6951 * entity child list. The goal is to make sure any
6952 * ID or REF referenced will be the one from the
6953 * document content and not the entity copy.
6954 */
6955 cur = ent->children;
6956 ent->children = NULL;
6957 last = ent->last;
6958 ent->last = NULL;
6959 while (cur != NULL) {
6960 next = cur->next;
6961 cur->next = NULL;
6962 cur->parent = NULL;
6963 nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
/*
 * NOTE(review): when the copy fails, cur has already been unlinked above
 * and is attached nowhere - looks like it is lost; confirm.
 */
6964 if (nw != NULL) {
6965 if (nw->_private == NULL)
6966 nw->_private = cur->_private;
6967 if (firstChild == NULL){
6968 firstChild = cur;
6969 }
6970 xmlAddChild((xmlNodePtr) ent, nw);
6971 xmlAddChild(ctxt->node, cur);
6972 }
6973 if (cur == last)
6974 break;
6975 cur = next;
6976 }
Daniel Veillardcba68392008-08-29 12:43:40 +00006977 if (ent->owner == 0)
6978 ent->owner = 1;
Daniel Veillard0161e632008-08-28 15:36:32 +00006979#ifdef LIBXML_LEGACY_ENABLED
6980 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
6981 xmlAddEntityReference(ent, firstChild, nw);
6982#endif /* LIBXML_LEGACY_ENABLED */
6983 } else {
6984 const xmlChar *nbktext;
6985
6986 /*
6987 * the name change is to avoid coalescing of the
6988 * node with a possible previous text one which
6989 * would make ent->children a dangling pointer
6990 */
6991 nbktext = xmlDictLookup(ctxt->dict, BAD_CAST "nbktext",
6992 -1);
6993 if (ent->children->type == XML_TEXT_NODE)
6994 ent->children->name = nbktext;
6995 if ((ent->last != ent->children) &&
6996 (ent->last->type == XML_TEXT_NODE))
6997 ent->last->name = nbktext;
6998 xmlAddChildList(ctxt->node, ent->children);
6999 }
7000
7001 /*
7002 * This is to avoid a nasty side effect, see
7003 * characters() in SAX.c
7004 */
7005 ctxt->nodemem = 0;
7006 ctxt->nodelen = 0;
7007 return;
Owen Taylor3473f882001-02-23 17:55:21 +00007008 }
7009 }
7010}
7011
7012/**
7013 * xmlParseEntityRef:
7014 * @ctxt: an XML parser context
7015 *
7016 * parse ENTITY references declarations
7017 *
7018 * [68] EntityRef ::= '&' Name ';'
7019 *
7020 * [ WFC: Entity Declared ]
7021 * In a document without any DTD, a document with only an internal DTD
7022 * subset which contains no parameter entity references, or a document
7023 * with "standalone='yes'", the Name given in the entity reference
7024 * must match that in an entity declaration, except that well-formed
7025 * documents need not declare any of the following entities: amp, lt,
7026 * gt, apos, quot. The declaration of a parameter entity must precede
7027 * any reference to it. Similarly, the declaration of a general entity
7028 * must precede any reference to it which appears in a default value in an
7029 * attribute-list declaration. Note that if entities are declared in the
7030 * external subset or in external parameter entities, a non-validating
7031 * processor is not obligated to read and process their declarations;
7032 * for such documents, the rule that an entity must be declared is a
7033 * well-formedness constraint only if standalone='yes'.
7034 *
7035 * [ WFC: Parsed Entity ]
7036 * An entity reference must not contain the name of an unparsed entity
7037 *
7038 * Returns the xmlEntityPtr if found, or NULL otherwise.
7039 */
7040xmlEntityPtr
7041xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007042 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00007043 xmlEntityPtr ent = NULL;
7044
7045 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00007046
Daniel Veillard0161e632008-08-28 15:36:32 +00007047 if (RAW != '&')
7048 return(NULL);
7049 NEXT;
7050 name = xmlParseName(ctxt);
7051 if (name == NULL) {
7052 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7053 "xmlParseEntityRef: no name\n");
7054 return(NULL);
7055 }
7056 if (RAW != ';') {
7057 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7058 return(NULL);
7059 }
7060 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00007061
Daniel Veillard0161e632008-08-28 15:36:32 +00007062 /*
7063 * Predefined entites override any extra definition
7064 */
Rob Richardsb9ed0172009-01-05 17:28:50 +00007065 if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7066 ent = xmlGetPredefinedEntity(name);
7067 if (ent != NULL)
7068 return(ent);
7069 }
Owen Taylor3473f882001-02-23 17:55:21 +00007070
Daniel Veillard0161e632008-08-28 15:36:32 +00007071 /*
7072 * Increate the number of entity references parsed
7073 */
7074 ctxt->nbentities++;
Owen Taylor3473f882001-02-23 17:55:21 +00007075
Daniel Veillard0161e632008-08-28 15:36:32 +00007076 /*
7077 * Ask first SAX for entity resolution, otherwise try the
7078 * entities which may have stored in the parser context.
7079 */
7080 if (ctxt->sax != NULL) {
7081 if (ctxt->sax->getEntity != NULL)
7082 ent = ctxt->sax->getEntity(ctxt->userData, name);
Rob Richardsb9ed0172009-01-05 17:28:50 +00007083 if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7084 (ctxt->options & XML_PARSE_OLDSAX))
7085 ent = xmlGetPredefinedEntity(name);
Daniel Veillard0161e632008-08-28 15:36:32 +00007086 if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7087 (ctxt->userData==ctxt)) {
7088 ent = xmlSAX2GetEntity(ctxt, name);
Owen Taylor3473f882001-02-23 17:55:21 +00007089 }
7090 }
Daniel Veillard0161e632008-08-28 15:36:32 +00007091 /*
7092 * [ WFC: Entity Declared ]
7093 * In a document without any DTD, a document with only an
7094 * internal DTD subset which contains no parameter entity
7095 * references, or a document with "standalone='yes'", the
7096 * Name given in the entity reference must match that in an
7097 * entity declaration, except that well-formed documents
7098 * need not declare any of the following entities: amp, lt,
7099 * gt, apos, quot.
7100 * The declaration of a parameter entity must precede any
7101 * reference to it.
7102 * Similarly, the declaration of a general entity must
7103 * precede any reference to it which appears in a default
7104 * value in an attribute-list declaration. Note that if
7105 * entities are declared in the external subset or in
7106 * external parameter entities, a non-validating processor
7107 * is not obligated to read and process their declarations;
7108 * for such documents, the rule that an entity must be
7109 * declared is a well-formedness constraint only if
7110 * standalone='yes'.
7111 */
7112 if (ent == NULL) {
7113 if ((ctxt->standalone == 1) ||
7114 ((ctxt->hasExternalSubset == 0) &&
7115 (ctxt->hasPErefs == 0))) {
7116 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7117 "Entity '%s' not defined\n", name);
7118 } else {
7119 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7120 "Entity '%s' not defined\n", name);
7121 if ((ctxt->inSubset == 0) &&
7122 (ctxt->sax != NULL) &&
7123 (ctxt->sax->reference != NULL)) {
7124 ctxt->sax->reference(ctxt->userData, name);
7125 }
7126 }
7127 ctxt->valid = 0;
7128 }
7129
7130 /*
7131 * [ WFC: Parsed Entity ]
7132 * An entity reference must not contain the name of an
7133 * unparsed entity
7134 */
7135 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7136 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7137 "Entity reference to unparsed entity %s\n", name);
7138 }
7139
7140 /*
7141 * [ WFC: No External Entity References ]
7142 * Attribute values cannot contain direct or indirect
7143 * entity references to external entities.
7144 */
7145 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7146 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
7147 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7148 "Attribute references external entity '%s'\n", name);
7149 }
7150 /*
7151 * [ WFC: No < in Attribute Values ]
7152 * The replacement text of any entity referred to directly or
7153 * indirectly in an attribute value (other than "&lt;") must
7154 * not contain a <.
7155 */
7156 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7157 (ent != NULL) && (ent->content != NULL) &&
Rob Richardsb9ed0172009-01-05 17:28:50 +00007158 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
Daniel Veillard0161e632008-08-28 15:36:32 +00007159 (xmlStrchr(ent->content, '<'))) {
7160 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
7161 "'<' in entity '%s' is not allowed in attributes values\n", name);
7162 }
7163
7164 /*
7165 * Internal check, no parameter entities here ...
7166 */
7167 else {
7168 switch (ent->etype) {
7169 case XML_INTERNAL_PARAMETER_ENTITY:
7170 case XML_EXTERNAL_PARAMETER_ENTITY:
7171 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
7172 "Attempt to reference the parameter entity '%s'\n",
7173 name);
7174 break;
7175 default:
7176 break;
7177 }
7178 }
7179
7180 /*
7181 * [ WFC: No Recursion ]
7182 * A parsed entity must not contain a recursive reference
7183 * to itself, either directly or indirectly.
7184 * Done somewhere else
7185 */
Owen Taylor3473f882001-02-23 17:55:21 +00007186 return(ent);
7187}
7188
7189/**
7190 * xmlParseStringEntityRef:
7191 * @ctxt: an XML parser context
7192 * @str: a pointer to an index in the string
7193 *
7194 * parse ENTITY references declarations, but this version parses it from
7195 * a string value.
7196 *
7197 * [68] EntityRef ::= '&' Name ';'
7198 *
7199 * [ WFC: Entity Declared ]
7200 * In a document without any DTD, a document with only an internal DTD
7201 * subset which contains no parameter entity references, or a document
7202 * with "standalone='yes'", the Name given in the entity reference
7203 * must match that in an entity declaration, except that well-formed
7204 * documents need not declare any of the following entities: amp, lt,
7205 * gt, apos, quot. The declaration of a parameter entity must precede
7206 * any reference to it. Similarly, the declaration of a general entity
7207 * must precede any reference to it which appears in a default value in an
7208 * attribute-list declaration. Note that if entities are declared in the
7209 * external subset or in external parameter entities, a non-validating
7210 * processor is not obligated to read and process their declarations;
7211 * for such documents, the rule that an entity must be declared is a
7212 * well-formedness constraint only if standalone='yes'.
7213 *
7214 * [ WFC: Parsed Entity ]
7215 * An entity reference must not contain the name of an unparsed entity
7216 *
7217 * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
7218 * is updated to the current location in the string.
7219 */
7220xmlEntityPtr
7221xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
7222 xmlChar *name;
7223 const xmlChar *ptr;
7224 xmlChar cur;
7225 xmlEntityPtr ent = NULL;
7226
7227 if ((str == NULL) || (*str == NULL))
7228 return(NULL);
7229 ptr = *str;
7230 cur = *ptr;
Daniel Veillard0161e632008-08-28 15:36:32 +00007231 if (cur != '&')
7232 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007233
Daniel Veillard0161e632008-08-28 15:36:32 +00007234 ptr++;
7235 cur = *ptr;
7236 name = xmlParseStringName(ctxt, &ptr);
7237 if (name == NULL) {
7238 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7239 "xmlParseStringEntityRef: no name\n");
7240 *str = ptr;
7241 return(NULL);
7242 }
7243 if (*ptr != ';') {
7244 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
Daniel Veillard7f4547c2008-10-03 07:58:23 +00007245 xmlFree(name);
Daniel Veillard0161e632008-08-28 15:36:32 +00007246 *str = ptr;
7247 return(NULL);
7248 }
7249 ptr++;
Owen Taylor3473f882001-02-23 17:55:21 +00007250
Owen Taylor3473f882001-02-23 17:55:21 +00007251
Daniel Veillard0161e632008-08-28 15:36:32 +00007252 /*
7253 * Predefined entites override any extra definition
7254 */
Rob Richardsb9ed0172009-01-05 17:28:50 +00007255 if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7256 ent = xmlGetPredefinedEntity(name);
7257 if (ent != NULL) {
7258 xmlFree(name);
7259 *str = ptr;
7260 return(ent);
7261 }
Daniel Veillard34a7fc32008-10-02 20:55:10 +00007262 }
Owen Taylor3473f882001-02-23 17:55:21 +00007263
Daniel Veillard0161e632008-08-28 15:36:32 +00007264 /*
7265 * Increate the number of entity references parsed
7266 */
7267 ctxt->nbentities++;
Owen Taylor3473f882001-02-23 17:55:21 +00007268
Daniel Veillard0161e632008-08-28 15:36:32 +00007269 /*
7270 * Ask first SAX for entity resolution, otherwise try the
7271 * entities which may have stored in the parser context.
7272 */
7273 if (ctxt->sax != NULL) {
7274 if (ctxt->sax->getEntity != NULL)
7275 ent = ctxt->sax->getEntity(ctxt->userData, name);
Rob Richardsb9ed0172009-01-05 17:28:50 +00007276 if ((ent == NULL) && (ctxt->options & XML_PARSE_OLDSAX))
7277 ent = xmlGetPredefinedEntity(name);
Daniel Veillard0161e632008-08-28 15:36:32 +00007278 if ((ent == NULL) && (ctxt->userData==ctxt)) {
7279 ent = xmlSAX2GetEntity(ctxt, name);
Owen Taylor3473f882001-02-23 17:55:21 +00007280 }
7281 }
Daniel Veillard0161e632008-08-28 15:36:32 +00007282
7283 /*
7284 * [ WFC: Entity Declared ]
7285 * In a document without any DTD, a document with only an
7286 * internal DTD subset which contains no parameter entity
7287 * references, or a document with "standalone='yes'", the
7288 * Name given in the entity reference must match that in an
7289 * entity declaration, except that well-formed documents
7290 * need not declare any of the following entities: amp, lt,
7291 * gt, apos, quot.
7292 * The declaration of a parameter entity must precede any
7293 * reference to it.
7294 * Similarly, the declaration of a general entity must
7295 * precede any reference to it which appears in a default
7296 * value in an attribute-list declaration. Note that if
7297 * entities are declared in the external subset or in
7298 * external parameter entities, a non-validating processor
7299 * is not obligated to read and process their declarations;
7300 * for such documents, the rule that an entity must be
7301 * declared is a well-formedness constraint only if
7302 * standalone='yes'.
7303 */
7304 if (ent == NULL) {
7305 if ((ctxt->standalone == 1) ||
7306 ((ctxt->hasExternalSubset == 0) &&
7307 (ctxt->hasPErefs == 0))) {
7308 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7309 "Entity '%s' not defined\n", name);
7310 } else {
7311 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7312 "Entity '%s' not defined\n",
7313 name);
7314 }
7315 /* TODO ? check regressions ctxt->valid = 0; */
7316 }
7317
7318 /*
7319 * [ WFC: Parsed Entity ]
7320 * An entity reference must not contain the name of an
7321 * unparsed entity
7322 */
7323 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7324 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7325 "Entity reference to unparsed entity %s\n", name);
7326 }
7327
7328 /*
7329 * [ WFC: No External Entity References ]
7330 * Attribute values cannot contain direct or indirect
7331 * entity references to external entities.
7332 */
7333 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7334 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
7335 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7336 "Attribute references external entity '%s'\n", name);
7337 }
7338 /*
7339 * [ WFC: No < in Attribute Values ]
7340 * The replacement text of any entity referred to directly or
7341 * indirectly in an attribute value (other than "&lt;") must
7342 * not contain a <.
7343 */
7344 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7345 (ent != NULL) && (ent->content != NULL) &&
Rob Richardsb9ed0172009-01-05 17:28:50 +00007346 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
Daniel Veillard0161e632008-08-28 15:36:32 +00007347 (xmlStrchr(ent->content, '<'))) {
7348 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
7349 "'<' in entity '%s' is not allowed in attributes values\n",
7350 name);
7351 }
7352
7353 /*
7354 * Internal check, no parameter entities here ...
7355 */
7356 else {
7357 switch (ent->etype) {
7358 case XML_INTERNAL_PARAMETER_ENTITY:
7359 case XML_EXTERNAL_PARAMETER_ENTITY:
7360 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
7361 "Attempt to reference the parameter entity '%s'\n",
7362 name);
7363 break;
7364 default:
7365 break;
7366 }
7367 }
7368
7369 /*
7370 * [ WFC: No Recursion ]
7371 * A parsed entity must not contain a recursive reference
7372 * to itself, either directly or indirectly.
7373 * Done somewhere else
7374 */
7375
7376 xmlFree(name);
Owen Taylor3473f882001-02-23 17:55:21 +00007377 *str = ptr;
7378 return(ent);
7379}
7380
7381/**
7382 * xmlParsePEReference:
7383 * @ctxt: an XML parser context
7384 *
7385 * parse PEReference declarations
7386 * The entity content is handled directly by pushing it's content as
7387 * a new input stream.
7388 *
7389 * [69] PEReference ::= '%' Name ';'
7390 *
7391 * [ WFC: No Recursion ]
7392 * A parsed entity must not contain a recursive
7393 * reference to itself, either directly or indirectly.
7394 *
7395 * [ WFC: Entity Declared ]
7396 * In a document without any DTD, a document with only an internal DTD
7397 * subset which contains no parameter entity references, or a document
7398 * with "standalone='yes'", ... ... The declaration of a parameter
7399 * entity must precede any reference to it...
7400 *
7401 * [ VC: Entity Declared ]
7402 * In a document with an external subset or external parameter entities
7403 * with "standalone='no'", ... ... The declaration of a parameter entity
7404 * must precede any reference to it...
7405 *
7406 * [ WFC: In DTD ]
7407 * Parameter-entity references may only appear in the DTD.
7408 * NOTE: misleading but this is handled.
7409 */
7410void
Daniel Veillard8f597c32003-10-06 08:19:27 +00007411xmlParsePEReference(xmlParserCtxtPtr ctxt)
7412{
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007413 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00007414 xmlEntityPtr entity = NULL;
7415 xmlParserInputPtr input;
7416
Daniel Veillard0161e632008-08-28 15:36:32 +00007417 if (RAW != '%')
7418 return;
7419 NEXT;
7420 name = xmlParseName(ctxt);
7421 if (name == NULL) {
7422 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7423 "xmlParsePEReference: no name\n");
7424 return;
Owen Taylor3473f882001-02-23 17:55:21 +00007425 }
Daniel Veillard0161e632008-08-28 15:36:32 +00007426 if (RAW != ';') {
7427 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7428 return;
7429 }
7430
7431 NEXT;
7432
7433 /*
7434 * Increate the number of entity references parsed
7435 */
7436 ctxt->nbentities++;
7437
7438 /*
7439 * Request the entity from SAX
7440 */
7441 if ((ctxt->sax != NULL) &&
7442 (ctxt->sax->getParameterEntity != NULL))
7443 entity = ctxt->sax->getParameterEntity(ctxt->userData,
7444 name);
7445 if (entity == NULL) {
7446 /*
7447 * [ WFC: Entity Declared ]
7448 * In a document without any DTD, a document with only an
7449 * internal DTD subset which contains no parameter entity
7450 * references, or a document with "standalone='yes'", ...
7451 * ... The declaration of a parameter entity must precede
7452 * any reference to it...
7453 */
7454 if ((ctxt->standalone == 1) ||
7455 ((ctxt->hasExternalSubset == 0) &&
7456 (ctxt->hasPErefs == 0))) {
7457 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7458 "PEReference: %%%s; not found\n",
7459 name);
7460 } else {
7461 /*
7462 * [ VC: Entity Declared ]
7463 * In a document with an external subset or external
7464 * parameter entities with "standalone='no'", ...
7465 * ... The declaration of a parameter entity must
7466 * precede any reference to it...
7467 */
7468 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7469 "PEReference: %%%s; not found\n",
7470 name, NULL);
7471 ctxt->valid = 0;
7472 }
7473 } else {
7474 /*
7475 * Internal checking in case the entity quest barfed
7476 */
7477 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
7478 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
7479 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7480 "Internal: %%%s; is not a parameter entity\n",
7481 name, NULL);
7482 } else if (ctxt->input->free != deallocblankswrapper) {
7483 input = xmlNewBlanksWrapperInputStream(ctxt, entity);
7484 if (xmlPushInput(ctxt, input) < 0)
7485 return;
7486 } else {
7487 /*
7488 * TODO !!!
7489 * handle the extra spaces added before and after
7490 * c.f. http://www.w3.org/TR/REC-xml#as-PE
7491 */
7492 input = xmlNewEntityInputStream(ctxt, entity);
7493 if (xmlPushInput(ctxt, input) < 0)
7494 return;
7495 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
7496 (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
7497 (IS_BLANK_CH(NXT(5)))) {
7498 xmlParseTextDecl(ctxt);
7499 if (ctxt->errNo ==
7500 XML_ERR_UNSUPPORTED_ENCODING) {
7501 /*
7502 * The XML REC instructs us to stop parsing
7503 * right here
7504 */
7505 ctxt->instate = XML_PARSER_EOF;
7506 return;
7507 }
7508 }
7509 }
7510 }
7511 ctxt->hasPErefs = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007512}
7513
7514/**
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00007515 * xmlLoadEntityContent:
7516 * @ctxt: an XML parser context
7517 * @entity: an unloaded system entity
7518 *
7519 * Load the original content of the given system entity from the
7520 * ExternalID/SystemID given. This is to be used for Included in Literal
7521 * http://www.w3.org/TR/REC-xml/#inliteral processing of entities references
7522 *
7523 * Returns 0 in case of success and -1 in case of failure
7524 */
7525static int
7526xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
7527 xmlParserInputPtr input;
7528 xmlBufferPtr buf;
7529 int l, c;
7530 int count = 0;
7531
7532 if ((ctxt == NULL) || (entity == NULL) ||
7533 ((entity->etype != XML_EXTERNAL_PARAMETER_ENTITY) &&
7534 (entity->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY)) ||
7535 (entity->content != NULL)) {
7536 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
7537 "xmlLoadEntityContent parameter error");
7538 return(-1);
7539 }
7540
7541 if (xmlParserDebugEntities)
7542 xmlGenericError(xmlGenericErrorContext,
7543 "Reading %s entity content input\n", entity->name);
7544
7545 buf = xmlBufferCreate();
7546 if (buf == NULL) {
7547 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
7548 "xmlLoadEntityContent parameter error");
7549 return(-1);
7550 }
7551
7552 input = xmlNewEntityInputStream(ctxt, entity);
7553 if (input == NULL) {
7554 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
7555 "xmlLoadEntityContent input error");
7556 xmlBufferFree(buf);
7557 return(-1);
7558 }
7559
7560 /*
7561 * Push the entity as the current input, read char by char
7562 * saving to the buffer until the end of the entity or an error
7563 */
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00007564 if (xmlPushInput(ctxt, input) < 0) {
7565 xmlBufferFree(buf);
7566 return(-1);
7567 }
7568
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00007569 GROW;
7570 c = CUR_CHAR(l);
7571 while ((ctxt->input == input) && (ctxt->input->cur < ctxt->input->end) &&
7572 (IS_CHAR(c))) {
7573 xmlBufferAdd(buf, ctxt->input->cur, l);
7574 if (count++ > 100) {
7575 count = 0;
7576 GROW;
7577 }
7578 NEXTL(l);
7579 c = CUR_CHAR(l);
7580 }
7581
7582 if ((ctxt->input == input) && (ctxt->input->cur >= ctxt->input->end)) {
7583 xmlPopInput(ctxt);
7584 } else if (!IS_CHAR(c)) {
7585 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
7586 "xmlLoadEntityContent: invalid char value %d\n",
7587 c);
7588 xmlBufferFree(buf);
7589 return(-1);
7590 }
7591 entity->content = buf->content;
7592 buf->content = NULL;
7593 xmlBufferFree(buf);
7594
7595 return(0);
7596}
7597
7598/**
Owen Taylor3473f882001-02-23 17:55:21 +00007599 * xmlParseStringPEReference:
7600 * @ctxt: an XML parser context
7601 * @str: a pointer to an index in the string
7602 *
7603 * parse PEReference declarations
7604 *
7605 * [69] PEReference ::= '%' Name ';'
7606 *
7607 * [ WFC: No Recursion ]
7608 * A parsed entity must not contain a recursive
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00007609 * reference to itself, either directly or indirectly.
Owen Taylor3473f882001-02-23 17:55:21 +00007610 *
7611 * [ WFC: Entity Declared ]
7612 * In a document without any DTD, a document with only an internal DTD
7613 * subset which contains no parameter entity references, or a document
7614 * with "standalone='yes'", ... ... The declaration of a parameter
7615 * entity must precede any reference to it...
7616 *
7617 * [ VC: Entity Declared ]
7618 * In a document with an external subset or external parameter entities
7619 * with "standalone='no'", ... ... The declaration of a parameter entity
7620 * must precede any reference to it...
7621 *
7622 * [ WFC: In DTD ]
7623 * Parameter-entity references may only appear in the DTD.
7624 * NOTE: misleading but this is handled.
7625 *
7626 * Returns the string of the entity content.
7627 * str is updated to the current value of the index
7628 */
7629xmlEntityPtr
7630xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
7631 const xmlChar *ptr;
7632 xmlChar cur;
7633 xmlChar *name;
7634 xmlEntityPtr entity = NULL;
7635
7636 if ((str == NULL) || (*str == NULL)) return(NULL);
7637 ptr = *str;
7638 cur = *ptr;
Daniel Veillard0161e632008-08-28 15:36:32 +00007639 if (cur != '%')
7640 return(NULL);
7641 ptr++;
7642 cur = *ptr;
7643 name = xmlParseStringName(ctxt, &ptr);
7644 if (name == NULL) {
7645 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7646 "xmlParseStringPEReference: no name\n");
7647 *str = ptr;
7648 return(NULL);
7649 }
7650 cur = *ptr;
7651 if (cur != ';') {
7652 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7653 xmlFree(name);
7654 *str = ptr;
7655 return(NULL);
7656 }
7657 ptr++;
7658
7659 /*
7660 * Increate the number of entity references parsed
7661 */
7662 ctxt->nbentities++;
7663
7664 /*
7665 * Request the entity from SAX
7666 */
7667 if ((ctxt->sax != NULL) &&
7668 (ctxt->sax->getParameterEntity != NULL))
7669 entity = ctxt->sax->getParameterEntity(ctxt->userData,
7670 name);
7671 if (entity == NULL) {
7672 /*
7673 * [ WFC: Entity Declared ]
7674 * In a document without any DTD, a document with only an
7675 * internal DTD subset which contains no parameter entity
7676 * references, or a document with "standalone='yes'", ...
7677 * ... The declaration of a parameter entity must precede
7678 * any reference to it...
7679 */
7680 if ((ctxt->standalone == 1) ||
7681 ((ctxt->hasExternalSubset == 0) && (ctxt->hasPErefs == 0))) {
7682 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7683 "PEReference: %%%s; not found\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00007684 } else {
Daniel Veillard0161e632008-08-28 15:36:32 +00007685 /*
7686 * [ VC: Entity Declared ]
7687 * In a document with an external subset or external
7688 * parameter entities with "standalone='no'", ...
7689 * ... The declaration of a parameter entity must
7690 * precede any reference to it...
7691 */
7692 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7693 "PEReference: %%%s; not found\n",
7694 name, NULL);
7695 ctxt->valid = 0;
7696 }
7697 } else {
7698 /*
7699 * Internal checking in case the entity quest barfed
7700 */
7701 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
7702 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
7703 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7704 "%%%s; is not a parameter entity\n",
7705 name, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007706 }
7707 }
Daniel Veillard0161e632008-08-28 15:36:32 +00007708 ctxt->hasPErefs = 1;
7709 xmlFree(name);
Owen Taylor3473f882001-02-23 17:55:21 +00007710 *str = ptr;
7711 return(entity);
7712}
7713
7714/**
7715 * xmlParseDocTypeDecl:
7716 * @ctxt: an XML parser context
7717 *
7718 * parse a DOCTYPE declaration
7719 *
7720 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
7721 * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
7722 *
7723 * [ VC: Root Element Type ]
7724 * The Name in the document type declaration must match the element
7725 * type of the root element.
7726 */
7727
7728void
7729xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007730 const xmlChar *name = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00007731 xmlChar *ExternalID = NULL;
7732 xmlChar *URI = NULL;
7733
7734 /*
7735 * We know that '<!DOCTYPE' has been detected.
7736 */
7737 SKIP(9);
7738
7739 SKIP_BLANKS;
7740
7741 /*
7742 * Parse the DOCTYPE name.
7743 */
7744 name = xmlParseName(ctxt);
7745 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007746 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7747 "xmlParseDocTypeDecl : no DOCTYPE name !\n");
Owen Taylor3473f882001-02-23 17:55:21 +00007748 }
7749 ctxt->intSubName = name;
7750
7751 SKIP_BLANKS;
7752
7753 /*
7754 * Check for SystemID and ExternalID
7755 */
7756 URI = xmlParseExternalID(ctxt, &ExternalID, 1);
7757
7758 if ((URI != NULL) || (ExternalID != NULL)) {
7759 ctxt->hasExternalSubset = 1;
7760 }
7761 ctxt->extSubURI = URI;
7762 ctxt->extSubSystem = ExternalID;
7763
7764 SKIP_BLANKS;
7765
7766 /*
7767 * Create and update the internal subset.
7768 */
7769 if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
7770 (!ctxt->disableSAX))
7771 ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
7772
7773 /*
7774 * Is there any internal subset declarations ?
7775 * they are handled separately in xmlParseInternalSubset()
7776 */
7777 if (RAW == '[')
7778 return;
7779
7780 /*
7781 * We should be at the end of the DOCTYPE declaration.
7782 */
7783 if (RAW != '>') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007784 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007785 }
7786 NEXT;
7787}
7788
7789/**
Daniel Veillardcbaf3992001-12-31 16:16:02 +00007790 * xmlParseInternalSubset:
Owen Taylor3473f882001-02-23 17:55:21 +00007791 * @ctxt: an XML parser context
7792 *
7793 * parse the internal subset declaration
7794 *
7795 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
7796 */
7797
Daniel Veillard56a4cb82001-03-24 17:00:36 +00007798static void
Owen Taylor3473f882001-02-23 17:55:21 +00007799xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
7800 /*
7801 * Is there any DTD definition ?
7802 */
7803 if (RAW == '[') {
7804 ctxt->instate = XML_PARSER_DTD;
7805 NEXT;
7806 /*
7807 * Parse the succession of Markup declarations and
7808 * PEReferences.
7809 * Subsequence (markupdecl | PEReference | S)*
7810 */
7811 while (RAW != ']') {
7812 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00007813 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00007814
7815 SKIP_BLANKS;
7816 xmlParseMarkupDecl(ctxt);
7817 xmlParsePEReference(ctxt);
7818
7819 /*
7820 * Pop-up of finished entities.
7821 */
7822 while ((RAW == 0) && (ctxt->inputNr > 1))
7823 xmlPopInput(ctxt);
7824
7825 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007826 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
Owen Taylor3473f882001-02-23 17:55:21 +00007827 "xmlParseInternalSubset: error detected in Markup declaration\n");
Owen Taylor3473f882001-02-23 17:55:21 +00007828 break;
7829 }
7830 }
7831 if (RAW == ']') {
7832 NEXT;
7833 SKIP_BLANKS;
7834 }
7835 }
7836
7837 /*
7838 * We should be at the end of the DOCTYPE declaration.
7839 */
7840 if (RAW != '>') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007841 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007842 }
7843 NEXT;
7844}
7845
Daniel Veillard81273902003-09-30 00:43:48 +00007846#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +00007847/**
7848 * xmlParseAttribute:
7849 * @ctxt: an XML parser context
7850 * @value: a xmlChar ** used to store the value of the attribute
7851 *
7852 * parse an attribute
7853 *
7854 * [41] Attribute ::= Name Eq AttValue
7855 *
7856 * [ WFC: No External Entity References ]
7857 * Attribute values cannot contain direct or indirect entity references
7858 * to external entities.
7859 *
7860 * [ WFC: No < in Attribute Values ]
7861 * The replacement text of any entity referred to directly or indirectly in
7862 * an attribute value (other than "&lt;") must not contain a <.
7863 *
7864 * [ VC: Attribute Value Type ]
7865 * The attribute must have been declared; the value must be of the type
7866 * declared for it.
7867 *
7868 * [25] Eq ::= S? '=' S?
7869 *
7870 * With namespace:
7871 *
7872 * [NS 11] Attribute ::= QName Eq AttValue
7873 *
7874 * Also the case QName == xmlns:??? is handled independently as a namespace
7875 * definition.
7876 *
7877 * Returns the attribute name, and the value in *value.
7878 */
7879
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007880const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00007881xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007882 const xmlChar *name;
7883 xmlChar *val;
Owen Taylor3473f882001-02-23 17:55:21 +00007884
7885 *value = NULL;
Daniel Veillard878eab02002-02-19 13:46:09 +00007886 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00007887 name = xmlParseName(ctxt);
7888 if (name == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00007889 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007890 "error parsing attribute name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00007891 return(NULL);
7892 }
7893
7894 /*
7895 * read the value
7896 */
7897 SKIP_BLANKS;
7898 if (RAW == '=') {
7899 NEXT;
7900 SKIP_BLANKS;
7901 val = xmlParseAttValue(ctxt);
7902 ctxt->instate = XML_PARSER_CONTENT;
7903 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00007904 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
Owen Taylor3473f882001-02-23 17:55:21 +00007905 "Specification mandate value for attribute %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00007906 return(NULL);
7907 }
7908
7909 /*
7910 * Check that xml:lang conforms to the specification
7911 * No more registered as an error, just generate a warning now
7912 * since this was deprecated in XML second edition
7913 */
7914 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
7915 if (!xmlCheckLanguageID(val)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00007916 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
7917 "Malformed value for xml:lang : %s\n",
7918 val, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007919 }
7920 }
7921
7922 /*
7923 * Check that xml:space conforms to the specification
7924 */
7925 if (xmlStrEqual(name, BAD_CAST "xml:space")) {
7926 if (xmlStrEqual(val, BAD_CAST "default"))
7927 *(ctxt->space) = 0;
7928 else if (xmlStrEqual(val, BAD_CAST "preserve"))
7929 *(ctxt->space) = 1;
7930 else {
Daniel Veillardd8925572005-06-08 22:34:55 +00007931 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
Daniel Veillard642104e2003-03-26 16:32:05 +00007932"Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
Daniel Veillardd8925572005-06-08 22:34:55 +00007933 val, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007934 }
7935 }
7936
7937 *value = val;
7938 return(name);
7939}
7940
7941/**
7942 * xmlParseStartTag:
7943 * @ctxt: an XML parser context
7944 *
7945 * parse a start of tag either for rule element or
7946 * EmptyElement. In both case we don't parse the tag closing chars.
7947 *
7948 * [40] STag ::= '<' Name (S Attribute)* S? '>'
7949 *
7950 * [ WFC: Unique Att Spec ]
7951 * No attribute name may appear more than once in the same start-tag or
7952 * empty-element tag.
7953 *
7954 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
7955 *
7956 * [ WFC: Unique Att Spec ]
7957 * No attribute name may appear more than once in the same start-tag or
7958 * empty-element tag.
7959 *
7960 * With namespace:
7961 *
7962 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
7963 *
7964 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
7965 *
7966 * Returns the element name parsed
7967 */
7968
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007969const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00007970xmlParseStartTag(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007971 const xmlChar *name;
7972 const xmlChar *attname;
Owen Taylor3473f882001-02-23 17:55:21 +00007973 xmlChar *attvalue;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00007974 const xmlChar **atts = ctxt->atts;
Owen Taylor3473f882001-02-23 17:55:21 +00007975 int nbatts = 0;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00007976 int maxatts = ctxt->maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00007977 int i;
7978
7979 if (RAW != '<') return(NULL);
Daniel Veillard21a0f912001-02-25 19:54:14 +00007980 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00007981
7982 name = xmlParseName(ctxt);
7983 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007984 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00007985 "xmlParseStartTag: invalid element name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00007986 return(NULL);
7987 }
7988
7989 /*
7990 * Now parse the attributes, it ends up with the ending
7991 *
7992 * (S Attribute)* S?
7993 */
7994 SKIP_BLANKS;
7995 GROW;
7996
Daniel Veillard21a0f912001-02-25 19:54:14 +00007997 while ((RAW != '>') &&
7998 ((RAW != '/') || (NXT(1) != '>')) &&
Daniel Veillard73b013f2003-09-30 12:36:01 +00007999 (IS_BYTE_CHAR(RAW))) {
Owen Taylor3473f882001-02-23 17:55:21 +00008000 const xmlChar *q = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00008001 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00008002
8003 attname = xmlParseAttribute(ctxt, &attvalue);
8004 if ((attname != NULL) && (attvalue != NULL)) {
8005 /*
8006 * [ WFC: Unique Att Spec ]
8007 * No attribute name may appear more than once in the same
8008 * start-tag or empty-element tag.
8009 */
8010 for (i = 0; i < nbatts;i += 2) {
8011 if (xmlStrEqual(atts[i], attname)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008012 xmlErrAttributeDup(ctxt, NULL, attname);
Owen Taylor3473f882001-02-23 17:55:21 +00008013 xmlFree(attvalue);
8014 goto failed;
8015 }
8016 }
Owen Taylor3473f882001-02-23 17:55:21 +00008017 /*
8018 * Add the pair to atts
8019 */
8020 if (atts == NULL) {
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008021 maxatts = 22; /* allow for 10 attrs by default */
8022 atts = (const xmlChar **)
8023 xmlMalloc(maxatts * sizeof(xmlChar *));
Owen Taylor3473f882001-02-23 17:55:21 +00008024 if (atts == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008025 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008026 if (attvalue != NULL)
8027 xmlFree(attvalue);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008028 goto failed;
Owen Taylor3473f882001-02-23 17:55:21 +00008029 }
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008030 ctxt->atts = atts;
8031 ctxt->maxatts = maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00008032 } else if (nbatts + 4 > maxatts) {
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008033 const xmlChar **n;
8034
Owen Taylor3473f882001-02-23 17:55:21 +00008035 maxatts *= 2;
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008036 n = (const xmlChar **) xmlRealloc((void *) atts,
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008037 maxatts * sizeof(const xmlChar *));
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008038 if (n == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008039 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008040 if (attvalue != NULL)
8041 xmlFree(attvalue);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008042 goto failed;
Owen Taylor3473f882001-02-23 17:55:21 +00008043 }
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008044 atts = n;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008045 ctxt->atts = atts;
8046 ctxt->maxatts = maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00008047 }
8048 atts[nbatts++] = attname;
8049 atts[nbatts++] = attvalue;
8050 atts[nbatts] = NULL;
8051 atts[nbatts + 1] = NULL;
8052 } else {
Owen Taylor3473f882001-02-23 17:55:21 +00008053 if (attvalue != NULL)
8054 xmlFree(attvalue);
8055 }
8056
8057failed:
8058
Daniel Veillard3772de32002-12-17 10:31:45 +00008059 GROW
Owen Taylor3473f882001-02-23 17:55:21 +00008060 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
8061 break;
William M. Brack76e95df2003-10-18 16:20:14 +00008062 if (!IS_BLANK_CH(RAW)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008063 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
8064 "attributes construct error\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008065 }
8066 SKIP_BLANKS;
Daniel Veillard02111c12003-02-24 19:14:52 +00008067 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
8068 (attname == NULL) && (attvalue == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008069 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
8070 "xmlParseStartTag: problem parsing attributes\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008071 break;
8072 }
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008073 SHRINK;
Owen Taylor3473f882001-02-23 17:55:21 +00008074 GROW;
8075 }
8076
8077 /*
8078 * SAX: Start of Element !
8079 */
8080 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008081 (!ctxt->disableSAX)) {
8082 if (nbatts > 0)
8083 ctxt->sax->startElement(ctxt->userData, name, atts);
8084 else
8085 ctxt->sax->startElement(ctxt->userData, name, NULL);
8086 }
Owen Taylor3473f882001-02-23 17:55:21 +00008087
8088 if (atts != NULL) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008089 /* Free only the content strings */
8090 for (i = 1;i < nbatts;i+=2)
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008091 if (atts[i] != NULL)
8092 xmlFree((xmlChar *) atts[i]);
Owen Taylor3473f882001-02-23 17:55:21 +00008093 }
8094 return(name);
8095}
8096
8097/**
Daniel Veillard0fb18932003-09-07 09:14:37 +00008098 * xmlParseEndTag1:
Owen Taylor3473f882001-02-23 17:55:21 +00008099 * @ctxt: an XML parser context
Daniel Veillard0fb18932003-09-07 09:14:37 +00008100 * @line: line of the start tag
8101 * @nsNr: number of namespaces on the start tag
Owen Taylor3473f882001-02-23 17:55:21 +00008102 *
8103 * parse an end of tag
8104 *
8105 * [42] ETag ::= '</' Name S? '>'
8106 *
8107 * With namespace
8108 *
8109 * [NS 9] ETag ::= '</' QName S? '>'
8110 */
8111
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00008112static void
Daniel Veillard0fb18932003-09-07 09:14:37 +00008113xmlParseEndTag1(xmlParserCtxtPtr ctxt, int line) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008114 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00008115
8116 GROW;
8117 if ((RAW != '<') || (NXT(1) != '/')) {
Daniel Veillardf403d292003-10-05 13:51:35 +00008118 xmlFatalErrMsg(ctxt, XML_ERR_LTSLASH_REQUIRED,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008119 "xmlParseEndTag: '</' not found\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008120 return;
8121 }
8122 SKIP(2);
8123
Daniel Veillard46de64e2002-05-29 08:21:33 +00008124 name = xmlParseNameAndCompare(ctxt,ctxt->name);
Owen Taylor3473f882001-02-23 17:55:21 +00008125
8126 /*
8127 * We should definitely be at the ending "S? '>'" part
8128 */
8129 GROW;
8130 SKIP_BLANKS;
Daniel Veillard73b013f2003-09-30 12:36:01 +00008131 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008132 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008133 } else
Daniel Veillard21a0f912001-02-25 19:54:14 +00008134 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00008135
8136 /*
8137 * [ WFC: Element Type Match ]
8138 * The Name in an element's end-tag must match the element type in the
8139 * start-tag.
8140 *
8141 */
Daniel Veillard46de64e2002-05-29 08:21:33 +00008142 if (name != (xmlChar*)1) {
Daniel Veillardf403d292003-10-05 13:51:35 +00008143 if (name == NULL) name = BAD_CAST "unparseable";
8144 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00008145 "Opening and ending tag mismatch: %s line %d and %s\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00008146 ctxt->name, line, name);
Owen Taylor3473f882001-02-23 17:55:21 +00008147 }
8148
8149 /*
8150 * SAX: End of Tag
8151 */
8152 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
8153 (!ctxt->disableSAX))
Daniel Veillard46de64e2002-05-29 08:21:33 +00008154 ctxt->sax->endElement(ctxt->userData, ctxt->name);
Owen Taylor3473f882001-02-23 17:55:21 +00008155
Daniel Veillarde57ec792003-09-10 10:50:59 +00008156 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00008157 spacePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00008158 return;
8159}
8160
8161/**
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00008162 * xmlParseEndTag:
8163 * @ctxt: an XML parser context
8164 *
8165 * parse an end of tag
8166 *
8167 * [42] ETag ::= '</' Name S? '>'
8168 *
8169 * With namespace
8170 *
8171 * [NS 9] ETag ::= '</' QName S? '>'
8172 */
8173
8174void
8175xmlParseEndTag(xmlParserCtxtPtr ctxt) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00008176 xmlParseEndTag1(ctxt, 0);
8177}
Daniel Veillard81273902003-09-30 00:43:48 +00008178#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillard0fb18932003-09-07 09:14:37 +00008179
8180/************************************************************************
8181 * *
8182 * SAX 2 specific operations *
8183 * *
8184 ************************************************************************/
8185
Daniel Veillard0fb18932003-09-07 09:14:37 +00008186/*
8187 * xmlGetNamespace:
8188 * @ctxt: an XML parser context
8189 * @prefix: the prefix to lookup
8190 *
8191 * Lookup the namespace name for the @prefix (which ca be NULL)
8192 * The prefix must come from the @ctxt->dict dictionnary
8193 *
8194 * Returns the namespace name or NULL if not bound
8195 */
8196static const xmlChar *
8197xmlGetNamespace(xmlParserCtxtPtr ctxt, const xmlChar *prefix) {
8198 int i;
8199
Daniel Veillarde57ec792003-09-10 10:50:59 +00008200 if (prefix == ctxt->str_xml) return(ctxt->str_xml_ns);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008201 for (i = ctxt->nsNr - 2;i >= 0;i-=2)
Daniel Veillarde57ec792003-09-10 10:50:59 +00008202 if (ctxt->nsTab[i] == prefix) {
8203 if ((prefix == NULL) && (*ctxt->nsTab[i + 1] == 0))
8204 return(NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008205 return(ctxt->nsTab[i + 1]);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008206 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008207 return(NULL);
8208}
8209
8210/**
Daniel Veillard0fb18932003-09-07 09:14:37 +00008211 * xmlParseQName:
8212 * @ctxt: an XML parser context
8213 * @prefix: pointer to store the prefix part
8214 *
8215 * parse an XML Namespace QName
8216 *
8217 * [6] QName ::= (Prefix ':')? LocalPart
8218 * [7] Prefix ::= NCName
8219 * [8] LocalPart ::= NCName
8220 *
8221 * Returns the Name parsed or NULL
8222 */
8223
8224static const xmlChar *
8225xmlParseQName(xmlParserCtxtPtr ctxt, const xmlChar **prefix) {
8226 const xmlChar *l, *p;
8227
8228 GROW;
8229
8230 l = xmlParseNCName(ctxt);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008231 if (l == NULL) {
8232 if (CUR == ':') {
8233 l = xmlParseName(ctxt);
8234 if (l != NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008235 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8236 "Failed to parse QName '%s'\n", l, NULL, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008237 *prefix = NULL;
8238 return(l);
8239 }
8240 }
8241 return(NULL);
8242 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008243 if (CUR == ':') {
8244 NEXT;
8245 p = l;
8246 l = xmlParseNCName(ctxt);
8247 if (l == NULL) {
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008248 xmlChar *tmp;
8249
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008250 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8251 "Failed to parse QName '%s:'\n", p, NULL, NULL);
Daniel Veillardae0765b2008-07-31 19:54:59 +00008252 l = xmlParseNmtoken(ctxt);
8253 if (l == NULL)
8254 tmp = xmlBuildQName(BAD_CAST "", p, NULL, 0);
8255 else {
8256 tmp = xmlBuildQName(l, p, NULL, 0);
8257 xmlFree((char *)l);
8258 }
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008259 p = xmlDictLookup(ctxt->dict, tmp, -1);
8260 if (tmp != NULL) xmlFree(tmp);
8261 *prefix = NULL;
8262 return(p);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008263 }
8264 if (CUR == ':') {
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008265 xmlChar *tmp;
8266
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008267 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8268 "Failed to parse QName '%s:%s:'\n", p, l, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008269 NEXT;
8270 tmp = (xmlChar *) xmlParseName(ctxt);
8271 if (tmp != NULL) {
8272 tmp = xmlBuildQName(tmp, l, NULL, 0);
8273 l = xmlDictLookup(ctxt->dict, tmp, -1);
8274 if (tmp != NULL) xmlFree(tmp);
8275 *prefix = p;
8276 return(l);
8277 }
8278 tmp = xmlBuildQName(BAD_CAST "", l, NULL, 0);
8279 l = xmlDictLookup(ctxt->dict, tmp, -1);
8280 if (tmp != NULL) xmlFree(tmp);
8281 *prefix = p;
8282 return(l);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008283 }
8284 *prefix = p;
8285 } else
8286 *prefix = NULL;
8287 return(l);
8288}
8289
8290/**
8291 * xmlParseQNameAndCompare:
8292 * @ctxt: an XML parser context
8293 * @name: the localname
8294 * @prefix: the prefix, if any.
8295 *
8296 * parse an XML name and compares for match
8297 * (specialized for endtag parsing)
8298 *
8299 * Returns NULL for an illegal name, (xmlChar*) 1 for success
8300 * and the name for mismatch
8301 */
8302
8303static const xmlChar *
8304xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *name,
8305 xmlChar const *prefix) {
8306 const xmlChar *cmp = name;
8307 const xmlChar *in;
8308 const xmlChar *ret;
8309 const xmlChar *prefix2;
8310
8311 if (prefix == NULL) return(xmlParseNameAndCompare(ctxt, name));
8312
8313 GROW;
8314 in = ctxt->input->cur;
8315
8316 cmp = prefix;
8317 while (*in != 0 && *in == *cmp) {
8318 ++in;
8319 ++cmp;
8320 }
8321 if ((*cmp == 0) && (*in == ':')) {
8322 in++;
8323 cmp = name;
8324 while (*in != 0 && *in == *cmp) {
8325 ++in;
8326 ++cmp;
8327 }
William M. Brack76e95df2003-10-18 16:20:14 +00008328 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00008329 /* success */
8330 ctxt->input->cur = in;
8331 return((const xmlChar*) 1);
8332 }
8333 }
8334 /*
8335 * all strings coms from the dictionary, equality can be done directly
8336 */
8337 ret = xmlParseQName (ctxt, &prefix2);
8338 if ((ret == name) && (prefix == prefix2))
8339 return((const xmlChar*) 1);
8340 return ret;
8341}
8342
8343/**
8344 * xmlParseAttValueInternal:
8345 * @ctxt: an XML parser context
8346 * @len: attribute len result
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008347 * @alloc: whether the attribute was reallocated as a new string
8348 * @normalize: if 1 then further non-CDATA normalization must be done
Daniel Veillard0fb18932003-09-07 09:14:37 +00008349 *
8350 * parse a value for an attribute.
8351 * NOTE: if no normalization is needed, the routine will return pointers
8352 * directly from the data buffer.
8353 *
8354 * 3.3.3 Attribute-Value Normalization:
8355 * Before the value of an attribute is passed to the application or
8356 * checked for validity, the XML processor must normalize it as follows:
8357 * - a character reference is processed by appending the referenced
8358 * character to the attribute value
8359 * - an entity reference is processed by recursively processing the
8360 * replacement text of the entity
8361 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
8362 * appending #x20 to the normalized value, except that only a single
8363 * #x20 is appended for a "#xD#xA" sequence that is part of an external
8364 * parsed entity or the literal entity value of an internal parsed entity
8365 * - other characters are processed by appending them to the normalized value
8366 * If the declared value is not CDATA, then the XML processor must further
8367 * process the normalized attribute value by discarding any leading and
8368 * trailing space (#x20) characters, and by replacing sequences of space
8369 * (#x20) characters by a single space (#x20) character.
8370 * All attributes for which no declaration has been read should be treated
8371 * by a non-validating parser as if declared CDATA.
8372 *
8373 * Returns the AttValue parsed or NULL. The value has to be freed by the
8374 * caller if it was copied, this can be detected by val[*len] == 0.
8375 */
8376
8377static xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008378xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *len, int *alloc,
8379 int normalize)
Daniel Veillarde57ec792003-09-10 10:50:59 +00008380{
Daniel Veillard0fb18932003-09-07 09:14:37 +00008381 xmlChar limit = 0;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008382 const xmlChar *in = NULL, *start, *end, *last;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008383 xmlChar *ret = NULL;
8384
8385 GROW;
8386 in = (xmlChar *) CUR_PTR;
8387 if (*in != '"' && *in != '\'') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008388 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008389 return (NULL);
8390 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008391 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008392
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008393 /*
8394 * try to handle in this routine the most common case where no
8395 * allocation of a new string is required and where content is
8396 * pure ASCII.
8397 */
8398 limit = *in++;
8399 end = ctxt->input->end;
8400 start = in;
8401 if (in >= end) {
8402 const xmlChar *oldbase = ctxt->input->base;
8403 GROW;
8404 if (oldbase != ctxt->input->base) {
8405 long delta = ctxt->input->base - oldbase;
8406 start = start + delta;
8407 in = in + delta;
8408 }
8409 end = ctxt->input->end;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008410 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008411 if (normalize) {
8412 /*
8413 * Skip any leading spaces
8414 */
8415 while ((in < end) && (*in != limit) &&
8416 ((*in == 0x20) || (*in == 0x9) ||
8417 (*in == 0xA) || (*in == 0xD))) {
8418 in++;
8419 start = in;
8420 if (in >= end) {
8421 const xmlChar *oldbase = ctxt->input->base;
8422 GROW;
8423 if (oldbase != ctxt->input->base) {
8424 long delta = ctxt->input->base - oldbase;
8425 start = start + delta;
8426 in = in + delta;
8427 }
8428 end = ctxt->input->end;
8429 }
8430 }
8431 while ((in < end) && (*in != limit) && (*in >= 0x20) &&
8432 (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
8433 if ((*in++ == 0x20) && (*in == 0x20)) break;
8434 if (in >= end) {
8435 const xmlChar *oldbase = ctxt->input->base;
8436 GROW;
8437 if (oldbase != ctxt->input->base) {
8438 long delta = ctxt->input->base - oldbase;
8439 start = start + delta;
8440 in = in + delta;
8441 }
8442 end = ctxt->input->end;
8443 }
8444 }
8445 last = in;
8446 /*
8447 * skip the trailing blanks
8448 */
Daniel Veillardc6e20e42003-09-11 16:30:26 +00008449 while ((last[-1] == 0x20) && (last > start)) last--;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008450 while ((in < end) && (*in != limit) &&
8451 ((*in == 0x20) || (*in == 0x9) ||
8452 (*in == 0xA) || (*in == 0xD))) {
8453 in++;
8454 if (in >= end) {
8455 const xmlChar *oldbase = ctxt->input->base;
8456 GROW;
8457 if (oldbase != ctxt->input->base) {
8458 long delta = ctxt->input->base - oldbase;
8459 start = start + delta;
8460 in = in + delta;
8461 last = last + delta;
8462 }
8463 end = ctxt->input->end;
8464 }
8465 }
8466 if (*in != limit) goto need_complex;
8467 } else {
8468 while ((in < end) && (*in != limit) && (*in >= 0x20) &&
8469 (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
8470 in++;
8471 if (in >= end) {
8472 const xmlChar *oldbase = ctxt->input->base;
8473 GROW;
8474 if (oldbase != ctxt->input->base) {
8475 long delta = ctxt->input->base - oldbase;
8476 start = start + delta;
8477 in = in + delta;
8478 }
8479 end = ctxt->input->end;
8480 }
8481 }
8482 last = in;
8483 if (*in != limit) goto need_complex;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008484 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008485 in++;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008486 if (len != NULL) {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008487 *len = last - start;
8488 ret = (xmlChar *) start;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008489 } else {
Daniel Veillarde57ec792003-09-10 10:50:59 +00008490 if (alloc) *alloc = 1;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008491 ret = xmlStrndup(start, last - start);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008492 }
8493 CUR_PTR = in;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008494 if (alloc) *alloc = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008495 return ret;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008496need_complex:
8497 if (alloc) *alloc = 1;
8498 return xmlParseAttValueComplex(ctxt, len, normalize);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008499}
8500
8501/**
8502 * xmlParseAttribute2:
8503 * @ctxt: an XML parser context
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008504 * @pref: the element prefix
8505 * @elem: the element name
8506 * @prefix: a xmlChar ** used to store the value of the attribute prefix
Daniel Veillard0fb18932003-09-07 09:14:37 +00008507 * @value: a xmlChar ** used to store the value of the attribute
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008508 * @len: an int * to save the length of the attribute
8509 * @alloc: an int * to indicate if the attribute was allocated
Daniel Veillard0fb18932003-09-07 09:14:37 +00008510 *
8511 * parse an attribute in the new SAX2 framework.
8512 *
8513 * Returns the attribute name, and the value in *value, .
8514 */
8515
8516static const xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008517xmlParseAttribute2(xmlParserCtxtPtr ctxt,
Daniel Veillard97c9ce22008-03-25 16:52:41 +00008518 const xmlChar * pref, const xmlChar * elem,
8519 const xmlChar ** prefix, xmlChar ** value,
8520 int *len, int *alloc)
8521{
Daniel Veillard0fb18932003-09-07 09:14:37 +00008522 const xmlChar *name;
Daniel Veillardd8925572005-06-08 22:34:55 +00008523 xmlChar *val, *internal_val = NULL;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008524 int normalize = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008525
8526 *value = NULL;
8527 GROW;
8528 name = xmlParseQName(ctxt, prefix);
8529 if (name == NULL) {
Daniel Veillard97c9ce22008-03-25 16:52:41 +00008530 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8531 "error parsing attribute name\n");
8532 return (NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008533 }
8534
8535 /*
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008536 * get the type if needed
8537 */
8538 if (ctxt->attsSpecial != NULL) {
8539 int type;
8540
8541 type = (int) (long) xmlHashQLookup2(ctxt->attsSpecial,
Daniel Veillard97c9ce22008-03-25 16:52:41 +00008542 pref, elem, *prefix, name);
8543 if (type != 0)
8544 normalize = 1;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008545 }
8546
8547 /*
Daniel Veillard0fb18932003-09-07 09:14:37 +00008548 * read the value
8549 */
8550 SKIP_BLANKS;
8551 if (RAW == '=') {
8552 NEXT;
Daniel Veillard97c9ce22008-03-25 16:52:41 +00008553 SKIP_BLANKS;
8554 val = xmlParseAttValueInternal(ctxt, len, alloc, normalize);
8555 if (normalize) {
8556 /*
8557 * Sometimes a second normalisation pass for spaces is needed
8558 * but that only happens if charrefs or entities refernces
8559 * have been used in the attribute value, i.e. the attribute
8560 * value have been extracted in an allocated string already.
8561 */
8562 if (*alloc) {
8563 const xmlChar *val2;
8564
8565 val2 = xmlAttrNormalizeSpace2(ctxt, val, len);
Daniel Veillardae0765b2008-07-31 19:54:59 +00008566 if ((val2 != NULL) && (val2 != val)) {
Daniel Veillard97c9ce22008-03-25 16:52:41 +00008567 xmlFree(val);
Daniel Veillard6a31b832008-03-26 14:06:44 +00008568 val = (xmlChar *) val2;
Daniel Veillard97c9ce22008-03-25 16:52:41 +00008569 }
8570 }
8571 }
8572 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008573 } else {
Daniel Veillard97c9ce22008-03-25 16:52:41 +00008574 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
8575 "Specification mandate value for attribute %s\n",
8576 name);
8577 return (NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008578 }
8579
Daniel Veillard97c9ce22008-03-25 16:52:41 +00008580 if (*prefix == ctxt->str_xml) {
8581 /*
8582 * Check that xml:lang conforms to the specification
8583 * No more registered as an error, just generate a warning now
8584 * since this was deprecated in XML second edition
8585 */
8586 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "lang"))) {
8587 internal_val = xmlStrndup(val, *len);
8588 if (!xmlCheckLanguageID(internal_val)) {
8589 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
8590 "Malformed value for xml:lang : %s\n",
8591 internal_val, NULL);
8592 }
8593 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008594
Daniel Veillard97c9ce22008-03-25 16:52:41 +00008595 /*
8596 * Check that xml:space conforms to the specification
8597 */
8598 if (xmlStrEqual(name, BAD_CAST "space")) {
8599 internal_val = xmlStrndup(val, *len);
8600 if (xmlStrEqual(internal_val, BAD_CAST "default"))
8601 *(ctxt->space) = 0;
8602 else if (xmlStrEqual(internal_val, BAD_CAST "preserve"))
8603 *(ctxt->space) = 1;
8604 else {
8605 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
8606 "Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
8607 internal_val, NULL);
8608 }
8609 }
8610 if (internal_val) {
8611 xmlFree(internal_val);
8612 }
8613 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008614
8615 *value = val;
Daniel Veillard97c9ce22008-03-25 16:52:41 +00008616 return (name);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008617}
Daniel Veillard0fb18932003-09-07 09:14:37 +00008618/**
8619 * xmlParseStartTag2:
8620 * @ctxt: an XML parser context
8621 *
8622 * parse a start of tag either for rule element or
8623 * EmptyElement. In both case we don't parse the tag closing chars.
8624 * This routine is called when running SAX2 parsing
8625 *
8626 * [40] STag ::= '<' Name (S Attribute)* S? '>'
8627 *
8628 * [ WFC: Unique Att Spec ]
8629 * No attribute name may appear more than once in the same start-tag or
8630 * empty-element tag.
8631 *
8632 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
8633 *
8634 * [ WFC: Unique Att Spec ]
8635 * No attribute name may appear more than once in the same start-tag or
8636 * empty-element tag.
8637 *
8638 * With namespace:
8639 *
8640 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
8641 *
8642 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
8643 *
8644 * Returns the element name parsed
8645 */
8646
8647static const xmlChar *
8648xmlParseStartTag2(xmlParserCtxtPtr ctxt, const xmlChar **pref,
Daniel Veillardc82c57e2004-01-12 16:24:34 +00008649 const xmlChar **URI, int *tlen) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00008650 const xmlChar *localname;
8651 const xmlChar *prefix;
8652 const xmlChar *attname;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008653 const xmlChar *aprefix;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008654 const xmlChar *nsname;
8655 xmlChar *attvalue;
8656 const xmlChar **atts = ctxt->atts;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008657 int maxatts = ctxt->maxatts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008658 int nratts, nbatts, nbdef;
Daniel Veillarddcec6722006-10-15 20:32:53 +00008659 int i, j, nbNs, attval, oldline, oldcol;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008660 const xmlChar *base;
8661 unsigned long cur;
Daniel Veillard365cf672005-06-09 08:18:24 +00008662 int nsNr = ctxt->nsNr;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008663
8664 if (RAW != '<') return(NULL);
8665 NEXT1;
8666
8667 /*
8668 * NOTE: it is crucial with the SAX2 API to never call SHRINK beyond that
8669 * point since the attribute values may be stored as pointers to
8670 * the buffer and calling SHRINK would destroy them !
8671 * The Shrinking is only possible once the full set of attribute
8672 * callbacks have been done.
8673 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00008674reparse:
Daniel Veillard0fb18932003-09-07 09:14:37 +00008675 SHRINK;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008676 base = ctxt->input->base;
8677 cur = ctxt->input->cur - ctxt->input->base;
Daniel Veillarddcec6722006-10-15 20:32:53 +00008678 oldline = ctxt->input->line;
8679 oldcol = ctxt->input->col;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008680 nbatts = 0;
8681 nratts = 0;
8682 nbdef = 0;
8683 nbNs = 0;
8684 attval = 0;
Daniel Veillard365cf672005-06-09 08:18:24 +00008685 /* Forget any namespaces added during an earlier parse of this element. */
8686 ctxt->nsNr = nsNr;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008687
8688 localname = xmlParseQName(ctxt, &prefix);
8689 if (localname == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008690 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8691 "StartTag: invalid element name\n");
Daniel Veillard0fb18932003-09-07 09:14:37 +00008692 return(NULL);
8693 }
Daniel Veillardc82c57e2004-01-12 16:24:34 +00008694 *tlen = ctxt->input->cur - ctxt->input->base - cur;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008695
8696 /*
8697 * Now parse the attributes, it ends up with the ending
8698 *
8699 * (S Attribute)* S?
8700 */
8701 SKIP_BLANKS;
8702 GROW;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008703 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008704
8705 while ((RAW != '>') &&
8706 ((RAW != '/') || (NXT(1) != '>')) &&
Daniel Veillard73b013f2003-09-30 12:36:01 +00008707 (IS_BYTE_CHAR(RAW))) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00008708 const xmlChar *q = CUR_PTR;
8709 unsigned int cons = ctxt->input->consumed;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008710 int len = -1, alloc = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008711
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008712 attname = xmlParseAttribute2(ctxt, prefix, localname,
8713 &aprefix, &attvalue, &len, &alloc);
Daniel Veillarddcec6722006-10-15 20:32:53 +00008714 if (ctxt->input->base != base) {
8715 if ((attvalue != NULL) && (alloc != 0))
8716 xmlFree(attvalue);
8717 attvalue = NULL;
8718 goto base_changed;
8719 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008720 if ((attname != NULL) && (attvalue != NULL)) {
8721 if (len < 0) len = xmlStrlen(attvalue);
8722 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00008723 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
8724 xmlURIPtr uri;
8725
8726 if (*URL != 0) {
8727 uri = xmlParseURI((const char *) URL);
8728 if (uri == NULL) {
Daniel Veillard37334572008-07-31 08:20:02 +00008729 xmlNsErr(ctxt, XML_WAR_NS_URI,
8730 "xmlns: '%s' is not a valid URI\n",
8731 URL, NULL, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008732 } else {
Daniel Veillardda3fee42008-09-01 13:08:57 +00008733 if (uri->scheme == NULL) {
Daniel Veillard37334572008-07-31 08:20:02 +00008734 xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
8735 "xmlns: URI %s is not absolute\n",
8736 URL, NULL, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008737 }
8738 xmlFreeURI(uri);
8739 }
Daniel Veillard37334572008-07-31 08:20:02 +00008740 if (URL == ctxt->str_xml_ns) {
8741 if (attname != ctxt->str_xml) {
8742 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8743 "xml namespace URI cannot be the default namespace\n",
8744 NULL, NULL, NULL);
8745 }
8746 goto skip_default_ns;
8747 }
8748 if ((len == 29) &&
8749 (xmlStrEqual(URL,
8750 BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
8751 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8752 "reuse of the xmlns namespace name is forbidden\n",
8753 NULL, NULL, NULL);
8754 goto skip_default_ns;
8755 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00008756 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008757 /*
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008758 * check that it's not a defined namespace
Daniel Veillard0fb18932003-09-07 09:14:37 +00008759 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008760 for (j = 1;j <= nbNs;j++)
8761 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
8762 break;
8763 if (j <= nbNs)
8764 xmlErrAttributeDup(ctxt, NULL, attname);
8765 else
8766 if (nsPush(ctxt, NULL, URL) > 0) nbNs++;
Daniel Veillard37334572008-07-31 08:20:02 +00008767skip_default_ns:
Daniel Veillarde57ec792003-09-10 10:50:59 +00008768 if (alloc != 0) xmlFree(attvalue);
8769 SKIP_BLANKS;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008770 continue;
8771 }
8772 if (aprefix == ctxt->str_xmlns) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00008773 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
8774 xmlURIPtr uri;
8775
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008776 if (attname == ctxt->str_xml) {
8777 if (URL != ctxt->str_xml_ns) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008778 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8779 "xml namespace prefix mapped to wrong URI\n",
8780 NULL, NULL, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008781 }
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008782 /*
8783 * Do not keep a namespace definition node
8784 */
Daniel Veillard37334572008-07-31 08:20:02 +00008785 goto skip_ns;
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008786 }
Daniel Veillard37334572008-07-31 08:20:02 +00008787 if (URL == ctxt->str_xml_ns) {
8788 if (attname != ctxt->str_xml) {
8789 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8790 "xml namespace URI mapped to wrong prefix\n",
8791 NULL, NULL, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008792 }
Daniel Veillard37334572008-07-31 08:20:02 +00008793 goto skip_ns;
8794 }
8795 if (attname == ctxt->str_xmlns) {
8796 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8797 "redefinition of the xmlns prefix is forbidden\n",
8798 NULL, NULL, NULL);
8799 goto skip_ns;
8800 }
8801 if ((len == 29) &&
8802 (xmlStrEqual(URL,
8803 BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
8804 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8805 "reuse of the xmlns namespace name is forbidden\n",
8806 NULL, NULL, NULL);
8807 goto skip_ns;
8808 }
8809 if ((URL == NULL) || (URL[0] == 0)) {
8810 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8811 "xmlns:%s: Empty XML namespace is not allowed\n",
8812 attname, NULL, NULL);
8813 goto skip_ns;
8814 } else {
8815 uri = xmlParseURI((const char *) URL);
8816 if (uri == NULL) {
8817 xmlNsErr(ctxt, XML_WAR_NS_URI,
8818 "xmlns:%s: '%s' is not a valid URI\n",
8819 attname, URL, NULL);
8820 } else {
8821 if ((ctxt->pedantic) && (uri->scheme == NULL)) {
8822 xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
8823 "xmlns:%s: URI %s is not absolute\n",
8824 attname, URL, NULL);
8825 }
8826 xmlFreeURI(uri);
8827 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00008828 }
8829
Daniel Veillard0fb18932003-09-07 09:14:37 +00008830 /*
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008831 * check that it's not a defined namespace
Daniel Veillard0fb18932003-09-07 09:14:37 +00008832 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008833 for (j = 1;j <= nbNs;j++)
8834 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
8835 break;
8836 if (j <= nbNs)
8837 xmlErrAttributeDup(ctxt, aprefix, attname);
8838 else
8839 if (nsPush(ctxt, attname, URL) > 0) nbNs++;
Daniel Veillard37334572008-07-31 08:20:02 +00008840skip_ns:
Daniel Veillarde57ec792003-09-10 10:50:59 +00008841 if (alloc != 0) xmlFree(attvalue);
8842 SKIP_BLANKS;
Daniel Veillardd3999c72004-03-10 16:27:03 +00008843 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008844 continue;
8845 }
8846
8847 /*
8848 * Add the pair to atts
8849 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00008850 if ((atts == NULL) || (nbatts + 5 > maxatts)) {
8851 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00008852 if (attvalue[len] == 0)
8853 xmlFree(attvalue);
8854 goto failed;
8855 }
8856 maxatts = ctxt->maxatts;
8857 atts = ctxt->atts;
8858 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00008859 ctxt->attallocs[nratts++] = alloc;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008860 atts[nbatts++] = attname;
8861 atts[nbatts++] = aprefix;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008862 atts[nbatts++] = NULL; /* the URI will be fetched later */
Daniel Veillard0fb18932003-09-07 09:14:37 +00008863 atts[nbatts++] = attvalue;
8864 attvalue += len;
8865 atts[nbatts++] = attvalue;
8866 /*
8867 * tag if some deallocation is needed
8868 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00008869 if (alloc != 0) attval = 1;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008870 } else {
8871 if ((attvalue != NULL) && (attvalue[len] == 0))
8872 xmlFree(attvalue);
8873 }
8874
Daniel Veillard37334572008-07-31 08:20:02 +00008875failed:
Daniel Veillard0fb18932003-09-07 09:14:37 +00008876
8877 GROW
Daniel Veillarde57ec792003-09-10 10:50:59 +00008878 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008879 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
8880 break;
William M. Brack76e95df2003-10-18 16:20:14 +00008881 if (!IS_BLANK_CH(RAW)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008882 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
8883 "attributes construct error\n");
William M. Brack13dfa872004-09-18 04:52:08 +00008884 break;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008885 }
8886 SKIP_BLANKS;
8887 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
8888 (attname == NULL) && (attvalue == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008889 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
Daniel Veillard0fb18932003-09-07 09:14:37 +00008890 "xmlParseStartTag: problem parsing attributes\n");
Daniel Veillard0fb18932003-09-07 09:14:37 +00008891 break;
8892 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008893 GROW;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008894 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008895 }
8896
Daniel Veillard0fb18932003-09-07 09:14:37 +00008897 /*
Daniel Veillarde57ec792003-09-10 10:50:59 +00008898 * The attributes defaulting
8899 */
8900 if (ctxt->attsDefault != NULL) {
8901 xmlDefAttrsPtr defaults;
8902
8903 defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix);
8904 if (defaults != NULL) {
8905 for (i = 0;i < defaults->nbAttrs;i++) {
Daniel Veillardae0765b2008-07-31 19:54:59 +00008906 attname = defaults->values[5 * i];
8907 aprefix = defaults->values[5 * i + 1];
Daniel Veillarde57ec792003-09-10 10:50:59 +00008908
8909 /*
8910 * special work for namespaces defaulted defs
8911 */
8912 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
8913 /*
8914 * check that it's not a defined namespace
8915 */
8916 for (j = 1;j <= nbNs;j++)
8917 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
8918 break;
8919 if (j <= nbNs) continue;
8920
8921 nsname = xmlGetNamespace(ctxt, NULL);
Daniel Veillardae0765b2008-07-31 19:54:59 +00008922 if (nsname != defaults->values[5 * i + 2]) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00008923 if (nsPush(ctxt, NULL,
Daniel Veillardae0765b2008-07-31 19:54:59 +00008924 defaults->values[5 * i + 2]) > 0)
Daniel Veillarde57ec792003-09-10 10:50:59 +00008925 nbNs++;
8926 }
8927 } else if (aprefix == ctxt->str_xmlns) {
8928 /*
8929 * check that it's not a defined namespace
8930 */
8931 for (j = 1;j <= nbNs;j++)
8932 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
8933 break;
8934 if (j <= nbNs) continue;
8935
8936 nsname = xmlGetNamespace(ctxt, attname);
8937 if (nsname != defaults->values[2]) {
8938 if (nsPush(ctxt, attname,
Daniel Veillardae0765b2008-07-31 19:54:59 +00008939 defaults->values[5 * i + 2]) > 0)
Daniel Veillarde57ec792003-09-10 10:50:59 +00008940 nbNs++;
8941 }
8942 } else {
8943 /*
8944 * check that it's not a defined attribute
8945 */
8946 for (j = 0;j < nbatts;j+=5) {
8947 if ((attname == atts[j]) && (aprefix == atts[j+1]))
8948 break;
8949 }
8950 if (j < nbatts) continue;
8951
8952 if ((atts == NULL) || (nbatts + 5 > maxatts)) {
8953 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
Daniel Veillard9ee35f32003-09-28 00:19:54 +00008954 return(NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008955 }
8956 maxatts = ctxt->maxatts;
8957 atts = ctxt->atts;
8958 }
8959 atts[nbatts++] = attname;
8960 atts[nbatts++] = aprefix;
8961 if (aprefix == NULL)
8962 atts[nbatts++] = NULL;
8963 else
8964 atts[nbatts++] = xmlGetNamespace(ctxt, aprefix);
Daniel Veillardae0765b2008-07-31 19:54:59 +00008965 atts[nbatts++] = defaults->values[5 * i + 2];
8966 atts[nbatts++] = defaults->values[5 * i + 3];
8967 if ((ctxt->standalone == 1) &&
8968 (defaults->values[5 * i + 4] != NULL)) {
8969 xmlValidityError(ctxt, XML_DTD_STANDALONE_DEFAULTED,
8970 "standalone: attribute %s on %s defaulted from external subset\n",
8971 attname, localname);
8972 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00008973 nbdef++;
8974 }
8975 }
8976 }
8977 }
8978
Daniel Veillarde70c8772003-11-25 07:21:18 +00008979 /*
8980 * The attributes checkings
8981 */
8982 for (i = 0; i < nbatts;i += 5) {
Kasimier T. Buchcik455472f2005-04-29 10:04:43 +00008983 /*
8984 * The default namespace does not apply to attribute names.
8985 */
8986 if (atts[i + 1] != NULL) {
8987 nsname = xmlGetNamespace(ctxt, atts[i + 1]);
8988 if (nsname == NULL) {
8989 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
8990 "Namespace prefix %s for %s on %s is not defined\n",
8991 atts[i + 1], atts[i], localname);
8992 }
8993 atts[i + 2] = nsname;
8994 } else
8995 nsname = NULL;
Daniel Veillarde70c8772003-11-25 07:21:18 +00008996 /*
8997 * [ WFC: Unique Att Spec ]
8998 * No attribute name may appear more than once in the same
8999 * start-tag or empty-element tag.
9000 * As extended by the Namespace in XML REC.
9001 */
9002 for (j = 0; j < i;j += 5) {
9003 if (atts[i] == atts[j]) {
9004 if (atts[i+1] == atts[j+1]) {
9005 xmlErrAttributeDup(ctxt, atts[i+1], atts[i]);
9006 break;
9007 }
9008 if ((nsname != NULL) && (atts[j + 2] == nsname)) {
9009 xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED,
9010 "Namespaced Attribute %s in '%s' redefined\n",
9011 atts[i], nsname, NULL);
9012 break;
9013 }
9014 }
9015 }
9016 }
9017
Daniel Veillarde57ec792003-09-10 10:50:59 +00009018 nsname = xmlGetNamespace(ctxt, prefix);
9019 if ((prefix != NULL) && (nsname == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009020 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9021 "Namespace prefix %s on %s is not defined\n",
9022 prefix, localname, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00009023 }
9024 *pref = prefix;
9025 *URI = nsname;
9026
9027 /*
9028 * SAX: Start of Element !
9029 */
9030 if ((ctxt->sax != NULL) && (ctxt->sax->startElementNs != NULL) &&
9031 (!ctxt->disableSAX)) {
9032 if (nbNs > 0)
9033 ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
9034 nsname, nbNs, &ctxt->nsTab[ctxt->nsNr - 2 * nbNs],
9035 nbatts / 5, nbdef, atts);
9036 else
9037 ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
9038 nsname, 0, NULL, nbatts / 5, nbdef, atts);
9039 }
9040
9041 /*
9042 * Free up attribute allocated strings if needed
9043 */
9044 if (attval != 0) {
9045 for (i = 3,j = 0; j < nratts;i += 5,j++)
9046 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
9047 xmlFree((xmlChar *) atts[i]);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009048 }
9049
9050 return(localname);
Daniel Veillarde57ec792003-09-10 10:50:59 +00009051
9052base_changed:
9053 /*
9054 * the attribute strings are valid iif the base didn't changed
9055 */
9056 if (attval != 0) {
9057 for (i = 3,j = 0; j < nratts;i += 5,j++)
9058 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
9059 xmlFree((xmlChar *) atts[i]);
9060 }
9061 ctxt->input->cur = ctxt->input->base + cur;
Daniel Veillarddcec6722006-10-15 20:32:53 +00009062 ctxt->input->line = oldline;
9063 ctxt->input->col = oldcol;
Daniel Veillarde57ec792003-09-10 10:50:59 +00009064 if (ctxt->wellFormed == 1) {
9065 goto reparse;
9066 }
9067 return(NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009068}
9069
9070/**
9071 * xmlParseEndTag2:
9072 * @ctxt: an XML parser context
9073 * @line: line of the start tag
9074 * @nsNr: number of namespaces on the start tag
9075 *
9076 * parse an end of tag
9077 *
9078 * [42] ETag ::= '</' Name S? '>'
9079 *
9080 * With namespace
9081 *
9082 * [NS 9] ETag ::= '</' QName S? '>'
9083 */
9084
9085static void
9086xmlParseEndTag2(xmlParserCtxtPtr ctxt, const xmlChar *prefix,
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009087 const xmlChar *URI, int line, int nsNr, int tlen) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00009088 const xmlChar *name;
9089
9090 GROW;
9091 if ((RAW != '<') || (NXT(1) != '/')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009092 xmlFatalErr(ctxt, XML_ERR_LTSLASH_REQUIRED, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009093 return;
9094 }
9095 SKIP(2);
9096
William M. Brack13dfa872004-09-18 04:52:08 +00009097 if ((tlen > 0) && (xmlStrncmp(ctxt->input->cur, ctxt->name, tlen) == 0)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009098 if (ctxt->input->cur[tlen] == '>') {
9099 ctxt->input->cur += tlen + 1;
9100 goto done;
9101 }
9102 ctxt->input->cur += tlen;
9103 name = (xmlChar*)1;
9104 } else {
9105 if (prefix == NULL)
9106 name = xmlParseNameAndCompare(ctxt, ctxt->name);
9107 else
9108 name = xmlParseQNameAndCompare(ctxt, ctxt->name, prefix);
9109 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00009110
9111 /*
9112 * We should definitely be at the ending "S? '>'" part
9113 */
9114 GROW;
9115 SKIP_BLANKS;
Daniel Veillard73b013f2003-09-30 12:36:01 +00009116 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009117 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009118 } else
9119 NEXT1;
9120
9121 /*
9122 * [ WFC: Element Type Match ]
9123 * The Name in an element's end-tag must match the element type in the
9124 * start-tag.
9125 *
9126 */
9127 if (name != (xmlChar*)1) {
Daniel Veillardf403d292003-10-05 13:51:35 +00009128 if (name == NULL) name = BAD_CAST "unparseable";
9129 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
Daniel Veillard0fb18932003-09-07 09:14:37 +00009130 "Opening and ending tag mismatch: %s line %d and %s\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00009131 ctxt->name, line, name);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009132 }
9133
9134 /*
9135 * SAX: End of Tag
9136 */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009137done:
Daniel Veillard0fb18932003-09-07 09:14:37 +00009138 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
9139 (!ctxt->disableSAX))
9140 ctxt->sax->endElementNs(ctxt->userData, ctxt->name, prefix, URI);
9141
Daniel Veillard0fb18932003-09-07 09:14:37 +00009142 spacePop(ctxt);
9143 if (nsNr != 0)
9144 nsPop(ctxt, nsNr);
9145 return;
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00009146}
9147
9148/**
Owen Taylor3473f882001-02-23 17:55:21 +00009149 * xmlParseCDSect:
9150 * @ctxt: an XML parser context
9151 *
9152 * Parse escaped pure raw content.
9153 *
9154 * [18] CDSect ::= CDStart CData CDEnd
9155 *
9156 * [19] CDStart ::= '<![CDATA['
9157 *
9158 * [20] Data ::= (Char* - (Char* ']]>' Char*))
9159 *
9160 * [21] CDEnd ::= ']]>'
9161 */
9162void
9163xmlParseCDSect(xmlParserCtxtPtr ctxt) {
9164 xmlChar *buf = NULL;
9165 int len = 0;
9166 int size = XML_PARSER_BUFFER_SIZE;
9167 int r, rl;
9168 int s, sl;
9169 int cur, l;
9170 int count = 0;
9171
Daniel Veillard8f597c32003-10-06 08:19:27 +00009172 /* Check 2.6.0 was NXT(0) not RAW */
Daniel Veillarda07050d2003-10-19 14:46:32 +00009173 if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009174 SKIP(9);
9175 } else
9176 return;
9177
9178 ctxt->instate = XML_PARSER_CDATA_SECTION;
9179 r = CUR_CHAR(rl);
William M. Brack871611b2003-10-18 04:53:14 +00009180 if (!IS_CHAR(r)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009181 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009182 ctxt->instate = XML_PARSER_CONTENT;
9183 return;
9184 }
9185 NEXTL(rl);
9186 s = CUR_CHAR(sl);
William M. Brack871611b2003-10-18 04:53:14 +00009187 if (!IS_CHAR(s)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009188 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009189 ctxt->instate = XML_PARSER_CONTENT;
9190 return;
9191 }
9192 NEXTL(sl);
9193 cur = CUR_CHAR(l);
Daniel Veillard3c908dc2003-04-19 00:07:51 +00009194 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00009195 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009196 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009197 return;
9198 }
William M. Brack871611b2003-10-18 04:53:14 +00009199 while (IS_CHAR(cur) &&
Owen Taylor3473f882001-02-23 17:55:21 +00009200 ((r != ']') || (s != ']') || (cur != '>'))) {
9201 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00009202 xmlChar *tmp;
9203
Owen Taylor3473f882001-02-23 17:55:21 +00009204 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00009205 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
9206 if (tmp == NULL) {
9207 xmlFree(buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009208 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009209 return;
9210 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00009211 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00009212 }
9213 COPY_BUF(rl,buf,len,r);
9214 r = s;
9215 rl = sl;
9216 s = cur;
9217 sl = l;
9218 count++;
9219 if (count > 50) {
9220 GROW;
9221 count = 0;
9222 }
9223 NEXTL(l);
9224 cur = CUR_CHAR(l);
9225 }
9226 buf[len] = 0;
9227 ctxt->instate = XML_PARSER_CONTENT;
9228 if (cur != '>') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00009229 xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00009230 "CData section not finished\n%.50s\n", buf);
Owen Taylor3473f882001-02-23 17:55:21 +00009231 xmlFree(buf);
9232 return;
9233 }
9234 NEXTL(l);
9235
9236 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00009237 * OK the buffer is to be consumed as cdata.
Owen Taylor3473f882001-02-23 17:55:21 +00009238 */
9239 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
9240 if (ctxt->sax->cdataBlock != NULL)
9241 ctxt->sax->cdataBlock(ctxt->userData, buf, len);
Daniel Veillard7583a592001-07-08 13:15:55 +00009242 else if (ctxt->sax->characters != NULL)
9243 ctxt->sax->characters(ctxt->userData, buf, len);
Owen Taylor3473f882001-02-23 17:55:21 +00009244 }
9245 xmlFree(buf);
9246}
9247
9248/**
9249 * xmlParseContent:
9250 * @ctxt: an XML parser context
9251 *
9252 * Parse a content:
9253 *
9254 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
9255 */
9256
9257void
9258xmlParseContent(xmlParserCtxtPtr ctxt) {
9259 GROW;
Daniel Veillardfdc91562002-07-01 21:52:03 +00009260 while ((RAW != 0) &&
Daniel Veillard4a9fe382006-09-19 12:44:35 +00009261 ((RAW != '<') || (NXT(1) != '/')) &&
9262 (ctxt->instate != XML_PARSER_EOF)) {
Owen Taylor3473f882001-02-23 17:55:21 +00009263 const xmlChar *test = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00009264 unsigned int cons = ctxt->input->consumed;
Daniel Veillard21a0f912001-02-25 19:54:14 +00009265 const xmlChar *cur = ctxt->input->cur;
Owen Taylor3473f882001-02-23 17:55:21 +00009266
9267 /*
Owen Taylor3473f882001-02-23 17:55:21 +00009268 * First case : a Processing Instruction.
9269 */
Daniel Veillardfdc91562002-07-01 21:52:03 +00009270 if ((*cur == '<') && (cur[1] == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009271 xmlParsePI(ctxt);
9272 }
9273
9274 /*
9275 * Second case : a CDSection
9276 */
Daniel Veillard8f597c32003-10-06 08:19:27 +00009277 /* 2.6.0 test was *cur not RAW */
Daniel Veillarda07050d2003-10-19 14:46:32 +00009278 else if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009279 xmlParseCDSect(ctxt);
9280 }
9281
9282 /*
9283 * Third case : a comment
9284 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00009285 else if ((*cur == '<') && (NXT(1) == '!') &&
Owen Taylor3473f882001-02-23 17:55:21 +00009286 (NXT(2) == '-') && (NXT(3) == '-')) {
9287 xmlParseComment(ctxt);
9288 ctxt->instate = XML_PARSER_CONTENT;
9289 }
9290
9291 /*
9292 * Fourth case : a sub-element.
9293 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00009294 else if (*cur == '<') {
Owen Taylor3473f882001-02-23 17:55:21 +00009295 xmlParseElement(ctxt);
9296 }
9297
9298 /*
9299 * Fifth case : a reference. If if has not been resolved,
9300 * parsing returns it's Name, create the node
9301 */
9302
Daniel Veillard21a0f912001-02-25 19:54:14 +00009303 else if (*cur == '&') {
Owen Taylor3473f882001-02-23 17:55:21 +00009304 xmlParseReference(ctxt);
9305 }
9306
9307 /*
9308 * Last case, text. Note that References are handled directly.
9309 */
9310 else {
9311 xmlParseCharData(ctxt, 0);
9312 }
9313
9314 GROW;
9315 /*
9316 * Pop-up of finished entities.
9317 */
Daniel Veillard561b7f82002-03-20 21:55:57 +00009318 while ((RAW == 0) && (ctxt->inputNr > 1))
Owen Taylor3473f882001-02-23 17:55:21 +00009319 xmlPopInput(ctxt);
9320 SHRINK;
9321
Daniel Veillardfdc91562002-07-01 21:52:03 +00009322 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009323 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
9324 "detected an error in element content\n");
Owen Taylor3473f882001-02-23 17:55:21 +00009325 ctxt->instate = XML_PARSER_EOF;
9326 break;
9327 }
9328 }
9329}
9330
9331/**
9332 * xmlParseElement:
9333 * @ctxt: an XML parser context
9334 *
9335 * parse an XML element, this is highly recursive
9336 *
9337 * [39] element ::= EmptyElemTag | STag content ETag
9338 *
9339 * [ WFC: Element Type Match ]
9340 * The Name in an element's end-tag must match the element type in the
9341 * start-tag.
9342 *
Owen Taylor3473f882001-02-23 17:55:21 +00009343 */
9344
9345void
9346xmlParseElement(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00009347 const xmlChar *name;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009348 const xmlChar *prefix;
9349 const xmlChar *URI;
Owen Taylor3473f882001-02-23 17:55:21 +00009350 xmlParserNodeInfo node_info;
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009351 int line, tlen;
Owen Taylor3473f882001-02-23 17:55:21 +00009352 xmlNodePtr ret;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009353 int nsNr = ctxt->nsNr;
Owen Taylor3473f882001-02-23 17:55:21 +00009354
Daniel Veillard8915c152008-08-26 13:05:34 +00009355 if (((unsigned int) ctxt->nameNr > xmlParserMaxDepth) &&
9356 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9357 xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
9358 "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
9359 xmlParserMaxDepth);
Daniel Veillard4a9fe382006-09-19 12:44:35 +00009360 ctxt->instate = XML_PARSER_EOF;
9361 return;
9362 }
9363
Owen Taylor3473f882001-02-23 17:55:21 +00009364 /* Capture start position */
9365 if (ctxt->record_info) {
9366 node_info.begin_pos = ctxt->input->consumed +
9367 (CUR_PTR - ctxt->input->base);
9368 node_info.begin_line = ctxt->input->line;
9369 }
9370
9371 if (ctxt->spaceNr == 0)
9372 spacePush(ctxt, -1);
Daniel Veillard1114d002006-10-12 16:24:35 +00009373 else if (*ctxt->space == -2)
9374 spacePush(ctxt, -1);
Owen Taylor3473f882001-02-23 17:55:21 +00009375 else
9376 spacePush(ctxt, *ctxt->space);
9377
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00009378 line = ctxt->input->line;
Daniel Veillard81273902003-09-30 00:43:48 +00009379#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +00009380 if (ctxt->sax2)
Daniel Veillard81273902003-09-30 00:43:48 +00009381#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009382 name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
Daniel Veillard81273902003-09-30 00:43:48 +00009383#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +00009384 else
9385 name = xmlParseStartTag(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +00009386#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00009387 if (name == NULL) {
9388 spacePop(ctxt);
9389 return;
9390 }
9391 namePush(ctxt, name);
9392 ret = ctxt->node;
9393
Daniel Veillard4432df22003-09-28 18:58:27 +00009394#ifdef LIBXML_VALID_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +00009395 /*
9396 * [ VC: Root Element Type ]
9397 * The Name in the document type declaration must match the element
9398 * type of the root element.
9399 */
9400 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
9401 ctxt->node && (ctxt->node == ctxt->myDoc->children))
9402 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
Daniel Veillard4432df22003-09-28 18:58:27 +00009403#endif /* LIBXML_VALID_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00009404
9405 /*
9406 * Check for an Empty Element.
9407 */
9408 if ((RAW == '/') && (NXT(1) == '>')) {
9409 SKIP(2);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009410 if (ctxt->sax2) {
9411 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
9412 (!ctxt->disableSAX))
9413 ctxt->sax->endElementNs(ctxt->userData, name, prefix, URI);
Daniel Veillard81273902003-09-30 00:43:48 +00009414#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +00009415 } else {
9416 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
9417 (!ctxt->disableSAX))
9418 ctxt->sax->endElement(ctxt->userData, name);
Daniel Veillard81273902003-09-30 00:43:48 +00009419#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00009420 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00009421 namePop(ctxt);
9422 spacePop(ctxt);
9423 if (nsNr != ctxt->nsNr)
9424 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +00009425 if ( ret != NULL && ctxt->record_info ) {
9426 node_info.end_pos = ctxt->input->consumed +
9427 (CUR_PTR - ctxt->input->base);
9428 node_info.end_line = ctxt->input->line;
9429 node_info.node = ret;
9430 xmlParserAddNodeInfo(ctxt, &node_info);
9431 }
9432 return;
9433 }
9434 if (RAW == '>') {
Daniel Veillard21a0f912001-02-25 19:54:14 +00009435 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00009436 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00009437 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_GT_REQUIRED,
9438 "Couldn't find end of Start Tag %s line %d\n",
9439 name, line, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009440
9441 /*
9442 * end of parsing of this node.
9443 */
9444 nodePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009445 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00009446 spacePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009447 if (nsNr != ctxt->nsNr)
9448 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +00009449
9450 /*
9451 * Capture end position and add node
9452 */
9453 if ( ret != NULL && ctxt->record_info ) {
9454 node_info.end_pos = ctxt->input->consumed +
9455 (CUR_PTR - ctxt->input->base);
9456 node_info.end_line = ctxt->input->line;
9457 node_info.node = ret;
9458 xmlParserAddNodeInfo(ctxt, &node_info);
9459 }
9460 return;
9461 }
9462
9463 /*
9464 * Parse the content of the element:
9465 */
9466 xmlParseContent(ctxt);
Daniel Veillard73b013f2003-09-30 12:36:01 +00009467 if (!IS_BYTE_CHAR(RAW)) {
Daniel Veillardf403d292003-10-05 13:51:35 +00009468 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
Daniel Veillard2b0f8792003-10-10 19:36:36 +00009469 "Premature end of data in tag %s line %d\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00009470 name, line, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009471
9472 /*
9473 * end of parsing of this node.
9474 */
9475 nodePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009476 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00009477 spacePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009478 if (nsNr != ctxt->nsNr)
9479 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +00009480 return;
9481 }
9482
9483 /*
9484 * parse the end of tag: '</' should be here.
9485 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00009486 if (ctxt->sax2) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009487 xmlParseEndTag2(ctxt, prefix, URI, line, ctxt->nsNr - nsNr, tlen);
Daniel Veillarde57ec792003-09-10 10:50:59 +00009488 namePop(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +00009489 }
9490#ifdef LIBXML_SAX1_ENABLED
9491 else
Daniel Veillard0fb18932003-09-07 09:14:37 +00009492 xmlParseEndTag1(ctxt, line);
Daniel Veillard81273902003-09-30 00:43:48 +00009493#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00009494
9495 /*
9496 * Capture end position and add node
9497 */
9498 if ( ret != NULL && ctxt->record_info ) {
9499 node_info.end_pos = ctxt->input->consumed +
9500 (CUR_PTR - ctxt->input->base);
9501 node_info.end_line = ctxt->input->line;
9502 node_info.node = ret;
9503 xmlParserAddNodeInfo(ctxt, &node_info);
9504 }
9505}
9506
9507/**
9508 * xmlParseVersionNum:
9509 * @ctxt: an XML parser context
9510 *
9511 * parse the XML version value.
9512 *
Daniel Veillard34e3f642008-07-29 09:02:27 +00009513 * [26] VersionNum ::= '1.' [0-9]+
9514 *
9515 * In practice allow [0-9].[0-9]+ at that level
Owen Taylor3473f882001-02-23 17:55:21 +00009516 *
9517 * Returns the string giving the XML version number, or NULL
9518 */
9519xmlChar *
9520xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
9521 xmlChar *buf = NULL;
9522 int len = 0;
9523 int size = 10;
9524 xmlChar cur;
9525
Daniel Veillard3c908dc2003-04-19 00:07:51 +00009526 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00009527 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009528 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009529 return(NULL);
9530 }
9531 cur = CUR;
Daniel Veillard34e3f642008-07-29 09:02:27 +00009532 if (!((cur >= '0') && (cur <= '9'))) {
Daniel Veillard7e5c3f42008-07-29 16:12:31 +00009533 xmlFree(buf);
Daniel Veillard34e3f642008-07-29 09:02:27 +00009534 return(NULL);
9535 }
9536 buf[len++] = cur;
9537 NEXT;
9538 cur=CUR;
9539 if (cur != '.') {
Daniel Veillard7e5c3f42008-07-29 16:12:31 +00009540 xmlFree(buf);
Daniel Veillard34e3f642008-07-29 09:02:27 +00009541 return(NULL);
9542 }
9543 buf[len++] = cur;
9544 NEXT;
9545 cur=CUR;
9546 while ((cur >= '0') && (cur <= '9')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009547 if (len + 1 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00009548 xmlChar *tmp;
9549
Owen Taylor3473f882001-02-23 17:55:21 +00009550 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00009551 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
9552 if (tmp == NULL) {
Daniel Veillard68b6e022008-03-31 09:26:00 +00009553 xmlFree(buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009554 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009555 return(NULL);
9556 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00009557 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00009558 }
9559 buf[len++] = cur;
9560 NEXT;
9561 cur=CUR;
9562 }
9563 buf[len] = 0;
9564 return(buf);
9565}
9566
9567/**
9568 * xmlParseVersionInfo:
9569 * @ctxt: an XML parser context
Daniel Veillard68b6e022008-03-31 09:26:00 +00009570 *
Owen Taylor3473f882001-02-23 17:55:21 +00009571 * parse the XML version.
9572 *
9573 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
Daniel Veillard68b6e022008-03-31 09:26:00 +00009574 *
Owen Taylor3473f882001-02-23 17:55:21 +00009575 * [25] Eq ::= S? '=' S?
9576 *
9577 * Returns the version string, e.g. "1.0"
9578 */
9579
9580xmlChar *
9581xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
9582 xmlChar *version = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00009583
Daniel Veillarda07050d2003-10-19 14:46:32 +00009584 if (CMP7(CUR_PTR, 'v', 'e', 'r', 's', 'i', 'o', 'n')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009585 SKIP(7);
9586 SKIP_BLANKS;
9587 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009588 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009589 return(NULL);
9590 }
9591 NEXT;
9592 SKIP_BLANKS;
9593 if (RAW == '"') {
9594 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00009595 version = xmlParseVersionNum(ctxt);
9596 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009597 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009598 } else
9599 NEXT;
9600 } else if (RAW == '\''){
9601 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00009602 version = xmlParseVersionNum(ctxt);
9603 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009604 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009605 } else
9606 NEXT;
9607 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009608 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009609 }
9610 }
9611 return(version);
9612}
9613
9614/**
9615 * xmlParseEncName:
9616 * @ctxt: an XML parser context
9617 *
9618 * parse the XML encoding name
9619 *
9620 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
9621 *
9622 * Returns the encoding name value or NULL
9623 */
9624xmlChar *
9625xmlParseEncName(xmlParserCtxtPtr ctxt) {
9626 xmlChar *buf = NULL;
9627 int len = 0;
9628 int size = 10;
9629 xmlChar cur;
9630
9631 cur = CUR;
9632 if (((cur >= 'a') && (cur <= 'z')) ||
9633 ((cur >= 'A') && (cur <= 'Z'))) {
Daniel Veillard3c908dc2003-04-19 00:07:51 +00009634 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00009635 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009636 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009637 return(NULL);
9638 }
Daniel Veillard34e3f642008-07-29 09:02:27 +00009639
Owen Taylor3473f882001-02-23 17:55:21 +00009640 buf[len++] = cur;
9641 NEXT;
9642 cur = CUR;
9643 while (((cur >= 'a') && (cur <= 'z')) ||
9644 ((cur >= 'A') && (cur <= 'Z')) ||
9645 ((cur >= '0') && (cur <= '9')) ||
9646 (cur == '.') || (cur == '_') ||
9647 (cur == '-')) {
9648 if (len + 1 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00009649 xmlChar *tmp;
9650
Owen Taylor3473f882001-02-23 17:55:21 +00009651 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00009652 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
9653 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009654 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00009655 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00009656 return(NULL);
9657 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00009658 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00009659 }
9660 buf[len++] = cur;
9661 NEXT;
9662 cur = CUR;
9663 if (cur == 0) {
9664 SHRINK;
9665 GROW;
9666 cur = CUR;
9667 }
9668 }
9669 buf[len] = 0;
9670 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009671 xmlFatalErr(ctxt, XML_ERR_ENCODING_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009672 }
9673 return(buf);
9674}
9675
9676/**
9677 * xmlParseEncodingDecl:
9678 * @ctxt: an XML parser context
9679 *
9680 * parse the XML encoding declaration
9681 *
9682 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
9683 *
9684 * this setups the conversion filters.
9685 *
9686 * Returns the encoding value or NULL
9687 */
9688
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00009689const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00009690xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
9691 xmlChar *encoding = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00009692
9693 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00009694 if (CMP8(CUR_PTR, 'e', 'n', 'c', 'o', 'd', 'i', 'n', 'g')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009695 SKIP(8);
9696 SKIP_BLANKS;
9697 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009698 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009699 return(NULL);
9700 }
9701 NEXT;
9702 SKIP_BLANKS;
9703 if (RAW == '"') {
9704 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00009705 encoding = xmlParseEncName(ctxt);
9706 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009707 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009708 } else
9709 NEXT;
9710 } else if (RAW == '\''){
9711 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00009712 encoding = xmlParseEncName(ctxt);
9713 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009714 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009715 } else
9716 NEXT;
Daniel Veillard82ac6b02002-02-17 23:18:55 +00009717 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009718 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009719 }
Daniel Veillard6b621b82003-08-11 15:03:34 +00009720 /*
9721 * UTF-16 encoding stwich has already taken place at this stage,
9722 * more over the little-endian/big-endian selection is already done
9723 */
9724 if ((encoding != NULL) &&
9725 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-16")) ||
9726 (!xmlStrcasecmp(encoding, BAD_CAST "UTF16")))) {
Daniel Veillard37334572008-07-31 08:20:02 +00009727 /*
9728 * If no encoding was passed to the parser, that we are
9729 * using UTF-16 and no decoder is present i.e. the
9730 * document is apparently UTF-8 compatible, then raise an
9731 * encoding mismatch fatal error
9732 */
9733 if ((ctxt->encoding == NULL) &&
9734 (ctxt->input->buf != NULL) &&
9735 (ctxt->input->buf->encoder == NULL)) {
9736 xmlFatalErrMsg(ctxt, XML_ERR_INVALID_ENCODING,
9737 "Document labelled UTF-16 but has UTF-8 content\n");
9738 }
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00009739 if (ctxt->encoding != NULL)
9740 xmlFree((xmlChar *) ctxt->encoding);
9741 ctxt->encoding = encoding;
Daniel Veillardb19ba832003-08-14 00:33:46 +00009742 }
9743 /*
9744 * UTF-8 encoding is handled natively
9745 */
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00009746 else if ((encoding != NULL) &&
Daniel Veillardb19ba832003-08-14 00:33:46 +00009747 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-8")) ||
9748 (!xmlStrcasecmp(encoding, BAD_CAST "UTF8")))) {
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00009749 if (ctxt->encoding != NULL)
9750 xmlFree((xmlChar *) ctxt->encoding);
9751 ctxt->encoding = encoding;
Daniel Veillard6b621b82003-08-11 15:03:34 +00009752 }
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00009753 else if (encoding != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +00009754 xmlCharEncodingHandlerPtr handler;
9755
9756 if (ctxt->input->encoding != NULL)
9757 xmlFree((xmlChar *) ctxt->input->encoding);
9758 ctxt->input->encoding = encoding;
9759
Daniel Veillarda6874ca2003-07-29 16:47:24 +00009760 handler = xmlFindCharEncodingHandler((const char *) encoding);
9761 if (handler != NULL) {
9762 xmlSwitchToEncoding(ctxt, handler);
Owen Taylor3473f882001-02-23 17:55:21 +00009763 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00009764 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
Daniel Veillarda6874ca2003-07-29 16:47:24 +00009765 "Unsupported encoding %s\n", encoding);
9766 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009767 }
9768 }
9769 }
9770 return(encoding);
9771}
9772
9773/**
9774 * xmlParseSDDecl:
9775 * @ctxt: an XML parser context
9776 *
9777 * parse the XML standalone declaration
9778 *
9779 * [32] SDDecl ::= S 'standalone' Eq
9780 * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
9781 *
9782 * [ VC: Standalone Document Declaration ]
9783 * TODO The standalone document declaration must have the value "no"
9784 * if any external markup declarations contain declarations of:
9785 * - attributes with default values, if elements to which these
9786 * attributes apply appear in the document without specifications
9787 * of values for these attributes, or
9788 * - entities (other than amp, lt, gt, apos, quot), if references
9789 * to those entities appear in the document, or
9790 * - attributes with values subject to normalization, where the
9791 * attribute appears in the document with a value which will change
9792 * as a result of normalization, or
9793 * - element types with element content, if white space occurs directly
9794 * within any instance of those types.
9795 *
Daniel Veillard602f2bd2006-12-04 09:26:04 +00009796 * Returns:
9797 * 1 if standalone="yes"
9798 * 0 if standalone="no"
9799 * -2 if standalone attribute is missing or invalid
9800 * (A standalone value of -2 means that the XML declaration was found,
9801 * but no value was specified for the standalone attribute).
Owen Taylor3473f882001-02-23 17:55:21 +00009802 */
9803
9804int
9805xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard602f2bd2006-12-04 09:26:04 +00009806 int standalone = -2;
Owen Taylor3473f882001-02-23 17:55:21 +00009807
9808 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00009809 if (CMP10(CUR_PTR, 's', 't', 'a', 'n', 'd', 'a', 'l', 'o', 'n', 'e')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009810 SKIP(10);
9811 SKIP_BLANKS;
9812 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009813 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009814 return(standalone);
9815 }
9816 NEXT;
9817 SKIP_BLANKS;
9818 if (RAW == '\''){
9819 NEXT;
9820 if ((RAW == 'n') && (NXT(1) == 'o')) {
9821 standalone = 0;
9822 SKIP(2);
9823 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
9824 (NXT(2) == 's')) {
9825 standalone = 1;
9826 SKIP(3);
9827 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009828 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009829 }
9830 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009831 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009832 } else
9833 NEXT;
9834 } else if (RAW == '"'){
9835 NEXT;
9836 if ((RAW == 'n') && (NXT(1) == 'o')) {
9837 standalone = 0;
9838 SKIP(2);
9839 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
9840 (NXT(2) == 's')) {
9841 standalone = 1;
9842 SKIP(3);
9843 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009844 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009845 }
9846 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009847 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009848 } else
9849 NEXT;
9850 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009851 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009852 }
9853 }
9854 return(standalone);
9855}
9856
9857/**
9858 * xmlParseXMLDecl:
9859 * @ctxt: an XML parser context
9860 *
9861 * parse an XML declaration header
9862 *
9863 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
9864 */
9865
9866void
9867xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
9868 xmlChar *version;
9869
9870 /*
Daniel Veillardae487ba2005-11-17 07:25:52 +00009871 * This value for standalone indicates that the document has an
9872 * XML declaration but it does not have a standalone attribute.
9873 * It will be overwritten later if a standalone attribute is found.
9874 */
9875 ctxt->input->standalone = -2;
9876
9877 /*
Owen Taylor3473f882001-02-23 17:55:21 +00009878 * We know that '<?xml' is here.
9879 */
9880 SKIP(5);
9881
William M. Brack76e95df2003-10-18 16:20:14 +00009882 if (!IS_BLANK_CH(RAW)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009883 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
9884 "Blank needed after '<?xml'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00009885 }
9886 SKIP_BLANKS;
9887
9888 /*
Daniel Veillard19840942001-11-29 16:11:38 +00009889 * We must have the VersionInfo here.
Owen Taylor3473f882001-02-23 17:55:21 +00009890 */
9891 version = xmlParseVersionInfo(ctxt);
Daniel Veillard19840942001-11-29 16:11:38 +00009892 if (version == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009893 xmlFatalErr(ctxt, XML_ERR_VERSION_MISSING, NULL);
Daniel Veillard19840942001-11-29 16:11:38 +00009894 } else {
9895 if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) {
9896 /*
Daniel Veillard34e3f642008-07-29 09:02:27 +00009897 * Changed here for XML-1.0 5th edition
Daniel Veillard19840942001-11-29 16:11:38 +00009898 */
Daniel Veillard34e3f642008-07-29 09:02:27 +00009899 if (ctxt->options & XML_PARSE_OLD10) {
9900 xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
9901 "Unsupported version '%s'\n",
9902 version);
9903 } else {
9904 if ((version[0] == '1') && ((version[1] == '.'))) {
9905 xmlWarningMsg(ctxt, XML_WAR_UNKNOWN_VERSION,
9906 "Unsupported version '%s'\n",
9907 version, NULL);
9908 } else {
9909 xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
9910 "Unsupported version '%s'\n",
9911 version);
9912 }
9913 }
Daniel Veillard19840942001-11-29 16:11:38 +00009914 }
9915 if (ctxt->version != NULL)
Daniel Veillardd3b08822001-12-05 12:03:33 +00009916 xmlFree((void *) ctxt->version);
Daniel Veillard19840942001-11-29 16:11:38 +00009917 ctxt->version = version;
Daniel Veillarda050d232001-09-05 15:51:05 +00009918 }
Owen Taylor3473f882001-02-23 17:55:21 +00009919
9920 /*
9921 * We may have the encoding declaration
9922 */
William M. Brack76e95df2003-10-18 16:20:14 +00009923 if (!IS_BLANK_CH(RAW)) {
Owen Taylor3473f882001-02-23 17:55:21 +00009924 if ((RAW == '?') && (NXT(1) == '>')) {
9925 SKIP(2);
9926 return;
9927 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009928 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
Owen Taylor3473f882001-02-23 17:55:21 +00009929 }
9930 xmlParseEncodingDecl(ctxt);
9931 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
9932 /*
9933 * The XML REC instructs us to stop parsing right here
9934 */
9935 return;
9936 }
9937
9938 /*
9939 * We may have the standalone status.
9940 */
William M. Brack76e95df2003-10-18 16:20:14 +00009941 if ((ctxt->input->encoding != NULL) && (!IS_BLANK_CH(RAW))) {
Owen Taylor3473f882001-02-23 17:55:21 +00009942 if ((RAW == '?') && (NXT(1) == '>')) {
9943 SKIP(2);
9944 return;
9945 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009946 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
Owen Taylor3473f882001-02-23 17:55:21 +00009947 }
9948 SKIP_BLANKS;
9949 ctxt->input->standalone = xmlParseSDDecl(ctxt);
9950
9951 SKIP_BLANKS;
9952 if ((RAW == '?') && (NXT(1) == '>')) {
9953 SKIP(2);
9954 } else if (RAW == '>') {
9955 /* Deprecated old WD ... */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009956 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009957 NEXT;
9958 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009959 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009960 MOVETO_ENDTAG(CUR_PTR);
9961 NEXT;
9962 }
9963}
9964
9965/**
9966 * xmlParseMisc:
9967 * @ctxt: an XML parser context
9968 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00009969 * parse an XML Misc* optional field.
Owen Taylor3473f882001-02-23 17:55:21 +00009970 *
9971 * [27] Misc ::= Comment | PI | S
9972 */
9973
9974void
9975xmlParseMisc(xmlParserCtxtPtr ctxt) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00009976 while (((RAW == '<') && (NXT(1) == '?')) ||
Daniel Veillarda07050d2003-10-19 14:46:32 +00009977 (CMP4(CUR_PTR, '<', '!', '-', '-')) ||
William M. Brack76e95df2003-10-18 16:20:14 +00009978 IS_BLANK_CH(CUR)) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00009979 if ((RAW == '<') && (NXT(1) == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009980 xmlParsePI(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00009981 } else if (IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00009982 NEXT;
9983 } else
9984 xmlParseComment(ctxt);
9985 }
9986}
9987
9988/**
9989 * xmlParseDocument:
9990 * @ctxt: an XML parser context
9991 *
9992 * parse an XML document (and build a tree if using the standard SAX
9993 * interface).
9994 *
9995 * [1] document ::= prolog element Misc*
9996 *
9997 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
9998 *
9999 * Returns 0, -1 in case of error. the parser context is augmented
10000 * as a result of the parsing.
10001 */
10002
10003int
10004xmlParseDocument(xmlParserCtxtPtr ctxt) {
10005 xmlChar start[4];
10006 xmlCharEncoding enc;
10007
10008 xmlInitParser();
10009
Daniel Veillard36e5cd52004-11-02 14:52:23 +000010010 if ((ctxt == NULL) || (ctxt->input == NULL))
10011 return(-1);
10012
Owen Taylor3473f882001-02-23 17:55:21 +000010013 GROW;
10014
10015 /*
Daniel Veillard0fb18932003-09-07 09:14:37 +000010016 * SAX: detecting the level.
10017 */
Daniel Veillarde57ec792003-09-10 10:50:59 +000010018 xmlDetectSAX2(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +000010019
10020 /*
Owen Taylor3473f882001-02-23 17:55:21 +000010021 * SAX: beginning of the document processing.
10022 */
10023 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10024 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
10025
Daniel Veillard4aede2e2003-10-17 12:43:59 +000010026 if ((ctxt->encoding == (const xmlChar *)XML_CHAR_ENCODING_NONE) &&
10027 ((ctxt->input->end - ctxt->input->cur) >= 4)) {
Daniel Veillard4aafa792001-07-28 17:21:12 +000010028 /*
10029 * Get the 4 first bytes and decode the charset
10030 * if enc != XML_CHAR_ENCODING_NONE
10031 * plug some encoding conversion routines.
10032 */
10033 start[0] = RAW;
10034 start[1] = NXT(1);
10035 start[2] = NXT(2);
10036 start[3] = NXT(3);
Daniel Veillard4aede2e2003-10-17 12:43:59 +000010037 enc = xmlDetectCharEncoding(&start[0], 4);
Daniel Veillard4aafa792001-07-28 17:21:12 +000010038 if (enc != XML_CHAR_ENCODING_NONE) {
10039 xmlSwitchEncoding(ctxt, enc);
10040 }
Owen Taylor3473f882001-02-23 17:55:21 +000010041 }
10042
10043
10044 if (CUR == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010045 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010046 }
10047
10048 /*
10049 * Check for the XMLDecl in the Prolog.
10050 */
10051 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +000010052 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +000010053
10054 /*
10055 * Note that we will switch encoding on the fly.
10056 */
10057 xmlParseXMLDecl(ctxt);
10058 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
10059 /*
10060 * The XML REC instructs us to stop parsing right here
10061 */
10062 return(-1);
10063 }
10064 ctxt->standalone = ctxt->input->standalone;
10065 SKIP_BLANKS;
10066 } else {
10067 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10068 }
10069 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
10070 ctxt->sax->startDocument(ctxt->userData);
10071
10072 /*
10073 * The Misc part of the Prolog
10074 */
10075 GROW;
10076 xmlParseMisc(ctxt);
10077
10078 /*
10079 * Then possibly doc type declaration(s) and more Misc
10080 * (doctypedecl Misc*)?
10081 */
10082 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +000010083 if (CMP9(CUR_PTR, '<', '!', 'D', 'O', 'C', 'T', 'Y', 'P', 'E')) {
Owen Taylor3473f882001-02-23 17:55:21 +000010084
10085 ctxt->inSubset = 1;
10086 xmlParseDocTypeDecl(ctxt);
10087 if (RAW == '[') {
10088 ctxt->instate = XML_PARSER_DTD;
10089 xmlParseInternalSubset(ctxt);
10090 }
10091
10092 /*
10093 * Create and update the external subset.
10094 */
10095 ctxt->inSubset = 2;
10096 if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
10097 (!ctxt->disableSAX))
10098 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
10099 ctxt->extSubSystem, ctxt->extSubURI);
10100 ctxt->inSubset = 0;
10101
Daniel Veillardac4118d2008-01-11 05:27:32 +000010102 xmlCleanSpecialAttr(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010103
10104 ctxt->instate = XML_PARSER_PROLOG;
10105 xmlParseMisc(ctxt);
10106 }
10107
10108 /*
10109 * Time to start parsing the tree itself
10110 */
10111 GROW;
10112 if (RAW != '<') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +000010113 xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
10114 "Start tag expected, '<' not found\n");
Owen Taylor3473f882001-02-23 17:55:21 +000010115 } else {
10116 ctxt->instate = XML_PARSER_CONTENT;
10117 xmlParseElement(ctxt);
10118 ctxt->instate = XML_PARSER_EPILOG;
10119
10120
10121 /*
10122 * The Misc part at the end
10123 */
10124 xmlParseMisc(ctxt);
10125
Daniel Veillard561b7f82002-03-20 21:55:57 +000010126 if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010127 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010128 }
10129 ctxt->instate = XML_PARSER_EOF;
10130 }
10131
10132 /*
10133 * SAX: end of the document processing.
10134 */
Daniel Veillard8d24cc12002-03-05 15:41:29 +000010135 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +000010136 ctxt->sax->endDocument(ctxt->userData);
10137
Daniel Veillard5997aca2002-03-18 18:36:20 +000010138 /*
10139 * Remove locally kept entity definitions if the tree was not built
10140 */
10141 if ((ctxt->myDoc != NULL) &&
10142 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
10143 xmlFreeDoc(ctxt->myDoc);
10144 ctxt->myDoc = NULL;
10145 }
10146
Daniel Veillardae0765b2008-07-31 19:54:59 +000010147 if ((ctxt->wellFormed) && (ctxt->myDoc != NULL)) {
10148 ctxt->myDoc->properties |= XML_DOC_WELLFORMED;
10149 if (ctxt->valid)
10150 ctxt->myDoc->properties |= XML_DOC_DTDVALID;
10151 if (ctxt->nsWellFormed)
10152 ctxt->myDoc->properties |= XML_DOC_NSVALID;
10153 if (ctxt->options & XML_PARSE_OLD10)
10154 ctxt->myDoc->properties |= XML_DOC_OLD10;
10155 }
Daniel Veillardc7612992002-02-17 22:47:37 +000010156 if (! ctxt->wellFormed) {
10157 ctxt->valid = 0;
10158 return(-1);
10159 }
Owen Taylor3473f882001-02-23 17:55:21 +000010160 return(0);
10161}
10162
10163/**
10164 * xmlParseExtParsedEnt:
10165 * @ctxt: an XML parser context
10166 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +000010167 * parse a general parsed entity
Owen Taylor3473f882001-02-23 17:55:21 +000010168 * An external general parsed entity is well-formed if it matches the
10169 * production labeled extParsedEnt.
10170 *
10171 * [78] extParsedEnt ::= TextDecl? content
10172 *
10173 * Returns 0, -1 in case of error. the parser context is augmented
10174 * as a result of the parsing.
10175 */
10176
10177int
10178xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
10179 xmlChar start[4];
10180 xmlCharEncoding enc;
10181
Daniel Veillard36e5cd52004-11-02 14:52:23 +000010182 if ((ctxt == NULL) || (ctxt->input == NULL))
10183 return(-1);
10184
Owen Taylor3473f882001-02-23 17:55:21 +000010185 xmlDefaultSAXHandlerInit();
10186
Daniel Veillard309f81d2003-09-23 09:02:53 +000010187 xmlDetectSAX2(ctxt);
10188
Owen Taylor3473f882001-02-23 17:55:21 +000010189 GROW;
10190
10191 /*
10192 * SAX: beginning of the document processing.
10193 */
10194 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10195 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
10196
10197 /*
10198 * Get the 4 first bytes and decode the charset
10199 * if enc != XML_CHAR_ENCODING_NONE
10200 * plug some encoding conversion routines.
10201 */
Daniel Veillard4aede2e2003-10-17 12:43:59 +000010202 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
10203 start[0] = RAW;
10204 start[1] = NXT(1);
10205 start[2] = NXT(2);
10206 start[3] = NXT(3);
10207 enc = xmlDetectCharEncoding(start, 4);
10208 if (enc != XML_CHAR_ENCODING_NONE) {
10209 xmlSwitchEncoding(ctxt, enc);
10210 }
Owen Taylor3473f882001-02-23 17:55:21 +000010211 }
10212
10213
10214 if (CUR == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010215 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010216 }
10217
10218 /*
10219 * Check for the XMLDecl in the Prolog.
10220 */
10221 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +000010222 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +000010223
10224 /*
10225 * Note that we will switch encoding on the fly.
10226 */
10227 xmlParseXMLDecl(ctxt);
10228 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
10229 /*
10230 * The XML REC instructs us to stop parsing right here
10231 */
10232 return(-1);
10233 }
10234 SKIP_BLANKS;
10235 } else {
10236 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10237 }
10238 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
10239 ctxt->sax->startDocument(ctxt->userData);
10240
10241 /*
10242 * Doing validity checking on chunk doesn't make sense
10243 */
10244 ctxt->instate = XML_PARSER_CONTENT;
10245 ctxt->validate = 0;
10246 ctxt->loadsubset = 0;
10247 ctxt->depth = 0;
10248
10249 xmlParseContent(ctxt);
10250
10251 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010252 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010253 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010254 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010255 }
10256
10257 /*
10258 * SAX: end of the document processing.
10259 */
Daniel Veillard8d24cc12002-03-05 15:41:29 +000010260 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +000010261 ctxt->sax->endDocument(ctxt->userData);
10262
10263 if (! ctxt->wellFormed) return(-1);
10264 return(0);
10265}
10266
Daniel Veillard73b013f2003-09-30 12:36:01 +000010267#ifdef LIBXML_PUSH_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000010268/************************************************************************
10269 * *
10270 * Progressive parsing interfaces *
10271 * *
10272 ************************************************************************/
10273
10274/**
10275 * xmlParseLookupSequence:
10276 * @ctxt: an XML parser context
10277 * @first: the first char to lookup
10278 * @next: the next char to lookup or zero
10279 * @third: the next char to lookup or zero
10280 *
10281 * Try to find if a sequence (first, next, third) or just (first next) or
10282 * (first) is available in the input stream.
10283 * This function has a side effect of (possibly) incrementing ctxt->checkIndex
10284 * to avoid rescanning sequences of bytes, it DOES change the state of the
10285 * parser, do not use liberally.
10286 *
10287 * Returns the index to the current parsing point if the full sequence
10288 * is available, -1 otherwise.
10289 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +000010290static int
Owen Taylor3473f882001-02-23 17:55:21 +000010291xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
10292 xmlChar next, xmlChar third) {
10293 int base, len;
10294 xmlParserInputPtr in;
10295 const xmlChar *buf;
10296
10297 in = ctxt->input;
10298 if (in == NULL) return(-1);
10299 base = in->cur - in->base;
10300 if (base < 0) return(-1);
10301 if (ctxt->checkIndex > base)
10302 base = ctxt->checkIndex;
10303 if (in->buf == NULL) {
10304 buf = in->base;
10305 len = in->length;
10306 } else {
10307 buf = in->buf->buffer->content;
10308 len = in->buf->buffer->use;
10309 }
10310 /* take into account the sequence length */
10311 if (third) len -= 2;
10312 else if (next) len --;
10313 for (;base < len;base++) {
10314 if (buf[base] == first) {
10315 if (third != 0) {
10316 if ((buf[base + 1] != next) ||
10317 (buf[base + 2] != third)) continue;
10318 } else if (next != 0) {
10319 if (buf[base + 1] != next) continue;
10320 }
10321 ctxt->checkIndex = 0;
10322#ifdef DEBUG_PUSH
10323 if (next == 0)
10324 xmlGenericError(xmlGenericErrorContext,
10325 "PP: lookup '%c' found at %d\n",
10326 first, base);
10327 else if (third == 0)
10328 xmlGenericError(xmlGenericErrorContext,
10329 "PP: lookup '%c%c' found at %d\n",
10330 first, next, base);
10331 else
10332 xmlGenericError(xmlGenericErrorContext,
10333 "PP: lookup '%c%c%c' found at %d\n",
10334 first, next, third, base);
10335#endif
10336 return(base - (in->cur - in->base));
10337 }
10338 }
10339 ctxt->checkIndex = base;
10340#ifdef DEBUG_PUSH
10341 if (next == 0)
10342 xmlGenericError(xmlGenericErrorContext,
10343 "PP: lookup '%c' failed\n", first);
10344 else if (third == 0)
10345 xmlGenericError(xmlGenericErrorContext,
10346 "PP: lookup '%c%c' failed\n", first, next);
10347 else
10348 xmlGenericError(xmlGenericErrorContext,
10349 "PP: lookup '%c%c%c' failed\n", first, next, third);
10350#endif
10351 return(-1);
10352}
10353
10354/**
Daniel Veillarda880b122003-04-21 21:36:41 +000010355 * xmlParseGetLasts:
10356 * @ctxt: an XML parser context
10357 * @lastlt: pointer to store the last '<' from the input
10358 * @lastgt: pointer to store the last '>' from the input
10359 *
10360 * Lookup the last < and > in the current chunk
10361 */
10362static void
10363xmlParseGetLasts(xmlParserCtxtPtr ctxt, const xmlChar **lastlt,
10364 const xmlChar **lastgt) {
10365 const xmlChar *tmp;
10366
10367 if ((ctxt == NULL) || (lastlt == NULL) || (lastgt == NULL)) {
10368 xmlGenericError(xmlGenericErrorContext,
10369 "Internal error: xmlParseGetLasts\n");
10370 return;
10371 }
Daniel Veillard0df3bc32004-06-08 12:03:41 +000010372 if ((ctxt->progressive != 0) && (ctxt->inputNr == 1)) {
Daniel Veillarda880b122003-04-21 21:36:41 +000010373 tmp = ctxt->input->end;
10374 tmp--;
Daniel Veillardeb70f932004-07-05 16:46:09 +000010375 while ((tmp >= ctxt->input->base) && (*tmp != '<')) tmp--;
Daniel Veillarda880b122003-04-21 21:36:41 +000010376 if (tmp < ctxt->input->base) {
10377 *lastlt = NULL;
10378 *lastgt = NULL;
Daniel Veillarda880b122003-04-21 21:36:41 +000010379 } else {
Daniel Veillardeb70f932004-07-05 16:46:09 +000010380 *lastlt = tmp;
10381 tmp++;
10382 while ((tmp < ctxt->input->end) && (*tmp != '>')) {
10383 if (*tmp == '\'') {
10384 tmp++;
10385 while ((tmp < ctxt->input->end) && (*tmp != '\'')) tmp++;
10386 if (tmp < ctxt->input->end) tmp++;
10387 } else if (*tmp == '"') {
10388 tmp++;
10389 while ((tmp < ctxt->input->end) && (*tmp != '"')) tmp++;
10390 if (tmp < ctxt->input->end) tmp++;
10391 } else
10392 tmp++;
10393 }
10394 if (tmp < ctxt->input->end)
10395 *lastgt = tmp;
10396 else {
10397 tmp = *lastlt;
10398 tmp--;
10399 while ((tmp >= ctxt->input->base) && (*tmp != '>')) tmp--;
10400 if (tmp >= ctxt->input->base)
10401 *lastgt = tmp;
10402 else
10403 *lastgt = NULL;
10404 }
Daniel Veillarda880b122003-04-21 21:36:41 +000010405 }
Daniel Veillarda880b122003-04-21 21:36:41 +000010406 } else {
10407 *lastlt = NULL;
10408 *lastgt = NULL;
10409 }
10410}
10411/**
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000010412 * xmlCheckCdataPush:
10413 * @cur: pointer to the bock of characters
10414 * @len: length of the block in bytes
10415 *
10416 * Check that the block of characters is okay as SCdata content [20]
10417 *
10418 * Returns the number of bytes to pass if okay, a negative index where an
10419 * UTF-8 error occured otherwise
10420 */
10421static int
10422xmlCheckCdataPush(const xmlChar *utf, int len) {
10423 int ix;
10424 unsigned char c;
10425 int codepoint;
10426
10427 if ((utf == NULL) || (len <= 0))
10428 return(0);
10429
10430 for (ix = 0; ix < len;) { /* string is 0-terminated */
10431 c = utf[ix];
10432 if ((c & 0x80) == 0x00) { /* 1-byte code, starts with 10 */
10433 if (c >= 0x20)
10434 ix++;
10435 else if ((c == 0xA) || (c == 0xD) || (c == 0x9))
10436 ix++;
10437 else
10438 return(-ix);
10439 } else if ((c & 0xe0) == 0xc0) {/* 2-byte code, starts with 110 */
10440 if (ix + 2 > len) return(ix);
10441 if ((utf[ix+1] & 0xc0 ) != 0x80)
10442 return(-ix);
Daniel Veillardeca59a22005-09-09 10:56:28 +000010443 codepoint = (utf[ix] & 0x1f) << 6;
10444 codepoint |= utf[ix+1] & 0x3f;
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000010445 if (!xmlIsCharQ(codepoint))
10446 return(-ix);
10447 ix += 2;
10448 } else if ((c & 0xf0) == 0xe0) {/* 3-byte code, starts with 1110 */
10449 if (ix + 3 > len) return(ix);
10450 if (((utf[ix+1] & 0xc0) != 0x80) ||
10451 ((utf[ix+2] & 0xc0) != 0x80))
10452 return(-ix);
Daniel Veillardeca59a22005-09-09 10:56:28 +000010453 codepoint = (utf[ix] & 0xf) << 12;
10454 codepoint |= (utf[ix+1] & 0x3f) << 6;
10455 codepoint |= utf[ix+2] & 0x3f;
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000010456 if (!xmlIsCharQ(codepoint))
10457 return(-ix);
10458 ix += 3;
10459 } else if ((c & 0xf8) == 0xf0) {/* 4-byte code, starts with 11110 */
10460 if (ix + 4 > len) return(ix);
10461 if (((utf[ix+1] & 0xc0) != 0x80) ||
10462 ((utf[ix+2] & 0xc0) != 0x80) ||
10463 ((utf[ix+3] & 0xc0) != 0x80))
10464 return(-ix);
Daniel Veillardeca59a22005-09-09 10:56:28 +000010465 codepoint = (utf[ix] & 0x7) << 18;
10466 codepoint |= (utf[ix+1] & 0x3f) << 12;
10467 codepoint |= (utf[ix+2] & 0x3f) << 6;
10468 codepoint |= utf[ix+3] & 0x3f;
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000010469 if (!xmlIsCharQ(codepoint))
10470 return(-ix);
10471 ix += 4;
10472 } else /* unknown encoding */
10473 return(-ix);
10474 }
10475 return(ix);
10476}
10477
10478/**
Owen Taylor3473f882001-02-23 17:55:21 +000010479 * xmlParseTryOrFinish:
10480 * @ctxt: an XML parser context
10481 * @terminate: last chunk indicator
10482 *
10483 * Try to progress on parsing
10484 *
10485 * Returns zero if no parsing was possible
10486 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +000010487static int
Owen Taylor3473f882001-02-23 17:55:21 +000010488xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
10489 int ret = 0;
Daniel Veillardc82c57e2004-01-12 16:24:34 +000010490 int avail, tlen;
Owen Taylor3473f882001-02-23 17:55:21 +000010491 xmlChar cur, next;
Daniel Veillarda880b122003-04-21 21:36:41 +000010492 const xmlChar *lastlt, *lastgt;
Owen Taylor3473f882001-02-23 17:55:21 +000010493
Daniel Veillard36e5cd52004-11-02 14:52:23 +000010494 if (ctxt->input == NULL)
10495 return(0);
10496
Owen Taylor3473f882001-02-23 17:55:21 +000010497#ifdef DEBUG_PUSH
10498 switch (ctxt->instate) {
10499 case XML_PARSER_EOF:
10500 xmlGenericError(xmlGenericErrorContext,
10501 "PP: try EOF\n"); break;
10502 case XML_PARSER_START:
10503 xmlGenericError(xmlGenericErrorContext,
10504 "PP: try START\n"); break;
10505 case XML_PARSER_MISC:
10506 xmlGenericError(xmlGenericErrorContext,
10507 "PP: try MISC\n");break;
10508 case XML_PARSER_COMMENT:
10509 xmlGenericError(xmlGenericErrorContext,
10510 "PP: try COMMENT\n");break;
10511 case XML_PARSER_PROLOG:
10512 xmlGenericError(xmlGenericErrorContext,
10513 "PP: try PROLOG\n");break;
10514 case XML_PARSER_START_TAG:
10515 xmlGenericError(xmlGenericErrorContext,
10516 "PP: try START_TAG\n");break;
10517 case XML_PARSER_CONTENT:
10518 xmlGenericError(xmlGenericErrorContext,
10519 "PP: try CONTENT\n");break;
10520 case XML_PARSER_CDATA_SECTION:
10521 xmlGenericError(xmlGenericErrorContext,
10522 "PP: try CDATA_SECTION\n");break;
10523 case XML_PARSER_END_TAG:
10524 xmlGenericError(xmlGenericErrorContext,
10525 "PP: try END_TAG\n");break;
10526 case XML_PARSER_ENTITY_DECL:
10527 xmlGenericError(xmlGenericErrorContext,
10528 "PP: try ENTITY_DECL\n");break;
10529 case XML_PARSER_ENTITY_VALUE:
10530 xmlGenericError(xmlGenericErrorContext,
10531 "PP: try ENTITY_VALUE\n");break;
10532 case XML_PARSER_ATTRIBUTE_VALUE:
10533 xmlGenericError(xmlGenericErrorContext,
10534 "PP: try ATTRIBUTE_VALUE\n");break;
10535 case XML_PARSER_DTD:
10536 xmlGenericError(xmlGenericErrorContext,
10537 "PP: try DTD\n");break;
10538 case XML_PARSER_EPILOG:
10539 xmlGenericError(xmlGenericErrorContext,
10540 "PP: try EPILOG\n");break;
10541 case XML_PARSER_PI:
10542 xmlGenericError(xmlGenericErrorContext,
10543 "PP: try PI\n");break;
10544 case XML_PARSER_IGNORE:
10545 xmlGenericError(xmlGenericErrorContext,
10546 "PP: try IGNORE\n");break;
10547 }
10548#endif
10549
Daniel Veillard198c1bf2003-10-20 17:07:41 +000010550 if ((ctxt->input != NULL) &&
10551 (ctxt->input->cur - ctxt->input->base > 4096)) {
Daniel Veillarda880b122003-04-21 21:36:41 +000010552 xmlSHRINK(ctxt);
10553 ctxt->checkIndex = 0;
10554 }
10555 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Aleksey Sanine48a3182002-05-09 18:20:01 +000010556
Daniel Veillarda880b122003-04-21 21:36:41 +000010557 while (1) {
Daniel Veillard14412512005-01-21 23:53:26 +000010558 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000010559 return(0);
10560
10561
Owen Taylor3473f882001-02-23 17:55:21 +000010562 /*
10563 * Pop-up of finished entities.
10564 */
10565 while ((RAW == 0) && (ctxt->inputNr > 1))
10566 xmlPopInput(ctxt);
10567
Daniel Veillard198c1bf2003-10-20 17:07:41 +000010568 if (ctxt->input == NULL) break;
Owen Taylor3473f882001-02-23 17:55:21 +000010569 if (ctxt->input->buf == NULL)
Daniel Veillarda880b122003-04-21 21:36:41 +000010570 avail = ctxt->input->length -
10571 (ctxt->input->cur - ctxt->input->base);
Daniel Veillard158a4d22002-02-20 22:17:58 +000010572 else {
10573 /*
10574 * If we are operating on converted input, try to flush
10575 * remainng chars to avoid them stalling in the non-converted
10576 * buffer.
10577 */
10578 if ((ctxt->input->buf->raw != NULL) &&
10579 (ctxt->input->buf->raw->use > 0)) {
10580 int base = ctxt->input->base -
10581 ctxt->input->buf->buffer->content;
10582 int current = ctxt->input->cur - ctxt->input->base;
10583
10584 xmlParserInputBufferPush(ctxt->input->buf, 0, "");
10585 ctxt->input->base = ctxt->input->buf->buffer->content + base;
10586 ctxt->input->cur = ctxt->input->base + current;
10587 ctxt->input->end =
10588 &ctxt->input->buf->buffer->content[
10589 ctxt->input->buf->buffer->use];
10590 }
10591 avail = ctxt->input->buf->buffer->use -
10592 (ctxt->input->cur - ctxt->input->base);
10593 }
Owen Taylor3473f882001-02-23 17:55:21 +000010594 if (avail < 1)
10595 goto done;
10596 switch (ctxt->instate) {
10597 case XML_PARSER_EOF:
10598 /*
10599 * Document parsing is done !
10600 */
10601 goto done;
10602 case XML_PARSER_START:
Daniel Veillard0e4cd172001-06-28 12:13:56 +000010603 if (ctxt->charset == XML_CHAR_ENCODING_NONE) {
10604 xmlChar start[4];
10605 xmlCharEncoding enc;
10606
10607 /*
10608 * Very first chars read from the document flow.
10609 */
10610 if (avail < 4)
10611 goto done;
10612
10613 /*
10614 * Get the 4 first bytes and decode the charset
10615 * if enc != XML_CHAR_ENCODING_NONE
William M. Brack3a1cd212005-02-11 14:35:54 +000010616 * plug some encoding conversion routines,
10617 * else xmlSwitchEncoding will set to (default)
10618 * UTF8.
Daniel Veillard0e4cd172001-06-28 12:13:56 +000010619 */
10620 start[0] = RAW;
10621 start[1] = NXT(1);
10622 start[2] = NXT(2);
10623 start[3] = NXT(3);
10624 enc = xmlDetectCharEncoding(start, 4);
William M. Brack3a1cd212005-02-11 14:35:54 +000010625 xmlSwitchEncoding(ctxt, enc);
Daniel Veillard0e4cd172001-06-28 12:13:56 +000010626 break;
10627 }
Owen Taylor3473f882001-02-23 17:55:21 +000010628
Daniel Veillard2b8c4a12003-10-02 22:28:19 +000010629 if (avail < 2)
10630 goto done;
Owen Taylor3473f882001-02-23 17:55:21 +000010631 cur = ctxt->input->cur[0];
10632 next = ctxt->input->cur[1];
10633 if (cur == 0) {
10634 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10635 ctxt->sax->setDocumentLocator(ctxt->userData,
10636 &xmlDefaultSAXLocator);
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010637 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010638 ctxt->instate = XML_PARSER_EOF;
10639#ifdef DEBUG_PUSH
10640 xmlGenericError(xmlGenericErrorContext,
10641 "PP: entering EOF\n");
10642#endif
10643 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10644 ctxt->sax->endDocument(ctxt->userData);
10645 goto done;
10646 }
10647 if ((cur == '<') && (next == '?')) {
10648 /* PI or XML decl */
10649 if (avail < 5) return(ret);
10650 if ((!terminate) &&
10651 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
10652 return(ret);
10653 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10654 ctxt->sax->setDocumentLocator(ctxt->userData,
10655 &xmlDefaultSAXLocator);
10656 if ((ctxt->input->cur[2] == 'x') &&
10657 (ctxt->input->cur[3] == 'm') &&
10658 (ctxt->input->cur[4] == 'l') &&
William M. Brack76e95df2003-10-18 16:20:14 +000010659 (IS_BLANK_CH(ctxt->input->cur[5]))) {
Owen Taylor3473f882001-02-23 17:55:21 +000010660 ret += 5;
10661#ifdef DEBUG_PUSH
10662 xmlGenericError(xmlGenericErrorContext,
10663 "PP: Parsing XML Decl\n");
10664#endif
10665 xmlParseXMLDecl(ctxt);
10666 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
10667 /*
10668 * The XML REC instructs us to stop parsing right
10669 * here
10670 */
10671 ctxt->instate = XML_PARSER_EOF;
10672 return(0);
10673 }
10674 ctxt->standalone = ctxt->input->standalone;
10675 if ((ctxt->encoding == NULL) &&
10676 (ctxt->input->encoding != NULL))
10677 ctxt->encoding = xmlStrdup(ctxt->input->encoding);
10678 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
10679 (!ctxt->disableSAX))
10680 ctxt->sax->startDocument(ctxt->userData);
10681 ctxt->instate = XML_PARSER_MISC;
10682#ifdef DEBUG_PUSH
10683 xmlGenericError(xmlGenericErrorContext,
10684 "PP: entering MISC\n");
10685#endif
10686 } else {
10687 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10688 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
10689 (!ctxt->disableSAX))
10690 ctxt->sax->startDocument(ctxt->userData);
10691 ctxt->instate = XML_PARSER_MISC;
10692#ifdef DEBUG_PUSH
10693 xmlGenericError(xmlGenericErrorContext,
10694 "PP: entering MISC\n");
10695#endif
10696 }
10697 } else {
10698 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10699 ctxt->sax->setDocumentLocator(ctxt->userData,
10700 &xmlDefaultSAXLocator);
10701 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
William M. Bracka3215c72004-07-31 16:24:01 +000010702 if (ctxt->version == NULL) {
10703 xmlErrMemory(ctxt, NULL);
10704 break;
10705 }
Owen Taylor3473f882001-02-23 17:55:21 +000010706 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
10707 (!ctxt->disableSAX))
10708 ctxt->sax->startDocument(ctxt->userData);
10709 ctxt->instate = XML_PARSER_MISC;
10710#ifdef DEBUG_PUSH
10711 xmlGenericError(xmlGenericErrorContext,
10712 "PP: entering MISC\n");
10713#endif
10714 }
10715 break;
Daniel Veillarda880b122003-04-21 21:36:41 +000010716 case XML_PARSER_START_TAG: {
Daniel Veillarde57ec792003-09-10 10:50:59 +000010717 const xmlChar *name;
10718 const xmlChar *prefix;
10719 const xmlChar *URI;
10720 int nsNr = ctxt->nsNr;
Daniel Veillarda880b122003-04-21 21:36:41 +000010721
10722 if ((avail < 2) && (ctxt->inputNr == 1))
10723 goto done;
10724 cur = ctxt->input->cur[0];
10725 if (cur != '<') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010726 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Daniel Veillarda880b122003-04-21 21:36:41 +000010727 ctxt->instate = XML_PARSER_EOF;
Daniel Veillarda880b122003-04-21 21:36:41 +000010728 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10729 ctxt->sax->endDocument(ctxt->userData);
10730 goto done;
10731 }
10732 if (!terminate) {
10733 if (ctxt->progressive) {
Daniel Veillardb3744002004-02-18 14:28:22 +000010734 /* > can be found unescaped in attribute values */
Daniel Veillardeb70f932004-07-05 16:46:09 +000010735 if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
Daniel Veillarda880b122003-04-21 21:36:41 +000010736 goto done;
10737 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
10738 goto done;
10739 }
10740 }
10741 if (ctxt->spaceNr == 0)
10742 spacePush(ctxt, -1);
Daniel Veillard1114d002006-10-12 16:24:35 +000010743 else if (*ctxt->space == -2)
10744 spacePush(ctxt, -1);
Daniel Veillarda880b122003-04-21 21:36:41 +000010745 else
10746 spacePush(ctxt, *ctxt->space);
Daniel Veillard81273902003-09-30 00:43:48 +000010747#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +000010748 if (ctxt->sax2)
Daniel Veillard81273902003-09-30 00:43:48 +000010749#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillardc82c57e2004-01-12 16:24:34 +000010750 name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
Daniel Veillard81273902003-09-30 00:43:48 +000010751#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +000010752 else
10753 name = xmlParseStartTag(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +000010754#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarda880b122003-04-21 21:36:41 +000010755 if (name == NULL) {
10756 spacePop(ctxt);
10757 ctxt->instate = XML_PARSER_EOF;
Daniel Veillarda880b122003-04-21 21:36:41 +000010758 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10759 ctxt->sax->endDocument(ctxt->userData);
10760 goto done;
10761 }
Daniel Veillard4432df22003-09-28 18:58:27 +000010762#ifdef LIBXML_VALID_ENABLED
Daniel Veillarda880b122003-04-21 21:36:41 +000010763 /*
10764 * [ VC: Root Element Type ]
10765 * The Name in the document type declaration must match
10766 * the element type of the root element.
10767 */
10768 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
10769 ctxt->node && (ctxt->node == ctxt->myDoc->children))
10770 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
Daniel Veillard4432df22003-09-28 18:58:27 +000010771#endif /* LIBXML_VALID_ENABLED */
Daniel Veillarda880b122003-04-21 21:36:41 +000010772
10773 /*
10774 * Check for an Empty Element.
10775 */
10776 if ((RAW == '/') && (NXT(1) == '>')) {
10777 SKIP(2);
Daniel Veillarde57ec792003-09-10 10:50:59 +000010778
10779 if (ctxt->sax2) {
10780 if ((ctxt->sax != NULL) &&
10781 (ctxt->sax->endElementNs != NULL) &&
10782 (!ctxt->disableSAX))
10783 ctxt->sax->endElementNs(ctxt->userData, name,
10784 prefix, URI);
Daniel Veillard48df9612005-01-04 21:50:05 +000010785 if (ctxt->nsNr - nsNr > 0)
10786 nsPop(ctxt, ctxt->nsNr - nsNr);
Daniel Veillard81273902003-09-30 00:43:48 +000010787#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +000010788 } else {
10789 if ((ctxt->sax != NULL) &&
10790 (ctxt->sax->endElement != NULL) &&
10791 (!ctxt->disableSAX))
10792 ctxt->sax->endElement(ctxt->userData, name);
Daniel Veillard81273902003-09-30 00:43:48 +000010793#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarda880b122003-04-21 21:36:41 +000010794 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000010795 spacePop(ctxt);
10796 if (ctxt->nameNr == 0) {
Daniel Veillarda880b122003-04-21 21:36:41 +000010797 ctxt->instate = XML_PARSER_EPILOG;
Daniel Veillarda880b122003-04-21 21:36:41 +000010798 } else {
10799 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillarda880b122003-04-21 21:36:41 +000010800 }
10801 break;
10802 }
10803 if (RAW == '>') {
10804 NEXT;
10805 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +000010806 xmlFatalErrMsgStr(ctxt, XML_ERR_GT_REQUIRED,
Daniel Veillarda880b122003-04-21 21:36:41 +000010807 "Couldn't find end of Start Tag %s\n",
10808 name);
Daniel Veillarda880b122003-04-21 21:36:41 +000010809 nodePop(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +000010810 spacePop(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +000010811 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000010812 if (ctxt->sax2)
10813 nameNsPush(ctxt, name, prefix, URI, ctxt->nsNr - nsNr);
Daniel Veillard81273902003-09-30 00:43:48 +000010814#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +000010815 else
10816 namePush(ctxt, name);
Daniel Veillard81273902003-09-30 00:43:48 +000010817#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +000010818
Daniel Veillarda880b122003-04-21 21:36:41 +000010819 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillarda880b122003-04-21 21:36:41 +000010820 break;
10821 }
10822 case XML_PARSER_CONTENT: {
10823 const xmlChar *test;
10824 unsigned int cons;
10825 if ((avail < 2) && (ctxt->inputNr == 1))
10826 goto done;
10827 cur = ctxt->input->cur[0];
10828 next = ctxt->input->cur[1];
10829
10830 test = CUR_PTR;
10831 cons = ctxt->input->consumed;
10832 if ((cur == '<') && (next == '/')) {
10833 ctxt->instate = XML_PARSER_END_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +000010834 break;
10835 } else if ((cur == '<') && (next == '?')) {
10836 if ((!terminate) &&
10837 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
10838 goto done;
Daniel Veillarda880b122003-04-21 21:36:41 +000010839 xmlParsePI(ctxt);
10840 } else if ((cur == '<') && (next != '!')) {
10841 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +000010842 break;
10843 } else if ((cur == '<') && (next == '!') &&
10844 (ctxt->input->cur[2] == '-') &&
10845 (ctxt->input->cur[3] == '-')) {
Daniel Veillard6974feb2006-02-05 02:43:36 +000010846 int term;
10847
10848 if (avail < 4)
10849 goto done;
10850 ctxt->input->cur += 4;
10851 term = xmlParseLookupSequence(ctxt, '-', '-', '>');
10852 ctxt->input->cur -= 4;
10853 if ((!terminate) && (term < 0))
Daniel Veillarda880b122003-04-21 21:36:41 +000010854 goto done;
Daniel Veillarda880b122003-04-21 21:36:41 +000010855 xmlParseComment(ctxt);
10856 ctxt->instate = XML_PARSER_CONTENT;
10857 } else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
10858 (ctxt->input->cur[2] == '[') &&
10859 (ctxt->input->cur[3] == 'C') &&
10860 (ctxt->input->cur[4] == 'D') &&
10861 (ctxt->input->cur[5] == 'A') &&
10862 (ctxt->input->cur[6] == 'T') &&
10863 (ctxt->input->cur[7] == 'A') &&
10864 (ctxt->input->cur[8] == '[')) {
10865 SKIP(9);
10866 ctxt->instate = XML_PARSER_CDATA_SECTION;
Daniel Veillarda880b122003-04-21 21:36:41 +000010867 break;
10868 } else if ((cur == '<') && (next == '!') &&
10869 (avail < 9)) {
10870 goto done;
10871 } else if (cur == '&') {
10872 if ((!terminate) &&
10873 (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0))
10874 goto done;
Daniel Veillarda880b122003-04-21 21:36:41 +000010875 xmlParseReference(ctxt);
10876 } else {
10877 /* TODO Avoid the extra copy, handle directly !!! */
10878 /*
10879 * Goal of the following test is:
10880 * - minimize calls to the SAX 'character' callback
10881 * when they are mergeable
10882 * - handle an problem for isBlank when we only parse
10883 * a sequence of blank chars and the next one is
10884 * not available to check against '<' presence.
10885 * - tries to homogenize the differences in SAX
10886 * callbacks between the push and pull versions
10887 * of the parser.
10888 */
10889 if ((ctxt->inputNr == 1) &&
10890 (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
10891 if (!terminate) {
10892 if (ctxt->progressive) {
10893 if ((lastlt == NULL) ||
10894 (ctxt->input->cur > lastlt))
10895 goto done;
10896 } else if (xmlParseLookupSequence(ctxt,
10897 '<', 0, 0) < 0) {
10898 goto done;
10899 }
10900 }
10901 }
10902 ctxt->checkIndex = 0;
Daniel Veillarda880b122003-04-21 21:36:41 +000010903 xmlParseCharData(ctxt, 0);
10904 }
10905 /*
10906 * Pop-up of finished entities.
10907 */
10908 while ((RAW == 0) && (ctxt->inputNr > 1))
10909 xmlPopInput(ctxt);
10910 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010911 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
10912 "detected an error in element content\n");
Daniel Veillarda880b122003-04-21 21:36:41 +000010913 ctxt->instate = XML_PARSER_EOF;
10914 break;
10915 }
10916 break;
10917 }
10918 case XML_PARSER_END_TAG:
10919 if (avail < 2)
10920 goto done;
10921 if (!terminate) {
10922 if (ctxt->progressive) {
Daniel Veillardeb70f932004-07-05 16:46:09 +000010923 /* > can be found unescaped in attribute values */
10924 if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
Daniel Veillarda880b122003-04-21 21:36:41 +000010925 goto done;
10926 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
10927 goto done;
10928 }
10929 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000010930 if (ctxt->sax2) {
10931 xmlParseEndTag2(ctxt,
10932 (void *) ctxt->pushTab[ctxt->nameNr * 3 - 3],
10933 (void *) ctxt->pushTab[ctxt->nameNr * 3 - 2], 0,
Daniel Veillardc82c57e2004-01-12 16:24:34 +000010934 (int) (long) ctxt->pushTab[ctxt->nameNr * 3 - 1], 0);
Daniel Veillarde57ec792003-09-10 10:50:59 +000010935 nameNsPop(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +000010936 }
10937#ifdef LIBXML_SAX1_ENABLED
10938 else
Daniel Veillarde57ec792003-09-10 10:50:59 +000010939 xmlParseEndTag1(ctxt, 0);
Daniel Veillard81273902003-09-30 00:43:48 +000010940#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +000010941 if (ctxt->nameNr == 0) {
Daniel Veillarda880b122003-04-21 21:36:41 +000010942 ctxt->instate = XML_PARSER_EPILOG;
Daniel Veillarda880b122003-04-21 21:36:41 +000010943 } else {
10944 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillarda880b122003-04-21 21:36:41 +000010945 }
10946 break;
10947 case XML_PARSER_CDATA_SECTION: {
10948 /*
10949 * The Push mode need to have the SAX callback for
10950 * cdataBlock merge back contiguous callbacks.
10951 */
10952 int base;
10953
10954 base = xmlParseLookupSequence(ctxt, ']', ']', '>');
10955 if (base < 0) {
10956 if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) {
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000010957 int tmp;
10958
10959 tmp = xmlCheckCdataPush(ctxt->input->cur,
10960 XML_PARSER_BIG_BUFFER_SIZE);
10961 if (tmp < 0) {
10962 tmp = -tmp;
10963 ctxt->input->cur += tmp;
10964 goto encoding_error;
10965 }
Daniel Veillarda880b122003-04-21 21:36:41 +000010966 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
10967 if (ctxt->sax->cdataBlock != NULL)
Daniel Veillardd9d32ae2003-07-05 20:32:43 +000010968 ctxt->sax->cdataBlock(ctxt->userData,
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000010969 ctxt->input->cur, tmp);
Daniel Veillardd9d32ae2003-07-05 20:32:43 +000010970 else if (ctxt->sax->characters != NULL)
10971 ctxt->sax->characters(ctxt->userData,
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000010972 ctxt->input->cur, tmp);
Daniel Veillarda880b122003-04-21 21:36:41 +000010973 }
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000010974 SKIPL(tmp);
Daniel Veillarda880b122003-04-21 21:36:41 +000010975 ctxt->checkIndex = 0;
10976 }
10977 goto done;
10978 } else {
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000010979 int tmp;
10980
10981 tmp = xmlCheckCdataPush(ctxt->input->cur, base);
10982 if ((tmp < 0) || (tmp != base)) {
10983 tmp = -tmp;
10984 ctxt->input->cur += tmp;
10985 goto encoding_error;
10986 }
Daniel Veillardd0d2f092008-03-07 16:50:21 +000010987 if ((ctxt->sax != NULL) && (base == 0) &&
10988 (ctxt->sax->cdataBlock != NULL) &&
10989 (!ctxt->disableSAX)) {
10990 /*
10991 * Special case to provide identical behaviour
10992 * between pull and push parsers on enpty CDATA
10993 * sections
10994 */
10995 if ((ctxt->input->cur - ctxt->input->base >= 9) &&
10996 (!strncmp((const char *)&ctxt->input->cur[-9],
10997 "<![CDATA[", 9)))
10998 ctxt->sax->cdataBlock(ctxt->userData,
10999 BAD_CAST "", 0);
11000 } else if ((ctxt->sax != NULL) && (base > 0) &&
Daniel Veillarda880b122003-04-21 21:36:41 +000011001 (!ctxt->disableSAX)) {
11002 if (ctxt->sax->cdataBlock != NULL)
11003 ctxt->sax->cdataBlock(ctxt->userData,
11004 ctxt->input->cur, base);
Daniel Veillardd9d32ae2003-07-05 20:32:43 +000011005 else if (ctxt->sax->characters != NULL)
11006 ctxt->sax->characters(ctxt->userData,
11007 ctxt->input->cur, base);
Daniel Veillarda880b122003-04-21 21:36:41 +000011008 }
Daniel Veillard0b787f32004-03-26 17:29:53 +000011009 SKIPL(base + 3);
Daniel Veillarda880b122003-04-21 21:36:41 +000011010 ctxt->checkIndex = 0;
11011 ctxt->instate = XML_PARSER_CONTENT;
11012#ifdef DEBUG_PUSH
11013 xmlGenericError(xmlGenericErrorContext,
11014 "PP: entering CONTENT\n");
11015#endif
11016 }
11017 break;
11018 }
Owen Taylor3473f882001-02-23 17:55:21 +000011019 case XML_PARSER_MISC:
11020 SKIP_BLANKS;
11021 if (ctxt->input->buf == NULL)
Daniel Veillarda880b122003-04-21 21:36:41 +000011022 avail = ctxt->input->length -
11023 (ctxt->input->cur - ctxt->input->base);
Owen Taylor3473f882001-02-23 17:55:21 +000011024 else
Daniel Veillarda880b122003-04-21 21:36:41 +000011025 avail = ctxt->input->buf->buffer->use -
11026 (ctxt->input->cur - ctxt->input->base);
Owen Taylor3473f882001-02-23 17:55:21 +000011027 if (avail < 2)
11028 goto done;
11029 cur = ctxt->input->cur[0];
11030 next = ctxt->input->cur[1];
11031 if ((cur == '<') && (next == '?')) {
11032 if ((!terminate) &&
11033 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
11034 goto done;
11035#ifdef DEBUG_PUSH
11036 xmlGenericError(xmlGenericErrorContext,
11037 "PP: Parsing PI\n");
11038#endif
11039 xmlParsePI(ctxt);
Daniel Veillard40e4b212007-06-12 14:46:40 +000011040 ctxt->checkIndex = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000011041 } else if ((cur == '<') && (next == '!') &&
Daniel Veillarda880b122003-04-21 21:36:41 +000011042 (ctxt->input->cur[2] == '-') &&
11043 (ctxt->input->cur[3] == '-')) {
Owen Taylor3473f882001-02-23 17:55:21 +000011044 if ((!terminate) &&
11045 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
11046 goto done;
11047#ifdef DEBUG_PUSH
11048 xmlGenericError(xmlGenericErrorContext,
11049 "PP: Parsing Comment\n");
11050#endif
11051 xmlParseComment(ctxt);
11052 ctxt->instate = XML_PARSER_MISC;
Daniel Veillarddfac9462007-06-12 14:44:32 +000011053 ctxt->checkIndex = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000011054 } else if ((cur == '<') && (next == '!') &&
Daniel Veillarda880b122003-04-21 21:36:41 +000011055 (ctxt->input->cur[2] == 'D') &&
11056 (ctxt->input->cur[3] == 'O') &&
11057 (ctxt->input->cur[4] == 'C') &&
11058 (ctxt->input->cur[5] == 'T') &&
11059 (ctxt->input->cur[6] == 'Y') &&
11060 (ctxt->input->cur[7] == 'P') &&
Owen Taylor3473f882001-02-23 17:55:21 +000011061 (ctxt->input->cur[8] == 'E')) {
11062 if ((!terminate) &&
11063 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
11064 goto done;
11065#ifdef DEBUG_PUSH
11066 xmlGenericError(xmlGenericErrorContext,
11067 "PP: Parsing internal subset\n");
11068#endif
11069 ctxt->inSubset = 1;
11070 xmlParseDocTypeDecl(ctxt);
11071 if (RAW == '[') {
11072 ctxt->instate = XML_PARSER_DTD;
11073#ifdef DEBUG_PUSH
11074 xmlGenericError(xmlGenericErrorContext,
11075 "PP: entering DTD\n");
11076#endif
11077 } else {
11078 /*
11079 * Create and update the external subset.
11080 */
11081 ctxt->inSubset = 2;
11082 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
11083 (ctxt->sax->externalSubset != NULL))
11084 ctxt->sax->externalSubset(ctxt->userData,
11085 ctxt->intSubName, ctxt->extSubSystem,
11086 ctxt->extSubURI);
11087 ctxt->inSubset = 0;
Daniel Veillardac4118d2008-01-11 05:27:32 +000011088 xmlCleanSpecialAttr(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011089 ctxt->instate = XML_PARSER_PROLOG;
11090#ifdef DEBUG_PUSH
11091 xmlGenericError(xmlGenericErrorContext,
11092 "PP: entering PROLOG\n");
11093#endif
11094 }
11095 } else if ((cur == '<') && (next == '!') &&
11096 (avail < 9)) {
11097 goto done;
11098 } else {
11099 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +000011100 ctxt->progressive = 1;
11101 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Owen Taylor3473f882001-02-23 17:55:21 +000011102#ifdef DEBUG_PUSH
11103 xmlGenericError(xmlGenericErrorContext,
11104 "PP: entering START_TAG\n");
11105#endif
11106 }
11107 break;
Owen Taylor3473f882001-02-23 17:55:21 +000011108 case XML_PARSER_PROLOG:
11109 SKIP_BLANKS;
11110 if (ctxt->input->buf == NULL)
11111 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
11112 else
11113 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
11114 if (avail < 2)
11115 goto done;
11116 cur = ctxt->input->cur[0];
11117 next = ctxt->input->cur[1];
11118 if ((cur == '<') && (next == '?')) {
11119 if ((!terminate) &&
11120 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
11121 goto done;
11122#ifdef DEBUG_PUSH
11123 xmlGenericError(xmlGenericErrorContext,
11124 "PP: Parsing PI\n");
11125#endif
11126 xmlParsePI(ctxt);
11127 } else if ((cur == '<') && (next == '!') &&
11128 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
11129 if ((!terminate) &&
11130 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
11131 goto done;
11132#ifdef DEBUG_PUSH
11133 xmlGenericError(xmlGenericErrorContext,
11134 "PP: Parsing Comment\n");
11135#endif
11136 xmlParseComment(ctxt);
11137 ctxt->instate = XML_PARSER_PROLOG;
11138 } else if ((cur == '<') && (next == '!') &&
11139 (avail < 4)) {
11140 goto done;
11141 } else {
11142 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillard0df3bc32004-06-08 12:03:41 +000011143 if (ctxt->progressive == 0)
11144 ctxt->progressive = 1;
Daniel Veillarda880b122003-04-21 21:36:41 +000011145 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Owen Taylor3473f882001-02-23 17:55:21 +000011146#ifdef DEBUG_PUSH
11147 xmlGenericError(xmlGenericErrorContext,
11148 "PP: entering START_TAG\n");
11149#endif
11150 }
11151 break;
11152 case XML_PARSER_EPILOG:
11153 SKIP_BLANKS;
11154 if (ctxt->input->buf == NULL)
11155 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
11156 else
11157 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
11158 if (avail < 2)
11159 goto done;
11160 cur = ctxt->input->cur[0];
11161 next = ctxt->input->cur[1];
11162 if ((cur == '<') && (next == '?')) {
11163 if ((!terminate) &&
11164 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
11165 goto done;
11166#ifdef DEBUG_PUSH
11167 xmlGenericError(xmlGenericErrorContext,
11168 "PP: Parsing PI\n");
11169#endif
11170 xmlParsePI(ctxt);
11171 ctxt->instate = XML_PARSER_EPILOG;
11172 } else if ((cur == '<') && (next == '!') &&
11173 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
11174 if ((!terminate) &&
11175 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
11176 goto done;
11177#ifdef DEBUG_PUSH
11178 xmlGenericError(xmlGenericErrorContext,
11179 "PP: Parsing Comment\n");
11180#endif
11181 xmlParseComment(ctxt);
11182 ctxt->instate = XML_PARSER_EPILOG;
11183 } else if ((cur == '<') && (next == '!') &&
11184 (avail < 4)) {
11185 goto done;
11186 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011187 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011188 ctxt->instate = XML_PARSER_EOF;
11189#ifdef DEBUG_PUSH
11190 xmlGenericError(xmlGenericErrorContext,
11191 "PP: entering EOF\n");
11192#endif
Daniel Veillard8d24cc12002-03-05 15:41:29 +000011193 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +000011194 ctxt->sax->endDocument(ctxt->userData);
11195 goto done;
11196 }
11197 break;
Owen Taylor3473f882001-02-23 17:55:21 +000011198 case XML_PARSER_DTD: {
11199 /*
11200 * Sorry but progressive parsing of the internal subset
11201 * is not expected to be supported. We first check that
11202 * the full content of the internal subset is available and
11203 * the parsing is launched only at that point.
11204 * Internal subset ends up with "']' S? '>'" in an unescaped
11205 * section and not in a ']]>' sequence which are conditional
11206 * sections (whoever argued to keep that crap in XML deserve
11207 * a place in hell !).
11208 */
11209 int base, i;
11210 xmlChar *buf;
11211 xmlChar quote = 0;
11212
11213 base = ctxt->input->cur - ctxt->input->base;
11214 if (base < 0) return(0);
11215 if (ctxt->checkIndex > base)
11216 base = ctxt->checkIndex;
11217 buf = ctxt->input->buf->buffer->content;
11218 for (;(unsigned int) base < ctxt->input->buf->buffer->use;
11219 base++) {
11220 if (quote != 0) {
11221 if (buf[base] == quote)
11222 quote = 0;
11223 continue;
11224 }
Daniel Veillard036143b2004-02-12 11:57:52 +000011225 if ((quote == 0) && (buf[base] == '<')) {
11226 int found = 0;
11227 /* special handling of comments */
11228 if (((unsigned int) base + 4 <
11229 ctxt->input->buf->buffer->use) &&
11230 (buf[base + 1] == '!') &&
11231 (buf[base + 2] == '-') &&
11232 (buf[base + 3] == '-')) {
11233 for (;(unsigned int) base + 3 <
11234 ctxt->input->buf->buffer->use; base++) {
11235 if ((buf[base] == '-') &&
11236 (buf[base + 1] == '-') &&
11237 (buf[base + 2] == '>')) {
11238 found = 1;
11239 base += 2;
11240 break;
11241 }
11242 }
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000011243 if (!found) {
11244#if 0
11245 fprintf(stderr, "unfinished comment\n");
11246#endif
11247 break; /* for */
11248 }
Daniel Veillard036143b2004-02-12 11:57:52 +000011249 continue;
11250 }
11251 }
Owen Taylor3473f882001-02-23 17:55:21 +000011252 if (buf[base] == '"') {
11253 quote = '"';
11254 continue;
11255 }
11256 if (buf[base] == '\'') {
11257 quote = '\'';
11258 continue;
11259 }
11260 if (buf[base] == ']') {
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000011261#if 0
11262 fprintf(stderr, "%c%c%c%c: ", buf[base],
11263 buf[base + 1], buf[base + 2], buf[base + 3]);
11264#endif
Owen Taylor3473f882001-02-23 17:55:21 +000011265 if ((unsigned int) base +1 >=
11266 ctxt->input->buf->buffer->use)
11267 break;
11268 if (buf[base + 1] == ']') {
11269 /* conditional crap, skip both ']' ! */
11270 base++;
11271 continue;
11272 }
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000011273 for (i = 1;
Owen Taylor3473f882001-02-23 17:55:21 +000011274 (unsigned int) base + i < ctxt->input->buf->buffer->use;
11275 i++) {
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000011276 if (buf[base + i] == '>') {
11277#if 0
11278 fprintf(stderr, "found\n");
11279#endif
Owen Taylor3473f882001-02-23 17:55:21 +000011280 goto found_end_int_subset;
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000011281 }
11282 if (!IS_BLANK_CH(buf[base + i])) {
11283#if 0
11284 fprintf(stderr, "not found\n");
11285#endif
11286 goto not_end_of_int_subset;
11287 }
Owen Taylor3473f882001-02-23 17:55:21 +000011288 }
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000011289#if 0
11290 fprintf(stderr, "end of stream\n");
11291#endif
Owen Taylor3473f882001-02-23 17:55:21 +000011292 break;
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000011293
Owen Taylor3473f882001-02-23 17:55:21 +000011294 }
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000011295not_end_of_int_subset:
11296 continue; /* for */
Owen Taylor3473f882001-02-23 17:55:21 +000011297 }
11298 /*
11299 * We didn't found the end of the Internal subset
11300 */
Owen Taylor3473f882001-02-23 17:55:21 +000011301#ifdef DEBUG_PUSH
11302 if (next == 0)
11303 xmlGenericError(xmlGenericErrorContext,
11304 "PP: lookup of int subset end filed\n");
11305#endif
11306 goto done;
11307
11308found_end_int_subset:
11309 xmlParseInternalSubset(ctxt);
11310 ctxt->inSubset = 2;
11311 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
11312 (ctxt->sax->externalSubset != NULL))
11313 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
11314 ctxt->extSubSystem, ctxt->extSubURI);
11315 ctxt->inSubset = 0;
Daniel Veillardac4118d2008-01-11 05:27:32 +000011316 xmlCleanSpecialAttr(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011317 ctxt->instate = XML_PARSER_PROLOG;
11318 ctxt->checkIndex = 0;
11319#ifdef DEBUG_PUSH
11320 xmlGenericError(xmlGenericErrorContext,
11321 "PP: entering PROLOG\n");
11322#endif
11323 break;
11324 }
11325 case XML_PARSER_COMMENT:
11326 xmlGenericError(xmlGenericErrorContext,
11327 "PP: internal error, state == COMMENT\n");
11328 ctxt->instate = XML_PARSER_CONTENT;
11329#ifdef DEBUG_PUSH
11330 xmlGenericError(xmlGenericErrorContext,
11331 "PP: entering CONTENT\n");
11332#endif
11333 break;
Daniel Veillarda880b122003-04-21 21:36:41 +000011334 case XML_PARSER_IGNORE:
11335 xmlGenericError(xmlGenericErrorContext,
11336 "PP: internal error, state == IGNORE");
11337 ctxt->instate = XML_PARSER_DTD;
11338#ifdef DEBUG_PUSH
11339 xmlGenericError(xmlGenericErrorContext,
11340 "PP: entering DTD\n");
11341#endif
11342 break;
Owen Taylor3473f882001-02-23 17:55:21 +000011343 case XML_PARSER_PI:
11344 xmlGenericError(xmlGenericErrorContext,
11345 "PP: internal error, state == PI\n");
11346 ctxt->instate = XML_PARSER_CONTENT;
11347#ifdef DEBUG_PUSH
11348 xmlGenericError(xmlGenericErrorContext,
11349 "PP: entering CONTENT\n");
11350#endif
11351 break;
11352 case XML_PARSER_ENTITY_DECL:
11353 xmlGenericError(xmlGenericErrorContext,
11354 "PP: internal error, state == ENTITY_DECL\n");
11355 ctxt->instate = XML_PARSER_DTD;
11356#ifdef DEBUG_PUSH
11357 xmlGenericError(xmlGenericErrorContext,
11358 "PP: entering DTD\n");
11359#endif
11360 break;
11361 case XML_PARSER_ENTITY_VALUE:
11362 xmlGenericError(xmlGenericErrorContext,
11363 "PP: internal error, state == ENTITY_VALUE\n");
11364 ctxt->instate = XML_PARSER_CONTENT;
11365#ifdef DEBUG_PUSH
11366 xmlGenericError(xmlGenericErrorContext,
11367 "PP: entering DTD\n");
11368#endif
11369 break;
11370 case XML_PARSER_ATTRIBUTE_VALUE:
11371 xmlGenericError(xmlGenericErrorContext,
11372 "PP: internal error, state == ATTRIBUTE_VALUE\n");
11373 ctxt->instate = XML_PARSER_START_TAG;
11374#ifdef DEBUG_PUSH
11375 xmlGenericError(xmlGenericErrorContext,
11376 "PP: entering START_TAG\n");
11377#endif
11378 break;
11379 case XML_PARSER_SYSTEM_LITERAL:
11380 xmlGenericError(xmlGenericErrorContext,
11381 "PP: internal error, state == SYSTEM_LITERAL\n");
11382 ctxt->instate = XML_PARSER_START_TAG;
11383#ifdef DEBUG_PUSH
11384 xmlGenericError(xmlGenericErrorContext,
11385 "PP: entering START_TAG\n");
11386#endif
11387 break;
Daniel Veillard4a7ae502002-02-18 19:18:17 +000011388 case XML_PARSER_PUBLIC_LITERAL:
11389 xmlGenericError(xmlGenericErrorContext,
11390 "PP: internal error, state == PUBLIC_LITERAL\n");
11391 ctxt->instate = XML_PARSER_START_TAG;
11392#ifdef DEBUG_PUSH
11393 xmlGenericError(xmlGenericErrorContext,
11394 "PP: entering START_TAG\n");
11395#endif
11396 break;
Owen Taylor3473f882001-02-23 17:55:21 +000011397 }
11398 }
11399done:
11400#ifdef DEBUG_PUSH
11401 xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret);
11402#endif
11403 return(ret);
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011404encoding_error:
11405 {
11406 char buffer[150];
11407
11408 snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
11409 ctxt->input->cur[0], ctxt->input->cur[1],
11410 ctxt->input->cur[2], ctxt->input->cur[3]);
11411 __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
11412 "Input is not proper UTF-8, indicate encoding !\n%s",
11413 BAD_CAST buffer, NULL);
11414 }
11415 return(0);
Owen Taylor3473f882001-02-23 17:55:21 +000011416}
11417
11418/**
Owen Taylor3473f882001-02-23 17:55:21 +000011419 * xmlParseChunk:
11420 * @ctxt: an XML parser context
11421 * @chunk: an char array
11422 * @size: the size in byte of the chunk
11423 * @terminate: last chunk indicator
11424 *
11425 * Parse a Chunk of memory
11426 *
11427 * Returns zero if no error, the xmlParserErrors otherwise.
11428 */
11429int
11430xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
11431 int terminate) {
Daniel Veillarda617e242006-01-09 14:38:44 +000011432 int end_in_lf = 0;
11433
Daniel Veillard36e5cd52004-11-02 14:52:23 +000011434 if (ctxt == NULL)
11435 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard14412512005-01-21 23:53:26 +000011436 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000011437 return(ctxt->errNo);
Daniel Veillard309f81d2003-09-23 09:02:53 +000011438 if (ctxt->instate == XML_PARSER_START)
11439 xmlDetectSAX2(ctxt);
Daniel Veillarda617e242006-01-09 14:38:44 +000011440 if ((size > 0) && (chunk != NULL) && (!terminate) &&
11441 (chunk[size - 1] == '\r')) {
11442 end_in_lf = 1;
11443 size--;
11444 }
Owen Taylor3473f882001-02-23 17:55:21 +000011445 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
11446 (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) {
11447 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
11448 int cur = ctxt->input->cur - ctxt->input->base;
William M. Bracka3215c72004-07-31 16:24:01 +000011449 int res;
Owen Taylor3473f882001-02-23 17:55:21 +000011450
William M. Bracka3215c72004-07-31 16:24:01 +000011451 res =xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
11452 if (res < 0) {
11453 ctxt->errNo = XML_PARSER_EOF;
11454 ctxt->disableSAX = 1;
11455 return (XML_PARSER_EOF);
11456 }
Owen Taylor3473f882001-02-23 17:55:21 +000011457 ctxt->input->base = ctxt->input->buf->buffer->content + base;
11458 ctxt->input->cur = ctxt->input->base + cur;
Daniel Veillard48b2f892001-02-25 16:11:03 +000011459 ctxt->input->end =
11460 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000011461#ifdef DEBUG_PUSH
11462 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
11463#endif
11464
Owen Taylor3473f882001-02-23 17:55:21 +000011465 } else if (ctxt->instate != XML_PARSER_EOF) {
11466 if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
11467 xmlParserInputBufferPtr in = ctxt->input->buf;
11468 if ((in->encoder != NULL) && (in->buffer != NULL) &&
11469 (in->raw != NULL)) {
11470 int nbchars;
11471
11472 nbchars = xmlCharEncInFunc(in->encoder, in->buffer, in->raw);
11473 if (nbchars < 0) {
Daniel Veillard24eb9782003-10-04 21:08:09 +000011474 /* TODO 2.6.0 */
Owen Taylor3473f882001-02-23 17:55:21 +000011475 xmlGenericError(xmlGenericErrorContext,
11476 "xmlParseChunk: encoder error\n");
11477 return(XML_ERR_INVALID_ENCODING);
11478 }
11479 }
11480 }
11481 }
11482 xmlParseTryOrFinish(ctxt, terminate);
Daniel Veillarda617e242006-01-09 14:38:44 +000011483 if ((end_in_lf == 1) && (ctxt->input != NULL) &&
11484 (ctxt->input->buf != NULL)) {
11485 xmlParserInputBufferPush(ctxt->input->buf, 1, "\r");
11486 }
Daniel Veillard14412512005-01-21 23:53:26 +000011487 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000011488 return(ctxt->errNo);
Owen Taylor3473f882001-02-23 17:55:21 +000011489 if (terminate) {
11490 /*
11491 * Check for termination
11492 */
Daniel Veillard36e5cd52004-11-02 14:52:23 +000011493 int avail = 0;
11494
11495 if (ctxt->input != NULL) {
Daniel Veillard819d5cb2002-10-14 11:15:18 +000011496 if (ctxt->input->buf == NULL)
Daniel Veillard36e5cd52004-11-02 14:52:23 +000011497 avail = ctxt->input->length -
11498 (ctxt->input->cur - ctxt->input->base);
11499 else
11500 avail = ctxt->input->buf->buffer->use -
11501 (ctxt->input->cur - ctxt->input->base);
11502 }
Daniel Veillard819d5cb2002-10-14 11:15:18 +000011503
Owen Taylor3473f882001-02-23 17:55:21 +000011504 if ((ctxt->instate != XML_PARSER_EOF) &&
11505 (ctxt->instate != XML_PARSER_EPILOG)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011506 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011507 }
Daniel Veillard819d5cb2002-10-14 11:15:18 +000011508 if ((ctxt->instate == XML_PARSER_EPILOG) && (avail > 0)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011509 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Daniel Veillard819d5cb2002-10-14 11:15:18 +000011510 }
Owen Taylor3473f882001-02-23 17:55:21 +000011511 if (ctxt->instate != XML_PARSER_EOF) {
Daniel Veillard8d24cc12002-03-05 15:41:29 +000011512 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +000011513 ctxt->sax->endDocument(ctxt->userData);
11514 }
11515 ctxt->instate = XML_PARSER_EOF;
11516 }
11517 return((xmlParserErrors) ctxt->errNo);
11518}
11519
11520/************************************************************************
11521 * *
11522 * I/O front end functions to the parser *
11523 * *
11524 ************************************************************************/
11525
11526/**
Owen Taylor3473f882001-02-23 17:55:21 +000011527 * xmlCreatePushParserCtxt:
11528 * @sax: a SAX handler
11529 * @user_data: The user data returned on SAX callbacks
11530 * @chunk: a pointer to an array of chars
11531 * @size: number of chars in the array
11532 * @filename: an optional file name or URI
11533 *
Daniel Veillard176d99f2002-07-06 19:22:28 +000011534 * Create a parser context for using the XML parser in push mode.
11535 * If @buffer and @size are non-NULL, the data is used to detect
11536 * the encoding. The remaining characters will be parsed so they
11537 * don't need to be fed in again through xmlParseChunk.
Owen Taylor3473f882001-02-23 17:55:21 +000011538 * To allow content encoding detection, @size should be >= 4
11539 * The value of @filename is used for fetching external entities
11540 * and error/warning reports.
11541 *
11542 * Returns the new parser context or NULL
11543 */
Daniel Veillard176d99f2002-07-06 19:22:28 +000011544
Owen Taylor3473f882001-02-23 17:55:21 +000011545xmlParserCtxtPtr
11546xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
11547 const char *chunk, int size, const char *filename) {
11548 xmlParserCtxtPtr ctxt;
11549 xmlParserInputPtr inputStream;
11550 xmlParserInputBufferPtr buf;
11551 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
11552
11553 /*
11554 * plug some encoding conversion routines
11555 */
11556 if ((chunk != NULL) && (size >= 4))
11557 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
11558
11559 buf = xmlAllocParserInputBuffer(enc);
11560 if (buf == NULL) return(NULL);
11561
11562 ctxt = xmlNewParserCtxt();
11563 if (ctxt == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +000011564 xmlErrMemory(NULL, "creating parser: out of memory\n");
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000011565 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000011566 return(NULL);
11567 }
Daniel Veillard03a53c32004-10-26 16:06:51 +000011568 ctxt->dictNames = 1;
Daniel Veillarde57ec792003-09-10 10:50:59 +000011569 ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 * sizeof(xmlChar *));
11570 if (ctxt->pushTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011571 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +000011572 xmlFreeParserInputBuffer(buf);
11573 xmlFreeParserCtxt(ctxt);
11574 return(NULL);
11575 }
Owen Taylor3473f882001-02-23 17:55:21 +000011576 if (sax != NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +000011577#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard092643b2003-09-25 14:29:29 +000011578 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Daniel Veillard81273902003-09-30 00:43:48 +000011579#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000011580 xmlFree(ctxt->sax);
11581 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
11582 if (ctxt->sax == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011583 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000011584 xmlFreeParserInputBuffer(buf);
11585 xmlFreeParserCtxt(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011586 return(NULL);
11587 }
Daniel Veillard5ea30d72004-11-08 11:54:28 +000011588 memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
11589 if (sax->initialized == XML_SAX2_MAGIC)
11590 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
11591 else
11592 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
Owen Taylor3473f882001-02-23 17:55:21 +000011593 if (user_data != NULL)
11594 ctxt->userData = user_data;
11595 }
11596 if (filename == NULL) {
11597 ctxt->directory = NULL;
11598 } else {
11599 ctxt->directory = xmlParserGetDirectory(filename);
11600 }
11601
11602 inputStream = xmlNewInputStream(ctxt);
11603 if (inputStream == NULL) {
11604 xmlFreeParserCtxt(ctxt);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000011605 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000011606 return(NULL);
11607 }
11608
11609 if (filename == NULL)
11610 inputStream->filename = NULL;
William M. Bracka3215c72004-07-31 16:24:01 +000011611 else {
Daniel Veillardf4862f02002-09-10 11:13:43 +000011612 inputStream->filename = (char *)
Igor Zlatkovic5f9fada2003-02-19 14:51:00 +000011613 xmlCanonicPath((const xmlChar *) filename);
William M. Bracka3215c72004-07-31 16:24:01 +000011614 if (inputStream->filename == NULL) {
11615 xmlFreeParserCtxt(ctxt);
11616 xmlFreeParserInputBuffer(buf);
11617 return(NULL);
11618 }
11619 }
Owen Taylor3473f882001-02-23 17:55:21 +000011620 inputStream->buf = buf;
11621 inputStream->base = inputStream->buf->buffer->content;
11622 inputStream->cur = inputStream->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +000011623 inputStream->end =
11624 &inputStream->buf->buffer->content[inputStream->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000011625
11626 inputPush(ctxt, inputStream);
11627
William M. Brack3a1cd212005-02-11 14:35:54 +000011628 /*
11629 * If the caller didn't provide an initial 'chunk' for determining
11630 * the encoding, we set the context to XML_CHAR_ENCODING_NONE so
11631 * that it can be automatically determined later
11632 */
11633 if ((size == 0) || (chunk == NULL)) {
11634 ctxt->charset = XML_CHAR_ENCODING_NONE;
11635 } else if ((ctxt->input != NULL) && (ctxt->input->buf != NULL)) {
Daniel Veillardaa39a0f2002-01-06 12:47:22 +000011636 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
11637 int cur = ctxt->input->cur - ctxt->input->base;
11638
Owen Taylor3473f882001-02-23 17:55:21 +000011639 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
Daniel Veillardaa39a0f2002-01-06 12:47:22 +000011640
11641 ctxt->input->base = ctxt->input->buf->buffer->content + base;
11642 ctxt->input->cur = ctxt->input->base + cur;
11643 ctxt->input->end =
11644 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000011645#ifdef DEBUG_PUSH
11646 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
11647#endif
11648 }
11649
Daniel Veillard0e4cd172001-06-28 12:13:56 +000011650 if (enc != XML_CHAR_ENCODING_NONE) {
11651 xmlSwitchEncoding(ctxt, enc);
11652 }
11653
Owen Taylor3473f882001-02-23 17:55:21 +000011654 return(ctxt);
11655}
Daniel Veillard73b013f2003-09-30 12:36:01 +000011656#endif /* LIBXML_PUSH_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000011657
11658/**
Daniel Veillard39e5c892005-07-03 22:48:50 +000011659 * xmlStopParser:
11660 * @ctxt: an XML parser context
11661 *
11662 * Blocks further parser processing
11663 */
11664void
11665xmlStopParser(xmlParserCtxtPtr ctxt) {
11666 if (ctxt == NULL)
11667 return;
11668 ctxt->instate = XML_PARSER_EOF;
11669 ctxt->disableSAX = 1;
11670 if (ctxt->input != NULL) {
11671 ctxt->input->cur = BAD_CAST"";
11672 ctxt->input->base = ctxt->input->cur;
11673 }
11674}
11675
11676/**
Owen Taylor3473f882001-02-23 17:55:21 +000011677 * xmlCreateIOParserCtxt:
11678 * @sax: a SAX handler
11679 * @user_data: The user data returned on SAX callbacks
11680 * @ioread: an I/O read function
11681 * @ioclose: an I/O close function
11682 * @ioctx: an I/O handler
11683 * @enc: the charset encoding if known
11684 *
11685 * Create a parser context for using the XML parser with an existing
11686 * I/O stream
11687 *
11688 * Returns the new parser context or NULL
11689 */
11690xmlParserCtxtPtr
11691xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
11692 xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
11693 void *ioctx, xmlCharEncoding enc) {
11694 xmlParserCtxtPtr ctxt;
11695 xmlParserInputPtr inputStream;
11696 xmlParserInputBufferPtr buf;
Daniel Veillard42595322004-11-08 10:52:06 +000011697
11698 if (ioread == NULL) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011699
11700 buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
11701 if (buf == NULL) return(NULL);
11702
11703 ctxt = xmlNewParserCtxt();
11704 if (ctxt == NULL) {
Daniel Veillardf0af8ec2005-07-08 17:27:33 +000011705 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000011706 return(NULL);
11707 }
11708 if (sax != NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +000011709#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard092643b2003-09-25 14:29:29 +000011710 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Daniel Veillard81273902003-09-30 00:43:48 +000011711#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000011712 xmlFree(ctxt->sax);
11713 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
11714 if (ctxt->sax == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011715 xmlErrMemory(ctxt, NULL);
Daniel Veillardf0af8ec2005-07-08 17:27:33 +000011716 xmlFreeParserCtxt(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011717 return(NULL);
11718 }
Daniel Veillard5ea30d72004-11-08 11:54:28 +000011719 memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
11720 if (sax->initialized == XML_SAX2_MAGIC)
11721 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
11722 else
11723 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
Owen Taylor3473f882001-02-23 17:55:21 +000011724 if (user_data != NULL)
11725 ctxt->userData = user_data;
11726 }
11727
11728 inputStream = xmlNewIOInputStream(ctxt, buf, enc);
11729 if (inputStream == NULL) {
11730 xmlFreeParserCtxt(ctxt);
11731 return(NULL);
11732 }
11733 inputPush(ctxt, inputStream);
11734
11735 return(ctxt);
11736}
11737
Daniel Veillard4432df22003-09-28 18:58:27 +000011738#ifdef LIBXML_VALID_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000011739/************************************************************************
11740 * *
Daniel Veillardcbaf3992001-12-31 16:16:02 +000011741 * Front ends when parsing a DTD *
Owen Taylor3473f882001-02-23 17:55:21 +000011742 * *
11743 ************************************************************************/
11744
11745/**
11746 * xmlIOParseDTD:
11747 * @sax: the SAX handler block or NULL
11748 * @input: an Input Buffer
11749 * @enc: the charset encoding if known
11750 *
11751 * Load and parse a DTD
11752 *
11753 * Returns the resulting xmlDtdPtr or NULL in case of error.
Daniel Veillard402b3442006-10-13 10:28:21 +000011754 * @input will be freed by the function in any case.
Owen Taylor3473f882001-02-23 17:55:21 +000011755 */
11756
11757xmlDtdPtr
11758xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
11759 xmlCharEncoding enc) {
11760 xmlDtdPtr ret = NULL;
11761 xmlParserCtxtPtr ctxt;
11762 xmlParserInputPtr pinput = NULL;
Daniel Veillard87a764e2001-06-20 17:41:10 +000011763 xmlChar start[4];
Owen Taylor3473f882001-02-23 17:55:21 +000011764
11765 if (input == NULL)
11766 return(NULL);
11767
11768 ctxt = xmlNewParserCtxt();
11769 if (ctxt == NULL) {
Daniel Veillard402b3442006-10-13 10:28:21 +000011770 xmlFreeParserInputBuffer(input);
Owen Taylor3473f882001-02-23 17:55:21 +000011771 return(NULL);
11772 }
11773
11774 /*
11775 * Set-up the SAX context
11776 */
11777 if (sax != NULL) {
11778 if (ctxt->sax != NULL)
11779 xmlFree(ctxt->sax);
11780 ctxt->sax = sax;
Daniel Veillard500a1de2004-03-22 15:22:58 +000011781 ctxt->userData = ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000011782 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000011783 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011784
11785 /*
11786 * generate a parser input from the I/O handler
11787 */
11788
Daniel Veillard43caefb2003-12-07 19:32:22 +000011789 pinput = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
Owen Taylor3473f882001-02-23 17:55:21 +000011790 if (pinput == NULL) {
11791 if (sax != NULL) ctxt->sax = NULL;
Daniel Veillard402b3442006-10-13 10:28:21 +000011792 xmlFreeParserInputBuffer(input);
Owen Taylor3473f882001-02-23 17:55:21 +000011793 xmlFreeParserCtxt(ctxt);
11794 return(NULL);
11795 }
11796
11797 /*
11798 * plug some encoding conversion routines here.
11799 */
Daniel Veillarda8f09ce2008-08-27 13:02:01 +000011800 if (xmlPushInput(ctxt, pinput) < 0) {
11801 if (sax != NULL) ctxt->sax = NULL;
11802 xmlFreeParserCtxt(ctxt);
11803 return(NULL);
11804 }
Daniel Veillard43caefb2003-12-07 19:32:22 +000011805 if (enc != XML_CHAR_ENCODING_NONE) {
11806 xmlSwitchEncoding(ctxt, enc);
11807 }
Owen Taylor3473f882001-02-23 17:55:21 +000011808
11809 pinput->filename = NULL;
11810 pinput->line = 1;
11811 pinput->col = 1;
11812 pinput->base = ctxt->input->cur;
11813 pinput->cur = ctxt->input->cur;
11814 pinput->free = NULL;
11815
11816 /*
11817 * let's parse that entity knowing it's an external subset.
11818 */
11819 ctxt->inSubset = 2;
11820 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
Daniel Veillardae0765b2008-07-31 19:54:59 +000011821 if (ctxt->myDoc == NULL) {
11822 xmlErrMemory(ctxt, "New Doc failed");
11823 return(NULL);
11824 }
11825 ctxt->myDoc->properties = XML_DOC_INTERNAL;
Owen Taylor3473f882001-02-23 17:55:21 +000011826 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
11827 BAD_CAST "none", BAD_CAST "none");
Daniel Veillard87a764e2001-06-20 17:41:10 +000011828
Daniel Veillard4aede2e2003-10-17 12:43:59 +000011829 if ((enc == XML_CHAR_ENCODING_NONE) &&
11830 ((ctxt->input->end - ctxt->input->cur) >= 4)) {
Daniel Veillard87a764e2001-06-20 17:41:10 +000011831 /*
11832 * Get the 4 first bytes and decode the charset
11833 * if enc != XML_CHAR_ENCODING_NONE
11834 * plug some encoding conversion routines.
11835 */
11836 start[0] = RAW;
11837 start[1] = NXT(1);
11838 start[2] = NXT(2);
11839 start[3] = NXT(3);
11840 enc = xmlDetectCharEncoding(start, 4);
11841 if (enc != XML_CHAR_ENCODING_NONE) {
11842 xmlSwitchEncoding(ctxt, enc);
11843 }
11844 }
11845
Owen Taylor3473f882001-02-23 17:55:21 +000011846 xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
11847
11848 if (ctxt->myDoc != NULL) {
11849 if (ctxt->wellFormed) {
11850 ret = ctxt->myDoc->extSubset;
11851 ctxt->myDoc->extSubset = NULL;
Daniel Veillard329456a2003-04-26 21:21:00 +000011852 if (ret != NULL) {
11853 xmlNodePtr tmp;
11854
11855 ret->doc = NULL;
11856 tmp = ret->children;
11857 while (tmp != NULL) {
11858 tmp->doc = NULL;
11859 tmp = tmp->next;
11860 }
11861 }
Owen Taylor3473f882001-02-23 17:55:21 +000011862 } else {
11863 ret = NULL;
11864 }
11865 xmlFreeDoc(ctxt->myDoc);
11866 ctxt->myDoc = NULL;
11867 }
11868 if (sax != NULL) ctxt->sax = NULL;
11869 xmlFreeParserCtxt(ctxt);
11870
11871 return(ret);
11872}
11873
11874/**
11875 * xmlSAXParseDTD:
11876 * @sax: the SAX handler block
11877 * @ExternalID: a NAME* containing the External ID of the DTD
11878 * @SystemID: a NAME* containing the URL to the DTD
11879 *
11880 * Load and parse an external subset.
11881 *
11882 * Returns the resulting xmlDtdPtr or NULL in case of error.
11883 */
11884
11885xmlDtdPtr
11886xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
11887 const xmlChar *SystemID) {
11888 xmlDtdPtr ret = NULL;
11889 xmlParserCtxtPtr ctxt;
11890 xmlParserInputPtr input = NULL;
11891 xmlCharEncoding enc;
Igor Zlatkovic07d59762004-08-24 19:12:51 +000011892 xmlChar* systemIdCanonic;
Owen Taylor3473f882001-02-23 17:55:21 +000011893
11894 if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
11895
11896 ctxt = xmlNewParserCtxt();
11897 if (ctxt == NULL) {
11898 return(NULL);
11899 }
11900
11901 /*
11902 * Set-up the SAX context
11903 */
11904 if (sax != NULL) {
11905 if (ctxt->sax != NULL)
11906 xmlFree(ctxt->sax);
11907 ctxt->sax = sax;
Daniel Veillardbf1e3d82003-08-14 23:57:26 +000011908 ctxt->userData = ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000011909 }
Igor Zlatkovic07d59762004-08-24 19:12:51 +000011910
11911 /*
11912 * Canonicalise the system ID
11913 */
11914 systemIdCanonic = xmlCanonicPath(SystemID);
Daniel Veillardc93a19f2004-10-04 11:53:20 +000011915 if ((SystemID != NULL) && (systemIdCanonic == NULL)) {
Igor Zlatkovic07d59762004-08-24 19:12:51 +000011916 xmlFreeParserCtxt(ctxt);
11917 return(NULL);
11918 }
Owen Taylor3473f882001-02-23 17:55:21 +000011919
11920 /*
11921 * Ask the Entity resolver to load the damn thing
11922 */
11923
11924 if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
Daniel Veillarda9557952006-10-12 12:53:15 +000011925 input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID,
11926 systemIdCanonic);
Owen Taylor3473f882001-02-23 17:55:21 +000011927 if (input == NULL) {
11928 if (sax != NULL) ctxt->sax = NULL;
11929 xmlFreeParserCtxt(ctxt);
Daniel Veillard34099b42004-11-04 17:34:35 +000011930 if (systemIdCanonic != NULL)
11931 xmlFree(systemIdCanonic);
Owen Taylor3473f882001-02-23 17:55:21 +000011932 return(NULL);
11933 }
11934
11935 /*
11936 * plug some encoding conversion routines here.
11937 */
Daniel Veillarda8f09ce2008-08-27 13:02:01 +000011938 if (xmlPushInput(ctxt, input) < 0) {
11939 if (sax != NULL) ctxt->sax = NULL;
11940 xmlFreeParserCtxt(ctxt);
11941 if (systemIdCanonic != NULL)
11942 xmlFree(systemIdCanonic);
11943 return(NULL);
11944 }
Daniel Veillard4aede2e2003-10-17 12:43:59 +000011945 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
11946 enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
11947 xmlSwitchEncoding(ctxt, enc);
11948 }
Owen Taylor3473f882001-02-23 17:55:21 +000011949
11950 if (input->filename == NULL)
Igor Zlatkovic07d59762004-08-24 19:12:51 +000011951 input->filename = (char *) systemIdCanonic;
11952 else
11953 xmlFree(systemIdCanonic);
Owen Taylor3473f882001-02-23 17:55:21 +000011954 input->line = 1;
11955 input->col = 1;
11956 input->base = ctxt->input->cur;
11957 input->cur = ctxt->input->cur;
11958 input->free = NULL;
11959
11960 /*
11961 * let's parse that entity knowing it's an external subset.
11962 */
11963 ctxt->inSubset = 2;
11964 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
Daniel Veillardae0765b2008-07-31 19:54:59 +000011965 if (ctxt->myDoc == NULL) {
11966 xmlErrMemory(ctxt, "New Doc failed");
11967 if (sax != NULL) ctxt->sax = NULL;
11968 xmlFreeParserCtxt(ctxt);
11969 return(NULL);
11970 }
11971 ctxt->myDoc->properties = XML_DOC_INTERNAL;
Owen Taylor3473f882001-02-23 17:55:21 +000011972 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
11973 ExternalID, SystemID);
11974 xmlParseExternalSubset(ctxt, ExternalID, SystemID);
11975
11976 if (ctxt->myDoc != NULL) {
11977 if (ctxt->wellFormed) {
11978 ret = ctxt->myDoc->extSubset;
11979 ctxt->myDoc->extSubset = NULL;
Daniel Veillardc5573462003-04-25 16:43:49 +000011980 if (ret != NULL) {
11981 xmlNodePtr tmp;
11982
11983 ret->doc = NULL;
11984 tmp = ret->children;
11985 while (tmp != NULL) {
11986 tmp->doc = NULL;
11987 tmp = tmp->next;
11988 }
11989 }
Owen Taylor3473f882001-02-23 17:55:21 +000011990 } else {
11991 ret = NULL;
11992 }
11993 xmlFreeDoc(ctxt->myDoc);
11994 ctxt->myDoc = NULL;
11995 }
11996 if (sax != NULL) ctxt->sax = NULL;
11997 xmlFreeParserCtxt(ctxt);
Daniel Veillard0161e632008-08-28 15:36:32 +000011998
Owen Taylor3473f882001-02-23 17:55:21 +000011999 return(ret);
12000}
12001
Daniel Veillard4432df22003-09-28 18:58:27 +000012002
Owen Taylor3473f882001-02-23 17:55:21 +000012003/**
12004 * xmlParseDTD:
12005 * @ExternalID: a NAME* containing the External ID of the DTD
12006 * @SystemID: a NAME* containing the URL to the DTD
12007 *
12008 * Load and parse an external subset.
Daniel Veillard0161e632008-08-28 15:36:32 +000012009 *
Owen Taylor3473f882001-02-23 17:55:21 +000012010 * Returns the resulting xmlDtdPtr or NULL in case of error.
12011 */
12012
12013xmlDtdPtr
12014xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
12015 return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
12016}
Daniel Veillard4432df22003-09-28 18:58:27 +000012017#endif /* LIBXML_VALID_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012018
12019/************************************************************************
12020 * *
12021 * Front ends when parsing an Entity *
12022 * *
12023 ************************************************************************/
12024
12025/**
Owen Taylor3473f882001-02-23 17:55:21 +000012026 * xmlParseCtxtExternalEntity:
12027 * @ctx: the existing parsing context
12028 * @URL: the URL for the entity to load
12029 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +000012030 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +000012031 *
12032 * Parse an external general entity within an existing parsing context
12033 * An external general parsed entity is well-formed if it matches the
12034 * production labeled extParsedEnt.
12035 *
12036 * [78] extParsedEnt ::= TextDecl? content
12037 *
12038 * Returns 0 if the entity is well formed, -1 in case of args problem and
12039 * the parser error code otherwise
12040 */
12041
12042int
12043xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
Daniel Veillardcda96922001-08-21 10:56:31 +000012044 const xmlChar *ID, xmlNodePtr *lst) {
Owen Taylor3473f882001-02-23 17:55:21 +000012045 xmlParserCtxtPtr ctxt;
12046 xmlDocPtr newDoc;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012047 xmlNodePtr newRoot;
Owen Taylor3473f882001-02-23 17:55:21 +000012048 xmlSAXHandlerPtr oldsax = NULL;
12049 int ret = 0;
Daniel Veillard87a764e2001-06-20 17:41:10 +000012050 xmlChar start[4];
12051 xmlCharEncoding enc;
Daniel Veillard2937b3a2006-10-10 08:52:34 +000012052 xmlParserInputPtr inputStream;
12053 char *directory = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000012054
Daniel Veillardce682bc2004-11-05 17:22:25 +000012055 if (ctx == NULL) return(-1);
12056
Daniel Veillard0161e632008-08-28 15:36:32 +000012057 if (((ctx->depth > 40) && ((ctx->options & XML_PARSE_HUGE) == 0)) ||
12058 (ctx->depth > 1024)) {
Owen Taylor3473f882001-02-23 17:55:21 +000012059 return(XML_ERR_ENTITY_LOOP);
12060 }
12061
Daniel Veillardcda96922001-08-21 10:56:31 +000012062 if (lst != NULL)
12063 *lst = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000012064 if ((URL == NULL) && (ID == NULL))
12065 return(-1);
12066 if (ctx->myDoc == NULL) /* @@ relax but check for dereferences */
12067 return(-1);
12068
Daniel Veillard2937b3a2006-10-10 08:52:34 +000012069 ctxt = xmlNewParserCtxt();
12070 if (ctxt == NULL) {
12071 return(-1);
12072 }
Daniel Veillard40ec29a2008-07-30 12:35:40 +000012073
Owen Taylor3473f882001-02-23 17:55:21 +000012074 ctxt->userData = ctxt;
Daniel Veillarde2830f12003-01-08 17:47:49 +000012075 ctxt->_private = ctx->_private;
Daniel Veillard40ec29a2008-07-30 12:35:40 +000012076
Daniel Veillard2937b3a2006-10-10 08:52:34 +000012077 inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
12078 if (inputStream == NULL) {
12079 xmlFreeParserCtxt(ctxt);
12080 return(-1);
12081 }
12082
12083 inputPush(ctxt, inputStream);
12084
12085 if ((ctxt->directory == NULL) && (directory == NULL))
12086 directory = xmlParserGetDirectory((char *)URL);
12087 if ((ctxt->directory == NULL) && (directory != NULL))
12088 ctxt->directory = directory;
Daniel Veillard40ec29a2008-07-30 12:35:40 +000012089
Owen Taylor3473f882001-02-23 17:55:21 +000012090 oldsax = ctxt->sax;
12091 ctxt->sax = ctx->sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000012092 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000012093 newDoc = xmlNewDoc(BAD_CAST "1.0");
12094 if (newDoc == NULL) {
12095 xmlFreeParserCtxt(ctxt);
12096 return(-1);
12097 }
Daniel Veillardae0765b2008-07-31 19:54:59 +000012098 newDoc->properties = XML_DOC_INTERNAL;
Daniel Veillard03a53c32004-10-26 16:06:51 +000012099 if (ctx->myDoc->dict) {
12100 newDoc->dict = ctx->myDoc->dict;
12101 xmlDictReference(newDoc->dict);
12102 }
Owen Taylor3473f882001-02-23 17:55:21 +000012103 if (ctx->myDoc != NULL) {
12104 newDoc->intSubset = ctx->myDoc->intSubset;
12105 newDoc->extSubset = ctx->myDoc->extSubset;
12106 }
12107 if (ctx->myDoc->URL != NULL) {
12108 newDoc->URL = xmlStrdup(ctx->myDoc->URL);
12109 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012110 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
12111 if (newRoot == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000012112 ctxt->sax = oldsax;
12113 xmlFreeParserCtxt(ctxt);
12114 newDoc->intSubset = NULL;
12115 newDoc->extSubset = NULL;
12116 xmlFreeDoc(newDoc);
12117 return(-1);
12118 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012119 xmlAddChild((xmlNodePtr) newDoc, newRoot);
Owen Taylor3473f882001-02-23 17:55:21 +000012120 nodePush(ctxt, newDoc->children);
12121 if (ctx->myDoc == NULL) {
12122 ctxt->myDoc = newDoc;
12123 } else {
12124 ctxt->myDoc = ctx->myDoc;
12125 newDoc->children->doc = ctx->myDoc;
12126 }
12127
Daniel Veillard40ec29a2008-07-30 12:35:40 +000012128 /*
Daniel Veillard87a764e2001-06-20 17:41:10 +000012129 * Get the 4 first bytes and decode the charset
12130 * if enc != XML_CHAR_ENCODING_NONE
12131 * plug some encoding conversion routines.
12132 */
12133 GROW
Daniel Veillard4aede2e2003-10-17 12:43:59 +000012134 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
12135 start[0] = RAW;
12136 start[1] = NXT(1);
12137 start[2] = NXT(2);
12138 start[3] = NXT(3);
12139 enc = xmlDetectCharEncoding(start, 4);
12140 if (enc != XML_CHAR_ENCODING_NONE) {
12141 xmlSwitchEncoding(ctxt, enc);
12142 }
Daniel Veillard87a764e2001-06-20 17:41:10 +000012143 }
12144
Owen Taylor3473f882001-02-23 17:55:21 +000012145 /*
12146 * Parse a possible text declaration first
12147 */
Daniel Veillarda07050d2003-10-19 14:46:32 +000012148 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +000012149 xmlParseTextDecl(ctxt);
Daniel Veillard40ec29a2008-07-30 12:35:40 +000012150 /*
12151 * An XML-1.0 document can't reference an entity not XML-1.0
12152 */
12153 if ((xmlStrEqual(ctx->version, BAD_CAST "1.0")) &&
12154 (!xmlStrEqual(ctxt->input->version, BAD_CAST "1.0"))) {
12155 xmlFatalErrMsg(ctxt, XML_ERR_VERSION_MISMATCH,
12156 "Version mismatch between document and entity\n");
12157 }
Owen Taylor3473f882001-02-23 17:55:21 +000012158 }
12159
12160 /*
12161 * Doing validity checking on chunk doesn't make sense
12162 */
12163 ctxt->instate = XML_PARSER_CONTENT;
12164 ctxt->validate = ctx->validate;
Daniel Veillard5f8d1a32003-03-23 21:02:00 +000012165 ctxt->valid = ctx->valid;
Owen Taylor3473f882001-02-23 17:55:21 +000012166 ctxt->loadsubset = ctx->loadsubset;
12167 ctxt->depth = ctx->depth + 1;
12168 ctxt->replaceEntities = ctx->replaceEntities;
12169 if (ctxt->validate) {
12170 ctxt->vctxt.error = ctx->vctxt.error;
12171 ctxt->vctxt.warning = ctx->vctxt.warning;
Owen Taylor3473f882001-02-23 17:55:21 +000012172 } else {
12173 ctxt->vctxt.error = NULL;
12174 ctxt->vctxt.warning = NULL;
12175 }
Daniel Veillarda9142e72001-06-19 11:07:54 +000012176 ctxt->vctxt.nodeTab = NULL;
12177 ctxt->vctxt.nodeNr = 0;
12178 ctxt->vctxt.nodeMax = 0;
12179 ctxt->vctxt.node = NULL;
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000012180 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
12181 ctxt->dict = ctx->dict;
Daniel Veillardfd343dc2003-10-31 10:55:22 +000012182 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
12183 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
12184 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000012185 ctxt->dictNames = ctx->dictNames;
12186 ctxt->attsDefault = ctx->attsDefault;
12187 ctxt->attsSpecial = ctx->attsSpecial;
William M. Brack503b6102004-08-19 02:17:27 +000012188 ctxt->linenumbers = ctx->linenumbers;
Owen Taylor3473f882001-02-23 17:55:21 +000012189
12190 xmlParseContent(ctxt);
Daniel Veillard40ec29a2008-07-30 12:35:40 +000012191
Daniel Veillard5f8d1a32003-03-23 21:02:00 +000012192 ctx->validate = ctxt->validate;
12193 ctx->valid = ctxt->valid;
Owen Taylor3473f882001-02-23 17:55:21 +000012194 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012195 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000012196 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012197 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000012198 }
12199 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012200 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000012201 }
12202
12203 if (!ctxt->wellFormed) {
12204 if (ctxt->errNo == 0)
12205 ret = 1;
12206 else
12207 ret = ctxt->errNo;
12208 } else {
Daniel Veillardcda96922001-08-21 10:56:31 +000012209 if (lst != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000012210 xmlNodePtr cur;
12211
12212 /*
12213 * Return the newly created nodeset after unlinking it from
12214 * they pseudo parent.
12215 */
12216 cur = newDoc->children->children;
Daniel Veillardcda96922001-08-21 10:56:31 +000012217 *lst = cur;
Owen Taylor3473f882001-02-23 17:55:21 +000012218 while (cur != NULL) {
12219 cur->parent = NULL;
12220 cur = cur->next;
12221 }
12222 newDoc->children->children = NULL;
12223 }
12224 ret = 0;
12225 }
12226 ctxt->sax = oldsax;
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000012227 ctxt->dict = NULL;
12228 ctxt->attsDefault = NULL;
12229 ctxt->attsSpecial = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000012230 xmlFreeParserCtxt(ctxt);
12231 newDoc->intSubset = NULL;
12232 newDoc->extSubset = NULL;
12233 xmlFreeDoc(newDoc);
Daniel Veillard40ec29a2008-07-30 12:35:40 +000012234
Owen Taylor3473f882001-02-23 17:55:21 +000012235 return(ret);
12236}
12237
12238/**
Daniel Veillard257d9102001-05-08 10:41:44 +000012239 * xmlParseExternalEntityPrivate:
Owen Taylor3473f882001-02-23 17:55:21 +000012240 * @doc: the document the chunk pertains to
Daniel Veillarda97a19b2001-05-20 13:19:52 +000012241 * @oldctxt: the previous parser context if available
Owen Taylor3473f882001-02-23 17:55:21 +000012242 * @sax: the SAX handler bloc (possibly NULL)
12243 * @user_data: The user data returned on SAX callbacks (possibly NULL)
12244 * @depth: Used for loop detection, use 0
12245 * @URL: the URL for the entity to load
12246 * @ID: the System ID for the entity to load
12247 * @list: the return value for the set of parsed nodes
12248 *
Daniel Veillard257d9102001-05-08 10:41:44 +000012249 * Private version of xmlParseExternalEntity()
Owen Taylor3473f882001-02-23 17:55:21 +000012250 *
12251 * Returns 0 if the entity is well formed, -1 in case of args problem and
12252 * the parser error code otherwise
12253 */
12254
Daniel Veillard7d515752003-09-26 19:12:37 +000012255static xmlParserErrors
Daniel Veillarda97a19b2001-05-20 13:19:52 +000012256xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
12257 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +000012258 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +000012259 const xmlChar *ID, xmlNodePtr *list) {
Owen Taylor3473f882001-02-23 17:55:21 +000012260 xmlParserCtxtPtr ctxt;
12261 xmlDocPtr newDoc;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012262 xmlNodePtr newRoot;
Owen Taylor3473f882001-02-23 17:55:21 +000012263 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillard7d515752003-09-26 19:12:37 +000012264 xmlParserErrors ret = XML_ERR_OK;
Daniel Veillard87a764e2001-06-20 17:41:10 +000012265 xmlChar start[4];
12266 xmlCharEncoding enc;
Owen Taylor3473f882001-02-23 17:55:21 +000012267
Daniel Veillard0161e632008-08-28 15:36:32 +000012268 if (((depth > 40) &&
12269 ((oldctxt == NULL) || (oldctxt->options & XML_PARSE_HUGE) == 0)) ||
12270 (depth > 1024)) {
Owen Taylor3473f882001-02-23 17:55:21 +000012271 return(XML_ERR_ENTITY_LOOP);
12272 }
12273
Owen Taylor3473f882001-02-23 17:55:21 +000012274 if (list != NULL)
12275 *list = NULL;
12276 if ((URL == NULL) && (ID == NULL))
Daniel Veillard7d515752003-09-26 19:12:37 +000012277 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard30e76072006-03-09 14:13:55 +000012278 if (doc == NULL)
Daniel Veillard7d515752003-09-26 19:12:37 +000012279 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000012280
12281
12282 ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL);
William M. Brackb670e2e2003-09-27 01:05:55 +000012283 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
Owen Taylor3473f882001-02-23 17:55:21 +000012284 ctxt->userData = ctxt;
Daniel Veillarda97a19b2001-05-20 13:19:52 +000012285 if (oldctxt != NULL) {
12286 ctxt->_private = oldctxt->_private;
12287 ctxt->loadsubset = oldctxt->loadsubset;
12288 ctxt->validate = oldctxt->validate;
12289 ctxt->external = oldctxt->external;
Daniel Veillard44e1dd02003-02-21 23:23:28 +000012290 ctxt->record_info = oldctxt->record_info;
12291 ctxt->node_seq.maximum = oldctxt->node_seq.maximum;
12292 ctxt->node_seq.length = oldctxt->node_seq.length;
12293 ctxt->node_seq.buffer = oldctxt->node_seq.buffer;
Daniel Veillarda97a19b2001-05-20 13:19:52 +000012294 } else {
12295 /*
12296 * Doing validity checking on chunk without context
12297 * doesn't make sense
12298 */
12299 ctxt->_private = NULL;
12300 ctxt->validate = 0;
12301 ctxt->external = 2;
12302 ctxt->loadsubset = 0;
12303 }
Owen Taylor3473f882001-02-23 17:55:21 +000012304 if (sax != NULL) {
12305 oldsax = ctxt->sax;
12306 ctxt->sax = sax;
12307 if (user_data != NULL)
12308 ctxt->userData = user_data;
12309 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000012310 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000012311 newDoc = xmlNewDoc(BAD_CAST "1.0");
12312 if (newDoc == NULL) {
Daniel Veillard44e1dd02003-02-21 23:23:28 +000012313 ctxt->node_seq.maximum = 0;
12314 ctxt->node_seq.length = 0;
12315 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000012316 xmlFreeParserCtxt(ctxt);
Daniel Veillard7d515752003-09-26 19:12:37 +000012317 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000012318 }
Daniel Veillardae0765b2008-07-31 19:54:59 +000012319 newDoc->properties = XML_DOC_INTERNAL;
Daniel Veillard30e76072006-03-09 14:13:55 +000012320 newDoc->intSubset = doc->intSubset;
12321 newDoc->extSubset = doc->extSubset;
12322 newDoc->dict = doc->dict;
Daniel Veillard03a53c32004-10-26 16:06:51 +000012323 xmlDictReference(newDoc->dict);
12324
Owen Taylor3473f882001-02-23 17:55:21 +000012325 if (doc->URL != NULL) {
12326 newDoc->URL = xmlStrdup(doc->URL);
12327 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012328 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
12329 if (newRoot == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000012330 if (sax != NULL)
12331 ctxt->sax = oldsax;
Daniel Veillard44e1dd02003-02-21 23:23:28 +000012332 ctxt->node_seq.maximum = 0;
12333 ctxt->node_seq.length = 0;
12334 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000012335 xmlFreeParserCtxt(ctxt);
12336 newDoc->intSubset = NULL;
12337 newDoc->extSubset = NULL;
12338 xmlFreeDoc(newDoc);
Daniel Veillard7d515752003-09-26 19:12:37 +000012339 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000012340 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012341 xmlAddChild((xmlNodePtr) newDoc, newRoot);
Owen Taylor3473f882001-02-23 17:55:21 +000012342 nodePush(ctxt, newDoc->children);
Daniel Veillard30e76072006-03-09 14:13:55 +000012343 ctxt->myDoc = doc;
12344 newRoot->doc = doc;
Owen Taylor3473f882001-02-23 17:55:21 +000012345
Daniel Veillard0161e632008-08-28 15:36:32 +000012346 /*
Daniel Veillard87a764e2001-06-20 17:41:10 +000012347 * Get the 4 first bytes and decode the charset
12348 * if enc != XML_CHAR_ENCODING_NONE
12349 * plug some encoding conversion routines.
12350 */
12351 GROW;
Daniel Veillard4aede2e2003-10-17 12:43:59 +000012352 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
12353 start[0] = RAW;
12354 start[1] = NXT(1);
12355 start[2] = NXT(2);
12356 start[3] = NXT(3);
12357 enc = xmlDetectCharEncoding(start, 4);
12358 if (enc != XML_CHAR_ENCODING_NONE) {
12359 xmlSwitchEncoding(ctxt, enc);
12360 }
Daniel Veillard87a764e2001-06-20 17:41:10 +000012361 }
12362
Owen Taylor3473f882001-02-23 17:55:21 +000012363 /*
12364 * Parse a possible text declaration first
12365 */
Daniel Veillarda07050d2003-10-19 14:46:32 +000012366 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +000012367 xmlParseTextDecl(ctxt);
12368 }
12369
Owen Taylor3473f882001-02-23 17:55:21 +000012370 ctxt->instate = XML_PARSER_CONTENT;
Owen Taylor3473f882001-02-23 17:55:21 +000012371 ctxt->depth = depth;
12372
12373 xmlParseContent(ctxt);
Daniel Veillard0161e632008-08-28 15:36:32 +000012374
Daniel Veillard561b7f82002-03-20 21:55:57 +000012375 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012376 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard561b7f82002-03-20 21:55:57 +000012377 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012378 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000012379 }
12380 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012381 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000012382 }
12383
12384 if (!ctxt->wellFormed) {
12385 if (ctxt->errNo == 0)
Daniel Veillard7d515752003-09-26 19:12:37 +000012386 ret = XML_ERR_INTERNAL_ERROR;
Owen Taylor3473f882001-02-23 17:55:21 +000012387 else
William M. Brack7b9154b2003-09-27 19:23:50 +000012388 ret = (xmlParserErrors)ctxt->errNo;
Owen Taylor3473f882001-02-23 17:55:21 +000012389 } else {
12390 if (list != NULL) {
12391 xmlNodePtr cur;
12392
12393 /*
12394 * Return the newly created nodeset after unlinking it from
12395 * they pseudo parent.
12396 */
12397 cur = newDoc->children->children;
12398 *list = cur;
12399 while (cur != NULL) {
12400 cur->parent = NULL;
12401 cur = cur->next;
12402 }
12403 newDoc->children->children = NULL;
12404 }
Daniel Veillard7d515752003-09-26 19:12:37 +000012405 ret = XML_ERR_OK;
Owen Taylor3473f882001-02-23 17:55:21 +000012406 }
Daniel Veillard0161e632008-08-28 15:36:32 +000012407
12408 /*
12409 * Record in the parent context the number of entities replacement
12410 * done when parsing that reference.
12411 */
12412 oldctxt->nbentities += ctxt->nbentities;
12413 /*
12414 * Also record the size of the entity parsed
12415 */
12416 if (ctxt->input != NULL) {
12417 oldctxt->sizeentities += ctxt->input->consumed;
12418 oldctxt->sizeentities += (ctxt->input->cur - ctxt->input->base);
12419 }
12420 /*
12421 * And record the last error if any
12422 */
12423 if (ctxt->lastError.code != XML_ERR_OK)
12424 xmlCopyError(&ctxt->lastError, &oldctxt->lastError);
12425
Owen Taylor3473f882001-02-23 17:55:21 +000012426 if (sax != NULL)
12427 ctxt->sax = oldsax;
Daniel Veillard0046c0f2003-02-23 13:52:30 +000012428 oldctxt->node_seq.maximum = ctxt->node_seq.maximum;
12429 oldctxt->node_seq.length = ctxt->node_seq.length;
12430 oldctxt->node_seq.buffer = ctxt->node_seq.buffer;
Daniel Veillard44e1dd02003-02-21 23:23:28 +000012431 ctxt->node_seq.maximum = 0;
12432 ctxt->node_seq.length = 0;
12433 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000012434 xmlFreeParserCtxt(ctxt);
12435 newDoc->intSubset = NULL;
12436 newDoc->extSubset = NULL;
12437 xmlFreeDoc(newDoc);
Daniel Veillard0161e632008-08-28 15:36:32 +000012438
Owen Taylor3473f882001-02-23 17:55:21 +000012439 return(ret);
12440}
12441
Daniel Veillard81273902003-09-30 00:43:48 +000012442#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000012443/**
Daniel Veillard257d9102001-05-08 10:41:44 +000012444 * xmlParseExternalEntity:
12445 * @doc: the document the chunk pertains to
12446 * @sax: the SAX handler bloc (possibly NULL)
12447 * @user_data: The user data returned on SAX callbacks (possibly NULL)
12448 * @depth: Used for loop detection, use 0
12449 * @URL: the URL for the entity to load
12450 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +000012451 * @lst: the return value for the set of parsed nodes
Daniel Veillard257d9102001-05-08 10:41:44 +000012452 *
12453 * Parse an external general entity
12454 * An external general parsed entity is well-formed if it matches the
12455 * production labeled extParsedEnt.
12456 *
12457 * [78] extParsedEnt ::= TextDecl? content
12458 *
12459 * Returns 0 if the entity is well formed, -1 in case of args problem and
12460 * the parser error code otherwise
12461 */
12462
12463int
12464xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardcda96922001-08-21 10:56:31 +000012465 int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *lst) {
Daniel Veillarda97a19b2001-05-20 13:19:52 +000012466 return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL,
Daniel Veillardcda96922001-08-21 10:56:31 +000012467 ID, lst));
Daniel Veillard257d9102001-05-08 10:41:44 +000012468}
12469
12470/**
Daniel Veillarde020c3a2001-03-21 18:06:15 +000012471 * xmlParseBalancedChunkMemory:
Owen Taylor3473f882001-02-23 17:55:21 +000012472 * @doc: the document the chunk pertains to
12473 * @sax: the SAX handler bloc (possibly NULL)
12474 * @user_data: The user data returned on SAX callbacks (possibly NULL)
12475 * @depth: Used for loop detection, use 0
12476 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
Daniel Veillardcda96922001-08-21 10:56:31 +000012477 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +000012478 *
12479 * Parse a well-balanced chunk of an XML document
12480 * called by the parser
12481 * The allowed sequence for the Well Balanced Chunk is the one defined by
12482 * the content production in the XML grammar:
12483 *
12484 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
12485 *
12486 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
12487 * the parser error code otherwise
12488 */
12489
12490int
12491xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
Daniel Veillardcda96922001-08-21 10:56:31 +000012492 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) {
Daniel Veillard58e44c92002-08-02 22:19:49 +000012493 return xmlParseBalancedChunkMemoryRecover( doc, sax, user_data,
12494 depth, string, lst, 0 );
12495}
Daniel Veillard81273902003-09-30 00:43:48 +000012496#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillard58e44c92002-08-02 22:19:49 +000012497
12498/**
Daniel Veillard328f48c2002-11-15 15:24:34 +000012499 * xmlParseBalancedChunkMemoryInternal:
12500 * @oldctxt: the existing parsing context
12501 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
12502 * @user_data: the user data field for the parser context
12503 * @lst: the return value for the set of parsed nodes
12504 *
12505 *
12506 * Parse a well-balanced chunk of an XML document
12507 * called by the parser
12508 * The allowed sequence for the Well Balanced Chunk is the one defined by
12509 * the content production in the XML grammar:
12510 *
12511 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
12512 *
Daniel Veillard7d515752003-09-26 19:12:37 +000012513 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
12514 * error code otherwise
Daniel Veillard0161e632008-08-28 15:36:32 +000012515 *
Daniel Veillard328f48c2002-11-15 15:24:34 +000012516 * In case recover is set to 1, the nodelist will not be empty even if
Daniel Veillard0161e632008-08-28 15:36:32 +000012517 * the parsed chunk is not well balanced.
Daniel Veillard328f48c2002-11-15 15:24:34 +000012518 */
Daniel Veillard7d515752003-09-26 19:12:37 +000012519static xmlParserErrors
Daniel Veillard328f48c2002-11-15 15:24:34 +000012520xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
12521 const xmlChar *string, void *user_data, xmlNodePtr *lst) {
12522 xmlParserCtxtPtr ctxt;
Daniel Veillard68e9e742002-11-16 15:35:11 +000012523 xmlDocPtr newDoc = NULL;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012524 xmlNodePtr newRoot;
Daniel Veillard328f48c2002-11-15 15:24:34 +000012525 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000012526 xmlNodePtr content = NULL;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012527 xmlNodePtr last = NULL;
Daniel Veillard328f48c2002-11-15 15:24:34 +000012528 int size;
Daniel Veillard7d515752003-09-26 19:12:37 +000012529 xmlParserErrors ret = XML_ERR_OK;
Daniel Veillard328f48c2002-11-15 15:24:34 +000012530
Daniel Veillard0161e632008-08-28 15:36:32 +000012531 if (((oldctxt->depth > 40) && ((oldctxt->options & XML_PARSE_HUGE) == 0)) ||
12532 (oldctxt->depth > 1024)) {
Daniel Veillard328f48c2002-11-15 15:24:34 +000012533 return(XML_ERR_ENTITY_LOOP);
12534 }
12535
12536
12537 if (lst != NULL)
12538 *lst = NULL;
12539 if (string == NULL)
William M. Brack7b9154b2003-09-27 19:23:50 +000012540 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard328f48c2002-11-15 15:24:34 +000012541
12542 size = xmlStrlen(string);
12543
12544 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
William M. Brack7b9154b2003-09-27 19:23:50 +000012545 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
Daniel Veillard328f48c2002-11-15 15:24:34 +000012546 if (user_data != NULL)
12547 ctxt->userData = user_data;
12548 else
12549 ctxt->userData = ctxt;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000012550 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
12551 ctxt->dict = oldctxt->dict;
Daniel Veillardfd343dc2003-10-31 10:55:22 +000012552 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
12553 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
12554 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
Daniel Veillard328f48c2002-11-15 15:24:34 +000012555
12556 oldsax = ctxt->sax;
12557 ctxt->sax = oldctxt->sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000012558 xmlDetectSAX2(ctxt);
Daniel Veillard87ab1c12003-12-21 13:01:56 +000012559 ctxt->replaceEntities = oldctxt->replaceEntities;
12560 ctxt->options = oldctxt->options;
Daniel Veillard0161e632008-08-28 15:36:32 +000012561
Daniel Veillarde1ca5032002-12-09 14:13:43 +000012562 ctxt->_private = oldctxt->_private;
Daniel Veillard328f48c2002-11-15 15:24:34 +000012563 if (oldctxt->myDoc == NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000012564 newDoc = xmlNewDoc(BAD_CAST "1.0");
12565 if (newDoc == NULL) {
12566 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000012567 ctxt->dict = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000012568 xmlFreeParserCtxt(ctxt);
William M. Brack7b9154b2003-09-27 19:23:50 +000012569 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard68e9e742002-11-16 15:35:11 +000012570 }
Daniel Veillardae0765b2008-07-31 19:54:59 +000012571 newDoc->properties = XML_DOC_INTERNAL;
Daniel Veillard03a53c32004-10-26 16:06:51 +000012572 newDoc->dict = ctxt->dict;
12573 xmlDictReference(newDoc->dict);
Daniel Veillard328f48c2002-11-15 15:24:34 +000012574 ctxt->myDoc = newDoc;
Daniel Veillard68e9e742002-11-16 15:35:11 +000012575 } else {
12576 ctxt->myDoc = oldctxt->myDoc;
12577 content = ctxt->myDoc->children;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012578 last = ctxt->myDoc->last;
Daniel Veillard328f48c2002-11-15 15:24:34 +000012579 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012580 newRoot = xmlNewDocNode(ctxt->myDoc, NULL, BAD_CAST "pseudoroot", NULL);
12581 if (newRoot == NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000012582 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000012583 ctxt->dict = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000012584 xmlFreeParserCtxt(ctxt);
Daniel Veillard03a53c32004-10-26 16:06:51 +000012585 if (newDoc != NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000012586 xmlFreeDoc(newDoc);
Daniel Veillard03a53c32004-10-26 16:06:51 +000012587 }
William M. Brack7b9154b2003-09-27 19:23:50 +000012588 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard68e9e742002-11-16 15:35:11 +000012589 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012590 ctxt->myDoc->children = NULL;
12591 ctxt->myDoc->last = NULL;
12592 xmlAddChild((xmlNodePtr) ctxt->myDoc, newRoot);
Daniel Veillard68e9e742002-11-16 15:35:11 +000012593 nodePush(ctxt, ctxt->myDoc->children);
Daniel Veillard328f48c2002-11-15 15:24:34 +000012594 ctxt->instate = XML_PARSER_CONTENT;
12595 ctxt->depth = oldctxt->depth + 1;
12596
Daniel Veillard328f48c2002-11-15 15:24:34 +000012597 ctxt->validate = 0;
12598 ctxt->loadsubset = oldctxt->loadsubset;
Daniel Veillardef8dd7b2003-03-23 12:02:56 +000012599 if ((oldctxt->validate) || (oldctxt->replaceEntities != 0)) {
12600 /*
12601 * ID/IDREF registration will be done in xmlValidateElement below
12602 */
12603 ctxt->loadsubset |= XML_SKIP_IDS;
12604 }
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000012605 ctxt->dictNames = oldctxt->dictNames;
Daniel Veillard95d2d5b2003-10-27 14:54:49 +000012606 ctxt->attsDefault = oldctxt->attsDefault;
12607 ctxt->attsSpecial = oldctxt->attsSpecial;
Daniel Veillard328f48c2002-11-15 15:24:34 +000012608
Daniel Veillard68e9e742002-11-16 15:35:11 +000012609 xmlParseContent(ctxt);
Daniel Veillard328f48c2002-11-15 15:24:34 +000012610 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012611 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000012612 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012613 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000012614 }
Daniel Veillard68e9e742002-11-16 15:35:11 +000012615 if (ctxt->node != ctxt->myDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012616 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000012617 }
12618
12619 if (!ctxt->wellFormed) {
12620 if (ctxt->errNo == 0)
Daniel Veillard7d515752003-09-26 19:12:37 +000012621 ret = XML_ERR_INTERNAL_ERROR;
Daniel Veillard328f48c2002-11-15 15:24:34 +000012622 else
William M. Brack7b9154b2003-09-27 19:23:50 +000012623 ret = (xmlParserErrors)ctxt->errNo;
Daniel Veillard328f48c2002-11-15 15:24:34 +000012624 } else {
William M. Brack7b9154b2003-09-27 19:23:50 +000012625 ret = XML_ERR_OK;
Daniel Veillard328f48c2002-11-15 15:24:34 +000012626 }
Daniel Veillard0161e632008-08-28 15:36:32 +000012627
William M. Brack7b9154b2003-09-27 19:23:50 +000012628 if ((lst != NULL) && (ret == XML_ERR_OK)) {
Daniel Veillard328f48c2002-11-15 15:24:34 +000012629 xmlNodePtr cur;
12630
12631 /*
12632 * Return the newly created nodeset after unlinking it from
12633 * they pseudo parent.
12634 */
Daniel Veillard68e9e742002-11-16 15:35:11 +000012635 cur = ctxt->myDoc->children->children;
Daniel Veillard328f48c2002-11-15 15:24:34 +000012636 *lst = cur;
12637 while (cur != NULL) {
Daniel Veillard4432df22003-09-28 18:58:27 +000012638#ifdef LIBXML_VALID_ENABLED
Daniel Veillard8874b942005-08-25 13:19:21 +000012639 if ((oldctxt->validate) && (oldctxt->wellFormed) &&
12640 (oldctxt->myDoc) && (oldctxt->myDoc->intSubset) &&
12641 (cur->type == XML_ELEMENT_NODE)) {
Daniel Veillard8d589042003-02-04 15:07:21 +000012642 oldctxt->valid &= xmlValidateElement(&oldctxt->vctxt,
12643 oldctxt->myDoc, cur);
12644 }
Daniel Veillard4432df22003-09-28 18:58:27 +000012645#endif /* LIBXML_VALID_ENABLED */
Daniel Veillard328f48c2002-11-15 15:24:34 +000012646 cur->parent = NULL;
12647 cur = cur->next;
12648 }
Daniel Veillard68e9e742002-11-16 15:35:11 +000012649 ctxt->myDoc->children->children = NULL;
12650 }
12651 if (ctxt->myDoc != NULL) {
12652 xmlFreeNode(ctxt->myDoc->children);
12653 ctxt->myDoc->children = content;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012654 ctxt->myDoc->last = last;
Daniel Veillard328f48c2002-11-15 15:24:34 +000012655 }
Daniel Veillard0161e632008-08-28 15:36:32 +000012656
12657 /*
12658 * Record in the parent context the number of entities replacement
12659 * done when parsing that reference.
12660 */
Daniel Veillard4bf899b2008-08-20 17:04:30 +000012661 oldctxt->nbentities += ctxt->nbentities;
Daniel Veillard0161e632008-08-28 15:36:32 +000012662 /*
12663 * Also record the last error if any
12664 */
12665 if (ctxt->lastError.code != XML_ERR_OK)
12666 xmlCopyError(&ctxt->lastError, &oldctxt->lastError);
12667
Daniel Veillard328f48c2002-11-15 15:24:34 +000012668 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000012669 ctxt->dict = NULL;
Daniel Veillard95d2d5b2003-10-27 14:54:49 +000012670 ctxt->attsDefault = NULL;
12671 ctxt->attsSpecial = NULL;
Daniel Veillard328f48c2002-11-15 15:24:34 +000012672 xmlFreeParserCtxt(ctxt);
Daniel Veillard03a53c32004-10-26 16:06:51 +000012673 if (newDoc != NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000012674 xmlFreeDoc(newDoc);
Daniel Veillard03a53c32004-10-26 16:06:51 +000012675 }
Daniel Veillard0161e632008-08-28 15:36:32 +000012676
Daniel Veillard328f48c2002-11-15 15:24:34 +000012677 return(ret);
12678}
12679
Daniel Veillard29b17482004-08-16 00:39:03 +000012680/**
12681 * xmlParseInNodeContext:
12682 * @node: the context node
12683 * @data: the input string
12684 * @datalen: the input string length in bytes
12685 * @options: a combination of xmlParserOption
12686 * @lst: the return value for the set of parsed nodes
12687 *
12688 * Parse a well-balanced chunk of an XML document
12689 * within the context (DTD, namespaces, etc ...) of the given node.
12690 *
12691 * The allowed sequence for the data is a Well Balanced Chunk defined by
12692 * the content production in the XML grammar:
12693 *
12694 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
12695 *
12696 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
12697 * error code otherwise
12698 */
12699xmlParserErrors
12700xmlParseInNodeContext(xmlNodePtr node, const char *data, int datalen,
12701 int options, xmlNodePtr *lst) {
12702#ifdef SAX2
12703 xmlParserCtxtPtr ctxt;
12704 xmlDocPtr doc = NULL;
12705 xmlNodePtr fake, cur;
12706 int nsnr = 0;
12707
12708 xmlParserErrors ret = XML_ERR_OK;
12709
12710 /*
12711 * check all input parameters, grab the document
12712 */
12713 if ((lst == NULL) || (node == NULL) || (data == NULL) || (datalen < 0))
12714 return(XML_ERR_INTERNAL_ERROR);
12715 switch (node->type) {
12716 case XML_ELEMENT_NODE:
12717 case XML_ATTRIBUTE_NODE:
12718 case XML_TEXT_NODE:
12719 case XML_CDATA_SECTION_NODE:
12720 case XML_ENTITY_REF_NODE:
12721 case XML_PI_NODE:
12722 case XML_COMMENT_NODE:
12723 case XML_DOCUMENT_NODE:
12724 case XML_HTML_DOCUMENT_NODE:
12725 break;
12726 default:
12727 return(XML_ERR_INTERNAL_ERROR);
12728
12729 }
12730 while ((node != NULL) && (node->type != XML_ELEMENT_NODE) &&
12731 (node->type != XML_DOCUMENT_NODE) &&
12732 (node->type != XML_HTML_DOCUMENT_NODE))
12733 node = node->parent;
12734 if (node == NULL)
12735 return(XML_ERR_INTERNAL_ERROR);
12736 if (node->type == XML_ELEMENT_NODE)
12737 doc = node->doc;
12738 else
12739 doc = (xmlDocPtr) node;
12740 if (doc == NULL)
12741 return(XML_ERR_INTERNAL_ERROR);
12742
12743 /*
12744 * allocate a context and set-up everything not related to the
12745 * node position in the tree
12746 */
12747 if (doc->type == XML_DOCUMENT_NODE)
12748 ctxt = xmlCreateMemoryParserCtxt((char *) data, datalen);
12749#ifdef LIBXML_HTML_ENABLED
12750 else if (doc->type == XML_HTML_DOCUMENT_NODE)
12751 ctxt = htmlCreateMemoryParserCtxt((char *) data, datalen);
12752#endif
12753 else
12754 return(XML_ERR_INTERNAL_ERROR);
12755
12756 if (ctxt == NULL)
12757 return(XML_ERR_NO_MEMORY);
12758 fake = xmlNewComment(NULL);
12759 if (fake == NULL) {
12760 xmlFreeParserCtxt(ctxt);
12761 return(XML_ERR_NO_MEMORY);
12762 }
12763 xmlAddChild(node, fake);
William M. Brackc3f81342004-10-03 01:22:44 +000012764
12765 /*
12766 * Use input doc's dict if present, else assure XML_PARSE_NODICT is set.
12767 * We need a dictionary for xmlDetectSAX2, so if there's no doc dict
12768 * we must wait until the last moment to free the original one.
12769 */
Daniel Veillard29b17482004-08-16 00:39:03 +000012770 if (doc->dict != NULL) {
William M. Brackc3f81342004-10-03 01:22:44 +000012771 if (ctxt->dict != NULL)
Daniel Veillard29b17482004-08-16 00:39:03 +000012772 xmlDictFree(ctxt->dict);
12773 ctxt->dict = doc->dict;
William M. Brackc3f81342004-10-03 01:22:44 +000012774 } else
12775 options |= XML_PARSE_NODICT;
12776
Daniel Veillard37334572008-07-31 08:20:02 +000012777 xmlCtxtUseOptionsInternal(ctxt, options, NULL);
Daniel Veillard29b17482004-08-16 00:39:03 +000012778 xmlDetectSAX2(ctxt);
12779 ctxt->myDoc = doc;
12780
12781 if (node->type == XML_ELEMENT_NODE) {
12782 nodePush(ctxt, node);
12783 /*
12784 * initialize the SAX2 namespaces stack
12785 */
12786 cur = node;
12787 while ((cur != NULL) && (cur->type == XML_ELEMENT_NODE)) {
12788 xmlNsPtr ns = cur->nsDef;
12789 const xmlChar *iprefix, *ihref;
12790
12791 while (ns != NULL) {
12792 if (ctxt->dict) {
12793 iprefix = xmlDictLookup(ctxt->dict, ns->prefix, -1);
12794 ihref = xmlDictLookup(ctxt->dict, ns->href, -1);
12795 } else {
12796 iprefix = ns->prefix;
12797 ihref = ns->href;
12798 }
12799
12800 if (xmlGetNamespace(ctxt, iprefix) == NULL) {
12801 nsPush(ctxt, iprefix, ihref);
12802 nsnr++;
12803 }
12804 ns = ns->next;
12805 }
12806 cur = cur->parent;
12807 }
12808 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillard0161e632008-08-28 15:36:32 +000012809 }
Daniel Veillard29b17482004-08-16 00:39:03 +000012810
12811 if ((ctxt->validate) || (ctxt->replaceEntities != 0)) {
12812 /*
12813 * ID/IDREF registration will be done in xmlValidateElement below
12814 */
12815 ctxt->loadsubset |= XML_SKIP_IDS;
12816 }
12817
Daniel Veillard499cc922006-01-18 17:22:35 +000012818#ifdef LIBXML_HTML_ENABLED
12819 if (doc->type == XML_HTML_DOCUMENT_NODE)
12820 __htmlParseContent(ctxt);
12821 else
12822#endif
12823 xmlParseContent(ctxt);
12824
Daniel Veillard29b17482004-08-16 00:39:03 +000012825 nsPop(ctxt, nsnr);
12826 if ((RAW == '<') && (NXT(1) == '/')) {
12827 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
12828 } else if (RAW != 0) {
12829 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
12830 }
12831 if ((ctxt->node != NULL) && (ctxt->node != node)) {
12832 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
12833 ctxt->wellFormed = 0;
12834 }
12835
12836 if (!ctxt->wellFormed) {
12837 if (ctxt->errNo == 0)
12838 ret = XML_ERR_INTERNAL_ERROR;
12839 else
12840 ret = (xmlParserErrors)ctxt->errNo;
12841 } else {
12842 ret = XML_ERR_OK;
12843 }
Daniel Veillard0161e632008-08-28 15:36:32 +000012844
Daniel Veillard29b17482004-08-16 00:39:03 +000012845 /*
12846 * Return the newly created nodeset after unlinking it from
12847 * the pseudo sibling.
12848 */
Daniel Veillard0161e632008-08-28 15:36:32 +000012849
Daniel Veillard29b17482004-08-16 00:39:03 +000012850 cur = fake->next;
12851 fake->next = NULL;
12852 node->last = fake;
12853
12854 if (cur != NULL) {
12855 cur->prev = NULL;
12856 }
12857
12858 *lst = cur;
12859
12860 while (cur != NULL) {
12861 cur->parent = NULL;
12862 cur = cur->next;
12863 }
12864
12865 xmlUnlinkNode(fake);
12866 xmlFreeNode(fake);
12867
12868
12869 if (ret != XML_ERR_OK) {
12870 xmlFreeNodeList(*lst);
12871 *lst = NULL;
12872 }
William M. Brackc3f81342004-10-03 01:22:44 +000012873
William M. Brackb7b54de2004-10-06 16:38:01 +000012874 if (doc->dict != NULL)
12875 ctxt->dict = NULL;
Daniel Veillard29b17482004-08-16 00:39:03 +000012876 xmlFreeParserCtxt(ctxt);
Daniel Veillard0161e632008-08-28 15:36:32 +000012877
Daniel Veillard29b17482004-08-16 00:39:03 +000012878 return(ret);
12879#else /* !SAX2 */
12880 return(XML_ERR_INTERNAL_ERROR);
12881#endif
12882}
12883
Daniel Veillard81273902003-09-30 00:43:48 +000012884#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard328f48c2002-11-15 15:24:34 +000012885/**
Daniel Veillard58e44c92002-08-02 22:19:49 +000012886 * xmlParseBalancedChunkMemoryRecover:
12887 * @doc: the document the chunk pertains to
12888 * @sax: the SAX handler bloc (possibly NULL)
12889 * @user_data: The user data returned on SAX callbacks (possibly NULL)
12890 * @depth: Used for loop detection, use 0
12891 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
12892 * @lst: the return value for the set of parsed nodes
12893 * @recover: return nodes even if the data is broken (use 0)
12894 *
12895 *
12896 * Parse a well-balanced chunk of an XML document
12897 * called by the parser
12898 * The allowed sequence for the Well Balanced Chunk is the one defined by
12899 * the content production in the XML grammar:
12900 *
12901 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
12902 *
12903 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
12904 * the parser error code otherwise
Daniel Veillard2135fc22008-04-04 16:10:51 +000012905 *
Daniel Veillard58e44c92002-08-02 22:19:49 +000012906 * In case recover is set to 1, the nodelist will not be empty even if
Daniel Veillard2135fc22008-04-04 16:10:51 +000012907 * the parsed chunk is not well balanced, assuming the parsing succeeded to
12908 * some extent.
Daniel Veillard58e44c92002-08-02 22:19:49 +000012909 */
12910int
12911xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax,
Daniel Veillard2135fc22008-04-04 16:10:51 +000012912 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst,
Daniel Veillard58e44c92002-08-02 22:19:49 +000012913 int recover) {
Owen Taylor3473f882001-02-23 17:55:21 +000012914 xmlParserCtxtPtr ctxt;
12915 xmlDocPtr newDoc;
12916 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000012917 xmlNodePtr content, newRoot;
Owen Taylor3473f882001-02-23 17:55:21 +000012918 int size;
12919 int ret = 0;
12920
Daniel Veillard0161e632008-08-28 15:36:32 +000012921 if (depth > 40) {
Owen Taylor3473f882001-02-23 17:55:21 +000012922 return(XML_ERR_ENTITY_LOOP);
12923 }
12924
12925
Daniel Veillardcda96922001-08-21 10:56:31 +000012926 if (lst != NULL)
12927 *lst = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000012928 if (string == NULL)
12929 return(-1);
12930
12931 size = xmlStrlen(string);
12932
12933 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
12934 if (ctxt == NULL) return(-1);
12935 ctxt->userData = ctxt;
12936 if (sax != NULL) {
12937 oldsax = ctxt->sax;
12938 ctxt->sax = sax;
12939 if (user_data != NULL)
12940 ctxt->userData = user_data;
12941 }
12942 newDoc = xmlNewDoc(BAD_CAST "1.0");
12943 if (newDoc == NULL) {
12944 xmlFreeParserCtxt(ctxt);
12945 return(-1);
12946 }
Daniel Veillardae0765b2008-07-31 19:54:59 +000012947 newDoc->properties = XML_DOC_INTERNAL;
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000012948 if ((doc != NULL) && (doc->dict != NULL)) {
12949 xmlDictFree(ctxt->dict);
12950 ctxt->dict = doc->dict;
12951 xmlDictReference(ctxt->dict);
12952 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
12953 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
12954 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
12955 ctxt->dictNames = 1;
12956 } else {
Daniel Veillard37334572008-07-31 08:20:02 +000012957 xmlCtxtUseOptionsInternal(ctxt, XML_PARSE_NODICT, NULL);
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000012958 }
Owen Taylor3473f882001-02-23 17:55:21 +000012959 if (doc != NULL) {
12960 newDoc->intSubset = doc->intSubset;
12961 newDoc->extSubset = doc->extSubset;
12962 }
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000012963 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
12964 if (newRoot == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000012965 if (sax != NULL)
12966 ctxt->sax = oldsax;
12967 xmlFreeParserCtxt(ctxt);
12968 newDoc->intSubset = NULL;
12969 newDoc->extSubset = NULL;
12970 xmlFreeDoc(newDoc);
12971 return(-1);
12972 }
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000012973 xmlAddChild((xmlNodePtr) newDoc, newRoot);
12974 nodePush(ctxt, newRoot);
Owen Taylor3473f882001-02-23 17:55:21 +000012975 if (doc == NULL) {
12976 ctxt->myDoc = newDoc;
12977 } else {
Daniel Veillard42766c02002-08-22 20:52:17 +000012978 ctxt->myDoc = newDoc;
Owen Taylor3473f882001-02-23 17:55:21 +000012979 newDoc->children->doc = doc;
Rob Richardsa02f1992006-09-16 14:04:26 +000012980 /* Ensure that doc has XML spec namespace */
12981 xmlSearchNsByHref(doc, (xmlNodePtr)doc, XML_XML_NAMESPACE);
12982 newDoc->oldNs = doc->oldNs;
Owen Taylor3473f882001-02-23 17:55:21 +000012983 }
12984 ctxt->instate = XML_PARSER_CONTENT;
12985 ctxt->depth = depth;
12986
12987 /*
12988 * Doing validity checking on chunk doesn't make sense
12989 */
12990 ctxt->validate = 0;
12991 ctxt->loadsubset = 0;
Daniel Veillarde57ec792003-09-10 10:50:59 +000012992 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000012993
Daniel Veillardb39bc392002-10-26 19:29:51 +000012994 if ( doc != NULL ){
12995 content = doc->children;
12996 doc->children = NULL;
12997 xmlParseContent(ctxt);
12998 doc->children = content;
12999 }
13000 else {
13001 xmlParseContent(ctxt);
13002 }
Owen Taylor3473f882001-02-23 17:55:21 +000013003 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000013004 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000013005 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000013006 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000013007 }
13008 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000013009 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000013010 }
13011
13012 if (!ctxt->wellFormed) {
13013 if (ctxt->errNo == 0)
13014 ret = 1;
13015 else
13016 ret = ctxt->errNo;
13017 } else {
Daniel Veillard58e44c92002-08-02 22:19:49 +000013018 ret = 0;
13019 }
Daniel Veillard0161e632008-08-28 15:36:32 +000013020
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000013021 if ((lst != NULL) && ((ret == 0) || (recover == 1))) {
13022 xmlNodePtr cur;
Owen Taylor3473f882001-02-23 17:55:21 +000013023
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000013024 /*
13025 * Return the newly created nodeset after unlinking it from
13026 * they pseudo parent.
13027 */
13028 cur = newDoc->children->children;
13029 *lst = cur;
13030 while (cur != NULL) {
13031 xmlSetTreeDoc(cur, doc);
13032 cur->parent = NULL;
13033 cur = cur->next;
Owen Taylor3473f882001-02-23 17:55:21 +000013034 }
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000013035 newDoc->children->children = NULL;
13036 }
Daniel Veillard0161e632008-08-28 15:36:32 +000013037
13038 if (sax != NULL)
Owen Taylor3473f882001-02-23 17:55:21 +000013039 ctxt->sax = oldsax;
13040 xmlFreeParserCtxt(ctxt);
13041 newDoc->intSubset = NULL;
13042 newDoc->extSubset = NULL;
Rob Richardsa02f1992006-09-16 14:04:26 +000013043 newDoc->oldNs = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000013044 xmlFreeDoc(newDoc);
Daniel Veillard0161e632008-08-28 15:36:32 +000013045
Owen Taylor3473f882001-02-23 17:55:21 +000013046 return(ret);
13047}
13048
13049/**
13050 * xmlSAXParseEntity:
13051 * @sax: the SAX handler block
13052 * @filename: the filename
13053 *
13054 * parse an XML external entity out of context and build a tree.
13055 * It use the given SAX function block to handle the parsing callback.
13056 * If sax is NULL, fallback to the default DOM tree building routines.
13057 *
13058 * [78] extParsedEnt ::= TextDecl? content
13059 *
13060 * This correspond to a "Well Balanced" chunk
13061 *
13062 * Returns the resulting document tree
13063 */
13064
13065xmlDocPtr
13066xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
13067 xmlDocPtr ret;
13068 xmlParserCtxtPtr ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000013069
13070 ctxt = xmlCreateFileParserCtxt(filename);
13071 if (ctxt == NULL) {
13072 return(NULL);
13073 }
13074 if (sax != NULL) {
13075 if (ctxt->sax != NULL)
13076 xmlFree(ctxt->sax);
13077 ctxt->sax = sax;
13078 ctxt->userData = NULL;
13079 }
13080
Owen Taylor3473f882001-02-23 17:55:21 +000013081 xmlParseExtParsedEnt(ctxt);
13082
13083 if (ctxt->wellFormed)
13084 ret = ctxt->myDoc;
13085 else {
13086 ret = NULL;
13087 xmlFreeDoc(ctxt->myDoc);
13088 ctxt->myDoc = NULL;
13089 }
13090 if (sax != NULL)
13091 ctxt->sax = NULL;
13092 xmlFreeParserCtxt(ctxt);
Daniel Veillard0161e632008-08-28 15:36:32 +000013093
Owen Taylor3473f882001-02-23 17:55:21 +000013094 return(ret);
13095}
13096
13097/**
13098 * xmlParseEntity:
13099 * @filename: the filename
13100 *
13101 * parse an XML external entity out of context and build a tree.
13102 *
13103 * [78] extParsedEnt ::= TextDecl? content
13104 *
13105 * This correspond to a "Well Balanced" chunk
13106 *
13107 * Returns the resulting document tree
13108 */
13109
13110xmlDocPtr
13111xmlParseEntity(const char *filename) {
13112 return(xmlSAXParseEntity(NULL, filename));
13113}
Daniel Veillard81273902003-09-30 00:43:48 +000013114#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000013115
13116/**
13117 * xmlCreateEntityParserCtxt:
13118 * @URL: the entity URL
13119 * @ID: the entity PUBLIC ID
Daniel Veillardcbaf3992001-12-31 16:16:02 +000013120 * @base: a possible base for the target URI
Owen Taylor3473f882001-02-23 17:55:21 +000013121 *
13122 * Create a parser context for an external entity
13123 * Automatic support for ZLIB/Compress compressed document is provided
13124 * by default if found at compile-time.
13125 *
13126 * Returns the new parser context or NULL
13127 */
13128xmlParserCtxtPtr
13129xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
13130 const xmlChar *base) {
13131 xmlParserCtxtPtr ctxt;
13132 xmlParserInputPtr inputStream;
13133 char *directory = NULL;
13134 xmlChar *uri;
Daniel Veillard0161e632008-08-28 15:36:32 +000013135
Owen Taylor3473f882001-02-23 17:55:21 +000013136 ctxt = xmlNewParserCtxt();
13137 if (ctxt == NULL) {
13138 return(NULL);
13139 }
13140
13141 uri = xmlBuildURI(URL, base);
13142
13143 if (uri == NULL) {
13144 inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
13145 if (inputStream == NULL) {
13146 xmlFreeParserCtxt(ctxt);
13147 return(NULL);
13148 }
13149
13150 inputPush(ctxt, inputStream);
13151
13152 if ((ctxt->directory == NULL) && (directory == NULL))
13153 directory = xmlParserGetDirectory((char *)URL);
13154 if ((ctxt->directory == NULL) && (directory != NULL))
13155 ctxt->directory = directory;
13156 } else {
13157 inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt);
13158 if (inputStream == NULL) {
13159 xmlFree(uri);
13160 xmlFreeParserCtxt(ctxt);
13161 return(NULL);
13162 }
13163
13164 inputPush(ctxt, inputStream);
13165
13166 if ((ctxt->directory == NULL) && (directory == NULL))
13167 directory = xmlParserGetDirectory((char *)uri);
13168 if ((ctxt->directory == NULL) && (directory != NULL))
13169 ctxt->directory = directory;
13170 xmlFree(uri);
13171 }
Owen Taylor3473f882001-02-23 17:55:21 +000013172 return(ctxt);
13173}
13174
13175/************************************************************************
13176 * *
Daniel Veillard0161e632008-08-28 15:36:32 +000013177 * Front ends when parsing from a file *
Owen Taylor3473f882001-02-23 17:55:21 +000013178 * *
13179 ************************************************************************/
13180
13181/**
Daniel Veillard61b93382003-11-03 14:28:31 +000013182 * xmlCreateURLParserCtxt:
13183 * @filename: the filename or URL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013184 * @options: a combination of xmlParserOption
Owen Taylor3473f882001-02-23 17:55:21 +000013185 *
Daniel Veillard61b93382003-11-03 14:28:31 +000013186 * Create a parser context for a file or URL content.
Owen Taylor3473f882001-02-23 17:55:21 +000013187 * Automatic support for ZLIB/Compress compressed document is provided
Daniel Veillard61b93382003-11-03 14:28:31 +000013188 * by default if found at compile-time and for file accesses
Owen Taylor3473f882001-02-23 17:55:21 +000013189 *
13190 * Returns the new parser context or NULL
13191 */
13192xmlParserCtxtPtr
Daniel Veillard61b93382003-11-03 14:28:31 +000013193xmlCreateURLParserCtxt(const char *filename, int options)
Owen Taylor3473f882001-02-23 17:55:21 +000013194{
13195 xmlParserCtxtPtr ctxt;
13196 xmlParserInputPtr inputStream;
Owen Taylor3473f882001-02-23 17:55:21 +000013197 char *directory = NULL;
13198
Owen Taylor3473f882001-02-23 17:55:21 +000013199 ctxt = xmlNewParserCtxt();
13200 if (ctxt == NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +000013201 xmlErrMemory(NULL, "cannot allocate parser context");
Owen Taylor3473f882001-02-23 17:55:21 +000013202 return(NULL);
13203 }
13204
Daniel Veillarddf292f72005-01-16 19:00:15 +000013205 if (options)
Daniel Veillard37334572008-07-31 08:20:02 +000013206 xmlCtxtUseOptionsInternal(ctxt, options, NULL);
Daniel Veillarddf292f72005-01-16 19:00:15 +000013207 ctxt->linenumbers = 1;
Daniel Veillard37334572008-07-31 08:20:02 +000013208
Daniel Veillard4e9b1bc2003-06-09 10:30:33 +000013209 inputStream = xmlLoadExternalEntity(filename, NULL, ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000013210 if (inputStream == NULL) {
13211 xmlFreeParserCtxt(ctxt);
13212 return(NULL);
13213 }
13214
Owen Taylor3473f882001-02-23 17:55:21 +000013215 inputPush(ctxt, inputStream);
13216 if ((ctxt->directory == NULL) && (directory == NULL))
Igor Zlatkovic5f9fada2003-02-19 14:51:00 +000013217 directory = xmlParserGetDirectory(filename);
Owen Taylor3473f882001-02-23 17:55:21 +000013218 if ((ctxt->directory == NULL) && (directory != NULL))
13219 ctxt->directory = directory;
13220
13221 return(ctxt);
13222}
13223
Daniel Veillard61b93382003-11-03 14:28:31 +000013224/**
13225 * xmlCreateFileParserCtxt:
13226 * @filename: the filename
13227 *
13228 * Create a parser context for a file content.
13229 * Automatic support for ZLIB/Compress compressed document is provided
13230 * by default if found at compile-time.
13231 *
13232 * Returns the new parser context or NULL
13233 */
13234xmlParserCtxtPtr
13235xmlCreateFileParserCtxt(const char *filename)
13236{
13237 return(xmlCreateURLParserCtxt(filename, 0));
13238}
13239
Daniel Veillard81273902003-09-30 00:43:48 +000013240#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000013241/**
Daniel Veillarda293c322001-10-02 13:54:14 +000013242 * xmlSAXParseFileWithData:
Owen Taylor3473f882001-02-23 17:55:21 +000013243 * @sax: the SAX handler block
13244 * @filename: the filename
13245 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
13246 * documents
Daniel Veillarda293c322001-10-02 13:54:14 +000013247 * @data: the userdata
Owen Taylor3473f882001-02-23 17:55:21 +000013248 *
13249 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
13250 * compressed document is provided by default if found at compile-time.
13251 * It use the given SAX function block to handle the parsing callback.
13252 * If sax is NULL, fallback to the default DOM tree building routines.
13253 *
Daniel Veillarde19fc232002-04-22 16:01:24 +000013254 * User data (void *) is stored within the parser context in the
13255 * context's _private member, so it is available nearly everywhere in libxml
Daniel Veillarda293c322001-10-02 13:54:14 +000013256 *
Owen Taylor3473f882001-02-23 17:55:21 +000013257 * Returns the resulting document tree
13258 */
13259
13260xmlDocPtr
Daniel Veillarda293c322001-10-02 13:54:14 +000013261xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename,
13262 int recovery, void *data) {
Owen Taylor3473f882001-02-23 17:55:21 +000013263 xmlDocPtr ret;
13264 xmlParserCtxtPtr ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000013265
Daniel Veillard635ef722001-10-29 11:48:19 +000013266 xmlInitParser();
13267
Owen Taylor3473f882001-02-23 17:55:21 +000013268 ctxt = xmlCreateFileParserCtxt(filename);
13269 if (ctxt == NULL) {
13270 return(NULL);
13271 }
13272 if (sax != NULL) {
13273 if (ctxt->sax != NULL)
13274 xmlFree(ctxt->sax);
13275 ctxt->sax = sax;
Owen Taylor3473f882001-02-23 17:55:21 +000013276 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000013277 xmlDetectSAX2(ctxt);
Daniel Veillarda293c322001-10-02 13:54:14 +000013278 if (data!=NULL) {
Daniel Veillardc790bf42003-10-11 10:50:10 +000013279 ctxt->_private = data;
Daniel Veillarda293c322001-10-02 13:54:14 +000013280 }
Owen Taylor3473f882001-02-23 17:55:21 +000013281
Daniel Veillard37d2d162008-03-14 10:54:00 +000013282 if (ctxt->directory == NULL)
13283 ctxt->directory = xmlParserGetDirectory(filename);
Owen Taylor3473f882001-02-23 17:55:21 +000013284
Daniel Veillarddad3f682002-11-17 16:47:27 +000013285 ctxt->recovery = recovery;
13286
Owen Taylor3473f882001-02-23 17:55:21 +000013287 xmlParseDocument(ctxt);
13288
William M. Brackc07329e2003-09-08 01:57:30 +000013289 if ((ctxt->wellFormed) || recovery) {
13290 ret = ctxt->myDoc;
Daniel Veillardb65e12e2003-10-08 21:33:28 +000013291 if (ret != NULL) {
13292 if (ctxt->input->buf->compressed > 0)
13293 ret->compression = 9;
13294 else
13295 ret->compression = ctxt->input->buf->compressed;
13296 }
William M. Brackc07329e2003-09-08 01:57:30 +000013297 }
Owen Taylor3473f882001-02-23 17:55:21 +000013298 else {
13299 ret = NULL;
13300 xmlFreeDoc(ctxt->myDoc);
13301 ctxt->myDoc = NULL;
13302 }
13303 if (sax != NULL)
13304 ctxt->sax = NULL;
13305 xmlFreeParserCtxt(ctxt);
13306
13307 return(ret);
13308}
13309
13310/**
Daniel Veillarda293c322001-10-02 13:54:14 +000013311 * xmlSAXParseFile:
13312 * @sax: the SAX handler block
13313 * @filename: the filename
13314 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
13315 * documents
13316 *
13317 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
13318 * compressed document is provided by default if found at compile-time.
13319 * It use the given SAX function block to handle the parsing callback.
13320 * If sax is NULL, fallback to the default DOM tree building routines.
13321 *
13322 * Returns the resulting document tree
13323 */
13324
13325xmlDocPtr
13326xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
13327 int recovery) {
13328 return(xmlSAXParseFileWithData(sax,filename,recovery,NULL));
13329}
13330
13331/**
Owen Taylor3473f882001-02-23 17:55:21 +000013332 * xmlRecoverDoc:
13333 * @cur: a pointer to an array of xmlChar
13334 *
13335 * parse an XML in-memory document and build a tree.
Daniel Veillard2135fc22008-04-04 16:10:51 +000013336 * In the case the document is not Well Formed, a attempt to build a
13337 * tree is tried anyway
13338 *
13339 * Returns the resulting document tree or NULL in case of failure
Owen Taylor3473f882001-02-23 17:55:21 +000013340 */
13341
13342xmlDocPtr
13343xmlRecoverDoc(xmlChar *cur) {
13344 return(xmlSAXParseDoc(NULL, cur, 1));
13345}
13346
13347/**
13348 * xmlParseFile:
13349 * @filename: the filename
13350 *
13351 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
13352 * compressed document is provided by default if found at compile-time.
13353 *
Daniel Veillard5d96fff2001-08-31 14:55:30 +000013354 * Returns the resulting document tree if the file was wellformed,
13355 * NULL otherwise.
Owen Taylor3473f882001-02-23 17:55:21 +000013356 */
13357
13358xmlDocPtr
13359xmlParseFile(const char *filename) {
13360 return(xmlSAXParseFile(NULL, filename, 0));
13361}
13362
13363/**
13364 * xmlRecoverFile:
13365 * @filename: the filename
13366 *
13367 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
13368 * compressed document is provided by default if found at compile-time.
Daniel Veillard2135fc22008-04-04 16:10:51 +000013369 * In the case the document is not Well Formed, it attempts to build
13370 * a tree anyway
Owen Taylor3473f882001-02-23 17:55:21 +000013371 *
Daniel Veillard2135fc22008-04-04 16:10:51 +000013372 * Returns the resulting document tree or NULL in case of failure
Owen Taylor3473f882001-02-23 17:55:21 +000013373 */
13374
13375xmlDocPtr
13376xmlRecoverFile(const char *filename) {
13377 return(xmlSAXParseFile(NULL, filename, 1));
13378}
13379
13380
13381/**
13382 * xmlSetupParserForBuffer:
13383 * @ctxt: an XML parser context
13384 * @buffer: a xmlChar * buffer
13385 * @filename: a file name
13386 *
13387 * Setup the parser context to parse a new buffer; Clears any prior
13388 * contents from the parser context. The buffer parameter must not be
13389 * NULL, but the filename parameter can be
13390 */
13391void
13392xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
13393 const char* filename)
13394{
13395 xmlParserInputPtr input;
13396
Daniel Veillard36e5cd52004-11-02 14:52:23 +000013397 if ((ctxt == NULL) || (buffer == NULL))
13398 return;
13399
Owen Taylor3473f882001-02-23 17:55:21 +000013400 input = xmlNewInputStream(ctxt);
13401 if (input == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +000013402 xmlErrMemory(NULL, "parsing new buffer: out of memory\n");
Daniel Veillard36e5cd52004-11-02 14:52:23 +000013403 xmlClearParserCtxt(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000013404 return;
13405 }
13406
13407 xmlClearParserCtxt(ctxt);
13408 if (filename != NULL)
Daniel Veillardc3ca5ba2003-05-09 22:26:28 +000013409 input->filename = (char *) xmlCanonicPath((const xmlChar *)filename);
Owen Taylor3473f882001-02-23 17:55:21 +000013410 input->base = buffer;
13411 input->cur = buffer;
Daniel Veillard48b2f892001-02-25 16:11:03 +000013412 input->end = &buffer[xmlStrlen(buffer)];
Owen Taylor3473f882001-02-23 17:55:21 +000013413 inputPush(ctxt, input);
13414}
13415
13416/**
13417 * xmlSAXUserParseFile:
13418 * @sax: a SAX handler
13419 * @user_data: The user data returned on SAX callbacks
13420 * @filename: a file name
13421 *
13422 * parse an XML file and call the given SAX handler routines.
13423 * Automatic support for ZLIB/Compress compressed document is provided
13424 *
13425 * Returns 0 in case of success or a error number otherwise
13426 */
13427int
13428xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
13429 const char *filename) {
13430 int ret = 0;
13431 xmlParserCtxtPtr ctxt;
13432
13433 ctxt = xmlCreateFileParserCtxt(filename);
13434 if (ctxt == NULL) return -1;
Daniel Veillard092643b2003-09-25 14:29:29 +000013435 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Owen Taylor3473f882001-02-23 17:55:21 +000013436 xmlFree(ctxt->sax);
13437 ctxt->sax = sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000013438 xmlDetectSAX2(ctxt);
13439
Owen Taylor3473f882001-02-23 17:55:21 +000013440 if (user_data != NULL)
13441 ctxt->userData = user_data;
13442
13443 xmlParseDocument(ctxt);
13444
13445 if (ctxt->wellFormed)
13446 ret = 0;
13447 else {
13448 if (ctxt->errNo != 0)
13449 ret = ctxt->errNo;
13450 else
13451 ret = -1;
13452 }
13453 if (sax != NULL)
13454 ctxt->sax = NULL;
Daniel Veillard34099b42004-11-04 17:34:35 +000013455 if (ctxt->myDoc != NULL) {
13456 xmlFreeDoc(ctxt->myDoc);
13457 ctxt->myDoc = NULL;
13458 }
Owen Taylor3473f882001-02-23 17:55:21 +000013459 xmlFreeParserCtxt(ctxt);
13460
13461 return ret;
13462}
Daniel Veillard81273902003-09-30 00:43:48 +000013463#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000013464
13465/************************************************************************
13466 * *
13467 * Front ends when parsing from memory *
13468 * *
13469 ************************************************************************/
13470
13471/**
13472 * xmlCreateMemoryParserCtxt:
13473 * @buffer: a pointer to a char array
13474 * @size: the size of the array
13475 *
13476 * Create a parser context for an XML in-memory document.
13477 *
13478 * Returns the new parser context or NULL
13479 */
13480xmlParserCtxtPtr
Daniel Veillardfd7ddca2001-05-16 10:57:35 +000013481xmlCreateMemoryParserCtxt(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000013482 xmlParserCtxtPtr ctxt;
13483 xmlParserInputPtr input;
13484 xmlParserInputBufferPtr buf;
13485
13486 if (buffer == NULL)
13487 return(NULL);
13488 if (size <= 0)
13489 return(NULL);
13490
13491 ctxt = xmlNewParserCtxt();
13492 if (ctxt == NULL)
13493 return(NULL);
13494
Daniel Veillard53350552003-09-18 13:35:51 +000013495 /* TODO: xmlParserInputBufferCreateStatic, requires some serious changes */
Owen Taylor3473f882001-02-23 17:55:21 +000013496 buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
Daniel Veillarda7e05b42002-11-19 08:11:14 +000013497 if (buf == NULL) {
13498 xmlFreeParserCtxt(ctxt);
13499 return(NULL);
13500 }
Owen Taylor3473f882001-02-23 17:55:21 +000013501
13502 input = xmlNewInputStream(ctxt);
13503 if (input == NULL) {
Daniel Veillarda7e05b42002-11-19 08:11:14 +000013504 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000013505 xmlFreeParserCtxt(ctxt);
13506 return(NULL);
13507 }
13508
13509 input->filename = NULL;
13510 input->buf = buf;
13511 input->base = input->buf->buffer->content;
13512 input->cur = input->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +000013513 input->end = &input->buf->buffer->content[input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000013514
13515 inputPush(ctxt, input);
13516 return(ctxt);
13517}
13518
Daniel Veillard81273902003-09-30 00:43:48 +000013519#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000013520/**
Daniel Veillard8606bbb2002-11-12 12:36:52 +000013521 * xmlSAXParseMemoryWithData:
13522 * @sax: the SAX handler block
13523 * @buffer: an pointer to a char array
13524 * @size: the size of the array
13525 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
13526 * documents
13527 * @data: the userdata
13528 *
13529 * parse an XML in-memory block and use the given SAX function block
13530 * to handle the parsing callback. If sax is NULL, fallback to the default
13531 * DOM tree building routines.
13532 *
13533 * User data (void *) is stored within the parser context in the
13534 * context's _private member, so it is available nearly everywhere in libxml
13535 *
13536 * Returns the resulting document tree
13537 */
13538
13539xmlDocPtr
13540xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer,
13541 int size, int recovery, void *data) {
13542 xmlDocPtr ret;
13543 xmlParserCtxtPtr ctxt;
13544
13545 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
13546 if (ctxt == NULL) return(NULL);
13547 if (sax != NULL) {
13548 if (ctxt->sax != NULL)
13549 xmlFree(ctxt->sax);
13550 ctxt->sax = sax;
13551 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000013552 xmlDetectSAX2(ctxt);
Daniel Veillard8606bbb2002-11-12 12:36:52 +000013553 if (data!=NULL) {
13554 ctxt->_private=data;
13555 }
13556
Daniel Veillardadba5f12003-04-04 16:09:01 +000013557 ctxt->recovery = recovery;
13558
Daniel Veillard8606bbb2002-11-12 12:36:52 +000013559 xmlParseDocument(ctxt);
13560
13561 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
13562 else {
13563 ret = NULL;
13564 xmlFreeDoc(ctxt->myDoc);
13565 ctxt->myDoc = NULL;
13566 }
13567 if (sax != NULL)
13568 ctxt->sax = NULL;
13569 xmlFreeParserCtxt(ctxt);
13570
13571 return(ret);
13572}
13573
13574/**
Owen Taylor3473f882001-02-23 17:55:21 +000013575 * xmlSAXParseMemory:
13576 * @sax: the SAX handler block
13577 * @buffer: an pointer to a char array
13578 * @size: the size of the array
13579 * @recovery: work in recovery mode, i.e. tries to read not Well Formed
13580 * documents
13581 *
13582 * parse an XML in-memory block and use the given SAX function block
13583 * to handle the parsing callback. If sax is NULL, fallback to the default
13584 * DOM tree building routines.
13585 *
13586 * Returns the resulting document tree
13587 */
13588xmlDocPtr
Daniel Veillard50822cb2001-07-26 20:05:51 +000013589xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer,
13590 int size, int recovery) {
Daniel Veillard8606bbb2002-11-12 12:36:52 +000013591 return xmlSAXParseMemoryWithData(sax, buffer, size, recovery, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000013592}
13593
13594/**
13595 * xmlParseMemory:
13596 * @buffer: an pointer to a char array
13597 * @size: the size of the array
13598 *
13599 * parse an XML in-memory block and build a tree.
13600 *
13601 * Returns the resulting document tree
13602 */
13603
Daniel Veillard50822cb2001-07-26 20:05:51 +000013604xmlDocPtr xmlParseMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000013605 return(xmlSAXParseMemory(NULL, buffer, size, 0));
13606}
13607
13608/**
13609 * xmlRecoverMemory:
13610 * @buffer: an pointer to a char array
13611 * @size: the size of the array
13612 *
13613 * parse an XML in-memory block and build a tree.
Daniel Veillard2135fc22008-04-04 16:10:51 +000013614 * In the case the document is not Well Formed, an attempt to
13615 * build a tree is tried anyway
13616 *
13617 * Returns the resulting document tree or NULL in case of error
Owen Taylor3473f882001-02-23 17:55:21 +000013618 */
13619
Daniel Veillard50822cb2001-07-26 20:05:51 +000013620xmlDocPtr xmlRecoverMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000013621 return(xmlSAXParseMemory(NULL, buffer, size, 1));
13622}
13623
13624/**
13625 * xmlSAXUserParseMemory:
13626 * @sax: a SAX handler
13627 * @user_data: The user data returned on SAX callbacks
13628 * @buffer: an in-memory XML document input
13629 * @size: the length of the XML document in bytes
13630 *
13631 * A better SAX parsing routine.
13632 * parse an XML in-memory buffer and call the given SAX handler routines.
13633 *
13634 * Returns 0 in case of success or a error number otherwise
13635 */
13636int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardfd7ddca2001-05-16 10:57:35 +000013637 const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000013638 int ret = 0;
13639 xmlParserCtxtPtr ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000013640
13641 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
13642 if (ctxt == NULL) return -1;
Daniel Veillard3dcd3192007-08-14 13:46:54 +000013643 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
13644 xmlFree(ctxt->sax);
Daniel Veillard9e923512002-08-14 08:48:52 +000013645 ctxt->sax = sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000013646 xmlDetectSAX2(ctxt);
Daniel Veillard3dcd3192007-08-14 13:46:54 +000013647
Daniel Veillard30211a02001-04-26 09:33:18 +000013648 if (user_data != NULL)
13649 ctxt->userData = user_data;
Owen Taylor3473f882001-02-23 17:55:21 +000013650
13651 xmlParseDocument(ctxt);
13652
13653 if (ctxt->wellFormed)
13654 ret = 0;
13655 else {
13656 if (ctxt->errNo != 0)
13657 ret = ctxt->errNo;
13658 else
13659 ret = -1;
13660 }
Daniel Veillard3dcd3192007-08-14 13:46:54 +000013661 if (sax != NULL)
13662 ctxt->sax = NULL;
Daniel Veillard34099b42004-11-04 17:34:35 +000013663 if (ctxt->myDoc != NULL) {
13664 xmlFreeDoc(ctxt->myDoc);
13665 ctxt->myDoc = NULL;
13666 }
Owen Taylor3473f882001-02-23 17:55:21 +000013667 xmlFreeParserCtxt(ctxt);
13668
13669 return ret;
13670}
Daniel Veillard81273902003-09-30 00:43:48 +000013671#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000013672
13673/**
13674 * xmlCreateDocParserCtxt:
13675 * @cur: a pointer to an array of xmlChar
13676 *
13677 * Creates a parser context for an XML in-memory document.
13678 *
13679 * Returns the new parser context or NULL
13680 */
13681xmlParserCtxtPtr
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013682xmlCreateDocParserCtxt(const xmlChar *cur) {
Owen Taylor3473f882001-02-23 17:55:21 +000013683 int len;
13684
13685 if (cur == NULL)
13686 return(NULL);
13687 len = xmlStrlen(cur);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013688 return(xmlCreateMemoryParserCtxt((const char *)cur, len));
Owen Taylor3473f882001-02-23 17:55:21 +000013689}
13690
Daniel Veillard81273902003-09-30 00:43:48 +000013691#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000013692/**
13693 * xmlSAXParseDoc:
13694 * @sax: the SAX handler block
13695 * @cur: a pointer to an array of xmlChar
13696 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
13697 * documents
13698 *
13699 * parse an XML in-memory document and build a tree.
13700 * It use the given SAX function block to handle the parsing callback.
13701 * If sax is NULL, fallback to the default DOM tree building routines.
13702 *
13703 * Returns the resulting document tree
13704 */
13705
13706xmlDocPtr
Daniel Veillard7331e5c2005-03-31 14:59:00 +000013707xmlSAXParseDoc(xmlSAXHandlerPtr sax, const xmlChar *cur, int recovery) {
Owen Taylor3473f882001-02-23 17:55:21 +000013708 xmlDocPtr ret;
13709 xmlParserCtxtPtr ctxt;
Daniel Veillard38936062004-11-04 17:45:11 +000013710 xmlSAXHandlerPtr oldsax = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000013711
Daniel Veillard38936062004-11-04 17:45:11 +000013712 if (cur == NULL) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000013713
13714
13715 ctxt = xmlCreateDocParserCtxt(cur);
13716 if (ctxt == NULL) return(NULL);
13717 if (sax != NULL) {
Daniel Veillard38936062004-11-04 17:45:11 +000013718 oldsax = ctxt->sax;
Owen Taylor3473f882001-02-23 17:55:21 +000013719 ctxt->sax = sax;
13720 ctxt->userData = NULL;
13721 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000013722 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000013723
13724 xmlParseDocument(ctxt);
13725 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
13726 else {
13727 ret = NULL;
13728 xmlFreeDoc(ctxt->myDoc);
13729 ctxt->myDoc = NULL;
13730 }
Daniel Veillard34099b42004-11-04 17:34:35 +000013731 if (sax != NULL)
Daniel Veillard38936062004-11-04 17:45:11 +000013732 ctxt->sax = oldsax;
Owen Taylor3473f882001-02-23 17:55:21 +000013733 xmlFreeParserCtxt(ctxt);
13734
13735 return(ret);
13736}
13737
13738/**
13739 * xmlParseDoc:
13740 * @cur: a pointer to an array of xmlChar
13741 *
13742 * parse an XML in-memory document and build a tree.
13743 *
13744 * Returns the resulting document tree
13745 */
13746
13747xmlDocPtr
Daniel Veillard7331e5c2005-03-31 14:59:00 +000013748xmlParseDoc(const xmlChar *cur) {
Owen Taylor3473f882001-02-23 17:55:21 +000013749 return(xmlSAXParseDoc(NULL, cur, 0));
13750}
Daniel Veillard81273902003-09-30 00:43:48 +000013751#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000013752
Daniel Veillard81273902003-09-30 00:43:48 +000013753#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +000013754/************************************************************************
13755 * *
13756 * Specific function to keep track of entities references *
13757 * and used by the XSLT debugger *
13758 * *
13759 ************************************************************************/
13760
13761static xmlEntityReferenceFunc xmlEntityRefFunc = NULL;
13762
13763/**
13764 * xmlAddEntityReference:
13765 * @ent : A valid entity
13766 * @firstNode : A valid first node for children of entity
13767 * @lastNode : A valid last node of children entity
13768 *
13769 * Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY
13770 */
13771static void
13772xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
13773 xmlNodePtr lastNode)
13774{
13775 if (xmlEntityRefFunc != NULL) {
13776 (*xmlEntityRefFunc) (ent, firstNode, lastNode);
13777 }
13778}
13779
13780
13781/**
13782 * xmlSetEntityReferenceFunc:
Daniel Veillard01c13b52002-12-10 15:19:08 +000013783 * @func: A valid function
Daniel Veillard8107a222002-01-13 14:10:10 +000013784 *
13785 * Set the function to call call back when a xml reference has been made
13786 */
13787void
13788xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func)
13789{
13790 xmlEntityRefFunc = func;
13791}
Daniel Veillard81273902003-09-30 00:43:48 +000013792#endif /* LIBXML_LEGACY_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000013793
13794/************************************************************************
13795 * *
13796 * Miscellaneous *
13797 * *
13798 ************************************************************************/
13799
13800#ifdef LIBXML_XPATH_ENABLED
13801#include <libxml/xpath.h>
13802#endif
13803
Daniel Veillardffa3c742005-07-21 13:24:09 +000013804extern void XMLCDECL xmlGenericErrorDefaultFunc(void *ctx, const char *msg, ...);
Owen Taylor3473f882001-02-23 17:55:21 +000013805static int xmlParserInitialized = 0;
13806
13807/**
13808 * xmlInitParser:
13809 *
13810 * Initialization function for the XML parser.
13811 * This is not reentrant. Call once before processing in case of
13812 * use in multithreaded programs.
13813 */
13814
13815void
13816xmlInitParser(void) {
Daniel Veillard3c01b1d2001-10-17 15:58:35 +000013817 if (xmlParserInitialized != 0)
13818 return;
Owen Taylor3473f882001-02-23 17:55:21 +000013819
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000013820#ifdef LIBXML_THREAD_ENABLED
13821 __xmlGlobalInitMutexLock();
13822 if (xmlParserInitialized == 0) {
13823#endif
13824 if ((xmlGenericError == xmlGenericErrorDefaultFunc) ||
13825 (xmlGenericError == NULL))
13826 initGenericErrorDefaultFunc(NULL);
13827 xmlInitGlobals();
13828 xmlInitThreads();
13829 xmlInitMemory();
13830 xmlInitCharEncodingHandlers();
13831 xmlDefaultSAXHandlerInit();
13832 xmlRegisterDefaultInputCallbacks();
Daniel Veillarda9cce9c2003-09-29 13:20:24 +000013833#ifdef LIBXML_OUTPUT_ENABLED
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000013834 xmlRegisterDefaultOutputCallbacks();
Daniel Veillarda9cce9c2003-09-29 13:20:24 +000013835#endif /* LIBXML_OUTPUT_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000013836#ifdef LIBXML_HTML_ENABLED
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000013837 htmlInitAutoClose();
13838 htmlDefaultSAXHandlerInit();
Owen Taylor3473f882001-02-23 17:55:21 +000013839#endif
13840#ifdef LIBXML_XPATH_ENABLED
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000013841 xmlXPathInit();
Owen Taylor3473f882001-02-23 17:55:21 +000013842#endif
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000013843 xmlParserInitialized = 1;
13844#ifdef LIBXML_THREAD_ENABLED
13845 }
13846 __xmlGlobalInitMutexUnlock();
13847#endif
Owen Taylor3473f882001-02-23 17:55:21 +000013848}
13849
13850/**
13851 * xmlCleanupParser:
13852 *
Daniel Veillard05b37c62008-03-31 08:27:07 +000013853 * This function name is somewhat misleading. It does not clean up
13854 * parser state, it cleans up memory allocated by the library itself.
13855 * It is a cleanup function for the XML library. It tries to reclaim all
13856 * related global memory allocated for the library processing.
13857 * It doesn't deallocate any document related memory. One should
13858 * call xmlCleanupParser() only when the process has finished using
13859 * the library and all XML/HTML documents built with it.
13860 * See also xmlInitParser() which has the opposite function of preparing
13861 * the library for operations.
Daniel Veillard01101202009-02-21 09:22:04 +000013862 *
13863 * WARNING: if your application is multithreaded or has plugin support
13864 * calling this may crash the application if another thread or
13865 * a plugin is still using libxml2. It's sometimes very hard to
13866 * guess if libxml2 is in use in the application, some libraries
13867 * or plugins may use it without notice. In case of doubt abstain
13868 * from calling this function or do it just before calling exit()
13869 * to avoid leak reports from valgrind !
Owen Taylor3473f882001-02-23 17:55:21 +000013870 */
13871
13872void
13873xmlCleanupParser(void) {
Daniel Veillard7fb801f2003-08-17 21:07:26 +000013874 if (!xmlParserInitialized)
13875 return;
13876
Owen Taylor3473f882001-02-23 17:55:21 +000013877 xmlCleanupCharEncodingHandlers();
Daniel Veillarde2940dd2001-08-22 00:06:49 +000013878#ifdef LIBXML_CATALOG_ENABLED
13879 xmlCatalogCleanup();
13880#endif
Daniel Veillard14412512005-01-21 23:53:26 +000013881 xmlDictCleanup();
Daniel Veillard04054be2003-10-15 10:48:54 +000013882 xmlCleanupInputCallbacks();
13883#ifdef LIBXML_OUTPUT_ENABLED
13884 xmlCleanupOutputCallbacks();
13885#endif
Daniel Veillard36e5cd52004-11-02 14:52:23 +000013886#ifdef LIBXML_SCHEMAS_ENABLED
13887 xmlSchemaCleanupTypes();
Daniel Veillarddd6d3002004-11-03 14:20:29 +000013888 xmlRelaxNGCleanupTypes();
Daniel Veillard36e5cd52004-11-02 14:52:23 +000013889#endif
Daniel Veillard781ac8b2003-05-15 22:11:36 +000013890 xmlCleanupGlobals();
Daniel Veillard2b8c4a12003-10-02 22:28:19 +000013891 xmlResetLastError();
Daniel Veillard74c0e592003-11-25 07:01:38 +000013892 xmlCleanupThreads(); /* must be last if called not from the main thread */
William M. Brack72ee48d2003-12-30 08:30:19 +000013893 xmlCleanupMemory();
Daniel Veillardd0463562001-10-13 09:15:48 +000013894 xmlParserInitialized = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000013895}
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013896
13897/************************************************************************
13898 * *
13899 * New set (2.6.0) of simpler and more flexible APIs *
13900 * *
13901 ************************************************************************/
13902
/**
 * DICT_FREE:
 * @str:  a string
 *
 * Free a string if it is not owned by the "dict" dictionary in the
 * current scope. A variable named dict must be visible where the macro
 * is expanded; a NULL @str is ignored, and a NULL dict means the string
 * is always freed.
 *
 * The body is wrapped in do { } while (0) so that the macro behaves as
 * a single statement: it must be followed by a semicolon and can be
 * used safely as the body of an if/else.
 */
#define DICT_FREE(str)						\
    do {							\
	if ((str) && ((!dict) ||				\
	    (xmlDictOwns(dict, (const xmlChar *)(str)) == 0)))	\
	    xmlFree((char *)(str));				\
    } while (0)
13914
13915/**
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013916 * xmlCtxtReset:
13917 * @ctxt: an XML parser context
13918 *
13919 * Reset a parser context
13920 */
13921void
13922xmlCtxtReset(xmlParserCtxtPtr ctxt)
13923{
13924 xmlParserInputPtr input;
Daniel Veillard36e5cd52004-11-02 14:52:23 +000013925 xmlDictPtr dict;
13926
13927 if (ctxt == NULL)
13928 return;
13929
13930 dict = ctxt->dict;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013931
13932 while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
13933 xmlFreeInputStream(input);
13934 }
13935 ctxt->inputNr = 0;
13936 ctxt->input = NULL;
13937
13938 ctxt->spaceNr = 0;
Daniel Veillard2e620862007-06-12 08:18:21 +000013939 if (ctxt->spaceTab != NULL) {
13940 ctxt->spaceTab[0] = -1;
13941 ctxt->space = &ctxt->spaceTab[0];
13942 } else {
13943 ctxt->space = NULL;
13944 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013945
13946
13947 ctxt->nodeNr = 0;
13948 ctxt->node = NULL;
13949
13950 ctxt->nameNr = 0;
13951 ctxt->name = NULL;
13952
Daniel Veillarde96a2a42003-09-24 21:23:56 +000013953 DICT_FREE(ctxt->version);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013954 ctxt->version = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000013955 DICT_FREE(ctxt->encoding);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013956 ctxt->encoding = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000013957 DICT_FREE(ctxt->directory);
13958 ctxt->directory = NULL;
13959 DICT_FREE(ctxt->extSubURI);
13960 ctxt->extSubURI = NULL;
13961 DICT_FREE(ctxt->extSubSystem);
13962 ctxt->extSubSystem = NULL;
13963 if (ctxt->myDoc != NULL)
13964 xmlFreeDoc(ctxt->myDoc);
13965 ctxt->myDoc = NULL;
13966
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013967 ctxt->standalone = -1;
13968 ctxt->hasExternalSubset = 0;
13969 ctxt->hasPErefs = 0;
13970 ctxt->html = 0;
13971 ctxt->external = 0;
13972 ctxt->instate = XML_PARSER_START;
13973 ctxt->token = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013974
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013975 ctxt->wellFormed = 1;
13976 ctxt->nsWellFormed = 1;
Daniel Veillardae289182004-01-21 16:00:43 +000013977 ctxt->disableSAX = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013978 ctxt->valid = 1;
Daniel Veillard766c4f92004-03-26 10:48:29 +000013979#if 0
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013980 ctxt->vctxt.userData = ctxt;
13981 ctxt->vctxt.error = xmlParserValidityError;
13982 ctxt->vctxt.warning = xmlParserValidityWarning;
Daniel Veillard766c4f92004-03-26 10:48:29 +000013983#endif
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013984 ctxt->record_info = 0;
13985 ctxt->nbChars = 0;
13986 ctxt->checkIndex = 0;
13987 ctxt->inSubset = 0;
13988 ctxt->errNo = XML_ERR_OK;
13989 ctxt->depth = 0;
13990 ctxt->charset = XML_CHAR_ENCODING_UTF8;
13991 ctxt->catalogs = NULL;
Daniel Veillard4bf899b2008-08-20 17:04:30 +000013992 ctxt->nbentities = 0;
Daniel Veillard0161e632008-08-28 15:36:32 +000013993 ctxt->sizeentities = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013994 xmlInitNodeInfoSeq(&ctxt->node_seq);
13995
13996 if (ctxt->attsDefault != NULL) {
13997 xmlHashFree(ctxt->attsDefault, (xmlHashDeallocator) xmlFree);
13998 ctxt->attsDefault = NULL;
13999 }
14000 if (ctxt->attsSpecial != NULL) {
14001 xmlHashFree(ctxt->attsSpecial, NULL);
14002 ctxt->attsSpecial = NULL;
14003 }
14004
Daniel Veillard4432df22003-09-28 18:58:27 +000014005#ifdef LIBXML_CATALOG_ENABLED
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014006 if (ctxt->catalogs != NULL)
14007 xmlCatalogFreeLocal(ctxt->catalogs);
Daniel Veillard4432df22003-09-28 18:58:27 +000014008#endif
Daniel Veillardcc199e02003-10-24 21:11:48 +000014009 if (ctxt->lastError.code != XML_ERR_OK)
14010 xmlResetError(&ctxt->lastError);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014011}
14012
14013/**
Daniel Veillard9ba8e382003-10-28 21:31:45 +000014014 * xmlCtxtResetPush:
14015 * @ctxt: an XML parser context
14016 * @chunk: a pointer to an array of chars
14017 * @size: number of chars in the array
14018 * @filename: an optional file name or URI
14019 * @encoding: the document encoding, or NULL
14020 *
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000014021 * Reset a push parser context
14022 *
14023 * Returns 0 in case of success and 1 in case of error
Daniel Veillard9ba8e382003-10-28 21:31:45 +000014024 */
14025int
14026xmlCtxtResetPush(xmlParserCtxtPtr ctxt, const char *chunk,
14027 int size, const char *filename, const char *encoding)
14028{
14029 xmlParserInputPtr inputStream;
14030 xmlParserInputBufferPtr buf;
14031 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
14032
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000014033 if (ctxt == NULL)
14034 return(1);
14035
Daniel Veillard9ba8e382003-10-28 21:31:45 +000014036 if ((encoding == NULL) && (chunk != NULL) && (size >= 4))
14037 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
14038
14039 buf = xmlAllocParserInputBuffer(enc);
14040 if (buf == NULL)
14041 return(1);
14042
14043 if (ctxt == NULL) {
14044 xmlFreeParserInputBuffer(buf);
14045 return(1);
14046 }
14047
14048 xmlCtxtReset(ctxt);
14049
14050 if (ctxt->pushTab == NULL) {
14051 ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 *
14052 sizeof(xmlChar *));
14053 if (ctxt->pushTab == NULL) {
14054 xmlErrMemory(ctxt, NULL);
14055 xmlFreeParserInputBuffer(buf);
14056 return(1);
14057 }
14058 }
14059
14060 if (filename == NULL) {
14061 ctxt->directory = NULL;
14062 } else {
14063 ctxt->directory = xmlParserGetDirectory(filename);
14064 }
14065
14066 inputStream = xmlNewInputStream(ctxt);
14067 if (inputStream == NULL) {
14068 xmlFreeParserInputBuffer(buf);
14069 return(1);
14070 }
14071
14072 if (filename == NULL)
14073 inputStream->filename = NULL;
14074 else
14075 inputStream->filename = (char *)
14076 xmlCanonicPath((const xmlChar *) filename);
14077 inputStream->buf = buf;
14078 inputStream->base = inputStream->buf->buffer->content;
14079 inputStream->cur = inputStream->buf->buffer->content;
14080 inputStream->end =
14081 &inputStream->buf->buffer->content[inputStream->buf->buffer->use];
14082
14083 inputPush(ctxt, inputStream);
14084
14085 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
14086 (ctxt->input->buf != NULL)) {
14087 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
14088 int cur = ctxt->input->cur - ctxt->input->base;
14089
14090 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
14091
14092 ctxt->input->base = ctxt->input->buf->buffer->content + base;
14093 ctxt->input->cur = ctxt->input->base + cur;
14094 ctxt->input->end =
14095 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->
14096 use];
14097#ifdef DEBUG_PUSH
14098 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
14099#endif
14100 }
14101
14102 if (encoding != NULL) {
14103 xmlCharEncodingHandlerPtr hdlr;
14104
Daniel Veillard37334572008-07-31 08:20:02 +000014105 if (ctxt->encoding != NULL)
14106 xmlFree((xmlChar *) ctxt->encoding);
14107 ctxt->encoding = xmlStrdup((const xmlChar *) encoding);
14108
Daniel Veillard9ba8e382003-10-28 21:31:45 +000014109 hdlr = xmlFindCharEncodingHandler(encoding);
14110 if (hdlr != NULL) {
14111 xmlSwitchToEncoding(ctxt, hdlr);
14112 } else {
14113 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
14114 "Unsupported encoding %s\n", BAD_CAST encoding);
14115 }
14116 } else if (enc != XML_CHAR_ENCODING_NONE) {
14117 xmlSwitchEncoding(ctxt, enc);
14118 }
14119
14120 return(0);
14121}
14122
Daniel Veillard37334572008-07-31 08:20:02 +000014123
Daniel Veillard9ba8e382003-10-28 21:31:45 +000014124/**
Daniel Veillard37334572008-07-31 08:20:02 +000014125 * xmlCtxtUseOptionsInternal:
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014126 * @ctxt: an XML parser context
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014127 * @options: a combination of xmlParserOption
Daniel Veillard37334572008-07-31 08:20:02 +000014128 * @encoding: the user provided encoding to use
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014129 *
14130 * Applies the options to the parser context
14131 *
14132 * Returns 0 in case of success, the set of unknown or unimplemented options
14133 * in case of error.
14134 */
Daniel Veillard37334572008-07-31 08:20:02 +000014135static int
14136xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options, const char *encoding)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014137{
Daniel Veillard36e5cd52004-11-02 14:52:23 +000014138 if (ctxt == NULL)
14139 return(-1);
Daniel Veillard37334572008-07-31 08:20:02 +000014140 if (encoding != NULL) {
14141 if (ctxt->encoding != NULL)
14142 xmlFree((xmlChar *) ctxt->encoding);
14143 ctxt->encoding = xmlStrdup((const xmlChar *) encoding);
14144 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014145 if (options & XML_PARSE_RECOVER) {
14146 ctxt->recovery = 1;
14147 options -= XML_PARSE_RECOVER;
Daniel Veillardae0765b2008-07-31 19:54:59 +000014148 ctxt->options |= XML_PARSE_RECOVER;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014149 } else
14150 ctxt->recovery = 0;
14151 if (options & XML_PARSE_DTDLOAD) {
14152 ctxt->loadsubset = XML_DETECT_IDS;
14153 options -= XML_PARSE_DTDLOAD;
Daniel Veillardae0765b2008-07-31 19:54:59 +000014154 ctxt->options |= XML_PARSE_DTDLOAD;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014155 } else
14156 ctxt->loadsubset = 0;
14157 if (options & XML_PARSE_DTDATTR) {
14158 ctxt->loadsubset |= XML_COMPLETE_ATTRS;
14159 options -= XML_PARSE_DTDATTR;
Daniel Veillardae0765b2008-07-31 19:54:59 +000014160 ctxt->options |= XML_PARSE_DTDATTR;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014161 }
14162 if (options & XML_PARSE_NOENT) {
14163 ctxt->replaceEntities = 1;
14164 /* ctxt->loadsubset |= XML_DETECT_IDS; */
14165 options -= XML_PARSE_NOENT;
Daniel Veillardae0765b2008-07-31 19:54:59 +000014166 ctxt->options |= XML_PARSE_NOENT;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014167 } else
14168 ctxt->replaceEntities = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014169 if (options & XML_PARSE_PEDANTIC) {
14170 ctxt->pedantic = 1;
14171 options -= XML_PARSE_PEDANTIC;
Daniel Veillardae0765b2008-07-31 19:54:59 +000014172 ctxt->options |= XML_PARSE_PEDANTIC;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014173 } else
14174 ctxt->pedantic = 0;
14175 if (options & XML_PARSE_NOBLANKS) {
14176 ctxt->keepBlanks = 0;
14177 ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace;
14178 options -= XML_PARSE_NOBLANKS;
Daniel Veillardae0765b2008-07-31 19:54:59 +000014179 ctxt->options |= XML_PARSE_NOBLANKS;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014180 } else
14181 ctxt->keepBlanks = 1;
14182 if (options & XML_PARSE_DTDVALID) {
14183 ctxt->validate = 1;
14184 if (options & XML_PARSE_NOWARNING)
14185 ctxt->vctxt.warning = NULL;
14186 if (options & XML_PARSE_NOERROR)
14187 ctxt->vctxt.error = NULL;
14188 options -= XML_PARSE_DTDVALID;
Daniel Veillardae0765b2008-07-31 19:54:59 +000014189 ctxt->options |= XML_PARSE_DTDVALID;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014190 } else
14191 ctxt->validate = 0;
Daniel Veillard971771e2005-07-09 17:32:57 +000014192 if (options & XML_PARSE_NOWARNING) {
14193 ctxt->sax->warning = NULL;
14194 options -= XML_PARSE_NOWARNING;
14195 }
14196 if (options & XML_PARSE_NOERROR) {
14197 ctxt->sax->error = NULL;
14198 ctxt->sax->fatalError = NULL;
14199 options -= XML_PARSE_NOERROR;
14200 }
Daniel Veillard81273902003-09-30 00:43:48 +000014201#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014202 if (options & XML_PARSE_SAX1) {
14203 ctxt->sax->startElement = xmlSAX2StartElement;
14204 ctxt->sax->endElement = xmlSAX2EndElement;
14205 ctxt->sax->startElementNs = NULL;
14206 ctxt->sax->endElementNs = NULL;
14207 ctxt->sax->initialized = 1;
14208 options -= XML_PARSE_SAX1;
Daniel Veillardae0765b2008-07-31 19:54:59 +000014209 ctxt->options |= XML_PARSE_SAX1;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014210 }
Daniel Veillard81273902003-09-30 00:43:48 +000014211#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde96a2a42003-09-24 21:23:56 +000014212 if (options & XML_PARSE_NODICT) {
14213 ctxt->dictNames = 0;
14214 options -= XML_PARSE_NODICT;
Daniel Veillardae0765b2008-07-31 19:54:59 +000014215 ctxt->options |= XML_PARSE_NODICT;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000014216 } else {
14217 ctxt->dictNames = 1;
14218 }
Daniel Veillarddca8cc72003-09-26 13:53:14 +000014219 if (options & XML_PARSE_NOCDATA) {
14220 ctxt->sax->cdataBlock = NULL;
14221 options -= XML_PARSE_NOCDATA;
Daniel Veillardae0765b2008-07-31 19:54:59 +000014222 ctxt->options |= XML_PARSE_NOCDATA;
Daniel Veillarddca8cc72003-09-26 13:53:14 +000014223 }
14224 if (options & XML_PARSE_NSCLEAN) {
14225 ctxt->options |= XML_PARSE_NSCLEAN;
14226 options -= XML_PARSE_NSCLEAN;
14227 }
Daniel Veillard61b93382003-11-03 14:28:31 +000014228 if (options & XML_PARSE_NONET) {
14229 ctxt->options |= XML_PARSE_NONET;
14230 options -= XML_PARSE_NONET;
14231 }
Daniel Veillard8874b942005-08-25 13:19:21 +000014232 if (options & XML_PARSE_COMPACT) {
14233 ctxt->options |= XML_PARSE_COMPACT;
14234 options -= XML_PARSE_COMPACT;
14235 }
Daniel Veillard7e5c3f42008-07-29 16:12:31 +000014236 if (options & XML_PARSE_OLD10) {
14237 ctxt->options |= XML_PARSE_OLD10;
14238 options -= XML_PARSE_OLD10;
14239 }
Daniel Veillard8915c152008-08-26 13:05:34 +000014240 if (options & XML_PARSE_NOBASEFIX) {
14241 ctxt->options |= XML_PARSE_NOBASEFIX;
14242 options -= XML_PARSE_NOBASEFIX;
14243 }
14244 if (options & XML_PARSE_HUGE) {
14245 ctxt->options |= XML_PARSE_HUGE;
14246 options -= XML_PARSE_HUGE;
14247 }
Rob Richardsb9ed0172009-01-05 17:28:50 +000014248 if (options & XML_PARSE_OLDSAX) {
14249 ctxt->options |= XML_PARSE_OLDSAX;
14250 options -= XML_PARSE_OLDSAX;
14251 }
Daniel Veillard7ec29972003-10-31 14:36:36 +000014252 ctxt->linenumbers = 1;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014253 return (options);
14254}
14255
14256/**
Daniel Veillard37334572008-07-31 08:20:02 +000014257 * xmlCtxtUseOptions:
14258 * @ctxt: an XML parser context
14259 * @options: a combination of xmlParserOption
14260 *
14261 * Applies the options to the parser context
14262 *
14263 * Returns 0 in case of success, the set of unknown or unimplemented options
14264 * in case of error.
14265 */
14266int
14267xmlCtxtUseOptions(xmlParserCtxtPtr ctxt, int options)
14268{
14269 return(xmlCtxtUseOptionsInternal(ctxt, options, NULL));
14270}
14271
14272/**
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014273 * xmlDoRead:
14274 * @ctxt: an XML parser context
Daniel Veillard60942de2003-09-25 21:05:58 +000014275 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014276 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014277 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014278 * @reuse: keep the context for reuse
14279 *
14280 * Common front-end for the xmlRead functions
Daniel Veillard37334572008-07-31 08:20:02 +000014281 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014282 * Returns the resulting document tree or NULL
14283 */
14284static xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000014285xmlDoRead(xmlParserCtxtPtr ctxt, const char *URL, const char *encoding,
14286 int options, int reuse)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014287{
14288 xmlDocPtr ret;
Daniel Veillard37334572008-07-31 08:20:02 +000014289
14290 xmlCtxtUseOptionsInternal(ctxt, options, encoding);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014291 if (encoding != NULL) {
14292 xmlCharEncodingHandlerPtr hdlr;
14293
14294 hdlr = xmlFindCharEncodingHandler(encoding);
14295 if (hdlr != NULL)
14296 xmlSwitchToEncoding(ctxt, hdlr);
14297 }
Daniel Veillard60942de2003-09-25 21:05:58 +000014298 if ((URL != NULL) && (ctxt->input != NULL) &&
14299 (ctxt->input->filename == NULL))
14300 ctxt->input->filename = (char *) xmlStrdup((const xmlChar *) URL);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014301 xmlParseDocument(ctxt);
14302 if ((ctxt->wellFormed) || ctxt->recovery)
14303 ret = ctxt->myDoc;
14304 else {
14305 ret = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000014306 if (ctxt->myDoc != NULL) {
Daniel Veillarde96a2a42003-09-24 21:23:56 +000014307 xmlFreeDoc(ctxt->myDoc);
14308 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014309 }
Daniel Veillarde96a2a42003-09-24 21:23:56 +000014310 ctxt->myDoc = NULL;
14311 if (!reuse) {
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014312 xmlFreeParserCtxt(ctxt);
Daniel Veillarde96a2a42003-09-24 21:23:56 +000014313 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014314
14315 return (ret);
14316}
14317
14318/**
14319 * xmlReadDoc:
14320 * @cur: a pointer to a zero terminated string
Daniel Veillard60942de2003-09-25 21:05:58 +000014321 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014322 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014323 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014324 *
14325 * parse an XML in-memory document and build a tree.
14326 *
14327 * Returns the resulting document tree
14328 */
14329xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000014330xmlReadDoc(const xmlChar * cur, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014331{
14332 xmlParserCtxtPtr ctxt;
14333
14334 if (cur == NULL)
14335 return (NULL);
14336
14337 ctxt = xmlCreateDocParserCtxt(cur);
14338 if (ctxt == NULL)
14339 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000014340 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014341}
14342
14343/**
14344 * xmlReadFile:
14345 * @filename: a file or URL
14346 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014347 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014348 *
14349 * parse an XML file from the filesystem or the network.
14350 *
14351 * Returns the resulting document tree
14352 */
14353xmlDocPtr
14354xmlReadFile(const char *filename, const char *encoding, int options)
14355{
14356 xmlParserCtxtPtr ctxt;
14357
Daniel Veillard61b93382003-11-03 14:28:31 +000014358 ctxt = xmlCreateURLParserCtxt(filename, options);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014359 if (ctxt == NULL)
14360 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000014361 return (xmlDoRead(ctxt, NULL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014362}
14363
14364/**
14365 * xmlReadMemory:
14366 * @buffer: a pointer to a char array
14367 * @size: the size of the array
Daniel Veillard60942de2003-09-25 21:05:58 +000014368 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014369 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014370 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014371 *
14372 * parse an XML in-memory document and build a tree.
14373 *
14374 * Returns the resulting document tree
14375 */
14376xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000014377xmlReadMemory(const char *buffer, int size, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014378{
14379 xmlParserCtxtPtr ctxt;
14380
14381 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
14382 if (ctxt == NULL)
14383 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000014384 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014385}
14386
14387/**
14388 * xmlReadFd:
14389 * @fd: an open file descriptor
Daniel Veillard60942de2003-09-25 21:05:58 +000014390 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014391 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014392 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014393 *
14394 * parse an XML from a file descriptor and build a tree.
Daniel Veillard4bc5f432003-12-22 18:13:12 +000014395 * NOTE that the file descriptor will not be closed when the
14396 * reader is closed or reset.
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014397 *
14398 * Returns the resulting document tree
14399 */
14400xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000014401xmlReadFd(int fd, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014402{
14403 xmlParserCtxtPtr ctxt;
14404 xmlParserInputBufferPtr input;
14405 xmlParserInputPtr stream;
14406
14407 if (fd < 0)
14408 return (NULL);
14409
14410 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
14411 if (input == NULL)
14412 return (NULL);
Daniel Veillard4bc5f432003-12-22 18:13:12 +000014413 input->closecallback = NULL;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014414 ctxt = xmlNewParserCtxt();
14415 if (ctxt == NULL) {
14416 xmlFreeParserInputBuffer(input);
14417 return (NULL);
14418 }
14419 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
14420 if (stream == NULL) {
14421 xmlFreeParserInputBuffer(input);
14422 xmlFreeParserCtxt(ctxt);
14423 return (NULL);
14424 }
14425 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000014426 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014427}
14428
14429/**
14430 * xmlReadIO:
14431 * @ioread: an I/O read function
14432 * @ioclose: an I/O close function
14433 * @ioctx: an I/O handler
Daniel Veillard60942de2003-09-25 21:05:58 +000014434 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014435 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014436 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014437 *
14438 * parse an XML document from I/O functions and source and build a tree.
14439 *
14440 * Returns the resulting document tree
14441 */
14442xmlDocPtr
14443xmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
Daniel Veillard60942de2003-09-25 21:05:58 +000014444 void *ioctx, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014445{
14446 xmlParserCtxtPtr ctxt;
14447 xmlParserInputBufferPtr input;
14448 xmlParserInputPtr stream;
14449
14450 if (ioread == NULL)
14451 return (NULL);
14452
14453 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
14454 XML_CHAR_ENCODING_NONE);
14455 if (input == NULL)
14456 return (NULL);
14457 ctxt = xmlNewParserCtxt();
14458 if (ctxt == NULL) {
14459 xmlFreeParserInputBuffer(input);
14460 return (NULL);
14461 }
14462 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
14463 if (stream == NULL) {
14464 xmlFreeParserInputBuffer(input);
14465 xmlFreeParserCtxt(ctxt);
14466 return (NULL);
14467 }
14468 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000014469 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014470}
14471
14472/**
14473 * xmlCtxtReadDoc:
14474 * @ctxt: an XML parser context
14475 * @cur: a pointer to a zero terminated string
Daniel Veillard60942de2003-09-25 21:05:58 +000014476 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014477 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014478 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014479 *
14480 * parse an XML in-memory document and build a tree.
14481 * This reuses the existing @ctxt parser context
14482 *
14483 * Returns the resulting document tree
14484 */
14485xmlDocPtr
14486xmlCtxtReadDoc(xmlParserCtxtPtr ctxt, const xmlChar * cur,
Daniel Veillard60942de2003-09-25 21:05:58 +000014487 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014488{
14489 xmlParserInputPtr stream;
14490
14491 if (cur == NULL)
14492 return (NULL);
14493 if (ctxt == NULL)
14494 return (NULL);
14495
14496 xmlCtxtReset(ctxt);
14497
14498 stream = xmlNewStringInputStream(ctxt, cur);
14499 if (stream == NULL) {
14500 return (NULL);
14501 }
14502 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000014503 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014504}
14505
14506/**
14507 * xmlCtxtReadFile:
14508 * @ctxt: an XML parser context
14509 * @filename: a file or URL
14510 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014511 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014512 *
14513 * parse an XML file from the filesystem or the network.
14514 * This reuses the existing @ctxt parser context
14515 *
14516 * Returns the resulting document tree
14517 */
14518xmlDocPtr
14519xmlCtxtReadFile(xmlParserCtxtPtr ctxt, const char *filename,
14520 const char *encoding, int options)
14521{
14522 xmlParserInputPtr stream;
14523
14524 if (filename == NULL)
14525 return (NULL);
14526 if (ctxt == NULL)
14527 return (NULL);
14528
14529 xmlCtxtReset(ctxt);
14530
Daniel Veillard29614c72004-11-26 10:47:26 +000014531 stream = xmlLoadExternalEntity(filename, NULL, ctxt);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014532 if (stream == NULL) {
14533 return (NULL);
14534 }
14535 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000014536 return (xmlDoRead(ctxt, NULL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014537}
14538
14539/**
14540 * xmlCtxtReadMemory:
14541 * @ctxt: an XML parser context
14542 * @buffer: a pointer to a char array
14543 * @size: the size of the array
Daniel Veillard60942de2003-09-25 21:05:58 +000014544 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014545 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014546 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014547 *
14548 * parse an XML in-memory document and build a tree.
14549 * This reuses the existing @ctxt parser context
14550 *
14551 * Returns the resulting document tree
14552 */
14553xmlDocPtr
14554xmlCtxtReadMemory(xmlParserCtxtPtr ctxt, const char *buffer, int size,
Daniel Veillard60942de2003-09-25 21:05:58 +000014555 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014556{
14557 xmlParserInputBufferPtr input;
14558 xmlParserInputPtr stream;
14559
14560 if (ctxt == NULL)
14561 return (NULL);
14562 if (buffer == NULL)
14563 return (NULL);
14564
14565 xmlCtxtReset(ctxt);
14566
14567 input = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
14568 if (input == NULL) {
14569 return(NULL);
14570 }
14571
14572 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
14573 if (stream == NULL) {
14574 xmlFreeParserInputBuffer(input);
14575 return(NULL);
14576 }
14577
14578 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000014579 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014580}
14581
14582/**
14583 * xmlCtxtReadFd:
14584 * @ctxt: an XML parser context
14585 * @fd: an open file descriptor
Daniel Veillard60942de2003-09-25 21:05:58 +000014586 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014587 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014588 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014589 *
14590 * parse an XML from a file descriptor and build a tree.
14591 * This reuses the existing @ctxt parser context
Daniel Veillard4bc5f432003-12-22 18:13:12 +000014592 * NOTE that the file descriptor will not be closed when the
14593 * reader is closed or reset.
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014594 *
14595 * Returns the resulting document tree
14596 */
14597xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000014598xmlCtxtReadFd(xmlParserCtxtPtr ctxt, int fd,
14599 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014600{
14601 xmlParserInputBufferPtr input;
14602 xmlParserInputPtr stream;
14603
14604 if (fd < 0)
14605 return (NULL);
14606 if (ctxt == NULL)
14607 return (NULL);
14608
14609 xmlCtxtReset(ctxt);
14610
14611
14612 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
14613 if (input == NULL)
14614 return (NULL);
Daniel Veillard4bc5f432003-12-22 18:13:12 +000014615 input->closecallback = NULL;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014616 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
14617 if (stream == NULL) {
14618 xmlFreeParserInputBuffer(input);
14619 return (NULL);
14620 }
14621 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000014622 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014623}
14624
14625/**
14626 * xmlCtxtReadIO:
14627 * @ctxt: an XML parser context
14628 * @ioread: an I/O read function
14629 * @ioclose: an I/O close function
14630 * @ioctx: an I/O handler
Daniel Veillard60942de2003-09-25 21:05:58 +000014631 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014632 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014633 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014634 *
14635 * parse an XML document from I/O functions and source and build a tree.
14636 * This reuses the existing @ctxt parser context
14637 *
14638 * Returns the resulting document tree
14639 */
14640xmlDocPtr
14641xmlCtxtReadIO(xmlParserCtxtPtr ctxt, xmlInputReadCallback ioread,
14642 xmlInputCloseCallback ioclose, void *ioctx,
Daniel Veillard60942de2003-09-25 21:05:58 +000014643 const char *URL,
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014644 const char *encoding, int options)
14645{
14646 xmlParserInputBufferPtr input;
14647 xmlParserInputPtr stream;
14648
14649 if (ioread == NULL)
14650 return (NULL);
14651 if (ctxt == NULL)
14652 return (NULL);
14653
14654 xmlCtxtReset(ctxt);
14655
14656 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
14657 XML_CHAR_ENCODING_NONE);
14658 if (input == NULL)
14659 return (NULL);
14660 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
14661 if (stream == NULL) {
14662 xmlFreeParserInputBuffer(input);
14663 return (NULL);
14664 }
14665 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000014666 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014667}
Daniel Veillard5d4644e2005-04-01 13:11:58 +000014668
14669#define bottom_parser
14670#include "elfgcchack.h"