blob: 65af5252e0d713cf2eeb87bfe2b446854bfce6c3 [file] [log] [blame]
Owen Taylor3473f882001-02-23 17:55:21 +00001/*
2 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
3 * implemented on top of the SAX interfaces
4 *
5 * References:
6 * The XML specification:
7 * http://www.w3.org/TR/REC-xml
8 * Original 1.0 version:
9 * http://www.w3.org/TR/1998/REC-xml-19980210
10 * XML second edition working draft
11 * http://www.w3.org/TR/2000/WD-xml-2e-20000814
12 *
13 * Okay this is a big file, the parser core is around 7000 lines, then it
14 * is followed by the progressive parser top routines, then the various
Daniel Veillardcbaf3992001-12-31 16:16:02 +000015 * high level APIs to call the parser and a few miscellaneous functions.
Owen Taylor3473f882001-02-23 17:55:21 +000016 * A number of helper functions and deprecated ones have been moved to
17 * parserInternals.c to reduce this file size.
18 * As much as possible the functions are associated with their relative
19 * production in the XML specification. A few productions defining the
20 * different ranges of character are actually implanted either in
21 * parserInternals.h or parserInternals.c
22 * The DOM tree build is realized from the default SAX callbacks in
23 * the module SAX.c.
24 * The routines doing the validation checks are in valid.c and called either
Daniel Veillardcbaf3992001-12-31 16:16:02 +000025 * from the SAX callbacks or as standalone functions using a preparsed
Owen Taylor3473f882001-02-23 17:55:21 +000026 * document.
27 *
28 * See Copyright for the status of this software.
29 *
Daniel Veillardc5d64342001-06-24 12:13:24 +000030 * daniel@veillard.com
Owen Taylor3473f882001-02-23 17:55:21 +000031 */
32
Daniel Veillard34ce8be2002-03-18 19:37:11 +000033#define IN_LIBXML
Bjorn Reese70a9da52001-04-21 16:57:29 +000034#include "libxml.h"
35
Daniel Veillard3c5ed912002-01-08 10:36:16 +000036#if defined(WIN32) && !defined (__CYGWIN__)
Owen Taylor3473f882001-02-23 17:55:21 +000037#define XML_DIR_SEP '\\'
38#else
Owen Taylor3473f882001-02-23 17:55:21 +000039#define XML_DIR_SEP '/'
40#endif
41
Owen Taylor3473f882001-02-23 17:55:21 +000042#include <stdlib.h>
43#include <string.h>
Aleksey Sanine7acf432003-10-02 20:05:27 +000044#include <stdarg.h>
Owen Taylor3473f882001-02-23 17:55:21 +000045#include <libxml/xmlmemory.h>
Daniel Veillardd0463562001-10-13 09:15:48 +000046#include <libxml/threads.h>
47#include <libxml/globals.h>
Owen Taylor3473f882001-02-23 17:55:21 +000048#include <libxml/tree.h>
49#include <libxml/parser.h>
50#include <libxml/parserInternals.h>
51#include <libxml/valid.h>
52#include <libxml/entities.h>
53#include <libxml/xmlerror.h>
54#include <libxml/encoding.h>
55#include <libxml/xmlIO.h>
56#include <libxml/uri.h>
Daniel Veillard5d90b6c2001-08-22 14:29:45 +000057#ifdef LIBXML_CATALOG_ENABLED
58#include <libxml/catalog.h>
59#endif
William M. Brack1d8c9b22004-12-25 10:14:57 +000060#ifdef LIBXML_SCHEMAS_ENABLED
61#include <libxml/xmlschemastypes.h>
62#include <libxml/relaxng.h>
63#endif
Owen Taylor3473f882001-02-23 17:55:21 +000064#ifdef HAVE_CTYPE_H
65#include <ctype.h>
66#endif
67#ifdef HAVE_STDLIB_H
68#include <stdlib.h>
69#endif
70#ifdef HAVE_SYS_STAT_H
71#include <sys/stat.h>
72#endif
73#ifdef HAVE_FCNTL_H
74#include <fcntl.h>
75#endif
76#ifdef HAVE_UNISTD_H
77#include <unistd.h>
78#endif
79#ifdef HAVE_ZLIB_H
80#include <zlib.h>
81#endif
82
Daniel Veillard0161e632008-08-28 15:36:32 +000083static void
84xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info);
85
86/************************************************************************
87 * *
88 * Arbitrary limits set in the parser. See XML_PARSE_HUGE *
89 * *
90 ************************************************************************/
91
92#define XML_PARSER_BIG_ENTITY 1000
93#define XML_PARSER_LOT_ENTITY 5000
94
/*
 * XML_PARSER_NON_LINEAR is the threshold where the ratio of parsed entity
 * replacement over the size in bytes of the input indicates that you have
 * an exponential behaviour. A value of 10 corresponds to at least 3 entity
 * replacements per byte of input.
 */
101#define XML_PARSER_NON_LINEAR 10
102
103/*
104 * xmlParserEntityCheck
105 *
106 * Function to check non-linear entity expansion behaviour
107 * This is here to detect and stop exponential linear entity expansion
108 * This is not a limitation of the parser but a safety
109 * boundary feature. It can be disabled with the XML_PARSE_HUGE
110 * parser option.
111 */
112static int
113xmlParserEntityCheck(xmlParserCtxtPtr ctxt, unsigned long size,
114 xmlEntityPtr ent)
115{
Daniel Veillardcba68392008-08-29 12:43:40 +0000116 unsigned long consumed = 0;
Daniel Veillard0161e632008-08-28 15:36:32 +0000117
118 if ((ctxt == NULL) || (ctxt->options & XML_PARSE_HUGE))
119 return (0);
120 if (ctxt->lastError.code == XML_ERR_ENTITY_LOOP)
121 return (1);
122 if (size != 0) {
123 /*
124 * Do the check based on the replacement size of the entity
125 */
126 if (size < XML_PARSER_BIG_ENTITY)
127 return(0);
128
129 /*
130 * A limit on the amount of text data reasonably used
131 */
132 if (ctxt->input != NULL) {
133 consumed = ctxt->input->consumed +
134 (ctxt->input->cur - ctxt->input->base);
135 }
136 consumed += ctxt->sizeentities;
137
138 if ((size < XML_PARSER_NON_LINEAR * consumed) &&
139 (ctxt->nbentities * 3 < XML_PARSER_NON_LINEAR * consumed))
140 return (0);
141 } else if (ent != NULL) {
142 /*
143 * use the number of parsed entities in the replacement
144 */
145 size = ent->checked;
146
147 /*
148 * The amount of data parsed counting entities size only once
149 */
150 if (ctxt->input != NULL) {
151 consumed = ctxt->input->consumed +
152 (ctxt->input->cur - ctxt->input->base);
153 }
154 consumed += ctxt->sizeentities;
155
156 /*
157 * Check the density of entities for the amount of data
158 * knowing an entity reference will take at least 3 bytes
159 */
160 if (size * 3 < consumed * XML_PARSER_NON_LINEAR)
161 return (0);
162 } else {
163 /*
164 * strange we got no data for checking just return
165 */
166 return (0);
167 }
168
169 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
170 return (1);
171}
172
Daniel Veillard3b2e4e12003-02-03 08:52:58 +0000173/**
Daniel Veillard4aede2e2003-10-17 12:43:59 +0000174 * xmlParserMaxDepth:
Daniel Veillard3b2e4e12003-02-03 08:52:58 +0000175 *
Daniel Veillard8915c152008-08-26 13:05:34 +0000176 * arbitrary depth limit for the XML documents that we allow to
177 * process. This is not a limitation of the parser but a safety
178 * boundary feature. It can be disabled with the XML_PARSE_HUGE
179 * parser option.
Daniel Veillard3b2e4e12003-02-03 08:52:58 +0000180 */
Daniel Veillard8915c152008-08-26 13:05:34 +0000181unsigned int xmlParserMaxDepth = 256;
Owen Taylor3473f882001-02-23 17:55:21 +0000182
Daniel Veillard0fb18932003-09-07 09:14:37 +0000183
Daniel Veillard0161e632008-08-28 15:36:32 +0000184
185#define SAX2 1
Daniel Veillard21a0f912001-02-25 19:54:14 +0000186#define XML_PARSER_BIG_BUFFER_SIZE 300
Owen Taylor3473f882001-02-23 17:55:21 +0000187#define XML_PARSER_BUFFER_SIZE 100
Daniel Veillard5997aca2002-03-18 18:36:20 +0000188#define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"
189
Owen Taylor3473f882001-02-23 17:55:21 +0000190/*
Owen Taylor3473f882001-02-23 17:55:21 +0000191 * List of XML prefixed PI allowed by W3C specs
192 */
193
Daniel Veillardb44025c2001-10-11 22:55:55 +0000194static const char *xmlW3CPIs[] = {
Owen Taylor3473f882001-02-23 17:55:21 +0000195 "xml-stylesheet",
196 NULL
197};
198
Daniel Veillarda07050d2003-10-19 14:46:32 +0000199
Owen Taylor3473f882001-02-23 17:55:21 +0000200/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
Owen Taylor3473f882001-02-23 17:55:21 +0000201xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
202 const xmlChar **str);
203
Daniel Veillard7d515752003-09-26 19:12:37 +0000204static xmlParserErrors
Daniel Veillarda97a19b2001-05-20 13:19:52 +0000205xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
206 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +0000207 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +0000208 const xmlChar *ID, xmlNodePtr *list);
Owen Taylor3473f882001-02-23 17:55:21 +0000209
Daniel Veillard37334572008-07-31 08:20:02 +0000210static int
211xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options,
212 const char *encoding);
Daniel Veillard81273902003-09-30 00:43:48 +0000213#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +0000214static void
215xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
216 xmlNodePtr lastNode);
Daniel Veillard81273902003-09-30 00:43:48 +0000217#endif /* LIBXML_LEGACY_ENABLED */
Daniel Veillard8107a222002-01-13 14:10:10 +0000218
Daniel Veillard7d515752003-09-26 19:12:37 +0000219static xmlParserErrors
Daniel Veillard328f48c2002-11-15 15:24:34 +0000220xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
221 const xmlChar *string, void *user_data, xmlNodePtr *lst);
Daniel Veillarde57ec792003-09-10 10:50:59 +0000222
Daniel Veillard8bf64ae2008-03-24 20:45:21 +0000223static int
224xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity);
225
Daniel Veillarde57ec792003-09-10 10:50:59 +0000226/************************************************************************
227 * *
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000228 * Some factorized error routines *
229 * *
230 ************************************************************************/
231
232/**
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000233 * xmlErrAttributeDup:
234 * @ctxt: an XML parser context
235 * @prefix: the attribute prefix
236 * @localname: the attribute localname
237 *
238 * Handle a redefinition of attribute error
239 */
240static void
241xmlErrAttributeDup(xmlParserCtxtPtr ctxt, const xmlChar * prefix,
242 const xmlChar * localname)
243{
Daniel Veillard157fee02003-10-31 10:36:03 +0000244 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
245 (ctxt->instate == XML_PARSER_EOF))
246 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000247 if (ctxt != NULL)
248 ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
Daniel Veillard2b8c4a12003-10-02 22:28:19 +0000249 if (prefix == NULL)
Daniel Veillard659e71e2003-10-10 14:10:40 +0000250 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000251 ctxt->errNo, XML_ERR_FATAL, NULL, 0,
252 (const char *) localname, NULL, NULL, 0, 0,
253 "Attribute %s redefined\n", localname);
Daniel Veillard2b8c4a12003-10-02 22:28:19 +0000254 else
Daniel Veillard659e71e2003-10-10 14:10:40 +0000255 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000256 ctxt->errNo, XML_ERR_FATAL, NULL, 0,
257 (const char *) prefix, (const char *) localname,
258 NULL, 0, 0, "Attribute %s:%s redefined\n", prefix,
259 localname);
Daniel Veillard30e76072006-03-09 14:13:55 +0000260 if (ctxt != NULL) {
261 ctxt->wellFormed = 0;
262 if (ctxt->recovery == 0)
263 ctxt->disableSAX = 1;
264 }
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000265}
266
267/**
268 * xmlFatalErr:
269 * @ctxt: an XML parser context
270 * @error: the error number
271 * @extra: extra information string
272 *
273 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
274 */
275static void
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000276xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info)
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000277{
278 const char *errmsg;
279
Daniel Veillard157fee02003-10-31 10:36:03 +0000280 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
281 (ctxt->instate == XML_PARSER_EOF))
282 return;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000283 switch (error) {
284 case XML_ERR_INVALID_HEX_CHARREF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000285 errmsg = "CharRef: invalid hexadecimal value\n";
286 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000287 case XML_ERR_INVALID_DEC_CHARREF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000288 errmsg = "CharRef: invalid decimal value\n";
289 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000290 case XML_ERR_INVALID_CHARREF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000291 errmsg = "CharRef: invalid value\n";
292 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000293 case XML_ERR_INTERNAL_ERROR:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000294 errmsg = "internal error";
295 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000296 case XML_ERR_PEREF_AT_EOF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000297 errmsg = "PEReference at end of document\n";
298 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000299 case XML_ERR_PEREF_IN_PROLOG:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000300 errmsg = "PEReference in prolog\n";
301 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000302 case XML_ERR_PEREF_IN_EPILOG:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000303 errmsg = "PEReference in epilog\n";
304 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000305 case XML_ERR_PEREF_NO_NAME:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000306 errmsg = "PEReference: no name\n";
307 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000308 case XML_ERR_PEREF_SEMICOL_MISSING:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000309 errmsg = "PEReference: expecting ';'\n";
310 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000311 case XML_ERR_ENTITY_LOOP:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000312 errmsg = "Detected an entity reference loop\n";
313 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000314 case XML_ERR_ENTITY_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000315 errmsg = "EntityValue: \" or ' expected\n";
316 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000317 case XML_ERR_ENTITY_PE_INTERNAL:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000318 errmsg = "PEReferences forbidden in internal subset\n";
319 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000320 case XML_ERR_ENTITY_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000321 errmsg = "EntityValue: \" or ' expected\n";
322 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000323 case XML_ERR_ATTRIBUTE_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000324 errmsg = "AttValue: \" or ' expected\n";
325 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000326 case XML_ERR_LT_IN_ATTRIBUTE:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000327 errmsg = "Unescaped '<' not allowed in attributes values\n";
328 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000329 case XML_ERR_LITERAL_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000330 errmsg = "SystemLiteral \" or ' expected\n";
331 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000332 case XML_ERR_LITERAL_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000333 errmsg = "Unfinished System or Public ID \" or ' expected\n";
334 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000335 case XML_ERR_MISPLACED_CDATA_END:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000336 errmsg = "Sequence ']]>' not allowed in content\n";
337 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000338 case XML_ERR_URI_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000339 errmsg = "SYSTEM or PUBLIC, the URI is missing\n";
340 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000341 case XML_ERR_PUBID_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000342 errmsg = "PUBLIC, the Public Identifier is missing\n";
343 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000344 case XML_ERR_HYPHEN_IN_COMMENT:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000345 errmsg = "Comment must not contain '--' (double-hyphen)\n";
346 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000347 case XML_ERR_PI_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000348 errmsg = "xmlParsePI : no target name\n";
349 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000350 case XML_ERR_RESERVED_XML_NAME:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000351 errmsg = "Invalid PI name\n";
352 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000353 case XML_ERR_NOTATION_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000354 errmsg = "NOTATION: Name expected here\n";
355 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000356 case XML_ERR_NOTATION_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000357 errmsg = "'>' required to close NOTATION declaration\n";
358 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000359 case XML_ERR_VALUE_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000360 errmsg = "Entity value required\n";
361 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000362 case XML_ERR_URI_FRAGMENT:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000363 errmsg = "Fragment not allowed";
364 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000365 case XML_ERR_ATTLIST_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000366 errmsg = "'(' required to start ATTLIST enumeration\n";
367 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000368 case XML_ERR_NMTOKEN_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000369 errmsg = "NmToken expected in ATTLIST enumeration\n";
370 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000371 case XML_ERR_ATTLIST_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000372 errmsg = "')' required to finish ATTLIST enumeration\n";
373 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000374 case XML_ERR_MIXED_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000375 errmsg = "MixedContentDecl : '|' or ')*' expected\n";
376 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000377 case XML_ERR_PCDATA_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000378 errmsg = "MixedContentDecl : '#PCDATA' expected\n";
379 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000380 case XML_ERR_ELEMCONTENT_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000381 errmsg = "ContentDecl : Name or '(' expected\n";
382 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000383 case XML_ERR_ELEMCONTENT_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000384 errmsg = "ContentDecl : ',' '|' or ')' expected\n";
385 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000386 case XML_ERR_PEREF_IN_INT_SUBSET:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000387 errmsg =
388 "PEReference: forbidden within markup decl in internal subset\n";
389 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000390 case XML_ERR_GT_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000391 errmsg = "expected '>'\n";
392 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000393 case XML_ERR_CONDSEC_INVALID:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000394 errmsg = "XML conditional section '[' expected\n";
395 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000396 case XML_ERR_EXT_SUBSET_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000397 errmsg = "Content error in the external subset\n";
398 break;
399 case XML_ERR_CONDSEC_INVALID_KEYWORD:
400 errmsg =
401 "conditional section INCLUDE or IGNORE keyword expected\n";
402 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000403 case XML_ERR_CONDSEC_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000404 errmsg = "XML conditional section not closed\n";
405 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000406 case XML_ERR_XMLDECL_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000407 errmsg = "Text declaration '<?xml' required\n";
408 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000409 case XML_ERR_XMLDECL_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000410 errmsg = "parsing XML declaration: '?>' expected\n";
411 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000412 case XML_ERR_EXT_ENTITY_STANDALONE:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000413 errmsg = "external parsed entities cannot be standalone\n";
414 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000415 case XML_ERR_ENTITYREF_SEMICOL_MISSING:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000416 errmsg = "EntityRef: expecting ';'\n";
417 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000418 case XML_ERR_DOCTYPE_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000419 errmsg = "DOCTYPE improperly terminated\n";
420 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000421 case XML_ERR_LTSLASH_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000422 errmsg = "EndTag: '</' not found\n";
423 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000424 case XML_ERR_EQUAL_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000425 errmsg = "expected '='\n";
426 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000427 case XML_ERR_STRING_NOT_CLOSED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000428 errmsg = "String not closed expecting \" or '\n";
429 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000430 case XML_ERR_STRING_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000431 errmsg = "String not started expecting ' or \"\n";
432 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000433 case XML_ERR_ENCODING_NAME:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000434 errmsg = "Invalid XML encoding name\n";
435 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000436 case XML_ERR_STANDALONE_VALUE:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000437 errmsg = "standalone accepts only 'yes' or 'no'\n";
438 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000439 case XML_ERR_DOCUMENT_EMPTY:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000440 errmsg = "Document is empty\n";
441 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000442 case XML_ERR_DOCUMENT_END:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000443 errmsg = "Extra content at the end of the document\n";
444 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000445 case XML_ERR_NOT_WELL_BALANCED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000446 errmsg = "chunk is not well balanced\n";
447 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000448 case XML_ERR_EXTRA_CONTENT:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000449 errmsg = "extra content at the end of well balanced chunk\n";
450 break;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000451 case XML_ERR_VERSION_MISSING:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000452 errmsg = "Malformed declaration expecting version\n";
453 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000454#if 0
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000455 case:
456 errmsg = "\n";
457 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000458#endif
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000459 default:
460 errmsg = "Unregistered error message\n";
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000461 }
Daniel Veillard30e76072006-03-09 14:13:55 +0000462 if (ctxt != NULL)
463 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000464 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000465 XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, errmsg,
466 info);
Daniel Veillard30e76072006-03-09 14:13:55 +0000467 if (ctxt != NULL) {
468 ctxt->wellFormed = 0;
469 if (ctxt->recovery == 0)
470 ctxt->disableSAX = 1;
471 }
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000472}
473
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000474/**
475 * xmlFatalErrMsg:
476 * @ctxt: an XML parser context
477 * @error: the error number
478 * @msg: the error message
479 *
480 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
481 */
482static void
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000483xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
484 const char *msg)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000485{
Daniel Veillard157fee02003-10-31 10:36:03 +0000486 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
487 (ctxt->instate == XML_PARSER_EOF))
488 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000489 if (ctxt != NULL)
490 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000491 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000492 XML_ERR_FATAL, NULL, 0, NULL, NULL, NULL, 0, 0, msg);
Daniel Veillard30e76072006-03-09 14:13:55 +0000493 if (ctxt != NULL) {
494 ctxt->wellFormed = 0;
495 if (ctxt->recovery == 0)
496 ctxt->disableSAX = 1;
497 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000498}
499
500/**
Daniel Veillard24eb9782003-10-04 21:08:09 +0000501 * xmlWarningMsg:
502 * @ctxt: an XML parser context
503 * @error: the error number
504 * @msg: the error message
505 * @str1: extra data
506 * @str2: extra data
507 *
508 * Handle a warning.
509 */
510static void
511xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
512 const char *msg, const xmlChar *str1, const xmlChar *str2)
513{
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000514 xmlStructuredErrorFunc schannel = NULL;
Daniel Veillard34e3f642008-07-29 09:02:27 +0000515
Daniel Veillard157fee02003-10-31 10:36:03 +0000516 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
517 (ctxt->instate == XML_PARSER_EOF))
518 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000519 if ((ctxt != NULL) && (ctxt->sax != NULL) &&
520 (ctxt->sax->initialized == XML_SAX2_MAGIC))
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000521 schannel = ctxt->sax->serror;
522 __xmlRaiseError(schannel,
Daniel Veillard659e71e2003-10-10 14:10:40 +0000523 (ctxt->sax) ? ctxt->sax->warning : NULL,
524 ctxt->userData,
Daniel Veillard24eb9782003-10-04 21:08:09 +0000525 ctxt, NULL, XML_FROM_PARSER, error,
526 XML_ERR_WARNING, NULL, 0,
527 (const char *) str1, (const char *) str2, NULL, 0, 0,
528 msg, (const char *) str1, (const char *) str2);
529}
530
531/**
532 * xmlValidityError:
533 * @ctxt: an XML parser context
534 * @error: the error number
535 * @msg: the error message
536 * @str1: extra data
537 *
Daniel Veillardf88d8cf2003-12-08 10:25:02 +0000538 * Handle a validity error.
Daniel Veillard24eb9782003-10-04 21:08:09 +0000539 */
540static void
541xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error,
Daniel Veillardae0765b2008-07-31 19:54:59 +0000542 const char *msg, const xmlChar *str1, const xmlChar *str2)
Daniel Veillard24eb9782003-10-04 21:08:09 +0000543{
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000544 xmlStructuredErrorFunc schannel = NULL;
Daniel Veillard157fee02003-10-31 10:36:03 +0000545
546 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
547 (ctxt->instate == XML_PARSER_EOF))
548 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000549 if (ctxt != NULL) {
550 ctxt->errNo = error;
551 if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC))
552 schannel = ctxt->sax->serror;
553 }
Daniel Veillardc790bf42003-10-11 10:50:10 +0000554 __xmlRaiseError(schannel,
Daniel Veillard659e71e2003-10-10 14:10:40 +0000555 ctxt->vctxt.error, ctxt->vctxt.userData,
Daniel Veillard24eb9782003-10-04 21:08:09 +0000556 ctxt, NULL, XML_FROM_DTD, error,
557 XML_ERR_ERROR, NULL, 0, (const char *) str1,
Daniel Veillardae0765b2008-07-31 19:54:59 +0000558 (const char *) str2, NULL, 0, 0,
559 msg, (const char *) str1, (const char *) str2);
Daniel Veillard30e76072006-03-09 14:13:55 +0000560 if (ctxt != NULL) {
561 ctxt->valid = 0;
562 }
Daniel Veillard24eb9782003-10-04 21:08:09 +0000563}
564
565/**
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000566 * xmlFatalErrMsgInt:
567 * @ctxt: an XML parser context
568 * @error: the error number
569 * @msg: the error message
570 * @val: an integer value
571 *
572 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
573 */
574static void
575xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000576 const char *msg, int val)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000577{
Daniel Veillard157fee02003-10-31 10:36:03 +0000578 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
579 (ctxt->instate == XML_PARSER_EOF))
580 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000581 if (ctxt != NULL)
582 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000583 __xmlRaiseError(NULL, NULL, NULL,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000584 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
585 NULL, 0, NULL, NULL, NULL, val, 0, msg, val);
Daniel Veillard30e76072006-03-09 14:13:55 +0000586 if (ctxt != NULL) {
587 ctxt->wellFormed = 0;
588 if (ctxt->recovery == 0)
589 ctxt->disableSAX = 1;
590 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000591}
592
593/**
Daniel Veillardf403d292003-10-05 13:51:35 +0000594 * xmlFatalErrMsgStrIntStr:
595 * @ctxt: an XML parser context
596 * @error: the error number
597 * @msg: the error message
598 * @str1: an string info
599 * @val: an integer value
600 * @str2: an string info
601 *
602 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
603 */
604static void
605xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
606 const char *msg, const xmlChar *str1, int val,
607 const xmlChar *str2)
608{
Daniel Veillard157fee02003-10-31 10:36:03 +0000609 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
610 (ctxt->instate == XML_PARSER_EOF))
611 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000612 if (ctxt != NULL)
613 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000614 __xmlRaiseError(NULL, NULL, NULL,
Daniel Veillardf403d292003-10-05 13:51:35 +0000615 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
616 NULL, 0, (const char *) str1, (const char *) str2,
617 NULL, val, 0, msg, str1, val, str2);
Daniel Veillard30e76072006-03-09 14:13:55 +0000618 if (ctxt != NULL) {
619 ctxt->wellFormed = 0;
620 if (ctxt->recovery == 0)
621 ctxt->disableSAX = 1;
622 }
Daniel Veillardf403d292003-10-05 13:51:35 +0000623}
624
625/**
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000626 * xmlFatalErrMsgStr:
627 * @ctxt: an XML parser context
628 * @error: the error number
629 * @msg: the error message
630 * @val: a string value
631 *
632 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
633 */
634static void
635xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000636 const char *msg, const xmlChar * val)
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000637{
Daniel Veillard157fee02003-10-31 10:36:03 +0000638 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
639 (ctxt->instate == XML_PARSER_EOF))
640 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000641 if (ctxt != NULL)
642 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000643 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000644 XML_FROM_PARSER, error, XML_ERR_FATAL,
645 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
646 val);
Daniel Veillard30e76072006-03-09 14:13:55 +0000647 if (ctxt != NULL) {
648 ctxt->wellFormed = 0;
649 if (ctxt->recovery == 0)
650 ctxt->disableSAX = 1;
651 }
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000652}
653
654/**
Daniel Veillardf403d292003-10-05 13:51:35 +0000655 * xmlErrMsgStr:
656 * @ctxt: an XML parser context
657 * @error: the error number
658 * @msg: the error message
659 * @val: a string value
660 *
661 * Handle a non fatal parser error
662 */
663static void
664xmlErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
665 const char *msg, const xmlChar * val)
666{
Daniel Veillard157fee02003-10-31 10:36:03 +0000667 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
668 (ctxt->instate == XML_PARSER_EOF))
669 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000670 if (ctxt != NULL)
671 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000672 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
Daniel Veillardf403d292003-10-05 13:51:35 +0000673 XML_FROM_PARSER, error, XML_ERR_ERROR,
674 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
675 val);
676}
677
678/**
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000679 * xmlNsErr:
680 * @ctxt: an XML parser context
681 * @error: the error number
682 * @msg: the message
683 * @info1: extra information string
684 * @info2: extra information string
685 *
686 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
687 */
688static void
689xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
690 const char *msg,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000691 const xmlChar * info1, const xmlChar * info2,
692 const xmlChar * info3)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000693{
Daniel Veillard157fee02003-10-31 10:36:03 +0000694 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
695 (ctxt->instate == XML_PARSER_EOF))
696 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000697 if (ctxt != NULL)
698 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000699 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000700 XML_ERR_ERROR, NULL, 0, (const char *) info1,
701 (const char *) info2, (const char *) info3, 0, 0, msg,
702 info1, info2, info3);
Daniel Veillard30e76072006-03-09 14:13:55 +0000703 if (ctxt != NULL)
704 ctxt->nsWellFormed = 0;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000705}
706
Daniel Veillard37334572008-07-31 08:20:02 +0000707/**
708 * xmlNsWarn
709 * @ctxt: an XML parser context
710 * @error: the error number
711 * @msg: the message
712 * @info1: extra information string
713 * @info2: extra information string
714 *
715 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
716 */
717static void
718xmlNsWarn(xmlParserCtxtPtr ctxt, xmlParserErrors error,
719 const char *msg,
720 const xmlChar * info1, const xmlChar * info2,
721 const xmlChar * info3)
722{
723 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
724 (ctxt->instate == XML_PARSER_EOF))
725 return;
726 if (ctxt != NULL)
727 ctxt->errNo = error;
728 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
729 XML_ERR_WARNING, NULL, 0, (const char *) info1,
730 (const char *) info2, (const char *) info3, 0, 0, msg,
731 info1, info2, info3);
732}
733
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000734/************************************************************************
735 * *
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000736 * Library wide options *
737 * *
738 ************************************************************************/
739
740/**
741 * xmlHasFeature:
742 * @feature: the feature to be examined
743 *
744 * Examines if the library has been compiled with a given feature.
745 *
746 * Returns a non-zero value if the feature exist, otherwise zero.
747 * Returns zero (0) if the feature does not exist or an unknown
748 * unknown feature is requested, non-zero otherwise.
749 */
750int
751xmlHasFeature(xmlFeature feature)
752{
753 switch (feature) {
Daniel Veillard602434d2005-09-12 09:20:31 +0000754 case XML_WITH_THREAD:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000755#ifdef LIBXML_THREAD_ENABLED
756 return(1);
757#else
758 return(0);
759#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000760 case XML_WITH_TREE:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000761#ifdef LIBXML_TREE_ENABLED
762 return(1);
763#else
764 return(0);
765#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000766 case XML_WITH_OUTPUT:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000767#ifdef LIBXML_OUTPUT_ENABLED
768 return(1);
769#else
770 return(0);
771#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000772 case XML_WITH_PUSH:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000773#ifdef LIBXML_PUSH_ENABLED
774 return(1);
775#else
776 return(0);
777#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000778 case XML_WITH_READER:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000779#ifdef LIBXML_READER_ENABLED
780 return(1);
781#else
782 return(0);
783#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000784 case XML_WITH_PATTERN:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000785#ifdef LIBXML_PATTERN_ENABLED
786 return(1);
787#else
788 return(0);
789#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000790 case XML_WITH_WRITER:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000791#ifdef LIBXML_WRITER_ENABLED
792 return(1);
793#else
794 return(0);
795#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000796 case XML_WITH_SAX1:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000797#ifdef LIBXML_SAX1_ENABLED
798 return(1);
799#else
800 return(0);
801#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000802 case XML_WITH_FTP:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000803#ifdef LIBXML_FTP_ENABLED
804 return(1);
805#else
806 return(0);
807#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000808 case XML_WITH_HTTP:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000809#ifdef LIBXML_HTTP_ENABLED
810 return(1);
811#else
812 return(0);
813#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000814 case XML_WITH_VALID:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000815#ifdef LIBXML_VALID_ENABLED
816 return(1);
817#else
818 return(0);
819#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000820 case XML_WITH_HTML:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000821#ifdef LIBXML_HTML_ENABLED
822 return(1);
823#else
824 return(0);
825#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000826 case XML_WITH_LEGACY:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000827#ifdef LIBXML_LEGACY_ENABLED
828 return(1);
829#else
830 return(0);
831#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000832 case XML_WITH_C14N:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000833#ifdef LIBXML_C14N_ENABLED
834 return(1);
835#else
836 return(0);
837#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000838 case XML_WITH_CATALOG:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000839#ifdef LIBXML_CATALOG_ENABLED
840 return(1);
841#else
842 return(0);
843#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000844 case XML_WITH_XPATH:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000845#ifdef LIBXML_XPATH_ENABLED
846 return(1);
847#else
848 return(0);
849#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000850 case XML_WITH_XPTR:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000851#ifdef LIBXML_XPTR_ENABLED
852 return(1);
853#else
854 return(0);
855#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000856 case XML_WITH_XINCLUDE:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000857#ifdef LIBXML_XINCLUDE_ENABLED
858 return(1);
859#else
860 return(0);
861#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000862 case XML_WITH_ICONV:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000863#ifdef LIBXML_ICONV_ENABLED
864 return(1);
865#else
866 return(0);
867#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000868 case XML_WITH_ISO8859X:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000869#ifdef LIBXML_ISO8859X_ENABLED
870 return(1);
871#else
872 return(0);
873#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000874 case XML_WITH_UNICODE:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000875#ifdef LIBXML_UNICODE_ENABLED
876 return(1);
877#else
878 return(0);
879#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000880 case XML_WITH_REGEXP:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000881#ifdef LIBXML_REGEXP_ENABLED
882 return(1);
883#else
884 return(0);
885#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000886 case XML_WITH_AUTOMATA:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000887#ifdef LIBXML_AUTOMATA_ENABLED
888 return(1);
889#else
890 return(0);
891#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000892 case XML_WITH_EXPR:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000893#ifdef LIBXML_EXPR_ENABLED
894 return(1);
895#else
896 return(0);
897#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000898 case XML_WITH_SCHEMAS:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000899#ifdef LIBXML_SCHEMAS_ENABLED
900 return(1);
901#else
902 return(0);
903#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000904 case XML_WITH_SCHEMATRON:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000905#ifdef LIBXML_SCHEMATRON_ENABLED
906 return(1);
907#else
908 return(0);
909#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000910 case XML_WITH_MODULES:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000911#ifdef LIBXML_MODULES_ENABLED
912 return(1);
913#else
914 return(0);
915#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000916 case XML_WITH_DEBUG:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000917#ifdef LIBXML_DEBUG_ENABLED
918 return(1);
919#else
920 return(0);
921#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000922 case XML_WITH_DEBUG_MEM:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000923#ifdef DEBUG_MEMORY_LOCATION
924 return(1);
925#else
926 return(0);
927#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000928 case XML_WITH_DEBUG_RUN:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000929#ifdef LIBXML_DEBUG_RUNTIME
930 return(1);
931#else
932 return(0);
Daniel Veillard34e3f642008-07-29 09:02:27 +0000933#endif
Daniel Veillard75acfee2006-07-13 06:29:56 +0000934 case XML_WITH_ZLIB:
935#ifdef LIBXML_ZLIB_ENABLED
936 return(1);
937#else
938 return(0);
939#endif
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000940 default:
941 break;
942 }
943 return(0);
944}
945
946/************************************************************************
947 * *
Daniel Veillarde57ec792003-09-10 10:50:59 +0000948 * SAX2 defaulted attributes handling *
949 * *
950 ************************************************************************/
951
952/**
953 * xmlDetectSAX2:
954 * @ctxt: an XML parser context
955 *
956 * Do the SAX2 detection and specific intialization
957 */
958static void
959xmlDetectSAX2(xmlParserCtxtPtr ctxt) {
960 if (ctxt == NULL) return;
Daniel Veillard81273902003-09-30 00:43:48 +0000961#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +0000962 if ((ctxt->sax) && (ctxt->sax->initialized == XML_SAX2_MAGIC) &&
963 ((ctxt->sax->startElementNs != NULL) ||
964 (ctxt->sax->endElementNs != NULL))) ctxt->sax2 = 1;
Daniel Veillard81273902003-09-30 00:43:48 +0000965#else
966 ctxt->sax2 = 1;
967#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +0000968
969 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
970 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
971 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
William M. Brack9f797ab2004-07-28 07:40:12 +0000972 if ((ctxt->str_xml==NULL) || (ctxt->str_xmlns==NULL) ||
973 (ctxt->str_xml_ns == NULL)) {
974 xmlErrMemory(ctxt, NULL);
975 }
Daniel Veillarde57ec792003-09-10 10:50:59 +0000976}
977
Daniel Veillarde57ec792003-09-10 10:50:59 +0000978typedef struct _xmlDefAttrs xmlDefAttrs;
979typedef xmlDefAttrs *xmlDefAttrsPtr;
980struct _xmlDefAttrs {
981 int nbAttrs; /* number of defaulted attributes on that element */
982 int maxAttrs; /* the size of the array */
Daniel Veillardae0765b2008-07-31 19:54:59 +0000983 const xmlChar *values[5]; /* array of localname/prefix/values/external */
Daniel Veillarde57ec792003-09-10 10:50:59 +0000984};
Daniel Veillarde57ec792003-09-10 10:50:59 +0000985
986/**
Daniel Veillard97c9ce22008-03-25 16:52:41 +0000987 * xmlAttrNormalizeSpace:
988 * @src: the source string
989 * @dst: the target string
990 *
991 * Normalize the space in non CDATA attribute values:
992 * If the attribute type is not CDATA, then the XML processor MUST further
993 * process the normalized attribute value by discarding any leading and
994 * trailing space (#x20) characters, and by replacing sequences of space
995 * (#x20) characters by a single space (#x20) character.
996 * Note that the size of dst need to be at least src, and if one doesn't need
997 * to preserve dst (and it doesn't come from a dictionary or read-only) then
998 * passing src as dst is just fine.
999 *
1000 * Returns a pointer to the normalized value (dst) or NULL if no conversion
1001 * is needed.
1002 */
1003static xmlChar *
1004xmlAttrNormalizeSpace(const xmlChar *src, xmlChar *dst)
1005{
1006 if ((src == NULL) || (dst == NULL))
1007 return(NULL);
1008
1009 while (*src == 0x20) src++;
1010 while (*src != 0) {
1011 if (*src == 0x20) {
1012 while (*src == 0x20) src++;
1013 if (*src != 0)
1014 *dst++ = 0x20;
1015 } else {
1016 *dst++ = *src++;
1017 }
1018 }
1019 *dst = 0;
1020 if (dst == src)
1021 return(NULL);
1022 return(dst);
1023}
1024
1025/**
1026 * xmlAttrNormalizeSpace2:
1027 * @src: the source string
1028 *
1029 * Normalize the space in non CDATA attribute values, a slightly more complex
1030 * front end to avoid allocation problems when running on attribute values
1031 * coming from the input.
1032 *
1033 * Returns a pointer to the normalized value (dst) or NULL if no conversion
1034 * is needed.
1035 */
1036static const xmlChar *
Daniel Veillardae0765b2008-07-31 19:54:59 +00001037xmlAttrNormalizeSpace2(xmlParserCtxtPtr ctxt, xmlChar *src, int *len)
Daniel Veillard97c9ce22008-03-25 16:52:41 +00001038{
1039 int i;
1040 int remove_head = 0;
1041 int need_realloc = 0;
1042 const xmlChar *cur;
1043
1044 if ((ctxt == NULL) || (src == NULL) || (len == NULL))
1045 return(NULL);
1046 i = *len;
1047 if (i <= 0)
1048 return(NULL);
1049
1050 cur = src;
1051 while (*cur == 0x20) {
1052 cur++;
1053 remove_head++;
1054 }
1055 while (*cur != 0) {
1056 if (*cur == 0x20) {
1057 cur++;
1058 if ((*cur == 0x20) || (*cur == 0)) {
1059 need_realloc = 1;
1060 break;
1061 }
1062 } else
1063 cur++;
1064 }
1065 if (need_realloc) {
1066 xmlChar *ret;
1067
1068 ret = xmlStrndup(src + remove_head, i - remove_head + 1);
1069 if (ret == NULL) {
1070 xmlErrMemory(ctxt, NULL);
1071 return(NULL);
1072 }
1073 xmlAttrNormalizeSpace(ret, ret);
1074 *len = (int) strlen((const char *)ret);
1075 return(ret);
1076 } else if (remove_head) {
1077 *len -= remove_head;
Daniel Veillardae0765b2008-07-31 19:54:59 +00001078 memmove(src, src + remove_head, 1 + *len);
1079 return(src);
Daniel Veillard97c9ce22008-03-25 16:52:41 +00001080 }
1081 return(NULL);
1082}
1083
1084/**
Daniel Veillarde57ec792003-09-10 10:50:59 +00001085 * xmlAddDefAttrs:
1086 * @ctxt: an XML parser context
1087 * @fullname: the element fullname
1088 * @fullattr: the attribute fullname
1089 * @value: the attribute value
1090 *
1091 * Add a defaulted attribute for an element
1092 */
1093static void
1094xmlAddDefAttrs(xmlParserCtxtPtr ctxt,
1095 const xmlChar *fullname,
1096 const xmlChar *fullattr,
1097 const xmlChar *value) {
1098 xmlDefAttrsPtr defaults;
1099 int len;
1100 const xmlChar *name;
1101 const xmlChar *prefix;
1102
Daniel Veillard6a31b832008-03-26 14:06:44 +00001103 /*
1104 * Allows to detect attribute redefinitions
1105 */
1106 if (ctxt->attsSpecial != NULL) {
1107 if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1108 return;
1109 }
1110
Daniel Veillarde57ec792003-09-10 10:50:59 +00001111 if (ctxt->attsDefault == NULL) {
Daniel Veillard316a5c32005-01-23 22:56:39 +00001112 ctxt->attsDefault = xmlHashCreateDict(10, ctxt->dict);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001113 if (ctxt->attsDefault == NULL)
1114 goto mem_error;
1115 }
1116
1117 /*
Daniel Veillard079f6a72004-09-23 13:15:03 +00001118 * split the element name into prefix:localname , the string found
1119 * are within the DTD and then not associated to namespace names.
Daniel Veillarde57ec792003-09-10 10:50:59 +00001120 */
1121 name = xmlSplitQName3(fullname, &len);
1122 if (name == NULL) {
1123 name = xmlDictLookup(ctxt->dict, fullname, -1);
1124 prefix = NULL;
1125 } else {
1126 name = xmlDictLookup(ctxt->dict, name, -1);
1127 prefix = xmlDictLookup(ctxt->dict, fullname, len);
1128 }
1129
1130 /*
1131 * make sure there is some storage
1132 */
1133 defaults = xmlHashLookup2(ctxt->attsDefault, name, prefix);
1134 if (defaults == NULL) {
1135 defaults = (xmlDefAttrsPtr) xmlMalloc(sizeof(xmlDefAttrs) +
Daniel Veillardae0765b2008-07-31 19:54:59 +00001136 (4 * 5) * sizeof(const xmlChar *));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001137 if (defaults == NULL)
1138 goto mem_error;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001139 defaults->nbAttrs = 0;
Daniel Veillard079f6a72004-09-23 13:15:03 +00001140 defaults->maxAttrs = 4;
Daniel Veillard68b6e022008-03-31 09:26:00 +00001141 if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1142 defaults, NULL) < 0) {
1143 xmlFree(defaults);
1144 goto mem_error;
1145 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00001146 } else if (defaults->nbAttrs >= defaults->maxAttrs) {
Daniel Veillard079f6a72004-09-23 13:15:03 +00001147 xmlDefAttrsPtr temp;
1148
1149 temp = (xmlDefAttrsPtr) xmlRealloc(defaults, sizeof(xmlDefAttrs) +
Daniel Veillardae0765b2008-07-31 19:54:59 +00001150 (2 * defaults->maxAttrs * 5) * sizeof(const xmlChar *));
Daniel Veillard079f6a72004-09-23 13:15:03 +00001151 if (temp == NULL)
Daniel Veillarde57ec792003-09-10 10:50:59 +00001152 goto mem_error;
Daniel Veillard079f6a72004-09-23 13:15:03 +00001153 defaults = temp;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001154 defaults->maxAttrs *= 2;
Daniel Veillard68b6e022008-03-31 09:26:00 +00001155 if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1156 defaults, NULL) < 0) {
1157 xmlFree(defaults);
1158 goto mem_error;
1159 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00001160 }
1161
1162 /*
Daniel Veillard8874b942005-08-25 13:19:21 +00001163 * Split the element name into prefix:localname , the string found
Daniel Veillarde57ec792003-09-10 10:50:59 +00001164 * are within the DTD and hen not associated to namespace names.
1165 */
1166 name = xmlSplitQName3(fullattr, &len);
1167 if (name == NULL) {
1168 name = xmlDictLookup(ctxt->dict, fullattr, -1);
1169 prefix = NULL;
1170 } else {
1171 name = xmlDictLookup(ctxt->dict, name, -1);
1172 prefix = xmlDictLookup(ctxt->dict, fullattr, len);
1173 }
1174
Daniel Veillardae0765b2008-07-31 19:54:59 +00001175 defaults->values[5 * defaults->nbAttrs] = name;
1176 defaults->values[5 * defaults->nbAttrs + 1] = prefix;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001177 /* intern the string and precompute the end */
1178 len = xmlStrlen(value);
1179 value = xmlDictLookup(ctxt->dict, value, len);
Daniel Veillardae0765b2008-07-31 19:54:59 +00001180 defaults->values[5 * defaults->nbAttrs + 2] = value;
1181 defaults->values[5 * defaults->nbAttrs + 3] = value + len;
1182 if (ctxt->external)
1183 defaults->values[5 * defaults->nbAttrs + 4] = BAD_CAST "external";
1184 else
1185 defaults->values[5 * defaults->nbAttrs + 4] = NULL;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001186 defaults->nbAttrs++;
1187
1188 return;
1189
1190mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001191 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001192 return;
1193}
1194
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001195/**
1196 * xmlAddSpecialAttr:
1197 * @ctxt: an XML parser context
1198 * @fullname: the element fullname
1199 * @fullattr: the attribute fullname
1200 * @type: the attribute type
1201 *
Daniel Veillardac4118d2008-01-11 05:27:32 +00001202 * Register this attribute type
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001203 */
1204static void
1205xmlAddSpecialAttr(xmlParserCtxtPtr ctxt,
1206 const xmlChar *fullname,
1207 const xmlChar *fullattr,
1208 int type)
1209{
1210 if (ctxt->attsSpecial == NULL) {
Daniel Veillard316a5c32005-01-23 22:56:39 +00001211 ctxt->attsSpecial = xmlHashCreateDict(10, ctxt->dict);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001212 if (ctxt->attsSpecial == NULL)
1213 goto mem_error;
1214 }
1215
Daniel Veillardac4118d2008-01-11 05:27:32 +00001216 if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1217 return;
1218
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +00001219 xmlHashAddEntry2(ctxt->attsSpecial, fullname, fullattr,
1220 (void *) (long) type);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001221 return;
1222
1223mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001224 xmlErrMemory(ctxt, NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001225 return;
1226}
1227
Daniel Veillard4432df22003-09-28 18:58:27 +00001228/**
Daniel Veillardac4118d2008-01-11 05:27:32 +00001229 * xmlCleanSpecialAttrCallback:
1230 *
1231 * Removes CDATA attributes from the special attribute table
1232 */
1233static void
1234xmlCleanSpecialAttrCallback(void *payload, void *data,
1235 const xmlChar *fullname, const xmlChar *fullattr,
1236 const xmlChar *unused ATTRIBUTE_UNUSED) {
1237 xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) data;
1238
Daniel Veillardb3edafd2008-01-11 08:00:57 +00001239 if (((long) payload) == XML_ATTRIBUTE_CDATA) {
Daniel Veillardac4118d2008-01-11 05:27:32 +00001240 xmlHashRemoveEntry2(ctxt->attsSpecial, fullname, fullattr, NULL);
1241 }
1242}
1243
1244/**
1245 * xmlCleanSpecialAttr:
1246 * @ctxt: an XML parser context
1247 *
1248 * Trim the list of attributes defined to remove all those of type
1249 * CDATA as they are not special. This call should be done when finishing
1250 * to parse the DTD and before starting to parse the document root.
1251 */
1252static void
1253xmlCleanSpecialAttr(xmlParserCtxtPtr ctxt)
1254{
1255 if (ctxt->attsSpecial == NULL)
1256 return;
1257
1258 xmlHashScanFull(ctxt->attsSpecial, xmlCleanSpecialAttrCallback, ctxt);
1259
1260 if (xmlHashSize(ctxt->attsSpecial) == 0) {
1261 xmlHashFree(ctxt->attsSpecial, NULL);
1262 ctxt->attsSpecial = NULL;
1263 }
1264 return;
1265}
1266
1267/**
Daniel Veillard4432df22003-09-28 18:58:27 +00001268 * xmlCheckLanguageID:
1269 * @lang: pointer to the string value
1270 *
1271 * Checks that the value conforms to the LanguageID production:
1272 *
1273 * NOTE: this is somewhat deprecated, those productions were removed from
1274 * the XML Second edition.
1275 *
1276 * [33] LanguageID ::= Langcode ('-' Subcode)*
1277 * [34] Langcode ::= ISO639Code | IanaCode | UserCode
1278 * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
1279 * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
1280 * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
1281 * [38] Subcode ::= ([a-z] | [A-Z])+
1282 *
1283 * Returns 1 if correct 0 otherwise
1284 **/
1285int
1286xmlCheckLanguageID(const xmlChar * lang)
1287{
1288 const xmlChar *cur = lang;
1289
1290 if (cur == NULL)
1291 return (0);
1292 if (((cur[0] == 'i') && (cur[1] == '-')) ||
1293 ((cur[0] == 'I') && (cur[1] == '-'))) {
1294 /*
1295 * IANA code
1296 */
1297 cur += 2;
1298 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
1299 ((cur[0] >= 'a') && (cur[0] <= 'z')))
1300 cur++;
1301 } else if (((cur[0] == 'x') && (cur[1] == '-')) ||
1302 ((cur[0] == 'X') && (cur[1] == '-'))) {
1303 /*
1304 * User code
1305 */
1306 cur += 2;
1307 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
1308 ((cur[0] >= 'a') && (cur[0] <= 'z')))
1309 cur++;
1310 } else if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
1311 ((cur[0] >= 'a') && (cur[0] <= 'z'))) {
1312 /*
1313 * ISO639
1314 */
1315 cur++;
1316 if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
1317 ((cur[0] >= 'a') && (cur[0] <= 'z')))
1318 cur++;
1319 else
1320 return (0);
1321 } else
1322 return (0);
1323 while (cur[0] != 0) { /* non input consuming */
1324 if (cur[0] != '-')
1325 return (0);
1326 cur++;
1327 if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
1328 ((cur[0] >= 'a') && (cur[0] <= 'z')))
1329 cur++;
1330 else
1331 return (0);
1332 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
1333 ((cur[0] >= 'a') && (cur[0] <= 'z')))
1334 cur++;
1335 }
1336 return (1);
1337}
1338
Owen Taylor3473f882001-02-23 17:55:21 +00001339/************************************************************************
1340 * *
Daniel Veillard0161e632008-08-28 15:36:32 +00001341 * Parser stacks related functions and macros *
Owen Taylor3473f882001-02-23 17:55:21 +00001342 * *
1343 ************************************************************************/
1344
1345xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
1346 const xmlChar ** str);
1347
Daniel Veillard0fb18932003-09-07 09:14:37 +00001348#ifdef SAX2
1349/**
1350 * nsPush:
1351 * @ctxt: an XML parser context
1352 * @prefix: the namespace prefix or NULL
1353 * @URL: the namespace name
1354 *
1355 * Pushes a new parser namespace on top of the ns stack
1356 *
William M. Brack7b9154b2003-09-27 19:23:50 +00001357 * Returns -1 in case of error, -2 if the namespace should be discarded
1358 * and the index in the stack otherwise.
Daniel Veillard0fb18932003-09-07 09:14:37 +00001359 */
1360static int
1361nsPush(xmlParserCtxtPtr ctxt, const xmlChar *prefix, const xmlChar *URL)
1362{
Daniel Veillarddca8cc72003-09-26 13:53:14 +00001363 if (ctxt->options & XML_PARSE_NSCLEAN) {
1364 int i;
1365 for (i = 0;i < ctxt->nsNr;i += 2) {
1366 if (ctxt->nsTab[i] == prefix) {
1367 /* in scope */
1368 if (ctxt->nsTab[i + 1] == URL)
1369 return(-2);
1370 /* out of scope keep it */
1371 break;
1372 }
1373 }
1374 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00001375 if ((ctxt->nsMax == 0) || (ctxt->nsTab == NULL)) {
1376 ctxt->nsMax = 10;
1377 ctxt->nsNr = 0;
1378 ctxt->nsTab = (const xmlChar **)
1379 xmlMalloc(ctxt->nsMax * sizeof(xmlChar *));
1380 if (ctxt->nsTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001381 xmlErrMemory(ctxt, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00001382 ctxt->nsMax = 0;
1383 return (-1);
1384 }
1385 } else if (ctxt->nsNr >= ctxt->nsMax) {
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00001386 const xmlChar ** tmp;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001387 ctxt->nsMax *= 2;
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00001388 tmp = (const xmlChar **) xmlRealloc((char *) ctxt->nsTab,
1389 ctxt->nsMax * sizeof(ctxt->nsTab[0]));
1390 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001391 xmlErrMemory(ctxt, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00001392 ctxt->nsMax /= 2;
1393 return (-1);
1394 }
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00001395 ctxt->nsTab = tmp;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001396 }
1397 ctxt->nsTab[ctxt->nsNr++] = prefix;
1398 ctxt->nsTab[ctxt->nsNr++] = URL;
1399 return (ctxt->nsNr);
1400}
1401/**
1402 * nsPop:
1403 * @ctxt: an XML parser context
1404 * @nr: the number to pop
1405 *
1406 * Pops the top @nr parser prefix/namespace from the ns stack
1407 *
1408 * Returns the number of namespaces removed
1409 */
1410static int
1411nsPop(xmlParserCtxtPtr ctxt, int nr)
1412{
1413 int i;
1414
1415 if (ctxt->nsTab == NULL) return(0);
1416 if (ctxt->nsNr < nr) {
1417 xmlGenericError(xmlGenericErrorContext, "Pbm popping %d NS\n", nr);
1418 nr = ctxt->nsNr;
1419 }
1420 if (ctxt->nsNr <= 0)
1421 return (0);
Daniel Veillard34e3f642008-07-29 09:02:27 +00001422
Daniel Veillard0fb18932003-09-07 09:14:37 +00001423 for (i = 0;i < nr;i++) {
1424 ctxt->nsNr--;
1425 ctxt->nsTab[ctxt->nsNr] = NULL;
1426 }
1427 return(nr);
1428}
1429#endif
1430
1431static int
1432xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt, int nr) {
1433 const xmlChar **atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001434 int *attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001435 int maxatts;
1436
1437 if (ctxt->atts == NULL) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00001438 maxatts = 55; /* allow for 10 attrs by default */
Daniel Veillard0fb18932003-09-07 09:14:37 +00001439 atts = (const xmlChar **)
1440 xmlMalloc(maxatts * sizeof(xmlChar *));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001441 if (atts == NULL) goto mem_error;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001442 ctxt->atts = atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001443 attallocs = (int *) xmlMalloc((maxatts / 5) * sizeof(int));
1444 if (attallocs == NULL) goto mem_error;
1445 ctxt->attallocs = attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001446 ctxt->maxatts = maxatts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001447 } else if (nr + 5 > ctxt->maxatts) {
1448 maxatts = (nr + 5) * 2;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001449 atts = (const xmlChar **) xmlRealloc((void *) ctxt->atts,
1450 maxatts * sizeof(const xmlChar *));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001451 if (atts == NULL) goto mem_error;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001452 ctxt->atts = atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001453 attallocs = (int *) xmlRealloc((void *) ctxt->attallocs,
1454 (maxatts / 5) * sizeof(int));
1455 if (attallocs == NULL) goto mem_error;
1456 ctxt->attallocs = attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001457 ctxt->maxatts = maxatts;
1458 }
1459 return(ctxt->maxatts);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001460mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001461 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001462 return(-1);
Daniel Veillard0fb18932003-09-07 09:14:37 +00001463}
1464
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001465/**
1466 * inputPush:
1467 * @ctxt: an XML parser context
Daniel Veillard9d06d302002-01-22 18:15:52 +00001468 * @value: the parser input
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001469 *
1470 * Pushes a new parser input on top of the input stack
Daniel Veillard9d06d302002-01-22 18:15:52 +00001471 *
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001472 * Returns -1 in case of error, the index in the stack otherwise
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001473 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001474int
Daniel Veillard1c732d22002-11-30 11:22:59 +00001475inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value)
1476{
Daniel Veillard36e5cd52004-11-02 14:52:23 +00001477 if ((ctxt == NULL) || (value == NULL))
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001478 return(-1);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001479 if (ctxt->inputNr >= ctxt->inputMax) {
1480 ctxt->inputMax *= 2;
1481 ctxt->inputTab =
1482 (xmlParserInputPtr *) xmlRealloc(ctxt->inputTab,
1483 ctxt->inputMax *
1484 sizeof(ctxt->inputTab[0]));
1485 if (ctxt->inputTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001486 xmlErrMemory(ctxt, NULL);
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001487 xmlFreeInputStream(value);
1488 ctxt->inputMax /= 2;
1489 value = NULL;
1490 return (-1);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001491 }
1492 }
1493 ctxt->inputTab[ctxt->inputNr] = value;
1494 ctxt->input = value;
1495 return (ctxt->inputNr++);
1496}
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001497/**
Daniel Veillard1c732d22002-11-30 11:22:59 +00001498 * inputPop:
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001499 * @ctxt: an XML parser context
1500 *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001501 * Pops the top parser input from the input stack
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001502 *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001503 * Returns the input just removed
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001504 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001505xmlParserInputPtr
Daniel Veillard1c732d22002-11-30 11:22:59 +00001506inputPop(xmlParserCtxtPtr ctxt)
1507{
1508 xmlParserInputPtr ret;
1509
Daniel Veillard36e5cd52004-11-02 14:52:23 +00001510 if (ctxt == NULL)
1511 return(NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001512 if (ctxt->inputNr <= 0)
Daniel Veillard24505b02005-07-28 23:49:35 +00001513 return (NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001514 ctxt->inputNr--;
1515 if (ctxt->inputNr > 0)
1516 ctxt->input = ctxt->inputTab[ctxt->inputNr - 1];
1517 else
1518 ctxt->input = NULL;
1519 ret = ctxt->inputTab[ctxt->inputNr];
Daniel Veillard24505b02005-07-28 23:49:35 +00001520 ctxt->inputTab[ctxt->inputNr] = NULL;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001521 return (ret);
1522}
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001523/**
1524 * nodePush:
1525 * @ctxt: an XML parser context
Daniel Veillard9d06d302002-01-22 18:15:52 +00001526 * @value: the element node
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001527 *
1528 * Pushes a new element node on top of the node stack
Daniel Veillard9d06d302002-01-22 18:15:52 +00001529 *
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001530 * Returns -1 in case of error, the index in the stack otherwise
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001531 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001532int
Daniel Veillard1c732d22002-11-30 11:22:59 +00001533nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value)
1534{
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001535 if (ctxt == NULL) return(0);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001536 if (ctxt->nodeNr >= ctxt->nodeMax) {
Daniel Veillardd0cf7f62004-11-09 16:17:02 +00001537 xmlNodePtr *tmp;
1538
1539 tmp = (xmlNodePtr *) xmlRealloc(ctxt->nodeTab,
1540 ctxt->nodeMax * 2 *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001541 sizeof(ctxt->nodeTab[0]));
Daniel Veillardd0cf7f62004-11-09 16:17:02 +00001542 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001543 xmlErrMemory(ctxt, NULL);
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001544 return (-1);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001545 }
Daniel Veillardd0cf7f62004-11-09 16:17:02 +00001546 ctxt->nodeTab = tmp;
1547 ctxt->nodeMax *= 2;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001548 }
Daniel Veillard8915c152008-08-26 13:05:34 +00001549 if ((((unsigned int) ctxt->nodeNr) > xmlParserMaxDepth) &&
1550 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00001551 xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
Daniel Veillard8915c152008-08-26 13:05:34 +00001552 "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
Daniel Veillard4aede2e2003-10-17 12:43:59 +00001553 xmlParserMaxDepth);
Daniel Veillard3b2e4e12003-02-03 08:52:58 +00001554 ctxt->instate = XML_PARSER_EOF;
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001555 return(-1);
Daniel Veillard3b2e4e12003-02-03 08:52:58 +00001556 }
Daniel Veillard1c732d22002-11-30 11:22:59 +00001557 ctxt->nodeTab[ctxt->nodeNr] = value;
1558 ctxt->node = value;
1559 return (ctxt->nodeNr++);
1560}
Daniel Veillard8915c152008-08-26 13:05:34 +00001561
Daniel Veillard1c732d22002-11-30 11:22:59 +00001562/**
1563 * nodePop:
1564 * @ctxt: an XML parser context
1565 *
1566 * Pops the top element node from the node stack
1567 *
1568 * Returns the node just removed
Owen Taylor3473f882001-02-23 17:55:21 +00001569 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001570xmlNodePtr
Daniel Veillard1c732d22002-11-30 11:22:59 +00001571nodePop(xmlParserCtxtPtr ctxt)
1572{
1573 xmlNodePtr ret;
1574
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001575 if (ctxt == NULL) return(NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001576 if (ctxt->nodeNr <= 0)
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001577 return (NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001578 ctxt->nodeNr--;
1579 if (ctxt->nodeNr > 0)
1580 ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1];
1581 else
1582 ctxt->node = NULL;
1583 ret = ctxt->nodeTab[ctxt->nodeNr];
Daniel Veillard24505b02005-07-28 23:49:35 +00001584 ctxt->nodeTab[ctxt->nodeNr] = NULL;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001585 return (ret);
1586}
Daniel Veillarda2351322004-06-27 12:08:10 +00001587
1588#ifdef LIBXML_PUSH_ENABLED
Daniel Veillard1c732d22002-11-30 11:22:59 +00001589/**
Daniel Veillarde57ec792003-09-10 10:50:59 +00001590 * nameNsPush:
1591 * @ctxt: an XML parser context
1592 * @value: the element name
1593 * @prefix: the element prefix
1594 * @URI: the element namespace name
1595 *
1596 * Pushes a new element name/prefix/URL on top of the name stack
1597 *
1598 * Returns -1 in case of error, the index in the stack otherwise
1599 */
1600static int
1601nameNsPush(xmlParserCtxtPtr ctxt, const xmlChar * value,
1602 const xmlChar *prefix, const xmlChar *URI, int nsNr)
1603{
1604 if (ctxt->nameNr >= ctxt->nameMax) {
1605 const xmlChar * *tmp;
1606 void **tmp2;
1607 ctxt->nameMax *= 2;
1608 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1609 ctxt->nameMax *
1610 sizeof(ctxt->nameTab[0]));
1611 if (tmp == NULL) {
1612 ctxt->nameMax /= 2;
1613 goto mem_error;
1614 }
1615 ctxt->nameTab = tmp;
1616 tmp2 = (void **) xmlRealloc((void * *)ctxt->pushTab,
1617 ctxt->nameMax * 3 *
1618 sizeof(ctxt->pushTab[0]));
1619 if (tmp2 == NULL) {
1620 ctxt->nameMax /= 2;
1621 goto mem_error;
1622 }
1623 ctxt->pushTab = tmp2;
1624 }
1625 ctxt->nameTab[ctxt->nameNr] = value;
1626 ctxt->name = value;
1627 ctxt->pushTab[ctxt->nameNr * 3] = (void *) prefix;
1628 ctxt->pushTab[ctxt->nameNr * 3 + 1] = (void *) URI;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +00001629 ctxt->pushTab[ctxt->nameNr * 3 + 2] = (void *) (long) nsNr;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001630 return (ctxt->nameNr++);
1631mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001632 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001633 return (-1);
1634}
1635/**
1636 * nameNsPop:
1637 * @ctxt: an XML parser context
1638 *
1639 * Pops the top element/prefix/URI name from the name stack
1640 *
1641 * Returns the name just removed
1642 */
1643static const xmlChar *
1644nameNsPop(xmlParserCtxtPtr ctxt)
1645{
1646 const xmlChar *ret;
1647
1648 if (ctxt->nameNr <= 0)
Daniel Veillard24505b02005-07-28 23:49:35 +00001649 return (NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001650 ctxt->nameNr--;
1651 if (ctxt->nameNr > 0)
1652 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1653 else
1654 ctxt->name = NULL;
1655 ret = ctxt->nameTab[ctxt->nameNr];
1656 ctxt->nameTab[ctxt->nameNr] = NULL;
1657 return (ret);
1658}
Daniel Veillarda2351322004-06-27 12:08:10 +00001659#endif /* LIBXML_PUSH_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +00001660
1661/**
Daniel Veillard1c732d22002-11-30 11:22:59 +00001662 * namePush:
1663 * @ctxt: an XML parser context
1664 * @value: the element name
1665 *
1666 * Pushes a new element name on top of the name stack
1667 *
Daniel Veillarde57ec792003-09-10 10:50:59 +00001668 * Returns -1 in case of error, the index in the stack otherwise
Daniel Veillard1c732d22002-11-30 11:22:59 +00001669 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001670int
Daniel Veillard2fdbd322003-08-18 12:15:38 +00001671namePush(xmlParserCtxtPtr ctxt, const xmlChar * value)
Daniel Veillard1c732d22002-11-30 11:22:59 +00001672{
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001673 if (ctxt == NULL) return (-1);
1674
Daniel Veillard1c732d22002-11-30 11:22:59 +00001675 if (ctxt->nameNr >= ctxt->nameMax) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00001676 const xmlChar * *tmp;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001677 ctxt->nameMax *= 2;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001678 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
Daniel Veillard1c732d22002-11-30 11:22:59 +00001679 ctxt->nameMax *
1680 sizeof(ctxt->nameTab[0]));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001681 if (tmp == NULL) {
1682 ctxt->nameMax /= 2;
1683 goto mem_error;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001684 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00001685 ctxt->nameTab = tmp;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001686 }
1687 ctxt->nameTab[ctxt->nameNr] = value;
1688 ctxt->name = value;
1689 return (ctxt->nameNr++);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001690mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001691 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001692 return (-1);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001693}
1694/**
1695 * namePop:
1696 * @ctxt: an XML parser context
1697 *
1698 * Pops the top element name from the name stack
1699 *
1700 * Returns the name just removed
1701 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001702const xmlChar *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001703namePop(xmlParserCtxtPtr ctxt)
1704{
Daniel Veillard2fdbd322003-08-18 12:15:38 +00001705 const xmlChar *ret;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001706
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001707 if ((ctxt == NULL) || (ctxt->nameNr <= 0))
1708 return (NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001709 ctxt->nameNr--;
1710 if (ctxt->nameNr > 0)
1711 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1712 else
1713 ctxt->name = NULL;
1714 ret = ctxt->nameTab[ctxt->nameNr];
Daniel Veillard24505b02005-07-28 23:49:35 +00001715 ctxt->nameTab[ctxt->nameNr] = NULL;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001716 return (ret);
1717}
Owen Taylor3473f882001-02-23 17:55:21 +00001718
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001719static int spacePush(xmlParserCtxtPtr ctxt, int val) {
Owen Taylor3473f882001-02-23 17:55:21 +00001720 if (ctxt->spaceNr >= ctxt->spaceMax) {
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00001721 int *tmp;
1722
Owen Taylor3473f882001-02-23 17:55:21 +00001723 ctxt->spaceMax *= 2;
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00001724 tmp = (int *) xmlRealloc(ctxt->spaceTab,
1725 ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
1726 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001727 xmlErrMemory(ctxt, NULL);
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001728 ctxt->spaceMax /=2;
1729 return(-1);
Owen Taylor3473f882001-02-23 17:55:21 +00001730 }
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00001731 ctxt->spaceTab = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00001732 }
1733 ctxt->spaceTab[ctxt->spaceNr] = val;
1734 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr];
1735 return(ctxt->spaceNr++);
1736}
1737
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001738static int spacePop(xmlParserCtxtPtr ctxt) {
Owen Taylor3473f882001-02-23 17:55:21 +00001739 int ret;
1740 if (ctxt->spaceNr <= 0) return(0);
1741 ctxt->spaceNr--;
1742 if (ctxt->spaceNr > 0)
1743 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1];
1744 else
Daniel Veillarddbcbbd22006-06-18 19:55:20 +00001745 ctxt->space = &ctxt->spaceTab[0];
Owen Taylor3473f882001-02-23 17:55:21 +00001746 ret = ctxt->spaceTab[ctxt->spaceNr];
1747 ctxt->spaceTab[ctxt->spaceNr] = -1;
1748 return(ret);
1749}
1750
/*
 * Macros for accessing the content. Those should be used only by the parser,
 * and not exported.
 *
 * Dirty macros, i.e. one often needs to make assumptions about the context
 * to use them
 *
 *   CUR_PTR return the current pointer to the xmlChar to be parsed.
 *           To be used with extreme caution since operations consuming
 *           characters may move the input buffer to a different location !
 *   CUR     returns the current xmlChar value, i.e. an 8 bit value.
 *           This should be used internally by the parser
 *           only to compare to ASCII values otherwise it would break when
 *           running with UTF-8 encoding.
 *   RAW     same as CUR but in the input buffer, bypass any token
 *           extraction that may have been done
 *   NXT(n)  returns the n'th next xmlChar. Same as CUR, it should be used
 *           only to compare on ASCII based substrings.
 *   SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
 *           strings without newlines within the parser.
 *   NEXT1   Skip 1 xmlChar, and must also be used only to skip 1 non-newline
 *           ASCII defined char within the parser.
 * Clean macros, not dependent on an ASCII context, expect UTF-8 encoding
 *
 *   NEXT    Skip to the next character, this does the proper decoding
 *           in UTF-8 mode. It also pops up unfinished entities on the fly.
 *   NEXTL(l) Skip the current unicode character of l xmlChars long.
 *   CUR_CHAR(l) returns the current unicode character (int), set l
 *           to the number of xmlChars used for the encoding [0-5].
 *   CUR_SCHAR  same but operates on a string instead of the context
 *   COPY_BUF  copy the current unicode char to the target buffer, increment
 *            the index
 *   GROW, SHRINK  handling of input buffers
 */
1785
/*
 * Direct accessors on the current input of the parser context. They all
 * expect a variable named `ctxt` to be in scope at the point of use.
 */

/* RAW and CUR both read the xmlChar at the current input position. */
#define RAW (*(ctxt->input->cur))
#define CUR (*(ctxt->input->cur))

/* The xmlChar located `val` positions after the current one. */
#define NXT(val) (ctxt->input->cur[(val)])

/* The current input pointer itself; still usable as an lvalue. */
#define CUR_PTR (ctxt->input->cur)
1790
/*
 * CMPn(s, c1, ..., cn): true when the first n bytes found at s are equal
 * to c1 ... cn, compared as unsigned char values.
 *
 * The bytes are tested left to right and the test stops at the first
 * mismatch, so a NUL terminated string shorter than n is never read past
 * its terminator as long as none of the expected values is 0.
 * s is evaluated once per byte actually compared and must therefore be
 * free of side effects. Every argument is parenthesized so that arbitrary
 * expressions (pointer arithmetic, conditionals, ...) can be passed, and
 * the cast keeps const qualified strings const.
 */
#define CMP4( s, c1, c2, c3, c4 ) \
  ( ((const unsigned char *) (s))[ 0 ] == (c1) && \
    ((const unsigned char *) (s))[ 1 ] == (c2) && \
    ((const unsigned char *) (s))[ 2 ] == (c3) && \
    ((const unsigned char *) (s))[ 3 ] == (c4) )
#define CMP5( s, c1, c2, c3, c4, c5 ) \
  ( CMP4( s, c1, c2, c3, c4 ) && \
    ((const unsigned char *) (s))[ 4 ] == (c5) )
#define CMP6( s, c1, c2, c3, c4, c5, c6 ) \
  ( CMP5( s, c1, c2, c3, c4, c5 ) && \
    ((const unsigned char *) (s))[ 5 ] == (c6) )
#define CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) \
  ( CMP6( s, c1, c2, c3, c4, c5, c6 ) && \
    ((const unsigned char *) (s))[ 6 ] == (c7) )
#define CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) \
  ( CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) && \
    ((const unsigned char *) (s))[ 7 ] == (c8) )
#define CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) \
  ( CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) && \
    ((const unsigned char *) (s))[ 8 ] == (c9) )
#define CMP10( s, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 ) \
  ( CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) && \
    ((const unsigned char *) (s))[ 9 ] == (c10) )
1808
/*
 * SKIP(val): advance the current input by val xmlChars, adding val to the
 * nbChars and column counters (the line counter is not touched). After
 * the move a '%' at the new position is handed to
 * xmlParserHandlePEReference(), and an input which is exhausted and
 * cannot be grown is popped. val is evaluated three times.
 */
#define SKIP(val) do { \
    ctxt->nbChars += (val); \
    ctxt->input->cur += (val); \
    ctxt->input->col += (val); \
    if (*ctxt->input->cur == '%') \
        xmlParserHandlePEReference(ctxt); \
    if ((*ctxt->input->cur == 0) && \
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) \
        xmlPopInput(ctxt); \
  } while (0)
1816
/*
 * SKIPL(val): advance the current input by val xmlChars one at a time,
 * keeping the line and column counters accurate across newlines and
 * counting each xmlChar in nbChars. After the move a '%' at the new
 * position is handed to xmlParserHandlePEReference(), and an input which
 * is exhausted and cannot be grown is popped.
 *
 * val is parenthesized so that any expression can be passed; it is
 * re-evaluated on every iteration of the loop.
 */
#define SKIPL(val) do { \
    int skipl; \
    for (skipl = 0; skipl < (val); skipl++) { \
        if (*(ctxt->input->cur) == '\n') { \
            ctxt->input->line++; ctxt->input->col = 1; \
        } else ctxt->input->col++; \
        ctxt->nbChars++; \
        ctxt->input->cur++; \
    } \
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt); \
    if ((*ctxt->input->cur == 0) && \
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) \
        xmlPopInput(ctxt); \
  } while (0)
1831
Daniel Veillarda880b122003-04-21 21:36:41 +00001832#define SHRINK if ((ctxt->progressive == 0) && \
Daniel Veillard6155d8a2003-08-19 15:01:28 +00001833 (ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \
1834 (ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \
Daniel Veillard46de64e2002-05-29 08:21:33 +00001835 xmlSHRINK (ctxt);
1836
1837static void xmlSHRINK (xmlParserCtxtPtr ctxt) {
1838 xmlParserInputShrink(ctxt->input);
1839 if ((*ctxt->input->cur == 0) &&
1840 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1841 xmlPopInput(ctxt);
Daniel Veillard48b2f892001-02-25 16:11:03 +00001842 }
Owen Taylor3473f882001-02-23 17:55:21 +00001843
Daniel Veillarda880b122003-04-21 21:36:41 +00001844#define GROW if ((ctxt->progressive == 0) && \
1845 (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK)) \
Daniel Veillard46de64e2002-05-29 08:21:33 +00001846 xmlGROW (ctxt);
1847
1848static void xmlGROW (xmlParserCtxtPtr ctxt) {
1849 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1850 if ((*ctxt->input->cur == 0) &&
1851 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1852 xmlPopInput(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +00001853}
Owen Taylor3473f882001-02-23 17:55:21 +00001854
/* Skip the blanks found at the current position, see xmlSkipBlankChars() */
#define SKIP_BLANKS xmlSkipBlankChars(ctxt)

/* Skip to the next character through xmlNextChar() */
#define NEXT xmlNextChar(ctxt)

/*
 * NEXT1: skip exactly one xmlChar, updating the column and nbChars
 * counters, and try to grow the input when a 0 byte is reached.
 * NOTE: expands to a braced block, not to a do { } while (0) statement.
 */
#define NEXT1 { \
        ctxt->input->col++; \
        ctxt->input->cur++; \
        ctxt->nbChars++; \
        if (*ctxt->input->cur == 0) \
            xmlParserInputGrow(ctxt->input, INPUT_CHUNK); \
    }

/*
 * NEXTL(l): skip the current character, which is l xmlChars long. The
 * line/column counters are updated from the first xmlChar only, and a
 * '%' found at the new position is handed to xmlParserHandlePEReference().
 */
#define NEXTL(l) do { \
    if (*(ctxt->input->cur) == '\n') { \
        ctxt->input->line++; ctxt->input->col = 1; \
    } else ctxt->input->col++; \
    ctxt->input->cur += (l); \
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt); \
  } while (0)

/* Decode the current character of the input, l (an lvalue) gets its length */
#define CUR_CHAR(l) xmlCurrentChar(ctxt, &(l))
/* Same as CUR_CHAR but decoding from the string s */
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, (s), &(l))

/*
 * COPY_BUF(l,b,i,v): append character v to buffer b at index i and
 * advance i. A character of length l == 1 is stored as a single xmlChar,
 * anything else goes through xmlCopyCharMultiByte().
 * Wrapped in do { } while (0) so that it behaves as one statement, with
 * every argument parenthesized; i must be an lvalue.
 */
#define COPY_BUF(l,b,i,v) do { \
    if ((l) == 1) (b)[(i)++] = (xmlChar) (v); \
    else (i) += xmlCopyCharMultiByte(&(b)[(i)], (v)); \
  } while (0)
Owen Taylor3473f882001-02-23 17:55:21 +00001881
1882/**
1883 * xmlSkipBlankChars:
1884 * @ctxt: the XML parser context
1885 *
1886 * skip all blanks character found at that point in the input streams.
1887 * It pops up finished entities in the process if allowable at that point.
1888 *
1889 * Returns the number of space chars skipped
1890 */
1891
1892int
1893xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
Daniel Veillard02141ea2001-04-30 11:46:40 +00001894 int res = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001895
1896 /*
1897 * It's Okay to use CUR/NEXT here since all the blanks are on
1898 * the ASCII range.
1899 */
Daniel Veillard02141ea2001-04-30 11:46:40 +00001900 if ((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) {
1901 const xmlChar *cur;
Owen Taylor3473f882001-02-23 17:55:21 +00001902 /*
Daniel Veillard02141ea2001-04-30 11:46:40 +00001903 * if we are in the document content, go really fast
Owen Taylor3473f882001-02-23 17:55:21 +00001904 */
Daniel Veillard02141ea2001-04-30 11:46:40 +00001905 cur = ctxt->input->cur;
William M. Brack76e95df2003-10-18 16:20:14 +00001906 while (IS_BLANK_CH(*cur)) {
Daniel Veillard02141ea2001-04-30 11:46:40 +00001907 if (*cur == '\n') {
1908 ctxt->input->line++; ctxt->input->col = 1;
1909 }
1910 cur++;
1911 res++;
1912 if (*cur == 0) {
1913 ctxt->input->cur = cur;
1914 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1915 cur = ctxt->input->cur;
1916 }
1917 }
1918 ctxt->input->cur = cur;
1919 } else {
1920 int cur;
1921 do {
1922 cur = CUR;
Daniel Veillard7da92702005-01-23 20:15:53 +00001923 while (IS_BLANK_CH(cur)) { /* CHECKED tstblanks.xml */
Daniel Veillard02141ea2001-04-30 11:46:40 +00001924 NEXT;
1925 cur = CUR;
1926 res++;
1927 }
1928 while ((cur == 0) && (ctxt->inputNr > 1) &&
1929 (ctxt->instate != XML_PARSER_COMMENT)) {
1930 xmlPopInput(ctxt);
1931 cur = CUR;
1932 }
1933 /*
1934 * Need to handle support of entities branching here
1935 */
1936 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);
1937 } while (IS_BLANK(cur)); /* CHECKED tstblanks.xml */
1938 }
Owen Taylor3473f882001-02-23 17:55:21 +00001939 return(res);
1940}
1941
1942/************************************************************************
1943 * *
1944 * Commodity functions to handle entities *
1945 * *
1946 ************************************************************************/
1947
1948/**
1949 * xmlPopInput:
1950 * @ctxt: an XML parser context
1951 *
1952 * xmlPopInput: the current input pointed by ctxt->input came to an end
1953 * pop it and return the next char.
1954 *
1955 * Returns the current xmlChar in the parser context
1956 */
1957xmlChar
1958xmlPopInput(xmlParserCtxtPtr ctxt) {
Daniel Veillarda82b1822004-11-08 16:24:57 +00001959 if ((ctxt == NULL) || (ctxt->inputNr <= 1)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00001960 if (xmlParserDebugEntities)
1961 xmlGenericError(xmlGenericErrorContext,
1962 "Popping input %d\n", ctxt->inputNr);
1963 xmlFreeInputStream(inputPop(ctxt));
Daniel Veillard561b7f82002-03-20 21:55:57 +00001964 if ((*ctxt->input->cur == 0) &&
Owen Taylor3473f882001-02-23 17:55:21 +00001965 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1966 return(xmlPopInput(ctxt));
1967 return(CUR);
1968}
1969
1970/**
1971 * xmlPushInput:
1972 * @ctxt: an XML parser context
1973 * @input: an XML parser input fragment (entity, XML fragment ...).
1974 *
1975 * xmlPushInput: switch to a new input stream which is stacked on top
1976 * of the previous one(s).
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001977 * Returns -1 in case of error or the index in the input stack
Owen Taylor3473f882001-02-23 17:55:21 +00001978 */
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001979int
Owen Taylor3473f882001-02-23 17:55:21 +00001980xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001981 int ret;
1982 if (input == NULL) return(-1);
Owen Taylor3473f882001-02-23 17:55:21 +00001983
1984 if (xmlParserDebugEntities) {
1985 if ((ctxt->input != NULL) && (ctxt->input->filename))
1986 xmlGenericError(xmlGenericErrorContext,
1987 "%s(%d): ", ctxt->input->filename,
1988 ctxt->input->line);
1989 xmlGenericError(xmlGenericErrorContext,
1990 "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
1991 }
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001992 ret = inputPush(ctxt, input);
Owen Taylor3473f882001-02-23 17:55:21 +00001993 GROW;
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001994 return(ret);
Owen Taylor3473f882001-02-23 17:55:21 +00001995}
1996
1997/**
1998 * xmlParseCharRef:
1999 * @ctxt: an XML parser context
2000 *
2001 * parse Reference declarations
2002 *
2003 * [66] CharRef ::= '&#' [0-9]+ ';' |
2004 * '&#x' [0-9a-fA-F]+ ';'
2005 *
2006 * [ WFC: Legal Character ]
2007 * Characters referred to using character references must match the
2008 * production for Char.
2009 *
2010 * Returns the value parsed (as an int), 0 in case of error
2011 */
2012int
2013xmlParseCharRef(xmlParserCtxtPtr ctxt) {
Daniel Veillard50582112001-03-26 22:52:16 +00002014 unsigned int val = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002015 int count = 0;
Daniel Veillard37fd3072004-06-03 11:22:31 +00002016 unsigned int outofrange = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002017
Owen Taylor3473f882001-02-23 17:55:21 +00002018 /*
2019 * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
2020 */
Daniel Veillard561b7f82002-03-20 21:55:57 +00002021 if ((RAW == '&') && (NXT(1) == '#') &&
Owen Taylor3473f882001-02-23 17:55:21 +00002022 (NXT(2) == 'x')) {
2023 SKIP(3);
2024 GROW;
2025 while (RAW != ';') { /* loop blocked by count */
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00002026 if (count++ > 20) {
2027 count = 0;
2028 GROW;
2029 }
2030 if ((RAW >= '0') && (RAW <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +00002031 val = val * 16 + (CUR - '0');
2032 else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
2033 val = val * 16 + (CUR - 'a') + 10;
2034 else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
2035 val = val * 16 + (CUR - 'A') + 10;
2036 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002037 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002038 val = 0;
2039 break;
2040 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00002041 if (val > 0x10FFFF)
2042 outofrange = val;
2043
Owen Taylor3473f882001-02-23 17:55:21 +00002044 NEXT;
2045 count++;
2046 }
2047 if (RAW == ';') {
2048 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
Daniel Veillard77a90a72003-03-22 00:04:05 +00002049 ctxt->input->col++;
Owen Taylor3473f882001-02-23 17:55:21 +00002050 ctxt->nbChars ++;
2051 ctxt->input->cur++;
2052 }
Daniel Veillard561b7f82002-03-20 21:55:57 +00002053 } else if ((RAW == '&') && (NXT(1) == '#')) {
Owen Taylor3473f882001-02-23 17:55:21 +00002054 SKIP(2);
2055 GROW;
2056 while (RAW != ';') { /* loop blocked by count */
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00002057 if (count++ > 20) {
2058 count = 0;
2059 GROW;
2060 }
2061 if ((RAW >= '0') && (RAW <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +00002062 val = val * 10 + (CUR - '0');
2063 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002064 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002065 val = 0;
2066 break;
2067 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00002068 if (val > 0x10FFFF)
2069 outofrange = val;
2070
Owen Taylor3473f882001-02-23 17:55:21 +00002071 NEXT;
2072 count++;
2073 }
2074 if (RAW == ';') {
2075 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
Daniel Veillard77a90a72003-03-22 00:04:05 +00002076 ctxt->input->col++;
Owen Taylor3473f882001-02-23 17:55:21 +00002077 ctxt->nbChars ++;
2078 ctxt->input->cur++;
2079 }
2080 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002081 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002082 }
2083
2084 /*
2085 * [ WFC: Legal Character ]
2086 * Characters referred to using character references must match the
2087 * production for Char.
2088 */
Daniel Veillard37fd3072004-06-03 11:22:31 +00002089 if ((IS_CHAR(val) && (outofrange == 0))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002090 return(val);
2091 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00002092 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2093 "xmlParseCharRef: invalid xmlChar value %d\n",
2094 val);
Owen Taylor3473f882001-02-23 17:55:21 +00002095 }
2096 return(0);
2097}
2098
2099/**
2100 * xmlParseStringCharRef:
2101 * @ctxt: an XML parser context
2102 * @str: a pointer to an index in the string
2103 *
2104 * parse Reference declarations, variant parsing from a string rather
2105 * than an an input flow.
2106 *
2107 * [66] CharRef ::= '&#' [0-9]+ ';' |
2108 * '&#x' [0-9a-fA-F]+ ';'
2109 *
2110 * [ WFC: Legal Character ]
2111 * Characters referred to using character references must match the
2112 * production for Char.
2113 *
2114 * Returns the value parsed (as an int), 0 in case of error, str will be
2115 * updated to the current value of the index
2116 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00002117static int
Owen Taylor3473f882001-02-23 17:55:21 +00002118xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
2119 const xmlChar *ptr;
2120 xmlChar cur;
Daniel Veillard37fd3072004-06-03 11:22:31 +00002121 unsigned int val = 0;
2122 unsigned int outofrange = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002123
2124 if ((str == NULL) || (*str == NULL)) return(0);
2125 ptr = *str;
2126 cur = *ptr;
2127 if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
2128 ptr += 3;
2129 cur = *ptr;
2130 while (cur != ';') { /* Non input consuming loop */
2131 if ((cur >= '0') && (cur <= '9'))
2132 val = val * 16 + (cur - '0');
2133 else if ((cur >= 'a') && (cur <= 'f'))
2134 val = val * 16 + (cur - 'a') + 10;
2135 else if ((cur >= 'A') && (cur <= 'F'))
2136 val = val * 16 + (cur - 'A') + 10;
2137 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002138 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002139 val = 0;
2140 break;
2141 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00002142 if (val > 0x10FFFF)
2143 outofrange = val;
2144
Owen Taylor3473f882001-02-23 17:55:21 +00002145 ptr++;
2146 cur = *ptr;
2147 }
2148 if (cur == ';')
2149 ptr++;
2150 } else if ((cur == '&') && (ptr[1] == '#')){
2151 ptr += 2;
2152 cur = *ptr;
2153 while (cur != ';') { /* Non input consuming loops */
2154 if ((cur >= '0') && (cur <= '9'))
2155 val = val * 10 + (cur - '0');
2156 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002157 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002158 val = 0;
2159 break;
2160 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00002161 if (val > 0x10FFFF)
2162 outofrange = val;
2163
Owen Taylor3473f882001-02-23 17:55:21 +00002164 ptr++;
2165 cur = *ptr;
2166 }
2167 if (cur == ';')
2168 ptr++;
2169 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002170 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002171 return(0);
2172 }
2173 *str = ptr;
2174
2175 /*
2176 * [ WFC: Legal Character ]
2177 * Characters referred to using character references must match the
2178 * production for Char.
2179 */
Daniel Veillard37fd3072004-06-03 11:22:31 +00002180 if ((IS_CHAR(val) && (outofrange == 0))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002181 return(val);
2182 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00002183 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2184 "xmlParseStringCharRef: invalid xmlChar value %d\n",
2185 val);
Owen Taylor3473f882001-02-23 17:55:21 +00002186 }
2187 return(0);
2188}
2189
2190/**
Daniel Veillardf5582f12002-06-11 10:08:16 +00002191 * xmlNewBlanksWrapperInputStream:
2192 * @ctxt: an XML parser context
2193 * @entity: an Entity pointer
2194 *
2195 * Create a new input stream for wrapping
2196 * blanks around a PEReference
2197 *
2198 * Returns the new input stream or NULL
2199 */
2200
2201static void deallocblankswrapper (xmlChar *str) {xmlFree(str);}
2202
Daniel Veillardf4862f02002-09-10 11:13:43 +00002203static xmlParserInputPtr
Daniel Veillardf5582f12002-06-11 10:08:16 +00002204xmlNewBlanksWrapperInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
2205 xmlParserInputPtr input;
2206 xmlChar *buffer;
2207 size_t length;
2208 if (entity == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002209 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
2210 "xmlNewBlanksWrapperInputStream entity\n");
Daniel Veillardf5582f12002-06-11 10:08:16 +00002211 return(NULL);
2212 }
2213 if (xmlParserDebugEntities)
2214 xmlGenericError(xmlGenericErrorContext,
2215 "new blanks wrapper for entity: %s\n", entity->name);
2216 input = xmlNewInputStream(ctxt);
2217 if (input == NULL) {
2218 return(NULL);
2219 }
2220 length = xmlStrlen(entity->name) + 5;
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002221 buffer = xmlMallocAtomic(length);
Daniel Veillardf5582f12002-06-11 10:08:16 +00002222 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002223 xmlErrMemory(ctxt, NULL);
Daniel Veillard68b6e022008-03-31 09:26:00 +00002224 xmlFree(input);
Daniel Veillardf5582f12002-06-11 10:08:16 +00002225 return(NULL);
2226 }
2227 buffer [0] = ' ';
2228 buffer [1] = '%';
2229 buffer [length-3] = ';';
2230 buffer [length-2] = ' ';
2231 buffer [length-1] = 0;
2232 memcpy(buffer + 2, entity->name, length - 5);
2233 input->free = deallocblankswrapper;
2234 input->base = buffer;
2235 input->cur = buffer;
2236 input->length = length;
2237 input->end = &buffer[length];
2238 return(input);
2239}
2240
2241/**
Owen Taylor3473f882001-02-23 17:55:21 +00002242 * xmlParserHandlePEReference:
2243 * @ctxt: the parser context
2244 *
2245 * [69] PEReference ::= '%' Name ';'
2246 *
2247 * [ WFC: No Recursion ]
2248 * A parsed entity must not contain a recursive
2249 * reference to itself, either directly or indirectly.
2250 *
2251 * [ WFC: Entity Declared ]
2252 * In a document without any DTD, a document with only an internal DTD
2253 * subset which contains no parameter entity references, or a document
2254 * with "standalone='yes'", ... ... The declaration of a parameter
2255 * entity must precede any reference to it...
2256 *
2257 * [ VC: Entity Declared ]
2258 * In a document with an external subset or external parameter entities
2259 * with "standalone='no'", ... ... The declaration of a parameter entity
2260 * must precede any reference to it...
2261 *
2262 * [ WFC: In DTD ]
2263 * Parameter-entity references may only appear in the DTD.
2264 * NOTE: misleading but this is handled.
2265 *
2266 * A PEReference may have been detected in the current input stream
2267 * the handling is done accordingly to
2268 * http://www.w3.org/TR/REC-xml#entproc
2269 * i.e.
2270 * - Included in literal in entity values
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002271 * - Included as Parameter Entity reference within DTDs
Owen Taylor3473f882001-02-23 17:55:21 +00002272 */
2273void
2274xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002275 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00002276 xmlEntityPtr entity = NULL;
2277 xmlParserInputPtr input;
2278
Owen Taylor3473f882001-02-23 17:55:21 +00002279 if (RAW != '%') return;
2280 switch(ctxt->instate) {
2281 case XML_PARSER_CDATA_SECTION:
2282 return;
2283 case XML_PARSER_COMMENT:
2284 return;
2285 case XML_PARSER_START_TAG:
2286 return;
2287 case XML_PARSER_END_TAG:
2288 return;
2289 case XML_PARSER_EOF:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002290 xmlFatalErr(ctxt, XML_ERR_PEREF_AT_EOF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002291 return;
2292 case XML_PARSER_PROLOG:
2293 case XML_PARSER_START:
2294 case XML_PARSER_MISC:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002295 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_PROLOG, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002296 return;
2297 case XML_PARSER_ENTITY_DECL:
2298 case XML_PARSER_CONTENT:
2299 case XML_PARSER_ATTRIBUTE_VALUE:
2300 case XML_PARSER_PI:
2301 case XML_PARSER_SYSTEM_LITERAL:
Daniel Veillard4a7ae502002-02-18 19:18:17 +00002302 case XML_PARSER_PUBLIC_LITERAL:
Owen Taylor3473f882001-02-23 17:55:21 +00002303 /* we just ignore it there */
2304 return;
2305 case XML_PARSER_EPILOG:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002306 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_EPILOG, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002307 return;
2308 case XML_PARSER_ENTITY_VALUE:
2309 /*
2310 * NOTE: in the case of entity values, we don't do the
2311 * substitution here since we need the literal
2312 * entity value to be able to save the internal
2313 * subset of the document.
2314 * This will be handled by xmlStringDecodeEntities
2315 */
2316 return;
2317 case XML_PARSER_DTD:
2318 /*
2319 * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
2320 * In the internal DTD subset, parameter-entity references
2321 * can occur only where markup declarations can occur, not
2322 * within markup declarations.
2323 * In that case this is handled in xmlParseMarkupDecl
2324 */
2325 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
2326 return;
William M. Brack76e95df2003-10-18 16:20:14 +00002327 if (IS_BLANK_CH(NXT(1)) || NXT(1) == 0)
Daniel Veillardf5582f12002-06-11 10:08:16 +00002328 return;
Owen Taylor3473f882001-02-23 17:55:21 +00002329 break;
2330 case XML_PARSER_IGNORE:
2331 return;
2332 }
2333
2334 NEXT;
2335 name = xmlParseName(ctxt);
2336 if (xmlParserDebugEntities)
2337 xmlGenericError(xmlGenericErrorContext,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002338 "PEReference: %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00002339 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002340 xmlFatalErr(ctxt, XML_ERR_PEREF_NO_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002341 } else {
2342 if (RAW == ';') {
2343 NEXT;
2344 if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL))
2345 entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
2346 if (entity == NULL) {
2347
2348 /*
2349 * [ WFC: Entity Declared ]
2350 * In a document without any DTD, a document with only an
2351 * internal DTD subset which contains no parameter entity
2352 * references, or a document with "standalone='yes'", ...
2353 * ... The declaration of a parameter entity must precede
2354 * any reference to it...
2355 */
2356 if ((ctxt->standalone == 1) ||
2357 ((ctxt->hasExternalSubset == 0) &&
2358 (ctxt->hasPErefs == 0))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00002359 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00002360 "PEReference: %%%s; not found\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00002361 } else {
2362 /*
2363 * [ VC: Entity Declared ]
2364 * In a document with an external subset or external
2365 * parameter entities with "standalone='no'", ...
2366 * ... The declaration of a parameter entity must precede
2367 * any reference to it...
2368 */
Daniel Veillard24eb9782003-10-04 21:08:09 +00002369 if ((ctxt->validate) && (ctxt->vctxt.error != NULL)) {
2370 xmlValidityError(ctxt, XML_WAR_UNDECLARED_ENTITY,
2371 "PEReference: %%%s; not found\n",
Daniel Veillardae0765b2008-07-31 19:54:59 +00002372 name, NULL);
Daniel Veillard24eb9782003-10-04 21:08:09 +00002373 } else
2374 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
2375 "PEReference: %%%s; not found\n",
2376 name, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002377 ctxt->valid = 0;
2378 }
Daniel Veillardf5582f12002-06-11 10:08:16 +00002379 } else if (ctxt->input->free != deallocblankswrapper) {
2380 input = xmlNewBlanksWrapperInputStream(ctxt, entity);
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00002381 if (xmlPushInput(ctxt, input) < 0)
2382 return;
Owen Taylor3473f882001-02-23 17:55:21 +00002383 } else {
2384 if ((entity->etype == XML_INTERNAL_PARAMETER_ENTITY) ||
2385 (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)) {
Daniel Veillard87a764e2001-06-20 17:41:10 +00002386 xmlChar start[4];
2387 xmlCharEncoding enc;
2388
Owen Taylor3473f882001-02-23 17:55:21 +00002389 /*
2390 * handle the extra spaces added before and after
2391 * c.f. http://www.w3.org/TR/REC-xml#as-PE
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002392 * this is done independently.
Owen Taylor3473f882001-02-23 17:55:21 +00002393 */
2394 input = xmlNewEntityInputStream(ctxt, entity);
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00002395 if (xmlPushInput(ctxt, input) < 0)
2396 return;
Daniel Veillard87a764e2001-06-20 17:41:10 +00002397
2398 /*
2399 * Get the 4 first bytes and decode the charset
2400 * if enc != XML_CHAR_ENCODING_NONE
2401 * plug some encoding conversion routines.
William M. Bracka0c48ad2004-04-16 15:58:29 +00002402 * Note that, since we may have some non-UTF8
2403 * encoding (like UTF16, bug 135229), the 'length'
2404 * is not known, but we can calculate based upon
2405 * the amount of data in the buffer.
Daniel Veillard87a764e2001-06-20 17:41:10 +00002406 */
2407 GROW
William M. Bracka0c48ad2004-04-16 15:58:29 +00002408 if ((ctxt->input->end - ctxt->input->cur)>=4) {
Daniel Veillarde059b892002-06-13 15:32:10 +00002409 start[0] = RAW;
2410 start[1] = NXT(1);
2411 start[2] = NXT(2);
2412 start[3] = NXT(3);
2413 enc = xmlDetectCharEncoding(start, 4);
2414 if (enc != XML_CHAR_ENCODING_NONE) {
2415 xmlSwitchEncoding(ctxt, enc);
2416 }
Daniel Veillard87a764e2001-06-20 17:41:10 +00002417 }
2418
Owen Taylor3473f882001-02-23 17:55:21 +00002419 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
Daniel Veillarda07050d2003-10-19 14:46:32 +00002420 (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l' )) &&
2421 (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002422 xmlParseTextDecl(ctxt);
2423 }
Owen Taylor3473f882001-02-23 17:55:21 +00002424 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00002425 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
2426 "PEReference: %s is not a parameter entity\n",
2427 name);
Owen Taylor3473f882001-02-23 17:55:21 +00002428 }
2429 }
2430 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002431 xmlFatalErr(ctxt, XML_ERR_PEREF_SEMICOL_MISSING, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002432 }
Owen Taylor3473f882001-02-23 17:55:21 +00002433 }
2434}
2435
/*
 * growBuffer:
 *
 * Enlarge the xmlChar array named by @buffer.  The companion variable
 * <buffer>_size is first set to twice its previous value plus @n, then
 * the array is reallocated to that many xmlChar.  If xmlRealloc fails
 * the macro jumps to the mem_error label of the enclosing function and
 * leaves @buffer pointing at the old block; the enclosing function must
 * therefore provide both <buffer>_size and a mem_error label.
 */
#define growBuffer(buffer, n) {						\
    xmlChar *tmp;							\
    buffer##_size = buffer##_size * 2 + (n);				\
    tmp = (xmlChar *) xmlRealloc(buffer,				\
                                 buffer##_size * sizeof(xmlChar));	\
    if (tmp == NULL) goto mem_error;					\
    buffer = tmp;							\
}
2448
2449/**
Daniel Veillard7a02cfe2003-09-25 12:18:34 +00002450 * xmlStringLenDecodeEntities:
Owen Taylor3473f882001-02-23 17:55:21 +00002451 * @ctxt: the parser context
2452 * @str: the input string
Daniel Veillarde57ec792003-09-10 10:50:59 +00002453 * @len: the string length
Owen Taylor3473f882001-02-23 17:55:21 +00002454 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2455 * @end: an end marker xmlChar, 0 if none
2456 * @end2: an end marker xmlChar, 0 if none
2457 * @end3: an end marker xmlChar, 0 if none
2458 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002459 * Takes a entity string content and process to do the adequate substitutions.
Owen Taylor3473f882001-02-23 17:55:21 +00002460 *
2461 * [67] Reference ::= EntityRef | CharRef
2462 *
2463 * [69] PEReference ::= '%' Name ';'
2464 *
2465 * Returns A newly allocated string with the substitution done. The caller
2466 * must deallocate it !
2467 */
2468xmlChar *
Daniel Veillarde57ec792003-09-10 10:50:59 +00002469xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2470 int what, xmlChar end, xmlChar end2, xmlChar end3) {
Owen Taylor3473f882001-02-23 17:55:21 +00002471 xmlChar *buffer = NULL;
2472 int buffer_size = 0;
2473
2474 xmlChar *current = NULL;
Daniel Veillard68b6e022008-03-31 09:26:00 +00002475 xmlChar *rep = NULL;
Daniel Veillarde57ec792003-09-10 10:50:59 +00002476 const xmlChar *last;
Owen Taylor3473f882001-02-23 17:55:21 +00002477 xmlEntityPtr ent;
2478 int c,l;
2479 int nbchars = 0;
2480
Daniel Veillarda82b1822004-11-08 16:24:57 +00002481 if ((ctxt == NULL) || (str == NULL) || (len < 0))
Owen Taylor3473f882001-02-23 17:55:21 +00002482 return(NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00002483 last = str + len;
Owen Taylor3473f882001-02-23 17:55:21 +00002484
Daniel Veillard0161e632008-08-28 15:36:32 +00002485 if (((ctxt->depth > 40) &&
2486 ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
2487 (ctxt->depth > 1024)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002488 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002489 return(NULL);
2490 }
2491
2492 /*
2493 * allocate a translation buffer.
2494 */
2495 buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002496 buffer = (xmlChar *) xmlMallocAtomic(buffer_size * sizeof(xmlChar));
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002497 if (buffer == NULL) goto mem_error;
Owen Taylor3473f882001-02-23 17:55:21 +00002498
2499 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002500 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +00002501 * we are operating on already parsed values.
2502 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00002503 if (str < last)
2504 c = CUR_SCHAR(str, l);
2505 else
2506 c = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002507 while ((c != 0) && (c != end) && /* non input consuming loop */
2508 (c != end2) && (c != end3)) {
2509
2510 if (c == 0) break;
2511 if ((c == '&') && (str[1] == '#')) {
2512 int val = xmlParseStringCharRef(ctxt, &str);
2513 if (val != 0) {
2514 COPY_BUF(0,buffer,nbchars,val);
2515 }
Daniel Veillardbedc9772005-09-28 21:42:15 +00002516 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
Daniel Veillard0161e632008-08-28 15:36:32 +00002517 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
Daniel Veillardbedc9772005-09-28 21:42:15 +00002518 }
Owen Taylor3473f882001-02-23 17:55:21 +00002519 } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
2520 if (xmlParserDebugEntities)
2521 xmlGenericError(xmlGenericErrorContext,
2522 "String decoding Entity Reference: %.30s\n",
2523 str);
2524 ent = xmlParseStringEntityRef(ctxt, &str);
Daniel Veillard8915c152008-08-26 13:05:34 +00002525 if ((ctxt->lastError.code == XML_ERR_ENTITY_LOOP) ||
2526 (ctxt->lastError.code == XML_ERR_INTERNAL_ERROR))
Daniel Veillard4bf899b2008-08-20 17:04:30 +00002527 goto int_error;
Daniel Veillard4bf899b2008-08-20 17:04:30 +00002528 if (ent != NULL)
Daniel Veillardf4f4e482008-08-25 08:57:48 +00002529 ctxt->nbentities += ent->checked;
Owen Taylor3473f882001-02-23 17:55:21 +00002530 if ((ent != NULL) &&
2531 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
2532 if (ent->content != NULL) {
2533 COPY_BUF(0,buffer,nbchars,ent->content[0]);
Daniel Veillardbedc9772005-09-28 21:42:15 +00002534 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
Daniel Veillard0161e632008-08-28 15:36:32 +00002535 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
Daniel Veillardbedc9772005-09-28 21:42:15 +00002536 }
Owen Taylor3473f882001-02-23 17:55:21 +00002537 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00002538 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
2539 "predefined entity has no content\n");
Owen Taylor3473f882001-02-23 17:55:21 +00002540 }
2541 } else if ((ent != NULL) && (ent->content != NULL)) {
Owen Taylor3473f882001-02-23 17:55:21 +00002542 ctxt->depth++;
2543 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2544 0, 0, 0);
2545 ctxt->depth--;
Daniel Veillard0161e632008-08-28 15:36:32 +00002546
Owen Taylor3473f882001-02-23 17:55:21 +00002547 if (rep != NULL) {
2548 current = rep;
2549 while (*current != 0) { /* non input consuming loop */
2550 buffer[nbchars++] = *current++;
2551 if (nbchars >
2552 buffer_size - XML_PARSER_BUFFER_SIZE) {
Daniel Veillard0161e632008-08-28 15:36:32 +00002553 if (xmlParserEntityCheck(ctxt, nbchars, ent))
2554 goto int_error;
2555 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
Owen Taylor3473f882001-02-23 17:55:21 +00002556 }
2557 }
2558 xmlFree(rep);
Daniel Veillard68b6e022008-03-31 09:26:00 +00002559 rep = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00002560 }
2561 } else if (ent != NULL) {
2562 int i = xmlStrlen(ent->name);
2563 const xmlChar *cur = ent->name;
2564
2565 buffer[nbchars++] = '&';
2566 if (nbchars > buffer_size - i - XML_PARSER_BUFFER_SIZE) {
Daniel Veillard0161e632008-08-28 15:36:32 +00002567 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
Owen Taylor3473f882001-02-23 17:55:21 +00002568 }
2569 for (;i > 0;i--)
2570 buffer[nbchars++] = *cur++;
2571 buffer[nbchars++] = ';';
2572 }
2573 } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
2574 if (xmlParserDebugEntities)
2575 xmlGenericError(xmlGenericErrorContext,
2576 "String decoding PE Reference: %.30s\n", str);
2577 ent = xmlParseStringPEReference(ctxt, &str);
Daniel Veillard4bf899b2008-08-20 17:04:30 +00002578 if (ctxt->lastError.code == XML_ERR_ENTITY_LOOP)
2579 goto int_error;
Daniel Veillard4bf899b2008-08-20 17:04:30 +00002580 if (ent != NULL)
Daniel Veillardf4f4e482008-08-25 08:57:48 +00002581 ctxt->nbentities += ent->checked;
Owen Taylor3473f882001-02-23 17:55:21 +00002582 if (ent != NULL) {
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00002583 if (ent->content == NULL) {
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00002584 xmlLoadEntityContent(ctxt, ent);
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00002585 }
Owen Taylor3473f882001-02-23 17:55:21 +00002586 ctxt->depth++;
2587 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2588 0, 0, 0);
2589 ctxt->depth--;
2590 if (rep != NULL) {
2591 current = rep;
2592 while (*current != 0) { /* non input consuming loop */
2593 buffer[nbchars++] = *current++;
2594 if (nbchars >
2595 buffer_size - XML_PARSER_BUFFER_SIZE) {
Daniel Veillard0161e632008-08-28 15:36:32 +00002596 if (xmlParserEntityCheck(ctxt, nbchars, ent))
2597 goto int_error;
2598 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
Owen Taylor3473f882001-02-23 17:55:21 +00002599 }
2600 }
2601 xmlFree(rep);
Daniel Veillard68b6e022008-03-31 09:26:00 +00002602 rep = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00002603 }
2604 }
2605 } else {
2606 COPY_BUF(l,buffer,nbchars,c);
2607 str += l;
2608 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
Daniel Veillard0161e632008-08-28 15:36:32 +00002609 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
Owen Taylor3473f882001-02-23 17:55:21 +00002610 }
2611 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00002612 if (str < last)
2613 c = CUR_SCHAR(str, l);
2614 else
2615 c = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002616 }
2617 buffer[nbchars++] = 0;
2618 return(buffer);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002619
2620mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002621 xmlErrMemory(ctxt, NULL);
Daniel Veillard4bf899b2008-08-20 17:04:30 +00002622int_error:
Daniel Veillard68b6e022008-03-31 09:26:00 +00002623 if (rep != NULL)
2624 xmlFree(rep);
2625 if (buffer != NULL)
2626 xmlFree(buffer);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002627 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002628}
2629
Daniel Veillarde57ec792003-09-10 10:50:59 +00002630/**
2631 * xmlStringDecodeEntities:
2632 * @ctxt: the parser context
2633 * @str: the input string
2634 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2635 * @end: an end marker xmlChar, 0 if none
2636 * @end2: an end marker xmlChar, 0 if none
2637 * @end3: an end marker xmlChar, 0 if none
2638 *
2639 * Takes a entity string content and process to do the adequate substitutions.
2640 *
2641 * [67] Reference ::= EntityRef | CharRef
2642 *
2643 * [69] PEReference ::= '%' Name ';'
2644 *
2645 * Returns A newly allocated string with the substitution done. The caller
2646 * must deallocate it !
2647 */
2648xmlChar *
2649xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
2650 xmlChar end, xmlChar end2, xmlChar end3) {
Daniel Veillarda82b1822004-11-08 16:24:57 +00002651 if ((ctxt == NULL) || (str == NULL)) return(NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00002652 return(xmlStringLenDecodeEntities(ctxt, str, xmlStrlen(str), what,
2653 end, end2, end3));
2654}
Owen Taylor3473f882001-02-23 17:55:21 +00002655
2656/************************************************************************
2657 * *
Owen Taylor3473f882001-02-23 17:55:21 +00002658 * Commodity functions, cleanup needed ? *
2659 * *
2660 ************************************************************************/
2661
2662/**
2663 * areBlanks:
2664 * @ctxt: an XML parser context
2665 * @str: a xmlChar *
2666 * @len: the size of @str
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002667 * @blank_chars: we know the chars are blanks
Owen Taylor3473f882001-02-23 17:55:21 +00002668 *
2669 * Is this a sequence of blank chars that one can ignore ?
2670 *
2671 * Returns 1 if ignorable 0 otherwise.
2672 */
2673
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002674static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2675 int blank_chars) {
Owen Taylor3473f882001-02-23 17:55:21 +00002676 int i, ret;
2677 xmlNodePtr lastChild;
2678
Daniel Veillard05c13a22001-09-09 08:38:09 +00002679 /*
2680 * Don't spend time trying to differentiate them, the same callback is
2681 * used !
2682 */
2683 if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters)
Daniel Veillard2f362242001-03-02 17:36:21 +00002684 return(0);
2685
Owen Taylor3473f882001-02-23 17:55:21 +00002686 /*
2687 * Check for xml:space value.
2688 */
Daniel Veillard1114d002006-10-12 16:24:35 +00002689 if ((ctxt->space == NULL) || (*(ctxt->space) == 1) ||
2690 (*(ctxt->space) == -2))
Owen Taylor3473f882001-02-23 17:55:21 +00002691 return(0);
2692
2693 /*
2694 * Check that the string is made of blanks
2695 */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002696 if (blank_chars == 0) {
2697 for (i = 0;i < len;i++)
2698 if (!(IS_BLANK_CH(str[i]))) return(0);
2699 }
Owen Taylor3473f882001-02-23 17:55:21 +00002700
2701 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002702 * Look if the element is mixed content in the DTD if available
Owen Taylor3473f882001-02-23 17:55:21 +00002703 */
Daniel Veillard6dd398f2001-07-25 22:41:03 +00002704 if (ctxt->node == NULL) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002705 if (ctxt->myDoc != NULL) {
2706 ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
2707 if (ret == 0) return(1);
2708 if (ret == 1) return(0);
2709 }
2710
2711 /*
2712 * Otherwise, heuristic :-\
2713 */
Daniel Veillardabac41e2005-07-06 15:17:38 +00002714 if ((RAW != '<') && (RAW != 0xD)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002715 if ((ctxt->node->children == NULL) &&
2716 (RAW == '<') && (NXT(1) == '/')) return(0);
2717
2718 lastChild = xmlGetLastChild(ctxt->node);
2719 if (lastChild == NULL) {
Daniel Veillard7db37732001-07-12 01:20:08 +00002720 if ((ctxt->node->type != XML_ELEMENT_NODE) &&
2721 (ctxt->node->content != NULL)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002722 } else if (xmlNodeIsText(lastChild))
2723 return(0);
2724 else if ((ctxt->node->children != NULL) &&
2725 (xmlNodeIsText(ctxt->node->children)))
2726 return(0);
2727 return(1);
2728}
2729
Owen Taylor3473f882001-02-23 17:55:21 +00002730/************************************************************************
2731 * *
2732 * Extra stuff for namespace support *
2733 * Relates to http://www.w3.org/TR/WD-xml-names *
2734 * *
2735 ************************************************************************/
2736
2737/**
2738 * xmlSplitQName:
2739 * @ctxt: an XML parser context
2740 * @name: an XML parser context
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00002741 * @prefix: a xmlChar **
Owen Taylor3473f882001-02-23 17:55:21 +00002742 *
2743 * parse an UTF8 encoded XML qualified name string
2744 *
2745 * [NS 5] QName ::= (Prefix ':')? LocalPart
2746 *
2747 * [NS 6] Prefix ::= NCName
2748 *
2749 * [NS 7] LocalPart ::= NCName
2750 *
2751 * Returns the local part, and prefix is updated
2752 * to get the Prefix if any.
2753 */
2754
2755xmlChar *
2756xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
2757 xmlChar buf[XML_MAX_NAMELEN + 5];
2758 xmlChar *buffer = NULL;
2759 int len = 0;
2760 int max = XML_MAX_NAMELEN;
2761 xmlChar *ret = NULL;
2762 const xmlChar *cur = name;
2763 int c;
2764
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00002765 if (prefix == NULL) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002766 *prefix = NULL;
2767
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00002768 if (cur == NULL) return(NULL);
2769
Owen Taylor3473f882001-02-23 17:55:21 +00002770#ifndef XML_XML_NAMESPACE
2771 /* xml: prefix is not really a namespace */
2772 if ((cur[0] == 'x') && (cur[1] == 'm') &&
2773 (cur[2] == 'l') && (cur[3] == ':'))
2774 return(xmlStrdup(name));
2775#endif
2776
Daniel Veillard597bc482003-07-24 16:08:28 +00002777 /* nasty but well=formed */
Owen Taylor3473f882001-02-23 17:55:21 +00002778 if (cur[0] == ':')
2779 return(xmlStrdup(name));
2780
2781 c = *cur++;
2782 while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
2783 buf[len++] = c;
2784 c = *cur++;
2785 }
2786 if (len >= max) {
2787 /*
2788 * Okay someone managed to make a huge name, so he's ready to pay
2789 * for the processing speed.
2790 */
2791 max = len * 2;
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00002792
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002793 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002794 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002795 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002796 return(NULL);
2797 }
2798 memcpy(buffer, buf, len);
2799 while ((c != 0) && (c != ':')) { /* tested bigname.xml */
2800 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00002801 xmlChar *tmp;
2802
Owen Taylor3473f882001-02-23 17:55:21 +00002803 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00002804 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00002805 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00002806 if (tmp == NULL) {
Daniel Veillard68b6e022008-03-31 09:26:00 +00002807 xmlFree(buffer);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002808 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002809 return(NULL);
2810 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00002811 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00002812 }
2813 buffer[len++] = c;
2814 c = *cur++;
2815 }
2816 buffer[len] = 0;
2817 }
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00002818
Daniel Veillard597bc482003-07-24 16:08:28 +00002819 if ((c == ':') && (*cur == 0)) {
Daniel Veillard02a49632006-10-13 12:42:31 +00002820 if (buffer != NULL)
2821 xmlFree(buffer);
2822 *prefix = NULL;
Daniel Veillard597bc482003-07-24 16:08:28 +00002823 return(xmlStrdup(name));
Daniel Veillard02a49632006-10-13 12:42:31 +00002824 }
Daniel Veillard597bc482003-07-24 16:08:28 +00002825
Owen Taylor3473f882001-02-23 17:55:21 +00002826 if (buffer == NULL)
2827 ret = xmlStrndup(buf, len);
2828 else {
2829 ret = buffer;
2830 buffer = NULL;
2831 max = XML_MAX_NAMELEN;
2832 }
2833
2834
2835 if (c == ':') {
Daniel Veillardbb284f42002-10-16 18:02:47 +00002836 c = *cur;
Owen Taylor3473f882001-02-23 17:55:21 +00002837 *prefix = ret;
Daniel Veillard597bc482003-07-24 16:08:28 +00002838 if (c == 0) {
Daniel Veillard8d73bcb2003-08-04 01:06:15 +00002839 return(xmlStrndup(BAD_CAST "", 0));
Daniel Veillard597bc482003-07-24 16:08:28 +00002840 }
Owen Taylor3473f882001-02-23 17:55:21 +00002841 len = 0;
2842
Daniel Veillardbb284f42002-10-16 18:02:47 +00002843 /*
2844 * Check that the first character is proper to start
2845 * a new name
2846 */
2847 if (!(((c >= 0x61) && (c <= 0x7A)) ||
2848 ((c >= 0x41) && (c <= 0x5A)) ||
2849 (c == '_') || (c == ':'))) {
2850 int l;
2851 int first = CUR_SCHAR(cur, l);
2852
2853 if (!IS_LETTER(first) && (first != '_')) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00002854 xmlFatalErrMsgStr(ctxt, XML_NS_ERR_QNAME,
Daniel Veillardbb284f42002-10-16 18:02:47 +00002855 "Name %s is not XML Namespace compliant\n",
Daniel Veillardbc92eca2003-09-15 09:48:06 +00002856 name);
Daniel Veillardbb284f42002-10-16 18:02:47 +00002857 }
2858 }
2859 cur++;
2860
Owen Taylor3473f882001-02-23 17:55:21 +00002861 while ((c != 0) && (len < max)) { /* tested bigname2.xml */
2862 buf[len++] = c;
2863 c = *cur++;
2864 }
2865 if (len >= max) {
2866 /*
2867 * Okay someone managed to make a huge name, so he's ready to pay
2868 * for the processing speed.
2869 */
2870 max = len * 2;
Daniel Veillard68b6e022008-03-31 09:26:00 +00002871
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002872 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002873 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002874 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002875 return(NULL);
2876 }
2877 memcpy(buffer, buf, len);
2878 while (c != 0) { /* tested bigname2.xml */
2879 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00002880 xmlChar *tmp;
2881
Owen Taylor3473f882001-02-23 17:55:21 +00002882 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00002883 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00002884 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00002885 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002886 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00002887 xmlFree(buffer);
Owen Taylor3473f882001-02-23 17:55:21 +00002888 return(NULL);
2889 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00002890 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00002891 }
2892 buffer[len++] = c;
2893 c = *cur++;
2894 }
2895 buffer[len] = 0;
2896 }
Daniel Veillard68b6e022008-03-31 09:26:00 +00002897
Owen Taylor3473f882001-02-23 17:55:21 +00002898 if (buffer == NULL)
2899 ret = xmlStrndup(buf, len);
2900 else {
2901 ret = buffer;
2902 }
2903 }
2904
2905 return(ret);
2906}
2907
2908/************************************************************************
2909 * *
2910 * The parser itself *
2911 * Relates to http://www.w3.org/TR/REC-xml *
2912 * *
2913 ************************************************************************/
2914
Daniel Veillard34e3f642008-07-29 09:02:27 +00002915/************************************************************************
2916 * *
2917 * Routines to parse Name, NCName and NmToken *
2918 * *
2919 ************************************************************************/
/*
 * Counters with external linkage, all starting at zero.
 * nbParseNameComplex is incremented on every entry into
 * xmlParseNameComplex(); the others are presumably maintained the same
 * way by the similarly named parsing routines (to be confirmed against
 * those routines).
 */
unsigned long nbParseName = 0;
unsigned long nbParseNmToken = 0;
unsigned long nbParseNCName = 0;
unsigned long nbParseNCNameComplex = 0;
unsigned long nbParseNameComplex = 0;
unsigned long nbParseStringName = 0;
2926/*
2927 * The two following functions are related to the change of accepted
2928 * characters for Name and NmToken in the Revision 5 of XML-1.0
2929 * They correspond to the modified production [4] and the new production [4a]
2930 * changes in that revision. Also note that the macros used for the
2931 * productions Letter, Digit, CombiningChar and Extender are not needed
2932 * anymore.
2933 * We still keep compatibility to pre-revision5 parsing semantic if the
2934 * new XML_PARSE_OLD10 option is given to the parser.
2935 */
2936static int
2937xmlIsNameStartChar(xmlParserCtxtPtr ctxt, int c) {
2938 if ((ctxt->options & XML_PARSE_OLD10) == 0) {
2939 /*
2940 * Use the new checks of production [4] [4a] amd [5] of the
2941 * Update 5 of XML-1.0
2942 */
2943 if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
2944 (((c >= 'a') && (c <= 'z')) ||
2945 ((c >= 'A') && (c <= 'Z')) ||
2946 (c == '_') || (c == ':') ||
2947 ((c >= 0xC0) && (c <= 0xD6)) ||
2948 ((c >= 0xD8) && (c <= 0xF6)) ||
2949 ((c >= 0xF8) && (c <= 0x2FF)) ||
2950 ((c >= 0x370) && (c <= 0x37D)) ||
2951 ((c >= 0x37F) && (c <= 0x1FFF)) ||
2952 ((c >= 0x200C) && (c <= 0x200D)) ||
2953 ((c >= 0x2070) && (c <= 0x218F)) ||
2954 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
2955 ((c >= 0x3001) && (c <= 0xD7FF)) ||
2956 ((c >= 0xF900) && (c <= 0xFDCF)) ||
2957 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
2958 ((c >= 0x10000) && (c <= 0xEFFFF))))
2959 return(1);
2960 } else {
2961 if (IS_LETTER(c) || (c == '_') || (c == ':'))
2962 return(1);
2963 }
2964 return(0);
2965}
2966
2967static int
2968xmlIsNameChar(xmlParserCtxtPtr ctxt, int c) {
2969 if ((ctxt->options & XML_PARSE_OLD10) == 0) {
2970 /*
2971 * Use the new checks of production [4] [4a] amd [5] of the
2972 * Update 5 of XML-1.0
2973 */
2974 if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
2975 (((c >= 'a') && (c <= 'z')) ||
2976 ((c >= 'A') && (c <= 'Z')) ||
2977 ((c >= '0') && (c <= '9')) || /* !start */
2978 (c == '_') || (c == ':') ||
2979 (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
2980 ((c >= 0xC0) && (c <= 0xD6)) ||
2981 ((c >= 0xD8) && (c <= 0xF6)) ||
2982 ((c >= 0xF8) && (c <= 0x2FF)) ||
2983 ((c >= 0x300) && (c <= 0x36F)) || /* !start */
2984 ((c >= 0x370) && (c <= 0x37D)) ||
2985 ((c >= 0x37F) && (c <= 0x1FFF)) ||
2986 ((c >= 0x200C) && (c <= 0x200D)) ||
2987 ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
2988 ((c >= 0x2070) && (c <= 0x218F)) ||
2989 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
2990 ((c >= 0x3001) && (c <= 0xD7FF)) ||
2991 ((c >= 0xF900) && (c <= 0xFDCF)) ||
2992 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
2993 ((c >= 0x10000) && (c <= 0xEFFFF))))
2994 return(1);
2995 } else {
2996 if ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
2997 (c == '.') || (c == '-') ||
2998 (c == '_') || (c == ':') ||
2999 (IS_COMBINING(c)) ||
3000 (IS_EXTENDER(c)))
3001 return(1);
3002 }
3003 return(0);
3004}
3005
/*
 * Forward declaration of a file-local (static) routine so that it can be
 * referenced before its definition.
 */
static xmlChar * xmlParseAttValueInternal(xmlParserCtxtPtr ctxt,
 int *len, int *alloc, int normalize);
Daniel Veillard0fb18932003-09-07 09:14:37 +00003008
Daniel Veillard34e3f642008-07-29 09:02:27 +00003009static const xmlChar *
3010xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
3011 int len = 0, l;
3012 int c;
3013 int count = 0;
3014
3015 nbParseNameComplex++;
3016
3017 /*
3018 * Handler for more complex cases
3019 */
3020 GROW;
3021 c = CUR_CHAR(l);
3022 if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3023 /*
3024 * Use the new checks of production [4] [4a] amd [5] of the
3025 * Update 5 of XML-1.0
3026 */
3027 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3028 (!(((c >= 'a') && (c <= 'z')) ||
3029 ((c >= 'A') && (c <= 'Z')) ||
3030 (c == '_') || (c == ':') ||
3031 ((c >= 0xC0) && (c <= 0xD6)) ||
3032 ((c >= 0xD8) && (c <= 0xF6)) ||
3033 ((c >= 0xF8) && (c <= 0x2FF)) ||
3034 ((c >= 0x370) && (c <= 0x37D)) ||
3035 ((c >= 0x37F) && (c <= 0x1FFF)) ||
3036 ((c >= 0x200C) && (c <= 0x200D)) ||
3037 ((c >= 0x2070) && (c <= 0x218F)) ||
3038 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3039 ((c >= 0x3001) && (c <= 0xD7FF)) ||
3040 ((c >= 0xF900) && (c <= 0xFDCF)) ||
3041 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3042 ((c >= 0x10000) && (c <= 0xEFFFF))))) {
3043 return(NULL);
3044 }
3045 len += l;
3046 NEXTL(l);
3047 c = CUR_CHAR(l);
3048 while ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3049 (((c >= 'a') && (c <= 'z')) ||
3050 ((c >= 'A') && (c <= 'Z')) ||
3051 ((c >= '0') && (c <= '9')) || /* !start */
3052 (c == '_') || (c == ':') ||
3053 (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3054 ((c >= 0xC0) && (c <= 0xD6)) ||
3055 ((c >= 0xD8) && (c <= 0xF6)) ||
3056 ((c >= 0xF8) && (c <= 0x2FF)) ||
3057 ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3058 ((c >= 0x370) && (c <= 0x37D)) ||
3059 ((c >= 0x37F) && (c <= 0x1FFF)) ||
3060 ((c >= 0x200C) && (c <= 0x200D)) ||
3061 ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3062 ((c >= 0x2070) && (c <= 0x218F)) ||
3063 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3064 ((c >= 0x3001) && (c <= 0xD7FF)) ||
3065 ((c >= 0xF900) && (c <= 0xFDCF)) ||
3066 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3067 ((c >= 0x10000) && (c <= 0xEFFFF))
3068 )) {
3069 if (count++ > 100) {
3070 count = 0;
3071 GROW;
3072 }
3073 len += l;
3074 NEXTL(l);
3075 c = CUR_CHAR(l);
3076 }
3077 } else {
3078 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3079 (!IS_LETTER(c) && (c != '_') &&
3080 (c != ':'))) {
3081 return(NULL);
3082 }
3083 len += l;
3084 NEXTL(l);
3085 c = CUR_CHAR(l);
3086
3087 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3088 ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3089 (c == '.') || (c == '-') ||
3090 (c == '_') || (c == ':') ||
3091 (IS_COMBINING(c)) ||
3092 (IS_EXTENDER(c)))) {
3093 if (count++ > 100) {
3094 count = 0;
3095 GROW;
3096 }
3097 len += l;
3098 NEXTL(l);
3099 c = CUR_CHAR(l);
3100 }
3101 }
3102 if ((*ctxt->input->cur == '\n') && (ctxt->input->cur[-1] == '\r'))
3103 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - (len + 1), len));
3104 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
3105}
3106
Owen Taylor3473f882001-02-23 17:55:21 +00003107/**
3108 * xmlParseName:
3109 * @ctxt: an XML parser context
3110 *
3111 * parse an XML name.
3112 *
3113 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3114 * CombiningChar | Extender
3115 *
3116 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3117 *
Daniel Veillard807b4de2004-09-26 14:42:56 +00003118 * [6] Names ::= Name (#x20 Name)*
Owen Taylor3473f882001-02-23 17:55:21 +00003119 *
3120 * Returns the Name parsed or NULL
3121 */
3122
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003123const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00003124xmlParseName(xmlParserCtxtPtr ctxt) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00003125 const xmlChar *in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003126 const xmlChar *ret;
Owen Taylor3473f882001-02-23 17:55:21 +00003127 int count = 0;
3128
3129 GROW;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003130
Daniel Veillard34e3f642008-07-29 09:02:27 +00003131 nbParseName++;
3132
Daniel Veillard48b2f892001-02-25 16:11:03 +00003133 /*
3134 * Accelerator for simple ASCII names
3135 */
3136 in = ctxt->input->cur;
3137 if (((*in >= 0x61) && (*in <= 0x7A)) ||
3138 ((*in >= 0x41) && (*in <= 0x5A)) ||
3139 (*in == '_') || (*in == ':')) {
3140 in++;
3141 while (((*in >= 0x61) && (*in <= 0x7A)) ||
3142 ((*in >= 0x41) && (*in <= 0x5A)) ||
3143 ((*in >= 0x30) && (*in <= 0x39)) ||
Daniel Veillard76d66f42001-05-16 21:05:17 +00003144 (*in == '_') || (*in == '-') ||
3145 (*in == ':') || (*in == '.'))
Daniel Veillard48b2f892001-02-25 16:11:03 +00003146 in++;
Daniel Veillard76d66f42001-05-16 21:05:17 +00003147 if ((*in > 0) && (*in < 0x80)) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00003148 count = in - ctxt->input->cur;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003149 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
Daniel Veillard48b2f892001-02-25 16:11:03 +00003150 ctxt->input->cur = in;
Daniel Veillard77a90a72003-03-22 00:04:05 +00003151 ctxt->nbChars += count;
3152 ctxt->input->col += count;
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003153 if (ret == NULL)
3154 xmlErrMemory(ctxt, NULL);
Daniel Veillard48b2f892001-02-25 16:11:03 +00003155 return(ret);
3156 }
3157 }
Daniel Veillard34e3f642008-07-29 09:02:27 +00003158 /* accelerator for special cases */
Daniel Veillard2f362242001-03-02 17:36:21 +00003159 return(xmlParseNameComplex(ctxt));
Daniel Veillard21a0f912001-02-25 19:54:14 +00003160}
Daniel Veillard48b2f892001-02-25 16:11:03 +00003161
Daniel Veillard34e3f642008-07-29 09:02:27 +00003162static const xmlChar *
3163xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) {
3164 int len = 0, l;
3165 int c;
3166 int count = 0;
3167
3168 nbParseNCNameComplex++;
3169
3170 /*
3171 * Handler for more complex cases
3172 */
3173 GROW;
3174 c = CUR_CHAR(l);
3175 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3176 (!xmlIsNameStartChar(ctxt, c) || (c == ':'))) {
3177 return(NULL);
3178 }
3179
3180 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3181 (xmlIsNameChar(ctxt, c) && (c != ':'))) {
3182 if (count++ > 100) {
3183 count = 0;
3184 GROW;
3185 }
3186 len += l;
3187 NEXTL(l);
3188 c = CUR_CHAR(l);
3189 }
3190 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
3191}
3192
3193/**
3194 * xmlParseNCName:
3195 * @ctxt: an XML parser context
3196 * @len: lenght of the string parsed
3197 *
3198 * parse an XML name.
3199 *
3200 * [4NS] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
3201 * CombiningChar | Extender
3202 *
3203 * [5NS] NCName ::= (Letter | '_') (NCNameChar)*
3204 *
3205 * Returns the Name parsed or NULL
3206 */
3207
3208static const xmlChar *
3209xmlParseNCName(xmlParserCtxtPtr ctxt) {
3210 const xmlChar *in;
3211 const xmlChar *ret;
3212 int count = 0;
3213
3214 nbParseNCName++;
3215
3216 /*
3217 * Accelerator for simple ASCII names
3218 */
3219 in = ctxt->input->cur;
3220 if (((*in >= 0x61) && (*in <= 0x7A)) ||
3221 ((*in >= 0x41) && (*in <= 0x5A)) ||
3222 (*in == '_')) {
3223 in++;
3224 while (((*in >= 0x61) && (*in <= 0x7A)) ||
3225 ((*in >= 0x41) && (*in <= 0x5A)) ||
3226 ((*in >= 0x30) && (*in <= 0x39)) ||
3227 (*in == '_') || (*in == '-') ||
3228 (*in == '.'))
3229 in++;
3230 if ((*in > 0) && (*in < 0x80)) {
3231 count = in - ctxt->input->cur;
3232 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
3233 ctxt->input->cur = in;
3234 ctxt->nbChars += count;
3235 ctxt->input->col += count;
3236 if (ret == NULL) {
3237 xmlErrMemory(ctxt, NULL);
3238 }
3239 return(ret);
3240 }
3241 }
3242 return(xmlParseNCNameComplex(ctxt));
3243}
3244
Daniel Veillard46de64e2002-05-29 08:21:33 +00003245/**
3246 * xmlParseNameAndCompare:
3247 * @ctxt: an XML parser context
3248 *
3249 * parse an XML name and compares for match
3250 * (specialized for endtag parsing)
3251 *
Daniel Veillard46de64e2002-05-29 08:21:33 +00003252 * Returns NULL for an illegal name, (xmlChar*) 1 for success
3253 * and the name for mismatch
3254 */
3255
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003256static const xmlChar *
Daniel Veillard46de64e2002-05-29 08:21:33 +00003257xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003258 register const xmlChar *cmp = other;
3259 register const xmlChar *in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003260 const xmlChar *ret;
Daniel Veillard46de64e2002-05-29 08:21:33 +00003261
3262 GROW;
Daniel Veillard34e3f642008-07-29 09:02:27 +00003263
Daniel Veillard46de64e2002-05-29 08:21:33 +00003264 in = ctxt->input->cur;
3265 while (*in != 0 && *in == *cmp) {
Daniel Veillard34e3f642008-07-29 09:02:27 +00003266 ++in;
Daniel Veillard46de64e2002-05-29 08:21:33 +00003267 ++cmp;
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00003268 ctxt->input->col++;
Daniel Veillard46de64e2002-05-29 08:21:33 +00003269 }
William M. Brack76e95df2003-10-18 16:20:14 +00003270 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
Daniel Veillard34e3f642008-07-29 09:02:27 +00003271 /* success */
Daniel Veillard46de64e2002-05-29 08:21:33 +00003272 ctxt->input->cur = in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003273 return (const xmlChar*) 1;
Daniel Veillard46de64e2002-05-29 08:21:33 +00003274 }
3275 /* failure (or end of input buffer), check with full function */
3276 ret = xmlParseName (ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00003277 /* strings coming from the dictionnary direct compare possible */
3278 if (ret == other) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003279 return (const xmlChar*) 1;
Daniel Veillard46de64e2002-05-29 08:21:33 +00003280 }
3281 return ret;
3282}
3283
Owen Taylor3473f882001-02-23 17:55:21 +00003284/**
3285 * xmlParseStringName:
3286 * @ctxt: an XML parser context
3287 * @str: a pointer to the string pointer (IN/OUT)
3288 *
3289 * parse an XML name.
3290 *
3291 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3292 * CombiningChar | Extender
3293 *
3294 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3295 *
Daniel Veillard807b4de2004-09-26 14:42:56 +00003296 * [6] Names ::= Name (#x20 Name)*
Owen Taylor3473f882001-02-23 17:55:21 +00003297 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003298 * Returns the Name parsed or NULL. The @str pointer
Owen Taylor3473f882001-02-23 17:55:21 +00003299 * is updated to the current location in the string.
3300 */
3301
Daniel Veillard56a4cb82001-03-24 17:00:36 +00003302static xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00003303xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
3304 xmlChar buf[XML_MAX_NAMELEN + 5];
3305 const xmlChar *cur = *str;
3306 int len = 0, l;
3307 int c;
3308
Daniel Veillard34e3f642008-07-29 09:02:27 +00003309 nbParseStringName++;
3310
Owen Taylor3473f882001-02-23 17:55:21 +00003311 c = CUR_SCHAR(cur, l);
Daniel Veillard34e3f642008-07-29 09:02:27 +00003312 if (!xmlIsNameStartChar(ctxt, c)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003313 return(NULL);
3314 }
3315
Daniel Veillard34e3f642008-07-29 09:02:27 +00003316 COPY_BUF(l,buf,len,c);
3317 cur += l;
3318 c = CUR_SCHAR(cur, l);
3319 while (xmlIsNameChar(ctxt, c)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003320 COPY_BUF(l,buf,len,c);
3321 cur += l;
3322 c = CUR_SCHAR(cur, l);
3323 if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
3324 /*
3325 * Okay someone managed to make a huge name, so he's ready to pay
3326 * for the processing speed.
3327 */
3328 xmlChar *buffer;
3329 int max = len * 2;
Daniel Veillard34e3f642008-07-29 09:02:27 +00003330
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003331 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003332 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003333 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003334 return(NULL);
3335 }
3336 memcpy(buffer, buf, len);
Daniel Veillard34e3f642008-07-29 09:02:27 +00003337 while (xmlIsNameChar(ctxt, c)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003338 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003339 xmlChar *tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003340 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003341 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00003342 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00003343 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003344 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00003345 xmlFree(buffer);
Owen Taylor3473f882001-02-23 17:55:21 +00003346 return(NULL);
3347 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003348 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003349 }
3350 COPY_BUF(l,buffer,len,c);
3351 cur += l;
3352 c = CUR_SCHAR(cur, l);
3353 }
3354 buffer[len] = 0;
3355 *str = cur;
3356 return(buffer);
3357 }
3358 }
3359 *str = cur;
3360 return(xmlStrndup(buf, len));
3361}
3362
3363/**
3364 * xmlParseNmtoken:
3365 * @ctxt: an XML parser context
Daniel Veillard34e3f642008-07-29 09:02:27 +00003366 *
Owen Taylor3473f882001-02-23 17:55:21 +00003367 * parse an XML Nmtoken.
3368 *
3369 * [7] Nmtoken ::= (NameChar)+
3370 *
Daniel Veillard807b4de2004-09-26 14:42:56 +00003371 * [8] Nmtokens ::= Nmtoken (#x20 Nmtoken)*
Owen Taylor3473f882001-02-23 17:55:21 +00003372 *
3373 * Returns the Nmtoken parsed or NULL
3374 */
3375
3376xmlChar *
3377xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
3378 xmlChar buf[XML_MAX_NAMELEN + 5];
3379 int len = 0, l;
3380 int c;
3381 int count = 0;
3382
Daniel Veillard34e3f642008-07-29 09:02:27 +00003383 nbParseNmToken++;
3384
Owen Taylor3473f882001-02-23 17:55:21 +00003385 GROW;
3386 c = CUR_CHAR(l);
3387
Daniel Veillard34e3f642008-07-29 09:02:27 +00003388 while (xmlIsNameChar(ctxt, c)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003389 if (count++ > 100) {
3390 count = 0;
3391 GROW;
3392 }
3393 COPY_BUF(l,buf,len,c);
3394 NEXTL(l);
3395 c = CUR_CHAR(l);
3396 if (len >= XML_MAX_NAMELEN) {
3397 /*
3398 * Okay someone managed to make a huge token, so he's ready to pay
3399 * for the processing speed.
3400 */
3401 xmlChar *buffer;
3402 int max = len * 2;
Daniel Veillard34e3f642008-07-29 09:02:27 +00003403
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003404 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003405 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003406 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003407 return(NULL);
3408 }
3409 memcpy(buffer, buf, len);
Daniel Veillard34e3f642008-07-29 09:02:27 +00003410 while (xmlIsNameChar(ctxt, c)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003411 if (count++ > 100) {
3412 count = 0;
3413 GROW;
3414 }
3415 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003416 xmlChar *tmp;
3417
Owen Taylor3473f882001-02-23 17:55:21 +00003418 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003419 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00003420 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00003421 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003422 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00003423 xmlFree(buffer);
Owen Taylor3473f882001-02-23 17:55:21 +00003424 return(NULL);
3425 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003426 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003427 }
3428 COPY_BUF(l,buffer,len,c);
3429 NEXTL(l);
3430 c = CUR_CHAR(l);
3431 }
3432 buffer[len] = 0;
3433 return(buffer);
3434 }
3435 }
3436 if (len == 0)
3437 return(NULL);
3438 return(xmlStrndup(buf, len));
3439}
3440
3441/**
3442 * xmlParseEntityValue:
3443 * @ctxt: an XML parser context
3444 * @orig: if non-NULL store a copy of the original entity value
3445 *
3446 * parse a value for ENTITY declarations
3447 *
3448 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
3449 * "'" ([^%&'] | PEReference | Reference)* "'"
3450 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003451 * Returns the EntityValue parsed with reference substituted or NULL
Owen Taylor3473f882001-02-23 17:55:21 +00003452 */
3453
3454xmlChar *
3455xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
3456 xmlChar *buf = NULL;
3457 int len = 0;
3458 int size = XML_PARSER_BUFFER_SIZE;
3459 int c, l;
3460 xmlChar stop;
3461 xmlChar *ret = NULL;
3462 const xmlChar *cur = NULL;
3463 xmlParserInputPtr input;
3464
3465 if (RAW == '"') stop = '"';
3466 else if (RAW == '\'') stop = '\'';
3467 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003468 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003469 return(NULL);
3470 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003471 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003472 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003473 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003474 return(NULL);
3475 }
3476
3477 /*
3478 * The content of the entity definition is copied in a buffer.
3479 */
3480
3481 ctxt->instate = XML_PARSER_ENTITY_VALUE;
3482 input = ctxt->input;
3483 GROW;
3484 NEXT;
3485 c = CUR_CHAR(l);
3486 /*
3487 * NOTE: 4.4.5 Included in Literal
3488 * When a parameter entity reference appears in a literal entity
3489 * value, ... a single or double quote character in the replacement
3490 * text is always treated as a normal data character and will not
3491 * terminate the literal.
3492 * In practice it means we stop the loop only when back at parsing
3493 * the initial entity and the quote is found
3494 */
William M. Brack871611b2003-10-18 04:53:14 +00003495 while ((IS_CHAR(c)) && ((c != stop) || /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00003496 (ctxt->input != input))) {
3497 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003498 xmlChar *tmp;
3499
Owen Taylor3473f882001-02-23 17:55:21 +00003500 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003501 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3502 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003503 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00003504 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00003505 return(NULL);
3506 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003507 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003508 }
3509 COPY_BUF(l,buf,len,c);
3510 NEXTL(l);
3511 /*
3512 * Pop-up of finished entities.
3513 */
3514 while ((RAW == 0) && (ctxt->inputNr > 1)) /* non input consuming */
3515 xmlPopInput(ctxt);
3516
3517 GROW;
3518 c = CUR_CHAR(l);
3519 if (c == 0) {
3520 GROW;
3521 c = CUR_CHAR(l);
3522 }
3523 }
3524 buf[len] = 0;
3525
3526 /*
3527 * Raise problem w.r.t. '&' and '%' being used in non-entities
3528 * reference constructs. Note Charref will be handled in
3529 * xmlStringDecodeEntities()
3530 */
3531 cur = buf;
3532 while (*cur != 0) { /* non input consuming */
3533 if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
3534 xmlChar *name;
3535 xmlChar tmp = *cur;
3536
3537 cur++;
3538 name = xmlParseStringName(ctxt, &cur);
3539 if ((name == NULL) || (*cur != ';')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003540 xmlFatalErrMsgInt(ctxt, XML_ERR_ENTITY_CHAR_ERROR,
Owen Taylor3473f882001-02-23 17:55:21 +00003541 "EntityValue: '%c' forbidden except for entities references\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003542 tmp);
Owen Taylor3473f882001-02-23 17:55:21 +00003543 }
Daniel Veillard5151c062001-10-23 13:10:19 +00003544 if ((tmp == '%') && (ctxt->inSubset == 1) &&
3545 (ctxt->inputNr == 1)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003546 xmlFatalErr(ctxt, XML_ERR_ENTITY_PE_INTERNAL, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003547 }
3548 if (name != NULL)
3549 xmlFree(name);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00003550 if (*cur == 0)
3551 break;
Owen Taylor3473f882001-02-23 17:55:21 +00003552 }
3553 cur++;
3554 }
3555
3556 /*
3557 * Then PEReference entities are substituted.
3558 */
3559 if (c != stop) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003560 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003561 xmlFree(buf);
3562 } else {
3563 NEXT;
3564 /*
3565 * NOTE: 4.4.7 Bypassed
3566 * When a general entity reference appears in the EntityValue in
3567 * an entity declaration, it is bypassed and left as is.
3568 * so XML_SUBSTITUTE_REF is not set here.
3569 */
3570 ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF,
3571 0, 0, 0);
3572 if (orig != NULL)
3573 *orig = buf;
3574 else
3575 xmlFree(buf);
3576 }
3577
3578 return(ret);
3579}
3580
3581/**
Daniel Veillard01c13b52002-12-10 15:19:08 +00003582 * xmlParseAttValueComplex:
3583 * @ctxt: an XML parser context
Daniel Veillard0fb18932003-09-07 09:14:37 +00003584 * @len: the resulting attribute len
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003585 * @normalize: wether to apply the inner normalization
Daniel Veillard01c13b52002-12-10 15:19:08 +00003586 *
3587 * parse a value for an attribute, this is the fallback function
3588 * of xmlParseAttValue() when the attribute parsing requires handling
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003589 * of non-ASCII characters, or normalization compaction.
Daniel Veillard01c13b52002-12-10 15:19:08 +00003590 *
3591 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
3592 */
Daniel Veillard0fb18932003-09-07 09:14:37 +00003593static xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003594xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) {
Daniel Veillarde72c7562002-05-31 09:47:30 +00003595 xmlChar limit = 0;
3596 xmlChar *buf = NULL;
Daniel Veillard68b6e022008-03-31 09:26:00 +00003597 xmlChar *rep = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00003598 int len = 0;
3599 int buf_size = 0;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003600 int c, l, in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003601 xmlChar *current = NULL;
3602 xmlEntityPtr ent;
3603
Owen Taylor3473f882001-02-23 17:55:21 +00003604 if (NXT(0) == '"') {
3605 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3606 limit = '"';
3607 NEXT;
3608 } else if (NXT(0) == '\'') {
3609 limit = '\'';
3610 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3611 NEXT;
3612 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003613 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003614 return(NULL);
3615 }
Daniel Veillard68b6e022008-03-31 09:26:00 +00003616
Owen Taylor3473f882001-02-23 17:55:21 +00003617 /*
3618 * allocate a translation buffer.
3619 */
3620 buf_size = XML_PARSER_BUFFER_SIZE;
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003621 buf = (xmlChar *) xmlMallocAtomic(buf_size * sizeof(xmlChar));
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003622 if (buf == NULL) goto mem_error;
Owen Taylor3473f882001-02-23 17:55:21 +00003623
3624 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003625 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +00003626 */
3627 c = CUR_CHAR(l);
Daniel Veillardfdc91562002-07-01 21:52:03 +00003628 while ((NXT(0) != limit) && /* checked */
Daniel Veillardb9e5acc2007-06-12 13:43:00 +00003629 (IS_CHAR(c)) && (c != '<')) {
Owen Taylor3473f882001-02-23 17:55:21 +00003630 if (c == 0) break;
Daniel Veillardfdc91562002-07-01 21:52:03 +00003631 if (c == '&') {
Daniel Veillard62998c02003-09-15 12:56:36 +00003632 in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003633 if (NXT(1) == '#') {
3634 int val = xmlParseCharRef(ctxt);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003635
Owen Taylor3473f882001-02-23 17:55:21 +00003636 if (val == '&') {
Daniel Veillard319a7422001-09-11 09:27:09 +00003637 if (ctxt->replaceEntities) {
3638 if (len > buf_size - 10) {
Daniel Veillard0161e632008-08-28 15:36:32 +00003639 growBuffer(buf, 10);
Daniel Veillard319a7422001-09-11 09:27:09 +00003640 }
3641 buf[len++] = '&';
3642 } else {
3643 /*
3644 * The reparsing will be done in xmlStringGetNodeList()
3645 * called by the attribute() function in SAX.c
3646 */
Daniel Veillard319a7422001-09-11 09:27:09 +00003647 if (len > buf_size - 10) {
Daniel Veillard0161e632008-08-28 15:36:32 +00003648 growBuffer(buf, 10);
Daniel Veillard319a7422001-09-11 09:27:09 +00003649 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003650 buf[len++] = '&';
3651 buf[len++] = '#';
3652 buf[len++] = '3';
3653 buf[len++] = '8';
3654 buf[len++] = ';';
Owen Taylor3473f882001-02-23 17:55:21 +00003655 }
Daniel Veillarddc171602008-03-26 17:41:38 +00003656 } else if (val != 0) {
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00003657 if (len > buf_size - 10) {
Daniel Veillard0161e632008-08-28 15:36:32 +00003658 growBuffer(buf, 10);
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00003659 }
Owen Taylor3473f882001-02-23 17:55:21 +00003660 len += xmlCopyChar(0, &buf[len], val);
3661 }
3662 } else {
3663 ent = xmlParseEntityRef(ctxt);
Daniel Veillardcba68392008-08-29 12:43:40 +00003664 ctxt->nbentities++;
3665 if (ent != NULL)
3666 ctxt->nbentities += ent->owner;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003667 if ((ent != NULL) &&
3668 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
3669 if (len > buf_size - 10) {
Daniel Veillard0161e632008-08-28 15:36:32 +00003670 growBuffer(buf, 10);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003671 }
3672 if ((ctxt->replaceEntities == 0) &&
3673 (ent->content[0] == '&')) {
3674 buf[len++] = '&';
3675 buf[len++] = '#';
3676 buf[len++] = '3';
3677 buf[len++] = '8';
3678 buf[len++] = ';';
3679 } else {
3680 buf[len++] = ent->content[0];
3681 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003682 } else if ((ent != NULL) &&
3683 (ctxt->replaceEntities != 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003684 if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
3685 rep = xmlStringDecodeEntities(ctxt, ent->content,
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003686 XML_SUBSTITUTE_REF,
3687 0, 0, 0);
Owen Taylor3473f882001-02-23 17:55:21 +00003688 if (rep != NULL) {
3689 current = rep;
3690 while (*current != 0) { /* non input consuming */
3691 buf[len++] = *current++;
3692 if (len > buf_size - 10) {
Daniel Veillard0161e632008-08-28 15:36:32 +00003693 growBuffer(buf, 10);
Owen Taylor3473f882001-02-23 17:55:21 +00003694 }
3695 }
3696 xmlFree(rep);
Daniel Veillard68b6e022008-03-31 09:26:00 +00003697 rep = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00003698 }
3699 } else {
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00003700 if (len > buf_size - 10) {
Daniel Veillard0161e632008-08-28 15:36:32 +00003701 growBuffer(buf, 10);
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00003702 }
Owen Taylor3473f882001-02-23 17:55:21 +00003703 if (ent->content != NULL)
3704 buf[len++] = ent->content[0];
3705 }
3706 } else if (ent != NULL) {
3707 int i = xmlStrlen(ent->name);
3708 const xmlChar *cur = ent->name;
3709
3710 /*
3711 * This may look absurd but is needed to detect
3712 * entities problems
3713 */
3714 if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
3715 (ent->content != NULL)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003716 rep = xmlStringDecodeEntities(ctxt, ent->content,
Daniel Veillard68b6e022008-03-31 09:26:00 +00003717 XML_SUBSTITUTE_REF, 0, 0, 0);
3718 if (rep != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +00003719 xmlFree(rep);
Daniel Veillard68b6e022008-03-31 09:26:00 +00003720 rep = NULL;
3721 }
Owen Taylor3473f882001-02-23 17:55:21 +00003722 }
3723
3724 /*
3725 * Just output the reference
3726 */
3727 buf[len++] = '&';
Daniel Veillard0161e632008-08-28 15:36:32 +00003728 while (len > buf_size - i - 10) {
3729 growBuffer(buf, i + 10);
Owen Taylor3473f882001-02-23 17:55:21 +00003730 }
3731 for (;i > 0;i--)
3732 buf[len++] = *cur++;
3733 buf[len++] = ';';
3734 }
3735 }
3736 } else {
3737 if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003738 if ((len != 0) || (!normalize)) {
3739 if ((!normalize) || (!in_space)) {
3740 COPY_BUF(l,buf,len,0x20);
Daniel Veillard0161e632008-08-28 15:36:32 +00003741 while (len > buf_size - 10) {
3742 growBuffer(buf, 10);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003743 }
3744 }
3745 in_space = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003746 }
3747 } else {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003748 in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003749 COPY_BUF(l,buf,len,c);
3750 if (len > buf_size - 10) {
Daniel Veillard0161e632008-08-28 15:36:32 +00003751 growBuffer(buf, 10);
Owen Taylor3473f882001-02-23 17:55:21 +00003752 }
3753 }
3754 NEXTL(l);
3755 }
3756 GROW;
3757 c = CUR_CHAR(l);
3758 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003759 if ((in_space) && (normalize)) {
3760 while (buf[len - 1] == 0x20) len--;
3761 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00003762 buf[len] = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003763 if (RAW == '<') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003764 xmlFatalErr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003765 } else if (RAW != limit) {
Daniel Veillardb9e5acc2007-06-12 13:43:00 +00003766 if ((c != 0) && (!IS_CHAR(c))) {
3767 xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
3768 "invalid character in attribute value\n");
3769 } else {
3770 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
3771 "AttValue: ' expected\n");
3772 }
Owen Taylor3473f882001-02-23 17:55:21 +00003773 } else
3774 NEXT;
Daniel Veillard0fb18932003-09-07 09:14:37 +00003775 if (attlen != NULL) *attlen = len;
Owen Taylor3473f882001-02-23 17:55:21 +00003776 return(buf);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003777
3778mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003779 xmlErrMemory(ctxt, NULL);
Daniel Veillard8915c152008-08-26 13:05:34 +00003780int_error:
Daniel Veillard68b6e022008-03-31 09:26:00 +00003781 if (buf != NULL)
3782 xmlFree(buf);
3783 if (rep != NULL)
3784 xmlFree(rep);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003785 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003786}
3787
3788/**
Daniel Veillard0fb18932003-09-07 09:14:37 +00003789 * xmlParseAttValue:
3790 * @ctxt: an XML parser context
3791 *
3792 * parse a value for an attribute
3793 * Note: the parser won't do substitution of entities here, this
3794 * will be handled later in xmlStringGetNodeList
3795 *
3796 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
3797 * "'" ([^<&'] | Reference)* "'"
3798 *
3799 * 3.3.3 Attribute-Value Normalization:
3800 * Before the value of an attribute is passed to the application or
3801 * checked for validity, the XML processor must normalize it as follows:
3802 * - a character reference is processed by appending the referenced
3803 * character to the attribute value
3804 * - an entity reference is processed by recursively processing the
3805 * replacement text of the entity
3806 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
3807 * appending #x20 to the normalized value, except that only a single
3808 * #x20 is appended for a "#xD#xA" sequence that is part of an external
3809 * parsed entity or the literal entity value of an internal parsed entity
3810 * - other characters are processed by appending them to the normalized value
3811 * If the declared value is not CDATA, then the XML processor must further
3812 * process the normalized attribute value by discarding any leading and
3813 * trailing space (#x20) characters, and by replacing sequences of space
3814 * (#x20) characters by a single space (#x20) character.
3815 * All attributes for which no declaration has been read should be treated
3816 * by a non-validating parser as if declared CDATA.
3817 *
3818 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
3819 */
3820
3821
3822xmlChar *
3823xmlParseAttValue(xmlParserCtxtPtr ctxt) {
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00003824 if ((ctxt == NULL) || (ctxt->input == NULL)) return(NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003825 return(xmlParseAttValueInternal(ctxt, NULL, NULL, 0));
Daniel Veillard0fb18932003-09-07 09:14:37 +00003826}
3827
3828/**
Owen Taylor3473f882001-02-23 17:55:21 +00003829 * xmlParseSystemLiteral:
3830 * @ctxt: an XML parser context
3831 *
3832 * parse an XML Literal
3833 *
3834 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
3835 *
3836 * Returns the SystemLiteral parsed or NULL
3837 */
3838
3839xmlChar *
3840xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
3841 xmlChar *buf = NULL;
3842 int len = 0;
3843 int size = XML_PARSER_BUFFER_SIZE;
3844 int cur, l;
3845 xmlChar stop;
3846 int state = ctxt->instate;
3847 int count = 0;
3848
3849 SHRINK;
3850 if (RAW == '"') {
3851 NEXT;
3852 stop = '"';
3853 } else if (RAW == '\'') {
3854 NEXT;
3855 stop = '\'';
3856 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003857 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003858 return(NULL);
3859 }
3860
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003861 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003862 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003863 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003864 return(NULL);
3865 }
3866 ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
3867 cur = CUR_CHAR(l);
William M. Brack871611b2003-10-18 04:53:14 +00003868 while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00003869 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003870 xmlChar *tmp;
3871
Owen Taylor3473f882001-02-23 17:55:21 +00003872 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003873 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3874 if (tmp == NULL) {
3875 xmlFree(buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003876 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003877 ctxt->instate = (xmlParserInputState) state;
3878 return(NULL);
3879 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003880 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003881 }
3882 count++;
3883 if (count > 50) {
3884 GROW;
3885 count = 0;
3886 }
3887 COPY_BUF(l,buf,len,cur);
3888 NEXTL(l);
3889 cur = CUR_CHAR(l);
3890 if (cur == 0) {
3891 GROW;
3892 SHRINK;
3893 cur = CUR_CHAR(l);
3894 }
3895 }
3896 buf[len] = 0;
3897 ctxt->instate = (xmlParserInputState) state;
William M. Brack871611b2003-10-18 04:53:14 +00003898 if (!IS_CHAR(cur)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003899 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003900 } else {
3901 NEXT;
3902 }
3903 return(buf);
3904}
3905
3906/**
3907 * xmlParsePubidLiteral:
3908 * @ctxt: an XML parser context
3909 *
3910 * parse an XML public literal
3911 *
3912 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
3913 *
3914 * Returns the PubidLiteral parsed or NULL.
3915 */
3916
3917xmlChar *
3918xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
3919 xmlChar *buf = NULL;
3920 int len = 0;
3921 int size = XML_PARSER_BUFFER_SIZE;
3922 xmlChar cur;
3923 xmlChar stop;
3924 int count = 0;
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003925 xmlParserInputState oldstate = ctxt->instate;
Owen Taylor3473f882001-02-23 17:55:21 +00003926
3927 SHRINK;
3928 if (RAW == '"') {
3929 NEXT;
3930 stop = '"';
3931 } else if (RAW == '\'') {
3932 NEXT;
3933 stop = '\'';
3934 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003935 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003936 return(NULL);
3937 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003938 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003939 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003940 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003941 return(NULL);
3942 }
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003943 ctxt->instate = XML_PARSER_PUBLIC_LITERAL;
Owen Taylor3473f882001-02-23 17:55:21 +00003944 cur = CUR;
William M. Brack76e95df2003-10-18 16:20:14 +00003945 while ((IS_PUBIDCHAR_CH(cur)) && (cur != stop)) { /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00003946 if (len + 1 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003947 xmlChar *tmp;
3948
Owen Taylor3473f882001-02-23 17:55:21 +00003949 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003950 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3951 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003952 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00003953 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00003954 return(NULL);
3955 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003956 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003957 }
3958 buf[len++] = cur;
3959 count++;
3960 if (count > 50) {
3961 GROW;
3962 count = 0;
3963 }
3964 NEXT;
3965 cur = CUR;
3966 if (cur == 0) {
3967 GROW;
3968 SHRINK;
3969 cur = CUR;
3970 }
3971 }
3972 buf[len] = 0;
3973 if (cur != stop) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003974 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003975 } else {
3976 NEXT;
3977 }
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003978 ctxt->instate = oldstate;
Owen Taylor3473f882001-02-23 17:55:21 +00003979 return(buf);
3980}
3981
/*
 * Forward declaration: xmlParseCharData() below calls this routine, whose
 * definition comes after it in this file.
 */
Daniel Veillard48b2f892001-02-25 16:11:03 +00003982void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata);
Daniel Veillard0a119eb2005-07-20 13:46:00 +00003983
3984/*
3985 * used for the test in the inner loop of the char data testing
3986 */
/*
 * Lookup table indexed by byte value. An entry is non-zero (it holds the
 * byte value itself) for the bytes the fast scanning loop of
 * xmlParseCharData() may step over: TAB (0x09) and 0x20..0x7F except
 * '&' (0x26), '<' (0x3C) and ']' (0x5D). Every other entry is zero: the
 * remaining control characters (LF and CR included) and all bytes >= 0x80.
 */
3987static const unsigned char test_char_data[256] = {
3988 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
3989 0x00, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x9, CR/LF separated */
3990 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
3991 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
3992 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x00, 0x27, /* & */
3993 0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
3994 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
3995 0x38, 0x39, 0x3A, 0x3B, 0x00, 0x3D, 0x3E, 0x3F, /* < */
3996 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
3997 0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F,
3998 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
3999 0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x00, 0x5E, 0x5F, /* ] */
4000 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
4001 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
4002 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
4003 0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F,
4004 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* non-ascii */
4005 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4006 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4007 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4008 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4009 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4010 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4011 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4012 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4013 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4014 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4015 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4016 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4017 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4018 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4019 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
4020};
4021
Owen Taylor3473f882001-02-23 17:55:21 +00004022/**
4023 * xmlParseCharData:
4024 * @ctxt: an XML parser context
4025 * @cdata: int indicating whether we are within a CDATA section
4026 *
4027 * parse a CharData section.
4028 * if we are within a CDATA section ']]>' marks an end of section.
4029 *
4030 * The right angle bracket (>) may be represented using the string "&gt;",
4031 * and must, for compatibility, be escaped using "&gt;" or a character
4032 * reference when it appears in the string "]]>" in content, when that
4033 * string is not marking the end of a CDATA section.
4034 *
4035 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
4036 */
4037
4038void
4039xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
    /*
     * Fast path for character data. When @cdata is zero the input buffer
     * is scanned byte by byte and each run of plain ASCII text is handed
     * to the SAX callbacks directly from the input buffer, without a copy.
     * The function returns from inside the loop when the scan stops on
     * '<', on '&' or on a misplaced "]]>". Any other byte outside
     * 0x20..0x7F / 0x09 ends the loop and what remains is parsed by
     * xmlParseCharDataComplex(); that routine also handles everything when
     * @cdata is non-zero.
     *
     * line/col hold the position recorded at entry; they are refreshed
     * after each SAX delivery and written back to the input just before
     * the slow path is entered.
     */
Daniel Veillard561b7f82002-03-20 21:55:57 +00004040 const xmlChar *in;
Daniel Veillard48b2f892001-02-25 16:11:03 +00004041 int nbchar = 0;
Daniel Veillard50582112001-03-26 22:52:16 +00004042 int line = ctxt->input->line;
4043 int col = ctxt->input->col;
Daniel Veillard0714c5b2005-01-23 00:01:01 +00004044 int ccol;
Daniel Veillard48b2f892001-02-25 16:11:03 +00004045
4046 SHRINK;
4047 GROW;
4048 /*
4049 * Accelerated common case where input don't need to be
4050 * modified before passing it to the handler.
4051 */
Daniel Veillardfdc91562002-07-01 21:52:03 +00004052 if (!cdata) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00004053 in = ctxt->input->cur;
4054 do {
        /*
         * Step 1: skip a leading run of spaces (0x20) and line feeds
         * (0xA), keeping the column and line counters up to date.
         */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004055get_more_space:
Daniel Veillard9e264ad2008-01-11 06:10:16 +00004056 while (*in == 0x20) { in++; ctxt->input->col++; }
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004057 if (*in == 0xA) {
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004058 do {
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00004059 ctxt->input->line++; ctxt->input->col = 1;
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004060 in++;
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004061 } while (*in == 0xA);
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004062 goto get_more_space;
4063 }
        /*
         * Markup follows directly: the span [cur, in) holds only spaces
         * and line feeds. Report it (if not empty) and return. When the
         * two callbacks differ, areBlanks() selects which one is used.
         */
4064 if (*in == '<') {
4065 nbchar = in - ctxt->input->cur;
4066 if (nbchar > 0) {
4067 const xmlChar *tmp = ctxt->input->cur;
4068 ctxt->input->cur = in;
4069
Daniel Veillard34099b42004-11-04 17:34:35 +00004070 if ((ctxt->sax != NULL) &&
4071 (ctxt->sax->ignorableWhitespace !=
4072 ctxt->sax->characters)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004073 if (areBlanks(ctxt, tmp, nbchar, 1)) {
Daniel Veillard32acf0c2005-03-31 14:12:37 +00004074 if (ctxt->sax->ignorableWhitespace != NULL)
4075 ctxt->sax->ignorableWhitespace(ctxt->userData,
4076 tmp, nbchar);
Daniel Veillard1114d002006-10-12 16:24:35 +00004077 } else {
4078 if (ctxt->sax->characters != NULL)
4079 ctxt->sax->characters(ctxt->userData,
4080 tmp, nbchar);
                    /*
                     * NOTE(review): -1 / -2 look like states of the
                     * current xml:space stack entry; their meaning is
                     * not visible in this function - confirm.
                     */
4081 if (*ctxt->space == -1)
4082 *ctxt->space = -2;
4083 }
Daniel Veillard34099b42004-11-04 17:34:35 +00004084 } else if ((ctxt->sax != NULL) &&
4085 (ctxt->sax->characters != NULL)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004086 ctxt->sax->characters(ctxt->userData,
4087 tmp, nbchar);
4088 }
4089 }
4090 return;
4091 }
Daniel Veillard0714c5b2005-01-23 00:01:01 +00004092
        /*
         * Step 2: advance over every byte flagged in test_char_data[].
         * ccol is a local copy of the column counter, written back once
         * the run ends.
         */
Daniel Veillard3ed155f2001-04-29 19:56:59 +00004093get_more:
Daniel Veillard0714c5b2005-01-23 00:01:01 +00004094 ccol = ctxt->input->col;
Daniel Veillard0a119eb2005-07-20 13:46:00 +00004095 while (test_char_data[*in]) {
4096 in++;
4097 ccol++;
4098 }
Daniel Veillard0714c5b2005-01-23 00:01:01 +00004099 ctxt->input->col = ccol;
        /* Line feeds are not flagged in the table: count them here. */
Daniel Veillard561b7f82002-03-20 21:55:57 +00004100 if (*in == 0xA) {
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004101 do {
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00004102 ctxt->input->line++; ctxt->input->col = 1;
Daniel Veillard3ed155f2001-04-29 19:56:59 +00004103 in++;
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004104 } while (*in == 0xA);
Daniel Veillard3ed155f2001-04-29 19:56:59 +00004105 goto get_more;
Daniel Veillard561b7f82002-03-20 21:55:57 +00004106 }
        /*
         * ']' is not flagged either, so that "]]>" can be detected: in
         * content it is a fatal error and parsing of this run stops with
         * cur moved to it. A ']' not starting "]]>" is ordinary data.
         */
4107 if (*in == ']') {
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00004108 if ((in[1] == ']') && (in[2] == '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004109 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00004110 ctxt->input->cur = in;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00004111 return;
4112 }
4113 in++;
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00004114 ctxt->input->col++;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00004115 goto get_more;
4116 }
        /*
         * Step 3: deliver the span [cur, in). The areBlanks() test is
         * only attempted when the two callbacks differ and the span
         * starts with a blank character.
         */
Daniel Veillard48b2f892001-02-25 16:11:03 +00004117 nbchar = in - ctxt->input->cur;
Daniel Veillard80f32572001-03-07 19:45:40 +00004118 if (nbchar > 0) {
Daniel Veillard34099b42004-11-04 17:34:35 +00004119 if ((ctxt->sax != NULL) &&
4120 (ctxt->sax->ignorableWhitespace !=
Daniel Veillard40412cd2003-09-03 13:28:32 +00004121 ctxt->sax->characters) &&
William M. Brack76e95df2003-10-18 16:20:14 +00004122 (IS_BLANK_CH(*ctxt->input->cur))) {
Daniel Veillarda7374592001-05-10 14:17:55 +00004123 const xmlChar *tmp = ctxt->input->cur;
4124 ctxt->input->cur = in;
Daniel Veillard40412cd2003-09-03 13:28:32 +00004125
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004126 if (areBlanks(ctxt, tmp, nbchar, 0)) {
Daniel Veillard32acf0c2005-03-31 14:12:37 +00004127 if (ctxt->sax->ignorableWhitespace != NULL)
4128 ctxt->sax->ignorableWhitespace(ctxt->userData,
4129 tmp, nbchar);
Daniel Veillard1114d002006-10-12 16:24:35 +00004130 } else {
4131 if (ctxt->sax->characters != NULL)
4132 ctxt->sax->characters(ctxt->userData,
4133 tmp, nbchar);
4134 if (*ctxt->space == -1)
4135 *ctxt->space = -2;
4136 }
Daniel Veillard3ed27bd2001-06-17 17:58:17 +00004137 line = ctxt->input->line;
4138 col = ctxt->input->col;
Daniel Veillard34099b42004-11-04 17:34:35 +00004139 } else if (ctxt->sax != NULL) {
Daniel Veillard80f32572001-03-07 19:45:40 +00004140 if (ctxt->sax->characters != NULL)
4141 ctxt->sax->characters(ctxt->userData,
4142 ctxt->input->cur, nbchar);
Daniel Veillard3ed27bd2001-06-17 17:58:17 +00004143 line = ctxt->input->line;
4144 col = ctxt->input->col;
Daniel Veillard80f32572001-03-07 19:45:40 +00004145 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00004146 }
        /* The span is consumed. */
4147 ctxt->input->cur = in;
        /*
         * CR LF pair: move cur onto the LF so that the CR is left out of
         * the next span, count the new line and re-test the loop
         * condition (continue in a do/while jumps to the while test).
         * A lone CR is stepped back onto; it fails the loop condition
         * below, so the slow path deals with it.
         */
Daniel Veillard561b7f82002-03-20 21:55:57 +00004148 if (*in == 0xD) {
4149 in++;
4150 if (*in == 0xA) {
4151 ctxt->input->cur = in;
Daniel Veillard48b2f892001-02-25 16:11:03 +00004152 in++;
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00004153 ctxt->input->line++; ctxt->input->col = 1;
Daniel Veillard561b7f82002-03-20 21:55:57 +00004154 continue; /* while */
Daniel Veillard48b2f892001-02-25 16:11:03 +00004155 }
Daniel Veillard561b7f82002-03-20 21:55:57 +00004156 in--;
4157 }
        /* Markup or a reference starts here: the caller takes over. */
4158 if (*in == '<') {
4159 return;
4160 }
4161 if (*in == '&') {
4162 return;
Daniel Veillard48b2f892001-02-25 16:11:03 +00004163 }
        /*
         * Re-read the scan pointer after SHRINK/GROW (presumably they may
         * relocate or refill the input buffer).
         */
4164 SHRINK;
4165 GROW;
4166 in = ctxt->input->cur;
William M. Brackc07329e2003-09-08 01:57:30 +00004167 } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
Daniel Veillard48b2f892001-02-25 16:11:03 +00004168 nbchar = 0;
4169 }
    /*
     * Slow path. The position saved at the last delivery is written back
     * first, presumably so that the complex routine recounts lines and
     * columns for the bytes that were scanned but not consumed.
     */
Daniel Veillard50582112001-03-26 22:52:16 +00004170 ctxt->input->line = line;
4171 ctxt->input->col = col;
Daniel Veillard48b2f892001-02-25 16:11:03 +00004172 xmlParseCharDataComplex(ctxt, cdata);
4173}
4174
Daniel Veillard01c13b52002-12-10 15:19:08 +00004175/**
4176 * xmlParseCharDataComplex:
4177 * @ctxt: an XML parser context
4178 * @cdata: int indicating whether we are within a CDATA section
4179 *
4180 * parse a CharData section.this is the fallback function
4181 * of xmlParseCharData() when the parsing requires handling
4182 * of non-ASCII characters.
4183 */
Daniel Veillard48b2f892001-02-25 16:11:03 +00004184void
4185xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) {
Owen Taylor3473f882001-02-23 17:55:21 +00004186 xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
4187 int nbchar = 0;
4188 int cur, l;
4189 int count = 0;
4190
4191 SHRINK;
4192 GROW;
4193 cur = CUR_CHAR(l);
Daniel Veillardfdc91562002-07-01 21:52:03 +00004194 while ((cur != '<') && /* checked */
4195 (cur != '&') &&
William M. Brack871611b2003-10-18 04:53:14 +00004196 (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
Owen Taylor3473f882001-02-23 17:55:21 +00004197 if ((cur == ']') && (NXT(1) == ']') &&
4198 (NXT(2) == '>')) {
4199 if (cdata) break;
4200 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004201 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004202 }
4203 }
4204 COPY_BUF(l,buf,nbchar,cur);
4205 if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
Daniel Veillard092643b2003-09-25 14:29:29 +00004206 buf[nbchar] = 0;
4207
Owen Taylor3473f882001-02-23 17:55:21 +00004208 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00004209 * OK the segment is to be consumed as chars.
Owen Taylor3473f882001-02-23 17:55:21 +00004210 */
4211 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004212 if (areBlanks(ctxt, buf, nbchar, 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00004213 if (ctxt->sax->ignorableWhitespace != NULL)
4214 ctxt->sax->ignorableWhitespace(ctxt->userData,
4215 buf, nbchar);
4216 } else {
4217 if (ctxt->sax->characters != NULL)
4218 ctxt->sax->characters(ctxt->userData, buf, nbchar);
Daniel Veillard1114d002006-10-12 16:24:35 +00004219 if ((ctxt->sax->characters !=
4220 ctxt->sax->ignorableWhitespace) &&
4221 (*ctxt->space == -1))
4222 *ctxt->space = -2;
Owen Taylor3473f882001-02-23 17:55:21 +00004223 }
4224 }
4225 nbchar = 0;
4226 }
4227 count++;
4228 if (count > 50) {
4229 GROW;
4230 count = 0;
4231 }
4232 NEXTL(l);
4233 cur = CUR_CHAR(l);
4234 }
4235 if (nbchar != 0) {
Daniel Veillard092643b2003-09-25 14:29:29 +00004236 buf[nbchar] = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00004237 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00004238 * OK the segment is to be consumed as chars.
Owen Taylor3473f882001-02-23 17:55:21 +00004239 */
4240 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004241 if (areBlanks(ctxt, buf, nbchar, 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00004242 if (ctxt->sax->ignorableWhitespace != NULL)
4243 ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
4244 } else {
4245 if (ctxt->sax->characters != NULL)
4246 ctxt->sax->characters(ctxt->userData, buf, nbchar);
Daniel Veillard1114d002006-10-12 16:24:35 +00004247 if ((ctxt->sax->characters != ctxt->sax->ignorableWhitespace) &&
4248 (*ctxt->space == -1))
4249 *ctxt->space = -2;
Owen Taylor3473f882001-02-23 17:55:21 +00004250 }
4251 }
4252 }
Daniel Veillard3b1478b2006-04-24 08:50:10 +00004253 if ((cur != 0) && (!IS_CHAR(cur))) {
4254 /* Generate the error and skip the offending character */
4255 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4256 "PCDATA invalid Char value %d\n",
4257 cur);
4258 NEXTL(l);
4259 }
Owen Taylor3473f882001-02-23 17:55:21 +00004260}
4261
4262/**
4263 * xmlParseExternalID:
4264 * @ctxt: an XML parser context
4265 * @publicID: a xmlChar** receiving PubidLiteral
4266 * @strict: indicate whether we should restrict parsing to only
4267 * production [75], see NOTE below
4268 *
4269 * Parse an External ID or a Public ID
4270 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00004271 * NOTE: Productions [75] and [83] interact badly since [75] can generate
Owen Taylor3473f882001-02-23 17:55:21 +00004272 * 'PUBLIC' S PubidLiteral S SystemLiteral
4273 *
4274 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
4275 * | 'PUBLIC' S PubidLiteral S SystemLiteral
4276 *
4277 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
4278 *
4279 * Returns the function returns SystemLiteral and in the second
4280 * case publicID receives PubidLiteral, is strict is off
4281 * it is possible to return NULL and have publicID set.
4282 */
4283
4284xmlChar *
4285xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
4286 xmlChar *URI = NULL;
4287
4288 SHRINK;
Daniel Veillard146c9122001-03-22 15:22:27 +00004289
4290 *publicID = NULL;
Daniel Veillarda07050d2003-10-19 14:46:32 +00004291 if (CMP6(CUR_PTR, 'S', 'Y', 'S', 'T', 'E', 'M')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004292 SKIP(6);
William M. Brack76e95df2003-10-18 16:20:14 +00004293 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004294 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4295 "Space required after 'SYSTEM'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004296 }
4297 SKIP_BLANKS;
4298 URI = xmlParseSystemLiteral(ctxt);
4299 if (URI == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004300 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004301 }
Daniel Veillarda07050d2003-10-19 14:46:32 +00004302 } else if (CMP6(CUR_PTR, 'P', 'U', 'B', 'L', 'I', 'C')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004303 SKIP(6);
William M. Brack76e95df2003-10-18 16:20:14 +00004304 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004305 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004306 "Space required after 'PUBLIC'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004307 }
4308 SKIP_BLANKS;
4309 *publicID = xmlParsePubidLiteral(ctxt);
4310 if (*publicID == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004311 xmlFatalErr(ctxt, XML_ERR_PUBID_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004312 }
4313 if (strict) {
4314 /*
4315 * We don't handle [83] so "S SystemLiteral" is required.
4316 */
William M. Brack76e95df2003-10-18 16:20:14 +00004317 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004318 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004319 "Space required after the Public Identifier\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004320 }
4321 } else {
4322 /*
4323 * We handle [83] so we return immediately, if
4324 * "S SystemLiteral" is not detected. From a purely parsing
4325 * point of view that's a nice mess.
4326 */
4327 const xmlChar *ptr;
4328 GROW;
4329
4330 ptr = CUR_PTR;
William M. Brack76e95df2003-10-18 16:20:14 +00004331 if (!IS_BLANK_CH(*ptr)) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004332
William M. Brack76e95df2003-10-18 16:20:14 +00004333 while (IS_BLANK_CH(*ptr)) ptr++; /* TODO: dangerous, fix ! */
Owen Taylor3473f882001-02-23 17:55:21 +00004334 if ((*ptr != '\'') && (*ptr != '"')) return(NULL);
4335 }
4336 SKIP_BLANKS;
4337 URI = xmlParseSystemLiteral(ctxt);
4338 if (URI == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004339 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004340 }
4341 }
4342 return(URI);
4343}
4344
4345/**
Daniel Veillard4c778d82005-01-23 17:37:44 +00004346 * xmlParseCommentComplex:
Owen Taylor3473f882001-02-23 17:55:21 +00004347 * @ctxt: an XML parser context
Daniel Veillard4c778d82005-01-23 17:37:44 +00004348 * @buf: the already parsed part of the buffer
4349 * @len: number of bytes filles in the buffer
4350 * @size: allocated size of the buffer
Owen Taylor3473f882001-02-23 17:55:21 +00004351 *
4352 * Skip an XML (SGML) comment <!-- .... -->
4353 * The spec says that "For compatibility, the string "--" (double-hyphen)
4354 * must not occur within comments. "
Daniel Veillard4c778d82005-01-23 17:37:44 +00004355 * This is the slow routine in case the accelerator for ascii didn't work
Owen Taylor3473f882001-02-23 17:55:21 +00004356 *
4357 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4358 */
Daniel Veillard4c778d82005-01-23 17:37:44 +00004359static void
4360xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar *buf, int len, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +00004361 int q, ql;
4362 int r, rl;
4363 int cur, l;
Owen Taylor3473f882001-02-23 17:55:21 +00004364 int count = 0;
Daniel Veillard051d52c2008-07-29 16:44:59 +00004365 int inputid;
4366
4367 inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00004368
Owen Taylor3473f882001-02-23 17:55:21 +00004369 if (buf == NULL) {
Daniel Veillard4c778d82005-01-23 17:37:44 +00004370 len = 0;
4371 size = XML_PARSER_BUFFER_SIZE;
4372 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4373 if (buf == NULL) {
4374 xmlErrMemory(ctxt, NULL);
4375 return;
4376 }
Owen Taylor3473f882001-02-23 17:55:21 +00004377 }
William M. Brackbf9a73d2007-02-09 00:07:07 +00004378 GROW; /* Assure there's enough input data */
Owen Taylor3473f882001-02-23 17:55:21 +00004379 q = CUR_CHAR(ql);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00004380 if (q == 0)
4381 goto not_terminated;
Daniel Veillardda629342007-08-01 07:49:06 +00004382 if (!IS_CHAR(q)) {
4383 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4384 "xmlParseComment: invalid xmlChar value %d\n",
4385 q);
4386 xmlFree (buf);
4387 return;
4388 }
Owen Taylor3473f882001-02-23 17:55:21 +00004389 NEXTL(ql);
4390 r = CUR_CHAR(rl);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00004391 if (r == 0)
4392 goto not_terminated;
Daniel Veillardda629342007-08-01 07:49:06 +00004393 if (!IS_CHAR(r)) {
4394 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4395 "xmlParseComment: invalid xmlChar value %d\n",
4396 q);
4397 xmlFree (buf);
4398 return;
4399 }
Owen Taylor3473f882001-02-23 17:55:21 +00004400 NEXTL(rl);
4401 cur = CUR_CHAR(l);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00004402 if (cur == 0)
4403 goto not_terminated;
William M. Brack871611b2003-10-18 04:53:14 +00004404 while (IS_CHAR(cur) && /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00004405 ((cur != '>') ||
4406 (r != '-') || (q != '-'))) {
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00004407 if ((r == '-') && (q == '-')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004408 xmlFatalErr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004409 }
4410 if (len + 5 >= size) {
William M. Bracka3215c72004-07-31 16:24:01 +00004411 xmlChar *new_buf;
Owen Taylor3473f882001-02-23 17:55:21 +00004412 size *= 2;
William M. Bracka3215c72004-07-31 16:24:01 +00004413 new_buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4414 if (new_buf == NULL) {
4415 xmlFree (buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004416 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004417 return;
4418 }
William M. Bracka3215c72004-07-31 16:24:01 +00004419 buf = new_buf;
Owen Taylor3473f882001-02-23 17:55:21 +00004420 }
4421 COPY_BUF(ql,buf,len,q);
4422 q = r;
4423 ql = rl;
4424 r = cur;
4425 rl = l;
4426
4427 count++;
4428 if (count > 50) {
4429 GROW;
4430 count = 0;
4431 }
4432 NEXTL(l);
4433 cur = CUR_CHAR(l);
4434 if (cur == 0) {
4435 SHRINK;
4436 GROW;
4437 cur = CUR_CHAR(l);
4438 }
4439 }
4440 buf[len] = 0;
Daniel Veillardda629342007-08-01 07:49:06 +00004441 if (cur == 0) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00004442 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00004443 "Comment not terminated \n<!--%.50s\n", buf);
Daniel Veillardda629342007-08-01 07:49:06 +00004444 } else if (!IS_CHAR(cur)) {
4445 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4446 "xmlParseComment: invalid xmlChar value %d\n",
4447 cur);
Owen Taylor3473f882001-02-23 17:55:21 +00004448 } else {
Daniel Veillard051d52c2008-07-29 16:44:59 +00004449 if (inputid != ctxt->input->id) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004450 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4451 "Comment doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004452 }
4453 NEXT;
4454 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
4455 (!ctxt->disableSAX))
4456 ctxt->sax->comment(ctxt->userData, buf);
Owen Taylor3473f882001-02-23 17:55:21 +00004457 }
Daniel Veillardda629342007-08-01 07:49:06 +00004458 xmlFree(buf);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00004459 return;
4460not_terminated:
4461 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4462 "Comment not terminated\n", NULL);
4463 xmlFree(buf);
Daniel Veillardda629342007-08-01 07:49:06 +00004464 return;
Owen Taylor3473f882001-02-23 17:55:21 +00004465}
Daniel Veillardda629342007-08-01 07:49:06 +00004466
Daniel Veillard4c778d82005-01-23 17:37:44 +00004467/**
4468 * xmlParseComment:
4469 * @ctxt: an XML parser context
4470 *
4471 * Skip an XML (SGML) comment <!-- .... -->
4472 * The spec says that "For compatibility, the string "--" (double-hyphen)
4473 * must not occur within comments. "
4474 *
4475 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4476 */
4477void
4478xmlParseComment(xmlParserCtxtPtr ctxt) {
    /*
     * Fast path for comments made of plain ASCII. Spans of input are
     * appended to a growing heap buffer and the whole comment is reported
     * through the SAX comment callback once "-->" is reached. A byte the
     * scanning loop does not handle ends the loop; the rest is then
     * parsed by xmlParseCommentComplex(), which receives the partial
     * buffer. The parser state is set to XML_PARSER_COMMENT meanwhile and
     * put back on every exit taken after that.
     */
4479 xmlChar *buf = NULL;
4480 int size = XML_PARSER_BUFFER_SIZE;
Daniel Veillard316a5c32005-01-23 22:56:39 +00004481 int len = 0;
Daniel Veillard4c778d82005-01-23 17:37:44 +00004482 xmlParserInputState state;
4483 const xmlChar *in;
4484 int nbchar = 0, ccol;
Daniel Veillard051d52c2008-07-29 16:44:59 +00004485 int inputid;
Daniel Veillard4c778d82005-01-23 17:37:44 +00004486
4487 /*
4488 * Check that there is a comment right here.
4489 */
4490 if ((RAW != '<') || (NXT(1) != '!') ||
4491 (NXT(2) != '-') || (NXT(3) != '-')) return;
Daniel Veillard4c778d82005-01-23 17:37:44 +00004492 state = ctxt->instate;
4493 ctxt->instate = XML_PARSER_COMMENT;
    /* Remembered to check that the comment ends in the same input. */
Daniel Veillard051d52c2008-07-29 16:44:59 +00004494 inputid = ctxt->input->id;
    /* Step over "<!--". */
Daniel Veillard4c778d82005-01-23 17:37:44 +00004495 SKIP(4);
4496 SHRINK;
4497 GROW;
4498
4499 /*
4500 * Accelerated common case where input don't need to be
4501 * modified before passing it to the handler.
4502 */
4503 in = ctxt->input->cur;
4504 do {
        /* Count line feeds found at the start of this round. */
4505 if (*in == 0xA) {
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004506 do {
Daniel Veillard4c778d82005-01-23 17:37:44 +00004507 ctxt->input->line++; ctxt->input->col = 1;
4508 in++;
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004509 } while (*in == 0xA);
Daniel Veillard4c778d82005-01-23 17:37:44 +00004510 }
        /*
         * Advance over every byte in 0x20..0x7F except '-', plus TAB.
         * ccol is a local copy of the column counter, written back once
         * the run ends.
         */
4511get_more:
4512 ccol = ctxt->input->col;
4513 while (((*in > '-') && (*in <= 0x7F)) ||
4514 ((*in >= 0x20) && (*in < '-')) ||
4515 (*in == 0x09)) {
4516 in++;
4517 ccol++;
4518 }
4519 ctxt->input->col = ccol;
4520 if (*in == 0xA) {
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004521 do {
Daniel Veillard4c778d82005-01-23 17:37:44 +00004522 ctxt->input->line++; ctxt->input->col = 1;
4523 in++;
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004524 } while (*in == 0xA);
Daniel Veillard4c778d82005-01-23 17:37:44 +00004525 goto get_more;
4526 }
4527 nbchar = in - ctxt->input->cur;
4528 /*
4529 * save current set of data
4530 */
        /*
         * The text is only accumulated when a comment callback exists.
         * First allocation: when the span is directly followed by "--"
         * the buffer is sized to the span plus its terminator, otherwise
         * XML_PARSER_BUFFER_SIZE bytes of slack are added. Later spans
         * grow the buffer as needed.
         */
4531 if (nbchar > 0) {
4532 if ((ctxt->sax != NULL) &&
4533 (ctxt->sax->comment != NULL)) {
4534 if (buf == NULL) {
4535 if ((*in == '-') && (in[1] == '-'))
4536 size = nbchar + 1;
4537 else
4538 size = XML_PARSER_BUFFER_SIZE + nbchar;
4539 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4540 if (buf == NULL) {
4541 xmlErrMemory(ctxt, NULL);
4542 ctxt->instate = state;
4543 return;
4544 }
4545 len = 0;
4546 } else if (len + nbchar + 1 >= size) {
4547 xmlChar *new_buf;
4548 size += len + nbchar + XML_PARSER_BUFFER_SIZE;
4549 new_buf = (xmlChar *) xmlRealloc(buf,
4550 size * sizeof(xmlChar));
4551 if (new_buf == NULL) {
4552 xmlFree (buf);
4553 xmlErrMemory(ctxt, NULL);
4554 ctxt->instate = state;
4555 return;
4556 }
4557 buf = new_buf;
4558 }
4559 memcpy(&buf[len], ctxt->input->cur, nbchar);
4560 len += nbchar;
4561 buf[len] = 0;
4562 }
4563 }
        /* The span is consumed. */
4564 ctxt->input->cur = in;
        /*
         * NOTE(review): the scan above never stops on 0xA (it jumps back
         * to get_more), so this branch looks unreachable - confirm.
         */
Daniel Veillard9b528c72006-02-05 03:06:15 +00004565 if (*in == 0xA) {
4566 in++;
4567 ctxt->input->line++; ctxt->input->col = 1;
4568 }
        /*
         * CR LF pair: move cur onto the LF so that the CR is left out of
         * the next span, count the new line and re-test the loop
         * condition (continue in a do/while jumps to the while test).
         * A lone CR is stepped back onto.
         */
Daniel Veillard4c778d82005-01-23 17:37:44 +00004569 if (*in == 0xD) {
4570 in++;
4571 if (*in == 0xA) {
4572 ctxt->input->cur = in;
4573 in++;
4574 ctxt->input->line++; ctxt->input->col = 1;
4575 continue; /* while */
4576 }
4577 in--;
4578 }
        /*
         * Re-read the scan pointer after SHRINK/GROW (presumably they may
         * relocate or refill the input buffer).
         */
4579 SHRINK;
4580 GROW;
4581 in = ctxt->input->cur;
        /*
         * Hyphen handling. "-->" ends the comment: it is reported (as an
         * empty string if nothing was accumulated) and the buffer is
         * freed. "--" not followed by '>' raises
         * XML_ERR_COMMENT_NOT_FINISHED and both hyphens are stepped over.
         * A single '-' is stepped over. cur is not moved in the last two
         * cases, so the hyphens become part of the next span.
         */
4582 if (*in == '-') {
4583 if (in[1] == '-') {
4584 if (in[2] == '>') {
Daniel Veillard051d52c2008-07-29 16:44:59 +00004585 if (ctxt->input->id != inputid) {
4586 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4587 "comment doesn't start and stop in the same entity\n");
4588 }
Daniel Veillard4c778d82005-01-23 17:37:44 +00004589 SKIP(3);
4590 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
4591 (!ctxt->disableSAX)) {
4592 if (buf != NULL)
4593 ctxt->sax->comment(ctxt->userData, buf);
4594 else
4595 ctxt->sax->comment(ctxt->userData, BAD_CAST "");
4596 }
4597 if (buf != NULL)
4598 xmlFree(buf);
4599 ctxt->instate = state;
4600 return;
4601 }
4602 if (buf != NULL)
4603 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4604 "Comment not terminated \n<!--%.50s\n",
4605 buf);
4606 else
4607 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4608 "Comment not terminated \n", NULL);
4609 in++;
4610 ctxt->input->col++;
4611 }
4612 in++;
4613 ctxt->input->col++;
4614 goto get_more;
4615 }
4616 } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
    /*
     * Slow path: buf (possibly NULL) is handed over together with its
     * length and size; it is not freed here, xmlParseCommentComplex()
     * releases it.
     */
4617 xmlParseCommentComplex(ctxt, buf, len, size);
4618 ctxt->instate = state;
4619 return;
4620}
4621
Owen Taylor3473f882001-02-23 17:55:21 +00004622
4623/**
4624 * xmlParsePITarget:
4625 * @ctxt: an XML parser context
4626 *
4627 * parse the name of a PI
4628 *
4629 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
4630 *
4631 * Returns the PITarget name or NULL
4632 */
4633
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004634const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00004635xmlParsePITarget(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004636 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00004637
4638 name = xmlParseName(ctxt);
4639 if ((name != NULL) &&
4640 ((name[0] == 'x') || (name[0] == 'X')) &&
4641 ((name[1] == 'm') || (name[1] == 'M')) &&
4642 ((name[2] == 'l') || (name[2] == 'L'))) {
4643 int i;
4644 if ((name[0] == 'x') && (name[1] == 'm') &&
4645 (name[2] == 'l') && (name[3] == 0)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004646 xmlFatalErrMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
Owen Taylor3473f882001-02-23 17:55:21 +00004647 "XML declaration allowed only at the start of the document\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004648 return(name);
4649 } else if (name[3] == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004650 xmlFatalErr(ctxt, XML_ERR_RESERVED_XML_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004651 return(name);
4652 }
4653 for (i = 0;;i++) {
4654 if (xmlW3CPIs[i] == NULL) break;
4655 if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
4656 return(name);
4657 }
Daniel Veillard24eb9782003-10-04 21:08:09 +00004658 xmlWarningMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
4659 "xmlParsePITarget: invalid name prefix 'xml'\n",
4660 NULL, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004661 }
Daniel Veillard37334572008-07-31 08:20:02 +00004662 if ((name != NULL) && (xmlStrchr(name, ':') != NULL)) {
4663 xmlNsErr(ctxt, XML_NS_ERR_COLON,
4664 "colon are forbidden from PI names '%s'\n", name, NULL, NULL);
4665 }
Owen Taylor3473f882001-02-23 17:55:21 +00004666 return(name);
4667}
4668
#ifdef LIBXML_CATALOG_ENABLED
/**
 * xmlParseCatalogPI:
 * @ctxt:  an XML parser context
 * @catalog:  the PI value string
 *
 * parse an XML Catalog Processing Instruction.
 *
 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
 *
 * The value must consist of the keyword "catalog", an '=' sign and a
 * single- or double-quoted URL, with optional blanks around each part
 * and nothing but blanks after the closing quote. On success the URL is
 * added to the catalogs of the parsing context, so it can be used if
 * there is a resolution need further down in the document.
 *
 * A syntax error raises the XML_WAR_CATALOG_PI warning, except for a
 * missing '=' after the keyword, which makes the function return
 * without any report.
 */

static void
xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
    const xmlChar *p = catalog;
    const xmlChar *start;
    xmlChar *URL = NULL;
    xmlChar quote;

    /* keyword */
    while (IS_BLANK_CH(*p))
        p++;
    if (xmlStrncmp(p, BAD_CAST"catalog", 7) != 0)
        goto error;
    p += 7;

    /* '=' sign; its absence is silently ignored */
    while (IS_BLANK_CH(*p))
        p++;
    if (*p != '=')
        return;
    p++;

    /* opening quote */
    while (IS_BLANK_CH(*p))
        p++;
    quote = *p;
    if ((quote != '\'') && (quote != '"'))
        goto error;
    p++;

    /* quoted URL: scan up to the matching quote, failing on end of string */
    start = p;
    while (*p != quote) {
        if (*p == 0)
            goto error;
        p++;
    }
    URL = xmlStrndup(start, p - start);
    p++;

    /* only blanks may follow the closing quote */
    while (IS_BLANK_CH(*p))
        p++;
    if (*p != 0)
        goto error;

    if (URL != NULL) {
        ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, URL);
        xmlFree(URL);
    }
    return;

error:
    xmlWarningMsg(ctxt, XML_WAR_CATALOG_PI,
                  "Catalog PI syntax error: %s\n",
                  catalog, NULL);
    if (URL != NULL)
        xmlFree(URL);
}
#endif
4730
Owen Taylor3473f882001-02-23 17:55:21 +00004731/**
4732 * xmlParsePI:
4733 * @ctxt: an XML parser context
4734 *
4735 * parse an XML Processing Instruction.
4736 *
4737 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
4738 *
4739 * The processing is transfered to SAX once parsed.
4740 */
4741
4742void
4743xmlParsePI(xmlParserCtxtPtr ctxt) {
4744 xmlChar *buf = NULL;
4745 int len = 0;
4746 int size = XML_PARSER_BUFFER_SIZE;
4747 int cur, l;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004748 const xmlChar *target;
Owen Taylor3473f882001-02-23 17:55:21 +00004749 xmlParserInputState state;
4750 int count = 0;
4751
4752 if ((RAW == '<') && (NXT(1) == '?')) {
4753 xmlParserInputPtr input = ctxt->input;
4754 state = ctxt->instate;
4755 ctxt->instate = XML_PARSER_PI;
4756 /*
4757 * this is a Processing Instruction.
4758 */
4759 SKIP(2);
4760 SHRINK;
4761
4762 /*
4763 * Parse the target name and check for special support like
4764 * namespace.
4765 */
4766 target = xmlParsePITarget(ctxt);
4767 if (target != NULL) {
4768 if ((RAW == '?') && (NXT(1) == '>')) {
4769 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004770 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4771 "PI declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004772 }
4773 SKIP(2);
4774
4775 /*
4776 * SAX: PI detected.
4777 */
4778 if ((ctxt->sax) && (!ctxt->disableSAX) &&
4779 (ctxt->sax->processingInstruction != NULL))
4780 ctxt->sax->processingInstruction(ctxt->userData,
4781 target, NULL);
4782 ctxt->instate = state;
Owen Taylor3473f882001-02-23 17:55:21 +00004783 return;
4784 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00004785 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00004786 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004787 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004788 ctxt->instate = state;
4789 return;
4790 }
4791 cur = CUR;
4792 if (!IS_BLANK(cur)) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00004793 xmlFatalErrMsgStr(ctxt, XML_ERR_SPACE_REQUIRED,
4794 "ParsePI: PI %s space expected\n", target);
Owen Taylor3473f882001-02-23 17:55:21 +00004795 }
4796 SKIP_BLANKS;
4797 cur = CUR_CHAR(l);
William M. Brack871611b2003-10-18 04:53:14 +00004798 while (IS_CHAR(cur) && /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00004799 ((cur != '?') || (NXT(1) != '>'))) {
4800 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00004801 xmlChar *tmp;
4802
Owen Taylor3473f882001-02-23 17:55:21 +00004803 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00004804 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4805 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004806 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00004807 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00004808 ctxt->instate = state;
4809 return;
4810 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00004811 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00004812 }
4813 count++;
4814 if (count > 50) {
4815 GROW;
4816 count = 0;
4817 }
4818 COPY_BUF(l,buf,len,cur);
4819 NEXTL(l);
4820 cur = CUR_CHAR(l);
4821 if (cur == 0) {
4822 SHRINK;
4823 GROW;
4824 cur = CUR_CHAR(l);
4825 }
4826 }
4827 buf[len] = 0;
4828 if (cur != '?') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00004829 xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
4830 "ParsePI: PI %s never end ...\n", target);
Owen Taylor3473f882001-02-23 17:55:21 +00004831 } else {
4832 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004833 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4834 "PI declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004835 }
4836 SKIP(2);
4837
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00004838#ifdef LIBXML_CATALOG_ENABLED
4839 if (((state == XML_PARSER_MISC) ||
4840 (state == XML_PARSER_START)) &&
4841 (xmlStrEqual(target, XML_CATALOG_PI))) {
4842 xmlCatalogAllow allow = xmlCatalogGetDefaults();
4843 if ((allow == XML_CATA_ALLOW_DOCUMENT) ||
4844 (allow == XML_CATA_ALLOW_ALL))
4845 xmlParseCatalogPI(ctxt, buf);
4846 }
4847#endif
4848
4849
Owen Taylor3473f882001-02-23 17:55:21 +00004850 /*
4851 * SAX: PI detected.
4852 */
4853 if ((ctxt->sax) && (!ctxt->disableSAX) &&
4854 (ctxt->sax->processingInstruction != NULL))
4855 ctxt->sax->processingInstruction(ctxt->userData,
4856 target, buf);
4857 }
4858 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00004859 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004860 xmlFatalErr(ctxt, XML_ERR_PI_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004861 }
4862 ctxt->instate = state;
4863 }
4864}
4865
4866/**
4867 * xmlParseNotationDecl:
4868 * @ctxt: an XML parser context
4869 *
4870 * parse a notation declaration
4871 *
4872 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
4873 *
4874 * Hence there is actually 3 choices:
4875 * 'PUBLIC' S PubidLiteral
4876 * 'PUBLIC' S PubidLiteral S SystemLiteral
4877 * and 'SYSTEM' S SystemLiteral
4878 *
4879 * See the NOTE on xmlParseExternalID().
4880 */
4881
4882void
4883xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004884 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00004885 xmlChar *Pubid;
4886 xmlChar *Systemid;
4887
Daniel Veillarda07050d2003-10-19 14:46:32 +00004888 if (CMP10(CUR_PTR, '<', '!', 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004889 xmlParserInputPtr input = ctxt->input;
4890 SHRINK;
4891 SKIP(10);
William M. Brack76e95df2003-10-18 16:20:14 +00004892 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004893 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4894 "Space required after '<!NOTATION'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004895 return;
4896 }
4897 SKIP_BLANKS;
4898
Daniel Veillard76d66f42001-05-16 21:05:17 +00004899 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004900 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004901 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004902 return;
4903 }
William M. Brack76e95df2003-10-18 16:20:14 +00004904 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004905 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004906 "Space required after the NOTATION name'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004907 return;
4908 }
Daniel Veillard37334572008-07-31 08:20:02 +00004909 if (xmlStrchr(name, ':') != NULL) {
4910 xmlNsErr(ctxt, XML_NS_ERR_COLON,
4911 "colon are forbidden from notation names '%s'\n",
4912 name, NULL, NULL);
4913 }
Owen Taylor3473f882001-02-23 17:55:21 +00004914 SKIP_BLANKS;
4915
4916 /*
4917 * Parse the IDs.
4918 */
4919 Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
4920 SKIP_BLANKS;
4921
4922 if (RAW == '>') {
4923 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004924 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4925 "Notation declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004926 }
4927 NEXT;
4928 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4929 (ctxt->sax->notationDecl != NULL))
4930 ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
4931 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004932 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004933 }
Owen Taylor3473f882001-02-23 17:55:21 +00004934 if (Systemid != NULL) xmlFree(Systemid);
4935 if (Pubid != NULL) xmlFree(Pubid);
4936 }
4937}
4938
4939/**
4940 * xmlParseEntityDecl:
4941 * @ctxt: an XML parser context
4942 *
4943 * parse <!ENTITY declarations
4944 *
4945 * [70] EntityDecl ::= GEDecl | PEDecl
4946 *
4947 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
4948 *
4949 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
4950 *
4951 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
4952 *
4953 * [74] PEDef ::= EntityValue | ExternalID
4954 *
4955 * [76] NDataDecl ::= S 'NDATA' S Name
4956 *
4957 * [ VC: Notation Declared ]
4958 * The Name must match the declared name of a notation.
4959 */
4960
4961void
4962xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004963 const xmlChar *name = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00004964 xmlChar *value = NULL;
4965 xmlChar *URI = NULL, *literal = NULL;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004966 const xmlChar *ndata = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00004967 int isParameter = 0;
4968 xmlChar *orig = NULL;
Daniel Veillardf5582f12002-06-11 10:08:16 +00004969 int skipped;
Owen Taylor3473f882001-02-23 17:55:21 +00004970
Daniel Veillard4c778d82005-01-23 17:37:44 +00004971 /* GROW; done in the caller */
Daniel Veillarda07050d2003-10-19 14:46:32 +00004972 if (CMP8(CUR_PTR, '<', '!', 'E', 'N', 'T', 'I', 'T', 'Y')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004973 xmlParserInputPtr input = ctxt->input;
Owen Taylor3473f882001-02-23 17:55:21 +00004974 SHRINK;
4975 SKIP(8);
Daniel Veillardf5582f12002-06-11 10:08:16 +00004976 skipped = SKIP_BLANKS;
4977 if (skipped == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004978 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4979 "Space required after '<!ENTITY'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004980 }
Owen Taylor3473f882001-02-23 17:55:21 +00004981
4982 if (RAW == '%') {
4983 NEXT;
Daniel Veillardf5582f12002-06-11 10:08:16 +00004984 skipped = SKIP_BLANKS;
4985 if (skipped == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004986 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4987 "Space required after '%'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004988 }
Owen Taylor3473f882001-02-23 17:55:21 +00004989 isParameter = 1;
4990 }
4991
Daniel Veillard76d66f42001-05-16 21:05:17 +00004992 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004993 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004994 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
4995 "xmlParseEntityDecl: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004996 return;
4997 }
Daniel Veillard37334572008-07-31 08:20:02 +00004998 if (xmlStrchr(name, ':') != NULL) {
4999 xmlNsErr(ctxt, XML_NS_ERR_COLON,
5000 "colon are forbidden from entities names '%s'\n",
5001 name, NULL, NULL);
5002 }
Daniel Veillardf5582f12002-06-11 10:08:16 +00005003 skipped = SKIP_BLANKS;
5004 if (skipped == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005005 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5006 "Space required after the entity name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005007 }
Owen Taylor3473f882001-02-23 17:55:21 +00005008
Daniel Veillardf5582f12002-06-11 10:08:16 +00005009 ctxt->instate = XML_PARSER_ENTITY_DECL;
Owen Taylor3473f882001-02-23 17:55:21 +00005010 /*
5011 * handle the various case of definitions...
5012 */
5013 if (isParameter) {
5014 if ((RAW == '"') || (RAW == '\'')) {
5015 value = xmlParseEntityValue(ctxt, &orig);
5016 if (value) {
5017 if ((ctxt->sax != NULL) &&
5018 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5019 ctxt->sax->entityDecl(ctxt->userData, name,
5020 XML_INTERNAL_PARAMETER_ENTITY,
5021 NULL, NULL, value);
5022 }
5023 } else {
5024 URI = xmlParseExternalID(ctxt, &literal, 1);
5025 if ((URI == NULL) && (literal == NULL)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005026 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005027 }
5028 if (URI) {
5029 xmlURIPtr uri;
5030
5031 uri = xmlParseURI((const char *) URI);
5032 if (uri == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00005033 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
5034 "Invalid URI: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00005035 /*
5036 * This really ought to be a well formedness error
5037 * but the XML Core WG decided otherwise c.f. issue
5038 * E26 of the XML erratas.
5039 */
Owen Taylor3473f882001-02-23 17:55:21 +00005040 } else {
5041 if (uri->fragment != NULL) {
Daniel Veillard4a7ae502002-02-18 19:18:17 +00005042 /*
5043 * Okay this is foolish to block those but not
5044 * invalid URIs.
5045 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005046 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005047 } else {
5048 if ((ctxt->sax != NULL) &&
5049 (!ctxt->disableSAX) &&
5050 (ctxt->sax->entityDecl != NULL))
5051 ctxt->sax->entityDecl(ctxt->userData, name,
5052 XML_EXTERNAL_PARAMETER_ENTITY,
5053 literal, URI, NULL);
5054 }
5055 xmlFreeURI(uri);
5056 }
5057 }
5058 }
5059 } else {
5060 if ((RAW == '"') || (RAW == '\'')) {
5061 value = xmlParseEntityValue(ctxt, &orig);
5062 if ((ctxt->sax != NULL) &&
5063 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5064 ctxt->sax->entityDecl(ctxt->userData, name,
5065 XML_INTERNAL_GENERAL_ENTITY,
5066 NULL, NULL, value);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005067 /*
5068 * For expat compatibility in SAX mode.
5069 */
5070 if ((ctxt->myDoc == NULL) ||
5071 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
5072 if (ctxt->myDoc == NULL) {
5073 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
Daniel Veillard68b6e022008-03-31 09:26:00 +00005074 if (ctxt->myDoc == NULL) {
5075 xmlErrMemory(ctxt, "New Doc failed");
5076 return;
5077 }
Daniel Veillardae0765b2008-07-31 19:54:59 +00005078 ctxt->myDoc->properties = XML_DOC_INTERNAL;
Daniel Veillard5997aca2002-03-18 18:36:20 +00005079 }
5080 if (ctxt->myDoc->intSubset == NULL)
5081 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5082 BAD_CAST "fake", NULL, NULL);
5083
Daniel Veillard1af9a412003-08-20 22:54:39 +00005084 xmlSAX2EntityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY,
5085 NULL, NULL, value);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005086 }
Owen Taylor3473f882001-02-23 17:55:21 +00005087 } else {
5088 URI = xmlParseExternalID(ctxt, &literal, 1);
5089 if ((URI == NULL) && (literal == NULL)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005090 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005091 }
5092 if (URI) {
5093 xmlURIPtr uri;
5094
5095 uri = xmlParseURI((const char *)URI);
5096 if (uri == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00005097 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
5098 "Invalid URI: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00005099 /*
5100 * This really ought to be a well formedness error
5101 * but the XML Core WG decided otherwise c.f. issue
5102 * E26 of the XML erratas.
5103 */
Owen Taylor3473f882001-02-23 17:55:21 +00005104 } else {
5105 if (uri->fragment != NULL) {
Daniel Veillard4a7ae502002-02-18 19:18:17 +00005106 /*
5107 * Okay this is foolish to block those but not
5108 * invalid URIs.
5109 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005110 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005111 }
5112 xmlFreeURI(uri);
5113 }
5114 }
William M. Brack76e95df2003-10-18 16:20:14 +00005115 if ((RAW != '>') && (!IS_BLANK_CH(CUR))) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005116 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5117 "Space required before 'NDATA'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005118 }
5119 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005120 if (CMP5(CUR_PTR, 'N', 'D', 'A', 'T', 'A')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005121 SKIP(5);
William M. Brack76e95df2003-10-18 16:20:14 +00005122 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005123 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5124 "Space required after 'NDATA'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005125 }
5126 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00005127 ndata = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005128 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5129 (ctxt->sax->unparsedEntityDecl != NULL))
5130 ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
5131 literal, URI, ndata);
5132 } else {
5133 if ((ctxt->sax != NULL) &&
5134 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5135 ctxt->sax->entityDecl(ctxt->userData, name,
5136 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5137 literal, URI, NULL);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005138 /*
5139 * For expat compatibility in SAX mode.
5140 * assuming the entity repalcement was asked for
5141 */
5142 if ((ctxt->replaceEntities != 0) &&
5143 ((ctxt->myDoc == NULL) ||
5144 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) {
5145 if (ctxt->myDoc == NULL) {
5146 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
Daniel Veillard68b6e022008-03-31 09:26:00 +00005147 if (ctxt->myDoc == NULL) {
5148 xmlErrMemory(ctxt, "New Doc failed");
5149 return;
5150 }
Daniel Veillardae0765b2008-07-31 19:54:59 +00005151 ctxt->myDoc->properties = XML_DOC_INTERNAL;
Daniel Veillard5997aca2002-03-18 18:36:20 +00005152 }
5153
5154 if (ctxt->myDoc->intSubset == NULL)
5155 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5156 BAD_CAST "fake", NULL, NULL);
Daniel Veillard1af9a412003-08-20 22:54:39 +00005157 xmlSAX2EntityDecl(ctxt, name,
5158 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5159 literal, URI, NULL);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005160 }
Owen Taylor3473f882001-02-23 17:55:21 +00005161 }
5162 }
5163 }
5164 SKIP_BLANKS;
5165 if (RAW != '>') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00005166 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00005167 "xmlParseEntityDecl: entity %s not terminated\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00005168 } else {
5169 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005170 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5171 "Entity declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005172 }
5173 NEXT;
5174 }
5175 if (orig != NULL) {
5176 /*
5177 * Ugly mechanism to save the raw entity value.
5178 */
5179 xmlEntityPtr cur = NULL;
5180
5181 if (isParameter) {
5182 if ((ctxt->sax != NULL) &&
5183 (ctxt->sax->getParameterEntity != NULL))
5184 cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
5185 } else {
5186 if ((ctxt->sax != NULL) &&
5187 (ctxt->sax->getEntity != NULL))
5188 cur = ctxt->sax->getEntity(ctxt->userData, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005189 if ((cur == NULL) && (ctxt->userData==ctxt)) {
Daniel Veillard1af9a412003-08-20 22:54:39 +00005190 cur = xmlSAX2GetEntity(ctxt, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005191 }
Owen Taylor3473f882001-02-23 17:55:21 +00005192 }
5193 if (cur != NULL) {
5194 if (cur->orig != NULL)
5195 xmlFree(orig);
5196 else
5197 cur->orig = orig;
5198 } else
5199 xmlFree(orig);
5200 }
Owen Taylor3473f882001-02-23 17:55:21 +00005201 if (value != NULL) xmlFree(value);
5202 if (URI != NULL) xmlFree(URI);
5203 if (literal != NULL) xmlFree(literal);
Owen Taylor3473f882001-02-23 17:55:21 +00005204 }
5205}
5206
5207/**
5208 * xmlParseDefaultDecl:
5209 * @ctxt: an XML parser context
5210 * @value: Receive a possible fixed default value for the attribute
5211 *
5212 * Parse an attribute default declaration
5213 *
5214 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
5215 *
5216 * [ VC: Required Attribute ]
5217 * if the default declaration is the keyword #REQUIRED, then the
5218 * attribute must be specified for all elements of the type in the
5219 * attribute-list declaration.
5220 *
5221 * [ VC: Attribute Default Legal ]
5222 * The declared default value must meet the lexical constraints of
5223 * the declared attribute type c.f. xmlValidateAttributeDecl()
5224 *
5225 * [ VC: Fixed Attribute Default ]
5226 * if an attribute has a default value declared with the #FIXED
5227 * keyword, instances of that attribute must match the default value.
5228 *
5229 * [ WFC: No < in Attribute Values ]
5230 * handled in xmlParseAttValue()
5231 *
5232 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
5233 * or XML_ATTRIBUTE_FIXED.
5234 */
5235
5236int
5237xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
5238 int val;
5239 xmlChar *ret;
5240
5241 *value = NULL;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005242 if (CMP9(CUR_PTR, '#', 'R', 'E', 'Q', 'U', 'I', 'R', 'E', 'D')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005243 SKIP(9);
5244 return(XML_ATTRIBUTE_REQUIRED);
5245 }
Daniel Veillarda07050d2003-10-19 14:46:32 +00005246 if (CMP8(CUR_PTR, '#', 'I', 'M', 'P', 'L', 'I', 'E', 'D')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005247 SKIP(8);
5248 return(XML_ATTRIBUTE_IMPLIED);
5249 }
5250 val = XML_ATTRIBUTE_NONE;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005251 if (CMP6(CUR_PTR, '#', 'F', 'I', 'X', 'E', 'D')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005252 SKIP(6);
5253 val = XML_ATTRIBUTE_FIXED;
William M. Brack76e95df2003-10-18 16:20:14 +00005254 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005255 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5256 "Space required after '#FIXED'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005257 }
5258 SKIP_BLANKS;
5259 }
5260 ret = xmlParseAttValue(ctxt);
5261 ctxt->instate = XML_PARSER_DTD;
5262 if (ret == NULL) {
William M. Brack7b9154b2003-09-27 19:23:50 +00005263 xmlFatalErrMsg(ctxt, (xmlParserErrors)ctxt->errNo,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005264 "Attribute default value declaration error\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005265 } else
5266 *value = ret;
5267 return(val);
5268}
5269
5270/**
5271 * xmlParseNotationType:
5272 * @ctxt: an XML parser context
5273 *
5274 * parse an Notation attribute type.
5275 *
5276 * Note: the leading 'NOTATION' S part has already being parsed...
5277 *
5278 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5279 *
5280 * [ VC: Notation Attributes ]
5281 * Values of this type must match one of the notation names included
5282 * in the declaration; all notation names in the declaration must be declared.
5283 *
5284 * Returns: the notation attribute tree built while parsing
5285 */
5286
5287xmlEnumerationPtr
5288xmlParseNotationType(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005289 const xmlChar *name;
Daniel Veillard49d44052008-08-27 19:57:06 +00005290 xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00005291
5292 if (RAW != '(') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005293 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005294 return(NULL);
5295 }
5296 SHRINK;
5297 do {
5298 NEXT;
5299 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00005300 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005301 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005302 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5303 "Name expected in NOTATION declaration\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005304 return(ret);
5305 }
Daniel Veillard49d44052008-08-27 19:57:06 +00005306 tmp = ret;
5307 while (tmp != NULL) {
5308 if (xmlStrEqual(name, tmp->name)) {
5309 xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5310 "standalone: attribute notation value token %s duplicated\n",
5311 name, NULL);
5312 if (!xmlDictOwns(ctxt->dict, name))
5313 xmlFree((xmlChar *) name);
5314 break;
5315 }
5316 tmp = tmp->next;
5317 }
5318 if (tmp == NULL) {
5319 cur = xmlCreateEnumeration(name);
5320 if (cur == NULL) return(ret);
5321 if (last == NULL) ret = last = cur;
5322 else {
5323 last->next = cur;
5324 last = cur;
5325 }
Owen Taylor3473f882001-02-23 17:55:21 +00005326 }
5327 SKIP_BLANKS;
5328 } while (RAW == '|');
5329 if (RAW != ')') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005330 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005331 if ((last != NULL) && (last != ret))
5332 xmlFreeEnumeration(last);
5333 return(ret);
5334 }
5335 NEXT;
5336 return(ret);
5337}
5338
5339/**
5340 * xmlParseEnumerationType:
5341 * @ctxt: an XML parser context
5342 *
5343 * parse an Enumeration attribute type.
5344 *
5345 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
5346 *
5347 * [ VC: Enumeration ]
5348 * Values of this type must match one of the Nmtoken tokens in
5349 * the declaration
5350 *
5351 * Returns: the enumeration attribute tree built while parsing
5352 */
5353
5354xmlEnumerationPtr
5355xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
5356 xmlChar *name;
Daniel Veillard49d44052008-08-27 19:57:06 +00005357 xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00005358
5359 if (RAW != '(') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005360 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005361 return(NULL);
5362 }
5363 SHRINK;
5364 do {
5365 NEXT;
5366 SKIP_BLANKS;
5367 name = xmlParseNmtoken(ctxt);
5368 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005369 xmlFatalErr(ctxt, XML_ERR_NMTOKEN_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005370 return(ret);
5371 }
Daniel Veillard49d44052008-08-27 19:57:06 +00005372 tmp = ret;
5373 while (tmp != NULL) {
5374 if (xmlStrEqual(name, tmp->name)) {
5375 xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5376 "standalone: attribute enumeration value token %s duplicated\n",
5377 name, NULL);
5378 if (!xmlDictOwns(ctxt->dict, name))
5379 xmlFree(name);
5380 break;
5381 }
5382 tmp = tmp->next;
5383 }
5384 if (tmp == NULL) {
5385 cur = xmlCreateEnumeration(name);
5386 if (!xmlDictOwns(ctxt->dict, name))
5387 xmlFree(name);
5388 if (cur == NULL) return(ret);
5389 if (last == NULL) ret = last = cur;
5390 else {
5391 last->next = cur;
5392 last = cur;
5393 }
Owen Taylor3473f882001-02-23 17:55:21 +00005394 }
5395 SKIP_BLANKS;
5396 } while (RAW == '|');
5397 if (RAW != ')') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005398 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005399 return(ret);
5400 }
5401 NEXT;
5402 return(ret);
5403}
5404
5405/**
5406 * xmlParseEnumeratedType:
5407 * @ctxt: an XML parser context
5408 * @tree: the enumeration tree built while parsing
5409 *
5410 * parse an Enumerated attribute type.
5411 *
5412 * [57] EnumeratedType ::= NotationType | Enumeration
5413 *
5414 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5415 *
5416 *
5417 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
5418 */
5419
5420int
5421xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
Daniel Veillarda07050d2003-10-19 14:46:32 +00005422 if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005423 SKIP(8);
William M. Brack76e95df2003-10-18 16:20:14 +00005424 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005425 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5426 "Space required after 'NOTATION'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005427 return(0);
5428 }
5429 SKIP_BLANKS;
5430 *tree = xmlParseNotationType(ctxt);
5431 if (*tree == NULL) return(0);
5432 return(XML_ATTRIBUTE_NOTATION);
5433 }
5434 *tree = xmlParseEnumerationType(ctxt);
5435 if (*tree == NULL) return(0);
5436 return(XML_ATTRIBUTE_ENUMERATION);
5437}
5438
5439/**
5440 * xmlParseAttributeType:
5441 * @ctxt: an XML parser context
5442 * @tree: the enumeration tree built while parsing
5443 *
5444 * parse the Attribute list def for an element
5445 *
5446 * [54] AttType ::= StringType | TokenizedType | EnumeratedType
5447 *
5448 * [55] StringType ::= 'CDATA'
5449 *
5450 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
5451 * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
5452 *
5453 * Validity constraints for attribute values syntax are checked in
5454 * xmlValidateAttributeValue()
5455 *
5456 * [ VC: ID ]
5457 * Values of type ID must match the Name production. A name must not
5458 * appear more than once in an XML document as a value of this type;
5459 * i.e., ID values must uniquely identify the elements which bear them.
5460 *
5461 * [ VC: One ID per Element Type ]
5462 * No element type may have more than one ID attribute specified.
5463 *
5464 * [ VC: ID Attribute Default ]
5465 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
5466 *
5467 * [ VC: IDREF ]
5468 * Values of type IDREF must match the Name production, and values
5469 * of type IDREFS must match Names; each IDREF Name must match the value
5470 * of an ID attribute on some element in the XML document; i.e. IDREF
5471 * values must match the value of some ID attribute.
5472 *
5473 * [ VC: Entity Name ]
5474 * Values of type ENTITY must match the Name production, values
5475 * of type ENTITIES must match Names; each Entity Name must match the
5476 * name of an unparsed entity declared in the DTD.
5477 *
5478 * [ VC: Name Token ]
5479 * Values of type NMTOKEN must match the Nmtoken production; values
5480 * of type NMTOKENS must match Nmtokens.
5481 *
5482 * Returns the attribute type
5483 */
5484int
5485xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
5486 SHRINK;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005487 if (CMP5(CUR_PTR, 'C', 'D', 'A', 'T', 'A')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005488 SKIP(5);
5489 return(XML_ATTRIBUTE_CDATA);
Daniel Veillarda07050d2003-10-19 14:46:32 +00005490 } else if (CMP6(CUR_PTR, 'I', 'D', 'R', 'E', 'F', 'S')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005491 SKIP(6);
5492 return(XML_ATTRIBUTE_IDREFS);
Daniel Veillarda07050d2003-10-19 14:46:32 +00005493 } else if (CMP5(CUR_PTR, 'I', 'D', 'R', 'E', 'F')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005494 SKIP(5);
5495 return(XML_ATTRIBUTE_IDREF);
5496 } else if ((RAW == 'I') && (NXT(1) == 'D')) {
5497 SKIP(2);
5498 return(XML_ATTRIBUTE_ID);
Daniel Veillarda07050d2003-10-19 14:46:32 +00005499 } else if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005500 SKIP(6);
5501 return(XML_ATTRIBUTE_ENTITY);
Daniel Veillarda07050d2003-10-19 14:46:32 +00005502 } else if (CMP8(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'I', 'E', 'S')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005503 SKIP(8);
5504 return(XML_ATTRIBUTE_ENTITIES);
Daniel Veillarda07050d2003-10-19 14:46:32 +00005505 } else if (CMP8(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N', 'S')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005506 SKIP(8);
5507 return(XML_ATTRIBUTE_NMTOKENS);
Daniel Veillarda07050d2003-10-19 14:46:32 +00005508 } else if (CMP7(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005509 SKIP(7);
5510 return(XML_ATTRIBUTE_NMTOKEN);
5511 }
5512 return(xmlParseEnumeratedType(ctxt, tree));
5513}
5514
5515/**
5516 * xmlParseAttributeListDecl:
5517 * @ctxt: an XML parser context
5518 *
5519 * : parse the Attribute list def for an element
5520 *
5521 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
5522 *
5523 * [53] AttDef ::= S Name S AttType S DefaultDecl
5524 *
5525 */
5526void
5527xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005528 const xmlChar *elemName;
5529 const xmlChar *attrName;
Owen Taylor3473f882001-02-23 17:55:21 +00005530 xmlEnumerationPtr tree;
5531
Daniel Veillarda07050d2003-10-19 14:46:32 +00005532 if (CMP9(CUR_PTR, '<', '!', 'A', 'T', 'T', 'L', 'I', 'S', 'T')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005533 xmlParserInputPtr input = ctxt->input;
5534
5535 SKIP(9);
William M. Brack76e95df2003-10-18 16:20:14 +00005536 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005537 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005538 "Space required after '<!ATTLIST'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005539 }
5540 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00005541 elemName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005542 if (elemName == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005543 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5544 "ATTLIST: no name for Element\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005545 return;
5546 }
5547 SKIP_BLANKS;
5548 GROW;
5549 while (RAW != '>') {
5550 const xmlChar *check = CUR_PTR;
5551 int type;
5552 int def;
5553 xmlChar *defaultValue = NULL;
5554
5555 GROW;
5556 tree = NULL;
Daniel Veillard76d66f42001-05-16 21:05:17 +00005557 attrName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005558 if (attrName == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005559 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5560 "ATTLIST: no name for Attribute\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005561 break;
5562 }
5563 GROW;
William M. Brack76e95df2003-10-18 16:20:14 +00005564 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005565 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005566 "Space required after the attribute name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005567 break;
5568 }
5569 SKIP_BLANKS;
5570
5571 type = xmlParseAttributeType(ctxt, &tree);
5572 if (type <= 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00005573 break;
5574 }
5575
5576 GROW;
William M. Brack76e95df2003-10-18 16:20:14 +00005577 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005578 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5579 "Space required after the attribute type\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005580 if (tree != NULL)
5581 xmlFreeEnumeration(tree);
5582 break;
5583 }
5584 SKIP_BLANKS;
5585
5586 def = xmlParseDefaultDecl(ctxt, &defaultValue);
5587 if (def <= 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00005588 if (defaultValue != NULL)
5589 xmlFree(defaultValue);
5590 if (tree != NULL)
5591 xmlFreeEnumeration(tree);
5592 break;
5593 }
Daniel Veillard97c9ce22008-03-25 16:52:41 +00005594 if ((type != XML_ATTRIBUTE_CDATA) && (defaultValue != NULL))
5595 xmlAttrNormalizeSpace(defaultValue, defaultValue);
Owen Taylor3473f882001-02-23 17:55:21 +00005596
5597 GROW;
5598 if (RAW != '>') {
William M. Brack76e95df2003-10-18 16:20:14 +00005599 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005600 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005601 "Space required after the attribute default value\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005602 if (defaultValue != NULL)
5603 xmlFree(defaultValue);
5604 if (tree != NULL)
5605 xmlFreeEnumeration(tree);
5606 break;
5607 }
5608 SKIP_BLANKS;
5609 }
5610 if (check == CUR_PTR) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005611 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
5612 "in xmlParseAttributeListDecl\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005613 if (defaultValue != NULL)
5614 xmlFree(defaultValue);
5615 if (tree != NULL)
5616 xmlFreeEnumeration(tree);
5617 break;
5618 }
5619 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5620 (ctxt->sax->attributeDecl != NULL))
5621 ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
5622 type, def, defaultValue, tree);
Daniel Veillarde57ec792003-09-10 10:50:59 +00005623 else if (tree != NULL)
5624 xmlFreeEnumeration(tree);
5625
5626 if ((ctxt->sax2) && (defaultValue != NULL) &&
5627 (def != XML_ATTRIBUTE_IMPLIED) &&
5628 (def != XML_ATTRIBUTE_REQUIRED)) {
5629 xmlAddDefAttrs(ctxt, elemName, attrName, defaultValue);
5630 }
Daniel Veillardac4118d2008-01-11 05:27:32 +00005631 if (ctxt->sax2) {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00005632 xmlAddSpecialAttr(ctxt, elemName, attrName, type);
5633 }
Owen Taylor3473f882001-02-23 17:55:21 +00005634 if (defaultValue != NULL)
5635 xmlFree(defaultValue);
5636 GROW;
5637 }
5638 if (RAW == '>') {
5639 if (input != ctxt->input) {
Daniel Veillard49d44052008-08-27 19:57:06 +00005640 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
5641 "Attribute list declaration doesn't start and stop in the same entity\n",
5642 NULL, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005643 }
5644 NEXT;
5645 }
Owen Taylor3473f882001-02-23 17:55:21 +00005646 }
5647}
5648
5649/**
5650 * xmlParseElementMixedContentDecl:
5651 * @ctxt: an XML parser context
Daniel Veillarda9b66d02002-12-11 14:23:49 +00005652 * @inputchk: the input used for the current entity, needed for boundary checks
Owen Taylor3473f882001-02-23 17:55:21 +00005653 *
5654 * parse the declaration for a Mixed Element content
5655 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
5656 *
5657 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
5658 * '(' S? '#PCDATA' S? ')'
5659 *
5660 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
5661 *
5662 * [ VC: No Duplicate Types ]
5663 * The same name must not appear more than once in a single
5664 * mixed-content declaration.
5665 *
5666 * returns: the list of the xmlElementContentPtr describing the element choices
5667 */
5668xmlElementContentPtr
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005669xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
Owen Taylor3473f882001-02-23 17:55:21 +00005670 xmlElementContentPtr ret = NULL, cur = NULL, n;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005671 const xmlChar *elem = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005672
5673 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005674 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005675 SKIP(7);
5676 SKIP_BLANKS;
5677 SHRINK;
5678 if (RAW == ')') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005679 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00005680 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
5681"Element content declaration doesn't start and stop in the same entity\n",
Daniel Veillardae0765b2008-07-31 19:54:59 +00005682 NULL, NULL);
Daniel Veillard8dc16a62002-02-19 21:08:48 +00005683 }
Owen Taylor3473f882001-02-23 17:55:21 +00005684 NEXT;
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005685 ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
Daniel Veillard68b6e022008-03-31 09:26:00 +00005686 if (ret == NULL)
5687 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005688 if (RAW == '*') {
5689 ret->ocur = XML_ELEMENT_CONTENT_MULT;
5690 NEXT;
5691 }
5692 return(ret);
5693 }
5694 if ((RAW == '(') || (RAW == '|')) {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005695 ret = cur = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
Owen Taylor3473f882001-02-23 17:55:21 +00005696 if (ret == NULL) return(NULL);
5697 }
5698 while (RAW == '|') {
5699 NEXT;
5700 if (elem == NULL) {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005701 ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
Owen Taylor3473f882001-02-23 17:55:21 +00005702 if (ret == NULL) return(NULL);
5703 ret->c1 = cur;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005704 if (cur != NULL)
5705 cur->parent = ret;
Owen Taylor3473f882001-02-23 17:55:21 +00005706 cur = ret;
5707 } else {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005708 n = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
Owen Taylor3473f882001-02-23 17:55:21 +00005709 if (n == NULL) return(NULL);
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005710 n->c1 = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005711 if (n->c1 != NULL)
5712 n->c1->parent = n;
Owen Taylor3473f882001-02-23 17:55:21 +00005713 cur->c2 = n;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005714 if (n != NULL)
5715 n->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00005716 cur = n;
Owen Taylor3473f882001-02-23 17:55:21 +00005717 }
5718 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00005719 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005720 if (elem == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005721 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005722 "xmlParseElementMixedContentDecl : Name expected\n");
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005723 xmlFreeDocElementContent(ctxt->myDoc, cur);
Owen Taylor3473f882001-02-23 17:55:21 +00005724 return(NULL);
5725 }
5726 SKIP_BLANKS;
5727 GROW;
5728 }
5729 if ((RAW == ')') && (NXT(1) == '*')) {
5730 if (elem != NULL) {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005731 cur->c2 = xmlNewDocElementContent(ctxt->myDoc, elem,
Owen Taylor3473f882001-02-23 17:55:21 +00005732 XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005733 if (cur->c2 != NULL)
5734 cur->c2->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00005735 }
5736 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005737 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00005738 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
5739"Element content declaration doesn't start and stop in the same entity\n",
Daniel Veillardae0765b2008-07-31 19:54:59 +00005740 NULL, NULL);
Daniel Veillard8dc16a62002-02-19 21:08:48 +00005741 }
Owen Taylor3473f882001-02-23 17:55:21 +00005742 SKIP(2);
5743 } else {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005744 xmlFreeDocElementContent(ctxt->myDoc, ret);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005745 xmlFatalErr(ctxt, XML_ERR_MIXED_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005746 return(NULL);
5747 }
5748
5749 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005750 xmlFatalErr(ctxt, XML_ERR_PCDATA_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005751 }
5752 return(ret);
5753}
5754
5755/**
5756 * xmlParseElementChildrenContentDecl:
5757 * @ctxt: an XML parser context
Daniel Veillarda9b66d02002-12-11 14:23:49 +00005758 * @inputchk: the input used for the current entity, needed for boundary checks
Owen Taylor3473f882001-02-23 17:55:21 +00005759 *
5760 * parse the declaration for a Mixed Element content
5761 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
5762 *
5763 *
5764 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
5765 *
5766 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
5767 *
5768 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
5769 *
5770 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
5771 *
5772 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
5773 * TODO Parameter-entity replacement text must be properly nested
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005774 * with parenthesized groups. That is to say, if either of the
Owen Taylor3473f882001-02-23 17:55:21 +00005775 * opening or closing parentheses in a choice, seq, or Mixed
5776 * construct is contained in the replacement text for a parameter
5777 * entity, both must be contained in the same replacement text. For
5778 * interoperability, if a parameter-entity reference appears in a
5779 * choice, seq, or Mixed construct, its replacement text should not
5780 * be empty, and neither the first nor last non-blank character of
5781 * the replacement text should be a connector (| or ,).
5782 *
Daniel Veillard5e2dace2001-07-18 19:30:27 +00005783 * Returns the tree of xmlElementContentPtr describing the element
Owen Taylor3473f882001-02-23 17:55:21 +00005784 * hierarchy.
5785 */
5786xmlElementContentPtr
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005787xmlParseElementChildrenContentDecl (xmlParserCtxtPtr ctxt, int inputchk) {
Owen Taylor3473f882001-02-23 17:55:21 +00005788 xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005789 const xmlChar *elem;
Owen Taylor3473f882001-02-23 17:55:21 +00005790 xmlChar type = 0;
5791
5792 SKIP_BLANKS;
5793 GROW;
5794 if (RAW == '(') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005795 int inputid = ctxt->input->id;
Daniel Veillard8dc16a62002-02-19 21:08:48 +00005796
Owen Taylor3473f882001-02-23 17:55:21 +00005797 /* Recurse on first child */
5798 NEXT;
5799 SKIP_BLANKS;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005800 cur = ret = xmlParseElementChildrenContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00005801 SKIP_BLANKS;
5802 GROW;
5803 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00005804 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005805 if (elem == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005806 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005807 return(NULL);
5808 }
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005809 cur = ret = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00005810 if (cur == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005811 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00005812 return(NULL);
5813 }
Owen Taylor3473f882001-02-23 17:55:21 +00005814 GROW;
5815 if (RAW == '?') {
5816 cur->ocur = XML_ELEMENT_CONTENT_OPT;
5817 NEXT;
5818 } else if (RAW == '*') {
5819 cur->ocur = XML_ELEMENT_CONTENT_MULT;
5820 NEXT;
5821 } else if (RAW == '+') {
5822 cur->ocur = XML_ELEMENT_CONTENT_PLUS;
5823 NEXT;
5824 } else {
5825 cur->ocur = XML_ELEMENT_CONTENT_ONCE;
5826 }
Owen Taylor3473f882001-02-23 17:55:21 +00005827 GROW;
5828 }
5829 SKIP_BLANKS;
5830 SHRINK;
5831 while (RAW != ')') {
5832 /*
5833 * Each loop we parse one separator and one element.
5834 */
5835 if (RAW == ',') {
5836 if (type == 0) type = CUR;
5837
5838 /*
5839 * Detect "Name | Name , Name" error
5840 */
5841 else if (type != CUR) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005842 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005843 "xmlParseElementChildrenContentDecl : '%c' expected\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005844 type);
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00005845 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005846 xmlFreeDocElementContent(ctxt->myDoc, last);
Owen Taylor3473f882001-02-23 17:55:21 +00005847 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005848 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00005849 return(NULL);
5850 }
5851 NEXT;
5852
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005853 op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_SEQ);
Owen Taylor3473f882001-02-23 17:55:21 +00005854 if (op == NULL) {
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00005855 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005856 xmlFreeDocElementContent(ctxt->myDoc, last);
5857 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00005858 return(NULL);
5859 }
5860 if (last == NULL) {
5861 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005862 if (ret != NULL)
5863 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00005864 ret = cur = op;
5865 } else {
5866 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005867 if (op != NULL)
5868 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00005869 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005870 if (last != NULL)
5871 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00005872 cur =op;
5873 last = NULL;
5874 }
5875 } else if (RAW == '|') {
5876 if (type == 0) type = CUR;
5877
5878 /*
5879 * Detect "Name , Name | Name" error
5880 */
5881 else if (type != CUR) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005882 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005883 "xmlParseElementChildrenContentDecl : '%c' expected\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005884 type);
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00005885 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005886 xmlFreeDocElementContent(ctxt->myDoc, last);
Owen Taylor3473f882001-02-23 17:55:21 +00005887 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005888 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00005889 return(NULL);
5890 }
5891 NEXT;
5892
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005893 op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
Owen Taylor3473f882001-02-23 17:55:21 +00005894 if (op == NULL) {
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00005895 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005896 xmlFreeDocElementContent(ctxt->myDoc, last);
Owen Taylor3473f882001-02-23 17:55:21 +00005897 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005898 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00005899 return(NULL);
5900 }
5901 if (last == NULL) {
5902 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005903 if (ret != NULL)
5904 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00005905 ret = cur = op;
5906 } else {
5907 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005908 if (op != NULL)
5909 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00005910 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005911 if (last != NULL)
5912 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00005913 cur =op;
5914 last = NULL;
5915 }
5916 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005917 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED, NULL);
Daniel Veillardc707d0b2008-01-24 14:48:54 +00005918 if ((last != NULL) && (last != ret))
5919 xmlFreeDocElementContent(ctxt->myDoc, last);
Owen Taylor3473f882001-02-23 17:55:21 +00005920 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005921 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00005922 return(NULL);
5923 }
5924 GROW;
5925 SKIP_BLANKS;
5926 GROW;
5927 if (RAW == '(') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005928 int inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00005929 /* Recurse on second child */
5930 NEXT;
5931 SKIP_BLANKS;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005932 last = xmlParseElementChildrenContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00005933 SKIP_BLANKS;
5934 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00005935 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005936 if (elem == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005937 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005938 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005939 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00005940 return(NULL);
5941 }
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005942 last = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillard68b6e022008-03-31 09:26:00 +00005943 if (last == NULL) {
5944 if (ret != NULL)
5945 xmlFreeDocElementContent(ctxt->myDoc, ret);
5946 return(NULL);
5947 }
Owen Taylor3473f882001-02-23 17:55:21 +00005948 if (RAW == '?') {
5949 last->ocur = XML_ELEMENT_CONTENT_OPT;
5950 NEXT;
5951 } else if (RAW == '*') {
5952 last->ocur = XML_ELEMENT_CONTENT_MULT;
5953 NEXT;
5954 } else if (RAW == '+') {
5955 last->ocur = XML_ELEMENT_CONTENT_PLUS;
5956 NEXT;
5957 } else {
5958 last->ocur = XML_ELEMENT_CONTENT_ONCE;
5959 }
5960 }
5961 SKIP_BLANKS;
5962 GROW;
5963 }
5964 if ((cur != NULL) && (last != NULL)) {
5965 cur->c2 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005966 if (last != NULL)
5967 last->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00005968 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005969 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00005970 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
5971"Element content declaration doesn't start and stop in the same entity\n",
Daniel Veillardae0765b2008-07-31 19:54:59 +00005972 NULL, NULL);
Daniel Veillard8dc16a62002-02-19 21:08:48 +00005973 }
Owen Taylor3473f882001-02-23 17:55:21 +00005974 NEXT;
5975 if (RAW == '?') {
William M. Brackf8f2e8f2004-05-14 04:37:41 +00005976 if (ret != NULL) {
5977 if ((ret->ocur == XML_ELEMENT_CONTENT_PLUS) ||
5978 (ret->ocur == XML_ELEMENT_CONTENT_MULT))
5979 ret->ocur = XML_ELEMENT_CONTENT_MULT;
5980 else
5981 ret->ocur = XML_ELEMENT_CONTENT_OPT;
5982 }
Owen Taylor3473f882001-02-23 17:55:21 +00005983 NEXT;
5984 } else if (RAW == '*') {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00005985 if (ret != NULL) {
Daniel Veillarde470df72001-04-18 21:41:07 +00005986 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Daniel Veillardce2c2f02001-10-18 14:57:24 +00005987 cur = ret;
5988 /*
5989 * Some normalization:
5990 * (a | b* | c?)* == (a | b | c)*
5991 */
Daniel Veillard30e76072006-03-09 14:13:55 +00005992 while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00005993 if ((cur->c1 != NULL) &&
5994 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
5995 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT)))
5996 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
5997 if ((cur->c2 != NULL) &&
5998 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
5999 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT)))
6000 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6001 cur = cur->c2;
6002 }
6003 }
Owen Taylor3473f882001-02-23 17:55:21 +00006004 NEXT;
6005 } else if (RAW == '+') {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00006006 if (ret != NULL) {
6007 int found = 0;
6008
William M. Brackf8f2e8f2004-05-14 04:37:41 +00006009 if ((ret->ocur == XML_ELEMENT_CONTENT_OPT) ||
6010 (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6011 ret->ocur = XML_ELEMENT_CONTENT_MULT;
William M. Brackeb8509c2004-05-14 03:48:02 +00006012 else
6013 ret->ocur = XML_ELEMENT_CONTENT_PLUS;
Daniel Veillardce2c2f02001-10-18 14:57:24 +00006014 /*
6015 * Some normalization:
6016 * (a | b*)+ == (a | b)*
6017 * (a | b?)+ == (a | b)*
6018 */
Daniel Veillard30e76072006-03-09 14:13:55 +00006019 while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00006020 if ((cur->c1 != NULL) &&
6021 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6022 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) {
6023 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6024 found = 1;
6025 }
6026 if ((cur->c2 != NULL) &&
6027 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6028 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) {
6029 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6030 found = 1;
6031 }
6032 cur = cur->c2;
6033 }
6034 if (found)
6035 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6036 }
Owen Taylor3473f882001-02-23 17:55:21 +00006037 NEXT;
6038 }
6039 return(ret);
6040}
6041
6042/**
6043 * xmlParseElementContentDecl:
6044 * @ctxt: an XML parser context
6045 * @name: the name of the element being defined.
6046 * @result: the Element Content pointer will be stored here if any
6047 *
6048 * parse the declaration for an Element content either Mixed or Children,
6049 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
6050 *
6051 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
6052 *
6053 * returns: the type of element content XML_ELEMENT_TYPE_xxx
6054 */
6055
6056int
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006057xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, const xmlChar *name,
Owen Taylor3473f882001-02-23 17:55:21 +00006058 xmlElementContentPtr *result) {
6059
6060 xmlElementContentPtr tree = NULL;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006061 int inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00006062 int res;
6063
6064 *result = NULL;
6065
6066 if (RAW != '(') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006067 xmlFatalErrMsgStr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
Daniel Veillard56a4cb82001-03-24 17:00:36 +00006068 "xmlParseElementContentDecl : %s '(' expected\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006069 return(-1);
6070 }
6071 NEXT;
6072 GROW;
6073 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00006074 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006075 tree = xmlParseElementMixedContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00006076 res = XML_ELEMENT_TYPE_MIXED;
6077 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006078 tree = xmlParseElementChildrenContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00006079 res = XML_ELEMENT_TYPE_ELEMENT;
6080 }
Owen Taylor3473f882001-02-23 17:55:21 +00006081 SKIP_BLANKS;
6082 *result = tree;
6083 return(res);
6084}
6085
6086/**
6087 * xmlParseElementDecl:
6088 * @ctxt: an XML parser context
6089 *
6090 * parse an Element declaration.
6091 *
6092 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
6093 *
6094 * [ VC: Unique Element Type Declaration ]
6095 * No element type may be declared more than once
6096 *
6097 * Returns the type of the element, or -1 in case of error
6098 */
6099int
6100xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006101 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00006102 int ret = -1;
6103 xmlElementContentPtr content = NULL;
6104
Daniel Veillard4c778d82005-01-23 17:37:44 +00006105 /* GROW; done in the caller */
Daniel Veillarda07050d2003-10-19 14:46:32 +00006106 if (CMP9(CUR_PTR, '<', '!', 'E', 'L', 'E', 'M', 'E', 'N', 'T')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006107 xmlParserInputPtr input = ctxt->input;
6108
6109 SKIP(9);
William M. Brack76e95df2003-10-18 16:20:14 +00006110 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006111 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6112 "Space required after 'ELEMENT'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006113 }
6114 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00006115 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006116 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006117 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6118 "xmlParseElementDecl: no name for Element\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006119 return(-1);
6120 }
6121 while ((RAW == 0) && (ctxt->inputNr > 1))
6122 xmlPopInput(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00006123 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006124 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6125 "Space required after the element name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006126 }
6127 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00006128 if (CMP5(CUR_PTR, 'E', 'M', 'P', 'T', 'Y')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006129 SKIP(5);
6130 /*
6131 * Element must always be empty.
6132 */
6133 ret = XML_ELEMENT_TYPE_EMPTY;
6134 } else if ((RAW == 'A') && (NXT(1) == 'N') &&
6135 (NXT(2) == 'Y')) {
6136 SKIP(3);
6137 /*
6138 * Element is a generic container.
6139 */
6140 ret = XML_ELEMENT_TYPE_ANY;
6141 } else if (RAW == '(') {
6142 ret = xmlParseElementContentDecl(ctxt, name, &content);
6143 } else {
6144 /*
6145 * [ WFC: PEs in Internal Subset ] error handling.
6146 */
6147 if ((RAW == '%') && (ctxt->external == 0) &&
6148 (ctxt->inputNr == 1)) {
Daniel Veillardf403d292003-10-05 13:51:35 +00006149 xmlFatalErrMsg(ctxt, XML_ERR_PEREF_IN_INT_SUBSET,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006150 "PEReference: forbidden within markup decl in internal subset\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006151 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00006152 xmlFatalErrMsg(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
Owen Taylor3473f882001-02-23 17:55:21 +00006153 "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
6154 }
Owen Taylor3473f882001-02-23 17:55:21 +00006155 return(-1);
6156 }
6157
6158 SKIP_BLANKS;
6159 /*
6160 * Pop-up of finished entities.
6161 */
6162 while ((RAW == 0) && (ctxt->inputNr > 1))
6163 xmlPopInput(ctxt);
6164 SKIP_BLANKS;
6165
6166 if (RAW != '>') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006167 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006168 if (content != NULL) {
6169 xmlFreeDocElementContent(ctxt->myDoc, content);
6170 }
Owen Taylor3473f882001-02-23 17:55:21 +00006171 } else {
6172 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006173 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6174 "Element declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006175 }
6176
6177 NEXT;
6178 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006179 (ctxt->sax->elementDecl != NULL)) {
6180 if (content != NULL)
6181 content->parent = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00006182 ctxt->sax->elementDecl(ctxt->userData, name, ret,
6183 content);
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006184 if ((content != NULL) && (content->parent == NULL)) {
6185 /*
6186 * this is a trick: if xmlAddElementDecl is called,
6187 * instead of copying the full tree it is plugged directly
6188 * if called from the parser. Avoid duplicating the
6189 * interfaces or change the API/ABI
6190 */
6191 xmlFreeDocElementContent(ctxt->myDoc, content);
6192 }
6193 } else if (content != NULL) {
6194 xmlFreeDocElementContent(ctxt->myDoc, content);
6195 }
Owen Taylor3473f882001-02-23 17:55:21 +00006196 }
Owen Taylor3473f882001-02-23 17:55:21 +00006197 }
6198 return(ret);
6199}
6200
6201/**
Owen Taylor3473f882001-02-23 17:55:21 +00006202 * xmlParseConditionalSections
6203 * @ctxt: an XML parser context
6204 *
6205 * [61] conditionalSect ::= includeSect | ignoreSect
6206 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
6207 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
6208 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
6209 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
6210 */
6211
Daniel Veillard56a4cb82001-03-24 17:00:36 +00006212static void
Owen Taylor3473f882001-02-23 17:55:21 +00006213xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
Daniel Veillard49d44052008-08-27 19:57:06 +00006214 int id = ctxt->input->id;
6215
Owen Taylor3473f882001-02-23 17:55:21 +00006216 SKIP(3);
6217 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00006218 if (CMP7(CUR_PTR, 'I', 'N', 'C', 'L', 'U', 'D', 'E')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006219 SKIP(7);
6220 SKIP_BLANKS;
6221 if (RAW != '[') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006222 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006223 } else {
Daniel Veillard49d44052008-08-27 19:57:06 +00006224 if (ctxt->input->id != id) {
6225 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6226 "All markup of the conditional section is not in the same entity\n",
6227 NULL, NULL);
6228 }
Owen Taylor3473f882001-02-23 17:55:21 +00006229 NEXT;
6230 }
6231 if (xmlParserDebugEntities) {
6232 if ((ctxt->input != NULL) && (ctxt->input->filename))
6233 xmlGenericError(xmlGenericErrorContext,
6234 "%s(%d): ", ctxt->input->filename,
6235 ctxt->input->line);
6236 xmlGenericError(xmlGenericErrorContext,
6237 "Entering INCLUDE Conditional Section\n");
6238 }
6239
6240 while ((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') ||
6241 (NXT(2) != '>'))) {
6242 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00006243 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00006244
6245 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6246 xmlParseConditionalSections(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00006247 } else if (IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00006248 NEXT;
6249 } else if (RAW == '%') {
6250 xmlParsePEReference(ctxt);
6251 } else
6252 xmlParseMarkupDecl(ctxt);
6253
6254 /*
6255 * Pop-up of finished entities.
6256 */
6257 while ((RAW == 0) && (ctxt->inputNr > 1))
6258 xmlPopInput(ctxt);
6259
Daniel Veillardfdc91562002-07-01 21:52:03 +00006260 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006261 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006262 break;
6263 }
6264 }
6265 if (xmlParserDebugEntities) {
6266 if ((ctxt->input != NULL) && (ctxt->input->filename))
6267 xmlGenericError(xmlGenericErrorContext,
6268 "%s(%d): ", ctxt->input->filename,
6269 ctxt->input->line);
6270 xmlGenericError(xmlGenericErrorContext,
6271 "Leaving INCLUDE Conditional Section\n");
6272 }
6273
Daniel Veillarda07050d2003-10-19 14:46:32 +00006274 } else if (CMP6(CUR_PTR, 'I', 'G', 'N', 'O', 'R', 'E')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006275 int state;
William M. Brack78637da2003-07-31 14:47:38 +00006276 xmlParserInputState instate;
Owen Taylor3473f882001-02-23 17:55:21 +00006277 int depth = 0;
6278
6279 SKIP(6);
6280 SKIP_BLANKS;
6281 if (RAW != '[') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006282 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006283 } else {
Daniel Veillard49d44052008-08-27 19:57:06 +00006284 if (ctxt->input->id != id) {
6285 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6286 "All markup of the conditional section is not in the same entity\n",
6287 NULL, NULL);
6288 }
Owen Taylor3473f882001-02-23 17:55:21 +00006289 NEXT;
6290 }
6291 if (xmlParserDebugEntities) {
6292 if ((ctxt->input != NULL) && (ctxt->input->filename))
6293 xmlGenericError(xmlGenericErrorContext,
6294 "%s(%d): ", ctxt->input->filename,
6295 ctxt->input->line);
6296 xmlGenericError(xmlGenericErrorContext,
6297 "Entering IGNORE Conditional Section\n");
6298 }
6299
6300 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006301 * Parse up to the end of the conditional section
Owen Taylor3473f882001-02-23 17:55:21 +00006302 * But disable SAX event generating DTD building in the meantime
6303 */
6304 state = ctxt->disableSAX;
6305 instate = ctxt->instate;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006306 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006307 ctxt->instate = XML_PARSER_IGNORE;
6308
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00006309 while ((depth >= 0) && (RAW != 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00006310 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6311 depth++;
6312 SKIP(3);
6313 continue;
6314 }
6315 if ((RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
6316 if (--depth >= 0) SKIP(3);
6317 continue;
6318 }
6319 NEXT;
6320 continue;
6321 }
6322
6323 ctxt->disableSAX = state;
6324 ctxt->instate = instate;
6325
6326 if (xmlParserDebugEntities) {
6327 if ((ctxt->input != NULL) && (ctxt->input->filename))
6328 xmlGenericError(xmlGenericErrorContext,
6329 "%s(%d): ", ctxt->input->filename,
6330 ctxt->input->line);
6331 xmlGenericError(xmlGenericErrorContext,
6332 "Leaving IGNORE Conditional Section\n");
6333 }
6334
6335 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006336 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID_KEYWORD, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006337 }
6338
6339 if (RAW == 0)
6340 SHRINK;
6341
6342 if (RAW == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006343 xmlFatalErr(ctxt, XML_ERR_CONDSEC_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006344 } else {
Daniel Veillard49d44052008-08-27 19:57:06 +00006345 if (ctxt->input->id != id) {
6346 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6347 "All markup of the conditional section is not in the same entity\n",
6348 NULL, NULL);
6349 }
Owen Taylor3473f882001-02-23 17:55:21 +00006350 SKIP(3);
6351 }
6352}
6353
6354/**
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006355 * xmlParseMarkupDecl:
6356 * @ctxt: an XML parser context
6357 *
6358 * parse Markup declarations
6359 *
6360 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
6361 * NotationDecl | PI | Comment
6362 *
6363 * [ VC: Proper Declaration/PE Nesting ]
6364 * Parameter-entity replacement text must be properly nested with
6365 * markup declarations. That is to say, if either the first character
6366 * or the last character of a markup declaration (markupdecl above) is
6367 * contained in the replacement text for a parameter-entity reference,
6368 * both must be contained in the same replacement text.
6369 *
6370 * [ WFC: PEs in Internal Subset ]
6371 * In the internal DTD subset, parameter-entity references can occur
6372 * only where markup declarations can occur, not within markup declarations.
6373 * (This does not apply to references that occur in external parameter
6374 * entities or to the external subset.)
6375 */
6376void
6377xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
6378 GROW;
Daniel Veillard4c778d82005-01-23 17:37:44 +00006379 if (CUR == '<') {
6380 if (NXT(1) == '!') {
6381 switch (NXT(2)) {
6382 case 'E':
6383 if (NXT(3) == 'L')
6384 xmlParseElementDecl(ctxt);
6385 else if (NXT(3) == 'N')
6386 xmlParseEntityDecl(ctxt);
6387 break;
6388 case 'A':
6389 xmlParseAttributeListDecl(ctxt);
6390 break;
6391 case 'N':
6392 xmlParseNotationDecl(ctxt);
6393 break;
6394 case '-':
6395 xmlParseComment(ctxt);
6396 break;
6397 default:
6398 /* there is an error but it will be detected later */
6399 break;
6400 }
6401 } else if (NXT(1) == '?') {
6402 xmlParsePI(ctxt);
6403 }
6404 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006405 /*
6406 * This is only for internal subset. On external entities,
6407 * the replacement is done before parsing stage
6408 */
6409 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
6410 xmlParsePEReference(ctxt);
6411
6412 /*
6413 * Conditional sections are allowed from entities included
6414 * by PE References in the internal subset.
6415 */
6416 if ((ctxt->external == 0) && (ctxt->inputNr > 1)) {
6417 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6418 xmlParseConditionalSections(ctxt);
6419 }
6420 }
6421
6422 ctxt->instate = XML_PARSER_DTD;
6423}
6424
6425/**
6426 * xmlParseTextDecl:
6427 * @ctxt: an XML parser context
Daniel Veillard40ec29a2008-07-30 12:35:40 +00006428 *
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006429 * parse an XML declaration header for external entities
6430 *
6431 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006432 */
6433
6434void
6435xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
6436 xmlChar *version;
Daniel Veillardf5cb3cd2003-10-28 13:58:13 +00006437 const xmlChar *encoding;
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006438
6439 /*
6440 * We know that '<?xml' is here.
6441 */
Daniel Veillarda07050d2003-10-19 14:46:32 +00006442 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006443 SKIP(5);
6444 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006445 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_STARTED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006446 return;
6447 }
6448
William M. Brack76e95df2003-10-18 16:20:14 +00006449 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006450 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6451 "Space needed after '<?xml'\n");
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006452 }
6453 SKIP_BLANKS;
6454
6455 /*
6456 * We may have the VersionInfo here.
6457 */
6458 version = xmlParseVersionInfo(ctxt);
6459 if (version == NULL)
6460 version = xmlCharStrdup(XML_DEFAULT_VERSION);
Daniel Veillard401c2112002-01-07 16:54:10 +00006461 else {
William M. Brack76e95df2003-10-18 16:20:14 +00006462 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006463 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6464 "Space needed here\n");
Daniel Veillard401c2112002-01-07 16:54:10 +00006465 }
6466 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006467 ctxt->input->version = version;
6468
6469 /*
6470 * We must have the encoding declaration
6471 */
Daniel Veillardf5cb3cd2003-10-28 13:58:13 +00006472 encoding = xmlParseEncodingDecl(ctxt);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006473 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
6474 /*
6475 * The XML REC instructs us to stop parsing right here
6476 */
6477 return;
6478 }
Daniel Veillardf5cb3cd2003-10-28 13:58:13 +00006479 if ((encoding == NULL) && (ctxt->errNo == XML_ERR_OK)) {
6480 xmlFatalErrMsg(ctxt, XML_ERR_MISSING_ENCODING,
6481 "Missing encoding in text declaration\n");
6482 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006483
6484 SKIP_BLANKS;
6485 if ((RAW == '?') && (NXT(1) == '>')) {
6486 SKIP(2);
6487 } else if (RAW == '>') {
6488 /* Deprecated old WD ... */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006489 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006490 NEXT;
6491 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006492 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006493 MOVETO_ENDTAG(CUR_PTR);
6494 NEXT;
6495 }
6496}
6497
6498/**
Owen Taylor3473f882001-02-23 17:55:21 +00006499 * xmlParseExternalSubset:
6500 * @ctxt: an XML parser context
6501 * @ExternalID: the external identifier
6502 * @SystemID: the system identifier (or URL)
6503 *
6504 * parse Markup declarations from an external subset
6505 *
6506 * [30] extSubset ::= textDecl? extSubsetDecl
6507 *
6508 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
6509 */
6510void
6511xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
6512 const xmlChar *SystemID) {
Daniel Veillard309f81d2003-09-23 09:02:53 +00006513 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006514 GROW;
Daniel Veillard6ccc56d2008-04-03 12:59:06 +00006515
6516 if ((ctxt->encoding == (const xmlChar *)XML_CHAR_ENCODING_NONE) &&
6517 (ctxt->input->end - ctxt->input->cur >= 4)) {
6518 xmlChar start[4];
6519 xmlCharEncoding enc;
6520
6521 start[0] = RAW;
6522 start[1] = NXT(1);
6523 start[2] = NXT(2);
6524 start[3] = NXT(3);
6525 enc = xmlDetectCharEncoding(start, 4);
6526 if (enc != XML_CHAR_ENCODING_NONE)
6527 xmlSwitchEncoding(ctxt, enc);
6528 }
6529
Daniel Veillarda07050d2003-10-19 14:46:32 +00006530 if (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006531 xmlParseTextDecl(ctxt);
6532 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
6533 /*
6534 * The XML REC instructs us to stop parsing right here
6535 */
6536 ctxt->instate = XML_PARSER_EOF;
6537 return;
6538 }
6539 }
6540 if (ctxt->myDoc == NULL) {
6541 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
Daniel Veillardae0765b2008-07-31 19:54:59 +00006542 if (ctxt->myDoc == NULL) {
6543 xmlErrMemory(ctxt, "New Doc failed");
6544 return;
6545 }
6546 ctxt->myDoc->properties = XML_DOC_INTERNAL;
Owen Taylor3473f882001-02-23 17:55:21 +00006547 }
6548 if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
6549 xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
6550
6551 ctxt->instate = XML_PARSER_DTD;
6552 ctxt->external = 1;
6553 while (((RAW == '<') && (NXT(1) == '?')) ||
6554 ((RAW == '<') && (NXT(1) == '!')) ||
William M. Brack76e95df2003-10-18 16:20:14 +00006555 (RAW == '%') || IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00006556 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00006557 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00006558
6559 GROW;
6560 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6561 xmlParseConditionalSections(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00006562 } else if (IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00006563 NEXT;
6564 } else if (RAW == '%') {
6565 xmlParsePEReference(ctxt);
6566 } else
6567 xmlParseMarkupDecl(ctxt);
6568
6569 /*
6570 * Pop-up of finished entities.
6571 */
6572 while ((RAW == 0) && (ctxt->inputNr > 1))
6573 xmlPopInput(ctxt);
6574
Daniel Veillardfdc91562002-07-01 21:52:03 +00006575 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006576 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006577 break;
6578 }
6579 }
6580
6581 if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006582 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006583 }
6584
6585}
6586
6587/**
6588 * xmlParseReference:
6589 * @ctxt: an XML parser context
Daniel Veillard0161e632008-08-28 15:36:32 +00006590 *
Owen Taylor3473f882001-02-23 17:55:21 +00006591 * parse and handle entity references in content, depending on the SAX
6592 * interface, this may end-up in a call to character() if this is a
6593 * CharRef, a predefined entity, if there is no reference() callback.
6594 * or if the parser was asked to switch to that mode.
6595 *
6596 * [67] Reference ::= EntityRef | CharRef
6597 */
6598void
6599xmlParseReference(xmlParserCtxtPtr ctxt) {
6600 xmlEntityPtr ent;
6601 xmlChar *val;
Daniel Veillard0161e632008-08-28 15:36:32 +00006602 int was_checked;
6603 xmlNodePtr list = NULL;
6604 xmlParserErrors ret = XML_ERR_OK;
Owen Taylor3473f882001-02-23 17:55:21 +00006605
Daniel Veillard0161e632008-08-28 15:36:32 +00006606
6607 if (RAW != '&')
6608 return;
6609
6610 /*
6611 * Simple case of a CharRef
6612 */
Owen Taylor3473f882001-02-23 17:55:21 +00006613 if (NXT(1) == '#') {
6614 int i = 0;
6615 xmlChar out[10];
6616 int hex = NXT(2);
Daniel Veillard56a4cb82001-03-24 17:00:36 +00006617 int value = xmlParseCharRef(ctxt);
Daniel Veillard0161e632008-08-28 15:36:32 +00006618
Daniel Veillarddc171602008-03-26 17:41:38 +00006619 if (value == 0)
6620 return;
Owen Taylor3473f882001-02-23 17:55:21 +00006621 if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
6622 /*
6623 * So we are using non-UTF-8 buffers
6624 * Check that the char fit on 8bits, if not
6625 * generate a CharRef.
6626 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00006627 if (value <= 0xFF) {
6628 out[0] = value;
Owen Taylor3473f882001-02-23 17:55:21 +00006629 out[1] = 0;
6630 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
6631 (!ctxt->disableSAX))
6632 ctxt->sax->characters(ctxt->userData, out, 1);
6633 } else {
6634 if ((hex == 'x') || (hex == 'X'))
Aleksey Sanin49cc9752002-06-14 17:07:10 +00006635 snprintf((char *)out, sizeof(out), "#x%X", value);
Owen Taylor3473f882001-02-23 17:55:21 +00006636 else
Aleksey Sanin49cc9752002-06-14 17:07:10 +00006637 snprintf((char *)out, sizeof(out), "#%d", value);
Owen Taylor3473f882001-02-23 17:55:21 +00006638 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
6639 (!ctxt->disableSAX))
6640 ctxt->sax->reference(ctxt->userData, out);
6641 }
6642 } else {
6643 /*
6644 * Just encode the value in UTF-8
6645 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00006646 COPY_BUF(0 ,out, i, value);
Owen Taylor3473f882001-02-23 17:55:21 +00006647 out[i] = 0;
6648 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
6649 (!ctxt->disableSAX))
6650 ctxt->sax->characters(ctxt->userData, out, i);
6651 }
Daniel Veillard0161e632008-08-28 15:36:32 +00006652 return;
6653 }
Daniel Veillarda37a6ad2006-10-10 20:05:45 +00006654
Daniel Veillard0161e632008-08-28 15:36:32 +00006655 /*
6656 * We are seeing an entity reference
6657 */
6658 ent = xmlParseEntityRef(ctxt);
6659 if (ent == NULL) return;
6660 if (!ctxt->wellFormed)
6661 return;
6662 was_checked = ent->checked;
6663
6664 /* special case of predefined entities */
6665 if ((ent->name == NULL) ||
6666 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
6667 val = ent->content;
6668 if (val == NULL) return;
6669 /*
6670 * inline the entity.
6671 */
6672 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
6673 (!ctxt->disableSAX))
6674 ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
6675 return;
6676 }
6677
6678 /*
6679 * The first reference to the entity trigger a parsing phase
6680 * where the ent->children is filled with the result from
6681 * the parsing.
6682 */
6683 if (ent->checked == 0) {
6684 unsigned long oldnbent = ctxt->nbentities;
6685
6686 /*
6687 * This is a bit hackish but this seems the best
6688 * way to make sure both SAX and DOM entity support
6689 * behaves okay.
6690 */
6691 void *user_data;
6692 if (ctxt->userData == ctxt)
6693 user_data = NULL;
6694 else
6695 user_data = ctxt->userData;
6696
6697 /*
6698 * Check that this entity is well formed
6699 * 4.3.2: An internal general parsed entity is well-formed
6700 * if its replacement text matches the production labeled
6701 * content.
6702 */
6703 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
6704 ctxt->depth++;
6705 ret = xmlParseBalancedChunkMemoryInternal(ctxt, ent->content,
6706 user_data, &list);
6707 ctxt->depth--;
6708
6709 } else if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
6710 ctxt->depth++;
6711 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt, ctxt->sax,
6712 user_data, ctxt->depth, ent->URI,
6713 ent->ExternalID, &list);
6714 ctxt->depth--;
6715 } else {
6716 ret = XML_ERR_ENTITY_PE_INTERNAL;
6717 xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
6718 "invalid entity type found\n", NULL);
6719 }
6720
6721 /*
6722 * Store the number of entities needing parsing for this entity
6723 * content and do checkings
6724 */
6725 ent->checked = ctxt->nbentities - oldnbent;
6726 if (ret == XML_ERR_ENTITY_LOOP) {
Daniel Veillard4bf899b2008-08-20 17:04:30 +00006727 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
Daniel Veillard0161e632008-08-28 15:36:32 +00006728 xmlFreeNodeList(list);
Daniel Veillard4bf899b2008-08-20 17:04:30 +00006729 return;
6730 }
Daniel Veillard0161e632008-08-28 15:36:32 +00006731 if (xmlParserEntityCheck(ctxt, 0, ent)) {
6732 xmlFreeNodeList(list);
6733 return;
6734 }
Owen Taylor3473f882001-02-23 17:55:21 +00006735
Daniel Veillard0161e632008-08-28 15:36:32 +00006736 if ((ret == XML_ERR_OK) && (list != NULL)) {
6737 if (((ent->etype == XML_INTERNAL_GENERAL_ENTITY) ||
6738 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY))&&
6739 (ent->children == NULL)) {
6740 ent->children = list;
6741 if (ctxt->replaceEntities) {
Owen Taylor3473f882001-02-23 17:55:21 +00006742 /*
Daniel Veillard0161e632008-08-28 15:36:32 +00006743 * Prune it directly in the generated document
6744 * except for single text nodes.
Owen Taylor3473f882001-02-23 17:55:21 +00006745 */
Daniel Veillard0161e632008-08-28 15:36:32 +00006746 if (((list->type == XML_TEXT_NODE) &&
6747 (list->next == NULL)) ||
6748 (ctxt->parseMode == XML_PARSE_READER)) {
6749 list->parent = (xmlNodePtr) ent;
6750 list = NULL;
6751 ent->owner = 1;
6752 } else {
6753 ent->owner = 0;
6754 while (list != NULL) {
6755 list->parent = (xmlNodePtr) ctxt->node;
6756 list->doc = ctxt->myDoc;
6757 if (list->next == NULL)
6758 ent->last = list;
6759 list = list->next;
Owen Taylor3473f882001-02-23 17:55:21 +00006760 }
Daniel Veillard0161e632008-08-28 15:36:32 +00006761 list = ent->children;
6762#ifdef LIBXML_LEGACY_ENABLED
6763 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
6764 xmlAddEntityReference(ent, list, NULL);
6765#endif /* LIBXML_LEGACY_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00006766 }
6767 } else {
Daniel Veillard0161e632008-08-28 15:36:32 +00006768 ent->owner = 1;
6769 while (list != NULL) {
6770 list->parent = (xmlNodePtr) ent;
6771 if (list->next == NULL)
6772 ent->last = list;
6773 list = list->next;
Owen Taylor3473f882001-02-23 17:55:21 +00006774 }
6775 }
Daniel Veillard0161e632008-08-28 15:36:32 +00006776 } else {
6777 xmlFreeNodeList(list);
6778 list = NULL;
Daniel Veillarda37a6ad2006-10-10 20:05:45 +00006779 }
Daniel Veillard0161e632008-08-28 15:36:32 +00006780 } else if ((ret != XML_ERR_OK) &&
6781 (ret != XML_WAR_UNDECLARED_ENTITY)) {
6782 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
6783 "Entity '%s' failed to parse\n", ent->name);
6784 } else if (list != NULL) {
6785 xmlFreeNodeList(list);
6786 list = NULL;
6787 }
6788 if (ent->checked == 0)
6789 ent->checked = 1;
6790 } else if (ent->checked != 1) {
6791 ctxt->nbentities += ent->checked;
6792 }
Daniel Veillarda37a6ad2006-10-10 20:05:45 +00006793
Daniel Veillard0161e632008-08-28 15:36:32 +00006794 /*
6795 * Now that the entity content has been gathered
6796 * provide it to the application, this can take different forms based
6797 * on the parsing modes.
6798 */
6799 if (ent->children == NULL) {
6800 /*
6801 * Probably running in SAX mode and the callbacks don't
6802 * build the entity content. So unless we already went
6803 * though parsing for first checking go though the entity
6804 * content to generate callbacks associated to the entity
6805 */
6806 if (was_checked != 0) {
6807 void *user_data;
Owen Taylor3473f882001-02-23 17:55:21 +00006808 /*
Daniel Veillard0161e632008-08-28 15:36:32 +00006809 * This is a bit hackish but this seems the best
6810 * way to make sure both SAX and DOM entity support
6811 * behaves okay.
Owen Taylor3473f882001-02-23 17:55:21 +00006812 */
Daniel Veillard0161e632008-08-28 15:36:32 +00006813 if (ctxt->userData == ctxt)
6814 user_data = NULL;
6815 else
6816 user_data = ctxt->userData;
6817
6818 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
6819 ctxt->depth++;
6820 ret = xmlParseBalancedChunkMemoryInternal(ctxt,
6821 ent->content, user_data, NULL);
6822 ctxt->depth--;
6823 } else if (ent->etype ==
6824 XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
6825 ctxt->depth++;
6826 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
6827 ctxt->sax, user_data, ctxt->depth,
6828 ent->URI, ent->ExternalID, NULL);
6829 ctxt->depth--;
6830 } else {
6831 ret = XML_ERR_ENTITY_PE_INTERNAL;
6832 xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
6833 "invalid entity type found\n", NULL);
6834 }
6835 if (ret == XML_ERR_ENTITY_LOOP) {
6836 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
6837 return;
6838 }
6839 }
6840 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
6841 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
6842 /*
6843 * Entity reference callback comes second, it's somewhat
6844 * superfluous but a compatibility to historical behaviour
6845 */
6846 ctxt->sax->reference(ctxt->userData, ent->name);
6847 }
6848 return;
6849 }
6850
6851 /*
6852 * If we didn't get any children for the entity being built
6853 */
6854 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
6855 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
6856 /*
6857 * Create a node.
6858 */
6859 ctxt->sax->reference(ctxt->userData, ent->name);
6860 return;
6861 }
6862
6863 if ((ctxt->replaceEntities) || (ent->children == NULL)) {
6864 /*
6865 * There is a problem on the handling of _private for entities
6866 * (bug 155816): Should we copy the content of the field from
6867 * the entity (possibly overwriting some value set by the user
6868 * when a copy is created), should we leave it alone, or should
6869 * we try to take care of different situations? The problem
6870 * is exacerbated by the usage of this field by the xmlReader.
6871 * To fix this bug, we look at _private on the created node
6872 * and, if it's NULL, we copy in whatever was in the entity.
6873 * If it's not NULL we leave it alone. This is somewhat of a
6874 * hack - maybe we should have further tests to determine
6875 * what to do.
6876 */
6877 if ((ctxt->node != NULL) && (ent->children != NULL)) {
6878 /*
6879 * Seems we are generating the DOM content, do
6880 * a simple tree copy for all references except the first
6881 * In the first occurrence list contains the replacement.
6882 * progressive == 2 means we are operating on the Reader
6883 * and since nodes are discarded we must copy all the time.
6884 */
6885 if (((list == NULL) && (ent->owner == 0)) ||
6886 (ctxt->parseMode == XML_PARSE_READER)) {
6887 xmlNodePtr nw = NULL, cur, firstChild = NULL;
6888
6889 /*
6890 * when operating on a reader, the entities definitions
6891 * are always owning the entities subtree.
6892 if (ctxt->parseMode == XML_PARSE_READER)
6893 ent->owner = 1;
6894 */
6895
6896 cur = ent->children;
6897 while (cur != NULL) {
6898 nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
6899 if (nw != NULL) {
6900 if (nw->_private == NULL)
6901 nw->_private = cur->_private;
6902 if (firstChild == NULL){
6903 firstChild = nw;
6904 }
6905 nw = xmlAddChild(ctxt->node, nw);
6906 }
6907 if (cur == ent->last) {
6908 /*
6909 * needed to detect some strange empty
6910 * node cases in the reader tests
6911 */
6912 if ((ctxt->parseMode == XML_PARSE_READER) &&
6913 (nw != NULL) &&
6914 (nw->type == XML_ELEMENT_NODE) &&
6915 (nw->children == NULL))
6916 nw->extra = 1;
6917
6918 break;
6919 }
6920 cur = cur->next;
6921 }
6922#ifdef LIBXML_LEGACY_ENABLED
6923 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
6924 xmlAddEntityReference(ent, firstChild, nw);
6925#endif /* LIBXML_LEGACY_ENABLED */
6926 } else if (list == NULL) {
6927 xmlNodePtr nw = NULL, cur, next, last,
6928 firstChild = NULL;
6929 /*
6930 * Copy the entity child list and make it the new
6931 * entity child list. The goal is to make sure any
6932 * ID or REF referenced will be the one from the
6933 * document content and not the entity copy.
6934 */
6935 cur = ent->children;
6936 ent->children = NULL;
6937 last = ent->last;
6938 ent->last = NULL;
6939 while (cur != NULL) {
6940 next = cur->next;
6941 cur->next = NULL;
6942 cur->parent = NULL;
6943 nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
6944 if (nw != NULL) {
6945 if (nw->_private == NULL)
6946 nw->_private = cur->_private;
6947 if (firstChild == NULL){
6948 firstChild = cur;
6949 }
6950 xmlAddChild((xmlNodePtr) ent, nw);
6951 xmlAddChild(ctxt->node, cur);
6952 }
6953 if (cur == last)
6954 break;
6955 cur = next;
6956 }
Daniel Veillardcba68392008-08-29 12:43:40 +00006957 if (ent->owner == 0)
6958 ent->owner = 1;
Daniel Veillard0161e632008-08-28 15:36:32 +00006959#ifdef LIBXML_LEGACY_ENABLED
6960 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
6961 xmlAddEntityReference(ent, firstChild, nw);
6962#endif /* LIBXML_LEGACY_ENABLED */
6963 } else {
6964 const xmlChar *nbktext;
6965
6966 /*
6967 * the name change is to avoid coalescing of the
6968 * node with a possible previous text one which
6969 * would make ent->children a dangling pointer
6970 */
6971 nbktext = xmlDictLookup(ctxt->dict, BAD_CAST "nbktext",
6972 -1);
6973 if (ent->children->type == XML_TEXT_NODE)
6974 ent->children->name = nbktext;
6975 if ((ent->last != ent->children) &&
6976 (ent->last->type == XML_TEXT_NODE))
6977 ent->last->name = nbktext;
6978 xmlAddChildList(ctxt->node, ent->children);
6979 }
6980
6981 /*
6982 * This is to avoid a nasty side effect, see
6983 * characters() in SAX.c
6984 */
6985 ctxt->nodemem = 0;
6986 ctxt->nodelen = 0;
6987 return;
Owen Taylor3473f882001-02-23 17:55:21 +00006988 }
6989 }
6990}
6991
6992/**
6993 * xmlParseEntityRef:
6994 * @ctxt: an XML parser context
6995 *
6996 * parse ENTITY references declarations
6997 *
6998 * [68] EntityRef ::= '&' Name ';'
6999 *
7000 * [ WFC: Entity Declared ]
7001 * In a document without any DTD, a document with only an internal DTD
7002 * subset which contains no parameter entity references, or a document
7003 * with "standalone='yes'", the Name given in the entity reference
7004 * must match that in an entity declaration, except that well-formed
7005 * documents need not declare any of the following entities: amp, lt,
7006 * gt, apos, quot. The declaration of a parameter entity must precede
7007 * any reference to it. Similarly, the declaration of a general entity
7008 * must precede any reference to it which appears in a default value in an
7009 * attribute-list declaration. Note that if entities are declared in the
7010 * external subset or in external parameter entities, a non-validating
7011 * processor is not obligated to read and process their declarations;
7012 * for such documents, the rule that an entity must be declared is a
7013 * well-formedness constraint only if standalone='yes'.
7014 *
7015 * [ WFC: Parsed Entity ]
7016 * An entity reference must not contain the name of an unparsed entity
7017 *
7018 * Returns the xmlEntityPtr if found, or NULL otherwise.
7019 */
7020xmlEntityPtr
7021xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007022 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00007023 xmlEntityPtr ent = NULL;
7024
7025 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00007026
Daniel Veillard0161e632008-08-28 15:36:32 +00007027 if (RAW != '&')
7028 return(NULL);
7029 NEXT;
7030 name = xmlParseName(ctxt);
7031 if (name == NULL) {
7032 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7033 "xmlParseEntityRef: no name\n");
7034 return(NULL);
7035 }
7036 if (RAW != ';') {
7037 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7038 return(NULL);
7039 }
7040 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00007041
Daniel Veillard0161e632008-08-28 15:36:32 +00007042 /*
7043 * Predefined entites override any extra definition
7044 */
7045 ent = xmlGetPredefinedEntity(name);
7046 if (ent != NULL)
7047 return(ent);
Owen Taylor3473f882001-02-23 17:55:21 +00007048
Daniel Veillard0161e632008-08-28 15:36:32 +00007049 /*
7050 * Increate the number of entity references parsed
7051 */
7052 ctxt->nbentities++;
Owen Taylor3473f882001-02-23 17:55:21 +00007053
Daniel Veillard0161e632008-08-28 15:36:32 +00007054 /*
7055 * Ask first SAX for entity resolution, otherwise try the
7056 * entities which may have stored in the parser context.
7057 */
7058 if (ctxt->sax != NULL) {
7059 if (ctxt->sax->getEntity != NULL)
7060 ent = ctxt->sax->getEntity(ctxt->userData, name);
7061 if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7062 (ctxt->userData==ctxt)) {
7063 ent = xmlSAX2GetEntity(ctxt, name);
Owen Taylor3473f882001-02-23 17:55:21 +00007064 }
7065 }
Daniel Veillard0161e632008-08-28 15:36:32 +00007066 /*
7067 * [ WFC: Entity Declared ]
7068 * In a document without any DTD, a document with only an
7069 * internal DTD subset which contains no parameter entity
7070 * references, or a document with "standalone='yes'", the
7071 * Name given in the entity reference must match that in an
7072 * entity declaration, except that well-formed documents
7073 * need not declare any of the following entities: amp, lt,
7074 * gt, apos, quot.
7075 * The declaration of a parameter entity must precede any
7076 * reference to it.
7077 * Similarly, the declaration of a general entity must
7078 * precede any reference to it which appears in a default
7079 * value in an attribute-list declaration. Note that if
7080 * entities are declared in the external subset or in
7081 * external parameter entities, a non-validating processor
7082 * is not obligated to read and process their declarations;
7083 * for such documents, the rule that an entity must be
7084 * declared is a well-formedness constraint only if
7085 * standalone='yes'.
7086 */
7087 if (ent == NULL) {
7088 if ((ctxt->standalone == 1) ||
7089 ((ctxt->hasExternalSubset == 0) &&
7090 (ctxt->hasPErefs == 0))) {
7091 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7092 "Entity '%s' not defined\n", name);
7093 } else {
7094 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7095 "Entity '%s' not defined\n", name);
7096 if ((ctxt->inSubset == 0) &&
7097 (ctxt->sax != NULL) &&
7098 (ctxt->sax->reference != NULL)) {
7099 ctxt->sax->reference(ctxt->userData, name);
7100 }
7101 }
7102 ctxt->valid = 0;
7103 }
7104
7105 /*
7106 * [ WFC: Parsed Entity ]
7107 * An entity reference must not contain the name of an
7108 * unparsed entity
7109 */
7110 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7111 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7112 "Entity reference to unparsed entity %s\n", name);
7113 }
7114
7115 /*
7116 * [ WFC: No External Entity References ]
7117 * Attribute values cannot contain direct or indirect
7118 * entity references to external entities.
7119 */
7120 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7121 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
7122 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7123 "Attribute references external entity '%s'\n", name);
7124 }
7125 /*
7126 * [ WFC: No < in Attribute Values ]
7127 * The replacement text of any entity referred to directly or
7128 * indirectly in an attribute value (other than "&lt;") must
7129 * not contain a <.
7130 */
7131 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7132 (ent != NULL) && (ent->content != NULL) &&
7133 (xmlStrchr(ent->content, '<'))) {
7134 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
7135 "'<' in entity '%s' is not allowed in attributes values\n", name);
7136 }
7137
7138 /*
7139 * Internal check, no parameter entities here ...
7140 */
7141 else {
7142 switch (ent->etype) {
7143 case XML_INTERNAL_PARAMETER_ENTITY:
7144 case XML_EXTERNAL_PARAMETER_ENTITY:
7145 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
7146 "Attempt to reference the parameter entity '%s'\n",
7147 name);
7148 break;
7149 default:
7150 break;
7151 }
7152 }
7153
7154 /*
7155 * [ WFC: No Recursion ]
7156 * A parsed entity must not contain a recursive reference
7157 * to itself, either directly or indirectly.
7158 * Done somewhere else
7159 */
Owen Taylor3473f882001-02-23 17:55:21 +00007160 return(ent);
7161}
7162
7163/**
7164 * xmlParseStringEntityRef:
7165 * @ctxt: an XML parser context
7166 * @str: a pointer to an index in the string
7167 *
7168 * parse ENTITY references declarations, but this version parses it from
7169 * a string value.
7170 *
7171 * [68] EntityRef ::= '&' Name ';'
7172 *
7173 * [ WFC: Entity Declared ]
7174 * In a document without any DTD, a document with only an internal DTD
7175 * subset which contains no parameter entity references, or a document
7176 * with "standalone='yes'", the Name given in the entity reference
7177 * must match that in an entity declaration, except that well-formed
7178 * documents need not declare any of the following entities: amp, lt,
7179 * gt, apos, quot. The declaration of a parameter entity must precede
7180 * any reference to it. Similarly, the declaration of a general entity
7181 * must precede any reference to it which appears in a default value in an
7182 * attribute-list declaration. Note that if entities are declared in the
7183 * external subset or in external parameter entities, a non-validating
7184 * processor is not obligated to read and process their declarations;
7185 * for such documents, the rule that an entity must be declared is a
7186 * well-formedness constraint only if standalone='yes'.
7187 *
7188 * [ WFC: Parsed Entity ]
7189 * An entity reference must not contain the name of an unparsed entity
7190 *
7191 * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
7192 * is updated to the current location in the string.
7193 */
7194xmlEntityPtr
7195xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
7196 xmlChar *name;
7197 const xmlChar *ptr;
7198 xmlChar cur;
7199 xmlEntityPtr ent = NULL;
7200
7201 if ((str == NULL) || (*str == NULL))
7202 return(NULL);
7203 ptr = *str;
7204 cur = *ptr;
Daniel Veillard0161e632008-08-28 15:36:32 +00007205 if (cur != '&')
7206 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007207
Daniel Veillard0161e632008-08-28 15:36:32 +00007208 ptr++;
7209 cur = *ptr;
7210 name = xmlParseStringName(ctxt, &ptr);
7211 if (name == NULL) {
7212 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7213 "xmlParseStringEntityRef: no name\n");
7214 *str = ptr;
7215 return(NULL);
7216 }
7217 if (*ptr != ';') {
7218 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7219 *str = ptr;
7220 return(NULL);
7221 }
7222 ptr++;
Owen Taylor3473f882001-02-23 17:55:21 +00007223
Owen Taylor3473f882001-02-23 17:55:21 +00007224
Daniel Veillard0161e632008-08-28 15:36:32 +00007225 /*
7226 * Predefined entites override any extra definition
7227 */
7228 ent = xmlGetPredefinedEntity(name);
7229 if (ent != NULL)
7230 return(ent);
Owen Taylor3473f882001-02-23 17:55:21 +00007231
Daniel Veillard0161e632008-08-28 15:36:32 +00007232 /*
7233 * Increate the number of entity references parsed
7234 */
7235 ctxt->nbentities++;
Owen Taylor3473f882001-02-23 17:55:21 +00007236
Daniel Veillard0161e632008-08-28 15:36:32 +00007237 /*
7238 * Ask first SAX for entity resolution, otherwise try the
7239 * entities which may have stored in the parser context.
7240 */
7241 if (ctxt->sax != NULL) {
7242 if (ctxt->sax->getEntity != NULL)
7243 ent = ctxt->sax->getEntity(ctxt->userData, name);
7244 if ((ent == NULL) && (ctxt->userData==ctxt)) {
7245 ent = xmlSAX2GetEntity(ctxt, name);
Owen Taylor3473f882001-02-23 17:55:21 +00007246 }
7247 }
Daniel Veillard0161e632008-08-28 15:36:32 +00007248
7249 /*
7250 * [ WFC: Entity Declared ]
7251 * In a document without any DTD, a document with only an
7252 * internal DTD subset which contains no parameter entity
7253 * references, or a document with "standalone='yes'", the
7254 * Name given in the entity reference must match that in an
7255 * entity declaration, except that well-formed documents
7256 * need not declare any of the following entities: amp, lt,
7257 * gt, apos, quot.
7258 * The declaration of a parameter entity must precede any
7259 * reference to it.
7260 * Similarly, the declaration of a general entity must
7261 * precede any reference to it which appears in a default
7262 * value in an attribute-list declaration. Note that if
7263 * entities are declared in the external subset or in
7264 * external parameter entities, a non-validating processor
7265 * is not obligated to read and process their declarations;
7266 * for such documents, the rule that an entity must be
7267 * declared is a well-formedness constraint only if
7268 * standalone='yes'.
7269 */
7270 if (ent == NULL) {
7271 if ((ctxt->standalone == 1) ||
7272 ((ctxt->hasExternalSubset == 0) &&
7273 (ctxt->hasPErefs == 0))) {
7274 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7275 "Entity '%s' not defined\n", name);
7276 } else {
7277 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7278 "Entity '%s' not defined\n",
7279 name);
7280 }
7281 /* TODO ? check regressions ctxt->valid = 0; */
7282 }
7283
7284 /*
7285 * [ WFC: Parsed Entity ]
7286 * An entity reference must not contain the name of an
7287 * unparsed entity
7288 */
7289 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7290 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7291 "Entity reference to unparsed entity %s\n", name);
7292 }
7293
7294 /*
7295 * [ WFC: No External Entity References ]
7296 * Attribute values cannot contain direct or indirect
7297 * entity references to external entities.
7298 */
7299 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7300 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
7301 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7302 "Attribute references external entity '%s'\n", name);
7303 }
7304 /*
7305 * [ WFC: No < in Attribute Values ]
7306 * The replacement text of any entity referred to directly or
7307 * indirectly in an attribute value (other than "&lt;") must
7308 * not contain a <.
7309 */
7310 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7311 (ent != NULL) && (ent->content != NULL) &&
7312 (xmlStrchr(ent->content, '<'))) {
7313 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
7314 "'<' in entity '%s' is not allowed in attributes values\n",
7315 name);
7316 }
7317
7318 /*
7319 * Internal check, no parameter entities here ...
7320 */
7321 else {
7322 switch (ent->etype) {
7323 case XML_INTERNAL_PARAMETER_ENTITY:
7324 case XML_EXTERNAL_PARAMETER_ENTITY:
7325 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
7326 "Attempt to reference the parameter entity '%s'\n",
7327 name);
7328 break;
7329 default:
7330 break;
7331 }
7332 }
7333
7334 /*
7335 * [ WFC: No Recursion ]
7336 * A parsed entity must not contain a recursive reference
7337 * to itself, either directly or indirectly.
7338 * Done somewhere else
7339 */
7340
7341 xmlFree(name);
Owen Taylor3473f882001-02-23 17:55:21 +00007342 *str = ptr;
7343 return(ent);
7344}
7345
7346/**
7347 * xmlParsePEReference:
7348 * @ctxt: an XML parser context
7349 *
7350 * parse PEReference declarations
7351 * The entity content is handled directly by pushing it's content as
7352 * a new input stream.
7353 *
7354 * [69] PEReference ::= '%' Name ';'
7355 *
7356 * [ WFC: No Recursion ]
7357 * A parsed entity must not contain a recursive
7358 * reference to itself, either directly or indirectly.
7359 *
7360 * [ WFC: Entity Declared ]
7361 * In a document without any DTD, a document with only an internal DTD
7362 * subset which contains no parameter entity references, or a document
7363 * with "standalone='yes'", ... ... The declaration of a parameter
7364 * entity must precede any reference to it...
7365 *
7366 * [ VC: Entity Declared ]
7367 * In a document with an external subset or external parameter entities
7368 * with "standalone='no'", ... ... The declaration of a parameter entity
7369 * must precede any reference to it...
7370 *
7371 * [ WFC: In DTD ]
7372 * Parameter-entity references may only appear in the DTD.
7373 * NOTE: misleading but this is handled.
7374 */
7375void
Daniel Veillard8f597c32003-10-06 08:19:27 +00007376xmlParsePEReference(xmlParserCtxtPtr ctxt)
7377{
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007378 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00007379 xmlEntityPtr entity = NULL;
7380 xmlParserInputPtr input;
7381
Daniel Veillard0161e632008-08-28 15:36:32 +00007382 if (RAW != '%')
7383 return;
7384 NEXT;
7385 name = xmlParseName(ctxt);
7386 if (name == NULL) {
7387 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7388 "xmlParsePEReference: no name\n");
7389 return;
Owen Taylor3473f882001-02-23 17:55:21 +00007390 }
Daniel Veillard0161e632008-08-28 15:36:32 +00007391 if (RAW != ';') {
7392 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7393 return;
7394 }
7395
7396 NEXT;
7397
7398 /*
7399 * Increate the number of entity references parsed
7400 */
7401 ctxt->nbentities++;
7402
7403 /*
7404 * Request the entity from SAX
7405 */
7406 if ((ctxt->sax != NULL) &&
7407 (ctxt->sax->getParameterEntity != NULL))
7408 entity = ctxt->sax->getParameterEntity(ctxt->userData,
7409 name);
7410 if (entity == NULL) {
7411 /*
7412 * [ WFC: Entity Declared ]
7413 * In a document without any DTD, a document with only an
7414 * internal DTD subset which contains no parameter entity
7415 * references, or a document with "standalone='yes'", ...
7416 * ... The declaration of a parameter entity must precede
7417 * any reference to it...
7418 */
7419 if ((ctxt->standalone == 1) ||
7420 ((ctxt->hasExternalSubset == 0) &&
7421 (ctxt->hasPErefs == 0))) {
7422 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7423 "PEReference: %%%s; not found\n",
7424 name);
7425 } else {
7426 /*
7427 * [ VC: Entity Declared ]
7428 * In a document with an external subset or external
7429 * parameter entities with "standalone='no'", ...
7430 * ... The declaration of a parameter entity must
7431 * precede any reference to it...
7432 */
7433 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7434 "PEReference: %%%s; not found\n",
7435 name, NULL);
7436 ctxt->valid = 0;
7437 }
7438 } else {
7439 /*
7440 * Internal checking in case the entity quest barfed
7441 */
7442 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
7443 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
7444 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7445 "Internal: %%%s; is not a parameter entity\n",
7446 name, NULL);
7447 } else if (ctxt->input->free != deallocblankswrapper) {
7448 input = xmlNewBlanksWrapperInputStream(ctxt, entity);
7449 if (xmlPushInput(ctxt, input) < 0)
7450 return;
7451 } else {
7452 /*
7453 * TODO !!!
7454 * handle the extra spaces added before and after
7455 * c.f. http://www.w3.org/TR/REC-xml#as-PE
7456 */
7457 input = xmlNewEntityInputStream(ctxt, entity);
7458 if (xmlPushInput(ctxt, input) < 0)
7459 return;
7460 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
7461 (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
7462 (IS_BLANK_CH(NXT(5)))) {
7463 xmlParseTextDecl(ctxt);
7464 if (ctxt->errNo ==
7465 XML_ERR_UNSUPPORTED_ENCODING) {
7466 /*
7467 * The XML REC instructs us to stop parsing
7468 * right here
7469 */
7470 ctxt->instate = XML_PARSER_EOF;
7471 return;
7472 }
7473 }
7474 }
7475 }
7476 ctxt->hasPErefs = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007477}
7478
7479/**
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00007480 * xmlLoadEntityContent:
7481 * @ctxt: an XML parser context
7482 * @entity: an unloaded system entity
7483 *
7484 * Load the original content of the given system entity from the
7485 * ExternalID/SystemID given. This is to be used for Included in Literal
7486 * http://www.w3.org/TR/REC-xml/#inliteral processing of entities references
7487 *
7488 * Returns 0 in case of success and -1 in case of failure
7489 */
7490static int
7491xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
7492 xmlParserInputPtr input;
7493 xmlBufferPtr buf;
7494 int l, c;
7495 int count = 0;
7496
7497 if ((ctxt == NULL) || (entity == NULL) ||
7498 ((entity->etype != XML_EXTERNAL_PARAMETER_ENTITY) &&
7499 (entity->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY)) ||
7500 (entity->content != NULL)) {
7501 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
7502 "xmlLoadEntityContent parameter error");
7503 return(-1);
7504 }
7505
7506 if (xmlParserDebugEntities)
7507 xmlGenericError(xmlGenericErrorContext,
7508 "Reading %s entity content input\n", entity->name);
7509
7510 buf = xmlBufferCreate();
7511 if (buf == NULL) {
7512 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
7513 "xmlLoadEntityContent parameter error");
7514 return(-1);
7515 }
7516
7517 input = xmlNewEntityInputStream(ctxt, entity);
7518 if (input == NULL) {
7519 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
7520 "xmlLoadEntityContent input error");
7521 xmlBufferFree(buf);
7522 return(-1);
7523 }
7524
7525 /*
7526 * Push the entity as the current input, read char by char
7527 * saving to the buffer until the end of the entity or an error
7528 */
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00007529 if (xmlPushInput(ctxt, input) < 0) {
7530 xmlBufferFree(buf);
7531 return(-1);
7532 }
7533
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00007534 GROW;
7535 c = CUR_CHAR(l);
7536 while ((ctxt->input == input) && (ctxt->input->cur < ctxt->input->end) &&
7537 (IS_CHAR(c))) {
7538 xmlBufferAdd(buf, ctxt->input->cur, l);
7539 if (count++ > 100) {
7540 count = 0;
7541 GROW;
7542 }
7543 NEXTL(l);
7544 c = CUR_CHAR(l);
7545 }
7546
7547 if ((ctxt->input == input) && (ctxt->input->cur >= ctxt->input->end)) {
7548 xmlPopInput(ctxt);
7549 } else if (!IS_CHAR(c)) {
7550 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
7551 "xmlLoadEntityContent: invalid char value %d\n",
7552 c);
7553 xmlBufferFree(buf);
7554 return(-1);
7555 }
7556 entity->content = buf->content;
7557 buf->content = NULL;
7558 xmlBufferFree(buf);
7559
7560 return(0);
7561}
7562
7563/**
Owen Taylor3473f882001-02-23 17:55:21 +00007564 * xmlParseStringPEReference:
7565 * @ctxt: an XML parser context
7566 * @str: a pointer to an index in the string
7567 *
7568 * parse PEReference declarations
7569 *
7570 * [69] PEReference ::= '%' Name ';'
7571 *
7572 * [ WFC: No Recursion ]
7573 * A parsed entity must not contain a recursive
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00007574 * reference to itself, either directly or indirectly.
Owen Taylor3473f882001-02-23 17:55:21 +00007575 *
7576 * [ WFC: Entity Declared ]
7577 * In a document without any DTD, a document with only an internal DTD
7578 * subset which contains no parameter entity references, or a document
7579 * with "standalone='yes'", ... ... The declaration of a parameter
7580 * entity must precede any reference to it...
7581 *
7582 * [ VC: Entity Declared ]
7583 * In a document with an external subset or external parameter entities
7584 * with "standalone='no'", ... ... The declaration of a parameter entity
7585 * must precede any reference to it...
7586 *
7587 * [ WFC: In DTD ]
7588 * Parameter-entity references may only appear in the DTD.
7589 * NOTE: misleading but this is handled.
7590 *
7591 * Returns the string of the entity content.
7592 * str is updated to the current value of the index
7593 */
7594xmlEntityPtr
7595xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
7596 const xmlChar *ptr;
7597 xmlChar cur;
7598 xmlChar *name;
7599 xmlEntityPtr entity = NULL;
7600
7601 if ((str == NULL) || (*str == NULL)) return(NULL);
7602 ptr = *str;
7603 cur = *ptr;
Daniel Veillard0161e632008-08-28 15:36:32 +00007604 if (cur != '%')
7605 return(NULL);
7606 ptr++;
7607 cur = *ptr;
7608 name = xmlParseStringName(ctxt, &ptr);
7609 if (name == NULL) {
7610 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7611 "xmlParseStringPEReference: no name\n");
7612 *str = ptr;
7613 return(NULL);
7614 }
7615 cur = *ptr;
7616 if (cur != ';') {
7617 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7618 xmlFree(name);
7619 *str = ptr;
7620 return(NULL);
7621 }
7622 ptr++;
7623
7624 /*
7625 * Increate the number of entity references parsed
7626 */
7627 ctxt->nbentities++;
7628
7629 /*
7630 * Request the entity from SAX
7631 */
7632 if ((ctxt->sax != NULL) &&
7633 (ctxt->sax->getParameterEntity != NULL))
7634 entity = ctxt->sax->getParameterEntity(ctxt->userData,
7635 name);
7636 if (entity == NULL) {
7637 /*
7638 * [ WFC: Entity Declared ]
7639 * In a document without any DTD, a document with only an
7640 * internal DTD subset which contains no parameter entity
7641 * references, or a document with "standalone='yes'", ...
7642 * ... The declaration of a parameter entity must precede
7643 * any reference to it...
7644 */
7645 if ((ctxt->standalone == 1) ||
7646 ((ctxt->hasExternalSubset == 0) && (ctxt->hasPErefs == 0))) {
7647 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7648 "PEReference: %%%s; not found\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00007649 } else {
Daniel Veillard0161e632008-08-28 15:36:32 +00007650 /*
7651 * [ VC: Entity Declared ]
7652 * In a document with an external subset or external
7653 * parameter entities with "standalone='no'", ...
7654 * ... The declaration of a parameter entity must
7655 * precede any reference to it...
7656 */
7657 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7658 "PEReference: %%%s; not found\n",
7659 name, NULL);
7660 ctxt->valid = 0;
7661 }
7662 } else {
7663 /*
7664 * Internal checking in case the entity quest barfed
7665 */
7666 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
7667 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
7668 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7669 "%%%s; is not a parameter entity\n",
7670 name, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007671 }
7672 }
Daniel Veillard0161e632008-08-28 15:36:32 +00007673 ctxt->hasPErefs = 1;
7674 xmlFree(name);
Owen Taylor3473f882001-02-23 17:55:21 +00007675 *str = ptr;
7676 return(entity);
7677}
7678
7679/**
7680 * xmlParseDocTypeDecl:
7681 * @ctxt: an XML parser context
7682 *
7683 * parse a DOCTYPE declaration
7684 *
7685 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
7686 * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
7687 *
7688 * [ VC: Root Element Type ]
7689 * The Name in the document type declaration must match the element
7690 * type of the root element.
7691 */
7692
7693void
7694xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007695 const xmlChar *name = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00007696 xmlChar *ExternalID = NULL;
7697 xmlChar *URI = NULL;
7698
7699 /*
7700 * We know that '<!DOCTYPE' has been detected.
7701 */
7702 SKIP(9);
7703
7704 SKIP_BLANKS;
7705
7706 /*
7707 * Parse the DOCTYPE name.
7708 */
7709 name = xmlParseName(ctxt);
7710 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007711 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7712 "xmlParseDocTypeDecl : no DOCTYPE name !\n");
Owen Taylor3473f882001-02-23 17:55:21 +00007713 }
7714 ctxt->intSubName = name;
7715
7716 SKIP_BLANKS;
7717
7718 /*
7719 * Check for SystemID and ExternalID
7720 */
7721 URI = xmlParseExternalID(ctxt, &ExternalID, 1);
7722
7723 if ((URI != NULL) || (ExternalID != NULL)) {
7724 ctxt->hasExternalSubset = 1;
7725 }
7726 ctxt->extSubURI = URI;
7727 ctxt->extSubSystem = ExternalID;
7728
7729 SKIP_BLANKS;
7730
7731 /*
7732 * Create and update the internal subset.
7733 */
7734 if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
7735 (!ctxt->disableSAX))
7736 ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
7737
7738 /*
7739 * Is there any internal subset declarations ?
7740 * they are handled separately in xmlParseInternalSubset()
7741 */
7742 if (RAW == '[')
7743 return;
7744
7745 /*
7746 * We should be at the end of the DOCTYPE declaration.
7747 */
7748 if (RAW != '>') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007749 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007750 }
7751 NEXT;
7752}
7753
7754/**
Daniel Veillardcbaf3992001-12-31 16:16:02 +00007755 * xmlParseInternalSubset:
Owen Taylor3473f882001-02-23 17:55:21 +00007756 * @ctxt: an XML parser context
7757 *
7758 * parse the internal subset declaration
7759 *
7760 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
7761 */
7762
Daniel Veillard56a4cb82001-03-24 17:00:36 +00007763static void
Owen Taylor3473f882001-02-23 17:55:21 +00007764xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
7765 /*
7766 * Is there any DTD definition ?
7767 */
7768 if (RAW == '[') {
7769 ctxt->instate = XML_PARSER_DTD;
7770 NEXT;
7771 /*
7772 * Parse the succession of Markup declarations and
7773 * PEReferences.
7774 * Subsequence (markupdecl | PEReference | S)*
7775 */
7776 while (RAW != ']') {
7777 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00007778 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00007779
7780 SKIP_BLANKS;
7781 xmlParseMarkupDecl(ctxt);
7782 xmlParsePEReference(ctxt);
7783
7784 /*
7785 * Pop-up of finished entities.
7786 */
7787 while ((RAW == 0) && (ctxt->inputNr > 1))
7788 xmlPopInput(ctxt);
7789
7790 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007791 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
Owen Taylor3473f882001-02-23 17:55:21 +00007792 "xmlParseInternalSubset: error detected in Markup declaration\n");
Owen Taylor3473f882001-02-23 17:55:21 +00007793 break;
7794 }
7795 }
7796 if (RAW == ']') {
7797 NEXT;
7798 SKIP_BLANKS;
7799 }
7800 }
7801
7802 /*
7803 * We should be at the end of the DOCTYPE declaration.
7804 */
7805 if (RAW != '>') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007806 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007807 }
7808 NEXT;
7809}
7810
Daniel Veillard81273902003-09-30 00:43:48 +00007811#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +00007812/**
7813 * xmlParseAttribute:
7814 * @ctxt: an XML parser context
7815 * @value: a xmlChar ** used to store the value of the attribute
7816 *
7817 * parse an attribute
7818 *
7819 * [41] Attribute ::= Name Eq AttValue
7820 *
7821 * [ WFC: No External Entity References ]
7822 * Attribute values cannot contain direct or indirect entity references
7823 * to external entities.
7824 *
7825 * [ WFC: No < in Attribute Values ]
7826 * The replacement text of any entity referred to directly or indirectly in
7827 * an attribute value (other than "&lt;") must not contain a <.
7828 *
7829 * [ VC: Attribute Value Type ]
7830 * The attribute must have been declared; the value must be of the type
7831 * declared for it.
7832 *
7833 * [25] Eq ::= S? '=' S?
7834 *
7835 * With namespace:
7836 *
7837 * [NS 11] Attribute ::= QName Eq AttValue
7838 *
7839 * Also the case QName == xmlns:??? is handled independently as a namespace
7840 * definition.
7841 *
7842 * Returns the attribute name, and the value in *value.
7843 */
7844
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007845const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00007846xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007847 const xmlChar *name;
7848 xmlChar *val;
Owen Taylor3473f882001-02-23 17:55:21 +00007849
7850 *value = NULL;
Daniel Veillard878eab02002-02-19 13:46:09 +00007851 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00007852 name = xmlParseName(ctxt);
7853 if (name == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00007854 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007855 "error parsing attribute name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00007856 return(NULL);
7857 }
7858
7859 /*
7860 * read the value
7861 */
7862 SKIP_BLANKS;
7863 if (RAW == '=') {
7864 NEXT;
7865 SKIP_BLANKS;
7866 val = xmlParseAttValue(ctxt);
7867 ctxt->instate = XML_PARSER_CONTENT;
7868 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00007869 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
Owen Taylor3473f882001-02-23 17:55:21 +00007870 "Specification mandate value for attribute %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00007871 return(NULL);
7872 }
7873
7874 /*
7875 * Check that xml:lang conforms to the specification
7876 * No more registered as an error, just generate a warning now
7877 * since this was deprecated in XML second edition
7878 */
7879 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
7880 if (!xmlCheckLanguageID(val)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00007881 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
7882 "Malformed value for xml:lang : %s\n",
7883 val, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007884 }
7885 }
7886
7887 /*
7888 * Check that xml:space conforms to the specification
7889 */
7890 if (xmlStrEqual(name, BAD_CAST "xml:space")) {
7891 if (xmlStrEqual(val, BAD_CAST "default"))
7892 *(ctxt->space) = 0;
7893 else if (xmlStrEqual(val, BAD_CAST "preserve"))
7894 *(ctxt->space) = 1;
7895 else {
Daniel Veillardd8925572005-06-08 22:34:55 +00007896 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
Daniel Veillard642104e2003-03-26 16:32:05 +00007897"Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
Daniel Veillardd8925572005-06-08 22:34:55 +00007898 val, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007899 }
7900 }
7901
7902 *value = val;
7903 return(name);
7904}
7905
7906/**
7907 * xmlParseStartTag:
7908 * @ctxt: an XML parser context
7909 *
7910 * parse a start of tag either for rule element or
7911 * EmptyElement. In both case we don't parse the tag closing chars.
7912 *
7913 * [40] STag ::= '<' Name (S Attribute)* S? '>'
7914 *
7915 * [ WFC: Unique Att Spec ]
7916 * No attribute name may appear more than once in the same start-tag or
7917 * empty-element tag.
7918 *
7919 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
7920 *
7921 * [ WFC: Unique Att Spec ]
7922 * No attribute name may appear more than once in the same start-tag or
7923 * empty-element tag.
7924 *
7925 * With namespace:
7926 *
7927 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
7928 *
7929 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
7930 *
7931 * Returns the element name parsed
7932 */
7933
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007934const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00007935xmlParseStartTag(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007936 const xmlChar *name;
7937 const xmlChar *attname;
Owen Taylor3473f882001-02-23 17:55:21 +00007938 xmlChar *attvalue;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00007939 const xmlChar **atts = ctxt->atts;
Owen Taylor3473f882001-02-23 17:55:21 +00007940 int nbatts = 0;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00007941 int maxatts = ctxt->maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00007942 int i;
7943
7944 if (RAW != '<') return(NULL);
Daniel Veillard21a0f912001-02-25 19:54:14 +00007945 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00007946
7947 name = xmlParseName(ctxt);
7948 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007949 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00007950 "xmlParseStartTag: invalid element name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00007951 return(NULL);
7952 }
7953
7954 /*
7955 * Now parse the attributes, it ends up with the ending
7956 *
7957 * (S Attribute)* S?
7958 */
7959 SKIP_BLANKS;
7960 GROW;
7961
Daniel Veillard21a0f912001-02-25 19:54:14 +00007962 while ((RAW != '>') &&
7963 ((RAW != '/') || (NXT(1) != '>')) &&
Daniel Veillard73b013f2003-09-30 12:36:01 +00007964 (IS_BYTE_CHAR(RAW))) {
Owen Taylor3473f882001-02-23 17:55:21 +00007965 const xmlChar *q = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00007966 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00007967
7968 attname = xmlParseAttribute(ctxt, &attvalue);
7969 if ((attname != NULL) && (attvalue != NULL)) {
7970 /*
7971 * [ WFC: Unique Att Spec ]
7972 * No attribute name may appear more than once in the same
7973 * start-tag or empty-element tag.
7974 */
7975 for (i = 0; i < nbatts;i += 2) {
7976 if (xmlStrEqual(atts[i], attname)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007977 xmlErrAttributeDup(ctxt, NULL, attname);
Owen Taylor3473f882001-02-23 17:55:21 +00007978 xmlFree(attvalue);
7979 goto failed;
7980 }
7981 }
Owen Taylor3473f882001-02-23 17:55:21 +00007982 /*
7983 * Add the pair to atts
7984 */
7985 if (atts == NULL) {
Daniel Veillard6155d8a2003-08-19 15:01:28 +00007986 maxatts = 22; /* allow for 10 attrs by default */
7987 atts = (const xmlChar **)
7988 xmlMalloc(maxatts * sizeof(xmlChar *));
Owen Taylor3473f882001-02-23 17:55:21 +00007989 if (atts == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007990 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00007991 if (attvalue != NULL)
7992 xmlFree(attvalue);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00007993 goto failed;
Owen Taylor3473f882001-02-23 17:55:21 +00007994 }
Daniel Veillard6155d8a2003-08-19 15:01:28 +00007995 ctxt->atts = atts;
7996 ctxt->maxatts = maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00007997 } else if (nbatts + 4 > maxatts) {
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00007998 const xmlChar **n;
7999
Owen Taylor3473f882001-02-23 17:55:21 +00008000 maxatts *= 2;
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008001 n = (const xmlChar **) xmlRealloc((void *) atts,
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008002 maxatts * sizeof(const xmlChar *));
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008003 if (n == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008004 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008005 if (attvalue != NULL)
8006 xmlFree(attvalue);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008007 goto failed;
Owen Taylor3473f882001-02-23 17:55:21 +00008008 }
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008009 atts = n;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008010 ctxt->atts = atts;
8011 ctxt->maxatts = maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00008012 }
8013 atts[nbatts++] = attname;
8014 atts[nbatts++] = attvalue;
8015 atts[nbatts] = NULL;
8016 atts[nbatts + 1] = NULL;
8017 } else {
Owen Taylor3473f882001-02-23 17:55:21 +00008018 if (attvalue != NULL)
8019 xmlFree(attvalue);
8020 }
8021
8022failed:
8023
Daniel Veillard3772de32002-12-17 10:31:45 +00008024 GROW
Owen Taylor3473f882001-02-23 17:55:21 +00008025 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
8026 break;
William M. Brack76e95df2003-10-18 16:20:14 +00008027 if (!IS_BLANK_CH(RAW)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008028 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
8029 "attributes construct error\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008030 }
8031 SKIP_BLANKS;
Daniel Veillard02111c12003-02-24 19:14:52 +00008032 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
8033 (attname == NULL) && (attvalue == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008034 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
8035 "xmlParseStartTag: problem parsing attributes\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008036 break;
8037 }
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008038 SHRINK;
Owen Taylor3473f882001-02-23 17:55:21 +00008039 GROW;
8040 }
8041
8042 /*
8043 * SAX: Start of Element !
8044 */
8045 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008046 (!ctxt->disableSAX)) {
8047 if (nbatts > 0)
8048 ctxt->sax->startElement(ctxt->userData, name, atts);
8049 else
8050 ctxt->sax->startElement(ctxt->userData, name, NULL);
8051 }
Owen Taylor3473f882001-02-23 17:55:21 +00008052
8053 if (atts != NULL) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008054 /* Free only the content strings */
8055 for (i = 1;i < nbatts;i+=2)
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008056 if (atts[i] != NULL)
8057 xmlFree((xmlChar *) atts[i]);
Owen Taylor3473f882001-02-23 17:55:21 +00008058 }
8059 return(name);
8060}
8061
8062/**
Daniel Veillard0fb18932003-09-07 09:14:37 +00008063 * xmlParseEndTag1:
Owen Taylor3473f882001-02-23 17:55:21 +00008064 * @ctxt: an XML parser context
Daniel Veillard0fb18932003-09-07 09:14:37 +00008065 * @line: line of the start tag
8066 * @nsNr: number of namespaces on the start tag
Owen Taylor3473f882001-02-23 17:55:21 +00008067 *
8068 * parse an end of tag
8069 *
8070 * [42] ETag ::= '</' Name S? '>'
8071 *
8072 * With namespace
8073 *
8074 * [NS 9] ETag ::= '</' QName S? '>'
8075 */
8076
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00008077static void
Daniel Veillard0fb18932003-09-07 09:14:37 +00008078xmlParseEndTag1(xmlParserCtxtPtr ctxt, int line) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008079 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00008080
8081 GROW;
8082 if ((RAW != '<') || (NXT(1) != '/')) {
Daniel Veillardf403d292003-10-05 13:51:35 +00008083 xmlFatalErrMsg(ctxt, XML_ERR_LTSLASH_REQUIRED,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008084 "xmlParseEndTag: '</' not found\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008085 return;
8086 }
8087 SKIP(2);
8088
Daniel Veillard46de64e2002-05-29 08:21:33 +00008089 name = xmlParseNameAndCompare(ctxt,ctxt->name);
Owen Taylor3473f882001-02-23 17:55:21 +00008090
8091 /*
8092 * We should definitely be at the ending "S? '>'" part
8093 */
8094 GROW;
8095 SKIP_BLANKS;
Daniel Veillard73b013f2003-09-30 12:36:01 +00008096 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008097 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008098 } else
Daniel Veillard21a0f912001-02-25 19:54:14 +00008099 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00008100
8101 /*
8102 * [ WFC: Element Type Match ]
8103 * The Name in an element's end-tag must match the element type in the
8104 * start-tag.
8105 *
8106 */
Daniel Veillard46de64e2002-05-29 08:21:33 +00008107 if (name != (xmlChar*)1) {
Daniel Veillardf403d292003-10-05 13:51:35 +00008108 if (name == NULL) name = BAD_CAST "unparseable";
8109 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00008110 "Opening and ending tag mismatch: %s line %d and %s\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00008111 ctxt->name, line, name);
Owen Taylor3473f882001-02-23 17:55:21 +00008112 }
8113
8114 /*
8115 * SAX: End of Tag
8116 */
8117 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
8118 (!ctxt->disableSAX))
Daniel Veillard46de64e2002-05-29 08:21:33 +00008119 ctxt->sax->endElement(ctxt->userData, ctxt->name);
Owen Taylor3473f882001-02-23 17:55:21 +00008120
Daniel Veillarde57ec792003-09-10 10:50:59 +00008121 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00008122 spacePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00008123 return;
8124}
8125
8126/**
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00008127 * xmlParseEndTag:
8128 * @ctxt: an XML parser context
8129 *
8130 * parse an end of tag
8131 *
8132 * [42] ETag ::= '</' Name S? '>'
8133 *
8134 * With namespace
8135 *
8136 * [NS 9] ETag ::= '</' QName S? '>'
8137 */
8138
8139void
8140xmlParseEndTag(xmlParserCtxtPtr ctxt) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00008141 xmlParseEndTag1(ctxt, 0);
8142}
Daniel Veillard81273902003-09-30 00:43:48 +00008143#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillard0fb18932003-09-07 09:14:37 +00008144
8145/************************************************************************
8146 * *
8147 * SAX 2 specific operations *
8148 * *
8149 ************************************************************************/
8150
Daniel Veillard0fb18932003-09-07 09:14:37 +00008151/*
8152 * xmlGetNamespace:
8153 * @ctxt: an XML parser context
8154 * @prefix: the prefix to lookup
8155 *
8156 * Lookup the namespace name for the @prefix (which ca be NULL)
8157 * The prefix must come from the @ctxt->dict dictionnary
8158 *
8159 * Returns the namespace name or NULL if not bound
8160 */
8161static const xmlChar *
8162xmlGetNamespace(xmlParserCtxtPtr ctxt, const xmlChar *prefix) {
8163 int i;
8164
Daniel Veillarde57ec792003-09-10 10:50:59 +00008165 if (prefix == ctxt->str_xml) return(ctxt->str_xml_ns);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008166 for (i = ctxt->nsNr - 2;i >= 0;i-=2)
Daniel Veillarde57ec792003-09-10 10:50:59 +00008167 if (ctxt->nsTab[i] == prefix) {
8168 if ((prefix == NULL) && (*ctxt->nsTab[i + 1] == 0))
8169 return(NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008170 return(ctxt->nsTab[i + 1]);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008171 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008172 return(NULL);
8173}
8174
8175/**
Daniel Veillard0fb18932003-09-07 09:14:37 +00008176 * xmlParseQName:
8177 * @ctxt: an XML parser context
8178 * @prefix: pointer to store the prefix part
8179 *
8180 * parse an XML Namespace QName
8181 *
8182 * [6] QName ::= (Prefix ':')? LocalPart
8183 * [7] Prefix ::= NCName
8184 * [8] LocalPart ::= NCName
8185 *
8186 * Returns the Name parsed or NULL
8187 */
8188
8189static const xmlChar *
8190xmlParseQName(xmlParserCtxtPtr ctxt, const xmlChar **prefix) {
8191 const xmlChar *l, *p;
8192
8193 GROW;
8194
8195 l = xmlParseNCName(ctxt);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008196 if (l == NULL) {
8197 if (CUR == ':') {
8198 l = xmlParseName(ctxt);
8199 if (l != NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008200 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8201 "Failed to parse QName '%s'\n", l, NULL, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008202 *prefix = NULL;
8203 return(l);
8204 }
8205 }
8206 return(NULL);
8207 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008208 if (CUR == ':') {
8209 NEXT;
8210 p = l;
8211 l = xmlParseNCName(ctxt);
8212 if (l == NULL) {
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008213 xmlChar *tmp;
8214
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008215 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8216 "Failed to parse QName '%s:'\n", p, NULL, NULL);
Daniel Veillardae0765b2008-07-31 19:54:59 +00008217 l = xmlParseNmtoken(ctxt);
8218 if (l == NULL)
8219 tmp = xmlBuildQName(BAD_CAST "", p, NULL, 0);
8220 else {
8221 tmp = xmlBuildQName(l, p, NULL, 0);
8222 xmlFree((char *)l);
8223 }
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008224 p = xmlDictLookup(ctxt->dict, tmp, -1);
8225 if (tmp != NULL) xmlFree(tmp);
8226 *prefix = NULL;
8227 return(p);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008228 }
8229 if (CUR == ':') {
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008230 xmlChar *tmp;
8231
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008232 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8233 "Failed to parse QName '%s:%s:'\n", p, l, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008234 NEXT;
8235 tmp = (xmlChar *) xmlParseName(ctxt);
8236 if (tmp != NULL) {
8237 tmp = xmlBuildQName(tmp, l, NULL, 0);
8238 l = xmlDictLookup(ctxt->dict, tmp, -1);
8239 if (tmp != NULL) xmlFree(tmp);
8240 *prefix = p;
8241 return(l);
8242 }
8243 tmp = xmlBuildQName(BAD_CAST "", l, NULL, 0);
8244 l = xmlDictLookup(ctxt->dict, tmp, -1);
8245 if (tmp != NULL) xmlFree(tmp);
8246 *prefix = p;
8247 return(l);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008248 }
8249 *prefix = p;
8250 } else
8251 *prefix = NULL;
8252 return(l);
8253}
8254
8255/**
8256 * xmlParseQNameAndCompare:
8257 * @ctxt: an XML parser context
8258 * @name: the localname
8259 * @prefix: the prefix, if any.
8260 *
8261 * parse an XML name and compares for match
8262 * (specialized for endtag parsing)
8263 *
8264 * Returns NULL for an illegal name, (xmlChar*) 1 for success
8265 * and the name for mismatch
8266 */
8267
8268static const xmlChar *
8269xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *name,
8270 xmlChar const *prefix) {
8271 const xmlChar *cmp = name;
8272 const xmlChar *in;
8273 const xmlChar *ret;
8274 const xmlChar *prefix2;
8275
8276 if (prefix == NULL) return(xmlParseNameAndCompare(ctxt, name));
8277
8278 GROW;
8279 in = ctxt->input->cur;
8280
8281 cmp = prefix;
8282 while (*in != 0 && *in == *cmp) {
8283 ++in;
8284 ++cmp;
8285 }
8286 if ((*cmp == 0) && (*in == ':')) {
8287 in++;
8288 cmp = name;
8289 while (*in != 0 && *in == *cmp) {
8290 ++in;
8291 ++cmp;
8292 }
William M. Brack76e95df2003-10-18 16:20:14 +00008293 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00008294 /* success */
8295 ctxt->input->cur = in;
8296 return((const xmlChar*) 1);
8297 }
8298 }
8299 /*
8300 * all strings coms from the dictionary, equality can be done directly
8301 */
8302 ret = xmlParseQName (ctxt, &prefix2);
8303 if ((ret == name) && (prefix == prefix2))
8304 return((const xmlChar*) 1);
8305 return ret;
8306}
8307
8308/**
8309 * xmlParseAttValueInternal:
8310 * @ctxt: an XML parser context
8311 * @len: attribute len result
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008312 * @alloc: whether the attribute was reallocated as a new string
8313 * @normalize: if 1 then further non-CDATA normalization must be done
Daniel Veillard0fb18932003-09-07 09:14:37 +00008314 *
8315 * parse a value for an attribute.
8316 * NOTE: if no normalization is needed, the routine will return pointers
8317 * directly from the data buffer.
8318 *
8319 * 3.3.3 Attribute-Value Normalization:
8320 * Before the value of an attribute is passed to the application or
8321 * checked for validity, the XML processor must normalize it as follows:
8322 * - a character reference is processed by appending the referenced
8323 * character to the attribute value
8324 * - an entity reference is processed by recursively processing the
8325 * replacement text of the entity
8326 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
8327 * appending #x20 to the normalized value, except that only a single
8328 * #x20 is appended for a "#xD#xA" sequence that is part of an external
8329 * parsed entity or the literal entity value of an internal parsed entity
8330 * - other characters are processed by appending them to the normalized value
8331 * If the declared value is not CDATA, then the XML processor must further
8332 * process the normalized attribute value by discarding any leading and
8333 * trailing space (#x20) characters, and by replacing sequences of space
8334 * (#x20) characters by a single space (#x20) character.
8335 * All attributes for which no declaration has been read should be treated
8336 * by a non-validating parser as if declared CDATA.
8337 *
8338 * Returns the AttValue parsed or NULL. The value has to be freed by the
8339 * caller if it was copied, this can be detected by val[*len] == 0.
8340 */
8341
8342static xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008343xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *len, int *alloc,
8344 int normalize)
Daniel Veillarde57ec792003-09-10 10:50:59 +00008345{
Daniel Veillard0fb18932003-09-07 09:14:37 +00008346 xmlChar limit = 0;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008347 const xmlChar *in = NULL, *start, *end, *last;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008348 xmlChar *ret = NULL;
8349
8350 GROW;
8351 in = (xmlChar *) CUR_PTR;
8352 if (*in != '"' && *in != '\'') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008353 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008354 return (NULL);
8355 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008356 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008357
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008358 /*
8359 * try to handle in this routine the most common case where no
8360 * allocation of a new string is required and where content is
8361 * pure ASCII.
8362 */
8363 limit = *in++;
8364 end = ctxt->input->end;
8365 start = in;
8366 if (in >= end) {
8367 const xmlChar *oldbase = ctxt->input->base;
8368 GROW;
8369 if (oldbase != ctxt->input->base) {
8370 long delta = ctxt->input->base - oldbase;
8371 start = start + delta;
8372 in = in + delta;
8373 }
8374 end = ctxt->input->end;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008375 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008376 if (normalize) {
8377 /*
8378 * Skip any leading spaces
8379 */
8380 while ((in < end) && (*in != limit) &&
8381 ((*in == 0x20) || (*in == 0x9) ||
8382 (*in == 0xA) || (*in == 0xD))) {
8383 in++;
8384 start = in;
8385 if (in >= end) {
8386 const xmlChar *oldbase = ctxt->input->base;
8387 GROW;
8388 if (oldbase != ctxt->input->base) {
8389 long delta = ctxt->input->base - oldbase;
8390 start = start + delta;
8391 in = in + delta;
8392 }
8393 end = ctxt->input->end;
8394 }
8395 }
8396 while ((in < end) && (*in != limit) && (*in >= 0x20) &&
8397 (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
8398 if ((*in++ == 0x20) && (*in == 0x20)) break;
8399 if (in >= end) {
8400 const xmlChar *oldbase = ctxt->input->base;
8401 GROW;
8402 if (oldbase != ctxt->input->base) {
8403 long delta = ctxt->input->base - oldbase;
8404 start = start + delta;
8405 in = in + delta;
8406 }
8407 end = ctxt->input->end;
8408 }
8409 }
8410 last = in;
8411 /*
8412 * skip the trailing blanks
8413 */
Daniel Veillardc6e20e42003-09-11 16:30:26 +00008414 while ((last[-1] == 0x20) && (last > start)) last--;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008415 while ((in < end) && (*in != limit) &&
8416 ((*in == 0x20) || (*in == 0x9) ||
8417 (*in == 0xA) || (*in == 0xD))) {
8418 in++;
8419 if (in >= end) {
8420 const xmlChar *oldbase = ctxt->input->base;
8421 GROW;
8422 if (oldbase != ctxt->input->base) {
8423 long delta = ctxt->input->base - oldbase;
8424 start = start + delta;
8425 in = in + delta;
8426 last = last + delta;
8427 }
8428 end = ctxt->input->end;
8429 }
8430 }
8431 if (*in != limit) goto need_complex;
8432 } else {
8433 while ((in < end) && (*in != limit) && (*in >= 0x20) &&
8434 (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
8435 in++;
8436 if (in >= end) {
8437 const xmlChar *oldbase = ctxt->input->base;
8438 GROW;
8439 if (oldbase != ctxt->input->base) {
8440 long delta = ctxt->input->base - oldbase;
8441 start = start + delta;
8442 in = in + delta;
8443 }
8444 end = ctxt->input->end;
8445 }
8446 }
8447 last = in;
8448 if (*in != limit) goto need_complex;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008449 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008450 in++;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008451 if (len != NULL) {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008452 *len = last - start;
8453 ret = (xmlChar *) start;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008454 } else {
Daniel Veillarde57ec792003-09-10 10:50:59 +00008455 if (alloc) *alloc = 1;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008456 ret = xmlStrndup(start, last - start);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008457 }
8458 CUR_PTR = in;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008459 if (alloc) *alloc = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008460 return ret;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008461need_complex:
8462 if (alloc) *alloc = 1;
8463 return xmlParseAttValueComplex(ctxt, len, normalize);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008464}
8465
8466/**
8467 * xmlParseAttribute2:
8468 * @ctxt: an XML parser context
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008469 * @pref: the element prefix
8470 * @elem: the element name
8471 * @prefix: a xmlChar ** used to store the value of the attribute prefix
Daniel Veillard0fb18932003-09-07 09:14:37 +00008472 * @value: a xmlChar ** used to store the value of the attribute
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008473 * @len: an int * to save the length of the attribute
8474 * @alloc: an int * to indicate if the attribute was allocated
Daniel Veillard0fb18932003-09-07 09:14:37 +00008475 *
8476 * parse an attribute in the new SAX2 framework.
8477 *
8478 * Returns the attribute name, and the value in *value, .
8479 */
8480
8481static const xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008482xmlParseAttribute2(xmlParserCtxtPtr ctxt,
Daniel Veillard97c9ce22008-03-25 16:52:41 +00008483 const xmlChar * pref, const xmlChar * elem,
8484 const xmlChar ** prefix, xmlChar ** value,
8485 int *len, int *alloc)
8486{
Daniel Veillard0fb18932003-09-07 09:14:37 +00008487 const xmlChar *name;
Daniel Veillardd8925572005-06-08 22:34:55 +00008488 xmlChar *val, *internal_val = NULL;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008489 int normalize = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008490
8491 *value = NULL;
8492 GROW;
8493 name = xmlParseQName(ctxt, prefix);
8494 if (name == NULL) {
Daniel Veillard97c9ce22008-03-25 16:52:41 +00008495 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8496 "error parsing attribute name\n");
8497 return (NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008498 }
8499
8500 /*
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008501 * get the type if needed
8502 */
8503 if (ctxt->attsSpecial != NULL) {
8504 int type;
8505
8506 type = (int) (long) xmlHashQLookup2(ctxt->attsSpecial,
Daniel Veillard97c9ce22008-03-25 16:52:41 +00008507 pref, elem, *prefix, name);
8508 if (type != 0)
8509 normalize = 1;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008510 }
8511
8512 /*
Daniel Veillard0fb18932003-09-07 09:14:37 +00008513 * read the value
8514 */
8515 SKIP_BLANKS;
8516 if (RAW == '=') {
8517 NEXT;
Daniel Veillard97c9ce22008-03-25 16:52:41 +00008518 SKIP_BLANKS;
8519 val = xmlParseAttValueInternal(ctxt, len, alloc, normalize);
8520 if (normalize) {
8521 /*
8522 * Sometimes a second normalisation pass for spaces is needed
8523 * but that only happens if charrefs or entities refernces
8524 * have been used in the attribute value, i.e. the attribute
8525 * value have been extracted in an allocated string already.
8526 */
8527 if (*alloc) {
8528 const xmlChar *val2;
8529
8530 val2 = xmlAttrNormalizeSpace2(ctxt, val, len);
Daniel Veillardae0765b2008-07-31 19:54:59 +00008531 if ((val2 != NULL) && (val2 != val)) {
Daniel Veillard97c9ce22008-03-25 16:52:41 +00008532 xmlFree(val);
Daniel Veillard6a31b832008-03-26 14:06:44 +00008533 val = (xmlChar *) val2;
Daniel Veillard97c9ce22008-03-25 16:52:41 +00008534 }
8535 }
8536 }
8537 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008538 } else {
Daniel Veillard97c9ce22008-03-25 16:52:41 +00008539 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
8540 "Specification mandate value for attribute %s\n",
8541 name);
8542 return (NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008543 }
8544
Daniel Veillard97c9ce22008-03-25 16:52:41 +00008545 if (*prefix == ctxt->str_xml) {
8546 /*
8547 * Check that xml:lang conforms to the specification
8548 * No more registered as an error, just generate a warning now
8549 * since this was deprecated in XML second edition
8550 */
8551 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "lang"))) {
8552 internal_val = xmlStrndup(val, *len);
8553 if (!xmlCheckLanguageID(internal_val)) {
8554 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
8555 "Malformed value for xml:lang : %s\n",
8556 internal_val, NULL);
8557 }
8558 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008559
Daniel Veillard97c9ce22008-03-25 16:52:41 +00008560 /*
8561 * Check that xml:space conforms to the specification
8562 */
8563 if (xmlStrEqual(name, BAD_CAST "space")) {
8564 internal_val = xmlStrndup(val, *len);
8565 if (xmlStrEqual(internal_val, BAD_CAST "default"))
8566 *(ctxt->space) = 0;
8567 else if (xmlStrEqual(internal_val, BAD_CAST "preserve"))
8568 *(ctxt->space) = 1;
8569 else {
8570 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
8571 "Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
8572 internal_val, NULL);
8573 }
8574 }
8575 if (internal_val) {
8576 xmlFree(internal_val);
8577 }
8578 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008579
8580 *value = val;
Daniel Veillard97c9ce22008-03-25 16:52:41 +00008581 return (name);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008582}
Daniel Veillard0fb18932003-09-07 09:14:37 +00008583/**
8584 * xmlParseStartTag2:
8585 * @ctxt: an XML parser context
8586 *
8587 * parse a start of tag either for rule element or
8588 * EmptyElement. In both case we don't parse the tag closing chars.
8589 * This routine is called when running SAX2 parsing
8590 *
8591 * [40] STag ::= '<' Name (S Attribute)* S? '>'
8592 *
8593 * [ WFC: Unique Att Spec ]
8594 * No attribute name may appear more than once in the same start-tag or
8595 * empty-element tag.
8596 *
8597 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
8598 *
8599 * [ WFC: Unique Att Spec ]
8600 * No attribute name may appear more than once in the same start-tag or
8601 * empty-element tag.
8602 *
8603 * With namespace:
8604 *
8605 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
8606 *
8607 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
8608 *
8609 * Returns the element name parsed
8610 */
8611
8612static const xmlChar *
8613xmlParseStartTag2(xmlParserCtxtPtr ctxt, const xmlChar **pref,
Daniel Veillardc82c57e2004-01-12 16:24:34 +00008614 const xmlChar **URI, int *tlen) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00008615 const xmlChar *localname;
8616 const xmlChar *prefix;
8617 const xmlChar *attname;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008618 const xmlChar *aprefix;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008619 const xmlChar *nsname;
8620 xmlChar *attvalue;
8621 const xmlChar **atts = ctxt->atts;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008622 int maxatts = ctxt->maxatts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008623 int nratts, nbatts, nbdef;
Daniel Veillarddcec6722006-10-15 20:32:53 +00008624 int i, j, nbNs, attval, oldline, oldcol;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008625 const xmlChar *base;
8626 unsigned long cur;
Daniel Veillard365cf672005-06-09 08:18:24 +00008627 int nsNr = ctxt->nsNr;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008628
8629 if (RAW != '<') return(NULL);
8630 NEXT1;
8631
8632 /*
8633 * NOTE: it is crucial with the SAX2 API to never call SHRINK beyond that
8634 * point since the attribute values may be stored as pointers to
8635 * the buffer and calling SHRINK would destroy them !
8636 * The Shrinking is only possible once the full set of attribute
8637 * callbacks have been done.
8638 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00008639reparse:
Daniel Veillard0fb18932003-09-07 09:14:37 +00008640 SHRINK;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008641 base = ctxt->input->base;
8642 cur = ctxt->input->cur - ctxt->input->base;
Daniel Veillarddcec6722006-10-15 20:32:53 +00008643 oldline = ctxt->input->line;
8644 oldcol = ctxt->input->col;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008645 nbatts = 0;
8646 nratts = 0;
8647 nbdef = 0;
8648 nbNs = 0;
8649 attval = 0;
Daniel Veillard365cf672005-06-09 08:18:24 +00008650 /* Forget any namespaces added during an earlier parse of this element. */
8651 ctxt->nsNr = nsNr;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008652
8653 localname = xmlParseQName(ctxt, &prefix);
8654 if (localname == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008655 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8656 "StartTag: invalid element name\n");
Daniel Veillard0fb18932003-09-07 09:14:37 +00008657 return(NULL);
8658 }
Daniel Veillardc82c57e2004-01-12 16:24:34 +00008659 *tlen = ctxt->input->cur - ctxt->input->base - cur;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008660
8661 /*
8662 * Now parse the attributes, it ends up with the ending
8663 *
8664 * (S Attribute)* S?
8665 */
8666 SKIP_BLANKS;
8667 GROW;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008668 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008669
8670 while ((RAW != '>') &&
8671 ((RAW != '/') || (NXT(1) != '>')) &&
Daniel Veillard73b013f2003-09-30 12:36:01 +00008672 (IS_BYTE_CHAR(RAW))) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00008673 const xmlChar *q = CUR_PTR;
8674 unsigned int cons = ctxt->input->consumed;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008675 int len = -1, alloc = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008676
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008677 attname = xmlParseAttribute2(ctxt, prefix, localname,
8678 &aprefix, &attvalue, &len, &alloc);
Daniel Veillarddcec6722006-10-15 20:32:53 +00008679 if (ctxt->input->base != base) {
8680 if ((attvalue != NULL) && (alloc != 0))
8681 xmlFree(attvalue);
8682 attvalue = NULL;
8683 goto base_changed;
8684 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008685 if ((attname != NULL) && (attvalue != NULL)) {
8686 if (len < 0) len = xmlStrlen(attvalue);
8687 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00008688 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
8689 xmlURIPtr uri;
8690
8691 if (*URL != 0) {
8692 uri = xmlParseURI((const char *) URL);
8693 if (uri == NULL) {
Daniel Veillard37334572008-07-31 08:20:02 +00008694 xmlNsErr(ctxt, XML_WAR_NS_URI,
8695 "xmlns: '%s' is not a valid URI\n",
8696 URL, NULL, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008697 } else {
Daniel Veillard37334572008-07-31 08:20:02 +00008698 if ((ctxt->pedantic) && (uri->scheme == NULL)) {
8699 xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
8700 "xmlns: URI %s is not absolute\n",
8701 URL, NULL, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008702 }
8703 xmlFreeURI(uri);
8704 }
Daniel Veillard37334572008-07-31 08:20:02 +00008705 if (URL == ctxt->str_xml_ns) {
8706 if (attname != ctxt->str_xml) {
8707 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8708 "xml namespace URI cannot be the default namespace\n",
8709 NULL, NULL, NULL);
8710 }
8711 goto skip_default_ns;
8712 }
8713 if ((len == 29) &&
8714 (xmlStrEqual(URL,
8715 BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
8716 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8717 "reuse of the xmlns namespace name is forbidden\n",
8718 NULL, NULL, NULL);
8719 goto skip_default_ns;
8720 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00008721 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008722 /*
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008723 * check that it's not a defined namespace
Daniel Veillard0fb18932003-09-07 09:14:37 +00008724 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008725 for (j = 1;j <= nbNs;j++)
8726 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
8727 break;
8728 if (j <= nbNs)
8729 xmlErrAttributeDup(ctxt, NULL, attname);
8730 else
8731 if (nsPush(ctxt, NULL, URL) > 0) nbNs++;
Daniel Veillard37334572008-07-31 08:20:02 +00008732skip_default_ns:
Daniel Veillarde57ec792003-09-10 10:50:59 +00008733 if (alloc != 0) xmlFree(attvalue);
8734 SKIP_BLANKS;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008735 continue;
8736 }
8737 if (aprefix == ctxt->str_xmlns) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00008738 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
8739 xmlURIPtr uri;
8740
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008741 if (attname == ctxt->str_xml) {
8742 if (URL != ctxt->str_xml_ns) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008743 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8744 "xml namespace prefix mapped to wrong URI\n",
8745 NULL, NULL, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008746 }
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008747 /*
8748 * Do not keep a namespace definition node
8749 */
Daniel Veillard37334572008-07-31 08:20:02 +00008750 goto skip_ns;
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008751 }
Daniel Veillard37334572008-07-31 08:20:02 +00008752 if (URL == ctxt->str_xml_ns) {
8753 if (attname != ctxt->str_xml) {
8754 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8755 "xml namespace URI mapped to wrong prefix\n",
8756 NULL, NULL, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008757 }
Daniel Veillard37334572008-07-31 08:20:02 +00008758 goto skip_ns;
8759 }
8760 if (attname == ctxt->str_xmlns) {
8761 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8762 "redefinition of the xmlns prefix is forbidden\n",
8763 NULL, NULL, NULL);
8764 goto skip_ns;
8765 }
8766 if ((len == 29) &&
8767 (xmlStrEqual(URL,
8768 BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
8769 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8770 "reuse of the xmlns namespace name is forbidden\n",
8771 NULL, NULL, NULL);
8772 goto skip_ns;
8773 }
8774 if ((URL == NULL) || (URL[0] == 0)) {
8775 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8776 "xmlns:%s: Empty XML namespace is not allowed\n",
8777 attname, NULL, NULL);
8778 goto skip_ns;
8779 } else {
8780 uri = xmlParseURI((const char *) URL);
8781 if (uri == NULL) {
8782 xmlNsErr(ctxt, XML_WAR_NS_URI,
8783 "xmlns:%s: '%s' is not a valid URI\n",
8784 attname, URL, NULL);
8785 } else {
8786 if ((ctxt->pedantic) && (uri->scheme == NULL)) {
8787 xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
8788 "xmlns:%s: URI %s is not absolute\n",
8789 attname, URL, NULL);
8790 }
8791 xmlFreeURI(uri);
8792 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00008793 }
8794
Daniel Veillard0fb18932003-09-07 09:14:37 +00008795 /*
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008796 * check that it's not a defined namespace
Daniel Veillard0fb18932003-09-07 09:14:37 +00008797 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008798 for (j = 1;j <= nbNs;j++)
8799 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
8800 break;
8801 if (j <= nbNs)
8802 xmlErrAttributeDup(ctxt, aprefix, attname);
8803 else
8804 if (nsPush(ctxt, attname, URL) > 0) nbNs++;
Daniel Veillard37334572008-07-31 08:20:02 +00008805skip_ns:
Daniel Veillarde57ec792003-09-10 10:50:59 +00008806 if (alloc != 0) xmlFree(attvalue);
8807 SKIP_BLANKS;
Daniel Veillardd3999c72004-03-10 16:27:03 +00008808 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008809 continue;
8810 }
8811
8812 /*
8813 * Add the pair to atts
8814 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00008815 if ((atts == NULL) || (nbatts + 5 > maxatts)) {
8816 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00008817 if (attvalue[len] == 0)
8818 xmlFree(attvalue);
8819 goto failed;
8820 }
8821 maxatts = ctxt->maxatts;
8822 atts = ctxt->atts;
8823 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00008824 ctxt->attallocs[nratts++] = alloc;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008825 atts[nbatts++] = attname;
8826 atts[nbatts++] = aprefix;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008827 atts[nbatts++] = NULL; /* the URI will be fetched later */
Daniel Veillard0fb18932003-09-07 09:14:37 +00008828 atts[nbatts++] = attvalue;
8829 attvalue += len;
8830 atts[nbatts++] = attvalue;
8831 /*
8832 * tag if some deallocation is needed
8833 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00008834 if (alloc != 0) attval = 1;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008835 } else {
8836 if ((attvalue != NULL) && (attvalue[len] == 0))
8837 xmlFree(attvalue);
8838 }
8839
Daniel Veillard37334572008-07-31 08:20:02 +00008840failed:
Daniel Veillard0fb18932003-09-07 09:14:37 +00008841
8842 GROW
Daniel Veillarde57ec792003-09-10 10:50:59 +00008843 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008844 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
8845 break;
William M. Brack76e95df2003-10-18 16:20:14 +00008846 if (!IS_BLANK_CH(RAW)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008847 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
8848 "attributes construct error\n");
William M. Brack13dfa872004-09-18 04:52:08 +00008849 break;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008850 }
8851 SKIP_BLANKS;
8852 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
8853 (attname == NULL) && (attvalue == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008854 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
Daniel Veillard0fb18932003-09-07 09:14:37 +00008855 "xmlParseStartTag: problem parsing attributes\n");
Daniel Veillard0fb18932003-09-07 09:14:37 +00008856 break;
8857 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008858 GROW;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008859 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008860 }
8861
Daniel Veillard0fb18932003-09-07 09:14:37 +00008862 /*
Daniel Veillarde57ec792003-09-10 10:50:59 +00008863 * The attributes defaulting
8864 */
8865 if (ctxt->attsDefault != NULL) {
8866 xmlDefAttrsPtr defaults;
8867
8868 defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix);
8869 if (defaults != NULL) {
8870 for (i = 0;i < defaults->nbAttrs;i++) {
Daniel Veillardae0765b2008-07-31 19:54:59 +00008871 attname = defaults->values[5 * i];
8872 aprefix = defaults->values[5 * i + 1];
Daniel Veillarde57ec792003-09-10 10:50:59 +00008873
8874 /*
8875 * special work for namespaces defaulted defs
8876 */
8877 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
8878 /*
8879 * check that it's not a defined namespace
8880 */
8881 for (j = 1;j <= nbNs;j++)
8882 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
8883 break;
8884 if (j <= nbNs) continue;
8885
8886 nsname = xmlGetNamespace(ctxt, NULL);
Daniel Veillardae0765b2008-07-31 19:54:59 +00008887 if (nsname != defaults->values[5 * i + 2]) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00008888 if (nsPush(ctxt, NULL,
Daniel Veillardae0765b2008-07-31 19:54:59 +00008889 defaults->values[5 * i + 2]) > 0)
Daniel Veillarde57ec792003-09-10 10:50:59 +00008890 nbNs++;
8891 }
8892 } else if (aprefix == ctxt->str_xmlns) {
8893 /*
8894 * check that it's not a defined namespace
8895 */
8896 for (j = 1;j <= nbNs;j++)
8897 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
8898 break;
8899 if (j <= nbNs) continue;
8900
8901 nsname = xmlGetNamespace(ctxt, attname);
8902 if (nsname != defaults->values[2]) {
8903 if (nsPush(ctxt, attname,
Daniel Veillardae0765b2008-07-31 19:54:59 +00008904 defaults->values[5 * i + 2]) > 0)
Daniel Veillarde57ec792003-09-10 10:50:59 +00008905 nbNs++;
8906 }
8907 } else {
8908 /*
8909 * check that it's not a defined attribute
8910 */
8911 for (j = 0;j < nbatts;j+=5) {
8912 if ((attname == atts[j]) && (aprefix == atts[j+1]))
8913 break;
8914 }
8915 if (j < nbatts) continue;
8916
8917 if ((atts == NULL) || (nbatts + 5 > maxatts)) {
8918 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
Daniel Veillard9ee35f32003-09-28 00:19:54 +00008919 return(NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008920 }
8921 maxatts = ctxt->maxatts;
8922 atts = ctxt->atts;
8923 }
8924 atts[nbatts++] = attname;
8925 atts[nbatts++] = aprefix;
8926 if (aprefix == NULL)
8927 atts[nbatts++] = NULL;
8928 else
8929 atts[nbatts++] = xmlGetNamespace(ctxt, aprefix);
Daniel Veillardae0765b2008-07-31 19:54:59 +00008930 atts[nbatts++] = defaults->values[5 * i + 2];
8931 atts[nbatts++] = defaults->values[5 * i + 3];
8932 if ((ctxt->standalone == 1) &&
8933 (defaults->values[5 * i + 4] != NULL)) {
8934 xmlValidityError(ctxt, XML_DTD_STANDALONE_DEFAULTED,
8935 "standalone: attribute %s on %s defaulted from external subset\n",
8936 attname, localname);
8937 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00008938 nbdef++;
8939 }
8940 }
8941 }
8942 }
8943
Daniel Veillarde70c8772003-11-25 07:21:18 +00008944 /*
8945 * The attributes checkings
8946 */
8947 for (i = 0; i < nbatts;i += 5) {
Kasimier T. Buchcik455472f2005-04-29 10:04:43 +00008948 /*
8949 * The default namespace does not apply to attribute names.
8950 */
8951 if (atts[i + 1] != NULL) {
8952 nsname = xmlGetNamespace(ctxt, atts[i + 1]);
8953 if (nsname == NULL) {
8954 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
8955 "Namespace prefix %s for %s on %s is not defined\n",
8956 atts[i + 1], atts[i], localname);
8957 }
8958 atts[i + 2] = nsname;
8959 } else
8960 nsname = NULL;
Daniel Veillarde70c8772003-11-25 07:21:18 +00008961 /*
8962 * [ WFC: Unique Att Spec ]
8963 * No attribute name may appear more than once in the same
8964 * start-tag or empty-element tag.
8965 * As extended by the Namespace in XML REC.
8966 */
8967 for (j = 0; j < i;j += 5) {
8968 if (atts[i] == atts[j]) {
8969 if (atts[i+1] == atts[j+1]) {
8970 xmlErrAttributeDup(ctxt, atts[i+1], atts[i]);
8971 break;
8972 }
8973 if ((nsname != NULL) && (atts[j + 2] == nsname)) {
8974 xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED,
8975 "Namespaced Attribute %s in '%s' redefined\n",
8976 atts[i], nsname, NULL);
8977 break;
8978 }
8979 }
8980 }
8981 }
8982
Daniel Veillarde57ec792003-09-10 10:50:59 +00008983 nsname = xmlGetNamespace(ctxt, prefix);
8984 if ((prefix != NULL) && (nsname == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008985 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
8986 "Namespace prefix %s on %s is not defined\n",
8987 prefix, localname, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008988 }
8989 *pref = prefix;
8990 *URI = nsname;
8991
8992 /*
8993 * SAX: Start of Element !
8994 */
8995 if ((ctxt->sax != NULL) && (ctxt->sax->startElementNs != NULL) &&
8996 (!ctxt->disableSAX)) {
8997 if (nbNs > 0)
8998 ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
8999 nsname, nbNs, &ctxt->nsTab[ctxt->nsNr - 2 * nbNs],
9000 nbatts / 5, nbdef, atts);
9001 else
9002 ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
9003 nsname, 0, NULL, nbatts / 5, nbdef, atts);
9004 }
9005
9006 /*
9007 * Free up attribute allocated strings if needed
9008 */
9009 if (attval != 0) {
9010 for (i = 3,j = 0; j < nratts;i += 5,j++)
9011 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
9012 xmlFree((xmlChar *) atts[i]);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009013 }
9014
9015 return(localname);
Daniel Veillarde57ec792003-09-10 10:50:59 +00009016
9017base_changed:
9018 /*
9019 * the attribute strings are valid iif the base didn't changed
9020 */
9021 if (attval != 0) {
9022 for (i = 3,j = 0; j < nratts;i += 5,j++)
9023 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
9024 xmlFree((xmlChar *) atts[i]);
9025 }
9026 ctxt->input->cur = ctxt->input->base + cur;
Daniel Veillarddcec6722006-10-15 20:32:53 +00009027 ctxt->input->line = oldline;
9028 ctxt->input->col = oldcol;
Daniel Veillarde57ec792003-09-10 10:50:59 +00009029 if (ctxt->wellFormed == 1) {
9030 goto reparse;
9031 }
9032 return(NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009033}
9034
9035/**
9036 * xmlParseEndTag2:
9037 * @ctxt: an XML parser context
9038 * @line: line of the start tag
9039 * @nsNr: number of namespaces on the start tag
9040 *
9041 * parse an end of tag
9042 *
9043 * [42] ETag ::= '</' Name S? '>'
9044 *
9045 * With namespace
9046 *
9047 * [NS 9] ETag ::= '</' QName S? '>'
9048 */
9049
9050static void
9051xmlParseEndTag2(xmlParserCtxtPtr ctxt, const xmlChar *prefix,
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009052 const xmlChar *URI, int line, int nsNr, int tlen) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00009053 const xmlChar *name;
9054
9055 GROW;
9056 if ((RAW != '<') || (NXT(1) != '/')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009057 xmlFatalErr(ctxt, XML_ERR_LTSLASH_REQUIRED, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009058 return;
9059 }
9060 SKIP(2);
9061
William M. Brack13dfa872004-09-18 04:52:08 +00009062 if ((tlen > 0) && (xmlStrncmp(ctxt->input->cur, ctxt->name, tlen) == 0)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009063 if (ctxt->input->cur[tlen] == '>') {
9064 ctxt->input->cur += tlen + 1;
9065 goto done;
9066 }
9067 ctxt->input->cur += tlen;
9068 name = (xmlChar*)1;
9069 } else {
9070 if (prefix == NULL)
9071 name = xmlParseNameAndCompare(ctxt, ctxt->name);
9072 else
9073 name = xmlParseQNameAndCompare(ctxt, ctxt->name, prefix);
9074 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00009075
9076 /*
9077 * We should definitely be at the ending "S? '>'" part
9078 */
9079 GROW;
9080 SKIP_BLANKS;
Daniel Veillard73b013f2003-09-30 12:36:01 +00009081 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009082 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009083 } else
9084 NEXT1;
9085
9086 /*
9087 * [ WFC: Element Type Match ]
9088 * The Name in an element's end-tag must match the element type in the
9089 * start-tag.
9090 *
9091 */
9092 if (name != (xmlChar*)1) {
Daniel Veillardf403d292003-10-05 13:51:35 +00009093 if (name == NULL) name = BAD_CAST "unparseable";
9094 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
Daniel Veillard0fb18932003-09-07 09:14:37 +00009095 "Opening and ending tag mismatch: %s line %d and %s\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00009096 ctxt->name, line, name);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009097 }
9098
9099 /*
9100 * SAX: End of Tag
9101 */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009102done:
Daniel Veillard0fb18932003-09-07 09:14:37 +00009103 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
9104 (!ctxt->disableSAX))
9105 ctxt->sax->endElementNs(ctxt->userData, ctxt->name, prefix, URI);
9106
Daniel Veillard0fb18932003-09-07 09:14:37 +00009107 spacePop(ctxt);
9108 if (nsNr != 0)
9109 nsPop(ctxt, nsNr);
9110 return;
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00009111}
9112
9113/**
Owen Taylor3473f882001-02-23 17:55:21 +00009114 * xmlParseCDSect:
9115 * @ctxt: an XML parser context
9116 *
9117 * Parse escaped pure raw content.
9118 *
9119 * [18] CDSect ::= CDStart CData CDEnd
9120 *
9121 * [19] CDStart ::= '<![CDATA['
9122 *
9123 * [20] Data ::= (Char* - (Char* ']]>' Char*))
9124 *
9125 * [21] CDEnd ::= ']]>'
9126 */
9127void
9128xmlParseCDSect(xmlParserCtxtPtr ctxt) {
9129 xmlChar *buf = NULL;
9130 int len = 0;
9131 int size = XML_PARSER_BUFFER_SIZE;
9132 int r, rl;
9133 int s, sl;
9134 int cur, l;
9135 int count = 0;
9136
Daniel Veillard8f597c32003-10-06 08:19:27 +00009137 /* Check 2.6.0 was NXT(0) not RAW */
Daniel Veillarda07050d2003-10-19 14:46:32 +00009138 if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009139 SKIP(9);
9140 } else
9141 return;
9142
9143 ctxt->instate = XML_PARSER_CDATA_SECTION;
9144 r = CUR_CHAR(rl);
William M. Brack871611b2003-10-18 04:53:14 +00009145 if (!IS_CHAR(r)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009146 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009147 ctxt->instate = XML_PARSER_CONTENT;
9148 return;
9149 }
9150 NEXTL(rl);
9151 s = CUR_CHAR(sl);
William M. Brack871611b2003-10-18 04:53:14 +00009152 if (!IS_CHAR(s)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009153 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009154 ctxt->instate = XML_PARSER_CONTENT;
9155 return;
9156 }
9157 NEXTL(sl);
9158 cur = CUR_CHAR(l);
Daniel Veillard3c908dc2003-04-19 00:07:51 +00009159 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00009160 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009161 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009162 return;
9163 }
William M. Brack871611b2003-10-18 04:53:14 +00009164 while (IS_CHAR(cur) &&
Owen Taylor3473f882001-02-23 17:55:21 +00009165 ((r != ']') || (s != ']') || (cur != '>'))) {
9166 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00009167 xmlChar *tmp;
9168
Owen Taylor3473f882001-02-23 17:55:21 +00009169 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00009170 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
9171 if (tmp == NULL) {
9172 xmlFree(buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009173 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009174 return;
9175 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00009176 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00009177 }
9178 COPY_BUF(rl,buf,len,r);
9179 r = s;
9180 rl = sl;
9181 s = cur;
9182 sl = l;
9183 count++;
9184 if (count > 50) {
9185 GROW;
9186 count = 0;
9187 }
9188 NEXTL(l);
9189 cur = CUR_CHAR(l);
9190 }
9191 buf[len] = 0;
9192 ctxt->instate = XML_PARSER_CONTENT;
9193 if (cur != '>') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00009194 xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00009195 "CData section not finished\n%.50s\n", buf);
Owen Taylor3473f882001-02-23 17:55:21 +00009196 xmlFree(buf);
9197 return;
9198 }
9199 NEXTL(l);
9200
9201 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00009202 * OK the buffer is to be consumed as cdata.
Owen Taylor3473f882001-02-23 17:55:21 +00009203 */
9204 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
9205 if (ctxt->sax->cdataBlock != NULL)
9206 ctxt->sax->cdataBlock(ctxt->userData, buf, len);
Daniel Veillard7583a592001-07-08 13:15:55 +00009207 else if (ctxt->sax->characters != NULL)
9208 ctxt->sax->characters(ctxt->userData, buf, len);
Owen Taylor3473f882001-02-23 17:55:21 +00009209 }
9210 xmlFree(buf);
9211}
9212
9213/**
9214 * xmlParseContent:
9215 * @ctxt: an XML parser context
9216 *
9217 * Parse a content:
9218 *
9219 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
9220 */
9221
9222void
9223xmlParseContent(xmlParserCtxtPtr ctxt) {
9224 GROW;
Daniel Veillardfdc91562002-07-01 21:52:03 +00009225 while ((RAW != 0) &&
Daniel Veillard4a9fe382006-09-19 12:44:35 +00009226 ((RAW != '<') || (NXT(1) != '/')) &&
9227 (ctxt->instate != XML_PARSER_EOF)) {
Owen Taylor3473f882001-02-23 17:55:21 +00009228 const xmlChar *test = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00009229 unsigned int cons = ctxt->input->consumed;
Daniel Veillard21a0f912001-02-25 19:54:14 +00009230 const xmlChar *cur = ctxt->input->cur;
Owen Taylor3473f882001-02-23 17:55:21 +00009231
9232 /*
Owen Taylor3473f882001-02-23 17:55:21 +00009233 * First case : a Processing Instruction.
9234 */
Daniel Veillardfdc91562002-07-01 21:52:03 +00009235 if ((*cur == '<') && (cur[1] == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009236 xmlParsePI(ctxt);
9237 }
9238
9239 /*
9240 * Second case : a CDSection
9241 */
Daniel Veillard8f597c32003-10-06 08:19:27 +00009242 /* 2.6.0 test was *cur not RAW */
Daniel Veillarda07050d2003-10-19 14:46:32 +00009243 else if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009244 xmlParseCDSect(ctxt);
9245 }
9246
9247 /*
9248 * Third case : a comment
9249 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00009250 else if ((*cur == '<') && (NXT(1) == '!') &&
Owen Taylor3473f882001-02-23 17:55:21 +00009251 (NXT(2) == '-') && (NXT(3) == '-')) {
9252 xmlParseComment(ctxt);
9253 ctxt->instate = XML_PARSER_CONTENT;
9254 }
9255
9256 /*
9257 * Fourth case : a sub-element.
9258 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00009259 else if (*cur == '<') {
Owen Taylor3473f882001-02-23 17:55:21 +00009260 xmlParseElement(ctxt);
9261 }
9262
9263 /*
9264 * Fifth case : a reference. If if has not been resolved,
9265 * parsing returns it's Name, create the node
9266 */
9267
Daniel Veillard21a0f912001-02-25 19:54:14 +00009268 else if (*cur == '&') {
Owen Taylor3473f882001-02-23 17:55:21 +00009269 xmlParseReference(ctxt);
9270 }
9271
9272 /*
9273 * Last case, text. Note that References are handled directly.
9274 */
9275 else {
9276 xmlParseCharData(ctxt, 0);
9277 }
9278
9279 GROW;
9280 /*
9281 * Pop-up of finished entities.
9282 */
Daniel Veillard561b7f82002-03-20 21:55:57 +00009283 while ((RAW == 0) && (ctxt->inputNr > 1))
Owen Taylor3473f882001-02-23 17:55:21 +00009284 xmlPopInput(ctxt);
9285 SHRINK;
9286
Daniel Veillardfdc91562002-07-01 21:52:03 +00009287 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009288 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
9289 "detected an error in element content\n");
Owen Taylor3473f882001-02-23 17:55:21 +00009290 ctxt->instate = XML_PARSER_EOF;
9291 break;
9292 }
9293 }
9294}
9295
9296/**
9297 * xmlParseElement:
9298 * @ctxt: an XML parser context
9299 *
9300 * parse an XML element, this is highly recursive
9301 *
9302 * [39] element ::= EmptyElemTag | STag content ETag
9303 *
9304 * [ WFC: Element Type Match ]
9305 * The Name in an element's end-tag must match the element type in the
9306 * start-tag.
9307 *
Owen Taylor3473f882001-02-23 17:55:21 +00009308 */
9309
9310void
9311xmlParseElement(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00009312 const xmlChar *name;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009313 const xmlChar *prefix;
9314 const xmlChar *URI;
Owen Taylor3473f882001-02-23 17:55:21 +00009315 xmlParserNodeInfo node_info;
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009316 int line, tlen;
Owen Taylor3473f882001-02-23 17:55:21 +00009317 xmlNodePtr ret;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009318 int nsNr = ctxt->nsNr;
Owen Taylor3473f882001-02-23 17:55:21 +00009319
Daniel Veillard8915c152008-08-26 13:05:34 +00009320 if (((unsigned int) ctxt->nameNr > xmlParserMaxDepth) &&
9321 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9322 xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
9323 "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
9324 xmlParserMaxDepth);
Daniel Veillard4a9fe382006-09-19 12:44:35 +00009325 ctxt->instate = XML_PARSER_EOF;
9326 return;
9327 }
9328
Owen Taylor3473f882001-02-23 17:55:21 +00009329 /* Capture start position */
9330 if (ctxt->record_info) {
9331 node_info.begin_pos = ctxt->input->consumed +
9332 (CUR_PTR - ctxt->input->base);
9333 node_info.begin_line = ctxt->input->line;
9334 }
9335
9336 if (ctxt->spaceNr == 0)
9337 spacePush(ctxt, -1);
Daniel Veillard1114d002006-10-12 16:24:35 +00009338 else if (*ctxt->space == -2)
9339 spacePush(ctxt, -1);
Owen Taylor3473f882001-02-23 17:55:21 +00009340 else
9341 spacePush(ctxt, *ctxt->space);
9342
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00009343 line = ctxt->input->line;
Daniel Veillard81273902003-09-30 00:43:48 +00009344#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +00009345 if (ctxt->sax2)
Daniel Veillard81273902003-09-30 00:43:48 +00009346#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009347 name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
Daniel Veillard81273902003-09-30 00:43:48 +00009348#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +00009349 else
9350 name = xmlParseStartTag(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +00009351#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00009352 if (name == NULL) {
9353 spacePop(ctxt);
9354 return;
9355 }
9356 namePush(ctxt, name);
9357 ret = ctxt->node;
9358
Daniel Veillard4432df22003-09-28 18:58:27 +00009359#ifdef LIBXML_VALID_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +00009360 /*
9361 * [ VC: Root Element Type ]
9362 * The Name in the document type declaration must match the element
9363 * type of the root element.
9364 */
9365 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
9366 ctxt->node && (ctxt->node == ctxt->myDoc->children))
9367 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
Daniel Veillard4432df22003-09-28 18:58:27 +00009368#endif /* LIBXML_VALID_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00009369
9370 /*
9371 * Check for an Empty Element.
9372 */
9373 if ((RAW == '/') && (NXT(1) == '>')) {
9374 SKIP(2);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009375 if (ctxt->sax2) {
9376 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
9377 (!ctxt->disableSAX))
9378 ctxt->sax->endElementNs(ctxt->userData, name, prefix, URI);
Daniel Veillard81273902003-09-30 00:43:48 +00009379#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +00009380 } else {
9381 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
9382 (!ctxt->disableSAX))
9383 ctxt->sax->endElement(ctxt->userData, name);
Daniel Veillard81273902003-09-30 00:43:48 +00009384#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00009385 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00009386 namePop(ctxt);
9387 spacePop(ctxt);
9388 if (nsNr != ctxt->nsNr)
9389 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +00009390 if ( ret != NULL && ctxt->record_info ) {
9391 node_info.end_pos = ctxt->input->consumed +
9392 (CUR_PTR - ctxt->input->base);
9393 node_info.end_line = ctxt->input->line;
9394 node_info.node = ret;
9395 xmlParserAddNodeInfo(ctxt, &node_info);
9396 }
9397 return;
9398 }
9399 if (RAW == '>') {
Daniel Veillard21a0f912001-02-25 19:54:14 +00009400 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00009401 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00009402 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_GT_REQUIRED,
9403 "Couldn't find end of Start Tag %s line %d\n",
9404 name, line, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009405
9406 /*
9407 * end of parsing of this node.
9408 */
9409 nodePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009410 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00009411 spacePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009412 if (nsNr != ctxt->nsNr)
9413 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +00009414
9415 /*
9416 * Capture end position and add node
9417 */
9418 if ( ret != NULL && ctxt->record_info ) {
9419 node_info.end_pos = ctxt->input->consumed +
9420 (CUR_PTR - ctxt->input->base);
9421 node_info.end_line = ctxt->input->line;
9422 node_info.node = ret;
9423 xmlParserAddNodeInfo(ctxt, &node_info);
9424 }
9425 return;
9426 }
9427
9428 /*
9429 * Parse the content of the element:
9430 */
9431 xmlParseContent(ctxt);
Daniel Veillard73b013f2003-09-30 12:36:01 +00009432 if (!IS_BYTE_CHAR(RAW)) {
Daniel Veillardf403d292003-10-05 13:51:35 +00009433 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
Daniel Veillard2b0f8792003-10-10 19:36:36 +00009434 "Premature end of data in tag %s line %d\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00009435 name, line, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009436
9437 /*
9438 * end of parsing of this node.
9439 */
9440 nodePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009441 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00009442 spacePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009443 if (nsNr != ctxt->nsNr)
9444 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +00009445 return;
9446 }
9447
9448 /*
9449 * parse the end of tag: '</' should be here.
9450 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00009451 if (ctxt->sax2) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009452 xmlParseEndTag2(ctxt, prefix, URI, line, ctxt->nsNr - nsNr, tlen);
Daniel Veillarde57ec792003-09-10 10:50:59 +00009453 namePop(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +00009454 }
9455#ifdef LIBXML_SAX1_ENABLED
9456 else
Daniel Veillard0fb18932003-09-07 09:14:37 +00009457 xmlParseEndTag1(ctxt, line);
Daniel Veillard81273902003-09-30 00:43:48 +00009458#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00009459
9460 /*
9461 * Capture end position and add node
9462 */
9463 if ( ret != NULL && ctxt->record_info ) {
9464 node_info.end_pos = ctxt->input->consumed +
9465 (CUR_PTR - ctxt->input->base);
9466 node_info.end_line = ctxt->input->line;
9467 node_info.node = ret;
9468 xmlParserAddNodeInfo(ctxt, &node_info);
9469 }
9470}
9471
9472/**
9473 * xmlParseVersionNum:
9474 * @ctxt: an XML parser context
9475 *
9476 * parse the XML version value.
9477 *
Daniel Veillard34e3f642008-07-29 09:02:27 +00009478 * [26] VersionNum ::= '1.' [0-9]+
9479 *
9480 * In practice allow [0-9].[0-9]+ at that level
Owen Taylor3473f882001-02-23 17:55:21 +00009481 *
9482 * Returns the string giving the XML version number, or NULL
9483 */
9484xmlChar *
9485xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
9486 xmlChar *buf = NULL;
9487 int len = 0;
9488 int size = 10;
9489 xmlChar cur;
9490
Daniel Veillard3c908dc2003-04-19 00:07:51 +00009491 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00009492 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009493 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009494 return(NULL);
9495 }
9496 cur = CUR;
Daniel Veillard34e3f642008-07-29 09:02:27 +00009497 if (!((cur >= '0') && (cur <= '9'))) {
Daniel Veillard7e5c3f42008-07-29 16:12:31 +00009498 xmlFree(buf);
Daniel Veillard34e3f642008-07-29 09:02:27 +00009499 return(NULL);
9500 }
9501 buf[len++] = cur;
9502 NEXT;
9503 cur=CUR;
9504 if (cur != '.') {
Daniel Veillard7e5c3f42008-07-29 16:12:31 +00009505 xmlFree(buf);
Daniel Veillard34e3f642008-07-29 09:02:27 +00009506 return(NULL);
9507 }
9508 buf[len++] = cur;
9509 NEXT;
9510 cur=CUR;
9511 while ((cur >= '0') && (cur <= '9')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009512 if (len + 1 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00009513 xmlChar *tmp;
9514
Owen Taylor3473f882001-02-23 17:55:21 +00009515 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00009516 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
9517 if (tmp == NULL) {
Daniel Veillard68b6e022008-03-31 09:26:00 +00009518 xmlFree(buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009519 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009520 return(NULL);
9521 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00009522 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00009523 }
9524 buf[len++] = cur;
9525 NEXT;
9526 cur=CUR;
9527 }
9528 buf[len] = 0;
9529 return(buf);
9530}
9531
9532/**
9533 * xmlParseVersionInfo:
9534 * @ctxt: an XML parser context
Daniel Veillard68b6e022008-03-31 09:26:00 +00009535 *
Owen Taylor3473f882001-02-23 17:55:21 +00009536 * parse the XML version.
9537 *
9538 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
Daniel Veillard68b6e022008-03-31 09:26:00 +00009539 *
Owen Taylor3473f882001-02-23 17:55:21 +00009540 * [25] Eq ::= S? '=' S?
9541 *
9542 * Returns the version string, e.g. "1.0"
9543 */
9544
9545xmlChar *
9546xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
9547 xmlChar *version = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00009548
Daniel Veillarda07050d2003-10-19 14:46:32 +00009549 if (CMP7(CUR_PTR, 'v', 'e', 'r', 's', 'i', 'o', 'n')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009550 SKIP(7);
9551 SKIP_BLANKS;
9552 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009553 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009554 return(NULL);
9555 }
9556 NEXT;
9557 SKIP_BLANKS;
9558 if (RAW == '"') {
9559 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00009560 version = xmlParseVersionNum(ctxt);
9561 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009562 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009563 } else
9564 NEXT;
9565 } else if (RAW == '\''){
9566 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00009567 version = xmlParseVersionNum(ctxt);
9568 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009569 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009570 } else
9571 NEXT;
9572 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009573 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009574 }
9575 }
9576 return(version);
9577}
9578
9579/**
9580 * xmlParseEncName:
9581 * @ctxt: an XML parser context
9582 *
9583 * parse the XML encoding name
9584 *
9585 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
9586 *
9587 * Returns the encoding name value or NULL
9588 */
9589xmlChar *
9590xmlParseEncName(xmlParserCtxtPtr ctxt) {
9591 xmlChar *buf = NULL;
9592 int len = 0;
9593 int size = 10;
9594 xmlChar cur;
9595
9596 cur = CUR;
9597 if (((cur >= 'a') && (cur <= 'z')) ||
9598 ((cur >= 'A') && (cur <= 'Z'))) {
Daniel Veillard3c908dc2003-04-19 00:07:51 +00009599 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00009600 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009601 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009602 return(NULL);
9603 }
Daniel Veillard34e3f642008-07-29 09:02:27 +00009604
Owen Taylor3473f882001-02-23 17:55:21 +00009605 buf[len++] = cur;
9606 NEXT;
9607 cur = CUR;
9608 while (((cur >= 'a') && (cur <= 'z')) ||
9609 ((cur >= 'A') && (cur <= 'Z')) ||
9610 ((cur >= '0') && (cur <= '9')) ||
9611 (cur == '.') || (cur == '_') ||
9612 (cur == '-')) {
9613 if (len + 1 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00009614 xmlChar *tmp;
9615
Owen Taylor3473f882001-02-23 17:55:21 +00009616 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00009617 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
9618 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009619 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00009620 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00009621 return(NULL);
9622 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00009623 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00009624 }
9625 buf[len++] = cur;
9626 NEXT;
9627 cur = CUR;
9628 if (cur == 0) {
9629 SHRINK;
9630 GROW;
9631 cur = CUR;
9632 }
9633 }
9634 buf[len] = 0;
9635 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009636 xmlFatalErr(ctxt, XML_ERR_ENCODING_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009637 }
9638 return(buf);
9639}
9640
9641/**
9642 * xmlParseEncodingDecl:
9643 * @ctxt: an XML parser context
9644 *
9645 * parse the XML encoding declaration
9646 *
9647 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
9648 *
9649 * this setups the conversion filters.
9650 *
9651 * Returns the encoding value or NULL
9652 */
9653
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00009654const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00009655xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
9656 xmlChar *encoding = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00009657
9658 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00009659 if (CMP8(CUR_PTR, 'e', 'n', 'c', 'o', 'd', 'i', 'n', 'g')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009660 SKIP(8);
9661 SKIP_BLANKS;
9662 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009663 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009664 return(NULL);
9665 }
9666 NEXT;
9667 SKIP_BLANKS;
9668 if (RAW == '"') {
9669 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00009670 encoding = xmlParseEncName(ctxt);
9671 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009672 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009673 } else
9674 NEXT;
9675 } else if (RAW == '\''){
9676 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00009677 encoding = xmlParseEncName(ctxt);
9678 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009679 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009680 } else
9681 NEXT;
Daniel Veillard82ac6b02002-02-17 23:18:55 +00009682 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009683 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009684 }
Daniel Veillard6b621b82003-08-11 15:03:34 +00009685 /*
9686 * UTF-16 encoding stwich has already taken place at this stage,
9687 * more over the little-endian/big-endian selection is already done
9688 */
9689 if ((encoding != NULL) &&
9690 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-16")) ||
9691 (!xmlStrcasecmp(encoding, BAD_CAST "UTF16")))) {
Daniel Veillard37334572008-07-31 08:20:02 +00009692 /*
9693 * If no encoding was passed to the parser, that we are
9694 * using UTF-16 and no decoder is present i.e. the
9695 * document is apparently UTF-8 compatible, then raise an
9696 * encoding mismatch fatal error
9697 */
9698 if ((ctxt->encoding == NULL) &&
9699 (ctxt->input->buf != NULL) &&
9700 (ctxt->input->buf->encoder == NULL)) {
9701 xmlFatalErrMsg(ctxt, XML_ERR_INVALID_ENCODING,
9702 "Document labelled UTF-16 but has UTF-8 content\n");
9703 }
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00009704 if (ctxt->encoding != NULL)
9705 xmlFree((xmlChar *) ctxt->encoding);
9706 ctxt->encoding = encoding;
Daniel Veillardb19ba832003-08-14 00:33:46 +00009707 }
9708 /*
9709 * UTF-8 encoding is handled natively
9710 */
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00009711 else if ((encoding != NULL) &&
Daniel Veillardb19ba832003-08-14 00:33:46 +00009712 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-8")) ||
9713 (!xmlStrcasecmp(encoding, BAD_CAST "UTF8")))) {
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00009714 if (ctxt->encoding != NULL)
9715 xmlFree((xmlChar *) ctxt->encoding);
9716 ctxt->encoding = encoding;
Daniel Veillard6b621b82003-08-11 15:03:34 +00009717 }
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00009718 else if (encoding != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +00009719 xmlCharEncodingHandlerPtr handler;
9720
9721 if (ctxt->input->encoding != NULL)
9722 xmlFree((xmlChar *) ctxt->input->encoding);
9723 ctxt->input->encoding = encoding;
9724
Daniel Veillarda6874ca2003-07-29 16:47:24 +00009725 handler = xmlFindCharEncodingHandler((const char *) encoding);
9726 if (handler != NULL) {
9727 xmlSwitchToEncoding(ctxt, handler);
Owen Taylor3473f882001-02-23 17:55:21 +00009728 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00009729 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
Daniel Veillarda6874ca2003-07-29 16:47:24 +00009730 "Unsupported encoding %s\n", encoding);
9731 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009732 }
9733 }
9734 }
9735 return(encoding);
9736}
9737
9738/**
9739 * xmlParseSDDecl:
9740 * @ctxt: an XML parser context
9741 *
9742 * parse the XML standalone declaration
9743 *
9744 * [32] SDDecl ::= S 'standalone' Eq
9745 * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
9746 *
9747 * [ VC: Standalone Document Declaration ]
9748 * TODO The standalone document declaration must have the value "no"
9749 * if any external markup declarations contain declarations of:
9750 * - attributes with default values, if elements to which these
9751 * attributes apply appear in the document without specifications
9752 * of values for these attributes, or
9753 * - entities (other than amp, lt, gt, apos, quot), if references
9754 * to those entities appear in the document, or
9755 * - attributes with values subject to normalization, where the
9756 * attribute appears in the document with a value which will change
9757 * as a result of normalization, or
9758 * - element types with element content, if white space occurs directly
9759 * within any instance of those types.
9760 *
Daniel Veillard602f2bd2006-12-04 09:26:04 +00009761 * Returns:
9762 * 1 if standalone="yes"
9763 * 0 if standalone="no"
9764 * -2 if standalone attribute is missing or invalid
9765 * (A standalone value of -2 means that the XML declaration was found,
9766 * but no value was specified for the standalone attribute).
Owen Taylor3473f882001-02-23 17:55:21 +00009767 */
9768
9769int
9770xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard602f2bd2006-12-04 09:26:04 +00009771 int standalone = -2;
Owen Taylor3473f882001-02-23 17:55:21 +00009772
9773 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00009774 if (CMP10(CUR_PTR, 's', 't', 'a', 'n', 'd', 'a', 'l', 'o', 'n', 'e')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009775 SKIP(10);
9776 SKIP_BLANKS;
9777 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009778 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009779 return(standalone);
9780 }
9781 NEXT;
9782 SKIP_BLANKS;
9783 if (RAW == '\''){
9784 NEXT;
9785 if ((RAW == 'n') && (NXT(1) == 'o')) {
9786 standalone = 0;
9787 SKIP(2);
9788 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
9789 (NXT(2) == 's')) {
9790 standalone = 1;
9791 SKIP(3);
9792 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009793 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009794 }
9795 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009796 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009797 } else
9798 NEXT;
9799 } else if (RAW == '"'){
9800 NEXT;
9801 if ((RAW == 'n') && (NXT(1) == 'o')) {
9802 standalone = 0;
9803 SKIP(2);
9804 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
9805 (NXT(2) == 's')) {
9806 standalone = 1;
9807 SKIP(3);
9808 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009809 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009810 }
9811 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009812 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009813 } else
9814 NEXT;
9815 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009816 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009817 }
9818 }
9819 return(standalone);
9820}
9821
9822/**
9823 * xmlParseXMLDecl:
9824 * @ctxt: an XML parser context
9825 *
9826 * parse an XML declaration header
9827 *
9828 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
9829 */
9830
9831void
9832xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
9833 xmlChar *version;
9834
9835 /*
Daniel Veillardae487ba2005-11-17 07:25:52 +00009836 * This value for standalone indicates that the document has an
9837 * XML declaration but it does not have a standalone attribute.
9838 * It will be overwritten later if a standalone attribute is found.
9839 */
9840 ctxt->input->standalone = -2;
9841
9842 /*
Owen Taylor3473f882001-02-23 17:55:21 +00009843 * We know that '<?xml' is here.
9844 */
9845 SKIP(5);
9846
William M. Brack76e95df2003-10-18 16:20:14 +00009847 if (!IS_BLANK_CH(RAW)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009848 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
9849 "Blank needed after '<?xml'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00009850 }
9851 SKIP_BLANKS;
9852
9853 /*
Daniel Veillard19840942001-11-29 16:11:38 +00009854 * We must have the VersionInfo here.
Owen Taylor3473f882001-02-23 17:55:21 +00009855 */
9856 version = xmlParseVersionInfo(ctxt);
Daniel Veillard19840942001-11-29 16:11:38 +00009857 if (version == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009858 xmlFatalErr(ctxt, XML_ERR_VERSION_MISSING, NULL);
Daniel Veillard19840942001-11-29 16:11:38 +00009859 } else {
9860 if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) {
9861 /*
Daniel Veillard34e3f642008-07-29 09:02:27 +00009862 * Changed here for XML-1.0 5th edition
Daniel Veillard19840942001-11-29 16:11:38 +00009863 */
Daniel Veillard34e3f642008-07-29 09:02:27 +00009864 if (ctxt->options & XML_PARSE_OLD10) {
9865 xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
9866 "Unsupported version '%s'\n",
9867 version);
9868 } else {
9869 if ((version[0] == '1') && ((version[1] == '.'))) {
9870 xmlWarningMsg(ctxt, XML_WAR_UNKNOWN_VERSION,
9871 "Unsupported version '%s'\n",
9872 version, NULL);
9873 } else {
9874 xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
9875 "Unsupported version '%s'\n",
9876 version);
9877 }
9878 }
Daniel Veillard19840942001-11-29 16:11:38 +00009879 }
9880 if (ctxt->version != NULL)
Daniel Veillardd3b08822001-12-05 12:03:33 +00009881 xmlFree((void *) ctxt->version);
Daniel Veillard19840942001-11-29 16:11:38 +00009882 ctxt->version = version;
Daniel Veillarda050d232001-09-05 15:51:05 +00009883 }
Owen Taylor3473f882001-02-23 17:55:21 +00009884
9885 /*
9886 * We may have the encoding declaration
9887 */
William M. Brack76e95df2003-10-18 16:20:14 +00009888 if (!IS_BLANK_CH(RAW)) {
Owen Taylor3473f882001-02-23 17:55:21 +00009889 if ((RAW == '?') && (NXT(1) == '>')) {
9890 SKIP(2);
9891 return;
9892 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009893 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
Owen Taylor3473f882001-02-23 17:55:21 +00009894 }
9895 xmlParseEncodingDecl(ctxt);
9896 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
9897 /*
9898 * The XML REC instructs us to stop parsing right here
9899 */
9900 return;
9901 }
9902
9903 /*
9904 * We may have the standalone status.
9905 */
William M. Brack76e95df2003-10-18 16:20:14 +00009906 if ((ctxt->input->encoding != NULL) && (!IS_BLANK_CH(RAW))) {
Owen Taylor3473f882001-02-23 17:55:21 +00009907 if ((RAW == '?') && (NXT(1) == '>')) {
9908 SKIP(2);
9909 return;
9910 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009911 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
Owen Taylor3473f882001-02-23 17:55:21 +00009912 }
9913 SKIP_BLANKS;
9914 ctxt->input->standalone = xmlParseSDDecl(ctxt);
9915
9916 SKIP_BLANKS;
9917 if ((RAW == '?') && (NXT(1) == '>')) {
9918 SKIP(2);
9919 } else if (RAW == '>') {
9920 /* Deprecated old WD ... */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009921 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009922 NEXT;
9923 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009924 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009925 MOVETO_ENDTAG(CUR_PTR);
9926 NEXT;
9927 }
9928}
9929
9930/**
9931 * xmlParseMisc:
9932 * @ctxt: an XML parser context
9933 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00009934 * parse an XML Misc* optional field.
Owen Taylor3473f882001-02-23 17:55:21 +00009935 *
9936 * [27] Misc ::= Comment | PI | S
9937 */
9938
9939void
9940xmlParseMisc(xmlParserCtxtPtr ctxt) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00009941 while (((RAW == '<') && (NXT(1) == '?')) ||
Daniel Veillarda07050d2003-10-19 14:46:32 +00009942 (CMP4(CUR_PTR, '<', '!', '-', '-')) ||
William M. Brack76e95df2003-10-18 16:20:14 +00009943 IS_BLANK_CH(CUR)) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00009944 if ((RAW == '<') && (NXT(1) == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009945 xmlParsePI(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00009946 } else if (IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00009947 NEXT;
9948 } else
9949 xmlParseComment(ctxt);
9950 }
9951}
9952
9953/**
9954 * xmlParseDocument:
9955 * @ctxt: an XML parser context
9956 *
9957 * parse an XML document (and build a tree if using the standard SAX
9958 * interface).
9959 *
9960 * [1] document ::= prolog element Misc*
9961 *
9962 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
9963 *
9964 * Returns 0, -1 in case of error. the parser context is augmented
9965 * as a result of the parsing.
9966 */
9967
9968int
9969xmlParseDocument(xmlParserCtxtPtr ctxt) {
9970 xmlChar start[4];
9971 xmlCharEncoding enc;
9972
9973 xmlInitParser();
9974
Daniel Veillard36e5cd52004-11-02 14:52:23 +00009975 if ((ctxt == NULL) || (ctxt->input == NULL))
9976 return(-1);
9977
Owen Taylor3473f882001-02-23 17:55:21 +00009978 GROW;
9979
9980 /*
Daniel Veillard0fb18932003-09-07 09:14:37 +00009981 * SAX: detecting the level.
9982 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00009983 xmlDetectSAX2(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009984
9985 /*
Owen Taylor3473f882001-02-23 17:55:21 +00009986 * SAX: beginning of the document processing.
9987 */
9988 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
9989 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
9990
Daniel Veillard4aede2e2003-10-17 12:43:59 +00009991 if ((ctxt->encoding == (const xmlChar *)XML_CHAR_ENCODING_NONE) &&
9992 ((ctxt->input->end - ctxt->input->cur) >= 4)) {
Daniel Veillard4aafa792001-07-28 17:21:12 +00009993 /*
9994 * Get the 4 first bytes and decode the charset
9995 * if enc != XML_CHAR_ENCODING_NONE
9996 * plug some encoding conversion routines.
9997 */
9998 start[0] = RAW;
9999 start[1] = NXT(1);
10000 start[2] = NXT(2);
10001 start[3] = NXT(3);
Daniel Veillard4aede2e2003-10-17 12:43:59 +000010002 enc = xmlDetectCharEncoding(&start[0], 4);
Daniel Veillard4aafa792001-07-28 17:21:12 +000010003 if (enc != XML_CHAR_ENCODING_NONE) {
10004 xmlSwitchEncoding(ctxt, enc);
10005 }
Owen Taylor3473f882001-02-23 17:55:21 +000010006 }
10007
10008
10009 if (CUR == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010010 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010011 }
10012
10013 /*
10014 * Check for the XMLDecl in the Prolog.
10015 */
10016 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +000010017 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +000010018
10019 /*
10020 * Note that we will switch encoding on the fly.
10021 */
10022 xmlParseXMLDecl(ctxt);
10023 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
10024 /*
10025 * The XML REC instructs us to stop parsing right here
10026 */
10027 return(-1);
10028 }
10029 ctxt->standalone = ctxt->input->standalone;
10030 SKIP_BLANKS;
10031 } else {
10032 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10033 }
10034 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
10035 ctxt->sax->startDocument(ctxt->userData);
10036
10037 /*
10038 * The Misc part of the Prolog
10039 */
10040 GROW;
10041 xmlParseMisc(ctxt);
10042
10043 /*
10044 * Then possibly doc type declaration(s) and more Misc
10045 * (doctypedecl Misc*)?
10046 */
10047 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +000010048 if (CMP9(CUR_PTR, '<', '!', 'D', 'O', 'C', 'T', 'Y', 'P', 'E')) {
Owen Taylor3473f882001-02-23 17:55:21 +000010049
10050 ctxt->inSubset = 1;
10051 xmlParseDocTypeDecl(ctxt);
10052 if (RAW == '[') {
10053 ctxt->instate = XML_PARSER_DTD;
10054 xmlParseInternalSubset(ctxt);
10055 }
10056
10057 /*
10058 * Create and update the external subset.
10059 */
10060 ctxt->inSubset = 2;
10061 if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
10062 (!ctxt->disableSAX))
10063 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
10064 ctxt->extSubSystem, ctxt->extSubURI);
10065 ctxt->inSubset = 0;
10066
Daniel Veillardac4118d2008-01-11 05:27:32 +000010067 xmlCleanSpecialAttr(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010068
10069 ctxt->instate = XML_PARSER_PROLOG;
10070 xmlParseMisc(ctxt);
10071 }
10072
10073 /*
10074 * Time to start parsing the tree itself
10075 */
10076 GROW;
10077 if (RAW != '<') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +000010078 xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
10079 "Start tag expected, '<' not found\n");
Owen Taylor3473f882001-02-23 17:55:21 +000010080 } else {
10081 ctxt->instate = XML_PARSER_CONTENT;
10082 xmlParseElement(ctxt);
10083 ctxt->instate = XML_PARSER_EPILOG;
10084
10085
10086 /*
10087 * The Misc part at the end
10088 */
10089 xmlParseMisc(ctxt);
10090
Daniel Veillard561b7f82002-03-20 21:55:57 +000010091 if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010092 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010093 }
10094 ctxt->instate = XML_PARSER_EOF;
10095 }
10096
10097 /*
10098 * SAX: end of the document processing.
10099 */
Daniel Veillard8d24cc12002-03-05 15:41:29 +000010100 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +000010101 ctxt->sax->endDocument(ctxt->userData);
10102
Daniel Veillard5997aca2002-03-18 18:36:20 +000010103 /*
10104 * Remove locally kept entity definitions if the tree was not built
10105 */
10106 if ((ctxt->myDoc != NULL) &&
10107 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
10108 xmlFreeDoc(ctxt->myDoc);
10109 ctxt->myDoc = NULL;
10110 }
10111
Daniel Veillardae0765b2008-07-31 19:54:59 +000010112 if ((ctxt->wellFormed) && (ctxt->myDoc != NULL)) {
10113 ctxt->myDoc->properties |= XML_DOC_WELLFORMED;
10114 if (ctxt->valid)
10115 ctxt->myDoc->properties |= XML_DOC_DTDVALID;
10116 if (ctxt->nsWellFormed)
10117 ctxt->myDoc->properties |= XML_DOC_NSVALID;
10118 if (ctxt->options & XML_PARSE_OLD10)
10119 ctxt->myDoc->properties |= XML_DOC_OLD10;
10120 }
Daniel Veillardc7612992002-02-17 22:47:37 +000010121 if (! ctxt->wellFormed) {
10122 ctxt->valid = 0;
10123 return(-1);
10124 }
Owen Taylor3473f882001-02-23 17:55:21 +000010125 return(0);
10126}
10127
10128/**
10129 * xmlParseExtParsedEnt:
10130 * @ctxt: an XML parser context
10131 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +000010132 * parse a general parsed entity
Owen Taylor3473f882001-02-23 17:55:21 +000010133 * An external general parsed entity is well-formed if it matches the
10134 * production labeled extParsedEnt.
10135 *
10136 * [78] extParsedEnt ::= TextDecl? content
10137 *
10138 * Returns 0, -1 in case of error. the parser context is augmented
10139 * as a result of the parsing.
10140 */
10141
10142int
10143xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
10144 xmlChar start[4];
10145 xmlCharEncoding enc;
10146
Daniel Veillard36e5cd52004-11-02 14:52:23 +000010147 if ((ctxt == NULL) || (ctxt->input == NULL))
10148 return(-1);
10149
Owen Taylor3473f882001-02-23 17:55:21 +000010150 xmlDefaultSAXHandlerInit();
10151
Daniel Veillard309f81d2003-09-23 09:02:53 +000010152 xmlDetectSAX2(ctxt);
10153
Owen Taylor3473f882001-02-23 17:55:21 +000010154 GROW;
10155
10156 /*
10157 * SAX: beginning of the document processing.
10158 */
10159 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10160 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
10161
10162 /*
10163 * Get the 4 first bytes and decode the charset
10164 * if enc != XML_CHAR_ENCODING_NONE
10165 * plug some encoding conversion routines.
10166 */
Daniel Veillard4aede2e2003-10-17 12:43:59 +000010167 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
10168 start[0] = RAW;
10169 start[1] = NXT(1);
10170 start[2] = NXT(2);
10171 start[3] = NXT(3);
10172 enc = xmlDetectCharEncoding(start, 4);
10173 if (enc != XML_CHAR_ENCODING_NONE) {
10174 xmlSwitchEncoding(ctxt, enc);
10175 }
Owen Taylor3473f882001-02-23 17:55:21 +000010176 }
10177
10178
10179 if (CUR == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010180 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010181 }
10182
10183 /*
10184 * Check for the XMLDecl in the Prolog.
10185 */
10186 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +000010187 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +000010188
10189 /*
10190 * Note that we will switch encoding on the fly.
10191 */
10192 xmlParseXMLDecl(ctxt);
10193 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
10194 /*
10195 * The XML REC instructs us to stop parsing right here
10196 */
10197 return(-1);
10198 }
10199 SKIP_BLANKS;
10200 } else {
10201 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10202 }
10203 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
10204 ctxt->sax->startDocument(ctxt->userData);
10205
10206 /*
10207 * Doing validity checking on chunk doesn't make sense
10208 */
10209 ctxt->instate = XML_PARSER_CONTENT;
10210 ctxt->validate = 0;
10211 ctxt->loadsubset = 0;
10212 ctxt->depth = 0;
10213
10214 xmlParseContent(ctxt);
10215
10216 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010217 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010218 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010219 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010220 }
10221
10222 /*
10223 * SAX: end of the document processing.
10224 */
Daniel Veillard8d24cc12002-03-05 15:41:29 +000010225 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +000010226 ctxt->sax->endDocument(ctxt->userData);
10227
10228 if (! ctxt->wellFormed) return(-1);
10229 return(0);
10230}
10231
Daniel Veillard73b013f2003-09-30 12:36:01 +000010232#ifdef LIBXML_PUSH_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000010233/************************************************************************
10234 * *
10235 * Progressive parsing interfaces *
10236 * *
10237 ************************************************************************/
10238
10239/**
10240 * xmlParseLookupSequence:
10241 * @ctxt: an XML parser context
10242 * @first: the first char to lookup
10243 * @next: the next char to lookup or zero
10244 * @third: the next char to lookup or zero
10245 *
10246 * Try to find if a sequence (first, next, third) or just (first next) or
10247 * (first) is available in the input stream.
10248 * This function has a side effect of (possibly) incrementing ctxt->checkIndex
10249 * to avoid rescanning sequences of bytes, it DOES change the state of the
10250 * parser, do not use liberally.
10251 *
10252 * Returns the index to the current parsing point if the full sequence
10253 * is available, -1 otherwise.
10254 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +000010255static int
Owen Taylor3473f882001-02-23 17:55:21 +000010256xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
10257 xmlChar next, xmlChar third) {
10258 int base, len;
10259 xmlParserInputPtr in;
10260 const xmlChar *buf;
10261
10262 in = ctxt->input;
10263 if (in == NULL) return(-1);
10264 base = in->cur - in->base;
10265 if (base < 0) return(-1);
10266 if (ctxt->checkIndex > base)
10267 base = ctxt->checkIndex;
10268 if (in->buf == NULL) {
10269 buf = in->base;
10270 len = in->length;
10271 } else {
10272 buf = in->buf->buffer->content;
10273 len = in->buf->buffer->use;
10274 }
10275 /* take into account the sequence length */
10276 if (third) len -= 2;
10277 else if (next) len --;
10278 for (;base < len;base++) {
10279 if (buf[base] == first) {
10280 if (third != 0) {
10281 if ((buf[base + 1] != next) ||
10282 (buf[base + 2] != third)) continue;
10283 } else if (next != 0) {
10284 if (buf[base + 1] != next) continue;
10285 }
10286 ctxt->checkIndex = 0;
10287#ifdef DEBUG_PUSH
10288 if (next == 0)
10289 xmlGenericError(xmlGenericErrorContext,
10290 "PP: lookup '%c' found at %d\n",
10291 first, base);
10292 else if (third == 0)
10293 xmlGenericError(xmlGenericErrorContext,
10294 "PP: lookup '%c%c' found at %d\n",
10295 first, next, base);
10296 else
10297 xmlGenericError(xmlGenericErrorContext,
10298 "PP: lookup '%c%c%c' found at %d\n",
10299 first, next, third, base);
10300#endif
10301 return(base - (in->cur - in->base));
10302 }
10303 }
10304 ctxt->checkIndex = base;
10305#ifdef DEBUG_PUSH
10306 if (next == 0)
10307 xmlGenericError(xmlGenericErrorContext,
10308 "PP: lookup '%c' failed\n", first);
10309 else if (third == 0)
10310 xmlGenericError(xmlGenericErrorContext,
10311 "PP: lookup '%c%c' failed\n", first, next);
10312 else
10313 xmlGenericError(xmlGenericErrorContext,
10314 "PP: lookup '%c%c%c' failed\n", first, next, third);
10315#endif
10316 return(-1);
10317}
10318
10319/**
Daniel Veillarda880b122003-04-21 21:36:41 +000010320 * xmlParseGetLasts:
10321 * @ctxt: an XML parser context
10322 * @lastlt: pointer to store the last '<' from the input
10323 * @lastgt: pointer to store the last '>' from the input
10324 *
10325 * Lookup the last < and > in the current chunk
10326 */
10327static void
10328xmlParseGetLasts(xmlParserCtxtPtr ctxt, const xmlChar **lastlt,
10329 const xmlChar **lastgt) {
10330 const xmlChar *tmp;
10331
10332 if ((ctxt == NULL) || (lastlt == NULL) || (lastgt == NULL)) {
10333 xmlGenericError(xmlGenericErrorContext,
10334 "Internal error: xmlParseGetLasts\n");
10335 return;
10336 }
Daniel Veillard0df3bc32004-06-08 12:03:41 +000010337 if ((ctxt->progressive != 0) && (ctxt->inputNr == 1)) {
Daniel Veillarda880b122003-04-21 21:36:41 +000010338 tmp = ctxt->input->end;
10339 tmp--;
Daniel Veillardeb70f932004-07-05 16:46:09 +000010340 while ((tmp >= ctxt->input->base) && (*tmp != '<')) tmp--;
Daniel Veillarda880b122003-04-21 21:36:41 +000010341 if (tmp < ctxt->input->base) {
10342 *lastlt = NULL;
10343 *lastgt = NULL;
Daniel Veillarda880b122003-04-21 21:36:41 +000010344 } else {
Daniel Veillardeb70f932004-07-05 16:46:09 +000010345 *lastlt = tmp;
10346 tmp++;
10347 while ((tmp < ctxt->input->end) && (*tmp != '>')) {
10348 if (*tmp == '\'') {
10349 tmp++;
10350 while ((tmp < ctxt->input->end) && (*tmp != '\'')) tmp++;
10351 if (tmp < ctxt->input->end) tmp++;
10352 } else if (*tmp == '"') {
10353 tmp++;
10354 while ((tmp < ctxt->input->end) && (*tmp != '"')) tmp++;
10355 if (tmp < ctxt->input->end) tmp++;
10356 } else
10357 tmp++;
10358 }
10359 if (tmp < ctxt->input->end)
10360 *lastgt = tmp;
10361 else {
10362 tmp = *lastlt;
10363 tmp--;
10364 while ((tmp >= ctxt->input->base) && (*tmp != '>')) tmp--;
10365 if (tmp >= ctxt->input->base)
10366 *lastgt = tmp;
10367 else
10368 *lastgt = NULL;
10369 }
Daniel Veillarda880b122003-04-21 21:36:41 +000010370 }
Daniel Veillarda880b122003-04-21 21:36:41 +000010371 } else {
10372 *lastlt = NULL;
10373 *lastgt = NULL;
10374 }
10375}
10376/**
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000010377 * xmlCheckCdataPush:
10378 * @cur: pointer to the bock of characters
10379 * @len: length of the block in bytes
10380 *
10381 * Check that the block of characters is okay as SCdata content [20]
10382 *
10383 * Returns the number of bytes to pass if okay, a negative index where an
10384 * UTF-8 error occured otherwise
10385 */
10386static int
10387xmlCheckCdataPush(const xmlChar *utf, int len) {
10388 int ix;
10389 unsigned char c;
10390 int codepoint;
10391
10392 if ((utf == NULL) || (len <= 0))
10393 return(0);
10394
10395 for (ix = 0; ix < len;) { /* string is 0-terminated */
10396 c = utf[ix];
10397 if ((c & 0x80) == 0x00) { /* 1-byte code, starts with 10 */
10398 if (c >= 0x20)
10399 ix++;
10400 else if ((c == 0xA) || (c == 0xD) || (c == 0x9))
10401 ix++;
10402 else
10403 return(-ix);
10404 } else if ((c & 0xe0) == 0xc0) {/* 2-byte code, starts with 110 */
10405 if (ix + 2 > len) return(ix);
10406 if ((utf[ix+1] & 0xc0 ) != 0x80)
10407 return(-ix);
Daniel Veillardeca59a22005-09-09 10:56:28 +000010408 codepoint = (utf[ix] & 0x1f) << 6;
10409 codepoint |= utf[ix+1] & 0x3f;
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000010410 if (!xmlIsCharQ(codepoint))
10411 return(-ix);
10412 ix += 2;
10413 } else if ((c & 0xf0) == 0xe0) {/* 3-byte code, starts with 1110 */
10414 if (ix + 3 > len) return(ix);
10415 if (((utf[ix+1] & 0xc0) != 0x80) ||
10416 ((utf[ix+2] & 0xc0) != 0x80))
10417 return(-ix);
Daniel Veillardeca59a22005-09-09 10:56:28 +000010418 codepoint = (utf[ix] & 0xf) << 12;
10419 codepoint |= (utf[ix+1] & 0x3f) << 6;
10420 codepoint |= utf[ix+2] & 0x3f;
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000010421 if (!xmlIsCharQ(codepoint))
10422 return(-ix);
10423 ix += 3;
10424 } else if ((c & 0xf8) == 0xf0) {/* 4-byte code, starts with 11110 */
10425 if (ix + 4 > len) return(ix);
10426 if (((utf[ix+1] & 0xc0) != 0x80) ||
10427 ((utf[ix+2] & 0xc0) != 0x80) ||
10428 ((utf[ix+3] & 0xc0) != 0x80))
10429 return(-ix);
Daniel Veillardeca59a22005-09-09 10:56:28 +000010430 codepoint = (utf[ix] & 0x7) << 18;
10431 codepoint |= (utf[ix+1] & 0x3f) << 12;
10432 codepoint |= (utf[ix+2] & 0x3f) << 6;
10433 codepoint |= utf[ix+3] & 0x3f;
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000010434 if (!xmlIsCharQ(codepoint))
10435 return(-ix);
10436 ix += 4;
10437 } else /* unknown encoding */
10438 return(-ix);
10439 }
10440 return(ix);
10441}
10442
10443/**
Owen Taylor3473f882001-02-23 17:55:21 +000010444 * xmlParseTryOrFinish:
10445 * @ctxt: an XML parser context
10446 * @terminate: last chunk indicator
10447 *
10448 * Try to progress on parsing
10449 *
10450 * Returns zero if no parsing was possible
10451 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +000010452static int
Owen Taylor3473f882001-02-23 17:55:21 +000010453xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
10454 int ret = 0;
Daniel Veillardc82c57e2004-01-12 16:24:34 +000010455 int avail, tlen;
Owen Taylor3473f882001-02-23 17:55:21 +000010456 xmlChar cur, next;
Daniel Veillarda880b122003-04-21 21:36:41 +000010457 const xmlChar *lastlt, *lastgt;
Owen Taylor3473f882001-02-23 17:55:21 +000010458
Daniel Veillard36e5cd52004-11-02 14:52:23 +000010459 if (ctxt->input == NULL)
10460 return(0);
10461
Owen Taylor3473f882001-02-23 17:55:21 +000010462#ifdef DEBUG_PUSH
10463 switch (ctxt->instate) {
10464 case XML_PARSER_EOF:
10465 xmlGenericError(xmlGenericErrorContext,
10466 "PP: try EOF\n"); break;
10467 case XML_PARSER_START:
10468 xmlGenericError(xmlGenericErrorContext,
10469 "PP: try START\n"); break;
10470 case XML_PARSER_MISC:
10471 xmlGenericError(xmlGenericErrorContext,
10472 "PP: try MISC\n");break;
10473 case XML_PARSER_COMMENT:
10474 xmlGenericError(xmlGenericErrorContext,
10475 "PP: try COMMENT\n");break;
10476 case XML_PARSER_PROLOG:
10477 xmlGenericError(xmlGenericErrorContext,
10478 "PP: try PROLOG\n");break;
10479 case XML_PARSER_START_TAG:
10480 xmlGenericError(xmlGenericErrorContext,
10481 "PP: try START_TAG\n");break;
10482 case XML_PARSER_CONTENT:
10483 xmlGenericError(xmlGenericErrorContext,
10484 "PP: try CONTENT\n");break;
10485 case XML_PARSER_CDATA_SECTION:
10486 xmlGenericError(xmlGenericErrorContext,
10487 "PP: try CDATA_SECTION\n");break;
10488 case XML_PARSER_END_TAG:
10489 xmlGenericError(xmlGenericErrorContext,
10490 "PP: try END_TAG\n");break;
10491 case XML_PARSER_ENTITY_DECL:
10492 xmlGenericError(xmlGenericErrorContext,
10493 "PP: try ENTITY_DECL\n");break;
10494 case XML_PARSER_ENTITY_VALUE:
10495 xmlGenericError(xmlGenericErrorContext,
10496 "PP: try ENTITY_VALUE\n");break;
10497 case XML_PARSER_ATTRIBUTE_VALUE:
10498 xmlGenericError(xmlGenericErrorContext,
10499 "PP: try ATTRIBUTE_VALUE\n");break;
10500 case XML_PARSER_DTD:
10501 xmlGenericError(xmlGenericErrorContext,
10502 "PP: try DTD\n");break;
10503 case XML_PARSER_EPILOG:
10504 xmlGenericError(xmlGenericErrorContext,
10505 "PP: try EPILOG\n");break;
10506 case XML_PARSER_PI:
10507 xmlGenericError(xmlGenericErrorContext,
10508 "PP: try PI\n");break;
10509 case XML_PARSER_IGNORE:
10510 xmlGenericError(xmlGenericErrorContext,
10511 "PP: try IGNORE\n");break;
10512 }
10513#endif
10514
Daniel Veillard198c1bf2003-10-20 17:07:41 +000010515 if ((ctxt->input != NULL) &&
10516 (ctxt->input->cur - ctxt->input->base > 4096)) {
Daniel Veillarda880b122003-04-21 21:36:41 +000010517 xmlSHRINK(ctxt);
10518 ctxt->checkIndex = 0;
10519 }
10520 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Aleksey Sanine48a3182002-05-09 18:20:01 +000010521
Daniel Veillarda880b122003-04-21 21:36:41 +000010522 while (1) {
Daniel Veillard14412512005-01-21 23:53:26 +000010523 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000010524 return(0);
10525
10526
Owen Taylor3473f882001-02-23 17:55:21 +000010527 /*
10528 * Pop-up of finished entities.
10529 */
10530 while ((RAW == 0) && (ctxt->inputNr > 1))
10531 xmlPopInput(ctxt);
10532
Daniel Veillard198c1bf2003-10-20 17:07:41 +000010533 if (ctxt->input == NULL) break;
Owen Taylor3473f882001-02-23 17:55:21 +000010534 if (ctxt->input->buf == NULL)
Daniel Veillarda880b122003-04-21 21:36:41 +000010535 avail = ctxt->input->length -
10536 (ctxt->input->cur - ctxt->input->base);
Daniel Veillard158a4d22002-02-20 22:17:58 +000010537 else {
10538 /*
10539 * If we are operating on converted input, try to flush
10540 * remaining chars to avoid them stalling in the non-converted
10541 * buffer.
10542 */
10543 if ((ctxt->input->buf->raw != NULL) &&
10544 (ctxt->input->buf->raw->use > 0)) {
10545 int base = ctxt->input->base -
10546 ctxt->input->buf->buffer->content;
10547 int current = ctxt->input->cur - ctxt->input->base;
10548
10549 xmlParserInputBufferPush(ctxt->input->buf, 0, "");
10550 ctxt->input->base = ctxt->input->buf->buffer->content + base;
10551 ctxt->input->cur = ctxt->input->base + current;
10552 ctxt->input->end =
10553 &ctxt->input->buf->buffer->content[
10554 ctxt->input->buf->buffer->use];
10555 }
10556 avail = ctxt->input->buf->buffer->use -
10557 (ctxt->input->cur - ctxt->input->base);
10558 }
Owen Taylor3473f882001-02-23 17:55:21 +000010559 if (avail < 1)
10560 goto done;
10561 switch (ctxt->instate) {
10562 case XML_PARSER_EOF:
10563 /*
10564 * Document parsing is done !
10565 */
10566 goto done;
10567 case XML_PARSER_START:
Daniel Veillard0e4cd172001-06-28 12:13:56 +000010568 if (ctxt->charset == XML_CHAR_ENCODING_NONE) {
10569 xmlChar start[4];
10570 xmlCharEncoding enc;
10571
10572 /*
10573 * Very first chars read from the document flow.
10574 */
10575 if (avail < 4)
10576 goto done;
10577
10578 /*
10579 * Get the 4 first bytes and decode the charset
10580 * if enc != XML_CHAR_ENCODING_NONE
William M. Brack3a1cd212005-02-11 14:35:54 +000010581 * plug some encoding conversion routines,
10582 * else xmlSwitchEncoding will set to (default)
10583 * UTF8.
Daniel Veillard0e4cd172001-06-28 12:13:56 +000010584 */
10585 start[0] = RAW;
10586 start[1] = NXT(1);
10587 start[2] = NXT(2);
10588 start[3] = NXT(3);
10589 enc = xmlDetectCharEncoding(start, 4);
William M. Brack3a1cd212005-02-11 14:35:54 +000010590 xmlSwitchEncoding(ctxt, enc);
Daniel Veillard0e4cd172001-06-28 12:13:56 +000010591 break;
10592 }
Owen Taylor3473f882001-02-23 17:55:21 +000010593
Daniel Veillard2b8c4a12003-10-02 22:28:19 +000010594 if (avail < 2)
10595 goto done;
Owen Taylor3473f882001-02-23 17:55:21 +000010596 cur = ctxt->input->cur[0];
10597 next = ctxt->input->cur[1];
10598 if (cur == 0) {
10599 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10600 ctxt->sax->setDocumentLocator(ctxt->userData,
10601 &xmlDefaultSAXLocator);
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010602 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010603 ctxt->instate = XML_PARSER_EOF;
10604#ifdef DEBUG_PUSH
10605 xmlGenericError(xmlGenericErrorContext,
10606 "PP: entering EOF\n");
10607#endif
10608 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10609 ctxt->sax->endDocument(ctxt->userData);
10610 goto done;
10611 }
10612 if ((cur == '<') && (next == '?')) {
10613 /* PI or XML decl */
10614 if (avail < 5) return(ret);
10615 if ((!terminate) &&
10616 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
10617 return(ret);
10618 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10619 ctxt->sax->setDocumentLocator(ctxt->userData,
10620 &xmlDefaultSAXLocator);
10621 if ((ctxt->input->cur[2] == 'x') &&
10622 (ctxt->input->cur[3] == 'm') &&
10623 (ctxt->input->cur[4] == 'l') &&
William M. Brack76e95df2003-10-18 16:20:14 +000010624 (IS_BLANK_CH(ctxt->input->cur[5]))) {
Owen Taylor3473f882001-02-23 17:55:21 +000010625 ret += 5;
10626#ifdef DEBUG_PUSH
10627 xmlGenericError(xmlGenericErrorContext,
10628 "PP: Parsing XML Decl\n");
10629#endif
10630 xmlParseXMLDecl(ctxt);
10631 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
10632 /*
10633 * The XML REC instructs us to stop parsing right
10634 * here
10635 */
10636 ctxt->instate = XML_PARSER_EOF;
10637 return(0);
10638 }
10639 ctxt->standalone = ctxt->input->standalone;
10640 if ((ctxt->encoding == NULL) &&
10641 (ctxt->input->encoding != NULL))
10642 ctxt->encoding = xmlStrdup(ctxt->input->encoding);
10643 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
10644 (!ctxt->disableSAX))
10645 ctxt->sax->startDocument(ctxt->userData);
10646 ctxt->instate = XML_PARSER_MISC;
10647#ifdef DEBUG_PUSH
10648 xmlGenericError(xmlGenericErrorContext,
10649 "PP: entering MISC\n");
10650#endif
10651 } else {
10652 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10653 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
10654 (!ctxt->disableSAX))
10655 ctxt->sax->startDocument(ctxt->userData);
10656 ctxt->instate = XML_PARSER_MISC;
10657#ifdef DEBUG_PUSH
10658 xmlGenericError(xmlGenericErrorContext,
10659 "PP: entering MISC\n");
10660#endif
10661 }
10662 } else {
10663 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10664 ctxt->sax->setDocumentLocator(ctxt->userData,
10665 &xmlDefaultSAXLocator);
10666 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
William M. Bracka3215c72004-07-31 16:24:01 +000010667 if (ctxt->version == NULL) {
10668 xmlErrMemory(ctxt, NULL);
10669 break;
10670 }
Owen Taylor3473f882001-02-23 17:55:21 +000010671 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
10672 (!ctxt->disableSAX))
10673 ctxt->sax->startDocument(ctxt->userData);
10674 ctxt->instate = XML_PARSER_MISC;
10675#ifdef DEBUG_PUSH
10676 xmlGenericError(xmlGenericErrorContext,
10677 "PP: entering MISC\n");
10678#endif
10679 }
10680 break;
Daniel Veillarda880b122003-04-21 21:36:41 +000010681 case XML_PARSER_START_TAG: {
Daniel Veillarde57ec792003-09-10 10:50:59 +000010682 const xmlChar *name;
10683 const xmlChar *prefix;
10684 const xmlChar *URI;
10685 int nsNr = ctxt->nsNr;
Daniel Veillarda880b122003-04-21 21:36:41 +000010686
10687 if ((avail < 2) && (ctxt->inputNr == 1))
10688 goto done;
10689 cur = ctxt->input->cur[0];
10690 if (cur != '<') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010691 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Daniel Veillarda880b122003-04-21 21:36:41 +000010692 ctxt->instate = XML_PARSER_EOF;
Daniel Veillarda880b122003-04-21 21:36:41 +000010693 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10694 ctxt->sax->endDocument(ctxt->userData);
10695 goto done;
10696 }
10697 if (!terminate) {
10698 if (ctxt->progressive) {
Daniel Veillardb3744002004-02-18 14:28:22 +000010699 /* > can be found unescaped in attribute values */
Daniel Veillardeb70f932004-07-05 16:46:09 +000010700 if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
Daniel Veillarda880b122003-04-21 21:36:41 +000010701 goto done;
10702 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
10703 goto done;
10704 }
10705 }
10706 if (ctxt->spaceNr == 0)
10707 spacePush(ctxt, -1);
Daniel Veillard1114d002006-10-12 16:24:35 +000010708 else if (*ctxt->space == -2)
10709 spacePush(ctxt, -1);
Daniel Veillarda880b122003-04-21 21:36:41 +000010710 else
10711 spacePush(ctxt, *ctxt->space);
Daniel Veillard81273902003-09-30 00:43:48 +000010712#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +000010713 if (ctxt->sax2)
Daniel Veillard81273902003-09-30 00:43:48 +000010714#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillardc82c57e2004-01-12 16:24:34 +000010715 name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
Daniel Veillard81273902003-09-30 00:43:48 +000010716#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +000010717 else
10718 name = xmlParseStartTag(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +000010719#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarda880b122003-04-21 21:36:41 +000010720 if (name == NULL) {
10721 spacePop(ctxt);
10722 ctxt->instate = XML_PARSER_EOF;
Daniel Veillarda880b122003-04-21 21:36:41 +000010723 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10724 ctxt->sax->endDocument(ctxt->userData);
10725 goto done;
10726 }
Daniel Veillard4432df22003-09-28 18:58:27 +000010727#ifdef LIBXML_VALID_ENABLED
Daniel Veillarda880b122003-04-21 21:36:41 +000010728 /*
10729 * [ VC: Root Element Type ]
10730 * The Name in the document type declaration must match
10731 * the element type of the root element.
10732 */
10733 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
10734 ctxt->node && (ctxt->node == ctxt->myDoc->children))
10735 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
Daniel Veillard4432df22003-09-28 18:58:27 +000010736#endif /* LIBXML_VALID_ENABLED */
Daniel Veillarda880b122003-04-21 21:36:41 +000010737
10738 /*
10739 * Check for an Empty Element.
10740 */
10741 if ((RAW == '/') && (NXT(1) == '>')) {
10742 SKIP(2);
Daniel Veillarde57ec792003-09-10 10:50:59 +000010743
10744 if (ctxt->sax2) {
10745 if ((ctxt->sax != NULL) &&
10746 (ctxt->sax->endElementNs != NULL) &&
10747 (!ctxt->disableSAX))
10748 ctxt->sax->endElementNs(ctxt->userData, name,
10749 prefix, URI);
Daniel Veillard48df9612005-01-04 21:50:05 +000010750 if (ctxt->nsNr - nsNr > 0)
10751 nsPop(ctxt, ctxt->nsNr - nsNr);
Daniel Veillard81273902003-09-30 00:43:48 +000010752#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +000010753 } else {
10754 if ((ctxt->sax != NULL) &&
10755 (ctxt->sax->endElement != NULL) &&
10756 (!ctxt->disableSAX))
10757 ctxt->sax->endElement(ctxt->userData, name);
Daniel Veillard81273902003-09-30 00:43:48 +000010758#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarda880b122003-04-21 21:36:41 +000010759 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000010760 spacePop(ctxt);
10761 if (ctxt->nameNr == 0) {
Daniel Veillarda880b122003-04-21 21:36:41 +000010762 ctxt->instate = XML_PARSER_EPILOG;
Daniel Veillarda880b122003-04-21 21:36:41 +000010763 } else {
10764 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillarda880b122003-04-21 21:36:41 +000010765 }
10766 break;
10767 }
10768 if (RAW == '>') {
10769 NEXT;
10770 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +000010771 xmlFatalErrMsgStr(ctxt, XML_ERR_GT_REQUIRED,
Daniel Veillarda880b122003-04-21 21:36:41 +000010772 "Couldn't find end of Start Tag %s\n",
10773 name);
Daniel Veillarda880b122003-04-21 21:36:41 +000010774 nodePop(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +000010775 spacePop(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +000010776 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000010777 if (ctxt->sax2)
10778 nameNsPush(ctxt, name, prefix, URI, ctxt->nsNr - nsNr);
Daniel Veillard81273902003-09-30 00:43:48 +000010779#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +000010780 else
10781 namePush(ctxt, name);
Daniel Veillard81273902003-09-30 00:43:48 +000010782#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +000010783
Daniel Veillarda880b122003-04-21 21:36:41 +000010784 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillarda880b122003-04-21 21:36:41 +000010785 break;
10786 }
10787 case XML_PARSER_CONTENT: {
10788 const xmlChar *test;
10789 unsigned int cons;
10790 if ((avail < 2) && (ctxt->inputNr == 1))
10791 goto done;
10792 cur = ctxt->input->cur[0];
10793 next = ctxt->input->cur[1];
10794
10795 test = CUR_PTR;
10796 cons = ctxt->input->consumed;
10797 if ((cur == '<') && (next == '/')) {
10798 ctxt->instate = XML_PARSER_END_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +000010799 break;
10800 } else if ((cur == '<') && (next == '?')) {
10801 if ((!terminate) &&
10802 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
10803 goto done;
Daniel Veillarda880b122003-04-21 21:36:41 +000010804 xmlParsePI(ctxt);
10805 } else if ((cur == '<') && (next != '!')) {
10806 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +000010807 break;
10808 } else if ((cur == '<') && (next == '!') &&
10809 (ctxt->input->cur[2] == '-') &&
10810 (ctxt->input->cur[3] == '-')) {
Daniel Veillard6974feb2006-02-05 02:43:36 +000010811 int term;
10812
10813 if (avail < 4)
10814 goto done;
10815 ctxt->input->cur += 4;
10816 term = xmlParseLookupSequence(ctxt, '-', '-', '>');
10817 ctxt->input->cur -= 4;
10818 if ((!terminate) && (term < 0))
Daniel Veillarda880b122003-04-21 21:36:41 +000010819 goto done;
Daniel Veillarda880b122003-04-21 21:36:41 +000010820 xmlParseComment(ctxt);
10821 ctxt->instate = XML_PARSER_CONTENT;
10822 } else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
10823 (ctxt->input->cur[2] == '[') &&
10824 (ctxt->input->cur[3] == 'C') &&
10825 (ctxt->input->cur[4] == 'D') &&
10826 (ctxt->input->cur[5] == 'A') &&
10827 (ctxt->input->cur[6] == 'T') &&
10828 (ctxt->input->cur[7] == 'A') &&
10829 (ctxt->input->cur[8] == '[')) {
10830 SKIP(9);
10831 ctxt->instate = XML_PARSER_CDATA_SECTION;
Daniel Veillarda880b122003-04-21 21:36:41 +000010832 break;
10833 } else if ((cur == '<') && (next == '!') &&
10834 (avail < 9)) {
10835 goto done;
10836 } else if (cur == '&') {
10837 if ((!terminate) &&
10838 (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0))
10839 goto done;
Daniel Veillarda880b122003-04-21 21:36:41 +000010840 xmlParseReference(ctxt);
10841 } else {
10842 /* TODO Avoid the extra copy, handle directly !!! */
10843 /*
10844 * Goal of the following test is:
10845 * - minimize calls to the SAX 'character' callback
10846 * when they are mergeable
10847 * - handle a problem for isBlank when we only parse
10848 * a sequence of blank chars and the next one is
10849 * not available to check against '<' presence.
10850 * - tries to homogenize the differences in SAX
10851 * callbacks between the push and pull versions
10852 * of the parser.
10853 */
10854 if ((ctxt->inputNr == 1) &&
10855 (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
10856 if (!terminate) {
10857 if (ctxt->progressive) {
10858 if ((lastlt == NULL) ||
10859 (ctxt->input->cur > lastlt))
10860 goto done;
10861 } else if (xmlParseLookupSequence(ctxt,
10862 '<', 0, 0) < 0) {
10863 goto done;
10864 }
10865 }
10866 }
10867 ctxt->checkIndex = 0;
Daniel Veillarda880b122003-04-21 21:36:41 +000010868 xmlParseCharData(ctxt, 0);
10869 }
10870 /*
10871 * Pop-up of finished entities.
10872 */
10873 while ((RAW == 0) && (ctxt->inputNr > 1))
10874 xmlPopInput(ctxt);
10875 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010876 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
10877 "detected an error in element content\n");
Daniel Veillarda880b122003-04-21 21:36:41 +000010878 ctxt->instate = XML_PARSER_EOF;
10879 break;
10880 }
10881 break;
10882 }
10883 case XML_PARSER_END_TAG:
10884 if (avail < 2)
10885 goto done;
10886 if (!terminate) {
10887 if (ctxt->progressive) {
Daniel Veillardeb70f932004-07-05 16:46:09 +000010888 /* > can be found unescaped in attribute values */
10889 if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
Daniel Veillarda880b122003-04-21 21:36:41 +000010890 goto done;
10891 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
10892 goto done;
10893 }
10894 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000010895 if (ctxt->sax2) {
10896 xmlParseEndTag2(ctxt,
10897 (void *) ctxt->pushTab[ctxt->nameNr * 3 - 3],
10898 (void *) ctxt->pushTab[ctxt->nameNr * 3 - 2], 0,
Daniel Veillardc82c57e2004-01-12 16:24:34 +000010899 (int) (long) ctxt->pushTab[ctxt->nameNr * 3 - 1], 0);
Daniel Veillarde57ec792003-09-10 10:50:59 +000010900 nameNsPop(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +000010901 }
10902#ifdef LIBXML_SAX1_ENABLED
10903 else
Daniel Veillarde57ec792003-09-10 10:50:59 +000010904 xmlParseEndTag1(ctxt, 0);
Daniel Veillard81273902003-09-30 00:43:48 +000010905#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +000010906 if (ctxt->nameNr == 0) {
Daniel Veillarda880b122003-04-21 21:36:41 +000010907 ctxt->instate = XML_PARSER_EPILOG;
Daniel Veillarda880b122003-04-21 21:36:41 +000010908 } else {
10909 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillarda880b122003-04-21 21:36:41 +000010910 }
10911 break;
10912 case XML_PARSER_CDATA_SECTION: {
10913 /*
10914 * The Push mode needs to have the SAX callback for
10915 * cdataBlock merge back contiguous callbacks.
10916 */
10917 int base;
10918
10919 base = xmlParseLookupSequence(ctxt, ']', ']', '>');
10920 if (base < 0) {
10921 if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) {
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000010922 int tmp;
10923
10924 tmp = xmlCheckCdataPush(ctxt->input->cur,
10925 XML_PARSER_BIG_BUFFER_SIZE);
10926 if (tmp < 0) {
10927 tmp = -tmp;
10928 ctxt->input->cur += tmp;
10929 goto encoding_error;
10930 }
Daniel Veillarda880b122003-04-21 21:36:41 +000010931 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
10932 if (ctxt->sax->cdataBlock != NULL)
Daniel Veillardd9d32ae2003-07-05 20:32:43 +000010933 ctxt->sax->cdataBlock(ctxt->userData,
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000010934 ctxt->input->cur, tmp);
Daniel Veillardd9d32ae2003-07-05 20:32:43 +000010935 else if (ctxt->sax->characters != NULL)
10936 ctxt->sax->characters(ctxt->userData,
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000010937 ctxt->input->cur, tmp);
Daniel Veillarda880b122003-04-21 21:36:41 +000010938 }
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000010939 SKIPL(tmp);
Daniel Veillarda880b122003-04-21 21:36:41 +000010940 ctxt->checkIndex = 0;
10941 }
10942 goto done;
10943 } else {
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000010944 int tmp;
10945
10946 tmp = xmlCheckCdataPush(ctxt->input->cur, base);
10947 if ((tmp < 0) || (tmp != base)) {
10948 tmp = -tmp;
10949 ctxt->input->cur += tmp;
10950 goto encoding_error;
10951 }
Daniel Veillardd0d2f092008-03-07 16:50:21 +000010952 if ((ctxt->sax != NULL) && (base == 0) &&
10953 (ctxt->sax->cdataBlock != NULL) &&
10954 (!ctxt->disableSAX)) {
10955 /*
10956 * Special case to provide identical behaviour
10957 * between pull and push parsers on empty CDATA
10958 * sections
10959 */
10960 if ((ctxt->input->cur - ctxt->input->base >= 9) &&
10961 (!strncmp((const char *)&ctxt->input->cur[-9],
10962 "<![CDATA[", 9)))
10963 ctxt->sax->cdataBlock(ctxt->userData,
10964 BAD_CAST "", 0);
10965 } else if ((ctxt->sax != NULL) && (base > 0) &&
Daniel Veillarda880b122003-04-21 21:36:41 +000010966 (!ctxt->disableSAX)) {
10967 if (ctxt->sax->cdataBlock != NULL)
10968 ctxt->sax->cdataBlock(ctxt->userData,
10969 ctxt->input->cur, base);
Daniel Veillardd9d32ae2003-07-05 20:32:43 +000010970 else if (ctxt->sax->characters != NULL)
10971 ctxt->sax->characters(ctxt->userData,
10972 ctxt->input->cur, base);
Daniel Veillarda880b122003-04-21 21:36:41 +000010973 }
Daniel Veillard0b787f32004-03-26 17:29:53 +000010974 SKIPL(base + 3);
Daniel Veillarda880b122003-04-21 21:36:41 +000010975 ctxt->checkIndex = 0;
10976 ctxt->instate = XML_PARSER_CONTENT;
10977#ifdef DEBUG_PUSH
10978 xmlGenericError(xmlGenericErrorContext,
10979 "PP: entering CONTENT\n");
10980#endif
10981 }
10982 break;
10983 }
Owen Taylor3473f882001-02-23 17:55:21 +000010984 case XML_PARSER_MISC:
10985 SKIP_BLANKS;
10986 if (ctxt->input->buf == NULL)
Daniel Veillarda880b122003-04-21 21:36:41 +000010987 avail = ctxt->input->length -
10988 (ctxt->input->cur - ctxt->input->base);
Owen Taylor3473f882001-02-23 17:55:21 +000010989 else
Daniel Veillarda880b122003-04-21 21:36:41 +000010990 avail = ctxt->input->buf->buffer->use -
10991 (ctxt->input->cur - ctxt->input->base);
Owen Taylor3473f882001-02-23 17:55:21 +000010992 if (avail < 2)
10993 goto done;
10994 cur = ctxt->input->cur[0];
10995 next = ctxt->input->cur[1];
10996 if ((cur == '<') && (next == '?')) {
10997 if ((!terminate) &&
10998 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
10999 goto done;
11000#ifdef DEBUG_PUSH
11001 xmlGenericError(xmlGenericErrorContext,
11002 "PP: Parsing PI\n");
11003#endif
11004 xmlParsePI(ctxt);
Daniel Veillard40e4b212007-06-12 14:46:40 +000011005 ctxt->checkIndex = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000011006 } else if ((cur == '<') && (next == '!') &&
Daniel Veillarda880b122003-04-21 21:36:41 +000011007 (ctxt->input->cur[2] == '-') &&
11008 (ctxt->input->cur[3] == '-')) {
Owen Taylor3473f882001-02-23 17:55:21 +000011009 if ((!terminate) &&
11010 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
11011 goto done;
11012#ifdef DEBUG_PUSH
11013 xmlGenericError(xmlGenericErrorContext,
11014 "PP: Parsing Comment\n");
11015#endif
11016 xmlParseComment(ctxt);
11017 ctxt->instate = XML_PARSER_MISC;
Daniel Veillarddfac9462007-06-12 14:44:32 +000011018 ctxt->checkIndex = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000011019 } else if ((cur == '<') && (next == '!') &&
Daniel Veillarda880b122003-04-21 21:36:41 +000011020 (ctxt->input->cur[2] == 'D') &&
11021 (ctxt->input->cur[3] == 'O') &&
11022 (ctxt->input->cur[4] == 'C') &&
11023 (ctxt->input->cur[5] == 'T') &&
11024 (ctxt->input->cur[6] == 'Y') &&
11025 (ctxt->input->cur[7] == 'P') &&
Owen Taylor3473f882001-02-23 17:55:21 +000011026 (ctxt->input->cur[8] == 'E')) {
11027 if ((!terminate) &&
11028 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
11029 goto done;
11030#ifdef DEBUG_PUSH
11031 xmlGenericError(xmlGenericErrorContext,
11032 "PP: Parsing internal subset\n");
11033#endif
11034 ctxt->inSubset = 1;
11035 xmlParseDocTypeDecl(ctxt);
11036 if (RAW == '[') {
11037 ctxt->instate = XML_PARSER_DTD;
11038#ifdef DEBUG_PUSH
11039 xmlGenericError(xmlGenericErrorContext,
11040 "PP: entering DTD\n");
11041#endif
11042 } else {
11043 /*
11044 * Create and update the external subset.
11045 */
11046 ctxt->inSubset = 2;
11047 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
11048 (ctxt->sax->externalSubset != NULL))
11049 ctxt->sax->externalSubset(ctxt->userData,
11050 ctxt->intSubName, ctxt->extSubSystem,
11051 ctxt->extSubURI);
11052 ctxt->inSubset = 0;
Daniel Veillardac4118d2008-01-11 05:27:32 +000011053 xmlCleanSpecialAttr(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011054 ctxt->instate = XML_PARSER_PROLOG;
11055#ifdef DEBUG_PUSH
11056 xmlGenericError(xmlGenericErrorContext,
11057 "PP: entering PROLOG\n");
11058#endif
11059 }
11060 } else if ((cur == '<') && (next == '!') &&
11061 (avail < 9)) {
11062 goto done;
11063 } else {
11064 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +000011065 ctxt->progressive = 1;
11066 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Owen Taylor3473f882001-02-23 17:55:21 +000011067#ifdef DEBUG_PUSH
11068 xmlGenericError(xmlGenericErrorContext,
11069 "PP: entering START_TAG\n");
11070#endif
11071 }
11072 break;
Owen Taylor3473f882001-02-23 17:55:21 +000011073 case XML_PARSER_PROLOG:
11074 SKIP_BLANKS;
11075 if (ctxt->input->buf == NULL)
11076 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
11077 else
11078 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
11079 if (avail < 2)
11080 goto done;
11081 cur = ctxt->input->cur[0];
11082 next = ctxt->input->cur[1];
11083 if ((cur == '<') && (next == '?')) {
11084 if ((!terminate) &&
11085 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
11086 goto done;
11087#ifdef DEBUG_PUSH
11088 xmlGenericError(xmlGenericErrorContext,
11089 "PP: Parsing PI\n");
11090#endif
11091 xmlParsePI(ctxt);
11092 } else if ((cur == '<') && (next == '!') &&
11093 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
11094 if ((!terminate) &&
11095 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
11096 goto done;
11097#ifdef DEBUG_PUSH
11098 xmlGenericError(xmlGenericErrorContext,
11099 "PP: Parsing Comment\n");
11100#endif
11101 xmlParseComment(ctxt);
11102 ctxt->instate = XML_PARSER_PROLOG;
11103 } else if ((cur == '<') && (next == '!') &&
11104 (avail < 4)) {
11105 goto done;
11106 } else {
11107 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillard0df3bc32004-06-08 12:03:41 +000011108 if (ctxt->progressive == 0)
11109 ctxt->progressive = 1;
Daniel Veillarda880b122003-04-21 21:36:41 +000011110 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Owen Taylor3473f882001-02-23 17:55:21 +000011111#ifdef DEBUG_PUSH
11112 xmlGenericError(xmlGenericErrorContext,
11113 "PP: entering START_TAG\n");
11114#endif
11115 }
11116 break;
11117 case XML_PARSER_EPILOG:
11118 SKIP_BLANKS;
11119 if (ctxt->input->buf == NULL)
11120 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
11121 else
11122 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
11123 if (avail < 2)
11124 goto done;
11125 cur = ctxt->input->cur[0];
11126 next = ctxt->input->cur[1];
11127 if ((cur == '<') && (next == '?')) {
11128 if ((!terminate) &&
11129 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
11130 goto done;
11131#ifdef DEBUG_PUSH
11132 xmlGenericError(xmlGenericErrorContext,
11133 "PP: Parsing PI\n");
11134#endif
11135 xmlParsePI(ctxt);
11136 ctxt->instate = XML_PARSER_EPILOG;
11137 } else if ((cur == '<') && (next == '!') &&
11138 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
11139 if ((!terminate) &&
11140 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
11141 goto done;
11142#ifdef DEBUG_PUSH
11143 xmlGenericError(xmlGenericErrorContext,
11144 "PP: Parsing Comment\n");
11145#endif
11146 xmlParseComment(ctxt);
11147 ctxt->instate = XML_PARSER_EPILOG;
11148 } else if ((cur == '<') && (next == '!') &&
11149 (avail < 4)) {
11150 goto done;
11151 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011152 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011153 ctxt->instate = XML_PARSER_EOF;
11154#ifdef DEBUG_PUSH
11155 xmlGenericError(xmlGenericErrorContext,
11156 "PP: entering EOF\n");
11157#endif
Daniel Veillard8d24cc12002-03-05 15:41:29 +000011158 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +000011159 ctxt->sax->endDocument(ctxt->userData);
11160 goto done;
11161 }
11162 break;
Owen Taylor3473f882001-02-23 17:55:21 +000011163 case XML_PARSER_DTD: {
11164 /*
11165 * Sorry but progressive parsing of the internal subset
11166 * is not expected to be supported. We first check that
11167 * the full content of the internal subset is available and
11168 * the parsing is launched only at that point.
11169 * Internal subset ends up with "']' S? '>'" in an unescaped
11170 * section and not in a ']]>' sequence which are conditional
11171 * sections (whoever argued to keep that crap in XML deserves
11172 * a place in hell !).
11173 */
11174 int base, i;
11175 xmlChar *buf;
11176 xmlChar quote = 0;
11177
11178 base = ctxt->input->cur - ctxt->input->base;
11179 if (base < 0) return(0);
11180 if (ctxt->checkIndex > base)
11181 base = ctxt->checkIndex;
11182 buf = ctxt->input->buf->buffer->content;
11183 for (;(unsigned int) base < ctxt->input->buf->buffer->use;
11184 base++) {
11185 if (quote != 0) {
11186 if (buf[base] == quote)
11187 quote = 0;
11188 continue;
11189 }
Daniel Veillard036143b2004-02-12 11:57:52 +000011190 if ((quote == 0) && (buf[base] == '<')) {
11191 int found = 0;
11192 /* special handling of comments */
11193 if (((unsigned int) base + 4 <
11194 ctxt->input->buf->buffer->use) &&
11195 (buf[base + 1] == '!') &&
11196 (buf[base + 2] == '-') &&
11197 (buf[base + 3] == '-')) {
11198 for (;(unsigned int) base + 3 <
11199 ctxt->input->buf->buffer->use; base++) {
11200 if ((buf[base] == '-') &&
11201 (buf[base + 1] == '-') &&
11202 (buf[base + 2] == '>')) {
11203 found = 1;
11204 base += 2;
11205 break;
11206 }
11207 }
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000011208 if (!found) {
11209#if 0
11210 fprintf(stderr, "unfinished comment\n");
11211#endif
11212 break; /* for */
11213 }
Daniel Veillard036143b2004-02-12 11:57:52 +000011214 continue;
11215 }
11216 }
Owen Taylor3473f882001-02-23 17:55:21 +000011217 if (buf[base] == '"') {
11218 quote = '"';
11219 continue;
11220 }
11221 if (buf[base] == '\'') {
11222 quote = '\'';
11223 continue;
11224 }
11225 if (buf[base] == ']') {
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000011226#if 0
11227 fprintf(stderr, "%c%c%c%c: ", buf[base],
11228 buf[base + 1], buf[base + 2], buf[base + 3]);
11229#endif
Owen Taylor3473f882001-02-23 17:55:21 +000011230 if ((unsigned int) base +1 >=
11231 ctxt->input->buf->buffer->use)
11232 break;
11233 if (buf[base + 1] == ']') {
11234 /* conditional crap, skip both ']' ! */
11235 base++;
11236 continue;
11237 }
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000011238 for (i = 1;
Owen Taylor3473f882001-02-23 17:55:21 +000011239 (unsigned int) base + i < ctxt->input->buf->buffer->use;
11240 i++) {
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000011241 if (buf[base + i] == '>') {
11242#if 0
11243 fprintf(stderr, "found\n");
11244#endif
Owen Taylor3473f882001-02-23 17:55:21 +000011245 goto found_end_int_subset;
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000011246 }
11247 if (!IS_BLANK_CH(buf[base + i])) {
11248#if 0
11249 fprintf(stderr, "not found\n");
11250#endif
11251 goto not_end_of_int_subset;
11252 }
Owen Taylor3473f882001-02-23 17:55:21 +000011253 }
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000011254#if 0
11255 fprintf(stderr, "end of stream\n");
11256#endif
Owen Taylor3473f882001-02-23 17:55:21 +000011257 break;
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000011258
Owen Taylor3473f882001-02-23 17:55:21 +000011259 }
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000011260not_end_of_int_subset:
11261 continue; /* for */
Owen Taylor3473f882001-02-23 17:55:21 +000011262 }
11263 /*
11264 * We didn't found the end of the Internal subset
11265 */
Owen Taylor3473f882001-02-23 17:55:21 +000011266#ifdef DEBUG_PUSH
11267 if (next == 0)
11268 xmlGenericError(xmlGenericErrorContext,
11269 "PP: lookup of int subset end filed\n");
11270#endif
11271 goto done;
11272
11273found_end_int_subset:
11274 xmlParseInternalSubset(ctxt);
11275 ctxt->inSubset = 2;
11276 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
11277 (ctxt->sax->externalSubset != NULL))
11278 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
11279 ctxt->extSubSystem, ctxt->extSubURI);
11280 ctxt->inSubset = 0;
Daniel Veillardac4118d2008-01-11 05:27:32 +000011281 xmlCleanSpecialAttr(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011282 ctxt->instate = XML_PARSER_PROLOG;
11283 ctxt->checkIndex = 0;
11284#ifdef DEBUG_PUSH
11285 xmlGenericError(xmlGenericErrorContext,
11286 "PP: entering PROLOG\n");
11287#endif
11288 break;
11289 }
11290 case XML_PARSER_COMMENT:
11291 xmlGenericError(xmlGenericErrorContext,
11292 "PP: internal error, state == COMMENT\n");
11293 ctxt->instate = XML_PARSER_CONTENT;
11294#ifdef DEBUG_PUSH
11295 xmlGenericError(xmlGenericErrorContext,
11296 "PP: entering CONTENT\n");
11297#endif
11298 break;
Daniel Veillarda880b122003-04-21 21:36:41 +000011299 case XML_PARSER_IGNORE:
11300 xmlGenericError(xmlGenericErrorContext,
11301 "PP: internal error, state == IGNORE");
11302 ctxt->instate = XML_PARSER_DTD;
11303#ifdef DEBUG_PUSH
11304 xmlGenericError(xmlGenericErrorContext,
11305 "PP: entering DTD\n");
11306#endif
11307 break;
Owen Taylor3473f882001-02-23 17:55:21 +000011308 case XML_PARSER_PI:
11309 xmlGenericError(xmlGenericErrorContext,
11310 "PP: internal error, state == PI\n");
11311 ctxt->instate = XML_PARSER_CONTENT;
11312#ifdef DEBUG_PUSH
11313 xmlGenericError(xmlGenericErrorContext,
11314 "PP: entering CONTENT\n");
11315#endif
11316 break;
11317 case XML_PARSER_ENTITY_DECL:
11318 xmlGenericError(xmlGenericErrorContext,
11319 "PP: internal error, state == ENTITY_DECL\n");
11320 ctxt->instate = XML_PARSER_DTD;
11321#ifdef DEBUG_PUSH
11322 xmlGenericError(xmlGenericErrorContext,
11323 "PP: entering DTD\n");
11324#endif
11325 break;
11326 case XML_PARSER_ENTITY_VALUE:
11327 xmlGenericError(xmlGenericErrorContext,
11328 "PP: internal error, state == ENTITY_VALUE\n");
11329 ctxt->instate = XML_PARSER_CONTENT;
11330#ifdef DEBUG_PUSH
11331 xmlGenericError(xmlGenericErrorContext,
11332 "PP: entering DTD\n");
11333#endif
11334 break;
11335 case XML_PARSER_ATTRIBUTE_VALUE:
11336 xmlGenericError(xmlGenericErrorContext,
11337 "PP: internal error, state == ATTRIBUTE_VALUE\n");
11338 ctxt->instate = XML_PARSER_START_TAG;
11339#ifdef DEBUG_PUSH
11340 xmlGenericError(xmlGenericErrorContext,
11341 "PP: entering START_TAG\n");
11342#endif
11343 break;
11344 case XML_PARSER_SYSTEM_LITERAL:
11345 xmlGenericError(xmlGenericErrorContext,
11346 "PP: internal error, state == SYSTEM_LITERAL\n");
11347 ctxt->instate = XML_PARSER_START_TAG;
11348#ifdef DEBUG_PUSH
11349 xmlGenericError(xmlGenericErrorContext,
11350 "PP: entering START_TAG\n");
11351#endif
11352 break;
Daniel Veillard4a7ae502002-02-18 19:18:17 +000011353 case XML_PARSER_PUBLIC_LITERAL:
11354 xmlGenericError(xmlGenericErrorContext,
11355 "PP: internal error, state == PUBLIC_LITERAL\n");
11356 ctxt->instate = XML_PARSER_START_TAG;
11357#ifdef DEBUG_PUSH
11358 xmlGenericError(xmlGenericErrorContext,
11359 "PP: entering START_TAG\n");
11360#endif
11361 break;
Owen Taylor3473f882001-02-23 17:55:21 +000011362 }
11363 }
11364done:
11365#ifdef DEBUG_PUSH
11366 xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret);
11367#endif
11368 return(ret);
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011369encoding_error:
11370 {
11371 char buffer[150];
11372
11373 snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
11374 ctxt->input->cur[0], ctxt->input->cur[1],
11375 ctxt->input->cur[2], ctxt->input->cur[3]);
11376 __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
11377 "Input is not proper UTF-8, indicate encoding !\n%s",
11378 BAD_CAST buffer, NULL);
11379 }
11380 return(0);
Owen Taylor3473f882001-02-23 17:55:21 +000011381}
11382
11383/**
Owen Taylor3473f882001-02-23 17:55:21 +000011384 * xmlParseChunk:
11385 * @ctxt: an XML parser context
11386 * @chunk: an char array
11387 * @size: the size in byte of the chunk
11388 * @terminate: last chunk indicator
11389 *
11390 * Parse a Chunk of memory
11391 *
11392 * Returns zero if no error, the xmlParserErrors otherwise.
11393 */
11394int
11395xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
11396 int terminate) {
Daniel Veillarda617e242006-01-09 14:38:44 +000011397 int end_in_lf = 0;
11398
Daniel Veillard36e5cd52004-11-02 14:52:23 +000011399 if (ctxt == NULL)
11400 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard14412512005-01-21 23:53:26 +000011401 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000011402 return(ctxt->errNo);
Daniel Veillard309f81d2003-09-23 09:02:53 +000011403 if (ctxt->instate == XML_PARSER_START)
11404 xmlDetectSAX2(ctxt);
Daniel Veillarda617e242006-01-09 14:38:44 +000011405 if ((size > 0) && (chunk != NULL) && (!terminate) &&
11406 (chunk[size - 1] == '\r')) {
11407 end_in_lf = 1;
11408 size--;
11409 }
Owen Taylor3473f882001-02-23 17:55:21 +000011410 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
11411 (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) {
11412 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
11413 int cur = ctxt->input->cur - ctxt->input->base;
William M. Bracka3215c72004-07-31 16:24:01 +000011414 int res;
Owen Taylor3473f882001-02-23 17:55:21 +000011415
William M. Bracka3215c72004-07-31 16:24:01 +000011416 res =xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
11417 if (res < 0) {
11418 ctxt->errNo = XML_PARSER_EOF;
11419 ctxt->disableSAX = 1;
11420 return (XML_PARSER_EOF);
11421 }
Owen Taylor3473f882001-02-23 17:55:21 +000011422 ctxt->input->base = ctxt->input->buf->buffer->content + base;
11423 ctxt->input->cur = ctxt->input->base + cur;
Daniel Veillard48b2f892001-02-25 16:11:03 +000011424 ctxt->input->end =
11425 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000011426#ifdef DEBUG_PUSH
11427 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
11428#endif
11429
Owen Taylor3473f882001-02-23 17:55:21 +000011430 } else if (ctxt->instate != XML_PARSER_EOF) {
11431 if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
11432 xmlParserInputBufferPtr in = ctxt->input->buf;
11433 if ((in->encoder != NULL) && (in->buffer != NULL) &&
11434 (in->raw != NULL)) {
11435 int nbchars;
11436
11437 nbchars = xmlCharEncInFunc(in->encoder, in->buffer, in->raw);
11438 if (nbchars < 0) {
Daniel Veillard24eb9782003-10-04 21:08:09 +000011439 /* TODO 2.6.0 */
Owen Taylor3473f882001-02-23 17:55:21 +000011440 xmlGenericError(xmlGenericErrorContext,
11441 "xmlParseChunk: encoder error\n");
11442 return(XML_ERR_INVALID_ENCODING);
11443 }
11444 }
11445 }
11446 }
11447 xmlParseTryOrFinish(ctxt, terminate);
Daniel Veillarda617e242006-01-09 14:38:44 +000011448 if ((end_in_lf == 1) && (ctxt->input != NULL) &&
11449 (ctxt->input->buf != NULL)) {
11450 xmlParserInputBufferPush(ctxt->input->buf, 1, "\r");
11451 }
Daniel Veillard14412512005-01-21 23:53:26 +000011452 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000011453 return(ctxt->errNo);
Owen Taylor3473f882001-02-23 17:55:21 +000011454 if (terminate) {
11455 /*
11456 * Check for termination
11457 */
Daniel Veillard36e5cd52004-11-02 14:52:23 +000011458 int avail = 0;
11459
11460 if (ctxt->input != NULL) {
Daniel Veillard819d5cb2002-10-14 11:15:18 +000011461 if (ctxt->input->buf == NULL)
Daniel Veillard36e5cd52004-11-02 14:52:23 +000011462 avail = ctxt->input->length -
11463 (ctxt->input->cur - ctxt->input->base);
11464 else
11465 avail = ctxt->input->buf->buffer->use -
11466 (ctxt->input->cur - ctxt->input->base);
11467 }
Daniel Veillard819d5cb2002-10-14 11:15:18 +000011468
Owen Taylor3473f882001-02-23 17:55:21 +000011469 if ((ctxt->instate != XML_PARSER_EOF) &&
11470 (ctxt->instate != XML_PARSER_EPILOG)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011471 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011472 }
Daniel Veillard819d5cb2002-10-14 11:15:18 +000011473 if ((ctxt->instate == XML_PARSER_EPILOG) && (avail > 0)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011474 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Daniel Veillard819d5cb2002-10-14 11:15:18 +000011475 }
Owen Taylor3473f882001-02-23 17:55:21 +000011476 if (ctxt->instate != XML_PARSER_EOF) {
Daniel Veillard8d24cc12002-03-05 15:41:29 +000011477 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +000011478 ctxt->sax->endDocument(ctxt->userData);
11479 }
11480 ctxt->instate = XML_PARSER_EOF;
11481 }
11482 return((xmlParserErrors) ctxt->errNo);
11483}
11484
11485/************************************************************************
11486 * *
11487 * I/O front end functions to the parser *
11488 * *
11489 ************************************************************************/
11490
11491/**
Owen Taylor3473f882001-02-23 17:55:21 +000011492 * xmlCreatePushParserCtxt:
11493 * @sax: a SAX handler
11494 * @user_data: The user data returned on SAX callbacks
11495 * @chunk: a pointer to an array of chars
11496 * @size: number of chars in the array
11497 * @filename: an optional file name or URI
11498 *
Daniel Veillard176d99f2002-07-06 19:22:28 +000011499 * Create a parser context for using the XML parser in push mode.
11500 * If @buffer and @size are non-NULL, the data is used to detect
11501 * the encoding. The remaining characters will be parsed so they
11502 * don't need to be fed in again through xmlParseChunk.
Owen Taylor3473f882001-02-23 17:55:21 +000011503 * To allow content encoding detection, @size should be >= 4
11504 * The value of @filename is used for fetching external entities
11505 * and error/warning reports.
11506 *
11507 * Returns the new parser context or NULL
11508 */
Daniel Veillard176d99f2002-07-06 19:22:28 +000011509
Owen Taylor3473f882001-02-23 17:55:21 +000011510xmlParserCtxtPtr
11511xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
11512 const char *chunk, int size, const char *filename) {
11513 xmlParserCtxtPtr ctxt;
11514 xmlParserInputPtr inputStream;
11515 xmlParserInputBufferPtr buf;
11516 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
11517
11518 /*
11519 * plug some encoding conversion routines
11520 */
11521 if ((chunk != NULL) && (size >= 4))
11522 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
11523
11524 buf = xmlAllocParserInputBuffer(enc);
11525 if (buf == NULL) return(NULL);
11526
11527 ctxt = xmlNewParserCtxt();
11528 if (ctxt == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +000011529 xmlErrMemory(NULL, "creating parser: out of memory\n");
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000011530 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000011531 return(NULL);
11532 }
Daniel Veillard03a53c32004-10-26 16:06:51 +000011533 ctxt->dictNames = 1;
Daniel Veillarde57ec792003-09-10 10:50:59 +000011534 ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 * sizeof(xmlChar *));
11535 if (ctxt->pushTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011536 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +000011537 xmlFreeParserInputBuffer(buf);
11538 xmlFreeParserCtxt(ctxt);
11539 return(NULL);
11540 }
Owen Taylor3473f882001-02-23 17:55:21 +000011541 if (sax != NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +000011542#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard092643b2003-09-25 14:29:29 +000011543 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Daniel Veillard81273902003-09-30 00:43:48 +000011544#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000011545 xmlFree(ctxt->sax);
11546 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
11547 if (ctxt->sax == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011548 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000011549 xmlFreeParserInputBuffer(buf);
11550 xmlFreeParserCtxt(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011551 return(NULL);
11552 }
Daniel Veillard5ea30d72004-11-08 11:54:28 +000011553 memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
11554 if (sax->initialized == XML_SAX2_MAGIC)
11555 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
11556 else
11557 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
Owen Taylor3473f882001-02-23 17:55:21 +000011558 if (user_data != NULL)
11559 ctxt->userData = user_data;
11560 }
11561 if (filename == NULL) {
11562 ctxt->directory = NULL;
11563 } else {
11564 ctxt->directory = xmlParserGetDirectory(filename);
11565 }
11566
11567 inputStream = xmlNewInputStream(ctxt);
11568 if (inputStream == NULL) {
11569 xmlFreeParserCtxt(ctxt);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000011570 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000011571 return(NULL);
11572 }
11573
11574 if (filename == NULL)
11575 inputStream->filename = NULL;
William M. Bracka3215c72004-07-31 16:24:01 +000011576 else {
Daniel Veillardf4862f02002-09-10 11:13:43 +000011577 inputStream->filename = (char *)
Igor Zlatkovic5f9fada2003-02-19 14:51:00 +000011578 xmlCanonicPath((const xmlChar *) filename);
William M. Bracka3215c72004-07-31 16:24:01 +000011579 if (inputStream->filename == NULL) {
11580 xmlFreeParserCtxt(ctxt);
11581 xmlFreeParserInputBuffer(buf);
11582 return(NULL);
11583 }
11584 }
Owen Taylor3473f882001-02-23 17:55:21 +000011585 inputStream->buf = buf;
11586 inputStream->base = inputStream->buf->buffer->content;
11587 inputStream->cur = inputStream->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +000011588 inputStream->end =
11589 &inputStream->buf->buffer->content[inputStream->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000011590
11591 inputPush(ctxt, inputStream);
11592
William M. Brack3a1cd212005-02-11 14:35:54 +000011593 /*
11594 * If the caller didn't provide an initial 'chunk' for determining
11595 * the encoding, we set the context to XML_CHAR_ENCODING_NONE so
11596 * that it can be automatically determined later
11597 */
11598 if ((size == 0) || (chunk == NULL)) {
11599 ctxt->charset = XML_CHAR_ENCODING_NONE;
11600 } else if ((ctxt->input != NULL) && (ctxt->input->buf != NULL)) {
Daniel Veillardaa39a0f2002-01-06 12:47:22 +000011601 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
11602 int cur = ctxt->input->cur - ctxt->input->base;
11603
Owen Taylor3473f882001-02-23 17:55:21 +000011604 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
Daniel Veillardaa39a0f2002-01-06 12:47:22 +000011605
11606 ctxt->input->base = ctxt->input->buf->buffer->content + base;
11607 ctxt->input->cur = ctxt->input->base + cur;
11608 ctxt->input->end =
11609 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000011610#ifdef DEBUG_PUSH
11611 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
11612#endif
11613 }
11614
Daniel Veillard0e4cd172001-06-28 12:13:56 +000011615 if (enc != XML_CHAR_ENCODING_NONE) {
11616 xmlSwitchEncoding(ctxt, enc);
11617 }
11618
Owen Taylor3473f882001-02-23 17:55:21 +000011619 return(ctxt);
11620}
Daniel Veillard73b013f2003-09-30 12:36:01 +000011621#endif /* LIBXML_PUSH_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000011622
11623/**
Daniel Veillard39e5c892005-07-03 22:48:50 +000011624 * xmlStopParser:
11625 * @ctxt: an XML parser context
11626 *
11627 * Blocks further parser processing
11628 */
11629void
11630xmlStopParser(xmlParserCtxtPtr ctxt) {
11631 if (ctxt == NULL)
11632 return;
11633 ctxt->instate = XML_PARSER_EOF;
11634 ctxt->disableSAX = 1;
11635 if (ctxt->input != NULL) {
11636 ctxt->input->cur = BAD_CAST"";
11637 ctxt->input->base = ctxt->input->cur;
11638 }
11639}
11640
11641/**
Owen Taylor3473f882001-02-23 17:55:21 +000011642 * xmlCreateIOParserCtxt:
11643 * @sax: a SAX handler
11644 * @user_data: The user data returned on SAX callbacks
11645 * @ioread: an I/O read function
11646 * @ioclose: an I/O close function
11647 * @ioctx: an I/O handler
11648 * @enc: the charset encoding if known
11649 *
11650 * Create a parser context for using the XML parser with an existing
11651 * I/O stream
11652 *
11653 * Returns the new parser context or NULL
11654 */
11655xmlParserCtxtPtr
11656xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
11657 xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
11658 void *ioctx, xmlCharEncoding enc) {
11659 xmlParserCtxtPtr ctxt;
11660 xmlParserInputPtr inputStream;
11661 xmlParserInputBufferPtr buf;
Daniel Veillard42595322004-11-08 10:52:06 +000011662
11663 if (ioread == NULL) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011664
11665 buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
11666 if (buf == NULL) return(NULL);
11667
11668 ctxt = xmlNewParserCtxt();
11669 if (ctxt == NULL) {
Daniel Veillardf0af8ec2005-07-08 17:27:33 +000011670 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000011671 return(NULL);
11672 }
11673 if (sax != NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +000011674#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard092643b2003-09-25 14:29:29 +000011675 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Daniel Veillard81273902003-09-30 00:43:48 +000011676#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000011677 xmlFree(ctxt->sax);
11678 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
11679 if (ctxt->sax == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011680 xmlErrMemory(ctxt, NULL);
Daniel Veillardf0af8ec2005-07-08 17:27:33 +000011681 xmlFreeParserCtxt(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011682 return(NULL);
11683 }
Daniel Veillard5ea30d72004-11-08 11:54:28 +000011684 memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
11685 if (sax->initialized == XML_SAX2_MAGIC)
11686 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
11687 else
11688 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
Owen Taylor3473f882001-02-23 17:55:21 +000011689 if (user_data != NULL)
11690 ctxt->userData = user_data;
11691 }
11692
11693 inputStream = xmlNewIOInputStream(ctxt, buf, enc);
11694 if (inputStream == NULL) {
11695 xmlFreeParserCtxt(ctxt);
11696 return(NULL);
11697 }
11698 inputPush(ctxt, inputStream);
11699
11700 return(ctxt);
11701}
11702
Daniel Veillard4432df22003-09-28 18:58:27 +000011703#ifdef LIBXML_VALID_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000011704/************************************************************************
11705 * *
Daniel Veillardcbaf3992001-12-31 16:16:02 +000011706 * Front ends when parsing a DTD *
Owen Taylor3473f882001-02-23 17:55:21 +000011707 * *
11708 ************************************************************************/
11709
11710/**
11711 * xmlIOParseDTD:
11712 * @sax: the SAX handler block or NULL
11713 * @input: an Input Buffer
11714 * @enc: the charset encoding if known
11715 *
11716 * Load and parse a DTD
11717 *
11718 * Returns the resulting xmlDtdPtr or NULL in case of error.
Daniel Veillard402b3442006-10-13 10:28:21 +000011719 * @input will be freed by the function in any case.
Owen Taylor3473f882001-02-23 17:55:21 +000011720 */
11721
11722xmlDtdPtr
11723xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
11724 xmlCharEncoding enc) {
11725 xmlDtdPtr ret = NULL;
11726 xmlParserCtxtPtr ctxt;
11727 xmlParserInputPtr pinput = NULL;
Daniel Veillard87a764e2001-06-20 17:41:10 +000011728 xmlChar start[4];
Owen Taylor3473f882001-02-23 17:55:21 +000011729
11730 if (input == NULL)
11731 return(NULL);
11732
11733 ctxt = xmlNewParserCtxt();
11734 if (ctxt == NULL) {
Daniel Veillard402b3442006-10-13 10:28:21 +000011735 xmlFreeParserInputBuffer(input);
Owen Taylor3473f882001-02-23 17:55:21 +000011736 return(NULL);
11737 }
11738
11739 /*
11740 * Set-up the SAX context
11741 */
11742 if (sax != NULL) {
11743 if (ctxt->sax != NULL)
11744 xmlFree(ctxt->sax);
11745 ctxt->sax = sax;
Daniel Veillard500a1de2004-03-22 15:22:58 +000011746 ctxt->userData = ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000011747 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000011748 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011749
11750 /*
11751 * generate a parser input from the I/O handler
11752 */
11753
Daniel Veillard43caefb2003-12-07 19:32:22 +000011754 pinput = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
Owen Taylor3473f882001-02-23 17:55:21 +000011755 if (pinput == NULL) {
11756 if (sax != NULL) ctxt->sax = NULL;
Daniel Veillard402b3442006-10-13 10:28:21 +000011757 xmlFreeParserInputBuffer(input);
Owen Taylor3473f882001-02-23 17:55:21 +000011758 xmlFreeParserCtxt(ctxt);
11759 return(NULL);
11760 }
11761
11762 /*
11763 * plug some encoding conversion routines here.
11764 */
Daniel Veillarda8f09ce2008-08-27 13:02:01 +000011765 if (xmlPushInput(ctxt, pinput) < 0) {
11766 if (sax != NULL) ctxt->sax = NULL;
11767 xmlFreeParserCtxt(ctxt);
11768 return(NULL);
11769 }
Daniel Veillard43caefb2003-12-07 19:32:22 +000011770 if (enc != XML_CHAR_ENCODING_NONE) {
11771 xmlSwitchEncoding(ctxt, enc);
11772 }
Owen Taylor3473f882001-02-23 17:55:21 +000011773
11774 pinput->filename = NULL;
11775 pinput->line = 1;
11776 pinput->col = 1;
11777 pinput->base = ctxt->input->cur;
11778 pinput->cur = ctxt->input->cur;
11779 pinput->free = NULL;
11780
11781 /*
11782 * let's parse that entity knowing it's an external subset.
11783 */
11784 ctxt->inSubset = 2;
11785 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
Daniel Veillardae0765b2008-07-31 19:54:59 +000011786 if (ctxt->myDoc == NULL) {
11787 xmlErrMemory(ctxt, "New Doc failed");
11788 return(NULL);
11789 }
11790 ctxt->myDoc->properties = XML_DOC_INTERNAL;
Owen Taylor3473f882001-02-23 17:55:21 +000011791 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
11792 BAD_CAST "none", BAD_CAST "none");
Daniel Veillard87a764e2001-06-20 17:41:10 +000011793
Daniel Veillard4aede2e2003-10-17 12:43:59 +000011794 if ((enc == XML_CHAR_ENCODING_NONE) &&
11795 ((ctxt->input->end - ctxt->input->cur) >= 4)) {
Daniel Veillard87a764e2001-06-20 17:41:10 +000011796 /*
11797 * Get the 4 first bytes and decode the charset
11798 * if enc != XML_CHAR_ENCODING_NONE
11799 * plug some encoding conversion routines.
11800 */
11801 start[0] = RAW;
11802 start[1] = NXT(1);
11803 start[2] = NXT(2);
11804 start[3] = NXT(3);
11805 enc = xmlDetectCharEncoding(start, 4);
11806 if (enc != XML_CHAR_ENCODING_NONE) {
11807 xmlSwitchEncoding(ctxt, enc);
11808 }
11809 }
11810
Owen Taylor3473f882001-02-23 17:55:21 +000011811 xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
11812
11813 if (ctxt->myDoc != NULL) {
11814 if (ctxt->wellFormed) {
11815 ret = ctxt->myDoc->extSubset;
11816 ctxt->myDoc->extSubset = NULL;
Daniel Veillard329456a2003-04-26 21:21:00 +000011817 if (ret != NULL) {
11818 xmlNodePtr tmp;
11819
11820 ret->doc = NULL;
11821 tmp = ret->children;
11822 while (tmp != NULL) {
11823 tmp->doc = NULL;
11824 tmp = tmp->next;
11825 }
11826 }
Owen Taylor3473f882001-02-23 17:55:21 +000011827 } else {
11828 ret = NULL;
11829 }
11830 xmlFreeDoc(ctxt->myDoc);
11831 ctxt->myDoc = NULL;
11832 }
11833 if (sax != NULL) ctxt->sax = NULL;
11834 xmlFreeParserCtxt(ctxt);
11835
11836 return(ret);
11837}
11838
11839/**
11840 * xmlSAXParseDTD:
11841 * @sax: the SAX handler block
11842 * @ExternalID: a NAME* containing the External ID of the DTD
11843 * @SystemID: a NAME* containing the URL to the DTD
11844 *
11845 * Load and parse an external subset.
11846 *
11847 * Returns the resulting xmlDtdPtr or NULL in case of error.
11848 */
11849
11850xmlDtdPtr
11851xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
11852 const xmlChar *SystemID) {
11853 xmlDtdPtr ret = NULL;
11854 xmlParserCtxtPtr ctxt;
11855 xmlParserInputPtr input = NULL;
11856 xmlCharEncoding enc;
Igor Zlatkovic07d59762004-08-24 19:12:51 +000011857 xmlChar* systemIdCanonic;
Owen Taylor3473f882001-02-23 17:55:21 +000011858
11859 if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
11860
11861 ctxt = xmlNewParserCtxt();
11862 if (ctxt == NULL) {
11863 return(NULL);
11864 }
11865
11866 /*
11867 * Set-up the SAX context
11868 */
11869 if (sax != NULL) {
11870 if (ctxt->sax != NULL)
11871 xmlFree(ctxt->sax);
11872 ctxt->sax = sax;
Daniel Veillardbf1e3d82003-08-14 23:57:26 +000011873 ctxt->userData = ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000011874 }
Igor Zlatkovic07d59762004-08-24 19:12:51 +000011875
11876 /*
11877 * Canonicalise the system ID
11878 */
11879 systemIdCanonic = xmlCanonicPath(SystemID);
Daniel Veillardc93a19f2004-10-04 11:53:20 +000011880 if ((SystemID != NULL) && (systemIdCanonic == NULL)) {
Igor Zlatkovic07d59762004-08-24 19:12:51 +000011881 xmlFreeParserCtxt(ctxt);
11882 return(NULL);
11883 }
Owen Taylor3473f882001-02-23 17:55:21 +000011884
11885 /*
11886 * Ask the Entity resolver to load the damn thing
11887 */
11888
11889 if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
Daniel Veillarda9557952006-10-12 12:53:15 +000011890 input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID,
11891 systemIdCanonic);
Owen Taylor3473f882001-02-23 17:55:21 +000011892 if (input == NULL) {
11893 if (sax != NULL) ctxt->sax = NULL;
11894 xmlFreeParserCtxt(ctxt);
Daniel Veillard34099b42004-11-04 17:34:35 +000011895 if (systemIdCanonic != NULL)
11896 xmlFree(systemIdCanonic);
Owen Taylor3473f882001-02-23 17:55:21 +000011897 return(NULL);
11898 }
11899
11900 /*
11901 * plug some encoding conversion routines here.
11902 */
Daniel Veillarda8f09ce2008-08-27 13:02:01 +000011903 if (xmlPushInput(ctxt, input) < 0) {
11904 if (sax != NULL) ctxt->sax = NULL;
11905 xmlFreeParserCtxt(ctxt);
11906 if (systemIdCanonic != NULL)
11907 xmlFree(systemIdCanonic);
11908 return(NULL);
11909 }
Daniel Veillard4aede2e2003-10-17 12:43:59 +000011910 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
11911 enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
11912 xmlSwitchEncoding(ctxt, enc);
11913 }
Owen Taylor3473f882001-02-23 17:55:21 +000011914
11915 if (input->filename == NULL)
Igor Zlatkovic07d59762004-08-24 19:12:51 +000011916 input->filename = (char *) systemIdCanonic;
11917 else
11918 xmlFree(systemIdCanonic);
Owen Taylor3473f882001-02-23 17:55:21 +000011919 input->line = 1;
11920 input->col = 1;
11921 input->base = ctxt->input->cur;
11922 input->cur = ctxt->input->cur;
11923 input->free = NULL;
11924
11925 /*
11926 * let's parse that entity knowing it's an external subset.
11927 */
11928 ctxt->inSubset = 2;
11929 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
Daniel Veillardae0765b2008-07-31 19:54:59 +000011930 if (ctxt->myDoc == NULL) {
11931 xmlErrMemory(ctxt, "New Doc failed");
11932 if (sax != NULL) ctxt->sax = NULL;
11933 xmlFreeParserCtxt(ctxt);
11934 return(NULL);
11935 }
11936 ctxt->myDoc->properties = XML_DOC_INTERNAL;
Owen Taylor3473f882001-02-23 17:55:21 +000011937 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
11938 ExternalID, SystemID);
11939 xmlParseExternalSubset(ctxt, ExternalID, SystemID);
11940
11941 if (ctxt->myDoc != NULL) {
11942 if (ctxt->wellFormed) {
11943 ret = ctxt->myDoc->extSubset;
11944 ctxt->myDoc->extSubset = NULL;
Daniel Veillardc5573462003-04-25 16:43:49 +000011945 if (ret != NULL) {
11946 xmlNodePtr tmp;
11947
11948 ret->doc = NULL;
11949 tmp = ret->children;
11950 while (tmp != NULL) {
11951 tmp->doc = NULL;
11952 tmp = tmp->next;
11953 }
11954 }
Owen Taylor3473f882001-02-23 17:55:21 +000011955 } else {
11956 ret = NULL;
11957 }
11958 xmlFreeDoc(ctxt->myDoc);
11959 ctxt->myDoc = NULL;
11960 }
11961 if (sax != NULL) ctxt->sax = NULL;
11962 xmlFreeParserCtxt(ctxt);
Daniel Veillard0161e632008-08-28 15:36:32 +000011963
Owen Taylor3473f882001-02-23 17:55:21 +000011964 return(ret);
11965}
11966
Daniel Veillard4432df22003-09-28 18:58:27 +000011967
Owen Taylor3473f882001-02-23 17:55:21 +000011968/**
11969 * xmlParseDTD:
11970 * @ExternalID: a NAME* containing the External ID of the DTD
11971 * @SystemID: a NAME* containing the URL to the DTD
11972 *
11973 * Load and parse an external subset.
Daniel Veillard0161e632008-08-28 15:36:32 +000011974 *
Owen Taylor3473f882001-02-23 17:55:21 +000011975 * Returns the resulting xmlDtdPtr or NULL in case of error.
11976 */
11977
11978xmlDtdPtr
11979xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
11980 return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
11981}
Daniel Veillard4432df22003-09-28 18:58:27 +000011982#endif /* LIBXML_VALID_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000011983
11984/************************************************************************
11985 * *
11986 * Front ends when parsing an Entity *
11987 * *
11988 ************************************************************************/
11989
11990/**
Owen Taylor3473f882001-02-23 17:55:21 +000011991 * xmlParseCtxtExternalEntity:
11992 * @ctx: the existing parsing context
11993 * @URL: the URL for the entity to load
11994 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +000011995 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +000011996 *
11997 * Parse an external general entity within an existing parsing context
11998 * An external general parsed entity is well-formed if it matches the
11999 * production labeled extParsedEnt.
12000 *
12001 * [78] extParsedEnt ::= TextDecl? content
12002 *
12003 * Returns 0 if the entity is well formed, -1 in case of args problem and
12004 * the parser error code otherwise
12005 */
12006
12007int
12008xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
Daniel Veillardcda96922001-08-21 10:56:31 +000012009 const xmlChar *ID, xmlNodePtr *lst) {
Owen Taylor3473f882001-02-23 17:55:21 +000012010 xmlParserCtxtPtr ctxt;
12011 xmlDocPtr newDoc;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012012 xmlNodePtr newRoot;
Owen Taylor3473f882001-02-23 17:55:21 +000012013 xmlSAXHandlerPtr oldsax = NULL;
12014 int ret = 0;
Daniel Veillard87a764e2001-06-20 17:41:10 +000012015 xmlChar start[4];
12016 xmlCharEncoding enc;
Daniel Veillard2937b3a2006-10-10 08:52:34 +000012017 xmlParserInputPtr inputStream;
12018 char *directory = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000012019
Daniel Veillardce682bc2004-11-05 17:22:25 +000012020 if (ctx == NULL) return(-1);
12021
Daniel Veillard0161e632008-08-28 15:36:32 +000012022 if (((ctx->depth > 40) && ((ctx->options & XML_PARSE_HUGE) == 0)) ||
12023 (ctx->depth > 1024)) {
Owen Taylor3473f882001-02-23 17:55:21 +000012024 return(XML_ERR_ENTITY_LOOP);
12025 }
12026
Daniel Veillardcda96922001-08-21 10:56:31 +000012027 if (lst != NULL)
12028 *lst = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000012029 if ((URL == NULL) && (ID == NULL))
12030 return(-1);
12031 if (ctx->myDoc == NULL) /* @@ relax but check for dereferences */
12032 return(-1);
12033
Daniel Veillard2937b3a2006-10-10 08:52:34 +000012034 ctxt = xmlNewParserCtxt();
12035 if (ctxt == NULL) {
12036 return(-1);
12037 }
Daniel Veillard40ec29a2008-07-30 12:35:40 +000012038
Owen Taylor3473f882001-02-23 17:55:21 +000012039 ctxt->userData = ctxt;
Daniel Veillarde2830f12003-01-08 17:47:49 +000012040 ctxt->_private = ctx->_private;
Daniel Veillard40ec29a2008-07-30 12:35:40 +000012041
Daniel Veillard2937b3a2006-10-10 08:52:34 +000012042 inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
12043 if (inputStream == NULL) {
12044 xmlFreeParserCtxt(ctxt);
12045 return(-1);
12046 }
12047
12048 inputPush(ctxt, inputStream);
12049
12050 if ((ctxt->directory == NULL) && (directory == NULL))
12051 directory = xmlParserGetDirectory((char *)URL);
12052 if ((ctxt->directory == NULL) && (directory != NULL))
12053 ctxt->directory = directory;
Daniel Veillard40ec29a2008-07-30 12:35:40 +000012054
Owen Taylor3473f882001-02-23 17:55:21 +000012055 oldsax = ctxt->sax;
12056 ctxt->sax = ctx->sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000012057 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000012058 newDoc = xmlNewDoc(BAD_CAST "1.0");
12059 if (newDoc == NULL) {
12060 xmlFreeParserCtxt(ctxt);
12061 return(-1);
12062 }
Daniel Veillardae0765b2008-07-31 19:54:59 +000012063 newDoc->properties = XML_DOC_INTERNAL;
Daniel Veillard03a53c32004-10-26 16:06:51 +000012064 if (ctx->myDoc->dict) {
12065 newDoc->dict = ctx->myDoc->dict;
12066 xmlDictReference(newDoc->dict);
12067 }
Owen Taylor3473f882001-02-23 17:55:21 +000012068 if (ctx->myDoc != NULL) {
12069 newDoc->intSubset = ctx->myDoc->intSubset;
12070 newDoc->extSubset = ctx->myDoc->extSubset;
12071 }
12072 if (ctx->myDoc->URL != NULL) {
12073 newDoc->URL = xmlStrdup(ctx->myDoc->URL);
12074 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012075 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
12076 if (newRoot == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000012077 ctxt->sax = oldsax;
12078 xmlFreeParserCtxt(ctxt);
12079 newDoc->intSubset = NULL;
12080 newDoc->extSubset = NULL;
12081 xmlFreeDoc(newDoc);
12082 return(-1);
12083 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012084 xmlAddChild((xmlNodePtr) newDoc, newRoot);
Owen Taylor3473f882001-02-23 17:55:21 +000012085 nodePush(ctxt, newDoc->children);
12086 if (ctx->myDoc == NULL) {
12087 ctxt->myDoc = newDoc;
12088 } else {
12089 ctxt->myDoc = ctx->myDoc;
12090 newDoc->children->doc = ctx->myDoc;
12091 }
12092
Daniel Veillard40ec29a2008-07-30 12:35:40 +000012093 /*
Daniel Veillard87a764e2001-06-20 17:41:10 +000012094 * Get the 4 first bytes and decode the charset
12095 * if enc != XML_CHAR_ENCODING_NONE
12096 * plug some encoding conversion routines.
12097 */
12098 GROW
Daniel Veillard4aede2e2003-10-17 12:43:59 +000012099 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
12100 start[0] = RAW;
12101 start[1] = NXT(1);
12102 start[2] = NXT(2);
12103 start[3] = NXT(3);
12104 enc = xmlDetectCharEncoding(start, 4);
12105 if (enc != XML_CHAR_ENCODING_NONE) {
12106 xmlSwitchEncoding(ctxt, enc);
12107 }
Daniel Veillard87a764e2001-06-20 17:41:10 +000012108 }
12109
Owen Taylor3473f882001-02-23 17:55:21 +000012110 /*
12111 * Parse a possible text declaration first
12112 */
Daniel Veillarda07050d2003-10-19 14:46:32 +000012113 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +000012114 xmlParseTextDecl(ctxt);
Daniel Veillard40ec29a2008-07-30 12:35:40 +000012115 /*
12116 * An XML-1.0 document can't reference an entity not XML-1.0
12117 */
12118 if ((xmlStrEqual(ctx->version, BAD_CAST "1.0")) &&
12119 (!xmlStrEqual(ctxt->input->version, BAD_CAST "1.0"))) {
12120 xmlFatalErrMsg(ctxt, XML_ERR_VERSION_MISMATCH,
12121 "Version mismatch between document and entity\n");
12122 }
Owen Taylor3473f882001-02-23 17:55:21 +000012123 }
12124
12125 /*
12126 * Doing validity checking on chunk doesn't make sense
12127 */
12128 ctxt->instate = XML_PARSER_CONTENT;
12129 ctxt->validate = ctx->validate;
Daniel Veillard5f8d1a32003-03-23 21:02:00 +000012130 ctxt->valid = ctx->valid;
Owen Taylor3473f882001-02-23 17:55:21 +000012131 ctxt->loadsubset = ctx->loadsubset;
12132 ctxt->depth = ctx->depth + 1;
12133 ctxt->replaceEntities = ctx->replaceEntities;
12134 if (ctxt->validate) {
12135 ctxt->vctxt.error = ctx->vctxt.error;
12136 ctxt->vctxt.warning = ctx->vctxt.warning;
Owen Taylor3473f882001-02-23 17:55:21 +000012137 } else {
12138 ctxt->vctxt.error = NULL;
12139 ctxt->vctxt.warning = NULL;
12140 }
Daniel Veillarda9142e72001-06-19 11:07:54 +000012141 ctxt->vctxt.nodeTab = NULL;
12142 ctxt->vctxt.nodeNr = 0;
12143 ctxt->vctxt.nodeMax = 0;
12144 ctxt->vctxt.node = NULL;
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000012145 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
12146 ctxt->dict = ctx->dict;
Daniel Veillardfd343dc2003-10-31 10:55:22 +000012147 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
12148 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
12149 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000012150 ctxt->dictNames = ctx->dictNames;
12151 ctxt->attsDefault = ctx->attsDefault;
12152 ctxt->attsSpecial = ctx->attsSpecial;
William M. Brack503b6102004-08-19 02:17:27 +000012153 ctxt->linenumbers = ctx->linenumbers;
Owen Taylor3473f882001-02-23 17:55:21 +000012154
12155 xmlParseContent(ctxt);
Daniel Veillard40ec29a2008-07-30 12:35:40 +000012156
Daniel Veillard5f8d1a32003-03-23 21:02:00 +000012157 ctx->validate = ctxt->validate;
12158 ctx->valid = ctxt->valid;
Owen Taylor3473f882001-02-23 17:55:21 +000012159 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012160 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000012161 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012162 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000012163 }
12164 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012165 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000012166 }
12167
12168 if (!ctxt->wellFormed) {
12169 if (ctxt->errNo == 0)
12170 ret = 1;
12171 else
12172 ret = ctxt->errNo;
12173 } else {
Daniel Veillardcda96922001-08-21 10:56:31 +000012174 if (lst != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000012175 xmlNodePtr cur;
12176
12177 /*
12178 * Return the newly created nodeset after unlinking it from
12179 * they pseudo parent.
12180 */
12181 cur = newDoc->children->children;
Daniel Veillardcda96922001-08-21 10:56:31 +000012182 *lst = cur;
Owen Taylor3473f882001-02-23 17:55:21 +000012183 while (cur != NULL) {
12184 cur->parent = NULL;
12185 cur = cur->next;
12186 }
12187 newDoc->children->children = NULL;
12188 }
12189 ret = 0;
12190 }
12191 ctxt->sax = oldsax;
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000012192 ctxt->dict = NULL;
12193 ctxt->attsDefault = NULL;
12194 ctxt->attsSpecial = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000012195 xmlFreeParserCtxt(ctxt);
12196 newDoc->intSubset = NULL;
12197 newDoc->extSubset = NULL;
12198 xmlFreeDoc(newDoc);
Daniel Veillard40ec29a2008-07-30 12:35:40 +000012199
Owen Taylor3473f882001-02-23 17:55:21 +000012200 return(ret);
12201}
12202
12203/**
Daniel Veillard257d9102001-05-08 10:41:44 +000012204 * xmlParseExternalEntityPrivate:
Owen Taylor3473f882001-02-23 17:55:21 +000012205 * @doc: the document the chunk pertains to
Daniel Veillarda97a19b2001-05-20 13:19:52 +000012206 * @oldctxt: the previous parser context if available
Owen Taylor3473f882001-02-23 17:55:21 +000012207 * @sax: the SAX handler bloc (possibly NULL)
12208 * @user_data: The user data returned on SAX callbacks (possibly NULL)
12209 * @depth: Used for loop detection, use 0
12210 * @URL: the URL for the entity to load
12211 * @ID: the System ID for the entity to load
12212 * @list: the return value for the set of parsed nodes
12213 *
Daniel Veillard257d9102001-05-08 10:41:44 +000012214 * Private version of xmlParseExternalEntity()
Owen Taylor3473f882001-02-23 17:55:21 +000012215 *
12216 * Returns 0 if the entity is well formed, -1 in case of args problem and
12217 * the parser error code otherwise
12218 */
12219
Daniel Veillard7d515752003-09-26 19:12:37 +000012220static xmlParserErrors
Daniel Veillarda97a19b2001-05-20 13:19:52 +000012221xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
12222 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +000012223 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +000012224 const xmlChar *ID, xmlNodePtr *list) {
Owen Taylor3473f882001-02-23 17:55:21 +000012225 xmlParserCtxtPtr ctxt;
12226 xmlDocPtr newDoc;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012227 xmlNodePtr newRoot;
Owen Taylor3473f882001-02-23 17:55:21 +000012228 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillard7d515752003-09-26 19:12:37 +000012229 xmlParserErrors ret = XML_ERR_OK;
Daniel Veillard87a764e2001-06-20 17:41:10 +000012230 xmlChar start[4];
12231 xmlCharEncoding enc;
Owen Taylor3473f882001-02-23 17:55:21 +000012232
Daniel Veillard0161e632008-08-28 15:36:32 +000012233 if (((depth > 40) &&
12234 ((oldctxt == NULL) || (oldctxt->options & XML_PARSE_HUGE) == 0)) ||
12235 (depth > 1024)) {
Owen Taylor3473f882001-02-23 17:55:21 +000012236 return(XML_ERR_ENTITY_LOOP);
12237 }
12238
Owen Taylor3473f882001-02-23 17:55:21 +000012239 if (list != NULL)
12240 *list = NULL;
12241 if ((URL == NULL) && (ID == NULL))
Daniel Veillard7d515752003-09-26 19:12:37 +000012242 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard30e76072006-03-09 14:13:55 +000012243 if (doc == NULL)
Daniel Veillard7d515752003-09-26 19:12:37 +000012244 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000012245
12246
12247 ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL);
William M. Brackb670e2e2003-09-27 01:05:55 +000012248 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
Owen Taylor3473f882001-02-23 17:55:21 +000012249 ctxt->userData = ctxt;
Daniel Veillarda97a19b2001-05-20 13:19:52 +000012250 if (oldctxt != NULL) {
12251 ctxt->_private = oldctxt->_private;
12252 ctxt->loadsubset = oldctxt->loadsubset;
12253 ctxt->validate = oldctxt->validate;
12254 ctxt->external = oldctxt->external;
Daniel Veillard44e1dd02003-02-21 23:23:28 +000012255 ctxt->record_info = oldctxt->record_info;
12256 ctxt->node_seq.maximum = oldctxt->node_seq.maximum;
12257 ctxt->node_seq.length = oldctxt->node_seq.length;
12258 ctxt->node_seq.buffer = oldctxt->node_seq.buffer;
Daniel Veillarda97a19b2001-05-20 13:19:52 +000012259 } else {
12260 /*
12261 * Doing validity checking on chunk without context
12262 * doesn't make sense
12263 */
12264 ctxt->_private = NULL;
12265 ctxt->validate = 0;
12266 ctxt->external = 2;
12267 ctxt->loadsubset = 0;
12268 }
Owen Taylor3473f882001-02-23 17:55:21 +000012269 if (sax != NULL) {
12270 oldsax = ctxt->sax;
12271 ctxt->sax = sax;
12272 if (user_data != NULL)
12273 ctxt->userData = user_data;
12274 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000012275 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000012276 newDoc = xmlNewDoc(BAD_CAST "1.0");
12277 if (newDoc == NULL) {
Daniel Veillard44e1dd02003-02-21 23:23:28 +000012278 ctxt->node_seq.maximum = 0;
12279 ctxt->node_seq.length = 0;
12280 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000012281 xmlFreeParserCtxt(ctxt);
Daniel Veillard7d515752003-09-26 19:12:37 +000012282 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000012283 }
Daniel Veillardae0765b2008-07-31 19:54:59 +000012284 newDoc->properties = XML_DOC_INTERNAL;
Daniel Veillard30e76072006-03-09 14:13:55 +000012285 newDoc->intSubset = doc->intSubset;
12286 newDoc->extSubset = doc->extSubset;
12287 newDoc->dict = doc->dict;
Daniel Veillard03a53c32004-10-26 16:06:51 +000012288 xmlDictReference(newDoc->dict);
12289
Owen Taylor3473f882001-02-23 17:55:21 +000012290 if (doc->URL != NULL) {
12291 newDoc->URL = xmlStrdup(doc->URL);
12292 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012293 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
12294 if (newRoot == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000012295 if (sax != NULL)
12296 ctxt->sax = oldsax;
Daniel Veillard44e1dd02003-02-21 23:23:28 +000012297 ctxt->node_seq.maximum = 0;
12298 ctxt->node_seq.length = 0;
12299 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000012300 xmlFreeParserCtxt(ctxt);
12301 newDoc->intSubset = NULL;
12302 newDoc->extSubset = NULL;
12303 xmlFreeDoc(newDoc);
Daniel Veillard7d515752003-09-26 19:12:37 +000012304 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000012305 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012306 xmlAddChild((xmlNodePtr) newDoc, newRoot);
Owen Taylor3473f882001-02-23 17:55:21 +000012307 nodePush(ctxt, newDoc->children);
Daniel Veillard30e76072006-03-09 14:13:55 +000012308 ctxt->myDoc = doc;
12309 newRoot->doc = doc;
Owen Taylor3473f882001-02-23 17:55:21 +000012310
Daniel Veillard0161e632008-08-28 15:36:32 +000012311 /*
Daniel Veillard87a764e2001-06-20 17:41:10 +000012312 * Get the 4 first bytes and decode the charset
12313 * if enc != XML_CHAR_ENCODING_NONE
12314 * plug some encoding conversion routines.
12315 */
12316 GROW;
Daniel Veillard4aede2e2003-10-17 12:43:59 +000012317 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
12318 start[0] = RAW;
12319 start[1] = NXT(1);
12320 start[2] = NXT(2);
12321 start[3] = NXT(3);
12322 enc = xmlDetectCharEncoding(start, 4);
12323 if (enc != XML_CHAR_ENCODING_NONE) {
12324 xmlSwitchEncoding(ctxt, enc);
12325 }
Daniel Veillard87a764e2001-06-20 17:41:10 +000012326 }
12327
Owen Taylor3473f882001-02-23 17:55:21 +000012328 /*
12329 * Parse a possible text declaration first
12330 */
Daniel Veillarda07050d2003-10-19 14:46:32 +000012331 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +000012332 xmlParseTextDecl(ctxt);
12333 }
12334
Owen Taylor3473f882001-02-23 17:55:21 +000012335 ctxt->instate = XML_PARSER_CONTENT;
Owen Taylor3473f882001-02-23 17:55:21 +000012336 ctxt->depth = depth;
12337
12338 xmlParseContent(ctxt);
Daniel Veillard0161e632008-08-28 15:36:32 +000012339
Daniel Veillard561b7f82002-03-20 21:55:57 +000012340 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012341 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard561b7f82002-03-20 21:55:57 +000012342 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012343 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000012344 }
12345 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012346 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000012347 }
12348
12349 if (!ctxt->wellFormed) {
12350 if (ctxt->errNo == 0)
Daniel Veillard7d515752003-09-26 19:12:37 +000012351 ret = XML_ERR_INTERNAL_ERROR;
Owen Taylor3473f882001-02-23 17:55:21 +000012352 else
William M. Brack7b9154b2003-09-27 19:23:50 +000012353 ret = (xmlParserErrors)ctxt->errNo;
Owen Taylor3473f882001-02-23 17:55:21 +000012354 } else {
12355 if (list != NULL) {
12356 xmlNodePtr cur;
12357
12358 /*
12359 * Return the newly created nodeset after unlinking it from
12360 * they pseudo parent.
12361 */
12362 cur = newDoc->children->children;
12363 *list = cur;
12364 while (cur != NULL) {
12365 cur->parent = NULL;
12366 cur = cur->next;
12367 }
12368 newDoc->children->children = NULL;
12369 }
Daniel Veillard7d515752003-09-26 19:12:37 +000012370 ret = XML_ERR_OK;
Owen Taylor3473f882001-02-23 17:55:21 +000012371 }
Daniel Veillard0161e632008-08-28 15:36:32 +000012372
12373 /*
12374 * Record in the parent context the number of entities replacement
12375 * done when parsing that reference.
12376 */
12377 oldctxt->nbentities += ctxt->nbentities;
12378 /*
12379 * Also record the size of the entity parsed
12380 */
12381 if (ctxt->input != NULL) {
12382 oldctxt->sizeentities += ctxt->input->consumed;
12383 oldctxt->sizeentities += (ctxt->input->cur - ctxt->input->base);
12384 }
12385 /*
12386 * And record the last error if any
12387 */
12388 if (ctxt->lastError.code != XML_ERR_OK)
12389 xmlCopyError(&ctxt->lastError, &oldctxt->lastError);
12390
Owen Taylor3473f882001-02-23 17:55:21 +000012391 if (sax != NULL)
12392 ctxt->sax = oldsax;
Daniel Veillard0046c0f2003-02-23 13:52:30 +000012393 oldctxt->node_seq.maximum = ctxt->node_seq.maximum;
12394 oldctxt->node_seq.length = ctxt->node_seq.length;
12395 oldctxt->node_seq.buffer = ctxt->node_seq.buffer;
Daniel Veillard44e1dd02003-02-21 23:23:28 +000012396 ctxt->node_seq.maximum = 0;
12397 ctxt->node_seq.length = 0;
12398 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000012399 xmlFreeParserCtxt(ctxt);
12400 newDoc->intSubset = NULL;
12401 newDoc->extSubset = NULL;
12402 xmlFreeDoc(newDoc);
Daniel Veillard0161e632008-08-28 15:36:32 +000012403
Owen Taylor3473f882001-02-23 17:55:21 +000012404 return(ret);
12405}
12406
Daniel Veillard81273902003-09-30 00:43:48 +000012407#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000012408/**
Daniel Veillard257d9102001-05-08 10:41:44 +000012409 * xmlParseExternalEntity:
12410 * @doc: the document the chunk pertains to
12411 * @sax: the SAX handler bloc (possibly NULL)
12412 * @user_data: The user data returned on SAX callbacks (possibly NULL)
12413 * @depth: Used for loop detection, use 0
12414 * @URL: the URL for the entity to load
12415 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +000012416 * @lst: the return value for the set of parsed nodes
Daniel Veillard257d9102001-05-08 10:41:44 +000012417 *
12418 * Parse an external general entity
12419 * An external general parsed entity is well-formed if it matches the
12420 * production labeled extParsedEnt.
12421 *
12422 * [78] extParsedEnt ::= TextDecl? content
12423 *
12424 * Returns 0 if the entity is well formed, -1 in case of args problem and
12425 * the parser error code otherwise
12426 */
12427
12428int
12429xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardcda96922001-08-21 10:56:31 +000012430 int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *lst) {
Daniel Veillarda97a19b2001-05-20 13:19:52 +000012431 return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL,
Daniel Veillardcda96922001-08-21 10:56:31 +000012432 ID, lst));
Daniel Veillard257d9102001-05-08 10:41:44 +000012433}
12434
12435/**
Daniel Veillarde020c3a2001-03-21 18:06:15 +000012436 * xmlParseBalancedChunkMemory:
Owen Taylor3473f882001-02-23 17:55:21 +000012437 * @doc: the document the chunk pertains to
12438 * @sax: the SAX handler bloc (possibly NULL)
12439 * @user_data: The user data returned on SAX callbacks (possibly NULL)
12440 * @depth: Used for loop detection, use 0
12441 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
Daniel Veillardcda96922001-08-21 10:56:31 +000012442 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +000012443 *
12444 * Parse a well-balanced chunk of an XML document
12445 * called by the parser
12446 * The allowed sequence for the Well Balanced Chunk is the one defined by
12447 * the content production in the XML grammar:
12448 *
12449 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
12450 *
12451 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
12452 * the parser error code otherwise
12453 */
12454
12455int
12456xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
Daniel Veillardcda96922001-08-21 10:56:31 +000012457 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) {
Daniel Veillard58e44c92002-08-02 22:19:49 +000012458 return xmlParseBalancedChunkMemoryRecover( doc, sax, user_data,
12459 depth, string, lst, 0 );
12460}
Daniel Veillard81273902003-09-30 00:43:48 +000012461#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillard58e44c92002-08-02 22:19:49 +000012462
12463/**
Daniel Veillard328f48c2002-11-15 15:24:34 +000012464 * xmlParseBalancedChunkMemoryInternal:
12465 * @oldctxt: the existing parsing context
12466 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
12467 * @user_data: the user data field for the parser context
12468 * @lst: the return value for the set of parsed nodes
12469 *
12470 *
12471 * Parse a well-balanced chunk of an XML document
12472 * called by the parser
12473 * The allowed sequence for the Well Balanced Chunk is the one defined by
12474 * the content production in the XML grammar:
12475 *
12476 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
12477 *
Daniel Veillard7d515752003-09-26 19:12:37 +000012478 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
12479 * error code otherwise
Daniel Veillard0161e632008-08-28 15:36:32 +000012480 *
Daniel Veillard328f48c2002-11-15 15:24:34 +000012481 * In case recover is set to 1, the nodelist will not be empty even if
Daniel Veillard0161e632008-08-28 15:36:32 +000012482 * the parsed chunk is not well balanced.
Daniel Veillard328f48c2002-11-15 15:24:34 +000012483 */
Daniel Veillard7d515752003-09-26 19:12:37 +000012484static xmlParserErrors
Daniel Veillard328f48c2002-11-15 15:24:34 +000012485xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
12486 const xmlChar *string, void *user_data, xmlNodePtr *lst) {
12487 xmlParserCtxtPtr ctxt;
Daniel Veillard68e9e742002-11-16 15:35:11 +000012488 xmlDocPtr newDoc = NULL;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012489 xmlNodePtr newRoot;
Daniel Veillard328f48c2002-11-15 15:24:34 +000012490 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000012491 xmlNodePtr content = NULL;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012492 xmlNodePtr last = NULL;
Daniel Veillard328f48c2002-11-15 15:24:34 +000012493 int size;
Daniel Veillard7d515752003-09-26 19:12:37 +000012494 xmlParserErrors ret = XML_ERR_OK;
Daniel Veillard328f48c2002-11-15 15:24:34 +000012495
Daniel Veillard0161e632008-08-28 15:36:32 +000012496 if (((oldctxt->depth > 40) && ((oldctxt->options & XML_PARSE_HUGE) == 0)) ||
12497 (oldctxt->depth > 1024)) {
Daniel Veillard328f48c2002-11-15 15:24:34 +000012498 return(XML_ERR_ENTITY_LOOP);
12499 }
12500
12501
12502 if (lst != NULL)
12503 *lst = NULL;
12504 if (string == NULL)
William M. Brack7b9154b2003-09-27 19:23:50 +000012505 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard328f48c2002-11-15 15:24:34 +000012506
12507 size = xmlStrlen(string);
12508
12509 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
William M. Brack7b9154b2003-09-27 19:23:50 +000012510 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
Daniel Veillard328f48c2002-11-15 15:24:34 +000012511 if (user_data != NULL)
12512 ctxt->userData = user_data;
12513 else
12514 ctxt->userData = ctxt;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000012515 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
12516 ctxt->dict = oldctxt->dict;
Daniel Veillardfd343dc2003-10-31 10:55:22 +000012517 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
12518 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
12519 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
Daniel Veillard328f48c2002-11-15 15:24:34 +000012520
12521 oldsax = ctxt->sax;
12522 ctxt->sax = oldctxt->sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000012523 xmlDetectSAX2(ctxt);
Daniel Veillard87ab1c12003-12-21 13:01:56 +000012524 ctxt->replaceEntities = oldctxt->replaceEntities;
12525 ctxt->options = oldctxt->options;
Daniel Veillard0161e632008-08-28 15:36:32 +000012526
Daniel Veillarde1ca5032002-12-09 14:13:43 +000012527 ctxt->_private = oldctxt->_private;
Daniel Veillard328f48c2002-11-15 15:24:34 +000012528 if (oldctxt->myDoc == NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000012529 newDoc = xmlNewDoc(BAD_CAST "1.0");
12530 if (newDoc == NULL) {
12531 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000012532 ctxt->dict = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000012533 xmlFreeParserCtxt(ctxt);
William M. Brack7b9154b2003-09-27 19:23:50 +000012534 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard68e9e742002-11-16 15:35:11 +000012535 }
Daniel Veillardae0765b2008-07-31 19:54:59 +000012536 newDoc->properties = XML_DOC_INTERNAL;
Daniel Veillard03a53c32004-10-26 16:06:51 +000012537 newDoc->dict = ctxt->dict;
12538 xmlDictReference(newDoc->dict);
Daniel Veillard328f48c2002-11-15 15:24:34 +000012539 ctxt->myDoc = newDoc;
Daniel Veillard68e9e742002-11-16 15:35:11 +000012540 } else {
12541 ctxt->myDoc = oldctxt->myDoc;
12542 content = ctxt->myDoc->children;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012543 last = ctxt->myDoc->last;
Daniel Veillard328f48c2002-11-15 15:24:34 +000012544 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012545 newRoot = xmlNewDocNode(ctxt->myDoc, NULL, BAD_CAST "pseudoroot", NULL);
12546 if (newRoot == NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000012547 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000012548 ctxt->dict = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000012549 xmlFreeParserCtxt(ctxt);
Daniel Veillard03a53c32004-10-26 16:06:51 +000012550 if (newDoc != NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000012551 xmlFreeDoc(newDoc);
Daniel Veillard03a53c32004-10-26 16:06:51 +000012552 }
William M. Brack7b9154b2003-09-27 19:23:50 +000012553 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard68e9e742002-11-16 15:35:11 +000012554 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012555 ctxt->myDoc->children = NULL;
12556 ctxt->myDoc->last = NULL;
12557 xmlAddChild((xmlNodePtr) ctxt->myDoc, newRoot);
Daniel Veillard68e9e742002-11-16 15:35:11 +000012558 nodePush(ctxt, ctxt->myDoc->children);
Daniel Veillard328f48c2002-11-15 15:24:34 +000012559 ctxt->instate = XML_PARSER_CONTENT;
12560 ctxt->depth = oldctxt->depth + 1;
12561
Daniel Veillard328f48c2002-11-15 15:24:34 +000012562 ctxt->validate = 0;
12563 ctxt->loadsubset = oldctxt->loadsubset;
Daniel Veillardef8dd7b2003-03-23 12:02:56 +000012564 if ((oldctxt->validate) || (oldctxt->replaceEntities != 0)) {
12565 /*
12566 * ID/IDREF registration will be done in xmlValidateElement below
12567 */
12568 ctxt->loadsubset |= XML_SKIP_IDS;
12569 }
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000012570 ctxt->dictNames = oldctxt->dictNames;
Daniel Veillard95d2d5b2003-10-27 14:54:49 +000012571 ctxt->attsDefault = oldctxt->attsDefault;
12572 ctxt->attsSpecial = oldctxt->attsSpecial;
Daniel Veillard328f48c2002-11-15 15:24:34 +000012573
Daniel Veillard68e9e742002-11-16 15:35:11 +000012574 xmlParseContent(ctxt);
Daniel Veillard328f48c2002-11-15 15:24:34 +000012575 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012576 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000012577 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012578 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000012579 }
Daniel Veillard68e9e742002-11-16 15:35:11 +000012580 if (ctxt->node != ctxt->myDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012581 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000012582 }
12583
12584 if (!ctxt->wellFormed) {
12585 if (ctxt->errNo == 0)
Daniel Veillard7d515752003-09-26 19:12:37 +000012586 ret = XML_ERR_INTERNAL_ERROR;
Daniel Veillard328f48c2002-11-15 15:24:34 +000012587 else
William M. Brack7b9154b2003-09-27 19:23:50 +000012588 ret = (xmlParserErrors)ctxt->errNo;
Daniel Veillard328f48c2002-11-15 15:24:34 +000012589 } else {
William M. Brack7b9154b2003-09-27 19:23:50 +000012590 ret = XML_ERR_OK;
Daniel Veillard328f48c2002-11-15 15:24:34 +000012591 }
Daniel Veillard0161e632008-08-28 15:36:32 +000012592
William M. Brack7b9154b2003-09-27 19:23:50 +000012593 if ((lst != NULL) && (ret == XML_ERR_OK)) {
Daniel Veillard328f48c2002-11-15 15:24:34 +000012594 xmlNodePtr cur;
12595
12596 /*
12597 * Return the newly created nodeset after unlinking it from
12598 * they pseudo parent.
12599 */
Daniel Veillard68e9e742002-11-16 15:35:11 +000012600 cur = ctxt->myDoc->children->children;
Daniel Veillard328f48c2002-11-15 15:24:34 +000012601 *lst = cur;
12602 while (cur != NULL) {
Daniel Veillard4432df22003-09-28 18:58:27 +000012603#ifdef LIBXML_VALID_ENABLED
Daniel Veillard8874b942005-08-25 13:19:21 +000012604 if ((oldctxt->validate) && (oldctxt->wellFormed) &&
12605 (oldctxt->myDoc) && (oldctxt->myDoc->intSubset) &&
12606 (cur->type == XML_ELEMENT_NODE)) {
Daniel Veillard8d589042003-02-04 15:07:21 +000012607 oldctxt->valid &= xmlValidateElement(&oldctxt->vctxt,
12608 oldctxt->myDoc, cur);
12609 }
Daniel Veillard4432df22003-09-28 18:58:27 +000012610#endif /* LIBXML_VALID_ENABLED */
Daniel Veillard328f48c2002-11-15 15:24:34 +000012611 cur->parent = NULL;
12612 cur = cur->next;
12613 }
Daniel Veillard68e9e742002-11-16 15:35:11 +000012614 ctxt->myDoc->children->children = NULL;
12615 }
12616 if (ctxt->myDoc != NULL) {
12617 xmlFreeNode(ctxt->myDoc->children);
12618 ctxt->myDoc->children = content;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012619 ctxt->myDoc->last = last;
Daniel Veillard328f48c2002-11-15 15:24:34 +000012620 }
Daniel Veillard0161e632008-08-28 15:36:32 +000012621
12622 /*
12623 * Record in the parent context the number of entities replacement
12624 * done when parsing that reference.
12625 */
Daniel Veillard4bf899b2008-08-20 17:04:30 +000012626 oldctxt->nbentities += ctxt->nbentities;
Daniel Veillard0161e632008-08-28 15:36:32 +000012627 /*
12628 * Also record the last error if any
12629 */
12630 if (ctxt->lastError.code != XML_ERR_OK)
12631 xmlCopyError(&ctxt->lastError, &oldctxt->lastError);
12632
Daniel Veillard328f48c2002-11-15 15:24:34 +000012633 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000012634 ctxt->dict = NULL;
Daniel Veillard95d2d5b2003-10-27 14:54:49 +000012635 ctxt->attsDefault = NULL;
12636 ctxt->attsSpecial = NULL;
Daniel Veillard328f48c2002-11-15 15:24:34 +000012637 xmlFreeParserCtxt(ctxt);
Daniel Veillard03a53c32004-10-26 16:06:51 +000012638 if (newDoc != NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000012639 xmlFreeDoc(newDoc);
Daniel Veillard03a53c32004-10-26 16:06:51 +000012640 }
Daniel Veillard0161e632008-08-28 15:36:32 +000012641
Daniel Veillard328f48c2002-11-15 15:24:34 +000012642 return(ret);
12643}
12644
Daniel Veillard29b17482004-08-16 00:39:03 +000012645/**
12646 * xmlParseInNodeContext:
12647 * @node: the context node
12648 * @data: the input string
12649 * @datalen: the input string length in bytes
12650 * @options: a combination of xmlParserOption
12651 * @lst: the return value for the set of parsed nodes
12652 *
12653 * Parse a well-balanced chunk of an XML document
12654 * within the context (DTD, namespaces, etc ...) of the given node.
12655 *
12656 * The allowed sequence for the data is a Well Balanced Chunk defined by
12657 * the content production in the XML grammar:
12658 *
12659 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
12660 *
12661 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
12662 * error code otherwise
12663 */
12664xmlParserErrors
12665xmlParseInNodeContext(xmlNodePtr node, const char *data, int datalen,
12666 int options, xmlNodePtr *lst) {
12667#ifdef SAX2
12668 xmlParserCtxtPtr ctxt;
12669 xmlDocPtr doc = NULL;
12670 xmlNodePtr fake, cur;
12671 int nsnr = 0;
12672
12673 xmlParserErrors ret = XML_ERR_OK;
12674
12675 /*
12676 * check all input parameters, grab the document
12677 */
12678 if ((lst == NULL) || (node == NULL) || (data == NULL) || (datalen < 0))
12679 return(XML_ERR_INTERNAL_ERROR);
12680 switch (node->type) {
12681 case XML_ELEMENT_NODE:
12682 case XML_ATTRIBUTE_NODE:
12683 case XML_TEXT_NODE:
12684 case XML_CDATA_SECTION_NODE:
12685 case XML_ENTITY_REF_NODE:
12686 case XML_PI_NODE:
12687 case XML_COMMENT_NODE:
12688 case XML_DOCUMENT_NODE:
12689 case XML_HTML_DOCUMENT_NODE:
12690 break;
12691 default:
12692 return(XML_ERR_INTERNAL_ERROR);
12693
12694 }
12695 while ((node != NULL) && (node->type != XML_ELEMENT_NODE) &&
12696 (node->type != XML_DOCUMENT_NODE) &&
12697 (node->type != XML_HTML_DOCUMENT_NODE))
12698 node = node->parent;
12699 if (node == NULL)
12700 return(XML_ERR_INTERNAL_ERROR);
12701 if (node->type == XML_ELEMENT_NODE)
12702 doc = node->doc;
12703 else
12704 doc = (xmlDocPtr) node;
12705 if (doc == NULL)
12706 return(XML_ERR_INTERNAL_ERROR);
12707
12708 /*
12709 * allocate a context and set-up everything not related to the
12710 * node position in the tree
12711 */
12712 if (doc->type == XML_DOCUMENT_NODE)
12713 ctxt = xmlCreateMemoryParserCtxt((char *) data, datalen);
12714#ifdef LIBXML_HTML_ENABLED
12715 else if (doc->type == XML_HTML_DOCUMENT_NODE)
12716 ctxt = htmlCreateMemoryParserCtxt((char *) data, datalen);
12717#endif
12718 else
12719 return(XML_ERR_INTERNAL_ERROR);
12720
12721 if (ctxt == NULL)
12722 return(XML_ERR_NO_MEMORY);
12723 fake = xmlNewComment(NULL);
12724 if (fake == NULL) {
12725 xmlFreeParserCtxt(ctxt);
12726 return(XML_ERR_NO_MEMORY);
12727 }
12728 xmlAddChild(node, fake);
William M. Brackc3f81342004-10-03 01:22:44 +000012729
12730 /*
12731 * Use input doc's dict if present, else assure XML_PARSE_NODICT is set.
12732 * We need a dictionary for xmlDetectSAX2, so if there's no doc dict
12733 * we must wait until the last moment to free the original one.
12734 */
Daniel Veillard29b17482004-08-16 00:39:03 +000012735 if (doc->dict != NULL) {
William M. Brackc3f81342004-10-03 01:22:44 +000012736 if (ctxt->dict != NULL)
Daniel Veillard29b17482004-08-16 00:39:03 +000012737 xmlDictFree(ctxt->dict);
12738 ctxt->dict = doc->dict;
William M. Brackc3f81342004-10-03 01:22:44 +000012739 } else
12740 options |= XML_PARSE_NODICT;
12741
Daniel Veillard37334572008-07-31 08:20:02 +000012742 xmlCtxtUseOptionsInternal(ctxt, options, NULL);
Daniel Veillard29b17482004-08-16 00:39:03 +000012743 xmlDetectSAX2(ctxt);
12744 ctxt->myDoc = doc;
12745
12746 if (node->type == XML_ELEMENT_NODE) {
12747 nodePush(ctxt, node);
12748 /*
12749 * initialize the SAX2 namespaces stack
12750 */
12751 cur = node;
12752 while ((cur != NULL) && (cur->type == XML_ELEMENT_NODE)) {
12753 xmlNsPtr ns = cur->nsDef;
12754 const xmlChar *iprefix, *ihref;
12755
12756 while (ns != NULL) {
12757 if (ctxt->dict) {
12758 iprefix = xmlDictLookup(ctxt->dict, ns->prefix, -1);
12759 ihref = xmlDictLookup(ctxt->dict, ns->href, -1);
12760 } else {
12761 iprefix = ns->prefix;
12762 ihref = ns->href;
12763 }
12764
12765 if (xmlGetNamespace(ctxt, iprefix) == NULL) {
12766 nsPush(ctxt, iprefix, ihref);
12767 nsnr++;
12768 }
12769 ns = ns->next;
12770 }
12771 cur = cur->parent;
12772 }
12773 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillard0161e632008-08-28 15:36:32 +000012774 }
Daniel Veillard29b17482004-08-16 00:39:03 +000012775
12776 if ((ctxt->validate) || (ctxt->replaceEntities != 0)) {
12777 /*
12778 * ID/IDREF registration will be done in xmlValidateElement below
12779 */
12780 ctxt->loadsubset |= XML_SKIP_IDS;
12781 }
12782
Daniel Veillard499cc922006-01-18 17:22:35 +000012783#ifdef LIBXML_HTML_ENABLED
12784 if (doc->type == XML_HTML_DOCUMENT_NODE)
12785 __htmlParseContent(ctxt);
12786 else
12787#endif
12788 xmlParseContent(ctxt);
12789
Daniel Veillard29b17482004-08-16 00:39:03 +000012790 nsPop(ctxt, nsnr);
12791 if ((RAW == '<') && (NXT(1) == '/')) {
12792 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
12793 } else if (RAW != 0) {
12794 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
12795 }
12796 if ((ctxt->node != NULL) && (ctxt->node != node)) {
12797 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
12798 ctxt->wellFormed = 0;
12799 }
12800
12801 if (!ctxt->wellFormed) {
12802 if (ctxt->errNo == 0)
12803 ret = XML_ERR_INTERNAL_ERROR;
12804 else
12805 ret = (xmlParserErrors)ctxt->errNo;
12806 } else {
12807 ret = XML_ERR_OK;
12808 }
Daniel Veillard0161e632008-08-28 15:36:32 +000012809
Daniel Veillard29b17482004-08-16 00:39:03 +000012810 /*
12811 * Return the newly created nodeset after unlinking it from
12812 * the pseudo sibling.
12813 */
Daniel Veillard0161e632008-08-28 15:36:32 +000012814
Daniel Veillard29b17482004-08-16 00:39:03 +000012815 cur = fake->next;
12816 fake->next = NULL;
12817 node->last = fake;
12818
12819 if (cur != NULL) {
12820 cur->prev = NULL;
12821 }
12822
12823 *lst = cur;
12824
12825 while (cur != NULL) {
12826 cur->parent = NULL;
12827 cur = cur->next;
12828 }
12829
12830 xmlUnlinkNode(fake);
12831 xmlFreeNode(fake);
12832
12833
12834 if (ret != XML_ERR_OK) {
12835 xmlFreeNodeList(*lst);
12836 *lst = NULL;
12837 }
William M. Brackc3f81342004-10-03 01:22:44 +000012838
William M. Brackb7b54de2004-10-06 16:38:01 +000012839 if (doc->dict != NULL)
12840 ctxt->dict = NULL;
Daniel Veillard29b17482004-08-16 00:39:03 +000012841 xmlFreeParserCtxt(ctxt);
Daniel Veillard0161e632008-08-28 15:36:32 +000012842
Daniel Veillard29b17482004-08-16 00:39:03 +000012843 return(ret);
12844#else /* !SAX2 */
12845 return(XML_ERR_INTERNAL_ERROR);
12846#endif
12847}
12848
Daniel Veillard81273902003-09-30 00:43:48 +000012849#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard328f48c2002-11-15 15:24:34 +000012850/**
Daniel Veillard58e44c92002-08-02 22:19:49 +000012851 * xmlParseBalancedChunkMemoryRecover:
12852 * @doc: the document the chunk pertains to
12853 * @sax: the SAX handler bloc (possibly NULL)
12854 * @user_data: The user data returned on SAX callbacks (possibly NULL)
12855 * @depth: Used for loop detection, use 0
12856 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
12857 * @lst: the return value for the set of parsed nodes
12858 * @recover: return nodes even if the data is broken (use 0)
12859 *
12860 *
12861 * Parse a well-balanced chunk of an XML document
12862 * called by the parser
12863 * The allowed sequence for the Well Balanced Chunk is the one defined by
12864 * the content production in the XML grammar:
12865 *
12866 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
12867 *
12868 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
12869 * the parser error code otherwise
Daniel Veillard2135fc22008-04-04 16:10:51 +000012870 *
Daniel Veillard58e44c92002-08-02 22:19:49 +000012871 * In case recover is set to 1, the nodelist will not be empty even if
Daniel Veillard2135fc22008-04-04 16:10:51 +000012872 * the parsed chunk is not well balanced, assuming the parsing succeeded to
12873 * some extent.
Daniel Veillard58e44c92002-08-02 22:19:49 +000012874 */
12875int
12876xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax,
Daniel Veillard2135fc22008-04-04 16:10:51 +000012877 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst,
Daniel Veillard58e44c92002-08-02 22:19:49 +000012878 int recover) {
Owen Taylor3473f882001-02-23 17:55:21 +000012879 xmlParserCtxtPtr ctxt;
12880 xmlDocPtr newDoc;
12881 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000012882 xmlNodePtr content, newRoot;
Owen Taylor3473f882001-02-23 17:55:21 +000012883 int size;
12884 int ret = 0;
12885
Daniel Veillard0161e632008-08-28 15:36:32 +000012886 if (depth > 40) {
Owen Taylor3473f882001-02-23 17:55:21 +000012887 return(XML_ERR_ENTITY_LOOP);
12888 }
12889
12890
Daniel Veillardcda96922001-08-21 10:56:31 +000012891 if (lst != NULL)
12892 *lst = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000012893 if (string == NULL)
12894 return(-1);
12895
12896 size = xmlStrlen(string);
12897
12898 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
12899 if (ctxt == NULL) return(-1);
12900 ctxt->userData = ctxt;
12901 if (sax != NULL) {
12902 oldsax = ctxt->sax;
12903 ctxt->sax = sax;
12904 if (user_data != NULL)
12905 ctxt->userData = user_data;
12906 }
12907 newDoc = xmlNewDoc(BAD_CAST "1.0");
12908 if (newDoc == NULL) {
12909 xmlFreeParserCtxt(ctxt);
12910 return(-1);
12911 }
Daniel Veillardae0765b2008-07-31 19:54:59 +000012912 newDoc->properties = XML_DOC_INTERNAL;
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000012913 if ((doc != NULL) && (doc->dict != NULL)) {
12914 xmlDictFree(ctxt->dict);
12915 ctxt->dict = doc->dict;
12916 xmlDictReference(ctxt->dict);
12917 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
12918 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
12919 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
12920 ctxt->dictNames = 1;
12921 } else {
Daniel Veillard37334572008-07-31 08:20:02 +000012922 xmlCtxtUseOptionsInternal(ctxt, XML_PARSE_NODICT, NULL);
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000012923 }
Owen Taylor3473f882001-02-23 17:55:21 +000012924 if (doc != NULL) {
12925 newDoc->intSubset = doc->intSubset;
12926 newDoc->extSubset = doc->extSubset;
12927 }
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000012928 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
12929 if (newRoot == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000012930 if (sax != NULL)
12931 ctxt->sax = oldsax;
12932 xmlFreeParserCtxt(ctxt);
12933 newDoc->intSubset = NULL;
12934 newDoc->extSubset = NULL;
12935 xmlFreeDoc(newDoc);
12936 return(-1);
12937 }
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000012938 xmlAddChild((xmlNodePtr) newDoc, newRoot);
12939 nodePush(ctxt, newRoot);
Owen Taylor3473f882001-02-23 17:55:21 +000012940 if (doc == NULL) {
12941 ctxt->myDoc = newDoc;
12942 } else {
Daniel Veillard42766c02002-08-22 20:52:17 +000012943 ctxt->myDoc = newDoc;
Owen Taylor3473f882001-02-23 17:55:21 +000012944 newDoc->children->doc = doc;
Rob Richardsa02f1992006-09-16 14:04:26 +000012945 /* Ensure that doc has XML spec namespace */
12946 xmlSearchNsByHref(doc, (xmlNodePtr)doc, XML_XML_NAMESPACE);
12947 newDoc->oldNs = doc->oldNs;
Owen Taylor3473f882001-02-23 17:55:21 +000012948 }
12949 ctxt->instate = XML_PARSER_CONTENT;
12950 ctxt->depth = depth;
12951
12952 /*
12953 * Doing validity checking on chunk doesn't make sense
12954 */
12955 ctxt->validate = 0;
12956 ctxt->loadsubset = 0;
Daniel Veillarde57ec792003-09-10 10:50:59 +000012957 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000012958
Daniel Veillardb39bc392002-10-26 19:29:51 +000012959 if ( doc != NULL ){
12960 content = doc->children;
12961 doc->children = NULL;
12962 xmlParseContent(ctxt);
12963 doc->children = content;
12964 }
12965 else {
12966 xmlParseContent(ctxt);
12967 }
Owen Taylor3473f882001-02-23 17:55:21 +000012968 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012969 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000012970 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012971 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000012972 }
12973 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012974 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000012975 }
12976
12977 if (!ctxt->wellFormed) {
12978 if (ctxt->errNo == 0)
12979 ret = 1;
12980 else
12981 ret = ctxt->errNo;
12982 } else {
Daniel Veillard58e44c92002-08-02 22:19:49 +000012983 ret = 0;
12984 }
Daniel Veillard0161e632008-08-28 15:36:32 +000012985
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000012986 if ((lst != NULL) && ((ret == 0) || (recover == 1))) {
12987 xmlNodePtr cur;
Owen Taylor3473f882001-02-23 17:55:21 +000012988
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000012989 /*
12990 * Return the newly created nodeset after unlinking it from
12991 * they pseudo parent.
12992 */
12993 cur = newDoc->children->children;
12994 *lst = cur;
12995 while (cur != NULL) {
12996 xmlSetTreeDoc(cur, doc);
12997 cur->parent = NULL;
12998 cur = cur->next;
Owen Taylor3473f882001-02-23 17:55:21 +000012999 }
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000013000 newDoc->children->children = NULL;
13001 }
Daniel Veillard0161e632008-08-28 15:36:32 +000013002
13003 if (sax != NULL)
Owen Taylor3473f882001-02-23 17:55:21 +000013004 ctxt->sax = oldsax;
13005 xmlFreeParserCtxt(ctxt);
13006 newDoc->intSubset = NULL;
13007 newDoc->extSubset = NULL;
Rob Richardsa02f1992006-09-16 14:04:26 +000013008 newDoc->oldNs = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000013009 xmlFreeDoc(newDoc);
Daniel Veillard0161e632008-08-28 15:36:32 +000013010
Owen Taylor3473f882001-02-23 17:55:21 +000013011 return(ret);
13012}
13013
13014/**
13015 * xmlSAXParseEntity:
13016 * @sax: the SAX handler block
13017 * @filename: the filename
13018 *
13019 * parse an XML external entity out of context and build a tree.
13020 * It use the given SAX function block to handle the parsing callback.
13021 * If sax is NULL, fallback to the default DOM tree building routines.
13022 *
13023 * [78] extParsedEnt ::= TextDecl? content
13024 *
13025 * This correspond to a "Well Balanced" chunk
13026 *
13027 * Returns the resulting document tree
13028 */
13029
13030xmlDocPtr
13031xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
13032 xmlDocPtr ret;
13033 xmlParserCtxtPtr ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000013034
13035 ctxt = xmlCreateFileParserCtxt(filename);
13036 if (ctxt == NULL) {
13037 return(NULL);
13038 }
13039 if (sax != NULL) {
13040 if (ctxt->sax != NULL)
13041 xmlFree(ctxt->sax);
13042 ctxt->sax = sax;
13043 ctxt->userData = NULL;
13044 }
13045
Owen Taylor3473f882001-02-23 17:55:21 +000013046 xmlParseExtParsedEnt(ctxt);
13047
13048 if (ctxt->wellFormed)
13049 ret = ctxt->myDoc;
13050 else {
13051 ret = NULL;
13052 xmlFreeDoc(ctxt->myDoc);
13053 ctxt->myDoc = NULL;
13054 }
13055 if (sax != NULL)
13056 ctxt->sax = NULL;
13057 xmlFreeParserCtxt(ctxt);
Daniel Veillard0161e632008-08-28 15:36:32 +000013058
Owen Taylor3473f882001-02-23 17:55:21 +000013059 return(ret);
13060}
13061
13062/**
13063 * xmlParseEntity:
13064 * @filename: the filename
13065 *
13066 * parse an XML external entity out of context and build a tree.
13067 *
13068 * [78] extParsedEnt ::= TextDecl? content
13069 *
13070 * This correspond to a "Well Balanced" chunk
13071 *
13072 * Returns the resulting document tree
13073 */
13074
13075xmlDocPtr
13076xmlParseEntity(const char *filename) {
13077 return(xmlSAXParseEntity(NULL, filename));
13078}
Daniel Veillard81273902003-09-30 00:43:48 +000013079#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000013080
13081/**
13082 * xmlCreateEntityParserCtxt:
13083 * @URL: the entity URL
13084 * @ID: the entity PUBLIC ID
Daniel Veillardcbaf3992001-12-31 16:16:02 +000013085 * @base: a possible base for the target URI
Owen Taylor3473f882001-02-23 17:55:21 +000013086 *
13087 * Create a parser context for an external entity
13088 * Automatic support for ZLIB/Compress compressed document is provided
13089 * by default if found at compile-time.
13090 *
13091 * Returns the new parser context or NULL
13092 */
13093xmlParserCtxtPtr
13094xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
13095 const xmlChar *base) {
13096 xmlParserCtxtPtr ctxt;
13097 xmlParserInputPtr inputStream;
13098 char *directory = NULL;
13099 xmlChar *uri;
Daniel Veillard0161e632008-08-28 15:36:32 +000013100
Owen Taylor3473f882001-02-23 17:55:21 +000013101 ctxt = xmlNewParserCtxt();
13102 if (ctxt == NULL) {
13103 return(NULL);
13104 }
13105
13106 uri = xmlBuildURI(URL, base);
13107
13108 if (uri == NULL) {
13109 inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
13110 if (inputStream == NULL) {
13111 xmlFreeParserCtxt(ctxt);
13112 return(NULL);
13113 }
13114
13115 inputPush(ctxt, inputStream);
13116
13117 if ((ctxt->directory == NULL) && (directory == NULL))
13118 directory = xmlParserGetDirectory((char *)URL);
13119 if ((ctxt->directory == NULL) && (directory != NULL))
13120 ctxt->directory = directory;
13121 } else {
13122 inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt);
13123 if (inputStream == NULL) {
13124 xmlFree(uri);
13125 xmlFreeParserCtxt(ctxt);
13126 return(NULL);
13127 }
13128
13129 inputPush(ctxt, inputStream);
13130
13131 if ((ctxt->directory == NULL) && (directory == NULL))
13132 directory = xmlParserGetDirectory((char *)uri);
13133 if ((ctxt->directory == NULL) && (directory != NULL))
13134 ctxt->directory = directory;
13135 xmlFree(uri);
13136 }
Owen Taylor3473f882001-02-23 17:55:21 +000013137 return(ctxt);
13138}
13139
13140/************************************************************************
13141 * *
Daniel Veillard0161e632008-08-28 15:36:32 +000013142 * Front ends when parsing from a file *
Owen Taylor3473f882001-02-23 17:55:21 +000013143 * *
13144 ************************************************************************/
13145
13146/**
Daniel Veillard61b93382003-11-03 14:28:31 +000013147 * xmlCreateURLParserCtxt:
13148 * @filename: the filename or URL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013149 * @options: a combination of xmlParserOption
Owen Taylor3473f882001-02-23 17:55:21 +000013150 *
Daniel Veillard61b93382003-11-03 14:28:31 +000013151 * Create a parser context for a file or URL content.
Owen Taylor3473f882001-02-23 17:55:21 +000013152 * Automatic support for ZLIB/Compress compressed document is provided
Daniel Veillard61b93382003-11-03 14:28:31 +000013153 * by default if found at compile-time and for file accesses
Owen Taylor3473f882001-02-23 17:55:21 +000013154 *
13155 * Returns the new parser context or NULL
13156 */
13157xmlParserCtxtPtr
Daniel Veillard61b93382003-11-03 14:28:31 +000013158xmlCreateURLParserCtxt(const char *filename, int options)
Owen Taylor3473f882001-02-23 17:55:21 +000013159{
13160 xmlParserCtxtPtr ctxt;
13161 xmlParserInputPtr inputStream;
Owen Taylor3473f882001-02-23 17:55:21 +000013162 char *directory = NULL;
13163
Owen Taylor3473f882001-02-23 17:55:21 +000013164 ctxt = xmlNewParserCtxt();
13165 if (ctxt == NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +000013166 xmlErrMemory(NULL, "cannot allocate parser context");
Owen Taylor3473f882001-02-23 17:55:21 +000013167 return(NULL);
13168 }
13169
Daniel Veillarddf292f72005-01-16 19:00:15 +000013170 if (options)
Daniel Veillard37334572008-07-31 08:20:02 +000013171 xmlCtxtUseOptionsInternal(ctxt, options, NULL);
Daniel Veillarddf292f72005-01-16 19:00:15 +000013172 ctxt->linenumbers = 1;
Daniel Veillard37334572008-07-31 08:20:02 +000013173
Daniel Veillard4e9b1bc2003-06-09 10:30:33 +000013174 inputStream = xmlLoadExternalEntity(filename, NULL, ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000013175 if (inputStream == NULL) {
13176 xmlFreeParserCtxt(ctxt);
13177 return(NULL);
13178 }
13179
Owen Taylor3473f882001-02-23 17:55:21 +000013180 inputPush(ctxt, inputStream);
13181 if ((ctxt->directory == NULL) && (directory == NULL))
Igor Zlatkovic5f9fada2003-02-19 14:51:00 +000013182 directory = xmlParserGetDirectory(filename);
Owen Taylor3473f882001-02-23 17:55:21 +000013183 if ((ctxt->directory == NULL) && (directory != NULL))
13184 ctxt->directory = directory;
13185
13186 return(ctxt);
13187}
13188
Daniel Veillard61b93382003-11-03 14:28:31 +000013189/**
13190 * xmlCreateFileParserCtxt:
13191 * @filename: the filename
13192 *
13193 * Create a parser context for a file content.
13194 * Automatic support for ZLIB/Compress compressed document is provided
13195 * by default if found at compile-time.
13196 *
13197 * Returns the new parser context or NULL
13198 */
13199xmlParserCtxtPtr
13200xmlCreateFileParserCtxt(const char *filename)
13201{
13202 return(xmlCreateURLParserCtxt(filename, 0));
13203}
13204
Daniel Veillard81273902003-09-30 00:43:48 +000013205#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000013206/**
Daniel Veillarda293c322001-10-02 13:54:14 +000013207 * xmlSAXParseFileWithData:
Owen Taylor3473f882001-02-23 17:55:21 +000013208 * @sax: the SAX handler block
13209 * @filename: the filename
13210 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
13211 * documents
Daniel Veillarda293c322001-10-02 13:54:14 +000013212 * @data: the userdata
Owen Taylor3473f882001-02-23 17:55:21 +000013213 *
13214 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
13215 * compressed document is provided by default if found at compile-time.
13216 * It use the given SAX function block to handle the parsing callback.
13217 * If sax is NULL, fallback to the default DOM tree building routines.
13218 *
Daniel Veillarde19fc232002-04-22 16:01:24 +000013219 * User data (void *) is stored within the parser context in the
13220 * context's _private member, so it is available nearly everywhere in libxml
Daniel Veillarda293c322001-10-02 13:54:14 +000013221 *
Owen Taylor3473f882001-02-23 17:55:21 +000013222 * Returns the resulting document tree
13223 */
13224
13225xmlDocPtr
Daniel Veillarda293c322001-10-02 13:54:14 +000013226xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename,
13227 int recovery, void *data) {
Owen Taylor3473f882001-02-23 17:55:21 +000013228 xmlDocPtr ret;
13229 xmlParserCtxtPtr ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000013230
Daniel Veillard635ef722001-10-29 11:48:19 +000013231 xmlInitParser();
13232
Owen Taylor3473f882001-02-23 17:55:21 +000013233 ctxt = xmlCreateFileParserCtxt(filename);
13234 if (ctxt == NULL) {
13235 return(NULL);
13236 }
13237 if (sax != NULL) {
13238 if (ctxt->sax != NULL)
13239 xmlFree(ctxt->sax);
13240 ctxt->sax = sax;
Owen Taylor3473f882001-02-23 17:55:21 +000013241 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000013242 xmlDetectSAX2(ctxt);
Daniel Veillarda293c322001-10-02 13:54:14 +000013243 if (data!=NULL) {
Daniel Veillardc790bf42003-10-11 10:50:10 +000013244 ctxt->_private = data;
Daniel Veillarda293c322001-10-02 13:54:14 +000013245 }
Owen Taylor3473f882001-02-23 17:55:21 +000013246
Daniel Veillard37d2d162008-03-14 10:54:00 +000013247 if (ctxt->directory == NULL)
13248 ctxt->directory = xmlParserGetDirectory(filename);
Owen Taylor3473f882001-02-23 17:55:21 +000013249
Daniel Veillarddad3f682002-11-17 16:47:27 +000013250 ctxt->recovery = recovery;
13251
Owen Taylor3473f882001-02-23 17:55:21 +000013252 xmlParseDocument(ctxt);
13253
William M. Brackc07329e2003-09-08 01:57:30 +000013254 if ((ctxt->wellFormed) || recovery) {
13255 ret = ctxt->myDoc;
Daniel Veillardb65e12e2003-10-08 21:33:28 +000013256 if (ret != NULL) {
13257 if (ctxt->input->buf->compressed > 0)
13258 ret->compression = 9;
13259 else
13260 ret->compression = ctxt->input->buf->compressed;
13261 }
William M. Brackc07329e2003-09-08 01:57:30 +000013262 }
Owen Taylor3473f882001-02-23 17:55:21 +000013263 else {
13264 ret = NULL;
13265 xmlFreeDoc(ctxt->myDoc);
13266 ctxt->myDoc = NULL;
13267 }
13268 if (sax != NULL)
13269 ctxt->sax = NULL;
13270 xmlFreeParserCtxt(ctxt);
13271
13272 return(ret);
13273}
13274
13275/**
Daniel Veillarda293c322001-10-02 13:54:14 +000013276 * xmlSAXParseFile:
13277 * @sax: the SAX handler block
13278 * @filename: the filename
13279 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
13280 * documents
13281 *
13282 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
13283 * compressed document is provided by default if found at compile-time.
13284 * It use the given SAX function block to handle the parsing callback.
13285 * If sax is NULL, fallback to the default DOM tree building routines.
13286 *
13287 * Returns the resulting document tree
13288 */
13289
13290xmlDocPtr
13291xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
13292 int recovery) {
13293 return(xmlSAXParseFileWithData(sax,filename,recovery,NULL));
13294}
13295
13296/**
Owen Taylor3473f882001-02-23 17:55:21 +000013297 * xmlRecoverDoc:
13298 * @cur: a pointer to an array of xmlChar
13299 *
13300 * parse an XML in-memory document and build a tree.
Daniel Veillard2135fc22008-04-04 16:10:51 +000013301 * In the case the document is not Well Formed, a attempt to build a
13302 * tree is tried anyway
13303 *
13304 * Returns the resulting document tree or NULL in case of failure
Owen Taylor3473f882001-02-23 17:55:21 +000013305 */
13306
13307xmlDocPtr
13308xmlRecoverDoc(xmlChar *cur) {
13309 return(xmlSAXParseDoc(NULL, cur, 1));
13310}
13311
13312/**
13313 * xmlParseFile:
13314 * @filename: the filename
13315 *
13316 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
13317 * compressed document is provided by default if found at compile-time.
13318 *
Daniel Veillard5d96fff2001-08-31 14:55:30 +000013319 * Returns the resulting document tree if the file was wellformed,
13320 * NULL otherwise.
Owen Taylor3473f882001-02-23 17:55:21 +000013321 */
13322
13323xmlDocPtr
13324xmlParseFile(const char *filename) {
13325 return(xmlSAXParseFile(NULL, filename, 0));
13326}
13327
13328/**
13329 * xmlRecoverFile:
13330 * @filename: the filename
13331 *
13332 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
13333 * compressed document is provided by default if found at compile-time.
Daniel Veillard2135fc22008-04-04 16:10:51 +000013334 * In the case the document is not Well Formed, it attempts to build
13335 * a tree anyway
Owen Taylor3473f882001-02-23 17:55:21 +000013336 *
Daniel Veillard2135fc22008-04-04 16:10:51 +000013337 * Returns the resulting document tree or NULL in case of failure
Owen Taylor3473f882001-02-23 17:55:21 +000013338 */
13339
13340xmlDocPtr
13341xmlRecoverFile(const char *filename) {
13342 return(xmlSAXParseFile(NULL, filename, 1));
13343}
13344
13345
13346/**
13347 * xmlSetupParserForBuffer:
13348 * @ctxt: an XML parser context
13349 * @buffer: a xmlChar * buffer
13350 * @filename: a file name
13351 *
13352 * Setup the parser context to parse a new buffer; Clears any prior
13353 * contents from the parser context. The buffer parameter must not be
13354 * NULL, but the filename parameter can be
13355 */
13356void
13357xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
13358 const char* filename)
13359{
13360 xmlParserInputPtr input;
13361
Daniel Veillard36e5cd52004-11-02 14:52:23 +000013362 if ((ctxt == NULL) || (buffer == NULL))
13363 return;
13364
Owen Taylor3473f882001-02-23 17:55:21 +000013365 input = xmlNewInputStream(ctxt);
13366 if (input == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +000013367 xmlErrMemory(NULL, "parsing new buffer: out of memory\n");
Daniel Veillard36e5cd52004-11-02 14:52:23 +000013368 xmlClearParserCtxt(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000013369 return;
13370 }
13371
13372 xmlClearParserCtxt(ctxt);
13373 if (filename != NULL)
Daniel Veillardc3ca5ba2003-05-09 22:26:28 +000013374 input->filename = (char *) xmlCanonicPath((const xmlChar *)filename);
Owen Taylor3473f882001-02-23 17:55:21 +000013375 input->base = buffer;
13376 input->cur = buffer;
Daniel Veillard48b2f892001-02-25 16:11:03 +000013377 input->end = &buffer[xmlStrlen(buffer)];
Owen Taylor3473f882001-02-23 17:55:21 +000013378 inputPush(ctxt, input);
13379}
13380
13381/**
13382 * xmlSAXUserParseFile:
13383 * @sax: a SAX handler
13384 * @user_data: The user data returned on SAX callbacks
13385 * @filename: a file name
13386 *
13387 * parse an XML file and call the given SAX handler routines.
13388 * Automatic support for ZLIB/Compress compressed document is provided
13389 *
13390 * Returns 0 in case of success or a error number otherwise
13391 */
13392int
13393xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
13394 const char *filename) {
13395 int ret = 0;
13396 xmlParserCtxtPtr ctxt;
13397
13398 ctxt = xmlCreateFileParserCtxt(filename);
13399 if (ctxt == NULL) return -1;
Daniel Veillard092643b2003-09-25 14:29:29 +000013400 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Owen Taylor3473f882001-02-23 17:55:21 +000013401 xmlFree(ctxt->sax);
13402 ctxt->sax = sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000013403 xmlDetectSAX2(ctxt);
13404
Owen Taylor3473f882001-02-23 17:55:21 +000013405 if (user_data != NULL)
13406 ctxt->userData = user_data;
13407
13408 xmlParseDocument(ctxt);
13409
13410 if (ctxt->wellFormed)
13411 ret = 0;
13412 else {
13413 if (ctxt->errNo != 0)
13414 ret = ctxt->errNo;
13415 else
13416 ret = -1;
13417 }
13418 if (sax != NULL)
13419 ctxt->sax = NULL;
Daniel Veillard34099b42004-11-04 17:34:35 +000013420 if (ctxt->myDoc != NULL) {
13421 xmlFreeDoc(ctxt->myDoc);
13422 ctxt->myDoc = NULL;
13423 }
Owen Taylor3473f882001-02-23 17:55:21 +000013424 xmlFreeParserCtxt(ctxt);
13425
13426 return ret;
13427}
Daniel Veillard81273902003-09-30 00:43:48 +000013428#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000013429
13430/************************************************************************
13431 * *
13432 * Front ends when parsing from memory *
13433 * *
13434 ************************************************************************/
13435
13436/**
13437 * xmlCreateMemoryParserCtxt:
13438 * @buffer: a pointer to a char array
13439 * @size: the size of the array
13440 *
13441 * Create a parser context for an XML in-memory document.
13442 *
13443 * Returns the new parser context or NULL
13444 */
13445xmlParserCtxtPtr
Daniel Veillardfd7ddca2001-05-16 10:57:35 +000013446xmlCreateMemoryParserCtxt(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000013447 xmlParserCtxtPtr ctxt;
13448 xmlParserInputPtr input;
13449 xmlParserInputBufferPtr buf;
13450
13451 if (buffer == NULL)
13452 return(NULL);
13453 if (size <= 0)
13454 return(NULL);
13455
13456 ctxt = xmlNewParserCtxt();
13457 if (ctxt == NULL)
13458 return(NULL);
13459
Daniel Veillard53350552003-09-18 13:35:51 +000013460 /* TODO: xmlParserInputBufferCreateStatic, requires some serious changes */
Owen Taylor3473f882001-02-23 17:55:21 +000013461 buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
Daniel Veillarda7e05b42002-11-19 08:11:14 +000013462 if (buf == NULL) {
13463 xmlFreeParserCtxt(ctxt);
13464 return(NULL);
13465 }
Owen Taylor3473f882001-02-23 17:55:21 +000013466
13467 input = xmlNewInputStream(ctxt);
13468 if (input == NULL) {
Daniel Veillarda7e05b42002-11-19 08:11:14 +000013469 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000013470 xmlFreeParserCtxt(ctxt);
13471 return(NULL);
13472 }
13473
13474 input->filename = NULL;
13475 input->buf = buf;
13476 input->base = input->buf->buffer->content;
13477 input->cur = input->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +000013478 input->end = &input->buf->buffer->content[input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000013479
13480 inputPush(ctxt, input);
13481 return(ctxt);
13482}
13483
Daniel Veillard81273902003-09-30 00:43:48 +000013484#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000013485/**
Daniel Veillard8606bbb2002-11-12 12:36:52 +000013486 * xmlSAXParseMemoryWithData:
13487 * @sax: the SAX handler block
13488 * @buffer: an pointer to a char array
13489 * @size: the size of the array
13490 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
13491 * documents
13492 * @data: the userdata
13493 *
13494 * parse an XML in-memory block and use the given SAX function block
13495 * to handle the parsing callback. If sax is NULL, fallback to the default
13496 * DOM tree building routines.
13497 *
13498 * User data (void *) is stored within the parser context in the
13499 * context's _private member, so it is available nearly everywhere in libxml
13500 *
13501 * Returns the resulting document tree
13502 */
13503
13504xmlDocPtr
13505xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer,
13506 int size, int recovery, void *data) {
13507 xmlDocPtr ret;
13508 xmlParserCtxtPtr ctxt;
13509
13510 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
13511 if (ctxt == NULL) return(NULL);
13512 if (sax != NULL) {
13513 if (ctxt->sax != NULL)
13514 xmlFree(ctxt->sax);
13515 ctxt->sax = sax;
13516 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000013517 xmlDetectSAX2(ctxt);
Daniel Veillard8606bbb2002-11-12 12:36:52 +000013518 if (data!=NULL) {
13519 ctxt->_private=data;
13520 }
13521
Daniel Veillardadba5f12003-04-04 16:09:01 +000013522 ctxt->recovery = recovery;
13523
Daniel Veillard8606bbb2002-11-12 12:36:52 +000013524 xmlParseDocument(ctxt);
13525
13526 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
13527 else {
13528 ret = NULL;
13529 xmlFreeDoc(ctxt->myDoc);
13530 ctxt->myDoc = NULL;
13531 }
13532 if (sax != NULL)
13533 ctxt->sax = NULL;
13534 xmlFreeParserCtxt(ctxt);
13535
13536 return(ret);
13537}
13538
13539/**
Owen Taylor3473f882001-02-23 17:55:21 +000013540 * xmlSAXParseMemory:
13541 * @sax: the SAX handler block
13542 * @buffer: an pointer to a char array
13543 * @size: the size of the array
13544 * @recovery: work in recovery mode, i.e. tries to read not Well Formed
13545 * documents
13546 *
13547 * parse an XML in-memory block and use the given SAX function block
13548 * to handle the parsing callback. If sax is NULL, fallback to the default
13549 * DOM tree building routines.
13550 *
13551 * Returns the resulting document tree
13552 */
13553xmlDocPtr
Daniel Veillard50822cb2001-07-26 20:05:51 +000013554xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer,
13555 int size, int recovery) {
Daniel Veillard8606bbb2002-11-12 12:36:52 +000013556 return xmlSAXParseMemoryWithData(sax, buffer, size, recovery, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000013557}
13558
13559/**
13560 * xmlParseMemory:
13561 * @buffer: an pointer to a char array
13562 * @size: the size of the array
13563 *
13564 * parse an XML in-memory block and build a tree.
13565 *
13566 * Returns the resulting document tree
13567 */
13568
Daniel Veillard50822cb2001-07-26 20:05:51 +000013569xmlDocPtr xmlParseMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000013570 return(xmlSAXParseMemory(NULL, buffer, size, 0));
13571}
13572
13573/**
13574 * xmlRecoverMemory:
13575 * @buffer: an pointer to a char array
13576 * @size: the size of the array
13577 *
13578 * parse an XML in-memory block and build a tree.
Daniel Veillard2135fc22008-04-04 16:10:51 +000013579 * In the case the document is not Well Formed, an attempt to
13580 * build a tree is tried anyway
13581 *
13582 * Returns the resulting document tree or NULL in case of error
Owen Taylor3473f882001-02-23 17:55:21 +000013583 */
13584
Daniel Veillard50822cb2001-07-26 20:05:51 +000013585xmlDocPtr xmlRecoverMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000013586 return(xmlSAXParseMemory(NULL, buffer, size, 1));
13587}
13588
13589/**
13590 * xmlSAXUserParseMemory:
13591 * @sax: a SAX handler
13592 * @user_data: The user data returned on SAX callbacks
13593 * @buffer: an in-memory XML document input
13594 * @size: the length of the XML document in bytes
13595 *
13596 * A better SAX parsing routine.
13597 * parse an XML in-memory buffer and call the given SAX handler routines.
13598 *
13599 * Returns 0 in case of success or a error number otherwise
13600 */
13601int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardfd7ddca2001-05-16 10:57:35 +000013602 const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000013603 int ret = 0;
13604 xmlParserCtxtPtr ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000013605
13606 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
13607 if (ctxt == NULL) return -1;
Daniel Veillard3dcd3192007-08-14 13:46:54 +000013608 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
13609 xmlFree(ctxt->sax);
Daniel Veillard9e923512002-08-14 08:48:52 +000013610 ctxt->sax = sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000013611 xmlDetectSAX2(ctxt);
Daniel Veillard3dcd3192007-08-14 13:46:54 +000013612
Daniel Veillard30211a02001-04-26 09:33:18 +000013613 if (user_data != NULL)
13614 ctxt->userData = user_data;
Owen Taylor3473f882001-02-23 17:55:21 +000013615
13616 xmlParseDocument(ctxt);
13617
13618 if (ctxt->wellFormed)
13619 ret = 0;
13620 else {
13621 if (ctxt->errNo != 0)
13622 ret = ctxt->errNo;
13623 else
13624 ret = -1;
13625 }
Daniel Veillard3dcd3192007-08-14 13:46:54 +000013626 if (sax != NULL)
13627 ctxt->sax = NULL;
Daniel Veillard34099b42004-11-04 17:34:35 +000013628 if (ctxt->myDoc != NULL) {
13629 xmlFreeDoc(ctxt->myDoc);
13630 ctxt->myDoc = NULL;
13631 }
Owen Taylor3473f882001-02-23 17:55:21 +000013632 xmlFreeParserCtxt(ctxt);
13633
13634 return ret;
13635}
Daniel Veillard81273902003-09-30 00:43:48 +000013636#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000013637
13638/**
13639 * xmlCreateDocParserCtxt:
13640 * @cur: a pointer to an array of xmlChar
13641 *
13642 * Creates a parser context for an XML in-memory document.
13643 *
13644 * Returns the new parser context or NULL
13645 */
13646xmlParserCtxtPtr
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013647xmlCreateDocParserCtxt(const xmlChar *cur) {
Owen Taylor3473f882001-02-23 17:55:21 +000013648 int len;
13649
13650 if (cur == NULL)
13651 return(NULL);
13652 len = xmlStrlen(cur);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013653 return(xmlCreateMemoryParserCtxt((const char *)cur, len));
Owen Taylor3473f882001-02-23 17:55:21 +000013654}
13655
Daniel Veillard81273902003-09-30 00:43:48 +000013656#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000013657/**
13658 * xmlSAXParseDoc:
13659 * @sax: the SAX handler block
13660 * @cur: a pointer to an array of xmlChar
13661 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
13662 * documents
13663 *
13664 * parse an XML in-memory document and build a tree.
13665 * It use the given SAX function block to handle the parsing callback.
13666 * If sax is NULL, fallback to the default DOM tree building routines.
13667 *
13668 * Returns the resulting document tree
13669 */
13670
13671xmlDocPtr
Daniel Veillard7331e5c2005-03-31 14:59:00 +000013672xmlSAXParseDoc(xmlSAXHandlerPtr sax, const xmlChar *cur, int recovery) {
Owen Taylor3473f882001-02-23 17:55:21 +000013673 xmlDocPtr ret;
13674 xmlParserCtxtPtr ctxt;
Daniel Veillard38936062004-11-04 17:45:11 +000013675 xmlSAXHandlerPtr oldsax = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000013676
Daniel Veillard38936062004-11-04 17:45:11 +000013677 if (cur == NULL) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000013678
13679
13680 ctxt = xmlCreateDocParserCtxt(cur);
13681 if (ctxt == NULL) return(NULL);
13682 if (sax != NULL) {
Daniel Veillard38936062004-11-04 17:45:11 +000013683 oldsax = ctxt->sax;
Owen Taylor3473f882001-02-23 17:55:21 +000013684 ctxt->sax = sax;
13685 ctxt->userData = NULL;
13686 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000013687 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000013688
13689 xmlParseDocument(ctxt);
13690 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
13691 else {
13692 ret = NULL;
13693 xmlFreeDoc(ctxt->myDoc);
13694 ctxt->myDoc = NULL;
13695 }
Daniel Veillard34099b42004-11-04 17:34:35 +000013696 if (sax != NULL)
Daniel Veillard38936062004-11-04 17:45:11 +000013697 ctxt->sax = oldsax;
Owen Taylor3473f882001-02-23 17:55:21 +000013698 xmlFreeParserCtxt(ctxt);
13699
13700 return(ret);
13701}
13702
13703/**
13704 * xmlParseDoc:
13705 * @cur: a pointer to an array of xmlChar
13706 *
13707 * parse an XML in-memory document and build a tree.
13708 *
13709 * Returns the resulting document tree
13710 */
13711
13712xmlDocPtr
Daniel Veillard7331e5c2005-03-31 14:59:00 +000013713xmlParseDoc(const xmlChar *cur) {
Owen Taylor3473f882001-02-23 17:55:21 +000013714 return(xmlSAXParseDoc(NULL, cur, 0));
13715}
Daniel Veillard81273902003-09-30 00:43:48 +000013716#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000013717
Daniel Veillard81273902003-09-30 00:43:48 +000013718#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +000013719/************************************************************************
13720 * *
13721 * Specific function to keep track of entities references *
13722 * and used by the XSLT debugger *
13723 * *
13724 ************************************************************************/
13725
13726static xmlEntityReferenceFunc xmlEntityRefFunc = NULL;
13727
13728/**
13729 * xmlAddEntityReference:
13730 * @ent : A valid entity
13731 * @firstNode : A valid first node for children of entity
13732 * @lastNode : A valid last node of children entity
13733 *
13734 * Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY
13735 */
13736static void
13737xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
13738 xmlNodePtr lastNode)
13739{
13740 if (xmlEntityRefFunc != NULL) {
13741 (*xmlEntityRefFunc) (ent, firstNode, lastNode);
13742 }
13743}
13744
13745
13746/**
13747 * xmlSetEntityReferenceFunc:
Daniel Veillard01c13b52002-12-10 15:19:08 +000013748 * @func: A valid function
Daniel Veillard8107a222002-01-13 14:10:10 +000013749 *
13750 * Set the function to call call back when a xml reference has been made
13751 */
13752void
13753xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func)
13754{
13755 xmlEntityRefFunc = func;
13756}
Daniel Veillard81273902003-09-30 00:43:48 +000013757#endif /* LIBXML_LEGACY_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000013758
13759/************************************************************************
13760 * *
13761 * Miscellaneous *
13762 * *
13763 ************************************************************************/
13764
13765#ifdef LIBXML_XPATH_ENABLED
13766#include <libxml/xpath.h>
13767#endif
13768
Daniel Veillardffa3c742005-07-21 13:24:09 +000013769extern void XMLCDECL xmlGenericErrorDefaultFunc(void *ctx, const char *msg, ...);
Owen Taylor3473f882001-02-23 17:55:21 +000013770static int xmlParserInitialized = 0;
13771
13772/**
13773 * xmlInitParser:
13774 *
13775 * Initialization function for the XML parser.
13776 * This is not reentrant. Call once before processing in case of
13777 * use in multithreaded programs.
13778 */
13779
13780void
13781xmlInitParser(void) {
Daniel Veillard3c01b1d2001-10-17 15:58:35 +000013782 if (xmlParserInitialized != 0)
13783 return;
Owen Taylor3473f882001-02-23 17:55:21 +000013784
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000013785#ifdef LIBXML_THREAD_ENABLED
13786 __xmlGlobalInitMutexLock();
13787 if (xmlParserInitialized == 0) {
13788#endif
13789 if ((xmlGenericError == xmlGenericErrorDefaultFunc) ||
13790 (xmlGenericError == NULL))
13791 initGenericErrorDefaultFunc(NULL);
13792 xmlInitGlobals();
13793 xmlInitThreads();
13794 xmlInitMemory();
13795 xmlInitCharEncodingHandlers();
13796 xmlDefaultSAXHandlerInit();
13797 xmlRegisterDefaultInputCallbacks();
Daniel Veillarda9cce9c2003-09-29 13:20:24 +000013798#ifdef LIBXML_OUTPUT_ENABLED
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000013799 xmlRegisterDefaultOutputCallbacks();
Daniel Veillarda9cce9c2003-09-29 13:20:24 +000013800#endif /* LIBXML_OUTPUT_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000013801#ifdef LIBXML_HTML_ENABLED
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000013802 htmlInitAutoClose();
13803 htmlDefaultSAXHandlerInit();
Owen Taylor3473f882001-02-23 17:55:21 +000013804#endif
13805#ifdef LIBXML_XPATH_ENABLED
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000013806 xmlXPathInit();
Owen Taylor3473f882001-02-23 17:55:21 +000013807#endif
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000013808 xmlParserInitialized = 1;
13809#ifdef LIBXML_THREAD_ENABLED
13810 }
13811 __xmlGlobalInitMutexUnlock();
13812#endif
Owen Taylor3473f882001-02-23 17:55:21 +000013813}
13814
13815/**
13816 * xmlCleanupParser:
13817 *
Daniel Veillard05b37c62008-03-31 08:27:07 +000013818 * This function name is somewhat misleading. It does not clean up
13819 * parser state, it cleans up memory allocated by the library itself.
13820 * It is a cleanup function for the XML library. It tries to reclaim all
13821 * related global memory allocated for the library processing.
13822 * It doesn't deallocate any document related memory. One should
13823 * call xmlCleanupParser() only when the process has finished using
13824 * the library and all XML/HTML documents built with it.
13825 * See also xmlInitParser() which has the opposite function of preparing
13826 * the library for operations.
Owen Taylor3473f882001-02-23 17:55:21 +000013827 */
13828
13829void
13830xmlCleanupParser(void) {
Daniel Veillard7fb801f2003-08-17 21:07:26 +000013831 if (!xmlParserInitialized)
13832 return;
13833
Owen Taylor3473f882001-02-23 17:55:21 +000013834 xmlCleanupCharEncodingHandlers();
Daniel Veillarde2940dd2001-08-22 00:06:49 +000013835#ifdef LIBXML_CATALOG_ENABLED
13836 xmlCatalogCleanup();
13837#endif
Daniel Veillard14412512005-01-21 23:53:26 +000013838 xmlDictCleanup();
Daniel Veillard04054be2003-10-15 10:48:54 +000013839 xmlCleanupInputCallbacks();
13840#ifdef LIBXML_OUTPUT_ENABLED
13841 xmlCleanupOutputCallbacks();
13842#endif
Daniel Veillard36e5cd52004-11-02 14:52:23 +000013843#ifdef LIBXML_SCHEMAS_ENABLED
13844 xmlSchemaCleanupTypes();
Daniel Veillarddd6d3002004-11-03 14:20:29 +000013845 xmlRelaxNGCleanupTypes();
Daniel Veillard36e5cd52004-11-02 14:52:23 +000013846#endif
Daniel Veillard781ac8b2003-05-15 22:11:36 +000013847 xmlCleanupGlobals();
Daniel Veillard2b8c4a12003-10-02 22:28:19 +000013848 xmlResetLastError();
Daniel Veillard74c0e592003-11-25 07:01:38 +000013849 xmlCleanupThreads(); /* must be last if called not from the main thread */
William M. Brack72ee48d2003-12-30 08:30:19 +000013850 xmlCleanupMemory();
Daniel Veillardd0463562001-10-13 09:15:48 +000013851 xmlParserInitialized = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000013852}
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013853
13854/************************************************************************
13855 * *
13856 * New set (2.6.0) of simpler and more flexible APIs *
13857 * *
13858 ************************************************************************/
13859
/**
 * DICT_FREE:
 * @str:  a string
 *
 * Free a string if it is not owned by the "dict" dictionary in the
 * current scope. A NULL @str is ignored; when "dict" is NULL the string
 * is always freed.
 *
 * The expansion is wrapped in do { } while (0) so that the macro behaves
 * as a single statement and can be used as the body of an unbraced
 * if/else. Invocations must be terminated with a semicolon.
 */
#define DICT_FREE(str)                                              \
    do {                                                            \
        if ((str) && ((!dict) ||                                    \
            (xmlDictOwns(dict, (const xmlChar *)(str)) == 0)))      \
            xmlFree((char *)(str));                                 \
    } while (0)
13871
13872/**
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013873 * xmlCtxtReset:
13874 * @ctxt: an XML parser context
13875 *
13876 * Reset a parser context
13877 */
13878void
13879xmlCtxtReset(xmlParserCtxtPtr ctxt)
13880{
13881 xmlParserInputPtr input;
Daniel Veillard36e5cd52004-11-02 14:52:23 +000013882 xmlDictPtr dict;
13883
13884 if (ctxt == NULL)
13885 return;
13886
13887 dict = ctxt->dict;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013888
13889 while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
13890 xmlFreeInputStream(input);
13891 }
13892 ctxt->inputNr = 0;
13893 ctxt->input = NULL;
13894
13895 ctxt->spaceNr = 0;
Daniel Veillard2e620862007-06-12 08:18:21 +000013896 if (ctxt->spaceTab != NULL) {
13897 ctxt->spaceTab[0] = -1;
13898 ctxt->space = &ctxt->spaceTab[0];
13899 } else {
13900 ctxt->space = NULL;
13901 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013902
13903
13904 ctxt->nodeNr = 0;
13905 ctxt->node = NULL;
13906
13907 ctxt->nameNr = 0;
13908 ctxt->name = NULL;
13909
Daniel Veillarde96a2a42003-09-24 21:23:56 +000013910 DICT_FREE(ctxt->version);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013911 ctxt->version = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000013912 DICT_FREE(ctxt->encoding);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013913 ctxt->encoding = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000013914 DICT_FREE(ctxt->directory);
13915 ctxt->directory = NULL;
13916 DICT_FREE(ctxt->extSubURI);
13917 ctxt->extSubURI = NULL;
13918 DICT_FREE(ctxt->extSubSystem);
13919 ctxt->extSubSystem = NULL;
13920 if (ctxt->myDoc != NULL)
13921 xmlFreeDoc(ctxt->myDoc);
13922 ctxt->myDoc = NULL;
13923
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013924 ctxt->standalone = -1;
13925 ctxt->hasExternalSubset = 0;
13926 ctxt->hasPErefs = 0;
13927 ctxt->html = 0;
13928 ctxt->external = 0;
13929 ctxt->instate = XML_PARSER_START;
13930 ctxt->token = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013931
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013932 ctxt->wellFormed = 1;
13933 ctxt->nsWellFormed = 1;
Daniel Veillardae289182004-01-21 16:00:43 +000013934 ctxt->disableSAX = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013935 ctxt->valid = 1;
Daniel Veillard766c4f92004-03-26 10:48:29 +000013936#if 0
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013937 ctxt->vctxt.userData = ctxt;
13938 ctxt->vctxt.error = xmlParserValidityError;
13939 ctxt->vctxt.warning = xmlParserValidityWarning;
Daniel Veillard766c4f92004-03-26 10:48:29 +000013940#endif
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013941 ctxt->record_info = 0;
13942 ctxt->nbChars = 0;
13943 ctxt->checkIndex = 0;
13944 ctxt->inSubset = 0;
13945 ctxt->errNo = XML_ERR_OK;
13946 ctxt->depth = 0;
13947 ctxt->charset = XML_CHAR_ENCODING_UTF8;
13948 ctxt->catalogs = NULL;
Daniel Veillard4bf899b2008-08-20 17:04:30 +000013949 ctxt->nbentities = 0;
Daniel Veillard0161e632008-08-28 15:36:32 +000013950 ctxt->sizeentities = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013951 xmlInitNodeInfoSeq(&ctxt->node_seq);
13952
13953 if (ctxt->attsDefault != NULL) {
13954 xmlHashFree(ctxt->attsDefault, (xmlHashDeallocator) xmlFree);
13955 ctxt->attsDefault = NULL;
13956 }
13957 if (ctxt->attsSpecial != NULL) {
13958 xmlHashFree(ctxt->attsSpecial, NULL);
13959 ctxt->attsSpecial = NULL;
13960 }
13961
Daniel Veillard4432df22003-09-28 18:58:27 +000013962#ifdef LIBXML_CATALOG_ENABLED
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013963 if (ctxt->catalogs != NULL)
13964 xmlCatalogFreeLocal(ctxt->catalogs);
Daniel Veillard4432df22003-09-28 18:58:27 +000013965#endif
Daniel Veillardcc199e02003-10-24 21:11:48 +000013966 if (ctxt->lastError.code != XML_ERR_OK)
13967 xmlResetError(&ctxt->lastError);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013968}
13969
13970/**
Daniel Veillard9ba8e382003-10-28 21:31:45 +000013971 * xmlCtxtResetPush:
13972 * @ctxt: an XML parser context
13973 * @chunk: a pointer to an array of chars
13974 * @size: number of chars in the array
13975 * @filename: an optional file name or URI
13976 * @encoding: the document encoding, or NULL
13977 *
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000013978 * Reset a push parser context
13979 *
13980 * Returns 0 in case of success and 1 in case of error
Daniel Veillard9ba8e382003-10-28 21:31:45 +000013981 */
13982int
13983xmlCtxtResetPush(xmlParserCtxtPtr ctxt, const char *chunk,
13984 int size, const char *filename, const char *encoding)
13985{
13986 xmlParserInputPtr inputStream;
13987 xmlParserInputBufferPtr buf;
13988 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
13989
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000013990 if (ctxt == NULL)
13991 return(1);
13992
Daniel Veillard9ba8e382003-10-28 21:31:45 +000013993 if ((encoding == NULL) && (chunk != NULL) && (size >= 4))
13994 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
13995
13996 buf = xmlAllocParserInputBuffer(enc);
13997 if (buf == NULL)
13998 return(1);
13999
14000 if (ctxt == NULL) {
14001 xmlFreeParserInputBuffer(buf);
14002 return(1);
14003 }
14004
14005 xmlCtxtReset(ctxt);
14006
14007 if (ctxt->pushTab == NULL) {
14008 ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 *
14009 sizeof(xmlChar *));
14010 if (ctxt->pushTab == NULL) {
14011 xmlErrMemory(ctxt, NULL);
14012 xmlFreeParserInputBuffer(buf);
14013 return(1);
14014 }
14015 }
14016
14017 if (filename == NULL) {
14018 ctxt->directory = NULL;
14019 } else {
14020 ctxt->directory = xmlParserGetDirectory(filename);
14021 }
14022
14023 inputStream = xmlNewInputStream(ctxt);
14024 if (inputStream == NULL) {
14025 xmlFreeParserInputBuffer(buf);
14026 return(1);
14027 }
14028
14029 if (filename == NULL)
14030 inputStream->filename = NULL;
14031 else
14032 inputStream->filename = (char *)
14033 xmlCanonicPath((const xmlChar *) filename);
14034 inputStream->buf = buf;
14035 inputStream->base = inputStream->buf->buffer->content;
14036 inputStream->cur = inputStream->buf->buffer->content;
14037 inputStream->end =
14038 &inputStream->buf->buffer->content[inputStream->buf->buffer->use];
14039
14040 inputPush(ctxt, inputStream);
14041
14042 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
14043 (ctxt->input->buf != NULL)) {
14044 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
14045 int cur = ctxt->input->cur - ctxt->input->base;
14046
14047 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
14048
14049 ctxt->input->base = ctxt->input->buf->buffer->content + base;
14050 ctxt->input->cur = ctxt->input->base + cur;
14051 ctxt->input->end =
14052 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->
14053 use];
14054#ifdef DEBUG_PUSH
14055 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
14056#endif
14057 }
14058
14059 if (encoding != NULL) {
14060 xmlCharEncodingHandlerPtr hdlr;
14061
Daniel Veillard37334572008-07-31 08:20:02 +000014062 if (ctxt->encoding != NULL)
14063 xmlFree((xmlChar *) ctxt->encoding);
14064 ctxt->encoding = xmlStrdup((const xmlChar *) encoding);
14065
Daniel Veillard9ba8e382003-10-28 21:31:45 +000014066 hdlr = xmlFindCharEncodingHandler(encoding);
14067 if (hdlr != NULL) {
14068 xmlSwitchToEncoding(ctxt, hdlr);
14069 } else {
14070 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
14071 "Unsupported encoding %s\n", BAD_CAST encoding);
14072 }
14073 } else if (enc != XML_CHAR_ENCODING_NONE) {
14074 xmlSwitchEncoding(ctxt, enc);
14075 }
14076
14077 return(0);
14078}
14079
Daniel Veillard37334572008-07-31 08:20:02 +000014080
Daniel Veillard9ba8e382003-10-28 21:31:45 +000014081/**
Daniel Veillard37334572008-07-31 08:20:02 +000014082 * xmlCtxtUseOptionsInternal:
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014083 * @ctxt: an XML parser context
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014084 * @options: a combination of xmlParserOption
Daniel Veillard37334572008-07-31 08:20:02 +000014085 * @encoding: the user provided encoding to use
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014086 *
14087 * Applies the options to the parser context
14088 *
14089 * Returns 0 in case of success, the set of unknown or unimplemented options
14090 * in case of error.
14091 */
Daniel Veillard37334572008-07-31 08:20:02 +000014092static int
14093xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options, const char *encoding)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014094{
Daniel Veillard36e5cd52004-11-02 14:52:23 +000014095 if (ctxt == NULL)
14096 return(-1);
Daniel Veillard37334572008-07-31 08:20:02 +000014097 if (encoding != NULL) {
14098 if (ctxt->encoding != NULL)
14099 xmlFree((xmlChar *) ctxt->encoding);
14100 ctxt->encoding = xmlStrdup((const xmlChar *) encoding);
14101 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014102 if (options & XML_PARSE_RECOVER) {
14103 ctxt->recovery = 1;
14104 options -= XML_PARSE_RECOVER;
Daniel Veillardae0765b2008-07-31 19:54:59 +000014105 ctxt->options |= XML_PARSE_RECOVER;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014106 } else
14107 ctxt->recovery = 0;
14108 if (options & XML_PARSE_DTDLOAD) {
14109 ctxt->loadsubset = XML_DETECT_IDS;
14110 options -= XML_PARSE_DTDLOAD;
Daniel Veillardae0765b2008-07-31 19:54:59 +000014111 ctxt->options |= XML_PARSE_DTDLOAD;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014112 } else
14113 ctxt->loadsubset = 0;
14114 if (options & XML_PARSE_DTDATTR) {
14115 ctxt->loadsubset |= XML_COMPLETE_ATTRS;
14116 options -= XML_PARSE_DTDATTR;
Daniel Veillardae0765b2008-07-31 19:54:59 +000014117 ctxt->options |= XML_PARSE_DTDATTR;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014118 }
14119 if (options & XML_PARSE_NOENT) {
14120 ctxt->replaceEntities = 1;
14121 /* ctxt->loadsubset |= XML_DETECT_IDS; */
14122 options -= XML_PARSE_NOENT;
Daniel Veillardae0765b2008-07-31 19:54:59 +000014123 ctxt->options |= XML_PARSE_NOENT;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014124 } else
14125 ctxt->replaceEntities = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014126 if (options & XML_PARSE_PEDANTIC) {
14127 ctxt->pedantic = 1;
14128 options -= XML_PARSE_PEDANTIC;
Daniel Veillardae0765b2008-07-31 19:54:59 +000014129 ctxt->options |= XML_PARSE_PEDANTIC;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014130 } else
14131 ctxt->pedantic = 0;
14132 if (options & XML_PARSE_NOBLANKS) {
14133 ctxt->keepBlanks = 0;
14134 ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace;
14135 options -= XML_PARSE_NOBLANKS;
Daniel Veillardae0765b2008-07-31 19:54:59 +000014136 ctxt->options |= XML_PARSE_NOBLANKS;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014137 } else
14138 ctxt->keepBlanks = 1;
14139 if (options & XML_PARSE_DTDVALID) {
14140 ctxt->validate = 1;
14141 if (options & XML_PARSE_NOWARNING)
14142 ctxt->vctxt.warning = NULL;
14143 if (options & XML_PARSE_NOERROR)
14144 ctxt->vctxt.error = NULL;
14145 options -= XML_PARSE_DTDVALID;
Daniel Veillardae0765b2008-07-31 19:54:59 +000014146 ctxt->options |= XML_PARSE_DTDVALID;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014147 } else
14148 ctxt->validate = 0;
Daniel Veillard971771e2005-07-09 17:32:57 +000014149 if (options & XML_PARSE_NOWARNING) {
14150 ctxt->sax->warning = NULL;
14151 options -= XML_PARSE_NOWARNING;
14152 }
14153 if (options & XML_PARSE_NOERROR) {
14154 ctxt->sax->error = NULL;
14155 ctxt->sax->fatalError = NULL;
14156 options -= XML_PARSE_NOERROR;
14157 }
Daniel Veillard81273902003-09-30 00:43:48 +000014158#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014159 if (options & XML_PARSE_SAX1) {
14160 ctxt->sax->startElement = xmlSAX2StartElement;
14161 ctxt->sax->endElement = xmlSAX2EndElement;
14162 ctxt->sax->startElementNs = NULL;
14163 ctxt->sax->endElementNs = NULL;
14164 ctxt->sax->initialized = 1;
14165 options -= XML_PARSE_SAX1;
Daniel Veillardae0765b2008-07-31 19:54:59 +000014166 ctxt->options |= XML_PARSE_SAX1;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014167 }
Daniel Veillard81273902003-09-30 00:43:48 +000014168#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde96a2a42003-09-24 21:23:56 +000014169 if (options & XML_PARSE_NODICT) {
14170 ctxt->dictNames = 0;
14171 options -= XML_PARSE_NODICT;
Daniel Veillardae0765b2008-07-31 19:54:59 +000014172 ctxt->options |= XML_PARSE_NODICT;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000014173 } else {
14174 ctxt->dictNames = 1;
14175 }
Daniel Veillarddca8cc72003-09-26 13:53:14 +000014176 if (options & XML_PARSE_NOCDATA) {
14177 ctxt->sax->cdataBlock = NULL;
14178 options -= XML_PARSE_NOCDATA;
Daniel Veillardae0765b2008-07-31 19:54:59 +000014179 ctxt->options |= XML_PARSE_NOCDATA;
Daniel Veillarddca8cc72003-09-26 13:53:14 +000014180 }
14181 if (options & XML_PARSE_NSCLEAN) {
14182 ctxt->options |= XML_PARSE_NSCLEAN;
14183 options -= XML_PARSE_NSCLEAN;
14184 }
Daniel Veillard61b93382003-11-03 14:28:31 +000014185 if (options & XML_PARSE_NONET) {
14186 ctxt->options |= XML_PARSE_NONET;
14187 options -= XML_PARSE_NONET;
14188 }
Daniel Veillard8874b942005-08-25 13:19:21 +000014189 if (options & XML_PARSE_COMPACT) {
14190 ctxt->options |= XML_PARSE_COMPACT;
14191 options -= XML_PARSE_COMPACT;
14192 }
Daniel Veillard7e5c3f42008-07-29 16:12:31 +000014193 if (options & XML_PARSE_OLD10) {
14194 ctxt->options |= XML_PARSE_OLD10;
14195 options -= XML_PARSE_OLD10;
14196 }
Daniel Veillard8915c152008-08-26 13:05:34 +000014197 if (options & XML_PARSE_NOBASEFIX) {
14198 ctxt->options |= XML_PARSE_NOBASEFIX;
14199 options -= XML_PARSE_NOBASEFIX;
14200 }
14201 if (options & XML_PARSE_HUGE) {
14202 ctxt->options |= XML_PARSE_HUGE;
14203 options -= XML_PARSE_HUGE;
14204 }
Daniel Veillard7ec29972003-10-31 14:36:36 +000014205 ctxt->linenumbers = 1;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014206 return (options);
14207}
14208
14209/**
Daniel Veillard37334572008-07-31 08:20:02 +000014210 * xmlCtxtUseOptions:
14211 * @ctxt: an XML parser context
14212 * @options: a combination of xmlParserOption
14213 *
14214 * Applies the options to the parser context
14215 *
14216 * Returns 0 in case of success, the set of unknown or unimplemented options
14217 * in case of error.
14218 */
14219int
14220xmlCtxtUseOptions(xmlParserCtxtPtr ctxt, int options)
14221{
14222 return(xmlCtxtUseOptionsInternal(ctxt, options, NULL));
14223}
14224
14225/**
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014226 * xmlDoRead:
14227 * @ctxt: an XML parser context
Daniel Veillard60942de2003-09-25 21:05:58 +000014228 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014229 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014230 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014231 * @reuse: keep the context for reuse
14232 *
14233 * Common front-end for the xmlRead functions
Daniel Veillard37334572008-07-31 08:20:02 +000014234 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014235 * Returns the resulting document tree or NULL
14236 */
14237static xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000014238xmlDoRead(xmlParserCtxtPtr ctxt, const char *URL, const char *encoding,
14239 int options, int reuse)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014240{
14241 xmlDocPtr ret;
Daniel Veillard37334572008-07-31 08:20:02 +000014242
14243 xmlCtxtUseOptionsInternal(ctxt, options, encoding);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014244 if (encoding != NULL) {
14245 xmlCharEncodingHandlerPtr hdlr;
14246
14247 hdlr = xmlFindCharEncodingHandler(encoding);
14248 if (hdlr != NULL)
14249 xmlSwitchToEncoding(ctxt, hdlr);
14250 }
Daniel Veillard60942de2003-09-25 21:05:58 +000014251 if ((URL != NULL) && (ctxt->input != NULL) &&
14252 (ctxt->input->filename == NULL))
14253 ctxt->input->filename = (char *) xmlStrdup((const xmlChar *) URL);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014254 xmlParseDocument(ctxt);
14255 if ((ctxt->wellFormed) || ctxt->recovery)
14256 ret = ctxt->myDoc;
14257 else {
14258 ret = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000014259 if (ctxt->myDoc != NULL) {
Daniel Veillarde96a2a42003-09-24 21:23:56 +000014260 xmlFreeDoc(ctxt->myDoc);
14261 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014262 }
Daniel Veillarde96a2a42003-09-24 21:23:56 +000014263 ctxt->myDoc = NULL;
14264 if (!reuse) {
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014265 xmlFreeParserCtxt(ctxt);
Daniel Veillarde96a2a42003-09-24 21:23:56 +000014266 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014267
14268 return (ret);
14269}
14270
14271/**
14272 * xmlReadDoc:
14273 * @cur: a pointer to a zero terminated string
Daniel Veillard60942de2003-09-25 21:05:58 +000014274 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014275 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014276 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014277 *
14278 * parse an XML in-memory document and build a tree.
14279 *
14280 * Returns the resulting document tree
14281 */
14282xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000014283xmlReadDoc(const xmlChar * cur, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014284{
14285 xmlParserCtxtPtr ctxt;
14286
14287 if (cur == NULL)
14288 return (NULL);
14289
14290 ctxt = xmlCreateDocParserCtxt(cur);
14291 if (ctxt == NULL)
14292 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000014293 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014294}
14295
14296/**
14297 * xmlReadFile:
14298 * @filename: a file or URL
14299 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014300 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014301 *
14302 * parse an XML file from the filesystem or the network.
14303 *
14304 * Returns the resulting document tree
14305 */
14306xmlDocPtr
14307xmlReadFile(const char *filename, const char *encoding, int options)
14308{
14309 xmlParserCtxtPtr ctxt;
14310
Daniel Veillard61b93382003-11-03 14:28:31 +000014311 ctxt = xmlCreateURLParserCtxt(filename, options);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014312 if (ctxt == NULL)
14313 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000014314 return (xmlDoRead(ctxt, NULL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014315}
14316
14317/**
14318 * xmlReadMemory:
14319 * @buffer: a pointer to a char array
14320 * @size: the size of the array
Daniel Veillard60942de2003-09-25 21:05:58 +000014321 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014322 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014323 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014324 *
14325 * parse an XML in-memory document and build a tree.
14326 *
14327 * Returns the resulting document tree
14328 */
14329xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000014330xmlReadMemory(const char *buffer, int size, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014331{
14332 xmlParserCtxtPtr ctxt;
14333
14334 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
14335 if (ctxt == NULL)
14336 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000014337 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014338}
14339
14340/**
14341 * xmlReadFd:
14342 * @fd: an open file descriptor
Daniel Veillard60942de2003-09-25 21:05:58 +000014343 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014344 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014345 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014346 *
14347 * parse an XML from a file descriptor and build a tree.
Daniel Veillard4bc5f432003-12-22 18:13:12 +000014348 * NOTE that the file descriptor will not be closed when the
14349 * reader is closed or reset.
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014350 *
14351 * Returns the resulting document tree
14352 */
14353xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000014354xmlReadFd(int fd, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014355{
14356 xmlParserCtxtPtr ctxt;
14357 xmlParserInputBufferPtr input;
14358 xmlParserInputPtr stream;
14359
14360 if (fd < 0)
14361 return (NULL);
14362
14363 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
14364 if (input == NULL)
14365 return (NULL);
Daniel Veillard4bc5f432003-12-22 18:13:12 +000014366 input->closecallback = NULL;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014367 ctxt = xmlNewParserCtxt();
14368 if (ctxt == NULL) {
14369 xmlFreeParserInputBuffer(input);
14370 return (NULL);
14371 }
14372 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
14373 if (stream == NULL) {
14374 xmlFreeParserInputBuffer(input);
14375 xmlFreeParserCtxt(ctxt);
14376 return (NULL);
14377 }
14378 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000014379 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014380}
14381
14382/**
14383 * xmlReadIO:
14384 * @ioread: an I/O read function
14385 * @ioclose: an I/O close function
14386 * @ioctx: an I/O handler
Daniel Veillard60942de2003-09-25 21:05:58 +000014387 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014388 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014389 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014390 *
14391 * parse an XML document from I/O functions and source and build a tree.
14392 *
14393 * Returns the resulting document tree
14394 */
14395xmlDocPtr
14396xmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
Daniel Veillard60942de2003-09-25 21:05:58 +000014397 void *ioctx, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014398{
14399 xmlParserCtxtPtr ctxt;
14400 xmlParserInputBufferPtr input;
14401 xmlParserInputPtr stream;
14402
14403 if (ioread == NULL)
14404 return (NULL);
14405
14406 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
14407 XML_CHAR_ENCODING_NONE);
14408 if (input == NULL)
14409 return (NULL);
14410 ctxt = xmlNewParserCtxt();
14411 if (ctxt == NULL) {
14412 xmlFreeParserInputBuffer(input);
14413 return (NULL);
14414 }
14415 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
14416 if (stream == NULL) {
14417 xmlFreeParserInputBuffer(input);
14418 xmlFreeParserCtxt(ctxt);
14419 return (NULL);
14420 }
14421 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000014422 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014423}
14424
14425/**
14426 * xmlCtxtReadDoc:
14427 * @ctxt: an XML parser context
14428 * @cur: a pointer to a zero terminated string
Daniel Veillard60942de2003-09-25 21:05:58 +000014429 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014430 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014431 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014432 *
14433 * parse an XML in-memory document and build a tree.
14434 * This reuses the existing @ctxt parser context
14435 *
14436 * Returns the resulting document tree
14437 */
14438xmlDocPtr
14439xmlCtxtReadDoc(xmlParserCtxtPtr ctxt, const xmlChar * cur,
Daniel Veillard60942de2003-09-25 21:05:58 +000014440 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014441{
14442 xmlParserInputPtr stream;
14443
14444 if (cur == NULL)
14445 return (NULL);
14446 if (ctxt == NULL)
14447 return (NULL);
14448
14449 xmlCtxtReset(ctxt);
14450
14451 stream = xmlNewStringInputStream(ctxt, cur);
14452 if (stream == NULL) {
14453 return (NULL);
14454 }
14455 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000014456 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014457}
14458
14459/**
14460 * xmlCtxtReadFile:
14461 * @ctxt: an XML parser context
14462 * @filename: a file or URL
14463 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014464 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014465 *
14466 * parse an XML file from the filesystem or the network.
14467 * This reuses the existing @ctxt parser context
14468 *
14469 * Returns the resulting document tree
14470 */
14471xmlDocPtr
14472xmlCtxtReadFile(xmlParserCtxtPtr ctxt, const char *filename,
14473 const char *encoding, int options)
14474{
14475 xmlParserInputPtr stream;
14476
14477 if (filename == NULL)
14478 return (NULL);
14479 if (ctxt == NULL)
14480 return (NULL);
14481
14482 xmlCtxtReset(ctxt);
14483
Daniel Veillard29614c72004-11-26 10:47:26 +000014484 stream = xmlLoadExternalEntity(filename, NULL, ctxt);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014485 if (stream == NULL) {
14486 return (NULL);
14487 }
14488 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000014489 return (xmlDoRead(ctxt, NULL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014490}
14491
14492/**
14493 * xmlCtxtReadMemory:
14494 * @ctxt: an XML parser context
14495 * @buffer: a pointer to a char array
14496 * @size: the size of the array
Daniel Veillard60942de2003-09-25 21:05:58 +000014497 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014498 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014499 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014500 *
14501 * parse an XML in-memory document and build a tree.
14502 * This reuses the existing @ctxt parser context
14503 *
14504 * Returns the resulting document tree
14505 */
14506xmlDocPtr
14507xmlCtxtReadMemory(xmlParserCtxtPtr ctxt, const char *buffer, int size,
Daniel Veillard60942de2003-09-25 21:05:58 +000014508 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014509{
14510 xmlParserInputBufferPtr input;
14511 xmlParserInputPtr stream;
14512
14513 if (ctxt == NULL)
14514 return (NULL);
14515 if (buffer == NULL)
14516 return (NULL);
14517
14518 xmlCtxtReset(ctxt);
14519
14520 input = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
14521 if (input == NULL) {
14522 return(NULL);
14523 }
14524
14525 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
14526 if (stream == NULL) {
14527 xmlFreeParserInputBuffer(input);
14528 return(NULL);
14529 }
14530
14531 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000014532 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014533}
14534
14535/**
14536 * xmlCtxtReadFd:
14537 * @ctxt: an XML parser context
14538 * @fd: an open file descriptor
Daniel Veillard60942de2003-09-25 21:05:58 +000014539 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014540 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014541 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014542 *
14543 * parse an XML from a file descriptor and build a tree.
14544 * This reuses the existing @ctxt parser context
Daniel Veillard4bc5f432003-12-22 18:13:12 +000014545 * NOTE that the file descriptor will not be closed when the
14546 * reader is closed or reset.
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014547 *
14548 * Returns the resulting document tree
14549 */
14550xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000014551xmlCtxtReadFd(xmlParserCtxtPtr ctxt, int fd,
14552 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014553{
14554 xmlParserInputBufferPtr input;
14555 xmlParserInputPtr stream;
14556
14557 if (fd < 0)
14558 return (NULL);
14559 if (ctxt == NULL)
14560 return (NULL);
14561
14562 xmlCtxtReset(ctxt);
14563
14564
14565 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
14566 if (input == NULL)
14567 return (NULL);
Daniel Veillard4bc5f432003-12-22 18:13:12 +000014568 input->closecallback = NULL;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014569 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
14570 if (stream == NULL) {
14571 xmlFreeParserInputBuffer(input);
14572 return (NULL);
14573 }
14574 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000014575 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014576}
14577
14578/**
14579 * xmlCtxtReadIO:
14580 * @ctxt: an XML parser context
14581 * @ioread: an I/O read function
14582 * @ioclose: an I/O close function
14583 * @ioctx: an I/O handler
Daniel Veillard60942de2003-09-25 21:05:58 +000014584 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014585 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014586 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014587 *
14588 * parse an XML document from I/O functions and source and build a tree.
14589 * This reuses the existing @ctxt parser context
14590 *
14591 * Returns the resulting document tree
14592 */
14593xmlDocPtr
14594xmlCtxtReadIO(xmlParserCtxtPtr ctxt, xmlInputReadCallback ioread,
14595 xmlInputCloseCallback ioclose, void *ioctx,
Daniel Veillard60942de2003-09-25 21:05:58 +000014596 const char *URL,
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014597 const char *encoding, int options)
14598{
14599 xmlParserInputBufferPtr input;
14600 xmlParserInputPtr stream;
14601
14602 if (ioread == NULL)
14603 return (NULL);
14604 if (ctxt == NULL)
14605 return (NULL);
14606
14607 xmlCtxtReset(ctxt);
14608
14609 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
14610 XML_CHAR_ENCODING_NONE);
14611 if (input == NULL)
14612 return (NULL);
14613 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
14614 if (stream == NULL) {
14615 xmlFreeParserInputBuffer(input);
14616 return (NULL);
14617 }
14618 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000014619 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014620}
Daniel Veillard5d4644e2005-04-01 13:11:58 +000014621
14622#define bottom_parser
14623#include "elfgcchack.h"