blob: 9876a469480d60f5e1e35e835fb36533fda901b2 [file] [log] [blame]
/*
 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
 *            implemented on top of the SAX interfaces
 *
 * References:
 *   The XML specification:
 *     http://www.w3.org/TR/REC-xml
 *   Original 1.0 version:
 *     http://www.w3.org/TR/1998/REC-xml-19980210
 *   XML second edition working draft
 *     http://www.w3.org/TR/2000/WD-xml-2e-20000814
 *
 * Okay this is a big file, the parser core is around 7000 lines, then it
 * is followed by the progressive parser top routines, then the various
 * high level APIs to call the parser and a few miscellaneous functions.
 * A number of helper functions and deprecated ones have been moved to
 * parserInternals.c to reduce this file size.
 * As much as possible the functions are associated with their relative
 * production in the XML specification. A few productions defining the
 * different ranges of character are actually implemented either in
 * parserInternals.h or parserInternals.c
 * The DOM tree build is realized from the default SAX callbacks in
 * the module SAX.c.
 * The routines doing the validation checks are in valid.c and called either
 * from the SAX callbacks or as standalone functions using a preparsed
 * document.
 *
 * See Copyright for the status of this software.
 *
 * daniel@veillard.com
 */
32
Daniel Veillard34ce8be2002-03-18 19:37:11 +000033#define IN_LIBXML
Bjorn Reese70a9da52001-04-21 16:57:29 +000034#include "libxml.h"
35
Daniel Veillard3c5ed912002-01-08 10:36:16 +000036#if defined(WIN32) && !defined (__CYGWIN__)
Owen Taylor3473f882001-02-23 17:55:21 +000037#define XML_DIR_SEP '\\'
38#else
Owen Taylor3473f882001-02-23 17:55:21 +000039#define XML_DIR_SEP '/'
40#endif
41
Owen Taylor3473f882001-02-23 17:55:21 +000042#include <stdlib.h>
43#include <string.h>
Aleksey Sanine7acf432003-10-02 20:05:27 +000044#include <stdarg.h>
Owen Taylor3473f882001-02-23 17:55:21 +000045#include <libxml/xmlmemory.h>
Daniel Veillardd0463562001-10-13 09:15:48 +000046#include <libxml/threads.h>
47#include <libxml/globals.h>
Owen Taylor3473f882001-02-23 17:55:21 +000048#include <libxml/tree.h>
49#include <libxml/parser.h>
50#include <libxml/parserInternals.h>
51#include <libxml/valid.h>
52#include <libxml/entities.h>
53#include <libxml/xmlerror.h>
54#include <libxml/encoding.h>
55#include <libxml/xmlIO.h>
56#include <libxml/uri.h>
Daniel Veillard5d90b6c2001-08-22 14:29:45 +000057#ifdef LIBXML_CATALOG_ENABLED
58#include <libxml/catalog.h>
59#endif
William M. Brack1d8c9b22004-12-25 10:14:57 +000060#ifdef LIBXML_SCHEMAS_ENABLED
61#include <libxml/xmlschemastypes.h>
62#include <libxml/relaxng.h>
63#endif
Owen Taylor3473f882001-02-23 17:55:21 +000064#ifdef HAVE_CTYPE_H
65#include <ctype.h>
66#endif
67#ifdef HAVE_STDLIB_H
68#include <stdlib.h>
69#endif
70#ifdef HAVE_SYS_STAT_H
71#include <sys/stat.h>
72#endif
73#ifdef HAVE_FCNTL_H
74#include <fcntl.h>
75#endif
76#ifdef HAVE_UNISTD_H
77#include <unistd.h>
78#endif
79#ifdef HAVE_ZLIB_H
80#include <zlib.h>
81#endif
82
Daniel Veillard0161e632008-08-28 15:36:32 +000083static void
84xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info);
85
86/************************************************************************
87 * *
88 * Arbitrary limits set in the parser. See XML_PARSE_HUGE *
89 * *
90 ************************************************************************/
91
#define XML_PARSER_BIG_ENTITY 1000
#define XML_PARSER_LOT_ENTITY 5000

/*
 * XML_PARSER_NON_LINEAR is the threshold where the ratio of parsed entity
 * replacement over the size in bytes of the input indicates that you have
 * an exponential behaviour. A value of 10 corresponds to at least 3 entity
 * replacements per byte of input.
 */
#define XML_PARSER_NON_LINEAR 10
102
103/*
104 * xmlParserEntityCheck
105 *
106 * Function to check non-linear entity expansion behaviour
107 * This is here to detect and stop exponential linear entity expansion
108 * This is not a limitation of the parser but a safety
109 * boundary feature. It can be disabled with the XML_PARSE_HUGE
110 * parser option.
111 */
112static int
113xmlParserEntityCheck(xmlParserCtxtPtr ctxt, unsigned long size,
114 xmlEntityPtr ent)
115{
Daniel Veillardcba68392008-08-29 12:43:40 +0000116 unsigned long consumed = 0;
Daniel Veillard0161e632008-08-28 15:36:32 +0000117
118 if ((ctxt == NULL) || (ctxt->options & XML_PARSE_HUGE))
119 return (0);
120 if (ctxt->lastError.code == XML_ERR_ENTITY_LOOP)
121 return (1);
122 if (size != 0) {
123 /*
124 * Do the check based on the replacement size of the entity
125 */
126 if (size < XML_PARSER_BIG_ENTITY)
127 return(0);
128
129 /*
130 * A limit on the amount of text data reasonably used
131 */
132 if (ctxt->input != NULL) {
133 consumed = ctxt->input->consumed +
134 (ctxt->input->cur - ctxt->input->base);
135 }
136 consumed += ctxt->sizeentities;
137
138 if ((size < XML_PARSER_NON_LINEAR * consumed) &&
139 (ctxt->nbentities * 3 < XML_PARSER_NON_LINEAR * consumed))
140 return (0);
141 } else if (ent != NULL) {
142 /*
143 * use the number of parsed entities in the replacement
144 */
145 size = ent->checked;
146
147 /*
148 * The amount of data parsed counting entities size only once
149 */
150 if (ctxt->input != NULL) {
151 consumed = ctxt->input->consumed +
152 (ctxt->input->cur - ctxt->input->base);
153 }
154 consumed += ctxt->sizeentities;
155
156 /*
157 * Check the density of entities for the amount of data
158 * knowing an entity reference will take at least 3 bytes
159 */
160 if (size * 3 < consumed * XML_PARSER_NON_LINEAR)
161 return (0);
162 } else {
163 /*
164 * strange we got no data for checking just return
165 */
166 return (0);
167 }
168
169 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
170 return (1);
171}
172
/**
 * xmlParserMaxDepth:
 *
 * Arbitrary depth limit for the XML documents that we allow to
 * process. This is not a limitation of the parser but a safety
 * boundary feature. It can be disabled with the XML_PARSE_HUGE
 * parser option.
 */
unsigned int xmlParserMaxDepth = 256;
Owen Taylor3473f882001-02-23 17:55:21 +0000182
Daniel Veillard0fb18932003-09-07 09:14:37 +0000183
Daniel Veillard0161e632008-08-28 15:36:32 +0000184
#define SAX2 1
#define XML_PARSER_BIG_BUFFER_SIZE 300
#define XML_PARSER_BUFFER_SIZE 100
#define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"
189
Owen Taylor3473f882001-02-23 17:55:21 +0000190/*
Owen Taylor3473f882001-02-23 17:55:21 +0000191 * List of XML prefixed PI allowed by W3C specs
192 */
193
Daniel Veillardb44025c2001-10-11 22:55:55 +0000194static const char *xmlW3CPIs[] = {
Owen Taylor3473f882001-02-23 17:55:21 +0000195 "xml-stylesheet",
196 NULL
197};
198
Daniel Veillarda07050d2003-10-19 14:46:32 +0000199
Owen Taylor3473f882001-02-23 17:55:21 +0000200/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
Owen Taylor3473f882001-02-23 17:55:21 +0000201xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
202 const xmlChar **str);
203
Daniel Veillard7d515752003-09-26 19:12:37 +0000204static xmlParserErrors
Daniel Veillarda97a19b2001-05-20 13:19:52 +0000205xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
206 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +0000207 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +0000208 const xmlChar *ID, xmlNodePtr *list);
Owen Taylor3473f882001-02-23 17:55:21 +0000209
Daniel Veillard37334572008-07-31 08:20:02 +0000210static int
211xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options,
212 const char *encoding);
Daniel Veillard81273902003-09-30 00:43:48 +0000213#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +0000214static void
215xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
216 xmlNodePtr lastNode);
Daniel Veillard81273902003-09-30 00:43:48 +0000217#endif /* LIBXML_LEGACY_ENABLED */
Daniel Veillard8107a222002-01-13 14:10:10 +0000218
Daniel Veillard7d515752003-09-26 19:12:37 +0000219static xmlParserErrors
Daniel Veillard328f48c2002-11-15 15:24:34 +0000220xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
221 const xmlChar *string, void *user_data, xmlNodePtr *lst);
Daniel Veillarde57ec792003-09-10 10:50:59 +0000222
Daniel Veillard8bf64ae2008-03-24 20:45:21 +0000223static int
224xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity);
225
Daniel Veillarde57ec792003-09-10 10:50:59 +0000226/************************************************************************
227 * *
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000228 * Some factorized error routines *
229 * *
230 ************************************************************************/
231
232/**
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000233 * xmlErrAttributeDup:
234 * @ctxt: an XML parser context
235 * @prefix: the attribute prefix
236 * @localname: the attribute localname
237 *
238 * Handle a redefinition of attribute error
239 */
240static void
241xmlErrAttributeDup(xmlParserCtxtPtr ctxt, const xmlChar * prefix,
242 const xmlChar * localname)
243{
Daniel Veillard157fee02003-10-31 10:36:03 +0000244 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
245 (ctxt->instate == XML_PARSER_EOF))
246 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000247 if (ctxt != NULL)
248 ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
Daniel Veillard2b8c4a12003-10-02 22:28:19 +0000249 if (prefix == NULL)
Daniel Veillard659e71e2003-10-10 14:10:40 +0000250 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000251 ctxt->errNo, XML_ERR_FATAL, NULL, 0,
252 (const char *) localname, NULL, NULL, 0, 0,
253 "Attribute %s redefined\n", localname);
Daniel Veillard2b8c4a12003-10-02 22:28:19 +0000254 else
Daniel Veillard659e71e2003-10-10 14:10:40 +0000255 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000256 ctxt->errNo, XML_ERR_FATAL, NULL, 0,
257 (const char *) prefix, (const char *) localname,
258 NULL, 0, 0, "Attribute %s:%s redefined\n", prefix,
259 localname);
Daniel Veillard30e76072006-03-09 14:13:55 +0000260 if (ctxt != NULL) {
261 ctxt->wellFormed = 0;
262 if (ctxt->recovery == 0)
263 ctxt->disableSAX = 1;
264 }
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000265}
266
267/**
268 * xmlFatalErr:
269 * @ctxt: an XML parser context
270 * @error: the error number
271 * @extra: extra information string
272 *
273 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
274 */
275static void
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000276xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info)
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000277{
278 const char *errmsg;
279
Daniel Veillard157fee02003-10-31 10:36:03 +0000280 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
281 (ctxt->instate == XML_PARSER_EOF))
282 return;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000283 switch (error) {
284 case XML_ERR_INVALID_HEX_CHARREF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000285 errmsg = "CharRef: invalid hexadecimal value\n";
286 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000287 case XML_ERR_INVALID_DEC_CHARREF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000288 errmsg = "CharRef: invalid decimal value\n";
289 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000290 case XML_ERR_INVALID_CHARREF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000291 errmsg = "CharRef: invalid value\n";
292 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000293 case XML_ERR_INTERNAL_ERROR:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000294 errmsg = "internal error";
295 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000296 case XML_ERR_PEREF_AT_EOF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000297 errmsg = "PEReference at end of document\n";
298 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000299 case XML_ERR_PEREF_IN_PROLOG:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000300 errmsg = "PEReference in prolog\n";
301 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000302 case XML_ERR_PEREF_IN_EPILOG:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000303 errmsg = "PEReference in epilog\n";
304 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000305 case XML_ERR_PEREF_NO_NAME:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000306 errmsg = "PEReference: no name\n";
307 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000308 case XML_ERR_PEREF_SEMICOL_MISSING:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000309 errmsg = "PEReference: expecting ';'\n";
310 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000311 case XML_ERR_ENTITY_LOOP:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000312 errmsg = "Detected an entity reference loop\n";
313 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000314 case XML_ERR_ENTITY_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000315 errmsg = "EntityValue: \" or ' expected\n";
316 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000317 case XML_ERR_ENTITY_PE_INTERNAL:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000318 errmsg = "PEReferences forbidden in internal subset\n";
319 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000320 case XML_ERR_ENTITY_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000321 errmsg = "EntityValue: \" or ' expected\n";
322 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000323 case XML_ERR_ATTRIBUTE_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000324 errmsg = "AttValue: \" or ' expected\n";
325 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000326 case XML_ERR_LT_IN_ATTRIBUTE:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000327 errmsg = "Unescaped '<' not allowed in attributes values\n";
328 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000329 case XML_ERR_LITERAL_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000330 errmsg = "SystemLiteral \" or ' expected\n";
331 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000332 case XML_ERR_LITERAL_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000333 errmsg = "Unfinished System or Public ID \" or ' expected\n";
334 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000335 case XML_ERR_MISPLACED_CDATA_END:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000336 errmsg = "Sequence ']]>' not allowed in content\n";
337 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000338 case XML_ERR_URI_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000339 errmsg = "SYSTEM or PUBLIC, the URI is missing\n";
340 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000341 case XML_ERR_PUBID_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000342 errmsg = "PUBLIC, the Public Identifier is missing\n";
343 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000344 case XML_ERR_HYPHEN_IN_COMMENT:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000345 errmsg = "Comment must not contain '--' (double-hyphen)\n";
346 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000347 case XML_ERR_PI_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000348 errmsg = "xmlParsePI : no target name\n";
349 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000350 case XML_ERR_RESERVED_XML_NAME:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000351 errmsg = "Invalid PI name\n";
352 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000353 case XML_ERR_NOTATION_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000354 errmsg = "NOTATION: Name expected here\n";
355 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000356 case XML_ERR_NOTATION_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000357 errmsg = "'>' required to close NOTATION declaration\n";
358 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000359 case XML_ERR_VALUE_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000360 errmsg = "Entity value required\n";
361 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000362 case XML_ERR_URI_FRAGMENT:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000363 errmsg = "Fragment not allowed";
364 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000365 case XML_ERR_ATTLIST_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000366 errmsg = "'(' required to start ATTLIST enumeration\n";
367 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000368 case XML_ERR_NMTOKEN_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000369 errmsg = "NmToken expected in ATTLIST enumeration\n";
370 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000371 case XML_ERR_ATTLIST_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000372 errmsg = "')' required to finish ATTLIST enumeration\n";
373 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000374 case XML_ERR_MIXED_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000375 errmsg = "MixedContentDecl : '|' or ')*' expected\n";
376 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000377 case XML_ERR_PCDATA_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000378 errmsg = "MixedContentDecl : '#PCDATA' expected\n";
379 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000380 case XML_ERR_ELEMCONTENT_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000381 errmsg = "ContentDecl : Name or '(' expected\n";
382 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000383 case XML_ERR_ELEMCONTENT_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000384 errmsg = "ContentDecl : ',' '|' or ')' expected\n";
385 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000386 case XML_ERR_PEREF_IN_INT_SUBSET:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000387 errmsg =
388 "PEReference: forbidden within markup decl in internal subset\n";
389 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000390 case XML_ERR_GT_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000391 errmsg = "expected '>'\n";
392 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000393 case XML_ERR_CONDSEC_INVALID:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000394 errmsg = "XML conditional section '[' expected\n";
395 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000396 case XML_ERR_EXT_SUBSET_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000397 errmsg = "Content error in the external subset\n";
398 break;
399 case XML_ERR_CONDSEC_INVALID_KEYWORD:
400 errmsg =
401 "conditional section INCLUDE or IGNORE keyword expected\n";
402 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000403 case XML_ERR_CONDSEC_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000404 errmsg = "XML conditional section not closed\n";
405 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000406 case XML_ERR_XMLDECL_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000407 errmsg = "Text declaration '<?xml' required\n";
408 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000409 case XML_ERR_XMLDECL_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000410 errmsg = "parsing XML declaration: '?>' expected\n";
411 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000412 case XML_ERR_EXT_ENTITY_STANDALONE:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000413 errmsg = "external parsed entities cannot be standalone\n";
414 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000415 case XML_ERR_ENTITYREF_SEMICOL_MISSING:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000416 errmsg = "EntityRef: expecting ';'\n";
417 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000418 case XML_ERR_DOCTYPE_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000419 errmsg = "DOCTYPE improperly terminated\n";
420 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000421 case XML_ERR_LTSLASH_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000422 errmsg = "EndTag: '</' not found\n";
423 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000424 case XML_ERR_EQUAL_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000425 errmsg = "expected '='\n";
426 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000427 case XML_ERR_STRING_NOT_CLOSED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000428 errmsg = "String not closed expecting \" or '\n";
429 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000430 case XML_ERR_STRING_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000431 errmsg = "String not started expecting ' or \"\n";
432 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000433 case XML_ERR_ENCODING_NAME:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000434 errmsg = "Invalid XML encoding name\n";
435 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000436 case XML_ERR_STANDALONE_VALUE:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000437 errmsg = "standalone accepts only 'yes' or 'no'\n";
438 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000439 case XML_ERR_DOCUMENT_EMPTY:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000440 errmsg = "Document is empty\n";
441 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000442 case XML_ERR_DOCUMENT_END:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000443 errmsg = "Extra content at the end of the document\n";
444 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000445 case XML_ERR_NOT_WELL_BALANCED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000446 errmsg = "chunk is not well balanced\n";
447 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000448 case XML_ERR_EXTRA_CONTENT:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000449 errmsg = "extra content at the end of well balanced chunk\n";
450 break;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000451 case XML_ERR_VERSION_MISSING:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000452 errmsg = "Malformed declaration expecting version\n";
453 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000454#if 0
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000455 case:
456 errmsg = "\n";
457 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000458#endif
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000459 default:
460 errmsg = "Unregistered error message\n";
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000461 }
Daniel Veillard30e76072006-03-09 14:13:55 +0000462 if (ctxt != NULL)
463 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000464 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000465 XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, errmsg,
466 info);
Daniel Veillard30e76072006-03-09 14:13:55 +0000467 if (ctxt != NULL) {
468 ctxt->wellFormed = 0;
469 if (ctxt->recovery == 0)
470 ctxt->disableSAX = 1;
471 }
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000472}
473
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000474/**
475 * xmlFatalErrMsg:
476 * @ctxt: an XML parser context
477 * @error: the error number
478 * @msg: the error message
479 *
480 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
481 */
482static void
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000483xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
484 const char *msg)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000485{
Daniel Veillard157fee02003-10-31 10:36:03 +0000486 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
487 (ctxt->instate == XML_PARSER_EOF))
488 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000489 if (ctxt != NULL)
490 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000491 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000492 XML_ERR_FATAL, NULL, 0, NULL, NULL, NULL, 0, 0, msg);
Daniel Veillard30e76072006-03-09 14:13:55 +0000493 if (ctxt != NULL) {
494 ctxt->wellFormed = 0;
495 if (ctxt->recovery == 0)
496 ctxt->disableSAX = 1;
497 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000498}
499
500/**
Daniel Veillard24eb9782003-10-04 21:08:09 +0000501 * xmlWarningMsg:
502 * @ctxt: an XML parser context
503 * @error: the error number
504 * @msg: the error message
505 * @str1: extra data
506 * @str2: extra data
507 *
508 * Handle a warning.
509 */
510static void
511xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
512 const char *msg, const xmlChar *str1, const xmlChar *str2)
513{
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000514 xmlStructuredErrorFunc schannel = NULL;
Daniel Veillard34e3f642008-07-29 09:02:27 +0000515
Daniel Veillard157fee02003-10-31 10:36:03 +0000516 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
517 (ctxt->instate == XML_PARSER_EOF))
518 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000519 if ((ctxt != NULL) && (ctxt->sax != NULL) &&
520 (ctxt->sax->initialized == XML_SAX2_MAGIC))
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000521 schannel = ctxt->sax->serror;
522 __xmlRaiseError(schannel,
Daniel Veillard659e71e2003-10-10 14:10:40 +0000523 (ctxt->sax) ? ctxt->sax->warning : NULL,
524 ctxt->userData,
Daniel Veillard24eb9782003-10-04 21:08:09 +0000525 ctxt, NULL, XML_FROM_PARSER, error,
526 XML_ERR_WARNING, NULL, 0,
527 (const char *) str1, (const char *) str2, NULL, 0, 0,
528 msg, (const char *) str1, (const char *) str2);
529}
530
531/**
532 * xmlValidityError:
533 * @ctxt: an XML parser context
534 * @error: the error number
535 * @msg: the error message
536 * @str1: extra data
537 *
Daniel Veillardf88d8cf2003-12-08 10:25:02 +0000538 * Handle a validity error.
Daniel Veillard24eb9782003-10-04 21:08:09 +0000539 */
540static void
541xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error,
Daniel Veillardae0765b2008-07-31 19:54:59 +0000542 const char *msg, const xmlChar *str1, const xmlChar *str2)
Daniel Veillard24eb9782003-10-04 21:08:09 +0000543{
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000544 xmlStructuredErrorFunc schannel = NULL;
Daniel Veillard157fee02003-10-31 10:36:03 +0000545
546 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
547 (ctxt->instate == XML_PARSER_EOF))
548 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000549 if (ctxt != NULL) {
550 ctxt->errNo = error;
551 if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC))
552 schannel = ctxt->sax->serror;
553 }
Daniel Veillardc790bf42003-10-11 10:50:10 +0000554 __xmlRaiseError(schannel,
Daniel Veillard659e71e2003-10-10 14:10:40 +0000555 ctxt->vctxt.error, ctxt->vctxt.userData,
Daniel Veillard24eb9782003-10-04 21:08:09 +0000556 ctxt, NULL, XML_FROM_DTD, error,
557 XML_ERR_ERROR, NULL, 0, (const char *) str1,
Daniel Veillardae0765b2008-07-31 19:54:59 +0000558 (const char *) str2, NULL, 0, 0,
559 msg, (const char *) str1, (const char *) str2);
Daniel Veillard30e76072006-03-09 14:13:55 +0000560 if (ctxt != NULL) {
561 ctxt->valid = 0;
562 }
Daniel Veillard24eb9782003-10-04 21:08:09 +0000563}
564
565/**
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000566 * xmlFatalErrMsgInt:
567 * @ctxt: an XML parser context
568 * @error: the error number
569 * @msg: the error message
570 * @val: an integer value
571 *
572 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
573 */
574static void
575xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000576 const char *msg, int val)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000577{
Daniel Veillard157fee02003-10-31 10:36:03 +0000578 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
579 (ctxt->instate == XML_PARSER_EOF))
580 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000581 if (ctxt != NULL)
582 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000583 __xmlRaiseError(NULL, NULL, NULL,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000584 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
585 NULL, 0, NULL, NULL, NULL, val, 0, msg, val);
Daniel Veillard30e76072006-03-09 14:13:55 +0000586 if (ctxt != NULL) {
587 ctxt->wellFormed = 0;
588 if (ctxt->recovery == 0)
589 ctxt->disableSAX = 1;
590 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000591}
592
593/**
Daniel Veillardf403d292003-10-05 13:51:35 +0000594 * xmlFatalErrMsgStrIntStr:
595 * @ctxt: an XML parser context
596 * @error: the error number
597 * @msg: the error message
598 * @str1: an string info
599 * @val: an integer value
600 * @str2: an string info
601 *
602 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
603 */
604static void
605xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
606 const char *msg, const xmlChar *str1, int val,
607 const xmlChar *str2)
608{
Daniel Veillard157fee02003-10-31 10:36:03 +0000609 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
610 (ctxt->instate == XML_PARSER_EOF))
611 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000612 if (ctxt != NULL)
613 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000614 __xmlRaiseError(NULL, NULL, NULL,
Daniel Veillardf403d292003-10-05 13:51:35 +0000615 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
616 NULL, 0, (const char *) str1, (const char *) str2,
617 NULL, val, 0, msg, str1, val, str2);
Daniel Veillard30e76072006-03-09 14:13:55 +0000618 if (ctxt != NULL) {
619 ctxt->wellFormed = 0;
620 if (ctxt->recovery == 0)
621 ctxt->disableSAX = 1;
622 }
Daniel Veillardf403d292003-10-05 13:51:35 +0000623}
624
625/**
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000626 * xmlFatalErrMsgStr:
627 * @ctxt: an XML parser context
628 * @error: the error number
629 * @msg: the error message
630 * @val: a string value
631 *
632 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
633 */
634static void
635xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000636 const char *msg, const xmlChar * val)
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000637{
Daniel Veillard157fee02003-10-31 10:36:03 +0000638 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
639 (ctxt->instate == XML_PARSER_EOF))
640 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000641 if (ctxt != NULL)
642 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000643 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000644 XML_FROM_PARSER, error, XML_ERR_FATAL,
645 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
646 val);
Daniel Veillard30e76072006-03-09 14:13:55 +0000647 if (ctxt != NULL) {
648 ctxt->wellFormed = 0;
649 if (ctxt->recovery == 0)
650 ctxt->disableSAX = 1;
651 }
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000652}
653
654/**
Daniel Veillardf403d292003-10-05 13:51:35 +0000655 * xmlErrMsgStr:
656 * @ctxt: an XML parser context
657 * @error: the error number
658 * @msg: the error message
659 * @val: a string value
660 *
661 * Handle a non fatal parser error
662 */
663static void
664xmlErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
665 const char *msg, const xmlChar * val)
666{
Daniel Veillard157fee02003-10-31 10:36:03 +0000667 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
668 (ctxt->instate == XML_PARSER_EOF))
669 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000670 if (ctxt != NULL)
671 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000672 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
Daniel Veillardf403d292003-10-05 13:51:35 +0000673 XML_FROM_PARSER, error, XML_ERR_ERROR,
674 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
675 val);
676}
677
678/**
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000679 * xmlNsErr:
680 * @ctxt: an XML parser context
681 * @error: the error number
682 * @msg: the message
683 * @info1: extra information string
684 * @info2: extra information string
685 *
686 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
687 */
688static void
689xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
690 const char *msg,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000691 const xmlChar * info1, const xmlChar * info2,
692 const xmlChar * info3)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000693{
Daniel Veillard157fee02003-10-31 10:36:03 +0000694 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
695 (ctxt->instate == XML_PARSER_EOF))
696 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000697 if (ctxt != NULL)
698 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000699 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000700 XML_ERR_ERROR, NULL, 0, (const char *) info1,
701 (const char *) info2, (const char *) info3, 0, 0, msg,
702 info1, info2, info3);
Daniel Veillard30e76072006-03-09 14:13:55 +0000703 if (ctxt != NULL)
704 ctxt->nsWellFormed = 0;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000705}
706
Daniel Veillard37334572008-07-31 08:20:02 +0000707/**
708 * xmlNsWarn
709 * @ctxt: an XML parser context
710 * @error: the error number
711 * @msg: the message
712 * @info1: extra information string
713 * @info2: extra information string
714 *
715 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
716 */
717static void
718xmlNsWarn(xmlParserCtxtPtr ctxt, xmlParserErrors error,
719 const char *msg,
720 const xmlChar * info1, const xmlChar * info2,
721 const xmlChar * info3)
722{
723 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
724 (ctxt->instate == XML_PARSER_EOF))
725 return;
726 if (ctxt != NULL)
727 ctxt->errNo = error;
728 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
729 XML_ERR_WARNING, NULL, 0, (const char *) info1,
730 (const char *) info2, (const char *) info3, 0, 0, msg,
731 info1, info2, info3);
732}
733
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000734/************************************************************************
735 * *
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000736 * Library wide options *
737 * *
738 ************************************************************************/
739
740/**
741 * xmlHasFeature:
742 * @feature: the feature to be examined
743 *
744 * Examines if the library has been compiled with a given feature.
745 *
746 * Returns a non-zero value if the feature exist, otherwise zero.
747 * Returns zero (0) if the feature does not exist or an unknown
748 * unknown feature is requested, non-zero otherwise.
749 */
750int
751xmlHasFeature(xmlFeature feature)
752{
753 switch (feature) {
Daniel Veillard602434d2005-09-12 09:20:31 +0000754 case XML_WITH_THREAD:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000755#ifdef LIBXML_THREAD_ENABLED
756 return(1);
757#else
758 return(0);
759#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000760 case XML_WITH_TREE:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000761#ifdef LIBXML_TREE_ENABLED
762 return(1);
763#else
764 return(0);
765#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000766 case XML_WITH_OUTPUT:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000767#ifdef LIBXML_OUTPUT_ENABLED
768 return(1);
769#else
770 return(0);
771#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000772 case XML_WITH_PUSH:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000773#ifdef LIBXML_PUSH_ENABLED
774 return(1);
775#else
776 return(0);
777#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000778 case XML_WITH_READER:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000779#ifdef LIBXML_READER_ENABLED
780 return(1);
781#else
782 return(0);
783#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000784 case XML_WITH_PATTERN:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000785#ifdef LIBXML_PATTERN_ENABLED
786 return(1);
787#else
788 return(0);
789#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000790 case XML_WITH_WRITER:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000791#ifdef LIBXML_WRITER_ENABLED
792 return(1);
793#else
794 return(0);
795#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000796 case XML_WITH_SAX1:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000797#ifdef LIBXML_SAX1_ENABLED
798 return(1);
799#else
800 return(0);
801#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000802 case XML_WITH_FTP:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000803#ifdef LIBXML_FTP_ENABLED
804 return(1);
805#else
806 return(0);
807#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000808 case XML_WITH_HTTP:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000809#ifdef LIBXML_HTTP_ENABLED
810 return(1);
811#else
812 return(0);
813#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000814 case XML_WITH_VALID:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000815#ifdef LIBXML_VALID_ENABLED
816 return(1);
817#else
818 return(0);
819#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000820 case XML_WITH_HTML:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000821#ifdef LIBXML_HTML_ENABLED
822 return(1);
823#else
824 return(0);
825#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000826 case XML_WITH_LEGACY:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000827#ifdef LIBXML_LEGACY_ENABLED
828 return(1);
829#else
830 return(0);
831#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000832 case XML_WITH_C14N:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000833#ifdef LIBXML_C14N_ENABLED
834 return(1);
835#else
836 return(0);
837#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000838 case XML_WITH_CATALOG:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000839#ifdef LIBXML_CATALOG_ENABLED
840 return(1);
841#else
842 return(0);
843#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000844 case XML_WITH_XPATH:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000845#ifdef LIBXML_XPATH_ENABLED
846 return(1);
847#else
848 return(0);
849#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000850 case XML_WITH_XPTR:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000851#ifdef LIBXML_XPTR_ENABLED
852 return(1);
853#else
854 return(0);
855#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000856 case XML_WITH_XINCLUDE:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000857#ifdef LIBXML_XINCLUDE_ENABLED
858 return(1);
859#else
860 return(0);
861#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000862 case XML_WITH_ICONV:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000863#ifdef LIBXML_ICONV_ENABLED
864 return(1);
865#else
866 return(0);
867#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000868 case XML_WITH_ISO8859X:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000869#ifdef LIBXML_ISO8859X_ENABLED
870 return(1);
871#else
872 return(0);
873#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000874 case XML_WITH_UNICODE:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000875#ifdef LIBXML_UNICODE_ENABLED
876 return(1);
877#else
878 return(0);
879#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000880 case XML_WITH_REGEXP:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000881#ifdef LIBXML_REGEXP_ENABLED
882 return(1);
883#else
884 return(0);
885#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000886 case XML_WITH_AUTOMATA:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000887#ifdef LIBXML_AUTOMATA_ENABLED
888 return(1);
889#else
890 return(0);
891#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000892 case XML_WITH_EXPR:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000893#ifdef LIBXML_EXPR_ENABLED
894 return(1);
895#else
896 return(0);
897#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000898 case XML_WITH_SCHEMAS:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000899#ifdef LIBXML_SCHEMAS_ENABLED
900 return(1);
901#else
902 return(0);
903#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000904 case XML_WITH_SCHEMATRON:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000905#ifdef LIBXML_SCHEMATRON_ENABLED
906 return(1);
907#else
908 return(0);
909#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000910 case XML_WITH_MODULES:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000911#ifdef LIBXML_MODULES_ENABLED
912 return(1);
913#else
914 return(0);
915#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000916 case XML_WITH_DEBUG:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000917#ifdef LIBXML_DEBUG_ENABLED
918 return(1);
919#else
920 return(0);
921#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000922 case XML_WITH_DEBUG_MEM:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000923#ifdef DEBUG_MEMORY_LOCATION
924 return(1);
925#else
926 return(0);
927#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000928 case XML_WITH_DEBUG_RUN:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000929#ifdef LIBXML_DEBUG_RUNTIME
930 return(1);
931#else
932 return(0);
Daniel Veillard34e3f642008-07-29 09:02:27 +0000933#endif
Daniel Veillard75acfee2006-07-13 06:29:56 +0000934 case XML_WITH_ZLIB:
935#ifdef LIBXML_ZLIB_ENABLED
936 return(1);
937#else
938 return(0);
939#endif
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000940 default:
941 break;
942 }
943 return(0);
944}
945
946/************************************************************************
947 * *
Daniel Veillarde57ec792003-09-10 10:50:59 +0000948 * SAX2 defaulted attributes handling *
949 * *
950 ************************************************************************/
951
952/**
953 * xmlDetectSAX2:
954 * @ctxt: an XML parser context
955 *
956 * Do the SAX2 detection and specific intialization
957 */
958static void
959xmlDetectSAX2(xmlParserCtxtPtr ctxt) {
960 if (ctxt == NULL) return;
Daniel Veillard81273902003-09-30 00:43:48 +0000961#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +0000962 if ((ctxt->sax) && (ctxt->sax->initialized == XML_SAX2_MAGIC) &&
963 ((ctxt->sax->startElementNs != NULL) ||
964 (ctxt->sax->endElementNs != NULL))) ctxt->sax2 = 1;
Daniel Veillard81273902003-09-30 00:43:48 +0000965#else
966 ctxt->sax2 = 1;
967#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +0000968
969 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
970 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
971 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
William M. Brack9f797ab2004-07-28 07:40:12 +0000972 if ((ctxt->str_xml==NULL) || (ctxt->str_xmlns==NULL) ||
973 (ctxt->str_xml_ns == NULL)) {
974 xmlErrMemory(ctxt, NULL);
975 }
Daniel Veillarde57ec792003-09-10 10:50:59 +0000976}
977
Daniel Veillarde57ec792003-09-10 10:50:59 +0000978typedef struct _xmlDefAttrs xmlDefAttrs;
979typedef xmlDefAttrs *xmlDefAttrsPtr;
980struct _xmlDefAttrs {
981 int nbAttrs; /* number of defaulted attributes on that element */
982 int maxAttrs; /* the size of the array */
Daniel Veillardae0765b2008-07-31 19:54:59 +0000983 const xmlChar *values[5]; /* array of localname/prefix/values/external */
Daniel Veillarde57ec792003-09-10 10:50:59 +0000984};
Daniel Veillarde57ec792003-09-10 10:50:59 +0000985
986/**
Daniel Veillard97c9ce22008-03-25 16:52:41 +0000987 * xmlAttrNormalizeSpace:
988 * @src: the source string
989 * @dst: the target string
990 *
991 * Normalize the space in non CDATA attribute values:
992 * If the attribute type is not CDATA, then the XML processor MUST further
993 * process the normalized attribute value by discarding any leading and
994 * trailing space (#x20) characters, and by replacing sequences of space
995 * (#x20) characters by a single space (#x20) character.
996 * Note that the size of dst need to be at least src, and if one doesn't need
997 * to preserve dst (and it doesn't come from a dictionary or read-only) then
998 * passing src as dst is just fine.
999 *
1000 * Returns a pointer to the normalized value (dst) or NULL if no conversion
1001 * is needed.
1002 */
1003static xmlChar *
1004xmlAttrNormalizeSpace(const xmlChar *src, xmlChar *dst)
1005{
1006 if ((src == NULL) || (dst == NULL))
1007 return(NULL);
1008
1009 while (*src == 0x20) src++;
1010 while (*src != 0) {
1011 if (*src == 0x20) {
1012 while (*src == 0x20) src++;
1013 if (*src != 0)
1014 *dst++ = 0x20;
1015 } else {
1016 *dst++ = *src++;
1017 }
1018 }
1019 *dst = 0;
1020 if (dst == src)
1021 return(NULL);
1022 return(dst);
1023}
1024
1025/**
1026 * xmlAttrNormalizeSpace2:
1027 * @src: the source string
1028 *
1029 * Normalize the space in non CDATA attribute values, a slightly more complex
1030 * front end to avoid allocation problems when running on attribute values
1031 * coming from the input.
1032 *
1033 * Returns a pointer to the normalized value (dst) or NULL if no conversion
1034 * is needed.
1035 */
1036static const xmlChar *
Daniel Veillardae0765b2008-07-31 19:54:59 +00001037xmlAttrNormalizeSpace2(xmlParserCtxtPtr ctxt, xmlChar *src, int *len)
Daniel Veillard97c9ce22008-03-25 16:52:41 +00001038{
1039 int i;
1040 int remove_head = 0;
1041 int need_realloc = 0;
1042 const xmlChar *cur;
1043
1044 if ((ctxt == NULL) || (src == NULL) || (len == NULL))
1045 return(NULL);
1046 i = *len;
1047 if (i <= 0)
1048 return(NULL);
1049
1050 cur = src;
1051 while (*cur == 0x20) {
1052 cur++;
1053 remove_head++;
1054 }
1055 while (*cur != 0) {
1056 if (*cur == 0x20) {
1057 cur++;
1058 if ((*cur == 0x20) || (*cur == 0)) {
1059 need_realloc = 1;
1060 break;
1061 }
1062 } else
1063 cur++;
1064 }
1065 if (need_realloc) {
1066 xmlChar *ret;
1067
1068 ret = xmlStrndup(src + remove_head, i - remove_head + 1);
1069 if (ret == NULL) {
1070 xmlErrMemory(ctxt, NULL);
1071 return(NULL);
1072 }
1073 xmlAttrNormalizeSpace(ret, ret);
1074 *len = (int) strlen((const char *)ret);
1075 return(ret);
1076 } else if (remove_head) {
1077 *len -= remove_head;
Daniel Veillardae0765b2008-07-31 19:54:59 +00001078 memmove(src, src + remove_head, 1 + *len);
1079 return(src);
Daniel Veillard97c9ce22008-03-25 16:52:41 +00001080 }
1081 return(NULL);
1082}
1083
1084/**
Daniel Veillarde57ec792003-09-10 10:50:59 +00001085 * xmlAddDefAttrs:
1086 * @ctxt: an XML parser context
1087 * @fullname: the element fullname
1088 * @fullattr: the attribute fullname
1089 * @value: the attribute value
1090 *
1091 * Add a defaulted attribute for an element
1092 */
1093static void
1094xmlAddDefAttrs(xmlParserCtxtPtr ctxt,
1095 const xmlChar *fullname,
1096 const xmlChar *fullattr,
1097 const xmlChar *value) {
1098 xmlDefAttrsPtr defaults;
1099 int len;
1100 const xmlChar *name;
1101 const xmlChar *prefix;
1102
Daniel Veillard6a31b832008-03-26 14:06:44 +00001103 /*
1104 * Allows to detect attribute redefinitions
1105 */
1106 if (ctxt->attsSpecial != NULL) {
1107 if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1108 return;
1109 }
1110
Daniel Veillarde57ec792003-09-10 10:50:59 +00001111 if (ctxt->attsDefault == NULL) {
Daniel Veillard316a5c32005-01-23 22:56:39 +00001112 ctxt->attsDefault = xmlHashCreateDict(10, ctxt->dict);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001113 if (ctxt->attsDefault == NULL)
1114 goto mem_error;
1115 }
1116
1117 /*
Daniel Veillard079f6a72004-09-23 13:15:03 +00001118 * split the element name into prefix:localname , the string found
1119 * are within the DTD and then not associated to namespace names.
Daniel Veillarde57ec792003-09-10 10:50:59 +00001120 */
1121 name = xmlSplitQName3(fullname, &len);
1122 if (name == NULL) {
1123 name = xmlDictLookup(ctxt->dict, fullname, -1);
1124 prefix = NULL;
1125 } else {
1126 name = xmlDictLookup(ctxt->dict, name, -1);
1127 prefix = xmlDictLookup(ctxt->dict, fullname, len);
1128 }
1129
1130 /*
1131 * make sure there is some storage
1132 */
1133 defaults = xmlHashLookup2(ctxt->attsDefault, name, prefix);
1134 if (defaults == NULL) {
1135 defaults = (xmlDefAttrsPtr) xmlMalloc(sizeof(xmlDefAttrs) +
Daniel Veillardae0765b2008-07-31 19:54:59 +00001136 (4 * 5) * sizeof(const xmlChar *));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001137 if (defaults == NULL)
1138 goto mem_error;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001139 defaults->nbAttrs = 0;
Daniel Veillard079f6a72004-09-23 13:15:03 +00001140 defaults->maxAttrs = 4;
Daniel Veillard68b6e022008-03-31 09:26:00 +00001141 if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1142 defaults, NULL) < 0) {
1143 xmlFree(defaults);
1144 goto mem_error;
1145 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00001146 } else if (defaults->nbAttrs >= defaults->maxAttrs) {
Daniel Veillard079f6a72004-09-23 13:15:03 +00001147 xmlDefAttrsPtr temp;
1148
1149 temp = (xmlDefAttrsPtr) xmlRealloc(defaults, sizeof(xmlDefAttrs) +
Daniel Veillardae0765b2008-07-31 19:54:59 +00001150 (2 * defaults->maxAttrs * 5) * sizeof(const xmlChar *));
Daniel Veillard079f6a72004-09-23 13:15:03 +00001151 if (temp == NULL)
Daniel Veillarde57ec792003-09-10 10:50:59 +00001152 goto mem_error;
Daniel Veillard079f6a72004-09-23 13:15:03 +00001153 defaults = temp;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001154 defaults->maxAttrs *= 2;
Daniel Veillard68b6e022008-03-31 09:26:00 +00001155 if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1156 defaults, NULL) < 0) {
1157 xmlFree(defaults);
1158 goto mem_error;
1159 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00001160 }
1161
1162 /*
Daniel Veillard8874b942005-08-25 13:19:21 +00001163 * Split the element name into prefix:localname , the string found
Daniel Veillarde57ec792003-09-10 10:50:59 +00001164 * are within the DTD and hen not associated to namespace names.
1165 */
1166 name = xmlSplitQName3(fullattr, &len);
1167 if (name == NULL) {
1168 name = xmlDictLookup(ctxt->dict, fullattr, -1);
1169 prefix = NULL;
1170 } else {
1171 name = xmlDictLookup(ctxt->dict, name, -1);
1172 prefix = xmlDictLookup(ctxt->dict, fullattr, len);
1173 }
1174
Daniel Veillardae0765b2008-07-31 19:54:59 +00001175 defaults->values[5 * defaults->nbAttrs] = name;
1176 defaults->values[5 * defaults->nbAttrs + 1] = prefix;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001177 /* intern the string and precompute the end */
1178 len = xmlStrlen(value);
1179 value = xmlDictLookup(ctxt->dict, value, len);
Daniel Veillardae0765b2008-07-31 19:54:59 +00001180 defaults->values[5 * defaults->nbAttrs + 2] = value;
1181 defaults->values[5 * defaults->nbAttrs + 3] = value + len;
1182 if (ctxt->external)
1183 defaults->values[5 * defaults->nbAttrs + 4] = BAD_CAST "external";
1184 else
1185 defaults->values[5 * defaults->nbAttrs + 4] = NULL;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001186 defaults->nbAttrs++;
1187
1188 return;
1189
1190mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001191 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001192 return;
1193}
1194
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001195/**
1196 * xmlAddSpecialAttr:
1197 * @ctxt: an XML parser context
1198 * @fullname: the element fullname
1199 * @fullattr: the attribute fullname
1200 * @type: the attribute type
1201 *
Daniel Veillardac4118d2008-01-11 05:27:32 +00001202 * Register this attribute type
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001203 */
1204static void
1205xmlAddSpecialAttr(xmlParserCtxtPtr ctxt,
1206 const xmlChar *fullname,
1207 const xmlChar *fullattr,
1208 int type)
1209{
1210 if (ctxt->attsSpecial == NULL) {
Daniel Veillard316a5c32005-01-23 22:56:39 +00001211 ctxt->attsSpecial = xmlHashCreateDict(10, ctxt->dict);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001212 if (ctxt->attsSpecial == NULL)
1213 goto mem_error;
1214 }
1215
Daniel Veillardac4118d2008-01-11 05:27:32 +00001216 if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1217 return;
1218
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +00001219 xmlHashAddEntry2(ctxt->attsSpecial, fullname, fullattr,
1220 (void *) (long) type);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001221 return;
1222
1223mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001224 xmlErrMemory(ctxt, NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001225 return;
1226}
1227
Daniel Veillard4432df22003-09-28 18:58:27 +00001228/**
Daniel Veillardac4118d2008-01-11 05:27:32 +00001229 * xmlCleanSpecialAttrCallback:
1230 *
1231 * Removes CDATA attributes from the special attribute table
1232 */
1233static void
1234xmlCleanSpecialAttrCallback(void *payload, void *data,
1235 const xmlChar *fullname, const xmlChar *fullattr,
1236 const xmlChar *unused ATTRIBUTE_UNUSED) {
1237 xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) data;
1238
Daniel Veillardb3edafd2008-01-11 08:00:57 +00001239 if (((long) payload) == XML_ATTRIBUTE_CDATA) {
Daniel Veillardac4118d2008-01-11 05:27:32 +00001240 xmlHashRemoveEntry2(ctxt->attsSpecial, fullname, fullattr, NULL);
1241 }
1242}
1243
1244/**
1245 * xmlCleanSpecialAttr:
1246 * @ctxt: an XML parser context
1247 *
1248 * Trim the list of attributes defined to remove all those of type
1249 * CDATA as they are not special. This call should be done when finishing
1250 * to parse the DTD and before starting to parse the document root.
1251 */
1252static void
1253xmlCleanSpecialAttr(xmlParserCtxtPtr ctxt)
1254{
1255 if (ctxt->attsSpecial == NULL)
1256 return;
1257
1258 xmlHashScanFull(ctxt->attsSpecial, xmlCleanSpecialAttrCallback, ctxt);
1259
1260 if (xmlHashSize(ctxt->attsSpecial) == 0) {
1261 xmlHashFree(ctxt->attsSpecial, NULL);
1262 ctxt->attsSpecial = NULL;
1263 }
1264 return;
1265}
1266
1267/**
Daniel Veillard4432df22003-09-28 18:58:27 +00001268 * xmlCheckLanguageID:
1269 * @lang: pointer to the string value
1270 *
1271 * Checks that the value conforms to the LanguageID production:
1272 *
1273 * NOTE: this is somewhat deprecated, those productions were removed from
1274 * the XML Second edition.
1275 *
1276 * [33] LanguageID ::= Langcode ('-' Subcode)*
1277 * [34] Langcode ::= ISO639Code | IanaCode | UserCode
1278 * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
1279 * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
1280 * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
1281 * [38] Subcode ::= ([a-z] | [A-Z])+
1282 *
1283 * Returns 1 if correct 0 otherwise
1284 **/
1285int
1286xmlCheckLanguageID(const xmlChar * lang)
1287{
1288 const xmlChar *cur = lang;
1289
1290 if (cur == NULL)
1291 return (0);
1292 if (((cur[0] == 'i') && (cur[1] == '-')) ||
1293 ((cur[0] == 'I') && (cur[1] == '-'))) {
1294 /*
1295 * IANA code
1296 */
1297 cur += 2;
1298 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
1299 ((cur[0] >= 'a') && (cur[0] <= 'z')))
1300 cur++;
1301 } else if (((cur[0] == 'x') && (cur[1] == '-')) ||
1302 ((cur[0] == 'X') && (cur[1] == '-'))) {
1303 /*
1304 * User code
1305 */
1306 cur += 2;
1307 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
1308 ((cur[0] >= 'a') && (cur[0] <= 'z')))
1309 cur++;
1310 } else if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
1311 ((cur[0] >= 'a') && (cur[0] <= 'z'))) {
1312 /*
1313 * ISO639
1314 */
1315 cur++;
1316 if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
1317 ((cur[0] >= 'a') && (cur[0] <= 'z')))
1318 cur++;
1319 else
1320 return (0);
1321 } else
1322 return (0);
1323 while (cur[0] != 0) { /* non input consuming */
1324 if (cur[0] != '-')
1325 return (0);
1326 cur++;
1327 if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
1328 ((cur[0] >= 'a') && (cur[0] <= 'z')))
1329 cur++;
1330 else
1331 return (0);
1332 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
1333 ((cur[0] >= 'a') && (cur[0] <= 'z')))
1334 cur++;
1335 }
1336 return (1);
1337}
1338
Owen Taylor3473f882001-02-23 17:55:21 +00001339/************************************************************************
1340 * *
Daniel Veillard0161e632008-08-28 15:36:32 +00001341 * Parser stacks related functions and macros *
Owen Taylor3473f882001-02-23 17:55:21 +00001342 * *
1343 ************************************************************************/
1344
1345xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
1346 const xmlChar ** str);
1347
Daniel Veillard0fb18932003-09-07 09:14:37 +00001348#ifdef SAX2
1349/**
1350 * nsPush:
1351 * @ctxt: an XML parser context
1352 * @prefix: the namespace prefix or NULL
1353 * @URL: the namespace name
1354 *
1355 * Pushes a new parser namespace on top of the ns stack
1356 *
William M. Brack7b9154b2003-09-27 19:23:50 +00001357 * Returns -1 in case of error, -2 if the namespace should be discarded
1358 * and the index in the stack otherwise.
Daniel Veillard0fb18932003-09-07 09:14:37 +00001359 */
1360static int
1361nsPush(xmlParserCtxtPtr ctxt, const xmlChar *prefix, const xmlChar *URL)
1362{
Daniel Veillarddca8cc72003-09-26 13:53:14 +00001363 if (ctxt->options & XML_PARSE_NSCLEAN) {
1364 int i;
1365 for (i = 0;i < ctxt->nsNr;i += 2) {
1366 if (ctxt->nsTab[i] == prefix) {
1367 /* in scope */
1368 if (ctxt->nsTab[i + 1] == URL)
1369 return(-2);
1370 /* out of scope keep it */
1371 break;
1372 }
1373 }
1374 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00001375 if ((ctxt->nsMax == 0) || (ctxt->nsTab == NULL)) {
1376 ctxt->nsMax = 10;
1377 ctxt->nsNr = 0;
1378 ctxt->nsTab = (const xmlChar **)
1379 xmlMalloc(ctxt->nsMax * sizeof(xmlChar *));
1380 if (ctxt->nsTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001381 xmlErrMemory(ctxt, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00001382 ctxt->nsMax = 0;
1383 return (-1);
1384 }
1385 } else if (ctxt->nsNr >= ctxt->nsMax) {
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00001386 const xmlChar ** tmp;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001387 ctxt->nsMax *= 2;
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00001388 tmp = (const xmlChar **) xmlRealloc((char *) ctxt->nsTab,
1389 ctxt->nsMax * sizeof(ctxt->nsTab[0]));
1390 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001391 xmlErrMemory(ctxt, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00001392 ctxt->nsMax /= 2;
1393 return (-1);
1394 }
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00001395 ctxt->nsTab = tmp;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001396 }
1397 ctxt->nsTab[ctxt->nsNr++] = prefix;
1398 ctxt->nsTab[ctxt->nsNr++] = URL;
1399 return (ctxt->nsNr);
1400}
1401/**
1402 * nsPop:
1403 * @ctxt: an XML parser context
1404 * @nr: the number to pop
1405 *
1406 * Pops the top @nr parser prefix/namespace from the ns stack
1407 *
1408 * Returns the number of namespaces removed
1409 */
1410static int
1411nsPop(xmlParserCtxtPtr ctxt, int nr)
1412{
1413 int i;
1414
1415 if (ctxt->nsTab == NULL) return(0);
1416 if (ctxt->nsNr < nr) {
1417 xmlGenericError(xmlGenericErrorContext, "Pbm popping %d NS\n", nr);
1418 nr = ctxt->nsNr;
1419 }
1420 if (ctxt->nsNr <= 0)
1421 return (0);
Daniel Veillard34e3f642008-07-29 09:02:27 +00001422
Daniel Veillard0fb18932003-09-07 09:14:37 +00001423 for (i = 0;i < nr;i++) {
1424 ctxt->nsNr--;
1425 ctxt->nsTab[ctxt->nsNr] = NULL;
1426 }
1427 return(nr);
1428}
1429#endif
1430
1431static int
1432xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt, int nr) {
1433 const xmlChar **atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001434 int *attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001435 int maxatts;
1436
1437 if (ctxt->atts == NULL) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00001438 maxatts = 55; /* allow for 10 attrs by default */
Daniel Veillard0fb18932003-09-07 09:14:37 +00001439 atts = (const xmlChar **)
1440 xmlMalloc(maxatts * sizeof(xmlChar *));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001441 if (atts == NULL) goto mem_error;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001442 ctxt->atts = atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001443 attallocs = (int *) xmlMalloc((maxatts / 5) * sizeof(int));
1444 if (attallocs == NULL) goto mem_error;
1445 ctxt->attallocs = attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001446 ctxt->maxatts = maxatts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001447 } else if (nr + 5 > ctxt->maxatts) {
1448 maxatts = (nr + 5) * 2;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001449 atts = (const xmlChar **) xmlRealloc((void *) ctxt->atts,
1450 maxatts * sizeof(const xmlChar *));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001451 if (atts == NULL) goto mem_error;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001452 ctxt->atts = atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001453 attallocs = (int *) xmlRealloc((void *) ctxt->attallocs,
1454 (maxatts / 5) * sizeof(int));
1455 if (attallocs == NULL) goto mem_error;
1456 ctxt->attallocs = attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001457 ctxt->maxatts = maxatts;
1458 }
1459 return(ctxt->maxatts);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001460mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001461 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001462 return(-1);
Daniel Veillard0fb18932003-09-07 09:14:37 +00001463}
1464
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001465/**
1466 * inputPush:
1467 * @ctxt: an XML parser context
Daniel Veillard9d06d302002-01-22 18:15:52 +00001468 * @value: the parser input
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001469 *
1470 * Pushes a new parser input on top of the input stack
Daniel Veillard9d06d302002-01-22 18:15:52 +00001471 *
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001472 * Returns -1 in case of error, the index in the stack otherwise
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001473 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001474int
Daniel Veillard1c732d22002-11-30 11:22:59 +00001475inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value)
1476{
Daniel Veillard36e5cd52004-11-02 14:52:23 +00001477 if ((ctxt == NULL) || (value == NULL))
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001478 return(-1);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001479 if (ctxt->inputNr >= ctxt->inputMax) {
1480 ctxt->inputMax *= 2;
1481 ctxt->inputTab =
1482 (xmlParserInputPtr *) xmlRealloc(ctxt->inputTab,
1483 ctxt->inputMax *
1484 sizeof(ctxt->inputTab[0]));
1485 if (ctxt->inputTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001486 xmlErrMemory(ctxt, NULL);
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001487 xmlFreeInputStream(value);
1488 ctxt->inputMax /= 2;
1489 value = NULL;
1490 return (-1);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001491 }
1492 }
1493 ctxt->inputTab[ctxt->inputNr] = value;
1494 ctxt->input = value;
1495 return (ctxt->inputNr++);
1496}
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001497/**
Daniel Veillard1c732d22002-11-30 11:22:59 +00001498 * inputPop:
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001499 * @ctxt: an XML parser context
1500 *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001501 * Pops the top parser input from the input stack
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001502 *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001503 * Returns the input just removed
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001504 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001505xmlParserInputPtr
Daniel Veillard1c732d22002-11-30 11:22:59 +00001506inputPop(xmlParserCtxtPtr ctxt)
1507{
1508 xmlParserInputPtr ret;
1509
Daniel Veillard36e5cd52004-11-02 14:52:23 +00001510 if (ctxt == NULL)
1511 return(NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001512 if (ctxt->inputNr <= 0)
Daniel Veillard24505b02005-07-28 23:49:35 +00001513 return (NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001514 ctxt->inputNr--;
1515 if (ctxt->inputNr > 0)
1516 ctxt->input = ctxt->inputTab[ctxt->inputNr - 1];
1517 else
1518 ctxt->input = NULL;
1519 ret = ctxt->inputTab[ctxt->inputNr];
Daniel Veillard24505b02005-07-28 23:49:35 +00001520 ctxt->inputTab[ctxt->inputNr] = NULL;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001521 return (ret);
1522}
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001523/**
1524 * nodePush:
1525 * @ctxt: an XML parser context
Daniel Veillard9d06d302002-01-22 18:15:52 +00001526 * @value: the element node
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001527 *
1528 * Pushes a new element node on top of the node stack
Daniel Veillard9d06d302002-01-22 18:15:52 +00001529 *
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001530 * Returns -1 in case of error, the index in the stack otherwise
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001531 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001532int
Daniel Veillard1c732d22002-11-30 11:22:59 +00001533nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value)
1534{
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001535 if (ctxt == NULL) return(0);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001536 if (ctxt->nodeNr >= ctxt->nodeMax) {
Daniel Veillardd0cf7f62004-11-09 16:17:02 +00001537 xmlNodePtr *tmp;
1538
1539 tmp = (xmlNodePtr *) xmlRealloc(ctxt->nodeTab,
1540 ctxt->nodeMax * 2 *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001541 sizeof(ctxt->nodeTab[0]));
Daniel Veillardd0cf7f62004-11-09 16:17:02 +00001542 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001543 xmlErrMemory(ctxt, NULL);
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001544 return (-1);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001545 }
Daniel Veillardd0cf7f62004-11-09 16:17:02 +00001546 ctxt->nodeTab = tmp;
1547 ctxt->nodeMax *= 2;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001548 }
Daniel Veillard8915c152008-08-26 13:05:34 +00001549 if ((((unsigned int) ctxt->nodeNr) > xmlParserMaxDepth) &&
1550 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00001551 xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
Daniel Veillard8915c152008-08-26 13:05:34 +00001552 "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
Daniel Veillard4aede2e2003-10-17 12:43:59 +00001553 xmlParserMaxDepth);
Daniel Veillard3b2e4e12003-02-03 08:52:58 +00001554 ctxt->instate = XML_PARSER_EOF;
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001555 return(-1);
Daniel Veillard3b2e4e12003-02-03 08:52:58 +00001556 }
Daniel Veillard1c732d22002-11-30 11:22:59 +00001557 ctxt->nodeTab[ctxt->nodeNr] = value;
1558 ctxt->node = value;
1559 return (ctxt->nodeNr++);
1560}
Daniel Veillard8915c152008-08-26 13:05:34 +00001561
Daniel Veillard1c732d22002-11-30 11:22:59 +00001562/**
1563 * nodePop:
1564 * @ctxt: an XML parser context
1565 *
1566 * Pops the top element node from the node stack
1567 *
1568 * Returns the node just removed
Owen Taylor3473f882001-02-23 17:55:21 +00001569 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001570xmlNodePtr
Daniel Veillard1c732d22002-11-30 11:22:59 +00001571nodePop(xmlParserCtxtPtr ctxt)
1572{
1573 xmlNodePtr ret;
1574
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001575 if (ctxt == NULL) return(NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001576 if (ctxt->nodeNr <= 0)
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001577 return (NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001578 ctxt->nodeNr--;
1579 if (ctxt->nodeNr > 0)
1580 ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1];
1581 else
1582 ctxt->node = NULL;
1583 ret = ctxt->nodeTab[ctxt->nodeNr];
Daniel Veillard24505b02005-07-28 23:49:35 +00001584 ctxt->nodeTab[ctxt->nodeNr] = NULL;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001585 return (ret);
1586}
Daniel Veillarda2351322004-06-27 12:08:10 +00001587
1588#ifdef LIBXML_PUSH_ENABLED
Daniel Veillard1c732d22002-11-30 11:22:59 +00001589/**
Daniel Veillarde57ec792003-09-10 10:50:59 +00001590 * nameNsPush:
1591 * @ctxt: an XML parser context
1592 * @value: the element name
1593 * @prefix: the element prefix
1594 * @URI: the element namespace name
1595 *
1596 * Pushes a new element name/prefix/URL on top of the name stack
1597 *
1598 * Returns -1 in case of error, the index in the stack otherwise
1599 */
1600static int
1601nameNsPush(xmlParserCtxtPtr ctxt, const xmlChar * value,
1602 const xmlChar *prefix, const xmlChar *URI, int nsNr)
1603{
1604 if (ctxt->nameNr >= ctxt->nameMax) {
1605 const xmlChar * *tmp;
1606 void **tmp2;
1607 ctxt->nameMax *= 2;
1608 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1609 ctxt->nameMax *
1610 sizeof(ctxt->nameTab[0]));
1611 if (tmp == NULL) {
1612 ctxt->nameMax /= 2;
1613 goto mem_error;
1614 }
1615 ctxt->nameTab = tmp;
1616 tmp2 = (void **) xmlRealloc((void * *)ctxt->pushTab,
1617 ctxt->nameMax * 3 *
1618 sizeof(ctxt->pushTab[0]));
1619 if (tmp2 == NULL) {
1620 ctxt->nameMax /= 2;
1621 goto mem_error;
1622 }
1623 ctxt->pushTab = tmp2;
1624 }
1625 ctxt->nameTab[ctxt->nameNr] = value;
1626 ctxt->name = value;
1627 ctxt->pushTab[ctxt->nameNr * 3] = (void *) prefix;
1628 ctxt->pushTab[ctxt->nameNr * 3 + 1] = (void *) URI;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +00001629 ctxt->pushTab[ctxt->nameNr * 3 + 2] = (void *) (long) nsNr;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001630 return (ctxt->nameNr++);
1631mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001632 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001633 return (-1);
1634}
1635/**
1636 * nameNsPop:
1637 * @ctxt: an XML parser context
1638 *
1639 * Pops the top element/prefix/URI name from the name stack
1640 *
1641 * Returns the name just removed
1642 */
1643static const xmlChar *
1644nameNsPop(xmlParserCtxtPtr ctxt)
1645{
1646 const xmlChar *ret;
1647
1648 if (ctxt->nameNr <= 0)
Daniel Veillard24505b02005-07-28 23:49:35 +00001649 return (NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001650 ctxt->nameNr--;
1651 if (ctxt->nameNr > 0)
1652 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1653 else
1654 ctxt->name = NULL;
1655 ret = ctxt->nameTab[ctxt->nameNr];
1656 ctxt->nameTab[ctxt->nameNr] = NULL;
1657 return (ret);
1658}
Daniel Veillarda2351322004-06-27 12:08:10 +00001659#endif /* LIBXML_PUSH_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +00001660
1661/**
Daniel Veillard1c732d22002-11-30 11:22:59 +00001662 * namePush:
1663 * @ctxt: an XML parser context
1664 * @value: the element name
1665 *
1666 * Pushes a new element name on top of the name stack
1667 *
Daniel Veillarde57ec792003-09-10 10:50:59 +00001668 * Returns -1 in case of error, the index in the stack otherwise
Daniel Veillard1c732d22002-11-30 11:22:59 +00001669 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001670int
Daniel Veillard2fdbd322003-08-18 12:15:38 +00001671namePush(xmlParserCtxtPtr ctxt, const xmlChar * value)
Daniel Veillard1c732d22002-11-30 11:22:59 +00001672{
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001673 if (ctxt == NULL) return (-1);
1674
Daniel Veillard1c732d22002-11-30 11:22:59 +00001675 if (ctxt->nameNr >= ctxt->nameMax) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00001676 const xmlChar * *tmp;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001677 ctxt->nameMax *= 2;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001678 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
Daniel Veillard1c732d22002-11-30 11:22:59 +00001679 ctxt->nameMax *
1680 sizeof(ctxt->nameTab[0]));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001681 if (tmp == NULL) {
1682 ctxt->nameMax /= 2;
1683 goto mem_error;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001684 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00001685 ctxt->nameTab = tmp;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001686 }
1687 ctxt->nameTab[ctxt->nameNr] = value;
1688 ctxt->name = value;
1689 return (ctxt->nameNr++);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001690mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001691 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001692 return (-1);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001693}
1694/**
1695 * namePop:
1696 * @ctxt: an XML parser context
1697 *
1698 * Pops the top element name from the name stack
1699 *
1700 * Returns the name just removed
1701 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001702const xmlChar *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001703namePop(xmlParserCtxtPtr ctxt)
1704{
Daniel Veillard2fdbd322003-08-18 12:15:38 +00001705 const xmlChar *ret;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001706
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001707 if ((ctxt == NULL) || (ctxt->nameNr <= 0))
1708 return (NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001709 ctxt->nameNr--;
1710 if (ctxt->nameNr > 0)
1711 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1712 else
1713 ctxt->name = NULL;
1714 ret = ctxt->nameTab[ctxt->nameNr];
Daniel Veillard24505b02005-07-28 23:49:35 +00001715 ctxt->nameTab[ctxt->nameNr] = NULL;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001716 return (ret);
1717}
Owen Taylor3473f882001-02-23 17:55:21 +00001718
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001719static int spacePush(xmlParserCtxtPtr ctxt, int val) {
Owen Taylor3473f882001-02-23 17:55:21 +00001720 if (ctxt->spaceNr >= ctxt->spaceMax) {
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00001721 int *tmp;
1722
Owen Taylor3473f882001-02-23 17:55:21 +00001723 ctxt->spaceMax *= 2;
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00001724 tmp = (int *) xmlRealloc(ctxt->spaceTab,
1725 ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
1726 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001727 xmlErrMemory(ctxt, NULL);
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001728 ctxt->spaceMax /=2;
1729 return(-1);
Owen Taylor3473f882001-02-23 17:55:21 +00001730 }
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00001731 ctxt->spaceTab = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00001732 }
1733 ctxt->spaceTab[ctxt->spaceNr] = val;
1734 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr];
1735 return(ctxt->spaceNr++);
1736}
1737
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001738static int spacePop(xmlParserCtxtPtr ctxt) {
Owen Taylor3473f882001-02-23 17:55:21 +00001739 int ret;
1740 if (ctxt->spaceNr <= 0) return(0);
1741 ctxt->spaceNr--;
1742 if (ctxt->spaceNr > 0)
1743 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1];
1744 else
Daniel Veillarddbcbbd22006-06-18 19:55:20 +00001745 ctxt->space = &ctxt->spaceTab[0];
Owen Taylor3473f882001-02-23 17:55:21 +00001746 ret = ctxt->spaceTab[ctxt->spaceNr];
1747 ctxt->spaceTab[ctxt->spaceNr] = -1;
1748 return(ret);
1749}
1750
1751/*
1752 * Macros for accessing the content. Those should be used only by the parser,
1753 * and not exported.
1754 *
1755 * Dirty macros, i.e. one often need to make assumption on the context to
1756 * use them
1757 *
1758 * CUR_PTR return the current pointer to the xmlChar to be parsed.
1759 * To be used with extreme caution since operations consuming
1760 * characters may move the input buffer to a different location !
 *   CUR     returns the current xmlChar value, i.e. an 8 bit value.
 *           This should be used internally by the parser
 *           only to compare to ASCII values otherwise it would break when
 *           running with UTF-8 encoding.
 *   RAW     same as CUR but in the input buffer, bypass any token
 *           extraction that may have been done
 *   NXT(n)  returns the n'th next xmlChar. Same as CUR it should be used only
 *           to compare on ASCII based substring.
 *   SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
 *           strings without newlines within the parser.
 *   NEXT1   Skip 1 xmlChar, and must also be used only to skip 1 non-newline ASCII
 *           defined char within the parser.
Owen Taylor3473f882001-02-23 17:55:21 +00001773 * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
1774 *
1775 * NEXT Skip to the next character, this does the proper decoding
1776 * in UTF-8 mode. It also pop-up unfinished entities on the fly.
Daniel Veillard77a90a72003-03-22 00:04:05 +00001777 * NEXTL(l) Skip the current unicode character of l xmlChars long.
Owen Taylor3473f882001-02-23 17:55:21 +00001778 * CUR_CHAR(l) returns the current unicode character (int), set l
1779 * to the number of xmlChars used for the encoding [0-5].
1780 * CUR_SCHAR same but operate on a string instead of the context
1781 * COPY_BUF copy the current unicode char to the target buffer, increment
1782 * the index
1783 * GROW, SHRINK handling of input buffers
1784 */
1785
/*
 * Raw accessors on the current input of the local `ctxt` variable.
 * RAW and CUR expand to the same expression: the byte at the current
 * input position. NXT(val) reads the byte `val` positions ahead and
 * CUR_PTR is the current position pointer itself.
 */
#define RAW (*ctxt->input->cur)
#define CUR (*ctxt->input->cur)
#define NXT(val) ctxt->input->cur[(val)]
#define CUR_PTR ctxt->input->cur

/*
 * CMPn(s, c1 .. cn) is true when the first n bytes at s, read as
 * unsigned char, equal c1 .. cn in order. Each macro is built on the
 * previous one, so the comparison short-circuits on the first mismatch.
 * Note that s is not parenthesized inside the cast.
 */
#define CMP4( s, c1, c2, c3, c4 ) \
  ( ((unsigned char *) s)[ 0 ] == c1 && ((unsigned char *) s)[ 1 ] == c2 && \
    ((unsigned char *) s)[ 2 ] == c3 && ((unsigned char *) s)[ 3 ] == c4 )
#define CMP5( s, c1, c2, c3, c4, c5 ) \
  ( CMP4( s, c1, c2, c3, c4 ) && ((unsigned char *) s)[ 4 ] == c5 )
#define CMP6( s, c1, c2, c3, c4, c5, c6 ) \
  ( CMP5( s, c1, c2, c3, c4, c5 ) && ((unsigned char *) s)[ 5 ] == c6 )
#define CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) \
  ( CMP6( s, c1, c2, c3, c4, c5, c6 ) && ((unsigned char *) s)[ 6 ] == c7 )
#define CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) \
  ( CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) && ((unsigned char *) s)[ 7 ] == c8 )
#define CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) \
  ( CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) && \
    ((unsigned char *) s)[ 8 ] == c9 )
#define CMP10( s, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 ) \
  ( CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) && \
    ((unsigned char *) s)[ 9 ] == c10 )
1808
/*
 * SKIP(val): advance the current input by val bytes, adding val to the
 * column and to nbChars. Line numbers are not touched, so the skipped
 * bytes must not contain a newline. Afterwards a '%' at the new position
 * triggers parameter entity handling, and an exhausted input that cannot
 * be grown is popped.
 */
#define SKIP(val) do {							\
    ctxt->nbChars += (val),ctxt->input->cur += (val),ctxt->input->col+=(val);			\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
    if ((*ctxt->input->cur == 0) &&					\
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))		\
	    xmlPopInput(ctxt);						\
  } while (0)

/*
 * SKIPL(val): same as SKIP but walks the val bytes one at a time so
 * that newlines update the line/column counters. The argument is
 * parenthesized so that any expression can be passed safely.
 */
#define SKIPL(val) do {							\
    int skipl;								\
    for(skipl=0; skipl<(val); skipl++) {				\
	if (*(ctxt->input->cur) == '\n') {				\
	ctxt->input->line++; ctxt->input->col = 1;			\
	} else ctxt->input->col++;					\
	ctxt->nbChars++;						\
	ctxt->input->cur++;						\
    }									\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
    if ((*ctxt->input->cur == 0) &&					\
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))		\
	    xmlPopInput(ctxt);						\
  } while (0)
1831
Daniel Veillarda880b122003-04-21 21:36:41 +00001832#define SHRINK if ((ctxt->progressive == 0) && \
Daniel Veillard6155d8a2003-08-19 15:01:28 +00001833 (ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \
1834 (ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \
Daniel Veillard46de64e2002-05-29 08:21:33 +00001835 xmlSHRINK (ctxt);
1836
1837static void xmlSHRINK (xmlParserCtxtPtr ctxt) {
1838 xmlParserInputShrink(ctxt->input);
1839 if ((*ctxt->input->cur == 0) &&
1840 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1841 xmlPopInput(ctxt);
Daniel Veillard48b2f892001-02-25 16:11:03 +00001842 }
Owen Taylor3473f882001-02-23 17:55:21 +00001843
Daniel Veillarda880b122003-04-21 21:36:41 +00001844#define GROW if ((ctxt->progressive == 0) && \
1845 (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK)) \
Daniel Veillard46de64e2002-05-29 08:21:33 +00001846 xmlGROW (ctxt);
1847
1848static void xmlGROW (xmlParserCtxtPtr ctxt) {
1849 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1850 if ((*ctxt->input->cur == 0) &&
1851 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1852 xmlPopInput(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +00001853}
Owen Taylor3473f882001-02-23 17:55:21 +00001854
/* Skip blank characters at the current position of the local ctxt. */
#define SKIP_BLANKS xmlSkipBlankChars(ctxt)

/* Advance to the next character through xmlNextChar(). */
#define NEXT xmlNextChar(ctxt)

/*
 * NEXT1: advance by exactly one byte, bumping the column and nbChars,
 * and try to grow the input when the new position holds a 0 byte.
 * Unlike SKIP it neither handles '%' nor pops the input. The expansion
 * is a bare brace block, not a do { } while (0) statement.
 */
#define NEXT1 {								\
	ctxt->input->col++;						\
	ctxt->input->cur++;						\
	ctxt->nbChars++;						\
	if (*ctxt->input->cur == 0)					\
	    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);		\
    }

/*
 * NEXTL(l): advance by the l bytes of the current character. A newline
 * at the current position bumps the line and resets the column to 1,
 * anything else bumps the column by one. nbChars is not updated here.
 * A '%' at the new position triggers parameter entity handling.
 */
#define NEXTL(l) do {							\
    if (*(ctxt->input->cur) == '\n') {					\
	ctxt->input->line++; ctxt->input->col = 1;			\
    } else ctxt->input->col++;						\
    ctxt->input->cur += l;				\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
  } while (0)

/* Both take the address of l, so l must be an lvalue. */
#define CUR_CHAR(l) xmlCurrentChar(ctxt, &l)
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, s, &l)

/*
 * COPY_BUF(l,b,i,v): append character v to buffer b at index i and
 * advance i; a length l of 1 stores a single byte, otherwise the
 * multi-byte copy helper is used. The expansion is an unbraced if/else
 * with no trailing semicolon.
 */
#define COPY_BUF(l,b,i,v)						\
    if (l == 1) b[i++] = (xmlChar) v;					\
    else i += xmlCopyCharMultiByte(&b[i],v)
Owen Taylor3473f882001-02-23 17:55:21 +00001881
/**
 * xmlSkipBlankChars:
 * @ctxt:  the XML parser context
 *
 * skip all blanks character found at that point in the input streams.
 * It pops up finished entities in the process if allowable at that point.
 *
 * Two strategies are used: a direct byte scan when there is a single
 * input on the stack and the parser is not in the DTD state, and a
 * slower loop going through NEXT which also pops exhausted inputs and
 * handles parameter entity references.
 *
 * Returns the number of space chars skipped
 */

int
xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
    int res = 0;

    /*
     * It's Okay to use CUR/NEXT here since all the blanks are on
     * the ASCII range.
     */
    if ((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) {
	const xmlChar *cur;
	/*
	 * if we are in the document content, go really fast
	 */
	cur = ctxt->input->cur;
	while (IS_BLANK_CH(*cur)) {
	    /* only newlines update position info on this path */
	    if (*cur == '\n') {
		ctxt->input->line++; ctxt->input->col = 1;
	    }
	    cur++;
	    res++;
	    if (*cur == 0) {
		/*
		 * Publish the position before growing, then re-read it
		 * since the grow call is given the input to update.
		 */
		ctxt->input->cur = cur;
		xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
		cur = ctxt->input->cur;
	    }
	}
	ctxt->input->cur = cur;
    } else {
	int cur;
	do {
	    cur = CUR;
	    while (IS_BLANK_CH(cur)) { /* CHECKED tstblanks.xml */
		NEXT;
		cur = CUR;
		res++;
	    }
	    /*
	     * End of a nested input: pop back to the enclosing one,
	     * except while inside a comment.
	     */
	    while ((cur == 0) && (ctxt->inputNr > 1) &&
		   (ctxt->instate != XML_PARSER_COMMENT)) {
		xmlPopInput(ctxt);
		cur = CUR;
	    }
	    /*
	     * Need to handle support of entities branching here
	     */
	    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);
	    /*
	     * NOTE: cur is not re-read after the PE reference handling,
	     * the loop test below uses the value fetched before it.
	     */
	} while (IS_BLANK(cur)); /* CHECKED tstblanks.xml */
    }
    return(res);
}
1941
1942/************************************************************************
1943 * *
1944 * Commodity functions to handle entities *
1945 * *
1946 ************************************************************************/
1947
1948/**
1949 * xmlPopInput:
1950 * @ctxt: an XML parser context
1951 *
1952 * xmlPopInput: the current input pointed by ctxt->input came to an end
1953 * pop it and return the next char.
1954 *
1955 * Returns the current xmlChar in the parser context
1956 */
1957xmlChar
1958xmlPopInput(xmlParserCtxtPtr ctxt) {
Daniel Veillarda82b1822004-11-08 16:24:57 +00001959 if ((ctxt == NULL) || (ctxt->inputNr <= 1)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00001960 if (xmlParserDebugEntities)
1961 xmlGenericError(xmlGenericErrorContext,
1962 "Popping input %d\n", ctxt->inputNr);
1963 xmlFreeInputStream(inputPop(ctxt));
Daniel Veillard561b7f82002-03-20 21:55:57 +00001964 if ((*ctxt->input->cur == 0) &&
Owen Taylor3473f882001-02-23 17:55:21 +00001965 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1966 return(xmlPopInput(ctxt));
1967 return(CUR);
1968}
1969
1970/**
1971 * xmlPushInput:
1972 * @ctxt: an XML parser context
1973 * @input: an XML parser input fragment (entity, XML fragment ...).
1974 *
1975 * xmlPushInput: switch to a new input stream which is stacked on top
1976 * of the previous one(s).
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001977 * Returns -1 in case of error or the index in the input stack
Owen Taylor3473f882001-02-23 17:55:21 +00001978 */
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001979int
Owen Taylor3473f882001-02-23 17:55:21 +00001980xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001981 int ret;
1982 if (input == NULL) return(-1);
Owen Taylor3473f882001-02-23 17:55:21 +00001983
1984 if (xmlParserDebugEntities) {
1985 if ((ctxt->input != NULL) && (ctxt->input->filename))
1986 xmlGenericError(xmlGenericErrorContext,
1987 "%s(%d): ", ctxt->input->filename,
1988 ctxt->input->line);
1989 xmlGenericError(xmlGenericErrorContext,
1990 "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
1991 }
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001992 ret = inputPush(ctxt, input);
Owen Taylor3473f882001-02-23 17:55:21 +00001993 GROW;
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001994 return(ret);
Owen Taylor3473f882001-02-23 17:55:21 +00001995}
1996
1997/**
1998 * xmlParseCharRef:
1999 * @ctxt: an XML parser context
2000 *
2001 * parse Reference declarations
2002 *
2003 * [66] CharRef ::= '&#' [0-9]+ ';' |
2004 * '&#x' [0-9a-fA-F]+ ';'
2005 *
2006 * [ WFC: Legal Character ]
2007 * Characters referred to using character references must match the
2008 * production for Char.
2009 *
2010 * Returns the value parsed (as an int), 0 in case of error
2011 */
2012int
2013xmlParseCharRef(xmlParserCtxtPtr ctxt) {
Daniel Veillard50582112001-03-26 22:52:16 +00002014 unsigned int val = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002015 int count = 0;
Daniel Veillard37fd3072004-06-03 11:22:31 +00002016 unsigned int outofrange = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002017
Owen Taylor3473f882001-02-23 17:55:21 +00002018 /*
2019 * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
2020 */
Daniel Veillard561b7f82002-03-20 21:55:57 +00002021 if ((RAW == '&') && (NXT(1) == '#') &&
Owen Taylor3473f882001-02-23 17:55:21 +00002022 (NXT(2) == 'x')) {
2023 SKIP(3);
2024 GROW;
2025 while (RAW != ';') { /* loop blocked by count */
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00002026 if (count++ > 20) {
2027 count = 0;
2028 GROW;
2029 }
2030 if ((RAW >= '0') && (RAW <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +00002031 val = val * 16 + (CUR - '0');
2032 else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
2033 val = val * 16 + (CUR - 'a') + 10;
2034 else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
2035 val = val * 16 + (CUR - 'A') + 10;
2036 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002037 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002038 val = 0;
2039 break;
2040 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00002041 if (val > 0x10FFFF)
2042 outofrange = val;
2043
Owen Taylor3473f882001-02-23 17:55:21 +00002044 NEXT;
2045 count++;
2046 }
2047 if (RAW == ';') {
2048 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
Daniel Veillard77a90a72003-03-22 00:04:05 +00002049 ctxt->input->col++;
Owen Taylor3473f882001-02-23 17:55:21 +00002050 ctxt->nbChars ++;
2051 ctxt->input->cur++;
2052 }
Daniel Veillard561b7f82002-03-20 21:55:57 +00002053 } else if ((RAW == '&') && (NXT(1) == '#')) {
Owen Taylor3473f882001-02-23 17:55:21 +00002054 SKIP(2);
2055 GROW;
2056 while (RAW != ';') { /* loop blocked by count */
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00002057 if (count++ > 20) {
2058 count = 0;
2059 GROW;
2060 }
2061 if ((RAW >= '0') && (RAW <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +00002062 val = val * 10 + (CUR - '0');
2063 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002064 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002065 val = 0;
2066 break;
2067 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00002068 if (val > 0x10FFFF)
2069 outofrange = val;
2070
Owen Taylor3473f882001-02-23 17:55:21 +00002071 NEXT;
2072 count++;
2073 }
2074 if (RAW == ';') {
2075 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
Daniel Veillard77a90a72003-03-22 00:04:05 +00002076 ctxt->input->col++;
Owen Taylor3473f882001-02-23 17:55:21 +00002077 ctxt->nbChars ++;
2078 ctxt->input->cur++;
2079 }
2080 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002081 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002082 }
2083
2084 /*
2085 * [ WFC: Legal Character ]
2086 * Characters referred to using character references must match the
2087 * production for Char.
2088 */
Daniel Veillard37fd3072004-06-03 11:22:31 +00002089 if ((IS_CHAR(val) && (outofrange == 0))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002090 return(val);
2091 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00002092 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2093 "xmlParseCharRef: invalid xmlChar value %d\n",
2094 val);
Owen Taylor3473f882001-02-23 17:55:21 +00002095 }
2096 return(0);
2097}
2098
2099/**
2100 * xmlParseStringCharRef:
2101 * @ctxt: an XML parser context
2102 * @str: a pointer to an index in the string
2103 *
2104 * parse Reference declarations, variant parsing from a string rather
2105 * than an an input flow.
2106 *
2107 * [66] CharRef ::= '&#' [0-9]+ ';' |
2108 * '&#x' [0-9a-fA-F]+ ';'
2109 *
2110 * [ WFC: Legal Character ]
2111 * Characters referred to using character references must match the
2112 * production for Char.
2113 *
2114 * Returns the value parsed (as an int), 0 in case of error, str will be
2115 * updated to the current value of the index
2116 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00002117static int
Owen Taylor3473f882001-02-23 17:55:21 +00002118xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
2119 const xmlChar *ptr;
2120 xmlChar cur;
Daniel Veillard37fd3072004-06-03 11:22:31 +00002121 unsigned int val = 0;
2122 unsigned int outofrange = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002123
2124 if ((str == NULL) || (*str == NULL)) return(0);
2125 ptr = *str;
2126 cur = *ptr;
2127 if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
2128 ptr += 3;
2129 cur = *ptr;
2130 while (cur != ';') { /* Non input consuming loop */
2131 if ((cur >= '0') && (cur <= '9'))
2132 val = val * 16 + (cur - '0');
2133 else if ((cur >= 'a') && (cur <= 'f'))
2134 val = val * 16 + (cur - 'a') + 10;
2135 else if ((cur >= 'A') && (cur <= 'F'))
2136 val = val * 16 + (cur - 'A') + 10;
2137 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002138 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002139 val = 0;
2140 break;
2141 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00002142 if (val > 0x10FFFF)
2143 outofrange = val;
2144
Owen Taylor3473f882001-02-23 17:55:21 +00002145 ptr++;
2146 cur = *ptr;
2147 }
2148 if (cur == ';')
2149 ptr++;
2150 } else if ((cur == '&') && (ptr[1] == '#')){
2151 ptr += 2;
2152 cur = *ptr;
2153 while (cur != ';') { /* Non input consuming loops */
2154 if ((cur >= '0') && (cur <= '9'))
2155 val = val * 10 + (cur - '0');
2156 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002157 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002158 val = 0;
2159 break;
2160 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00002161 if (val > 0x10FFFF)
2162 outofrange = val;
2163
Owen Taylor3473f882001-02-23 17:55:21 +00002164 ptr++;
2165 cur = *ptr;
2166 }
2167 if (cur == ';')
2168 ptr++;
2169 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002170 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002171 return(0);
2172 }
2173 *str = ptr;
2174
2175 /*
2176 * [ WFC: Legal Character ]
2177 * Characters referred to using character references must match the
2178 * production for Char.
2179 */
Daniel Veillard37fd3072004-06-03 11:22:31 +00002180 if ((IS_CHAR(val) && (outofrange == 0))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002181 return(val);
2182 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00002183 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2184 "xmlParseStringCharRef: invalid xmlChar value %d\n",
2185 val);
Owen Taylor3473f882001-02-23 17:55:21 +00002186 }
2187 return(0);
2188}
2189
2190/**
Daniel Veillardf5582f12002-06-11 10:08:16 +00002191 * xmlNewBlanksWrapperInputStream:
2192 * @ctxt: an XML parser context
2193 * @entity: an Entity pointer
2194 *
2195 * Create a new input stream for wrapping
2196 * blanks around a PEReference
2197 *
2198 * Returns the new input stream or NULL
2199 */
2200
2201static void deallocblankswrapper (xmlChar *str) {xmlFree(str);}
2202
Daniel Veillardf4862f02002-09-10 11:13:43 +00002203static xmlParserInputPtr
Daniel Veillardf5582f12002-06-11 10:08:16 +00002204xmlNewBlanksWrapperInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
2205 xmlParserInputPtr input;
2206 xmlChar *buffer;
2207 size_t length;
2208 if (entity == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002209 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
2210 "xmlNewBlanksWrapperInputStream entity\n");
Daniel Veillardf5582f12002-06-11 10:08:16 +00002211 return(NULL);
2212 }
2213 if (xmlParserDebugEntities)
2214 xmlGenericError(xmlGenericErrorContext,
2215 "new blanks wrapper for entity: %s\n", entity->name);
2216 input = xmlNewInputStream(ctxt);
2217 if (input == NULL) {
2218 return(NULL);
2219 }
2220 length = xmlStrlen(entity->name) + 5;
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002221 buffer = xmlMallocAtomic(length);
Daniel Veillardf5582f12002-06-11 10:08:16 +00002222 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002223 xmlErrMemory(ctxt, NULL);
Daniel Veillard68b6e022008-03-31 09:26:00 +00002224 xmlFree(input);
Daniel Veillardf5582f12002-06-11 10:08:16 +00002225 return(NULL);
2226 }
2227 buffer [0] = ' ';
2228 buffer [1] = '%';
2229 buffer [length-3] = ';';
2230 buffer [length-2] = ' ';
2231 buffer [length-1] = 0;
2232 memcpy(buffer + 2, entity->name, length - 5);
2233 input->free = deallocblankswrapper;
2234 input->base = buffer;
2235 input->cur = buffer;
2236 input->length = length;
2237 input->end = &buffer[length];
2238 return(input);
2239}
2240
2241/**
Owen Taylor3473f882001-02-23 17:55:21 +00002242 * xmlParserHandlePEReference:
2243 * @ctxt: the parser context
2244 *
2245 * [69] PEReference ::= '%' Name ';'
2246 *
2247 * [ WFC: No Recursion ]
2248 * A parsed entity must not contain a recursive
2249 * reference to itself, either directly or indirectly.
2250 *
2251 * [ WFC: Entity Declared ]
2252 * In a document without any DTD, a document with only an internal DTD
2253 * subset which contains no parameter entity references, or a document
2254 * with "standalone='yes'", ... ... The declaration of a parameter
2255 * entity must precede any reference to it...
2256 *
2257 * [ VC: Entity Declared ]
2258 * In a document with an external subset or external parameter entities
2259 * with "standalone='no'", ... ... The declaration of a parameter entity
2260 * must precede any reference to it...
2261 *
2262 * [ WFC: In DTD ]
2263 * Parameter-entity references may only appear in the DTD.
2264 * NOTE: misleading but this is handled.
2265 *
2266 * A PEReference may have been detected in the current input stream
2267 * the handling is done accordingly to
2268 * http://www.w3.org/TR/REC-xml#entproc
2269 * i.e.
2270 * - Included in literal in entity values
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002271 * - Included as Parameter Entity reference within DTDs
Owen Taylor3473f882001-02-23 17:55:21 +00002272 */
2273void
2274xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002275 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00002276 xmlEntityPtr entity = NULL;
2277 xmlParserInputPtr input;
2278
Owen Taylor3473f882001-02-23 17:55:21 +00002279 if (RAW != '%') return;
2280 switch(ctxt->instate) {
2281 case XML_PARSER_CDATA_SECTION:
2282 return;
2283 case XML_PARSER_COMMENT:
2284 return;
2285 case XML_PARSER_START_TAG:
2286 return;
2287 case XML_PARSER_END_TAG:
2288 return;
2289 case XML_PARSER_EOF:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002290 xmlFatalErr(ctxt, XML_ERR_PEREF_AT_EOF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002291 return;
2292 case XML_PARSER_PROLOG:
2293 case XML_PARSER_START:
2294 case XML_PARSER_MISC:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002295 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_PROLOG, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002296 return;
2297 case XML_PARSER_ENTITY_DECL:
2298 case XML_PARSER_CONTENT:
2299 case XML_PARSER_ATTRIBUTE_VALUE:
2300 case XML_PARSER_PI:
2301 case XML_PARSER_SYSTEM_LITERAL:
Daniel Veillard4a7ae502002-02-18 19:18:17 +00002302 case XML_PARSER_PUBLIC_LITERAL:
Owen Taylor3473f882001-02-23 17:55:21 +00002303 /* we just ignore it there */
2304 return;
2305 case XML_PARSER_EPILOG:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002306 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_EPILOG, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002307 return;
2308 case XML_PARSER_ENTITY_VALUE:
2309 /*
2310 * NOTE: in the case of entity values, we don't do the
2311 * substitution here since we need the literal
2312 * entity value to be able to save the internal
2313 * subset of the document.
2314 * This will be handled by xmlStringDecodeEntities
2315 */
2316 return;
2317 case XML_PARSER_DTD:
2318 /*
2319 * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
2320 * In the internal DTD subset, parameter-entity references
2321 * can occur only where markup declarations can occur, not
2322 * within markup declarations.
2323 * In that case this is handled in xmlParseMarkupDecl
2324 */
2325 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
2326 return;
William M. Brack76e95df2003-10-18 16:20:14 +00002327 if (IS_BLANK_CH(NXT(1)) || NXT(1) == 0)
Daniel Veillardf5582f12002-06-11 10:08:16 +00002328 return;
Owen Taylor3473f882001-02-23 17:55:21 +00002329 break;
2330 case XML_PARSER_IGNORE:
2331 return;
2332 }
2333
2334 NEXT;
2335 name = xmlParseName(ctxt);
2336 if (xmlParserDebugEntities)
2337 xmlGenericError(xmlGenericErrorContext,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002338 "PEReference: %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00002339 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002340 xmlFatalErr(ctxt, XML_ERR_PEREF_NO_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002341 } else {
2342 if (RAW == ';') {
2343 NEXT;
2344 if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL))
2345 entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
2346 if (entity == NULL) {
2347
2348 /*
2349 * [ WFC: Entity Declared ]
2350 * In a document without any DTD, a document with only an
2351 * internal DTD subset which contains no parameter entity
2352 * references, or a document with "standalone='yes'", ...
2353 * ... The declaration of a parameter entity must precede
2354 * any reference to it...
2355 */
2356 if ((ctxt->standalone == 1) ||
2357 ((ctxt->hasExternalSubset == 0) &&
2358 (ctxt->hasPErefs == 0))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00002359 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00002360 "PEReference: %%%s; not found\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00002361 } else {
2362 /*
2363 * [ VC: Entity Declared ]
2364 * In a document with an external subset or external
2365 * parameter entities with "standalone='no'", ...
2366 * ... The declaration of a parameter entity must precede
2367 * any reference to it...
2368 */
Daniel Veillard24eb9782003-10-04 21:08:09 +00002369 if ((ctxt->validate) && (ctxt->vctxt.error != NULL)) {
2370 xmlValidityError(ctxt, XML_WAR_UNDECLARED_ENTITY,
2371 "PEReference: %%%s; not found\n",
Daniel Veillardae0765b2008-07-31 19:54:59 +00002372 name, NULL);
Daniel Veillard24eb9782003-10-04 21:08:09 +00002373 } else
2374 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
2375 "PEReference: %%%s; not found\n",
2376 name, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002377 ctxt->valid = 0;
2378 }
Daniel Veillardf5582f12002-06-11 10:08:16 +00002379 } else if (ctxt->input->free != deallocblankswrapper) {
2380 input = xmlNewBlanksWrapperInputStream(ctxt, entity);
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00002381 if (xmlPushInput(ctxt, input) < 0)
2382 return;
Owen Taylor3473f882001-02-23 17:55:21 +00002383 } else {
2384 if ((entity->etype == XML_INTERNAL_PARAMETER_ENTITY) ||
2385 (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)) {
Daniel Veillard87a764e2001-06-20 17:41:10 +00002386 xmlChar start[4];
2387 xmlCharEncoding enc;
2388
Owen Taylor3473f882001-02-23 17:55:21 +00002389 /*
2390 * handle the extra spaces added before and after
2391 * c.f. http://www.w3.org/TR/REC-xml#as-PE
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002392 * this is done independently.
Owen Taylor3473f882001-02-23 17:55:21 +00002393 */
2394 input = xmlNewEntityInputStream(ctxt, entity);
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00002395 if (xmlPushInput(ctxt, input) < 0)
2396 return;
Daniel Veillard87a764e2001-06-20 17:41:10 +00002397
2398 /*
2399 * Get the 4 first bytes and decode the charset
2400 * if enc != XML_CHAR_ENCODING_NONE
2401 * plug some encoding conversion routines.
William M. Bracka0c48ad2004-04-16 15:58:29 +00002402 * Note that, since we may have some non-UTF8
2403 * encoding (like UTF16, bug 135229), the 'length'
2404 * is not known, but we can calculate based upon
2405 * the amount of data in the buffer.
Daniel Veillard87a764e2001-06-20 17:41:10 +00002406 */
2407 GROW
William M. Bracka0c48ad2004-04-16 15:58:29 +00002408 if ((ctxt->input->end - ctxt->input->cur)>=4) {
Daniel Veillarde059b892002-06-13 15:32:10 +00002409 start[0] = RAW;
2410 start[1] = NXT(1);
2411 start[2] = NXT(2);
2412 start[3] = NXT(3);
2413 enc = xmlDetectCharEncoding(start, 4);
2414 if (enc != XML_CHAR_ENCODING_NONE) {
2415 xmlSwitchEncoding(ctxt, enc);
2416 }
Daniel Veillard87a764e2001-06-20 17:41:10 +00002417 }
2418
Owen Taylor3473f882001-02-23 17:55:21 +00002419 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
Daniel Veillarda07050d2003-10-19 14:46:32 +00002420 (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l' )) &&
2421 (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002422 xmlParseTextDecl(ctxt);
2423 }
Owen Taylor3473f882001-02-23 17:55:21 +00002424 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00002425 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
2426 "PEReference: %s is not a parameter entity\n",
2427 name);
Owen Taylor3473f882001-02-23 17:55:21 +00002428 }
2429 }
2430 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002431 xmlFatalErr(ctxt, XML_ERR_PEREF_SEMICOL_MISSING, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002432 }
Owen Taylor3473f882001-02-23 17:55:21 +00002433 }
2434}
2435
/*
 * Macro used to grow the current buffer.
 *
 * Expects an int variable named <buffer>_size next to @buffer and a
 * mem_error label in the enclosing function: the size is doubled and
 * increased by @n, the buffer is reallocated accordingly, and control
 * jumps to mem_error (leaving @buffer untouched) if xmlRealloc fails.
 */
#define growBuffer(buffer, n) {                                         \
    xmlChar *grown;                                                     \
    buffer##_size = (buffer##_size * 2) + (n);                          \
    grown = (xmlChar *) xmlRealloc(buffer,                              \
                                   buffer##_size * sizeof(xmlChar));    \
    if (grown == NULL) goto mem_error;                                  \
    buffer = grown;                                                     \
}
2448
2449/**
Daniel Veillard7a02cfe2003-09-25 12:18:34 +00002450 * xmlStringLenDecodeEntities:
Owen Taylor3473f882001-02-23 17:55:21 +00002451 * @ctxt: the parser context
2452 * @str: the input string
Daniel Veillarde57ec792003-09-10 10:50:59 +00002453 * @len: the string length
Owen Taylor3473f882001-02-23 17:55:21 +00002454 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2455 * @end: an end marker xmlChar, 0 if none
2456 * @end2: an end marker xmlChar, 0 if none
2457 * @end3: an end marker xmlChar, 0 if none
2458 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002459 * Takes a entity string content and process to do the adequate substitutions.
Owen Taylor3473f882001-02-23 17:55:21 +00002460 *
2461 * [67] Reference ::= EntityRef | CharRef
2462 *
2463 * [69] PEReference ::= '%' Name ';'
2464 *
2465 * Returns A newly allocated string with the substitution done. The caller
2466 * must deallocate it !
2467 */
2468xmlChar *
Daniel Veillarde57ec792003-09-10 10:50:59 +00002469xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2470 int what, xmlChar end, xmlChar end2, xmlChar end3) {
Owen Taylor3473f882001-02-23 17:55:21 +00002471 xmlChar *buffer = NULL;
2472 int buffer_size = 0;
2473
2474 xmlChar *current = NULL;
Daniel Veillard68b6e022008-03-31 09:26:00 +00002475 xmlChar *rep = NULL;
Daniel Veillarde57ec792003-09-10 10:50:59 +00002476 const xmlChar *last;
Owen Taylor3473f882001-02-23 17:55:21 +00002477 xmlEntityPtr ent;
2478 int c,l;
2479 int nbchars = 0;
2480
Daniel Veillarda82b1822004-11-08 16:24:57 +00002481 if ((ctxt == NULL) || (str == NULL) || (len < 0))
Owen Taylor3473f882001-02-23 17:55:21 +00002482 return(NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00002483 last = str + len;
Owen Taylor3473f882001-02-23 17:55:21 +00002484
Daniel Veillard0161e632008-08-28 15:36:32 +00002485 if (((ctxt->depth > 40) &&
2486 ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
2487 (ctxt->depth > 1024)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002488 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002489 return(NULL);
2490 }
2491
2492 /*
2493 * allocate a translation buffer.
2494 */
2495 buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002496 buffer = (xmlChar *) xmlMallocAtomic(buffer_size * sizeof(xmlChar));
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002497 if (buffer == NULL) goto mem_error;
Owen Taylor3473f882001-02-23 17:55:21 +00002498
2499 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002500 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +00002501 * we are operating on already parsed values.
2502 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00002503 if (str < last)
2504 c = CUR_SCHAR(str, l);
2505 else
2506 c = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002507 while ((c != 0) && (c != end) && /* non input consuming loop */
2508 (c != end2) && (c != end3)) {
2509
2510 if (c == 0) break;
2511 if ((c == '&') && (str[1] == '#')) {
2512 int val = xmlParseStringCharRef(ctxt, &str);
2513 if (val != 0) {
2514 COPY_BUF(0,buffer,nbchars,val);
2515 }
Daniel Veillardbedc9772005-09-28 21:42:15 +00002516 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
Daniel Veillard0161e632008-08-28 15:36:32 +00002517 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
Daniel Veillardbedc9772005-09-28 21:42:15 +00002518 }
Owen Taylor3473f882001-02-23 17:55:21 +00002519 } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
2520 if (xmlParserDebugEntities)
2521 xmlGenericError(xmlGenericErrorContext,
2522 "String decoding Entity Reference: %.30s\n",
2523 str);
2524 ent = xmlParseStringEntityRef(ctxt, &str);
Daniel Veillard8915c152008-08-26 13:05:34 +00002525 if ((ctxt->lastError.code == XML_ERR_ENTITY_LOOP) ||
2526 (ctxt->lastError.code == XML_ERR_INTERNAL_ERROR))
Daniel Veillard4bf899b2008-08-20 17:04:30 +00002527 goto int_error;
Daniel Veillard4bf899b2008-08-20 17:04:30 +00002528 if (ent != NULL)
Daniel Veillardf4f4e482008-08-25 08:57:48 +00002529 ctxt->nbentities += ent->checked;
Owen Taylor3473f882001-02-23 17:55:21 +00002530 if ((ent != NULL) &&
2531 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
2532 if (ent->content != NULL) {
2533 COPY_BUF(0,buffer,nbchars,ent->content[0]);
Daniel Veillardbedc9772005-09-28 21:42:15 +00002534 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
Daniel Veillard0161e632008-08-28 15:36:32 +00002535 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
Daniel Veillardbedc9772005-09-28 21:42:15 +00002536 }
Owen Taylor3473f882001-02-23 17:55:21 +00002537 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00002538 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
2539 "predefined entity has no content\n");
Owen Taylor3473f882001-02-23 17:55:21 +00002540 }
2541 } else if ((ent != NULL) && (ent->content != NULL)) {
Owen Taylor3473f882001-02-23 17:55:21 +00002542 ctxt->depth++;
2543 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2544 0, 0, 0);
2545 ctxt->depth--;
Daniel Veillard0161e632008-08-28 15:36:32 +00002546
Owen Taylor3473f882001-02-23 17:55:21 +00002547 if (rep != NULL) {
2548 current = rep;
2549 while (*current != 0) { /* non input consuming loop */
2550 buffer[nbchars++] = *current++;
2551 if (nbchars >
2552 buffer_size - XML_PARSER_BUFFER_SIZE) {
Daniel Veillard0161e632008-08-28 15:36:32 +00002553 if (xmlParserEntityCheck(ctxt, nbchars, ent))
2554 goto int_error;
2555 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
Owen Taylor3473f882001-02-23 17:55:21 +00002556 }
2557 }
2558 xmlFree(rep);
Daniel Veillard68b6e022008-03-31 09:26:00 +00002559 rep = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00002560 }
2561 } else if (ent != NULL) {
2562 int i = xmlStrlen(ent->name);
2563 const xmlChar *cur = ent->name;
2564
2565 buffer[nbchars++] = '&';
2566 if (nbchars > buffer_size - i - XML_PARSER_BUFFER_SIZE) {
Daniel Veillard0161e632008-08-28 15:36:32 +00002567 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
Owen Taylor3473f882001-02-23 17:55:21 +00002568 }
2569 for (;i > 0;i--)
2570 buffer[nbchars++] = *cur++;
2571 buffer[nbchars++] = ';';
2572 }
2573 } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
2574 if (xmlParserDebugEntities)
2575 xmlGenericError(xmlGenericErrorContext,
2576 "String decoding PE Reference: %.30s\n", str);
2577 ent = xmlParseStringPEReference(ctxt, &str);
Daniel Veillard4bf899b2008-08-20 17:04:30 +00002578 if (ctxt->lastError.code == XML_ERR_ENTITY_LOOP)
2579 goto int_error;
Daniel Veillard4bf899b2008-08-20 17:04:30 +00002580 if (ent != NULL)
Daniel Veillardf4f4e482008-08-25 08:57:48 +00002581 ctxt->nbentities += ent->checked;
Owen Taylor3473f882001-02-23 17:55:21 +00002582 if (ent != NULL) {
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00002583 if (ent->content == NULL) {
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00002584 xmlLoadEntityContent(ctxt, ent);
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00002585 }
Owen Taylor3473f882001-02-23 17:55:21 +00002586 ctxt->depth++;
2587 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2588 0, 0, 0);
2589 ctxt->depth--;
2590 if (rep != NULL) {
2591 current = rep;
2592 while (*current != 0) { /* non input consuming loop */
2593 buffer[nbchars++] = *current++;
2594 if (nbchars >
2595 buffer_size - XML_PARSER_BUFFER_SIZE) {
Daniel Veillard0161e632008-08-28 15:36:32 +00002596 if (xmlParserEntityCheck(ctxt, nbchars, ent))
2597 goto int_error;
2598 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
Owen Taylor3473f882001-02-23 17:55:21 +00002599 }
2600 }
2601 xmlFree(rep);
Daniel Veillard68b6e022008-03-31 09:26:00 +00002602 rep = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00002603 }
2604 }
2605 } else {
2606 COPY_BUF(l,buffer,nbchars,c);
2607 str += l;
2608 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
Daniel Veillard0161e632008-08-28 15:36:32 +00002609 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
Owen Taylor3473f882001-02-23 17:55:21 +00002610 }
2611 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00002612 if (str < last)
2613 c = CUR_SCHAR(str, l);
2614 else
2615 c = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002616 }
2617 buffer[nbchars++] = 0;
2618 return(buffer);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002619
2620mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002621 xmlErrMemory(ctxt, NULL);
Daniel Veillard4bf899b2008-08-20 17:04:30 +00002622int_error:
Daniel Veillard68b6e022008-03-31 09:26:00 +00002623 if (rep != NULL)
2624 xmlFree(rep);
2625 if (buffer != NULL)
2626 xmlFree(buffer);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002627 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002628}
2629
Daniel Veillarde57ec792003-09-10 10:50:59 +00002630/**
2631 * xmlStringDecodeEntities:
2632 * @ctxt: the parser context
2633 * @str: the input string
2634 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2635 * @end: an end marker xmlChar, 0 if none
2636 * @end2: an end marker xmlChar, 0 if none
2637 * @end3: an end marker xmlChar, 0 if none
2638 *
2639 * Takes a entity string content and process to do the adequate substitutions.
2640 *
2641 * [67] Reference ::= EntityRef | CharRef
2642 *
2643 * [69] PEReference ::= '%' Name ';'
2644 *
2645 * Returns A newly allocated string with the substitution done. The caller
2646 * must deallocate it !
2647 */
2648xmlChar *
2649xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
2650 xmlChar end, xmlChar end2, xmlChar end3) {
Daniel Veillarda82b1822004-11-08 16:24:57 +00002651 if ((ctxt == NULL) || (str == NULL)) return(NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00002652 return(xmlStringLenDecodeEntities(ctxt, str, xmlStrlen(str), what,
2653 end, end2, end3));
2654}
Owen Taylor3473f882001-02-23 17:55:21 +00002655
/************************************************************************
 *                                                                      *
 *              Commodity functions, cleanup needed ?                   *
 *                                                                      *
 ************************************************************************/
2661
2662/**
2663 * areBlanks:
2664 * @ctxt: an XML parser context
2665 * @str: a xmlChar *
2666 * @len: the size of @str
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002667 * @blank_chars: we know the chars are blanks
Owen Taylor3473f882001-02-23 17:55:21 +00002668 *
2669 * Is this a sequence of blank chars that one can ignore ?
2670 *
2671 * Returns 1 if ignorable 0 otherwise.
2672 */
2673
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002674static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2675 int blank_chars) {
Owen Taylor3473f882001-02-23 17:55:21 +00002676 int i, ret;
2677 xmlNodePtr lastChild;
2678
Daniel Veillard05c13a22001-09-09 08:38:09 +00002679 /*
2680 * Don't spend time trying to differentiate them, the same callback is
2681 * used !
2682 */
2683 if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters)
Daniel Veillard2f362242001-03-02 17:36:21 +00002684 return(0);
2685
Owen Taylor3473f882001-02-23 17:55:21 +00002686 /*
2687 * Check for xml:space value.
2688 */
Daniel Veillard1114d002006-10-12 16:24:35 +00002689 if ((ctxt->space == NULL) || (*(ctxt->space) == 1) ||
2690 (*(ctxt->space) == -2))
Owen Taylor3473f882001-02-23 17:55:21 +00002691 return(0);
2692
2693 /*
2694 * Check that the string is made of blanks
2695 */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002696 if (blank_chars == 0) {
2697 for (i = 0;i < len;i++)
2698 if (!(IS_BLANK_CH(str[i]))) return(0);
2699 }
Owen Taylor3473f882001-02-23 17:55:21 +00002700
2701 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002702 * Look if the element is mixed content in the DTD if available
Owen Taylor3473f882001-02-23 17:55:21 +00002703 */
Daniel Veillard6dd398f2001-07-25 22:41:03 +00002704 if (ctxt->node == NULL) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002705 if (ctxt->myDoc != NULL) {
2706 ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
2707 if (ret == 0) return(1);
2708 if (ret == 1) return(0);
2709 }
2710
2711 /*
2712 * Otherwise, heuristic :-\
2713 */
Daniel Veillardabac41e2005-07-06 15:17:38 +00002714 if ((RAW != '<') && (RAW != 0xD)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002715 if ((ctxt->node->children == NULL) &&
2716 (RAW == '<') && (NXT(1) == '/')) return(0);
2717
2718 lastChild = xmlGetLastChild(ctxt->node);
2719 if (lastChild == NULL) {
Daniel Veillard7db37732001-07-12 01:20:08 +00002720 if ((ctxt->node->type != XML_ELEMENT_NODE) &&
2721 (ctxt->node->content != NULL)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002722 } else if (xmlNodeIsText(lastChild))
2723 return(0);
2724 else if ((ctxt->node->children != NULL) &&
2725 (xmlNodeIsText(ctxt->node->children)))
2726 return(0);
2727 return(1);
2728}
2729
/************************************************************************
 *                                                                      *
 *              Extra stuff for namespace support                       *
 *      Relates to http://www.w3.org/TR/WD-xml-names                    *
 *                                                                      *
 ************************************************************************/
2736
2737/**
2738 * xmlSplitQName:
2739 * @ctxt: an XML parser context
2740 * @name: an XML parser context
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00002741 * @prefix: a xmlChar **
Owen Taylor3473f882001-02-23 17:55:21 +00002742 *
2743 * parse an UTF8 encoded XML qualified name string
2744 *
2745 * [NS 5] QName ::= (Prefix ':')? LocalPart
2746 *
2747 * [NS 6] Prefix ::= NCName
2748 *
2749 * [NS 7] LocalPart ::= NCName
2750 *
2751 * Returns the local part, and prefix is updated
2752 * to get the Prefix if any.
2753 */
2754
2755xmlChar *
2756xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
2757 xmlChar buf[XML_MAX_NAMELEN + 5];
2758 xmlChar *buffer = NULL;
2759 int len = 0;
2760 int max = XML_MAX_NAMELEN;
2761 xmlChar *ret = NULL;
2762 const xmlChar *cur = name;
2763 int c;
2764
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00002765 if (prefix == NULL) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002766 *prefix = NULL;
2767
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00002768 if (cur == NULL) return(NULL);
2769
Owen Taylor3473f882001-02-23 17:55:21 +00002770#ifndef XML_XML_NAMESPACE
2771 /* xml: prefix is not really a namespace */
2772 if ((cur[0] == 'x') && (cur[1] == 'm') &&
2773 (cur[2] == 'l') && (cur[3] == ':'))
2774 return(xmlStrdup(name));
2775#endif
2776
Daniel Veillard597bc482003-07-24 16:08:28 +00002777 /* nasty but well=formed */
Owen Taylor3473f882001-02-23 17:55:21 +00002778 if (cur[0] == ':')
2779 return(xmlStrdup(name));
2780
2781 c = *cur++;
2782 while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
2783 buf[len++] = c;
2784 c = *cur++;
2785 }
2786 if (len >= max) {
2787 /*
2788 * Okay someone managed to make a huge name, so he's ready to pay
2789 * for the processing speed.
2790 */
2791 max = len * 2;
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00002792
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002793 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002794 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002795 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002796 return(NULL);
2797 }
2798 memcpy(buffer, buf, len);
2799 while ((c != 0) && (c != ':')) { /* tested bigname.xml */
2800 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00002801 xmlChar *tmp;
2802
Owen Taylor3473f882001-02-23 17:55:21 +00002803 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00002804 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00002805 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00002806 if (tmp == NULL) {
Daniel Veillard68b6e022008-03-31 09:26:00 +00002807 xmlFree(buffer);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002808 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002809 return(NULL);
2810 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00002811 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00002812 }
2813 buffer[len++] = c;
2814 c = *cur++;
2815 }
2816 buffer[len] = 0;
2817 }
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00002818
Daniel Veillard597bc482003-07-24 16:08:28 +00002819 if ((c == ':') && (*cur == 0)) {
Daniel Veillard02a49632006-10-13 12:42:31 +00002820 if (buffer != NULL)
2821 xmlFree(buffer);
2822 *prefix = NULL;
Daniel Veillard597bc482003-07-24 16:08:28 +00002823 return(xmlStrdup(name));
Daniel Veillard02a49632006-10-13 12:42:31 +00002824 }
Daniel Veillard597bc482003-07-24 16:08:28 +00002825
Owen Taylor3473f882001-02-23 17:55:21 +00002826 if (buffer == NULL)
2827 ret = xmlStrndup(buf, len);
2828 else {
2829 ret = buffer;
2830 buffer = NULL;
2831 max = XML_MAX_NAMELEN;
2832 }
2833
2834
2835 if (c == ':') {
Daniel Veillardbb284f42002-10-16 18:02:47 +00002836 c = *cur;
Owen Taylor3473f882001-02-23 17:55:21 +00002837 *prefix = ret;
Daniel Veillard597bc482003-07-24 16:08:28 +00002838 if (c == 0) {
Daniel Veillard8d73bcb2003-08-04 01:06:15 +00002839 return(xmlStrndup(BAD_CAST "", 0));
Daniel Veillard597bc482003-07-24 16:08:28 +00002840 }
Owen Taylor3473f882001-02-23 17:55:21 +00002841 len = 0;
2842
Daniel Veillardbb284f42002-10-16 18:02:47 +00002843 /*
2844 * Check that the first character is proper to start
2845 * a new name
2846 */
2847 if (!(((c >= 0x61) && (c <= 0x7A)) ||
2848 ((c >= 0x41) && (c <= 0x5A)) ||
2849 (c == '_') || (c == ':'))) {
2850 int l;
2851 int first = CUR_SCHAR(cur, l);
2852
2853 if (!IS_LETTER(first) && (first != '_')) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00002854 xmlFatalErrMsgStr(ctxt, XML_NS_ERR_QNAME,
Daniel Veillardbb284f42002-10-16 18:02:47 +00002855 "Name %s is not XML Namespace compliant\n",
Daniel Veillardbc92eca2003-09-15 09:48:06 +00002856 name);
Daniel Veillardbb284f42002-10-16 18:02:47 +00002857 }
2858 }
2859 cur++;
2860
Owen Taylor3473f882001-02-23 17:55:21 +00002861 while ((c != 0) && (len < max)) { /* tested bigname2.xml */
2862 buf[len++] = c;
2863 c = *cur++;
2864 }
2865 if (len >= max) {
2866 /*
2867 * Okay someone managed to make a huge name, so he's ready to pay
2868 * for the processing speed.
2869 */
2870 max = len * 2;
Daniel Veillard68b6e022008-03-31 09:26:00 +00002871
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002872 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002873 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002874 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002875 return(NULL);
2876 }
2877 memcpy(buffer, buf, len);
2878 while (c != 0) { /* tested bigname2.xml */
2879 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00002880 xmlChar *tmp;
2881
Owen Taylor3473f882001-02-23 17:55:21 +00002882 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00002883 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00002884 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00002885 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002886 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00002887 xmlFree(buffer);
Owen Taylor3473f882001-02-23 17:55:21 +00002888 return(NULL);
2889 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00002890 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00002891 }
2892 buffer[len++] = c;
2893 c = *cur++;
2894 }
2895 buffer[len] = 0;
2896 }
Daniel Veillard68b6e022008-03-31 09:26:00 +00002897
Owen Taylor3473f882001-02-23 17:55:21 +00002898 if (buffer == NULL)
2899 ret = xmlStrndup(buf, len);
2900 else {
2901 ret = buffer;
2902 }
2903 }
2904
2905 return(ret);
2906}
2907
/************************************************************************
 *                                                                      *
 *                      The parser itself                               *
 *      Relates to http://www.w3.org/TR/REC-xml                         *
 *                                                                      *
 ************************************************************************/
2914
/************************************************************************
 *                                                                      *
 *      Routines to parse Name, NCName and NmToken                      *
 *                                                                      *
 ************************************************************************/
/*
 * Global statistics counters for the name parsing routines.
 * nbParseNameComplex is incremented on each call of xmlParseNameComplex();
 * the others presumably count calls of the correspondingly named
 * routines -- TODO confirm against their definitions.
 * NOTE(review): these have external linkage and are updated without any
 * synchronization.
 */
unsigned long nbParseName = 0;
unsigned long nbParseNmToken = 0;
unsigned long nbParseNCName = 0;
unsigned long nbParseNCNameComplex = 0;
unsigned long nbParseNameComplex = 0;
unsigned long nbParseStringName = 0;
2926/*
2927 * The two following functions are related to the change of accepted
2928 * characters for Name and NmToken in the Revision 5 of XML-1.0
2929 * They correspond to the modified production [4] and the new production [4a]
2930 * changes in that revision. Also note that the macros used for the
2931 * productions Letter, Digit, CombiningChar and Extender are not needed
2932 * anymore.
2933 * We still keep compatibility to pre-revision5 parsing semantic if the
2934 * new XML_PARSE_OLD10 option is given to the parser.
2935 */
2936static int
2937xmlIsNameStartChar(xmlParserCtxtPtr ctxt, int c) {
2938 if ((ctxt->options & XML_PARSE_OLD10) == 0) {
2939 /*
2940 * Use the new checks of production [4] [4a] amd [5] of the
2941 * Update 5 of XML-1.0
2942 */
2943 if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
2944 (((c >= 'a') && (c <= 'z')) ||
2945 ((c >= 'A') && (c <= 'Z')) ||
2946 (c == '_') || (c == ':') ||
2947 ((c >= 0xC0) && (c <= 0xD6)) ||
2948 ((c >= 0xD8) && (c <= 0xF6)) ||
2949 ((c >= 0xF8) && (c <= 0x2FF)) ||
2950 ((c >= 0x370) && (c <= 0x37D)) ||
2951 ((c >= 0x37F) && (c <= 0x1FFF)) ||
2952 ((c >= 0x200C) && (c <= 0x200D)) ||
2953 ((c >= 0x2070) && (c <= 0x218F)) ||
2954 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
2955 ((c >= 0x3001) && (c <= 0xD7FF)) ||
2956 ((c >= 0xF900) && (c <= 0xFDCF)) ||
2957 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
2958 ((c >= 0x10000) && (c <= 0xEFFFF))))
2959 return(1);
2960 } else {
2961 if (IS_LETTER(c) || (c == '_') || (c == ':'))
2962 return(1);
2963 }
2964 return(0);
2965}
2966
2967static int
2968xmlIsNameChar(xmlParserCtxtPtr ctxt, int c) {
2969 if ((ctxt->options & XML_PARSE_OLD10) == 0) {
2970 /*
2971 * Use the new checks of production [4] [4a] amd [5] of the
2972 * Update 5 of XML-1.0
2973 */
2974 if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
2975 (((c >= 'a') && (c <= 'z')) ||
2976 ((c >= 'A') && (c <= 'Z')) ||
2977 ((c >= '0') && (c <= '9')) || /* !start */
2978 (c == '_') || (c == ':') ||
2979 (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
2980 ((c >= 0xC0) && (c <= 0xD6)) ||
2981 ((c >= 0xD8) && (c <= 0xF6)) ||
2982 ((c >= 0xF8) && (c <= 0x2FF)) ||
2983 ((c >= 0x300) && (c <= 0x36F)) || /* !start */
2984 ((c >= 0x370) && (c <= 0x37D)) ||
2985 ((c >= 0x37F) && (c <= 0x1FFF)) ||
2986 ((c >= 0x200C) && (c <= 0x200D)) ||
2987 ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
2988 ((c >= 0x2070) && (c <= 0x218F)) ||
2989 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
2990 ((c >= 0x3001) && (c <= 0xD7FF)) ||
2991 ((c >= 0xF900) && (c <= 0xFDCF)) ||
2992 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
2993 ((c >= 0x10000) && (c <= 0xEFFFF))))
2994 return(1);
2995 } else {
2996 if ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
2997 (c == '.') || (c == '-') ||
2998 (c == '_') || (c == ':') ||
2999 (IS_COMBINING(c)) ||
3000 (IS_EXTENDER(c)))
3001 return(1);
3002 }
3003 return(0);
3004}
3005
/* Forward declaration; the definition is not part of this section. */
static xmlChar * xmlParseAttValueInternal(xmlParserCtxtPtr ctxt,
                                          int *len, int *alloc, int normalize);
Daniel Veillard0fb18932003-09-07 09:14:37 +00003008
Daniel Veillard34e3f642008-07-29 09:02:27 +00003009static const xmlChar *
3010xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
3011 int len = 0, l;
3012 int c;
3013 int count = 0;
3014
3015 nbParseNameComplex++;
3016
3017 /*
3018 * Handler for more complex cases
3019 */
3020 GROW;
3021 c = CUR_CHAR(l);
3022 if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3023 /*
3024 * Use the new checks of production [4] [4a] amd [5] of the
3025 * Update 5 of XML-1.0
3026 */
3027 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3028 (!(((c >= 'a') && (c <= 'z')) ||
3029 ((c >= 'A') && (c <= 'Z')) ||
3030 (c == '_') || (c == ':') ||
3031 ((c >= 0xC0) && (c <= 0xD6)) ||
3032 ((c >= 0xD8) && (c <= 0xF6)) ||
3033 ((c >= 0xF8) && (c <= 0x2FF)) ||
3034 ((c >= 0x370) && (c <= 0x37D)) ||
3035 ((c >= 0x37F) && (c <= 0x1FFF)) ||
3036 ((c >= 0x200C) && (c <= 0x200D)) ||
3037 ((c >= 0x2070) && (c <= 0x218F)) ||
3038 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3039 ((c >= 0x3001) && (c <= 0xD7FF)) ||
3040 ((c >= 0xF900) && (c <= 0xFDCF)) ||
3041 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3042 ((c >= 0x10000) && (c <= 0xEFFFF))))) {
3043 return(NULL);
3044 }
3045 len += l;
3046 NEXTL(l);
3047 c = CUR_CHAR(l);
3048 while ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3049 (((c >= 'a') && (c <= 'z')) ||
3050 ((c >= 'A') && (c <= 'Z')) ||
3051 ((c >= '0') && (c <= '9')) || /* !start */
3052 (c == '_') || (c == ':') ||
3053 (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3054 ((c >= 0xC0) && (c <= 0xD6)) ||
3055 ((c >= 0xD8) && (c <= 0xF6)) ||
3056 ((c >= 0xF8) && (c <= 0x2FF)) ||
3057 ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3058 ((c >= 0x370) && (c <= 0x37D)) ||
3059 ((c >= 0x37F) && (c <= 0x1FFF)) ||
3060 ((c >= 0x200C) && (c <= 0x200D)) ||
3061 ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3062 ((c >= 0x2070) && (c <= 0x218F)) ||
3063 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3064 ((c >= 0x3001) && (c <= 0xD7FF)) ||
3065 ((c >= 0xF900) && (c <= 0xFDCF)) ||
3066 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3067 ((c >= 0x10000) && (c <= 0xEFFFF))
3068 )) {
3069 if (count++ > 100) {
3070 count = 0;
3071 GROW;
3072 }
3073 len += l;
3074 NEXTL(l);
3075 c = CUR_CHAR(l);
3076 }
3077 } else {
3078 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3079 (!IS_LETTER(c) && (c != '_') &&
3080 (c != ':'))) {
3081 return(NULL);
3082 }
3083 len += l;
3084 NEXTL(l);
3085 c = CUR_CHAR(l);
3086
3087 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3088 ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3089 (c == '.') || (c == '-') ||
3090 (c == '_') || (c == ':') ||
3091 (IS_COMBINING(c)) ||
3092 (IS_EXTENDER(c)))) {
3093 if (count++ > 100) {
3094 count = 0;
3095 GROW;
3096 }
3097 len += l;
3098 NEXTL(l);
3099 c = CUR_CHAR(l);
3100 }
3101 }
3102 if ((*ctxt->input->cur == '\n') && (ctxt->input->cur[-1] == '\r'))
3103 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - (len + 1), len));
3104 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
3105}
3106
Owen Taylor3473f882001-02-23 17:55:21 +00003107/**
3108 * xmlParseName:
3109 * @ctxt: an XML parser context
3110 *
3111 * parse an XML name.
3112 *
3113 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3114 * CombiningChar | Extender
3115 *
3116 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3117 *
Daniel Veillard807b4de2004-09-26 14:42:56 +00003118 * [6] Names ::= Name (#x20 Name)*
Owen Taylor3473f882001-02-23 17:55:21 +00003119 *
3120 * Returns the Name parsed or NULL
3121 */
3122
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003123const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00003124xmlParseName(xmlParserCtxtPtr ctxt) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00003125 const xmlChar *in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003126 const xmlChar *ret;
Owen Taylor3473f882001-02-23 17:55:21 +00003127 int count = 0;
3128
3129 GROW;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003130
Daniel Veillard34e3f642008-07-29 09:02:27 +00003131 nbParseName++;
3132
Daniel Veillard48b2f892001-02-25 16:11:03 +00003133 /*
3134 * Accelerator for simple ASCII names
3135 */
3136 in = ctxt->input->cur;
3137 if (((*in >= 0x61) && (*in <= 0x7A)) ||
3138 ((*in >= 0x41) && (*in <= 0x5A)) ||
3139 (*in == '_') || (*in == ':')) {
3140 in++;
3141 while (((*in >= 0x61) && (*in <= 0x7A)) ||
3142 ((*in >= 0x41) && (*in <= 0x5A)) ||
3143 ((*in >= 0x30) && (*in <= 0x39)) ||
Daniel Veillard76d66f42001-05-16 21:05:17 +00003144 (*in == '_') || (*in == '-') ||
3145 (*in == ':') || (*in == '.'))
Daniel Veillard48b2f892001-02-25 16:11:03 +00003146 in++;
Daniel Veillard76d66f42001-05-16 21:05:17 +00003147 if ((*in > 0) && (*in < 0x80)) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00003148 count = in - ctxt->input->cur;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003149 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
Daniel Veillard48b2f892001-02-25 16:11:03 +00003150 ctxt->input->cur = in;
Daniel Veillard77a90a72003-03-22 00:04:05 +00003151 ctxt->nbChars += count;
3152 ctxt->input->col += count;
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003153 if (ret == NULL)
3154 xmlErrMemory(ctxt, NULL);
Daniel Veillard48b2f892001-02-25 16:11:03 +00003155 return(ret);
3156 }
3157 }
Daniel Veillard34e3f642008-07-29 09:02:27 +00003158 /* accelerator for special cases */
Daniel Veillard2f362242001-03-02 17:36:21 +00003159 return(xmlParseNameComplex(ctxt));
Daniel Veillard21a0f912001-02-25 19:54:14 +00003160}
Daniel Veillard48b2f892001-02-25 16:11:03 +00003161
Daniel Veillard34e3f642008-07-29 09:02:27 +00003162static const xmlChar *
3163xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) {
3164 int len = 0, l;
3165 int c;
3166 int count = 0;
3167
3168 nbParseNCNameComplex++;
3169
3170 /*
3171 * Handler for more complex cases
3172 */
3173 GROW;
3174 c = CUR_CHAR(l);
3175 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3176 (!xmlIsNameStartChar(ctxt, c) || (c == ':'))) {
3177 return(NULL);
3178 }
3179
3180 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3181 (xmlIsNameChar(ctxt, c) && (c != ':'))) {
3182 if (count++ > 100) {
3183 count = 0;
3184 GROW;
3185 }
3186 len += l;
3187 NEXTL(l);
3188 c = CUR_CHAR(l);
3189 }
3190 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
3191}
3192
3193/**
3194 * xmlParseNCName:
3195 * @ctxt: an XML parser context
3196 * @len: lenght of the string parsed
3197 *
3198 * parse an XML name.
3199 *
3200 * [4NS] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
3201 * CombiningChar | Extender
3202 *
3203 * [5NS] NCName ::= (Letter | '_') (NCNameChar)*
3204 *
3205 * Returns the Name parsed or NULL
3206 */
3207
3208static const xmlChar *
3209xmlParseNCName(xmlParserCtxtPtr ctxt) {
3210 const xmlChar *in;
3211 const xmlChar *ret;
3212 int count = 0;
3213
3214 nbParseNCName++;
3215
3216 /*
3217 * Accelerator for simple ASCII names
3218 */
3219 in = ctxt->input->cur;
3220 if (((*in >= 0x61) && (*in <= 0x7A)) ||
3221 ((*in >= 0x41) && (*in <= 0x5A)) ||
3222 (*in == '_')) {
3223 in++;
3224 while (((*in >= 0x61) && (*in <= 0x7A)) ||
3225 ((*in >= 0x41) && (*in <= 0x5A)) ||
3226 ((*in >= 0x30) && (*in <= 0x39)) ||
3227 (*in == '_') || (*in == '-') ||
3228 (*in == '.'))
3229 in++;
3230 if ((*in > 0) && (*in < 0x80)) {
3231 count = in - ctxt->input->cur;
3232 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
3233 ctxt->input->cur = in;
3234 ctxt->nbChars += count;
3235 ctxt->input->col += count;
3236 if (ret == NULL) {
3237 xmlErrMemory(ctxt, NULL);
3238 }
3239 return(ret);
3240 }
3241 }
3242 return(xmlParseNCNameComplex(ctxt));
3243}
3244
Daniel Veillard46de64e2002-05-29 08:21:33 +00003245/**
3246 * xmlParseNameAndCompare:
3247 * @ctxt: an XML parser context
3248 *
3249 * parse an XML name and compares for match
3250 * (specialized for endtag parsing)
3251 *
Daniel Veillard46de64e2002-05-29 08:21:33 +00003252 * Returns NULL for an illegal name, (xmlChar*) 1 for success
3253 * and the name for mismatch
3254 */
3255
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003256static const xmlChar *
Daniel Veillard46de64e2002-05-29 08:21:33 +00003257xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003258 register const xmlChar *cmp = other;
3259 register const xmlChar *in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003260 const xmlChar *ret;
Daniel Veillard46de64e2002-05-29 08:21:33 +00003261
3262 GROW;
Daniel Veillard34e3f642008-07-29 09:02:27 +00003263
Daniel Veillard46de64e2002-05-29 08:21:33 +00003264 in = ctxt->input->cur;
3265 while (*in != 0 && *in == *cmp) {
Daniel Veillard34e3f642008-07-29 09:02:27 +00003266 ++in;
Daniel Veillard46de64e2002-05-29 08:21:33 +00003267 ++cmp;
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00003268 ctxt->input->col++;
Daniel Veillard46de64e2002-05-29 08:21:33 +00003269 }
William M. Brack76e95df2003-10-18 16:20:14 +00003270 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
Daniel Veillard34e3f642008-07-29 09:02:27 +00003271 /* success */
Daniel Veillard46de64e2002-05-29 08:21:33 +00003272 ctxt->input->cur = in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003273 return (const xmlChar*) 1;
Daniel Veillard46de64e2002-05-29 08:21:33 +00003274 }
3275 /* failure (or end of input buffer), check with full function */
3276 ret = xmlParseName (ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00003277 /* strings coming from the dictionnary direct compare possible */
3278 if (ret == other) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003279 return (const xmlChar*) 1;
Daniel Veillard46de64e2002-05-29 08:21:33 +00003280 }
3281 return ret;
3282}
3283
Owen Taylor3473f882001-02-23 17:55:21 +00003284/**
3285 * xmlParseStringName:
3286 * @ctxt: an XML parser context
3287 * @str: a pointer to the string pointer (IN/OUT)
3288 *
3289 * parse an XML name.
3290 *
3291 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3292 * CombiningChar | Extender
3293 *
3294 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3295 *
Daniel Veillard807b4de2004-09-26 14:42:56 +00003296 * [6] Names ::= Name (#x20 Name)*
Owen Taylor3473f882001-02-23 17:55:21 +00003297 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003298 * Returns the Name parsed or NULL. The @str pointer
Owen Taylor3473f882001-02-23 17:55:21 +00003299 * is updated to the current location in the string.
3300 */
3301
Daniel Veillard56a4cb82001-03-24 17:00:36 +00003302static xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00003303xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
3304 xmlChar buf[XML_MAX_NAMELEN + 5];
3305 const xmlChar *cur = *str;
3306 int len = 0, l;
3307 int c;
3308
Daniel Veillard34e3f642008-07-29 09:02:27 +00003309 nbParseStringName++;
3310
Owen Taylor3473f882001-02-23 17:55:21 +00003311 c = CUR_SCHAR(cur, l);
Daniel Veillard34e3f642008-07-29 09:02:27 +00003312 if (!xmlIsNameStartChar(ctxt, c)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003313 return(NULL);
3314 }
3315
Daniel Veillard34e3f642008-07-29 09:02:27 +00003316 COPY_BUF(l,buf,len,c);
3317 cur += l;
3318 c = CUR_SCHAR(cur, l);
3319 while (xmlIsNameChar(ctxt, c)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003320 COPY_BUF(l,buf,len,c);
3321 cur += l;
3322 c = CUR_SCHAR(cur, l);
3323 if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
3324 /*
3325 * Okay someone managed to make a huge name, so he's ready to pay
3326 * for the processing speed.
3327 */
3328 xmlChar *buffer;
3329 int max = len * 2;
Daniel Veillard34e3f642008-07-29 09:02:27 +00003330
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003331 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003332 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003333 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003334 return(NULL);
3335 }
3336 memcpy(buffer, buf, len);
Daniel Veillard34e3f642008-07-29 09:02:27 +00003337 while (xmlIsNameChar(ctxt, c)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003338 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003339 xmlChar *tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003340 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003341 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00003342 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00003343 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003344 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00003345 xmlFree(buffer);
Owen Taylor3473f882001-02-23 17:55:21 +00003346 return(NULL);
3347 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003348 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003349 }
3350 COPY_BUF(l,buffer,len,c);
3351 cur += l;
3352 c = CUR_SCHAR(cur, l);
3353 }
3354 buffer[len] = 0;
3355 *str = cur;
3356 return(buffer);
3357 }
3358 }
3359 *str = cur;
3360 return(xmlStrndup(buf, len));
3361}
3362
3363/**
3364 * xmlParseNmtoken:
3365 * @ctxt: an XML parser context
Daniel Veillard34e3f642008-07-29 09:02:27 +00003366 *
Owen Taylor3473f882001-02-23 17:55:21 +00003367 * parse an XML Nmtoken.
3368 *
3369 * [7] Nmtoken ::= (NameChar)+
3370 *
Daniel Veillard807b4de2004-09-26 14:42:56 +00003371 * [8] Nmtokens ::= Nmtoken (#x20 Nmtoken)*
Owen Taylor3473f882001-02-23 17:55:21 +00003372 *
3373 * Returns the Nmtoken parsed or NULL
3374 */
3375
3376xmlChar *
3377xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
3378 xmlChar buf[XML_MAX_NAMELEN + 5];
3379 int len = 0, l;
3380 int c;
3381 int count = 0;
3382
Daniel Veillard34e3f642008-07-29 09:02:27 +00003383 nbParseNmToken++;
3384
Owen Taylor3473f882001-02-23 17:55:21 +00003385 GROW;
3386 c = CUR_CHAR(l);
3387
Daniel Veillard34e3f642008-07-29 09:02:27 +00003388 while (xmlIsNameChar(ctxt, c)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003389 if (count++ > 100) {
3390 count = 0;
3391 GROW;
3392 }
3393 COPY_BUF(l,buf,len,c);
3394 NEXTL(l);
3395 c = CUR_CHAR(l);
3396 if (len >= XML_MAX_NAMELEN) {
3397 /*
3398 * Okay someone managed to make a huge token, so he's ready to pay
3399 * for the processing speed.
3400 */
3401 xmlChar *buffer;
3402 int max = len * 2;
Daniel Veillard34e3f642008-07-29 09:02:27 +00003403
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003404 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003405 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003406 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003407 return(NULL);
3408 }
3409 memcpy(buffer, buf, len);
Daniel Veillard34e3f642008-07-29 09:02:27 +00003410 while (xmlIsNameChar(ctxt, c)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003411 if (count++ > 100) {
3412 count = 0;
3413 GROW;
3414 }
3415 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003416 xmlChar *tmp;
3417
Owen Taylor3473f882001-02-23 17:55:21 +00003418 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003419 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00003420 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00003421 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003422 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00003423 xmlFree(buffer);
Owen Taylor3473f882001-02-23 17:55:21 +00003424 return(NULL);
3425 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003426 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003427 }
3428 COPY_BUF(l,buffer,len,c);
3429 NEXTL(l);
3430 c = CUR_CHAR(l);
3431 }
3432 buffer[len] = 0;
3433 return(buffer);
3434 }
3435 }
3436 if (len == 0)
3437 return(NULL);
3438 return(xmlStrndup(buf, len));
3439}
3440
/**
 * xmlParseEntityValue:
 * @ctxt:  an XML parser context
 * @orig:  if non-NULL store a copy of the original entity value
 *
 * parse a value for ENTITY declarations
 *
 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
 *	               "'" ([^%&'] | PEReference | Reference)* "'"
 *
 * The literal is first copied verbatim into a work buffer, the buffer
 * is then scanned to report '%' and '&' which are not part of a
 * reference, and finally passed to xmlStringDecodeEntities() with
 * XML_SUBSTITUTE_PEREF.
 *
 * When the closing quote is found and @orig is non-NULL, *@orig receives
 * the work buffer (it is not freed here); in every other case the work
 * buffer is freed and *@orig is left untouched.
 *
 * Returns the EntityValue parsed with reference substituted or NULL
 */

xmlChar *
xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
    xmlChar *buf = NULL;        /* raw copy of the literal */
    int len = 0;                /* bytes used in buf */
    int size = XML_PARSER_BUFFER_SIZE;  /* bytes allocated for buf */
    int c, l;
    xmlChar stop;               /* the quote which opened the literal */
    xmlChar *ret = NULL;
    const xmlChar *cur = NULL;
    xmlParserInputPtr input;    /* input on which the literal started */

    if (RAW == '"') stop = '"';
    else if (RAW == '\'') stop = '\'';
    else {
        xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_STARTED, NULL);
        return(NULL);
    }
    buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
    if (buf == NULL) {
        xmlErrMemory(ctxt, NULL);
        return(NULL);
    }

    /*
     * The content of the entity definition is copied in a buffer.
     */

    ctxt->instate = XML_PARSER_ENTITY_VALUE;
    input = ctxt->input;
    GROW;
    NEXT;       /* skip the opening quote */
    c = CUR_CHAR(l);
    /*
     * NOTE: 4.4.5 Included in Literal
     * When a parameter entity reference appears in a literal entity
     * value, ... a single or double quote character in the replacement
     * text is always treated as a normal data character and will not
     * terminate the literal.
     * In practice it means we stop the loop only when back at parsing
     * the initial entity and the quote is found
     */
    while ((IS_CHAR(c)) && ((c != stop) || /* checked */
           (ctxt->input != input))) {
        /* keep room for one more multi-byte character and the final 0 */
        if (len + 5 >= size) {
            xmlChar *tmp;

            size *= 2;
            tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
            if (tmp == NULL) {
                xmlErrMemory(ctxt, NULL);
                xmlFree(buf);
                return(NULL);
            }
            buf = tmp;
        }
        COPY_BUF(l,buf,len,c);
        NEXTL(l);
        /*
         * Pop-up of finished entities.
         */
        while ((RAW == 0) && (ctxt->inputNr > 1)) /* non input consuming */
            xmlPopInput(ctxt);

        GROW;
        c = CUR_CHAR(l);
        /* a 0 may only mean the buffer ran dry: refill once and retry */
        if (c == 0) {
            GROW;
            c = CUR_CHAR(l);
        }
    }
    buf[len] = 0;

    /*
     * Raise problem w.r.t. '&' and '%' being used in non-entities
     * reference constructs. Note Charref will be handled in
     * xmlStringDecodeEntities()
     */
    cur = buf;
    while (*cur != 0) { /* non input consuming */
        if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
            xmlChar *name;
            xmlChar tmp = *cur; /* remember which of '%' or '&' it was */

            cur++;
            /* the name is only needed for the check, it is freed below */
            name = xmlParseStringName(ctxt, &cur);
            if ((name == NULL) || (*cur != ';')) {
                xmlFatalErrMsgInt(ctxt, XML_ERR_ENTITY_CHAR_ERROR,
            "EntityValue: '%c' forbidden except for entities references\n",
                                  tmp);
            }
            if ((tmp == '%') && (ctxt->inSubset == 1) &&
                (ctxt->inputNr == 1)) {
                xmlFatalErr(ctxt, XML_ERR_ENTITY_PE_INTERNAL, NULL);
            }
            if (name != NULL)
                xmlFree(name);
            /* do not step over the terminating 0 with the cur++ below */
            if (*cur == 0)
                break;
        }
        cur++;
    }

    /*
     * Then PEReference entities are substituted.
     */
    if (c != stop) {
        /* the copy loop stopped on something else than the closing quote */
        xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_FINISHED, NULL);
        xmlFree(buf);
    } else {
        NEXT;   /* skip the closing quote */
        /*
         * NOTE: 4.4.7 Bypassed
         * When a general entity reference appears in the EntityValue in
         * an entity declaration, it is bypassed and left as is.
         * so XML_SUBSTITUTE_REF is not set here.
         */
        ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF,
                                      0, 0, 0);
        if (orig != NULL)
            *orig = buf;
        else
            xmlFree(buf);
    }

    return(ret);
}
3580
3581/**
Daniel Veillard01c13b52002-12-10 15:19:08 +00003582 * xmlParseAttValueComplex:
3583 * @ctxt: an XML parser context
Daniel Veillard0fb18932003-09-07 09:14:37 +00003584 * @len: the resulting attribute len
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003585 * @normalize: wether to apply the inner normalization
Daniel Veillard01c13b52002-12-10 15:19:08 +00003586 *
3587 * parse a value for an attribute, this is the fallback function
3588 * of xmlParseAttValue() when the attribute parsing requires handling
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003589 * of non-ASCII characters, or normalization compaction.
Daniel Veillard01c13b52002-12-10 15:19:08 +00003590 *
3591 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
3592 */
Daniel Veillard0fb18932003-09-07 09:14:37 +00003593static xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003594xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) {
Daniel Veillarde72c7562002-05-31 09:47:30 +00003595 xmlChar limit = 0;
3596 xmlChar *buf = NULL;
Daniel Veillard68b6e022008-03-31 09:26:00 +00003597 xmlChar *rep = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00003598 int len = 0;
3599 int buf_size = 0;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003600 int c, l, in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003601 xmlChar *current = NULL;
3602 xmlEntityPtr ent;
3603
Owen Taylor3473f882001-02-23 17:55:21 +00003604 if (NXT(0) == '"') {
3605 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3606 limit = '"';
3607 NEXT;
3608 } else if (NXT(0) == '\'') {
3609 limit = '\'';
3610 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3611 NEXT;
3612 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003613 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003614 return(NULL);
3615 }
Daniel Veillard68b6e022008-03-31 09:26:00 +00003616
Owen Taylor3473f882001-02-23 17:55:21 +00003617 /*
3618 * allocate a translation buffer.
3619 */
3620 buf_size = XML_PARSER_BUFFER_SIZE;
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003621 buf = (xmlChar *) xmlMallocAtomic(buf_size * sizeof(xmlChar));
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003622 if (buf == NULL) goto mem_error;
Owen Taylor3473f882001-02-23 17:55:21 +00003623
3624 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003625 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +00003626 */
3627 c = CUR_CHAR(l);
Daniel Veillardfdc91562002-07-01 21:52:03 +00003628 while ((NXT(0) != limit) && /* checked */
Daniel Veillardb9e5acc2007-06-12 13:43:00 +00003629 (IS_CHAR(c)) && (c != '<')) {
Owen Taylor3473f882001-02-23 17:55:21 +00003630 if (c == 0) break;
Daniel Veillardfdc91562002-07-01 21:52:03 +00003631 if (c == '&') {
Daniel Veillard62998c02003-09-15 12:56:36 +00003632 in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003633 if (NXT(1) == '#') {
3634 int val = xmlParseCharRef(ctxt);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003635
Owen Taylor3473f882001-02-23 17:55:21 +00003636 if (val == '&') {
Daniel Veillard319a7422001-09-11 09:27:09 +00003637 if (ctxt->replaceEntities) {
3638 if (len > buf_size - 10) {
Daniel Veillard0161e632008-08-28 15:36:32 +00003639 growBuffer(buf, 10);
Daniel Veillard319a7422001-09-11 09:27:09 +00003640 }
3641 buf[len++] = '&';
3642 } else {
3643 /*
3644 * The reparsing will be done in xmlStringGetNodeList()
3645 * called by the attribute() function in SAX.c
3646 */
Daniel Veillard319a7422001-09-11 09:27:09 +00003647 if (len > buf_size - 10) {
Daniel Veillard0161e632008-08-28 15:36:32 +00003648 growBuffer(buf, 10);
Daniel Veillard319a7422001-09-11 09:27:09 +00003649 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003650 buf[len++] = '&';
3651 buf[len++] = '#';
3652 buf[len++] = '3';
3653 buf[len++] = '8';
3654 buf[len++] = ';';
Owen Taylor3473f882001-02-23 17:55:21 +00003655 }
Daniel Veillarddc171602008-03-26 17:41:38 +00003656 } else if (val != 0) {
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00003657 if (len > buf_size - 10) {
Daniel Veillard0161e632008-08-28 15:36:32 +00003658 growBuffer(buf, 10);
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00003659 }
Owen Taylor3473f882001-02-23 17:55:21 +00003660 len += xmlCopyChar(0, &buf[len], val);
3661 }
3662 } else {
3663 ent = xmlParseEntityRef(ctxt);
Daniel Veillardcba68392008-08-29 12:43:40 +00003664 ctxt->nbentities++;
3665 if (ent != NULL)
3666 ctxt->nbentities += ent->owner;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003667 if ((ent != NULL) &&
3668 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
3669 if (len > buf_size - 10) {
Daniel Veillard0161e632008-08-28 15:36:32 +00003670 growBuffer(buf, 10);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003671 }
3672 if ((ctxt->replaceEntities == 0) &&
3673 (ent->content[0] == '&')) {
3674 buf[len++] = '&';
3675 buf[len++] = '#';
3676 buf[len++] = '3';
3677 buf[len++] = '8';
3678 buf[len++] = ';';
3679 } else {
3680 buf[len++] = ent->content[0];
3681 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003682 } else if ((ent != NULL) &&
3683 (ctxt->replaceEntities != 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003684 if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
3685 rep = xmlStringDecodeEntities(ctxt, ent->content,
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003686 XML_SUBSTITUTE_REF,
3687 0, 0, 0);
Owen Taylor3473f882001-02-23 17:55:21 +00003688 if (rep != NULL) {
3689 current = rep;
3690 while (*current != 0) { /* non input consuming */
3691 buf[len++] = *current++;
3692 if (len > buf_size - 10) {
Daniel Veillard0161e632008-08-28 15:36:32 +00003693 growBuffer(buf, 10);
Owen Taylor3473f882001-02-23 17:55:21 +00003694 }
3695 }
3696 xmlFree(rep);
Daniel Veillard68b6e022008-03-31 09:26:00 +00003697 rep = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00003698 }
3699 } else {
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00003700 if (len > buf_size - 10) {
Daniel Veillard0161e632008-08-28 15:36:32 +00003701 growBuffer(buf, 10);
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00003702 }
Owen Taylor3473f882001-02-23 17:55:21 +00003703 if (ent->content != NULL)
3704 buf[len++] = ent->content[0];
3705 }
3706 } else if (ent != NULL) {
3707 int i = xmlStrlen(ent->name);
3708 const xmlChar *cur = ent->name;
3709
3710 /*
3711 * This may look absurd but is needed to detect
3712 * entities problems
3713 */
3714 if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
3715 (ent->content != NULL)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003716 rep = xmlStringDecodeEntities(ctxt, ent->content,
Daniel Veillard68b6e022008-03-31 09:26:00 +00003717 XML_SUBSTITUTE_REF, 0, 0, 0);
3718 if (rep != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +00003719 xmlFree(rep);
Daniel Veillard68b6e022008-03-31 09:26:00 +00003720 rep = NULL;
3721 }
Owen Taylor3473f882001-02-23 17:55:21 +00003722 }
3723
3724 /*
3725 * Just output the reference
3726 */
3727 buf[len++] = '&';
Daniel Veillard0161e632008-08-28 15:36:32 +00003728 while (len > buf_size - i - 10) {
3729 growBuffer(buf, i + 10);
Owen Taylor3473f882001-02-23 17:55:21 +00003730 }
3731 for (;i > 0;i--)
3732 buf[len++] = *cur++;
3733 buf[len++] = ';';
3734 }
3735 }
3736 } else {
3737 if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003738 if ((len != 0) || (!normalize)) {
3739 if ((!normalize) || (!in_space)) {
3740 COPY_BUF(l,buf,len,0x20);
Daniel Veillard0161e632008-08-28 15:36:32 +00003741 while (len > buf_size - 10) {
3742 growBuffer(buf, 10);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003743 }
3744 }
3745 in_space = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003746 }
3747 } else {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003748 in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003749 COPY_BUF(l,buf,len,c);
3750 if (len > buf_size - 10) {
Daniel Veillard0161e632008-08-28 15:36:32 +00003751 growBuffer(buf, 10);
Owen Taylor3473f882001-02-23 17:55:21 +00003752 }
3753 }
3754 NEXTL(l);
3755 }
3756 GROW;
3757 c = CUR_CHAR(l);
3758 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003759 if ((in_space) && (normalize)) {
3760 while (buf[len - 1] == 0x20) len--;
3761 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00003762 buf[len] = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003763 if (RAW == '<') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003764 xmlFatalErr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003765 } else if (RAW != limit) {
Daniel Veillardb9e5acc2007-06-12 13:43:00 +00003766 if ((c != 0) && (!IS_CHAR(c))) {
3767 xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
3768 "invalid character in attribute value\n");
3769 } else {
3770 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
3771 "AttValue: ' expected\n");
3772 }
Owen Taylor3473f882001-02-23 17:55:21 +00003773 } else
3774 NEXT;
Daniel Veillard0fb18932003-09-07 09:14:37 +00003775 if (attlen != NULL) *attlen = len;
Owen Taylor3473f882001-02-23 17:55:21 +00003776 return(buf);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003777
3778mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003779 xmlErrMemory(ctxt, NULL);
Daniel Veillard68b6e022008-03-31 09:26:00 +00003780 if (buf != NULL)
3781 xmlFree(buf);
3782 if (rep != NULL)
3783 xmlFree(rep);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003784 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003785}
3786
3787/**
Daniel Veillard0fb18932003-09-07 09:14:37 +00003788 * xmlParseAttValue:
3789 * @ctxt: an XML parser context
3790 *
3791 * parse a value for an attribute
3792 * Note: the parser won't do substitution of entities here, this
3793 * will be handled later in xmlStringGetNodeList
3794 *
3795 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
3796 * "'" ([^<&'] | Reference)* "'"
3797 *
3798 * 3.3.3 Attribute-Value Normalization:
3799 * Before the value of an attribute is passed to the application or
3800 * checked for validity, the XML processor must normalize it as follows:
3801 * - a character reference is processed by appending the referenced
3802 * character to the attribute value
3803 * - an entity reference is processed by recursively processing the
3804 * replacement text of the entity
3805 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
3806 * appending #x20 to the normalized value, except that only a single
3807 * #x20 is appended for a "#xD#xA" sequence that is part of an external
3808 * parsed entity or the literal entity value of an internal parsed entity
3809 * - other characters are processed by appending them to the normalized value
3810 * If the declared value is not CDATA, then the XML processor must further
3811 * process the normalized attribute value by discarding any leading and
3812 * trailing space (#x20) characters, and by replacing sequences of space
3813 * (#x20) characters by a single space (#x20) character.
3814 * All attributes for which no declaration has been read should be treated
3815 * by a non-validating parser as if declared CDATA.
3816 *
3817 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
3818 */
3819
3820
3821xmlChar *
3822xmlParseAttValue(xmlParserCtxtPtr ctxt) {
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00003823 if ((ctxt == NULL) || (ctxt->input == NULL)) return(NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003824 return(xmlParseAttValueInternal(ctxt, NULL, NULL, 0));
Daniel Veillard0fb18932003-09-07 09:14:37 +00003825}
3826
3827/**
Owen Taylor3473f882001-02-23 17:55:21 +00003828 * xmlParseSystemLiteral:
3829 * @ctxt: an XML parser context
3830 *
3831 * parse an XML Literal
3832 *
3833 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
3834 *
3835 * Returns the SystemLiteral parsed or NULL
3836 */
3837
3838xmlChar *
3839xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
3840 xmlChar *buf = NULL;
3841 int len = 0;
3842 int size = XML_PARSER_BUFFER_SIZE;
3843 int cur, l;
3844 xmlChar stop;
3845 int state = ctxt->instate;
3846 int count = 0;
3847
3848 SHRINK;
3849 if (RAW == '"') {
3850 NEXT;
3851 stop = '"';
3852 } else if (RAW == '\'') {
3853 NEXT;
3854 stop = '\'';
3855 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003856 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003857 return(NULL);
3858 }
3859
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003860 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003861 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003862 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003863 return(NULL);
3864 }
3865 ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
3866 cur = CUR_CHAR(l);
William M. Brack871611b2003-10-18 04:53:14 +00003867 while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00003868 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003869 xmlChar *tmp;
3870
Owen Taylor3473f882001-02-23 17:55:21 +00003871 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003872 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3873 if (tmp == NULL) {
3874 xmlFree(buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003875 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003876 ctxt->instate = (xmlParserInputState) state;
3877 return(NULL);
3878 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003879 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003880 }
3881 count++;
3882 if (count > 50) {
3883 GROW;
3884 count = 0;
3885 }
3886 COPY_BUF(l,buf,len,cur);
3887 NEXTL(l);
3888 cur = CUR_CHAR(l);
3889 if (cur == 0) {
3890 GROW;
3891 SHRINK;
3892 cur = CUR_CHAR(l);
3893 }
3894 }
3895 buf[len] = 0;
3896 ctxt->instate = (xmlParserInputState) state;
William M. Brack871611b2003-10-18 04:53:14 +00003897 if (!IS_CHAR(cur)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003898 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003899 } else {
3900 NEXT;
3901 }
3902 return(buf);
3903}
3904
3905/**
3906 * xmlParsePubidLiteral:
3907 * @ctxt: an XML parser context
3908 *
3909 * parse an XML public literal
3910 *
3911 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
3912 *
3913 * Returns the PubidLiteral parsed or NULL.
3914 */
3915
3916xmlChar *
3917xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
3918 xmlChar *buf = NULL;
3919 int len = 0;
3920 int size = XML_PARSER_BUFFER_SIZE;
3921 xmlChar cur;
3922 xmlChar stop;
3923 int count = 0;
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003924 xmlParserInputState oldstate = ctxt->instate;
Owen Taylor3473f882001-02-23 17:55:21 +00003925
3926 SHRINK;
3927 if (RAW == '"') {
3928 NEXT;
3929 stop = '"';
3930 } else if (RAW == '\'') {
3931 NEXT;
3932 stop = '\'';
3933 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003934 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003935 return(NULL);
3936 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003937 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003938 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003939 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003940 return(NULL);
3941 }
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003942 ctxt->instate = XML_PARSER_PUBLIC_LITERAL;
Owen Taylor3473f882001-02-23 17:55:21 +00003943 cur = CUR;
William M. Brack76e95df2003-10-18 16:20:14 +00003944 while ((IS_PUBIDCHAR_CH(cur)) && (cur != stop)) { /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00003945 if (len + 1 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003946 xmlChar *tmp;
3947
Owen Taylor3473f882001-02-23 17:55:21 +00003948 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003949 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3950 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003951 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00003952 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00003953 return(NULL);
3954 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003955 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003956 }
3957 buf[len++] = cur;
3958 count++;
3959 if (count > 50) {
3960 GROW;
3961 count = 0;
3962 }
3963 NEXT;
3964 cur = CUR;
3965 if (cur == 0) {
3966 GROW;
3967 SHRINK;
3968 cur = CUR;
3969 }
3970 }
3971 buf[len] = 0;
3972 if (cur != stop) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003973 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003974 } else {
3975 NEXT;
3976 }
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003977 ctxt->instate = oldstate;
Owen Taylor3473f882001-02-23 17:55:21 +00003978 return(buf);
3979}
3980
Daniel Veillard48b2f892001-02-25 16:11:03 +00003981void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata);
Daniel Veillard0a119eb2005-07-20 13:46:00 +00003982
/*
 * used for the test in the inner loop of the char data testing
 *
 * Indexed by input byte. A non-zero entry (the byte value itself) means
 * the inner scanning loop may step over that byte; a zero entry stops
 * the loop. Zero entries are: the bytes below 0x20 other than 0x09
 * (so LF and CR stop the loop and are handled separately), '&', '<',
 * ']' and every byte from 0x80 upwards.
 */
static const unsigned char test_char_data[256] = {
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x9, CR/LF separated */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x00, 0x27, /* & */
    0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
    0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
    0x38, 0x39, 0x3A, 0x3B, 0x00, 0x3D, 0x3E, 0x3F, /* < */
    0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
    0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F,
    0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
    0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x00, 0x5E, 0x5F, /* ] */
    0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
    0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
    0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* non-ascii */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
};
4020
Owen Taylor3473f882001-02-23 17:55:21 +00004021/**
4022 * xmlParseCharData:
4023 * @ctxt: an XML parser context
4024 * @cdata: int indicating whether we are within a CDATA section
4025 *
4026 * parse a CharData section.
4027 * if we are within a CDATA section ']]>' marks an end of section.
4028 *
4029 * The right angle bracket (>) may be represented using the string "&gt;",
4030 * and must, for compatibility, be escaped using "&gt;" or a character
4031 * reference when it appears in the string "]]>" in content, when that
4032 * string is not marking the end of a CDATA section.
4033 *
4034 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
4035 */
4036
4037void
4038xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00004039 const xmlChar *in;
Daniel Veillard48b2f892001-02-25 16:11:03 +00004040 int nbchar = 0;
Daniel Veillard50582112001-03-26 22:52:16 +00004041 int line = ctxt->input->line;
4042 int col = ctxt->input->col;
Daniel Veillard0714c5b2005-01-23 00:01:01 +00004043 int ccol;
Daniel Veillard48b2f892001-02-25 16:11:03 +00004044
4045 SHRINK;
4046 GROW;
4047 /*
4048 * Accelerated common case where input don't need to be
4049 * modified before passing it to the handler.
4050 */
Daniel Veillardfdc91562002-07-01 21:52:03 +00004051 if (!cdata) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00004052 in = ctxt->input->cur;
4053 do {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004054get_more_space:
Daniel Veillard9e264ad2008-01-11 06:10:16 +00004055 while (*in == 0x20) { in++; ctxt->input->col++; }
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004056 if (*in == 0xA) {
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004057 do {
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00004058 ctxt->input->line++; ctxt->input->col = 1;
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004059 in++;
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004060 } while (*in == 0xA);
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004061 goto get_more_space;
4062 }
4063 if (*in == '<') {
4064 nbchar = in - ctxt->input->cur;
4065 if (nbchar > 0) {
4066 const xmlChar *tmp = ctxt->input->cur;
4067 ctxt->input->cur = in;
4068
Daniel Veillard34099b42004-11-04 17:34:35 +00004069 if ((ctxt->sax != NULL) &&
4070 (ctxt->sax->ignorableWhitespace !=
4071 ctxt->sax->characters)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004072 if (areBlanks(ctxt, tmp, nbchar, 1)) {
Daniel Veillard32acf0c2005-03-31 14:12:37 +00004073 if (ctxt->sax->ignorableWhitespace != NULL)
4074 ctxt->sax->ignorableWhitespace(ctxt->userData,
4075 tmp, nbchar);
Daniel Veillard1114d002006-10-12 16:24:35 +00004076 } else {
4077 if (ctxt->sax->characters != NULL)
4078 ctxt->sax->characters(ctxt->userData,
4079 tmp, nbchar);
4080 if (*ctxt->space == -1)
4081 *ctxt->space = -2;
4082 }
Daniel Veillard34099b42004-11-04 17:34:35 +00004083 } else if ((ctxt->sax != NULL) &&
4084 (ctxt->sax->characters != NULL)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004085 ctxt->sax->characters(ctxt->userData,
4086 tmp, nbchar);
4087 }
4088 }
4089 return;
4090 }
Daniel Veillard0714c5b2005-01-23 00:01:01 +00004091
Daniel Veillard3ed155f2001-04-29 19:56:59 +00004092get_more:
Daniel Veillard0714c5b2005-01-23 00:01:01 +00004093 ccol = ctxt->input->col;
Daniel Veillard0a119eb2005-07-20 13:46:00 +00004094 while (test_char_data[*in]) {
4095 in++;
4096 ccol++;
4097 }
Daniel Veillard0714c5b2005-01-23 00:01:01 +00004098 ctxt->input->col = ccol;
Daniel Veillard561b7f82002-03-20 21:55:57 +00004099 if (*in == 0xA) {
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004100 do {
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00004101 ctxt->input->line++; ctxt->input->col = 1;
Daniel Veillard3ed155f2001-04-29 19:56:59 +00004102 in++;
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004103 } while (*in == 0xA);
Daniel Veillard3ed155f2001-04-29 19:56:59 +00004104 goto get_more;
Daniel Veillard561b7f82002-03-20 21:55:57 +00004105 }
4106 if (*in == ']') {
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00004107 if ((in[1] == ']') && (in[2] == '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004108 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00004109 ctxt->input->cur = in;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00004110 return;
4111 }
4112 in++;
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00004113 ctxt->input->col++;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00004114 goto get_more;
4115 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00004116 nbchar = in - ctxt->input->cur;
Daniel Veillard80f32572001-03-07 19:45:40 +00004117 if (nbchar > 0) {
Daniel Veillard34099b42004-11-04 17:34:35 +00004118 if ((ctxt->sax != NULL) &&
4119 (ctxt->sax->ignorableWhitespace !=
Daniel Veillard40412cd2003-09-03 13:28:32 +00004120 ctxt->sax->characters) &&
William M. Brack76e95df2003-10-18 16:20:14 +00004121 (IS_BLANK_CH(*ctxt->input->cur))) {
Daniel Veillarda7374592001-05-10 14:17:55 +00004122 const xmlChar *tmp = ctxt->input->cur;
4123 ctxt->input->cur = in;
Daniel Veillard40412cd2003-09-03 13:28:32 +00004124
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004125 if (areBlanks(ctxt, tmp, nbchar, 0)) {
Daniel Veillard32acf0c2005-03-31 14:12:37 +00004126 if (ctxt->sax->ignorableWhitespace != NULL)
4127 ctxt->sax->ignorableWhitespace(ctxt->userData,
4128 tmp, nbchar);
Daniel Veillard1114d002006-10-12 16:24:35 +00004129 } else {
4130 if (ctxt->sax->characters != NULL)
4131 ctxt->sax->characters(ctxt->userData,
4132 tmp, nbchar);
4133 if (*ctxt->space == -1)
4134 *ctxt->space = -2;
4135 }
Daniel Veillard3ed27bd2001-06-17 17:58:17 +00004136 line = ctxt->input->line;
4137 col = ctxt->input->col;
Daniel Veillard34099b42004-11-04 17:34:35 +00004138 } else if (ctxt->sax != NULL) {
Daniel Veillard80f32572001-03-07 19:45:40 +00004139 if (ctxt->sax->characters != NULL)
4140 ctxt->sax->characters(ctxt->userData,
4141 ctxt->input->cur, nbchar);
Daniel Veillard3ed27bd2001-06-17 17:58:17 +00004142 line = ctxt->input->line;
4143 col = ctxt->input->col;
Daniel Veillard80f32572001-03-07 19:45:40 +00004144 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00004145 }
4146 ctxt->input->cur = in;
Daniel Veillard561b7f82002-03-20 21:55:57 +00004147 if (*in == 0xD) {
4148 in++;
4149 if (*in == 0xA) {
4150 ctxt->input->cur = in;
Daniel Veillard48b2f892001-02-25 16:11:03 +00004151 in++;
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00004152 ctxt->input->line++; ctxt->input->col = 1;
Daniel Veillard561b7f82002-03-20 21:55:57 +00004153 continue; /* while */
Daniel Veillard48b2f892001-02-25 16:11:03 +00004154 }
Daniel Veillard561b7f82002-03-20 21:55:57 +00004155 in--;
4156 }
4157 if (*in == '<') {
4158 return;
4159 }
4160 if (*in == '&') {
4161 return;
Daniel Veillard48b2f892001-02-25 16:11:03 +00004162 }
4163 SHRINK;
4164 GROW;
4165 in = ctxt->input->cur;
William M. Brackc07329e2003-09-08 01:57:30 +00004166 } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
Daniel Veillard48b2f892001-02-25 16:11:03 +00004167 nbchar = 0;
4168 }
Daniel Veillard50582112001-03-26 22:52:16 +00004169 ctxt->input->line = line;
4170 ctxt->input->col = col;
Daniel Veillard48b2f892001-02-25 16:11:03 +00004171 xmlParseCharDataComplex(ctxt, cdata);
4172}
4173
Daniel Veillard01c13b52002-12-10 15:19:08 +00004174/**
4175 * xmlParseCharDataComplex:
4176 * @ctxt: an XML parser context
4177 * @cdata: int indicating whether we are within a CDATA section
4178 *
4179 * parse a CharData section.this is the fallback function
4180 * of xmlParseCharData() when the parsing requires handling
4181 * of non-ASCII characters.
4182 */
Daniel Veillard48b2f892001-02-25 16:11:03 +00004183void
4184xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) {
Owen Taylor3473f882001-02-23 17:55:21 +00004185 xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
4186 int nbchar = 0;
4187 int cur, l;
4188 int count = 0;
4189
4190 SHRINK;
4191 GROW;
4192 cur = CUR_CHAR(l);
Daniel Veillardfdc91562002-07-01 21:52:03 +00004193 while ((cur != '<') && /* checked */
4194 (cur != '&') &&
William M. Brack871611b2003-10-18 04:53:14 +00004195 (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
Owen Taylor3473f882001-02-23 17:55:21 +00004196 if ((cur == ']') && (NXT(1) == ']') &&
4197 (NXT(2) == '>')) {
4198 if (cdata) break;
4199 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004200 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004201 }
4202 }
4203 COPY_BUF(l,buf,nbchar,cur);
4204 if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
Daniel Veillard092643b2003-09-25 14:29:29 +00004205 buf[nbchar] = 0;
4206
Owen Taylor3473f882001-02-23 17:55:21 +00004207 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00004208 * OK the segment is to be consumed as chars.
Owen Taylor3473f882001-02-23 17:55:21 +00004209 */
4210 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004211 if (areBlanks(ctxt, buf, nbchar, 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00004212 if (ctxt->sax->ignorableWhitespace != NULL)
4213 ctxt->sax->ignorableWhitespace(ctxt->userData,
4214 buf, nbchar);
4215 } else {
4216 if (ctxt->sax->characters != NULL)
4217 ctxt->sax->characters(ctxt->userData, buf, nbchar);
Daniel Veillard1114d002006-10-12 16:24:35 +00004218 if ((ctxt->sax->characters !=
4219 ctxt->sax->ignorableWhitespace) &&
4220 (*ctxt->space == -1))
4221 *ctxt->space = -2;
Owen Taylor3473f882001-02-23 17:55:21 +00004222 }
4223 }
4224 nbchar = 0;
4225 }
4226 count++;
4227 if (count > 50) {
4228 GROW;
4229 count = 0;
4230 }
4231 NEXTL(l);
4232 cur = CUR_CHAR(l);
4233 }
4234 if (nbchar != 0) {
Daniel Veillard092643b2003-09-25 14:29:29 +00004235 buf[nbchar] = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00004236 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00004237 * OK the segment is to be consumed as chars.
Owen Taylor3473f882001-02-23 17:55:21 +00004238 */
4239 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004240 if (areBlanks(ctxt, buf, nbchar, 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00004241 if (ctxt->sax->ignorableWhitespace != NULL)
4242 ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
4243 } else {
4244 if (ctxt->sax->characters != NULL)
4245 ctxt->sax->characters(ctxt->userData, buf, nbchar);
Daniel Veillard1114d002006-10-12 16:24:35 +00004246 if ((ctxt->sax->characters != ctxt->sax->ignorableWhitespace) &&
4247 (*ctxt->space == -1))
4248 *ctxt->space = -2;
Owen Taylor3473f882001-02-23 17:55:21 +00004249 }
4250 }
4251 }
Daniel Veillard3b1478b2006-04-24 08:50:10 +00004252 if ((cur != 0) && (!IS_CHAR(cur))) {
4253 /* Generate the error and skip the offending character */
4254 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4255 "PCDATA invalid Char value %d\n",
4256 cur);
4257 NEXTL(l);
4258 }
Owen Taylor3473f882001-02-23 17:55:21 +00004259}
4260
4261/**
4262 * xmlParseExternalID:
4263 * @ctxt: an XML parser context
4264 * @publicID: a xmlChar** receiving PubidLiteral
4265 * @strict: indicate whether we should restrict parsing to only
4266 * production [75], see NOTE below
4267 *
4268 * Parse an External ID or a Public ID
4269 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00004270 * NOTE: Productions [75] and [83] interact badly since [75] can generate
Owen Taylor3473f882001-02-23 17:55:21 +00004271 * 'PUBLIC' S PubidLiteral S SystemLiteral
4272 *
4273 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
4274 * | 'PUBLIC' S PubidLiteral S SystemLiteral
4275 *
4276 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
4277 *
4278 * Returns the function returns SystemLiteral and in the second
4279 * case publicID receives PubidLiteral, is strict is off
4280 * it is possible to return NULL and have publicID set.
4281 */
4282
4283xmlChar *
4284xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
4285 xmlChar *URI = NULL;
4286
4287 SHRINK;
Daniel Veillard146c9122001-03-22 15:22:27 +00004288
4289 *publicID = NULL;
Daniel Veillarda07050d2003-10-19 14:46:32 +00004290 if (CMP6(CUR_PTR, 'S', 'Y', 'S', 'T', 'E', 'M')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004291 SKIP(6);
William M. Brack76e95df2003-10-18 16:20:14 +00004292 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004293 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4294 "Space required after 'SYSTEM'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004295 }
4296 SKIP_BLANKS;
4297 URI = xmlParseSystemLiteral(ctxt);
4298 if (URI == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004299 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004300 }
Daniel Veillarda07050d2003-10-19 14:46:32 +00004301 } else if (CMP6(CUR_PTR, 'P', 'U', 'B', 'L', 'I', 'C')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004302 SKIP(6);
William M. Brack76e95df2003-10-18 16:20:14 +00004303 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004304 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004305 "Space required after 'PUBLIC'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004306 }
4307 SKIP_BLANKS;
4308 *publicID = xmlParsePubidLiteral(ctxt);
4309 if (*publicID == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004310 xmlFatalErr(ctxt, XML_ERR_PUBID_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004311 }
4312 if (strict) {
4313 /*
4314 * We don't handle [83] so "S SystemLiteral" is required.
4315 */
William M. Brack76e95df2003-10-18 16:20:14 +00004316 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004317 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004318 "Space required after the Public Identifier\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004319 }
4320 } else {
4321 /*
4322 * We handle [83] so we return immediately, if
4323 * "S SystemLiteral" is not detected. From a purely parsing
4324 * point of view that's a nice mess.
4325 */
4326 const xmlChar *ptr;
4327 GROW;
4328
4329 ptr = CUR_PTR;
William M. Brack76e95df2003-10-18 16:20:14 +00004330 if (!IS_BLANK_CH(*ptr)) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004331
William M. Brack76e95df2003-10-18 16:20:14 +00004332 while (IS_BLANK_CH(*ptr)) ptr++; /* TODO: dangerous, fix ! */
Owen Taylor3473f882001-02-23 17:55:21 +00004333 if ((*ptr != '\'') && (*ptr != '"')) return(NULL);
4334 }
4335 SKIP_BLANKS;
4336 URI = xmlParseSystemLiteral(ctxt);
4337 if (URI == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004338 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004339 }
4340 }
4341 return(URI);
4342}
4343
4344/**
Daniel Veillard4c778d82005-01-23 17:37:44 +00004345 * xmlParseCommentComplex:
Owen Taylor3473f882001-02-23 17:55:21 +00004346 * @ctxt: an XML parser context
Daniel Veillard4c778d82005-01-23 17:37:44 +00004347 * @buf: the already parsed part of the buffer
4348 * @len: number of bytes filles in the buffer
4349 * @size: allocated size of the buffer
Owen Taylor3473f882001-02-23 17:55:21 +00004350 *
4351 * Skip an XML (SGML) comment <!-- .... -->
4352 * The spec says that "For compatibility, the string "--" (double-hyphen)
4353 * must not occur within comments. "
Daniel Veillard4c778d82005-01-23 17:37:44 +00004354 * This is the slow routine in case the accelerator for ascii didn't work
Owen Taylor3473f882001-02-23 17:55:21 +00004355 *
4356 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4357 */
Daniel Veillard4c778d82005-01-23 17:37:44 +00004358static void
4359xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar *buf, int len, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +00004360 int q, ql;
4361 int r, rl;
4362 int cur, l;
Owen Taylor3473f882001-02-23 17:55:21 +00004363 int count = 0;
Daniel Veillard051d52c2008-07-29 16:44:59 +00004364 int inputid;
4365
4366 inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00004367
Owen Taylor3473f882001-02-23 17:55:21 +00004368 if (buf == NULL) {
Daniel Veillard4c778d82005-01-23 17:37:44 +00004369 len = 0;
4370 size = XML_PARSER_BUFFER_SIZE;
4371 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4372 if (buf == NULL) {
4373 xmlErrMemory(ctxt, NULL);
4374 return;
4375 }
Owen Taylor3473f882001-02-23 17:55:21 +00004376 }
William M. Brackbf9a73d2007-02-09 00:07:07 +00004377 GROW; /* Assure there's enough input data */
Owen Taylor3473f882001-02-23 17:55:21 +00004378 q = CUR_CHAR(ql);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00004379 if (q == 0)
4380 goto not_terminated;
Daniel Veillardda629342007-08-01 07:49:06 +00004381 if (!IS_CHAR(q)) {
4382 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4383 "xmlParseComment: invalid xmlChar value %d\n",
4384 q);
4385 xmlFree (buf);
4386 return;
4387 }
Owen Taylor3473f882001-02-23 17:55:21 +00004388 NEXTL(ql);
4389 r = CUR_CHAR(rl);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00004390 if (r == 0)
4391 goto not_terminated;
Daniel Veillardda629342007-08-01 07:49:06 +00004392 if (!IS_CHAR(r)) {
4393 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4394 "xmlParseComment: invalid xmlChar value %d\n",
4395 q);
4396 xmlFree (buf);
4397 return;
4398 }
Owen Taylor3473f882001-02-23 17:55:21 +00004399 NEXTL(rl);
4400 cur = CUR_CHAR(l);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00004401 if (cur == 0)
4402 goto not_terminated;
William M. Brack871611b2003-10-18 04:53:14 +00004403 while (IS_CHAR(cur) && /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00004404 ((cur != '>') ||
4405 (r != '-') || (q != '-'))) {
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00004406 if ((r == '-') && (q == '-')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004407 xmlFatalErr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004408 }
4409 if (len + 5 >= size) {
William M. Bracka3215c72004-07-31 16:24:01 +00004410 xmlChar *new_buf;
Owen Taylor3473f882001-02-23 17:55:21 +00004411 size *= 2;
William M. Bracka3215c72004-07-31 16:24:01 +00004412 new_buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4413 if (new_buf == NULL) {
4414 xmlFree (buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004415 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004416 return;
4417 }
William M. Bracka3215c72004-07-31 16:24:01 +00004418 buf = new_buf;
Owen Taylor3473f882001-02-23 17:55:21 +00004419 }
4420 COPY_BUF(ql,buf,len,q);
4421 q = r;
4422 ql = rl;
4423 r = cur;
4424 rl = l;
4425
4426 count++;
4427 if (count > 50) {
4428 GROW;
4429 count = 0;
4430 }
4431 NEXTL(l);
4432 cur = CUR_CHAR(l);
4433 if (cur == 0) {
4434 SHRINK;
4435 GROW;
4436 cur = CUR_CHAR(l);
4437 }
4438 }
4439 buf[len] = 0;
Daniel Veillardda629342007-08-01 07:49:06 +00004440 if (cur == 0) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00004441 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00004442 "Comment not terminated \n<!--%.50s\n", buf);
Daniel Veillardda629342007-08-01 07:49:06 +00004443 } else if (!IS_CHAR(cur)) {
4444 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4445 "xmlParseComment: invalid xmlChar value %d\n",
4446 cur);
Owen Taylor3473f882001-02-23 17:55:21 +00004447 } else {
Daniel Veillard051d52c2008-07-29 16:44:59 +00004448 if (inputid != ctxt->input->id) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004449 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4450 "Comment doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004451 }
4452 NEXT;
4453 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
4454 (!ctxt->disableSAX))
4455 ctxt->sax->comment(ctxt->userData, buf);
Owen Taylor3473f882001-02-23 17:55:21 +00004456 }
Daniel Veillardda629342007-08-01 07:49:06 +00004457 xmlFree(buf);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00004458 return;
4459not_terminated:
4460 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4461 "Comment not terminated\n", NULL);
4462 xmlFree(buf);
Daniel Veillardda629342007-08-01 07:49:06 +00004463 return;
Owen Taylor3473f882001-02-23 17:55:21 +00004464}
Daniel Veillardda629342007-08-01 07:49:06 +00004465
Daniel Veillard4c778d82005-01-23 17:37:44 +00004466/**
4467 * xmlParseComment:
4468 * @ctxt: an XML parser context
4469 *
4470 * Skip an XML (SGML) comment <!-- .... -->
4471 * The spec says that "For compatibility, the string "--" (double-hyphen)
4472 * must not occur within comments. "
4473 *
4474 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4475 */
4476void
4477xmlParseComment(xmlParserCtxtPtr ctxt) {
4478 xmlChar *buf = NULL;
4479 int size = XML_PARSER_BUFFER_SIZE;
Daniel Veillard316a5c32005-01-23 22:56:39 +00004480 int len = 0;
Daniel Veillard4c778d82005-01-23 17:37:44 +00004481 xmlParserInputState state;
4482 const xmlChar *in;
4483 int nbchar = 0, ccol;
Daniel Veillard051d52c2008-07-29 16:44:59 +00004484 int inputid;
Daniel Veillard4c778d82005-01-23 17:37:44 +00004485
4486 /*
4487 * Check that there is a comment right here.
4488 */
4489 if ((RAW != '<') || (NXT(1) != '!') ||
4490 (NXT(2) != '-') || (NXT(3) != '-')) return;
Daniel Veillard4c778d82005-01-23 17:37:44 +00004491 state = ctxt->instate;
4492 ctxt->instate = XML_PARSER_COMMENT;
Daniel Veillard051d52c2008-07-29 16:44:59 +00004493 inputid = ctxt->input->id;
Daniel Veillard4c778d82005-01-23 17:37:44 +00004494 SKIP(4);
4495 SHRINK;
4496 GROW;
4497
4498 /*
4499 * Accelerated common case where input don't need to be
4500 * modified before passing it to the handler.
4501 */
4502 in = ctxt->input->cur;
4503 do {
4504 if (*in == 0xA) {
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004505 do {
Daniel Veillard4c778d82005-01-23 17:37:44 +00004506 ctxt->input->line++; ctxt->input->col = 1;
4507 in++;
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004508 } while (*in == 0xA);
Daniel Veillard4c778d82005-01-23 17:37:44 +00004509 }
4510get_more:
4511 ccol = ctxt->input->col;
4512 while (((*in > '-') && (*in <= 0x7F)) ||
4513 ((*in >= 0x20) && (*in < '-')) ||
4514 (*in == 0x09)) {
4515 in++;
4516 ccol++;
4517 }
4518 ctxt->input->col = ccol;
4519 if (*in == 0xA) {
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004520 do {
Daniel Veillard4c778d82005-01-23 17:37:44 +00004521 ctxt->input->line++; ctxt->input->col = 1;
4522 in++;
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004523 } while (*in == 0xA);
Daniel Veillard4c778d82005-01-23 17:37:44 +00004524 goto get_more;
4525 }
4526 nbchar = in - ctxt->input->cur;
4527 /*
4528 * save current set of data
4529 */
4530 if (nbchar > 0) {
4531 if ((ctxt->sax != NULL) &&
4532 (ctxt->sax->comment != NULL)) {
4533 if (buf == NULL) {
4534 if ((*in == '-') && (in[1] == '-'))
4535 size = nbchar + 1;
4536 else
4537 size = XML_PARSER_BUFFER_SIZE + nbchar;
4538 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4539 if (buf == NULL) {
4540 xmlErrMemory(ctxt, NULL);
4541 ctxt->instate = state;
4542 return;
4543 }
4544 len = 0;
4545 } else if (len + nbchar + 1 >= size) {
4546 xmlChar *new_buf;
4547 size += len + nbchar + XML_PARSER_BUFFER_SIZE;
4548 new_buf = (xmlChar *) xmlRealloc(buf,
4549 size * sizeof(xmlChar));
4550 if (new_buf == NULL) {
4551 xmlFree (buf);
4552 xmlErrMemory(ctxt, NULL);
4553 ctxt->instate = state;
4554 return;
4555 }
4556 buf = new_buf;
4557 }
4558 memcpy(&buf[len], ctxt->input->cur, nbchar);
4559 len += nbchar;
4560 buf[len] = 0;
4561 }
4562 }
4563 ctxt->input->cur = in;
Daniel Veillard9b528c72006-02-05 03:06:15 +00004564 if (*in == 0xA) {
4565 in++;
4566 ctxt->input->line++; ctxt->input->col = 1;
4567 }
Daniel Veillard4c778d82005-01-23 17:37:44 +00004568 if (*in == 0xD) {
4569 in++;
4570 if (*in == 0xA) {
4571 ctxt->input->cur = in;
4572 in++;
4573 ctxt->input->line++; ctxt->input->col = 1;
4574 continue; /* while */
4575 }
4576 in--;
4577 }
4578 SHRINK;
4579 GROW;
4580 in = ctxt->input->cur;
4581 if (*in == '-') {
4582 if (in[1] == '-') {
4583 if (in[2] == '>') {
Daniel Veillard051d52c2008-07-29 16:44:59 +00004584 if (ctxt->input->id != inputid) {
4585 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4586 "comment doesn't start and stop in the same entity\n");
4587 }
Daniel Veillard4c778d82005-01-23 17:37:44 +00004588 SKIP(3);
4589 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
4590 (!ctxt->disableSAX)) {
4591 if (buf != NULL)
4592 ctxt->sax->comment(ctxt->userData, buf);
4593 else
4594 ctxt->sax->comment(ctxt->userData, BAD_CAST "");
4595 }
4596 if (buf != NULL)
4597 xmlFree(buf);
4598 ctxt->instate = state;
4599 return;
4600 }
4601 if (buf != NULL)
4602 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4603 "Comment not terminated \n<!--%.50s\n",
4604 buf);
4605 else
4606 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4607 "Comment not terminated \n", NULL);
4608 in++;
4609 ctxt->input->col++;
4610 }
4611 in++;
4612 ctxt->input->col++;
4613 goto get_more;
4614 }
4615 } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
4616 xmlParseCommentComplex(ctxt, buf, len, size);
4617 ctxt->instate = state;
4618 return;
4619}
4620
Owen Taylor3473f882001-02-23 17:55:21 +00004621
4622/**
4623 * xmlParsePITarget:
4624 * @ctxt: an XML parser context
4625 *
4626 * parse the name of a PI
4627 *
4628 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
4629 *
4630 * Returns the PITarget name or NULL
4631 */
4632
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004633const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00004634xmlParsePITarget(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004635 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00004636
4637 name = xmlParseName(ctxt);
4638 if ((name != NULL) &&
4639 ((name[0] == 'x') || (name[0] == 'X')) &&
4640 ((name[1] == 'm') || (name[1] == 'M')) &&
4641 ((name[2] == 'l') || (name[2] == 'L'))) {
4642 int i;
4643 if ((name[0] == 'x') && (name[1] == 'm') &&
4644 (name[2] == 'l') && (name[3] == 0)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004645 xmlFatalErrMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
Owen Taylor3473f882001-02-23 17:55:21 +00004646 "XML declaration allowed only at the start of the document\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004647 return(name);
4648 } else if (name[3] == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004649 xmlFatalErr(ctxt, XML_ERR_RESERVED_XML_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004650 return(name);
4651 }
4652 for (i = 0;;i++) {
4653 if (xmlW3CPIs[i] == NULL) break;
4654 if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
4655 return(name);
4656 }
Daniel Veillard24eb9782003-10-04 21:08:09 +00004657 xmlWarningMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
4658 "xmlParsePITarget: invalid name prefix 'xml'\n",
4659 NULL, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004660 }
Daniel Veillard37334572008-07-31 08:20:02 +00004661 if ((name != NULL) && (xmlStrchr(name, ':') != NULL)) {
4662 xmlNsErr(ctxt, XML_NS_ERR_COLON,
4663 "colon are forbidden from PI names '%s'\n", name, NULL, NULL);
4664 }
Owen Taylor3473f882001-02-23 17:55:21 +00004665 return(name);
4666}
4667
#ifdef LIBXML_CATALOG_ENABLED
/**
 * xmlParseCatalogPI:
 * @ctxt:  an XML parser context
 * @catalog:  the PI value string
 *
 * Parse the content of an XML Catalog Processing Instruction.
 *
 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
 *
 * Occurs only if allowed by the user and if happening in the Misc
 * part of the document before any doctype information.
 * The quoted value is added to the catalogs of the parsing context so it
 * can be used if a resolution is needed further down in the document.
 *
 * A value that starts with "catalog" but is not followed by '=' is ignored
 * without any diagnostic; every other syntax problem raises the
 * XML_WAR_CATALOG_PI warning.
 */

static void
xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
    const xmlChar *p = catalog;
    const xmlChar *start;
    xmlChar *URL = NULL;
    xmlChar quote;

    /* Leading blanks, then the literal pseudo-attribute name. */
    for (; IS_BLANK_CH(*p); p++)
        ;
    if (xmlStrncmp(p, BAD_CAST"catalog", 7) != 0)
        goto error;
    p += 7;

    for (; IS_BLANK_CH(*p); p++)
        ;
    if (*p != '=')
        return;
    p++;

    /* The value must be quoted with either ' or ". */
    for (; IS_BLANK_CH(*p); p++)
        ;
    quote = *p;
    if ((quote != '\'') && (quote != '"'))
        goto error;
    p++;

    /* Scan up to the matching closing quote. */
    start = p;
    for (; (*p != 0) && (*p != quote); p++)
        ;
    if (*p == 0)
        goto error;
    URL = xmlStrndup(start, p - start);
    p++;

    /* Only blanks may follow the closing quote. */
    for (; IS_BLANK_CH(*p); p++)
        ;
    if (*p != 0)
        goto error;

    if (URL != NULL) {
        ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, URL);
        xmlFree(URL);
    }
    return;

error:
    xmlWarningMsg(ctxt, XML_WAR_CATALOG_PI,
                  "Catalog PI syntax error: %s\n",
                  catalog, NULL);
    if (URL != NULL)
        xmlFree(URL);
}
#endif
4729
Owen Taylor3473f882001-02-23 17:55:21 +00004730/**
4731 * xmlParsePI:
4732 * @ctxt: an XML parser context
4733 *
4734 * parse an XML Processing Instruction.
4735 *
4736 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
4737 *
4738 * The processing is transfered to SAX once parsed.
4739 */
4740
4741void
4742xmlParsePI(xmlParserCtxtPtr ctxt) {
4743 xmlChar *buf = NULL;
4744 int len = 0;
4745 int size = XML_PARSER_BUFFER_SIZE;
4746 int cur, l;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004747 const xmlChar *target;
Owen Taylor3473f882001-02-23 17:55:21 +00004748 xmlParserInputState state;
4749 int count = 0;
4750
4751 if ((RAW == '<') && (NXT(1) == '?')) {
4752 xmlParserInputPtr input = ctxt->input;
4753 state = ctxt->instate;
4754 ctxt->instate = XML_PARSER_PI;
4755 /*
4756 * this is a Processing Instruction.
4757 */
4758 SKIP(2);
4759 SHRINK;
4760
4761 /*
4762 * Parse the target name and check for special support like
4763 * namespace.
4764 */
4765 target = xmlParsePITarget(ctxt);
4766 if (target != NULL) {
4767 if ((RAW == '?') && (NXT(1) == '>')) {
4768 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004769 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4770 "PI declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004771 }
4772 SKIP(2);
4773
4774 /*
4775 * SAX: PI detected.
4776 */
4777 if ((ctxt->sax) && (!ctxt->disableSAX) &&
4778 (ctxt->sax->processingInstruction != NULL))
4779 ctxt->sax->processingInstruction(ctxt->userData,
4780 target, NULL);
4781 ctxt->instate = state;
Owen Taylor3473f882001-02-23 17:55:21 +00004782 return;
4783 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00004784 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00004785 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004786 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004787 ctxt->instate = state;
4788 return;
4789 }
4790 cur = CUR;
4791 if (!IS_BLANK(cur)) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00004792 xmlFatalErrMsgStr(ctxt, XML_ERR_SPACE_REQUIRED,
4793 "ParsePI: PI %s space expected\n", target);
Owen Taylor3473f882001-02-23 17:55:21 +00004794 }
4795 SKIP_BLANKS;
4796 cur = CUR_CHAR(l);
William M. Brack871611b2003-10-18 04:53:14 +00004797 while (IS_CHAR(cur) && /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00004798 ((cur != '?') || (NXT(1) != '>'))) {
4799 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00004800 xmlChar *tmp;
4801
Owen Taylor3473f882001-02-23 17:55:21 +00004802 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00004803 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4804 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004805 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00004806 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00004807 ctxt->instate = state;
4808 return;
4809 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00004810 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00004811 }
4812 count++;
4813 if (count > 50) {
4814 GROW;
4815 count = 0;
4816 }
4817 COPY_BUF(l,buf,len,cur);
4818 NEXTL(l);
4819 cur = CUR_CHAR(l);
4820 if (cur == 0) {
4821 SHRINK;
4822 GROW;
4823 cur = CUR_CHAR(l);
4824 }
4825 }
4826 buf[len] = 0;
4827 if (cur != '?') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00004828 xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
4829 "ParsePI: PI %s never end ...\n", target);
Owen Taylor3473f882001-02-23 17:55:21 +00004830 } else {
4831 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004832 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4833 "PI declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004834 }
4835 SKIP(2);
4836
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00004837#ifdef LIBXML_CATALOG_ENABLED
4838 if (((state == XML_PARSER_MISC) ||
4839 (state == XML_PARSER_START)) &&
4840 (xmlStrEqual(target, XML_CATALOG_PI))) {
4841 xmlCatalogAllow allow = xmlCatalogGetDefaults();
4842 if ((allow == XML_CATA_ALLOW_DOCUMENT) ||
4843 (allow == XML_CATA_ALLOW_ALL))
4844 xmlParseCatalogPI(ctxt, buf);
4845 }
4846#endif
4847
4848
Owen Taylor3473f882001-02-23 17:55:21 +00004849 /*
4850 * SAX: PI detected.
4851 */
4852 if ((ctxt->sax) && (!ctxt->disableSAX) &&
4853 (ctxt->sax->processingInstruction != NULL))
4854 ctxt->sax->processingInstruction(ctxt->userData,
4855 target, buf);
4856 }
4857 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00004858 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004859 xmlFatalErr(ctxt, XML_ERR_PI_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004860 }
4861 ctxt->instate = state;
4862 }
4863}
4864
4865/**
4866 * xmlParseNotationDecl:
4867 * @ctxt: an XML parser context
4868 *
4869 * parse a notation declaration
4870 *
4871 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
4872 *
4873 * Hence there is actually 3 choices:
4874 * 'PUBLIC' S PubidLiteral
4875 * 'PUBLIC' S PubidLiteral S SystemLiteral
4876 * and 'SYSTEM' S SystemLiteral
4877 *
4878 * See the NOTE on xmlParseExternalID().
4879 */
4880
4881void
4882xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004883 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00004884 xmlChar *Pubid;
4885 xmlChar *Systemid;
4886
Daniel Veillarda07050d2003-10-19 14:46:32 +00004887 if (CMP10(CUR_PTR, '<', '!', 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004888 xmlParserInputPtr input = ctxt->input;
4889 SHRINK;
4890 SKIP(10);
William M. Brack76e95df2003-10-18 16:20:14 +00004891 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004892 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4893 "Space required after '<!NOTATION'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004894 return;
4895 }
4896 SKIP_BLANKS;
4897
Daniel Veillard76d66f42001-05-16 21:05:17 +00004898 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004899 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004900 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004901 return;
4902 }
William M. Brack76e95df2003-10-18 16:20:14 +00004903 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004904 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004905 "Space required after the NOTATION name'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004906 return;
4907 }
Daniel Veillard37334572008-07-31 08:20:02 +00004908 if (xmlStrchr(name, ':') != NULL) {
4909 xmlNsErr(ctxt, XML_NS_ERR_COLON,
4910 "colon are forbidden from notation names '%s'\n",
4911 name, NULL, NULL);
4912 }
Owen Taylor3473f882001-02-23 17:55:21 +00004913 SKIP_BLANKS;
4914
4915 /*
4916 * Parse the IDs.
4917 */
4918 Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
4919 SKIP_BLANKS;
4920
4921 if (RAW == '>') {
4922 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004923 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4924 "Notation declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004925 }
4926 NEXT;
4927 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4928 (ctxt->sax->notationDecl != NULL))
4929 ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
4930 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004931 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004932 }
Owen Taylor3473f882001-02-23 17:55:21 +00004933 if (Systemid != NULL) xmlFree(Systemid);
4934 if (Pubid != NULL) xmlFree(Pubid);
4935 }
4936}
4937
4938/**
4939 * xmlParseEntityDecl:
4940 * @ctxt: an XML parser context
4941 *
4942 * parse <!ENTITY declarations
4943 *
4944 * [70] EntityDecl ::= GEDecl | PEDecl
4945 *
4946 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
4947 *
4948 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
4949 *
4950 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
4951 *
4952 * [74] PEDef ::= EntityValue | ExternalID
4953 *
4954 * [76] NDataDecl ::= S 'NDATA' S Name
4955 *
4956 * [ VC: Notation Declared ]
4957 * The Name must match the declared name of a notation.
4958 */
4959
4960void
4961xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004962 const xmlChar *name = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00004963 xmlChar *value = NULL;
4964 xmlChar *URI = NULL, *literal = NULL;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004965 const xmlChar *ndata = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00004966 int isParameter = 0;
4967 xmlChar *orig = NULL;
Daniel Veillardf5582f12002-06-11 10:08:16 +00004968 int skipped;
Owen Taylor3473f882001-02-23 17:55:21 +00004969
Daniel Veillard4c778d82005-01-23 17:37:44 +00004970 /* GROW; done in the caller */
Daniel Veillarda07050d2003-10-19 14:46:32 +00004971 if (CMP8(CUR_PTR, '<', '!', 'E', 'N', 'T', 'I', 'T', 'Y')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004972 xmlParserInputPtr input = ctxt->input;
Owen Taylor3473f882001-02-23 17:55:21 +00004973 SHRINK;
4974 SKIP(8);
Daniel Veillardf5582f12002-06-11 10:08:16 +00004975 skipped = SKIP_BLANKS;
4976 if (skipped == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004977 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4978 "Space required after '<!ENTITY'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004979 }
Owen Taylor3473f882001-02-23 17:55:21 +00004980
4981 if (RAW == '%') {
4982 NEXT;
Daniel Veillardf5582f12002-06-11 10:08:16 +00004983 skipped = SKIP_BLANKS;
4984 if (skipped == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004985 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4986 "Space required after '%'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004987 }
Owen Taylor3473f882001-02-23 17:55:21 +00004988 isParameter = 1;
4989 }
4990
Daniel Veillard76d66f42001-05-16 21:05:17 +00004991 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004992 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004993 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
4994 "xmlParseEntityDecl: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004995 return;
4996 }
Daniel Veillard37334572008-07-31 08:20:02 +00004997 if (xmlStrchr(name, ':') != NULL) {
4998 xmlNsErr(ctxt, XML_NS_ERR_COLON,
4999 "colon are forbidden from entities names '%s'\n",
5000 name, NULL, NULL);
5001 }
Daniel Veillardf5582f12002-06-11 10:08:16 +00005002 skipped = SKIP_BLANKS;
5003 if (skipped == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005004 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5005 "Space required after the entity name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005006 }
Owen Taylor3473f882001-02-23 17:55:21 +00005007
Daniel Veillardf5582f12002-06-11 10:08:16 +00005008 ctxt->instate = XML_PARSER_ENTITY_DECL;
Owen Taylor3473f882001-02-23 17:55:21 +00005009 /*
5010 * handle the various case of definitions...
5011 */
5012 if (isParameter) {
5013 if ((RAW == '"') || (RAW == '\'')) {
5014 value = xmlParseEntityValue(ctxt, &orig);
5015 if (value) {
5016 if ((ctxt->sax != NULL) &&
5017 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5018 ctxt->sax->entityDecl(ctxt->userData, name,
5019 XML_INTERNAL_PARAMETER_ENTITY,
5020 NULL, NULL, value);
5021 }
5022 } else {
5023 URI = xmlParseExternalID(ctxt, &literal, 1);
5024 if ((URI == NULL) && (literal == NULL)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005025 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005026 }
5027 if (URI) {
5028 xmlURIPtr uri;
5029
5030 uri = xmlParseURI((const char *) URI);
5031 if (uri == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00005032 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
5033 "Invalid URI: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00005034 /*
5035 * This really ought to be a well formedness error
5036 * but the XML Core WG decided otherwise c.f. issue
5037 * E26 of the XML erratas.
5038 */
Owen Taylor3473f882001-02-23 17:55:21 +00005039 } else {
5040 if (uri->fragment != NULL) {
Daniel Veillard4a7ae502002-02-18 19:18:17 +00005041 /*
5042 * Okay this is foolish to block those but not
5043 * invalid URIs.
5044 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005045 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005046 } else {
5047 if ((ctxt->sax != NULL) &&
5048 (!ctxt->disableSAX) &&
5049 (ctxt->sax->entityDecl != NULL))
5050 ctxt->sax->entityDecl(ctxt->userData, name,
5051 XML_EXTERNAL_PARAMETER_ENTITY,
5052 literal, URI, NULL);
5053 }
5054 xmlFreeURI(uri);
5055 }
5056 }
5057 }
5058 } else {
5059 if ((RAW == '"') || (RAW == '\'')) {
5060 value = xmlParseEntityValue(ctxt, &orig);
5061 if ((ctxt->sax != NULL) &&
5062 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5063 ctxt->sax->entityDecl(ctxt->userData, name,
5064 XML_INTERNAL_GENERAL_ENTITY,
5065 NULL, NULL, value);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005066 /*
5067 * For expat compatibility in SAX mode.
5068 */
5069 if ((ctxt->myDoc == NULL) ||
5070 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
5071 if (ctxt->myDoc == NULL) {
5072 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
Daniel Veillard68b6e022008-03-31 09:26:00 +00005073 if (ctxt->myDoc == NULL) {
5074 xmlErrMemory(ctxt, "New Doc failed");
5075 return;
5076 }
Daniel Veillardae0765b2008-07-31 19:54:59 +00005077 ctxt->myDoc->properties = XML_DOC_INTERNAL;
Daniel Veillard5997aca2002-03-18 18:36:20 +00005078 }
5079 if (ctxt->myDoc->intSubset == NULL)
5080 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5081 BAD_CAST "fake", NULL, NULL);
5082
Daniel Veillard1af9a412003-08-20 22:54:39 +00005083 xmlSAX2EntityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY,
5084 NULL, NULL, value);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005085 }
Owen Taylor3473f882001-02-23 17:55:21 +00005086 } else {
5087 URI = xmlParseExternalID(ctxt, &literal, 1);
5088 if ((URI == NULL) && (literal == NULL)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005089 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005090 }
5091 if (URI) {
5092 xmlURIPtr uri;
5093
5094 uri = xmlParseURI((const char *)URI);
5095 if (uri == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00005096 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
5097 "Invalid URI: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00005098 /*
5099 * This really ought to be a well formedness error
5100 * but the XML Core WG decided otherwise c.f. issue
5101 * E26 of the XML erratas.
5102 */
Owen Taylor3473f882001-02-23 17:55:21 +00005103 } else {
5104 if (uri->fragment != NULL) {
Daniel Veillard4a7ae502002-02-18 19:18:17 +00005105 /*
5106 * Okay this is foolish to block those but not
5107 * invalid URIs.
5108 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005109 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005110 }
5111 xmlFreeURI(uri);
5112 }
5113 }
William M. Brack76e95df2003-10-18 16:20:14 +00005114 if ((RAW != '>') && (!IS_BLANK_CH(CUR))) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005115 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5116 "Space required before 'NDATA'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005117 }
5118 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005119 if (CMP5(CUR_PTR, 'N', 'D', 'A', 'T', 'A')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005120 SKIP(5);
William M. Brack76e95df2003-10-18 16:20:14 +00005121 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005122 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5123 "Space required after 'NDATA'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005124 }
5125 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00005126 ndata = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005127 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5128 (ctxt->sax->unparsedEntityDecl != NULL))
5129 ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
5130 literal, URI, ndata);
5131 } else {
5132 if ((ctxt->sax != NULL) &&
5133 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5134 ctxt->sax->entityDecl(ctxt->userData, name,
5135 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5136 literal, URI, NULL);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005137 /*
5138 * For expat compatibility in SAX mode.
5139 * assuming the entity repalcement was asked for
5140 */
5141 if ((ctxt->replaceEntities != 0) &&
5142 ((ctxt->myDoc == NULL) ||
5143 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) {
5144 if (ctxt->myDoc == NULL) {
5145 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
Daniel Veillard68b6e022008-03-31 09:26:00 +00005146 if (ctxt->myDoc == NULL) {
5147 xmlErrMemory(ctxt, "New Doc failed");
5148 return;
5149 }
Daniel Veillardae0765b2008-07-31 19:54:59 +00005150 ctxt->myDoc->properties = XML_DOC_INTERNAL;
Daniel Veillard5997aca2002-03-18 18:36:20 +00005151 }
5152
5153 if (ctxt->myDoc->intSubset == NULL)
5154 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5155 BAD_CAST "fake", NULL, NULL);
Daniel Veillard1af9a412003-08-20 22:54:39 +00005156 xmlSAX2EntityDecl(ctxt, name,
5157 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5158 literal, URI, NULL);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005159 }
Owen Taylor3473f882001-02-23 17:55:21 +00005160 }
5161 }
5162 }
5163 SKIP_BLANKS;
5164 if (RAW != '>') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00005165 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00005166 "xmlParseEntityDecl: entity %s not terminated\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00005167 } else {
5168 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005169 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5170 "Entity declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005171 }
5172 NEXT;
5173 }
5174 if (orig != NULL) {
5175 /*
5176 * Ugly mechanism to save the raw entity value.
5177 */
5178 xmlEntityPtr cur = NULL;
5179
5180 if (isParameter) {
5181 if ((ctxt->sax != NULL) &&
5182 (ctxt->sax->getParameterEntity != NULL))
5183 cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
5184 } else {
5185 if ((ctxt->sax != NULL) &&
5186 (ctxt->sax->getEntity != NULL))
5187 cur = ctxt->sax->getEntity(ctxt->userData, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005188 if ((cur == NULL) && (ctxt->userData==ctxt)) {
Daniel Veillard1af9a412003-08-20 22:54:39 +00005189 cur = xmlSAX2GetEntity(ctxt, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005190 }
Owen Taylor3473f882001-02-23 17:55:21 +00005191 }
5192 if (cur != NULL) {
5193 if (cur->orig != NULL)
5194 xmlFree(orig);
5195 else
5196 cur->orig = orig;
5197 } else
5198 xmlFree(orig);
5199 }
Owen Taylor3473f882001-02-23 17:55:21 +00005200 if (value != NULL) xmlFree(value);
5201 if (URI != NULL) xmlFree(URI);
5202 if (literal != NULL) xmlFree(literal);
Owen Taylor3473f882001-02-23 17:55:21 +00005203 }
5204}
5205
5206/**
5207 * xmlParseDefaultDecl:
5208 * @ctxt: an XML parser context
5209 * @value: Receive a possible fixed default value for the attribute
5210 *
5211 * Parse an attribute default declaration
5212 *
5213 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
5214 *
5215 * [ VC: Required Attribute ]
5216 * if the default declaration is the keyword #REQUIRED, then the
5217 * attribute must be specified for all elements of the type in the
5218 * attribute-list declaration.
5219 *
5220 * [ VC: Attribute Default Legal ]
5221 * The declared default value must meet the lexical constraints of
5222 * the declared attribute type c.f. xmlValidateAttributeDecl()
5223 *
5224 * [ VC: Fixed Attribute Default ]
5225 * if an attribute has a default value declared with the #FIXED
5226 * keyword, instances of that attribute must match the default value.
5227 *
5228 * [ WFC: No < in Attribute Values ]
5229 * handled in xmlParseAttValue()
5230 *
5231 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
5232 * or XML_ATTRIBUTE_FIXED.
5233 */
5234
5235int
5236xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
5237 int val;
5238 xmlChar *ret;
5239
5240 *value = NULL;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005241 if (CMP9(CUR_PTR, '#', 'R', 'E', 'Q', 'U', 'I', 'R', 'E', 'D')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005242 SKIP(9);
5243 return(XML_ATTRIBUTE_REQUIRED);
5244 }
Daniel Veillarda07050d2003-10-19 14:46:32 +00005245 if (CMP8(CUR_PTR, '#', 'I', 'M', 'P', 'L', 'I', 'E', 'D')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005246 SKIP(8);
5247 return(XML_ATTRIBUTE_IMPLIED);
5248 }
5249 val = XML_ATTRIBUTE_NONE;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005250 if (CMP6(CUR_PTR, '#', 'F', 'I', 'X', 'E', 'D')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005251 SKIP(6);
5252 val = XML_ATTRIBUTE_FIXED;
William M. Brack76e95df2003-10-18 16:20:14 +00005253 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005254 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5255 "Space required after '#FIXED'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005256 }
5257 SKIP_BLANKS;
5258 }
5259 ret = xmlParseAttValue(ctxt);
5260 ctxt->instate = XML_PARSER_DTD;
5261 if (ret == NULL) {
William M. Brack7b9154b2003-09-27 19:23:50 +00005262 xmlFatalErrMsg(ctxt, (xmlParserErrors)ctxt->errNo,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005263 "Attribute default value declaration error\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005264 } else
5265 *value = ret;
5266 return(val);
5267}
5268
5269/**
5270 * xmlParseNotationType:
5271 * @ctxt: an XML parser context
5272 *
5273 * parse an Notation attribute type.
5274 *
5275 * Note: the leading 'NOTATION' S part has already being parsed...
5276 *
5277 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5278 *
5279 * [ VC: Notation Attributes ]
5280 * Values of this type must match one of the notation names included
5281 * in the declaration; all notation names in the declaration must be declared.
5282 *
5283 * Returns: the notation attribute tree built while parsing
5284 */
5285
5286xmlEnumerationPtr
5287xmlParseNotationType(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005288 const xmlChar *name;
Daniel Veillard49d44052008-08-27 19:57:06 +00005289 xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00005290
5291 if (RAW != '(') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005292 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005293 return(NULL);
5294 }
5295 SHRINK;
5296 do {
5297 NEXT;
5298 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00005299 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005300 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005301 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5302 "Name expected in NOTATION declaration\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005303 return(ret);
5304 }
Daniel Veillard49d44052008-08-27 19:57:06 +00005305 tmp = ret;
5306 while (tmp != NULL) {
5307 if (xmlStrEqual(name, tmp->name)) {
5308 xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5309 "standalone: attribute notation value token %s duplicated\n",
5310 name, NULL);
5311 if (!xmlDictOwns(ctxt->dict, name))
5312 xmlFree((xmlChar *) name);
5313 break;
5314 }
5315 tmp = tmp->next;
5316 }
5317 if (tmp == NULL) {
5318 cur = xmlCreateEnumeration(name);
5319 if (cur == NULL) return(ret);
5320 if (last == NULL) ret = last = cur;
5321 else {
5322 last->next = cur;
5323 last = cur;
5324 }
Owen Taylor3473f882001-02-23 17:55:21 +00005325 }
5326 SKIP_BLANKS;
5327 } while (RAW == '|');
5328 if (RAW != ')') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005329 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005330 if ((last != NULL) && (last != ret))
5331 xmlFreeEnumeration(last);
5332 return(ret);
5333 }
5334 NEXT;
5335 return(ret);
5336}
5337
5338/**
5339 * xmlParseEnumerationType:
5340 * @ctxt: an XML parser context
5341 *
5342 * parse an Enumeration attribute type.
5343 *
5344 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
5345 *
5346 * [ VC: Enumeration ]
5347 * Values of this type must match one of the Nmtoken tokens in
5348 * the declaration
5349 *
5350 * Returns: the enumeration attribute tree built while parsing
5351 */
5352
5353xmlEnumerationPtr
5354xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
5355 xmlChar *name;
Daniel Veillard49d44052008-08-27 19:57:06 +00005356 xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00005357
5358 if (RAW != '(') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005359 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005360 return(NULL);
5361 }
5362 SHRINK;
5363 do {
5364 NEXT;
5365 SKIP_BLANKS;
5366 name = xmlParseNmtoken(ctxt);
5367 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005368 xmlFatalErr(ctxt, XML_ERR_NMTOKEN_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005369 return(ret);
5370 }
Daniel Veillard49d44052008-08-27 19:57:06 +00005371 tmp = ret;
5372 while (tmp != NULL) {
5373 if (xmlStrEqual(name, tmp->name)) {
5374 xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5375 "standalone: attribute enumeration value token %s duplicated\n",
5376 name, NULL);
5377 if (!xmlDictOwns(ctxt->dict, name))
5378 xmlFree(name);
5379 break;
5380 }
5381 tmp = tmp->next;
5382 }
5383 if (tmp == NULL) {
5384 cur = xmlCreateEnumeration(name);
5385 if (!xmlDictOwns(ctxt->dict, name))
5386 xmlFree(name);
5387 if (cur == NULL) return(ret);
5388 if (last == NULL) ret = last = cur;
5389 else {
5390 last->next = cur;
5391 last = cur;
5392 }
Owen Taylor3473f882001-02-23 17:55:21 +00005393 }
5394 SKIP_BLANKS;
5395 } while (RAW == '|');
5396 if (RAW != ')') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005397 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005398 return(ret);
5399 }
5400 NEXT;
5401 return(ret);
5402}
5403
5404/**
5405 * xmlParseEnumeratedType:
5406 * @ctxt: an XML parser context
5407 * @tree: the enumeration tree built while parsing
5408 *
5409 * parse an Enumerated attribute type.
5410 *
5411 * [57] EnumeratedType ::= NotationType | Enumeration
5412 *
5413 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5414 *
5415 *
5416 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
5417 */
5418
5419int
5420xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
Daniel Veillarda07050d2003-10-19 14:46:32 +00005421 if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005422 SKIP(8);
William M. Brack76e95df2003-10-18 16:20:14 +00005423 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005424 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5425 "Space required after 'NOTATION'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005426 return(0);
5427 }
5428 SKIP_BLANKS;
5429 *tree = xmlParseNotationType(ctxt);
5430 if (*tree == NULL) return(0);
5431 return(XML_ATTRIBUTE_NOTATION);
5432 }
5433 *tree = xmlParseEnumerationType(ctxt);
5434 if (*tree == NULL) return(0);
5435 return(XML_ATTRIBUTE_ENUMERATION);
5436}
5437
5438/**
5439 * xmlParseAttributeType:
5440 * @ctxt: an XML parser context
5441 * @tree: the enumeration tree built while parsing
5442 *
5443 * parse the Attribute list def for an element
5444 *
5445 * [54] AttType ::= StringType | TokenizedType | EnumeratedType
5446 *
5447 * [55] StringType ::= 'CDATA'
5448 *
5449 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
5450 * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
5451 *
5452 * Validity constraints for attribute values syntax are checked in
5453 * xmlValidateAttributeValue()
5454 *
5455 * [ VC: ID ]
5456 * Values of type ID must match the Name production. A name must not
5457 * appear more than once in an XML document as a value of this type;
5458 * i.e., ID values must uniquely identify the elements which bear them.
5459 *
5460 * [ VC: One ID per Element Type ]
5461 * No element type may have more than one ID attribute specified.
5462 *
5463 * [ VC: ID Attribute Default ]
5464 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
5465 *
5466 * [ VC: IDREF ]
5467 * Values of type IDREF must match the Name production, and values
5468 * of type IDREFS must match Names; each IDREF Name must match the value
5469 * of an ID attribute on some element in the XML document; i.e. IDREF
5470 * values must match the value of some ID attribute.
5471 *
5472 * [ VC: Entity Name ]
5473 * Values of type ENTITY must match the Name production, values
5474 * of type ENTITIES must match Names; each Entity Name must match the
5475 * name of an unparsed entity declared in the DTD.
5476 *
5477 * [ VC: Name Token ]
5478 * Values of type NMTOKEN must match the Nmtoken production; values
5479 * of type NMTOKENS must match Nmtokens.
5480 *
5481 * Returns the attribute type
5482 */
5483int
5484xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
5485 SHRINK;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005486 if (CMP5(CUR_PTR, 'C', 'D', 'A', 'T', 'A')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005487 SKIP(5);
5488 return(XML_ATTRIBUTE_CDATA);
Daniel Veillarda07050d2003-10-19 14:46:32 +00005489 } else if (CMP6(CUR_PTR, 'I', 'D', 'R', 'E', 'F', 'S')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005490 SKIP(6);
5491 return(XML_ATTRIBUTE_IDREFS);
Daniel Veillarda07050d2003-10-19 14:46:32 +00005492 } else if (CMP5(CUR_PTR, 'I', 'D', 'R', 'E', 'F')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005493 SKIP(5);
5494 return(XML_ATTRIBUTE_IDREF);
5495 } else if ((RAW == 'I') && (NXT(1) == 'D')) {
5496 SKIP(2);
5497 return(XML_ATTRIBUTE_ID);
Daniel Veillarda07050d2003-10-19 14:46:32 +00005498 } else if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005499 SKIP(6);
5500 return(XML_ATTRIBUTE_ENTITY);
Daniel Veillarda07050d2003-10-19 14:46:32 +00005501 } else if (CMP8(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'I', 'E', 'S')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005502 SKIP(8);
5503 return(XML_ATTRIBUTE_ENTITIES);
Daniel Veillarda07050d2003-10-19 14:46:32 +00005504 } else if (CMP8(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N', 'S')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005505 SKIP(8);
5506 return(XML_ATTRIBUTE_NMTOKENS);
Daniel Veillarda07050d2003-10-19 14:46:32 +00005507 } else if (CMP7(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005508 SKIP(7);
5509 return(XML_ATTRIBUTE_NMTOKEN);
5510 }
5511 return(xmlParseEnumeratedType(ctxt, tree));
5512}
5513
5514/**
5515 * xmlParseAttributeListDecl:
5516 * @ctxt: an XML parser context
5517 *
5518 * : parse the Attribute list def for an element
5519 *
5520 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
5521 *
5522 * [53] AttDef ::= S Name S AttType S DefaultDecl
5523 *
5524 */
5525void
5526xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005527 const xmlChar *elemName;
5528 const xmlChar *attrName;
Owen Taylor3473f882001-02-23 17:55:21 +00005529 xmlEnumerationPtr tree;
5530
Daniel Veillarda07050d2003-10-19 14:46:32 +00005531 if (CMP9(CUR_PTR, '<', '!', 'A', 'T', 'T', 'L', 'I', 'S', 'T')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005532 xmlParserInputPtr input = ctxt->input;
5533
5534 SKIP(9);
William M. Brack76e95df2003-10-18 16:20:14 +00005535 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005536 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005537 "Space required after '<!ATTLIST'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005538 }
5539 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00005540 elemName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005541 if (elemName == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005542 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5543 "ATTLIST: no name for Element\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005544 return;
5545 }
5546 SKIP_BLANKS;
5547 GROW;
5548 while (RAW != '>') {
5549 const xmlChar *check = CUR_PTR;
5550 int type;
5551 int def;
5552 xmlChar *defaultValue = NULL;
5553
5554 GROW;
5555 tree = NULL;
Daniel Veillard76d66f42001-05-16 21:05:17 +00005556 attrName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005557 if (attrName == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005558 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5559 "ATTLIST: no name for Attribute\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005560 break;
5561 }
5562 GROW;
William M. Brack76e95df2003-10-18 16:20:14 +00005563 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005564 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005565 "Space required after the attribute name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005566 break;
5567 }
5568 SKIP_BLANKS;
5569
5570 type = xmlParseAttributeType(ctxt, &tree);
5571 if (type <= 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00005572 break;
5573 }
5574
5575 GROW;
William M. Brack76e95df2003-10-18 16:20:14 +00005576 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005577 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5578 "Space required after the attribute type\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005579 if (tree != NULL)
5580 xmlFreeEnumeration(tree);
5581 break;
5582 }
5583 SKIP_BLANKS;
5584
5585 def = xmlParseDefaultDecl(ctxt, &defaultValue);
5586 if (def <= 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00005587 if (defaultValue != NULL)
5588 xmlFree(defaultValue);
5589 if (tree != NULL)
5590 xmlFreeEnumeration(tree);
5591 break;
5592 }
Daniel Veillard97c9ce22008-03-25 16:52:41 +00005593 if ((type != XML_ATTRIBUTE_CDATA) && (defaultValue != NULL))
5594 xmlAttrNormalizeSpace(defaultValue, defaultValue);
Owen Taylor3473f882001-02-23 17:55:21 +00005595
5596 GROW;
5597 if (RAW != '>') {
William M. Brack76e95df2003-10-18 16:20:14 +00005598 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005599 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005600 "Space required after the attribute default value\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005601 if (defaultValue != NULL)
5602 xmlFree(defaultValue);
5603 if (tree != NULL)
5604 xmlFreeEnumeration(tree);
5605 break;
5606 }
5607 SKIP_BLANKS;
5608 }
5609 if (check == CUR_PTR) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005610 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
5611 "in xmlParseAttributeListDecl\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005612 if (defaultValue != NULL)
5613 xmlFree(defaultValue);
5614 if (tree != NULL)
5615 xmlFreeEnumeration(tree);
5616 break;
5617 }
5618 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5619 (ctxt->sax->attributeDecl != NULL))
5620 ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
5621 type, def, defaultValue, tree);
Daniel Veillarde57ec792003-09-10 10:50:59 +00005622 else if (tree != NULL)
5623 xmlFreeEnumeration(tree);
5624
5625 if ((ctxt->sax2) && (defaultValue != NULL) &&
5626 (def != XML_ATTRIBUTE_IMPLIED) &&
5627 (def != XML_ATTRIBUTE_REQUIRED)) {
5628 xmlAddDefAttrs(ctxt, elemName, attrName, defaultValue);
5629 }
Daniel Veillardac4118d2008-01-11 05:27:32 +00005630 if (ctxt->sax2) {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00005631 xmlAddSpecialAttr(ctxt, elemName, attrName, type);
5632 }
Owen Taylor3473f882001-02-23 17:55:21 +00005633 if (defaultValue != NULL)
5634 xmlFree(defaultValue);
5635 GROW;
5636 }
5637 if (RAW == '>') {
5638 if (input != ctxt->input) {
Daniel Veillard49d44052008-08-27 19:57:06 +00005639 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
5640 "Attribute list declaration doesn't start and stop in the same entity\n",
5641 NULL, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005642 }
5643 NEXT;
5644 }
Owen Taylor3473f882001-02-23 17:55:21 +00005645 }
5646}
5647
5648/**
5649 * xmlParseElementMixedContentDecl:
5650 * @ctxt: an XML parser context
Daniel Veillarda9b66d02002-12-11 14:23:49 +00005651 * @inputchk: the input used for the current entity, needed for boundary checks
Owen Taylor3473f882001-02-23 17:55:21 +00005652 *
5653 * parse the declaration for a Mixed Element content
5654 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
5655 *
5656 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
5657 * '(' S? '#PCDATA' S? ')'
5658 *
5659 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
5660 *
5661 * [ VC: No Duplicate Types ]
5662 * The same name must not appear more than once in a single
5663 * mixed-content declaration.
5664 *
5665 * returns: the list of the xmlElementContentPtr describing the element choices
5666 */
5667xmlElementContentPtr
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005668xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
Owen Taylor3473f882001-02-23 17:55:21 +00005669 xmlElementContentPtr ret = NULL, cur = NULL, n;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005670 const xmlChar *elem = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005671
5672 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005673 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005674 SKIP(7);
5675 SKIP_BLANKS;
5676 SHRINK;
5677 if (RAW == ')') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005678 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00005679 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
5680"Element content declaration doesn't start and stop in the same entity\n",
Daniel Veillardae0765b2008-07-31 19:54:59 +00005681 NULL, NULL);
Daniel Veillard8dc16a62002-02-19 21:08:48 +00005682 }
Owen Taylor3473f882001-02-23 17:55:21 +00005683 NEXT;
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005684 ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
Daniel Veillard68b6e022008-03-31 09:26:00 +00005685 if (ret == NULL)
5686 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005687 if (RAW == '*') {
5688 ret->ocur = XML_ELEMENT_CONTENT_MULT;
5689 NEXT;
5690 }
5691 return(ret);
5692 }
5693 if ((RAW == '(') || (RAW == '|')) {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005694 ret = cur = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
Owen Taylor3473f882001-02-23 17:55:21 +00005695 if (ret == NULL) return(NULL);
5696 }
5697 while (RAW == '|') {
5698 NEXT;
5699 if (elem == NULL) {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005700 ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
Owen Taylor3473f882001-02-23 17:55:21 +00005701 if (ret == NULL) return(NULL);
5702 ret->c1 = cur;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005703 if (cur != NULL)
5704 cur->parent = ret;
Owen Taylor3473f882001-02-23 17:55:21 +00005705 cur = ret;
5706 } else {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005707 n = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
Owen Taylor3473f882001-02-23 17:55:21 +00005708 if (n == NULL) return(NULL);
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005709 n->c1 = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005710 if (n->c1 != NULL)
5711 n->c1->parent = n;
Owen Taylor3473f882001-02-23 17:55:21 +00005712 cur->c2 = n;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005713 if (n != NULL)
5714 n->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00005715 cur = n;
Owen Taylor3473f882001-02-23 17:55:21 +00005716 }
5717 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00005718 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005719 if (elem == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005720 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005721 "xmlParseElementMixedContentDecl : Name expected\n");
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005722 xmlFreeDocElementContent(ctxt->myDoc, cur);
Owen Taylor3473f882001-02-23 17:55:21 +00005723 return(NULL);
5724 }
5725 SKIP_BLANKS;
5726 GROW;
5727 }
5728 if ((RAW == ')') && (NXT(1) == '*')) {
5729 if (elem != NULL) {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005730 cur->c2 = xmlNewDocElementContent(ctxt->myDoc, elem,
Owen Taylor3473f882001-02-23 17:55:21 +00005731 XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005732 if (cur->c2 != NULL)
5733 cur->c2->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00005734 }
5735 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005736 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00005737 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
5738"Element content declaration doesn't start and stop in the same entity\n",
Daniel Veillardae0765b2008-07-31 19:54:59 +00005739 NULL, NULL);
Daniel Veillard8dc16a62002-02-19 21:08:48 +00005740 }
Owen Taylor3473f882001-02-23 17:55:21 +00005741 SKIP(2);
5742 } else {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005743 xmlFreeDocElementContent(ctxt->myDoc, ret);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005744 xmlFatalErr(ctxt, XML_ERR_MIXED_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005745 return(NULL);
5746 }
5747
5748 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005749 xmlFatalErr(ctxt, XML_ERR_PCDATA_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005750 }
5751 return(ret);
5752}
5753
5754/**
5755 * xmlParseElementChildrenContentDecl:
5756 * @ctxt: an XML parser context
Daniel Veillarda9b66d02002-12-11 14:23:49 +00005757 * @inputchk: the input used for the current entity, needed for boundary checks
Owen Taylor3473f882001-02-23 17:55:21 +00005758 *
5759 * parse the declaration for a Mixed Element content
5760 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
5761 *
5762 *
5763 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
5764 *
5765 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
5766 *
5767 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
5768 *
5769 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
5770 *
5771 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
5772 * TODO Parameter-entity replacement text must be properly nested
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005773 * with parenthesized groups. That is to say, if either of the
Owen Taylor3473f882001-02-23 17:55:21 +00005774 * opening or closing parentheses in a choice, seq, or Mixed
5775 * construct is contained in the replacement text for a parameter
5776 * entity, both must be contained in the same replacement text. For
5777 * interoperability, if a parameter-entity reference appears in a
5778 * choice, seq, or Mixed construct, its replacement text should not
5779 * be empty, and neither the first nor last non-blank character of
5780 * the replacement text should be a connector (| or ,).
5781 *
Daniel Veillard5e2dace2001-07-18 19:30:27 +00005782 * Returns the tree of xmlElementContentPtr describing the element
Owen Taylor3473f882001-02-23 17:55:21 +00005783 * hierarchy.
5784 */
5785xmlElementContentPtr
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005786xmlParseElementChildrenContentDecl (xmlParserCtxtPtr ctxt, int inputchk) {
Owen Taylor3473f882001-02-23 17:55:21 +00005787 xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005788 const xmlChar *elem;
Owen Taylor3473f882001-02-23 17:55:21 +00005789 xmlChar type = 0;
5790
5791 SKIP_BLANKS;
5792 GROW;
5793 if (RAW == '(') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005794 int inputid = ctxt->input->id;
Daniel Veillard8dc16a62002-02-19 21:08:48 +00005795
Owen Taylor3473f882001-02-23 17:55:21 +00005796 /* Recurse on first child */
5797 NEXT;
5798 SKIP_BLANKS;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005799 cur = ret = xmlParseElementChildrenContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00005800 SKIP_BLANKS;
5801 GROW;
5802 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00005803 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005804 if (elem == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005805 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005806 return(NULL);
5807 }
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005808 cur = ret = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00005809 if (cur == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005810 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00005811 return(NULL);
5812 }
Owen Taylor3473f882001-02-23 17:55:21 +00005813 GROW;
5814 if (RAW == '?') {
5815 cur->ocur = XML_ELEMENT_CONTENT_OPT;
5816 NEXT;
5817 } else if (RAW == '*') {
5818 cur->ocur = XML_ELEMENT_CONTENT_MULT;
5819 NEXT;
5820 } else if (RAW == '+') {
5821 cur->ocur = XML_ELEMENT_CONTENT_PLUS;
5822 NEXT;
5823 } else {
5824 cur->ocur = XML_ELEMENT_CONTENT_ONCE;
5825 }
Owen Taylor3473f882001-02-23 17:55:21 +00005826 GROW;
5827 }
5828 SKIP_BLANKS;
5829 SHRINK;
5830 while (RAW != ')') {
5831 /*
5832 * Each loop we parse one separator and one element.
5833 */
5834 if (RAW == ',') {
5835 if (type == 0) type = CUR;
5836
5837 /*
5838 * Detect "Name | Name , Name" error
5839 */
5840 else if (type != CUR) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005841 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005842 "xmlParseElementChildrenContentDecl : '%c' expected\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005843 type);
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00005844 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005845 xmlFreeDocElementContent(ctxt->myDoc, last);
Owen Taylor3473f882001-02-23 17:55:21 +00005846 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005847 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00005848 return(NULL);
5849 }
5850 NEXT;
5851
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005852 op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_SEQ);
Owen Taylor3473f882001-02-23 17:55:21 +00005853 if (op == NULL) {
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00005854 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005855 xmlFreeDocElementContent(ctxt->myDoc, last);
5856 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00005857 return(NULL);
5858 }
5859 if (last == NULL) {
5860 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005861 if (ret != NULL)
5862 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00005863 ret = cur = op;
5864 } else {
5865 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005866 if (op != NULL)
5867 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00005868 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005869 if (last != NULL)
5870 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00005871 cur =op;
5872 last = NULL;
5873 }
5874 } else if (RAW == '|') {
5875 if (type == 0) type = CUR;
5876
5877 /*
5878 * Detect "Name , Name | Name" error
5879 */
5880 else if (type != CUR) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005881 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005882 "xmlParseElementChildrenContentDecl : '%c' expected\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005883 type);
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00005884 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005885 xmlFreeDocElementContent(ctxt->myDoc, last);
Owen Taylor3473f882001-02-23 17:55:21 +00005886 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005887 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00005888 return(NULL);
5889 }
5890 NEXT;
5891
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005892 op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
Owen Taylor3473f882001-02-23 17:55:21 +00005893 if (op == NULL) {
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00005894 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005895 xmlFreeDocElementContent(ctxt->myDoc, last);
Owen Taylor3473f882001-02-23 17:55:21 +00005896 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005897 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00005898 return(NULL);
5899 }
5900 if (last == NULL) {
5901 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005902 if (ret != NULL)
5903 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00005904 ret = cur = op;
5905 } else {
5906 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005907 if (op != NULL)
5908 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00005909 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005910 if (last != NULL)
5911 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00005912 cur =op;
5913 last = NULL;
5914 }
5915 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005916 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED, NULL);
Daniel Veillardc707d0b2008-01-24 14:48:54 +00005917 if ((last != NULL) && (last != ret))
5918 xmlFreeDocElementContent(ctxt->myDoc, last);
Owen Taylor3473f882001-02-23 17:55:21 +00005919 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005920 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00005921 return(NULL);
5922 }
5923 GROW;
5924 SKIP_BLANKS;
5925 GROW;
5926 if (RAW == '(') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005927 int inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00005928 /* Recurse on second child */
5929 NEXT;
5930 SKIP_BLANKS;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005931 last = xmlParseElementChildrenContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00005932 SKIP_BLANKS;
5933 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00005934 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005935 if (elem == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005936 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005937 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005938 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00005939 return(NULL);
5940 }
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005941 last = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillard68b6e022008-03-31 09:26:00 +00005942 if (last == NULL) {
5943 if (ret != NULL)
5944 xmlFreeDocElementContent(ctxt->myDoc, ret);
5945 return(NULL);
5946 }
Owen Taylor3473f882001-02-23 17:55:21 +00005947 if (RAW == '?') {
5948 last->ocur = XML_ELEMENT_CONTENT_OPT;
5949 NEXT;
5950 } else if (RAW == '*') {
5951 last->ocur = XML_ELEMENT_CONTENT_MULT;
5952 NEXT;
5953 } else if (RAW == '+') {
5954 last->ocur = XML_ELEMENT_CONTENT_PLUS;
5955 NEXT;
5956 } else {
5957 last->ocur = XML_ELEMENT_CONTENT_ONCE;
5958 }
5959 }
5960 SKIP_BLANKS;
5961 GROW;
5962 }
5963 if ((cur != NULL) && (last != NULL)) {
5964 cur->c2 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005965 if (last != NULL)
5966 last->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00005967 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005968 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00005969 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
5970"Element content declaration doesn't start and stop in the same entity\n",
Daniel Veillardae0765b2008-07-31 19:54:59 +00005971 NULL, NULL);
Daniel Veillard8dc16a62002-02-19 21:08:48 +00005972 }
Owen Taylor3473f882001-02-23 17:55:21 +00005973 NEXT;
5974 if (RAW == '?') {
William M. Brackf8f2e8f2004-05-14 04:37:41 +00005975 if (ret != NULL) {
5976 if ((ret->ocur == XML_ELEMENT_CONTENT_PLUS) ||
5977 (ret->ocur == XML_ELEMENT_CONTENT_MULT))
5978 ret->ocur = XML_ELEMENT_CONTENT_MULT;
5979 else
5980 ret->ocur = XML_ELEMENT_CONTENT_OPT;
5981 }
Owen Taylor3473f882001-02-23 17:55:21 +00005982 NEXT;
5983 } else if (RAW == '*') {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00005984 if (ret != NULL) {
Daniel Veillarde470df72001-04-18 21:41:07 +00005985 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Daniel Veillardce2c2f02001-10-18 14:57:24 +00005986 cur = ret;
5987 /*
5988 * Some normalization:
5989 * (a | b* | c?)* == (a | b | c)*
5990 */
Daniel Veillard30e76072006-03-09 14:13:55 +00005991 while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00005992 if ((cur->c1 != NULL) &&
5993 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
5994 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT)))
5995 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
5996 if ((cur->c2 != NULL) &&
5997 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
5998 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT)))
5999 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6000 cur = cur->c2;
6001 }
6002 }
Owen Taylor3473f882001-02-23 17:55:21 +00006003 NEXT;
6004 } else if (RAW == '+') {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00006005 if (ret != NULL) {
6006 int found = 0;
6007
William M. Brackf8f2e8f2004-05-14 04:37:41 +00006008 if ((ret->ocur == XML_ELEMENT_CONTENT_OPT) ||
6009 (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6010 ret->ocur = XML_ELEMENT_CONTENT_MULT;
William M. Brackeb8509c2004-05-14 03:48:02 +00006011 else
6012 ret->ocur = XML_ELEMENT_CONTENT_PLUS;
Daniel Veillardce2c2f02001-10-18 14:57:24 +00006013 /*
6014 * Some normalization:
6015 * (a | b*)+ == (a | b)*
6016 * (a | b?)+ == (a | b)*
6017 */
Daniel Veillard30e76072006-03-09 14:13:55 +00006018 while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00006019 if ((cur->c1 != NULL) &&
6020 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6021 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) {
6022 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6023 found = 1;
6024 }
6025 if ((cur->c2 != NULL) &&
6026 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6027 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) {
6028 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6029 found = 1;
6030 }
6031 cur = cur->c2;
6032 }
6033 if (found)
6034 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6035 }
Owen Taylor3473f882001-02-23 17:55:21 +00006036 NEXT;
6037 }
6038 return(ret);
6039}
6040
6041/**
6042 * xmlParseElementContentDecl:
6043 * @ctxt: an XML parser context
6044 * @name: the name of the element being defined.
6045 * @result: the Element Content pointer will be stored here if any
6046 *
6047 * parse the declaration for an Element content either Mixed or Children,
6048 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
6049 *
6050 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
6051 *
6052 * returns: the type of element content XML_ELEMENT_TYPE_xxx
6053 */
6054
6055int
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006056xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, const xmlChar *name,
Owen Taylor3473f882001-02-23 17:55:21 +00006057 xmlElementContentPtr *result) {
6058
6059 xmlElementContentPtr tree = NULL;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006060 int inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00006061 int res;
6062
6063 *result = NULL;
6064
6065 if (RAW != '(') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006066 xmlFatalErrMsgStr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
Daniel Veillard56a4cb82001-03-24 17:00:36 +00006067 "xmlParseElementContentDecl : %s '(' expected\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006068 return(-1);
6069 }
6070 NEXT;
6071 GROW;
6072 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00006073 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006074 tree = xmlParseElementMixedContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00006075 res = XML_ELEMENT_TYPE_MIXED;
6076 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006077 tree = xmlParseElementChildrenContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00006078 res = XML_ELEMENT_TYPE_ELEMENT;
6079 }
Owen Taylor3473f882001-02-23 17:55:21 +00006080 SKIP_BLANKS;
6081 *result = tree;
6082 return(res);
6083}
6084
6085/**
6086 * xmlParseElementDecl:
6087 * @ctxt: an XML parser context
6088 *
6089 * parse an Element declaration.
6090 *
6091 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
6092 *
6093 * [ VC: Unique Element Type Declaration ]
6094 * No element type may be declared more than once
6095 *
6096 * Returns the type of the element, or -1 in case of error
6097 */
6098int
6099xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006100 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00006101 int ret = -1;
6102 xmlElementContentPtr content = NULL;
6103
Daniel Veillard4c778d82005-01-23 17:37:44 +00006104 /* GROW; done in the caller */
Daniel Veillarda07050d2003-10-19 14:46:32 +00006105 if (CMP9(CUR_PTR, '<', '!', 'E', 'L', 'E', 'M', 'E', 'N', 'T')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006106 xmlParserInputPtr input = ctxt->input;
6107
6108 SKIP(9);
William M. Brack76e95df2003-10-18 16:20:14 +00006109 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006110 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6111 "Space required after 'ELEMENT'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006112 }
6113 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00006114 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006115 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006116 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6117 "xmlParseElementDecl: no name for Element\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006118 return(-1);
6119 }
6120 while ((RAW == 0) && (ctxt->inputNr > 1))
6121 xmlPopInput(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00006122 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006123 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6124 "Space required after the element name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006125 }
6126 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00006127 if (CMP5(CUR_PTR, 'E', 'M', 'P', 'T', 'Y')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006128 SKIP(5);
6129 /*
6130 * Element must always be empty.
6131 */
6132 ret = XML_ELEMENT_TYPE_EMPTY;
6133 } else if ((RAW == 'A') && (NXT(1) == 'N') &&
6134 (NXT(2) == 'Y')) {
6135 SKIP(3);
6136 /*
6137 * Element is a generic container.
6138 */
6139 ret = XML_ELEMENT_TYPE_ANY;
6140 } else if (RAW == '(') {
6141 ret = xmlParseElementContentDecl(ctxt, name, &content);
6142 } else {
6143 /*
6144 * [ WFC: PEs in Internal Subset ] error handling.
6145 */
6146 if ((RAW == '%') && (ctxt->external == 0) &&
6147 (ctxt->inputNr == 1)) {
Daniel Veillardf403d292003-10-05 13:51:35 +00006148 xmlFatalErrMsg(ctxt, XML_ERR_PEREF_IN_INT_SUBSET,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006149 "PEReference: forbidden within markup decl in internal subset\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006150 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00006151 xmlFatalErrMsg(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
Owen Taylor3473f882001-02-23 17:55:21 +00006152 "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
6153 }
Owen Taylor3473f882001-02-23 17:55:21 +00006154 return(-1);
6155 }
6156
6157 SKIP_BLANKS;
6158 /*
6159 * Pop-up of finished entities.
6160 */
6161 while ((RAW == 0) && (ctxt->inputNr > 1))
6162 xmlPopInput(ctxt);
6163 SKIP_BLANKS;
6164
6165 if (RAW != '>') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006166 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006167 if (content != NULL) {
6168 xmlFreeDocElementContent(ctxt->myDoc, content);
6169 }
Owen Taylor3473f882001-02-23 17:55:21 +00006170 } else {
6171 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006172 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6173 "Element declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006174 }
6175
6176 NEXT;
6177 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006178 (ctxt->sax->elementDecl != NULL)) {
6179 if (content != NULL)
6180 content->parent = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00006181 ctxt->sax->elementDecl(ctxt->userData, name, ret,
6182 content);
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006183 if ((content != NULL) && (content->parent == NULL)) {
6184 /*
6185 * this is a trick: if xmlAddElementDecl is called,
6186 * instead of copying the full tree it is plugged directly
6187 * if called from the parser. Avoid duplicating the
6188 * interfaces or change the API/ABI
6189 */
6190 xmlFreeDocElementContent(ctxt->myDoc, content);
6191 }
6192 } else if (content != NULL) {
6193 xmlFreeDocElementContent(ctxt->myDoc, content);
6194 }
Owen Taylor3473f882001-02-23 17:55:21 +00006195 }
Owen Taylor3473f882001-02-23 17:55:21 +00006196 }
6197 return(ret);
6198}
6199
6200/**
Owen Taylor3473f882001-02-23 17:55:21 +00006201 * xmlParseConditionalSections
6202 * @ctxt: an XML parser context
6203 *
6204 * [61] conditionalSect ::= includeSect | ignoreSect
6205 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
6206 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
6207 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
6208 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
6209 */
6210
Daniel Veillard56a4cb82001-03-24 17:00:36 +00006211static void
Owen Taylor3473f882001-02-23 17:55:21 +00006212xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
Daniel Veillard49d44052008-08-27 19:57:06 +00006213 int id = ctxt->input->id;
6214
Owen Taylor3473f882001-02-23 17:55:21 +00006215 SKIP(3);
6216 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00006217 if (CMP7(CUR_PTR, 'I', 'N', 'C', 'L', 'U', 'D', 'E')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006218 SKIP(7);
6219 SKIP_BLANKS;
6220 if (RAW != '[') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006221 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006222 } else {
Daniel Veillard49d44052008-08-27 19:57:06 +00006223 if (ctxt->input->id != id) {
6224 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6225 "All markup of the conditional section is not in the same entity\n",
6226 NULL, NULL);
6227 }
Owen Taylor3473f882001-02-23 17:55:21 +00006228 NEXT;
6229 }
6230 if (xmlParserDebugEntities) {
6231 if ((ctxt->input != NULL) && (ctxt->input->filename))
6232 xmlGenericError(xmlGenericErrorContext,
6233 "%s(%d): ", ctxt->input->filename,
6234 ctxt->input->line);
6235 xmlGenericError(xmlGenericErrorContext,
6236 "Entering INCLUDE Conditional Section\n");
6237 }
6238
6239 while ((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') ||
6240 (NXT(2) != '>'))) {
6241 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00006242 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00006243
6244 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6245 xmlParseConditionalSections(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00006246 } else if (IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00006247 NEXT;
6248 } else if (RAW == '%') {
6249 xmlParsePEReference(ctxt);
6250 } else
6251 xmlParseMarkupDecl(ctxt);
6252
6253 /*
6254 * Pop-up of finished entities.
6255 */
6256 while ((RAW == 0) && (ctxt->inputNr > 1))
6257 xmlPopInput(ctxt);
6258
Daniel Veillardfdc91562002-07-01 21:52:03 +00006259 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006260 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006261 break;
6262 }
6263 }
6264 if (xmlParserDebugEntities) {
6265 if ((ctxt->input != NULL) && (ctxt->input->filename))
6266 xmlGenericError(xmlGenericErrorContext,
6267 "%s(%d): ", ctxt->input->filename,
6268 ctxt->input->line);
6269 xmlGenericError(xmlGenericErrorContext,
6270 "Leaving INCLUDE Conditional Section\n");
6271 }
6272
Daniel Veillarda07050d2003-10-19 14:46:32 +00006273 } else if (CMP6(CUR_PTR, 'I', 'G', 'N', 'O', 'R', 'E')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006274 int state;
William M. Brack78637da2003-07-31 14:47:38 +00006275 xmlParserInputState instate;
Owen Taylor3473f882001-02-23 17:55:21 +00006276 int depth = 0;
6277
6278 SKIP(6);
6279 SKIP_BLANKS;
6280 if (RAW != '[') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006281 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006282 } else {
Daniel Veillard49d44052008-08-27 19:57:06 +00006283 if (ctxt->input->id != id) {
6284 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6285 "All markup of the conditional section is not in the same entity\n",
6286 NULL, NULL);
6287 }
Owen Taylor3473f882001-02-23 17:55:21 +00006288 NEXT;
6289 }
6290 if (xmlParserDebugEntities) {
6291 if ((ctxt->input != NULL) && (ctxt->input->filename))
6292 xmlGenericError(xmlGenericErrorContext,
6293 "%s(%d): ", ctxt->input->filename,
6294 ctxt->input->line);
6295 xmlGenericError(xmlGenericErrorContext,
6296 "Entering IGNORE Conditional Section\n");
6297 }
6298
6299 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006300 * Parse up to the end of the conditional section
Owen Taylor3473f882001-02-23 17:55:21 +00006301 * But disable SAX event generating DTD building in the meantime
6302 */
6303 state = ctxt->disableSAX;
6304 instate = ctxt->instate;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006305 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006306 ctxt->instate = XML_PARSER_IGNORE;
6307
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00006308 while ((depth >= 0) && (RAW != 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00006309 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6310 depth++;
6311 SKIP(3);
6312 continue;
6313 }
6314 if ((RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
6315 if (--depth >= 0) SKIP(3);
6316 continue;
6317 }
6318 NEXT;
6319 continue;
6320 }
6321
6322 ctxt->disableSAX = state;
6323 ctxt->instate = instate;
6324
6325 if (xmlParserDebugEntities) {
6326 if ((ctxt->input != NULL) && (ctxt->input->filename))
6327 xmlGenericError(xmlGenericErrorContext,
6328 "%s(%d): ", ctxt->input->filename,
6329 ctxt->input->line);
6330 xmlGenericError(xmlGenericErrorContext,
6331 "Leaving IGNORE Conditional Section\n");
6332 }
6333
6334 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006335 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID_KEYWORD, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006336 }
6337
6338 if (RAW == 0)
6339 SHRINK;
6340
6341 if (RAW == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006342 xmlFatalErr(ctxt, XML_ERR_CONDSEC_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006343 } else {
Daniel Veillard49d44052008-08-27 19:57:06 +00006344 if (ctxt->input->id != id) {
6345 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6346 "All markup of the conditional section is not in the same entity\n",
6347 NULL, NULL);
6348 }
Owen Taylor3473f882001-02-23 17:55:21 +00006349 SKIP(3);
6350 }
6351}
6352
6353/**
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006354 * xmlParseMarkupDecl:
6355 * @ctxt: an XML parser context
6356 *
6357 * parse Markup declarations
6358 *
6359 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
6360 * NotationDecl | PI | Comment
6361 *
6362 * [ VC: Proper Declaration/PE Nesting ]
6363 * Parameter-entity replacement text must be properly nested with
6364 * markup declarations. That is to say, if either the first character
6365 * or the last character of a markup declaration (markupdecl above) is
6366 * contained in the replacement text for a parameter-entity reference,
6367 * both must be contained in the same replacement text.
6368 *
6369 * [ WFC: PEs in Internal Subset ]
6370 * In the internal DTD subset, parameter-entity references can occur
6371 * only where markup declarations can occur, not within markup declarations.
6372 * (This does not apply to references that occur in external parameter
6373 * entities or to the external subset.)
6374 */
6375void
6376xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
6377 GROW;
Daniel Veillard4c778d82005-01-23 17:37:44 +00006378 if (CUR == '<') {
6379 if (NXT(1) == '!') {
6380 switch (NXT(2)) {
6381 case 'E':
6382 if (NXT(3) == 'L')
6383 xmlParseElementDecl(ctxt);
6384 else if (NXT(3) == 'N')
6385 xmlParseEntityDecl(ctxt);
6386 break;
6387 case 'A':
6388 xmlParseAttributeListDecl(ctxt);
6389 break;
6390 case 'N':
6391 xmlParseNotationDecl(ctxt);
6392 break;
6393 case '-':
6394 xmlParseComment(ctxt);
6395 break;
6396 default:
6397 /* there is an error but it will be detected later */
6398 break;
6399 }
6400 } else if (NXT(1) == '?') {
6401 xmlParsePI(ctxt);
6402 }
6403 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006404 /*
6405 * This is only for internal subset. On external entities,
6406 * the replacement is done before parsing stage
6407 */
6408 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
6409 xmlParsePEReference(ctxt);
6410
6411 /*
6412 * Conditional sections are allowed from entities included
6413 * by PE References in the internal subset.
6414 */
6415 if ((ctxt->external == 0) && (ctxt->inputNr > 1)) {
6416 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6417 xmlParseConditionalSections(ctxt);
6418 }
6419 }
6420
6421 ctxt->instate = XML_PARSER_DTD;
6422}
6423
6424/**
6425 * xmlParseTextDecl:
6426 * @ctxt: an XML parser context
Daniel Veillard40ec29a2008-07-30 12:35:40 +00006427 *
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006428 * parse an XML declaration header for external entities
6429 *
6430 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006431 */
6432
6433void
6434xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
6435 xmlChar *version;
Daniel Veillardf5cb3cd2003-10-28 13:58:13 +00006436 const xmlChar *encoding;
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006437
6438 /*
6439 * We know that '<?xml' is here.
6440 */
Daniel Veillarda07050d2003-10-19 14:46:32 +00006441 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006442 SKIP(5);
6443 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006444 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_STARTED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006445 return;
6446 }
6447
William M. Brack76e95df2003-10-18 16:20:14 +00006448 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006449 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6450 "Space needed after '<?xml'\n");
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006451 }
6452 SKIP_BLANKS;
6453
6454 /*
6455 * We may have the VersionInfo here.
6456 */
6457 version = xmlParseVersionInfo(ctxt);
6458 if (version == NULL)
6459 version = xmlCharStrdup(XML_DEFAULT_VERSION);
Daniel Veillard401c2112002-01-07 16:54:10 +00006460 else {
William M. Brack76e95df2003-10-18 16:20:14 +00006461 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006462 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6463 "Space needed here\n");
Daniel Veillard401c2112002-01-07 16:54:10 +00006464 }
6465 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006466 ctxt->input->version = version;
6467
6468 /*
6469 * We must have the encoding declaration
6470 */
Daniel Veillardf5cb3cd2003-10-28 13:58:13 +00006471 encoding = xmlParseEncodingDecl(ctxt);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006472 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
6473 /*
6474 * The XML REC instructs us to stop parsing right here
6475 */
6476 return;
6477 }
Daniel Veillardf5cb3cd2003-10-28 13:58:13 +00006478 if ((encoding == NULL) && (ctxt->errNo == XML_ERR_OK)) {
6479 xmlFatalErrMsg(ctxt, XML_ERR_MISSING_ENCODING,
6480 "Missing encoding in text declaration\n");
6481 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006482
6483 SKIP_BLANKS;
6484 if ((RAW == '?') && (NXT(1) == '>')) {
6485 SKIP(2);
6486 } else if (RAW == '>') {
6487 /* Deprecated old WD ... */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006488 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006489 NEXT;
6490 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006491 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006492 MOVETO_ENDTAG(CUR_PTR);
6493 NEXT;
6494 }
6495}
6496
6497/**
Owen Taylor3473f882001-02-23 17:55:21 +00006498 * xmlParseExternalSubset:
6499 * @ctxt: an XML parser context
6500 * @ExternalID: the external identifier
6501 * @SystemID: the system identifier (or URL)
6502 *
6503 * parse Markup declarations from an external subset
6504 *
6505 * [30] extSubset ::= textDecl? extSubsetDecl
6506 *
6507 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
6508 */
6509void
6510xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
6511 const xmlChar *SystemID) {
Daniel Veillard309f81d2003-09-23 09:02:53 +00006512 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006513 GROW;
Daniel Veillard6ccc56d2008-04-03 12:59:06 +00006514
6515 if ((ctxt->encoding == (const xmlChar *)XML_CHAR_ENCODING_NONE) &&
6516 (ctxt->input->end - ctxt->input->cur >= 4)) {
6517 xmlChar start[4];
6518 xmlCharEncoding enc;
6519
6520 start[0] = RAW;
6521 start[1] = NXT(1);
6522 start[2] = NXT(2);
6523 start[3] = NXT(3);
6524 enc = xmlDetectCharEncoding(start, 4);
6525 if (enc != XML_CHAR_ENCODING_NONE)
6526 xmlSwitchEncoding(ctxt, enc);
6527 }
6528
Daniel Veillarda07050d2003-10-19 14:46:32 +00006529 if (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006530 xmlParseTextDecl(ctxt);
6531 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
6532 /*
6533 * The XML REC instructs us to stop parsing right here
6534 */
6535 ctxt->instate = XML_PARSER_EOF;
6536 return;
6537 }
6538 }
6539 if (ctxt->myDoc == NULL) {
6540 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
Daniel Veillardae0765b2008-07-31 19:54:59 +00006541 if (ctxt->myDoc == NULL) {
6542 xmlErrMemory(ctxt, "New Doc failed");
6543 return;
6544 }
6545 ctxt->myDoc->properties = XML_DOC_INTERNAL;
Owen Taylor3473f882001-02-23 17:55:21 +00006546 }
6547 if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
6548 xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
6549
6550 ctxt->instate = XML_PARSER_DTD;
6551 ctxt->external = 1;
6552 while (((RAW == '<') && (NXT(1) == '?')) ||
6553 ((RAW == '<') && (NXT(1) == '!')) ||
William M. Brack76e95df2003-10-18 16:20:14 +00006554 (RAW == '%') || IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00006555 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00006556 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00006557
6558 GROW;
6559 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6560 xmlParseConditionalSections(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00006561 } else if (IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00006562 NEXT;
6563 } else if (RAW == '%') {
6564 xmlParsePEReference(ctxt);
6565 } else
6566 xmlParseMarkupDecl(ctxt);
6567
6568 /*
6569 * Pop-up of finished entities.
6570 */
6571 while ((RAW == 0) && (ctxt->inputNr > 1))
6572 xmlPopInput(ctxt);
6573
Daniel Veillardfdc91562002-07-01 21:52:03 +00006574 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006575 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006576 break;
6577 }
6578 }
6579
6580 if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006581 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006582 }
6583
6584}
6585
6586/**
6587 * xmlParseReference:
6588 * @ctxt: an XML parser context
Daniel Veillard0161e632008-08-28 15:36:32 +00006589 *
Owen Taylor3473f882001-02-23 17:55:21 +00006590 * parse and handle entity references in content, depending on the SAX
6591 * interface, this may end-up in a call to character() if this is a
6592 * CharRef, a predefined entity, if there is no reference() callback.
6593 * or if the parser was asked to switch to that mode.
6594 *
6595 * [67] Reference ::= EntityRef | CharRef
6596 */
6597void
6598xmlParseReference(xmlParserCtxtPtr ctxt) {
6599 xmlEntityPtr ent;
6600 xmlChar *val;
Daniel Veillard0161e632008-08-28 15:36:32 +00006601 int was_checked;
6602 xmlNodePtr list = NULL;
6603 xmlParserErrors ret = XML_ERR_OK;
Owen Taylor3473f882001-02-23 17:55:21 +00006604
Daniel Veillard0161e632008-08-28 15:36:32 +00006605
6606 if (RAW != '&')
6607 return;
6608
6609 /*
6610 * Simple case of a CharRef
6611 */
Owen Taylor3473f882001-02-23 17:55:21 +00006612 if (NXT(1) == '#') {
6613 int i = 0;
6614 xmlChar out[10];
6615 int hex = NXT(2);
Daniel Veillard56a4cb82001-03-24 17:00:36 +00006616 int value = xmlParseCharRef(ctxt);
Daniel Veillard0161e632008-08-28 15:36:32 +00006617
Daniel Veillarddc171602008-03-26 17:41:38 +00006618 if (value == 0)
6619 return;
Owen Taylor3473f882001-02-23 17:55:21 +00006620 if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
6621 /*
6622 * So we are using non-UTF-8 buffers
6623 * Check that the char fit on 8bits, if not
6624 * generate a CharRef.
6625 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00006626 if (value <= 0xFF) {
6627 out[0] = value;
Owen Taylor3473f882001-02-23 17:55:21 +00006628 out[1] = 0;
6629 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
6630 (!ctxt->disableSAX))
6631 ctxt->sax->characters(ctxt->userData, out, 1);
6632 } else {
6633 if ((hex == 'x') || (hex == 'X'))
Aleksey Sanin49cc9752002-06-14 17:07:10 +00006634 snprintf((char *)out, sizeof(out), "#x%X", value);
Owen Taylor3473f882001-02-23 17:55:21 +00006635 else
Aleksey Sanin49cc9752002-06-14 17:07:10 +00006636 snprintf((char *)out, sizeof(out), "#%d", value);
Owen Taylor3473f882001-02-23 17:55:21 +00006637 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
6638 (!ctxt->disableSAX))
6639 ctxt->sax->reference(ctxt->userData, out);
6640 }
6641 } else {
6642 /*
6643 * Just encode the value in UTF-8
6644 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00006645 COPY_BUF(0 ,out, i, value);
Owen Taylor3473f882001-02-23 17:55:21 +00006646 out[i] = 0;
6647 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
6648 (!ctxt->disableSAX))
6649 ctxt->sax->characters(ctxt->userData, out, i);
6650 }
Daniel Veillard0161e632008-08-28 15:36:32 +00006651 return;
6652 }
Daniel Veillarda37a6ad2006-10-10 20:05:45 +00006653
Daniel Veillard0161e632008-08-28 15:36:32 +00006654 /*
6655 * We are seeing an entity reference
6656 */
6657 ent = xmlParseEntityRef(ctxt);
6658 if (ent == NULL) return;
6659 if (!ctxt->wellFormed)
6660 return;
6661 was_checked = ent->checked;
6662
6663 /* special case of predefined entities */
6664 if ((ent->name == NULL) ||
6665 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
6666 val = ent->content;
6667 if (val == NULL) return;
6668 /*
6669 * inline the entity.
6670 */
6671 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
6672 (!ctxt->disableSAX))
6673 ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
6674 return;
6675 }
6676
6677 /*
6678 * The first reference to the entity trigger a parsing phase
6679 * where the ent->children is filled with the result from
6680 * the parsing.
6681 */
6682 if (ent->checked == 0) {
6683 unsigned long oldnbent = ctxt->nbentities;
6684
6685 /*
6686 * This is a bit hackish but this seems the best
6687 * way to make sure both SAX and DOM entity support
6688 * behaves okay.
6689 */
6690 void *user_data;
6691 if (ctxt->userData == ctxt)
6692 user_data = NULL;
6693 else
6694 user_data = ctxt->userData;
6695
6696 /*
6697 * Check that this entity is well formed
6698 * 4.3.2: An internal general parsed entity is well-formed
6699 * if its replacement text matches the production labeled
6700 * content.
6701 */
6702 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
6703 ctxt->depth++;
6704 ret = xmlParseBalancedChunkMemoryInternal(ctxt, ent->content,
6705 user_data, &list);
6706 ctxt->depth--;
6707
6708 } else if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
6709 ctxt->depth++;
6710 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt, ctxt->sax,
6711 user_data, ctxt->depth, ent->URI,
6712 ent->ExternalID, &list);
6713 ctxt->depth--;
6714 } else {
6715 ret = XML_ERR_ENTITY_PE_INTERNAL;
6716 xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
6717 "invalid entity type found\n", NULL);
6718 }
6719
6720 /*
6721 * Store the number of entities needing parsing for this entity
6722 * content and do checkings
6723 */
6724 ent->checked = ctxt->nbentities - oldnbent;
6725 if (ret == XML_ERR_ENTITY_LOOP) {
Daniel Veillard4bf899b2008-08-20 17:04:30 +00006726 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
Daniel Veillard0161e632008-08-28 15:36:32 +00006727 xmlFreeNodeList(list);
Daniel Veillard4bf899b2008-08-20 17:04:30 +00006728 return;
6729 }
Daniel Veillard0161e632008-08-28 15:36:32 +00006730 if (xmlParserEntityCheck(ctxt, 0, ent)) {
6731 xmlFreeNodeList(list);
6732 return;
6733 }
Owen Taylor3473f882001-02-23 17:55:21 +00006734
Daniel Veillard0161e632008-08-28 15:36:32 +00006735 if ((ret == XML_ERR_OK) && (list != NULL)) {
6736 if (((ent->etype == XML_INTERNAL_GENERAL_ENTITY) ||
6737 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY))&&
6738 (ent->children == NULL)) {
6739 ent->children = list;
6740 if (ctxt->replaceEntities) {
Owen Taylor3473f882001-02-23 17:55:21 +00006741 /*
Daniel Veillard0161e632008-08-28 15:36:32 +00006742 * Prune it directly in the generated document
6743 * except for single text nodes.
Owen Taylor3473f882001-02-23 17:55:21 +00006744 */
Daniel Veillard0161e632008-08-28 15:36:32 +00006745 if (((list->type == XML_TEXT_NODE) &&
6746 (list->next == NULL)) ||
6747 (ctxt->parseMode == XML_PARSE_READER)) {
6748 list->parent = (xmlNodePtr) ent;
6749 list = NULL;
6750 ent->owner = 1;
6751 } else {
6752 ent->owner = 0;
6753 while (list != NULL) {
6754 list->parent = (xmlNodePtr) ctxt->node;
6755 list->doc = ctxt->myDoc;
6756 if (list->next == NULL)
6757 ent->last = list;
6758 list = list->next;
Owen Taylor3473f882001-02-23 17:55:21 +00006759 }
Daniel Veillard0161e632008-08-28 15:36:32 +00006760 list = ent->children;
6761#ifdef LIBXML_LEGACY_ENABLED
6762 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
6763 xmlAddEntityReference(ent, list, NULL);
6764#endif /* LIBXML_LEGACY_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00006765 }
6766 } else {
Daniel Veillard0161e632008-08-28 15:36:32 +00006767 ent->owner = 1;
6768 while (list != NULL) {
6769 list->parent = (xmlNodePtr) ent;
6770 if (list->next == NULL)
6771 ent->last = list;
6772 list = list->next;
Owen Taylor3473f882001-02-23 17:55:21 +00006773 }
6774 }
Daniel Veillard0161e632008-08-28 15:36:32 +00006775 } else {
6776 xmlFreeNodeList(list);
6777 list = NULL;
Daniel Veillarda37a6ad2006-10-10 20:05:45 +00006778 }
Daniel Veillard0161e632008-08-28 15:36:32 +00006779 } else if ((ret != XML_ERR_OK) &&
6780 (ret != XML_WAR_UNDECLARED_ENTITY)) {
6781 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
6782 "Entity '%s' failed to parse\n", ent->name);
6783 } else if (list != NULL) {
6784 xmlFreeNodeList(list);
6785 list = NULL;
6786 }
6787 if (ent->checked == 0)
6788 ent->checked = 1;
6789 } else if (ent->checked != 1) {
6790 ctxt->nbentities += ent->checked;
6791 }
Daniel Veillarda37a6ad2006-10-10 20:05:45 +00006792
Daniel Veillard0161e632008-08-28 15:36:32 +00006793 /*
6794 * Now that the entity content has been gathered
6795 * provide it to the application, this can take different forms based
6796 * on the parsing modes.
6797 */
6798 if (ent->children == NULL) {
6799 /*
6800 * Probably running in SAX mode and the callbacks don't
6801 * build the entity content. So unless we already went
6802 * though parsing for first checking go though the entity
6803 * content to generate callbacks associated to the entity
6804 */
6805 if (was_checked != 0) {
6806 void *user_data;
Owen Taylor3473f882001-02-23 17:55:21 +00006807 /*
Daniel Veillard0161e632008-08-28 15:36:32 +00006808 * This is a bit hackish but this seems the best
6809 * way to make sure both SAX and DOM entity support
6810 * behaves okay.
Owen Taylor3473f882001-02-23 17:55:21 +00006811 */
Daniel Veillard0161e632008-08-28 15:36:32 +00006812 if (ctxt->userData == ctxt)
6813 user_data = NULL;
6814 else
6815 user_data = ctxt->userData;
6816
6817 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
6818 ctxt->depth++;
6819 ret = xmlParseBalancedChunkMemoryInternal(ctxt,
6820 ent->content, user_data, NULL);
6821 ctxt->depth--;
6822 } else if (ent->etype ==
6823 XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
6824 ctxt->depth++;
6825 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
6826 ctxt->sax, user_data, ctxt->depth,
6827 ent->URI, ent->ExternalID, NULL);
6828 ctxt->depth--;
6829 } else {
6830 ret = XML_ERR_ENTITY_PE_INTERNAL;
6831 xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
6832 "invalid entity type found\n", NULL);
6833 }
6834 if (ret == XML_ERR_ENTITY_LOOP) {
6835 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
6836 return;
6837 }
6838 }
6839 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
6840 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
6841 /*
6842 * Entity reference callback comes second, it's somewhat
6843 * superfluous but a compatibility to historical behaviour
6844 */
6845 ctxt->sax->reference(ctxt->userData, ent->name);
6846 }
6847 return;
6848 }
6849
6850 /*
6851 * If we didn't get any children for the entity being built
6852 */
6853 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
6854 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
6855 /*
6856 * Create a node.
6857 */
6858 ctxt->sax->reference(ctxt->userData, ent->name);
6859 return;
6860 }
6861
6862 if ((ctxt->replaceEntities) || (ent->children == NULL)) {
6863 /*
6864 * There is a problem on the handling of _private for entities
6865 * (bug 155816): Should we copy the content of the field from
6866 * the entity (possibly overwriting some value set by the user
6867 * when a copy is created), should we leave it alone, or should
6868 * we try to take care of different situations? The problem
6869 * is exacerbated by the usage of this field by the xmlReader.
6870 * To fix this bug, we look at _private on the created node
6871 * and, if it's NULL, we copy in whatever was in the entity.
6872 * If it's not NULL we leave it alone. This is somewhat of a
6873 * hack - maybe we should have further tests to determine
6874 * what to do.
6875 */
6876 if ((ctxt->node != NULL) && (ent->children != NULL)) {
6877 /*
6878 * Seems we are generating the DOM content, do
6879 * a simple tree copy for all references except the first
6880 * In the first occurrence list contains the replacement.
6881 * progressive == 2 means we are operating on the Reader
6882 * and since nodes are discarded we must copy all the time.
6883 */
6884 if (((list == NULL) && (ent->owner == 0)) ||
6885 (ctxt->parseMode == XML_PARSE_READER)) {
6886 xmlNodePtr nw = NULL, cur, firstChild = NULL;
6887
6888 /*
6889 * when operating on a reader, the entities definitions
6890 * are always owning the entities subtree.
6891 if (ctxt->parseMode == XML_PARSE_READER)
6892 ent->owner = 1;
6893 */
6894
6895 cur = ent->children;
6896 while (cur != NULL) {
6897 nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
6898 if (nw != NULL) {
6899 if (nw->_private == NULL)
6900 nw->_private = cur->_private;
6901 if (firstChild == NULL){
6902 firstChild = nw;
6903 }
6904 nw = xmlAddChild(ctxt->node, nw);
6905 }
6906 if (cur == ent->last) {
6907 /*
6908 * needed to detect some strange empty
6909 * node cases in the reader tests
6910 */
6911 if ((ctxt->parseMode == XML_PARSE_READER) &&
6912 (nw != NULL) &&
6913 (nw->type == XML_ELEMENT_NODE) &&
6914 (nw->children == NULL))
6915 nw->extra = 1;
6916
6917 break;
6918 }
6919 cur = cur->next;
6920 }
6921#ifdef LIBXML_LEGACY_ENABLED
6922 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
6923 xmlAddEntityReference(ent, firstChild, nw);
6924#endif /* LIBXML_LEGACY_ENABLED */
6925 } else if (list == NULL) {
6926 xmlNodePtr nw = NULL, cur, next, last,
6927 firstChild = NULL;
6928 /*
6929 * Copy the entity child list and make it the new
6930 * entity child list. The goal is to make sure any
6931 * ID or REF referenced will be the one from the
6932 * document content and not the entity copy.
6933 */
6934 cur = ent->children;
6935 ent->children = NULL;
6936 last = ent->last;
6937 ent->last = NULL;
6938 while (cur != NULL) {
6939 next = cur->next;
6940 cur->next = NULL;
6941 cur->parent = NULL;
6942 nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
6943 if (nw != NULL) {
6944 if (nw->_private == NULL)
6945 nw->_private = cur->_private;
6946 if (firstChild == NULL){
6947 firstChild = cur;
6948 }
6949 xmlAddChild((xmlNodePtr) ent, nw);
6950 xmlAddChild(ctxt->node, cur);
6951 }
6952 if (cur == last)
6953 break;
6954 cur = next;
6955 }
Daniel Veillardcba68392008-08-29 12:43:40 +00006956 if (ent->owner == 0)
6957 ent->owner = 1;
Daniel Veillard0161e632008-08-28 15:36:32 +00006958#ifdef LIBXML_LEGACY_ENABLED
6959 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
6960 xmlAddEntityReference(ent, firstChild, nw);
6961#endif /* LIBXML_LEGACY_ENABLED */
6962 } else {
6963 const xmlChar *nbktext;
6964
6965 /*
6966 * the name change is to avoid coalescing of the
6967 * node with a possible previous text one which
6968 * would make ent->children a dangling pointer
6969 */
6970 nbktext = xmlDictLookup(ctxt->dict, BAD_CAST "nbktext",
6971 -1);
6972 if (ent->children->type == XML_TEXT_NODE)
6973 ent->children->name = nbktext;
6974 if ((ent->last != ent->children) &&
6975 (ent->last->type == XML_TEXT_NODE))
6976 ent->last->name = nbktext;
6977 xmlAddChildList(ctxt->node, ent->children);
6978 }
6979
6980 /*
6981 * This is to avoid a nasty side effect, see
6982 * characters() in SAX.c
6983 */
6984 ctxt->nodemem = 0;
6985 ctxt->nodelen = 0;
6986 return;
Owen Taylor3473f882001-02-23 17:55:21 +00006987 }
6988 }
6989}
6990
6991/**
6992 * xmlParseEntityRef:
6993 * @ctxt: an XML parser context
6994 *
6995 * parse ENTITY references declarations
6996 *
6997 * [68] EntityRef ::= '&' Name ';'
6998 *
6999 * [ WFC: Entity Declared ]
7000 * In a document without any DTD, a document with only an internal DTD
7001 * subset which contains no parameter entity references, or a document
7002 * with "standalone='yes'", the Name given in the entity reference
7003 * must match that in an entity declaration, except that well-formed
7004 * documents need not declare any of the following entities: amp, lt,
7005 * gt, apos, quot. The declaration of a parameter entity must precede
7006 * any reference to it. Similarly, the declaration of a general entity
7007 * must precede any reference to it which appears in a default value in an
7008 * attribute-list declaration. Note that if entities are declared in the
7009 * external subset or in external parameter entities, a non-validating
7010 * processor is not obligated to read and process their declarations;
7011 * for such documents, the rule that an entity must be declared is a
7012 * well-formedness constraint only if standalone='yes'.
7013 *
7014 * [ WFC: Parsed Entity ]
7015 * An entity reference must not contain the name of an unparsed entity
7016 *
7017 * Returns the xmlEntityPtr if found, or NULL otherwise.
7018 */
7019xmlEntityPtr
7020xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007021 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00007022 xmlEntityPtr ent = NULL;
7023
7024 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00007025
Daniel Veillard0161e632008-08-28 15:36:32 +00007026 if (RAW != '&')
7027 return(NULL);
7028 NEXT;
7029 name = xmlParseName(ctxt);
7030 if (name == NULL) {
7031 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7032 "xmlParseEntityRef: no name\n");
7033 return(NULL);
7034 }
7035 if (RAW != ';') {
7036 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7037 return(NULL);
7038 }
7039 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00007040
Daniel Veillard0161e632008-08-28 15:36:32 +00007041 /*
7042 * Predefined entites override any extra definition
7043 */
7044 ent = xmlGetPredefinedEntity(name);
7045 if (ent != NULL)
7046 return(ent);
Owen Taylor3473f882001-02-23 17:55:21 +00007047
Daniel Veillard0161e632008-08-28 15:36:32 +00007048 /*
7049 * Increate the number of entity references parsed
7050 */
7051 ctxt->nbentities++;
Owen Taylor3473f882001-02-23 17:55:21 +00007052
Daniel Veillard0161e632008-08-28 15:36:32 +00007053 /*
7054 * Ask first SAX for entity resolution, otherwise try the
7055 * entities which may have stored in the parser context.
7056 */
7057 if (ctxt->sax != NULL) {
7058 if (ctxt->sax->getEntity != NULL)
7059 ent = ctxt->sax->getEntity(ctxt->userData, name);
7060 if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7061 (ctxt->userData==ctxt)) {
7062 ent = xmlSAX2GetEntity(ctxt, name);
Owen Taylor3473f882001-02-23 17:55:21 +00007063 }
7064 }
Daniel Veillard0161e632008-08-28 15:36:32 +00007065 /*
7066 * [ WFC: Entity Declared ]
7067 * In a document without any DTD, a document with only an
7068 * internal DTD subset which contains no parameter entity
7069 * references, or a document with "standalone='yes'", the
7070 * Name given in the entity reference must match that in an
7071 * entity declaration, except that well-formed documents
7072 * need not declare any of the following entities: amp, lt,
7073 * gt, apos, quot.
7074 * The declaration of a parameter entity must precede any
7075 * reference to it.
7076 * Similarly, the declaration of a general entity must
7077 * precede any reference to it which appears in a default
7078 * value in an attribute-list declaration. Note that if
7079 * entities are declared in the external subset or in
7080 * external parameter entities, a non-validating processor
7081 * is not obligated to read and process their declarations;
7082 * for such documents, the rule that an entity must be
7083 * declared is a well-formedness constraint only if
7084 * standalone='yes'.
7085 */
7086 if (ent == NULL) {
7087 if ((ctxt->standalone == 1) ||
7088 ((ctxt->hasExternalSubset == 0) &&
7089 (ctxt->hasPErefs == 0))) {
7090 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7091 "Entity '%s' not defined\n", name);
7092 } else {
7093 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7094 "Entity '%s' not defined\n", name);
7095 if ((ctxt->inSubset == 0) &&
7096 (ctxt->sax != NULL) &&
7097 (ctxt->sax->reference != NULL)) {
7098 ctxt->sax->reference(ctxt->userData, name);
7099 }
7100 }
7101 ctxt->valid = 0;
7102 }
7103
7104 /*
7105 * [ WFC: Parsed Entity ]
7106 * An entity reference must not contain the name of an
7107 * unparsed entity
7108 */
7109 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7110 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7111 "Entity reference to unparsed entity %s\n", name);
7112 }
7113
7114 /*
7115 * [ WFC: No External Entity References ]
7116 * Attribute values cannot contain direct or indirect
7117 * entity references to external entities.
7118 */
7119 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7120 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
7121 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7122 "Attribute references external entity '%s'\n", name);
7123 }
7124 /*
7125 * [ WFC: No < in Attribute Values ]
7126 * The replacement text of any entity referred to directly or
7127 * indirectly in an attribute value (other than "&lt;") must
7128 * not contain a <.
7129 */
7130 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7131 (ent != NULL) && (ent->content != NULL) &&
7132 (xmlStrchr(ent->content, '<'))) {
7133 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
7134 "'<' in entity '%s' is not allowed in attributes values\n", name);
7135 }
7136
7137 /*
7138 * Internal check, no parameter entities here ...
7139 */
7140 else {
7141 switch (ent->etype) {
7142 case XML_INTERNAL_PARAMETER_ENTITY:
7143 case XML_EXTERNAL_PARAMETER_ENTITY:
7144 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
7145 "Attempt to reference the parameter entity '%s'\n",
7146 name);
7147 break;
7148 default:
7149 break;
7150 }
7151 }
7152
7153 /*
7154 * [ WFC: No Recursion ]
7155 * A parsed entity must not contain a recursive reference
7156 * to itself, either directly or indirectly.
7157 * Done somewhere else
7158 */
Owen Taylor3473f882001-02-23 17:55:21 +00007159 return(ent);
7160}
7161
7162/**
7163 * xmlParseStringEntityRef:
7164 * @ctxt: an XML parser context
7165 * @str: a pointer to an index in the string
7166 *
7167 * parse ENTITY references declarations, but this version parses it from
7168 * a string value.
7169 *
7170 * [68] EntityRef ::= '&' Name ';'
7171 *
7172 * [ WFC: Entity Declared ]
7173 * In a document without any DTD, a document with only an internal DTD
7174 * subset which contains no parameter entity references, or a document
7175 * with "standalone='yes'", the Name given in the entity reference
7176 * must match that in an entity declaration, except that well-formed
7177 * documents need not declare any of the following entities: amp, lt,
7178 * gt, apos, quot. The declaration of a parameter entity must precede
7179 * any reference to it. Similarly, the declaration of a general entity
7180 * must precede any reference to it which appears in a default value in an
7181 * attribute-list declaration. Note that if entities are declared in the
7182 * external subset or in external parameter entities, a non-validating
7183 * processor is not obligated to read and process their declarations;
7184 * for such documents, the rule that an entity must be declared is a
7185 * well-formedness constraint only if standalone='yes'.
7186 *
7187 * [ WFC: Parsed Entity ]
7188 * An entity reference must not contain the name of an unparsed entity
7189 *
7190 * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
7191 * is updated to the current location in the string.
7192 */
7193xmlEntityPtr
7194xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
7195 xmlChar *name;
7196 const xmlChar *ptr;
7197 xmlChar cur;
7198 xmlEntityPtr ent = NULL;
7199
7200 if ((str == NULL) || (*str == NULL))
7201 return(NULL);
7202 ptr = *str;
7203 cur = *ptr;
Daniel Veillard0161e632008-08-28 15:36:32 +00007204 if (cur != '&')
7205 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007206
Daniel Veillard0161e632008-08-28 15:36:32 +00007207 ptr++;
7208 cur = *ptr;
7209 name = xmlParseStringName(ctxt, &ptr);
7210 if (name == NULL) {
7211 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7212 "xmlParseStringEntityRef: no name\n");
7213 *str = ptr;
7214 return(NULL);
7215 }
7216 if (*ptr != ';') {
7217 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7218 *str = ptr;
7219 return(NULL);
7220 }
7221 ptr++;
Owen Taylor3473f882001-02-23 17:55:21 +00007222
Owen Taylor3473f882001-02-23 17:55:21 +00007223
Daniel Veillard0161e632008-08-28 15:36:32 +00007224 /*
7225 * Predefined entites override any extra definition
7226 */
7227 ent = xmlGetPredefinedEntity(name);
7228 if (ent != NULL)
7229 return(ent);
Owen Taylor3473f882001-02-23 17:55:21 +00007230
Daniel Veillard0161e632008-08-28 15:36:32 +00007231 /*
7232 * Increate the number of entity references parsed
7233 */
7234 ctxt->nbentities++;
Owen Taylor3473f882001-02-23 17:55:21 +00007235
Daniel Veillard0161e632008-08-28 15:36:32 +00007236 /*
7237 * Ask first SAX for entity resolution, otherwise try the
7238 * entities which may have stored in the parser context.
7239 */
7240 if (ctxt->sax != NULL) {
7241 if (ctxt->sax->getEntity != NULL)
7242 ent = ctxt->sax->getEntity(ctxt->userData, name);
7243 if ((ent == NULL) && (ctxt->userData==ctxt)) {
7244 ent = xmlSAX2GetEntity(ctxt, name);
Owen Taylor3473f882001-02-23 17:55:21 +00007245 }
7246 }
Daniel Veillard0161e632008-08-28 15:36:32 +00007247
7248 /*
7249 * [ WFC: Entity Declared ]
7250 * In a document without any DTD, a document with only an
7251 * internal DTD subset which contains no parameter entity
7252 * references, or a document with "standalone='yes'", the
7253 * Name given in the entity reference must match that in an
7254 * entity declaration, except that well-formed documents
7255 * need not declare any of the following entities: amp, lt,
7256 * gt, apos, quot.
7257 * The declaration of a parameter entity must precede any
7258 * reference to it.
7259 * Similarly, the declaration of a general entity must
7260 * precede any reference to it which appears in a default
7261 * value in an attribute-list declaration. Note that if
7262 * entities are declared in the external subset or in
7263 * external parameter entities, a non-validating processor
7264 * is not obligated to read and process their declarations;
7265 * for such documents, the rule that an entity must be
7266 * declared is a well-formedness constraint only if
7267 * standalone='yes'.
7268 */
7269 if (ent == NULL) {
7270 if ((ctxt->standalone == 1) ||
7271 ((ctxt->hasExternalSubset == 0) &&
7272 (ctxt->hasPErefs == 0))) {
7273 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7274 "Entity '%s' not defined\n", name);
7275 } else {
7276 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7277 "Entity '%s' not defined\n",
7278 name);
7279 }
7280 /* TODO ? check regressions ctxt->valid = 0; */
7281 }
7282
7283 /*
7284 * [ WFC: Parsed Entity ]
7285 * An entity reference must not contain the name of an
7286 * unparsed entity
7287 */
7288 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7289 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7290 "Entity reference to unparsed entity %s\n", name);
7291 }
7292
7293 /*
7294 * [ WFC: No External Entity References ]
7295 * Attribute values cannot contain direct or indirect
7296 * entity references to external entities.
7297 */
7298 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7299 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
7300 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7301 "Attribute references external entity '%s'\n", name);
7302 }
7303 /*
7304 * [ WFC: No < in Attribute Values ]
7305 * The replacement text of any entity referred to directly or
7306 * indirectly in an attribute value (other than "&lt;") must
7307 * not contain a <.
7308 */
7309 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7310 (ent != NULL) && (ent->content != NULL) &&
7311 (xmlStrchr(ent->content, '<'))) {
7312 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
7313 "'<' in entity '%s' is not allowed in attributes values\n",
7314 name);
7315 }
7316
7317 /*
7318 * Internal check, no parameter entities here ...
7319 */
7320 else {
7321 switch (ent->etype) {
7322 case XML_INTERNAL_PARAMETER_ENTITY:
7323 case XML_EXTERNAL_PARAMETER_ENTITY:
7324 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
7325 "Attempt to reference the parameter entity '%s'\n",
7326 name);
7327 break;
7328 default:
7329 break;
7330 }
7331 }
7332
7333 /*
7334 * [ WFC: No Recursion ]
7335 * A parsed entity must not contain a recursive reference
7336 * to itself, either directly or indirectly.
7337 * Done somewhere else
7338 */
7339
7340 xmlFree(name);
Owen Taylor3473f882001-02-23 17:55:21 +00007341 *str = ptr;
7342 return(ent);
7343}
7344
7345/**
7346 * xmlParsePEReference:
7347 * @ctxt: an XML parser context
7348 *
7349 * parse PEReference declarations
7350 * The entity content is handled directly by pushing it's content as
7351 * a new input stream.
7352 *
7353 * [69] PEReference ::= '%' Name ';'
7354 *
7355 * [ WFC: No Recursion ]
7356 * A parsed entity must not contain a recursive
7357 * reference to itself, either directly or indirectly.
7358 *
7359 * [ WFC: Entity Declared ]
7360 * In a document without any DTD, a document with only an internal DTD
7361 * subset which contains no parameter entity references, or a document
7362 * with "standalone='yes'", ... ... The declaration of a parameter
7363 * entity must precede any reference to it...
7364 *
7365 * [ VC: Entity Declared ]
7366 * In a document with an external subset or external parameter entities
7367 * with "standalone='no'", ... ... The declaration of a parameter entity
7368 * must precede any reference to it...
7369 *
7370 * [ WFC: In DTD ]
7371 * Parameter-entity references may only appear in the DTD.
7372 * NOTE: misleading but this is handled.
7373 */
7374void
Daniel Veillard8f597c32003-10-06 08:19:27 +00007375xmlParsePEReference(xmlParserCtxtPtr ctxt)
7376{
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007377 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00007378 xmlEntityPtr entity = NULL;
7379 xmlParserInputPtr input;
7380
Daniel Veillard0161e632008-08-28 15:36:32 +00007381 if (RAW != '%')
7382 return;
7383 NEXT;
7384 name = xmlParseName(ctxt);
7385 if (name == NULL) {
7386 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7387 "xmlParsePEReference: no name\n");
7388 return;
Owen Taylor3473f882001-02-23 17:55:21 +00007389 }
Daniel Veillard0161e632008-08-28 15:36:32 +00007390 if (RAW != ';') {
7391 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7392 return;
7393 }
7394
7395 NEXT;
7396
7397 /*
7398 * Increate the number of entity references parsed
7399 */
7400 ctxt->nbentities++;
7401
7402 /*
7403 * Request the entity from SAX
7404 */
7405 if ((ctxt->sax != NULL) &&
7406 (ctxt->sax->getParameterEntity != NULL))
7407 entity = ctxt->sax->getParameterEntity(ctxt->userData,
7408 name);
7409 if (entity == NULL) {
7410 /*
7411 * [ WFC: Entity Declared ]
7412 * In a document without any DTD, a document with only an
7413 * internal DTD subset which contains no parameter entity
7414 * references, or a document with "standalone='yes'", ...
7415 * ... The declaration of a parameter entity must precede
7416 * any reference to it...
7417 */
7418 if ((ctxt->standalone == 1) ||
7419 ((ctxt->hasExternalSubset == 0) &&
7420 (ctxt->hasPErefs == 0))) {
7421 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7422 "PEReference: %%%s; not found\n",
7423 name);
7424 } else {
7425 /*
7426 * [ VC: Entity Declared ]
7427 * In a document with an external subset or external
7428 * parameter entities with "standalone='no'", ...
7429 * ... The declaration of a parameter entity must
7430 * precede any reference to it...
7431 */
7432 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7433 "PEReference: %%%s; not found\n",
7434 name, NULL);
7435 ctxt->valid = 0;
7436 }
7437 } else {
7438 /*
7439 * Internal checking in case the entity quest barfed
7440 */
7441 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
7442 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
7443 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7444 "Internal: %%%s; is not a parameter entity\n",
7445 name, NULL);
7446 } else if (ctxt->input->free != deallocblankswrapper) {
7447 input = xmlNewBlanksWrapperInputStream(ctxt, entity);
7448 if (xmlPushInput(ctxt, input) < 0)
7449 return;
7450 } else {
7451 /*
7452 * TODO !!!
7453 * handle the extra spaces added before and after
7454 * c.f. http://www.w3.org/TR/REC-xml#as-PE
7455 */
7456 input = xmlNewEntityInputStream(ctxt, entity);
7457 if (xmlPushInput(ctxt, input) < 0)
7458 return;
7459 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
7460 (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
7461 (IS_BLANK_CH(NXT(5)))) {
7462 xmlParseTextDecl(ctxt);
7463 if (ctxt->errNo ==
7464 XML_ERR_UNSUPPORTED_ENCODING) {
7465 /*
7466 * The XML REC instructs us to stop parsing
7467 * right here
7468 */
7469 ctxt->instate = XML_PARSER_EOF;
7470 return;
7471 }
7472 }
7473 }
7474 }
7475 ctxt->hasPErefs = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007476}
7477
7478/**
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00007479 * xmlLoadEntityContent:
7480 * @ctxt: an XML parser context
7481 * @entity: an unloaded system entity
7482 *
7483 * Load the original content of the given system entity from the
7484 * ExternalID/SystemID given. This is to be used for Included in Literal
7485 * http://www.w3.org/TR/REC-xml/#inliteral processing of entities references
7486 *
7487 * Returns 0 in case of success and -1 in case of failure
7488 */
7489static int
7490xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
7491 xmlParserInputPtr input;
7492 xmlBufferPtr buf;
7493 int l, c;
7494 int count = 0;
7495
7496 if ((ctxt == NULL) || (entity == NULL) ||
7497 ((entity->etype != XML_EXTERNAL_PARAMETER_ENTITY) &&
7498 (entity->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY)) ||
7499 (entity->content != NULL)) {
7500 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
7501 "xmlLoadEntityContent parameter error");
7502 return(-1);
7503 }
7504
7505 if (xmlParserDebugEntities)
7506 xmlGenericError(xmlGenericErrorContext,
7507 "Reading %s entity content input\n", entity->name);
7508
7509 buf = xmlBufferCreate();
7510 if (buf == NULL) {
7511 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
7512 "xmlLoadEntityContent parameter error");
7513 return(-1);
7514 }
7515
7516 input = xmlNewEntityInputStream(ctxt, entity);
7517 if (input == NULL) {
7518 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
7519 "xmlLoadEntityContent input error");
7520 xmlBufferFree(buf);
7521 return(-1);
7522 }
7523
7524 /*
7525 * Push the entity as the current input, read char by char
7526 * saving to the buffer until the end of the entity or an error
7527 */
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00007528 if (xmlPushInput(ctxt, input) < 0) {
7529 xmlBufferFree(buf);
7530 return(-1);
7531 }
7532
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00007533 GROW;
7534 c = CUR_CHAR(l);
7535 while ((ctxt->input == input) && (ctxt->input->cur < ctxt->input->end) &&
7536 (IS_CHAR(c))) {
7537 xmlBufferAdd(buf, ctxt->input->cur, l);
7538 if (count++ > 100) {
7539 count = 0;
7540 GROW;
7541 }
7542 NEXTL(l);
7543 c = CUR_CHAR(l);
7544 }
7545
7546 if ((ctxt->input == input) && (ctxt->input->cur >= ctxt->input->end)) {
7547 xmlPopInput(ctxt);
7548 } else if (!IS_CHAR(c)) {
7549 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
7550 "xmlLoadEntityContent: invalid char value %d\n",
7551 c);
7552 xmlBufferFree(buf);
7553 return(-1);
7554 }
7555 entity->content = buf->content;
7556 buf->content = NULL;
7557 xmlBufferFree(buf);
7558
7559 return(0);
7560}
7561
7562/**
Owen Taylor3473f882001-02-23 17:55:21 +00007563 * xmlParseStringPEReference:
7564 * @ctxt: an XML parser context
7565 * @str: a pointer to an index in the string
7566 *
7567 * parse PEReference declarations
7568 *
7569 * [69] PEReference ::= '%' Name ';'
7570 *
7571 * [ WFC: No Recursion ]
7572 * A parsed entity must not contain a recursive
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00007573 * reference to itself, either directly or indirectly.
Owen Taylor3473f882001-02-23 17:55:21 +00007574 *
7575 * [ WFC: Entity Declared ]
7576 * In a document without any DTD, a document with only an internal DTD
7577 * subset which contains no parameter entity references, or a document
7578 * with "standalone='yes'", ... ... The declaration of a parameter
7579 * entity must precede any reference to it...
7580 *
7581 * [ VC: Entity Declared ]
7582 * In a document with an external subset or external parameter entities
7583 * with "standalone='no'", ... ... The declaration of a parameter entity
7584 * must precede any reference to it...
7585 *
7586 * [ WFC: In DTD ]
7587 * Parameter-entity references may only appear in the DTD.
7588 * NOTE: misleading but this is handled.
7589 *
7590 * Returns the string of the entity content.
7591 * str is updated to the current value of the index
7592 */
7593xmlEntityPtr
7594xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
7595 const xmlChar *ptr;
7596 xmlChar cur;
7597 xmlChar *name;
7598 xmlEntityPtr entity = NULL;
7599
7600 if ((str == NULL) || (*str == NULL)) return(NULL);
7601 ptr = *str;
7602 cur = *ptr;
Daniel Veillard0161e632008-08-28 15:36:32 +00007603 if (cur != '%')
7604 return(NULL);
7605 ptr++;
7606 cur = *ptr;
7607 name = xmlParseStringName(ctxt, &ptr);
7608 if (name == NULL) {
7609 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7610 "xmlParseStringPEReference: no name\n");
7611 *str = ptr;
7612 return(NULL);
7613 }
7614 cur = *ptr;
7615 if (cur != ';') {
7616 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7617 xmlFree(name);
7618 *str = ptr;
7619 return(NULL);
7620 }
7621 ptr++;
7622
7623 /*
7624 * Increate the number of entity references parsed
7625 */
7626 ctxt->nbentities++;
7627
7628 /*
7629 * Request the entity from SAX
7630 */
7631 if ((ctxt->sax != NULL) &&
7632 (ctxt->sax->getParameterEntity != NULL))
7633 entity = ctxt->sax->getParameterEntity(ctxt->userData,
7634 name);
7635 if (entity == NULL) {
7636 /*
7637 * [ WFC: Entity Declared ]
7638 * In a document without any DTD, a document with only an
7639 * internal DTD subset which contains no parameter entity
7640 * references, or a document with "standalone='yes'", ...
7641 * ... The declaration of a parameter entity must precede
7642 * any reference to it...
7643 */
7644 if ((ctxt->standalone == 1) ||
7645 ((ctxt->hasExternalSubset == 0) && (ctxt->hasPErefs == 0))) {
7646 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7647 "PEReference: %%%s; not found\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00007648 } else {
Daniel Veillard0161e632008-08-28 15:36:32 +00007649 /*
7650 * [ VC: Entity Declared ]
7651 * In a document with an external subset or external
7652 * parameter entities with "standalone='no'", ...
7653 * ... The declaration of a parameter entity must
7654 * precede any reference to it...
7655 */
7656 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7657 "PEReference: %%%s; not found\n",
7658 name, NULL);
7659 ctxt->valid = 0;
7660 }
7661 } else {
7662 /*
7663 * Internal checking in case the entity quest barfed
7664 */
7665 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
7666 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
7667 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7668 "%%%s; is not a parameter entity\n",
7669 name, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007670 }
7671 }
Daniel Veillard0161e632008-08-28 15:36:32 +00007672 ctxt->hasPErefs = 1;
7673 xmlFree(name);
Owen Taylor3473f882001-02-23 17:55:21 +00007674 *str = ptr;
7675 return(entity);
7676}
7677
7678/**
7679 * xmlParseDocTypeDecl:
7680 * @ctxt: an XML parser context
7681 *
7682 * parse a DOCTYPE declaration
7683 *
7684 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
7685 * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
7686 *
7687 * [ VC: Root Element Type ]
7688 * The Name in the document type declaration must match the element
7689 * type of the root element.
7690 */
7691
7692void
7693xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007694 const xmlChar *name = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00007695 xmlChar *ExternalID = NULL;
7696 xmlChar *URI = NULL;
7697
7698 /*
7699 * We know that '<!DOCTYPE' has been detected.
7700 */
7701 SKIP(9);
7702
7703 SKIP_BLANKS;
7704
7705 /*
7706 * Parse the DOCTYPE name.
7707 */
7708 name = xmlParseName(ctxt);
7709 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007710 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7711 "xmlParseDocTypeDecl : no DOCTYPE name !\n");
Owen Taylor3473f882001-02-23 17:55:21 +00007712 }
7713 ctxt->intSubName = name;
7714
7715 SKIP_BLANKS;
7716
7717 /*
7718 * Check for SystemID and ExternalID
7719 */
7720 URI = xmlParseExternalID(ctxt, &ExternalID, 1);
7721
7722 if ((URI != NULL) || (ExternalID != NULL)) {
7723 ctxt->hasExternalSubset = 1;
7724 }
7725 ctxt->extSubURI = URI;
7726 ctxt->extSubSystem = ExternalID;
7727
7728 SKIP_BLANKS;
7729
7730 /*
7731 * Create and update the internal subset.
7732 */
7733 if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
7734 (!ctxt->disableSAX))
7735 ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
7736
7737 /*
7738 * Is there any internal subset declarations ?
7739 * they are handled separately in xmlParseInternalSubset()
7740 */
7741 if (RAW == '[')
7742 return;
7743
7744 /*
7745 * We should be at the end of the DOCTYPE declaration.
7746 */
7747 if (RAW != '>') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007748 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007749 }
7750 NEXT;
7751}
7752
7753/**
Daniel Veillardcbaf3992001-12-31 16:16:02 +00007754 * xmlParseInternalSubset:
Owen Taylor3473f882001-02-23 17:55:21 +00007755 * @ctxt: an XML parser context
7756 *
7757 * parse the internal subset declaration
7758 *
7759 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
7760 */
7761
Daniel Veillard56a4cb82001-03-24 17:00:36 +00007762static void
Owen Taylor3473f882001-02-23 17:55:21 +00007763xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
7764 /*
7765 * Is there any DTD definition ?
7766 */
7767 if (RAW == '[') {
7768 ctxt->instate = XML_PARSER_DTD;
7769 NEXT;
7770 /*
7771 * Parse the succession of Markup declarations and
7772 * PEReferences.
7773 * Subsequence (markupdecl | PEReference | S)*
7774 */
7775 while (RAW != ']') {
7776 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00007777 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00007778
7779 SKIP_BLANKS;
7780 xmlParseMarkupDecl(ctxt);
7781 xmlParsePEReference(ctxt);
7782
7783 /*
7784 * Pop-up of finished entities.
7785 */
7786 while ((RAW == 0) && (ctxt->inputNr > 1))
7787 xmlPopInput(ctxt);
7788
7789 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007790 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
Owen Taylor3473f882001-02-23 17:55:21 +00007791 "xmlParseInternalSubset: error detected in Markup declaration\n");
Owen Taylor3473f882001-02-23 17:55:21 +00007792 break;
7793 }
7794 }
7795 if (RAW == ']') {
7796 NEXT;
7797 SKIP_BLANKS;
7798 }
7799 }
7800
7801 /*
7802 * We should be at the end of the DOCTYPE declaration.
7803 */
7804 if (RAW != '>') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007805 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007806 }
7807 NEXT;
7808}
7809
Daniel Veillard81273902003-09-30 00:43:48 +00007810#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +00007811/**
7812 * xmlParseAttribute:
7813 * @ctxt: an XML parser context
7814 * @value: a xmlChar ** used to store the value of the attribute
7815 *
7816 * parse an attribute
7817 *
7818 * [41] Attribute ::= Name Eq AttValue
7819 *
7820 * [ WFC: No External Entity References ]
7821 * Attribute values cannot contain direct or indirect entity references
7822 * to external entities.
7823 *
7824 * [ WFC: No < in Attribute Values ]
7825 * The replacement text of any entity referred to directly or indirectly in
7826 * an attribute value (other than "&lt;") must not contain a <.
7827 *
7828 * [ VC: Attribute Value Type ]
7829 * The attribute must have been declared; the value must be of the type
7830 * declared for it.
7831 *
7832 * [25] Eq ::= S? '=' S?
7833 *
7834 * With namespace:
7835 *
7836 * [NS 11] Attribute ::= QName Eq AttValue
7837 *
7838 * Also the case QName == xmlns:??? is handled independently as a namespace
7839 * definition.
7840 *
7841 * Returns the attribute name, and the value in *value.
7842 */
7843
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007844const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00007845xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007846 const xmlChar *name;
7847 xmlChar *val;
Owen Taylor3473f882001-02-23 17:55:21 +00007848
7849 *value = NULL;
Daniel Veillard878eab02002-02-19 13:46:09 +00007850 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00007851 name = xmlParseName(ctxt);
7852 if (name == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00007853 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007854 "error parsing attribute name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00007855 return(NULL);
7856 }
7857
7858 /*
7859 * read the value
7860 */
7861 SKIP_BLANKS;
7862 if (RAW == '=') {
7863 NEXT;
7864 SKIP_BLANKS;
7865 val = xmlParseAttValue(ctxt);
7866 ctxt->instate = XML_PARSER_CONTENT;
7867 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00007868 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
Owen Taylor3473f882001-02-23 17:55:21 +00007869 "Specification mandate value for attribute %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00007870 return(NULL);
7871 }
7872
7873 /*
7874 * Check that xml:lang conforms to the specification
7875 * No more registered as an error, just generate a warning now
7876 * since this was deprecated in XML second edition
7877 */
7878 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
7879 if (!xmlCheckLanguageID(val)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00007880 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
7881 "Malformed value for xml:lang : %s\n",
7882 val, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007883 }
7884 }
7885
7886 /*
7887 * Check that xml:space conforms to the specification
7888 */
7889 if (xmlStrEqual(name, BAD_CAST "xml:space")) {
7890 if (xmlStrEqual(val, BAD_CAST "default"))
7891 *(ctxt->space) = 0;
7892 else if (xmlStrEqual(val, BAD_CAST "preserve"))
7893 *(ctxt->space) = 1;
7894 else {
Daniel Veillardd8925572005-06-08 22:34:55 +00007895 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
Daniel Veillard642104e2003-03-26 16:32:05 +00007896"Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
Daniel Veillardd8925572005-06-08 22:34:55 +00007897 val, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007898 }
7899 }
7900
7901 *value = val;
7902 return(name);
7903}
7904
7905/**
7906 * xmlParseStartTag:
7907 * @ctxt: an XML parser context
7908 *
7909 * parse a start of tag either for rule element or
7910 * EmptyElement. In both case we don't parse the tag closing chars.
7911 *
7912 * [40] STag ::= '<' Name (S Attribute)* S? '>'
7913 *
7914 * [ WFC: Unique Att Spec ]
7915 * No attribute name may appear more than once in the same start-tag or
7916 * empty-element tag.
7917 *
7918 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
7919 *
7920 * [ WFC: Unique Att Spec ]
7921 * No attribute name may appear more than once in the same start-tag or
7922 * empty-element tag.
7923 *
7924 * With namespace:
7925 *
7926 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
7927 *
7928 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
7929 *
7930 * Returns the element name parsed
7931 */
7932
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007933const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00007934xmlParseStartTag(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007935 const xmlChar *name;
7936 const xmlChar *attname;
Owen Taylor3473f882001-02-23 17:55:21 +00007937 xmlChar *attvalue;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00007938 const xmlChar **atts = ctxt->atts;
Owen Taylor3473f882001-02-23 17:55:21 +00007939 int nbatts = 0;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00007940 int maxatts = ctxt->maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00007941 int i;
7942
7943 if (RAW != '<') return(NULL);
Daniel Veillard21a0f912001-02-25 19:54:14 +00007944 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00007945
7946 name = xmlParseName(ctxt);
7947 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007948 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00007949 "xmlParseStartTag: invalid element name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00007950 return(NULL);
7951 }
7952
7953 /*
7954 * Now parse the attributes, it ends up with the ending
7955 *
7956 * (S Attribute)* S?
7957 */
7958 SKIP_BLANKS;
7959 GROW;
7960
Daniel Veillard21a0f912001-02-25 19:54:14 +00007961 while ((RAW != '>') &&
7962 ((RAW != '/') || (NXT(1) != '>')) &&
Daniel Veillard73b013f2003-09-30 12:36:01 +00007963 (IS_BYTE_CHAR(RAW))) {
Owen Taylor3473f882001-02-23 17:55:21 +00007964 const xmlChar *q = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00007965 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00007966
7967 attname = xmlParseAttribute(ctxt, &attvalue);
7968 if ((attname != NULL) && (attvalue != NULL)) {
7969 /*
7970 * [ WFC: Unique Att Spec ]
7971 * No attribute name may appear more than once in the same
7972 * start-tag or empty-element tag.
7973 */
7974 for (i = 0; i < nbatts;i += 2) {
7975 if (xmlStrEqual(atts[i], attname)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007976 xmlErrAttributeDup(ctxt, NULL, attname);
Owen Taylor3473f882001-02-23 17:55:21 +00007977 xmlFree(attvalue);
7978 goto failed;
7979 }
7980 }
Owen Taylor3473f882001-02-23 17:55:21 +00007981 /*
7982 * Add the pair to atts
7983 */
7984 if (atts == NULL) {
Daniel Veillard6155d8a2003-08-19 15:01:28 +00007985 maxatts = 22; /* allow for 10 attrs by default */
7986 atts = (const xmlChar **)
7987 xmlMalloc(maxatts * sizeof(xmlChar *));
Owen Taylor3473f882001-02-23 17:55:21 +00007988 if (atts == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007989 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00007990 if (attvalue != NULL)
7991 xmlFree(attvalue);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00007992 goto failed;
Owen Taylor3473f882001-02-23 17:55:21 +00007993 }
Daniel Veillard6155d8a2003-08-19 15:01:28 +00007994 ctxt->atts = atts;
7995 ctxt->maxatts = maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00007996 } else if (nbatts + 4 > maxatts) {
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00007997 const xmlChar **n;
7998
Owen Taylor3473f882001-02-23 17:55:21 +00007999 maxatts *= 2;
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008000 n = (const xmlChar **) xmlRealloc((void *) atts,
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008001 maxatts * sizeof(const xmlChar *));
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008002 if (n == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008003 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008004 if (attvalue != NULL)
8005 xmlFree(attvalue);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008006 goto failed;
Owen Taylor3473f882001-02-23 17:55:21 +00008007 }
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008008 atts = n;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008009 ctxt->atts = atts;
8010 ctxt->maxatts = maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00008011 }
8012 atts[nbatts++] = attname;
8013 atts[nbatts++] = attvalue;
8014 atts[nbatts] = NULL;
8015 atts[nbatts + 1] = NULL;
8016 } else {
Owen Taylor3473f882001-02-23 17:55:21 +00008017 if (attvalue != NULL)
8018 xmlFree(attvalue);
8019 }
8020
8021failed:
8022
Daniel Veillard3772de32002-12-17 10:31:45 +00008023 GROW
Owen Taylor3473f882001-02-23 17:55:21 +00008024 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
8025 break;
William M. Brack76e95df2003-10-18 16:20:14 +00008026 if (!IS_BLANK_CH(RAW)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008027 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
8028 "attributes construct error\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008029 }
8030 SKIP_BLANKS;
Daniel Veillard02111c12003-02-24 19:14:52 +00008031 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
8032 (attname == NULL) && (attvalue == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008033 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
8034 "xmlParseStartTag: problem parsing attributes\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008035 break;
8036 }
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008037 SHRINK;
Owen Taylor3473f882001-02-23 17:55:21 +00008038 GROW;
8039 }
8040
8041 /*
8042 * SAX: Start of Element !
8043 */
8044 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008045 (!ctxt->disableSAX)) {
8046 if (nbatts > 0)
8047 ctxt->sax->startElement(ctxt->userData, name, atts);
8048 else
8049 ctxt->sax->startElement(ctxt->userData, name, NULL);
8050 }
Owen Taylor3473f882001-02-23 17:55:21 +00008051
8052 if (atts != NULL) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008053 /* Free only the content strings */
8054 for (i = 1;i < nbatts;i+=2)
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008055 if (atts[i] != NULL)
8056 xmlFree((xmlChar *) atts[i]);
Owen Taylor3473f882001-02-23 17:55:21 +00008057 }
8058 return(name);
8059}
8060
8061/**
Daniel Veillard0fb18932003-09-07 09:14:37 +00008062 * xmlParseEndTag1:
Owen Taylor3473f882001-02-23 17:55:21 +00008063 * @ctxt: an XML parser context
Daniel Veillard0fb18932003-09-07 09:14:37 +00008064 * @line: line of the start tag
8065 * @nsNr: number of namespaces on the start tag
Owen Taylor3473f882001-02-23 17:55:21 +00008066 *
8067 * parse an end of tag
8068 *
8069 * [42] ETag ::= '</' Name S? '>'
8070 *
8071 * With namespace
8072 *
8073 * [NS 9] ETag ::= '</' QName S? '>'
8074 */
8075
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00008076static void
Daniel Veillard0fb18932003-09-07 09:14:37 +00008077xmlParseEndTag1(xmlParserCtxtPtr ctxt, int line) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008078 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00008079
8080 GROW;
8081 if ((RAW != '<') || (NXT(1) != '/')) {
Daniel Veillardf403d292003-10-05 13:51:35 +00008082 xmlFatalErrMsg(ctxt, XML_ERR_LTSLASH_REQUIRED,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008083 "xmlParseEndTag: '</' not found\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008084 return;
8085 }
8086 SKIP(2);
8087
Daniel Veillard46de64e2002-05-29 08:21:33 +00008088 name = xmlParseNameAndCompare(ctxt,ctxt->name);
Owen Taylor3473f882001-02-23 17:55:21 +00008089
8090 /*
8091 * We should definitely be at the ending "S? '>'" part
8092 */
8093 GROW;
8094 SKIP_BLANKS;
Daniel Veillard73b013f2003-09-30 12:36:01 +00008095 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008096 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008097 } else
Daniel Veillard21a0f912001-02-25 19:54:14 +00008098 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00008099
8100 /*
8101 * [ WFC: Element Type Match ]
8102 * The Name in an element's end-tag must match the element type in the
8103 * start-tag.
8104 *
8105 */
Daniel Veillard46de64e2002-05-29 08:21:33 +00008106 if (name != (xmlChar*)1) {
Daniel Veillardf403d292003-10-05 13:51:35 +00008107 if (name == NULL) name = BAD_CAST "unparseable";
8108 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00008109 "Opening and ending tag mismatch: %s line %d and %s\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00008110 ctxt->name, line, name);
Owen Taylor3473f882001-02-23 17:55:21 +00008111 }
8112
8113 /*
8114 * SAX: End of Tag
8115 */
8116 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
8117 (!ctxt->disableSAX))
Daniel Veillard46de64e2002-05-29 08:21:33 +00008118 ctxt->sax->endElement(ctxt->userData, ctxt->name);
Owen Taylor3473f882001-02-23 17:55:21 +00008119
Daniel Veillarde57ec792003-09-10 10:50:59 +00008120 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00008121 spacePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00008122 return;
8123}
8124
8125/**
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00008126 * xmlParseEndTag:
8127 * @ctxt: an XML parser context
8128 *
8129 * parse an end of tag
8130 *
8131 * [42] ETag ::= '</' Name S? '>'
8132 *
8133 * With namespace
8134 *
8135 * [NS 9] ETag ::= '</' QName S? '>'
8136 */
8137
8138void
8139xmlParseEndTag(xmlParserCtxtPtr ctxt) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00008140 xmlParseEndTag1(ctxt, 0);
8141}
Daniel Veillard81273902003-09-30 00:43:48 +00008142#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillard0fb18932003-09-07 09:14:37 +00008143
8144/************************************************************************
8145 * *
8146 * SAX 2 specific operations *
8147 * *
8148 ************************************************************************/
8149
Daniel Veillard0fb18932003-09-07 09:14:37 +00008150/*
8151 * xmlGetNamespace:
8152 * @ctxt: an XML parser context
8153 * @prefix: the prefix to lookup
8154 *
8155 * Lookup the namespace name for the @prefix (which ca be NULL)
8156 * The prefix must come from the @ctxt->dict dictionnary
8157 *
8158 * Returns the namespace name or NULL if not bound
8159 */
8160static const xmlChar *
8161xmlGetNamespace(xmlParserCtxtPtr ctxt, const xmlChar *prefix) {
8162 int i;
8163
Daniel Veillarde57ec792003-09-10 10:50:59 +00008164 if (prefix == ctxt->str_xml) return(ctxt->str_xml_ns);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008165 for (i = ctxt->nsNr - 2;i >= 0;i-=2)
Daniel Veillarde57ec792003-09-10 10:50:59 +00008166 if (ctxt->nsTab[i] == prefix) {
8167 if ((prefix == NULL) && (*ctxt->nsTab[i + 1] == 0))
8168 return(NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008169 return(ctxt->nsTab[i + 1]);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008170 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008171 return(NULL);
8172}
8173
8174/**
Daniel Veillard0fb18932003-09-07 09:14:37 +00008175 * xmlParseQName:
8176 * @ctxt: an XML parser context
8177 * @prefix: pointer to store the prefix part
8178 *
8179 * parse an XML Namespace QName
8180 *
8181 * [6] QName ::= (Prefix ':')? LocalPart
8182 * [7] Prefix ::= NCName
8183 * [8] LocalPart ::= NCName
8184 *
8185 * Returns the Name parsed or NULL
8186 */
8187
8188static const xmlChar *
8189xmlParseQName(xmlParserCtxtPtr ctxt, const xmlChar **prefix) {
8190 const xmlChar *l, *p;
8191
8192 GROW;
8193
8194 l = xmlParseNCName(ctxt);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008195 if (l == NULL) {
8196 if (CUR == ':') {
8197 l = xmlParseName(ctxt);
8198 if (l != NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008199 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8200 "Failed to parse QName '%s'\n", l, NULL, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008201 *prefix = NULL;
8202 return(l);
8203 }
8204 }
8205 return(NULL);
8206 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008207 if (CUR == ':') {
8208 NEXT;
8209 p = l;
8210 l = xmlParseNCName(ctxt);
8211 if (l == NULL) {
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008212 xmlChar *tmp;
8213
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008214 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8215 "Failed to parse QName '%s:'\n", p, NULL, NULL);
Daniel Veillardae0765b2008-07-31 19:54:59 +00008216 l = xmlParseNmtoken(ctxt);
8217 if (l == NULL)
8218 tmp = xmlBuildQName(BAD_CAST "", p, NULL, 0);
8219 else {
8220 tmp = xmlBuildQName(l, p, NULL, 0);
8221 xmlFree((char *)l);
8222 }
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008223 p = xmlDictLookup(ctxt->dict, tmp, -1);
8224 if (tmp != NULL) xmlFree(tmp);
8225 *prefix = NULL;
8226 return(p);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008227 }
8228 if (CUR == ':') {
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008229 xmlChar *tmp;
8230
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008231 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8232 "Failed to parse QName '%s:%s:'\n", p, l, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008233 NEXT;
8234 tmp = (xmlChar *) xmlParseName(ctxt);
8235 if (tmp != NULL) {
8236 tmp = xmlBuildQName(tmp, l, NULL, 0);
8237 l = xmlDictLookup(ctxt->dict, tmp, -1);
8238 if (tmp != NULL) xmlFree(tmp);
8239 *prefix = p;
8240 return(l);
8241 }
8242 tmp = xmlBuildQName(BAD_CAST "", l, NULL, 0);
8243 l = xmlDictLookup(ctxt->dict, tmp, -1);
8244 if (tmp != NULL) xmlFree(tmp);
8245 *prefix = p;
8246 return(l);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008247 }
8248 *prefix = p;
8249 } else
8250 *prefix = NULL;
8251 return(l);
8252}
8253
8254/**
8255 * xmlParseQNameAndCompare:
8256 * @ctxt: an XML parser context
8257 * @name: the localname
8258 * @prefix: the prefix, if any.
8259 *
8260 * parse an XML name and compares for match
8261 * (specialized for endtag parsing)
8262 *
8263 * Returns NULL for an illegal name, (xmlChar*) 1 for success
8264 * and the name for mismatch
8265 */
8266
8267static const xmlChar *
8268xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *name,
8269 xmlChar const *prefix) {
8270 const xmlChar *cmp = name;
8271 const xmlChar *in;
8272 const xmlChar *ret;
8273 const xmlChar *prefix2;
8274
8275 if (prefix == NULL) return(xmlParseNameAndCompare(ctxt, name));
8276
8277 GROW;
8278 in = ctxt->input->cur;
8279
8280 cmp = prefix;
8281 while (*in != 0 && *in == *cmp) {
8282 ++in;
8283 ++cmp;
8284 }
8285 if ((*cmp == 0) && (*in == ':')) {
8286 in++;
8287 cmp = name;
8288 while (*in != 0 && *in == *cmp) {
8289 ++in;
8290 ++cmp;
8291 }
William M. Brack76e95df2003-10-18 16:20:14 +00008292 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00008293 /* success */
8294 ctxt->input->cur = in;
8295 return((const xmlChar*) 1);
8296 }
8297 }
8298 /*
8299 * all strings coms from the dictionary, equality can be done directly
8300 */
8301 ret = xmlParseQName (ctxt, &prefix2);
8302 if ((ret == name) && (prefix == prefix2))
8303 return((const xmlChar*) 1);
8304 return ret;
8305}
8306
8307/**
8308 * xmlParseAttValueInternal:
8309 * @ctxt: an XML parser context
8310 * @len: attribute len result
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008311 * @alloc: whether the attribute was reallocated as a new string
8312 * @normalize: if 1 then further non-CDATA normalization must be done
Daniel Veillard0fb18932003-09-07 09:14:37 +00008313 *
8314 * parse a value for an attribute.
8315 * NOTE: if no normalization is needed, the routine will return pointers
8316 * directly from the data buffer.
8317 *
8318 * 3.3.3 Attribute-Value Normalization:
8319 * Before the value of an attribute is passed to the application or
8320 * checked for validity, the XML processor must normalize it as follows:
8321 * - a character reference is processed by appending the referenced
8322 * character to the attribute value
8323 * - an entity reference is processed by recursively processing the
8324 * replacement text of the entity
8325 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
8326 * appending #x20 to the normalized value, except that only a single
8327 * #x20 is appended for a "#xD#xA" sequence that is part of an external
8328 * parsed entity or the literal entity value of an internal parsed entity
8329 * - other characters are processed by appending them to the normalized value
8330 * If the declared value is not CDATA, then the XML processor must further
8331 * process the normalized attribute value by discarding any leading and
8332 * trailing space (#x20) characters, and by replacing sequences of space
8333 * (#x20) characters by a single space (#x20) character.
8334 * All attributes for which no declaration has been read should be treated
8335 * by a non-validating parser as if declared CDATA.
8336 *
8337 * Returns the AttValue parsed or NULL. The value has to be freed by the
8338 * caller if it was copied, this can be detected by val[*len] == 0.
8339 */
8340
8341static xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008342xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *len, int *alloc,
8343 int normalize)
Daniel Veillarde57ec792003-09-10 10:50:59 +00008344{
Daniel Veillard0fb18932003-09-07 09:14:37 +00008345 xmlChar limit = 0;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008346 const xmlChar *in = NULL, *start, *end, *last;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008347 xmlChar *ret = NULL;
8348
8349 GROW;
8350 in = (xmlChar *) CUR_PTR;
8351 if (*in != '"' && *in != '\'') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008352 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008353 return (NULL);
8354 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008355 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008356
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008357 /*
8358 * try to handle in this routine the most common case where no
8359 * allocation of a new string is required and where content is
8360 * pure ASCII.
8361 */
8362 limit = *in++;
8363 end = ctxt->input->end;
8364 start = in;
8365 if (in >= end) {
8366 const xmlChar *oldbase = ctxt->input->base;
8367 GROW;
8368 if (oldbase != ctxt->input->base) {
8369 long delta = ctxt->input->base - oldbase;
8370 start = start + delta;
8371 in = in + delta;
8372 }
8373 end = ctxt->input->end;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008374 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008375 if (normalize) {
8376 /*
8377 * Skip any leading spaces
8378 */
8379 while ((in < end) && (*in != limit) &&
8380 ((*in == 0x20) || (*in == 0x9) ||
8381 (*in == 0xA) || (*in == 0xD))) {
8382 in++;
8383 start = in;
8384 if (in >= end) {
8385 const xmlChar *oldbase = ctxt->input->base;
8386 GROW;
8387 if (oldbase != ctxt->input->base) {
8388 long delta = ctxt->input->base - oldbase;
8389 start = start + delta;
8390 in = in + delta;
8391 }
8392 end = ctxt->input->end;
8393 }
8394 }
8395 while ((in < end) && (*in != limit) && (*in >= 0x20) &&
8396 (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
8397 if ((*in++ == 0x20) && (*in == 0x20)) break;
8398 if (in >= end) {
8399 const xmlChar *oldbase = ctxt->input->base;
8400 GROW;
8401 if (oldbase != ctxt->input->base) {
8402 long delta = ctxt->input->base - oldbase;
8403 start = start + delta;
8404 in = in + delta;
8405 }
8406 end = ctxt->input->end;
8407 }
8408 }
8409 last = in;
8410 /*
8411 * skip the trailing blanks
8412 */
Daniel Veillardc6e20e42003-09-11 16:30:26 +00008413 while ((last[-1] == 0x20) && (last > start)) last--;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008414 while ((in < end) && (*in != limit) &&
8415 ((*in == 0x20) || (*in == 0x9) ||
8416 (*in == 0xA) || (*in == 0xD))) {
8417 in++;
8418 if (in >= end) {
8419 const xmlChar *oldbase = ctxt->input->base;
8420 GROW;
8421 if (oldbase != ctxt->input->base) {
8422 long delta = ctxt->input->base - oldbase;
8423 start = start + delta;
8424 in = in + delta;
8425 last = last + delta;
8426 }
8427 end = ctxt->input->end;
8428 }
8429 }
8430 if (*in != limit) goto need_complex;
8431 } else {
8432 while ((in < end) && (*in != limit) && (*in >= 0x20) &&
8433 (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
8434 in++;
8435 if (in >= end) {
8436 const xmlChar *oldbase = ctxt->input->base;
8437 GROW;
8438 if (oldbase != ctxt->input->base) {
8439 long delta = ctxt->input->base - oldbase;
8440 start = start + delta;
8441 in = in + delta;
8442 }
8443 end = ctxt->input->end;
8444 }
8445 }
8446 last = in;
8447 if (*in != limit) goto need_complex;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008448 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008449 in++;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008450 if (len != NULL) {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008451 *len = last - start;
8452 ret = (xmlChar *) start;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008453 } else {
Daniel Veillarde57ec792003-09-10 10:50:59 +00008454 if (alloc) *alloc = 1;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008455 ret = xmlStrndup(start, last - start);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008456 }
8457 CUR_PTR = in;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008458 if (alloc) *alloc = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008459 return ret;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008460need_complex:
8461 if (alloc) *alloc = 1;
8462 return xmlParseAttValueComplex(ctxt, len, normalize);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008463}
8464
8465/**
8466 * xmlParseAttribute2:
8467 * @ctxt: an XML parser context
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008468 * @pref: the element prefix
8469 * @elem: the element name
8470 * @prefix: a xmlChar ** used to store the value of the attribute prefix
Daniel Veillard0fb18932003-09-07 09:14:37 +00008471 * @value: a xmlChar ** used to store the value of the attribute
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008472 * @len: an int * to save the length of the attribute
8473 * @alloc: an int * to indicate if the attribute was allocated
Daniel Veillard0fb18932003-09-07 09:14:37 +00008474 *
8475 * parse an attribute in the new SAX2 framework.
8476 *
8477 * Returns the attribute name, and the value in *value, .
8478 */
8479
8480static const xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008481xmlParseAttribute2(xmlParserCtxtPtr ctxt,
Daniel Veillard97c9ce22008-03-25 16:52:41 +00008482 const xmlChar * pref, const xmlChar * elem,
8483 const xmlChar ** prefix, xmlChar ** value,
8484 int *len, int *alloc)
8485{
Daniel Veillard0fb18932003-09-07 09:14:37 +00008486 const xmlChar *name;
Daniel Veillardd8925572005-06-08 22:34:55 +00008487 xmlChar *val, *internal_val = NULL;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008488 int normalize = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008489
8490 *value = NULL;
8491 GROW;
8492 name = xmlParseQName(ctxt, prefix);
8493 if (name == NULL) {
Daniel Veillard97c9ce22008-03-25 16:52:41 +00008494 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8495 "error parsing attribute name\n");
8496 return (NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008497 }
8498
8499 /*
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008500 * get the type if needed
8501 */
8502 if (ctxt->attsSpecial != NULL) {
8503 int type;
8504
8505 type = (int) (long) xmlHashQLookup2(ctxt->attsSpecial,
Daniel Veillard97c9ce22008-03-25 16:52:41 +00008506 pref, elem, *prefix, name);
8507 if (type != 0)
8508 normalize = 1;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008509 }
8510
8511 /*
Daniel Veillard0fb18932003-09-07 09:14:37 +00008512 * read the value
8513 */
8514 SKIP_BLANKS;
8515 if (RAW == '=') {
8516 NEXT;
Daniel Veillard97c9ce22008-03-25 16:52:41 +00008517 SKIP_BLANKS;
8518 val = xmlParseAttValueInternal(ctxt, len, alloc, normalize);
8519 if (normalize) {
8520 /*
8521 * Sometimes a second normalisation pass for spaces is needed
8522 * but that only happens if charrefs or entities refernces
8523 * have been used in the attribute value, i.e. the attribute
8524 * value have been extracted in an allocated string already.
8525 */
8526 if (*alloc) {
8527 const xmlChar *val2;
8528
8529 val2 = xmlAttrNormalizeSpace2(ctxt, val, len);
Daniel Veillardae0765b2008-07-31 19:54:59 +00008530 if ((val2 != NULL) && (val2 != val)) {
Daniel Veillard97c9ce22008-03-25 16:52:41 +00008531 xmlFree(val);
Daniel Veillard6a31b832008-03-26 14:06:44 +00008532 val = (xmlChar *) val2;
Daniel Veillard97c9ce22008-03-25 16:52:41 +00008533 }
8534 }
8535 }
8536 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008537 } else {
Daniel Veillard97c9ce22008-03-25 16:52:41 +00008538 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
8539 "Specification mandate value for attribute %s\n",
8540 name);
8541 return (NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008542 }
8543
Daniel Veillard97c9ce22008-03-25 16:52:41 +00008544 if (*prefix == ctxt->str_xml) {
8545 /*
8546 * Check that xml:lang conforms to the specification
8547 * No more registered as an error, just generate a warning now
8548 * since this was deprecated in XML second edition
8549 */
8550 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "lang"))) {
8551 internal_val = xmlStrndup(val, *len);
8552 if (!xmlCheckLanguageID(internal_val)) {
8553 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
8554 "Malformed value for xml:lang : %s\n",
8555 internal_val, NULL);
8556 }
8557 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008558
Daniel Veillard97c9ce22008-03-25 16:52:41 +00008559 /*
8560 * Check that xml:space conforms to the specification
8561 */
8562 if (xmlStrEqual(name, BAD_CAST "space")) {
8563 internal_val = xmlStrndup(val, *len);
8564 if (xmlStrEqual(internal_val, BAD_CAST "default"))
8565 *(ctxt->space) = 0;
8566 else if (xmlStrEqual(internal_val, BAD_CAST "preserve"))
8567 *(ctxt->space) = 1;
8568 else {
8569 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
8570 "Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
8571 internal_val, NULL);
8572 }
8573 }
8574 if (internal_val) {
8575 xmlFree(internal_val);
8576 }
8577 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008578
8579 *value = val;
Daniel Veillard97c9ce22008-03-25 16:52:41 +00008580 return (name);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008581}
Daniel Veillard0fb18932003-09-07 09:14:37 +00008582/**
8583 * xmlParseStartTag2:
8584 * @ctxt: an XML parser context
8585 *
8586 * parse a start of tag either for rule element or
8587 * EmptyElement. In both case we don't parse the tag closing chars.
8588 * This routine is called when running SAX2 parsing
8589 *
8590 * [40] STag ::= '<' Name (S Attribute)* S? '>'
8591 *
8592 * [ WFC: Unique Att Spec ]
8593 * No attribute name may appear more than once in the same start-tag or
8594 * empty-element tag.
8595 *
8596 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
8597 *
8598 * [ WFC: Unique Att Spec ]
8599 * No attribute name may appear more than once in the same start-tag or
8600 * empty-element tag.
8601 *
8602 * With namespace:
8603 *
8604 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
8605 *
8606 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
8607 *
8608 * Returns the element name parsed
8609 */
8610
8611static const xmlChar *
8612xmlParseStartTag2(xmlParserCtxtPtr ctxt, const xmlChar **pref,
Daniel Veillardc82c57e2004-01-12 16:24:34 +00008613 const xmlChar **URI, int *tlen) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00008614 const xmlChar *localname;
8615 const xmlChar *prefix;
8616 const xmlChar *attname;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008617 const xmlChar *aprefix;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008618 const xmlChar *nsname;
8619 xmlChar *attvalue;
8620 const xmlChar **atts = ctxt->atts;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008621 int maxatts = ctxt->maxatts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008622 int nratts, nbatts, nbdef;
Daniel Veillarddcec6722006-10-15 20:32:53 +00008623 int i, j, nbNs, attval, oldline, oldcol;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008624 const xmlChar *base;
8625 unsigned long cur;
Daniel Veillard365cf672005-06-09 08:18:24 +00008626 int nsNr = ctxt->nsNr;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008627
8628 if (RAW != '<') return(NULL);
8629 NEXT1;
8630
8631 /*
8632 * NOTE: it is crucial with the SAX2 API to never call SHRINK beyond that
8633 * point since the attribute values may be stored as pointers to
8634 * the buffer and calling SHRINK would destroy them !
8635 * The Shrinking is only possible once the full set of attribute
8636 * callbacks have been done.
8637 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00008638reparse:
Daniel Veillard0fb18932003-09-07 09:14:37 +00008639 SHRINK;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008640 base = ctxt->input->base;
8641 cur = ctxt->input->cur - ctxt->input->base;
Daniel Veillarddcec6722006-10-15 20:32:53 +00008642 oldline = ctxt->input->line;
8643 oldcol = ctxt->input->col;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008644 nbatts = 0;
8645 nratts = 0;
8646 nbdef = 0;
8647 nbNs = 0;
8648 attval = 0;
Daniel Veillard365cf672005-06-09 08:18:24 +00008649 /* Forget any namespaces added during an earlier parse of this element. */
8650 ctxt->nsNr = nsNr;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008651
8652 localname = xmlParseQName(ctxt, &prefix);
8653 if (localname == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008654 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8655 "StartTag: invalid element name\n");
Daniel Veillard0fb18932003-09-07 09:14:37 +00008656 return(NULL);
8657 }
Daniel Veillardc82c57e2004-01-12 16:24:34 +00008658 *tlen = ctxt->input->cur - ctxt->input->base - cur;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008659
8660 /*
8661 * Now parse the attributes, it ends up with the ending
8662 *
8663 * (S Attribute)* S?
8664 */
8665 SKIP_BLANKS;
8666 GROW;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008667 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008668
8669 while ((RAW != '>') &&
8670 ((RAW != '/') || (NXT(1) != '>')) &&
Daniel Veillard73b013f2003-09-30 12:36:01 +00008671 (IS_BYTE_CHAR(RAW))) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00008672 const xmlChar *q = CUR_PTR;
8673 unsigned int cons = ctxt->input->consumed;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008674 int len = -1, alloc = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008675
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008676 attname = xmlParseAttribute2(ctxt, prefix, localname,
8677 &aprefix, &attvalue, &len, &alloc);
Daniel Veillarddcec6722006-10-15 20:32:53 +00008678 if (ctxt->input->base != base) {
8679 if ((attvalue != NULL) && (alloc != 0))
8680 xmlFree(attvalue);
8681 attvalue = NULL;
8682 goto base_changed;
8683 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008684 if ((attname != NULL) && (attvalue != NULL)) {
8685 if (len < 0) len = xmlStrlen(attvalue);
8686 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00008687 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
8688 xmlURIPtr uri;
8689
8690 if (*URL != 0) {
8691 uri = xmlParseURI((const char *) URL);
8692 if (uri == NULL) {
Daniel Veillard37334572008-07-31 08:20:02 +00008693 xmlNsErr(ctxt, XML_WAR_NS_URI,
8694 "xmlns: '%s' is not a valid URI\n",
8695 URL, NULL, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008696 } else {
Daniel Veillardda3fee42008-09-01 13:08:57 +00008697 if (uri->scheme == NULL) {
Daniel Veillard37334572008-07-31 08:20:02 +00008698 xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
8699 "xmlns: URI %s is not absolute\n",
8700 URL, NULL, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008701 }
8702 xmlFreeURI(uri);
8703 }
Daniel Veillard37334572008-07-31 08:20:02 +00008704 if (URL == ctxt->str_xml_ns) {
8705 if (attname != ctxt->str_xml) {
8706 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8707 "xml namespace URI cannot be the default namespace\n",
8708 NULL, NULL, NULL);
8709 }
8710 goto skip_default_ns;
8711 }
8712 if ((len == 29) &&
8713 (xmlStrEqual(URL,
8714 BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
8715 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8716 "reuse of the xmlns namespace name is forbidden\n",
8717 NULL, NULL, NULL);
8718 goto skip_default_ns;
8719 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00008720 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008721 /*
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008722 * check that it's not a defined namespace
Daniel Veillard0fb18932003-09-07 09:14:37 +00008723 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008724 for (j = 1;j <= nbNs;j++)
8725 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
8726 break;
8727 if (j <= nbNs)
8728 xmlErrAttributeDup(ctxt, NULL, attname);
8729 else
8730 if (nsPush(ctxt, NULL, URL) > 0) nbNs++;
Daniel Veillard37334572008-07-31 08:20:02 +00008731skip_default_ns:
Daniel Veillarde57ec792003-09-10 10:50:59 +00008732 if (alloc != 0) xmlFree(attvalue);
8733 SKIP_BLANKS;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008734 continue;
8735 }
8736 if (aprefix == ctxt->str_xmlns) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00008737 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
8738 xmlURIPtr uri;
8739
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008740 if (attname == ctxt->str_xml) {
8741 if (URL != ctxt->str_xml_ns) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008742 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8743 "xml namespace prefix mapped to wrong URI\n",
8744 NULL, NULL, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008745 }
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008746 /*
8747 * Do not keep a namespace definition node
8748 */
Daniel Veillard37334572008-07-31 08:20:02 +00008749 goto skip_ns;
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008750 }
Daniel Veillard37334572008-07-31 08:20:02 +00008751 if (URL == ctxt->str_xml_ns) {
8752 if (attname != ctxt->str_xml) {
8753 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8754 "xml namespace URI mapped to wrong prefix\n",
8755 NULL, NULL, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008756 }
Daniel Veillard37334572008-07-31 08:20:02 +00008757 goto skip_ns;
8758 }
8759 if (attname == ctxt->str_xmlns) {
8760 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8761 "redefinition of the xmlns prefix is forbidden\n",
8762 NULL, NULL, NULL);
8763 goto skip_ns;
8764 }
8765 if ((len == 29) &&
8766 (xmlStrEqual(URL,
8767 BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
8768 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8769 "reuse of the xmlns namespace name is forbidden\n",
8770 NULL, NULL, NULL);
8771 goto skip_ns;
8772 }
8773 if ((URL == NULL) || (URL[0] == 0)) {
8774 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8775 "xmlns:%s: Empty XML namespace is not allowed\n",
8776 attname, NULL, NULL);
8777 goto skip_ns;
8778 } else {
8779 uri = xmlParseURI((const char *) URL);
8780 if (uri == NULL) {
8781 xmlNsErr(ctxt, XML_WAR_NS_URI,
8782 "xmlns:%s: '%s' is not a valid URI\n",
8783 attname, URL, NULL);
8784 } else {
8785 if ((ctxt->pedantic) && (uri->scheme == NULL)) {
8786 xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
8787 "xmlns:%s: URI %s is not absolute\n",
8788 attname, URL, NULL);
8789 }
8790 xmlFreeURI(uri);
8791 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00008792 }
8793
Daniel Veillard0fb18932003-09-07 09:14:37 +00008794 /*
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008795 * check that it's not a defined namespace
Daniel Veillard0fb18932003-09-07 09:14:37 +00008796 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008797 for (j = 1;j <= nbNs;j++)
8798 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
8799 break;
8800 if (j <= nbNs)
8801 xmlErrAttributeDup(ctxt, aprefix, attname);
8802 else
8803 if (nsPush(ctxt, attname, URL) > 0) nbNs++;
Daniel Veillard37334572008-07-31 08:20:02 +00008804skip_ns:
Daniel Veillarde57ec792003-09-10 10:50:59 +00008805 if (alloc != 0) xmlFree(attvalue);
8806 SKIP_BLANKS;
Daniel Veillardd3999c72004-03-10 16:27:03 +00008807 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008808 continue;
8809 }
8810
8811 /*
8812 * Add the pair to atts
8813 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00008814 if ((atts == NULL) || (nbatts + 5 > maxatts)) {
8815 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00008816 if (attvalue[len] == 0)
8817 xmlFree(attvalue);
8818 goto failed;
8819 }
8820 maxatts = ctxt->maxatts;
8821 atts = ctxt->atts;
8822 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00008823 ctxt->attallocs[nratts++] = alloc;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008824 atts[nbatts++] = attname;
8825 atts[nbatts++] = aprefix;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008826 atts[nbatts++] = NULL; /* the URI will be fetched later */
Daniel Veillard0fb18932003-09-07 09:14:37 +00008827 atts[nbatts++] = attvalue;
8828 attvalue += len;
8829 atts[nbatts++] = attvalue;
8830 /*
8831 * tag if some deallocation is needed
8832 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00008833 if (alloc != 0) attval = 1;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008834 } else {
8835 if ((attvalue != NULL) && (attvalue[len] == 0))
8836 xmlFree(attvalue);
8837 }
8838
Daniel Veillard37334572008-07-31 08:20:02 +00008839failed:
Daniel Veillard0fb18932003-09-07 09:14:37 +00008840
8841 GROW
Daniel Veillarde57ec792003-09-10 10:50:59 +00008842 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008843 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
8844 break;
William M. Brack76e95df2003-10-18 16:20:14 +00008845 if (!IS_BLANK_CH(RAW)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008846 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
8847 "attributes construct error\n");
William M. Brack13dfa872004-09-18 04:52:08 +00008848 break;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008849 }
8850 SKIP_BLANKS;
8851 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
8852 (attname == NULL) && (attvalue == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008853 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
Daniel Veillard0fb18932003-09-07 09:14:37 +00008854 "xmlParseStartTag: problem parsing attributes\n");
Daniel Veillard0fb18932003-09-07 09:14:37 +00008855 break;
8856 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008857 GROW;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008858 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008859 }
8860
Daniel Veillard0fb18932003-09-07 09:14:37 +00008861 /*
Daniel Veillarde57ec792003-09-10 10:50:59 +00008862 * The attributes defaulting
8863 */
8864 if (ctxt->attsDefault != NULL) {
8865 xmlDefAttrsPtr defaults;
8866
8867 defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix);
8868 if (defaults != NULL) {
8869 for (i = 0;i < defaults->nbAttrs;i++) {
Daniel Veillardae0765b2008-07-31 19:54:59 +00008870 attname = defaults->values[5 * i];
8871 aprefix = defaults->values[5 * i + 1];
Daniel Veillarde57ec792003-09-10 10:50:59 +00008872
8873 /*
8874 * special work for namespaces defaulted defs
8875 */
8876 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
8877 /*
8878 * check that it's not a defined namespace
8879 */
8880 for (j = 1;j <= nbNs;j++)
8881 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
8882 break;
8883 if (j <= nbNs) continue;
8884
8885 nsname = xmlGetNamespace(ctxt, NULL);
Daniel Veillardae0765b2008-07-31 19:54:59 +00008886 if (nsname != defaults->values[5 * i + 2]) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00008887 if (nsPush(ctxt, NULL,
Daniel Veillardae0765b2008-07-31 19:54:59 +00008888 defaults->values[5 * i + 2]) > 0)
Daniel Veillarde57ec792003-09-10 10:50:59 +00008889 nbNs++;
8890 }
8891 } else if (aprefix == ctxt->str_xmlns) {
8892 /*
8893 * check that it's not a defined namespace
8894 */
8895 for (j = 1;j <= nbNs;j++)
8896 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
8897 break;
8898 if (j <= nbNs) continue;
8899
8900 nsname = xmlGetNamespace(ctxt, attname);
8901 if (nsname != defaults->values[2]) {
8902 if (nsPush(ctxt, attname,
Daniel Veillardae0765b2008-07-31 19:54:59 +00008903 defaults->values[5 * i + 2]) > 0)
Daniel Veillarde57ec792003-09-10 10:50:59 +00008904 nbNs++;
8905 }
8906 } else {
8907 /*
8908 * check that it's not a defined attribute
8909 */
8910 for (j = 0;j < nbatts;j+=5) {
8911 if ((attname == atts[j]) && (aprefix == atts[j+1]))
8912 break;
8913 }
8914 if (j < nbatts) continue;
8915
8916 if ((atts == NULL) || (nbatts + 5 > maxatts)) {
8917 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
Daniel Veillard9ee35f32003-09-28 00:19:54 +00008918 return(NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008919 }
8920 maxatts = ctxt->maxatts;
8921 atts = ctxt->atts;
8922 }
8923 atts[nbatts++] = attname;
8924 atts[nbatts++] = aprefix;
8925 if (aprefix == NULL)
8926 atts[nbatts++] = NULL;
8927 else
8928 atts[nbatts++] = xmlGetNamespace(ctxt, aprefix);
Daniel Veillardae0765b2008-07-31 19:54:59 +00008929 atts[nbatts++] = defaults->values[5 * i + 2];
8930 atts[nbatts++] = defaults->values[5 * i + 3];
8931 if ((ctxt->standalone == 1) &&
8932 (defaults->values[5 * i + 4] != NULL)) {
8933 xmlValidityError(ctxt, XML_DTD_STANDALONE_DEFAULTED,
8934 "standalone: attribute %s on %s defaulted from external subset\n",
8935 attname, localname);
8936 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00008937 nbdef++;
8938 }
8939 }
8940 }
8941 }
8942
Daniel Veillarde70c8772003-11-25 07:21:18 +00008943 /*
8944 * The attributes checkings
8945 */
8946 for (i = 0; i < nbatts;i += 5) {
Kasimier T. Buchcik455472f2005-04-29 10:04:43 +00008947 /*
8948 * The default namespace does not apply to attribute names.
8949 */
8950 if (atts[i + 1] != NULL) {
8951 nsname = xmlGetNamespace(ctxt, atts[i + 1]);
8952 if (nsname == NULL) {
8953 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
8954 "Namespace prefix %s for %s on %s is not defined\n",
8955 atts[i + 1], atts[i], localname);
8956 }
8957 atts[i + 2] = nsname;
8958 } else
8959 nsname = NULL;
Daniel Veillarde70c8772003-11-25 07:21:18 +00008960 /*
8961 * [ WFC: Unique Att Spec ]
8962 * No attribute name may appear more than once in the same
8963 * start-tag or empty-element tag.
8964 * As extended by the Namespace in XML REC.
8965 */
8966 for (j = 0; j < i;j += 5) {
8967 if (atts[i] == atts[j]) {
8968 if (atts[i+1] == atts[j+1]) {
8969 xmlErrAttributeDup(ctxt, atts[i+1], atts[i]);
8970 break;
8971 }
8972 if ((nsname != NULL) && (atts[j + 2] == nsname)) {
8973 xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED,
8974 "Namespaced Attribute %s in '%s' redefined\n",
8975 atts[i], nsname, NULL);
8976 break;
8977 }
8978 }
8979 }
8980 }
8981
Daniel Veillarde57ec792003-09-10 10:50:59 +00008982 nsname = xmlGetNamespace(ctxt, prefix);
8983 if ((prefix != NULL) && (nsname == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008984 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
8985 "Namespace prefix %s on %s is not defined\n",
8986 prefix, localname, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008987 }
8988 *pref = prefix;
8989 *URI = nsname;
8990
8991 /*
8992 * SAX: Start of Element !
8993 */
8994 if ((ctxt->sax != NULL) && (ctxt->sax->startElementNs != NULL) &&
8995 (!ctxt->disableSAX)) {
8996 if (nbNs > 0)
8997 ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
8998 nsname, nbNs, &ctxt->nsTab[ctxt->nsNr - 2 * nbNs],
8999 nbatts / 5, nbdef, atts);
9000 else
9001 ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
9002 nsname, 0, NULL, nbatts / 5, nbdef, atts);
9003 }
9004
9005 /*
9006 * Free up attribute allocated strings if needed
9007 */
9008 if (attval != 0) {
9009 for (i = 3,j = 0; j < nratts;i += 5,j++)
9010 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
9011 xmlFree((xmlChar *) atts[i]);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009012 }
9013
9014 return(localname);
Daniel Veillarde57ec792003-09-10 10:50:59 +00009015
9016base_changed:
9017 /*
9018 * the attribute strings are valid iif the base didn't changed
9019 */
9020 if (attval != 0) {
9021 for (i = 3,j = 0; j < nratts;i += 5,j++)
9022 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
9023 xmlFree((xmlChar *) atts[i]);
9024 }
9025 ctxt->input->cur = ctxt->input->base + cur;
Daniel Veillarddcec6722006-10-15 20:32:53 +00009026 ctxt->input->line = oldline;
9027 ctxt->input->col = oldcol;
Daniel Veillarde57ec792003-09-10 10:50:59 +00009028 if (ctxt->wellFormed == 1) {
9029 goto reparse;
9030 }
9031 return(NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009032}
9033
9034/**
9035 * xmlParseEndTag2:
9036 * @ctxt: an XML parser context
9037 * @line: line of the start tag
9038 * @nsNr: number of namespaces on the start tag
9039 *
9040 * parse an end of tag
9041 *
9042 * [42] ETag ::= '</' Name S? '>'
9043 *
9044 * With namespace
9045 *
9046 * [NS 9] ETag ::= '</' QName S? '>'
9047 */
9048
9049static void
9050xmlParseEndTag2(xmlParserCtxtPtr ctxt, const xmlChar *prefix,
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009051 const xmlChar *URI, int line, int nsNr, int tlen) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00009052 const xmlChar *name;
9053
9054 GROW;
9055 if ((RAW != '<') || (NXT(1) != '/')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009056 xmlFatalErr(ctxt, XML_ERR_LTSLASH_REQUIRED, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009057 return;
9058 }
9059 SKIP(2);
9060
William M. Brack13dfa872004-09-18 04:52:08 +00009061 if ((tlen > 0) && (xmlStrncmp(ctxt->input->cur, ctxt->name, tlen) == 0)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009062 if (ctxt->input->cur[tlen] == '>') {
9063 ctxt->input->cur += tlen + 1;
9064 goto done;
9065 }
9066 ctxt->input->cur += tlen;
9067 name = (xmlChar*)1;
9068 } else {
9069 if (prefix == NULL)
9070 name = xmlParseNameAndCompare(ctxt, ctxt->name);
9071 else
9072 name = xmlParseQNameAndCompare(ctxt, ctxt->name, prefix);
9073 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00009074
9075 /*
9076 * We should definitely be at the ending "S? '>'" part
9077 */
9078 GROW;
9079 SKIP_BLANKS;
Daniel Veillard73b013f2003-09-30 12:36:01 +00009080 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009081 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009082 } else
9083 NEXT1;
9084
9085 /*
9086 * [ WFC: Element Type Match ]
9087 * The Name in an element's end-tag must match the element type in the
9088 * start-tag.
9089 *
9090 */
9091 if (name != (xmlChar*)1) {
Daniel Veillardf403d292003-10-05 13:51:35 +00009092 if (name == NULL) name = BAD_CAST "unparseable";
9093 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
Daniel Veillard0fb18932003-09-07 09:14:37 +00009094 "Opening and ending tag mismatch: %s line %d and %s\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00009095 ctxt->name, line, name);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009096 }
9097
9098 /*
9099 * SAX: End of Tag
9100 */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009101done:
Daniel Veillard0fb18932003-09-07 09:14:37 +00009102 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
9103 (!ctxt->disableSAX))
9104 ctxt->sax->endElementNs(ctxt->userData, ctxt->name, prefix, URI);
9105
Daniel Veillard0fb18932003-09-07 09:14:37 +00009106 spacePop(ctxt);
9107 if (nsNr != 0)
9108 nsPop(ctxt, nsNr);
9109 return;
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00009110}
9111
9112/**
Owen Taylor3473f882001-02-23 17:55:21 +00009113 * xmlParseCDSect:
9114 * @ctxt: an XML parser context
9115 *
9116 * Parse escaped pure raw content.
9117 *
9118 * [18] CDSect ::= CDStart CData CDEnd
9119 *
9120 * [19] CDStart ::= '<![CDATA['
9121 *
9122 * [20] Data ::= (Char* - (Char* ']]>' Char*))
9123 *
9124 * [21] CDEnd ::= ']]>'
9125 */
9126void
9127xmlParseCDSect(xmlParserCtxtPtr ctxt) {
9128 xmlChar *buf = NULL;
9129 int len = 0;
9130 int size = XML_PARSER_BUFFER_SIZE;
9131 int r, rl;
9132 int s, sl;
9133 int cur, l;
9134 int count = 0;
9135
Daniel Veillard8f597c32003-10-06 08:19:27 +00009136 /* Check 2.6.0 was NXT(0) not RAW */
Daniel Veillarda07050d2003-10-19 14:46:32 +00009137 if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009138 SKIP(9);
9139 } else
9140 return;
9141
9142 ctxt->instate = XML_PARSER_CDATA_SECTION;
9143 r = CUR_CHAR(rl);
William M. Brack871611b2003-10-18 04:53:14 +00009144 if (!IS_CHAR(r)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009145 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009146 ctxt->instate = XML_PARSER_CONTENT;
9147 return;
9148 }
9149 NEXTL(rl);
9150 s = CUR_CHAR(sl);
William M. Brack871611b2003-10-18 04:53:14 +00009151 if (!IS_CHAR(s)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009152 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009153 ctxt->instate = XML_PARSER_CONTENT;
9154 return;
9155 }
9156 NEXTL(sl);
9157 cur = CUR_CHAR(l);
Daniel Veillard3c908dc2003-04-19 00:07:51 +00009158 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00009159 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009160 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009161 return;
9162 }
William M. Brack871611b2003-10-18 04:53:14 +00009163 while (IS_CHAR(cur) &&
Owen Taylor3473f882001-02-23 17:55:21 +00009164 ((r != ']') || (s != ']') || (cur != '>'))) {
9165 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00009166 xmlChar *tmp;
9167
Owen Taylor3473f882001-02-23 17:55:21 +00009168 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00009169 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
9170 if (tmp == NULL) {
9171 xmlFree(buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009172 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009173 return;
9174 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00009175 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00009176 }
9177 COPY_BUF(rl,buf,len,r);
9178 r = s;
9179 rl = sl;
9180 s = cur;
9181 sl = l;
9182 count++;
9183 if (count > 50) {
9184 GROW;
9185 count = 0;
9186 }
9187 NEXTL(l);
9188 cur = CUR_CHAR(l);
9189 }
9190 buf[len] = 0;
9191 ctxt->instate = XML_PARSER_CONTENT;
9192 if (cur != '>') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00009193 xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00009194 "CData section not finished\n%.50s\n", buf);
Owen Taylor3473f882001-02-23 17:55:21 +00009195 xmlFree(buf);
9196 return;
9197 }
9198 NEXTL(l);
9199
9200 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00009201 * OK the buffer is to be consumed as cdata.
Owen Taylor3473f882001-02-23 17:55:21 +00009202 */
9203 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
9204 if (ctxt->sax->cdataBlock != NULL)
9205 ctxt->sax->cdataBlock(ctxt->userData, buf, len);
Daniel Veillard7583a592001-07-08 13:15:55 +00009206 else if (ctxt->sax->characters != NULL)
9207 ctxt->sax->characters(ctxt->userData, buf, len);
Owen Taylor3473f882001-02-23 17:55:21 +00009208 }
9209 xmlFree(buf);
9210}
9211
9212/**
9213 * xmlParseContent:
9214 * @ctxt: an XML parser context
9215 *
9216 * Parse a content:
9217 *
9218 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
9219 */
9220
9221void
9222xmlParseContent(xmlParserCtxtPtr ctxt) {
9223 GROW;
Daniel Veillardfdc91562002-07-01 21:52:03 +00009224 while ((RAW != 0) &&
Daniel Veillard4a9fe382006-09-19 12:44:35 +00009225 ((RAW != '<') || (NXT(1) != '/')) &&
9226 (ctxt->instate != XML_PARSER_EOF)) {
Owen Taylor3473f882001-02-23 17:55:21 +00009227 const xmlChar *test = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00009228 unsigned int cons = ctxt->input->consumed;
Daniel Veillard21a0f912001-02-25 19:54:14 +00009229 const xmlChar *cur = ctxt->input->cur;
Owen Taylor3473f882001-02-23 17:55:21 +00009230
9231 /*
Owen Taylor3473f882001-02-23 17:55:21 +00009232 * First case : a Processing Instruction.
9233 */
Daniel Veillardfdc91562002-07-01 21:52:03 +00009234 if ((*cur == '<') && (cur[1] == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009235 xmlParsePI(ctxt);
9236 }
9237
9238 /*
9239 * Second case : a CDSection
9240 */
Daniel Veillard8f597c32003-10-06 08:19:27 +00009241 /* 2.6.0 test was *cur not RAW */
Daniel Veillarda07050d2003-10-19 14:46:32 +00009242 else if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009243 xmlParseCDSect(ctxt);
9244 }
9245
9246 /*
9247 * Third case : a comment
9248 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00009249 else if ((*cur == '<') && (NXT(1) == '!') &&
Owen Taylor3473f882001-02-23 17:55:21 +00009250 (NXT(2) == '-') && (NXT(3) == '-')) {
9251 xmlParseComment(ctxt);
9252 ctxt->instate = XML_PARSER_CONTENT;
9253 }
9254
9255 /*
9256 * Fourth case : a sub-element.
9257 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00009258 else if (*cur == '<') {
Owen Taylor3473f882001-02-23 17:55:21 +00009259 xmlParseElement(ctxt);
9260 }
9261
9262 /*
9263 * Fifth case : a reference. If if has not been resolved,
9264 * parsing returns it's Name, create the node
9265 */
9266
Daniel Veillard21a0f912001-02-25 19:54:14 +00009267 else if (*cur == '&') {
Owen Taylor3473f882001-02-23 17:55:21 +00009268 xmlParseReference(ctxt);
9269 }
9270
9271 /*
9272 * Last case, text. Note that References are handled directly.
9273 */
9274 else {
9275 xmlParseCharData(ctxt, 0);
9276 }
9277
9278 GROW;
9279 /*
9280 * Pop-up of finished entities.
9281 */
Daniel Veillard561b7f82002-03-20 21:55:57 +00009282 while ((RAW == 0) && (ctxt->inputNr > 1))
Owen Taylor3473f882001-02-23 17:55:21 +00009283 xmlPopInput(ctxt);
9284 SHRINK;
9285
Daniel Veillardfdc91562002-07-01 21:52:03 +00009286 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009287 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
9288 "detected an error in element content\n");
Owen Taylor3473f882001-02-23 17:55:21 +00009289 ctxt->instate = XML_PARSER_EOF;
9290 break;
9291 }
9292 }
9293}
9294
9295/**
9296 * xmlParseElement:
9297 * @ctxt: an XML parser context
9298 *
9299 * parse an XML element, this is highly recursive
9300 *
9301 * [39] element ::= EmptyElemTag | STag content ETag
9302 *
9303 * [ WFC: Element Type Match ]
9304 * The Name in an element's end-tag must match the element type in the
9305 * start-tag.
9306 *
Owen Taylor3473f882001-02-23 17:55:21 +00009307 */
9308
9309void
9310xmlParseElement(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00009311 const xmlChar *name;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009312 const xmlChar *prefix;
9313 const xmlChar *URI;
Owen Taylor3473f882001-02-23 17:55:21 +00009314 xmlParserNodeInfo node_info;
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009315 int line, tlen;
Owen Taylor3473f882001-02-23 17:55:21 +00009316 xmlNodePtr ret;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009317 int nsNr = ctxt->nsNr;
Owen Taylor3473f882001-02-23 17:55:21 +00009318
Daniel Veillard8915c152008-08-26 13:05:34 +00009319 if (((unsigned int) ctxt->nameNr > xmlParserMaxDepth) &&
9320 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9321 xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
9322 "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
9323 xmlParserMaxDepth);
Daniel Veillard4a9fe382006-09-19 12:44:35 +00009324 ctxt->instate = XML_PARSER_EOF;
9325 return;
9326 }
9327
Owen Taylor3473f882001-02-23 17:55:21 +00009328 /* Capture start position */
9329 if (ctxt->record_info) {
9330 node_info.begin_pos = ctxt->input->consumed +
9331 (CUR_PTR - ctxt->input->base);
9332 node_info.begin_line = ctxt->input->line;
9333 }
9334
9335 if (ctxt->spaceNr == 0)
9336 spacePush(ctxt, -1);
Daniel Veillard1114d002006-10-12 16:24:35 +00009337 else if (*ctxt->space == -2)
9338 spacePush(ctxt, -1);
Owen Taylor3473f882001-02-23 17:55:21 +00009339 else
9340 spacePush(ctxt, *ctxt->space);
9341
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00009342 line = ctxt->input->line;
Daniel Veillard81273902003-09-30 00:43:48 +00009343#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +00009344 if (ctxt->sax2)
Daniel Veillard81273902003-09-30 00:43:48 +00009345#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009346 name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
Daniel Veillard81273902003-09-30 00:43:48 +00009347#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +00009348 else
9349 name = xmlParseStartTag(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +00009350#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00009351 if (name == NULL) {
9352 spacePop(ctxt);
9353 return;
9354 }
9355 namePush(ctxt, name);
9356 ret = ctxt->node;
9357
Daniel Veillard4432df22003-09-28 18:58:27 +00009358#ifdef LIBXML_VALID_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +00009359 /*
9360 * [ VC: Root Element Type ]
9361 * The Name in the document type declaration must match the element
9362 * type of the root element.
9363 */
9364 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
9365 ctxt->node && (ctxt->node == ctxt->myDoc->children))
9366 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
Daniel Veillard4432df22003-09-28 18:58:27 +00009367#endif /* LIBXML_VALID_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00009368
9369 /*
9370 * Check for an Empty Element.
9371 */
9372 if ((RAW == '/') && (NXT(1) == '>')) {
9373 SKIP(2);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009374 if (ctxt->sax2) {
9375 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
9376 (!ctxt->disableSAX))
9377 ctxt->sax->endElementNs(ctxt->userData, name, prefix, URI);
Daniel Veillard81273902003-09-30 00:43:48 +00009378#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +00009379 } else {
9380 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
9381 (!ctxt->disableSAX))
9382 ctxt->sax->endElement(ctxt->userData, name);
Daniel Veillard81273902003-09-30 00:43:48 +00009383#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00009384 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00009385 namePop(ctxt);
9386 spacePop(ctxt);
9387 if (nsNr != ctxt->nsNr)
9388 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +00009389 if ( ret != NULL && ctxt->record_info ) {
9390 node_info.end_pos = ctxt->input->consumed +
9391 (CUR_PTR - ctxt->input->base);
9392 node_info.end_line = ctxt->input->line;
9393 node_info.node = ret;
9394 xmlParserAddNodeInfo(ctxt, &node_info);
9395 }
9396 return;
9397 }
9398 if (RAW == '>') {
Daniel Veillard21a0f912001-02-25 19:54:14 +00009399 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00009400 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00009401 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_GT_REQUIRED,
9402 "Couldn't find end of Start Tag %s line %d\n",
9403 name, line, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009404
9405 /*
9406 * end of parsing of this node.
9407 */
9408 nodePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009409 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00009410 spacePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009411 if (nsNr != ctxt->nsNr)
9412 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +00009413
9414 /*
9415 * Capture end position and add node
9416 */
9417 if ( ret != NULL && ctxt->record_info ) {
9418 node_info.end_pos = ctxt->input->consumed +
9419 (CUR_PTR - ctxt->input->base);
9420 node_info.end_line = ctxt->input->line;
9421 node_info.node = ret;
9422 xmlParserAddNodeInfo(ctxt, &node_info);
9423 }
9424 return;
9425 }
9426
9427 /*
9428 * Parse the content of the element:
9429 */
9430 xmlParseContent(ctxt);
Daniel Veillard73b013f2003-09-30 12:36:01 +00009431 if (!IS_BYTE_CHAR(RAW)) {
Daniel Veillardf403d292003-10-05 13:51:35 +00009432 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
Daniel Veillard2b0f8792003-10-10 19:36:36 +00009433 "Premature end of data in tag %s line %d\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00009434 name, line, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009435
9436 /*
9437 * end of parsing of this node.
9438 */
9439 nodePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009440 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00009441 spacePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009442 if (nsNr != ctxt->nsNr)
9443 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +00009444 return;
9445 }
9446
9447 /*
9448 * parse the end of tag: '</' should be here.
9449 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00009450 if (ctxt->sax2) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009451 xmlParseEndTag2(ctxt, prefix, URI, line, ctxt->nsNr - nsNr, tlen);
Daniel Veillarde57ec792003-09-10 10:50:59 +00009452 namePop(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +00009453 }
9454#ifdef LIBXML_SAX1_ENABLED
9455 else
Daniel Veillard0fb18932003-09-07 09:14:37 +00009456 xmlParseEndTag1(ctxt, line);
Daniel Veillard81273902003-09-30 00:43:48 +00009457#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00009458
9459 /*
9460 * Capture end position and add node
9461 */
9462 if ( ret != NULL && ctxt->record_info ) {
9463 node_info.end_pos = ctxt->input->consumed +
9464 (CUR_PTR - ctxt->input->base);
9465 node_info.end_line = ctxt->input->line;
9466 node_info.node = ret;
9467 xmlParserAddNodeInfo(ctxt, &node_info);
9468 }
9469}
9470
9471/**
9472 * xmlParseVersionNum:
9473 * @ctxt: an XML parser context
9474 *
9475 * parse the XML version value.
9476 *
Daniel Veillard34e3f642008-07-29 09:02:27 +00009477 * [26] VersionNum ::= '1.' [0-9]+
9478 *
9479 * In practice allow [0-9].[0-9]+ at that level
Owen Taylor3473f882001-02-23 17:55:21 +00009480 *
9481 * Returns the string giving the XML version number, or NULL
9482 */
9483xmlChar *
9484xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
9485 xmlChar *buf = NULL;
9486 int len = 0;
9487 int size = 10;
9488 xmlChar cur;
9489
Daniel Veillard3c908dc2003-04-19 00:07:51 +00009490 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00009491 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009492 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009493 return(NULL);
9494 }
9495 cur = CUR;
Daniel Veillard34e3f642008-07-29 09:02:27 +00009496 if (!((cur >= '0') && (cur <= '9'))) {
Daniel Veillard7e5c3f42008-07-29 16:12:31 +00009497 xmlFree(buf);
Daniel Veillard34e3f642008-07-29 09:02:27 +00009498 return(NULL);
9499 }
9500 buf[len++] = cur;
9501 NEXT;
9502 cur=CUR;
9503 if (cur != '.') {
Daniel Veillard7e5c3f42008-07-29 16:12:31 +00009504 xmlFree(buf);
Daniel Veillard34e3f642008-07-29 09:02:27 +00009505 return(NULL);
9506 }
9507 buf[len++] = cur;
9508 NEXT;
9509 cur=CUR;
9510 while ((cur >= '0') && (cur <= '9')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009511 if (len + 1 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00009512 xmlChar *tmp;
9513
Owen Taylor3473f882001-02-23 17:55:21 +00009514 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00009515 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
9516 if (tmp == NULL) {
Daniel Veillard68b6e022008-03-31 09:26:00 +00009517 xmlFree(buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009518 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009519 return(NULL);
9520 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00009521 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00009522 }
9523 buf[len++] = cur;
9524 NEXT;
9525 cur=CUR;
9526 }
9527 buf[len] = 0;
9528 return(buf);
9529}
9530
9531/**
9532 * xmlParseVersionInfo:
9533 * @ctxt: an XML parser context
Daniel Veillard68b6e022008-03-31 09:26:00 +00009534 *
Owen Taylor3473f882001-02-23 17:55:21 +00009535 * parse the XML version.
9536 *
9537 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
Daniel Veillard68b6e022008-03-31 09:26:00 +00009538 *
Owen Taylor3473f882001-02-23 17:55:21 +00009539 * [25] Eq ::= S? '=' S?
9540 *
9541 * Returns the version string, e.g. "1.0"
9542 */
9543
9544xmlChar *
9545xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
9546 xmlChar *version = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00009547
Daniel Veillarda07050d2003-10-19 14:46:32 +00009548 if (CMP7(CUR_PTR, 'v', 'e', 'r', 's', 'i', 'o', 'n')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009549 SKIP(7);
9550 SKIP_BLANKS;
9551 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009552 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009553 return(NULL);
9554 }
9555 NEXT;
9556 SKIP_BLANKS;
9557 if (RAW == '"') {
9558 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00009559 version = xmlParseVersionNum(ctxt);
9560 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009561 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009562 } else
9563 NEXT;
9564 } else if (RAW == '\''){
9565 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00009566 version = xmlParseVersionNum(ctxt);
9567 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009568 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009569 } else
9570 NEXT;
9571 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009572 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009573 }
9574 }
9575 return(version);
9576}
9577
9578/**
9579 * xmlParseEncName:
9580 * @ctxt: an XML parser context
9581 *
9582 * parse the XML encoding name
9583 *
9584 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
9585 *
9586 * Returns the encoding name value or NULL
9587 */
9588xmlChar *
9589xmlParseEncName(xmlParserCtxtPtr ctxt) {
9590 xmlChar *buf = NULL;
9591 int len = 0;
9592 int size = 10;
9593 xmlChar cur;
9594
9595 cur = CUR;
9596 if (((cur >= 'a') && (cur <= 'z')) ||
9597 ((cur >= 'A') && (cur <= 'Z'))) {
Daniel Veillard3c908dc2003-04-19 00:07:51 +00009598 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00009599 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009600 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009601 return(NULL);
9602 }
Daniel Veillard34e3f642008-07-29 09:02:27 +00009603
Owen Taylor3473f882001-02-23 17:55:21 +00009604 buf[len++] = cur;
9605 NEXT;
9606 cur = CUR;
9607 while (((cur >= 'a') && (cur <= 'z')) ||
9608 ((cur >= 'A') && (cur <= 'Z')) ||
9609 ((cur >= '0') && (cur <= '9')) ||
9610 (cur == '.') || (cur == '_') ||
9611 (cur == '-')) {
9612 if (len + 1 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00009613 xmlChar *tmp;
9614
Owen Taylor3473f882001-02-23 17:55:21 +00009615 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00009616 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
9617 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009618 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00009619 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00009620 return(NULL);
9621 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00009622 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00009623 }
9624 buf[len++] = cur;
9625 NEXT;
9626 cur = CUR;
9627 if (cur == 0) {
9628 SHRINK;
9629 GROW;
9630 cur = CUR;
9631 }
9632 }
9633 buf[len] = 0;
9634 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009635 xmlFatalErr(ctxt, XML_ERR_ENCODING_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009636 }
9637 return(buf);
9638}
9639
9640/**
9641 * xmlParseEncodingDecl:
9642 * @ctxt: an XML parser context
9643 *
9644 * parse the XML encoding declaration
9645 *
9646 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
9647 *
9648 * this setups the conversion filters.
9649 *
9650 * Returns the encoding value or NULL
9651 */
9652
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00009653const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00009654xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
9655 xmlChar *encoding = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00009656
9657 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00009658 if (CMP8(CUR_PTR, 'e', 'n', 'c', 'o', 'd', 'i', 'n', 'g')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009659 SKIP(8);
9660 SKIP_BLANKS;
9661 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009662 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009663 return(NULL);
9664 }
9665 NEXT;
9666 SKIP_BLANKS;
9667 if (RAW == '"') {
9668 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00009669 encoding = xmlParseEncName(ctxt);
9670 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009671 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009672 } else
9673 NEXT;
9674 } else if (RAW == '\''){
9675 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00009676 encoding = xmlParseEncName(ctxt);
9677 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009678 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009679 } else
9680 NEXT;
Daniel Veillard82ac6b02002-02-17 23:18:55 +00009681 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009682 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009683 }
Daniel Veillard6b621b82003-08-11 15:03:34 +00009684 /*
9685 * UTF-16 encoding stwich has already taken place at this stage,
9686 * more over the little-endian/big-endian selection is already done
9687 */
9688 if ((encoding != NULL) &&
9689 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-16")) ||
9690 (!xmlStrcasecmp(encoding, BAD_CAST "UTF16")))) {
Daniel Veillard37334572008-07-31 08:20:02 +00009691 /*
9692 * If no encoding was passed to the parser, that we are
9693 * using UTF-16 and no decoder is present i.e. the
9694 * document is apparently UTF-8 compatible, then raise an
9695 * encoding mismatch fatal error
9696 */
9697 if ((ctxt->encoding == NULL) &&
9698 (ctxt->input->buf != NULL) &&
9699 (ctxt->input->buf->encoder == NULL)) {
9700 xmlFatalErrMsg(ctxt, XML_ERR_INVALID_ENCODING,
9701 "Document labelled UTF-16 but has UTF-8 content\n");
9702 }
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00009703 if (ctxt->encoding != NULL)
9704 xmlFree((xmlChar *) ctxt->encoding);
9705 ctxt->encoding = encoding;
Daniel Veillardb19ba832003-08-14 00:33:46 +00009706 }
9707 /*
9708 * UTF-8 encoding is handled natively
9709 */
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00009710 else if ((encoding != NULL) &&
Daniel Veillardb19ba832003-08-14 00:33:46 +00009711 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-8")) ||
9712 (!xmlStrcasecmp(encoding, BAD_CAST "UTF8")))) {
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00009713 if (ctxt->encoding != NULL)
9714 xmlFree((xmlChar *) ctxt->encoding);
9715 ctxt->encoding = encoding;
Daniel Veillard6b621b82003-08-11 15:03:34 +00009716 }
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00009717 else if (encoding != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +00009718 xmlCharEncodingHandlerPtr handler;
9719
9720 if (ctxt->input->encoding != NULL)
9721 xmlFree((xmlChar *) ctxt->input->encoding);
9722 ctxt->input->encoding = encoding;
9723
Daniel Veillarda6874ca2003-07-29 16:47:24 +00009724 handler = xmlFindCharEncodingHandler((const char *) encoding);
9725 if (handler != NULL) {
9726 xmlSwitchToEncoding(ctxt, handler);
Owen Taylor3473f882001-02-23 17:55:21 +00009727 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00009728 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
Daniel Veillarda6874ca2003-07-29 16:47:24 +00009729 "Unsupported encoding %s\n", encoding);
9730 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009731 }
9732 }
9733 }
9734 return(encoding);
9735}
9736
9737/**
9738 * xmlParseSDDecl:
9739 * @ctxt: an XML parser context
9740 *
9741 * parse the XML standalone declaration
9742 *
9743 * [32] SDDecl ::= S 'standalone' Eq
9744 * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
9745 *
9746 * [ VC: Standalone Document Declaration ]
9747 * TODO The standalone document declaration must have the value "no"
9748 * if any external markup declarations contain declarations of:
9749 * - attributes with default values, if elements to which these
9750 * attributes apply appear in the document without specifications
9751 * of values for these attributes, or
9752 * - entities (other than amp, lt, gt, apos, quot), if references
9753 * to those entities appear in the document, or
9754 * - attributes with values subject to normalization, where the
9755 * attribute appears in the document with a value which will change
9756 * as a result of normalization, or
9757 * - element types with element content, if white space occurs directly
9758 * within any instance of those types.
9759 *
Daniel Veillard602f2bd2006-12-04 09:26:04 +00009760 * Returns:
9761 * 1 if standalone="yes"
9762 * 0 if standalone="no"
9763 * -2 if standalone attribute is missing or invalid
9764 * (A standalone value of -2 means that the XML declaration was found,
9765 * but no value was specified for the standalone attribute).
Owen Taylor3473f882001-02-23 17:55:21 +00009766 */
9767
9768int
9769xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard602f2bd2006-12-04 09:26:04 +00009770 int standalone = -2;
Owen Taylor3473f882001-02-23 17:55:21 +00009771
9772 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00009773 if (CMP10(CUR_PTR, 's', 't', 'a', 'n', 'd', 'a', 'l', 'o', 'n', 'e')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009774 SKIP(10);
9775 SKIP_BLANKS;
9776 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009777 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009778 return(standalone);
9779 }
9780 NEXT;
9781 SKIP_BLANKS;
9782 if (RAW == '\''){
9783 NEXT;
9784 if ((RAW == 'n') && (NXT(1) == 'o')) {
9785 standalone = 0;
9786 SKIP(2);
9787 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
9788 (NXT(2) == 's')) {
9789 standalone = 1;
9790 SKIP(3);
9791 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009792 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009793 }
9794 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009795 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009796 } else
9797 NEXT;
9798 } else if (RAW == '"'){
9799 NEXT;
9800 if ((RAW == 'n') && (NXT(1) == 'o')) {
9801 standalone = 0;
9802 SKIP(2);
9803 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
9804 (NXT(2) == 's')) {
9805 standalone = 1;
9806 SKIP(3);
9807 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009808 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009809 }
9810 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009811 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009812 } else
9813 NEXT;
9814 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009815 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009816 }
9817 }
9818 return(standalone);
9819}
9820
9821/**
9822 * xmlParseXMLDecl:
9823 * @ctxt: an XML parser context
9824 *
9825 * parse an XML declaration header
9826 *
9827 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
9828 */
9829
9830void
9831xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
9832 xmlChar *version;
9833
9834 /*
Daniel Veillardae487ba2005-11-17 07:25:52 +00009835 * This value for standalone indicates that the document has an
9836 * XML declaration but it does not have a standalone attribute.
9837 * It will be overwritten later if a standalone attribute is found.
9838 */
9839 ctxt->input->standalone = -2;
9840
9841 /*
Owen Taylor3473f882001-02-23 17:55:21 +00009842 * We know that '<?xml' is here.
9843 */
9844 SKIP(5);
9845
William M. Brack76e95df2003-10-18 16:20:14 +00009846 if (!IS_BLANK_CH(RAW)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009847 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
9848 "Blank needed after '<?xml'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00009849 }
9850 SKIP_BLANKS;
9851
9852 /*
Daniel Veillard19840942001-11-29 16:11:38 +00009853 * We must have the VersionInfo here.
Owen Taylor3473f882001-02-23 17:55:21 +00009854 */
9855 version = xmlParseVersionInfo(ctxt);
Daniel Veillard19840942001-11-29 16:11:38 +00009856 if (version == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009857 xmlFatalErr(ctxt, XML_ERR_VERSION_MISSING, NULL);
Daniel Veillard19840942001-11-29 16:11:38 +00009858 } else {
9859 if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) {
9860 /*
Daniel Veillard34e3f642008-07-29 09:02:27 +00009861 * Changed here for XML-1.0 5th edition
Daniel Veillard19840942001-11-29 16:11:38 +00009862 */
Daniel Veillard34e3f642008-07-29 09:02:27 +00009863 if (ctxt->options & XML_PARSE_OLD10) {
9864 xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
9865 "Unsupported version '%s'\n",
9866 version);
9867 } else {
9868 if ((version[0] == '1') && ((version[1] == '.'))) {
9869 xmlWarningMsg(ctxt, XML_WAR_UNKNOWN_VERSION,
9870 "Unsupported version '%s'\n",
9871 version, NULL);
9872 } else {
9873 xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
9874 "Unsupported version '%s'\n",
9875 version);
9876 }
9877 }
Daniel Veillard19840942001-11-29 16:11:38 +00009878 }
9879 if (ctxt->version != NULL)
Daniel Veillardd3b08822001-12-05 12:03:33 +00009880 xmlFree((void *) ctxt->version);
Daniel Veillard19840942001-11-29 16:11:38 +00009881 ctxt->version = version;
Daniel Veillarda050d232001-09-05 15:51:05 +00009882 }
Owen Taylor3473f882001-02-23 17:55:21 +00009883
9884 /*
9885 * We may have the encoding declaration
9886 */
William M. Brack76e95df2003-10-18 16:20:14 +00009887 if (!IS_BLANK_CH(RAW)) {
Owen Taylor3473f882001-02-23 17:55:21 +00009888 if ((RAW == '?') && (NXT(1) == '>')) {
9889 SKIP(2);
9890 return;
9891 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009892 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
Owen Taylor3473f882001-02-23 17:55:21 +00009893 }
9894 xmlParseEncodingDecl(ctxt);
9895 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
9896 /*
9897 * The XML REC instructs us to stop parsing right here
9898 */
9899 return;
9900 }
9901
9902 /*
9903 * We may have the standalone status.
9904 */
William M. Brack76e95df2003-10-18 16:20:14 +00009905 if ((ctxt->input->encoding != NULL) && (!IS_BLANK_CH(RAW))) {
Owen Taylor3473f882001-02-23 17:55:21 +00009906 if ((RAW == '?') && (NXT(1) == '>')) {
9907 SKIP(2);
9908 return;
9909 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009910 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
Owen Taylor3473f882001-02-23 17:55:21 +00009911 }
9912 SKIP_BLANKS;
9913 ctxt->input->standalone = xmlParseSDDecl(ctxt);
9914
9915 SKIP_BLANKS;
9916 if ((RAW == '?') && (NXT(1) == '>')) {
9917 SKIP(2);
9918 } else if (RAW == '>') {
9919 /* Deprecated old WD ... */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009920 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009921 NEXT;
9922 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009923 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009924 MOVETO_ENDTAG(CUR_PTR);
9925 NEXT;
9926 }
9927}
9928
9929/**
9930 * xmlParseMisc:
9931 * @ctxt: an XML parser context
9932 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00009933 * parse an XML Misc* optional field.
Owen Taylor3473f882001-02-23 17:55:21 +00009934 *
9935 * [27] Misc ::= Comment | PI | S
9936 */
9937
9938void
9939xmlParseMisc(xmlParserCtxtPtr ctxt) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00009940 while (((RAW == '<') && (NXT(1) == '?')) ||
Daniel Veillarda07050d2003-10-19 14:46:32 +00009941 (CMP4(CUR_PTR, '<', '!', '-', '-')) ||
William M. Brack76e95df2003-10-18 16:20:14 +00009942 IS_BLANK_CH(CUR)) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00009943 if ((RAW == '<') && (NXT(1) == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009944 xmlParsePI(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00009945 } else if (IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00009946 NEXT;
9947 } else
9948 xmlParseComment(ctxt);
9949 }
9950}
9951
9952/**
9953 * xmlParseDocument:
9954 * @ctxt: an XML parser context
9955 *
9956 * parse an XML document (and build a tree if using the standard SAX
9957 * interface).
9958 *
9959 * [1] document ::= prolog element Misc*
9960 *
9961 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
9962 *
9963 * Returns 0, -1 in case of error. the parser context is augmented
9964 * as a result of the parsing.
9965 */
9966
9967int
9968xmlParseDocument(xmlParserCtxtPtr ctxt) {
9969 xmlChar start[4];
9970 xmlCharEncoding enc;
9971
9972 xmlInitParser();
9973
Daniel Veillard36e5cd52004-11-02 14:52:23 +00009974 if ((ctxt == NULL) || (ctxt->input == NULL))
9975 return(-1);
9976
Owen Taylor3473f882001-02-23 17:55:21 +00009977 GROW;
9978
9979 /*
Daniel Veillard0fb18932003-09-07 09:14:37 +00009980 * SAX: detecting the level.
9981 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00009982 xmlDetectSAX2(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009983
9984 /*
Owen Taylor3473f882001-02-23 17:55:21 +00009985 * SAX: beginning of the document processing.
9986 */
9987 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
9988 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
9989
Daniel Veillard4aede2e2003-10-17 12:43:59 +00009990 if ((ctxt->encoding == (const xmlChar *)XML_CHAR_ENCODING_NONE) &&
9991 ((ctxt->input->end - ctxt->input->cur) >= 4)) {
Daniel Veillard4aafa792001-07-28 17:21:12 +00009992 /*
9993 * Get the 4 first bytes and decode the charset
9994 * if enc != XML_CHAR_ENCODING_NONE
9995 * plug some encoding conversion routines.
9996 */
9997 start[0] = RAW;
9998 start[1] = NXT(1);
9999 start[2] = NXT(2);
10000 start[3] = NXT(3);
Daniel Veillard4aede2e2003-10-17 12:43:59 +000010001 enc = xmlDetectCharEncoding(&start[0], 4);
Daniel Veillard4aafa792001-07-28 17:21:12 +000010002 if (enc != XML_CHAR_ENCODING_NONE) {
10003 xmlSwitchEncoding(ctxt, enc);
10004 }
Owen Taylor3473f882001-02-23 17:55:21 +000010005 }
10006
10007
10008 if (CUR == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010009 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010010 }
10011
10012 /*
10013 * Check for the XMLDecl in the Prolog.
10014 */
10015 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +000010016 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +000010017
10018 /*
10019 * Note that we will switch encoding on the fly.
10020 */
10021 xmlParseXMLDecl(ctxt);
10022 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
10023 /*
10024 * The XML REC instructs us to stop parsing right here
10025 */
10026 return(-1);
10027 }
10028 ctxt->standalone = ctxt->input->standalone;
10029 SKIP_BLANKS;
10030 } else {
10031 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10032 }
10033 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
10034 ctxt->sax->startDocument(ctxt->userData);
10035
10036 /*
10037 * The Misc part of the Prolog
10038 */
10039 GROW;
10040 xmlParseMisc(ctxt);
10041
10042 /*
10043 * Then possibly doc type declaration(s) and more Misc
10044 * (doctypedecl Misc*)?
10045 */
10046 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +000010047 if (CMP9(CUR_PTR, '<', '!', 'D', 'O', 'C', 'T', 'Y', 'P', 'E')) {
Owen Taylor3473f882001-02-23 17:55:21 +000010048
10049 ctxt->inSubset = 1;
10050 xmlParseDocTypeDecl(ctxt);
10051 if (RAW == '[') {
10052 ctxt->instate = XML_PARSER_DTD;
10053 xmlParseInternalSubset(ctxt);
10054 }
10055
10056 /*
10057 * Create and update the external subset.
10058 */
10059 ctxt->inSubset = 2;
10060 if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
10061 (!ctxt->disableSAX))
10062 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
10063 ctxt->extSubSystem, ctxt->extSubURI);
10064 ctxt->inSubset = 0;
10065
Daniel Veillardac4118d2008-01-11 05:27:32 +000010066 xmlCleanSpecialAttr(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010067
10068 ctxt->instate = XML_PARSER_PROLOG;
10069 xmlParseMisc(ctxt);
10070 }
10071
10072 /*
10073 * Time to start parsing the tree itself
10074 */
10075 GROW;
10076 if (RAW != '<') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +000010077 xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
10078 "Start tag expected, '<' not found\n");
Owen Taylor3473f882001-02-23 17:55:21 +000010079 } else {
10080 ctxt->instate = XML_PARSER_CONTENT;
10081 xmlParseElement(ctxt);
10082 ctxt->instate = XML_PARSER_EPILOG;
10083
10084
10085 /*
10086 * The Misc part at the end
10087 */
10088 xmlParseMisc(ctxt);
10089
Daniel Veillard561b7f82002-03-20 21:55:57 +000010090 if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010091 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010092 }
10093 ctxt->instate = XML_PARSER_EOF;
10094 }
10095
10096 /*
10097 * SAX: end of the document processing.
10098 */
Daniel Veillard8d24cc12002-03-05 15:41:29 +000010099 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +000010100 ctxt->sax->endDocument(ctxt->userData);
10101
Daniel Veillard5997aca2002-03-18 18:36:20 +000010102 /*
10103 * Remove locally kept entity definitions if the tree was not built
10104 */
10105 if ((ctxt->myDoc != NULL) &&
10106 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
10107 xmlFreeDoc(ctxt->myDoc);
10108 ctxt->myDoc = NULL;
10109 }
10110
Daniel Veillardae0765b2008-07-31 19:54:59 +000010111 if ((ctxt->wellFormed) && (ctxt->myDoc != NULL)) {
10112 ctxt->myDoc->properties |= XML_DOC_WELLFORMED;
10113 if (ctxt->valid)
10114 ctxt->myDoc->properties |= XML_DOC_DTDVALID;
10115 if (ctxt->nsWellFormed)
10116 ctxt->myDoc->properties |= XML_DOC_NSVALID;
10117 if (ctxt->options & XML_PARSE_OLD10)
10118 ctxt->myDoc->properties |= XML_DOC_OLD10;
10119 }
Daniel Veillardc7612992002-02-17 22:47:37 +000010120 if (! ctxt->wellFormed) {
10121 ctxt->valid = 0;
10122 return(-1);
10123 }
Owen Taylor3473f882001-02-23 17:55:21 +000010124 return(0);
10125}
10126
10127/**
10128 * xmlParseExtParsedEnt:
10129 * @ctxt: an XML parser context
10130 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +000010131 * parse a general parsed entity
Owen Taylor3473f882001-02-23 17:55:21 +000010132 * An external general parsed entity is well-formed if it matches the
10133 * production labeled extParsedEnt.
10134 *
10135 * [78] extParsedEnt ::= TextDecl? content
10136 *
10137 * Returns 0, -1 in case of error. the parser context is augmented
10138 * as a result of the parsing.
10139 */
10140
10141int
10142xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
10143 xmlChar start[4];
10144 xmlCharEncoding enc;
10145
Daniel Veillard36e5cd52004-11-02 14:52:23 +000010146 if ((ctxt == NULL) || (ctxt->input == NULL))
10147 return(-1);
10148
Owen Taylor3473f882001-02-23 17:55:21 +000010149 xmlDefaultSAXHandlerInit();
10150
Daniel Veillard309f81d2003-09-23 09:02:53 +000010151 xmlDetectSAX2(ctxt);
10152
Owen Taylor3473f882001-02-23 17:55:21 +000010153 GROW;
10154
10155 /*
10156 * SAX: beginning of the document processing.
10157 */
10158 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10159 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
10160
10161 /*
10162 * Get the 4 first bytes and decode the charset
10163 * if enc != XML_CHAR_ENCODING_NONE
10164 * plug some encoding conversion routines.
10165 */
Daniel Veillard4aede2e2003-10-17 12:43:59 +000010166 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
10167 start[0] = RAW;
10168 start[1] = NXT(1);
10169 start[2] = NXT(2);
10170 start[3] = NXT(3);
10171 enc = xmlDetectCharEncoding(start, 4);
10172 if (enc != XML_CHAR_ENCODING_NONE) {
10173 xmlSwitchEncoding(ctxt, enc);
10174 }
Owen Taylor3473f882001-02-23 17:55:21 +000010175 }
10176
10177
10178 if (CUR == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010179 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010180 }
10181
10182 /*
10183 * Check for the XMLDecl in the Prolog.
10184 */
10185 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +000010186 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +000010187
10188 /*
10189 * Note that we will switch encoding on the fly.
10190 */
10191 xmlParseXMLDecl(ctxt);
10192 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
10193 /*
10194 * The XML REC instructs us to stop parsing right here
10195 */
10196 return(-1);
10197 }
10198 SKIP_BLANKS;
10199 } else {
10200 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10201 }
10202 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
10203 ctxt->sax->startDocument(ctxt->userData);
10204
10205 /*
10206 * Doing validity checking on chunk doesn't make sense
10207 */
10208 ctxt->instate = XML_PARSER_CONTENT;
10209 ctxt->validate = 0;
10210 ctxt->loadsubset = 0;
10211 ctxt->depth = 0;
10212
10213 xmlParseContent(ctxt);
10214
10215 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010216 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010217 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010218 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010219 }
10220
10221 /*
10222 * SAX: end of the document processing.
10223 */
Daniel Veillard8d24cc12002-03-05 15:41:29 +000010224 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +000010225 ctxt->sax->endDocument(ctxt->userData);
10226
10227 if (! ctxt->wellFormed) return(-1);
10228 return(0);
10229}
10230
Daniel Veillard73b013f2003-09-30 12:36:01 +000010231#ifdef LIBXML_PUSH_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000010232/************************************************************************
10233 * *
10234 * Progressive parsing interfaces *
10235 * *
10236 ************************************************************************/
10237
10238/**
10239 * xmlParseLookupSequence:
10240 * @ctxt: an XML parser context
10241 * @first: the first char to lookup
10242 * @next: the next char to lookup or zero
10243 * @third: the next char to lookup or zero
10244 *
10245 * Try to find if a sequence (first, next, third) or just (first next) or
10246 * (first) is available in the input stream.
10247 * This function has a side effect of (possibly) incrementing ctxt->checkIndex
10248 * to avoid rescanning sequences of bytes, it DOES change the state of the
10249 * parser, do not use liberally.
10250 *
10251 * Returns the index to the current parsing point if the full sequence
10252 * is available, -1 otherwise.
10253 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +000010254static int
Owen Taylor3473f882001-02-23 17:55:21 +000010255xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
10256 xmlChar next, xmlChar third) {
10257 int base, len;
10258 xmlParserInputPtr in;
10259 const xmlChar *buf;
10260
10261 in = ctxt->input;
10262 if (in == NULL) return(-1);
10263 base = in->cur - in->base;
10264 if (base < 0) return(-1);
10265 if (ctxt->checkIndex > base)
10266 base = ctxt->checkIndex;
10267 if (in->buf == NULL) {
10268 buf = in->base;
10269 len = in->length;
10270 } else {
10271 buf = in->buf->buffer->content;
10272 len = in->buf->buffer->use;
10273 }
10274 /* take into account the sequence length */
10275 if (third) len -= 2;
10276 else if (next) len --;
10277 for (;base < len;base++) {
10278 if (buf[base] == first) {
10279 if (third != 0) {
10280 if ((buf[base + 1] != next) ||
10281 (buf[base + 2] != third)) continue;
10282 } else if (next != 0) {
10283 if (buf[base + 1] != next) continue;
10284 }
10285 ctxt->checkIndex = 0;
10286#ifdef DEBUG_PUSH
10287 if (next == 0)
10288 xmlGenericError(xmlGenericErrorContext,
10289 "PP: lookup '%c' found at %d\n",
10290 first, base);
10291 else if (third == 0)
10292 xmlGenericError(xmlGenericErrorContext,
10293 "PP: lookup '%c%c' found at %d\n",
10294 first, next, base);
10295 else
10296 xmlGenericError(xmlGenericErrorContext,
10297 "PP: lookup '%c%c%c' found at %d\n",
10298 first, next, third, base);
10299#endif
10300 return(base - (in->cur - in->base));
10301 }
10302 }
10303 ctxt->checkIndex = base;
10304#ifdef DEBUG_PUSH
10305 if (next == 0)
10306 xmlGenericError(xmlGenericErrorContext,
10307 "PP: lookup '%c' failed\n", first);
10308 else if (third == 0)
10309 xmlGenericError(xmlGenericErrorContext,
10310 "PP: lookup '%c%c' failed\n", first, next);
10311 else
10312 xmlGenericError(xmlGenericErrorContext,
10313 "PP: lookup '%c%c%c' failed\n", first, next, third);
10314#endif
10315 return(-1);
10316}
10317
10318/**
Daniel Veillarda880b122003-04-21 21:36:41 +000010319 * xmlParseGetLasts:
10320 * @ctxt: an XML parser context
10321 * @lastlt: pointer to store the last '<' from the input
10322 * @lastgt: pointer to store the last '>' from the input
10323 *
10324 * Lookup the last < and > in the current chunk
10325 */
10326static void
10327xmlParseGetLasts(xmlParserCtxtPtr ctxt, const xmlChar **lastlt,
10328 const xmlChar **lastgt) {
10329 const xmlChar *tmp;
10330
10331 if ((ctxt == NULL) || (lastlt == NULL) || (lastgt == NULL)) {
10332 xmlGenericError(xmlGenericErrorContext,
10333 "Internal error: xmlParseGetLasts\n");
10334 return;
10335 }
Daniel Veillard0df3bc32004-06-08 12:03:41 +000010336 if ((ctxt->progressive != 0) && (ctxt->inputNr == 1)) {
Daniel Veillarda880b122003-04-21 21:36:41 +000010337 tmp = ctxt->input->end;
10338 tmp--;
Daniel Veillardeb70f932004-07-05 16:46:09 +000010339 while ((tmp >= ctxt->input->base) && (*tmp != '<')) tmp--;
Daniel Veillarda880b122003-04-21 21:36:41 +000010340 if (tmp < ctxt->input->base) {
10341 *lastlt = NULL;
10342 *lastgt = NULL;
Daniel Veillarda880b122003-04-21 21:36:41 +000010343 } else {
Daniel Veillardeb70f932004-07-05 16:46:09 +000010344 *lastlt = tmp;
10345 tmp++;
10346 while ((tmp < ctxt->input->end) && (*tmp != '>')) {
10347 if (*tmp == '\'') {
10348 tmp++;
10349 while ((tmp < ctxt->input->end) && (*tmp != '\'')) tmp++;
10350 if (tmp < ctxt->input->end) tmp++;
10351 } else if (*tmp == '"') {
10352 tmp++;
10353 while ((tmp < ctxt->input->end) && (*tmp != '"')) tmp++;
10354 if (tmp < ctxt->input->end) tmp++;
10355 } else
10356 tmp++;
10357 }
10358 if (tmp < ctxt->input->end)
10359 *lastgt = tmp;
10360 else {
10361 tmp = *lastlt;
10362 tmp--;
10363 while ((tmp >= ctxt->input->base) && (*tmp != '>')) tmp--;
10364 if (tmp >= ctxt->input->base)
10365 *lastgt = tmp;
10366 else
10367 *lastgt = NULL;
10368 }
Daniel Veillarda880b122003-04-21 21:36:41 +000010369 }
Daniel Veillarda880b122003-04-21 21:36:41 +000010370 } else {
10371 *lastlt = NULL;
10372 *lastgt = NULL;
10373 }
10374}
10375/**
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000010376 * xmlCheckCdataPush:
10377 * @cur: pointer to the bock of characters
10378 * @len: length of the block in bytes
10379 *
10380 * Check that the block of characters is okay as SCdata content [20]
10381 *
10382 * Returns the number of bytes to pass if okay, a negative index where an
10383 * UTF-8 error occured otherwise
10384 */
10385static int
10386xmlCheckCdataPush(const xmlChar *utf, int len) {
10387 int ix;
10388 unsigned char c;
10389 int codepoint;
10390
10391 if ((utf == NULL) || (len <= 0))
10392 return(0);
10393
10394 for (ix = 0; ix < len;) { /* string is 0-terminated */
10395 c = utf[ix];
10396 if ((c & 0x80) == 0x00) { /* 1-byte code, starts with 10 */
10397 if (c >= 0x20)
10398 ix++;
10399 else if ((c == 0xA) || (c == 0xD) || (c == 0x9))
10400 ix++;
10401 else
10402 return(-ix);
10403 } else if ((c & 0xe0) == 0xc0) {/* 2-byte code, starts with 110 */
10404 if (ix + 2 > len) return(ix);
10405 if ((utf[ix+1] & 0xc0 ) != 0x80)
10406 return(-ix);
Daniel Veillardeca59a22005-09-09 10:56:28 +000010407 codepoint = (utf[ix] & 0x1f) << 6;
10408 codepoint |= utf[ix+1] & 0x3f;
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000010409 if (!xmlIsCharQ(codepoint))
10410 return(-ix);
10411 ix += 2;
10412 } else if ((c & 0xf0) == 0xe0) {/* 3-byte code, starts with 1110 */
10413 if (ix + 3 > len) return(ix);
10414 if (((utf[ix+1] & 0xc0) != 0x80) ||
10415 ((utf[ix+2] & 0xc0) != 0x80))
10416 return(-ix);
Daniel Veillardeca59a22005-09-09 10:56:28 +000010417 codepoint = (utf[ix] & 0xf) << 12;
10418 codepoint |= (utf[ix+1] & 0x3f) << 6;
10419 codepoint |= utf[ix+2] & 0x3f;
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000010420 if (!xmlIsCharQ(codepoint))
10421 return(-ix);
10422 ix += 3;
10423 } else if ((c & 0xf8) == 0xf0) {/* 4-byte code, starts with 11110 */
10424 if (ix + 4 > len) return(ix);
10425 if (((utf[ix+1] & 0xc0) != 0x80) ||
10426 ((utf[ix+2] & 0xc0) != 0x80) ||
10427 ((utf[ix+3] & 0xc0) != 0x80))
10428 return(-ix);
Daniel Veillardeca59a22005-09-09 10:56:28 +000010429 codepoint = (utf[ix] & 0x7) << 18;
10430 codepoint |= (utf[ix+1] & 0x3f) << 12;
10431 codepoint |= (utf[ix+2] & 0x3f) << 6;
10432 codepoint |= utf[ix+3] & 0x3f;
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000010433 if (!xmlIsCharQ(codepoint))
10434 return(-ix);
10435 ix += 4;
10436 } else /* unknown encoding */
10437 return(-ix);
10438 }
10439 return(ix);
10440}
10441
10442/**
Owen Taylor3473f882001-02-23 17:55:21 +000010443 * xmlParseTryOrFinish:
10444 * @ctxt: an XML parser context
10445 * @terminate: last chunk indicator
10446 *
10447 * Try to progress on parsing
10448 *
10449 * Returns zero if no parsing was possible
10450 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +000010451static int
Owen Taylor3473f882001-02-23 17:55:21 +000010452xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
10453 int ret = 0;
Daniel Veillardc82c57e2004-01-12 16:24:34 +000010454 int avail, tlen;
Owen Taylor3473f882001-02-23 17:55:21 +000010455 xmlChar cur, next;
Daniel Veillarda880b122003-04-21 21:36:41 +000010456 const xmlChar *lastlt, *lastgt;
Owen Taylor3473f882001-02-23 17:55:21 +000010457
Daniel Veillard36e5cd52004-11-02 14:52:23 +000010458 if (ctxt->input == NULL)
10459 return(0);
10460
Owen Taylor3473f882001-02-23 17:55:21 +000010461#ifdef DEBUG_PUSH
10462 switch (ctxt->instate) {
10463 case XML_PARSER_EOF:
10464 xmlGenericError(xmlGenericErrorContext,
10465 "PP: try EOF\n"); break;
10466 case XML_PARSER_START:
10467 xmlGenericError(xmlGenericErrorContext,
10468 "PP: try START\n"); break;
10469 case XML_PARSER_MISC:
10470 xmlGenericError(xmlGenericErrorContext,
10471 "PP: try MISC\n");break;
10472 case XML_PARSER_COMMENT:
10473 xmlGenericError(xmlGenericErrorContext,
10474 "PP: try COMMENT\n");break;
10475 case XML_PARSER_PROLOG:
10476 xmlGenericError(xmlGenericErrorContext,
10477 "PP: try PROLOG\n");break;
10478 case XML_PARSER_START_TAG:
10479 xmlGenericError(xmlGenericErrorContext,
10480 "PP: try START_TAG\n");break;
10481 case XML_PARSER_CONTENT:
10482 xmlGenericError(xmlGenericErrorContext,
10483 "PP: try CONTENT\n");break;
10484 case XML_PARSER_CDATA_SECTION:
10485 xmlGenericError(xmlGenericErrorContext,
10486 "PP: try CDATA_SECTION\n");break;
10487 case XML_PARSER_END_TAG:
10488 xmlGenericError(xmlGenericErrorContext,
10489 "PP: try END_TAG\n");break;
10490 case XML_PARSER_ENTITY_DECL:
10491 xmlGenericError(xmlGenericErrorContext,
10492 "PP: try ENTITY_DECL\n");break;
10493 case XML_PARSER_ENTITY_VALUE:
10494 xmlGenericError(xmlGenericErrorContext,
10495 "PP: try ENTITY_VALUE\n");break;
10496 case XML_PARSER_ATTRIBUTE_VALUE:
10497 xmlGenericError(xmlGenericErrorContext,
10498 "PP: try ATTRIBUTE_VALUE\n");break;
10499 case XML_PARSER_DTD:
10500 xmlGenericError(xmlGenericErrorContext,
10501 "PP: try DTD\n");break;
10502 case XML_PARSER_EPILOG:
10503 xmlGenericError(xmlGenericErrorContext,
10504 "PP: try EPILOG\n");break;
10505 case XML_PARSER_PI:
10506 xmlGenericError(xmlGenericErrorContext,
10507 "PP: try PI\n");break;
10508 case XML_PARSER_IGNORE:
10509 xmlGenericError(xmlGenericErrorContext,
10510 "PP: try IGNORE\n");break;
10511 }
10512#endif
10513
Daniel Veillard198c1bf2003-10-20 17:07:41 +000010514 if ((ctxt->input != NULL) &&
10515 (ctxt->input->cur - ctxt->input->base > 4096)) {
Daniel Veillarda880b122003-04-21 21:36:41 +000010516 xmlSHRINK(ctxt);
10517 ctxt->checkIndex = 0;
10518 }
10519 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Aleksey Sanine48a3182002-05-09 18:20:01 +000010520
Daniel Veillarda880b122003-04-21 21:36:41 +000010521 while (1) {
Daniel Veillard14412512005-01-21 23:53:26 +000010522 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000010523 return(0);
10524
10525
Owen Taylor3473f882001-02-23 17:55:21 +000010526 /*
10527 * Pop-up of finished entities.
10528 */
10529 while ((RAW == 0) && (ctxt->inputNr > 1))
10530 xmlPopInput(ctxt);
10531
Daniel Veillard198c1bf2003-10-20 17:07:41 +000010532 if (ctxt->input == NULL) break;
Owen Taylor3473f882001-02-23 17:55:21 +000010533 if (ctxt->input->buf == NULL)
Daniel Veillarda880b122003-04-21 21:36:41 +000010534 avail = ctxt->input->length -
10535 (ctxt->input->cur - ctxt->input->base);
Daniel Veillard158a4d22002-02-20 22:17:58 +000010536 else {
10537 /*
10538 * If we are operating on converted input, try to flush
10539 * remaining chars to avoid them stalling in the non-converted
10540 * buffer.
10541 */
10542 if ((ctxt->input->buf->raw != NULL) &&
10543 (ctxt->input->buf->raw->use > 0)) {
10544 int base = ctxt->input->base -
10545 ctxt->input->buf->buffer->content;
10546 int current = ctxt->input->cur - ctxt->input->base;
10547
10548 xmlParserInputBufferPush(ctxt->input->buf, 0, "");
10549 ctxt->input->base = ctxt->input->buf->buffer->content + base;
10550 ctxt->input->cur = ctxt->input->base + current;
10551 ctxt->input->end =
10552 &ctxt->input->buf->buffer->content[
10553 ctxt->input->buf->buffer->use];
10554 }
10555 avail = ctxt->input->buf->buffer->use -
10556 (ctxt->input->cur - ctxt->input->base);
10557 }
Owen Taylor3473f882001-02-23 17:55:21 +000010558 if (avail < 1)
10559 goto done;
10560 switch (ctxt->instate) {
10561 case XML_PARSER_EOF:
10562 /*
10563 * Document parsing is done !
10564 */
10565 goto done;
10566 case XML_PARSER_START:
Daniel Veillard0e4cd172001-06-28 12:13:56 +000010567 if (ctxt->charset == XML_CHAR_ENCODING_NONE) {
10568 xmlChar start[4];
10569 xmlCharEncoding enc;
10570
10571 /*
10572 * Very first chars read from the document flow.
10573 */
10574 if (avail < 4)
10575 goto done;
10576
10577 /*
10578 * Get the 4 first bytes and decode the charset
10579 * if enc != XML_CHAR_ENCODING_NONE
William M. Brack3a1cd212005-02-11 14:35:54 +000010580 * plug some encoding conversion routines,
10581 * else xmlSwitchEncoding will set to (default)
10582 * UTF8.
Daniel Veillard0e4cd172001-06-28 12:13:56 +000010583 */
10584 start[0] = RAW;
10585 start[1] = NXT(1);
10586 start[2] = NXT(2);
10587 start[3] = NXT(3);
10588 enc = xmlDetectCharEncoding(start, 4);
William M. Brack3a1cd212005-02-11 14:35:54 +000010589 xmlSwitchEncoding(ctxt, enc);
Daniel Veillard0e4cd172001-06-28 12:13:56 +000010590 break;
10591 }
Owen Taylor3473f882001-02-23 17:55:21 +000010592
Daniel Veillard2b8c4a12003-10-02 22:28:19 +000010593 if (avail < 2)
10594 goto done;
Owen Taylor3473f882001-02-23 17:55:21 +000010595 cur = ctxt->input->cur[0];
10596 next = ctxt->input->cur[1];
10597 if (cur == 0) {
10598 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10599 ctxt->sax->setDocumentLocator(ctxt->userData,
10600 &xmlDefaultSAXLocator);
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010601 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010602 ctxt->instate = XML_PARSER_EOF;
10603#ifdef DEBUG_PUSH
10604 xmlGenericError(xmlGenericErrorContext,
10605 "PP: entering EOF\n");
10606#endif
10607 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10608 ctxt->sax->endDocument(ctxt->userData);
10609 goto done;
10610 }
10611 if ((cur == '<') && (next == '?')) {
10612 /* PI or XML decl */
10613 if (avail < 5) return(ret);
10614 if ((!terminate) &&
10615 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
10616 return(ret);
10617 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10618 ctxt->sax->setDocumentLocator(ctxt->userData,
10619 &xmlDefaultSAXLocator);
10620 if ((ctxt->input->cur[2] == 'x') &&
10621 (ctxt->input->cur[3] == 'm') &&
10622 (ctxt->input->cur[4] == 'l') &&
William M. Brack76e95df2003-10-18 16:20:14 +000010623 (IS_BLANK_CH(ctxt->input->cur[5]))) {
Owen Taylor3473f882001-02-23 17:55:21 +000010624 ret += 5;
10625#ifdef DEBUG_PUSH
10626 xmlGenericError(xmlGenericErrorContext,
10627 "PP: Parsing XML Decl\n");
10628#endif
10629 xmlParseXMLDecl(ctxt);
10630 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
10631 /*
10632 * The XML REC instructs us to stop parsing right
10633 * here
10634 */
10635 ctxt->instate = XML_PARSER_EOF;
10636 return(0);
10637 }
10638 ctxt->standalone = ctxt->input->standalone;
10639 if ((ctxt->encoding == NULL) &&
10640 (ctxt->input->encoding != NULL))
10641 ctxt->encoding = xmlStrdup(ctxt->input->encoding);
10642 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
10643 (!ctxt->disableSAX))
10644 ctxt->sax->startDocument(ctxt->userData);
10645 ctxt->instate = XML_PARSER_MISC;
10646#ifdef DEBUG_PUSH
10647 xmlGenericError(xmlGenericErrorContext,
10648 "PP: entering MISC\n");
10649#endif
10650 } else {
10651 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10652 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
10653 (!ctxt->disableSAX))
10654 ctxt->sax->startDocument(ctxt->userData);
10655 ctxt->instate = XML_PARSER_MISC;
10656#ifdef DEBUG_PUSH
10657 xmlGenericError(xmlGenericErrorContext,
10658 "PP: entering MISC\n");
10659#endif
10660 }
10661 } else {
10662 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10663 ctxt->sax->setDocumentLocator(ctxt->userData,
10664 &xmlDefaultSAXLocator);
10665 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
William M. Bracka3215c72004-07-31 16:24:01 +000010666 if (ctxt->version == NULL) {
10667 xmlErrMemory(ctxt, NULL);
10668 break;
10669 }
Owen Taylor3473f882001-02-23 17:55:21 +000010670 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
10671 (!ctxt->disableSAX))
10672 ctxt->sax->startDocument(ctxt->userData);
10673 ctxt->instate = XML_PARSER_MISC;
10674#ifdef DEBUG_PUSH
10675 xmlGenericError(xmlGenericErrorContext,
10676 "PP: entering MISC\n");
10677#endif
10678 }
10679 break;
Daniel Veillarda880b122003-04-21 21:36:41 +000010680 case XML_PARSER_START_TAG: {
Daniel Veillarde57ec792003-09-10 10:50:59 +000010681 const xmlChar *name;
10682 const xmlChar *prefix;
10683 const xmlChar *URI;
10684 int nsNr = ctxt->nsNr;
Daniel Veillarda880b122003-04-21 21:36:41 +000010685
10686 if ((avail < 2) && (ctxt->inputNr == 1))
10687 goto done;
10688 cur = ctxt->input->cur[0];
10689 if (cur != '<') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010690 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Daniel Veillarda880b122003-04-21 21:36:41 +000010691 ctxt->instate = XML_PARSER_EOF;
Daniel Veillarda880b122003-04-21 21:36:41 +000010692 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10693 ctxt->sax->endDocument(ctxt->userData);
10694 goto done;
10695 }
10696 if (!terminate) {
10697 if (ctxt->progressive) {
Daniel Veillardb3744002004-02-18 14:28:22 +000010698 /* > can be found unescaped in attribute values */
Daniel Veillardeb70f932004-07-05 16:46:09 +000010699 if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
Daniel Veillarda880b122003-04-21 21:36:41 +000010700 goto done;
10701 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
10702 goto done;
10703 }
10704 }
10705 if (ctxt->spaceNr == 0)
10706 spacePush(ctxt, -1);
Daniel Veillard1114d002006-10-12 16:24:35 +000010707 else if (*ctxt->space == -2)
10708 spacePush(ctxt, -1);
Daniel Veillarda880b122003-04-21 21:36:41 +000010709 else
10710 spacePush(ctxt, *ctxt->space);
Daniel Veillard81273902003-09-30 00:43:48 +000010711#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +000010712 if (ctxt->sax2)
Daniel Veillard81273902003-09-30 00:43:48 +000010713#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillardc82c57e2004-01-12 16:24:34 +000010714 name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
Daniel Veillard81273902003-09-30 00:43:48 +000010715#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +000010716 else
10717 name = xmlParseStartTag(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +000010718#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarda880b122003-04-21 21:36:41 +000010719 if (name == NULL) {
10720 spacePop(ctxt);
10721 ctxt->instate = XML_PARSER_EOF;
Daniel Veillarda880b122003-04-21 21:36:41 +000010722 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10723 ctxt->sax->endDocument(ctxt->userData);
10724 goto done;
10725 }
Daniel Veillard4432df22003-09-28 18:58:27 +000010726#ifdef LIBXML_VALID_ENABLED
Daniel Veillarda880b122003-04-21 21:36:41 +000010727 /*
10728 * [ VC: Root Element Type ]
10729 * The Name in the document type declaration must match
10730 * the element type of the root element.
10731 */
10732 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
10733 ctxt->node && (ctxt->node == ctxt->myDoc->children))
10734 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
Daniel Veillard4432df22003-09-28 18:58:27 +000010735#endif /* LIBXML_VALID_ENABLED */
Daniel Veillarda880b122003-04-21 21:36:41 +000010736
10737 /*
10738 * Check for an Empty Element.
10739 */
10740 if ((RAW == '/') && (NXT(1) == '>')) {
10741 SKIP(2);
Daniel Veillarde57ec792003-09-10 10:50:59 +000010742
10743 if (ctxt->sax2) {
10744 if ((ctxt->sax != NULL) &&
10745 (ctxt->sax->endElementNs != NULL) &&
10746 (!ctxt->disableSAX))
10747 ctxt->sax->endElementNs(ctxt->userData, name,
10748 prefix, URI);
Daniel Veillard48df9612005-01-04 21:50:05 +000010749 if (ctxt->nsNr - nsNr > 0)
10750 nsPop(ctxt, ctxt->nsNr - nsNr);
Daniel Veillard81273902003-09-30 00:43:48 +000010751#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +000010752 } else {
10753 if ((ctxt->sax != NULL) &&
10754 (ctxt->sax->endElement != NULL) &&
10755 (!ctxt->disableSAX))
10756 ctxt->sax->endElement(ctxt->userData, name);
Daniel Veillard81273902003-09-30 00:43:48 +000010757#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarda880b122003-04-21 21:36:41 +000010758 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000010759 spacePop(ctxt);
10760 if (ctxt->nameNr == 0) {
Daniel Veillarda880b122003-04-21 21:36:41 +000010761 ctxt->instate = XML_PARSER_EPILOG;
Daniel Veillarda880b122003-04-21 21:36:41 +000010762 } else {
10763 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillarda880b122003-04-21 21:36:41 +000010764 }
10765 break;
10766 }
10767 if (RAW == '>') {
10768 NEXT;
10769 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +000010770 xmlFatalErrMsgStr(ctxt, XML_ERR_GT_REQUIRED,
Daniel Veillarda880b122003-04-21 21:36:41 +000010771 "Couldn't find end of Start Tag %s\n",
10772 name);
Daniel Veillarda880b122003-04-21 21:36:41 +000010773 nodePop(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +000010774 spacePop(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +000010775 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000010776 if (ctxt->sax2)
10777 nameNsPush(ctxt, name, prefix, URI, ctxt->nsNr - nsNr);
Daniel Veillard81273902003-09-30 00:43:48 +000010778#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +000010779 else
10780 namePush(ctxt, name);
Daniel Veillard81273902003-09-30 00:43:48 +000010781#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +000010782
Daniel Veillarda880b122003-04-21 21:36:41 +000010783 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillarda880b122003-04-21 21:36:41 +000010784 break;
10785 }
10786 case XML_PARSER_CONTENT: {
10787 const xmlChar *test;
10788 unsigned int cons;
10789 if ((avail < 2) && (ctxt->inputNr == 1))
10790 goto done;
10791 cur = ctxt->input->cur[0];
10792 next = ctxt->input->cur[1];
10793
10794 test = CUR_PTR;
10795 cons = ctxt->input->consumed;
10796 if ((cur == '<') && (next == '/')) {
10797 ctxt->instate = XML_PARSER_END_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +000010798 break;
10799 } else if ((cur == '<') && (next == '?')) {
10800 if ((!terminate) &&
10801 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
10802 goto done;
Daniel Veillarda880b122003-04-21 21:36:41 +000010803 xmlParsePI(ctxt);
10804 } else if ((cur == '<') && (next != '!')) {
10805 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +000010806 break;
10807 } else if ((cur == '<') && (next == '!') &&
10808 (ctxt->input->cur[2] == '-') &&
10809 (ctxt->input->cur[3] == '-')) {
Daniel Veillard6974feb2006-02-05 02:43:36 +000010810 int term;
10811
10812 if (avail < 4)
10813 goto done;
10814 ctxt->input->cur += 4;
10815 term = xmlParseLookupSequence(ctxt, '-', '-', '>');
10816 ctxt->input->cur -= 4;
10817 if ((!terminate) && (term < 0))
Daniel Veillarda880b122003-04-21 21:36:41 +000010818 goto done;
Daniel Veillarda880b122003-04-21 21:36:41 +000010819 xmlParseComment(ctxt);
10820 ctxt->instate = XML_PARSER_CONTENT;
10821 } else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
10822 (ctxt->input->cur[2] == '[') &&
10823 (ctxt->input->cur[3] == 'C') &&
10824 (ctxt->input->cur[4] == 'D') &&
10825 (ctxt->input->cur[5] == 'A') &&
10826 (ctxt->input->cur[6] == 'T') &&
10827 (ctxt->input->cur[7] == 'A') &&
10828 (ctxt->input->cur[8] == '[')) {
10829 SKIP(9);
10830 ctxt->instate = XML_PARSER_CDATA_SECTION;
Daniel Veillarda880b122003-04-21 21:36:41 +000010831 break;
10832 } else if ((cur == '<') && (next == '!') &&
10833 (avail < 9)) {
10834 goto done;
10835 } else if (cur == '&') {
10836 if ((!terminate) &&
10837 (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0))
10838 goto done;
Daniel Veillarda880b122003-04-21 21:36:41 +000010839 xmlParseReference(ctxt);
10840 } else {
10841 /* TODO Avoid the extra copy, handle directly !!! */
10842 /*
10843 * Goal of the following test is:
10844 * - minimize calls to the SAX 'character' callback
10845 * when they are mergeable
10846 * - handle a problem for isBlank when we only parse
10847 * a sequence of blank chars and the next one is
10848 * not available to check against '<' presence.
10849 * - tries to homogenize the differences in SAX
10850 * callbacks between the push and pull versions
10851 * of the parser.
10852 */
10853 if ((ctxt->inputNr == 1) &&
10854 (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
10855 if (!terminate) {
10856 if (ctxt->progressive) {
10857 if ((lastlt == NULL) ||
10858 (ctxt->input->cur > lastlt))
10859 goto done;
10860 } else if (xmlParseLookupSequence(ctxt,
10861 '<', 0, 0) < 0) {
10862 goto done;
10863 }
10864 }
10865 }
10866 ctxt->checkIndex = 0;
Daniel Veillarda880b122003-04-21 21:36:41 +000010867 xmlParseCharData(ctxt, 0);
10868 }
10869 /*
10870 * Pop-up of finished entities.
10871 */
10872 while ((RAW == 0) && (ctxt->inputNr > 1))
10873 xmlPopInput(ctxt);
10874 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010875 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
10876 "detected an error in element content\n");
Daniel Veillarda880b122003-04-21 21:36:41 +000010877 ctxt->instate = XML_PARSER_EOF;
10878 break;
10879 }
10880 break;
10881 }
10882 case XML_PARSER_END_TAG:
10883 if (avail < 2)
10884 goto done;
10885 if (!terminate) {
10886 if (ctxt->progressive) {
Daniel Veillardeb70f932004-07-05 16:46:09 +000010887 /* > can be found unescaped in attribute values */
10888 if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
Daniel Veillarda880b122003-04-21 21:36:41 +000010889 goto done;
10890 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
10891 goto done;
10892 }
10893 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000010894 if (ctxt->sax2) {
10895 xmlParseEndTag2(ctxt,
10896 (void *) ctxt->pushTab[ctxt->nameNr * 3 - 3],
10897 (void *) ctxt->pushTab[ctxt->nameNr * 3 - 2], 0,
Daniel Veillardc82c57e2004-01-12 16:24:34 +000010898 (int) (long) ctxt->pushTab[ctxt->nameNr * 3 - 1], 0);
Daniel Veillarde57ec792003-09-10 10:50:59 +000010899 nameNsPop(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +000010900 }
10901#ifdef LIBXML_SAX1_ENABLED
10902 else
Daniel Veillarde57ec792003-09-10 10:50:59 +000010903 xmlParseEndTag1(ctxt, 0);
Daniel Veillard81273902003-09-30 00:43:48 +000010904#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +000010905 if (ctxt->nameNr == 0) {
Daniel Veillarda880b122003-04-21 21:36:41 +000010906 ctxt->instate = XML_PARSER_EPILOG;
Daniel Veillarda880b122003-04-21 21:36:41 +000010907 } else {
10908 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillarda880b122003-04-21 21:36:41 +000010909 }
10910 break;
10911 case XML_PARSER_CDATA_SECTION: {
10912 /*
10913 * The Push mode needs to have the SAX callback for
10914 * cdataBlock merge back contiguous callbacks.
10915 */
10916 int base;
10917
10918 base = xmlParseLookupSequence(ctxt, ']', ']', '>');
10919 if (base < 0) {
10920 if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) {
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000010921 int tmp;
10922
10923 tmp = xmlCheckCdataPush(ctxt->input->cur,
10924 XML_PARSER_BIG_BUFFER_SIZE);
10925 if (tmp < 0) {
10926 tmp = -tmp;
10927 ctxt->input->cur += tmp;
10928 goto encoding_error;
10929 }
Daniel Veillarda880b122003-04-21 21:36:41 +000010930 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
10931 if (ctxt->sax->cdataBlock != NULL)
Daniel Veillardd9d32ae2003-07-05 20:32:43 +000010932 ctxt->sax->cdataBlock(ctxt->userData,
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000010933 ctxt->input->cur, tmp);
Daniel Veillardd9d32ae2003-07-05 20:32:43 +000010934 else if (ctxt->sax->characters != NULL)
10935 ctxt->sax->characters(ctxt->userData,
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000010936 ctxt->input->cur, tmp);
Daniel Veillarda880b122003-04-21 21:36:41 +000010937 }
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000010938 SKIPL(tmp);
Daniel Veillarda880b122003-04-21 21:36:41 +000010939 ctxt->checkIndex = 0;
10940 }
10941 goto done;
10942 } else {
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000010943 int tmp;
10944
10945 tmp = xmlCheckCdataPush(ctxt->input->cur, base);
10946 if ((tmp < 0) || (tmp != base)) {
10947 tmp = -tmp;
10948 ctxt->input->cur += tmp;
10949 goto encoding_error;
10950 }
Daniel Veillardd0d2f092008-03-07 16:50:21 +000010951 if ((ctxt->sax != NULL) && (base == 0) &&
10952 (ctxt->sax->cdataBlock != NULL) &&
10953 (!ctxt->disableSAX)) {
10954 /*
10955 * Special case to provide identical behaviour
10956 * between pull and push parsers on empty CDATA
10957 * sections
10958 */
10959 if ((ctxt->input->cur - ctxt->input->base >= 9) &&
10960 (!strncmp((const char *)&ctxt->input->cur[-9],
10961 "<![CDATA[", 9)))
10962 ctxt->sax->cdataBlock(ctxt->userData,
10963 BAD_CAST "", 0);
10964 } else if ((ctxt->sax != NULL) && (base > 0) &&
Daniel Veillarda880b122003-04-21 21:36:41 +000010965 (!ctxt->disableSAX)) {
10966 if (ctxt->sax->cdataBlock != NULL)
10967 ctxt->sax->cdataBlock(ctxt->userData,
10968 ctxt->input->cur, base);
Daniel Veillardd9d32ae2003-07-05 20:32:43 +000010969 else if (ctxt->sax->characters != NULL)
10970 ctxt->sax->characters(ctxt->userData,
10971 ctxt->input->cur, base);
Daniel Veillarda880b122003-04-21 21:36:41 +000010972 }
Daniel Veillard0b787f32004-03-26 17:29:53 +000010973 SKIPL(base + 3);
Daniel Veillarda880b122003-04-21 21:36:41 +000010974 ctxt->checkIndex = 0;
10975 ctxt->instate = XML_PARSER_CONTENT;
10976#ifdef DEBUG_PUSH
10977 xmlGenericError(xmlGenericErrorContext,
10978 "PP: entering CONTENT\n");
10979#endif
10980 }
10981 break;
10982 }
Owen Taylor3473f882001-02-23 17:55:21 +000010983 case XML_PARSER_MISC:
10984 SKIP_BLANKS;
10985 if (ctxt->input->buf == NULL)
Daniel Veillarda880b122003-04-21 21:36:41 +000010986 avail = ctxt->input->length -
10987 (ctxt->input->cur - ctxt->input->base);
Owen Taylor3473f882001-02-23 17:55:21 +000010988 else
Daniel Veillarda880b122003-04-21 21:36:41 +000010989 avail = ctxt->input->buf->buffer->use -
10990 (ctxt->input->cur - ctxt->input->base);
Owen Taylor3473f882001-02-23 17:55:21 +000010991 if (avail < 2)
10992 goto done;
10993 cur = ctxt->input->cur[0];
10994 next = ctxt->input->cur[1];
10995 if ((cur == '<') && (next == '?')) {
10996 if ((!terminate) &&
10997 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
10998 goto done;
10999#ifdef DEBUG_PUSH
11000 xmlGenericError(xmlGenericErrorContext,
11001 "PP: Parsing PI\n");
11002#endif
11003 xmlParsePI(ctxt);
Daniel Veillard40e4b212007-06-12 14:46:40 +000011004 ctxt->checkIndex = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000011005 } else if ((cur == '<') && (next == '!') &&
Daniel Veillarda880b122003-04-21 21:36:41 +000011006 (ctxt->input->cur[2] == '-') &&
11007 (ctxt->input->cur[3] == '-')) {
Owen Taylor3473f882001-02-23 17:55:21 +000011008 if ((!terminate) &&
11009 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
11010 goto done;
11011#ifdef DEBUG_PUSH
11012 xmlGenericError(xmlGenericErrorContext,
11013 "PP: Parsing Comment\n");
11014#endif
11015 xmlParseComment(ctxt);
11016 ctxt->instate = XML_PARSER_MISC;
Daniel Veillarddfac9462007-06-12 14:44:32 +000011017 ctxt->checkIndex = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000011018 } else if ((cur == '<') && (next == '!') &&
Daniel Veillarda880b122003-04-21 21:36:41 +000011019 (ctxt->input->cur[2] == 'D') &&
11020 (ctxt->input->cur[3] == 'O') &&
11021 (ctxt->input->cur[4] == 'C') &&
11022 (ctxt->input->cur[5] == 'T') &&
11023 (ctxt->input->cur[6] == 'Y') &&
11024 (ctxt->input->cur[7] == 'P') &&
Owen Taylor3473f882001-02-23 17:55:21 +000011025 (ctxt->input->cur[8] == 'E')) {
11026 if ((!terminate) &&
11027 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
11028 goto done;
11029#ifdef DEBUG_PUSH
11030 xmlGenericError(xmlGenericErrorContext,
11031 "PP: Parsing internal subset\n");
11032#endif
11033 ctxt->inSubset = 1;
11034 xmlParseDocTypeDecl(ctxt);
11035 if (RAW == '[') {
11036 ctxt->instate = XML_PARSER_DTD;
11037#ifdef DEBUG_PUSH
11038 xmlGenericError(xmlGenericErrorContext,
11039 "PP: entering DTD\n");
11040#endif
11041 } else {
11042 /*
11043 * Create and update the external subset.
11044 */
11045 ctxt->inSubset = 2;
11046 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
11047 (ctxt->sax->externalSubset != NULL))
11048 ctxt->sax->externalSubset(ctxt->userData,
11049 ctxt->intSubName, ctxt->extSubSystem,
11050 ctxt->extSubURI);
11051 ctxt->inSubset = 0;
Daniel Veillardac4118d2008-01-11 05:27:32 +000011052 xmlCleanSpecialAttr(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011053 ctxt->instate = XML_PARSER_PROLOG;
11054#ifdef DEBUG_PUSH
11055 xmlGenericError(xmlGenericErrorContext,
11056 "PP: entering PROLOG\n");
11057#endif
11058 }
11059 } else if ((cur == '<') && (next == '!') &&
11060 (avail < 9)) {
11061 goto done;
11062 } else {
11063 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +000011064 ctxt->progressive = 1;
11065 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Owen Taylor3473f882001-02-23 17:55:21 +000011066#ifdef DEBUG_PUSH
11067 xmlGenericError(xmlGenericErrorContext,
11068 "PP: entering START_TAG\n");
11069#endif
11070 }
11071 break;
Owen Taylor3473f882001-02-23 17:55:21 +000011072 case XML_PARSER_PROLOG:
11073 SKIP_BLANKS;
11074 if (ctxt->input->buf == NULL)
11075 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
11076 else
11077 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
11078 if (avail < 2)
11079 goto done;
11080 cur = ctxt->input->cur[0];
11081 next = ctxt->input->cur[1];
11082 if ((cur == '<') && (next == '?')) {
11083 if ((!terminate) &&
11084 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
11085 goto done;
11086#ifdef DEBUG_PUSH
11087 xmlGenericError(xmlGenericErrorContext,
11088 "PP: Parsing PI\n");
11089#endif
11090 xmlParsePI(ctxt);
11091 } else if ((cur == '<') && (next == '!') &&
11092 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
11093 if ((!terminate) &&
11094 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
11095 goto done;
11096#ifdef DEBUG_PUSH
11097 xmlGenericError(xmlGenericErrorContext,
11098 "PP: Parsing Comment\n");
11099#endif
11100 xmlParseComment(ctxt);
11101 ctxt->instate = XML_PARSER_PROLOG;
11102 } else if ((cur == '<') && (next == '!') &&
11103 (avail < 4)) {
11104 goto done;
11105 } else {
11106 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillard0df3bc32004-06-08 12:03:41 +000011107 if (ctxt->progressive == 0)
11108 ctxt->progressive = 1;
Daniel Veillarda880b122003-04-21 21:36:41 +000011109 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Owen Taylor3473f882001-02-23 17:55:21 +000011110#ifdef DEBUG_PUSH
11111 xmlGenericError(xmlGenericErrorContext,
11112 "PP: entering START_TAG\n");
11113#endif
11114 }
11115 break;
11116 case XML_PARSER_EPILOG:
11117 SKIP_BLANKS;
11118 if (ctxt->input->buf == NULL)
11119 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
11120 else
11121 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
11122 if (avail < 2)
11123 goto done;
11124 cur = ctxt->input->cur[0];
11125 next = ctxt->input->cur[1];
11126 if ((cur == '<') && (next == '?')) {
11127 if ((!terminate) &&
11128 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
11129 goto done;
11130#ifdef DEBUG_PUSH
11131 xmlGenericError(xmlGenericErrorContext,
11132 "PP: Parsing PI\n");
11133#endif
11134 xmlParsePI(ctxt);
11135 ctxt->instate = XML_PARSER_EPILOG;
11136 } else if ((cur == '<') && (next == '!') &&
11137 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
11138 if ((!terminate) &&
11139 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
11140 goto done;
11141#ifdef DEBUG_PUSH
11142 xmlGenericError(xmlGenericErrorContext,
11143 "PP: Parsing Comment\n");
11144#endif
11145 xmlParseComment(ctxt);
11146 ctxt->instate = XML_PARSER_EPILOG;
11147 } else if ((cur == '<') && (next == '!') &&
11148 (avail < 4)) {
11149 goto done;
11150 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011151 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011152 ctxt->instate = XML_PARSER_EOF;
11153#ifdef DEBUG_PUSH
11154 xmlGenericError(xmlGenericErrorContext,
11155 "PP: entering EOF\n");
11156#endif
Daniel Veillard8d24cc12002-03-05 15:41:29 +000011157 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +000011158 ctxt->sax->endDocument(ctxt->userData);
11159 goto done;
11160 }
11161 break;
Owen Taylor3473f882001-02-23 17:55:21 +000011162 case XML_PARSER_DTD: {
11163 /*
11164 * Sorry but progressive parsing of the internal subset
11165 * is not expected to be supported. We first check that
11166 * the full content of the internal subset is available and
11167 * the parsing is launched only at that point.
11168 * Internal subset ends up with "']' S? '>'" in an unescaped
11169 * section and not in a ']]>' sequence which are conditional
11170 * sections (whoever argued to keep that crap in XML deserve
11171 * a place in hell !).
11172 */
11173 int base, i;
11174 xmlChar *buf;
11175 xmlChar quote = 0;
11176
11177 base = ctxt->input->cur - ctxt->input->base;
11178 if (base < 0) return(0);
11179 if (ctxt->checkIndex > base)
11180 base = ctxt->checkIndex;
11181 buf = ctxt->input->buf->buffer->content;
11182 for (;(unsigned int) base < ctxt->input->buf->buffer->use;
11183 base++) {
11184 if (quote != 0) {
11185 if (buf[base] == quote)
11186 quote = 0;
11187 continue;
11188 }
Daniel Veillard036143b2004-02-12 11:57:52 +000011189 if ((quote == 0) && (buf[base] == '<')) {
11190 int found = 0;
11191 /* special handling of comments */
11192 if (((unsigned int) base + 4 <
11193 ctxt->input->buf->buffer->use) &&
11194 (buf[base + 1] == '!') &&
11195 (buf[base + 2] == '-') &&
11196 (buf[base + 3] == '-')) {
11197 for (;(unsigned int) base + 3 <
11198 ctxt->input->buf->buffer->use; base++) {
11199 if ((buf[base] == '-') &&
11200 (buf[base + 1] == '-') &&
11201 (buf[base + 2] == '>')) {
11202 found = 1;
11203 base += 2;
11204 break;
11205 }
11206 }
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000011207 if (!found) {
11208#if 0
11209 fprintf(stderr, "unfinished comment\n");
11210#endif
11211 break; /* for */
11212 }
Daniel Veillard036143b2004-02-12 11:57:52 +000011213 continue;
11214 }
11215 }
Owen Taylor3473f882001-02-23 17:55:21 +000011216 if (buf[base] == '"') {
11217 quote = '"';
11218 continue;
11219 }
11220 if (buf[base] == '\'') {
11221 quote = '\'';
11222 continue;
11223 }
11224 if (buf[base] == ']') {
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000011225#if 0
11226 fprintf(stderr, "%c%c%c%c: ", buf[base],
11227 buf[base + 1], buf[base + 2], buf[base + 3]);
11228#endif
Owen Taylor3473f882001-02-23 17:55:21 +000011229 if ((unsigned int) base +1 >=
11230 ctxt->input->buf->buffer->use)
11231 break;
11232 if (buf[base + 1] == ']') {
11233 /* conditional crap, skip both ']' ! */
11234 base++;
11235 continue;
11236 }
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000011237 for (i = 1;
Owen Taylor3473f882001-02-23 17:55:21 +000011238 (unsigned int) base + i < ctxt->input->buf->buffer->use;
11239 i++) {
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000011240 if (buf[base + i] == '>') {
11241#if 0
11242 fprintf(stderr, "found\n");
11243#endif
Owen Taylor3473f882001-02-23 17:55:21 +000011244 goto found_end_int_subset;
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000011245 }
11246 if (!IS_BLANK_CH(buf[base + i])) {
11247#if 0
11248 fprintf(stderr, "not found\n");
11249#endif
11250 goto not_end_of_int_subset;
11251 }
Owen Taylor3473f882001-02-23 17:55:21 +000011252 }
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000011253#if 0
11254 fprintf(stderr, "end of stream\n");
11255#endif
Owen Taylor3473f882001-02-23 17:55:21 +000011256 break;
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000011257
Owen Taylor3473f882001-02-23 17:55:21 +000011258 }
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000011259not_end_of_int_subset:
11260 continue; /* for */
Owen Taylor3473f882001-02-23 17:55:21 +000011261 }
11262 /*
11263 * We didn't found the end of the Internal subset
11264 */
Owen Taylor3473f882001-02-23 17:55:21 +000011265#ifdef DEBUG_PUSH
11266 if (next == 0)
11267 xmlGenericError(xmlGenericErrorContext,
11268 "PP: lookup of int subset end filed\n");
11269#endif
11270 goto done;
11271
11272found_end_int_subset:
11273 xmlParseInternalSubset(ctxt);
11274 ctxt->inSubset = 2;
11275 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
11276 (ctxt->sax->externalSubset != NULL))
11277 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
11278 ctxt->extSubSystem, ctxt->extSubURI);
11279 ctxt->inSubset = 0;
Daniel Veillardac4118d2008-01-11 05:27:32 +000011280 xmlCleanSpecialAttr(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011281 ctxt->instate = XML_PARSER_PROLOG;
11282 ctxt->checkIndex = 0;
11283#ifdef DEBUG_PUSH
11284 xmlGenericError(xmlGenericErrorContext,
11285 "PP: entering PROLOG\n");
11286#endif
11287 break;
11288 }
11289 case XML_PARSER_COMMENT:
11290 xmlGenericError(xmlGenericErrorContext,
11291 "PP: internal error, state == COMMENT\n");
11292 ctxt->instate = XML_PARSER_CONTENT;
11293#ifdef DEBUG_PUSH
11294 xmlGenericError(xmlGenericErrorContext,
11295 "PP: entering CONTENT\n");
11296#endif
11297 break;
Daniel Veillarda880b122003-04-21 21:36:41 +000011298 case XML_PARSER_IGNORE:
11299 xmlGenericError(xmlGenericErrorContext,
11300 "PP: internal error, state == IGNORE");
11301 ctxt->instate = XML_PARSER_DTD;
11302#ifdef DEBUG_PUSH
11303 xmlGenericError(xmlGenericErrorContext,
11304 "PP: entering DTD\n");
11305#endif
11306 break;
Owen Taylor3473f882001-02-23 17:55:21 +000011307 case XML_PARSER_PI:
11308 xmlGenericError(xmlGenericErrorContext,
11309 "PP: internal error, state == PI\n");
11310 ctxt->instate = XML_PARSER_CONTENT;
11311#ifdef DEBUG_PUSH
11312 xmlGenericError(xmlGenericErrorContext,
11313 "PP: entering CONTENT\n");
11314#endif
11315 break;
11316 case XML_PARSER_ENTITY_DECL:
11317 xmlGenericError(xmlGenericErrorContext,
11318 "PP: internal error, state == ENTITY_DECL\n");
11319 ctxt->instate = XML_PARSER_DTD;
11320#ifdef DEBUG_PUSH
11321 xmlGenericError(xmlGenericErrorContext,
11322 "PP: entering DTD\n");
11323#endif
11324 break;
11325 case XML_PARSER_ENTITY_VALUE:
11326 xmlGenericError(xmlGenericErrorContext,
11327 "PP: internal error, state == ENTITY_VALUE\n");
11328 ctxt->instate = XML_PARSER_CONTENT;
11329#ifdef DEBUG_PUSH
11330 xmlGenericError(xmlGenericErrorContext,
11331 "PP: entering DTD\n");
11332#endif
11333 break;
11334 case XML_PARSER_ATTRIBUTE_VALUE:
11335 xmlGenericError(xmlGenericErrorContext,
11336 "PP: internal error, state == ATTRIBUTE_VALUE\n");
11337 ctxt->instate = XML_PARSER_START_TAG;
11338#ifdef DEBUG_PUSH
11339 xmlGenericError(xmlGenericErrorContext,
11340 "PP: entering START_TAG\n");
11341#endif
11342 break;
11343 case XML_PARSER_SYSTEM_LITERAL:
11344 xmlGenericError(xmlGenericErrorContext,
11345 "PP: internal error, state == SYSTEM_LITERAL\n");
11346 ctxt->instate = XML_PARSER_START_TAG;
11347#ifdef DEBUG_PUSH
11348 xmlGenericError(xmlGenericErrorContext,
11349 "PP: entering START_TAG\n");
11350#endif
11351 break;
Daniel Veillard4a7ae502002-02-18 19:18:17 +000011352 case XML_PARSER_PUBLIC_LITERAL:
11353 xmlGenericError(xmlGenericErrorContext,
11354 "PP: internal error, state == PUBLIC_LITERAL\n");
11355 ctxt->instate = XML_PARSER_START_TAG;
11356#ifdef DEBUG_PUSH
11357 xmlGenericError(xmlGenericErrorContext,
11358 "PP: entering START_TAG\n");
11359#endif
11360 break;
Owen Taylor3473f882001-02-23 17:55:21 +000011361 }
11362 }
11363done:
11364#ifdef DEBUG_PUSH
11365 xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret);
11366#endif
11367 return(ret);
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011368encoding_error:
11369 {
11370 char buffer[150];
11371
11372 snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
11373 ctxt->input->cur[0], ctxt->input->cur[1],
11374 ctxt->input->cur[2], ctxt->input->cur[3]);
11375 __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
11376 "Input is not proper UTF-8, indicate encoding !\n%s",
11377 BAD_CAST buffer, NULL);
11378 }
11379 return(0);
Owen Taylor3473f882001-02-23 17:55:21 +000011380}
11381
11382/**
Owen Taylor3473f882001-02-23 17:55:21 +000011383 * xmlParseChunk:
11384 * @ctxt: an XML parser context
11385 * @chunk: an char array
11386 * @size: the size in byte of the chunk
11387 * @terminate: last chunk indicator
11388 *
11389 * Parse a Chunk of memory
11390 *
11391 * Returns zero if no error, the xmlParserErrors otherwise.
11392 */
11393int
11394xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
11395 int terminate) {
Daniel Veillarda617e242006-01-09 14:38:44 +000011396 int end_in_lf = 0;
11397
Daniel Veillard36e5cd52004-11-02 14:52:23 +000011398 if (ctxt == NULL)
11399 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard14412512005-01-21 23:53:26 +000011400 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000011401 return(ctxt->errNo);
Daniel Veillard309f81d2003-09-23 09:02:53 +000011402 if (ctxt->instate == XML_PARSER_START)
11403 xmlDetectSAX2(ctxt);
Daniel Veillarda617e242006-01-09 14:38:44 +000011404 if ((size > 0) && (chunk != NULL) && (!terminate) &&
11405 (chunk[size - 1] == '\r')) {
11406 end_in_lf = 1;
11407 size--;
11408 }
Owen Taylor3473f882001-02-23 17:55:21 +000011409 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
11410 (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) {
11411 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
11412 int cur = ctxt->input->cur - ctxt->input->base;
William M. Bracka3215c72004-07-31 16:24:01 +000011413 int res;
Owen Taylor3473f882001-02-23 17:55:21 +000011414
William M. Bracka3215c72004-07-31 16:24:01 +000011415 res =xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
11416 if (res < 0) {
11417 ctxt->errNo = XML_PARSER_EOF;
11418 ctxt->disableSAX = 1;
11419 return (XML_PARSER_EOF);
11420 }
Owen Taylor3473f882001-02-23 17:55:21 +000011421 ctxt->input->base = ctxt->input->buf->buffer->content + base;
11422 ctxt->input->cur = ctxt->input->base + cur;
Daniel Veillard48b2f892001-02-25 16:11:03 +000011423 ctxt->input->end =
11424 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000011425#ifdef DEBUG_PUSH
11426 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
11427#endif
11428
Owen Taylor3473f882001-02-23 17:55:21 +000011429 } else if (ctxt->instate != XML_PARSER_EOF) {
11430 if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
11431 xmlParserInputBufferPtr in = ctxt->input->buf;
11432 if ((in->encoder != NULL) && (in->buffer != NULL) &&
11433 (in->raw != NULL)) {
11434 int nbchars;
11435
11436 nbchars = xmlCharEncInFunc(in->encoder, in->buffer, in->raw);
11437 if (nbchars < 0) {
Daniel Veillard24eb9782003-10-04 21:08:09 +000011438 /* TODO 2.6.0 */
Owen Taylor3473f882001-02-23 17:55:21 +000011439 xmlGenericError(xmlGenericErrorContext,
11440 "xmlParseChunk: encoder error\n");
11441 return(XML_ERR_INVALID_ENCODING);
11442 }
11443 }
11444 }
11445 }
11446 xmlParseTryOrFinish(ctxt, terminate);
Daniel Veillarda617e242006-01-09 14:38:44 +000011447 if ((end_in_lf == 1) && (ctxt->input != NULL) &&
11448 (ctxt->input->buf != NULL)) {
11449 xmlParserInputBufferPush(ctxt->input->buf, 1, "\r");
11450 }
Daniel Veillard14412512005-01-21 23:53:26 +000011451 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000011452 return(ctxt->errNo);
Owen Taylor3473f882001-02-23 17:55:21 +000011453 if (terminate) {
11454 /*
11455 * Check for termination
11456 */
Daniel Veillard36e5cd52004-11-02 14:52:23 +000011457 int avail = 0;
11458
11459 if (ctxt->input != NULL) {
Daniel Veillard819d5cb2002-10-14 11:15:18 +000011460 if (ctxt->input->buf == NULL)
Daniel Veillard36e5cd52004-11-02 14:52:23 +000011461 avail = ctxt->input->length -
11462 (ctxt->input->cur - ctxt->input->base);
11463 else
11464 avail = ctxt->input->buf->buffer->use -
11465 (ctxt->input->cur - ctxt->input->base);
11466 }
Daniel Veillard819d5cb2002-10-14 11:15:18 +000011467
Owen Taylor3473f882001-02-23 17:55:21 +000011468 if ((ctxt->instate != XML_PARSER_EOF) &&
11469 (ctxt->instate != XML_PARSER_EPILOG)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011470 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011471 }
Daniel Veillard819d5cb2002-10-14 11:15:18 +000011472 if ((ctxt->instate == XML_PARSER_EPILOG) && (avail > 0)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011473 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Daniel Veillard819d5cb2002-10-14 11:15:18 +000011474 }
Owen Taylor3473f882001-02-23 17:55:21 +000011475 if (ctxt->instate != XML_PARSER_EOF) {
Daniel Veillard8d24cc12002-03-05 15:41:29 +000011476 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +000011477 ctxt->sax->endDocument(ctxt->userData);
11478 }
11479 ctxt->instate = XML_PARSER_EOF;
11480 }
11481 return((xmlParserErrors) ctxt->errNo);
11482}
11483
11484/************************************************************************
11485 * *
11486 * I/O front end functions to the parser *
11487 * *
11488 ************************************************************************/
11489
11490/**
Owen Taylor3473f882001-02-23 17:55:21 +000011491 * xmlCreatePushParserCtxt:
11492 * @sax: a SAX handler
11493 * @user_data: The user data returned on SAX callbacks
11494 * @chunk: a pointer to an array of chars
11495 * @size: number of chars in the array
11496 * @filename: an optional file name or URI
11497 *
Daniel Veillard176d99f2002-07-06 19:22:28 +000011498 * Create a parser context for using the XML parser in push mode.
11499 * If @buffer and @size are non-NULL, the data is used to detect
11500 * the encoding. The remaining characters will be parsed so they
11501 * don't need to be fed in again through xmlParseChunk.
Owen Taylor3473f882001-02-23 17:55:21 +000011502 * To allow content encoding detection, @size should be >= 4
11503 * The value of @filename is used for fetching external entities
11504 * and error/warning reports.
11505 *
11506 * Returns the new parser context or NULL
11507 */
Daniel Veillard176d99f2002-07-06 19:22:28 +000011508
Owen Taylor3473f882001-02-23 17:55:21 +000011509xmlParserCtxtPtr
11510xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
11511 const char *chunk, int size, const char *filename) {
11512 xmlParserCtxtPtr ctxt;
11513 xmlParserInputPtr inputStream;
11514 xmlParserInputBufferPtr buf;
11515 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
11516
11517 /*
11518 * plug some encoding conversion routines
11519 */
11520 if ((chunk != NULL) && (size >= 4))
11521 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
11522
11523 buf = xmlAllocParserInputBuffer(enc);
11524 if (buf == NULL) return(NULL);
11525
11526 ctxt = xmlNewParserCtxt();
11527 if (ctxt == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +000011528 xmlErrMemory(NULL, "creating parser: out of memory\n");
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000011529 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000011530 return(NULL);
11531 }
Daniel Veillard03a53c32004-10-26 16:06:51 +000011532 ctxt->dictNames = 1;
Daniel Veillarde57ec792003-09-10 10:50:59 +000011533 ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 * sizeof(xmlChar *));
11534 if (ctxt->pushTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011535 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +000011536 xmlFreeParserInputBuffer(buf);
11537 xmlFreeParserCtxt(ctxt);
11538 return(NULL);
11539 }
Owen Taylor3473f882001-02-23 17:55:21 +000011540 if (sax != NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +000011541#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard092643b2003-09-25 14:29:29 +000011542 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Daniel Veillard81273902003-09-30 00:43:48 +000011543#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000011544 xmlFree(ctxt->sax);
11545 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
11546 if (ctxt->sax == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011547 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000011548 xmlFreeParserInputBuffer(buf);
11549 xmlFreeParserCtxt(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011550 return(NULL);
11551 }
Daniel Veillard5ea30d72004-11-08 11:54:28 +000011552 memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
11553 if (sax->initialized == XML_SAX2_MAGIC)
11554 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
11555 else
11556 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
Owen Taylor3473f882001-02-23 17:55:21 +000011557 if (user_data != NULL)
11558 ctxt->userData = user_data;
11559 }
11560 if (filename == NULL) {
11561 ctxt->directory = NULL;
11562 } else {
11563 ctxt->directory = xmlParserGetDirectory(filename);
11564 }
11565
11566 inputStream = xmlNewInputStream(ctxt);
11567 if (inputStream == NULL) {
11568 xmlFreeParserCtxt(ctxt);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000011569 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000011570 return(NULL);
11571 }
11572
11573 if (filename == NULL)
11574 inputStream->filename = NULL;
William M. Bracka3215c72004-07-31 16:24:01 +000011575 else {
Daniel Veillardf4862f02002-09-10 11:13:43 +000011576 inputStream->filename = (char *)
Igor Zlatkovic5f9fada2003-02-19 14:51:00 +000011577 xmlCanonicPath((const xmlChar *) filename);
William M. Bracka3215c72004-07-31 16:24:01 +000011578 if (inputStream->filename == NULL) {
11579 xmlFreeParserCtxt(ctxt);
11580 xmlFreeParserInputBuffer(buf);
11581 return(NULL);
11582 }
11583 }
Owen Taylor3473f882001-02-23 17:55:21 +000011584 inputStream->buf = buf;
11585 inputStream->base = inputStream->buf->buffer->content;
11586 inputStream->cur = inputStream->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +000011587 inputStream->end =
11588 &inputStream->buf->buffer->content[inputStream->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000011589
11590 inputPush(ctxt, inputStream);
11591
William M. Brack3a1cd212005-02-11 14:35:54 +000011592 /*
11593 * If the caller didn't provide an initial 'chunk' for determining
11594 * the encoding, we set the context to XML_CHAR_ENCODING_NONE so
11595 * that it can be automatically determined later
11596 */
11597 if ((size == 0) || (chunk == NULL)) {
11598 ctxt->charset = XML_CHAR_ENCODING_NONE;
11599 } else if ((ctxt->input != NULL) && (ctxt->input->buf != NULL)) {
Daniel Veillardaa39a0f2002-01-06 12:47:22 +000011600 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
11601 int cur = ctxt->input->cur - ctxt->input->base;
11602
Owen Taylor3473f882001-02-23 17:55:21 +000011603 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
Daniel Veillardaa39a0f2002-01-06 12:47:22 +000011604
11605 ctxt->input->base = ctxt->input->buf->buffer->content + base;
11606 ctxt->input->cur = ctxt->input->base + cur;
11607 ctxt->input->end =
11608 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000011609#ifdef DEBUG_PUSH
11610 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
11611#endif
11612 }
11613
Daniel Veillard0e4cd172001-06-28 12:13:56 +000011614 if (enc != XML_CHAR_ENCODING_NONE) {
11615 xmlSwitchEncoding(ctxt, enc);
11616 }
11617
Owen Taylor3473f882001-02-23 17:55:21 +000011618 return(ctxt);
11619}
Daniel Veillard73b013f2003-09-30 12:36:01 +000011620#endif /* LIBXML_PUSH_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000011621
11622/**
Daniel Veillard39e5c892005-07-03 22:48:50 +000011623 * xmlStopParser:
11624 * @ctxt: an XML parser context
11625 *
11626 * Blocks further parser processing
11627 */
11628void
11629xmlStopParser(xmlParserCtxtPtr ctxt) {
11630 if (ctxt == NULL)
11631 return;
11632 ctxt->instate = XML_PARSER_EOF;
11633 ctxt->disableSAX = 1;
11634 if (ctxt->input != NULL) {
11635 ctxt->input->cur = BAD_CAST"";
11636 ctxt->input->base = ctxt->input->cur;
11637 }
11638}
11639
11640/**
Owen Taylor3473f882001-02-23 17:55:21 +000011641 * xmlCreateIOParserCtxt:
11642 * @sax: a SAX handler
11643 * @user_data: The user data returned on SAX callbacks
11644 * @ioread: an I/O read function
11645 * @ioclose: an I/O close function
11646 * @ioctx: an I/O handler
11647 * @enc: the charset encoding if known
11648 *
11649 * Create a parser context for using the XML parser with an existing
11650 * I/O stream
11651 *
11652 * Returns the new parser context or NULL
11653 */
11654xmlParserCtxtPtr
11655xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
11656 xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
11657 void *ioctx, xmlCharEncoding enc) {
11658 xmlParserCtxtPtr ctxt;
11659 xmlParserInputPtr inputStream;
11660 xmlParserInputBufferPtr buf;
Daniel Veillard42595322004-11-08 10:52:06 +000011661
11662 if (ioread == NULL) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011663
11664 buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
11665 if (buf == NULL) return(NULL);
11666
11667 ctxt = xmlNewParserCtxt();
11668 if (ctxt == NULL) {
Daniel Veillardf0af8ec2005-07-08 17:27:33 +000011669 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000011670 return(NULL);
11671 }
11672 if (sax != NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +000011673#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard092643b2003-09-25 14:29:29 +000011674 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Daniel Veillard81273902003-09-30 00:43:48 +000011675#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000011676 xmlFree(ctxt->sax);
11677 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
11678 if (ctxt->sax == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011679 xmlErrMemory(ctxt, NULL);
Daniel Veillardf0af8ec2005-07-08 17:27:33 +000011680 xmlFreeParserCtxt(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011681 return(NULL);
11682 }
Daniel Veillard5ea30d72004-11-08 11:54:28 +000011683 memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
11684 if (sax->initialized == XML_SAX2_MAGIC)
11685 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
11686 else
11687 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
Owen Taylor3473f882001-02-23 17:55:21 +000011688 if (user_data != NULL)
11689 ctxt->userData = user_data;
11690 }
11691
11692 inputStream = xmlNewIOInputStream(ctxt, buf, enc);
11693 if (inputStream == NULL) {
11694 xmlFreeParserCtxt(ctxt);
11695 return(NULL);
11696 }
11697 inputPush(ctxt, inputStream);
11698
11699 return(ctxt);
11700}
11701
Daniel Veillard4432df22003-09-28 18:58:27 +000011702#ifdef LIBXML_VALID_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000011703/************************************************************************
11704 * *
Daniel Veillardcbaf3992001-12-31 16:16:02 +000011705 * Front ends when parsing a DTD *
Owen Taylor3473f882001-02-23 17:55:21 +000011706 * *
11707 ************************************************************************/
11708
11709/**
11710 * xmlIOParseDTD:
11711 * @sax: the SAX handler block or NULL
11712 * @input: an Input Buffer
11713 * @enc: the charset encoding if known
11714 *
11715 * Load and parse a DTD
11716 *
11717 * Returns the resulting xmlDtdPtr or NULL in case of error.
Daniel Veillard402b3442006-10-13 10:28:21 +000011718 * @input will be freed by the function in any case.
Owen Taylor3473f882001-02-23 17:55:21 +000011719 */
11720
11721xmlDtdPtr
11722xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
11723 xmlCharEncoding enc) {
11724 xmlDtdPtr ret = NULL;
11725 xmlParserCtxtPtr ctxt;
11726 xmlParserInputPtr pinput = NULL;
Daniel Veillard87a764e2001-06-20 17:41:10 +000011727 xmlChar start[4];
Owen Taylor3473f882001-02-23 17:55:21 +000011728
11729 if (input == NULL)
11730 return(NULL);
11731
11732 ctxt = xmlNewParserCtxt();
11733 if (ctxt == NULL) {
Daniel Veillard402b3442006-10-13 10:28:21 +000011734 xmlFreeParserInputBuffer(input);
Owen Taylor3473f882001-02-23 17:55:21 +000011735 return(NULL);
11736 }
11737
11738 /*
11739 * Set-up the SAX context
11740 */
11741 if (sax != NULL) {
11742 if (ctxt->sax != NULL)
11743 xmlFree(ctxt->sax);
11744 ctxt->sax = sax;
Daniel Veillard500a1de2004-03-22 15:22:58 +000011745 ctxt->userData = ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000011746 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000011747 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011748
11749 /*
11750 * generate a parser input from the I/O handler
11751 */
11752
Daniel Veillard43caefb2003-12-07 19:32:22 +000011753 pinput = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
Owen Taylor3473f882001-02-23 17:55:21 +000011754 if (pinput == NULL) {
11755 if (sax != NULL) ctxt->sax = NULL;
Daniel Veillard402b3442006-10-13 10:28:21 +000011756 xmlFreeParserInputBuffer(input);
Owen Taylor3473f882001-02-23 17:55:21 +000011757 xmlFreeParserCtxt(ctxt);
11758 return(NULL);
11759 }
11760
11761 /*
11762 * plug some encoding conversion routines here.
11763 */
Daniel Veillarda8f09ce2008-08-27 13:02:01 +000011764 if (xmlPushInput(ctxt, pinput) < 0) {
11765 if (sax != NULL) ctxt->sax = NULL;
11766 xmlFreeParserCtxt(ctxt);
11767 return(NULL);
11768 }
Daniel Veillard43caefb2003-12-07 19:32:22 +000011769 if (enc != XML_CHAR_ENCODING_NONE) {
11770 xmlSwitchEncoding(ctxt, enc);
11771 }
Owen Taylor3473f882001-02-23 17:55:21 +000011772
11773 pinput->filename = NULL;
11774 pinput->line = 1;
11775 pinput->col = 1;
11776 pinput->base = ctxt->input->cur;
11777 pinput->cur = ctxt->input->cur;
11778 pinput->free = NULL;
11779
11780 /*
11781 * let's parse that entity knowing it's an external subset.
11782 */
11783 ctxt->inSubset = 2;
11784 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
Daniel Veillardae0765b2008-07-31 19:54:59 +000011785 if (ctxt->myDoc == NULL) {
11786 xmlErrMemory(ctxt, "New Doc failed");
11787 return(NULL);
11788 }
11789 ctxt->myDoc->properties = XML_DOC_INTERNAL;
Owen Taylor3473f882001-02-23 17:55:21 +000011790 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
11791 BAD_CAST "none", BAD_CAST "none");
Daniel Veillard87a764e2001-06-20 17:41:10 +000011792
Daniel Veillard4aede2e2003-10-17 12:43:59 +000011793 if ((enc == XML_CHAR_ENCODING_NONE) &&
11794 ((ctxt->input->end - ctxt->input->cur) >= 4)) {
Daniel Veillard87a764e2001-06-20 17:41:10 +000011795 /*
11796 * Get the 4 first bytes and decode the charset
11797 * if enc != XML_CHAR_ENCODING_NONE
11798 * plug some encoding conversion routines.
11799 */
11800 start[0] = RAW;
11801 start[1] = NXT(1);
11802 start[2] = NXT(2);
11803 start[3] = NXT(3);
11804 enc = xmlDetectCharEncoding(start, 4);
11805 if (enc != XML_CHAR_ENCODING_NONE) {
11806 xmlSwitchEncoding(ctxt, enc);
11807 }
11808 }
11809
Owen Taylor3473f882001-02-23 17:55:21 +000011810 xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
11811
11812 if (ctxt->myDoc != NULL) {
11813 if (ctxt->wellFormed) {
11814 ret = ctxt->myDoc->extSubset;
11815 ctxt->myDoc->extSubset = NULL;
Daniel Veillard329456a2003-04-26 21:21:00 +000011816 if (ret != NULL) {
11817 xmlNodePtr tmp;
11818
11819 ret->doc = NULL;
11820 tmp = ret->children;
11821 while (tmp != NULL) {
11822 tmp->doc = NULL;
11823 tmp = tmp->next;
11824 }
11825 }
Owen Taylor3473f882001-02-23 17:55:21 +000011826 } else {
11827 ret = NULL;
11828 }
11829 xmlFreeDoc(ctxt->myDoc);
11830 ctxt->myDoc = NULL;
11831 }
11832 if (sax != NULL) ctxt->sax = NULL;
11833 xmlFreeParserCtxt(ctxt);
11834
11835 return(ret);
11836}
11837
11838/**
11839 * xmlSAXParseDTD:
11840 * @sax: the SAX handler block
11841 * @ExternalID: a NAME* containing the External ID of the DTD
11842 * @SystemID: a NAME* containing the URL to the DTD
11843 *
11844 * Load and parse an external subset.
11845 *
11846 * Returns the resulting xmlDtdPtr or NULL in case of error.
11847 */
11848
11849xmlDtdPtr
11850xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
11851 const xmlChar *SystemID) {
11852 xmlDtdPtr ret = NULL;
11853 xmlParserCtxtPtr ctxt;
11854 xmlParserInputPtr input = NULL;
11855 xmlCharEncoding enc;
Igor Zlatkovic07d59762004-08-24 19:12:51 +000011856 xmlChar* systemIdCanonic;
Owen Taylor3473f882001-02-23 17:55:21 +000011857
11858 if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
11859
11860 ctxt = xmlNewParserCtxt();
11861 if (ctxt == NULL) {
11862 return(NULL);
11863 }
11864
11865 /*
11866 * Set-up the SAX context
11867 */
11868 if (sax != NULL) {
11869 if (ctxt->sax != NULL)
11870 xmlFree(ctxt->sax);
11871 ctxt->sax = sax;
Daniel Veillardbf1e3d82003-08-14 23:57:26 +000011872 ctxt->userData = ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000011873 }
Igor Zlatkovic07d59762004-08-24 19:12:51 +000011874
11875 /*
11876 * Canonicalise the system ID
11877 */
11878 systemIdCanonic = xmlCanonicPath(SystemID);
Daniel Veillardc93a19f2004-10-04 11:53:20 +000011879 if ((SystemID != NULL) && (systemIdCanonic == NULL)) {
Igor Zlatkovic07d59762004-08-24 19:12:51 +000011880 xmlFreeParserCtxt(ctxt);
11881 return(NULL);
11882 }
Owen Taylor3473f882001-02-23 17:55:21 +000011883
11884 /*
11885 * Ask the Entity resolver to load the damn thing
11886 */
11887
11888 if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
Daniel Veillarda9557952006-10-12 12:53:15 +000011889 input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID,
11890 systemIdCanonic);
Owen Taylor3473f882001-02-23 17:55:21 +000011891 if (input == NULL) {
11892 if (sax != NULL) ctxt->sax = NULL;
11893 xmlFreeParserCtxt(ctxt);
Daniel Veillard34099b42004-11-04 17:34:35 +000011894 if (systemIdCanonic != NULL)
11895 xmlFree(systemIdCanonic);
Owen Taylor3473f882001-02-23 17:55:21 +000011896 return(NULL);
11897 }
11898
11899 /*
11900 * plug some encoding conversion routines here.
11901 */
Daniel Veillarda8f09ce2008-08-27 13:02:01 +000011902 if (xmlPushInput(ctxt, input) < 0) {
11903 if (sax != NULL) ctxt->sax = NULL;
11904 xmlFreeParserCtxt(ctxt);
11905 if (systemIdCanonic != NULL)
11906 xmlFree(systemIdCanonic);
11907 return(NULL);
11908 }
Daniel Veillard4aede2e2003-10-17 12:43:59 +000011909 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
11910 enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
11911 xmlSwitchEncoding(ctxt, enc);
11912 }
Owen Taylor3473f882001-02-23 17:55:21 +000011913
11914 if (input->filename == NULL)
Igor Zlatkovic07d59762004-08-24 19:12:51 +000011915 input->filename = (char *) systemIdCanonic;
11916 else
11917 xmlFree(systemIdCanonic);
Owen Taylor3473f882001-02-23 17:55:21 +000011918 input->line = 1;
11919 input->col = 1;
11920 input->base = ctxt->input->cur;
11921 input->cur = ctxt->input->cur;
11922 input->free = NULL;
11923
11924 /*
11925 * let's parse that entity knowing it's an external subset.
11926 */
11927 ctxt->inSubset = 2;
11928 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
Daniel Veillardae0765b2008-07-31 19:54:59 +000011929 if (ctxt->myDoc == NULL) {
11930 xmlErrMemory(ctxt, "New Doc failed");
11931 if (sax != NULL) ctxt->sax = NULL;
11932 xmlFreeParserCtxt(ctxt);
11933 return(NULL);
11934 }
11935 ctxt->myDoc->properties = XML_DOC_INTERNAL;
Owen Taylor3473f882001-02-23 17:55:21 +000011936 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
11937 ExternalID, SystemID);
11938 xmlParseExternalSubset(ctxt, ExternalID, SystemID);
11939
11940 if (ctxt->myDoc != NULL) {
11941 if (ctxt->wellFormed) {
11942 ret = ctxt->myDoc->extSubset;
11943 ctxt->myDoc->extSubset = NULL;
Daniel Veillardc5573462003-04-25 16:43:49 +000011944 if (ret != NULL) {
11945 xmlNodePtr tmp;
11946
11947 ret->doc = NULL;
11948 tmp = ret->children;
11949 while (tmp != NULL) {
11950 tmp->doc = NULL;
11951 tmp = tmp->next;
11952 }
11953 }
Owen Taylor3473f882001-02-23 17:55:21 +000011954 } else {
11955 ret = NULL;
11956 }
11957 xmlFreeDoc(ctxt->myDoc);
11958 ctxt->myDoc = NULL;
11959 }
11960 if (sax != NULL) ctxt->sax = NULL;
11961 xmlFreeParserCtxt(ctxt);
Daniel Veillard0161e632008-08-28 15:36:32 +000011962
Owen Taylor3473f882001-02-23 17:55:21 +000011963 return(ret);
11964}
11965
Daniel Veillard4432df22003-09-28 18:58:27 +000011966
Owen Taylor3473f882001-02-23 17:55:21 +000011967/**
11968 * xmlParseDTD:
11969 * @ExternalID: a NAME* containing the External ID of the DTD
11970 * @SystemID: a NAME* containing the URL to the DTD
11971 *
11972 * Load and parse an external subset.
Daniel Veillard0161e632008-08-28 15:36:32 +000011973 *
Owen Taylor3473f882001-02-23 17:55:21 +000011974 * Returns the resulting xmlDtdPtr or NULL in case of error.
11975 */
11976
11977xmlDtdPtr
11978xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
11979 return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
11980}
Daniel Veillard4432df22003-09-28 18:58:27 +000011981#endif /* LIBXML_VALID_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000011982
11983/************************************************************************
11984 * *
11985 * Front ends when parsing an Entity *
11986 * *
11987 ************************************************************************/
11988
11989/**
Owen Taylor3473f882001-02-23 17:55:21 +000011990 * xmlParseCtxtExternalEntity:
11991 * @ctx: the existing parsing context
11992 * @URL: the URL for the entity to load
11993 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +000011994 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +000011995 *
11996 * Parse an external general entity within an existing parsing context
11997 * An external general parsed entity is well-formed if it matches the
11998 * production labeled extParsedEnt.
11999 *
12000 * [78] extParsedEnt ::= TextDecl? content
12001 *
12002 * Returns 0 if the entity is well formed, -1 in case of args problem and
12003 * the parser error code otherwise
12004 */
12005
12006int
12007xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
Daniel Veillardcda96922001-08-21 10:56:31 +000012008 const xmlChar *ID, xmlNodePtr *lst) {
Owen Taylor3473f882001-02-23 17:55:21 +000012009 xmlParserCtxtPtr ctxt;
12010 xmlDocPtr newDoc;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012011 xmlNodePtr newRoot;
Owen Taylor3473f882001-02-23 17:55:21 +000012012 xmlSAXHandlerPtr oldsax = NULL;
12013 int ret = 0;
Daniel Veillard87a764e2001-06-20 17:41:10 +000012014 xmlChar start[4];
12015 xmlCharEncoding enc;
Daniel Veillard2937b3a2006-10-10 08:52:34 +000012016 xmlParserInputPtr inputStream;
12017 char *directory = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000012018
Daniel Veillardce682bc2004-11-05 17:22:25 +000012019 if (ctx == NULL) return(-1);
12020
Daniel Veillard0161e632008-08-28 15:36:32 +000012021 if (((ctx->depth > 40) && ((ctx->options & XML_PARSE_HUGE) == 0)) ||
12022 (ctx->depth > 1024)) {
Owen Taylor3473f882001-02-23 17:55:21 +000012023 return(XML_ERR_ENTITY_LOOP);
12024 }
12025
Daniel Veillardcda96922001-08-21 10:56:31 +000012026 if (lst != NULL)
12027 *lst = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000012028 if ((URL == NULL) && (ID == NULL))
12029 return(-1);
12030 if (ctx->myDoc == NULL) /* @@ relax but check for dereferences */
12031 return(-1);
12032
Daniel Veillard2937b3a2006-10-10 08:52:34 +000012033 ctxt = xmlNewParserCtxt();
12034 if (ctxt == NULL) {
12035 return(-1);
12036 }
Daniel Veillard40ec29a2008-07-30 12:35:40 +000012037
Owen Taylor3473f882001-02-23 17:55:21 +000012038 ctxt->userData = ctxt;
Daniel Veillarde2830f12003-01-08 17:47:49 +000012039 ctxt->_private = ctx->_private;
Daniel Veillard40ec29a2008-07-30 12:35:40 +000012040
Daniel Veillard2937b3a2006-10-10 08:52:34 +000012041 inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
12042 if (inputStream == NULL) {
12043 xmlFreeParserCtxt(ctxt);
12044 return(-1);
12045 }
12046
12047 inputPush(ctxt, inputStream);
12048
12049 if ((ctxt->directory == NULL) && (directory == NULL))
12050 directory = xmlParserGetDirectory((char *)URL);
12051 if ((ctxt->directory == NULL) && (directory != NULL))
12052 ctxt->directory = directory;
Daniel Veillard40ec29a2008-07-30 12:35:40 +000012053
Owen Taylor3473f882001-02-23 17:55:21 +000012054 oldsax = ctxt->sax;
12055 ctxt->sax = ctx->sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000012056 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000012057 newDoc = xmlNewDoc(BAD_CAST "1.0");
12058 if (newDoc == NULL) {
12059 xmlFreeParserCtxt(ctxt);
12060 return(-1);
12061 }
Daniel Veillardae0765b2008-07-31 19:54:59 +000012062 newDoc->properties = XML_DOC_INTERNAL;
Daniel Veillard03a53c32004-10-26 16:06:51 +000012063 if (ctx->myDoc->dict) {
12064 newDoc->dict = ctx->myDoc->dict;
12065 xmlDictReference(newDoc->dict);
12066 }
Owen Taylor3473f882001-02-23 17:55:21 +000012067 if (ctx->myDoc != NULL) {
12068 newDoc->intSubset = ctx->myDoc->intSubset;
12069 newDoc->extSubset = ctx->myDoc->extSubset;
12070 }
12071 if (ctx->myDoc->URL != NULL) {
12072 newDoc->URL = xmlStrdup(ctx->myDoc->URL);
12073 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012074 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
12075 if (newRoot == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000012076 ctxt->sax = oldsax;
12077 xmlFreeParserCtxt(ctxt);
12078 newDoc->intSubset = NULL;
12079 newDoc->extSubset = NULL;
12080 xmlFreeDoc(newDoc);
12081 return(-1);
12082 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012083 xmlAddChild((xmlNodePtr) newDoc, newRoot);
Owen Taylor3473f882001-02-23 17:55:21 +000012084 nodePush(ctxt, newDoc->children);
12085 if (ctx->myDoc == NULL) {
12086 ctxt->myDoc = newDoc;
12087 } else {
12088 ctxt->myDoc = ctx->myDoc;
12089 newDoc->children->doc = ctx->myDoc;
12090 }
12091
Daniel Veillard40ec29a2008-07-30 12:35:40 +000012092 /*
Daniel Veillard87a764e2001-06-20 17:41:10 +000012093 * Get the 4 first bytes and decode the charset
12094 * if enc != XML_CHAR_ENCODING_NONE
12095 * plug some encoding conversion routines.
12096 */
12097 GROW
Daniel Veillard4aede2e2003-10-17 12:43:59 +000012098 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
12099 start[0] = RAW;
12100 start[1] = NXT(1);
12101 start[2] = NXT(2);
12102 start[3] = NXT(3);
12103 enc = xmlDetectCharEncoding(start, 4);
12104 if (enc != XML_CHAR_ENCODING_NONE) {
12105 xmlSwitchEncoding(ctxt, enc);
12106 }
Daniel Veillard87a764e2001-06-20 17:41:10 +000012107 }
12108
Owen Taylor3473f882001-02-23 17:55:21 +000012109 /*
12110 * Parse a possible text declaration first
12111 */
Daniel Veillarda07050d2003-10-19 14:46:32 +000012112 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +000012113 xmlParseTextDecl(ctxt);
Daniel Veillard40ec29a2008-07-30 12:35:40 +000012114 /*
12115 * An XML-1.0 document can't reference an entity not XML-1.0
12116 */
12117 if ((xmlStrEqual(ctx->version, BAD_CAST "1.0")) &&
12118 (!xmlStrEqual(ctxt->input->version, BAD_CAST "1.0"))) {
12119 xmlFatalErrMsg(ctxt, XML_ERR_VERSION_MISMATCH,
12120 "Version mismatch between document and entity\n");
12121 }
Owen Taylor3473f882001-02-23 17:55:21 +000012122 }
12123
12124 /*
12125 * Doing validity checking on chunk doesn't make sense
12126 */
12127 ctxt->instate = XML_PARSER_CONTENT;
12128 ctxt->validate = ctx->validate;
Daniel Veillard5f8d1a32003-03-23 21:02:00 +000012129 ctxt->valid = ctx->valid;
Owen Taylor3473f882001-02-23 17:55:21 +000012130 ctxt->loadsubset = ctx->loadsubset;
12131 ctxt->depth = ctx->depth + 1;
12132 ctxt->replaceEntities = ctx->replaceEntities;
12133 if (ctxt->validate) {
12134 ctxt->vctxt.error = ctx->vctxt.error;
12135 ctxt->vctxt.warning = ctx->vctxt.warning;
Owen Taylor3473f882001-02-23 17:55:21 +000012136 } else {
12137 ctxt->vctxt.error = NULL;
12138 ctxt->vctxt.warning = NULL;
12139 }
Daniel Veillarda9142e72001-06-19 11:07:54 +000012140 ctxt->vctxt.nodeTab = NULL;
12141 ctxt->vctxt.nodeNr = 0;
12142 ctxt->vctxt.nodeMax = 0;
12143 ctxt->vctxt.node = NULL;
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000012144 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
12145 ctxt->dict = ctx->dict;
Daniel Veillardfd343dc2003-10-31 10:55:22 +000012146 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
12147 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
12148 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000012149 ctxt->dictNames = ctx->dictNames;
12150 ctxt->attsDefault = ctx->attsDefault;
12151 ctxt->attsSpecial = ctx->attsSpecial;
William M. Brack503b6102004-08-19 02:17:27 +000012152 ctxt->linenumbers = ctx->linenumbers;
Owen Taylor3473f882001-02-23 17:55:21 +000012153
12154 xmlParseContent(ctxt);
Daniel Veillard40ec29a2008-07-30 12:35:40 +000012155
Daniel Veillard5f8d1a32003-03-23 21:02:00 +000012156 ctx->validate = ctxt->validate;
12157 ctx->valid = ctxt->valid;
Owen Taylor3473f882001-02-23 17:55:21 +000012158 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012159 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000012160 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012161 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000012162 }
12163 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012164 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000012165 }
12166
12167 if (!ctxt->wellFormed) {
12168 if (ctxt->errNo == 0)
12169 ret = 1;
12170 else
12171 ret = ctxt->errNo;
12172 } else {
Daniel Veillardcda96922001-08-21 10:56:31 +000012173 if (lst != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000012174 xmlNodePtr cur;
12175
12176 /*
12177 * Return the newly created nodeset after unlinking it from
12178 * they pseudo parent.
12179 */
12180 cur = newDoc->children->children;
Daniel Veillardcda96922001-08-21 10:56:31 +000012181 *lst = cur;
Owen Taylor3473f882001-02-23 17:55:21 +000012182 while (cur != NULL) {
12183 cur->parent = NULL;
12184 cur = cur->next;
12185 }
12186 newDoc->children->children = NULL;
12187 }
12188 ret = 0;
12189 }
12190 ctxt->sax = oldsax;
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000012191 ctxt->dict = NULL;
12192 ctxt->attsDefault = NULL;
12193 ctxt->attsSpecial = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000012194 xmlFreeParserCtxt(ctxt);
12195 newDoc->intSubset = NULL;
12196 newDoc->extSubset = NULL;
12197 xmlFreeDoc(newDoc);
Daniel Veillard40ec29a2008-07-30 12:35:40 +000012198
Owen Taylor3473f882001-02-23 17:55:21 +000012199 return(ret);
12200}
12201
12202/**
Daniel Veillard257d9102001-05-08 10:41:44 +000012203 * xmlParseExternalEntityPrivate:
Owen Taylor3473f882001-02-23 17:55:21 +000012204 * @doc: the document the chunk pertains to
Daniel Veillarda97a19b2001-05-20 13:19:52 +000012205 * @oldctxt: the previous parser context if available
Owen Taylor3473f882001-02-23 17:55:21 +000012206 * @sax: the SAX handler bloc (possibly NULL)
12207 * @user_data: The user data returned on SAX callbacks (possibly NULL)
12208 * @depth: Used for loop detection, use 0
12209 * @URL: the URL for the entity to load
12210 * @ID: the System ID for the entity to load
12211 * @list: the return value for the set of parsed nodes
12212 *
Daniel Veillard257d9102001-05-08 10:41:44 +000012213 * Private version of xmlParseExternalEntity()
Owen Taylor3473f882001-02-23 17:55:21 +000012214 *
12215 * Returns 0 if the entity is well formed, -1 in case of args problem and
12216 * the parser error code otherwise
12217 */
12218
Daniel Veillard7d515752003-09-26 19:12:37 +000012219static xmlParserErrors
Daniel Veillarda97a19b2001-05-20 13:19:52 +000012220xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
12221 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +000012222 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +000012223 const xmlChar *ID, xmlNodePtr *list) {
Owen Taylor3473f882001-02-23 17:55:21 +000012224 xmlParserCtxtPtr ctxt;
12225 xmlDocPtr newDoc;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012226 xmlNodePtr newRoot;
Owen Taylor3473f882001-02-23 17:55:21 +000012227 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillard7d515752003-09-26 19:12:37 +000012228 xmlParserErrors ret = XML_ERR_OK;
Daniel Veillard87a764e2001-06-20 17:41:10 +000012229 xmlChar start[4];
12230 xmlCharEncoding enc;
Owen Taylor3473f882001-02-23 17:55:21 +000012231
Daniel Veillard0161e632008-08-28 15:36:32 +000012232 if (((depth > 40) &&
12233 ((oldctxt == NULL) || (oldctxt->options & XML_PARSE_HUGE) == 0)) ||
12234 (depth > 1024)) {
Owen Taylor3473f882001-02-23 17:55:21 +000012235 return(XML_ERR_ENTITY_LOOP);
12236 }
12237
Owen Taylor3473f882001-02-23 17:55:21 +000012238 if (list != NULL)
12239 *list = NULL;
12240 if ((URL == NULL) && (ID == NULL))
Daniel Veillard7d515752003-09-26 19:12:37 +000012241 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard30e76072006-03-09 14:13:55 +000012242 if (doc == NULL)
Daniel Veillard7d515752003-09-26 19:12:37 +000012243 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000012244
12245
12246 ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL);
William M. Brackb670e2e2003-09-27 01:05:55 +000012247 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
Owen Taylor3473f882001-02-23 17:55:21 +000012248 ctxt->userData = ctxt;
Daniel Veillarda97a19b2001-05-20 13:19:52 +000012249 if (oldctxt != NULL) {
12250 ctxt->_private = oldctxt->_private;
12251 ctxt->loadsubset = oldctxt->loadsubset;
12252 ctxt->validate = oldctxt->validate;
12253 ctxt->external = oldctxt->external;
Daniel Veillard44e1dd02003-02-21 23:23:28 +000012254 ctxt->record_info = oldctxt->record_info;
12255 ctxt->node_seq.maximum = oldctxt->node_seq.maximum;
12256 ctxt->node_seq.length = oldctxt->node_seq.length;
12257 ctxt->node_seq.buffer = oldctxt->node_seq.buffer;
Daniel Veillarda97a19b2001-05-20 13:19:52 +000012258 } else {
12259 /*
12260 * Doing validity checking on chunk without context
12261 * doesn't make sense
12262 */
12263 ctxt->_private = NULL;
12264 ctxt->validate = 0;
12265 ctxt->external = 2;
12266 ctxt->loadsubset = 0;
12267 }
Owen Taylor3473f882001-02-23 17:55:21 +000012268 if (sax != NULL) {
12269 oldsax = ctxt->sax;
12270 ctxt->sax = sax;
12271 if (user_data != NULL)
12272 ctxt->userData = user_data;
12273 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000012274 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000012275 newDoc = xmlNewDoc(BAD_CAST "1.0");
12276 if (newDoc == NULL) {
Daniel Veillard44e1dd02003-02-21 23:23:28 +000012277 ctxt->node_seq.maximum = 0;
12278 ctxt->node_seq.length = 0;
12279 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000012280 xmlFreeParserCtxt(ctxt);
Daniel Veillard7d515752003-09-26 19:12:37 +000012281 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000012282 }
Daniel Veillardae0765b2008-07-31 19:54:59 +000012283 newDoc->properties = XML_DOC_INTERNAL;
Daniel Veillard30e76072006-03-09 14:13:55 +000012284 newDoc->intSubset = doc->intSubset;
12285 newDoc->extSubset = doc->extSubset;
12286 newDoc->dict = doc->dict;
Daniel Veillard03a53c32004-10-26 16:06:51 +000012287 xmlDictReference(newDoc->dict);
12288
Owen Taylor3473f882001-02-23 17:55:21 +000012289 if (doc->URL != NULL) {
12290 newDoc->URL = xmlStrdup(doc->URL);
12291 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012292 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
12293 if (newRoot == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000012294 if (sax != NULL)
12295 ctxt->sax = oldsax;
Daniel Veillard44e1dd02003-02-21 23:23:28 +000012296 ctxt->node_seq.maximum = 0;
12297 ctxt->node_seq.length = 0;
12298 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000012299 xmlFreeParserCtxt(ctxt);
12300 newDoc->intSubset = NULL;
12301 newDoc->extSubset = NULL;
12302 xmlFreeDoc(newDoc);
Daniel Veillard7d515752003-09-26 19:12:37 +000012303 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000012304 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012305 xmlAddChild((xmlNodePtr) newDoc, newRoot);
Owen Taylor3473f882001-02-23 17:55:21 +000012306 nodePush(ctxt, newDoc->children);
Daniel Veillard30e76072006-03-09 14:13:55 +000012307 ctxt->myDoc = doc;
12308 newRoot->doc = doc;
Owen Taylor3473f882001-02-23 17:55:21 +000012309
Daniel Veillard0161e632008-08-28 15:36:32 +000012310 /*
Daniel Veillard87a764e2001-06-20 17:41:10 +000012311 * Get the 4 first bytes and decode the charset
12312 * if enc != XML_CHAR_ENCODING_NONE
12313 * plug some encoding conversion routines.
12314 */
12315 GROW;
Daniel Veillard4aede2e2003-10-17 12:43:59 +000012316 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
12317 start[0] = RAW;
12318 start[1] = NXT(1);
12319 start[2] = NXT(2);
12320 start[3] = NXT(3);
12321 enc = xmlDetectCharEncoding(start, 4);
12322 if (enc != XML_CHAR_ENCODING_NONE) {
12323 xmlSwitchEncoding(ctxt, enc);
12324 }
Daniel Veillard87a764e2001-06-20 17:41:10 +000012325 }
12326
Owen Taylor3473f882001-02-23 17:55:21 +000012327 /*
12328 * Parse a possible text declaration first
12329 */
Daniel Veillarda07050d2003-10-19 14:46:32 +000012330 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +000012331 xmlParseTextDecl(ctxt);
12332 }
12333
Owen Taylor3473f882001-02-23 17:55:21 +000012334 ctxt->instate = XML_PARSER_CONTENT;
Owen Taylor3473f882001-02-23 17:55:21 +000012335 ctxt->depth = depth;
12336
12337 xmlParseContent(ctxt);
Daniel Veillard0161e632008-08-28 15:36:32 +000012338
Daniel Veillard561b7f82002-03-20 21:55:57 +000012339 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012340 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard561b7f82002-03-20 21:55:57 +000012341 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012342 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000012343 }
12344 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012345 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000012346 }
12347
12348 if (!ctxt->wellFormed) {
12349 if (ctxt->errNo == 0)
Daniel Veillard7d515752003-09-26 19:12:37 +000012350 ret = XML_ERR_INTERNAL_ERROR;
Owen Taylor3473f882001-02-23 17:55:21 +000012351 else
William M. Brack7b9154b2003-09-27 19:23:50 +000012352 ret = (xmlParserErrors)ctxt->errNo;
Owen Taylor3473f882001-02-23 17:55:21 +000012353 } else {
12354 if (list != NULL) {
12355 xmlNodePtr cur;
12356
12357 /*
12358 * Return the newly created nodeset after unlinking it from
12359 * they pseudo parent.
12360 */
12361 cur = newDoc->children->children;
12362 *list = cur;
12363 while (cur != NULL) {
12364 cur->parent = NULL;
12365 cur = cur->next;
12366 }
12367 newDoc->children->children = NULL;
12368 }
Daniel Veillard7d515752003-09-26 19:12:37 +000012369 ret = XML_ERR_OK;
Owen Taylor3473f882001-02-23 17:55:21 +000012370 }
Daniel Veillard0161e632008-08-28 15:36:32 +000012371
12372 /*
12373 * Record in the parent context the number of entities replacement
12374 * done when parsing that reference.
12375 */
12376 oldctxt->nbentities += ctxt->nbentities;
12377 /*
12378 * Also record the size of the entity parsed
12379 */
12380 if (ctxt->input != NULL) {
12381 oldctxt->sizeentities += ctxt->input->consumed;
12382 oldctxt->sizeentities += (ctxt->input->cur - ctxt->input->base);
12383 }
12384 /*
12385 * And record the last error if any
12386 */
12387 if (ctxt->lastError.code != XML_ERR_OK)
12388 xmlCopyError(&ctxt->lastError, &oldctxt->lastError);
12389
Owen Taylor3473f882001-02-23 17:55:21 +000012390 if (sax != NULL)
12391 ctxt->sax = oldsax;
Daniel Veillard0046c0f2003-02-23 13:52:30 +000012392 oldctxt->node_seq.maximum = ctxt->node_seq.maximum;
12393 oldctxt->node_seq.length = ctxt->node_seq.length;
12394 oldctxt->node_seq.buffer = ctxt->node_seq.buffer;
Daniel Veillard44e1dd02003-02-21 23:23:28 +000012395 ctxt->node_seq.maximum = 0;
12396 ctxt->node_seq.length = 0;
12397 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000012398 xmlFreeParserCtxt(ctxt);
12399 newDoc->intSubset = NULL;
12400 newDoc->extSubset = NULL;
12401 xmlFreeDoc(newDoc);
Daniel Veillard0161e632008-08-28 15:36:32 +000012402
Owen Taylor3473f882001-02-23 17:55:21 +000012403 return(ret);
12404}
12405
Daniel Veillard81273902003-09-30 00:43:48 +000012406#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000012407/**
Daniel Veillard257d9102001-05-08 10:41:44 +000012408 * xmlParseExternalEntity:
12409 * @doc: the document the chunk pertains to
12410 * @sax: the SAX handler bloc (possibly NULL)
12411 * @user_data: The user data returned on SAX callbacks (possibly NULL)
12412 * @depth: Used for loop detection, use 0
12413 * @URL: the URL for the entity to load
12414 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +000012415 * @lst: the return value for the set of parsed nodes
Daniel Veillard257d9102001-05-08 10:41:44 +000012416 *
12417 * Parse an external general entity
12418 * An external general parsed entity is well-formed if it matches the
12419 * production labeled extParsedEnt.
12420 *
12421 * [78] extParsedEnt ::= TextDecl? content
12422 *
12423 * Returns 0 if the entity is well formed, -1 in case of args problem and
12424 * the parser error code otherwise
12425 */
12426
12427int
12428xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardcda96922001-08-21 10:56:31 +000012429 int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *lst) {
Daniel Veillarda97a19b2001-05-20 13:19:52 +000012430 return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL,
Daniel Veillardcda96922001-08-21 10:56:31 +000012431 ID, lst));
Daniel Veillard257d9102001-05-08 10:41:44 +000012432}
12433
12434/**
Daniel Veillarde020c3a2001-03-21 18:06:15 +000012435 * xmlParseBalancedChunkMemory:
Owen Taylor3473f882001-02-23 17:55:21 +000012436 * @doc: the document the chunk pertains to
12437 * @sax: the SAX handler bloc (possibly NULL)
12438 * @user_data: The user data returned on SAX callbacks (possibly NULL)
12439 * @depth: Used for loop detection, use 0
12440 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
Daniel Veillardcda96922001-08-21 10:56:31 +000012441 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +000012442 *
12443 * Parse a well-balanced chunk of an XML document
12444 * called by the parser
12445 * The allowed sequence for the Well Balanced Chunk is the one defined by
12446 * the content production in the XML grammar:
12447 *
12448 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
12449 *
12450 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
12451 * the parser error code otherwise
12452 */
12453
12454int
12455xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
Daniel Veillardcda96922001-08-21 10:56:31 +000012456 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) {
Daniel Veillard58e44c92002-08-02 22:19:49 +000012457 return xmlParseBalancedChunkMemoryRecover( doc, sax, user_data,
12458 depth, string, lst, 0 );
12459}
Daniel Veillard81273902003-09-30 00:43:48 +000012460#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillard58e44c92002-08-02 22:19:49 +000012461
12462/**
Daniel Veillard328f48c2002-11-15 15:24:34 +000012463 * xmlParseBalancedChunkMemoryInternal:
12464 * @oldctxt: the existing parsing context
12465 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
12466 * @user_data: the user data field for the parser context
12467 * @lst: the return value for the set of parsed nodes
12468 *
12469 *
12470 * Parse a well-balanced chunk of an XML document
12471 * called by the parser
12472 * The allowed sequence for the Well Balanced Chunk is the one defined by
12473 * the content production in the XML grammar:
12474 *
12475 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
12476 *
Daniel Veillard7d515752003-09-26 19:12:37 +000012477 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
12478 * error code otherwise
Daniel Veillard0161e632008-08-28 15:36:32 +000012479 *
Daniel Veillard328f48c2002-11-15 15:24:34 +000012480 * In case recover is set to 1, the nodelist will not be empty even if
Daniel Veillard0161e632008-08-28 15:36:32 +000012481 * the parsed chunk is not well balanced.
Daniel Veillard328f48c2002-11-15 15:24:34 +000012482 */
Daniel Veillard7d515752003-09-26 19:12:37 +000012483static xmlParserErrors
Daniel Veillard328f48c2002-11-15 15:24:34 +000012484xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
12485 const xmlChar *string, void *user_data, xmlNodePtr *lst) {
12486 xmlParserCtxtPtr ctxt;
Daniel Veillard68e9e742002-11-16 15:35:11 +000012487 xmlDocPtr newDoc = NULL;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012488 xmlNodePtr newRoot;
Daniel Veillard328f48c2002-11-15 15:24:34 +000012489 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000012490 xmlNodePtr content = NULL;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012491 xmlNodePtr last = NULL;
Daniel Veillard328f48c2002-11-15 15:24:34 +000012492 int size;
Daniel Veillard7d515752003-09-26 19:12:37 +000012493 xmlParserErrors ret = XML_ERR_OK;
Daniel Veillard328f48c2002-11-15 15:24:34 +000012494
Daniel Veillard0161e632008-08-28 15:36:32 +000012495 if (((oldctxt->depth > 40) && ((oldctxt->options & XML_PARSE_HUGE) == 0)) ||
12496 (oldctxt->depth > 1024)) {
Daniel Veillard328f48c2002-11-15 15:24:34 +000012497 return(XML_ERR_ENTITY_LOOP);
12498 }
12499
12500
12501 if (lst != NULL)
12502 *lst = NULL;
12503 if (string == NULL)
William M. Brack7b9154b2003-09-27 19:23:50 +000012504 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard328f48c2002-11-15 15:24:34 +000012505
12506 size = xmlStrlen(string);
12507
12508 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
William M. Brack7b9154b2003-09-27 19:23:50 +000012509 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
Daniel Veillard328f48c2002-11-15 15:24:34 +000012510 if (user_data != NULL)
12511 ctxt->userData = user_data;
12512 else
12513 ctxt->userData = ctxt;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000012514 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
12515 ctxt->dict = oldctxt->dict;
Daniel Veillardfd343dc2003-10-31 10:55:22 +000012516 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
12517 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
12518 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
Daniel Veillard328f48c2002-11-15 15:24:34 +000012519
12520 oldsax = ctxt->sax;
12521 ctxt->sax = oldctxt->sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000012522 xmlDetectSAX2(ctxt);
Daniel Veillard87ab1c12003-12-21 13:01:56 +000012523 ctxt->replaceEntities = oldctxt->replaceEntities;
12524 ctxt->options = oldctxt->options;
Daniel Veillard0161e632008-08-28 15:36:32 +000012525
Daniel Veillarde1ca5032002-12-09 14:13:43 +000012526 ctxt->_private = oldctxt->_private;
Daniel Veillard328f48c2002-11-15 15:24:34 +000012527 if (oldctxt->myDoc == NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000012528 newDoc = xmlNewDoc(BAD_CAST "1.0");
12529 if (newDoc == NULL) {
12530 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000012531 ctxt->dict = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000012532 xmlFreeParserCtxt(ctxt);
William M. Brack7b9154b2003-09-27 19:23:50 +000012533 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard68e9e742002-11-16 15:35:11 +000012534 }
Daniel Veillardae0765b2008-07-31 19:54:59 +000012535 newDoc->properties = XML_DOC_INTERNAL;
Daniel Veillard03a53c32004-10-26 16:06:51 +000012536 newDoc->dict = ctxt->dict;
12537 xmlDictReference(newDoc->dict);
Daniel Veillard328f48c2002-11-15 15:24:34 +000012538 ctxt->myDoc = newDoc;
Daniel Veillard68e9e742002-11-16 15:35:11 +000012539 } else {
12540 ctxt->myDoc = oldctxt->myDoc;
12541 content = ctxt->myDoc->children;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012542 last = ctxt->myDoc->last;
Daniel Veillard328f48c2002-11-15 15:24:34 +000012543 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012544 newRoot = xmlNewDocNode(ctxt->myDoc, NULL, BAD_CAST "pseudoroot", NULL);
12545 if (newRoot == NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000012546 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000012547 ctxt->dict = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000012548 xmlFreeParserCtxt(ctxt);
Daniel Veillard03a53c32004-10-26 16:06:51 +000012549 if (newDoc != NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000012550 xmlFreeDoc(newDoc);
Daniel Veillard03a53c32004-10-26 16:06:51 +000012551 }
William M. Brack7b9154b2003-09-27 19:23:50 +000012552 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard68e9e742002-11-16 15:35:11 +000012553 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012554 ctxt->myDoc->children = NULL;
12555 ctxt->myDoc->last = NULL;
12556 xmlAddChild((xmlNodePtr) ctxt->myDoc, newRoot);
Daniel Veillard68e9e742002-11-16 15:35:11 +000012557 nodePush(ctxt, ctxt->myDoc->children);
Daniel Veillard328f48c2002-11-15 15:24:34 +000012558 ctxt->instate = XML_PARSER_CONTENT;
12559 ctxt->depth = oldctxt->depth + 1;
12560
Daniel Veillard328f48c2002-11-15 15:24:34 +000012561 ctxt->validate = 0;
12562 ctxt->loadsubset = oldctxt->loadsubset;
Daniel Veillardef8dd7b2003-03-23 12:02:56 +000012563 if ((oldctxt->validate) || (oldctxt->replaceEntities != 0)) {
12564 /*
12565 * ID/IDREF registration will be done in xmlValidateElement below
12566 */
12567 ctxt->loadsubset |= XML_SKIP_IDS;
12568 }
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000012569 ctxt->dictNames = oldctxt->dictNames;
Daniel Veillard95d2d5b2003-10-27 14:54:49 +000012570 ctxt->attsDefault = oldctxt->attsDefault;
12571 ctxt->attsSpecial = oldctxt->attsSpecial;
Daniel Veillard328f48c2002-11-15 15:24:34 +000012572
Daniel Veillard68e9e742002-11-16 15:35:11 +000012573 xmlParseContent(ctxt);
Daniel Veillard328f48c2002-11-15 15:24:34 +000012574 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012575 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000012576 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012577 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000012578 }
Daniel Veillard68e9e742002-11-16 15:35:11 +000012579 if (ctxt->node != ctxt->myDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012580 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000012581 }
12582
12583 if (!ctxt->wellFormed) {
12584 if (ctxt->errNo == 0)
Daniel Veillard7d515752003-09-26 19:12:37 +000012585 ret = XML_ERR_INTERNAL_ERROR;
Daniel Veillard328f48c2002-11-15 15:24:34 +000012586 else
William M. Brack7b9154b2003-09-27 19:23:50 +000012587 ret = (xmlParserErrors)ctxt->errNo;
Daniel Veillard328f48c2002-11-15 15:24:34 +000012588 } else {
William M. Brack7b9154b2003-09-27 19:23:50 +000012589 ret = XML_ERR_OK;
Daniel Veillard328f48c2002-11-15 15:24:34 +000012590 }
Daniel Veillard0161e632008-08-28 15:36:32 +000012591
William M. Brack7b9154b2003-09-27 19:23:50 +000012592 if ((lst != NULL) && (ret == XML_ERR_OK)) {
Daniel Veillard328f48c2002-11-15 15:24:34 +000012593 xmlNodePtr cur;
12594
12595 /*
12596 * Return the newly created nodeset after unlinking it from
12597 * they pseudo parent.
12598 */
Daniel Veillard68e9e742002-11-16 15:35:11 +000012599 cur = ctxt->myDoc->children->children;
Daniel Veillard328f48c2002-11-15 15:24:34 +000012600 *lst = cur;
12601 while (cur != NULL) {
Daniel Veillard4432df22003-09-28 18:58:27 +000012602#ifdef LIBXML_VALID_ENABLED
Daniel Veillard8874b942005-08-25 13:19:21 +000012603 if ((oldctxt->validate) && (oldctxt->wellFormed) &&
12604 (oldctxt->myDoc) && (oldctxt->myDoc->intSubset) &&
12605 (cur->type == XML_ELEMENT_NODE)) {
Daniel Veillard8d589042003-02-04 15:07:21 +000012606 oldctxt->valid &= xmlValidateElement(&oldctxt->vctxt,
12607 oldctxt->myDoc, cur);
12608 }
Daniel Veillard4432df22003-09-28 18:58:27 +000012609#endif /* LIBXML_VALID_ENABLED */
Daniel Veillard328f48c2002-11-15 15:24:34 +000012610 cur->parent = NULL;
12611 cur = cur->next;
12612 }
Daniel Veillard68e9e742002-11-16 15:35:11 +000012613 ctxt->myDoc->children->children = NULL;
12614 }
12615 if (ctxt->myDoc != NULL) {
12616 xmlFreeNode(ctxt->myDoc->children);
12617 ctxt->myDoc->children = content;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012618 ctxt->myDoc->last = last;
Daniel Veillard328f48c2002-11-15 15:24:34 +000012619 }
Daniel Veillard0161e632008-08-28 15:36:32 +000012620
12621 /*
12622 * Record in the parent context the number of entities replacement
12623 * done when parsing that reference.
12624 */
Daniel Veillard4bf899b2008-08-20 17:04:30 +000012625 oldctxt->nbentities += ctxt->nbentities;
Daniel Veillard0161e632008-08-28 15:36:32 +000012626 /*
12627 * Also record the last error if any
12628 */
12629 if (ctxt->lastError.code != XML_ERR_OK)
12630 xmlCopyError(&ctxt->lastError, &oldctxt->lastError);
12631
Daniel Veillard328f48c2002-11-15 15:24:34 +000012632 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000012633 ctxt->dict = NULL;
Daniel Veillard95d2d5b2003-10-27 14:54:49 +000012634 ctxt->attsDefault = NULL;
12635 ctxt->attsSpecial = NULL;
Daniel Veillard328f48c2002-11-15 15:24:34 +000012636 xmlFreeParserCtxt(ctxt);
Daniel Veillard03a53c32004-10-26 16:06:51 +000012637 if (newDoc != NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000012638 xmlFreeDoc(newDoc);
Daniel Veillard03a53c32004-10-26 16:06:51 +000012639 }
Daniel Veillard0161e632008-08-28 15:36:32 +000012640
Daniel Veillard328f48c2002-11-15 15:24:34 +000012641 return(ret);
12642}
12643
Daniel Veillard29b17482004-08-16 00:39:03 +000012644/**
12645 * xmlParseInNodeContext:
12646 * @node: the context node
12647 * @data: the input string
12648 * @datalen: the input string length in bytes
12649 * @options: a combination of xmlParserOption
12650 * @lst: the return value for the set of parsed nodes
12651 *
12652 * Parse a well-balanced chunk of an XML document
12653 * within the context (DTD, namespaces, etc ...) of the given node.
12654 *
12655 * The allowed sequence for the data is a Well Balanced Chunk defined by
12656 * the content production in the XML grammar:
12657 *
12658 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
12659 *
12660 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
12661 * error code otherwise
12662 */
12663xmlParserErrors
12664xmlParseInNodeContext(xmlNodePtr node, const char *data, int datalen,
12665 int options, xmlNodePtr *lst) {
12666#ifdef SAX2
12667 xmlParserCtxtPtr ctxt;
12668 xmlDocPtr doc = NULL;
12669 xmlNodePtr fake, cur;
12670 int nsnr = 0;
12671
12672 xmlParserErrors ret = XML_ERR_OK;
12673
12674 /*
12675 * check all input parameters, grab the document
12676 */
12677 if ((lst == NULL) || (node == NULL) || (data == NULL) || (datalen < 0))
12678 return(XML_ERR_INTERNAL_ERROR);
12679 switch (node->type) {
12680 case XML_ELEMENT_NODE:
12681 case XML_ATTRIBUTE_NODE:
12682 case XML_TEXT_NODE:
12683 case XML_CDATA_SECTION_NODE:
12684 case XML_ENTITY_REF_NODE:
12685 case XML_PI_NODE:
12686 case XML_COMMENT_NODE:
12687 case XML_DOCUMENT_NODE:
12688 case XML_HTML_DOCUMENT_NODE:
12689 break;
12690 default:
12691 return(XML_ERR_INTERNAL_ERROR);
12692
12693 }
12694 while ((node != NULL) && (node->type != XML_ELEMENT_NODE) &&
12695 (node->type != XML_DOCUMENT_NODE) &&
12696 (node->type != XML_HTML_DOCUMENT_NODE))
12697 node = node->parent;
12698 if (node == NULL)
12699 return(XML_ERR_INTERNAL_ERROR);
12700 if (node->type == XML_ELEMENT_NODE)
12701 doc = node->doc;
12702 else
12703 doc = (xmlDocPtr) node;
12704 if (doc == NULL)
12705 return(XML_ERR_INTERNAL_ERROR);
12706
12707 /*
12708 * allocate a context and set-up everything not related to the
12709 * node position in the tree
12710 */
12711 if (doc->type == XML_DOCUMENT_NODE)
12712 ctxt = xmlCreateMemoryParserCtxt((char *) data, datalen);
12713#ifdef LIBXML_HTML_ENABLED
12714 else if (doc->type == XML_HTML_DOCUMENT_NODE)
12715 ctxt = htmlCreateMemoryParserCtxt((char *) data, datalen);
12716#endif
12717 else
12718 return(XML_ERR_INTERNAL_ERROR);
12719
12720 if (ctxt == NULL)
12721 return(XML_ERR_NO_MEMORY);
12722 fake = xmlNewComment(NULL);
12723 if (fake == NULL) {
12724 xmlFreeParserCtxt(ctxt);
12725 return(XML_ERR_NO_MEMORY);
12726 }
12727 xmlAddChild(node, fake);
William M. Brackc3f81342004-10-03 01:22:44 +000012728
12729 /*
12730 * Use input doc's dict if present, else assure XML_PARSE_NODICT is set.
12731 * We need a dictionary for xmlDetectSAX2, so if there's no doc dict
12732 * we must wait until the last moment to free the original one.
12733 */
Daniel Veillard29b17482004-08-16 00:39:03 +000012734 if (doc->dict != NULL) {
William M. Brackc3f81342004-10-03 01:22:44 +000012735 if (ctxt->dict != NULL)
Daniel Veillard29b17482004-08-16 00:39:03 +000012736 xmlDictFree(ctxt->dict);
12737 ctxt->dict = doc->dict;
William M. Brackc3f81342004-10-03 01:22:44 +000012738 } else
12739 options |= XML_PARSE_NODICT;
12740
Daniel Veillard37334572008-07-31 08:20:02 +000012741 xmlCtxtUseOptionsInternal(ctxt, options, NULL);
Daniel Veillard29b17482004-08-16 00:39:03 +000012742 xmlDetectSAX2(ctxt);
12743 ctxt->myDoc = doc;
12744
12745 if (node->type == XML_ELEMENT_NODE) {
12746 nodePush(ctxt, node);
12747 /*
12748 * initialize the SAX2 namespaces stack
12749 */
12750 cur = node;
12751 while ((cur != NULL) && (cur->type == XML_ELEMENT_NODE)) {
12752 xmlNsPtr ns = cur->nsDef;
12753 const xmlChar *iprefix, *ihref;
12754
12755 while (ns != NULL) {
12756 if (ctxt->dict) {
12757 iprefix = xmlDictLookup(ctxt->dict, ns->prefix, -1);
12758 ihref = xmlDictLookup(ctxt->dict, ns->href, -1);
12759 } else {
12760 iprefix = ns->prefix;
12761 ihref = ns->href;
12762 }
12763
12764 if (xmlGetNamespace(ctxt, iprefix) == NULL) {
12765 nsPush(ctxt, iprefix, ihref);
12766 nsnr++;
12767 }
12768 ns = ns->next;
12769 }
12770 cur = cur->parent;
12771 }
12772 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillard0161e632008-08-28 15:36:32 +000012773 }
Daniel Veillard29b17482004-08-16 00:39:03 +000012774
12775 if ((ctxt->validate) || (ctxt->replaceEntities != 0)) {
12776 /*
12777 * ID/IDREF registration will be done in xmlValidateElement below
12778 */
12779 ctxt->loadsubset |= XML_SKIP_IDS;
12780 }
12781
Daniel Veillard499cc922006-01-18 17:22:35 +000012782#ifdef LIBXML_HTML_ENABLED
12783 if (doc->type == XML_HTML_DOCUMENT_NODE)
12784 __htmlParseContent(ctxt);
12785 else
12786#endif
12787 xmlParseContent(ctxt);
12788
Daniel Veillard29b17482004-08-16 00:39:03 +000012789 nsPop(ctxt, nsnr);
12790 if ((RAW == '<') && (NXT(1) == '/')) {
12791 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
12792 } else if (RAW != 0) {
12793 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
12794 }
12795 if ((ctxt->node != NULL) && (ctxt->node != node)) {
12796 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
12797 ctxt->wellFormed = 0;
12798 }
12799
12800 if (!ctxt->wellFormed) {
12801 if (ctxt->errNo == 0)
12802 ret = XML_ERR_INTERNAL_ERROR;
12803 else
12804 ret = (xmlParserErrors)ctxt->errNo;
12805 } else {
12806 ret = XML_ERR_OK;
12807 }
Daniel Veillard0161e632008-08-28 15:36:32 +000012808
Daniel Veillard29b17482004-08-16 00:39:03 +000012809 /*
12810 * Return the newly created nodeset after unlinking it from
12811 * the pseudo sibling.
12812 */
Daniel Veillard0161e632008-08-28 15:36:32 +000012813
Daniel Veillard29b17482004-08-16 00:39:03 +000012814 cur = fake->next;
12815 fake->next = NULL;
12816 node->last = fake;
12817
12818 if (cur != NULL) {
12819 cur->prev = NULL;
12820 }
12821
12822 *lst = cur;
12823
12824 while (cur != NULL) {
12825 cur->parent = NULL;
12826 cur = cur->next;
12827 }
12828
12829 xmlUnlinkNode(fake);
12830 xmlFreeNode(fake);
12831
12832
12833 if (ret != XML_ERR_OK) {
12834 xmlFreeNodeList(*lst);
12835 *lst = NULL;
12836 }
William M. Brackc3f81342004-10-03 01:22:44 +000012837
William M. Brackb7b54de2004-10-06 16:38:01 +000012838 if (doc->dict != NULL)
12839 ctxt->dict = NULL;
Daniel Veillard29b17482004-08-16 00:39:03 +000012840 xmlFreeParserCtxt(ctxt);
Daniel Veillard0161e632008-08-28 15:36:32 +000012841
Daniel Veillard29b17482004-08-16 00:39:03 +000012842 return(ret);
12843#else /* !SAX2 */
12844 return(XML_ERR_INTERNAL_ERROR);
12845#endif
12846}
12847
Daniel Veillard81273902003-09-30 00:43:48 +000012848#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard328f48c2002-11-15 15:24:34 +000012849/**
Daniel Veillard58e44c92002-08-02 22:19:49 +000012850 * xmlParseBalancedChunkMemoryRecover:
12851 * @doc: the document the chunk pertains to
12852 * @sax: the SAX handler bloc (possibly NULL)
12853 * @user_data: The user data returned on SAX callbacks (possibly NULL)
12854 * @depth: Used for loop detection, use 0
12855 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
12856 * @lst: the return value for the set of parsed nodes
12857 * @recover: return nodes even if the data is broken (use 0)
12858 *
12859 *
12860 * Parse a well-balanced chunk of an XML document
12861 * called by the parser
12862 * The allowed sequence for the Well Balanced Chunk is the one defined by
12863 * the content production in the XML grammar:
12864 *
12865 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
12866 *
12867 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
12868 * the parser error code otherwise
Daniel Veillard2135fc22008-04-04 16:10:51 +000012869 *
Daniel Veillard58e44c92002-08-02 22:19:49 +000012870 * In case recover is set to 1, the nodelist will not be empty even if
Daniel Veillard2135fc22008-04-04 16:10:51 +000012871 * the parsed chunk is not well balanced, assuming the parsing succeeded to
12872 * some extent.
Daniel Veillard58e44c92002-08-02 22:19:49 +000012873 */
12874int
12875xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax,
Daniel Veillard2135fc22008-04-04 16:10:51 +000012876 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst,
Daniel Veillard58e44c92002-08-02 22:19:49 +000012877 int recover) {
Owen Taylor3473f882001-02-23 17:55:21 +000012878 xmlParserCtxtPtr ctxt;
12879 xmlDocPtr newDoc;
12880 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000012881 xmlNodePtr content, newRoot;
Owen Taylor3473f882001-02-23 17:55:21 +000012882 int size;
12883 int ret = 0;
12884
Daniel Veillard0161e632008-08-28 15:36:32 +000012885 if (depth > 40) {
Owen Taylor3473f882001-02-23 17:55:21 +000012886 return(XML_ERR_ENTITY_LOOP);
12887 }
12888
12889
Daniel Veillardcda96922001-08-21 10:56:31 +000012890 if (lst != NULL)
12891 *lst = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000012892 if (string == NULL)
12893 return(-1);
12894
12895 size = xmlStrlen(string);
12896
12897 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
12898 if (ctxt == NULL) return(-1);
12899 ctxt->userData = ctxt;
12900 if (sax != NULL) {
12901 oldsax = ctxt->sax;
12902 ctxt->sax = sax;
12903 if (user_data != NULL)
12904 ctxt->userData = user_data;
12905 }
12906 newDoc = xmlNewDoc(BAD_CAST "1.0");
12907 if (newDoc == NULL) {
12908 xmlFreeParserCtxt(ctxt);
12909 return(-1);
12910 }
Daniel Veillardae0765b2008-07-31 19:54:59 +000012911 newDoc->properties = XML_DOC_INTERNAL;
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000012912 if ((doc != NULL) && (doc->dict != NULL)) {
12913 xmlDictFree(ctxt->dict);
12914 ctxt->dict = doc->dict;
12915 xmlDictReference(ctxt->dict);
12916 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
12917 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
12918 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
12919 ctxt->dictNames = 1;
12920 } else {
Daniel Veillard37334572008-07-31 08:20:02 +000012921 xmlCtxtUseOptionsInternal(ctxt, XML_PARSE_NODICT, NULL);
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000012922 }
Owen Taylor3473f882001-02-23 17:55:21 +000012923 if (doc != NULL) {
12924 newDoc->intSubset = doc->intSubset;
12925 newDoc->extSubset = doc->extSubset;
12926 }
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000012927 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
12928 if (newRoot == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000012929 if (sax != NULL)
12930 ctxt->sax = oldsax;
12931 xmlFreeParserCtxt(ctxt);
12932 newDoc->intSubset = NULL;
12933 newDoc->extSubset = NULL;
12934 xmlFreeDoc(newDoc);
12935 return(-1);
12936 }
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000012937 xmlAddChild((xmlNodePtr) newDoc, newRoot);
12938 nodePush(ctxt, newRoot);
Owen Taylor3473f882001-02-23 17:55:21 +000012939 if (doc == NULL) {
12940 ctxt->myDoc = newDoc;
12941 } else {
Daniel Veillard42766c02002-08-22 20:52:17 +000012942 ctxt->myDoc = newDoc;
Owen Taylor3473f882001-02-23 17:55:21 +000012943 newDoc->children->doc = doc;
Rob Richardsa02f1992006-09-16 14:04:26 +000012944 /* Ensure that doc has XML spec namespace */
12945 xmlSearchNsByHref(doc, (xmlNodePtr)doc, XML_XML_NAMESPACE);
12946 newDoc->oldNs = doc->oldNs;
Owen Taylor3473f882001-02-23 17:55:21 +000012947 }
12948 ctxt->instate = XML_PARSER_CONTENT;
12949 ctxt->depth = depth;
12950
12951 /*
12952 * Doing validity checking on chunk doesn't make sense
12953 */
12954 ctxt->validate = 0;
12955 ctxt->loadsubset = 0;
Daniel Veillarde57ec792003-09-10 10:50:59 +000012956 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000012957
Daniel Veillardb39bc392002-10-26 19:29:51 +000012958 if ( doc != NULL ){
12959 content = doc->children;
12960 doc->children = NULL;
12961 xmlParseContent(ctxt);
12962 doc->children = content;
12963 }
12964 else {
12965 xmlParseContent(ctxt);
12966 }
Owen Taylor3473f882001-02-23 17:55:21 +000012967 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012968 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000012969 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012970 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000012971 }
12972 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012973 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000012974 }
12975
12976 if (!ctxt->wellFormed) {
12977 if (ctxt->errNo == 0)
12978 ret = 1;
12979 else
12980 ret = ctxt->errNo;
12981 } else {
Daniel Veillard58e44c92002-08-02 22:19:49 +000012982 ret = 0;
12983 }
Daniel Veillard0161e632008-08-28 15:36:32 +000012984
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000012985 if ((lst != NULL) && ((ret == 0) || (recover == 1))) {
12986 xmlNodePtr cur;
Owen Taylor3473f882001-02-23 17:55:21 +000012987
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000012988 /*
12989 * Return the newly created nodeset after unlinking it from
12990 * they pseudo parent.
12991 */
12992 cur = newDoc->children->children;
12993 *lst = cur;
12994 while (cur != NULL) {
12995 xmlSetTreeDoc(cur, doc);
12996 cur->parent = NULL;
12997 cur = cur->next;
Owen Taylor3473f882001-02-23 17:55:21 +000012998 }
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000012999 newDoc->children->children = NULL;
13000 }
Daniel Veillard0161e632008-08-28 15:36:32 +000013001
13002 if (sax != NULL)
Owen Taylor3473f882001-02-23 17:55:21 +000013003 ctxt->sax = oldsax;
13004 xmlFreeParserCtxt(ctxt);
13005 newDoc->intSubset = NULL;
13006 newDoc->extSubset = NULL;
Rob Richardsa02f1992006-09-16 14:04:26 +000013007 newDoc->oldNs = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000013008 xmlFreeDoc(newDoc);
Daniel Veillard0161e632008-08-28 15:36:32 +000013009
Owen Taylor3473f882001-02-23 17:55:21 +000013010 return(ret);
13011}
13012
13013/**
13014 * xmlSAXParseEntity:
13015 * @sax: the SAX handler block
13016 * @filename: the filename
13017 *
13018 * parse an XML external entity out of context and build a tree.
13019 * It use the given SAX function block to handle the parsing callback.
13020 * If sax is NULL, fallback to the default DOM tree building routines.
13021 *
13022 * [78] extParsedEnt ::= TextDecl? content
13023 *
13024 * This correspond to a "Well Balanced" chunk
13025 *
13026 * Returns the resulting document tree
13027 */
13028
13029xmlDocPtr
13030xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
13031 xmlDocPtr ret;
13032 xmlParserCtxtPtr ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000013033
13034 ctxt = xmlCreateFileParserCtxt(filename);
13035 if (ctxt == NULL) {
13036 return(NULL);
13037 }
13038 if (sax != NULL) {
13039 if (ctxt->sax != NULL)
13040 xmlFree(ctxt->sax);
13041 ctxt->sax = sax;
13042 ctxt->userData = NULL;
13043 }
13044
Owen Taylor3473f882001-02-23 17:55:21 +000013045 xmlParseExtParsedEnt(ctxt);
13046
13047 if (ctxt->wellFormed)
13048 ret = ctxt->myDoc;
13049 else {
13050 ret = NULL;
13051 xmlFreeDoc(ctxt->myDoc);
13052 ctxt->myDoc = NULL;
13053 }
13054 if (sax != NULL)
13055 ctxt->sax = NULL;
13056 xmlFreeParserCtxt(ctxt);
Daniel Veillard0161e632008-08-28 15:36:32 +000013057
Owen Taylor3473f882001-02-23 17:55:21 +000013058 return(ret);
13059}
13060
13061/**
13062 * xmlParseEntity:
13063 * @filename: the filename
13064 *
13065 * parse an XML external entity out of context and build a tree.
13066 *
13067 * [78] extParsedEnt ::= TextDecl? content
13068 *
13069 * This correspond to a "Well Balanced" chunk
13070 *
13071 * Returns the resulting document tree
13072 */
13073
13074xmlDocPtr
13075xmlParseEntity(const char *filename) {
13076 return(xmlSAXParseEntity(NULL, filename));
13077}
Daniel Veillard81273902003-09-30 00:43:48 +000013078#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000013079
13080/**
13081 * xmlCreateEntityParserCtxt:
13082 * @URL: the entity URL
13083 * @ID: the entity PUBLIC ID
Daniel Veillardcbaf3992001-12-31 16:16:02 +000013084 * @base: a possible base for the target URI
Owen Taylor3473f882001-02-23 17:55:21 +000013085 *
13086 * Create a parser context for an external entity
13087 * Automatic support for ZLIB/Compress compressed document is provided
13088 * by default if found at compile-time.
13089 *
13090 * Returns the new parser context or NULL
13091 */
13092xmlParserCtxtPtr
13093xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
13094 const xmlChar *base) {
13095 xmlParserCtxtPtr ctxt;
13096 xmlParserInputPtr inputStream;
13097 char *directory = NULL;
13098 xmlChar *uri;
Daniel Veillard0161e632008-08-28 15:36:32 +000013099
Owen Taylor3473f882001-02-23 17:55:21 +000013100 ctxt = xmlNewParserCtxt();
13101 if (ctxt == NULL) {
13102 return(NULL);
13103 }
13104
13105 uri = xmlBuildURI(URL, base);
13106
13107 if (uri == NULL) {
13108 inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
13109 if (inputStream == NULL) {
13110 xmlFreeParserCtxt(ctxt);
13111 return(NULL);
13112 }
13113
13114 inputPush(ctxt, inputStream);
13115
13116 if ((ctxt->directory == NULL) && (directory == NULL))
13117 directory = xmlParserGetDirectory((char *)URL);
13118 if ((ctxt->directory == NULL) && (directory != NULL))
13119 ctxt->directory = directory;
13120 } else {
13121 inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt);
13122 if (inputStream == NULL) {
13123 xmlFree(uri);
13124 xmlFreeParserCtxt(ctxt);
13125 return(NULL);
13126 }
13127
13128 inputPush(ctxt, inputStream);
13129
13130 if ((ctxt->directory == NULL) && (directory == NULL))
13131 directory = xmlParserGetDirectory((char *)uri);
13132 if ((ctxt->directory == NULL) && (directory != NULL))
13133 ctxt->directory = directory;
13134 xmlFree(uri);
13135 }
Owen Taylor3473f882001-02-23 17:55:21 +000013136 return(ctxt);
13137}
13138
/************************************************************************
 *									*
 *		Front ends when parsing from a file			*
 *									*
 ************************************************************************/
13144
13145/**
Daniel Veillard61b93382003-11-03 14:28:31 +000013146 * xmlCreateURLParserCtxt:
13147 * @filename: the filename or URL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013148 * @options: a combination of xmlParserOption
Owen Taylor3473f882001-02-23 17:55:21 +000013149 *
Daniel Veillard61b93382003-11-03 14:28:31 +000013150 * Create a parser context for a file or URL content.
Owen Taylor3473f882001-02-23 17:55:21 +000013151 * Automatic support for ZLIB/Compress compressed document is provided
Daniel Veillard61b93382003-11-03 14:28:31 +000013152 * by default if found at compile-time and for file accesses
Owen Taylor3473f882001-02-23 17:55:21 +000013153 *
13154 * Returns the new parser context or NULL
13155 */
13156xmlParserCtxtPtr
Daniel Veillard61b93382003-11-03 14:28:31 +000013157xmlCreateURLParserCtxt(const char *filename, int options)
Owen Taylor3473f882001-02-23 17:55:21 +000013158{
13159 xmlParserCtxtPtr ctxt;
13160 xmlParserInputPtr inputStream;
Owen Taylor3473f882001-02-23 17:55:21 +000013161 char *directory = NULL;
13162
Owen Taylor3473f882001-02-23 17:55:21 +000013163 ctxt = xmlNewParserCtxt();
13164 if (ctxt == NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +000013165 xmlErrMemory(NULL, "cannot allocate parser context");
Owen Taylor3473f882001-02-23 17:55:21 +000013166 return(NULL);
13167 }
13168
Daniel Veillarddf292f72005-01-16 19:00:15 +000013169 if (options)
Daniel Veillard37334572008-07-31 08:20:02 +000013170 xmlCtxtUseOptionsInternal(ctxt, options, NULL);
Daniel Veillarddf292f72005-01-16 19:00:15 +000013171 ctxt->linenumbers = 1;
Daniel Veillard37334572008-07-31 08:20:02 +000013172
Daniel Veillard4e9b1bc2003-06-09 10:30:33 +000013173 inputStream = xmlLoadExternalEntity(filename, NULL, ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000013174 if (inputStream == NULL) {
13175 xmlFreeParserCtxt(ctxt);
13176 return(NULL);
13177 }
13178
Owen Taylor3473f882001-02-23 17:55:21 +000013179 inputPush(ctxt, inputStream);
13180 if ((ctxt->directory == NULL) && (directory == NULL))
Igor Zlatkovic5f9fada2003-02-19 14:51:00 +000013181 directory = xmlParserGetDirectory(filename);
Owen Taylor3473f882001-02-23 17:55:21 +000013182 if ((ctxt->directory == NULL) && (directory != NULL))
13183 ctxt->directory = directory;
13184
13185 return(ctxt);
13186}
13187
Daniel Veillard61b93382003-11-03 14:28:31 +000013188/**
13189 * xmlCreateFileParserCtxt:
13190 * @filename: the filename
13191 *
13192 * Create a parser context for a file content.
13193 * Automatic support for ZLIB/Compress compressed document is provided
13194 * by default if found at compile-time.
13195 *
13196 * Returns the new parser context or NULL
13197 */
13198xmlParserCtxtPtr
13199xmlCreateFileParserCtxt(const char *filename)
13200{
13201 return(xmlCreateURLParserCtxt(filename, 0));
13202}
13203
Daniel Veillard81273902003-09-30 00:43:48 +000013204#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000013205/**
Daniel Veillarda293c322001-10-02 13:54:14 +000013206 * xmlSAXParseFileWithData:
Owen Taylor3473f882001-02-23 17:55:21 +000013207 * @sax: the SAX handler block
13208 * @filename: the filename
13209 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
13210 * documents
Daniel Veillarda293c322001-10-02 13:54:14 +000013211 * @data: the userdata
Owen Taylor3473f882001-02-23 17:55:21 +000013212 *
13213 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
13214 * compressed document is provided by default if found at compile-time.
13215 * It use the given SAX function block to handle the parsing callback.
13216 * If sax is NULL, fallback to the default DOM tree building routines.
13217 *
Daniel Veillarde19fc232002-04-22 16:01:24 +000013218 * User data (void *) is stored within the parser context in the
13219 * context's _private member, so it is available nearly everywhere in libxml
Daniel Veillarda293c322001-10-02 13:54:14 +000013220 *
Owen Taylor3473f882001-02-23 17:55:21 +000013221 * Returns the resulting document tree
13222 */
13223
13224xmlDocPtr
Daniel Veillarda293c322001-10-02 13:54:14 +000013225xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename,
13226 int recovery, void *data) {
Owen Taylor3473f882001-02-23 17:55:21 +000013227 xmlDocPtr ret;
13228 xmlParserCtxtPtr ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000013229
Daniel Veillard635ef722001-10-29 11:48:19 +000013230 xmlInitParser();
13231
Owen Taylor3473f882001-02-23 17:55:21 +000013232 ctxt = xmlCreateFileParserCtxt(filename);
13233 if (ctxt == NULL) {
13234 return(NULL);
13235 }
13236 if (sax != NULL) {
13237 if (ctxt->sax != NULL)
13238 xmlFree(ctxt->sax);
13239 ctxt->sax = sax;
Owen Taylor3473f882001-02-23 17:55:21 +000013240 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000013241 xmlDetectSAX2(ctxt);
Daniel Veillarda293c322001-10-02 13:54:14 +000013242 if (data!=NULL) {
Daniel Veillardc790bf42003-10-11 10:50:10 +000013243 ctxt->_private = data;
Daniel Veillarda293c322001-10-02 13:54:14 +000013244 }
Owen Taylor3473f882001-02-23 17:55:21 +000013245
Daniel Veillard37d2d162008-03-14 10:54:00 +000013246 if (ctxt->directory == NULL)
13247 ctxt->directory = xmlParserGetDirectory(filename);
Owen Taylor3473f882001-02-23 17:55:21 +000013248
Daniel Veillarddad3f682002-11-17 16:47:27 +000013249 ctxt->recovery = recovery;
13250
Owen Taylor3473f882001-02-23 17:55:21 +000013251 xmlParseDocument(ctxt);
13252
William M. Brackc07329e2003-09-08 01:57:30 +000013253 if ((ctxt->wellFormed) || recovery) {
13254 ret = ctxt->myDoc;
Daniel Veillardb65e12e2003-10-08 21:33:28 +000013255 if (ret != NULL) {
13256 if (ctxt->input->buf->compressed > 0)
13257 ret->compression = 9;
13258 else
13259 ret->compression = ctxt->input->buf->compressed;
13260 }
William M. Brackc07329e2003-09-08 01:57:30 +000013261 }
Owen Taylor3473f882001-02-23 17:55:21 +000013262 else {
13263 ret = NULL;
13264 xmlFreeDoc(ctxt->myDoc);
13265 ctxt->myDoc = NULL;
13266 }
13267 if (sax != NULL)
13268 ctxt->sax = NULL;
13269 xmlFreeParserCtxt(ctxt);
13270
13271 return(ret);
13272}
13273
13274/**
Daniel Veillarda293c322001-10-02 13:54:14 +000013275 * xmlSAXParseFile:
13276 * @sax: the SAX handler block
13277 * @filename: the filename
13278 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
13279 * documents
13280 *
13281 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
13282 * compressed document is provided by default if found at compile-time.
13283 * It use the given SAX function block to handle the parsing callback.
13284 * If sax is NULL, fallback to the default DOM tree building routines.
13285 *
13286 * Returns the resulting document tree
13287 */
13288
13289xmlDocPtr
13290xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
13291 int recovery) {
13292 return(xmlSAXParseFileWithData(sax,filename,recovery,NULL));
13293}
13294
13295/**
Owen Taylor3473f882001-02-23 17:55:21 +000013296 * xmlRecoverDoc:
13297 * @cur: a pointer to an array of xmlChar
13298 *
13299 * parse an XML in-memory document and build a tree.
Daniel Veillard2135fc22008-04-04 16:10:51 +000013300 * In the case the document is not Well Formed, a attempt to build a
13301 * tree is tried anyway
13302 *
13303 * Returns the resulting document tree or NULL in case of failure
Owen Taylor3473f882001-02-23 17:55:21 +000013304 */
13305
13306xmlDocPtr
13307xmlRecoverDoc(xmlChar *cur) {
13308 return(xmlSAXParseDoc(NULL, cur, 1));
13309}
13310
13311/**
13312 * xmlParseFile:
13313 * @filename: the filename
13314 *
13315 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
13316 * compressed document is provided by default if found at compile-time.
13317 *
Daniel Veillard5d96fff2001-08-31 14:55:30 +000013318 * Returns the resulting document tree if the file was wellformed,
13319 * NULL otherwise.
Owen Taylor3473f882001-02-23 17:55:21 +000013320 */
13321
13322xmlDocPtr
13323xmlParseFile(const char *filename) {
13324 return(xmlSAXParseFile(NULL, filename, 0));
13325}
13326
13327/**
13328 * xmlRecoverFile:
13329 * @filename: the filename
13330 *
13331 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
13332 * compressed document is provided by default if found at compile-time.
Daniel Veillard2135fc22008-04-04 16:10:51 +000013333 * In the case the document is not Well Formed, it attempts to build
13334 * a tree anyway
Owen Taylor3473f882001-02-23 17:55:21 +000013335 *
Daniel Veillard2135fc22008-04-04 16:10:51 +000013336 * Returns the resulting document tree or NULL in case of failure
Owen Taylor3473f882001-02-23 17:55:21 +000013337 */
13338
13339xmlDocPtr
13340xmlRecoverFile(const char *filename) {
13341 return(xmlSAXParseFile(NULL, filename, 1));
13342}
13343
13344
13345/**
13346 * xmlSetupParserForBuffer:
13347 * @ctxt: an XML parser context
13348 * @buffer: a xmlChar * buffer
13349 * @filename: a file name
13350 *
13351 * Setup the parser context to parse a new buffer; Clears any prior
13352 * contents from the parser context. The buffer parameter must not be
13353 * NULL, but the filename parameter can be
13354 */
13355void
13356xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
13357 const char* filename)
13358{
13359 xmlParserInputPtr input;
13360
Daniel Veillard36e5cd52004-11-02 14:52:23 +000013361 if ((ctxt == NULL) || (buffer == NULL))
13362 return;
13363
Owen Taylor3473f882001-02-23 17:55:21 +000013364 input = xmlNewInputStream(ctxt);
13365 if (input == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +000013366 xmlErrMemory(NULL, "parsing new buffer: out of memory\n");
Daniel Veillard36e5cd52004-11-02 14:52:23 +000013367 xmlClearParserCtxt(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000013368 return;
13369 }
13370
13371 xmlClearParserCtxt(ctxt);
13372 if (filename != NULL)
Daniel Veillardc3ca5ba2003-05-09 22:26:28 +000013373 input->filename = (char *) xmlCanonicPath((const xmlChar *)filename);
Owen Taylor3473f882001-02-23 17:55:21 +000013374 input->base = buffer;
13375 input->cur = buffer;
Daniel Veillard48b2f892001-02-25 16:11:03 +000013376 input->end = &buffer[xmlStrlen(buffer)];
Owen Taylor3473f882001-02-23 17:55:21 +000013377 inputPush(ctxt, input);
13378}
13379
13380/**
13381 * xmlSAXUserParseFile:
13382 * @sax: a SAX handler
13383 * @user_data: The user data returned on SAX callbacks
13384 * @filename: a file name
13385 *
13386 * parse an XML file and call the given SAX handler routines.
13387 * Automatic support for ZLIB/Compress compressed document is provided
13388 *
13389 * Returns 0 in case of success or a error number otherwise
13390 */
13391int
13392xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
13393 const char *filename) {
13394 int ret = 0;
13395 xmlParserCtxtPtr ctxt;
13396
13397 ctxt = xmlCreateFileParserCtxt(filename);
13398 if (ctxt == NULL) return -1;
Daniel Veillard092643b2003-09-25 14:29:29 +000013399 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Owen Taylor3473f882001-02-23 17:55:21 +000013400 xmlFree(ctxt->sax);
13401 ctxt->sax = sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000013402 xmlDetectSAX2(ctxt);
13403
Owen Taylor3473f882001-02-23 17:55:21 +000013404 if (user_data != NULL)
13405 ctxt->userData = user_data;
13406
13407 xmlParseDocument(ctxt);
13408
13409 if (ctxt->wellFormed)
13410 ret = 0;
13411 else {
13412 if (ctxt->errNo != 0)
13413 ret = ctxt->errNo;
13414 else
13415 ret = -1;
13416 }
13417 if (sax != NULL)
13418 ctxt->sax = NULL;
Daniel Veillard34099b42004-11-04 17:34:35 +000013419 if (ctxt->myDoc != NULL) {
13420 xmlFreeDoc(ctxt->myDoc);
13421 ctxt->myDoc = NULL;
13422 }
Owen Taylor3473f882001-02-23 17:55:21 +000013423 xmlFreeParserCtxt(ctxt);
13424
13425 return ret;
13426}
Daniel Veillard81273902003-09-30 00:43:48 +000013427#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000013428
/************************************************************************
 *									*
 *		Front ends when parsing from memory			*
 *									*
 ************************************************************************/
13434
13435/**
13436 * xmlCreateMemoryParserCtxt:
13437 * @buffer: a pointer to a char array
13438 * @size: the size of the array
13439 *
13440 * Create a parser context for an XML in-memory document.
13441 *
13442 * Returns the new parser context or NULL
13443 */
13444xmlParserCtxtPtr
Daniel Veillardfd7ddca2001-05-16 10:57:35 +000013445xmlCreateMemoryParserCtxt(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000013446 xmlParserCtxtPtr ctxt;
13447 xmlParserInputPtr input;
13448 xmlParserInputBufferPtr buf;
13449
13450 if (buffer == NULL)
13451 return(NULL);
13452 if (size <= 0)
13453 return(NULL);
13454
13455 ctxt = xmlNewParserCtxt();
13456 if (ctxt == NULL)
13457 return(NULL);
13458
Daniel Veillard53350552003-09-18 13:35:51 +000013459 /* TODO: xmlParserInputBufferCreateStatic, requires some serious changes */
Owen Taylor3473f882001-02-23 17:55:21 +000013460 buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
Daniel Veillarda7e05b42002-11-19 08:11:14 +000013461 if (buf == NULL) {
13462 xmlFreeParserCtxt(ctxt);
13463 return(NULL);
13464 }
Owen Taylor3473f882001-02-23 17:55:21 +000013465
13466 input = xmlNewInputStream(ctxt);
13467 if (input == NULL) {
Daniel Veillarda7e05b42002-11-19 08:11:14 +000013468 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000013469 xmlFreeParserCtxt(ctxt);
13470 return(NULL);
13471 }
13472
13473 input->filename = NULL;
13474 input->buf = buf;
13475 input->base = input->buf->buffer->content;
13476 input->cur = input->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +000013477 input->end = &input->buf->buffer->content[input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000013478
13479 inputPush(ctxt, input);
13480 return(ctxt);
13481}
13482
Daniel Veillard81273902003-09-30 00:43:48 +000013483#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000013484/**
Daniel Veillard8606bbb2002-11-12 12:36:52 +000013485 * xmlSAXParseMemoryWithData:
13486 * @sax: the SAX handler block
13487 * @buffer: an pointer to a char array
13488 * @size: the size of the array
13489 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
13490 * documents
13491 * @data: the userdata
13492 *
13493 * parse an XML in-memory block and use the given SAX function block
13494 * to handle the parsing callback. If sax is NULL, fallback to the default
13495 * DOM tree building routines.
13496 *
13497 * User data (void *) is stored within the parser context in the
13498 * context's _private member, so it is available nearly everywhere in libxml
13499 *
13500 * Returns the resulting document tree
13501 */
13502
13503xmlDocPtr
13504xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer,
13505 int size, int recovery, void *data) {
13506 xmlDocPtr ret;
13507 xmlParserCtxtPtr ctxt;
13508
13509 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
13510 if (ctxt == NULL) return(NULL);
13511 if (sax != NULL) {
13512 if (ctxt->sax != NULL)
13513 xmlFree(ctxt->sax);
13514 ctxt->sax = sax;
13515 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000013516 xmlDetectSAX2(ctxt);
Daniel Veillard8606bbb2002-11-12 12:36:52 +000013517 if (data!=NULL) {
13518 ctxt->_private=data;
13519 }
13520
Daniel Veillardadba5f12003-04-04 16:09:01 +000013521 ctxt->recovery = recovery;
13522
Daniel Veillard8606bbb2002-11-12 12:36:52 +000013523 xmlParseDocument(ctxt);
13524
13525 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
13526 else {
13527 ret = NULL;
13528 xmlFreeDoc(ctxt->myDoc);
13529 ctxt->myDoc = NULL;
13530 }
13531 if (sax != NULL)
13532 ctxt->sax = NULL;
13533 xmlFreeParserCtxt(ctxt);
13534
13535 return(ret);
13536}
13537
13538/**
Owen Taylor3473f882001-02-23 17:55:21 +000013539 * xmlSAXParseMemory:
13540 * @sax: the SAX handler block
13541 * @buffer: an pointer to a char array
13542 * @size: the size of the array
13543 * @recovery: work in recovery mode, i.e. tries to read not Well Formed
13544 * documents
13545 *
13546 * parse an XML in-memory block and use the given SAX function block
13547 * to handle the parsing callback. If sax is NULL, fallback to the default
13548 * DOM tree building routines.
13549 *
13550 * Returns the resulting document tree
13551 */
13552xmlDocPtr
Daniel Veillard50822cb2001-07-26 20:05:51 +000013553xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer,
13554 int size, int recovery) {
Daniel Veillard8606bbb2002-11-12 12:36:52 +000013555 return xmlSAXParseMemoryWithData(sax, buffer, size, recovery, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000013556}
13557
13558/**
13559 * xmlParseMemory:
13560 * @buffer: an pointer to a char array
13561 * @size: the size of the array
13562 *
13563 * parse an XML in-memory block and build a tree.
13564 *
13565 * Returns the resulting document tree
13566 */
13567
Daniel Veillard50822cb2001-07-26 20:05:51 +000013568xmlDocPtr xmlParseMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000013569 return(xmlSAXParseMemory(NULL, buffer, size, 0));
13570}
13571
13572/**
13573 * xmlRecoverMemory:
13574 * @buffer: an pointer to a char array
13575 * @size: the size of the array
13576 *
13577 * parse an XML in-memory block and build a tree.
Daniel Veillard2135fc22008-04-04 16:10:51 +000013578 * In the case the document is not Well Formed, an attempt to
13579 * build a tree is tried anyway
13580 *
13581 * Returns the resulting document tree or NULL in case of error
Owen Taylor3473f882001-02-23 17:55:21 +000013582 */
13583
Daniel Veillard50822cb2001-07-26 20:05:51 +000013584xmlDocPtr xmlRecoverMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000013585 return(xmlSAXParseMemory(NULL, buffer, size, 1));
13586}
13587
13588/**
13589 * xmlSAXUserParseMemory:
13590 * @sax: a SAX handler
13591 * @user_data: The user data returned on SAX callbacks
13592 * @buffer: an in-memory XML document input
13593 * @size: the length of the XML document in bytes
13594 *
13595 * A better SAX parsing routine.
13596 * parse an XML in-memory buffer and call the given SAX handler routines.
13597 *
13598 * Returns 0 in case of success or a error number otherwise
13599 */
13600int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardfd7ddca2001-05-16 10:57:35 +000013601 const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000013602 int ret = 0;
13603 xmlParserCtxtPtr ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000013604
13605 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
13606 if (ctxt == NULL) return -1;
Daniel Veillard3dcd3192007-08-14 13:46:54 +000013607 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
13608 xmlFree(ctxt->sax);
Daniel Veillard9e923512002-08-14 08:48:52 +000013609 ctxt->sax = sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000013610 xmlDetectSAX2(ctxt);
Daniel Veillard3dcd3192007-08-14 13:46:54 +000013611
Daniel Veillard30211a02001-04-26 09:33:18 +000013612 if (user_data != NULL)
13613 ctxt->userData = user_data;
Owen Taylor3473f882001-02-23 17:55:21 +000013614
13615 xmlParseDocument(ctxt);
13616
13617 if (ctxt->wellFormed)
13618 ret = 0;
13619 else {
13620 if (ctxt->errNo != 0)
13621 ret = ctxt->errNo;
13622 else
13623 ret = -1;
13624 }
Daniel Veillard3dcd3192007-08-14 13:46:54 +000013625 if (sax != NULL)
13626 ctxt->sax = NULL;
Daniel Veillard34099b42004-11-04 17:34:35 +000013627 if (ctxt->myDoc != NULL) {
13628 xmlFreeDoc(ctxt->myDoc);
13629 ctxt->myDoc = NULL;
13630 }
Owen Taylor3473f882001-02-23 17:55:21 +000013631 xmlFreeParserCtxt(ctxt);
13632
13633 return ret;
13634}
Daniel Veillard81273902003-09-30 00:43:48 +000013635#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000013636
13637/**
13638 * xmlCreateDocParserCtxt:
13639 * @cur: a pointer to an array of xmlChar
13640 *
13641 * Creates a parser context for an XML in-memory document.
13642 *
13643 * Returns the new parser context or NULL
13644 */
13645xmlParserCtxtPtr
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013646xmlCreateDocParserCtxt(const xmlChar *cur) {
Owen Taylor3473f882001-02-23 17:55:21 +000013647 int len;
13648
13649 if (cur == NULL)
13650 return(NULL);
13651 len = xmlStrlen(cur);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013652 return(xmlCreateMemoryParserCtxt((const char *)cur, len));
Owen Taylor3473f882001-02-23 17:55:21 +000013653}
13654
Daniel Veillard81273902003-09-30 00:43:48 +000013655#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000013656/**
13657 * xmlSAXParseDoc:
13658 * @sax: the SAX handler block
13659 * @cur: a pointer to an array of xmlChar
13660 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
13661 * documents
13662 *
13663 * parse an XML in-memory document and build a tree.
13664 * It use the given SAX function block to handle the parsing callback.
13665 * If sax is NULL, fallback to the default DOM tree building routines.
13666 *
13667 * Returns the resulting document tree
13668 */
13669
13670xmlDocPtr
Daniel Veillard7331e5c2005-03-31 14:59:00 +000013671xmlSAXParseDoc(xmlSAXHandlerPtr sax, const xmlChar *cur, int recovery) {
Owen Taylor3473f882001-02-23 17:55:21 +000013672 xmlDocPtr ret;
13673 xmlParserCtxtPtr ctxt;
Daniel Veillard38936062004-11-04 17:45:11 +000013674 xmlSAXHandlerPtr oldsax = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000013675
Daniel Veillard38936062004-11-04 17:45:11 +000013676 if (cur == NULL) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000013677
13678
13679 ctxt = xmlCreateDocParserCtxt(cur);
13680 if (ctxt == NULL) return(NULL);
13681 if (sax != NULL) {
Daniel Veillard38936062004-11-04 17:45:11 +000013682 oldsax = ctxt->sax;
Owen Taylor3473f882001-02-23 17:55:21 +000013683 ctxt->sax = sax;
13684 ctxt->userData = NULL;
13685 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000013686 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000013687
13688 xmlParseDocument(ctxt);
13689 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
13690 else {
13691 ret = NULL;
13692 xmlFreeDoc(ctxt->myDoc);
13693 ctxt->myDoc = NULL;
13694 }
Daniel Veillard34099b42004-11-04 17:34:35 +000013695 if (sax != NULL)
Daniel Veillard38936062004-11-04 17:45:11 +000013696 ctxt->sax = oldsax;
Owen Taylor3473f882001-02-23 17:55:21 +000013697 xmlFreeParserCtxt(ctxt);
13698
13699 return(ret);
13700}
13701
13702/**
13703 * xmlParseDoc:
13704 * @cur: a pointer to an array of xmlChar
13705 *
13706 * parse an XML in-memory document and build a tree.
13707 *
13708 * Returns the resulting document tree
13709 */
13710
13711xmlDocPtr
Daniel Veillard7331e5c2005-03-31 14:59:00 +000013712xmlParseDoc(const xmlChar *cur) {
Owen Taylor3473f882001-02-23 17:55:21 +000013713 return(xmlSAXParseDoc(NULL, cur, 0));
13714}
Daniel Veillard81273902003-09-30 00:43:48 +000013715#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000013716
Daniel Veillard81273902003-09-30 00:43:48 +000013717#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +000013718/************************************************************************
13719 * *
13720 * Specific function to keep track of entities references *
13721 * and used by the XSLT debugger *
13722 * *
13723 ************************************************************************/
13724
13725static xmlEntityReferenceFunc xmlEntityRefFunc = NULL;
13726
13727/**
13728 * xmlAddEntityReference:
13729 * @ent : A valid entity
13730 * @firstNode : A valid first node for children of entity
13731 * @lastNode : A valid last node of children entity
13732 *
13733 * Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY
13734 */
13735static void
13736xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
13737 xmlNodePtr lastNode)
13738{
13739 if (xmlEntityRefFunc != NULL) {
13740 (*xmlEntityRefFunc) (ent, firstNode, lastNode);
13741 }
13742}
13743
13744
13745/**
13746 * xmlSetEntityReferenceFunc:
Daniel Veillard01c13b52002-12-10 15:19:08 +000013747 * @func: A valid function
Daniel Veillard8107a222002-01-13 14:10:10 +000013748 *
13749 * Set the function to call call back when a xml reference has been made
13750 */
13751void
13752xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func)
13753{
13754 xmlEntityRefFunc = func;
13755}
Daniel Veillard81273902003-09-30 00:43:48 +000013756#endif /* LIBXML_LEGACY_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000013757
13758/************************************************************************
13759 * *
13760 * Miscellaneous *
13761 * *
13762 ************************************************************************/
13763
13764#ifdef LIBXML_XPATH_ENABLED
13765#include <libxml/xpath.h>
13766#endif
13767
Daniel Veillardffa3c742005-07-21 13:24:09 +000013768extern void XMLCDECL xmlGenericErrorDefaultFunc(void *ctx, const char *msg, ...);
Owen Taylor3473f882001-02-23 17:55:21 +000013769static int xmlParserInitialized = 0;
13770
13771/**
13772 * xmlInitParser:
13773 *
13774 * Initialization function for the XML parser.
13775 * This is not reentrant. Call once before processing in case of
13776 * use in multithreaded programs.
13777 */
13778
13779void
13780xmlInitParser(void) {
Daniel Veillard3c01b1d2001-10-17 15:58:35 +000013781 if (xmlParserInitialized != 0)
13782 return;
Owen Taylor3473f882001-02-23 17:55:21 +000013783
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000013784#ifdef LIBXML_THREAD_ENABLED
13785 __xmlGlobalInitMutexLock();
13786 if (xmlParserInitialized == 0) {
13787#endif
13788 if ((xmlGenericError == xmlGenericErrorDefaultFunc) ||
13789 (xmlGenericError == NULL))
13790 initGenericErrorDefaultFunc(NULL);
13791 xmlInitGlobals();
13792 xmlInitThreads();
13793 xmlInitMemory();
13794 xmlInitCharEncodingHandlers();
13795 xmlDefaultSAXHandlerInit();
13796 xmlRegisterDefaultInputCallbacks();
Daniel Veillarda9cce9c2003-09-29 13:20:24 +000013797#ifdef LIBXML_OUTPUT_ENABLED
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000013798 xmlRegisterDefaultOutputCallbacks();
Daniel Veillarda9cce9c2003-09-29 13:20:24 +000013799#endif /* LIBXML_OUTPUT_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000013800#ifdef LIBXML_HTML_ENABLED
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000013801 htmlInitAutoClose();
13802 htmlDefaultSAXHandlerInit();
Owen Taylor3473f882001-02-23 17:55:21 +000013803#endif
13804#ifdef LIBXML_XPATH_ENABLED
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000013805 xmlXPathInit();
Owen Taylor3473f882001-02-23 17:55:21 +000013806#endif
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000013807 xmlParserInitialized = 1;
13808#ifdef LIBXML_THREAD_ENABLED
13809 }
13810 __xmlGlobalInitMutexUnlock();
13811#endif
Owen Taylor3473f882001-02-23 17:55:21 +000013812}
13813
13814/**
13815 * xmlCleanupParser:
13816 *
Daniel Veillard05b37c62008-03-31 08:27:07 +000013817 * This function name is somewhat misleading. It does not clean up
13818 * parser state, it cleans up memory allocated by the library itself.
13819 * It is a cleanup function for the XML library. It tries to reclaim all
13820 * related global memory allocated for the library processing.
13821 * It doesn't deallocate any document related memory. One should
13822 * call xmlCleanupParser() only when the process has finished using
13823 * the library and all XML/HTML documents built with it.
13824 * See also xmlInitParser() which has the opposite function of preparing
13825 * the library for operations.
Owen Taylor3473f882001-02-23 17:55:21 +000013826 */
13827
13828void
13829xmlCleanupParser(void) {
Daniel Veillard7fb801f2003-08-17 21:07:26 +000013830 if (!xmlParserInitialized)
13831 return;
13832
Owen Taylor3473f882001-02-23 17:55:21 +000013833 xmlCleanupCharEncodingHandlers();
Daniel Veillarde2940dd2001-08-22 00:06:49 +000013834#ifdef LIBXML_CATALOG_ENABLED
13835 xmlCatalogCleanup();
13836#endif
Daniel Veillard14412512005-01-21 23:53:26 +000013837 xmlDictCleanup();
Daniel Veillard04054be2003-10-15 10:48:54 +000013838 xmlCleanupInputCallbacks();
13839#ifdef LIBXML_OUTPUT_ENABLED
13840 xmlCleanupOutputCallbacks();
13841#endif
Daniel Veillard36e5cd52004-11-02 14:52:23 +000013842#ifdef LIBXML_SCHEMAS_ENABLED
13843 xmlSchemaCleanupTypes();
Daniel Veillarddd6d3002004-11-03 14:20:29 +000013844 xmlRelaxNGCleanupTypes();
Daniel Veillard36e5cd52004-11-02 14:52:23 +000013845#endif
Daniel Veillard781ac8b2003-05-15 22:11:36 +000013846 xmlCleanupGlobals();
Daniel Veillard2b8c4a12003-10-02 22:28:19 +000013847 xmlResetLastError();
Daniel Veillard74c0e592003-11-25 07:01:38 +000013848 xmlCleanupThreads(); /* must be last if called not from the main thread */
William M. Brack72ee48d2003-12-30 08:30:19 +000013849 xmlCleanupMemory();
Daniel Veillardd0463562001-10-13 09:15:48 +000013850 xmlParserInitialized = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000013851}
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013852
13853/************************************************************************
13854 * *
13855 * New set (2.6.0) of simpler and more flexible APIs *
13856 * *
13857 ************************************************************************/
13858
/**
 * DICT_FREE:
 * @str:  a string
 *
 * Free a string if it is not owned by the "dict" dictionary in the
 * current scope. A variable named dict must be visible at the point of
 * expansion; a NULL dict means the string is always freed.
 *
 * The body is wrapped in do { } while (0) so that the macro expands to a
 * single statement: it must be followed by a semicolon and is safe to use
 * as the body of an if/else.
 */
#define DICT_FREE(str)                                              \
    do {                                                            \
        if ((str) && ((!dict) ||                                    \
            (xmlDictOwns(dict, (const xmlChar *)(str)) == 0)))      \
            xmlFree((char *)(str));                                 \
    } while (0)
13870
13871/**
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013872 * xmlCtxtReset:
13873 * @ctxt: an XML parser context
13874 *
13875 * Reset a parser context
13876 */
13877void
13878xmlCtxtReset(xmlParserCtxtPtr ctxt)
13879{
13880 xmlParserInputPtr input;
Daniel Veillard36e5cd52004-11-02 14:52:23 +000013881 xmlDictPtr dict;
13882
13883 if (ctxt == NULL)
13884 return;
13885
13886 dict = ctxt->dict;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013887
13888 while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
13889 xmlFreeInputStream(input);
13890 }
13891 ctxt->inputNr = 0;
13892 ctxt->input = NULL;
13893
13894 ctxt->spaceNr = 0;
Daniel Veillard2e620862007-06-12 08:18:21 +000013895 if (ctxt->spaceTab != NULL) {
13896 ctxt->spaceTab[0] = -1;
13897 ctxt->space = &ctxt->spaceTab[0];
13898 } else {
13899 ctxt->space = NULL;
13900 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013901
13902
13903 ctxt->nodeNr = 0;
13904 ctxt->node = NULL;
13905
13906 ctxt->nameNr = 0;
13907 ctxt->name = NULL;
13908
Daniel Veillarde96a2a42003-09-24 21:23:56 +000013909 DICT_FREE(ctxt->version);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013910 ctxt->version = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000013911 DICT_FREE(ctxt->encoding);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013912 ctxt->encoding = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000013913 DICT_FREE(ctxt->directory);
13914 ctxt->directory = NULL;
13915 DICT_FREE(ctxt->extSubURI);
13916 ctxt->extSubURI = NULL;
13917 DICT_FREE(ctxt->extSubSystem);
13918 ctxt->extSubSystem = NULL;
13919 if (ctxt->myDoc != NULL)
13920 xmlFreeDoc(ctxt->myDoc);
13921 ctxt->myDoc = NULL;
13922
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013923 ctxt->standalone = -1;
13924 ctxt->hasExternalSubset = 0;
13925 ctxt->hasPErefs = 0;
13926 ctxt->html = 0;
13927 ctxt->external = 0;
13928 ctxt->instate = XML_PARSER_START;
13929 ctxt->token = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013930
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013931 ctxt->wellFormed = 1;
13932 ctxt->nsWellFormed = 1;
Daniel Veillardae289182004-01-21 16:00:43 +000013933 ctxt->disableSAX = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013934 ctxt->valid = 1;
Daniel Veillard766c4f92004-03-26 10:48:29 +000013935#if 0
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013936 ctxt->vctxt.userData = ctxt;
13937 ctxt->vctxt.error = xmlParserValidityError;
13938 ctxt->vctxt.warning = xmlParserValidityWarning;
Daniel Veillard766c4f92004-03-26 10:48:29 +000013939#endif
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013940 ctxt->record_info = 0;
13941 ctxt->nbChars = 0;
13942 ctxt->checkIndex = 0;
13943 ctxt->inSubset = 0;
13944 ctxt->errNo = XML_ERR_OK;
13945 ctxt->depth = 0;
13946 ctxt->charset = XML_CHAR_ENCODING_UTF8;
13947 ctxt->catalogs = NULL;
Daniel Veillard4bf899b2008-08-20 17:04:30 +000013948 ctxt->nbentities = 0;
Daniel Veillard0161e632008-08-28 15:36:32 +000013949 ctxt->sizeentities = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013950 xmlInitNodeInfoSeq(&ctxt->node_seq);
13951
13952 if (ctxt->attsDefault != NULL) {
13953 xmlHashFree(ctxt->attsDefault, (xmlHashDeallocator) xmlFree);
13954 ctxt->attsDefault = NULL;
13955 }
13956 if (ctxt->attsSpecial != NULL) {
13957 xmlHashFree(ctxt->attsSpecial, NULL);
13958 ctxt->attsSpecial = NULL;
13959 }
13960
Daniel Veillard4432df22003-09-28 18:58:27 +000013961#ifdef LIBXML_CATALOG_ENABLED
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013962 if (ctxt->catalogs != NULL)
13963 xmlCatalogFreeLocal(ctxt->catalogs);
Daniel Veillard4432df22003-09-28 18:58:27 +000013964#endif
Daniel Veillardcc199e02003-10-24 21:11:48 +000013965 if (ctxt->lastError.code != XML_ERR_OK)
13966 xmlResetError(&ctxt->lastError);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013967}
13968
13969/**
Daniel Veillard9ba8e382003-10-28 21:31:45 +000013970 * xmlCtxtResetPush:
13971 * @ctxt: an XML parser context
13972 * @chunk: a pointer to an array of chars
13973 * @size: number of chars in the array
13974 * @filename: an optional file name or URI
13975 * @encoding: the document encoding, or NULL
13976 *
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000013977 * Reset a push parser context
13978 *
13979 * Returns 0 in case of success and 1 in case of error
Daniel Veillard9ba8e382003-10-28 21:31:45 +000013980 */
13981int
13982xmlCtxtResetPush(xmlParserCtxtPtr ctxt, const char *chunk,
13983 int size, const char *filename, const char *encoding)
13984{
13985 xmlParserInputPtr inputStream;
13986 xmlParserInputBufferPtr buf;
13987 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
13988
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000013989 if (ctxt == NULL)
13990 return(1);
13991
Daniel Veillard9ba8e382003-10-28 21:31:45 +000013992 if ((encoding == NULL) && (chunk != NULL) && (size >= 4))
13993 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
13994
13995 buf = xmlAllocParserInputBuffer(enc);
13996 if (buf == NULL)
13997 return(1);
13998
13999 if (ctxt == NULL) {
14000 xmlFreeParserInputBuffer(buf);
14001 return(1);
14002 }
14003
14004 xmlCtxtReset(ctxt);
14005
14006 if (ctxt->pushTab == NULL) {
14007 ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 *
14008 sizeof(xmlChar *));
14009 if (ctxt->pushTab == NULL) {
14010 xmlErrMemory(ctxt, NULL);
14011 xmlFreeParserInputBuffer(buf);
14012 return(1);
14013 }
14014 }
14015
14016 if (filename == NULL) {
14017 ctxt->directory = NULL;
14018 } else {
14019 ctxt->directory = xmlParserGetDirectory(filename);
14020 }
14021
14022 inputStream = xmlNewInputStream(ctxt);
14023 if (inputStream == NULL) {
14024 xmlFreeParserInputBuffer(buf);
14025 return(1);
14026 }
14027
14028 if (filename == NULL)
14029 inputStream->filename = NULL;
14030 else
14031 inputStream->filename = (char *)
14032 xmlCanonicPath((const xmlChar *) filename);
14033 inputStream->buf = buf;
14034 inputStream->base = inputStream->buf->buffer->content;
14035 inputStream->cur = inputStream->buf->buffer->content;
14036 inputStream->end =
14037 &inputStream->buf->buffer->content[inputStream->buf->buffer->use];
14038
14039 inputPush(ctxt, inputStream);
14040
14041 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
14042 (ctxt->input->buf != NULL)) {
14043 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
14044 int cur = ctxt->input->cur - ctxt->input->base;
14045
14046 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
14047
14048 ctxt->input->base = ctxt->input->buf->buffer->content + base;
14049 ctxt->input->cur = ctxt->input->base + cur;
14050 ctxt->input->end =
14051 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->
14052 use];
14053#ifdef DEBUG_PUSH
14054 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
14055#endif
14056 }
14057
14058 if (encoding != NULL) {
14059 xmlCharEncodingHandlerPtr hdlr;
14060
Daniel Veillard37334572008-07-31 08:20:02 +000014061 if (ctxt->encoding != NULL)
14062 xmlFree((xmlChar *) ctxt->encoding);
14063 ctxt->encoding = xmlStrdup((const xmlChar *) encoding);
14064
Daniel Veillard9ba8e382003-10-28 21:31:45 +000014065 hdlr = xmlFindCharEncodingHandler(encoding);
14066 if (hdlr != NULL) {
14067 xmlSwitchToEncoding(ctxt, hdlr);
14068 } else {
14069 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
14070 "Unsupported encoding %s\n", BAD_CAST encoding);
14071 }
14072 } else if (enc != XML_CHAR_ENCODING_NONE) {
14073 xmlSwitchEncoding(ctxt, enc);
14074 }
14075
14076 return(0);
14077}
14078
Daniel Veillard37334572008-07-31 08:20:02 +000014079
Daniel Veillard9ba8e382003-10-28 21:31:45 +000014080/**
Daniel Veillard37334572008-07-31 08:20:02 +000014081 * xmlCtxtUseOptionsInternal:
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014082 * @ctxt: an XML parser context
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014083 * @options: a combination of xmlParserOption
Daniel Veillard37334572008-07-31 08:20:02 +000014084 * @encoding: the user provided encoding to use
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014085 *
14086 * Applies the options to the parser context
14087 *
14088 * Returns 0 in case of success, the set of unknown or unimplemented options
14089 * in case of error.
14090 */
Daniel Veillard37334572008-07-31 08:20:02 +000014091static int
14092xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options, const char *encoding)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014093{
Daniel Veillard36e5cd52004-11-02 14:52:23 +000014094 if (ctxt == NULL)
14095 return(-1);
Daniel Veillard37334572008-07-31 08:20:02 +000014096 if (encoding != NULL) {
14097 if (ctxt->encoding != NULL)
14098 xmlFree((xmlChar *) ctxt->encoding);
14099 ctxt->encoding = xmlStrdup((const xmlChar *) encoding);
14100 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014101 if (options & XML_PARSE_RECOVER) {
14102 ctxt->recovery = 1;
14103 options -= XML_PARSE_RECOVER;
Daniel Veillardae0765b2008-07-31 19:54:59 +000014104 ctxt->options |= XML_PARSE_RECOVER;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014105 } else
14106 ctxt->recovery = 0;
14107 if (options & XML_PARSE_DTDLOAD) {
14108 ctxt->loadsubset = XML_DETECT_IDS;
14109 options -= XML_PARSE_DTDLOAD;
Daniel Veillardae0765b2008-07-31 19:54:59 +000014110 ctxt->options |= XML_PARSE_DTDLOAD;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014111 } else
14112 ctxt->loadsubset = 0;
14113 if (options & XML_PARSE_DTDATTR) {
14114 ctxt->loadsubset |= XML_COMPLETE_ATTRS;
14115 options -= XML_PARSE_DTDATTR;
Daniel Veillardae0765b2008-07-31 19:54:59 +000014116 ctxt->options |= XML_PARSE_DTDATTR;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014117 }
14118 if (options & XML_PARSE_NOENT) {
14119 ctxt->replaceEntities = 1;
14120 /* ctxt->loadsubset |= XML_DETECT_IDS; */
14121 options -= XML_PARSE_NOENT;
Daniel Veillardae0765b2008-07-31 19:54:59 +000014122 ctxt->options |= XML_PARSE_NOENT;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014123 } else
14124 ctxt->replaceEntities = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014125 if (options & XML_PARSE_PEDANTIC) {
14126 ctxt->pedantic = 1;
14127 options -= XML_PARSE_PEDANTIC;
Daniel Veillardae0765b2008-07-31 19:54:59 +000014128 ctxt->options |= XML_PARSE_PEDANTIC;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014129 } else
14130 ctxt->pedantic = 0;
14131 if (options & XML_PARSE_NOBLANKS) {
14132 ctxt->keepBlanks = 0;
14133 ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace;
14134 options -= XML_PARSE_NOBLANKS;
Daniel Veillardae0765b2008-07-31 19:54:59 +000014135 ctxt->options |= XML_PARSE_NOBLANKS;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014136 } else
14137 ctxt->keepBlanks = 1;
14138 if (options & XML_PARSE_DTDVALID) {
14139 ctxt->validate = 1;
14140 if (options & XML_PARSE_NOWARNING)
14141 ctxt->vctxt.warning = NULL;
14142 if (options & XML_PARSE_NOERROR)
14143 ctxt->vctxt.error = NULL;
14144 options -= XML_PARSE_DTDVALID;
Daniel Veillardae0765b2008-07-31 19:54:59 +000014145 ctxt->options |= XML_PARSE_DTDVALID;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014146 } else
14147 ctxt->validate = 0;
Daniel Veillard971771e2005-07-09 17:32:57 +000014148 if (options & XML_PARSE_NOWARNING) {
14149 ctxt->sax->warning = NULL;
14150 options -= XML_PARSE_NOWARNING;
14151 }
14152 if (options & XML_PARSE_NOERROR) {
14153 ctxt->sax->error = NULL;
14154 ctxt->sax->fatalError = NULL;
14155 options -= XML_PARSE_NOERROR;
14156 }
Daniel Veillard81273902003-09-30 00:43:48 +000014157#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014158 if (options & XML_PARSE_SAX1) {
14159 ctxt->sax->startElement = xmlSAX2StartElement;
14160 ctxt->sax->endElement = xmlSAX2EndElement;
14161 ctxt->sax->startElementNs = NULL;
14162 ctxt->sax->endElementNs = NULL;
14163 ctxt->sax->initialized = 1;
14164 options -= XML_PARSE_SAX1;
Daniel Veillardae0765b2008-07-31 19:54:59 +000014165 ctxt->options |= XML_PARSE_SAX1;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014166 }
Daniel Veillard81273902003-09-30 00:43:48 +000014167#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde96a2a42003-09-24 21:23:56 +000014168 if (options & XML_PARSE_NODICT) {
14169 ctxt->dictNames = 0;
14170 options -= XML_PARSE_NODICT;
Daniel Veillardae0765b2008-07-31 19:54:59 +000014171 ctxt->options |= XML_PARSE_NODICT;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000014172 } else {
14173 ctxt->dictNames = 1;
14174 }
Daniel Veillarddca8cc72003-09-26 13:53:14 +000014175 if (options & XML_PARSE_NOCDATA) {
14176 ctxt->sax->cdataBlock = NULL;
14177 options -= XML_PARSE_NOCDATA;
Daniel Veillardae0765b2008-07-31 19:54:59 +000014178 ctxt->options |= XML_PARSE_NOCDATA;
Daniel Veillarddca8cc72003-09-26 13:53:14 +000014179 }
14180 if (options & XML_PARSE_NSCLEAN) {
14181 ctxt->options |= XML_PARSE_NSCLEAN;
14182 options -= XML_PARSE_NSCLEAN;
14183 }
Daniel Veillard61b93382003-11-03 14:28:31 +000014184 if (options & XML_PARSE_NONET) {
14185 ctxt->options |= XML_PARSE_NONET;
14186 options -= XML_PARSE_NONET;
14187 }
Daniel Veillard8874b942005-08-25 13:19:21 +000014188 if (options & XML_PARSE_COMPACT) {
14189 ctxt->options |= XML_PARSE_COMPACT;
14190 options -= XML_PARSE_COMPACT;
14191 }
Daniel Veillard7e5c3f42008-07-29 16:12:31 +000014192 if (options & XML_PARSE_OLD10) {
14193 ctxt->options |= XML_PARSE_OLD10;
14194 options -= XML_PARSE_OLD10;
14195 }
Daniel Veillard8915c152008-08-26 13:05:34 +000014196 if (options & XML_PARSE_NOBASEFIX) {
14197 ctxt->options |= XML_PARSE_NOBASEFIX;
14198 options -= XML_PARSE_NOBASEFIX;
14199 }
14200 if (options & XML_PARSE_HUGE) {
14201 ctxt->options |= XML_PARSE_HUGE;
14202 options -= XML_PARSE_HUGE;
14203 }
Daniel Veillard7ec29972003-10-31 14:36:36 +000014204 ctxt->linenumbers = 1;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014205 return (options);
14206}
14207
14208/**
Daniel Veillard37334572008-07-31 08:20:02 +000014209 * xmlCtxtUseOptions:
14210 * @ctxt: an XML parser context
14211 * @options: a combination of xmlParserOption
14212 *
14213 * Applies the options to the parser context
14214 *
14215 * Returns 0 in case of success, the set of unknown or unimplemented options
14216 * in case of error.
14217 */
14218int
14219xmlCtxtUseOptions(xmlParserCtxtPtr ctxt, int options)
14220{
14221 return(xmlCtxtUseOptionsInternal(ctxt, options, NULL));
14222}
14223
14224/**
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014225 * xmlDoRead:
14226 * @ctxt: an XML parser context
Daniel Veillard60942de2003-09-25 21:05:58 +000014227 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014228 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014229 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014230 * @reuse: keep the context for reuse
14231 *
14232 * Common front-end for the xmlRead functions
Daniel Veillard37334572008-07-31 08:20:02 +000014233 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014234 * Returns the resulting document tree or NULL
14235 */
14236static xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000014237xmlDoRead(xmlParserCtxtPtr ctxt, const char *URL, const char *encoding,
14238 int options, int reuse)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014239{
14240 xmlDocPtr ret;
Daniel Veillard37334572008-07-31 08:20:02 +000014241
14242 xmlCtxtUseOptionsInternal(ctxt, options, encoding);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014243 if (encoding != NULL) {
14244 xmlCharEncodingHandlerPtr hdlr;
14245
14246 hdlr = xmlFindCharEncodingHandler(encoding);
14247 if (hdlr != NULL)
14248 xmlSwitchToEncoding(ctxt, hdlr);
14249 }
Daniel Veillard60942de2003-09-25 21:05:58 +000014250 if ((URL != NULL) && (ctxt->input != NULL) &&
14251 (ctxt->input->filename == NULL))
14252 ctxt->input->filename = (char *) xmlStrdup((const xmlChar *) URL);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014253 xmlParseDocument(ctxt);
14254 if ((ctxt->wellFormed) || ctxt->recovery)
14255 ret = ctxt->myDoc;
14256 else {
14257 ret = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000014258 if (ctxt->myDoc != NULL) {
Daniel Veillarde96a2a42003-09-24 21:23:56 +000014259 xmlFreeDoc(ctxt->myDoc);
14260 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014261 }
Daniel Veillarde96a2a42003-09-24 21:23:56 +000014262 ctxt->myDoc = NULL;
14263 if (!reuse) {
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014264 xmlFreeParserCtxt(ctxt);
Daniel Veillarde96a2a42003-09-24 21:23:56 +000014265 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014266
14267 return (ret);
14268}
14269
14270/**
14271 * xmlReadDoc:
14272 * @cur: a pointer to a zero terminated string
Daniel Veillard60942de2003-09-25 21:05:58 +000014273 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014274 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014275 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014276 *
14277 * parse an XML in-memory document and build a tree.
14278 *
14279 * Returns the resulting document tree
14280 */
14281xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000014282xmlReadDoc(const xmlChar * cur, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014283{
14284 xmlParserCtxtPtr ctxt;
14285
14286 if (cur == NULL)
14287 return (NULL);
14288
14289 ctxt = xmlCreateDocParserCtxt(cur);
14290 if (ctxt == NULL)
14291 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000014292 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014293}
14294
14295/**
14296 * xmlReadFile:
14297 * @filename: a file or URL
14298 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014299 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014300 *
14301 * parse an XML file from the filesystem or the network.
14302 *
14303 * Returns the resulting document tree
14304 */
14305xmlDocPtr
14306xmlReadFile(const char *filename, const char *encoding, int options)
14307{
14308 xmlParserCtxtPtr ctxt;
14309
Daniel Veillard61b93382003-11-03 14:28:31 +000014310 ctxt = xmlCreateURLParserCtxt(filename, options);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014311 if (ctxt == NULL)
14312 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000014313 return (xmlDoRead(ctxt, NULL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014314}
14315
14316/**
14317 * xmlReadMemory:
14318 * @buffer: a pointer to a char array
14319 * @size: the size of the array
Daniel Veillard60942de2003-09-25 21:05:58 +000014320 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014321 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014322 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014323 *
14324 * parse an XML in-memory document and build a tree.
14325 *
14326 * Returns the resulting document tree
14327 */
14328xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000014329xmlReadMemory(const char *buffer, int size, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014330{
14331 xmlParserCtxtPtr ctxt;
14332
14333 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
14334 if (ctxt == NULL)
14335 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000014336 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014337}
14338
14339/**
14340 * xmlReadFd:
14341 * @fd: an open file descriptor
Daniel Veillard60942de2003-09-25 21:05:58 +000014342 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014343 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014344 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014345 *
14346 * parse an XML from a file descriptor and build a tree.
Daniel Veillard4bc5f432003-12-22 18:13:12 +000014347 * NOTE that the file descriptor will not be closed when the
14348 * reader is closed or reset.
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014349 *
14350 * Returns the resulting document tree
14351 */
14352xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000014353xmlReadFd(int fd, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014354{
14355 xmlParserCtxtPtr ctxt;
14356 xmlParserInputBufferPtr input;
14357 xmlParserInputPtr stream;
14358
14359 if (fd < 0)
14360 return (NULL);
14361
14362 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
14363 if (input == NULL)
14364 return (NULL);
Daniel Veillard4bc5f432003-12-22 18:13:12 +000014365 input->closecallback = NULL;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014366 ctxt = xmlNewParserCtxt();
14367 if (ctxt == NULL) {
14368 xmlFreeParserInputBuffer(input);
14369 return (NULL);
14370 }
14371 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
14372 if (stream == NULL) {
14373 xmlFreeParserInputBuffer(input);
14374 xmlFreeParserCtxt(ctxt);
14375 return (NULL);
14376 }
14377 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000014378 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014379}
14380
14381/**
14382 * xmlReadIO:
14383 * @ioread: an I/O read function
14384 * @ioclose: an I/O close function
14385 * @ioctx: an I/O handler
Daniel Veillard60942de2003-09-25 21:05:58 +000014386 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014387 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014388 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014389 *
14390 * parse an XML document from I/O functions and source and build a tree.
14391 *
14392 * Returns the resulting document tree
14393 */
14394xmlDocPtr
14395xmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
Daniel Veillard60942de2003-09-25 21:05:58 +000014396 void *ioctx, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014397{
14398 xmlParserCtxtPtr ctxt;
14399 xmlParserInputBufferPtr input;
14400 xmlParserInputPtr stream;
14401
14402 if (ioread == NULL)
14403 return (NULL);
14404
14405 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
14406 XML_CHAR_ENCODING_NONE);
14407 if (input == NULL)
14408 return (NULL);
14409 ctxt = xmlNewParserCtxt();
14410 if (ctxt == NULL) {
14411 xmlFreeParserInputBuffer(input);
14412 return (NULL);
14413 }
14414 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
14415 if (stream == NULL) {
14416 xmlFreeParserInputBuffer(input);
14417 xmlFreeParserCtxt(ctxt);
14418 return (NULL);
14419 }
14420 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000014421 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014422}
14423
14424/**
14425 * xmlCtxtReadDoc:
14426 * @ctxt: an XML parser context
14427 * @cur: a pointer to a zero terminated string
Daniel Veillard60942de2003-09-25 21:05:58 +000014428 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014429 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014430 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014431 *
14432 * parse an XML in-memory document and build a tree.
14433 * This reuses the existing @ctxt parser context
14434 *
14435 * Returns the resulting document tree
14436 */
14437xmlDocPtr
14438xmlCtxtReadDoc(xmlParserCtxtPtr ctxt, const xmlChar * cur,
Daniel Veillard60942de2003-09-25 21:05:58 +000014439 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014440{
14441 xmlParserInputPtr stream;
14442
14443 if (cur == NULL)
14444 return (NULL);
14445 if (ctxt == NULL)
14446 return (NULL);
14447
14448 xmlCtxtReset(ctxt);
14449
14450 stream = xmlNewStringInputStream(ctxt, cur);
14451 if (stream == NULL) {
14452 return (NULL);
14453 }
14454 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000014455 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014456}
14457
14458/**
14459 * xmlCtxtReadFile:
14460 * @ctxt: an XML parser context
14461 * @filename: a file or URL
14462 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014463 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014464 *
14465 * parse an XML file from the filesystem or the network.
14466 * This reuses the existing @ctxt parser context
14467 *
14468 * Returns the resulting document tree
14469 */
14470xmlDocPtr
14471xmlCtxtReadFile(xmlParserCtxtPtr ctxt, const char *filename,
14472 const char *encoding, int options)
14473{
14474 xmlParserInputPtr stream;
14475
14476 if (filename == NULL)
14477 return (NULL);
14478 if (ctxt == NULL)
14479 return (NULL);
14480
14481 xmlCtxtReset(ctxt);
14482
Daniel Veillard29614c72004-11-26 10:47:26 +000014483 stream = xmlLoadExternalEntity(filename, NULL, ctxt);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014484 if (stream == NULL) {
14485 return (NULL);
14486 }
14487 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000014488 return (xmlDoRead(ctxt, NULL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014489}
14490
14491/**
14492 * xmlCtxtReadMemory:
14493 * @ctxt: an XML parser context
14494 * @buffer: a pointer to a char array
14495 * @size: the size of the array
Daniel Veillard60942de2003-09-25 21:05:58 +000014496 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014497 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014498 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014499 *
14500 * parse an XML in-memory document and build a tree.
14501 * This reuses the existing @ctxt parser context
14502 *
14503 * Returns the resulting document tree
14504 */
14505xmlDocPtr
14506xmlCtxtReadMemory(xmlParserCtxtPtr ctxt, const char *buffer, int size,
Daniel Veillard60942de2003-09-25 21:05:58 +000014507 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014508{
14509 xmlParserInputBufferPtr input;
14510 xmlParserInputPtr stream;
14511
14512 if (ctxt == NULL)
14513 return (NULL);
14514 if (buffer == NULL)
14515 return (NULL);
14516
14517 xmlCtxtReset(ctxt);
14518
14519 input = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
14520 if (input == NULL) {
14521 return(NULL);
14522 }
14523
14524 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
14525 if (stream == NULL) {
14526 xmlFreeParserInputBuffer(input);
14527 return(NULL);
14528 }
14529
14530 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000014531 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014532}
14533
14534/**
14535 * xmlCtxtReadFd:
14536 * @ctxt: an XML parser context
14537 * @fd: an open file descriptor
Daniel Veillard60942de2003-09-25 21:05:58 +000014538 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014539 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014540 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014541 *
14542 * parse an XML from a file descriptor and build a tree.
14543 * This reuses the existing @ctxt parser context
Daniel Veillard4bc5f432003-12-22 18:13:12 +000014544 * NOTE that the file descriptor will not be closed when the
14545 * reader is closed or reset.
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014546 *
14547 * Returns the resulting document tree
14548 */
14549xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000014550xmlCtxtReadFd(xmlParserCtxtPtr ctxt, int fd,
14551 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014552{
14553 xmlParserInputBufferPtr input;
14554 xmlParserInputPtr stream;
14555
14556 if (fd < 0)
14557 return (NULL);
14558 if (ctxt == NULL)
14559 return (NULL);
14560
14561 xmlCtxtReset(ctxt);
14562
14563
14564 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
14565 if (input == NULL)
14566 return (NULL);
Daniel Veillard4bc5f432003-12-22 18:13:12 +000014567 input->closecallback = NULL;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014568 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
14569 if (stream == NULL) {
14570 xmlFreeParserInputBuffer(input);
14571 return (NULL);
14572 }
14573 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000014574 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014575}
14576
14577/**
14578 * xmlCtxtReadIO:
14579 * @ctxt: an XML parser context
14580 * @ioread: an I/O read function
14581 * @ioclose: an I/O close function
14582 * @ioctx: an I/O handler
Daniel Veillard60942de2003-09-25 21:05:58 +000014583 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014584 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014585 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014586 *
14587 * parse an XML document from I/O functions and source and build a tree.
14588 * This reuses the existing @ctxt parser context
14589 *
14590 * Returns the resulting document tree
14591 */
14592xmlDocPtr
14593xmlCtxtReadIO(xmlParserCtxtPtr ctxt, xmlInputReadCallback ioread,
14594 xmlInputCloseCallback ioclose, void *ioctx,
Daniel Veillard60942de2003-09-25 21:05:58 +000014595 const char *URL,
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014596 const char *encoding, int options)
14597{
14598 xmlParserInputBufferPtr input;
14599 xmlParserInputPtr stream;
14600
14601 if (ioread == NULL)
14602 return (NULL);
14603 if (ctxt == NULL)
14604 return (NULL);
14605
14606 xmlCtxtReset(ctxt);
14607
14608 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
14609 XML_CHAR_ENCODING_NONE);
14610 if (input == NULL)
14611 return (NULL);
14612 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
14613 if (stream == NULL) {
14614 xmlFreeParserInputBuffer(input);
14615 return (NULL);
14616 }
14617 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000014618 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014619}
Daniel Veillard5d4644e2005-04-01 13:11:58 +000014620
14621#define bottom_parser
14622#include "elfgcchack.h"