blob: 85e759969a18de84cca7876fe1dfdcfc48ed3b1a [file] [log] [blame]
Owen Taylor3473f882001-02-23 17:55:21 +00001/*
2 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
3 * implemented on top of the SAX interfaces
4 *
5 * References:
6 * The XML specification:
7 * http://www.w3.org/TR/REC-xml
8 * Original 1.0 version:
9 * http://www.w3.org/TR/1998/REC-xml-19980210
10 * XML second edition working draft
11 * http://www.w3.org/TR/2000/WD-xml-2e-20000814
12 *
13 * Okay this is a big file, the parser core is around 7000 lines, then it
14 * is followed by the progressive parser top routines, then the various
Daniel Veillardcbaf3992001-12-31 16:16:02 +000015 * high level APIs to call the parser and a few miscellaneous functions.
Owen Taylor3473f882001-02-23 17:55:21 +000016 * A number of helper functions and deprecated ones have been moved to
17 * parserInternals.c to reduce this file size.
18 * As much as possible the functions are associated with their relative
19 * production in the XML specification. A few productions defining the
 * different ranges of characters are actually implemented either in
21 * parserInternals.h or parserInternals.c
22 * The DOM tree build is realized from the default SAX callbacks in
23 * the module SAX.c.
24 * The routines doing the validation checks are in valid.c and called either
Daniel Veillardcbaf3992001-12-31 16:16:02 +000025 * from the SAX callbacks or as standalone functions using a preparsed
Owen Taylor3473f882001-02-23 17:55:21 +000026 * document.
27 *
28 * See Copyright for the status of this software.
29 *
Daniel Veillardc5d64342001-06-24 12:13:24 +000030 * daniel@veillard.com
Owen Taylor3473f882001-02-23 17:55:21 +000031 */
32
Daniel Veillard34ce8be2002-03-18 19:37:11 +000033#define IN_LIBXML
Bjorn Reese70a9da52001-04-21 16:57:29 +000034#include "libxml.h"
35
Daniel Veillard3c5ed912002-01-08 10:36:16 +000036#if defined(WIN32) && !defined (__CYGWIN__)
Owen Taylor3473f882001-02-23 17:55:21 +000037#define XML_DIR_SEP '\\'
38#else
Owen Taylor3473f882001-02-23 17:55:21 +000039#define XML_DIR_SEP '/'
40#endif
41
Owen Taylor3473f882001-02-23 17:55:21 +000042#include <stdlib.h>
43#include <string.h>
Aleksey Sanine7acf432003-10-02 20:05:27 +000044#include <stdarg.h>
Owen Taylor3473f882001-02-23 17:55:21 +000045#include <libxml/xmlmemory.h>
Daniel Veillardd0463562001-10-13 09:15:48 +000046#include <libxml/threads.h>
47#include <libxml/globals.h>
Owen Taylor3473f882001-02-23 17:55:21 +000048#include <libxml/tree.h>
49#include <libxml/parser.h>
50#include <libxml/parserInternals.h>
51#include <libxml/valid.h>
52#include <libxml/entities.h>
53#include <libxml/xmlerror.h>
54#include <libxml/encoding.h>
55#include <libxml/xmlIO.h>
56#include <libxml/uri.h>
Daniel Veillard5d90b6c2001-08-22 14:29:45 +000057#ifdef LIBXML_CATALOG_ENABLED
58#include <libxml/catalog.h>
59#endif
William M. Brack1d8c9b22004-12-25 10:14:57 +000060#ifdef LIBXML_SCHEMAS_ENABLED
61#include <libxml/xmlschemastypes.h>
62#include <libxml/relaxng.h>
63#endif
Owen Taylor3473f882001-02-23 17:55:21 +000064#ifdef HAVE_CTYPE_H
65#include <ctype.h>
66#endif
67#ifdef HAVE_STDLIB_H
68#include <stdlib.h>
69#endif
70#ifdef HAVE_SYS_STAT_H
71#include <sys/stat.h>
72#endif
73#ifdef HAVE_FCNTL_H
74#include <fcntl.h>
75#endif
76#ifdef HAVE_UNISTD_H
77#include <unistd.h>
78#endif
79#ifdef HAVE_ZLIB_H
80#include <zlib.h>
81#endif
82
Daniel Veillard0161e632008-08-28 15:36:32 +000083static void
84xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info);
85
Rob Richards9c0aa472009-03-26 18:10:19 +000086static xmlParserCtxtPtr
87xmlCreateEntityParserCtxtInternal(const xmlChar *URL, const xmlChar *ID,
88 const xmlChar *base, xmlParserCtxtPtr pctx);
89
Daniel Veillard0161e632008-08-28 15:36:32 +000090/************************************************************************
91 * *
92 * Arbitrary limits set in the parser. See XML_PARSE_HUGE *
93 * *
94 ************************************************************************/
95
/*
 * XML_PARSER_BIG_ENTITY: xmlParserEntityCheck accepts, without any further
 * check, a replacement text whose size is below this value.
 */
#define XML_PARSER_BIG_ENTITY 1000
#define XML_PARSER_LOT_ENTITY 5000

/*
 * XML_PARSER_NON_LINEAR is the threshold on the ratio of parsed entity
 * replacement to the size in bytes of the input above which the expansion
 * is considered to show an exponential behaviour. A value of 10 corresponds
 * to at least 3 entity replacements per byte of input.
 */
#define XML_PARSER_NON_LINEAR 10
106
107/*
108 * xmlParserEntityCheck
109 *
110 * Function to check non-linear entity expansion behaviour
111 * This is here to detect and stop exponential linear entity expansion
112 * This is not a limitation of the parser but a safety
113 * boundary feature. It can be disabled with the XML_PARSE_HUGE
114 * parser option.
115 */
116static int
117xmlParserEntityCheck(xmlParserCtxtPtr ctxt, unsigned long size,
118 xmlEntityPtr ent)
119{
Daniel Veillardcba68392008-08-29 12:43:40 +0000120 unsigned long consumed = 0;
Daniel Veillard0161e632008-08-28 15:36:32 +0000121
122 if ((ctxt == NULL) || (ctxt->options & XML_PARSE_HUGE))
123 return (0);
124 if (ctxt->lastError.code == XML_ERR_ENTITY_LOOP)
125 return (1);
126 if (size != 0) {
127 /*
128 * Do the check based on the replacement size of the entity
129 */
130 if (size < XML_PARSER_BIG_ENTITY)
131 return(0);
132
133 /*
134 * A limit on the amount of text data reasonably used
135 */
136 if (ctxt->input != NULL) {
137 consumed = ctxt->input->consumed +
138 (ctxt->input->cur - ctxt->input->base);
139 }
140 consumed += ctxt->sizeentities;
141
142 if ((size < XML_PARSER_NON_LINEAR * consumed) &&
143 (ctxt->nbentities * 3 < XML_PARSER_NON_LINEAR * consumed))
144 return (0);
145 } else if (ent != NULL) {
146 /*
147 * use the number of parsed entities in the replacement
148 */
149 size = ent->checked;
150
151 /*
152 * The amount of data parsed counting entities size only once
153 */
154 if (ctxt->input != NULL) {
155 consumed = ctxt->input->consumed +
156 (ctxt->input->cur - ctxt->input->base);
157 }
158 consumed += ctxt->sizeentities;
159
160 /*
161 * Check the density of entities for the amount of data
162 * knowing an entity reference will take at least 3 bytes
163 */
164 if (size * 3 < consumed * XML_PARSER_NON_LINEAR)
165 return (0);
166 } else {
167 /*
168 * strange we got no data for checking just return
169 */
170 return (0);
171 }
172
173 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
174 return (1);
175}
176
/**
 * xmlParserMaxDepth:
 *
 * Arbitrary depth limit for the XML documents that the parser accepts
 * to process. This is a safety boundary rather than a limitation of the
 * parser itself, and it can be disabled with the XML_PARSE_HUGE parser
 * option.
 */
unsigned int xmlParserMaxDepth = 256;
Owen Taylor3473f882001-02-23 17:55:21 +0000186
Daniel Veillard0fb18932003-09-07 09:14:37 +0000187
Daniel Veillard0161e632008-08-28 15:36:32 +0000188
#define SAX2 1

/* Buffer size constants used by the parser. */
#define XML_PARSER_BIG_BUFFER_SIZE 300
#define XML_PARSER_BUFFER_SIZE 100

#define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"
193
/*
 * NULL-terminated list of the "xml"-prefixed processing instruction
 * targets allowed by the W3C specifications.
 */
static const char *xmlW3CPIs[] = {
    "xml-stylesheet",
    NULL
};
202
Daniel Veillarda07050d2003-10-19 14:46:32 +0000203
Owen Taylor3473f882001-02-23 17:55:21 +0000204/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
Daniel Veillard8ed10722009-08-20 19:17:36 +0200205static xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
206 const xmlChar **str);
Owen Taylor3473f882001-02-23 17:55:21 +0000207
Daniel Veillard7d515752003-09-26 19:12:37 +0000208static xmlParserErrors
Daniel Veillarda97a19b2001-05-20 13:19:52 +0000209xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
210 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +0000211 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +0000212 const xmlChar *ID, xmlNodePtr *list);
Owen Taylor3473f882001-02-23 17:55:21 +0000213
Daniel Veillard37334572008-07-31 08:20:02 +0000214static int
215xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options,
216 const char *encoding);
Daniel Veillard81273902003-09-30 00:43:48 +0000217#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +0000218static void
219xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
220 xmlNodePtr lastNode);
Daniel Veillard81273902003-09-30 00:43:48 +0000221#endif /* LIBXML_LEGACY_ENABLED */
Daniel Veillard8107a222002-01-13 14:10:10 +0000222
Daniel Veillard7d515752003-09-26 19:12:37 +0000223static xmlParserErrors
Daniel Veillard328f48c2002-11-15 15:24:34 +0000224xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
225 const xmlChar *string, void *user_data, xmlNodePtr *lst);
Daniel Veillarde57ec792003-09-10 10:50:59 +0000226
Daniel Veillard8bf64ae2008-03-24 20:45:21 +0000227static int
228xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity);
229
Daniel Veillarde57ec792003-09-10 10:50:59 +0000230/************************************************************************
231 * *
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000232 * Some factorized error routines *
233 * *
234 ************************************************************************/
235
236/**
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000237 * xmlErrAttributeDup:
238 * @ctxt: an XML parser context
239 * @prefix: the attribute prefix
240 * @localname: the attribute localname
241 *
242 * Handle a redefinition of attribute error
243 */
244static void
245xmlErrAttributeDup(xmlParserCtxtPtr ctxt, const xmlChar * prefix,
246 const xmlChar * localname)
247{
Daniel Veillard157fee02003-10-31 10:36:03 +0000248 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
249 (ctxt->instate == XML_PARSER_EOF))
250 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000251 if (ctxt != NULL)
252 ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
Daniel Veillard76d36452009-09-07 11:19:33 +0200253
Daniel Veillard2b8c4a12003-10-02 22:28:19 +0000254 if (prefix == NULL)
Daniel Veillard659e71e2003-10-10 14:10:40 +0000255 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
Daniel Veillard76d36452009-09-07 11:19:33 +0200256 XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000257 (const char *) localname, NULL, NULL, 0, 0,
258 "Attribute %s redefined\n", localname);
Daniel Veillard2b8c4a12003-10-02 22:28:19 +0000259 else
Daniel Veillard659e71e2003-10-10 14:10:40 +0000260 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
Daniel Veillard76d36452009-09-07 11:19:33 +0200261 XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000262 (const char *) prefix, (const char *) localname,
263 NULL, 0, 0, "Attribute %s:%s redefined\n", prefix,
264 localname);
Daniel Veillard30e76072006-03-09 14:13:55 +0000265 if (ctxt != NULL) {
266 ctxt->wellFormed = 0;
267 if (ctxt->recovery == 0)
268 ctxt->disableSAX = 1;
269 }
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000270}
271
272/**
273 * xmlFatalErr:
274 * @ctxt: an XML parser context
275 * @error: the error number
276 * @extra: extra information string
277 *
278 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
279 */
280static void
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000281xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info)
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000282{
283 const char *errmsg;
284
Daniel Veillard157fee02003-10-31 10:36:03 +0000285 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
286 (ctxt->instate == XML_PARSER_EOF))
287 return;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000288 switch (error) {
289 case XML_ERR_INVALID_HEX_CHARREF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000290 errmsg = "CharRef: invalid hexadecimal value\n";
291 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000292 case XML_ERR_INVALID_DEC_CHARREF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000293 errmsg = "CharRef: invalid decimal value\n";
294 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000295 case XML_ERR_INVALID_CHARREF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000296 errmsg = "CharRef: invalid value\n";
297 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000298 case XML_ERR_INTERNAL_ERROR:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000299 errmsg = "internal error";
300 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000301 case XML_ERR_PEREF_AT_EOF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000302 errmsg = "PEReference at end of document\n";
303 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000304 case XML_ERR_PEREF_IN_PROLOG:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000305 errmsg = "PEReference in prolog\n";
306 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000307 case XML_ERR_PEREF_IN_EPILOG:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000308 errmsg = "PEReference in epilog\n";
309 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000310 case XML_ERR_PEREF_NO_NAME:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000311 errmsg = "PEReference: no name\n";
312 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000313 case XML_ERR_PEREF_SEMICOL_MISSING:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000314 errmsg = "PEReference: expecting ';'\n";
315 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000316 case XML_ERR_ENTITY_LOOP:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000317 errmsg = "Detected an entity reference loop\n";
318 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000319 case XML_ERR_ENTITY_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000320 errmsg = "EntityValue: \" or ' expected\n";
321 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000322 case XML_ERR_ENTITY_PE_INTERNAL:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000323 errmsg = "PEReferences forbidden in internal subset\n";
324 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000325 case XML_ERR_ENTITY_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000326 errmsg = "EntityValue: \" or ' expected\n";
327 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000328 case XML_ERR_ATTRIBUTE_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000329 errmsg = "AttValue: \" or ' expected\n";
330 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000331 case XML_ERR_LT_IN_ATTRIBUTE:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000332 errmsg = "Unescaped '<' not allowed in attributes values\n";
333 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000334 case XML_ERR_LITERAL_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000335 errmsg = "SystemLiteral \" or ' expected\n";
336 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000337 case XML_ERR_LITERAL_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000338 errmsg = "Unfinished System or Public ID \" or ' expected\n";
339 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000340 case XML_ERR_MISPLACED_CDATA_END:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000341 errmsg = "Sequence ']]>' not allowed in content\n";
342 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000343 case XML_ERR_URI_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000344 errmsg = "SYSTEM or PUBLIC, the URI is missing\n";
345 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000346 case XML_ERR_PUBID_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000347 errmsg = "PUBLIC, the Public Identifier is missing\n";
348 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000349 case XML_ERR_HYPHEN_IN_COMMENT:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000350 errmsg = "Comment must not contain '--' (double-hyphen)\n";
351 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000352 case XML_ERR_PI_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000353 errmsg = "xmlParsePI : no target name\n";
354 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000355 case XML_ERR_RESERVED_XML_NAME:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000356 errmsg = "Invalid PI name\n";
357 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000358 case XML_ERR_NOTATION_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000359 errmsg = "NOTATION: Name expected here\n";
360 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000361 case XML_ERR_NOTATION_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000362 errmsg = "'>' required to close NOTATION declaration\n";
363 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000364 case XML_ERR_VALUE_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000365 errmsg = "Entity value required\n";
366 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000367 case XML_ERR_URI_FRAGMENT:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000368 errmsg = "Fragment not allowed";
369 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000370 case XML_ERR_ATTLIST_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000371 errmsg = "'(' required to start ATTLIST enumeration\n";
372 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000373 case XML_ERR_NMTOKEN_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000374 errmsg = "NmToken expected in ATTLIST enumeration\n";
375 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000376 case XML_ERR_ATTLIST_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000377 errmsg = "')' required to finish ATTLIST enumeration\n";
378 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000379 case XML_ERR_MIXED_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000380 errmsg = "MixedContentDecl : '|' or ')*' expected\n";
381 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000382 case XML_ERR_PCDATA_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000383 errmsg = "MixedContentDecl : '#PCDATA' expected\n";
384 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000385 case XML_ERR_ELEMCONTENT_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000386 errmsg = "ContentDecl : Name or '(' expected\n";
387 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000388 case XML_ERR_ELEMCONTENT_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000389 errmsg = "ContentDecl : ',' '|' or ')' expected\n";
390 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000391 case XML_ERR_PEREF_IN_INT_SUBSET:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000392 errmsg =
393 "PEReference: forbidden within markup decl in internal subset\n";
394 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000395 case XML_ERR_GT_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000396 errmsg = "expected '>'\n";
397 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000398 case XML_ERR_CONDSEC_INVALID:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000399 errmsg = "XML conditional section '[' expected\n";
400 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000401 case XML_ERR_EXT_SUBSET_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000402 errmsg = "Content error in the external subset\n";
403 break;
404 case XML_ERR_CONDSEC_INVALID_KEYWORD:
405 errmsg =
406 "conditional section INCLUDE or IGNORE keyword expected\n";
407 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000408 case XML_ERR_CONDSEC_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000409 errmsg = "XML conditional section not closed\n";
410 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000411 case XML_ERR_XMLDECL_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000412 errmsg = "Text declaration '<?xml' required\n";
413 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000414 case XML_ERR_XMLDECL_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000415 errmsg = "parsing XML declaration: '?>' expected\n";
416 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000417 case XML_ERR_EXT_ENTITY_STANDALONE:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000418 errmsg = "external parsed entities cannot be standalone\n";
419 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000420 case XML_ERR_ENTITYREF_SEMICOL_MISSING:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000421 errmsg = "EntityRef: expecting ';'\n";
422 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000423 case XML_ERR_DOCTYPE_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000424 errmsg = "DOCTYPE improperly terminated\n";
425 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000426 case XML_ERR_LTSLASH_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000427 errmsg = "EndTag: '</' not found\n";
428 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000429 case XML_ERR_EQUAL_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000430 errmsg = "expected '='\n";
431 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000432 case XML_ERR_STRING_NOT_CLOSED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000433 errmsg = "String not closed expecting \" or '\n";
434 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000435 case XML_ERR_STRING_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000436 errmsg = "String not started expecting ' or \"\n";
437 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000438 case XML_ERR_ENCODING_NAME:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000439 errmsg = "Invalid XML encoding name\n";
440 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000441 case XML_ERR_STANDALONE_VALUE:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000442 errmsg = "standalone accepts only 'yes' or 'no'\n";
443 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000444 case XML_ERR_DOCUMENT_EMPTY:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000445 errmsg = "Document is empty\n";
446 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000447 case XML_ERR_DOCUMENT_END:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000448 errmsg = "Extra content at the end of the document\n";
449 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000450 case XML_ERR_NOT_WELL_BALANCED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000451 errmsg = "chunk is not well balanced\n";
452 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000453 case XML_ERR_EXTRA_CONTENT:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000454 errmsg = "extra content at the end of well balanced chunk\n";
455 break;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000456 case XML_ERR_VERSION_MISSING:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000457 errmsg = "Malformed declaration expecting version\n";
458 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000459#if 0
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000460 case:
461 errmsg = "\n";
462 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000463#endif
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000464 default:
465 errmsg = "Unregistered error message\n";
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000466 }
Daniel Veillard30e76072006-03-09 14:13:55 +0000467 if (ctxt != NULL)
468 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000469 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000470 XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, errmsg,
471 info);
Daniel Veillard30e76072006-03-09 14:13:55 +0000472 if (ctxt != NULL) {
473 ctxt->wellFormed = 0;
474 if (ctxt->recovery == 0)
475 ctxt->disableSAX = 1;
476 }
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000477}
478
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000479/**
480 * xmlFatalErrMsg:
481 * @ctxt: an XML parser context
482 * @error: the error number
483 * @msg: the error message
484 *
485 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
486 */
487static void
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000488xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
489 const char *msg)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000490{
Daniel Veillard157fee02003-10-31 10:36:03 +0000491 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
492 (ctxt->instate == XML_PARSER_EOF))
493 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000494 if (ctxt != NULL)
495 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000496 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
Daniel Veillardbccae2d2009-06-04 11:22:45 +0200497 XML_ERR_FATAL, NULL, 0, NULL, NULL, NULL, 0, 0, "%s", msg);
Daniel Veillard30e76072006-03-09 14:13:55 +0000498 if (ctxt != NULL) {
499 ctxt->wellFormed = 0;
500 if (ctxt->recovery == 0)
501 ctxt->disableSAX = 1;
502 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000503}
504
505/**
Daniel Veillard24eb9782003-10-04 21:08:09 +0000506 * xmlWarningMsg:
507 * @ctxt: an XML parser context
508 * @error: the error number
509 * @msg: the error message
510 * @str1: extra data
511 * @str2: extra data
512 *
513 * Handle a warning.
514 */
515static void
516xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
517 const char *msg, const xmlChar *str1, const xmlChar *str2)
518{
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000519 xmlStructuredErrorFunc schannel = NULL;
Daniel Veillard34e3f642008-07-29 09:02:27 +0000520
Daniel Veillard157fee02003-10-31 10:36:03 +0000521 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
522 (ctxt->instate == XML_PARSER_EOF))
523 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000524 if ((ctxt != NULL) && (ctxt->sax != NULL) &&
525 (ctxt->sax->initialized == XML_SAX2_MAGIC))
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000526 schannel = ctxt->sax->serror;
Daniel Veillardd44b9362009-09-07 12:15:08 +0200527 if (ctxt != NULL) {
528 __xmlRaiseError(schannel,
Daniel Veillard659e71e2003-10-10 14:10:40 +0000529 (ctxt->sax) ? ctxt->sax->warning : NULL,
530 ctxt->userData,
Daniel Veillard24eb9782003-10-04 21:08:09 +0000531 ctxt, NULL, XML_FROM_PARSER, error,
532 XML_ERR_WARNING, NULL, 0,
533 (const char *) str1, (const char *) str2, NULL, 0, 0,
534 msg, (const char *) str1, (const char *) str2);
Daniel Veillardd44b9362009-09-07 12:15:08 +0200535 } else {
536 __xmlRaiseError(schannel, NULL, NULL,
537 ctxt, NULL, XML_FROM_PARSER, error,
538 XML_ERR_WARNING, NULL, 0,
539 (const char *) str1, (const char *) str2, NULL, 0, 0,
540 msg, (const char *) str1, (const char *) str2);
541 }
Daniel Veillard24eb9782003-10-04 21:08:09 +0000542}
543
544/**
545 * xmlValidityError:
546 * @ctxt: an XML parser context
547 * @error: the error number
548 * @msg: the error message
549 * @str1: extra data
550 *
Daniel Veillardf88d8cf2003-12-08 10:25:02 +0000551 * Handle a validity error.
Daniel Veillard24eb9782003-10-04 21:08:09 +0000552 */
553static void
554xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error,
Daniel Veillardae0765b2008-07-31 19:54:59 +0000555 const char *msg, const xmlChar *str1, const xmlChar *str2)
Daniel Veillard24eb9782003-10-04 21:08:09 +0000556{
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000557 xmlStructuredErrorFunc schannel = NULL;
Daniel Veillard157fee02003-10-31 10:36:03 +0000558
559 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
560 (ctxt->instate == XML_PARSER_EOF))
561 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000562 if (ctxt != NULL) {
563 ctxt->errNo = error;
564 if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC))
565 schannel = ctxt->sax->serror;
566 }
Daniel Veillard76d36452009-09-07 11:19:33 +0200567 if (ctxt != NULL) {
568 __xmlRaiseError(schannel,
Daniel Veillard659e71e2003-10-10 14:10:40 +0000569 ctxt->vctxt.error, ctxt->vctxt.userData,
Daniel Veillard24eb9782003-10-04 21:08:09 +0000570 ctxt, NULL, XML_FROM_DTD, error,
571 XML_ERR_ERROR, NULL, 0, (const char *) str1,
Daniel Veillardae0765b2008-07-31 19:54:59 +0000572 (const char *) str2, NULL, 0, 0,
573 msg, (const char *) str1, (const char *) str2);
Daniel Veillard30e76072006-03-09 14:13:55 +0000574 ctxt->valid = 0;
Daniel Veillard76d36452009-09-07 11:19:33 +0200575 } else {
576 __xmlRaiseError(schannel, NULL, NULL,
577 ctxt, NULL, XML_FROM_DTD, error,
578 XML_ERR_ERROR, NULL, 0, (const char *) str1,
579 (const char *) str2, NULL, 0, 0,
580 msg, (const char *) str1, (const char *) str2);
Daniel Veillard30e76072006-03-09 14:13:55 +0000581 }
Daniel Veillard24eb9782003-10-04 21:08:09 +0000582}
583
584/**
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000585 * xmlFatalErrMsgInt:
586 * @ctxt: an XML parser context
587 * @error: the error number
588 * @msg: the error message
589 * @val: an integer value
590 *
591 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
592 */
593static void
594xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000595 const char *msg, int val)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000596{
Daniel Veillard157fee02003-10-31 10:36:03 +0000597 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
598 (ctxt->instate == XML_PARSER_EOF))
599 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000600 if (ctxt != NULL)
601 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000602 __xmlRaiseError(NULL, NULL, NULL,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000603 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
604 NULL, 0, NULL, NULL, NULL, val, 0, msg, val);
Daniel Veillard30e76072006-03-09 14:13:55 +0000605 if (ctxt != NULL) {
606 ctxt->wellFormed = 0;
607 if (ctxt->recovery == 0)
608 ctxt->disableSAX = 1;
609 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000610}
611
612/**
Daniel Veillardf403d292003-10-05 13:51:35 +0000613 * xmlFatalErrMsgStrIntStr:
614 * @ctxt: an XML parser context
615 * @error: the error number
616 * @msg: the error message
617 * @str1: an string info
618 * @val: an integer value
619 * @str2: an string info
620 *
621 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
622 */
623static void
624xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
625 const char *msg, const xmlChar *str1, int val,
626 const xmlChar *str2)
627{
Daniel Veillard157fee02003-10-31 10:36:03 +0000628 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
629 (ctxt->instate == XML_PARSER_EOF))
630 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000631 if (ctxt != NULL)
632 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000633 __xmlRaiseError(NULL, NULL, NULL,
Daniel Veillardf403d292003-10-05 13:51:35 +0000634 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
635 NULL, 0, (const char *) str1, (const char *) str2,
636 NULL, val, 0, msg, str1, val, str2);
Daniel Veillard30e76072006-03-09 14:13:55 +0000637 if (ctxt != NULL) {
638 ctxt->wellFormed = 0;
639 if (ctxt->recovery == 0)
640 ctxt->disableSAX = 1;
641 }
Daniel Veillardf403d292003-10-05 13:51:35 +0000642}
643
644/**
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000645 * xmlFatalErrMsgStr:
646 * @ctxt: an XML parser context
647 * @error: the error number
648 * @msg: the error message
649 * @val: a string value
650 *
651 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
652 */
653static void
654xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000655 const char *msg, const xmlChar * val)
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000656{
Daniel Veillard157fee02003-10-31 10:36:03 +0000657 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
658 (ctxt->instate == XML_PARSER_EOF))
659 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000660 if (ctxt != NULL)
661 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000662 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000663 XML_FROM_PARSER, error, XML_ERR_FATAL,
664 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
665 val);
Daniel Veillard30e76072006-03-09 14:13:55 +0000666 if (ctxt != NULL) {
667 ctxt->wellFormed = 0;
668 if (ctxt->recovery == 0)
669 ctxt->disableSAX = 1;
670 }
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000671}
672
673/**
Daniel Veillardf403d292003-10-05 13:51:35 +0000674 * xmlErrMsgStr:
675 * @ctxt: an XML parser context
676 * @error: the error number
677 * @msg: the error message
678 * @val: a string value
679 *
680 * Handle a non fatal parser error
681 */
682static void
683xmlErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
684 const char *msg, const xmlChar * val)
685{
Daniel Veillard157fee02003-10-31 10:36:03 +0000686 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
687 (ctxt->instate == XML_PARSER_EOF))
688 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000689 if (ctxt != NULL)
690 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000691 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
Daniel Veillardf403d292003-10-05 13:51:35 +0000692 XML_FROM_PARSER, error, XML_ERR_ERROR,
693 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
694 val);
695}
696
697/**
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000698 * xmlNsErr:
699 * @ctxt: an XML parser context
700 * @error: the error number
701 * @msg: the message
702 * @info1: extra information string
703 * @info2: extra information string
704 *
705 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
706 */
707static void
708xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
709 const char *msg,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000710 const xmlChar * info1, const xmlChar * info2,
711 const xmlChar * info3)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000712{
Daniel Veillard157fee02003-10-31 10:36:03 +0000713 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
714 (ctxt->instate == XML_PARSER_EOF))
715 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000716 if (ctxt != NULL)
717 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000718 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000719 XML_ERR_ERROR, NULL, 0, (const char *) info1,
720 (const char *) info2, (const char *) info3, 0, 0, msg,
721 info1, info2, info3);
Daniel Veillard30e76072006-03-09 14:13:55 +0000722 if (ctxt != NULL)
723 ctxt->nsWellFormed = 0;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000724}
725
Daniel Veillard37334572008-07-31 08:20:02 +0000726/**
727 * xmlNsWarn
728 * @ctxt: an XML parser context
729 * @error: the error number
730 * @msg: the message
731 * @info1: extra information string
732 * @info2: extra information string
733 *
734 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
735 */
736static void
737xmlNsWarn(xmlParserCtxtPtr ctxt, xmlParserErrors error,
738 const char *msg,
739 const xmlChar * info1, const xmlChar * info2,
740 const xmlChar * info3)
741{
742 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
743 (ctxt->instate == XML_PARSER_EOF))
744 return;
Daniel Veillard37334572008-07-31 08:20:02 +0000745 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
746 XML_ERR_WARNING, NULL, 0, (const char *) info1,
747 (const char *) info2, (const char *) info3, 0, 0, msg,
748 info1, info2, info3);
749}
750
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000751/************************************************************************
752 * *
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000753 * Library wide options *
754 * *
755 ************************************************************************/
756
757/**
758 * xmlHasFeature:
759 * @feature: the feature to be examined
760 *
761 * Examines if the library has been compiled with a given feature.
762 *
763 * Returns a non-zero value if the feature exist, otherwise zero.
764 * Returns zero (0) if the feature does not exist or an unknown
765 * unknown feature is requested, non-zero otherwise.
766 */
767int
768xmlHasFeature(xmlFeature feature)
769{
770 switch (feature) {
Daniel Veillard602434d2005-09-12 09:20:31 +0000771 case XML_WITH_THREAD:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000772#ifdef LIBXML_THREAD_ENABLED
773 return(1);
774#else
775 return(0);
776#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000777 case XML_WITH_TREE:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000778#ifdef LIBXML_TREE_ENABLED
779 return(1);
780#else
781 return(0);
782#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000783 case XML_WITH_OUTPUT:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000784#ifdef LIBXML_OUTPUT_ENABLED
785 return(1);
786#else
787 return(0);
788#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000789 case XML_WITH_PUSH:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000790#ifdef LIBXML_PUSH_ENABLED
791 return(1);
792#else
793 return(0);
794#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000795 case XML_WITH_READER:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000796#ifdef LIBXML_READER_ENABLED
797 return(1);
798#else
799 return(0);
800#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000801 case XML_WITH_PATTERN:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000802#ifdef LIBXML_PATTERN_ENABLED
803 return(1);
804#else
805 return(0);
806#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000807 case XML_WITH_WRITER:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000808#ifdef LIBXML_WRITER_ENABLED
809 return(1);
810#else
811 return(0);
812#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000813 case XML_WITH_SAX1:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000814#ifdef LIBXML_SAX1_ENABLED
815 return(1);
816#else
817 return(0);
818#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000819 case XML_WITH_FTP:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000820#ifdef LIBXML_FTP_ENABLED
821 return(1);
822#else
823 return(0);
824#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000825 case XML_WITH_HTTP:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000826#ifdef LIBXML_HTTP_ENABLED
827 return(1);
828#else
829 return(0);
830#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000831 case XML_WITH_VALID:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000832#ifdef LIBXML_VALID_ENABLED
833 return(1);
834#else
835 return(0);
836#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000837 case XML_WITH_HTML:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000838#ifdef LIBXML_HTML_ENABLED
839 return(1);
840#else
841 return(0);
842#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000843 case XML_WITH_LEGACY:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000844#ifdef LIBXML_LEGACY_ENABLED
845 return(1);
846#else
847 return(0);
848#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000849 case XML_WITH_C14N:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000850#ifdef LIBXML_C14N_ENABLED
851 return(1);
852#else
853 return(0);
854#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000855 case XML_WITH_CATALOG:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000856#ifdef LIBXML_CATALOG_ENABLED
857 return(1);
858#else
859 return(0);
860#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000861 case XML_WITH_XPATH:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000862#ifdef LIBXML_XPATH_ENABLED
863 return(1);
864#else
865 return(0);
866#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000867 case XML_WITH_XPTR:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000868#ifdef LIBXML_XPTR_ENABLED
869 return(1);
870#else
871 return(0);
872#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000873 case XML_WITH_XINCLUDE:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000874#ifdef LIBXML_XINCLUDE_ENABLED
875 return(1);
876#else
877 return(0);
878#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000879 case XML_WITH_ICONV:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000880#ifdef LIBXML_ICONV_ENABLED
881 return(1);
882#else
883 return(0);
884#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000885 case XML_WITH_ISO8859X:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000886#ifdef LIBXML_ISO8859X_ENABLED
887 return(1);
888#else
889 return(0);
890#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000891 case XML_WITH_UNICODE:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000892#ifdef LIBXML_UNICODE_ENABLED
893 return(1);
894#else
895 return(0);
896#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000897 case XML_WITH_REGEXP:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000898#ifdef LIBXML_REGEXP_ENABLED
899 return(1);
900#else
901 return(0);
902#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000903 case XML_WITH_AUTOMATA:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000904#ifdef LIBXML_AUTOMATA_ENABLED
905 return(1);
906#else
907 return(0);
908#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000909 case XML_WITH_EXPR:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000910#ifdef LIBXML_EXPR_ENABLED
911 return(1);
912#else
913 return(0);
914#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000915 case XML_WITH_SCHEMAS:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000916#ifdef LIBXML_SCHEMAS_ENABLED
917 return(1);
918#else
919 return(0);
920#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000921 case XML_WITH_SCHEMATRON:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000922#ifdef LIBXML_SCHEMATRON_ENABLED
923 return(1);
924#else
925 return(0);
926#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000927 case XML_WITH_MODULES:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000928#ifdef LIBXML_MODULES_ENABLED
929 return(1);
930#else
931 return(0);
932#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000933 case XML_WITH_DEBUG:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000934#ifdef LIBXML_DEBUG_ENABLED
935 return(1);
936#else
937 return(0);
938#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000939 case XML_WITH_DEBUG_MEM:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000940#ifdef DEBUG_MEMORY_LOCATION
941 return(1);
942#else
943 return(0);
944#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000945 case XML_WITH_DEBUG_RUN:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000946#ifdef LIBXML_DEBUG_RUNTIME
947 return(1);
948#else
949 return(0);
Daniel Veillard34e3f642008-07-29 09:02:27 +0000950#endif
Daniel Veillard75acfee2006-07-13 06:29:56 +0000951 case XML_WITH_ZLIB:
952#ifdef LIBXML_ZLIB_ENABLED
953 return(1);
954#else
955 return(0);
956#endif
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000957 default:
958 break;
959 }
960 return(0);
961}
962
963/************************************************************************
964 * *
Daniel Veillarde57ec792003-09-10 10:50:59 +0000965 * SAX2 defaulted attributes handling *
966 * *
967 ************************************************************************/
968
969/**
970 * xmlDetectSAX2:
971 * @ctxt: an XML parser context
972 *
973 * Do the SAX2 detection and specific intialization
974 */
975static void
976xmlDetectSAX2(xmlParserCtxtPtr ctxt) {
977 if (ctxt == NULL) return;
Daniel Veillard81273902003-09-30 00:43:48 +0000978#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +0000979 if ((ctxt->sax) && (ctxt->sax->initialized == XML_SAX2_MAGIC) &&
980 ((ctxt->sax->startElementNs != NULL) ||
981 (ctxt->sax->endElementNs != NULL))) ctxt->sax2 = 1;
Daniel Veillard81273902003-09-30 00:43:48 +0000982#else
983 ctxt->sax2 = 1;
984#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +0000985
986 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
987 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
988 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
William M. Brack9f797ab2004-07-28 07:40:12 +0000989 if ((ctxt->str_xml==NULL) || (ctxt->str_xmlns==NULL) ||
990 (ctxt->str_xml_ns == NULL)) {
991 xmlErrMemory(ctxt, NULL);
992 }
Daniel Veillarde57ec792003-09-10 10:50:59 +0000993}
994
Daniel Veillarde57ec792003-09-10 10:50:59 +0000995typedef struct _xmlDefAttrs xmlDefAttrs;
996typedef xmlDefAttrs *xmlDefAttrsPtr;
997struct _xmlDefAttrs {
998 int nbAttrs; /* number of defaulted attributes on that element */
999 int maxAttrs; /* the size of the array */
Daniel Veillardae0765b2008-07-31 19:54:59 +00001000 const xmlChar *values[5]; /* array of localname/prefix/values/external */
Daniel Veillarde57ec792003-09-10 10:50:59 +00001001};
Daniel Veillarde57ec792003-09-10 10:50:59 +00001002
/**
 * xmlAttrNormalizeSpace:
 * @src: the source string
 * @dst: the target string
 *
 * Normalize the space in non CDATA attribute values:
 * If the attribute type is not CDATA, then the XML processor MUST further
 * process the normalized attribute value by discarding any leading and
 * trailing space (#x20) characters, and by replacing sequences of space
 * (#x20) characters by a single space (#x20) character.
 * Note that the size of dst needs to be at least that of src, and if one
 * doesn't need to preserve src (and it doesn't come from a dictionary or
 * read-only memory) then passing src as dst is just fine.
 *
 * Returns a pointer to the start of the normalized value (@dst), or NULL
 * if an argument is NULL or if the normalization was done in place and
 * did not change anything.
 */
static xmlChar *
xmlAttrNormalizeSpace(const xmlChar *src, xmlChar *dst)
{
    xmlChar *out;

    if ((src == NULL) || (dst == NULL))
        return(NULL);

    /* keep @dst untouched so the start of the result can be returned */
    out = dst;

    /* drop leading spaces */
    while (*src == 0x20) src++;
    while (*src != 0) {
        if (*src == 0x20) {
            /* collapse the run; emit a space only if text follows */
            while (*src == 0x20) src++;
            if (*src != 0)
                *out++ = 0x20;
        } else {
            *out++ = *src++;
        }
    }
    *out = 0;

    /*
     * The read and write cursors only meet at the end when the value was
     * processed in place and no character was dropped.
     */
    if (out == src)
        return(NULL);
    return(dst);
}
1041
1042/**
1043 * xmlAttrNormalizeSpace2:
1044 * @src: the source string
1045 *
1046 * Normalize the space in non CDATA attribute values, a slightly more complex
1047 * front end to avoid allocation problems when running on attribute values
1048 * coming from the input.
1049 *
1050 * Returns a pointer to the normalized value (dst) or NULL if no conversion
1051 * is needed.
1052 */
1053static const xmlChar *
Daniel Veillardae0765b2008-07-31 19:54:59 +00001054xmlAttrNormalizeSpace2(xmlParserCtxtPtr ctxt, xmlChar *src, int *len)
Daniel Veillard97c9ce22008-03-25 16:52:41 +00001055{
1056 int i;
1057 int remove_head = 0;
1058 int need_realloc = 0;
1059 const xmlChar *cur;
1060
1061 if ((ctxt == NULL) || (src == NULL) || (len == NULL))
1062 return(NULL);
1063 i = *len;
1064 if (i <= 0)
1065 return(NULL);
1066
1067 cur = src;
1068 while (*cur == 0x20) {
1069 cur++;
1070 remove_head++;
1071 }
1072 while (*cur != 0) {
1073 if (*cur == 0x20) {
1074 cur++;
1075 if ((*cur == 0x20) || (*cur == 0)) {
1076 need_realloc = 1;
1077 break;
1078 }
1079 } else
1080 cur++;
1081 }
1082 if (need_realloc) {
1083 xmlChar *ret;
1084
1085 ret = xmlStrndup(src + remove_head, i - remove_head + 1);
1086 if (ret == NULL) {
1087 xmlErrMemory(ctxt, NULL);
1088 return(NULL);
1089 }
1090 xmlAttrNormalizeSpace(ret, ret);
1091 *len = (int) strlen((const char *)ret);
1092 return(ret);
1093 } else if (remove_head) {
1094 *len -= remove_head;
Daniel Veillardae0765b2008-07-31 19:54:59 +00001095 memmove(src, src + remove_head, 1 + *len);
1096 return(src);
Daniel Veillard97c9ce22008-03-25 16:52:41 +00001097 }
1098 return(NULL);
1099}
1100
1101/**
Daniel Veillarde57ec792003-09-10 10:50:59 +00001102 * xmlAddDefAttrs:
1103 * @ctxt: an XML parser context
1104 * @fullname: the element fullname
1105 * @fullattr: the attribute fullname
1106 * @value: the attribute value
1107 *
1108 * Add a defaulted attribute for an element
1109 */
1110static void
1111xmlAddDefAttrs(xmlParserCtxtPtr ctxt,
1112 const xmlChar *fullname,
1113 const xmlChar *fullattr,
1114 const xmlChar *value) {
1115 xmlDefAttrsPtr defaults;
1116 int len;
1117 const xmlChar *name;
1118 const xmlChar *prefix;
1119
Daniel Veillard6a31b832008-03-26 14:06:44 +00001120 /*
1121 * Allows to detect attribute redefinitions
1122 */
1123 if (ctxt->attsSpecial != NULL) {
1124 if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1125 return;
1126 }
1127
Daniel Veillarde57ec792003-09-10 10:50:59 +00001128 if (ctxt->attsDefault == NULL) {
Daniel Veillard316a5c32005-01-23 22:56:39 +00001129 ctxt->attsDefault = xmlHashCreateDict(10, ctxt->dict);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001130 if (ctxt->attsDefault == NULL)
1131 goto mem_error;
1132 }
1133
1134 /*
Daniel Veillard079f6a72004-09-23 13:15:03 +00001135 * split the element name into prefix:localname , the string found
1136 * are within the DTD and then not associated to namespace names.
Daniel Veillarde57ec792003-09-10 10:50:59 +00001137 */
1138 name = xmlSplitQName3(fullname, &len);
1139 if (name == NULL) {
1140 name = xmlDictLookup(ctxt->dict, fullname, -1);
1141 prefix = NULL;
1142 } else {
1143 name = xmlDictLookup(ctxt->dict, name, -1);
1144 prefix = xmlDictLookup(ctxt->dict, fullname, len);
1145 }
1146
1147 /*
1148 * make sure there is some storage
1149 */
1150 defaults = xmlHashLookup2(ctxt->attsDefault, name, prefix);
1151 if (defaults == NULL) {
1152 defaults = (xmlDefAttrsPtr) xmlMalloc(sizeof(xmlDefAttrs) +
Daniel Veillardae0765b2008-07-31 19:54:59 +00001153 (4 * 5) * sizeof(const xmlChar *));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001154 if (defaults == NULL)
1155 goto mem_error;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001156 defaults->nbAttrs = 0;
Daniel Veillard079f6a72004-09-23 13:15:03 +00001157 defaults->maxAttrs = 4;
Daniel Veillard68b6e022008-03-31 09:26:00 +00001158 if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1159 defaults, NULL) < 0) {
1160 xmlFree(defaults);
1161 goto mem_error;
1162 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00001163 } else if (defaults->nbAttrs >= defaults->maxAttrs) {
Daniel Veillard079f6a72004-09-23 13:15:03 +00001164 xmlDefAttrsPtr temp;
1165
1166 temp = (xmlDefAttrsPtr) xmlRealloc(defaults, sizeof(xmlDefAttrs) +
Daniel Veillardae0765b2008-07-31 19:54:59 +00001167 (2 * defaults->maxAttrs * 5) * sizeof(const xmlChar *));
Daniel Veillard079f6a72004-09-23 13:15:03 +00001168 if (temp == NULL)
Daniel Veillarde57ec792003-09-10 10:50:59 +00001169 goto mem_error;
Daniel Veillard079f6a72004-09-23 13:15:03 +00001170 defaults = temp;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001171 defaults->maxAttrs *= 2;
Daniel Veillard68b6e022008-03-31 09:26:00 +00001172 if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1173 defaults, NULL) < 0) {
1174 xmlFree(defaults);
1175 goto mem_error;
1176 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00001177 }
1178
1179 /*
Daniel Veillard8874b942005-08-25 13:19:21 +00001180 * Split the element name into prefix:localname , the string found
Daniel Veillarde57ec792003-09-10 10:50:59 +00001181 * are within the DTD and hen not associated to namespace names.
1182 */
1183 name = xmlSplitQName3(fullattr, &len);
1184 if (name == NULL) {
1185 name = xmlDictLookup(ctxt->dict, fullattr, -1);
1186 prefix = NULL;
1187 } else {
1188 name = xmlDictLookup(ctxt->dict, name, -1);
1189 prefix = xmlDictLookup(ctxt->dict, fullattr, len);
1190 }
1191
Daniel Veillardae0765b2008-07-31 19:54:59 +00001192 defaults->values[5 * defaults->nbAttrs] = name;
1193 defaults->values[5 * defaults->nbAttrs + 1] = prefix;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001194 /* intern the string and precompute the end */
1195 len = xmlStrlen(value);
1196 value = xmlDictLookup(ctxt->dict, value, len);
Daniel Veillardae0765b2008-07-31 19:54:59 +00001197 defaults->values[5 * defaults->nbAttrs + 2] = value;
1198 defaults->values[5 * defaults->nbAttrs + 3] = value + len;
1199 if (ctxt->external)
1200 defaults->values[5 * defaults->nbAttrs + 4] = BAD_CAST "external";
1201 else
1202 defaults->values[5 * defaults->nbAttrs + 4] = NULL;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001203 defaults->nbAttrs++;
1204
1205 return;
1206
1207mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001208 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001209 return;
1210}
1211
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001212/**
1213 * xmlAddSpecialAttr:
1214 * @ctxt: an XML parser context
1215 * @fullname: the element fullname
1216 * @fullattr: the attribute fullname
1217 * @type: the attribute type
1218 *
Daniel Veillardac4118d2008-01-11 05:27:32 +00001219 * Register this attribute type
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001220 */
1221static void
1222xmlAddSpecialAttr(xmlParserCtxtPtr ctxt,
1223 const xmlChar *fullname,
1224 const xmlChar *fullattr,
1225 int type)
1226{
1227 if (ctxt->attsSpecial == NULL) {
Daniel Veillard316a5c32005-01-23 22:56:39 +00001228 ctxt->attsSpecial = xmlHashCreateDict(10, ctxt->dict);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001229 if (ctxt->attsSpecial == NULL)
1230 goto mem_error;
1231 }
1232
Daniel Veillardac4118d2008-01-11 05:27:32 +00001233 if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1234 return;
1235
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +00001236 xmlHashAddEntry2(ctxt->attsSpecial, fullname, fullattr,
1237 (void *) (long) type);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001238 return;
1239
1240mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001241 xmlErrMemory(ctxt, NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001242 return;
1243}
1244
Daniel Veillard4432df22003-09-28 18:58:27 +00001245/**
Daniel Veillardac4118d2008-01-11 05:27:32 +00001246 * xmlCleanSpecialAttrCallback:
1247 *
1248 * Removes CDATA attributes from the special attribute table
1249 */
1250static void
1251xmlCleanSpecialAttrCallback(void *payload, void *data,
1252 const xmlChar *fullname, const xmlChar *fullattr,
1253 const xmlChar *unused ATTRIBUTE_UNUSED) {
1254 xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) data;
1255
Daniel Veillardb3edafd2008-01-11 08:00:57 +00001256 if (((long) payload) == XML_ATTRIBUTE_CDATA) {
Daniel Veillardac4118d2008-01-11 05:27:32 +00001257 xmlHashRemoveEntry2(ctxt->attsSpecial, fullname, fullattr, NULL);
1258 }
1259}
1260
1261/**
1262 * xmlCleanSpecialAttr:
1263 * @ctxt: an XML parser context
1264 *
1265 * Trim the list of attributes defined to remove all those of type
1266 * CDATA as they are not special. This call should be done when finishing
1267 * to parse the DTD and before starting to parse the document root.
1268 */
1269static void
1270xmlCleanSpecialAttr(xmlParserCtxtPtr ctxt)
1271{
1272 if (ctxt->attsSpecial == NULL)
1273 return;
1274
1275 xmlHashScanFull(ctxt->attsSpecial, xmlCleanSpecialAttrCallback, ctxt);
1276
1277 if (xmlHashSize(ctxt->attsSpecial) == 0) {
1278 xmlHashFree(ctxt->attsSpecial, NULL);
1279 ctxt->attsSpecial = NULL;
1280 }
1281 return;
1282}
1283
/*
 * xmlLangIsAlpha:
 * @c:  the character to test
 *
 * Returns 1 if @c is an ASCII letter ([a-z] | [A-Z]), 0 otherwise.
 */
static int
xmlLangIsAlpha(xmlChar c)
{
    return (((c >= 'A') && (c <= 'Z')) || ((c >= 'a') && (c <= 'z')));
}

/**
 * xmlCheckLanguageID:
 * @lang:  pointer to the string value
 *
 * Checks that the value conforms to the LanguageID production:
 *
 * NOTE: this is somewhat deprecated, those productions were removed from
 *       the XML Second edition.
 *
 * [33] LanguageID ::= Langcode ('-' Subcode)*
 * [34] Langcode ::= ISO639Code |  IanaCode |  UserCode
 * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
 * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
 * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
 * [38] Subcode ::= ([a-z] | [A-Z])+
 *
 * An IANA or user code must hold at least one letter after the dash,
 * so "i-" and "x-" alone are rejected.
 *
 * Returns 1 if correct 0 otherwise (including when @lang is NULL)
 **/
int
xmlCheckLanguageID(const xmlChar * lang)
{
    const xmlChar *cur = lang;

    if (cur == NULL)
        return (0);

    if (((cur[0] == 'i') || (cur[0] == 'I') ||
         (cur[0] == 'x') || (cur[0] == 'X')) && (cur[1] == '-')) {
        /*
         * IANA code or User code: the productions require one or more
         * letters after the dash.
         */
        cur += 2;
        if (!xmlLangIsAlpha(cur[0]))
            return (0);
        while (xmlLangIsAlpha(cur[0]))
            cur++;
    } else if (xmlLangIsAlpha(cur[0]) && xmlLangIsAlpha(cur[1])) {
        /*
         * ISO639: exactly two letters
         */
        cur += 2;
    } else {
        return (0);
    }

    /* any number of '-' Subcode, each with at least one letter */
    while (cur[0] != 0) {
        if (cur[0] != '-')
            return (0);
        cur++;
        if (!xmlLangIsAlpha(cur[0]))
            return (0);
        while (xmlLangIsAlpha(cur[0]))
            cur++;
    }
    return (1);
}
1355
Owen Taylor3473f882001-02-23 17:55:21 +00001356/************************************************************************
1357 * *
Daniel Veillard0161e632008-08-28 15:36:32 +00001358 * Parser stacks related functions and macros *
Owen Taylor3473f882001-02-23 17:55:21 +00001359 * *
1360 ************************************************************************/
1361
Daniel Veillard8ed10722009-08-20 19:17:36 +02001362static xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
1363 const xmlChar ** str);
Owen Taylor3473f882001-02-23 17:55:21 +00001364
Daniel Veillard0fb18932003-09-07 09:14:37 +00001365#ifdef SAX2
1366/**
1367 * nsPush:
1368 * @ctxt: an XML parser context
1369 * @prefix: the namespace prefix or NULL
1370 * @URL: the namespace name
1371 *
1372 * Pushes a new parser namespace on top of the ns stack
1373 *
William M. Brack7b9154b2003-09-27 19:23:50 +00001374 * Returns -1 in case of error, -2 if the namespace should be discarded
1375 * and the index in the stack otherwise.
Daniel Veillard0fb18932003-09-07 09:14:37 +00001376 */
1377static int
1378nsPush(xmlParserCtxtPtr ctxt, const xmlChar *prefix, const xmlChar *URL)
1379{
Daniel Veillarddca8cc72003-09-26 13:53:14 +00001380 if (ctxt->options & XML_PARSE_NSCLEAN) {
1381 int i;
1382 for (i = 0;i < ctxt->nsNr;i += 2) {
1383 if (ctxt->nsTab[i] == prefix) {
1384 /* in scope */
1385 if (ctxt->nsTab[i + 1] == URL)
1386 return(-2);
1387 /* out of scope keep it */
1388 break;
1389 }
1390 }
1391 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00001392 if ((ctxt->nsMax == 0) || (ctxt->nsTab == NULL)) {
1393 ctxt->nsMax = 10;
1394 ctxt->nsNr = 0;
1395 ctxt->nsTab = (const xmlChar **)
1396 xmlMalloc(ctxt->nsMax * sizeof(xmlChar *));
1397 if (ctxt->nsTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001398 xmlErrMemory(ctxt, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00001399 ctxt->nsMax = 0;
1400 return (-1);
1401 }
1402 } else if (ctxt->nsNr >= ctxt->nsMax) {
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00001403 const xmlChar ** tmp;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001404 ctxt->nsMax *= 2;
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00001405 tmp = (const xmlChar **) xmlRealloc((char *) ctxt->nsTab,
1406 ctxt->nsMax * sizeof(ctxt->nsTab[0]));
1407 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001408 xmlErrMemory(ctxt, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00001409 ctxt->nsMax /= 2;
1410 return (-1);
1411 }
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00001412 ctxt->nsTab = tmp;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001413 }
1414 ctxt->nsTab[ctxt->nsNr++] = prefix;
1415 ctxt->nsTab[ctxt->nsNr++] = URL;
1416 return (ctxt->nsNr);
1417}
1418/**
1419 * nsPop:
1420 * @ctxt: an XML parser context
1421 * @nr: the number to pop
1422 *
1423 * Pops the top @nr parser prefix/namespace from the ns stack
1424 *
1425 * Returns the number of namespaces removed
1426 */
1427static int
1428nsPop(xmlParserCtxtPtr ctxt, int nr)
1429{
1430 int i;
1431
1432 if (ctxt->nsTab == NULL) return(0);
1433 if (ctxt->nsNr < nr) {
1434 xmlGenericError(xmlGenericErrorContext, "Pbm popping %d NS\n", nr);
1435 nr = ctxt->nsNr;
1436 }
1437 if (ctxt->nsNr <= 0)
1438 return (0);
Daniel Veillard34e3f642008-07-29 09:02:27 +00001439
Daniel Veillard0fb18932003-09-07 09:14:37 +00001440 for (i = 0;i < nr;i++) {
1441 ctxt->nsNr--;
1442 ctxt->nsTab[ctxt->nsNr] = NULL;
1443 }
1444 return(nr);
1445}
1446#endif
1447
1448static int
1449xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt, int nr) {
1450 const xmlChar **atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001451 int *attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001452 int maxatts;
1453
1454 if (ctxt->atts == NULL) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00001455 maxatts = 55; /* allow for 10 attrs by default */
Daniel Veillard0fb18932003-09-07 09:14:37 +00001456 atts = (const xmlChar **)
1457 xmlMalloc(maxatts * sizeof(xmlChar *));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001458 if (atts == NULL) goto mem_error;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001459 ctxt->atts = atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001460 attallocs = (int *) xmlMalloc((maxatts / 5) * sizeof(int));
1461 if (attallocs == NULL) goto mem_error;
1462 ctxt->attallocs = attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001463 ctxt->maxatts = maxatts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001464 } else if (nr + 5 > ctxt->maxatts) {
1465 maxatts = (nr + 5) * 2;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001466 atts = (const xmlChar **) xmlRealloc((void *) ctxt->atts,
1467 maxatts * sizeof(const xmlChar *));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001468 if (atts == NULL) goto mem_error;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001469 ctxt->atts = atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001470 attallocs = (int *) xmlRealloc((void *) ctxt->attallocs,
1471 (maxatts / 5) * sizeof(int));
1472 if (attallocs == NULL) goto mem_error;
1473 ctxt->attallocs = attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001474 ctxt->maxatts = maxatts;
1475 }
1476 return(ctxt->maxatts);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001477mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001478 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001479 return(-1);
Daniel Veillard0fb18932003-09-07 09:14:37 +00001480}
1481
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001482/**
1483 * inputPush:
1484 * @ctxt: an XML parser context
Daniel Veillard9d06d302002-01-22 18:15:52 +00001485 * @value: the parser input
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001486 *
1487 * Pushes a new parser input on top of the input stack
Daniel Veillard9d06d302002-01-22 18:15:52 +00001488 *
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001489 * Returns -1 in case of error, the index in the stack otherwise
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001490 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001491int
Daniel Veillard1c732d22002-11-30 11:22:59 +00001492inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value)
1493{
Daniel Veillard36e5cd52004-11-02 14:52:23 +00001494 if ((ctxt == NULL) || (value == NULL))
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001495 return(-1);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001496 if (ctxt->inputNr >= ctxt->inputMax) {
1497 ctxt->inputMax *= 2;
1498 ctxt->inputTab =
1499 (xmlParserInputPtr *) xmlRealloc(ctxt->inputTab,
1500 ctxt->inputMax *
1501 sizeof(ctxt->inputTab[0]));
1502 if (ctxt->inputTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001503 xmlErrMemory(ctxt, NULL);
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001504 xmlFreeInputStream(value);
1505 ctxt->inputMax /= 2;
1506 value = NULL;
1507 return (-1);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001508 }
1509 }
1510 ctxt->inputTab[ctxt->inputNr] = value;
1511 ctxt->input = value;
1512 return (ctxt->inputNr++);
1513}
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001514/**
Daniel Veillard1c732d22002-11-30 11:22:59 +00001515 * inputPop:
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001516 * @ctxt: an XML parser context
1517 *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001518 * Pops the top parser input from the input stack
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001519 *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001520 * Returns the input just removed
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001521 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001522xmlParserInputPtr
Daniel Veillard1c732d22002-11-30 11:22:59 +00001523inputPop(xmlParserCtxtPtr ctxt)
1524{
1525 xmlParserInputPtr ret;
1526
Daniel Veillard36e5cd52004-11-02 14:52:23 +00001527 if (ctxt == NULL)
1528 return(NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001529 if (ctxt->inputNr <= 0)
Daniel Veillard24505b02005-07-28 23:49:35 +00001530 return (NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001531 ctxt->inputNr--;
1532 if (ctxt->inputNr > 0)
1533 ctxt->input = ctxt->inputTab[ctxt->inputNr - 1];
1534 else
1535 ctxt->input = NULL;
1536 ret = ctxt->inputTab[ctxt->inputNr];
Daniel Veillard24505b02005-07-28 23:49:35 +00001537 ctxt->inputTab[ctxt->inputNr] = NULL;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001538 return (ret);
1539}
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001540/**
1541 * nodePush:
1542 * @ctxt: an XML parser context
Daniel Veillard9d06d302002-01-22 18:15:52 +00001543 * @value: the element node
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001544 *
1545 * Pushes a new element node on top of the node stack
Daniel Veillard9d06d302002-01-22 18:15:52 +00001546 *
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001547 * Returns -1 in case of error, the index in the stack otherwise
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001548 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001549int
Daniel Veillard1c732d22002-11-30 11:22:59 +00001550nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value)
1551{
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001552 if (ctxt == NULL) return(0);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001553 if (ctxt->nodeNr >= ctxt->nodeMax) {
Daniel Veillardd0cf7f62004-11-09 16:17:02 +00001554 xmlNodePtr *tmp;
1555
1556 tmp = (xmlNodePtr *) xmlRealloc(ctxt->nodeTab,
1557 ctxt->nodeMax * 2 *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001558 sizeof(ctxt->nodeTab[0]));
Daniel Veillardd0cf7f62004-11-09 16:17:02 +00001559 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001560 xmlErrMemory(ctxt, NULL);
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001561 return (-1);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001562 }
Daniel Veillardd0cf7f62004-11-09 16:17:02 +00001563 ctxt->nodeTab = tmp;
1564 ctxt->nodeMax *= 2;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001565 }
Daniel Veillard8915c152008-08-26 13:05:34 +00001566 if ((((unsigned int) ctxt->nodeNr) > xmlParserMaxDepth) &&
1567 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00001568 xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
Daniel Veillard8915c152008-08-26 13:05:34 +00001569 "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
Daniel Veillard4aede2e2003-10-17 12:43:59 +00001570 xmlParserMaxDepth);
Daniel Veillard3b2e4e12003-02-03 08:52:58 +00001571 ctxt->instate = XML_PARSER_EOF;
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001572 return(-1);
Daniel Veillard3b2e4e12003-02-03 08:52:58 +00001573 }
Daniel Veillard1c732d22002-11-30 11:22:59 +00001574 ctxt->nodeTab[ctxt->nodeNr] = value;
1575 ctxt->node = value;
1576 return (ctxt->nodeNr++);
1577}
Daniel Veillard8915c152008-08-26 13:05:34 +00001578
Daniel Veillard1c732d22002-11-30 11:22:59 +00001579/**
1580 * nodePop:
1581 * @ctxt: an XML parser context
1582 *
1583 * Pops the top element node from the node stack
1584 *
1585 * Returns the node just removed
Owen Taylor3473f882001-02-23 17:55:21 +00001586 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001587xmlNodePtr
Daniel Veillard1c732d22002-11-30 11:22:59 +00001588nodePop(xmlParserCtxtPtr ctxt)
1589{
1590 xmlNodePtr ret;
1591
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001592 if (ctxt == NULL) return(NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001593 if (ctxt->nodeNr <= 0)
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001594 return (NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001595 ctxt->nodeNr--;
1596 if (ctxt->nodeNr > 0)
1597 ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1];
1598 else
1599 ctxt->node = NULL;
1600 ret = ctxt->nodeTab[ctxt->nodeNr];
Daniel Veillard24505b02005-07-28 23:49:35 +00001601 ctxt->nodeTab[ctxt->nodeNr] = NULL;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001602 return (ret);
1603}
Daniel Veillarda2351322004-06-27 12:08:10 +00001604
1605#ifdef LIBXML_PUSH_ENABLED
Daniel Veillard1c732d22002-11-30 11:22:59 +00001606/**
Daniel Veillarde57ec792003-09-10 10:50:59 +00001607 * nameNsPush:
1608 * @ctxt: an XML parser context
1609 * @value: the element name
1610 * @prefix: the element prefix
1611 * @URI: the element namespace name
1612 *
1613 * Pushes a new element name/prefix/URL on top of the name stack
1614 *
1615 * Returns -1 in case of error, the index in the stack otherwise
1616 */
1617static int
1618nameNsPush(xmlParserCtxtPtr ctxt, const xmlChar * value,
1619 const xmlChar *prefix, const xmlChar *URI, int nsNr)
1620{
1621 if (ctxt->nameNr >= ctxt->nameMax) {
1622 const xmlChar * *tmp;
1623 void **tmp2;
1624 ctxt->nameMax *= 2;
1625 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1626 ctxt->nameMax *
1627 sizeof(ctxt->nameTab[0]));
1628 if (tmp == NULL) {
1629 ctxt->nameMax /= 2;
1630 goto mem_error;
1631 }
1632 ctxt->nameTab = tmp;
1633 tmp2 = (void **) xmlRealloc((void * *)ctxt->pushTab,
1634 ctxt->nameMax * 3 *
1635 sizeof(ctxt->pushTab[0]));
1636 if (tmp2 == NULL) {
1637 ctxt->nameMax /= 2;
1638 goto mem_error;
1639 }
1640 ctxt->pushTab = tmp2;
1641 }
1642 ctxt->nameTab[ctxt->nameNr] = value;
1643 ctxt->name = value;
1644 ctxt->pushTab[ctxt->nameNr * 3] = (void *) prefix;
1645 ctxt->pushTab[ctxt->nameNr * 3 + 1] = (void *) URI;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +00001646 ctxt->pushTab[ctxt->nameNr * 3 + 2] = (void *) (long) nsNr;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001647 return (ctxt->nameNr++);
1648mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001649 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001650 return (-1);
1651}
1652/**
1653 * nameNsPop:
1654 * @ctxt: an XML parser context
1655 *
1656 * Pops the top element/prefix/URI name from the name stack
1657 *
1658 * Returns the name just removed
1659 */
1660static const xmlChar *
1661nameNsPop(xmlParserCtxtPtr ctxt)
1662{
1663 const xmlChar *ret;
1664
1665 if (ctxt->nameNr <= 0)
Daniel Veillard24505b02005-07-28 23:49:35 +00001666 return (NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001667 ctxt->nameNr--;
1668 if (ctxt->nameNr > 0)
1669 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1670 else
1671 ctxt->name = NULL;
1672 ret = ctxt->nameTab[ctxt->nameNr];
1673 ctxt->nameTab[ctxt->nameNr] = NULL;
1674 return (ret);
1675}
Daniel Veillarda2351322004-06-27 12:08:10 +00001676#endif /* LIBXML_PUSH_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +00001677
1678/**
Daniel Veillard1c732d22002-11-30 11:22:59 +00001679 * namePush:
1680 * @ctxt: an XML parser context
1681 * @value: the element name
1682 *
1683 * Pushes a new element name on top of the name stack
1684 *
Daniel Veillarde57ec792003-09-10 10:50:59 +00001685 * Returns -1 in case of error, the index in the stack otherwise
Daniel Veillard1c732d22002-11-30 11:22:59 +00001686 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001687int
Daniel Veillard2fdbd322003-08-18 12:15:38 +00001688namePush(xmlParserCtxtPtr ctxt, const xmlChar * value)
Daniel Veillard1c732d22002-11-30 11:22:59 +00001689{
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001690 if (ctxt == NULL) return (-1);
1691
Daniel Veillard1c732d22002-11-30 11:22:59 +00001692 if (ctxt->nameNr >= ctxt->nameMax) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00001693 const xmlChar * *tmp;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001694 ctxt->nameMax *= 2;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001695 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
Daniel Veillard1c732d22002-11-30 11:22:59 +00001696 ctxt->nameMax *
1697 sizeof(ctxt->nameTab[0]));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001698 if (tmp == NULL) {
1699 ctxt->nameMax /= 2;
1700 goto mem_error;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001701 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00001702 ctxt->nameTab = tmp;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001703 }
1704 ctxt->nameTab[ctxt->nameNr] = value;
1705 ctxt->name = value;
1706 return (ctxt->nameNr++);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001707mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001708 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001709 return (-1);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001710}
1711/**
1712 * namePop:
1713 * @ctxt: an XML parser context
1714 *
1715 * Pops the top element name from the name stack
1716 *
1717 * Returns the name just removed
1718 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001719const xmlChar *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001720namePop(xmlParserCtxtPtr ctxt)
1721{
Daniel Veillard2fdbd322003-08-18 12:15:38 +00001722 const xmlChar *ret;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001723
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001724 if ((ctxt == NULL) || (ctxt->nameNr <= 0))
1725 return (NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001726 ctxt->nameNr--;
1727 if (ctxt->nameNr > 0)
1728 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1729 else
1730 ctxt->name = NULL;
1731 ret = ctxt->nameTab[ctxt->nameNr];
Daniel Veillard24505b02005-07-28 23:49:35 +00001732 ctxt->nameTab[ctxt->nameNr] = NULL;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001733 return (ret);
1734}
Owen Taylor3473f882001-02-23 17:55:21 +00001735
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001736static int spacePush(xmlParserCtxtPtr ctxt, int val) {
Owen Taylor3473f882001-02-23 17:55:21 +00001737 if (ctxt->spaceNr >= ctxt->spaceMax) {
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00001738 int *tmp;
1739
Owen Taylor3473f882001-02-23 17:55:21 +00001740 ctxt->spaceMax *= 2;
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00001741 tmp = (int *) xmlRealloc(ctxt->spaceTab,
1742 ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
1743 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001744 xmlErrMemory(ctxt, NULL);
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001745 ctxt->spaceMax /=2;
1746 return(-1);
Owen Taylor3473f882001-02-23 17:55:21 +00001747 }
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00001748 ctxt->spaceTab = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00001749 }
1750 ctxt->spaceTab[ctxt->spaceNr] = val;
1751 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr];
1752 return(ctxt->spaceNr++);
1753}
1754
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001755static int spacePop(xmlParserCtxtPtr ctxt) {
Owen Taylor3473f882001-02-23 17:55:21 +00001756 int ret;
1757 if (ctxt->spaceNr <= 0) return(0);
1758 ctxt->spaceNr--;
1759 if (ctxt->spaceNr > 0)
1760 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1];
1761 else
Daniel Veillarddbcbbd22006-06-18 19:55:20 +00001762 ctxt->space = &ctxt->spaceTab[0];
Owen Taylor3473f882001-02-23 17:55:21 +00001763 ret = ctxt->spaceTab[ctxt->spaceNr];
1764 ctxt->spaceTab[ctxt->spaceNr] = -1;
1765 return(ret);
1766}
1767
/*
 * Macros for accessing the content. Those should be used only by the parser,
 * and not exported.
 *
 * Dirty macros, i.e. one often needs to make assumptions on the context to
 * use them
 *
 *   CUR_PTR return the current pointer to the xmlChar to be parsed.
 *           To be used with extreme caution since operations consuming
 *           characters may move the input buffer to a different location !
 *   CUR     returns the current xmlChar value, i.e. an 8 bit value.
 *           This should be used internally by the parser
 *           only to compare to ASCII values otherwise it would break when
 *           running with UTF-8 encoding.
 *   RAW     same as CUR but in the input buffer, bypass any token
 *           extraction that may have been done
 *   NXT(n)  returns the n'th next xmlChar. Same as CUR it should be used only
 *           to compare on ASCII based substring.
 *   CMPn(s, c1, ..., cn) true when the n first bytes at s are c1 ... cn,
 *           there is no check that n bytes are actually available at s.
 *   SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
 *           strings without newlines within the parser.
 *   SKIPL(n) Skip n xmlChar one at a time, keeping the line and column
 *           counters up to date across newlines; n is re-evaluated on each
 *           iteration so it must not have side effects.
 *   NEXT1   Skip 1 xmlChar, and must also be used only to skip 1 non-newline
 *           ASCII defined char within the parser.
 * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
 *
 *   NEXT    Skip to the next character, this does the proper decoding
 *           in UTF-8 mode. It also pop-up unfinished entities on the fly.
 *   NEXTL(l) Skip the current unicode character of l xmlChars long.
 *   CUR_CHAR(l) returns the current unicode character (int), set l
 *           to the number of xmlChars used for the encoding [0-5].
 *   CUR_SCHAR  same but operate on a string instead of the context
 *   COPY_BUF  copy the current unicode char to the target buffer, increment
 *            the index
 *   GROW, SHRINK  handling of input buffers
 *
 * All of them expect a variable named ctxt to be in scope at the point
 * of use.
 */

#define RAW (*ctxt->input->cur)
#define CUR (*ctxt->input->cur)
#define NXT(val) ctxt->input->cur[(val)]
#define CUR_PTR ctxt->input->cur

/* every macro argument is parenthesized so expressions can be passed */
#define CMP4( s, c1, c2, c3, c4 ) \
  ( ((unsigned char *) (s))[ 0 ] == (c1) && \
    ((unsigned char *) (s))[ 1 ] == (c2) && \
    ((unsigned char *) (s))[ 2 ] == (c3) && \
    ((unsigned char *) (s))[ 3 ] == (c4) )
#define CMP5( s, c1, c2, c3, c4, c5 ) \
  ( CMP4( s, c1, c2, c3, c4 ) && ((unsigned char *) (s))[ 4 ] == (c5) )
#define CMP6( s, c1, c2, c3, c4, c5, c6 ) \
  ( CMP5( s, c1, c2, c3, c4, c5 ) && ((unsigned char *) (s))[ 5 ] == (c6) )
#define CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) \
  ( CMP6( s, c1, c2, c3, c4, c5, c6 ) && ((unsigned char *) (s))[ 6 ] == (c7) )
#define CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) \
  ( CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) && ((unsigned char *) (s))[ 7 ] == (c8) )
#define CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) \
  ( CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) && \
    ((unsigned char *) (s))[ 8 ] == (c9) )
#define CMP10( s, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 ) \
  ( CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) && \
    ((unsigned char *) (s))[ 9 ] == (c10) )

#define SKIP(val) do { \
    ctxt->nbChars += (val),ctxt->input->cur += (val),ctxt->input->col+=(val); \
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt); \
    if ((*ctxt->input->cur == 0) && \
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) \
        xmlPopInput(ctxt); \
  } while (0)

#define SKIPL(val) do { \
    int skipl; \
    for(skipl=0; skipl<(val); skipl++) { \
        if (*(ctxt->input->cur) == '\n') { \
        ctxt->input->line++; ctxt->input->col = 1; \
        } else ctxt->input->col++; \
        ctxt->nbChars++; \
        ctxt->input->cur++; \
    } \
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt); \
    if ((*ctxt->input->cur == 0) && \
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) \
        xmlPopInput(ctxt); \
  } while (0)
1848
Daniel Veillarda880b122003-04-21 21:36:41 +00001849#define SHRINK if ((ctxt->progressive == 0) && \
Daniel Veillard6155d8a2003-08-19 15:01:28 +00001850 (ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \
1851 (ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \
Daniel Veillard46de64e2002-05-29 08:21:33 +00001852 xmlSHRINK (ctxt);
1853
1854static void xmlSHRINK (xmlParserCtxtPtr ctxt) {
1855 xmlParserInputShrink(ctxt->input);
1856 if ((*ctxt->input->cur == 0) &&
1857 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1858 xmlPopInput(ctxt);
Daniel Veillard48b2f892001-02-25 16:11:03 +00001859 }
Owen Taylor3473f882001-02-23 17:55:21 +00001860
Daniel Veillarda880b122003-04-21 21:36:41 +00001861#define GROW if ((ctxt->progressive == 0) && \
1862 (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK)) \
Daniel Veillard46de64e2002-05-29 08:21:33 +00001863 xmlGROW (ctxt);
1864
1865static void xmlGROW (xmlParserCtxtPtr ctxt) {
1866 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
Daniel Veillard59df7832010-02-02 10:24:01 +01001867 if ((ctxt->input->cur != NULL) && (*ctxt->input->cur == 0) &&
Daniel Veillard46de64e2002-05-29 08:21:33 +00001868 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1869 xmlPopInput(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +00001870}
Owen Taylor3473f882001-02-23 17:55:21 +00001871
/* skip blanks at the current position, see xmlSkipBlankChars() */
#define SKIP_BLANKS xmlSkipBlankChars(ctxt)

#define NEXT xmlNextChar(ctxt)

/*
 * NEXT1: advance by exactly one xmlChar, bumping the column and
 * character counters; when the new current byte is 0 an attempt is made
 * to grow the input.
 * NOTE: expands to a bare { } block, not a do/while(0) statement.
 */
#define NEXT1 { \
        ctxt->input->col++; \
        ctxt->input->cur++; \
        ctxt->nbChars++; \
        if (*ctxt->input->cur == 0) \
            xmlParserInputGrow(ctxt->input, INPUT_CHUNK); \
    }

/*
 * NEXTL: advance by l xmlChars, counted as a single character for the
 * line/column bookkeeping, then handle a parameter entity reference if
 * the new current byte is '%'.
 */
#define NEXTL(l) do { \
    if (*(ctxt->input->cur) == '\n') { \
        ctxt->input->line++; ctxt->input->col = 1; \
    } else ctxt->input->col++; \
    ctxt->input->cur += (l); \
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt); \
  } while (0)

#define CUR_CHAR(l) xmlCurrentChar(ctxt, &(l))
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, (s), &(l))

/*
 * COPY_BUF: append character v to buffer b at index i, either as one
 * byte when l is 1 or through xmlCopyCharMultiByte(); i is advanced by
 * the number of bytes written.
 * NOTE: expands to an if/else statement without trailing ';'.
 */
#define COPY_BUF(l,b,i,v) \
    if ((l) == 1) (b)[(i)++] = (xmlChar) (v); \
    else (i) += xmlCopyCharMultiByte(&(b)[(i)],(v))
Owen Taylor3473f882001-02-23 17:55:21 +00001898
1899/**
1900 * xmlSkipBlankChars:
1901 * @ctxt: the XML parser context
1902 *
1903 * skip all blanks character found at that point in the input streams.
1904 * It pops up finished entities in the process if allowable at that point.
1905 *
1906 * Returns the number of space chars skipped
1907 */
1908
1909int
1910xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
Daniel Veillard02141ea2001-04-30 11:46:40 +00001911 int res = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001912
1913 /*
1914 * It's Okay to use CUR/NEXT here since all the blanks are on
1915 * the ASCII range.
1916 */
Daniel Veillard02141ea2001-04-30 11:46:40 +00001917 if ((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) {
1918 const xmlChar *cur;
Owen Taylor3473f882001-02-23 17:55:21 +00001919 /*
Daniel Veillard02141ea2001-04-30 11:46:40 +00001920 * if we are in the document content, go really fast
Owen Taylor3473f882001-02-23 17:55:21 +00001921 */
Daniel Veillard02141ea2001-04-30 11:46:40 +00001922 cur = ctxt->input->cur;
William M. Brack76e95df2003-10-18 16:20:14 +00001923 while (IS_BLANK_CH(*cur)) {
Daniel Veillard02141ea2001-04-30 11:46:40 +00001924 if (*cur == '\n') {
1925 ctxt->input->line++; ctxt->input->col = 1;
1926 }
1927 cur++;
1928 res++;
1929 if (*cur == 0) {
1930 ctxt->input->cur = cur;
1931 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1932 cur = ctxt->input->cur;
1933 }
1934 }
1935 ctxt->input->cur = cur;
1936 } else {
1937 int cur;
1938 do {
1939 cur = CUR;
Daniel Veillard7da92702005-01-23 20:15:53 +00001940 while (IS_BLANK_CH(cur)) { /* CHECKED tstblanks.xml */
Daniel Veillard02141ea2001-04-30 11:46:40 +00001941 NEXT;
1942 cur = CUR;
1943 res++;
1944 }
1945 while ((cur == 0) && (ctxt->inputNr > 1) &&
1946 (ctxt->instate != XML_PARSER_COMMENT)) {
1947 xmlPopInput(ctxt);
1948 cur = CUR;
1949 }
1950 /*
1951 * Need to handle support of entities branching here
1952 */
1953 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);
1954 } while (IS_BLANK(cur)); /* CHECKED tstblanks.xml */
1955 }
Owen Taylor3473f882001-02-23 17:55:21 +00001956 return(res);
1957}
1958
1959/************************************************************************
1960 * *
1961 * Commodity functions to handle entities *
1962 * *
1963 ************************************************************************/
1964
1965/**
1966 * xmlPopInput:
1967 * @ctxt: an XML parser context
1968 *
1969 * xmlPopInput: the current input pointed by ctxt->input came to an end
1970 * pop it and return the next char.
1971 *
1972 * Returns the current xmlChar in the parser context
1973 */
1974xmlChar
1975xmlPopInput(xmlParserCtxtPtr ctxt) {
Daniel Veillarda82b1822004-11-08 16:24:57 +00001976 if ((ctxt == NULL) || (ctxt->inputNr <= 1)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00001977 if (xmlParserDebugEntities)
1978 xmlGenericError(xmlGenericErrorContext,
1979 "Popping input %d\n", ctxt->inputNr);
1980 xmlFreeInputStream(inputPop(ctxt));
Daniel Veillard561b7f82002-03-20 21:55:57 +00001981 if ((*ctxt->input->cur == 0) &&
Owen Taylor3473f882001-02-23 17:55:21 +00001982 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1983 return(xmlPopInput(ctxt));
1984 return(CUR);
1985}
1986
1987/**
1988 * xmlPushInput:
1989 * @ctxt: an XML parser context
1990 * @input: an XML parser input fragment (entity, XML fragment ...).
1991 *
1992 * xmlPushInput: switch to a new input stream which is stacked on top
1993 * of the previous one(s).
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001994 * Returns -1 in case of error or the index in the input stack
Owen Taylor3473f882001-02-23 17:55:21 +00001995 */
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001996int
Owen Taylor3473f882001-02-23 17:55:21 +00001997xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001998 int ret;
1999 if (input == NULL) return(-1);
Owen Taylor3473f882001-02-23 17:55:21 +00002000
2001 if (xmlParserDebugEntities) {
2002 if ((ctxt->input != NULL) && (ctxt->input->filename))
2003 xmlGenericError(xmlGenericErrorContext,
2004 "%s(%d): ", ctxt->input->filename,
2005 ctxt->input->line);
2006 xmlGenericError(xmlGenericErrorContext,
2007 "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
2008 }
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00002009 ret = inputPush(ctxt, input);
Owen Taylor3473f882001-02-23 17:55:21 +00002010 GROW;
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00002011 return(ret);
Owen Taylor3473f882001-02-23 17:55:21 +00002012}
2013
2014/**
2015 * xmlParseCharRef:
2016 * @ctxt: an XML parser context
2017 *
2018 * parse Reference declarations
2019 *
2020 * [66] CharRef ::= '&#' [0-9]+ ';' |
2021 * '&#x' [0-9a-fA-F]+ ';'
2022 *
2023 * [ WFC: Legal Character ]
2024 * Characters referred to using character references must match the
2025 * production for Char.
2026 *
2027 * Returns the value parsed (as an int), 0 in case of error
2028 */
2029int
2030xmlParseCharRef(xmlParserCtxtPtr ctxt) {
Daniel Veillard50582112001-03-26 22:52:16 +00002031 unsigned int val = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002032 int count = 0;
Daniel Veillard37fd3072004-06-03 11:22:31 +00002033 unsigned int outofrange = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002034
Owen Taylor3473f882001-02-23 17:55:21 +00002035 /*
2036 * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
2037 */
Daniel Veillard561b7f82002-03-20 21:55:57 +00002038 if ((RAW == '&') && (NXT(1) == '#') &&
Owen Taylor3473f882001-02-23 17:55:21 +00002039 (NXT(2) == 'x')) {
2040 SKIP(3);
2041 GROW;
2042 while (RAW != ';') { /* loop blocked by count */
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00002043 if (count++ > 20) {
2044 count = 0;
2045 GROW;
2046 }
2047 if ((RAW >= '0') && (RAW <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +00002048 val = val * 16 + (CUR - '0');
2049 else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
2050 val = val * 16 + (CUR - 'a') + 10;
2051 else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
2052 val = val * 16 + (CUR - 'A') + 10;
2053 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002054 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002055 val = 0;
2056 break;
2057 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00002058 if (val > 0x10FFFF)
2059 outofrange = val;
2060
Owen Taylor3473f882001-02-23 17:55:21 +00002061 NEXT;
2062 count++;
2063 }
2064 if (RAW == ';') {
2065 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
Daniel Veillard77a90a72003-03-22 00:04:05 +00002066 ctxt->input->col++;
Owen Taylor3473f882001-02-23 17:55:21 +00002067 ctxt->nbChars ++;
2068 ctxt->input->cur++;
2069 }
Daniel Veillard561b7f82002-03-20 21:55:57 +00002070 } else if ((RAW == '&') && (NXT(1) == '#')) {
Owen Taylor3473f882001-02-23 17:55:21 +00002071 SKIP(2);
2072 GROW;
2073 while (RAW != ';') { /* loop blocked by count */
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00002074 if (count++ > 20) {
2075 count = 0;
2076 GROW;
2077 }
2078 if ((RAW >= '0') && (RAW <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +00002079 val = val * 10 + (CUR - '0');
2080 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002081 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002082 val = 0;
2083 break;
2084 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00002085 if (val > 0x10FFFF)
2086 outofrange = val;
2087
Owen Taylor3473f882001-02-23 17:55:21 +00002088 NEXT;
2089 count++;
2090 }
2091 if (RAW == ';') {
2092 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
Daniel Veillard77a90a72003-03-22 00:04:05 +00002093 ctxt->input->col++;
Owen Taylor3473f882001-02-23 17:55:21 +00002094 ctxt->nbChars ++;
2095 ctxt->input->cur++;
2096 }
2097 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002098 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002099 }
2100
2101 /*
2102 * [ WFC: Legal Character ]
2103 * Characters referred to using character references must match the
2104 * production for Char.
2105 */
Daniel Veillard37fd3072004-06-03 11:22:31 +00002106 if ((IS_CHAR(val) && (outofrange == 0))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002107 return(val);
2108 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00002109 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2110 "xmlParseCharRef: invalid xmlChar value %d\n",
2111 val);
Owen Taylor3473f882001-02-23 17:55:21 +00002112 }
2113 return(0);
2114}
2115
2116/**
2117 * xmlParseStringCharRef:
2118 * @ctxt: an XML parser context
2119 * @str: a pointer to an index in the string
2120 *
2121 * parse Reference declarations, variant parsing from a string rather
2122 * than an an input flow.
2123 *
2124 * [66] CharRef ::= '&#' [0-9]+ ';' |
2125 * '&#x' [0-9a-fA-F]+ ';'
2126 *
2127 * [ WFC: Legal Character ]
2128 * Characters referred to using character references must match the
2129 * production for Char.
2130 *
2131 * Returns the value parsed (as an int), 0 in case of error, str will be
2132 * updated to the current value of the index
2133 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00002134static int
Owen Taylor3473f882001-02-23 17:55:21 +00002135xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
2136 const xmlChar *ptr;
2137 xmlChar cur;
Daniel Veillard37fd3072004-06-03 11:22:31 +00002138 unsigned int val = 0;
2139 unsigned int outofrange = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002140
2141 if ((str == NULL) || (*str == NULL)) return(0);
2142 ptr = *str;
2143 cur = *ptr;
2144 if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
2145 ptr += 3;
2146 cur = *ptr;
2147 while (cur != ';') { /* Non input consuming loop */
2148 if ((cur >= '0') && (cur <= '9'))
2149 val = val * 16 + (cur - '0');
2150 else if ((cur >= 'a') && (cur <= 'f'))
2151 val = val * 16 + (cur - 'a') + 10;
2152 else if ((cur >= 'A') && (cur <= 'F'))
2153 val = val * 16 + (cur - 'A') + 10;
2154 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002155 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002156 val = 0;
2157 break;
2158 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00002159 if (val > 0x10FFFF)
2160 outofrange = val;
2161
Owen Taylor3473f882001-02-23 17:55:21 +00002162 ptr++;
2163 cur = *ptr;
2164 }
2165 if (cur == ';')
2166 ptr++;
2167 } else if ((cur == '&') && (ptr[1] == '#')){
2168 ptr += 2;
2169 cur = *ptr;
2170 while (cur != ';') { /* Non input consuming loops */
2171 if ((cur >= '0') && (cur <= '9'))
2172 val = val * 10 + (cur - '0');
2173 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002174 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002175 val = 0;
2176 break;
2177 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00002178 if (val > 0x10FFFF)
2179 outofrange = val;
2180
Owen Taylor3473f882001-02-23 17:55:21 +00002181 ptr++;
2182 cur = *ptr;
2183 }
2184 if (cur == ';')
2185 ptr++;
2186 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002187 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002188 return(0);
2189 }
2190 *str = ptr;
2191
2192 /*
2193 * [ WFC: Legal Character ]
2194 * Characters referred to using character references must match the
2195 * production for Char.
2196 */
Daniel Veillard37fd3072004-06-03 11:22:31 +00002197 if ((IS_CHAR(val) && (outofrange == 0))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002198 return(val);
2199 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00002200 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2201 "xmlParseStringCharRef: invalid xmlChar value %d\n",
2202 val);
Owen Taylor3473f882001-02-23 17:55:21 +00002203 }
2204 return(0);
2205}
2206
2207/**
Daniel Veillardf5582f12002-06-11 10:08:16 +00002208 * xmlNewBlanksWrapperInputStream:
2209 * @ctxt: an XML parser context
2210 * @entity: an Entity pointer
2211 *
2212 * Create a new input stream for wrapping
2213 * blanks around a PEReference
2214 *
2215 * Returns the new input stream or NULL
2216 */
2217
2218static void deallocblankswrapper (xmlChar *str) {xmlFree(str);}
2219
Daniel Veillardf4862f02002-09-10 11:13:43 +00002220static xmlParserInputPtr
Daniel Veillardf5582f12002-06-11 10:08:16 +00002221xmlNewBlanksWrapperInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
2222 xmlParserInputPtr input;
2223 xmlChar *buffer;
2224 size_t length;
2225 if (entity == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002226 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
2227 "xmlNewBlanksWrapperInputStream entity\n");
Daniel Veillardf5582f12002-06-11 10:08:16 +00002228 return(NULL);
2229 }
2230 if (xmlParserDebugEntities)
2231 xmlGenericError(xmlGenericErrorContext,
2232 "new blanks wrapper for entity: %s\n", entity->name);
2233 input = xmlNewInputStream(ctxt);
2234 if (input == NULL) {
2235 return(NULL);
2236 }
2237 length = xmlStrlen(entity->name) + 5;
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002238 buffer = xmlMallocAtomic(length);
Daniel Veillardf5582f12002-06-11 10:08:16 +00002239 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002240 xmlErrMemory(ctxt, NULL);
Daniel Veillard68b6e022008-03-31 09:26:00 +00002241 xmlFree(input);
Daniel Veillardf5582f12002-06-11 10:08:16 +00002242 return(NULL);
2243 }
2244 buffer [0] = ' ';
2245 buffer [1] = '%';
2246 buffer [length-3] = ';';
2247 buffer [length-2] = ' ';
2248 buffer [length-1] = 0;
2249 memcpy(buffer + 2, entity->name, length - 5);
2250 input->free = deallocblankswrapper;
2251 input->base = buffer;
2252 input->cur = buffer;
2253 input->length = length;
2254 input->end = &buffer[length];
2255 return(input);
2256}
2257
2258/**
Owen Taylor3473f882001-02-23 17:55:21 +00002259 * xmlParserHandlePEReference:
2260 * @ctxt: the parser context
2261 *
2262 * [69] PEReference ::= '%' Name ';'
2263 *
2264 * [ WFC: No Recursion ]
2265 * A parsed entity must not contain a recursive
2266 * reference to itself, either directly or indirectly.
2267 *
2268 * [ WFC: Entity Declared ]
2269 * In a document without any DTD, a document with only an internal DTD
2270 * subset which contains no parameter entity references, or a document
2271 * with "standalone='yes'", ... ... The declaration of a parameter
2272 * entity must precede any reference to it...
2273 *
2274 * [ VC: Entity Declared ]
2275 * In a document with an external subset or external parameter entities
2276 * with "standalone='no'", ... ... The declaration of a parameter entity
2277 * must precede any reference to it...
2278 *
2279 * [ WFC: In DTD ]
2280 * Parameter-entity references may only appear in the DTD.
2281 * NOTE: misleading but this is handled.
2282 *
2283 * A PEReference may have been detected in the current input stream
2284 * the handling is done accordingly to
2285 * http://www.w3.org/TR/REC-xml#entproc
2286 * i.e.
2287 * - Included in literal in entity values
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002288 * - Included as Parameter Entity reference within DTDs
Owen Taylor3473f882001-02-23 17:55:21 +00002289 */
2290void
2291xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002292 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00002293 xmlEntityPtr entity = NULL;
2294 xmlParserInputPtr input;
2295
Owen Taylor3473f882001-02-23 17:55:21 +00002296 if (RAW != '%') return;
2297 switch(ctxt->instate) {
2298 case XML_PARSER_CDATA_SECTION:
2299 return;
2300 case XML_PARSER_COMMENT:
2301 return;
2302 case XML_PARSER_START_TAG:
2303 return;
2304 case XML_PARSER_END_TAG:
2305 return;
2306 case XML_PARSER_EOF:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002307 xmlFatalErr(ctxt, XML_ERR_PEREF_AT_EOF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002308 return;
2309 case XML_PARSER_PROLOG:
2310 case XML_PARSER_START:
2311 case XML_PARSER_MISC:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002312 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_PROLOG, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002313 return;
2314 case XML_PARSER_ENTITY_DECL:
2315 case XML_PARSER_CONTENT:
2316 case XML_PARSER_ATTRIBUTE_VALUE:
2317 case XML_PARSER_PI:
2318 case XML_PARSER_SYSTEM_LITERAL:
Daniel Veillard4a7ae502002-02-18 19:18:17 +00002319 case XML_PARSER_PUBLIC_LITERAL:
Owen Taylor3473f882001-02-23 17:55:21 +00002320 /* we just ignore it there */
2321 return;
2322 case XML_PARSER_EPILOG:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002323 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_EPILOG, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002324 return;
2325 case XML_PARSER_ENTITY_VALUE:
2326 /*
2327 * NOTE: in the case of entity values, we don't do the
2328 * substitution here since we need the literal
2329 * entity value to be able to save the internal
2330 * subset of the document.
2331 * This will be handled by xmlStringDecodeEntities
2332 */
2333 return;
2334 case XML_PARSER_DTD:
2335 /*
2336 * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
2337 * In the internal DTD subset, parameter-entity references
2338 * can occur only where markup declarations can occur, not
2339 * within markup declarations.
2340 * In that case this is handled in xmlParseMarkupDecl
2341 */
2342 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
2343 return;
William M. Brack76e95df2003-10-18 16:20:14 +00002344 if (IS_BLANK_CH(NXT(1)) || NXT(1) == 0)
Daniel Veillardf5582f12002-06-11 10:08:16 +00002345 return;
Owen Taylor3473f882001-02-23 17:55:21 +00002346 break;
2347 case XML_PARSER_IGNORE:
2348 return;
2349 }
2350
2351 NEXT;
2352 name = xmlParseName(ctxt);
2353 if (xmlParserDebugEntities)
2354 xmlGenericError(xmlGenericErrorContext,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002355 "PEReference: %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00002356 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002357 xmlFatalErr(ctxt, XML_ERR_PEREF_NO_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002358 } else {
2359 if (RAW == ';') {
2360 NEXT;
2361 if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL))
2362 entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
2363 if (entity == NULL) {
2364
2365 /*
2366 * [ WFC: Entity Declared ]
2367 * In a document without any DTD, a document with only an
2368 * internal DTD subset which contains no parameter entity
2369 * references, or a document with "standalone='yes'", ...
2370 * ... The declaration of a parameter entity must precede
2371 * any reference to it...
2372 */
2373 if ((ctxt->standalone == 1) ||
2374 ((ctxt->hasExternalSubset == 0) &&
2375 (ctxt->hasPErefs == 0))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00002376 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00002377 "PEReference: %%%s; not found\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00002378 } else {
2379 /*
2380 * [ VC: Entity Declared ]
2381 * In a document with an external subset or external
2382 * parameter entities with "standalone='no'", ...
2383 * ... The declaration of a parameter entity must precede
2384 * any reference to it...
2385 */
Daniel Veillard24eb9782003-10-04 21:08:09 +00002386 if ((ctxt->validate) && (ctxt->vctxt.error != NULL)) {
2387 xmlValidityError(ctxt, XML_WAR_UNDECLARED_ENTITY,
2388 "PEReference: %%%s; not found\n",
Daniel Veillardae0765b2008-07-31 19:54:59 +00002389 name, NULL);
Daniel Veillard24eb9782003-10-04 21:08:09 +00002390 } else
2391 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
2392 "PEReference: %%%s; not found\n",
2393 name, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002394 ctxt->valid = 0;
2395 }
Daniel Veillardf5582f12002-06-11 10:08:16 +00002396 } else if (ctxt->input->free != deallocblankswrapper) {
2397 input = xmlNewBlanksWrapperInputStream(ctxt, entity);
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00002398 if (xmlPushInput(ctxt, input) < 0)
2399 return;
Owen Taylor3473f882001-02-23 17:55:21 +00002400 } else {
2401 if ((entity->etype == XML_INTERNAL_PARAMETER_ENTITY) ||
2402 (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)) {
Daniel Veillard87a764e2001-06-20 17:41:10 +00002403 xmlChar start[4];
2404 xmlCharEncoding enc;
2405
Owen Taylor3473f882001-02-23 17:55:21 +00002406 /*
2407 * handle the extra spaces added before and after
2408 * c.f. http://www.w3.org/TR/REC-xml#as-PE
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002409 * this is done independently.
Owen Taylor3473f882001-02-23 17:55:21 +00002410 */
2411 input = xmlNewEntityInputStream(ctxt, entity);
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00002412 if (xmlPushInput(ctxt, input) < 0)
2413 return;
Daniel Veillard87a764e2001-06-20 17:41:10 +00002414
2415 /*
2416 * Get the 4 first bytes and decode the charset
2417 * if enc != XML_CHAR_ENCODING_NONE
2418 * plug some encoding conversion routines.
William M. Bracka0c48ad2004-04-16 15:58:29 +00002419 * Note that, since we may have some non-UTF8
2420 * encoding (like UTF16, bug 135229), the 'length'
2421 * is not known, but we can calculate based upon
2422 * the amount of data in the buffer.
Daniel Veillard87a764e2001-06-20 17:41:10 +00002423 */
2424 GROW
William M. Bracka0c48ad2004-04-16 15:58:29 +00002425 if ((ctxt->input->end - ctxt->input->cur)>=4) {
Daniel Veillarde059b892002-06-13 15:32:10 +00002426 start[0] = RAW;
2427 start[1] = NXT(1);
2428 start[2] = NXT(2);
2429 start[3] = NXT(3);
2430 enc = xmlDetectCharEncoding(start, 4);
2431 if (enc != XML_CHAR_ENCODING_NONE) {
2432 xmlSwitchEncoding(ctxt, enc);
2433 }
Daniel Veillard87a764e2001-06-20 17:41:10 +00002434 }
2435
Owen Taylor3473f882001-02-23 17:55:21 +00002436 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
Daniel Veillarda07050d2003-10-19 14:46:32 +00002437 (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l' )) &&
2438 (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002439 xmlParseTextDecl(ctxt);
2440 }
Owen Taylor3473f882001-02-23 17:55:21 +00002441 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00002442 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
2443 "PEReference: %s is not a parameter entity\n",
2444 name);
Owen Taylor3473f882001-02-23 17:55:21 +00002445 }
2446 }
2447 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002448 xmlFatalErr(ctxt, XML_ERR_PEREF_SEMICOL_MISSING, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002449 }
Owen Taylor3473f882001-02-23 17:55:21 +00002450 }
2451}
2452
/*
 * Macro used to grow the current buffer.
 *
 * The size variable must be named <buffer>_size; it is doubled and then
 * increased by @n before the buffer is reallocated to that size.
 * On allocation failure the macro jumps to a `mem_error` label that the
 * enclosing function must provide; the old buffer is left untouched so
 * that label can still free it.
 */
#define growBuffer(buffer, n) {                                         \
    xmlChar *tmp;                                                       \
    buffer##_size *= 2;                                                 \
    buffer##_size += (n);                                               \
    tmp = (xmlChar *)                                                   \
                xmlRealloc(buffer, buffer##_size * sizeof(xmlChar));    \
    if (tmp == NULL) goto mem_error;                                    \
    buffer = tmp;                                                       \
}
2465
2466/**
Daniel Veillard7a02cfe2003-09-25 12:18:34 +00002467 * xmlStringLenDecodeEntities:
Owen Taylor3473f882001-02-23 17:55:21 +00002468 * @ctxt: the parser context
2469 * @str: the input string
Daniel Veillarde57ec792003-09-10 10:50:59 +00002470 * @len: the string length
Owen Taylor3473f882001-02-23 17:55:21 +00002471 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2472 * @end: an end marker xmlChar, 0 if none
2473 * @end2: an end marker xmlChar, 0 if none
2474 * @end3: an end marker xmlChar, 0 if none
2475 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002476 * Takes a entity string content and process to do the adequate substitutions.
Owen Taylor3473f882001-02-23 17:55:21 +00002477 *
2478 * [67] Reference ::= EntityRef | CharRef
2479 *
2480 * [69] PEReference ::= '%' Name ';'
2481 *
2482 * Returns A newly allocated string with the substitution done. The caller
2483 * must deallocate it !
2484 */
2485xmlChar *
Daniel Veillarde57ec792003-09-10 10:50:59 +00002486xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2487 int what, xmlChar end, xmlChar end2, xmlChar end3) {
Owen Taylor3473f882001-02-23 17:55:21 +00002488 xmlChar *buffer = NULL;
2489 int buffer_size = 0;
2490
2491 xmlChar *current = NULL;
Daniel Veillard68b6e022008-03-31 09:26:00 +00002492 xmlChar *rep = NULL;
Daniel Veillarde57ec792003-09-10 10:50:59 +00002493 const xmlChar *last;
Owen Taylor3473f882001-02-23 17:55:21 +00002494 xmlEntityPtr ent;
2495 int c,l;
2496 int nbchars = 0;
2497
Daniel Veillarda82b1822004-11-08 16:24:57 +00002498 if ((ctxt == NULL) || (str == NULL) || (len < 0))
Owen Taylor3473f882001-02-23 17:55:21 +00002499 return(NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00002500 last = str + len;
Owen Taylor3473f882001-02-23 17:55:21 +00002501
Daniel Veillard0161e632008-08-28 15:36:32 +00002502 if (((ctxt->depth > 40) &&
2503 ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
2504 (ctxt->depth > 1024)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002505 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002506 return(NULL);
2507 }
2508
2509 /*
2510 * allocate a translation buffer.
2511 */
2512 buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002513 buffer = (xmlChar *) xmlMallocAtomic(buffer_size * sizeof(xmlChar));
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002514 if (buffer == NULL) goto mem_error;
Owen Taylor3473f882001-02-23 17:55:21 +00002515
2516 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002517 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +00002518 * we are operating on already parsed values.
2519 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00002520 if (str < last)
2521 c = CUR_SCHAR(str, l);
2522 else
2523 c = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002524 while ((c != 0) && (c != end) && /* non input consuming loop */
2525 (c != end2) && (c != end3)) {
2526
2527 if (c == 0) break;
2528 if ((c == '&') && (str[1] == '#')) {
2529 int val = xmlParseStringCharRef(ctxt, &str);
2530 if (val != 0) {
2531 COPY_BUF(0,buffer,nbchars,val);
2532 }
Daniel Veillardbedc9772005-09-28 21:42:15 +00002533 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
Daniel Veillard0161e632008-08-28 15:36:32 +00002534 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
Daniel Veillardbedc9772005-09-28 21:42:15 +00002535 }
Owen Taylor3473f882001-02-23 17:55:21 +00002536 } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
2537 if (xmlParserDebugEntities)
2538 xmlGenericError(xmlGenericErrorContext,
2539 "String decoding Entity Reference: %.30s\n",
2540 str);
2541 ent = xmlParseStringEntityRef(ctxt, &str);
Daniel Veillard8915c152008-08-26 13:05:34 +00002542 if ((ctxt->lastError.code == XML_ERR_ENTITY_LOOP) ||
2543 (ctxt->lastError.code == XML_ERR_INTERNAL_ERROR))
Daniel Veillard4bf899b2008-08-20 17:04:30 +00002544 goto int_error;
Daniel Veillard4bf899b2008-08-20 17:04:30 +00002545 if (ent != NULL)
Daniel Veillardf4f4e482008-08-25 08:57:48 +00002546 ctxt->nbentities += ent->checked;
Owen Taylor3473f882001-02-23 17:55:21 +00002547 if ((ent != NULL) &&
2548 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
2549 if (ent->content != NULL) {
2550 COPY_BUF(0,buffer,nbchars,ent->content[0]);
Daniel Veillardbedc9772005-09-28 21:42:15 +00002551 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
Daniel Veillard0161e632008-08-28 15:36:32 +00002552 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
Daniel Veillardbedc9772005-09-28 21:42:15 +00002553 }
Owen Taylor3473f882001-02-23 17:55:21 +00002554 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00002555 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
2556 "predefined entity has no content\n");
Owen Taylor3473f882001-02-23 17:55:21 +00002557 }
2558 } else if ((ent != NULL) && (ent->content != NULL)) {
Owen Taylor3473f882001-02-23 17:55:21 +00002559 ctxt->depth++;
2560 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2561 0, 0, 0);
2562 ctxt->depth--;
Daniel Veillard0161e632008-08-28 15:36:32 +00002563
Owen Taylor3473f882001-02-23 17:55:21 +00002564 if (rep != NULL) {
2565 current = rep;
2566 while (*current != 0) { /* non input consuming loop */
2567 buffer[nbchars++] = *current++;
2568 if (nbchars >
2569 buffer_size - XML_PARSER_BUFFER_SIZE) {
Daniel Veillard0161e632008-08-28 15:36:32 +00002570 if (xmlParserEntityCheck(ctxt, nbchars, ent))
2571 goto int_error;
2572 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
Owen Taylor3473f882001-02-23 17:55:21 +00002573 }
2574 }
2575 xmlFree(rep);
Daniel Veillard68b6e022008-03-31 09:26:00 +00002576 rep = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00002577 }
2578 } else if (ent != NULL) {
2579 int i = xmlStrlen(ent->name);
2580 const xmlChar *cur = ent->name;
2581
2582 buffer[nbchars++] = '&';
2583 if (nbchars > buffer_size - i - XML_PARSER_BUFFER_SIZE) {
Daniel Veillard0161e632008-08-28 15:36:32 +00002584 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
Owen Taylor3473f882001-02-23 17:55:21 +00002585 }
2586 for (;i > 0;i--)
2587 buffer[nbchars++] = *cur++;
2588 buffer[nbchars++] = ';';
2589 }
2590 } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
2591 if (xmlParserDebugEntities)
2592 xmlGenericError(xmlGenericErrorContext,
2593 "String decoding PE Reference: %.30s\n", str);
2594 ent = xmlParseStringPEReference(ctxt, &str);
Daniel Veillard4bf899b2008-08-20 17:04:30 +00002595 if (ctxt->lastError.code == XML_ERR_ENTITY_LOOP)
2596 goto int_error;
Daniel Veillard4bf899b2008-08-20 17:04:30 +00002597 if (ent != NULL)
Daniel Veillardf4f4e482008-08-25 08:57:48 +00002598 ctxt->nbentities += ent->checked;
Owen Taylor3473f882001-02-23 17:55:21 +00002599 if (ent != NULL) {
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00002600 if (ent->content == NULL) {
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00002601 xmlLoadEntityContent(ctxt, ent);
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00002602 }
Owen Taylor3473f882001-02-23 17:55:21 +00002603 ctxt->depth++;
2604 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2605 0, 0, 0);
2606 ctxt->depth--;
2607 if (rep != NULL) {
2608 current = rep;
2609 while (*current != 0) { /* non input consuming loop */
2610 buffer[nbchars++] = *current++;
2611 if (nbchars >
2612 buffer_size - XML_PARSER_BUFFER_SIZE) {
Daniel Veillard0161e632008-08-28 15:36:32 +00002613 if (xmlParserEntityCheck(ctxt, nbchars, ent))
2614 goto int_error;
2615 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
Owen Taylor3473f882001-02-23 17:55:21 +00002616 }
2617 }
2618 xmlFree(rep);
Daniel Veillard68b6e022008-03-31 09:26:00 +00002619 rep = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00002620 }
2621 }
2622 } else {
2623 COPY_BUF(l,buffer,nbchars,c);
2624 str += l;
2625 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
Daniel Veillard0161e632008-08-28 15:36:32 +00002626 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
Owen Taylor3473f882001-02-23 17:55:21 +00002627 }
2628 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00002629 if (str < last)
2630 c = CUR_SCHAR(str, l);
2631 else
2632 c = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002633 }
Daniel Veillard13cee4e2009-09-05 14:52:55 +02002634 buffer[nbchars] = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002635 return(buffer);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002636
2637mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002638 xmlErrMemory(ctxt, NULL);
Daniel Veillard4bf899b2008-08-20 17:04:30 +00002639int_error:
Daniel Veillard68b6e022008-03-31 09:26:00 +00002640 if (rep != NULL)
2641 xmlFree(rep);
2642 if (buffer != NULL)
2643 xmlFree(buffer);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002644 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002645}
2646
Daniel Veillarde57ec792003-09-10 10:50:59 +00002647/**
2648 * xmlStringDecodeEntities:
2649 * @ctxt: the parser context
2650 * @str: the input string
2651 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2652 * @end: an end marker xmlChar, 0 if none
2653 * @end2: an end marker xmlChar, 0 if none
2654 * @end3: an end marker xmlChar, 0 if none
2655 *
2656 * Takes a entity string content and process to do the adequate substitutions.
2657 *
2658 * [67] Reference ::= EntityRef | CharRef
2659 *
2660 * [69] PEReference ::= '%' Name ';'
2661 *
2662 * Returns A newly allocated string with the substitution done. The caller
2663 * must deallocate it !
2664 */
2665xmlChar *
2666xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
2667 xmlChar end, xmlChar end2, xmlChar end3) {
Daniel Veillarda82b1822004-11-08 16:24:57 +00002668 if ((ctxt == NULL) || (str == NULL)) return(NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00002669 return(xmlStringLenDecodeEntities(ctxt, str, xmlStrlen(str), what,
2670 end, end2, end3));
2671}
Owen Taylor3473f882001-02-23 17:55:21 +00002672
2673/************************************************************************
2674 * *
Owen Taylor3473f882001-02-23 17:55:21 +00002675 * Commodity functions, cleanup needed ? *
2676 * *
2677 ************************************************************************/
2678
2679/**
2680 * areBlanks:
2681 * @ctxt: an XML parser context
2682 * @str: a xmlChar *
2683 * @len: the size of @str
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002684 * @blank_chars: we know the chars are blanks
Owen Taylor3473f882001-02-23 17:55:21 +00002685 *
2686 * Is this a sequence of blank chars that one can ignore ?
2687 *
2688 * Returns 1 if ignorable 0 otherwise.
2689 */
2690
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002691static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2692 int blank_chars) {
Owen Taylor3473f882001-02-23 17:55:21 +00002693 int i, ret;
2694 xmlNodePtr lastChild;
2695
Daniel Veillard05c13a22001-09-09 08:38:09 +00002696 /*
2697 * Don't spend time trying to differentiate them, the same callback is
2698 * used !
2699 */
2700 if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters)
Daniel Veillard2f362242001-03-02 17:36:21 +00002701 return(0);
2702
Owen Taylor3473f882001-02-23 17:55:21 +00002703 /*
2704 * Check for xml:space value.
2705 */
Daniel Veillard1114d002006-10-12 16:24:35 +00002706 if ((ctxt->space == NULL) || (*(ctxt->space) == 1) ||
2707 (*(ctxt->space) == -2))
Owen Taylor3473f882001-02-23 17:55:21 +00002708 return(0);
2709
2710 /*
2711 * Check that the string is made of blanks
2712 */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002713 if (blank_chars == 0) {
2714 for (i = 0;i < len;i++)
2715 if (!(IS_BLANK_CH(str[i]))) return(0);
2716 }
Owen Taylor3473f882001-02-23 17:55:21 +00002717
2718 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002719 * Look if the element is mixed content in the DTD if available
Owen Taylor3473f882001-02-23 17:55:21 +00002720 */
Daniel Veillard6dd398f2001-07-25 22:41:03 +00002721 if (ctxt->node == NULL) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002722 if (ctxt->myDoc != NULL) {
2723 ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
2724 if (ret == 0) return(1);
2725 if (ret == 1) return(0);
2726 }
2727
2728 /*
2729 * Otherwise, heuristic :-\
2730 */
Daniel Veillardabac41e2005-07-06 15:17:38 +00002731 if ((RAW != '<') && (RAW != 0xD)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002732 if ((ctxt->node->children == NULL) &&
2733 (RAW == '<') && (NXT(1) == '/')) return(0);
2734
2735 lastChild = xmlGetLastChild(ctxt->node);
2736 if (lastChild == NULL) {
Daniel Veillard7db37732001-07-12 01:20:08 +00002737 if ((ctxt->node->type != XML_ELEMENT_NODE) &&
2738 (ctxt->node->content != NULL)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002739 } else if (xmlNodeIsText(lastChild))
2740 return(0);
2741 else if ((ctxt->node->children != NULL) &&
2742 (xmlNodeIsText(ctxt->node->children)))
2743 return(0);
2744 return(1);
2745}
2746
Owen Taylor3473f882001-02-23 17:55:21 +00002747/************************************************************************
2748 * *
2749 * Extra stuff for namespace support *
2750 * Relates to http://www.w3.org/TR/WD-xml-names *
2751 * *
2752 ************************************************************************/
2753
2754/**
2755 * xmlSplitQName:
2756 * @ctxt: an XML parser context
2757 * @name: an XML parser context
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00002758 * @prefix: a xmlChar **
Owen Taylor3473f882001-02-23 17:55:21 +00002759 *
2760 * parse an UTF8 encoded XML qualified name string
2761 *
2762 * [NS 5] QName ::= (Prefix ':')? LocalPart
2763 *
2764 * [NS 6] Prefix ::= NCName
2765 *
2766 * [NS 7] LocalPart ::= NCName
2767 *
2768 * Returns the local part, and prefix is updated
2769 * to get the Prefix if any.
2770 */
2771
2772xmlChar *
2773xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
2774 xmlChar buf[XML_MAX_NAMELEN + 5];
2775 xmlChar *buffer = NULL;
2776 int len = 0;
2777 int max = XML_MAX_NAMELEN;
2778 xmlChar *ret = NULL;
2779 const xmlChar *cur = name;
2780 int c;
2781
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00002782 if (prefix == NULL) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002783 *prefix = NULL;
2784
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00002785 if (cur == NULL) return(NULL);
2786
Owen Taylor3473f882001-02-23 17:55:21 +00002787#ifndef XML_XML_NAMESPACE
2788 /* xml: prefix is not really a namespace */
2789 if ((cur[0] == 'x') && (cur[1] == 'm') &&
2790 (cur[2] == 'l') && (cur[3] == ':'))
2791 return(xmlStrdup(name));
2792#endif
2793
Daniel Veillard597bc482003-07-24 16:08:28 +00002794 /* nasty but well=formed */
Owen Taylor3473f882001-02-23 17:55:21 +00002795 if (cur[0] == ':')
2796 return(xmlStrdup(name));
2797
2798 c = *cur++;
2799 while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
2800 buf[len++] = c;
2801 c = *cur++;
2802 }
2803 if (len >= max) {
2804 /*
2805 * Okay someone managed to make a huge name, so he's ready to pay
2806 * for the processing speed.
2807 */
2808 max = len * 2;
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00002809
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002810 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002811 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002812 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002813 return(NULL);
2814 }
2815 memcpy(buffer, buf, len);
2816 while ((c != 0) && (c != ':')) { /* tested bigname.xml */
2817 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00002818 xmlChar *tmp;
2819
Owen Taylor3473f882001-02-23 17:55:21 +00002820 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00002821 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00002822 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00002823 if (tmp == NULL) {
Daniel Veillard68b6e022008-03-31 09:26:00 +00002824 xmlFree(buffer);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002825 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002826 return(NULL);
2827 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00002828 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00002829 }
2830 buffer[len++] = c;
2831 c = *cur++;
2832 }
2833 buffer[len] = 0;
2834 }
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00002835
Daniel Veillard597bc482003-07-24 16:08:28 +00002836 if ((c == ':') && (*cur == 0)) {
Daniel Veillard02a49632006-10-13 12:42:31 +00002837 if (buffer != NULL)
2838 xmlFree(buffer);
2839 *prefix = NULL;
Daniel Veillard597bc482003-07-24 16:08:28 +00002840 return(xmlStrdup(name));
Daniel Veillard02a49632006-10-13 12:42:31 +00002841 }
Daniel Veillard597bc482003-07-24 16:08:28 +00002842
Owen Taylor3473f882001-02-23 17:55:21 +00002843 if (buffer == NULL)
2844 ret = xmlStrndup(buf, len);
2845 else {
2846 ret = buffer;
2847 buffer = NULL;
2848 max = XML_MAX_NAMELEN;
2849 }
2850
2851
2852 if (c == ':') {
Daniel Veillardbb284f42002-10-16 18:02:47 +00002853 c = *cur;
Owen Taylor3473f882001-02-23 17:55:21 +00002854 *prefix = ret;
Daniel Veillard597bc482003-07-24 16:08:28 +00002855 if (c == 0) {
Daniel Veillard8d73bcb2003-08-04 01:06:15 +00002856 return(xmlStrndup(BAD_CAST "", 0));
Daniel Veillard597bc482003-07-24 16:08:28 +00002857 }
Owen Taylor3473f882001-02-23 17:55:21 +00002858 len = 0;
2859
Daniel Veillardbb284f42002-10-16 18:02:47 +00002860 /*
2861 * Check that the first character is proper to start
2862 * a new name
2863 */
2864 if (!(((c >= 0x61) && (c <= 0x7A)) ||
2865 ((c >= 0x41) && (c <= 0x5A)) ||
2866 (c == '_') || (c == ':'))) {
2867 int l;
2868 int first = CUR_SCHAR(cur, l);
2869
2870 if (!IS_LETTER(first) && (first != '_')) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00002871 xmlFatalErrMsgStr(ctxt, XML_NS_ERR_QNAME,
Daniel Veillardbb284f42002-10-16 18:02:47 +00002872 "Name %s is not XML Namespace compliant\n",
Daniel Veillardbc92eca2003-09-15 09:48:06 +00002873 name);
Daniel Veillardbb284f42002-10-16 18:02:47 +00002874 }
2875 }
2876 cur++;
2877
Owen Taylor3473f882001-02-23 17:55:21 +00002878 while ((c != 0) && (len < max)) { /* tested bigname2.xml */
2879 buf[len++] = c;
2880 c = *cur++;
2881 }
2882 if (len >= max) {
2883 /*
2884 * Okay someone managed to make a huge name, so he's ready to pay
2885 * for the processing speed.
2886 */
2887 max = len * 2;
Daniel Veillard68b6e022008-03-31 09:26:00 +00002888
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002889 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002890 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002891 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002892 return(NULL);
2893 }
2894 memcpy(buffer, buf, len);
2895 while (c != 0) { /* tested bigname2.xml */
2896 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00002897 xmlChar *tmp;
2898
Owen Taylor3473f882001-02-23 17:55:21 +00002899 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00002900 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00002901 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00002902 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002903 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00002904 xmlFree(buffer);
Owen Taylor3473f882001-02-23 17:55:21 +00002905 return(NULL);
2906 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00002907 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00002908 }
2909 buffer[len++] = c;
2910 c = *cur++;
2911 }
2912 buffer[len] = 0;
2913 }
Daniel Veillard68b6e022008-03-31 09:26:00 +00002914
Owen Taylor3473f882001-02-23 17:55:21 +00002915 if (buffer == NULL)
2916 ret = xmlStrndup(buf, len);
2917 else {
2918 ret = buffer;
2919 }
2920 }
2921
2922 return(ret);
2923}
2924
2925/************************************************************************
2926 * *
2927 * The parser itself *
2928 * Relates to http://www.w3.org/TR/REC-xml *
2929 * *
2930 ************************************************************************/
2931
Daniel Veillard34e3f642008-07-29 09:02:27 +00002932/************************************************************************
2933 * *
2934 * Routines to parse Name, NCName and NmToken *
2935 * *
2936 ************************************************************************/
#ifdef DEBUG
/*
 * Debug-only statistics counters for the name parsing routines.
 * nbParseNameComplex is incremented on entry to xmlParseNameComplex();
 * the other counters presumably track the routines they are named
 * after in the same way -- TODO confirm against those routines.
 */
static unsigned long nbParseName = 0;
static unsigned long nbParseNmToken = 0;
static unsigned long nbParseNCName = 0;
static unsigned long nbParseNCNameComplex = 0;
static unsigned long nbParseNameComplex = 0;
static unsigned long nbParseStringName = 0;
#endif
2945
Daniel Veillard34e3f642008-07-29 09:02:27 +00002946/*
2947 * The two following functions are related to the change of accepted
2948 * characters for Name and NmToken in the Revision 5 of XML-1.0
2949 * They correspond to the modified production [4] and the new production [4a]
2950 * changes in that revision. Also note that the macros used for the
2951 * productions Letter, Digit, CombiningChar and Extender are not needed
2952 * anymore.
2953 * We still keep compatibility to pre-revision5 parsing semantic if the
2954 * new XML_PARSE_OLD10 option is given to the parser.
2955 */
2956static int
2957xmlIsNameStartChar(xmlParserCtxtPtr ctxt, int c) {
2958 if ((ctxt->options & XML_PARSE_OLD10) == 0) {
2959 /*
2960 * Use the new checks of production [4] [4a] amd [5] of the
2961 * Update 5 of XML-1.0
2962 */
2963 if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
2964 (((c >= 'a') && (c <= 'z')) ||
2965 ((c >= 'A') && (c <= 'Z')) ||
2966 (c == '_') || (c == ':') ||
2967 ((c >= 0xC0) && (c <= 0xD6)) ||
2968 ((c >= 0xD8) && (c <= 0xF6)) ||
2969 ((c >= 0xF8) && (c <= 0x2FF)) ||
2970 ((c >= 0x370) && (c <= 0x37D)) ||
2971 ((c >= 0x37F) && (c <= 0x1FFF)) ||
2972 ((c >= 0x200C) && (c <= 0x200D)) ||
2973 ((c >= 0x2070) && (c <= 0x218F)) ||
2974 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
2975 ((c >= 0x3001) && (c <= 0xD7FF)) ||
2976 ((c >= 0xF900) && (c <= 0xFDCF)) ||
2977 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
2978 ((c >= 0x10000) && (c <= 0xEFFFF))))
2979 return(1);
2980 } else {
2981 if (IS_LETTER(c) || (c == '_') || (c == ':'))
2982 return(1);
2983 }
2984 return(0);
2985}
2986
2987static int
2988xmlIsNameChar(xmlParserCtxtPtr ctxt, int c) {
2989 if ((ctxt->options & XML_PARSE_OLD10) == 0) {
2990 /*
2991 * Use the new checks of production [4] [4a] amd [5] of the
2992 * Update 5 of XML-1.0
2993 */
2994 if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
2995 (((c >= 'a') && (c <= 'z')) ||
2996 ((c >= 'A') && (c <= 'Z')) ||
2997 ((c >= '0') && (c <= '9')) || /* !start */
2998 (c == '_') || (c == ':') ||
2999 (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3000 ((c >= 0xC0) && (c <= 0xD6)) ||
3001 ((c >= 0xD8) && (c <= 0xF6)) ||
3002 ((c >= 0xF8) && (c <= 0x2FF)) ||
3003 ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3004 ((c >= 0x370) && (c <= 0x37D)) ||
3005 ((c >= 0x37F) && (c <= 0x1FFF)) ||
3006 ((c >= 0x200C) && (c <= 0x200D)) ||
3007 ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3008 ((c >= 0x2070) && (c <= 0x218F)) ||
3009 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3010 ((c >= 0x3001) && (c <= 0xD7FF)) ||
3011 ((c >= 0xF900) && (c <= 0xFDCF)) ||
3012 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3013 ((c >= 0x10000) && (c <= 0xEFFFF))))
3014 return(1);
3015 } else {
3016 if ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3017 (c == '.') || (c == '-') ||
3018 (c == '_') || (c == ':') ||
3019 (IS_COMBINING(c)) ||
3020 (IS_EXTENDER(c)))
3021 return(1);
3022 }
3023 return(0);
3024}
3025
Daniel Veillarde57ec792003-09-10 10:50:59 +00003026static xmlChar * xmlParseAttValueInternal(xmlParserCtxtPtr ctxt,
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003027 int *len, int *alloc, int normalize);
Daniel Veillard0fb18932003-09-07 09:14:37 +00003028
Daniel Veillard34e3f642008-07-29 09:02:27 +00003029static const xmlChar *
3030xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
3031 int len = 0, l;
3032 int c;
3033 int count = 0;
3034
Daniel Veillardc6561462009-03-25 10:22:31 +00003035#ifdef DEBUG
Daniel Veillard34e3f642008-07-29 09:02:27 +00003036 nbParseNameComplex++;
Daniel Veillardc6561462009-03-25 10:22:31 +00003037#endif
Daniel Veillard34e3f642008-07-29 09:02:27 +00003038
3039 /*
3040 * Handler for more complex cases
3041 */
3042 GROW;
3043 c = CUR_CHAR(l);
3044 if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3045 /*
3046 * Use the new checks of production [4] [4a] amd [5] of the
3047 * Update 5 of XML-1.0
3048 */
3049 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3050 (!(((c >= 'a') && (c <= 'z')) ||
3051 ((c >= 'A') && (c <= 'Z')) ||
3052 (c == '_') || (c == ':') ||
3053 ((c >= 0xC0) && (c <= 0xD6)) ||
3054 ((c >= 0xD8) && (c <= 0xF6)) ||
3055 ((c >= 0xF8) && (c <= 0x2FF)) ||
3056 ((c >= 0x370) && (c <= 0x37D)) ||
3057 ((c >= 0x37F) && (c <= 0x1FFF)) ||
3058 ((c >= 0x200C) && (c <= 0x200D)) ||
3059 ((c >= 0x2070) && (c <= 0x218F)) ||
3060 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3061 ((c >= 0x3001) && (c <= 0xD7FF)) ||
3062 ((c >= 0xF900) && (c <= 0xFDCF)) ||
3063 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3064 ((c >= 0x10000) && (c <= 0xEFFFF))))) {
3065 return(NULL);
3066 }
3067 len += l;
3068 NEXTL(l);
3069 c = CUR_CHAR(l);
3070 while ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3071 (((c >= 'a') && (c <= 'z')) ||
3072 ((c >= 'A') && (c <= 'Z')) ||
3073 ((c >= '0') && (c <= '9')) || /* !start */
3074 (c == '_') || (c == ':') ||
3075 (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3076 ((c >= 0xC0) && (c <= 0xD6)) ||
3077 ((c >= 0xD8) && (c <= 0xF6)) ||
3078 ((c >= 0xF8) && (c <= 0x2FF)) ||
3079 ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3080 ((c >= 0x370) && (c <= 0x37D)) ||
3081 ((c >= 0x37F) && (c <= 0x1FFF)) ||
3082 ((c >= 0x200C) && (c <= 0x200D)) ||
3083 ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3084 ((c >= 0x2070) && (c <= 0x218F)) ||
3085 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3086 ((c >= 0x3001) && (c <= 0xD7FF)) ||
3087 ((c >= 0xF900) && (c <= 0xFDCF)) ||
3088 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3089 ((c >= 0x10000) && (c <= 0xEFFFF))
3090 )) {
3091 if (count++ > 100) {
3092 count = 0;
3093 GROW;
3094 }
3095 len += l;
3096 NEXTL(l);
3097 c = CUR_CHAR(l);
3098 }
3099 } else {
3100 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3101 (!IS_LETTER(c) && (c != '_') &&
3102 (c != ':'))) {
3103 return(NULL);
3104 }
3105 len += l;
3106 NEXTL(l);
3107 c = CUR_CHAR(l);
3108
3109 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3110 ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3111 (c == '.') || (c == '-') ||
3112 (c == '_') || (c == ':') ||
3113 (IS_COMBINING(c)) ||
3114 (IS_EXTENDER(c)))) {
3115 if (count++ > 100) {
3116 count = 0;
3117 GROW;
3118 }
3119 len += l;
3120 NEXTL(l);
3121 c = CUR_CHAR(l);
3122 }
3123 }
3124 if ((*ctxt->input->cur == '\n') && (ctxt->input->cur[-1] == '\r'))
3125 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - (len + 1), len));
3126 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
3127}
3128
Owen Taylor3473f882001-02-23 17:55:21 +00003129/**
3130 * xmlParseName:
3131 * @ctxt: an XML parser context
3132 *
3133 * parse an XML name.
3134 *
3135 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3136 * CombiningChar | Extender
3137 *
3138 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3139 *
Daniel Veillard807b4de2004-09-26 14:42:56 +00003140 * [6] Names ::= Name (#x20 Name)*
Owen Taylor3473f882001-02-23 17:55:21 +00003141 *
3142 * Returns the Name parsed or NULL
3143 */
3144
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003145const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00003146xmlParseName(xmlParserCtxtPtr ctxt) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00003147 const xmlChar *in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003148 const xmlChar *ret;
Owen Taylor3473f882001-02-23 17:55:21 +00003149 int count = 0;
3150
3151 GROW;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003152
Daniel Veillardc6561462009-03-25 10:22:31 +00003153#ifdef DEBUG
Daniel Veillard34e3f642008-07-29 09:02:27 +00003154 nbParseName++;
Daniel Veillardc6561462009-03-25 10:22:31 +00003155#endif
Daniel Veillard34e3f642008-07-29 09:02:27 +00003156
Daniel Veillard48b2f892001-02-25 16:11:03 +00003157 /*
3158 * Accelerator for simple ASCII names
3159 */
3160 in = ctxt->input->cur;
3161 if (((*in >= 0x61) && (*in <= 0x7A)) ||
3162 ((*in >= 0x41) && (*in <= 0x5A)) ||
3163 (*in == '_') || (*in == ':')) {
3164 in++;
3165 while (((*in >= 0x61) && (*in <= 0x7A)) ||
3166 ((*in >= 0x41) && (*in <= 0x5A)) ||
3167 ((*in >= 0x30) && (*in <= 0x39)) ||
Daniel Veillard76d66f42001-05-16 21:05:17 +00003168 (*in == '_') || (*in == '-') ||
3169 (*in == ':') || (*in == '.'))
Daniel Veillard48b2f892001-02-25 16:11:03 +00003170 in++;
Daniel Veillard76d66f42001-05-16 21:05:17 +00003171 if ((*in > 0) && (*in < 0x80)) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00003172 count = in - ctxt->input->cur;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003173 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
Daniel Veillard48b2f892001-02-25 16:11:03 +00003174 ctxt->input->cur = in;
Daniel Veillard77a90a72003-03-22 00:04:05 +00003175 ctxt->nbChars += count;
3176 ctxt->input->col += count;
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003177 if (ret == NULL)
3178 xmlErrMemory(ctxt, NULL);
Daniel Veillard48b2f892001-02-25 16:11:03 +00003179 return(ret);
3180 }
3181 }
Daniel Veillard34e3f642008-07-29 09:02:27 +00003182 /* accelerator for special cases */
Daniel Veillard2f362242001-03-02 17:36:21 +00003183 return(xmlParseNameComplex(ctxt));
Daniel Veillard21a0f912001-02-25 19:54:14 +00003184}
Daniel Veillard48b2f892001-02-25 16:11:03 +00003185
Daniel Veillard34e3f642008-07-29 09:02:27 +00003186static const xmlChar *
3187xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) {
3188 int len = 0, l;
3189 int c;
3190 int count = 0;
3191
Daniel Veillardc6561462009-03-25 10:22:31 +00003192#ifdef DEBUG
Daniel Veillard34e3f642008-07-29 09:02:27 +00003193 nbParseNCNameComplex++;
Daniel Veillardc6561462009-03-25 10:22:31 +00003194#endif
Daniel Veillard34e3f642008-07-29 09:02:27 +00003195
3196 /*
3197 * Handler for more complex cases
3198 */
3199 GROW;
3200 c = CUR_CHAR(l);
3201 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3202 (!xmlIsNameStartChar(ctxt, c) || (c == ':'))) {
3203 return(NULL);
3204 }
3205
3206 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3207 (xmlIsNameChar(ctxt, c) && (c != ':'))) {
3208 if (count++ > 100) {
3209 count = 0;
3210 GROW;
3211 }
3212 len += l;
3213 NEXTL(l);
3214 c = CUR_CHAR(l);
3215 }
3216 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
3217}
3218
3219/**
3220 * xmlParseNCName:
3221 * @ctxt: an XML parser context
3222 * @len: lenght of the string parsed
3223 *
3224 * parse an XML name.
3225 *
3226 * [4NS] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
3227 * CombiningChar | Extender
3228 *
3229 * [5NS] NCName ::= (Letter | '_') (NCNameChar)*
3230 *
3231 * Returns the Name parsed or NULL
3232 */
3233
3234static const xmlChar *
3235xmlParseNCName(xmlParserCtxtPtr ctxt) {
3236 const xmlChar *in;
3237 const xmlChar *ret;
3238 int count = 0;
3239
Daniel Veillardc6561462009-03-25 10:22:31 +00003240#ifdef DEBUG
Daniel Veillard34e3f642008-07-29 09:02:27 +00003241 nbParseNCName++;
Daniel Veillardc6561462009-03-25 10:22:31 +00003242#endif
Daniel Veillard34e3f642008-07-29 09:02:27 +00003243
3244 /*
3245 * Accelerator for simple ASCII names
3246 */
3247 in = ctxt->input->cur;
3248 if (((*in >= 0x61) && (*in <= 0x7A)) ||
3249 ((*in >= 0x41) && (*in <= 0x5A)) ||
3250 (*in == '_')) {
3251 in++;
3252 while (((*in >= 0x61) && (*in <= 0x7A)) ||
3253 ((*in >= 0x41) && (*in <= 0x5A)) ||
3254 ((*in >= 0x30) && (*in <= 0x39)) ||
3255 (*in == '_') || (*in == '-') ||
3256 (*in == '.'))
3257 in++;
3258 if ((*in > 0) && (*in < 0x80)) {
3259 count = in - ctxt->input->cur;
3260 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
3261 ctxt->input->cur = in;
3262 ctxt->nbChars += count;
3263 ctxt->input->col += count;
3264 if (ret == NULL) {
3265 xmlErrMemory(ctxt, NULL);
3266 }
3267 return(ret);
3268 }
3269 }
3270 return(xmlParseNCNameComplex(ctxt));
3271}
3272
Daniel Veillard46de64e2002-05-29 08:21:33 +00003273/**
3274 * xmlParseNameAndCompare:
3275 * @ctxt: an XML parser context
3276 *
3277 * parse an XML name and compares for match
3278 * (specialized for endtag parsing)
3279 *
Daniel Veillard46de64e2002-05-29 08:21:33 +00003280 * Returns NULL for an illegal name, (xmlChar*) 1 for success
3281 * and the name for mismatch
3282 */
3283
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003284static const xmlChar *
Daniel Veillard46de64e2002-05-29 08:21:33 +00003285xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003286 register const xmlChar *cmp = other;
3287 register const xmlChar *in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003288 const xmlChar *ret;
Daniel Veillard46de64e2002-05-29 08:21:33 +00003289
3290 GROW;
Daniel Veillard34e3f642008-07-29 09:02:27 +00003291
Daniel Veillard46de64e2002-05-29 08:21:33 +00003292 in = ctxt->input->cur;
3293 while (*in != 0 && *in == *cmp) {
Daniel Veillard34e3f642008-07-29 09:02:27 +00003294 ++in;
Daniel Veillard46de64e2002-05-29 08:21:33 +00003295 ++cmp;
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00003296 ctxt->input->col++;
Daniel Veillard46de64e2002-05-29 08:21:33 +00003297 }
William M. Brack76e95df2003-10-18 16:20:14 +00003298 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
Daniel Veillard34e3f642008-07-29 09:02:27 +00003299 /* success */
Daniel Veillard46de64e2002-05-29 08:21:33 +00003300 ctxt->input->cur = in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003301 return (const xmlChar*) 1;
Daniel Veillard46de64e2002-05-29 08:21:33 +00003302 }
3303 /* failure (or end of input buffer), check with full function */
3304 ret = xmlParseName (ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00003305 /* strings coming from the dictionnary direct compare possible */
3306 if (ret == other) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003307 return (const xmlChar*) 1;
Daniel Veillard46de64e2002-05-29 08:21:33 +00003308 }
3309 return ret;
3310}
3311
Owen Taylor3473f882001-02-23 17:55:21 +00003312/**
3313 * xmlParseStringName:
3314 * @ctxt: an XML parser context
3315 * @str: a pointer to the string pointer (IN/OUT)
3316 *
3317 * parse an XML name.
3318 *
3319 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3320 * CombiningChar | Extender
3321 *
3322 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3323 *
Daniel Veillard807b4de2004-09-26 14:42:56 +00003324 * [6] Names ::= Name (#x20 Name)*
Owen Taylor3473f882001-02-23 17:55:21 +00003325 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003326 * Returns the Name parsed or NULL. The @str pointer
Owen Taylor3473f882001-02-23 17:55:21 +00003327 * is updated to the current location in the string.
3328 */
3329
Daniel Veillard56a4cb82001-03-24 17:00:36 +00003330static xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00003331xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
3332 xmlChar buf[XML_MAX_NAMELEN + 5];
3333 const xmlChar *cur = *str;
3334 int len = 0, l;
3335 int c;
3336
Daniel Veillardc6561462009-03-25 10:22:31 +00003337#ifdef DEBUG
Daniel Veillard34e3f642008-07-29 09:02:27 +00003338 nbParseStringName++;
Daniel Veillardc6561462009-03-25 10:22:31 +00003339#endif
Daniel Veillard34e3f642008-07-29 09:02:27 +00003340
Owen Taylor3473f882001-02-23 17:55:21 +00003341 c = CUR_SCHAR(cur, l);
Daniel Veillard34e3f642008-07-29 09:02:27 +00003342 if (!xmlIsNameStartChar(ctxt, c)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003343 return(NULL);
3344 }
3345
Daniel Veillard34e3f642008-07-29 09:02:27 +00003346 COPY_BUF(l,buf,len,c);
3347 cur += l;
3348 c = CUR_SCHAR(cur, l);
3349 while (xmlIsNameChar(ctxt, c)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003350 COPY_BUF(l,buf,len,c);
3351 cur += l;
3352 c = CUR_SCHAR(cur, l);
3353 if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
3354 /*
3355 * Okay someone managed to make a huge name, so he's ready to pay
3356 * for the processing speed.
3357 */
3358 xmlChar *buffer;
3359 int max = len * 2;
Daniel Veillard34e3f642008-07-29 09:02:27 +00003360
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003361 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003362 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003363 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003364 return(NULL);
3365 }
3366 memcpy(buffer, buf, len);
Daniel Veillard34e3f642008-07-29 09:02:27 +00003367 while (xmlIsNameChar(ctxt, c)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003368 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003369 xmlChar *tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003370 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003371 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00003372 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00003373 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003374 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00003375 xmlFree(buffer);
Owen Taylor3473f882001-02-23 17:55:21 +00003376 return(NULL);
3377 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003378 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003379 }
3380 COPY_BUF(l,buffer,len,c);
3381 cur += l;
3382 c = CUR_SCHAR(cur, l);
3383 }
3384 buffer[len] = 0;
3385 *str = cur;
3386 return(buffer);
3387 }
3388 }
3389 *str = cur;
3390 return(xmlStrndup(buf, len));
3391}
3392
3393/**
3394 * xmlParseNmtoken:
3395 * @ctxt: an XML parser context
Daniel Veillard34e3f642008-07-29 09:02:27 +00003396 *
Owen Taylor3473f882001-02-23 17:55:21 +00003397 * parse an XML Nmtoken.
3398 *
3399 * [7] Nmtoken ::= (NameChar)+
3400 *
Daniel Veillard807b4de2004-09-26 14:42:56 +00003401 * [8] Nmtokens ::= Nmtoken (#x20 Nmtoken)*
Owen Taylor3473f882001-02-23 17:55:21 +00003402 *
3403 * Returns the Nmtoken parsed or NULL
3404 */
3405
3406xmlChar *
3407xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
3408 xmlChar buf[XML_MAX_NAMELEN + 5];
3409 int len = 0, l;
3410 int c;
3411 int count = 0;
3412
Daniel Veillardc6561462009-03-25 10:22:31 +00003413#ifdef DEBUG
Daniel Veillard34e3f642008-07-29 09:02:27 +00003414 nbParseNmToken++;
Daniel Veillardc6561462009-03-25 10:22:31 +00003415#endif
Daniel Veillard34e3f642008-07-29 09:02:27 +00003416
Owen Taylor3473f882001-02-23 17:55:21 +00003417 GROW;
3418 c = CUR_CHAR(l);
3419
Daniel Veillard34e3f642008-07-29 09:02:27 +00003420 while (xmlIsNameChar(ctxt, c)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003421 if (count++ > 100) {
3422 count = 0;
3423 GROW;
3424 }
3425 COPY_BUF(l,buf,len,c);
3426 NEXTL(l);
3427 c = CUR_CHAR(l);
3428 if (len >= XML_MAX_NAMELEN) {
3429 /*
3430 * Okay someone managed to make a huge token, so he's ready to pay
3431 * for the processing speed.
3432 */
3433 xmlChar *buffer;
3434 int max = len * 2;
Daniel Veillard34e3f642008-07-29 09:02:27 +00003435
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003436 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003437 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003438 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003439 return(NULL);
3440 }
3441 memcpy(buffer, buf, len);
Daniel Veillard34e3f642008-07-29 09:02:27 +00003442 while (xmlIsNameChar(ctxt, c)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003443 if (count++ > 100) {
3444 count = 0;
3445 GROW;
3446 }
3447 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003448 xmlChar *tmp;
3449
Owen Taylor3473f882001-02-23 17:55:21 +00003450 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003451 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00003452 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00003453 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003454 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00003455 xmlFree(buffer);
Owen Taylor3473f882001-02-23 17:55:21 +00003456 return(NULL);
3457 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003458 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003459 }
3460 COPY_BUF(l,buffer,len,c);
3461 NEXTL(l);
3462 c = CUR_CHAR(l);
3463 }
3464 buffer[len] = 0;
3465 return(buffer);
3466 }
3467 }
3468 if (len == 0)
3469 return(NULL);
3470 return(xmlStrndup(buf, len));
3471}
3472
3473/**
3474 * xmlParseEntityValue:
3475 * @ctxt: an XML parser context
3476 * @orig: if non-NULL store a copy of the original entity value
3477 *
3478 * parse a value for ENTITY declarations
3479 *
3480 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
3481 * "'" ([^%&'] | PEReference | Reference)* "'"
3482 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003483 * Returns the EntityValue parsed with reference substituted or NULL
Owen Taylor3473f882001-02-23 17:55:21 +00003484 */
3485
3486xmlChar *
3487xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
3488 xmlChar *buf = NULL;
3489 int len = 0;
3490 int size = XML_PARSER_BUFFER_SIZE;
3491 int c, l;
3492 xmlChar stop;
3493 xmlChar *ret = NULL;
3494 const xmlChar *cur = NULL;
3495 xmlParserInputPtr input;
3496
3497 if (RAW == '"') stop = '"';
3498 else if (RAW == '\'') stop = '\'';
3499 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003500 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003501 return(NULL);
3502 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003503 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003504 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003505 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003506 return(NULL);
3507 }
3508
3509 /*
3510 * The content of the entity definition is copied in a buffer.
3511 */
3512
3513 ctxt->instate = XML_PARSER_ENTITY_VALUE;
3514 input = ctxt->input;
3515 GROW;
3516 NEXT;
3517 c = CUR_CHAR(l);
3518 /*
3519 * NOTE: 4.4.5 Included in Literal
3520 * When a parameter entity reference appears in a literal entity
3521 * value, ... a single or double quote character in the replacement
3522 * text is always treated as a normal data character and will not
3523 * terminate the literal.
3524 * In practice it means we stop the loop only when back at parsing
3525 * the initial entity and the quote is found
3526 */
William M. Brack871611b2003-10-18 04:53:14 +00003527 while ((IS_CHAR(c)) && ((c != stop) || /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00003528 (ctxt->input != input))) {
3529 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003530 xmlChar *tmp;
3531
Owen Taylor3473f882001-02-23 17:55:21 +00003532 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003533 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3534 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003535 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00003536 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00003537 return(NULL);
3538 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003539 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003540 }
3541 COPY_BUF(l,buf,len,c);
3542 NEXTL(l);
3543 /*
3544 * Pop-up of finished entities.
3545 */
3546 while ((RAW == 0) && (ctxt->inputNr > 1)) /* non input consuming */
3547 xmlPopInput(ctxt);
3548
3549 GROW;
3550 c = CUR_CHAR(l);
3551 if (c == 0) {
3552 GROW;
3553 c = CUR_CHAR(l);
3554 }
3555 }
3556 buf[len] = 0;
3557
3558 /*
3559 * Raise problem w.r.t. '&' and '%' being used in non-entities
3560 * reference constructs. Note Charref will be handled in
3561 * xmlStringDecodeEntities()
3562 */
3563 cur = buf;
3564 while (*cur != 0) { /* non input consuming */
3565 if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
3566 xmlChar *name;
3567 xmlChar tmp = *cur;
3568
3569 cur++;
3570 name = xmlParseStringName(ctxt, &cur);
3571 if ((name == NULL) || (*cur != ';')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003572 xmlFatalErrMsgInt(ctxt, XML_ERR_ENTITY_CHAR_ERROR,
Owen Taylor3473f882001-02-23 17:55:21 +00003573 "EntityValue: '%c' forbidden except for entities references\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003574 tmp);
Owen Taylor3473f882001-02-23 17:55:21 +00003575 }
Daniel Veillard5151c062001-10-23 13:10:19 +00003576 if ((tmp == '%') && (ctxt->inSubset == 1) &&
3577 (ctxt->inputNr == 1)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003578 xmlFatalErr(ctxt, XML_ERR_ENTITY_PE_INTERNAL, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003579 }
3580 if (name != NULL)
3581 xmlFree(name);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00003582 if (*cur == 0)
3583 break;
Owen Taylor3473f882001-02-23 17:55:21 +00003584 }
3585 cur++;
3586 }
3587
3588 /*
3589 * Then PEReference entities are substituted.
3590 */
3591 if (c != stop) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003592 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003593 xmlFree(buf);
3594 } else {
3595 NEXT;
3596 /*
3597 * NOTE: 4.4.7 Bypassed
3598 * When a general entity reference appears in the EntityValue in
3599 * an entity declaration, it is bypassed and left as is.
3600 * so XML_SUBSTITUTE_REF is not set here.
3601 */
3602 ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF,
3603 0, 0, 0);
3604 if (orig != NULL)
3605 *orig = buf;
3606 else
3607 xmlFree(buf);
3608 }
3609
3610 return(ret);
3611}
3612
3613/**
Daniel Veillard01c13b52002-12-10 15:19:08 +00003614 * xmlParseAttValueComplex:
3615 * @ctxt: an XML parser context
Daniel Veillard0fb18932003-09-07 09:14:37 +00003616 * @len: the resulting attribute len
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003617 * @normalize: wether to apply the inner normalization
Daniel Veillard01c13b52002-12-10 15:19:08 +00003618 *
3619 * parse a value for an attribute, this is the fallback function
3620 * of xmlParseAttValue() when the attribute parsing requires handling
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003621 * of non-ASCII characters, or normalization compaction.
Daniel Veillard01c13b52002-12-10 15:19:08 +00003622 *
3623 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
3624 */
Daniel Veillard0fb18932003-09-07 09:14:37 +00003625static xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003626xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) {
Daniel Veillarde72c7562002-05-31 09:47:30 +00003627 xmlChar limit = 0;
3628 xmlChar *buf = NULL;
Daniel Veillard68b6e022008-03-31 09:26:00 +00003629 xmlChar *rep = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00003630 int len = 0;
3631 int buf_size = 0;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003632 int c, l, in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003633 xmlChar *current = NULL;
3634 xmlEntityPtr ent;
3635
Owen Taylor3473f882001-02-23 17:55:21 +00003636 if (NXT(0) == '"') {
3637 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3638 limit = '"';
3639 NEXT;
3640 } else if (NXT(0) == '\'') {
3641 limit = '\'';
3642 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3643 NEXT;
3644 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003645 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003646 return(NULL);
3647 }
Daniel Veillard68b6e022008-03-31 09:26:00 +00003648
Owen Taylor3473f882001-02-23 17:55:21 +00003649 /*
3650 * allocate a translation buffer.
3651 */
3652 buf_size = XML_PARSER_BUFFER_SIZE;
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003653 buf = (xmlChar *) xmlMallocAtomic(buf_size * sizeof(xmlChar));
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003654 if (buf == NULL) goto mem_error;
Owen Taylor3473f882001-02-23 17:55:21 +00003655
3656 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003657 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +00003658 */
3659 c = CUR_CHAR(l);
Daniel Veillardfdc91562002-07-01 21:52:03 +00003660 while ((NXT(0) != limit) && /* checked */
Daniel Veillardb9e5acc2007-06-12 13:43:00 +00003661 (IS_CHAR(c)) && (c != '<')) {
Owen Taylor3473f882001-02-23 17:55:21 +00003662 if (c == 0) break;
Daniel Veillardfdc91562002-07-01 21:52:03 +00003663 if (c == '&') {
Daniel Veillard62998c02003-09-15 12:56:36 +00003664 in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003665 if (NXT(1) == '#') {
3666 int val = xmlParseCharRef(ctxt);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003667
Owen Taylor3473f882001-02-23 17:55:21 +00003668 if (val == '&') {
Daniel Veillard319a7422001-09-11 09:27:09 +00003669 if (ctxt->replaceEntities) {
3670 if (len > buf_size - 10) {
Daniel Veillard0161e632008-08-28 15:36:32 +00003671 growBuffer(buf, 10);
Daniel Veillard319a7422001-09-11 09:27:09 +00003672 }
3673 buf[len++] = '&';
3674 } else {
3675 /*
3676 * The reparsing will be done in xmlStringGetNodeList()
3677 * called by the attribute() function in SAX.c
3678 */
Daniel Veillard319a7422001-09-11 09:27:09 +00003679 if (len > buf_size - 10) {
Daniel Veillard0161e632008-08-28 15:36:32 +00003680 growBuffer(buf, 10);
Daniel Veillard319a7422001-09-11 09:27:09 +00003681 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003682 buf[len++] = '&';
3683 buf[len++] = '#';
3684 buf[len++] = '3';
3685 buf[len++] = '8';
3686 buf[len++] = ';';
Owen Taylor3473f882001-02-23 17:55:21 +00003687 }
Daniel Veillarddc171602008-03-26 17:41:38 +00003688 } else if (val != 0) {
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00003689 if (len > buf_size - 10) {
Daniel Veillard0161e632008-08-28 15:36:32 +00003690 growBuffer(buf, 10);
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00003691 }
Owen Taylor3473f882001-02-23 17:55:21 +00003692 len += xmlCopyChar(0, &buf[len], val);
3693 }
3694 } else {
3695 ent = xmlParseEntityRef(ctxt);
Daniel Veillardcba68392008-08-29 12:43:40 +00003696 ctxt->nbentities++;
3697 if (ent != NULL)
3698 ctxt->nbentities += ent->owner;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003699 if ((ent != NULL) &&
3700 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
3701 if (len > buf_size - 10) {
Daniel Veillard0161e632008-08-28 15:36:32 +00003702 growBuffer(buf, 10);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003703 }
3704 if ((ctxt->replaceEntities == 0) &&
3705 (ent->content[0] == '&')) {
3706 buf[len++] = '&';
3707 buf[len++] = '#';
3708 buf[len++] = '3';
3709 buf[len++] = '8';
3710 buf[len++] = ';';
3711 } else {
3712 buf[len++] = ent->content[0];
3713 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003714 } else if ((ent != NULL) &&
3715 (ctxt->replaceEntities != 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003716 if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
3717 rep = xmlStringDecodeEntities(ctxt, ent->content,
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003718 XML_SUBSTITUTE_REF,
3719 0, 0, 0);
Owen Taylor3473f882001-02-23 17:55:21 +00003720 if (rep != NULL) {
3721 current = rep;
3722 while (*current != 0) { /* non input consuming */
Daniel Veillard283d5022009-08-25 17:18:39 +02003723 if ((*current == 0xD) || (*current == 0xA) ||
3724 (*current == 0x9)) {
3725 buf[len++] = 0x20;
3726 current++;
3727 } else
3728 buf[len++] = *current++;
Owen Taylor3473f882001-02-23 17:55:21 +00003729 if (len > buf_size - 10) {
Daniel Veillard0161e632008-08-28 15:36:32 +00003730 growBuffer(buf, 10);
Owen Taylor3473f882001-02-23 17:55:21 +00003731 }
3732 }
3733 xmlFree(rep);
Daniel Veillard68b6e022008-03-31 09:26:00 +00003734 rep = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00003735 }
3736 } else {
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00003737 if (len > buf_size - 10) {
Daniel Veillard0161e632008-08-28 15:36:32 +00003738 growBuffer(buf, 10);
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00003739 }
Owen Taylor3473f882001-02-23 17:55:21 +00003740 if (ent->content != NULL)
3741 buf[len++] = ent->content[0];
3742 }
3743 } else if (ent != NULL) {
3744 int i = xmlStrlen(ent->name);
3745 const xmlChar *cur = ent->name;
3746
3747 /*
3748 * This may look absurd but is needed to detect
3749 * entities problems
3750 */
3751 if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
3752 (ent->content != NULL)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003753 rep = xmlStringDecodeEntities(ctxt, ent->content,
Daniel Veillard68b6e022008-03-31 09:26:00 +00003754 XML_SUBSTITUTE_REF, 0, 0, 0);
3755 if (rep != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +00003756 xmlFree(rep);
Daniel Veillard68b6e022008-03-31 09:26:00 +00003757 rep = NULL;
3758 }
Owen Taylor3473f882001-02-23 17:55:21 +00003759 }
3760
3761 /*
3762 * Just output the reference
3763 */
3764 buf[len++] = '&';
Daniel Veillard0161e632008-08-28 15:36:32 +00003765 while (len > buf_size - i - 10) {
3766 growBuffer(buf, i + 10);
Owen Taylor3473f882001-02-23 17:55:21 +00003767 }
3768 for (;i > 0;i--)
3769 buf[len++] = *cur++;
3770 buf[len++] = ';';
3771 }
3772 }
3773 } else {
3774 if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003775 if ((len != 0) || (!normalize)) {
3776 if ((!normalize) || (!in_space)) {
3777 COPY_BUF(l,buf,len,0x20);
Daniel Veillard0161e632008-08-28 15:36:32 +00003778 while (len > buf_size - 10) {
3779 growBuffer(buf, 10);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003780 }
3781 }
3782 in_space = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003783 }
3784 } else {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003785 in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003786 COPY_BUF(l,buf,len,c);
3787 if (len > buf_size - 10) {
Daniel Veillard0161e632008-08-28 15:36:32 +00003788 growBuffer(buf, 10);
Owen Taylor3473f882001-02-23 17:55:21 +00003789 }
3790 }
3791 NEXTL(l);
3792 }
3793 GROW;
3794 c = CUR_CHAR(l);
3795 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003796 if ((in_space) && (normalize)) {
3797 while (buf[len - 1] == 0x20) len--;
3798 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00003799 buf[len] = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003800 if (RAW == '<') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003801 xmlFatalErr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003802 } else if (RAW != limit) {
Daniel Veillardb9e5acc2007-06-12 13:43:00 +00003803 if ((c != 0) && (!IS_CHAR(c))) {
3804 xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
3805 "invalid character in attribute value\n");
3806 } else {
3807 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
3808 "AttValue: ' expected\n");
3809 }
Owen Taylor3473f882001-02-23 17:55:21 +00003810 } else
3811 NEXT;
Daniel Veillard0fb18932003-09-07 09:14:37 +00003812 if (attlen != NULL) *attlen = len;
Owen Taylor3473f882001-02-23 17:55:21 +00003813 return(buf);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003814
3815mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003816 xmlErrMemory(ctxt, NULL);
Daniel Veillard68b6e022008-03-31 09:26:00 +00003817 if (buf != NULL)
3818 xmlFree(buf);
3819 if (rep != NULL)
3820 xmlFree(rep);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003821 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003822}
3823
3824/**
Daniel Veillard0fb18932003-09-07 09:14:37 +00003825 * xmlParseAttValue:
3826 * @ctxt: an XML parser context
3827 *
3828 * parse a value for an attribute
3829 * Note: the parser won't do substitution of entities here, this
3830 * will be handled later in xmlStringGetNodeList
3831 *
3832 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
3833 * "'" ([^<&'] | Reference)* "'"
3834 *
3835 * 3.3.3 Attribute-Value Normalization:
3836 * Before the value of an attribute is passed to the application or
3837 * checked for validity, the XML processor must normalize it as follows:
3838 * - a character reference is processed by appending the referenced
3839 * character to the attribute value
3840 * - an entity reference is processed by recursively processing the
3841 * replacement text of the entity
3842 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
3843 * appending #x20 to the normalized value, except that only a single
3844 * #x20 is appended for a "#xD#xA" sequence that is part of an external
3845 * parsed entity or the literal entity value of an internal parsed entity
3846 * - other characters are processed by appending them to the normalized value
3847 * If the declared value is not CDATA, then the XML processor must further
3848 * process the normalized attribute value by discarding any leading and
3849 * trailing space (#x20) characters, and by replacing sequences of space
3850 * (#x20) characters by a single space (#x20) character.
3851 * All attributes for which no declaration has been read should be treated
3852 * by a non-validating parser as if declared CDATA.
3853 *
3854 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
3855 */
3856
3857
3858xmlChar *
3859xmlParseAttValue(xmlParserCtxtPtr ctxt) {
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00003860 if ((ctxt == NULL) || (ctxt->input == NULL)) return(NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003861 return(xmlParseAttValueInternal(ctxt, NULL, NULL, 0));
Daniel Veillard0fb18932003-09-07 09:14:37 +00003862}
3863
3864/**
Owen Taylor3473f882001-02-23 17:55:21 +00003865 * xmlParseSystemLiteral:
3866 * @ctxt: an XML parser context
3867 *
3868 * parse an XML Literal
3869 *
3870 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
3871 *
3872 * Returns the SystemLiteral parsed or NULL
3873 */
3874
3875xmlChar *
3876xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
3877 xmlChar *buf = NULL;
3878 int len = 0;
3879 int size = XML_PARSER_BUFFER_SIZE;
3880 int cur, l;
3881 xmlChar stop;
3882 int state = ctxt->instate;
3883 int count = 0;
3884
3885 SHRINK;
3886 if (RAW == '"') {
3887 NEXT;
3888 stop = '"';
3889 } else if (RAW == '\'') {
3890 NEXT;
3891 stop = '\'';
3892 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003893 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003894 return(NULL);
3895 }
3896
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003897 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003898 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003899 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003900 return(NULL);
3901 }
3902 ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
3903 cur = CUR_CHAR(l);
William M. Brack871611b2003-10-18 04:53:14 +00003904 while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00003905 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003906 xmlChar *tmp;
3907
Owen Taylor3473f882001-02-23 17:55:21 +00003908 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003909 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3910 if (tmp == NULL) {
3911 xmlFree(buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003912 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003913 ctxt->instate = (xmlParserInputState) state;
3914 return(NULL);
3915 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003916 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003917 }
3918 count++;
3919 if (count > 50) {
3920 GROW;
3921 count = 0;
3922 }
3923 COPY_BUF(l,buf,len,cur);
3924 NEXTL(l);
3925 cur = CUR_CHAR(l);
3926 if (cur == 0) {
3927 GROW;
3928 SHRINK;
3929 cur = CUR_CHAR(l);
3930 }
3931 }
3932 buf[len] = 0;
3933 ctxt->instate = (xmlParserInputState) state;
William M. Brack871611b2003-10-18 04:53:14 +00003934 if (!IS_CHAR(cur)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003935 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003936 } else {
3937 NEXT;
3938 }
3939 return(buf);
3940}
3941
3942/**
3943 * xmlParsePubidLiteral:
3944 * @ctxt: an XML parser context
3945 *
3946 * parse an XML public literal
3947 *
3948 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
3949 *
3950 * Returns the PubidLiteral parsed or NULL.
3951 */
3952
3953xmlChar *
3954xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
3955 xmlChar *buf = NULL;
3956 int len = 0;
3957 int size = XML_PARSER_BUFFER_SIZE;
3958 xmlChar cur;
3959 xmlChar stop;
3960 int count = 0;
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003961 xmlParserInputState oldstate = ctxt->instate;
Owen Taylor3473f882001-02-23 17:55:21 +00003962
3963 SHRINK;
3964 if (RAW == '"') {
3965 NEXT;
3966 stop = '"';
3967 } else if (RAW == '\'') {
3968 NEXT;
3969 stop = '\'';
3970 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003971 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003972 return(NULL);
3973 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003974 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003975 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003976 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003977 return(NULL);
3978 }
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003979 ctxt->instate = XML_PARSER_PUBLIC_LITERAL;
Owen Taylor3473f882001-02-23 17:55:21 +00003980 cur = CUR;
William M. Brack76e95df2003-10-18 16:20:14 +00003981 while ((IS_PUBIDCHAR_CH(cur)) && (cur != stop)) { /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00003982 if (len + 1 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003983 xmlChar *tmp;
3984
Owen Taylor3473f882001-02-23 17:55:21 +00003985 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003986 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3987 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003988 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00003989 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00003990 return(NULL);
3991 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003992 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003993 }
3994 buf[len++] = cur;
3995 count++;
3996 if (count > 50) {
3997 GROW;
3998 count = 0;
3999 }
4000 NEXT;
4001 cur = CUR;
4002 if (cur == 0) {
4003 GROW;
4004 SHRINK;
4005 cur = CUR;
4006 }
4007 }
4008 buf[len] = 0;
4009 if (cur != stop) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004010 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004011 } else {
4012 NEXT;
4013 }
Daniel Veillard4a7ae502002-02-18 19:18:17 +00004014 ctxt->instate = oldstate;
Owen Taylor3473f882001-02-23 17:55:21 +00004015 return(buf);
4016}
4017
/*
 * Forward declaration: xmlParseCharData() calls this fallback routine,
 * which is defined further down in this file.
 */
static void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata);
Daniel Veillard0a119eb2005-07-20 13:46:00 +00004019
/*
 * Lookup table used by the inner loop of the character data fast path.
 * Indexed by a byte value: the entry is the byte itself (non-zero) when
 * the scan may step over it, and 0 when the scan has to stop and look
 * at the byte more closely.
 *
 * Zero entries: everything below 0x20 except TAB (0x09), the characters
 * '&' (0x26), '<' (0x3C) and ']' (0x5D), and every byte >= 0x80.
 */
static const unsigned char test_char_data[256] = {
    /* 0x00 - 0x0F: only 0x9 (TAB) is accepted, CR/LF handled separately */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x10 - 0x1F */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x20 - 0x2F: '&' (0x26) stops the scan */
    0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x00, 0x27,
    0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
    /* 0x30 - 0x3F: '<' (0x3C) stops the scan */
    0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
    0x38, 0x39, 0x3A, 0x3B, 0x00, 0x3D, 0x3E, 0x3F,
    /* 0x40 - 0x4F */
    0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
    0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F,
    /* 0x50 - 0x5F: ']' (0x5D) stops the scan */
    0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
    0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x00, 0x5E, 0x5F,
    /* 0x60 - 0x6F */
    0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
    0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
    /* 0x70 - 0x7F */
    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
    0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F,
    /* 0x80 - 0xFF: non-ascii, always stops the scan */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
};
4057
/**
 * xmlParseCharData:
 * @ctxt:  an XML parser context
 * @cdata:  int indicating whether we are within a CDATA section
 *
 * parse a CharData section.
 * if we are within a CDATA section ']]>' marks an end of section.
 *
 * The right angle bracket (>) may be represented using the string "&gt;",
 * and must, for compatibility, be escaped using "&gt;" or a character
 * reference when it appears in the string "]]>" in content, when that
 * string is not marking the end of a CDATA section.
 *
 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
 *
 * When @cdata is 0 a fast path scans the input buffer in place and hands
 * runs of bytes directly to the SAX callbacks without copying them. The
 * fast path returns on '<', on '&', on a misplaced "]]>" and when the
 * parser state is no longer XML_PARSER_CONTENT after a callback. In every
 * other case (including @cdata != 0, where the fast path is skipped
 * entirely) the work is finished by xmlParseCharDataComplex().
 */
void
xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
    const xmlChar *in;
    int nbchar = 0;
    /*
     * Position at the last point where input was known to be consumed;
     * written back to the input just before calling the fallback parser.
     */
    int line = ctxt->input->line;
    int col = ctxt->input->col;
    int ccol;

    SHRINK;
    GROW;
    /*
     * Accelerated common case where input doesn't need to be
     * modified before passing it to the handler.
     */
    if (!cdata) {
        in = ctxt->input->cur;
        do {
get_more_space:
            /* Step over leading spaces and line feeds, tracking line/col. */
            while (*in == 0x20) { in++; ctxt->input->col++; }
            if (*in == 0xA) {
                do {
                    ctxt->input->line++; ctxt->input->col = 1;
                    in++;
                } while (*in == 0xA);
                goto get_more_space;
            }
            if (*in == '<') {
                /*
                 * The run is made only of 0x20 and 0xA bytes and is
                 * followed by markup: deliver it (if not empty) and stop.
                 */
                nbchar = in - ctxt->input->cur;
                if (nbchar > 0) {
                    const xmlChar *tmp = ctxt->input->cur;
                    ctxt->input->cur = in;

                    /*
                     * areBlanks() is only consulted when the two callbacks
                     * are distinct; otherwise characters() gets the run.
                     */
                    if ((ctxt->sax != NULL) &&
                        (ctxt->sax->ignorableWhitespace !=
                         ctxt->sax->characters)) {
                        if (areBlanks(ctxt, tmp, nbchar, 1)) {
                            if (ctxt->sax->ignorableWhitespace != NULL)
                                ctxt->sax->ignorableWhitespace(ctxt->userData,
                                                               tmp, nbchar);
                        } else {
                            if (ctxt->sax->characters != NULL)
                                ctxt->sax->characters(ctxt->userData,
                                                      tmp, nbchar);
                            /*
                             * NOTE(review): the meaning of the -1 / -2
                             * markers in *ctxt->space is not visible in
                             * this part of the file.
                             */
                            if (*ctxt->space == -1)
                                *ctxt->space = -2;
                        }
                    } else if ((ctxt->sax != NULL) &&
                               (ctxt->sax->characters != NULL)) {
                        ctxt->sax->characters(ctxt->userData,
                                              tmp, nbchar);
                    }
                }
                return;
            }

get_more:
            /*
             * Skip every byte with a non-zero entry in test_char_data;
             * the column is kept in a local while scanning.
             */
            ccol = ctxt->input->col;
            while (test_char_data[*in]) {
                in++;
                ccol++;
            }
            ctxt->input->col = ccol;
            if (*in == 0xA) {
                do {
                    ctxt->input->line++; ctxt->input->col = 1;
                    in++;
                } while (*in == 0xA);
                goto get_more;
            }
            if (*in == ']') {
                /*
                 * "]]>" outside of a CDATA section is an error. Note that
                 * on this path the input position is moved to the "]]>"
                 * without passing the bytes scanned so far to SAX.
                 */
                if ((in[1] == ']') && (in[2] == '>')) {
                    xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
                    ctxt->input->cur = in;
                    return;
                }
                /* A lone ']' is ordinary character data. */
                in++;
                ctxt->input->col++;
                goto get_more;
            }
            nbchar = in - ctxt->input->cur;
            if (nbchar > 0) {
                /*
                 * areBlanks() is only consulted when the callbacks are
                 * distinct and the run starts with a blank character.
                 */
                if ((ctxt->sax != NULL) &&
                    (ctxt->sax->ignorableWhitespace !=
                     ctxt->sax->characters) &&
                    (IS_BLANK_CH(*ctxt->input->cur))) {
                    const xmlChar *tmp = ctxt->input->cur;
                    ctxt->input->cur = in;

                    if (areBlanks(ctxt, tmp, nbchar, 0)) {
                        if (ctxt->sax->ignorableWhitespace != NULL)
                            ctxt->sax->ignorableWhitespace(ctxt->userData,
                                                           tmp, nbchar);
                    } else {
                        if (ctxt->sax->characters != NULL)
                            ctxt->sax->characters(ctxt->userData,
                                                  tmp, nbchar);
                        if (*ctxt->space == -1)
                            *ctxt->space = -2;
                    }
                    /* The run was consumed: remember the new position. */
                    line = ctxt->input->line;
                    col = ctxt->input->col;
                } else if (ctxt->sax != NULL) {
                    /*
                     * Here ctxt->input->cur still points at the start of
                     * the run; it is advanced after the callback.
                     */
                    if (ctxt->sax->characters != NULL)
                        ctxt->sax->characters(ctxt->userData,
                                              ctxt->input->cur, nbchar);
                    line = ctxt->input->line;
                    col = ctxt->input->col;
                }
                /* something really bad happened in the SAX callback */
                if (ctxt->instate != XML_PARSER_CONTENT)
                    return;
            }
            ctxt->input->cur = in;
            if (*in == 0xD) {
                in++;
                if (*in == 0xA) {
                    /*
                     * CR LF: the input position is set on the LF, so the
                     * CR is left out and the LF starts the next run.
                     */
                    ctxt->input->cur = in;
                    in++;
                    ctxt->input->line++; ctxt->input->col = 1;
                    continue; /* while */
                }
                /*
                 * CR not followed by LF: step back. The loop condition
                 * below rejects 0xD, so it ends up in the fallback parser.
                 */
                in--;
            }
            if (*in == '<') {
                return;
            }
            if (*in == '&') {
                return;
            }
            /* Refresh the input buffer and resume from the new position. */
            SHRINK;
            GROW;
            in = ctxt->input->cur;
        } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
        nbchar = 0;
    }
    /* Restore the last recorded position and let the fallback finish. */
    ctxt->input->line = line;
    ctxt->input->col = col;
    xmlParseCharDataComplex(ctxt, cdata);
}
4213
Daniel Veillard01c13b52002-12-10 15:19:08 +00004214/**
4215 * xmlParseCharDataComplex:
4216 * @ctxt: an XML parser context
4217 * @cdata: int indicating whether we are within a CDATA section
4218 *
4219 * parse a CharData section.this is the fallback function
4220 * of xmlParseCharData() when the parsing requires handling
4221 * of non-ASCII characters.
4222 */
Daniel Veillard8ed10722009-08-20 19:17:36 +02004223static void
Daniel Veillard48b2f892001-02-25 16:11:03 +00004224xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) {
Owen Taylor3473f882001-02-23 17:55:21 +00004225 xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
4226 int nbchar = 0;
4227 int cur, l;
4228 int count = 0;
4229
4230 SHRINK;
4231 GROW;
4232 cur = CUR_CHAR(l);
Daniel Veillardfdc91562002-07-01 21:52:03 +00004233 while ((cur != '<') && /* checked */
4234 (cur != '&') &&
William M. Brack871611b2003-10-18 04:53:14 +00004235 (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
Owen Taylor3473f882001-02-23 17:55:21 +00004236 if ((cur == ']') && (NXT(1) == ']') &&
4237 (NXT(2) == '>')) {
4238 if (cdata) break;
4239 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004240 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004241 }
4242 }
4243 COPY_BUF(l,buf,nbchar,cur);
4244 if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
Daniel Veillard092643b2003-09-25 14:29:29 +00004245 buf[nbchar] = 0;
4246
Owen Taylor3473f882001-02-23 17:55:21 +00004247 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00004248 * OK the segment is to be consumed as chars.
Owen Taylor3473f882001-02-23 17:55:21 +00004249 */
4250 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004251 if (areBlanks(ctxt, buf, nbchar, 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00004252 if (ctxt->sax->ignorableWhitespace != NULL)
4253 ctxt->sax->ignorableWhitespace(ctxt->userData,
4254 buf, nbchar);
4255 } else {
4256 if (ctxt->sax->characters != NULL)
4257 ctxt->sax->characters(ctxt->userData, buf, nbchar);
Daniel Veillard1114d002006-10-12 16:24:35 +00004258 if ((ctxt->sax->characters !=
4259 ctxt->sax->ignorableWhitespace) &&
4260 (*ctxt->space == -1))
4261 *ctxt->space = -2;
Owen Taylor3473f882001-02-23 17:55:21 +00004262 }
4263 }
4264 nbchar = 0;
Daniel Veillard1dc9feb2008-11-17 15:59:21 +00004265 /* something really bad happened in the SAX callback */
4266 if (ctxt->instate != XML_PARSER_CONTENT)
4267 return;
Owen Taylor3473f882001-02-23 17:55:21 +00004268 }
4269 count++;
4270 if (count > 50) {
4271 GROW;
4272 count = 0;
4273 }
4274 NEXTL(l);
4275 cur = CUR_CHAR(l);
4276 }
4277 if (nbchar != 0) {
Daniel Veillard092643b2003-09-25 14:29:29 +00004278 buf[nbchar] = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00004279 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00004280 * OK the segment is to be consumed as chars.
Owen Taylor3473f882001-02-23 17:55:21 +00004281 */
4282 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004283 if (areBlanks(ctxt, buf, nbchar, 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00004284 if (ctxt->sax->ignorableWhitespace != NULL)
4285 ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
4286 } else {
4287 if (ctxt->sax->characters != NULL)
4288 ctxt->sax->characters(ctxt->userData, buf, nbchar);
Daniel Veillard1114d002006-10-12 16:24:35 +00004289 if ((ctxt->sax->characters != ctxt->sax->ignorableWhitespace) &&
4290 (*ctxt->space == -1))
4291 *ctxt->space = -2;
Owen Taylor3473f882001-02-23 17:55:21 +00004292 }
4293 }
4294 }
Daniel Veillard3b1478b2006-04-24 08:50:10 +00004295 if ((cur != 0) && (!IS_CHAR(cur))) {
4296 /* Generate the error and skip the offending character */
4297 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4298 "PCDATA invalid Char value %d\n",
4299 cur);
4300 NEXTL(l);
4301 }
Owen Taylor3473f882001-02-23 17:55:21 +00004302}
4303
4304/**
4305 * xmlParseExternalID:
4306 * @ctxt: an XML parser context
4307 * @publicID: a xmlChar** receiving PubidLiteral
4308 * @strict: indicate whether we should restrict parsing to only
4309 * production [75], see NOTE below
4310 *
4311 * Parse an External ID or a Public ID
4312 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00004313 * NOTE: Productions [75] and [83] interact badly since [75] can generate
Owen Taylor3473f882001-02-23 17:55:21 +00004314 * 'PUBLIC' S PubidLiteral S SystemLiteral
4315 *
4316 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
4317 * | 'PUBLIC' S PubidLiteral S SystemLiteral
4318 *
4319 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
4320 *
4321 * Returns the function returns SystemLiteral and in the second
4322 * case publicID receives PubidLiteral, is strict is off
4323 * it is possible to return NULL and have publicID set.
4324 */
4325
4326xmlChar *
4327xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
4328 xmlChar *URI = NULL;
4329
4330 SHRINK;
Daniel Veillard146c9122001-03-22 15:22:27 +00004331
4332 *publicID = NULL;
Daniel Veillarda07050d2003-10-19 14:46:32 +00004333 if (CMP6(CUR_PTR, 'S', 'Y', 'S', 'T', 'E', 'M')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004334 SKIP(6);
William M. Brack76e95df2003-10-18 16:20:14 +00004335 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004336 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4337 "Space required after 'SYSTEM'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004338 }
4339 SKIP_BLANKS;
4340 URI = xmlParseSystemLiteral(ctxt);
4341 if (URI == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004342 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004343 }
Daniel Veillarda07050d2003-10-19 14:46:32 +00004344 } else if (CMP6(CUR_PTR, 'P', 'U', 'B', 'L', 'I', 'C')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004345 SKIP(6);
William M. Brack76e95df2003-10-18 16:20:14 +00004346 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004347 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004348 "Space required after 'PUBLIC'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004349 }
4350 SKIP_BLANKS;
4351 *publicID = xmlParsePubidLiteral(ctxt);
4352 if (*publicID == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004353 xmlFatalErr(ctxt, XML_ERR_PUBID_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004354 }
4355 if (strict) {
4356 /*
4357 * We don't handle [83] so "S SystemLiteral" is required.
4358 */
William M. Brack76e95df2003-10-18 16:20:14 +00004359 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004360 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004361 "Space required after the Public Identifier\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004362 }
4363 } else {
4364 /*
4365 * We handle [83] so we return immediately, if
4366 * "S SystemLiteral" is not detected. From a purely parsing
4367 * point of view that's a nice mess.
4368 */
4369 const xmlChar *ptr;
4370 GROW;
4371
4372 ptr = CUR_PTR;
William M. Brack76e95df2003-10-18 16:20:14 +00004373 if (!IS_BLANK_CH(*ptr)) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004374
William M. Brack76e95df2003-10-18 16:20:14 +00004375 while (IS_BLANK_CH(*ptr)) ptr++; /* TODO: dangerous, fix ! */
Owen Taylor3473f882001-02-23 17:55:21 +00004376 if ((*ptr != '\'') && (*ptr != '"')) return(NULL);
4377 }
4378 SKIP_BLANKS;
4379 URI = xmlParseSystemLiteral(ctxt);
4380 if (URI == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004381 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004382 }
4383 }
4384 return(URI);
4385}
4386
4387/**
Daniel Veillard4c778d82005-01-23 17:37:44 +00004388 * xmlParseCommentComplex:
Owen Taylor3473f882001-02-23 17:55:21 +00004389 * @ctxt: an XML parser context
Daniel Veillard4c778d82005-01-23 17:37:44 +00004390 * @buf: the already parsed part of the buffer
4391 * @len: number of bytes filles in the buffer
4392 * @size: allocated size of the buffer
Owen Taylor3473f882001-02-23 17:55:21 +00004393 *
4394 * Skip an XML (SGML) comment <!-- .... -->
4395 * The spec says that "For compatibility, the string "--" (double-hyphen)
4396 * must not occur within comments. "
Daniel Veillard4c778d82005-01-23 17:37:44 +00004397 * This is the slow routine in case the accelerator for ascii didn't work
Owen Taylor3473f882001-02-23 17:55:21 +00004398 *
4399 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4400 */
Daniel Veillard4c778d82005-01-23 17:37:44 +00004401static void
4402xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar *buf, int len, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +00004403 int q, ql;
4404 int r, rl;
4405 int cur, l;
Owen Taylor3473f882001-02-23 17:55:21 +00004406 int count = 0;
Daniel Veillard051d52c2008-07-29 16:44:59 +00004407 int inputid;
4408
4409 inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00004410
Owen Taylor3473f882001-02-23 17:55:21 +00004411 if (buf == NULL) {
Daniel Veillard4c778d82005-01-23 17:37:44 +00004412 len = 0;
4413 size = XML_PARSER_BUFFER_SIZE;
4414 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4415 if (buf == NULL) {
4416 xmlErrMemory(ctxt, NULL);
4417 return;
4418 }
Owen Taylor3473f882001-02-23 17:55:21 +00004419 }
William M. Brackbf9a73d2007-02-09 00:07:07 +00004420 GROW; /* Assure there's enough input data */
Owen Taylor3473f882001-02-23 17:55:21 +00004421 q = CUR_CHAR(ql);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00004422 if (q == 0)
4423 goto not_terminated;
Daniel Veillardda629342007-08-01 07:49:06 +00004424 if (!IS_CHAR(q)) {
4425 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4426 "xmlParseComment: invalid xmlChar value %d\n",
4427 q);
4428 xmlFree (buf);
4429 return;
4430 }
Owen Taylor3473f882001-02-23 17:55:21 +00004431 NEXTL(ql);
4432 r = CUR_CHAR(rl);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00004433 if (r == 0)
4434 goto not_terminated;
Daniel Veillardda629342007-08-01 07:49:06 +00004435 if (!IS_CHAR(r)) {
4436 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4437 "xmlParseComment: invalid xmlChar value %d\n",
4438 q);
4439 xmlFree (buf);
4440 return;
4441 }
Owen Taylor3473f882001-02-23 17:55:21 +00004442 NEXTL(rl);
4443 cur = CUR_CHAR(l);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00004444 if (cur == 0)
4445 goto not_terminated;
William M. Brack871611b2003-10-18 04:53:14 +00004446 while (IS_CHAR(cur) && /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00004447 ((cur != '>') ||
4448 (r != '-') || (q != '-'))) {
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00004449 if ((r == '-') && (q == '-')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004450 xmlFatalErr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004451 }
4452 if (len + 5 >= size) {
William M. Bracka3215c72004-07-31 16:24:01 +00004453 xmlChar *new_buf;
Owen Taylor3473f882001-02-23 17:55:21 +00004454 size *= 2;
William M. Bracka3215c72004-07-31 16:24:01 +00004455 new_buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4456 if (new_buf == NULL) {
4457 xmlFree (buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004458 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004459 return;
4460 }
William M. Bracka3215c72004-07-31 16:24:01 +00004461 buf = new_buf;
Owen Taylor3473f882001-02-23 17:55:21 +00004462 }
4463 COPY_BUF(ql,buf,len,q);
4464 q = r;
4465 ql = rl;
4466 r = cur;
4467 rl = l;
4468
4469 count++;
4470 if (count > 50) {
4471 GROW;
4472 count = 0;
4473 }
4474 NEXTL(l);
4475 cur = CUR_CHAR(l);
4476 if (cur == 0) {
4477 SHRINK;
4478 GROW;
4479 cur = CUR_CHAR(l);
4480 }
4481 }
4482 buf[len] = 0;
Daniel Veillardda629342007-08-01 07:49:06 +00004483 if (cur == 0) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00004484 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00004485 "Comment not terminated \n<!--%.50s\n", buf);
Daniel Veillardda629342007-08-01 07:49:06 +00004486 } else if (!IS_CHAR(cur)) {
4487 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4488 "xmlParseComment: invalid xmlChar value %d\n",
4489 cur);
Owen Taylor3473f882001-02-23 17:55:21 +00004490 } else {
Daniel Veillard051d52c2008-07-29 16:44:59 +00004491 if (inputid != ctxt->input->id) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004492 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4493 "Comment doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004494 }
4495 NEXT;
4496 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
4497 (!ctxt->disableSAX))
4498 ctxt->sax->comment(ctxt->userData, buf);
Owen Taylor3473f882001-02-23 17:55:21 +00004499 }
Daniel Veillardda629342007-08-01 07:49:06 +00004500 xmlFree(buf);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00004501 return;
4502not_terminated:
4503 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4504 "Comment not terminated\n", NULL);
4505 xmlFree(buf);
Daniel Veillardda629342007-08-01 07:49:06 +00004506 return;
Owen Taylor3473f882001-02-23 17:55:21 +00004507}
Daniel Veillardda629342007-08-01 07:49:06 +00004508
Daniel Veillard4c778d82005-01-23 17:37:44 +00004509/**
4510 * xmlParseComment:
4511 * @ctxt: an XML parser context
4512 *
4513 * Skip an XML (SGML) comment <!-- .... -->
4514 * The spec says that "For compatibility, the string "--" (double-hyphen)
4515 * must not occur within comments. "
4516 *
4517 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4518 */
4519void
4520xmlParseComment(xmlParserCtxtPtr ctxt) {
4521 xmlChar *buf = NULL;
4522 int size = XML_PARSER_BUFFER_SIZE;
Daniel Veillard316a5c32005-01-23 22:56:39 +00004523 int len = 0;
Daniel Veillard4c778d82005-01-23 17:37:44 +00004524 xmlParserInputState state;
4525 const xmlChar *in;
4526 int nbchar = 0, ccol;
Daniel Veillard051d52c2008-07-29 16:44:59 +00004527 int inputid;
Daniel Veillard4c778d82005-01-23 17:37:44 +00004528
4529 /*
4530 * Check that there is a comment right here.
4531 */
4532 if ((RAW != '<') || (NXT(1) != '!') ||
4533 (NXT(2) != '-') || (NXT(3) != '-')) return;
Daniel Veillard4c778d82005-01-23 17:37:44 +00004534 state = ctxt->instate;
4535 ctxt->instate = XML_PARSER_COMMENT;
Daniel Veillard051d52c2008-07-29 16:44:59 +00004536 inputid = ctxt->input->id;
Daniel Veillard4c778d82005-01-23 17:37:44 +00004537 SKIP(4);
4538 SHRINK;
4539 GROW;
4540
4541 /*
4542 * Accelerated common case where input don't need to be
4543 * modified before passing it to the handler.
4544 */
4545 in = ctxt->input->cur;
4546 do {
4547 if (*in == 0xA) {
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004548 do {
Daniel Veillard4c778d82005-01-23 17:37:44 +00004549 ctxt->input->line++; ctxt->input->col = 1;
4550 in++;
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004551 } while (*in == 0xA);
Daniel Veillard4c778d82005-01-23 17:37:44 +00004552 }
4553get_more:
4554 ccol = ctxt->input->col;
4555 while (((*in > '-') && (*in <= 0x7F)) ||
4556 ((*in >= 0x20) && (*in < '-')) ||
4557 (*in == 0x09)) {
4558 in++;
4559 ccol++;
4560 }
4561 ctxt->input->col = ccol;
4562 if (*in == 0xA) {
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004563 do {
Daniel Veillard4c778d82005-01-23 17:37:44 +00004564 ctxt->input->line++; ctxt->input->col = 1;
4565 in++;
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004566 } while (*in == 0xA);
Daniel Veillard4c778d82005-01-23 17:37:44 +00004567 goto get_more;
4568 }
4569 nbchar = in - ctxt->input->cur;
4570 /*
4571 * save current set of data
4572 */
4573 if (nbchar > 0) {
4574 if ((ctxt->sax != NULL) &&
4575 (ctxt->sax->comment != NULL)) {
4576 if (buf == NULL) {
4577 if ((*in == '-') && (in[1] == '-'))
4578 size = nbchar + 1;
4579 else
4580 size = XML_PARSER_BUFFER_SIZE + nbchar;
4581 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4582 if (buf == NULL) {
4583 xmlErrMemory(ctxt, NULL);
4584 ctxt->instate = state;
4585 return;
4586 }
4587 len = 0;
4588 } else if (len + nbchar + 1 >= size) {
4589 xmlChar *new_buf;
4590 size += len + nbchar + XML_PARSER_BUFFER_SIZE;
4591 new_buf = (xmlChar *) xmlRealloc(buf,
4592 size * sizeof(xmlChar));
4593 if (new_buf == NULL) {
4594 xmlFree (buf);
4595 xmlErrMemory(ctxt, NULL);
4596 ctxt->instate = state;
4597 return;
4598 }
4599 buf = new_buf;
4600 }
4601 memcpy(&buf[len], ctxt->input->cur, nbchar);
4602 len += nbchar;
4603 buf[len] = 0;
4604 }
4605 }
4606 ctxt->input->cur = in;
Daniel Veillard9b528c72006-02-05 03:06:15 +00004607 if (*in == 0xA) {
4608 in++;
4609 ctxt->input->line++; ctxt->input->col = 1;
4610 }
Daniel Veillard4c778d82005-01-23 17:37:44 +00004611 if (*in == 0xD) {
4612 in++;
4613 if (*in == 0xA) {
4614 ctxt->input->cur = in;
4615 in++;
4616 ctxt->input->line++; ctxt->input->col = 1;
4617 continue; /* while */
4618 }
4619 in--;
4620 }
4621 SHRINK;
4622 GROW;
4623 in = ctxt->input->cur;
4624 if (*in == '-') {
4625 if (in[1] == '-') {
4626 if (in[2] == '>') {
Daniel Veillard051d52c2008-07-29 16:44:59 +00004627 if (ctxt->input->id != inputid) {
4628 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4629 "comment doesn't start and stop in the same entity\n");
4630 }
Daniel Veillard4c778d82005-01-23 17:37:44 +00004631 SKIP(3);
4632 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
4633 (!ctxt->disableSAX)) {
4634 if (buf != NULL)
4635 ctxt->sax->comment(ctxt->userData, buf);
4636 else
4637 ctxt->sax->comment(ctxt->userData, BAD_CAST "");
4638 }
4639 if (buf != NULL)
4640 xmlFree(buf);
4641 ctxt->instate = state;
4642 return;
4643 }
4644 if (buf != NULL)
4645 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4646 "Comment not terminated \n<!--%.50s\n",
4647 buf);
4648 else
4649 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4650 "Comment not terminated \n", NULL);
4651 in++;
4652 ctxt->input->col++;
4653 }
4654 in++;
4655 ctxt->input->col++;
4656 goto get_more;
4657 }
4658 } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
4659 xmlParseCommentComplex(ctxt, buf, len, size);
4660 ctxt->instate = state;
4661 return;
4662}
4663
Owen Taylor3473f882001-02-23 17:55:21 +00004664
4665/**
4666 * xmlParsePITarget:
4667 * @ctxt: an XML parser context
4668 *
4669 * parse the name of a PI
4670 *
4671 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
4672 *
4673 * Returns the PITarget name or NULL
4674 */
4675
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004676const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00004677xmlParsePITarget(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004678 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00004679
4680 name = xmlParseName(ctxt);
4681 if ((name != NULL) &&
4682 ((name[0] == 'x') || (name[0] == 'X')) &&
4683 ((name[1] == 'm') || (name[1] == 'M')) &&
4684 ((name[2] == 'l') || (name[2] == 'L'))) {
4685 int i;
4686 if ((name[0] == 'x') && (name[1] == 'm') &&
4687 (name[2] == 'l') && (name[3] == 0)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004688 xmlFatalErrMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
Owen Taylor3473f882001-02-23 17:55:21 +00004689 "XML declaration allowed only at the start of the document\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004690 return(name);
4691 } else if (name[3] == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004692 xmlFatalErr(ctxt, XML_ERR_RESERVED_XML_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004693 return(name);
4694 }
4695 for (i = 0;;i++) {
4696 if (xmlW3CPIs[i] == NULL) break;
4697 if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
4698 return(name);
4699 }
Daniel Veillard24eb9782003-10-04 21:08:09 +00004700 xmlWarningMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
4701 "xmlParsePITarget: invalid name prefix 'xml'\n",
4702 NULL, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004703 }
Daniel Veillard37334572008-07-31 08:20:02 +00004704 if ((name != NULL) && (xmlStrchr(name, ':') != NULL)) {
4705 xmlNsErr(ctxt, XML_NS_ERR_COLON,
4706 "colon are forbidden from PI names '%s'\n", name, NULL, NULL);
4707 }
Owen Taylor3473f882001-02-23 17:55:21 +00004708 return(name);
4709}
4710
#ifdef LIBXML_CATALOG_ENABLED
/**
 * xmlParseCatalogPI:
 * @ctxt:  an XML parser context
 * @catalog:  the PI value string
 *
 * Parse the content of an XML Catalog Processing Instruction.
 *
 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
 *
 * The expected content is: optional blanks, the word "catalog", optional
 * blanks, '=', optional blanks, a value quoted with ' or ", optional
 * blanks.  On success the quoted value is added to ctxt->catalogs with
 * xmlCatalogAddLocal().  A malformed content raises an XML_WAR_CATALOG_PI
 * warning, except when the '=' is missing, in which case the function
 * returns silently.
 */

static void
xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
    const xmlChar *cursor = catalog;
    const xmlChar *start;
    xmlChar *URL = NULL;
    xmlChar quote;

    /* pseudo attribute name */
    while (IS_BLANK_CH(*cursor))
        cursor++;
    if (xmlStrncmp(cursor, BAD_CAST"catalog", 7) != 0)
        goto syntax_error;
    cursor += 7;

    /* '=' sign; its absence is not reported */
    while (IS_BLANK_CH(*cursor))
        cursor++;
    if (*cursor != '=')
        return;
    cursor++;

    /* opening quote */
    while (IS_BLANK_CH(*cursor))
        cursor++;
    quote = *cursor;
    if ((quote != '\'') && (quote != '"'))
        goto syntax_error;
    cursor++;

    /* value, up to the matching quote */
    start = cursor;
    while ((*cursor != 0) && (*cursor != quote))
        cursor++;
    if (*cursor == 0)
        goto syntax_error;
    URL = xmlStrndup(start, cursor - start);
    cursor++;

    /* only blanks may follow */
    while (IS_BLANK_CH(*cursor))
        cursor++;
    if (*cursor != 0)
        goto syntax_error;

    if (URL != NULL) {
        ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, URL);
        xmlFree(URL);
    }
    return;

syntax_error:
    xmlWarningMsg(ctxt, XML_WAR_CATALOG_PI,
                  "Catalog PI syntax error: %s\n",
                  catalog, NULL);
    if (URL != NULL)
        xmlFree(URL);
}
#endif
4772
Owen Taylor3473f882001-02-23 17:55:21 +00004773/**
4774 * xmlParsePI:
4775 * @ctxt: an XML parser context
4776 *
4777 * parse an XML Processing Instruction.
4778 *
4779 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
4780 *
4781 * The processing is transfered to SAX once parsed.
4782 */
4783
4784void
4785xmlParsePI(xmlParserCtxtPtr ctxt) {
4786 xmlChar *buf = NULL;
4787 int len = 0;
4788 int size = XML_PARSER_BUFFER_SIZE;
4789 int cur, l;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004790 const xmlChar *target;
Owen Taylor3473f882001-02-23 17:55:21 +00004791 xmlParserInputState state;
4792 int count = 0;
4793
4794 if ((RAW == '<') && (NXT(1) == '?')) {
4795 xmlParserInputPtr input = ctxt->input;
4796 state = ctxt->instate;
4797 ctxt->instate = XML_PARSER_PI;
4798 /*
4799 * this is a Processing Instruction.
4800 */
4801 SKIP(2);
4802 SHRINK;
4803
4804 /*
4805 * Parse the target name and check for special support like
4806 * namespace.
4807 */
4808 target = xmlParsePITarget(ctxt);
4809 if (target != NULL) {
4810 if ((RAW == '?') && (NXT(1) == '>')) {
4811 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004812 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4813 "PI declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004814 }
4815 SKIP(2);
4816
4817 /*
4818 * SAX: PI detected.
4819 */
4820 if ((ctxt->sax) && (!ctxt->disableSAX) &&
4821 (ctxt->sax->processingInstruction != NULL))
4822 ctxt->sax->processingInstruction(ctxt->userData,
4823 target, NULL);
4824 ctxt->instate = state;
Owen Taylor3473f882001-02-23 17:55:21 +00004825 return;
4826 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00004827 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00004828 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004829 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004830 ctxt->instate = state;
4831 return;
4832 }
4833 cur = CUR;
4834 if (!IS_BLANK(cur)) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00004835 xmlFatalErrMsgStr(ctxt, XML_ERR_SPACE_REQUIRED,
4836 "ParsePI: PI %s space expected\n", target);
Owen Taylor3473f882001-02-23 17:55:21 +00004837 }
4838 SKIP_BLANKS;
4839 cur = CUR_CHAR(l);
William M. Brack871611b2003-10-18 04:53:14 +00004840 while (IS_CHAR(cur) && /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00004841 ((cur != '?') || (NXT(1) != '>'))) {
4842 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00004843 xmlChar *tmp;
4844
Owen Taylor3473f882001-02-23 17:55:21 +00004845 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00004846 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4847 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004848 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00004849 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00004850 ctxt->instate = state;
4851 return;
4852 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00004853 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00004854 }
4855 count++;
4856 if (count > 50) {
4857 GROW;
4858 count = 0;
4859 }
4860 COPY_BUF(l,buf,len,cur);
4861 NEXTL(l);
4862 cur = CUR_CHAR(l);
4863 if (cur == 0) {
4864 SHRINK;
4865 GROW;
4866 cur = CUR_CHAR(l);
4867 }
4868 }
4869 buf[len] = 0;
4870 if (cur != '?') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00004871 xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
4872 "ParsePI: PI %s never end ...\n", target);
Owen Taylor3473f882001-02-23 17:55:21 +00004873 } else {
4874 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004875 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4876 "PI declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004877 }
4878 SKIP(2);
4879
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00004880#ifdef LIBXML_CATALOG_ENABLED
4881 if (((state == XML_PARSER_MISC) ||
4882 (state == XML_PARSER_START)) &&
4883 (xmlStrEqual(target, XML_CATALOG_PI))) {
4884 xmlCatalogAllow allow = xmlCatalogGetDefaults();
4885 if ((allow == XML_CATA_ALLOW_DOCUMENT) ||
4886 (allow == XML_CATA_ALLOW_ALL))
4887 xmlParseCatalogPI(ctxt, buf);
4888 }
4889#endif
4890
4891
Owen Taylor3473f882001-02-23 17:55:21 +00004892 /*
4893 * SAX: PI detected.
4894 */
4895 if ((ctxt->sax) && (!ctxt->disableSAX) &&
4896 (ctxt->sax->processingInstruction != NULL))
4897 ctxt->sax->processingInstruction(ctxt->userData,
4898 target, buf);
4899 }
4900 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00004901 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004902 xmlFatalErr(ctxt, XML_ERR_PI_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004903 }
4904 ctxt->instate = state;
4905 }
4906}
4907
4908/**
4909 * xmlParseNotationDecl:
4910 * @ctxt: an XML parser context
4911 *
4912 * parse a notation declaration
4913 *
4914 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
4915 *
4916 * Hence there is actually 3 choices:
4917 * 'PUBLIC' S PubidLiteral
4918 * 'PUBLIC' S PubidLiteral S SystemLiteral
4919 * and 'SYSTEM' S SystemLiteral
4920 *
4921 * See the NOTE on xmlParseExternalID().
4922 */
4923
4924void
4925xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004926 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00004927 xmlChar *Pubid;
4928 xmlChar *Systemid;
4929
Daniel Veillarda07050d2003-10-19 14:46:32 +00004930 if (CMP10(CUR_PTR, '<', '!', 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004931 xmlParserInputPtr input = ctxt->input;
4932 SHRINK;
4933 SKIP(10);
William M. Brack76e95df2003-10-18 16:20:14 +00004934 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004935 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4936 "Space required after '<!NOTATION'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004937 return;
4938 }
4939 SKIP_BLANKS;
4940
Daniel Veillard76d66f42001-05-16 21:05:17 +00004941 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004942 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004943 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004944 return;
4945 }
William M. Brack76e95df2003-10-18 16:20:14 +00004946 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004947 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004948 "Space required after the NOTATION name'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004949 return;
4950 }
Daniel Veillard37334572008-07-31 08:20:02 +00004951 if (xmlStrchr(name, ':') != NULL) {
4952 xmlNsErr(ctxt, XML_NS_ERR_COLON,
4953 "colon are forbidden from notation names '%s'\n",
4954 name, NULL, NULL);
4955 }
Owen Taylor3473f882001-02-23 17:55:21 +00004956 SKIP_BLANKS;
4957
4958 /*
4959 * Parse the IDs.
4960 */
4961 Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
4962 SKIP_BLANKS;
4963
4964 if (RAW == '>') {
4965 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004966 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4967 "Notation declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004968 }
4969 NEXT;
4970 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4971 (ctxt->sax->notationDecl != NULL))
4972 ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
4973 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004974 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004975 }
Owen Taylor3473f882001-02-23 17:55:21 +00004976 if (Systemid != NULL) xmlFree(Systemid);
4977 if (Pubid != NULL) xmlFree(Pubid);
4978 }
4979}
4980
4981/**
4982 * xmlParseEntityDecl:
4983 * @ctxt: an XML parser context
4984 *
4985 * parse <!ENTITY declarations
4986 *
4987 * [70] EntityDecl ::= GEDecl | PEDecl
4988 *
4989 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
4990 *
4991 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
4992 *
4993 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
4994 *
4995 * [74] PEDef ::= EntityValue | ExternalID
4996 *
4997 * [76] NDataDecl ::= S 'NDATA' S Name
4998 *
4999 * [ VC: Notation Declared ]
5000 * The Name must match the declared name of a notation.
5001 */
5002
5003void
5004xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005005 const xmlChar *name = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005006 xmlChar *value = NULL;
5007 xmlChar *URI = NULL, *literal = NULL;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005008 const xmlChar *ndata = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005009 int isParameter = 0;
5010 xmlChar *orig = NULL;
Daniel Veillardf5582f12002-06-11 10:08:16 +00005011 int skipped;
Owen Taylor3473f882001-02-23 17:55:21 +00005012
Daniel Veillard4c778d82005-01-23 17:37:44 +00005013 /* GROW; done in the caller */
Daniel Veillarda07050d2003-10-19 14:46:32 +00005014 if (CMP8(CUR_PTR, '<', '!', 'E', 'N', 'T', 'I', 'T', 'Y')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005015 xmlParserInputPtr input = ctxt->input;
Owen Taylor3473f882001-02-23 17:55:21 +00005016 SHRINK;
5017 SKIP(8);
Daniel Veillardf5582f12002-06-11 10:08:16 +00005018 skipped = SKIP_BLANKS;
5019 if (skipped == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005020 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5021 "Space required after '<!ENTITY'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005022 }
Owen Taylor3473f882001-02-23 17:55:21 +00005023
5024 if (RAW == '%') {
5025 NEXT;
Daniel Veillardf5582f12002-06-11 10:08:16 +00005026 skipped = SKIP_BLANKS;
5027 if (skipped == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005028 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5029 "Space required after '%'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005030 }
Owen Taylor3473f882001-02-23 17:55:21 +00005031 isParameter = 1;
5032 }
5033
Daniel Veillard76d66f42001-05-16 21:05:17 +00005034 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005035 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005036 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5037 "xmlParseEntityDecl: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005038 return;
5039 }
Daniel Veillard37334572008-07-31 08:20:02 +00005040 if (xmlStrchr(name, ':') != NULL) {
5041 xmlNsErr(ctxt, XML_NS_ERR_COLON,
5042 "colon are forbidden from entities names '%s'\n",
5043 name, NULL, NULL);
5044 }
Daniel Veillardf5582f12002-06-11 10:08:16 +00005045 skipped = SKIP_BLANKS;
5046 if (skipped == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005047 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5048 "Space required after the entity name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005049 }
Owen Taylor3473f882001-02-23 17:55:21 +00005050
Daniel Veillardf5582f12002-06-11 10:08:16 +00005051 ctxt->instate = XML_PARSER_ENTITY_DECL;
Owen Taylor3473f882001-02-23 17:55:21 +00005052 /*
5053 * handle the various case of definitions...
5054 */
5055 if (isParameter) {
5056 if ((RAW == '"') || (RAW == '\'')) {
5057 value = xmlParseEntityValue(ctxt, &orig);
5058 if (value) {
5059 if ((ctxt->sax != NULL) &&
5060 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5061 ctxt->sax->entityDecl(ctxt->userData, name,
5062 XML_INTERNAL_PARAMETER_ENTITY,
5063 NULL, NULL, value);
5064 }
5065 } else {
5066 URI = xmlParseExternalID(ctxt, &literal, 1);
5067 if ((URI == NULL) && (literal == NULL)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005068 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005069 }
5070 if (URI) {
5071 xmlURIPtr uri;
5072
5073 uri = xmlParseURI((const char *) URI);
5074 if (uri == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00005075 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
5076 "Invalid URI: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00005077 /*
5078 * This really ought to be a well formedness error
5079 * but the XML Core WG decided otherwise c.f. issue
5080 * E26 of the XML erratas.
5081 */
Owen Taylor3473f882001-02-23 17:55:21 +00005082 } else {
5083 if (uri->fragment != NULL) {
Daniel Veillard4a7ae502002-02-18 19:18:17 +00005084 /*
5085 * Okay this is foolish to block those but not
5086 * invalid URIs.
5087 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005088 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005089 } else {
5090 if ((ctxt->sax != NULL) &&
5091 (!ctxt->disableSAX) &&
5092 (ctxt->sax->entityDecl != NULL))
5093 ctxt->sax->entityDecl(ctxt->userData, name,
5094 XML_EXTERNAL_PARAMETER_ENTITY,
5095 literal, URI, NULL);
5096 }
5097 xmlFreeURI(uri);
5098 }
5099 }
5100 }
5101 } else {
5102 if ((RAW == '"') || (RAW == '\'')) {
5103 value = xmlParseEntityValue(ctxt, &orig);
5104 if ((ctxt->sax != NULL) &&
5105 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5106 ctxt->sax->entityDecl(ctxt->userData, name,
5107 XML_INTERNAL_GENERAL_ENTITY,
5108 NULL, NULL, value);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005109 /*
5110 * For expat compatibility in SAX mode.
5111 */
5112 if ((ctxt->myDoc == NULL) ||
5113 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
5114 if (ctxt->myDoc == NULL) {
5115 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
Daniel Veillard68b6e022008-03-31 09:26:00 +00005116 if (ctxt->myDoc == NULL) {
5117 xmlErrMemory(ctxt, "New Doc failed");
5118 return;
5119 }
Daniel Veillardae0765b2008-07-31 19:54:59 +00005120 ctxt->myDoc->properties = XML_DOC_INTERNAL;
Daniel Veillard5997aca2002-03-18 18:36:20 +00005121 }
5122 if (ctxt->myDoc->intSubset == NULL)
5123 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5124 BAD_CAST "fake", NULL, NULL);
5125
Daniel Veillard1af9a412003-08-20 22:54:39 +00005126 xmlSAX2EntityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY,
5127 NULL, NULL, value);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005128 }
Owen Taylor3473f882001-02-23 17:55:21 +00005129 } else {
5130 URI = xmlParseExternalID(ctxt, &literal, 1);
5131 if ((URI == NULL) && (literal == NULL)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005132 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005133 }
5134 if (URI) {
5135 xmlURIPtr uri;
5136
5137 uri = xmlParseURI((const char *)URI);
5138 if (uri == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00005139 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
5140 "Invalid URI: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00005141 /*
5142 * This really ought to be a well formedness error
5143 * but the XML Core WG decided otherwise c.f. issue
5144 * E26 of the XML erratas.
5145 */
Owen Taylor3473f882001-02-23 17:55:21 +00005146 } else {
5147 if (uri->fragment != NULL) {
Daniel Veillard4a7ae502002-02-18 19:18:17 +00005148 /*
5149 * Okay this is foolish to block those but not
5150 * invalid URIs.
5151 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005152 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005153 }
5154 xmlFreeURI(uri);
5155 }
5156 }
William M. Brack76e95df2003-10-18 16:20:14 +00005157 if ((RAW != '>') && (!IS_BLANK_CH(CUR))) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005158 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5159 "Space required before 'NDATA'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005160 }
5161 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005162 if (CMP5(CUR_PTR, 'N', 'D', 'A', 'T', 'A')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005163 SKIP(5);
William M. Brack76e95df2003-10-18 16:20:14 +00005164 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005165 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5166 "Space required after 'NDATA'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005167 }
5168 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00005169 ndata = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005170 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5171 (ctxt->sax->unparsedEntityDecl != NULL))
5172 ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
5173 literal, URI, ndata);
5174 } else {
5175 if ((ctxt->sax != NULL) &&
5176 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5177 ctxt->sax->entityDecl(ctxt->userData, name,
5178 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5179 literal, URI, NULL);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005180 /*
5181 * For expat compatibility in SAX mode.
5182 * assuming the entity repalcement was asked for
5183 */
5184 if ((ctxt->replaceEntities != 0) &&
5185 ((ctxt->myDoc == NULL) ||
5186 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) {
5187 if (ctxt->myDoc == NULL) {
5188 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
Daniel Veillard68b6e022008-03-31 09:26:00 +00005189 if (ctxt->myDoc == NULL) {
5190 xmlErrMemory(ctxt, "New Doc failed");
5191 return;
5192 }
Daniel Veillardae0765b2008-07-31 19:54:59 +00005193 ctxt->myDoc->properties = XML_DOC_INTERNAL;
Daniel Veillard5997aca2002-03-18 18:36:20 +00005194 }
5195
5196 if (ctxt->myDoc->intSubset == NULL)
5197 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5198 BAD_CAST "fake", NULL, NULL);
Daniel Veillard1af9a412003-08-20 22:54:39 +00005199 xmlSAX2EntityDecl(ctxt, name,
5200 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5201 literal, URI, NULL);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005202 }
Owen Taylor3473f882001-02-23 17:55:21 +00005203 }
5204 }
5205 }
5206 SKIP_BLANKS;
5207 if (RAW != '>') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00005208 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00005209 "xmlParseEntityDecl: entity %s not terminated\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00005210 } else {
5211 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005212 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5213 "Entity declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005214 }
5215 NEXT;
5216 }
5217 if (orig != NULL) {
5218 /*
5219 * Ugly mechanism to save the raw entity value.
5220 */
5221 xmlEntityPtr cur = NULL;
5222
5223 if (isParameter) {
5224 if ((ctxt->sax != NULL) &&
5225 (ctxt->sax->getParameterEntity != NULL))
5226 cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
5227 } else {
5228 if ((ctxt->sax != NULL) &&
5229 (ctxt->sax->getEntity != NULL))
5230 cur = ctxt->sax->getEntity(ctxt->userData, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005231 if ((cur == NULL) && (ctxt->userData==ctxt)) {
Daniel Veillard1af9a412003-08-20 22:54:39 +00005232 cur = xmlSAX2GetEntity(ctxt, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005233 }
Owen Taylor3473f882001-02-23 17:55:21 +00005234 }
5235 if (cur != NULL) {
5236 if (cur->orig != NULL)
5237 xmlFree(orig);
5238 else
5239 cur->orig = orig;
5240 } else
5241 xmlFree(orig);
5242 }
Owen Taylor3473f882001-02-23 17:55:21 +00005243 if (value != NULL) xmlFree(value);
5244 if (URI != NULL) xmlFree(URI);
5245 if (literal != NULL) xmlFree(literal);
Owen Taylor3473f882001-02-23 17:55:21 +00005246 }
5247}
5248
5249/**
5250 * xmlParseDefaultDecl:
5251 * @ctxt: an XML parser context
5252 * @value: Receive a possible fixed default value for the attribute
5253 *
5254 * Parse an attribute default declaration
5255 *
5256 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
5257 *
5258 * [ VC: Required Attribute ]
5259 * if the default declaration is the keyword #REQUIRED, then the
5260 * attribute must be specified for all elements of the type in the
5261 * attribute-list declaration.
5262 *
5263 * [ VC: Attribute Default Legal ]
5264 * The declared default value must meet the lexical constraints of
5265 * the declared attribute type c.f. xmlValidateAttributeDecl()
5266 *
5267 * [ VC: Fixed Attribute Default ]
5268 * if an attribute has a default value declared with the #FIXED
5269 * keyword, instances of that attribute must match the default value.
5270 *
5271 * [ WFC: No < in Attribute Values ]
5272 * handled in xmlParseAttValue()
5273 *
5274 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
5275 * or XML_ATTRIBUTE_FIXED.
5276 */
5277
5278int
5279xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
5280 int val;
5281 xmlChar *ret;
5282
5283 *value = NULL;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005284 if (CMP9(CUR_PTR, '#', 'R', 'E', 'Q', 'U', 'I', 'R', 'E', 'D')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005285 SKIP(9);
5286 return(XML_ATTRIBUTE_REQUIRED);
5287 }
Daniel Veillarda07050d2003-10-19 14:46:32 +00005288 if (CMP8(CUR_PTR, '#', 'I', 'M', 'P', 'L', 'I', 'E', 'D')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005289 SKIP(8);
5290 return(XML_ATTRIBUTE_IMPLIED);
5291 }
5292 val = XML_ATTRIBUTE_NONE;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005293 if (CMP6(CUR_PTR, '#', 'F', 'I', 'X', 'E', 'D')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005294 SKIP(6);
5295 val = XML_ATTRIBUTE_FIXED;
William M. Brack76e95df2003-10-18 16:20:14 +00005296 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005297 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5298 "Space required after '#FIXED'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005299 }
5300 SKIP_BLANKS;
5301 }
5302 ret = xmlParseAttValue(ctxt);
5303 ctxt->instate = XML_PARSER_DTD;
5304 if (ret == NULL) {
William M. Brack7b9154b2003-09-27 19:23:50 +00005305 xmlFatalErrMsg(ctxt, (xmlParserErrors)ctxt->errNo,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005306 "Attribute default value declaration error\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005307 } else
5308 *value = ret;
5309 return(val);
5310}
5311
5312/**
5313 * xmlParseNotationType:
5314 * @ctxt: an XML parser context
5315 *
5316 * parse an Notation attribute type.
5317 *
5318 * Note: the leading 'NOTATION' S part has already being parsed...
5319 *
5320 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5321 *
5322 * [ VC: Notation Attributes ]
5323 * Values of this type must match one of the notation names included
5324 * in the declaration; all notation names in the declaration must be declared.
5325 *
5326 * Returns: the notation attribute tree built while parsing
5327 */
5328
5329xmlEnumerationPtr
5330xmlParseNotationType(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005331 const xmlChar *name;
Daniel Veillard49d44052008-08-27 19:57:06 +00005332 xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00005333
5334 if (RAW != '(') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005335 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005336 return(NULL);
5337 }
5338 SHRINK;
5339 do {
5340 NEXT;
5341 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00005342 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005343 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005344 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5345 "Name expected in NOTATION declaration\n");
Daniel Veillard489f9672009-08-10 16:49:30 +02005346 xmlFreeEnumeration(ret);
5347 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005348 }
Daniel Veillard49d44052008-08-27 19:57:06 +00005349 tmp = ret;
5350 while (tmp != NULL) {
5351 if (xmlStrEqual(name, tmp->name)) {
5352 xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5353 "standalone: attribute notation value token %s duplicated\n",
5354 name, NULL);
5355 if (!xmlDictOwns(ctxt->dict, name))
5356 xmlFree((xmlChar *) name);
5357 break;
5358 }
5359 tmp = tmp->next;
5360 }
5361 if (tmp == NULL) {
5362 cur = xmlCreateEnumeration(name);
Daniel Veillard489f9672009-08-10 16:49:30 +02005363 if (cur == NULL) {
5364 xmlFreeEnumeration(ret);
5365 return(NULL);
5366 }
Daniel Veillard49d44052008-08-27 19:57:06 +00005367 if (last == NULL) ret = last = cur;
5368 else {
5369 last->next = cur;
5370 last = cur;
5371 }
Owen Taylor3473f882001-02-23 17:55:21 +00005372 }
5373 SKIP_BLANKS;
5374 } while (RAW == '|');
5375 if (RAW != ')') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005376 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
Daniel Veillard489f9672009-08-10 16:49:30 +02005377 xmlFreeEnumeration(ret);
5378 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005379 }
5380 NEXT;
5381 return(ret);
5382}
5383
5384/**
5385 * xmlParseEnumerationType:
5386 * @ctxt: an XML parser context
5387 *
5388 * parse an Enumeration attribute type.
5389 *
5390 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
5391 *
5392 * [ VC: Enumeration ]
5393 * Values of this type must match one of the Nmtoken tokens in
5394 * the declaration
5395 *
5396 * Returns: the enumeration attribute tree built while parsing
5397 */
5398
5399xmlEnumerationPtr
5400xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
5401 xmlChar *name;
Daniel Veillard49d44052008-08-27 19:57:06 +00005402 xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00005403
5404 if (RAW != '(') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005405 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005406 return(NULL);
5407 }
5408 SHRINK;
5409 do {
5410 NEXT;
5411 SKIP_BLANKS;
5412 name = xmlParseNmtoken(ctxt);
5413 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005414 xmlFatalErr(ctxt, XML_ERR_NMTOKEN_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005415 return(ret);
5416 }
Daniel Veillard49d44052008-08-27 19:57:06 +00005417 tmp = ret;
5418 while (tmp != NULL) {
5419 if (xmlStrEqual(name, tmp->name)) {
5420 xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5421 "standalone: attribute enumeration value token %s duplicated\n",
5422 name, NULL);
5423 if (!xmlDictOwns(ctxt->dict, name))
5424 xmlFree(name);
5425 break;
5426 }
5427 tmp = tmp->next;
5428 }
5429 if (tmp == NULL) {
5430 cur = xmlCreateEnumeration(name);
5431 if (!xmlDictOwns(ctxt->dict, name))
5432 xmlFree(name);
Daniel Veillard489f9672009-08-10 16:49:30 +02005433 if (cur == NULL) {
5434 xmlFreeEnumeration(ret);
5435 return(NULL);
5436 }
Daniel Veillard49d44052008-08-27 19:57:06 +00005437 if (last == NULL) ret = last = cur;
5438 else {
5439 last->next = cur;
5440 last = cur;
5441 }
Owen Taylor3473f882001-02-23 17:55:21 +00005442 }
5443 SKIP_BLANKS;
5444 } while (RAW == '|');
5445 if (RAW != ')') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005446 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005447 return(ret);
5448 }
5449 NEXT;
5450 return(ret);
5451}
5452
5453/**
5454 * xmlParseEnumeratedType:
5455 * @ctxt: an XML parser context
5456 * @tree: the enumeration tree built while parsing
5457 *
5458 * parse an Enumerated attribute type.
5459 *
5460 * [57] EnumeratedType ::= NotationType | Enumeration
5461 *
5462 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5463 *
5464 *
5465 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
5466 */
5467
5468int
5469xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
Daniel Veillarda07050d2003-10-19 14:46:32 +00005470 if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005471 SKIP(8);
William M. Brack76e95df2003-10-18 16:20:14 +00005472 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005473 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5474 "Space required after 'NOTATION'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005475 return(0);
5476 }
5477 SKIP_BLANKS;
5478 *tree = xmlParseNotationType(ctxt);
5479 if (*tree == NULL) return(0);
5480 return(XML_ATTRIBUTE_NOTATION);
5481 }
5482 *tree = xmlParseEnumerationType(ctxt);
5483 if (*tree == NULL) return(0);
5484 return(XML_ATTRIBUTE_ENUMERATION);
5485}
5486
5487/**
5488 * xmlParseAttributeType:
5489 * @ctxt: an XML parser context
5490 * @tree: the enumeration tree built while parsing
5491 *
5492 * parse the Attribute list def for an element
5493 *
5494 * [54] AttType ::= StringType | TokenizedType | EnumeratedType
5495 *
5496 * [55] StringType ::= 'CDATA'
5497 *
5498 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
5499 * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
5500 *
5501 * Validity constraints for attribute values syntax are checked in
5502 * xmlValidateAttributeValue()
5503 *
5504 * [ VC: ID ]
5505 * Values of type ID must match the Name production. A name must not
5506 * appear more than once in an XML document as a value of this type;
5507 * i.e., ID values must uniquely identify the elements which bear them.
5508 *
5509 * [ VC: One ID per Element Type ]
5510 * No element type may have more than one ID attribute specified.
5511 *
5512 * [ VC: ID Attribute Default ]
5513 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
5514 *
5515 * [ VC: IDREF ]
5516 * Values of type IDREF must match the Name production, and values
5517 * of type IDREFS must match Names; each IDREF Name must match the value
5518 * of an ID attribute on some element in the XML document; i.e. IDREF
5519 * values must match the value of some ID attribute.
5520 *
5521 * [ VC: Entity Name ]
5522 * Values of type ENTITY must match the Name production, values
5523 * of type ENTITIES must match Names; each Entity Name must match the
5524 * name of an unparsed entity declared in the DTD.
5525 *
5526 * [ VC: Name Token ]
5527 * Values of type NMTOKEN must match the Nmtoken production; values
5528 * of type NMTOKENS must match Nmtokens.
5529 *
5530 * Returns the attribute type
5531 */
5532int
5533xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
5534 SHRINK;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005535 if (CMP5(CUR_PTR, 'C', 'D', 'A', 'T', 'A')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005536 SKIP(5);
5537 return(XML_ATTRIBUTE_CDATA);
Daniel Veillarda07050d2003-10-19 14:46:32 +00005538 } else if (CMP6(CUR_PTR, 'I', 'D', 'R', 'E', 'F', 'S')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005539 SKIP(6);
5540 return(XML_ATTRIBUTE_IDREFS);
Daniel Veillarda07050d2003-10-19 14:46:32 +00005541 } else if (CMP5(CUR_PTR, 'I', 'D', 'R', 'E', 'F')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005542 SKIP(5);
5543 return(XML_ATTRIBUTE_IDREF);
5544 } else if ((RAW == 'I') && (NXT(1) == 'D')) {
5545 SKIP(2);
5546 return(XML_ATTRIBUTE_ID);
Daniel Veillarda07050d2003-10-19 14:46:32 +00005547 } else if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005548 SKIP(6);
5549 return(XML_ATTRIBUTE_ENTITY);
Daniel Veillarda07050d2003-10-19 14:46:32 +00005550 } else if (CMP8(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'I', 'E', 'S')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005551 SKIP(8);
5552 return(XML_ATTRIBUTE_ENTITIES);
Daniel Veillarda07050d2003-10-19 14:46:32 +00005553 } else if (CMP8(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N', 'S')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005554 SKIP(8);
5555 return(XML_ATTRIBUTE_NMTOKENS);
Daniel Veillarda07050d2003-10-19 14:46:32 +00005556 } else if (CMP7(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005557 SKIP(7);
5558 return(XML_ATTRIBUTE_NMTOKEN);
5559 }
5560 return(xmlParseEnumeratedType(ctxt, tree));
5561}
5562
5563/**
5564 * xmlParseAttributeListDecl:
5565 * @ctxt: an XML parser context
5566 *
5567 * : parse the Attribute list def for an element
5568 *
5569 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
5570 *
5571 * [53] AttDef ::= S Name S AttType S DefaultDecl
5572 *
5573 */
5574void
5575xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005576 const xmlChar *elemName;
5577 const xmlChar *attrName;
Owen Taylor3473f882001-02-23 17:55:21 +00005578 xmlEnumerationPtr tree;
5579
Daniel Veillarda07050d2003-10-19 14:46:32 +00005580 if (CMP9(CUR_PTR, '<', '!', 'A', 'T', 'T', 'L', 'I', 'S', 'T')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005581 xmlParserInputPtr input = ctxt->input;
5582
5583 SKIP(9);
William M. Brack76e95df2003-10-18 16:20:14 +00005584 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005585 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005586 "Space required after '<!ATTLIST'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005587 }
5588 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00005589 elemName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005590 if (elemName == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005591 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5592 "ATTLIST: no name for Element\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005593 return;
5594 }
5595 SKIP_BLANKS;
5596 GROW;
5597 while (RAW != '>') {
5598 const xmlChar *check = CUR_PTR;
5599 int type;
5600 int def;
5601 xmlChar *defaultValue = NULL;
5602
5603 GROW;
5604 tree = NULL;
Daniel Veillard76d66f42001-05-16 21:05:17 +00005605 attrName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005606 if (attrName == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005607 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5608 "ATTLIST: no name for Attribute\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005609 break;
5610 }
5611 GROW;
William M. Brack76e95df2003-10-18 16:20:14 +00005612 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005613 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005614 "Space required after the attribute name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005615 break;
5616 }
5617 SKIP_BLANKS;
5618
5619 type = xmlParseAttributeType(ctxt, &tree);
5620 if (type <= 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00005621 break;
5622 }
5623
5624 GROW;
William M. Brack76e95df2003-10-18 16:20:14 +00005625 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005626 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5627 "Space required after the attribute type\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005628 if (tree != NULL)
5629 xmlFreeEnumeration(tree);
5630 break;
5631 }
5632 SKIP_BLANKS;
5633
5634 def = xmlParseDefaultDecl(ctxt, &defaultValue);
5635 if (def <= 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00005636 if (defaultValue != NULL)
5637 xmlFree(defaultValue);
5638 if (tree != NULL)
5639 xmlFreeEnumeration(tree);
5640 break;
5641 }
Daniel Veillard97c9ce22008-03-25 16:52:41 +00005642 if ((type != XML_ATTRIBUTE_CDATA) && (defaultValue != NULL))
5643 xmlAttrNormalizeSpace(defaultValue, defaultValue);
Owen Taylor3473f882001-02-23 17:55:21 +00005644
5645 GROW;
5646 if (RAW != '>') {
William M. Brack76e95df2003-10-18 16:20:14 +00005647 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005648 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005649 "Space required after the attribute default value\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005650 if (defaultValue != NULL)
5651 xmlFree(defaultValue);
5652 if (tree != NULL)
5653 xmlFreeEnumeration(tree);
5654 break;
5655 }
5656 SKIP_BLANKS;
5657 }
5658 if (check == CUR_PTR) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005659 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
5660 "in xmlParseAttributeListDecl\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005661 if (defaultValue != NULL)
5662 xmlFree(defaultValue);
5663 if (tree != NULL)
5664 xmlFreeEnumeration(tree);
5665 break;
5666 }
5667 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5668 (ctxt->sax->attributeDecl != NULL))
5669 ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
5670 type, def, defaultValue, tree);
Daniel Veillarde57ec792003-09-10 10:50:59 +00005671 else if (tree != NULL)
5672 xmlFreeEnumeration(tree);
5673
5674 if ((ctxt->sax2) && (defaultValue != NULL) &&
5675 (def != XML_ATTRIBUTE_IMPLIED) &&
5676 (def != XML_ATTRIBUTE_REQUIRED)) {
5677 xmlAddDefAttrs(ctxt, elemName, attrName, defaultValue);
5678 }
Daniel Veillardac4118d2008-01-11 05:27:32 +00005679 if (ctxt->sax2) {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00005680 xmlAddSpecialAttr(ctxt, elemName, attrName, type);
5681 }
Owen Taylor3473f882001-02-23 17:55:21 +00005682 if (defaultValue != NULL)
5683 xmlFree(defaultValue);
5684 GROW;
5685 }
5686 if (RAW == '>') {
5687 if (input != ctxt->input) {
Daniel Veillard49d44052008-08-27 19:57:06 +00005688 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
5689 "Attribute list declaration doesn't start and stop in the same entity\n",
5690 NULL, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005691 }
5692 NEXT;
5693 }
Owen Taylor3473f882001-02-23 17:55:21 +00005694 }
5695}
5696
5697/**
5698 * xmlParseElementMixedContentDecl:
5699 * @ctxt: an XML parser context
Daniel Veillarda9b66d02002-12-11 14:23:49 +00005700 * @inputchk: the input used for the current entity, needed for boundary checks
Owen Taylor3473f882001-02-23 17:55:21 +00005701 *
5702 * parse the declaration for a Mixed Element content
5703 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
5704 *
5705 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
5706 * '(' S? '#PCDATA' S? ')'
5707 *
5708 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
5709 *
5710 * [ VC: No Duplicate Types ]
5711 * The same name must not appear more than once in a single
5712 * mixed-content declaration.
5713 *
5714 * returns: the list of the xmlElementContentPtr describing the element choices
5715 */
5716xmlElementContentPtr
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005717xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
Owen Taylor3473f882001-02-23 17:55:21 +00005718 xmlElementContentPtr ret = NULL, cur = NULL, n;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005719 const xmlChar *elem = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005720
5721 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005722 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005723 SKIP(7);
5724 SKIP_BLANKS;
5725 SHRINK;
5726 if (RAW == ')') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005727 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00005728 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
5729"Element content declaration doesn't start and stop in the same entity\n",
Daniel Veillardae0765b2008-07-31 19:54:59 +00005730 NULL, NULL);
Daniel Veillard8dc16a62002-02-19 21:08:48 +00005731 }
Owen Taylor3473f882001-02-23 17:55:21 +00005732 NEXT;
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005733 ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
Daniel Veillard68b6e022008-03-31 09:26:00 +00005734 if (ret == NULL)
5735 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005736 if (RAW == '*') {
5737 ret->ocur = XML_ELEMENT_CONTENT_MULT;
5738 NEXT;
5739 }
5740 return(ret);
5741 }
5742 if ((RAW == '(') || (RAW == '|')) {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005743 ret = cur = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
Owen Taylor3473f882001-02-23 17:55:21 +00005744 if (ret == NULL) return(NULL);
5745 }
5746 while (RAW == '|') {
5747 NEXT;
5748 if (elem == NULL) {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005749 ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
Owen Taylor3473f882001-02-23 17:55:21 +00005750 if (ret == NULL) return(NULL);
5751 ret->c1 = cur;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005752 if (cur != NULL)
5753 cur->parent = ret;
Owen Taylor3473f882001-02-23 17:55:21 +00005754 cur = ret;
5755 } else {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005756 n = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
Owen Taylor3473f882001-02-23 17:55:21 +00005757 if (n == NULL) return(NULL);
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005758 n->c1 = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005759 if (n->c1 != NULL)
5760 n->c1->parent = n;
Owen Taylor3473f882001-02-23 17:55:21 +00005761 cur->c2 = n;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005762 if (n != NULL)
5763 n->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00005764 cur = n;
Owen Taylor3473f882001-02-23 17:55:21 +00005765 }
5766 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00005767 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005768 if (elem == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005769 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005770 "xmlParseElementMixedContentDecl : Name expected\n");
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005771 xmlFreeDocElementContent(ctxt->myDoc, cur);
Owen Taylor3473f882001-02-23 17:55:21 +00005772 return(NULL);
5773 }
5774 SKIP_BLANKS;
5775 GROW;
5776 }
5777 if ((RAW == ')') && (NXT(1) == '*')) {
5778 if (elem != NULL) {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005779 cur->c2 = xmlNewDocElementContent(ctxt->myDoc, elem,
Owen Taylor3473f882001-02-23 17:55:21 +00005780 XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005781 if (cur->c2 != NULL)
5782 cur->c2->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00005783 }
Daniel Veillardd44b9362009-09-07 12:15:08 +02005784 if (ret != NULL)
5785 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005786 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00005787 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
5788"Element content declaration doesn't start and stop in the same entity\n",
Daniel Veillardae0765b2008-07-31 19:54:59 +00005789 NULL, NULL);
Daniel Veillard8dc16a62002-02-19 21:08:48 +00005790 }
Owen Taylor3473f882001-02-23 17:55:21 +00005791 SKIP(2);
5792 } else {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005793 xmlFreeDocElementContent(ctxt->myDoc, ret);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005794 xmlFatalErr(ctxt, XML_ERR_MIXED_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005795 return(NULL);
5796 }
5797
5798 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005799 xmlFatalErr(ctxt, XML_ERR_PCDATA_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005800 }
5801 return(ret);
5802}
5803
5804/**
Daniel Veillard489f9672009-08-10 16:49:30 +02005805 * xmlParseElementChildrenContentDeclPriv:
Owen Taylor3473f882001-02-23 17:55:21 +00005806 * @ctxt: an XML parser context
Daniel Veillarda9b66d02002-12-11 14:23:49 +00005807 * @inputchk: the input used for the current entity, needed for boundary checks
Daniel Veillard489f9672009-08-10 16:49:30 +02005808 * @depth: the level of recursion
Owen Taylor3473f882001-02-23 17:55:21 +00005809 *
5810 * parse the declaration for a Mixed Element content
5811 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
5812 *
5813 *
5814 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
5815 *
5816 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
5817 *
5818 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
5819 *
5820 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
5821 *
5822 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
5823 * TODO Parameter-entity replacement text must be properly nested
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005824 * with parenthesized groups. That is to say, if either of the
Owen Taylor3473f882001-02-23 17:55:21 +00005825 * opening or closing parentheses in a choice, seq, or Mixed
5826 * construct is contained in the replacement text for a parameter
5827 * entity, both must be contained in the same replacement text. For
5828 * interoperability, if a parameter-entity reference appears in a
5829 * choice, seq, or Mixed construct, its replacement text should not
5830 * be empty, and neither the first nor last non-blank character of
5831 * the replacement text should be a connector (| or ,).
5832 *
Daniel Veillard5e2dace2001-07-18 19:30:27 +00005833 * Returns the tree of xmlElementContentPtr describing the element
Owen Taylor3473f882001-02-23 17:55:21 +00005834 * hierarchy.
5835 */
Daniel Veillard489f9672009-08-10 16:49:30 +02005836static xmlElementContentPtr
5837xmlParseElementChildrenContentDeclPriv(xmlParserCtxtPtr ctxt, int inputchk,
5838 int depth) {
Owen Taylor3473f882001-02-23 17:55:21 +00005839 xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005840 const xmlChar *elem;
Owen Taylor3473f882001-02-23 17:55:21 +00005841 xmlChar type = 0;
5842
Daniel Veillard489f9672009-08-10 16:49:30 +02005843 if (((depth > 128) && ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
5844 (depth > 2048)) {
5845 xmlFatalErrMsgInt(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED,
5846"xmlParseElementChildrenContentDecl : depth %d too deep, use XML_PARSE_HUGE\n",
5847 depth);
5848 return(NULL);
5849 }
Owen Taylor3473f882001-02-23 17:55:21 +00005850 SKIP_BLANKS;
5851 GROW;
5852 if (RAW == '(') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005853 int inputid = ctxt->input->id;
Daniel Veillard8dc16a62002-02-19 21:08:48 +00005854
Owen Taylor3473f882001-02-23 17:55:21 +00005855 /* Recurse on first child */
5856 NEXT;
5857 SKIP_BLANKS;
Daniel Veillard489f9672009-08-10 16:49:30 +02005858 cur = ret = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
5859 depth + 1);
Owen Taylor3473f882001-02-23 17:55:21 +00005860 SKIP_BLANKS;
5861 GROW;
5862 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00005863 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005864 if (elem == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005865 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005866 return(NULL);
5867 }
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005868 cur = ret = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00005869 if (cur == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005870 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00005871 return(NULL);
5872 }
Owen Taylor3473f882001-02-23 17:55:21 +00005873 GROW;
5874 if (RAW == '?') {
5875 cur->ocur = XML_ELEMENT_CONTENT_OPT;
5876 NEXT;
5877 } else if (RAW == '*') {
5878 cur->ocur = XML_ELEMENT_CONTENT_MULT;
5879 NEXT;
5880 } else if (RAW == '+') {
5881 cur->ocur = XML_ELEMENT_CONTENT_PLUS;
5882 NEXT;
5883 } else {
5884 cur->ocur = XML_ELEMENT_CONTENT_ONCE;
5885 }
Owen Taylor3473f882001-02-23 17:55:21 +00005886 GROW;
5887 }
5888 SKIP_BLANKS;
5889 SHRINK;
5890 while (RAW != ')') {
5891 /*
5892 * Each loop we parse one separator and one element.
5893 */
5894 if (RAW == ',') {
5895 if (type == 0) type = CUR;
5896
5897 /*
5898 * Detect "Name | Name , Name" error
5899 */
5900 else if (type != CUR) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005901 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005902 "xmlParseElementChildrenContentDecl : '%c' expected\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005903 type);
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00005904 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005905 xmlFreeDocElementContent(ctxt->myDoc, last);
Owen Taylor3473f882001-02-23 17:55:21 +00005906 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005907 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00005908 return(NULL);
5909 }
5910 NEXT;
5911
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005912 op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_SEQ);
Owen Taylor3473f882001-02-23 17:55:21 +00005913 if (op == NULL) {
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00005914 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005915 xmlFreeDocElementContent(ctxt->myDoc, last);
5916 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00005917 return(NULL);
5918 }
5919 if (last == NULL) {
5920 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005921 if (ret != NULL)
5922 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00005923 ret = cur = op;
5924 } else {
5925 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005926 if (op != NULL)
5927 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00005928 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005929 if (last != NULL)
5930 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00005931 cur =op;
5932 last = NULL;
5933 }
5934 } else if (RAW == '|') {
5935 if (type == 0) type = CUR;
5936
5937 /*
5938 * Detect "Name , Name | Name" error
5939 */
5940 else if (type != CUR) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005941 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005942 "xmlParseElementChildrenContentDecl : '%c' expected\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005943 type);
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00005944 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005945 xmlFreeDocElementContent(ctxt->myDoc, last);
Owen Taylor3473f882001-02-23 17:55:21 +00005946 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005947 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00005948 return(NULL);
5949 }
5950 NEXT;
5951
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005952 op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
Owen Taylor3473f882001-02-23 17:55:21 +00005953 if (op == NULL) {
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00005954 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005955 xmlFreeDocElementContent(ctxt->myDoc, last);
Owen Taylor3473f882001-02-23 17:55:21 +00005956 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005957 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00005958 return(NULL);
5959 }
5960 if (last == NULL) {
5961 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005962 if (ret != NULL)
5963 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00005964 ret = cur = op;
5965 } else {
5966 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005967 if (op != NULL)
5968 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00005969 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005970 if (last != NULL)
5971 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00005972 cur =op;
5973 last = NULL;
5974 }
5975 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005976 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED, NULL);
Daniel Veillardc707d0b2008-01-24 14:48:54 +00005977 if ((last != NULL) && (last != ret))
5978 xmlFreeDocElementContent(ctxt->myDoc, last);
Owen Taylor3473f882001-02-23 17:55:21 +00005979 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005980 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00005981 return(NULL);
5982 }
5983 GROW;
5984 SKIP_BLANKS;
5985 GROW;
5986 if (RAW == '(') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005987 int inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00005988 /* Recurse on second child */
5989 NEXT;
5990 SKIP_BLANKS;
Daniel Veillard489f9672009-08-10 16:49:30 +02005991 last = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
5992 depth + 1);
Owen Taylor3473f882001-02-23 17:55:21 +00005993 SKIP_BLANKS;
5994 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00005995 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005996 if (elem == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005997 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005998 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005999 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00006000 return(NULL);
6001 }
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006002 last = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillard68b6e022008-03-31 09:26:00 +00006003 if (last == NULL) {
6004 if (ret != NULL)
6005 xmlFreeDocElementContent(ctxt->myDoc, ret);
6006 return(NULL);
6007 }
Owen Taylor3473f882001-02-23 17:55:21 +00006008 if (RAW == '?') {
6009 last->ocur = XML_ELEMENT_CONTENT_OPT;
6010 NEXT;
6011 } else if (RAW == '*') {
6012 last->ocur = XML_ELEMENT_CONTENT_MULT;
6013 NEXT;
6014 } else if (RAW == '+') {
6015 last->ocur = XML_ELEMENT_CONTENT_PLUS;
6016 NEXT;
6017 } else {
6018 last->ocur = XML_ELEMENT_CONTENT_ONCE;
6019 }
6020 }
6021 SKIP_BLANKS;
6022 GROW;
6023 }
6024 if ((cur != NULL) && (last != NULL)) {
6025 cur->c2 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006026 if (last != NULL)
6027 last->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00006028 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006029 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00006030 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6031"Element content declaration doesn't start and stop in the same entity\n",
Daniel Veillardae0765b2008-07-31 19:54:59 +00006032 NULL, NULL);
Daniel Veillard8dc16a62002-02-19 21:08:48 +00006033 }
Owen Taylor3473f882001-02-23 17:55:21 +00006034 NEXT;
6035 if (RAW == '?') {
William M. Brackf8f2e8f2004-05-14 04:37:41 +00006036 if (ret != NULL) {
6037 if ((ret->ocur == XML_ELEMENT_CONTENT_PLUS) ||
6038 (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6039 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6040 else
6041 ret->ocur = XML_ELEMENT_CONTENT_OPT;
6042 }
Owen Taylor3473f882001-02-23 17:55:21 +00006043 NEXT;
6044 } else if (RAW == '*') {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00006045 if (ret != NULL) {
Daniel Veillarde470df72001-04-18 21:41:07 +00006046 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Daniel Veillardce2c2f02001-10-18 14:57:24 +00006047 cur = ret;
6048 /*
6049 * Some normalization:
6050 * (a | b* | c?)* == (a | b | c)*
6051 */
Daniel Veillard30e76072006-03-09 14:13:55 +00006052 while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00006053 if ((cur->c1 != NULL) &&
6054 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6055 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT)))
6056 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6057 if ((cur->c2 != NULL) &&
6058 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6059 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT)))
6060 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6061 cur = cur->c2;
6062 }
6063 }
Owen Taylor3473f882001-02-23 17:55:21 +00006064 NEXT;
6065 } else if (RAW == '+') {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00006066 if (ret != NULL) {
6067 int found = 0;
6068
William M. Brackf8f2e8f2004-05-14 04:37:41 +00006069 if ((ret->ocur == XML_ELEMENT_CONTENT_OPT) ||
6070 (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6071 ret->ocur = XML_ELEMENT_CONTENT_MULT;
William M. Brackeb8509c2004-05-14 03:48:02 +00006072 else
6073 ret->ocur = XML_ELEMENT_CONTENT_PLUS;
Daniel Veillardce2c2f02001-10-18 14:57:24 +00006074 /*
6075 * Some normalization:
6076 * (a | b*)+ == (a | b)*
6077 * (a | b?)+ == (a | b)*
6078 */
Daniel Veillard30e76072006-03-09 14:13:55 +00006079 while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00006080 if ((cur->c1 != NULL) &&
6081 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6082 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) {
6083 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6084 found = 1;
6085 }
6086 if ((cur->c2 != NULL) &&
6087 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6088 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) {
6089 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6090 found = 1;
6091 }
6092 cur = cur->c2;
6093 }
6094 if (found)
6095 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6096 }
Owen Taylor3473f882001-02-23 17:55:21 +00006097 NEXT;
6098 }
6099 return(ret);
6100}
6101
6102/**
Daniel Veillard489f9672009-08-10 16:49:30 +02006103 * xmlParseElementChildrenContentDecl:
6104 * @ctxt: an XML parser context
6105 * @inputchk: the input used for the current entity, needed for boundary checks
Daniel Veillard489f9672009-08-10 16:49:30 +02006106 *
6107 * parse the declaration for a Mixed Element content
6108 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6109 *
6110 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
6111 *
6112 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6113 *
6114 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6115 *
6116 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6117 *
6118 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6119 * TODO Parameter-entity replacement text must be properly nested
6120 * with parenthesized groups. That is to say, if either of the
6121 * opening or closing parentheses in a choice, seq, or Mixed
6122 * construct is contained in the replacement text for a parameter
6123 * entity, both must be contained in the same replacement text. For
6124 * interoperability, if a parameter-entity reference appears in a
6125 * choice, seq, or Mixed construct, its replacement text should not
6126 * be empty, and neither the first nor last non-blank character of
6127 * the replacement text should be a connector (| or ,).
6128 *
6129 * Returns the tree of xmlElementContentPtr describing the element
6130 * hierarchy.
6131 */
6132xmlElementContentPtr
6133xmlParseElementChildrenContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
6134 /* stub left for API/ABI compat */
6135 return(xmlParseElementChildrenContentDeclPriv(ctxt, inputchk, 1));
6136}
6137
6138/**
Owen Taylor3473f882001-02-23 17:55:21 +00006139 * xmlParseElementContentDecl:
6140 * @ctxt: an XML parser context
6141 * @name: the name of the element being defined.
6142 * @result: the Element Content pointer will be stored here if any
6143 *
6144 * parse the declaration for an Element content either Mixed or Children,
6145 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
6146 *
6147 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
6148 *
6149 * returns: the type of element content XML_ELEMENT_TYPE_xxx
6150 */
6151
6152int
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006153xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, const xmlChar *name,
Owen Taylor3473f882001-02-23 17:55:21 +00006154 xmlElementContentPtr *result) {
6155
6156 xmlElementContentPtr tree = NULL;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006157 int inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00006158 int res;
6159
6160 *result = NULL;
6161
6162 if (RAW != '(') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006163 xmlFatalErrMsgStr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
Daniel Veillard56a4cb82001-03-24 17:00:36 +00006164 "xmlParseElementContentDecl : %s '(' expected\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006165 return(-1);
6166 }
6167 NEXT;
6168 GROW;
6169 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00006170 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006171 tree = xmlParseElementMixedContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00006172 res = XML_ELEMENT_TYPE_MIXED;
6173 } else {
Daniel Veillard489f9672009-08-10 16:49:30 +02006174 tree = xmlParseElementChildrenContentDeclPriv(ctxt, inputid, 1);
Owen Taylor3473f882001-02-23 17:55:21 +00006175 res = XML_ELEMENT_TYPE_ELEMENT;
6176 }
Owen Taylor3473f882001-02-23 17:55:21 +00006177 SKIP_BLANKS;
6178 *result = tree;
6179 return(res);
6180}
6181
6182/**
6183 * xmlParseElementDecl:
6184 * @ctxt: an XML parser context
6185 *
6186 * parse an Element declaration.
6187 *
6188 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
6189 *
6190 * [ VC: Unique Element Type Declaration ]
6191 * No element type may be declared more than once
6192 *
6193 * Returns the type of the element, or -1 in case of error
6194 */
6195int
6196xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006197 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00006198 int ret = -1;
6199 xmlElementContentPtr content = NULL;
6200
Daniel Veillard4c778d82005-01-23 17:37:44 +00006201 /* GROW; done in the caller */
Daniel Veillarda07050d2003-10-19 14:46:32 +00006202 if (CMP9(CUR_PTR, '<', '!', 'E', 'L', 'E', 'M', 'E', 'N', 'T')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006203 xmlParserInputPtr input = ctxt->input;
6204
6205 SKIP(9);
William M. Brack76e95df2003-10-18 16:20:14 +00006206 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006207 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6208 "Space required after 'ELEMENT'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006209 }
6210 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00006211 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006212 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006213 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6214 "xmlParseElementDecl: no name for Element\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006215 return(-1);
6216 }
6217 while ((RAW == 0) && (ctxt->inputNr > 1))
6218 xmlPopInput(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00006219 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006220 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6221 "Space required after the element name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006222 }
6223 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00006224 if (CMP5(CUR_PTR, 'E', 'M', 'P', 'T', 'Y')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006225 SKIP(5);
6226 /*
6227 * Element must always be empty.
6228 */
6229 ret = XML_ELEMENT_TYPE_EMPTY;
6230 } else if ((RAW == 'A') && (NXT(1) == 'N') &&
6231 (NXT(2) == 'Y')) {
6232 SKIP(3);
6233 /*
6234 * Element is a generic container.
6235 */
6236 ret = XML_ELEMENT_TYPE_ANY;
6237 } else if (RAW == '(') {
6238 ret = xmlParseElementContentDecl(ctxt, name, &content);
6239 } else {
6240 /*
6241 * [ WFC: PEs in Internal Subset ] error handling.
6242 */
6243 if ((RAW == '%') && (ctxt->external == 0) &&
6244 (ctxt->inputNr == 1)) {
Daniel Veillardf403d292003-10-05 13:51:35 +00006245 xmlFatalErrMsg(ctxt, XML_ERR_PEREF_IN_INT_SUBSET,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006246 "PEReference: forbidden within markup decl in internal subset\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006247 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00006248 xmlFatalErrMsg(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
Owen Taylor3473f882001-02-23 17:55:21 +00006249 "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
6250 }
Owen Taylor3473f882001-02-23 17:55:21 +00006251 return(-1);
6252 }
6253
6254 SKIP_BLANKS;
6255 /*
6256 * Pop-up of finished entities.
6257 */
6258 while ((RAW == 0) && (ctxt->inputNr > 1))
6259 xmlPopInput(ctxt);
6260 SKIP_BLANKS;
6261
6262 if (RAW != '>') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006263 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006264 if (content != NULL) {
6265 xmlFreeDocElementContent(ctxt->myDoc, content);
6266 }
Owen Taylor3473f882001-02-23 17:55:21 +00006267 } else {
6268 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006269 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6270 "Element declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006271 }
6272
6273 NEXT;
6274 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006275 (ctxt->sax->elementDecl != NULL)) {
6276 if (content != NULL)
6277 content->parent = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00006278 ctxt->sax->elementDecl(ctxt->userData, name, ret,
6279 content);
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006280 if ((content != NULL) && (content->parent == NULL)) {
6281 /*
6282 * this is a trick: if xmlAddElementDecl is called,
6283 * instead of copying the full tree it is plugged directly
6284 * if called from the parser. Avoid duplicating the
6285 * interfaces or change the API/ABI
6286 */
6287 xmlFreeDocElementContent(ctxt->myDoc, content);
6288 }
6289 } else if (content != NULL) {
6290 xmlFreeDocElementContent(ctxt->myDoc, content);
6291 }
Owen Taylor3473f882001-02-23 17:55:21 +00006292 }
Owen Taylor3473f882001-02-23 17:55:21 +00006293 }
6294 return(ret);
6295}
6296
6297/**
Owen Taylor3473f882001-02-23 17:55:21 +00006298 * xmlParseConditionalSections
6299 * @ctxt: an XML parser context
6300 *
6301 * [61] conditionalSect ::= includeSect | ignoreSect
6302 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
6303 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
6304 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
6305 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
6306 */
6307
Daniel Veillard56a4cb82001-03-24 17:00:36 +00006308static void
Owen Taylor3473f882001-02-23 17:55:21 +00006309xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
Daniel Veillard49d44052008-08-27 19:57:06 +00006310 int id = ctxt->input->id;
6311
Owen Taylor3473f882001-02-23 17:55:21 +00006312 SKIP(3);
6313 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00006314 if (CMP7(CUR_PTR, 'I', 'N', 'C', 'L', 'U', 'D', 'E')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006315 SKIP(7);
6316 SKIP_BLANKS;
6317 if (RAW != '[') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006318 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006319 } else {
Daniel Veillard49d44052008-08-27 19:57:06 +00006320 if (ctxt->input->id != id) {
6321 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6322 "All markup of the conditional section is not in the same entity\n",
6323 NULL, NULL);
6324 }
Owen Taylor3473f882001-02-23 17:55:21 +00006325 NEXT;
6326 }
6327 if (xmlParserDebugEntities) {
6328 if ((ctxt->input != NULL) && (ctxt->input->filename))
6329 xmlGenericError(xmlGenericErrorContext,
6330 "%s(%d): ", ctxt->input->filename,
6331 ctxt->input->line);
6332 xmlGenericError(xmlGenericErrorContext,
6333 "Entering INCLUDE Conditional Section\n");
6334 }
6335
6336 while ((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') ||
6337 (NXT(2) != '>'))) {
6338 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00006339 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00006340
6341 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6342 xmlParseConditionalSections(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00006343 } else if (IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00006344 NEXT;
6345 } else if (RAW == '%') {
6346 xmlParsePEReference(ctxt);
6347 } else
6348 xmlParseMarkupDecl(ctxt);
6349
6350 /*
6351 * Pop-up of finished entities.
6352 */
6353 while ((RAW == 0) && (ctxt->inputNr > 1))
6354 xmlPopInput(ctxt);
6355
Daniel Veillardfdc91562002-07-01 21:52:03 +00006356 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006357 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006358 break;
6359 }
6360 }
6361 if (xmlParserDebugEntities) {
6362 if ((ctxt->input != NULL) && (ctxt->input->filename))
6363 xmlGenericError(xmlGenericErrorContext,
6364 "%s(%d): ", ctxt->input->filename,
6365 ctxt->input->line);
6366 xmlGenericError(xmlGenericErrorContext,
6367 "Leaving INCLUDE Conditional Section\n");
6368 }
6369
Daniel Veillarda07050d2003-10-19 14:46:32 +00006370 } else if (CMP6(CUR_PTR, 'I', 'G', 'N', 'O', 'R', 'E')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006371 int state;
William M. Brack78637da2003-07-31 14:47:38 +00006372 xmlParserInputState instate;
Owen Taylor3473f882001-02-23 17:55:21 +00006373 int depth = 0;
6374
6375 SKIP(6);
6376 SKIP_BLANKS;
6377 if (RAW != '[') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006378 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006379 } else {
Daniel Veillard49d44052008-08-27 19:57:06 +00006380 if (ctxt->input->id != id) {
6381 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6382 "All markup of the conditional section is not in the same entity\n",
6383 NULL, NULL);
6384 }
Owen Taylor3473f882001-02-23 17:55:21 +00006385 NEXT;
6386 }
6387 if (xmlParserDebugEntities) {
6388 if ((ctxt->input != NULL) && (ctxt->input->filename))
6389 xmlGenericError(xmlGenericErrorContext,
6390 "%s(%d): ", ctxt->input->filename,
6391 ctxt->input->line);
6392 xmlGenericError(xmlGenericErrorContext,
6393 "Entering IGNORE Conditional Section\n");
6394 }
6395
6396 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006397 * Parse up to the end of the conditional section
Owen Taylor3473f882001-02-23 17:55:21 +00006398 * But disable SAX event generating DTD building in the meantime
6399 */
6400 state = ctxt->disableSAX;
6401 instate = ctxt->instate;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006402 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006403 ctxt->instate = XML_PARSER_IGNORE;
6404
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00006405 while ((depth >= 0) && (RAW != 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00006406 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6407 depth++;
6408 SKIP(3);
6409 continue;
6410 }
6411 if ((RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
6412 if (--depth >= 0) SKIP(3);
6413 continue;
6414 }
6415 NEXT;
6416 continue;
6417 }
6418
6419 ctxt->disableSAX = state;
6420 ctxt->instate = instate;
6421
6422 if (xmlParserDebugEntities) {
6423 if ((ctxt->input != NULL) && (ctxt->input->filename))
6424 xmlGenericError(xmlGenericErrorContext,
6425 "%s(%d): ", ctxt->input->filename,
6426 ctxt->input->line);
6427 xmlGenericError(xmlGenericErrorContext,
6428 "Leaving IGNORE Conditional Section\n");
6429 }
6430
6431 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006432 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID_KEYWORD, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006433 }
6434
6435 if (RAW == 0)
6436 SHRINK;
6437
6438 if (RAW == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006439 xmlFatalErr(ctxt, XML_ERR_CONDSEC_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006440 } else {
Daniel Veillard49d44052008-08-27 19:57:06 +00006441 if (ctxt->input->id != id) {
6442 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6443 "All markup of the conditional section is not in the same entity\n",
6444 NULL, NULL);
6445 }
Owen Taylor3473f882001-02-23 17:55:21 +00006446 SKIP(3);
6447 }
6448}
6449
6450/**
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006451 * xmlParseMarkupDecl:
6452 * @ctxt: an XML parser context
6453 *
6454 * parse Markup declarations
6455 *
6456 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
6457 * NotationDecl | PI | Comment
6458 *
6459 * [ VC: Proper Declaration/PE Nesting ]
6460 * Parameter-entity replacement text must be properly nested with
6461 * markup declarations. That is to say, if either the first character
6462 * or the last character of a markup declaration (markupdecl above) is
6463 * contained in the replacement text for a parameter-entity reference,
6464 * both must be contained in the same replacement text.
6465 *
6466 * [ WFC: PEs in Internal Subset ]
6467 * In the internal DTD subset, parameter-entity references can occur
6468 * only where markup declarations can occur, not within markup declarations.
6469 * (This does not apply to references that occur in external parameter
6470 * entities or to the external subset.)
6471 */
6472void
6473xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
6474 GROW;
Daniel Veillard4c778d82005-01-23 17:37:44 +00006475 if (CUR == '<') {
6476 if (NXT(1) == '!') {
6477 switch (NXT(2)) {
6478 case 'E':
6479 if (NXT(3) == 'L')
6480 xmlParseElementDecl(ctxt);
6481 else if (NXT(3) == 'N')
6482 xmlParseEntityDecl(ctxt);
6483 break;
6484 case 'A':
6485 xmlParseAttributeListDecl(ctxt);
6486 break;
6487 case 'N':
6488 xmlParseNotationDecl(ctxt);
6489 break;
6490 case '-':
6491 xmlParseComment(ctxt);
6492 break;
6493 default:
6494 /* there is an error but it will be detected later */
6495 break;
6496 }
6497 } else if (NXT(1) == '?') {
6498 xmlParsePI(ctxt);
6499 }
6500 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006501 /*
6502 * This is only for internal subset. On external entities,
6503 * the replacement is done before parsing stage
6504 */
6505 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
6506 xmlParsePEReference(ctxt);
6507
6508 /*
6509 * Conditional sections are allowed from entities included
6510 * by PE References in the internal subset.
6511 */
6512 if ((ctxt->external == 0) && (ctxt->inputNr > 1)) {
6513 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6514 xmlParseConditionalSections(ctxt);
6515 }
6516 }
6517
6518 ctxt->instate = XML_PARSER_DTD;
6519}
6520
6521/**
6522 * xmlParseTextDecl:
6523 * @ctxt: an XML parser context
Daniel Veillard40ec29a2008-07-30 12:35:40 +00006524 *
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006525 * parse an XML declaration header for external entities
6526 *
6527 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006528 */
6529
6530void
6531xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
6532 xmlChar *version;
Daniel Veillardf5cb3cd2003-10-28 13:58:13 +00006533 const xmlChar *encoding;
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006534
6535 /*
6536 * We know that '<?xml' is here.
6537 */
Daniel Veillarda07050d2003-10-19 14:46:32 +00006538 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006539 SKIP(5);
6540 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006541 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_STARTED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006542 return;
6543 }
6544
William M. Brack76e95df2003-10-18 16:20:14 +00006545 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006546 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6547 "Space needed after '<?xml'\n");
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006548 }
6549 SKIP_BLANKS;
6550
6551 /*
6552 * We may have the VersionInfo here.
6553 */
6554 version = xmlParseVersionInfo(ctxt);
6555 if (version == NULL)
6556 version = xmlCharStrdup(XML_DEFAULT_VERSION);
Daniel Veillard401c2112002-01-07 16:54:10 +00006557 else {
William M. Brack76e95df2003-10-18 16:20:14 +00006558 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006559 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6560 "Space needed here\n");
Daniel Veillard401c2112002-01-07 16:54:10 +00006561 }
6562 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006563 ctxt->input->version = version;
6564
6565 /*
6566 * We must have the encoding declaration
6567 */
Daniel Veillardf5cb3cd2003-10-28 13:58:13 +00006568 encoding = xmlParseEncodingDecl(ctxt);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006569 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
6570 /*
6571 * The XML REC instructs us to stop parsing right here
6572 */
6573 return;
6574 }
Daniel Veillardf5cb3cd2003-10-28 13:58:13 +00006575 if ((encoding == NULL) && (ctxt->errNo == XML_ERR_OK)) {
6576 xmlFatalErrMsg(ctxt, XML_ERR_MISSING_ENCODING,
6577 "Missing encoding in text declaration\n");
6578 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006579
6580 SKIP_BLANKS;
6581 if ((RAW == '?') && (NXT(1) == '>')) {
6582 SKIP(2);
6583 } else if (RAW == '>') {
6584 /* Deprecated old WD ... */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006585 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006586 NEXT;
6587 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006588 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006589 MOVETO_ENDTAG(CUR_PTR);
6590 NEXT;
6591 }
6592}
6593
6594/**
Owen Taylor3473f882001-02-23 17:55:21 +00006595 * xmlParseExternalSubset:
6596 * @ctxt: an XML parser context
6597 * @ExternalID: the external identifier
6598 * @SystemID: the system identifier (or URL)
6599 *
6600 * parse Markup declarations from an external subset
6601 *
6602 * [30] extSubset ::= textDecl? extSubsetDecl
6603 *
6604 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
6605 */
6606void
6607xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
6608 const xmlChar *SystemID) {
Daniel Veillard309f81d2003-09-23 09:02:53 +00006609 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006610 GROW;
Daniel Veillard6ccc56d2008-04-03 12:59:06 +00006611
6612 if ((ctxt->encoding == (const xmlChar *)XML_CHAR_ENCODING_NONE) &&
6613 (ctxt->input->end - ctxt->input->cur >= 4)) {
6614 xmlChar start[4];
6615 xmlCharEncoding enc;
6616
6617 start[0] = RAW;
6618 start[1] = NXT(1);
6619 start[2] = NXT(2);
6620 start[3] = NXT(3);
6621 enc = xmlDetectCharEncoding(start, 4);
6622 if (enc != XML_CHAR_ENCODING_NONE)
6623 xmlSwitchEncoding(ctxt, enc);
6624 }
6625
Daniel Veillarda07050d2003-10-19 14:46:32 +00006626 if (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006627 xmlParseTextDecl(ctxt);
6628 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
6629 /*
6630 * The XML REC instructs us to stop parsing right here
6631 */
6632 ctxt->instate = XML_PARSER_EOF;
6633 return;
6634 }
6635 }
6636 if (ctxt->myDoc == NULL) {
6637 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
Daniel Veillardae0765b2008-07-31 19:54:59 +00006638 if (ctxt->myDoc == NULL) {
6639 xmlErrMemory(ctxt, "New Doc failed");
6640 return;
6641 }
6642 ctxt->myDoc->properties = XML_DOC_INTERNAL;
Owen Taylor3473f882001-02-23 17:55:21 +00006643 }
6644 if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
6645 xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
6646
6647 ctxt->instate = XML_PARSER_DTD;
6648 ctxt->external = 1;
6649 while (((RAW == '<') && (NXT(1) == '?')) ||
6650 ((RAW == '<') && (NXT(1) == '!')) ||
William M. Brack76e95df2003-10-18 16:20:14 +00006651 (RAW == '%') || IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00006652 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00006653 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00006654
6655 GROW;
6656 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6657 xmlParseConditionalSections(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00006658 } else if (IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00006659 NEXT;
6660 } else if (RAW == '%') {
6661 xmlParsePEReference(ctxt);
6662 } else
6663 xmlParseMarkupDecl(ctxt);
6664
6665 /*
6666 * Pop-up of finished entities.
6667 */
6668 while ((RAW == 0) && (ctxt->inputNr > 1))
6669 xmlPopInput(ctxt);
6670
Daniel Veillardfdc91562002-07-01 21:52:03 +00006671 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006672 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006673 break;
6674 }
6675 }
6676
6677 if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006678 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006679 }
6680
6681}
6682
6683/**
6684 * xmlParseReference:
6685 * @ctxt: an XML parser context
Daniel Veillard0161e632008-08-28 15:36:32 +00006686 *
Owen Taylor3473f882001-02-23 17:55:21 +00006687 * parse and handle entity references in content, depending on the SAX
6688 * interface, this may end-up in a call to character() if this is a
6689 * CharRef, a predefined entity, if there is no reference() callback.
6690 * or if the parser was asked to switch to that mode.
6691 *
6692 * [67] Reference ::= EntityRef | CharRef
6693 */
6694void
6695xmlParseReference(xmlParserCtxtPtr ctxt) {
6696 xmlEntityPtr ent;
6697 xmlChar *val;
Daniel Veillard0161e632008-08-28 15:36:32 +00006698 int was_checked;
6699 xmlNodePtr list = NULL;
6700 xmlParserErrors ret = XML_ERR_OK;
Owen Taylor3473f882001-02-23 17:55:21 +00006701
Daniel Veillard0161e632008-08-28 15:36:32 +00006702
6703 if (RAW != '&')
6704 return;
6705
6706 /*
6707 * Simple case of a CharRef
6708 */
Owen Taylor3473f882001-02-23 17:55:21 +00006709 if (NXT(1) == '#') {
6710 int i = 0;
6711 xmlChar out[10];
6712 int hex = NXT(2);
Daniel Veillard56a4cb82001-03-24 17:00:36 +00006713 int value = xmlParseCharRef(ctxt);
Daniel Veillard0161e632008-08-28 15:36:32 +00006714
Daniel Veillarddc171602008-03-26 17:41:38 +00006715 if (value == 0)
6716 return;
Owen Taylor3473f882001-02-23 17:55:21 +00006717 if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
6718 /*
6719 * So we are using non-UTF-8 buffers
6720 * Check that the char fit on 8bits, if not
6721 * generate a CharRef.
6722 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00006723 if (value <= 0xFF) {
6724 out[0] = value;
Owen Taylor3473f882001-02-23 17:55:21 +00006725 out[1] = 0;
6726 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
6727 (!ctxt->disableSAX))
6728 ctxt->sax->characters(ctxt->userData, out, 1);
6729 } else {
6730 if ((hex == 'x') || (hex == 'X'))
Aleksey Sanin49cc9752002-06-14 17:07:10 +00006731 snprintf((char *)out, sizeof(out), "#x%X", value);
Owen Taylor3473f882001-02-23 17:55:21 +00006732 else
Aleksey Sanin49cc9752002-06-14 17:07:10 +00006733 snprintf((char *)out, sizeof(out), "#%d", value);
Owen Taylor3473f882001-02-23 17:55:21 +00006734 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
6735 (!ctxt->disableSAX))
6736 ctxt->sax->reference(ctxt->userData, out);
6737 }
6738 } else {
6739 /*
6740 * Just encode the value in UTF-8
6741 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00006742 COPY_BUF(0 ,out, i, value);
Owen Taylor3473f882001-02-23 17:55:21 +00006743 out[i] = 0;
6744 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
6745 (!ctxt->disableSAX))
6746 ctxt->sax->characters(ctxt->userData, out, i);
6747 }
Daniel Veillard0161e632008-08-28 15:36:32 +00006748 return;
6749 }
Daniel Veillarda37a6ad2006-10-10 20:05:45 +00006750
Daniel Veillard0161e632008-08-28 15:36:32 +00006751 /*
6752 * We are seeing an entity reference
6753 */
6754 ent = xmlParseEntityRef(ctxt);
6755 if (ent == NULL) return;
6756 if (!ctxt->wellFormed)
6757 return;
6758 was_checked = ent->checked;
6759
6760 /* special case of predefined entities */
6761 if ((ent->name == NULL) ||
6762 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
6763 val = ent->content;
6764 if (val == NULL) return;
6765 /*
6766 * inline the entity.
6767 */
6768 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
6769 (!ctxt->disableSAX))
6770 ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
6771 return;
6772 }
6773
6774 /*
6775 * The first reference to the entity trigger a parsing phase
6776 * where the ent->children is filled with the result from
6777 * the parsing.
6778 */
6779 if (ent->checked == 0) {
6780 unsigned long oldnbent = ctxt->nbentities;
6781
6782 /*
6783 * This is a bit hackish but this seems the best
6784 * way to make sure both SAX and DOM entity support
6785 * behaves okay.
6786 */
6787 void *user_data;
6788 if (ctxt->userData == ctxt)
6789 user_data = NULL;
6790 else
6791 user_data = ctxt->userData;
6792
6793 /*
6794 * Check that this entity is well formed
6795 * 4.3.2: An internal general parsed entity is well-formed
6796 * if its replacement text matches the production labeled
6797 * content.
6798 */
6799 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
6800 ctxt->depth++;
6801 ret = xmlParseBalancedChunkMemoryInternal(ctxt, ent->content,
6802 user_data, &list);
6803 ctxt->depth--;
6804
6805 } else if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
6806 ctxt->depth++;
6807 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt, ctxt->sax,
6808 user_data, ctxt->depth, ent->URI,
6809 ent->ExternalID, &list);
6810 ctxt->depth--;
6811 } else {
6812 ret = XML_ERR_ENTITY_PE_INTERNAL;
6813 xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
6814 "invalid entity type found\n", NULL);
6815 }
6816
6817 /*
6818 * Store the number of entities needing parsing for this entity
6819 * content and do checkings
6820 */
6821 ent->checked = ctxt->nbentities - oldnbent;
6822 if (ret == XML_ERR_ENTITY_LOOP) {
Daniel Veillard4bf899b2008-08-20 17:04:30 +00006823 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
Daniel Veillard0161e632008-08-28 15:36:32 +00006824 xmlFreeNodeList(list);
Daniel Veillard4bf899b2008-08-20 17:04:30 +00006825 return;
6826 }
Daniel Veillard0161e632008-08-28 15:36:32 +00006827 if (xmlParserEntityCheck(ctxt, 0, ent)) {
6828 xmlFreeNodeList(list);
6829 return;
6830 }
Owen Taylor3473f882001-02-23 17:55:21 +00006831
Daniel Veillard0161e632008-08-28 15:36:32 +00006832 if ((ret == XML_ERR_OK) && (list != NULL)) {
6833 if (((ent->etype == XML_INTERNAL_GENERAL_ENTITY) ||
6834 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY))&&
6835 (ent->children == NULL)) {
6836 ent->children = list;
6837 if (ctxt->replaceEntities) {
Owen Taylor3473f882001-02-23 17:55:21 +00006838 /*
Daniel Veillard0161e632008-08-28 15:36:32 +00006839 * Prune it directly in the generated document
6840 * except for single text nodes.
Owen Taylor3473f882001-02-23 17:55:21 +00006841 */
Daniel Veillard0161e632008-08-28 15:36:32 +00006842 if (((list->type == XML_TEXT_NODE) &&
6843 (list->next == NULL)) ||
6844 (ctxt->parseMode == XML_PARSE_READER)) {
6845 list->parent = (xmlNodePtr) ent;
6846 list = NULL;
6847 ent->owner = 1;
6848 } else {
6849 ent->owner = 0;
6850 while (list != NULL) {
6851 list->parent = (xmlNodePtr) ctxt->node;
6852 list->doc = ctxt->myDoc;
6853 if (list->next == NULL)
6854 ent->last = list;
6855 list = list->next;
Owen Taylor3473f882001-02-23 17:55:21 +00006856 }
Daniel Veillard0161e632008-08-28 15:36:32 +00006857 list = ent->children;
6858#ifdef LIBXML_LEGACY_ENABLED
6859 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
6860 xmlAddEntityReference(ent, list, NULL);
6861#endif /* LIBXML_LEGACY_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00006862 }
6863 } else {
Daniel Veillard0161e632008-08-28 15:36:32 +00006864 ent->owner = 1;
6865 while (list != NULL) {
6866 list->parent = (xmlNodePtr) ent;
6867 if (list->next == NULL)
6868 ent->last = list;
6869 list = list->next;
Owen Taylor3473f882001-02-23 17:55:21 +00006870 }
6871 }
Daniel Veillard0161e632008-08-28 15:36:32 +00006872 } else {
6873 xmlFreeNodeList(list);
6874 list = NULL;
Daniel Veillarda37a6ad2006-10-10 20:05:45 +00006875 }
Daniel Veillard0161e632008-08-28 15:36:32 +00006876 } else if ((ret != XML_ERR_OK) &&
6877 (ret != XML_WAR_UNDECLARED_ENTITY)) {
6878 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
6879 "Entity '%s' failed to parse\n", ent->name);
6880 } else if (list != NULL) {
6881 xmlFreeNodeList(list);
6882 list = NULL;
6883 }
6884 if (ent->checked == 0)
6885 ent->checked = 1;
6886 } else if (ent->checked != 1) {
6887 ctxt->nbentities += ent->checked;
6888 }
Daniel Veillarda37a6ad2006-10-10 20:05:45 +00006889
Daniel Veillard0161e632008-08-28 15:36:32 +00006890 /*
6891 * Now that the entity content has been gathered
6892 * provide it to the application, this can take different forms based
6893 * on the parsing modes.
6894 */
6895 if (ent->children == NULL) {
6896 /*
6897 * Probably running in SAX mode and the callbacks don't
6898 * build the entity content. So unless we already went
6899 * though parsing for first checking go though the entity
6900 * content to generate callbacks associated to the entity
6901 */
6902 if (was_checked != 0) {
6903 void *user_data;
Owen Taylor3473f882001-02-23 17:55:21 +00006904 /*
Daniel Veillard0161e632008-08-28 15:36:32 +00006905 * This is a bit hackish but this seems the best
6906 * way to make sure both SAX and DOM entity support
6907 * behaves okay.
Owen Taylor3473f882001-02-23 17:55:21 +00006908 */
Daniel Veillard0161e632008-08-28 15:36:32 +00006909 if (ctxt->userData == ctxt)
6910 user_data = NULL;
6911 else
6912 user_data = ctxt->userData;
6913
6914 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
6915 ctxt->depth++;
6916 ret = xmlParseBalancedChunkMemoryInternal(ctxt,
6917 ent->content, user_data, NULL);
6918 ctxt->depth--;
6919 } else if (ent->etype ==
6920 XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
6921 ctxt->depth++;
6922 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
6923 ctxt->sax, user_data, ctxt->depth,
6924 ent->URI, ent->ExternalID, NULL);
6925 ctxt->depth--;
6926 } else {
6927 ret = XML_ERR_ENTITY_PE_INTERNAL;
6928 xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
6929 "invalid entity type found\n", NULL);
6930 }
6931 if (ret == XML_ERR_ENTITY_LOOP) {
6932 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
6933 return;
6934 }
6935 }
6936 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
6937 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
6938 /*
6939 * Entity reference callback comes second, it's somewhat
6940 * superfluous but a compatibility to historical behaviour
6941 */
6942 ctxt->sax->reference(ctxt->userData, ent->name);
6943 }
6944 return;
6945 }
6946
6947 /*
6948 * If we didn't get any children for the entity being built
6949 */
6950 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
6951 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
6952 /*
6953 * Create a node.
6954 */
6955 ctxt->sax->reference(ctxt->userData, ent->name);
6956 return;
6957 }
6958
6959 if ((ctxt->replaceEntities) || (ent->children == NULL)) {
6960 /*
6961 * There is a problem on the handling of _private for entities
6962 * (bug 155816): Should we copy the content of the field from
6963 * the entity (possibly overwriting some value set by the user
6964 * when a copy is created), should we leave it alone, or should
6965 * we try to take care of different situations? The problem
6966 * is exacerbated by the usage of this field by the xmlReader.
6967 * To fix this bug, we look at _private on the created node
6968 * and, if it's NULL, we copy in whatever was in the entity.
6969 * If it's not NULL we leave it alone. This is somewhat of a
6970 * hack - maybe we should have further tests to determine
6971 * what to do.
6972 */
6973 if ((ctxt->node != NULL) && (ent->children != NULL)) {
6974 /*
6975 * Seems we are generating the DOM content, do
6976 * a simple tree copy for all references except the first
6977 * In the first occurrence list contains the replacement.
6978 * progressive == 2 means we are operating on the Reader
6979 * and since nodes are discarded we must copy all the time.
6980 */
6981 if (((list == NULL) && (ent->owner == 0)) ||
6982 (ctxt->parseMode == XML_PARSE_READER)) {
6983 xmlNodePtr nw = NULL, cur, firstChild = NULL;
6984
6985 /*
6986 * when operating on a reader, the entities definitions
6987 * are always owning the entities subtree.
6988 if (ctxt->parseMode == XML_PARSE_READER)
6989 ent->owner = 1;
6990 */
6991
6992 cur = ent->children;
6993 while (cur != NULL) {
6994 nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
6995 if (nw != NULL) {
6996 if (nw->_private == NULL)
6997 nw->_private = cur->_private;
6998 if (firstChild == NULL){
6999 firstChild = nw;
7000 }
7001 nw = xmlAddChild(ctxt->node, nw);
7002 }
7003 if (cur == ent->last) {
7004 /*
7005 * needed to detect some strange empty
7006 * node cases in the reader tests
7007 */
7008 if ((ctxt->parseMode == XML_PARSE_READER) &&
7009 (nw != NULL) &&
7010 (nw->type == XML_ELEMENT_NODE) &&
7011 (nw->children == NULL))
7012 nw->extra = 1;
7013
7014 break;
7015 }
7016 cur = cur->next;
7017 }
7018#ifdef LIBXML_LEGACY_ENABLED
7019 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7020 xmlAddEntityReference(ent, firstChild, nw);
7021#endif /* LIBXML_LEGACY_ENABLED */
7022 } else if (list == NULL) {
7023 xmlNodePtr nw = NULL, cur, next, last,
7024 firstChild = NULL;
7025 /*
7026 * Copy the entity child list and make it the new
7027 * entity child list. The goal is to make sure any
7028 * ID or REF referenced will be the one from the
7029 * document content and not the entity copy.
7030 */
7031 cur = ent->children;
7032 ent->children = NULL;
7033 last = ent->last;
7034 ent->last = NULL;
7035 while (cur != NULL) {
7036 next = cur->next;
7037 cur->next = NULL;
7038 cur->parent = NULL;
7039 nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
7040 if (nw != NULL) {
7041 if (nw->_private == NULL)
7042 nw->_private = cur->_private;
7043 if (firstChild == NULL){
7044 firstChild = cur;
7045 }
7046 xmlAddChild((xmlNodePtr) ent, nw);
7047 xmlAddChild(ctxt->node, cur);
7048 }
7049 if (cur == last)
7050 break;
7051 cur = next;
7052 }
Daniel Veillardcba68392008-08-29 12:43:40 +00007053 if (ent->owner == 0)
7054 ent->owner = 1;
Daniel Veillard0161e632008-08-28 15:36:32 +00007055#ifdef LIBXML_LEGACY_ENABLED
7056 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7057 xmlAddEntityReference(ent, firstChild, nw);
7058#endif /* LIBXML_LEGACY_ENABLED */
7059 } else {
7060 const xmlChar *nbktext;
7061
7062 /*
7063 * the name change is to avoid coalescing of the
7064 * node with a possible previous text one which
7065 * would make ent->children a dangling pointer
7066 */
7067 nbktext = xmlDictLookup(ctxt->dict, BAD_CAST "nbktext",
7068 -1);
7069 if (ent->children->type == XML_TEXT_NODE)
7070 ent->children->name = nbktext;
7071 if ((ent->last != ent->children) &&
7072 (ent->last->type == XML_TEXT_NODE))
7073 ent->last->name = nbktext;
7074 xmlAddChildList(ctxt->node, ent->children);
7075 }
7076
7077 /*
7078 * This is to avoid a nasty side effect, see
7079 * characters() in SAX.c
7080 */
7081 ctxt->nodemem = 0;
7082 ctxt->nodelen = 0;
7083 return;
Owen Taylor3473f882001-02-23 17:55:21 +00007084 }
7085 }
7086}
7087
7088/**
7089 * xmlParseEntityRef:
7090 * @ctxt: an XML parser context
7091 *
7092 * parse ENTITY references declarations
7093 *
7094 * [68] EntityRef ::= '&' Name ';'
7095 *
7096 * [ WFC: Entity Declared ]
7097 * In a document without any DTD, a document with only an internal DTD
7098 * subset which contains no parameter entity references, or a document
7099 * with "standalone='yes'", the Name given in the entity reference
7100 * must match that in an entity declaration, except that well-formed
7101 * documents need not declare any of the following entities: amp, lt,
7102 * gt, apos, quot. The declaration of a parameter entity must precede
7103 * any reference to it. Similarly, the declaration of a general entity
7104 * must precede any reference to it which appears in a default value in an
7105 * attribute-list declaration. Note that if entities are declared in the
7106 * external subset or in external parameter entities, a non-validating
7107 * processor is not obligated to read and process their declarations;
7108 * for such documents, the rule that an entity must be declared is a
7109 * well-formedness constraint only if standalone='yes'.
7110 *
7111 * [ WFC: Parsed Entity ]
7112 * An entity reference must not contain the name of an unparsed entity
7113 *
7114 * Returns the xmlEntityPtr if found, or NULL otherwise.
7115 */
7116xmlEntityPtr
7117xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007118 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00007119 xmlEntityPtr ent = NULL;
7120
7121 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00007122
Daniel Veillard0161e632008-08-28 15:36:32 +00007123 if (RAW != '&')
7124 return(NULL);
7125 NEXT;
7126 name = xmlParseName(ctxt);
7127 if (name == NULL) {
7128 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7129 "xmlParseEntityRef: no name\n");
7130 return(NULL);
7131 }
7132 if (RAW != ';') {
7133 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7134 return(NULL);
7135 }
7136 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00007137
Daniel Veillard0161e632008-08-28 15:36:32 +00007138 /*
7139 * Predefined entites override any extra definition
7140 */
Rob Richardsb9ed0172009-01-05 17:28:50 +00007141 if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7142 ent = xmlGetPredefinedEntity(name);
7143 if (ent != NULL)
7144 return(ent);
7145 }
Owen Taylor3473f882001-02-23 17:55:21 +00007146
Daniel Veillard0161e632008-08-28 15:36:32 +00007147 /*
7148 * Increate the number of entity references parsed
7149 */
7150 ctxt->nbentities++;
Owen Taylor3473f882001-02-23 17:55:21 +00007151
Daniel Veillard0161e632008-08-28 15:36:32 +00007152 /*
7153 * Ask first SAX for entity resolution, otherwise try the
7154 * entities which may have stored in the parser context.
7155 */
7156 if (ctxt->sax != NULL) {
7157 if (ctxt->sax->getEntity != NULL)
7158 ent = ctxt->sax->getEntity(ctxt->userData, name);
Rob Richardsb9ed0172009-01-05 17:28:50 +00007159 if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7160 (ctxt->options & XML_PARSE_OLDSAX))
7161 ent = xmlGetPredefinedEntity(name);
Daniel Veillard0161e632008-08-28 15:36:32 +00007162 if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7163 (ctxt->userData==ctxt)) {
7164 ent = xmlSAX2GetEntity(ctxt, name);
Owen Taylor3473f882001-02-23 17:55:21 +00007165 }
7166 }
Daniel Veillard0161e632008-08-28 15:36:32 +00007167 /*
7168 * [ WFC: Entity Declared ]
7169 * In a document without any DTD, a document with only an
7170 * internal DTD subset which contains no parameter entity
7171 * references, or a document with "standalone='yes'", the
7172 * Name given in the entity reference must match that in an
7173 * entity declaration, except that well-formed documents
7174 * need not declare any of the following entities: amp, lt,
7175 * gt, apos, quot.
7176 * The declaration of a parameter entity must precede any
7177 * reference to it.
7178 * Similarly, the declaration of a general entity must
7179 * precede any reference to it which appears in a default
7180 * value in an attribute-list declaration. Note that if
7181 * entities are declared in the external subset or in
7182 * external parameter entities, a non-validating processor
7183 * is not obligated to read and process their declarations;
7184 * for such documents, the rule that an entity must be
7185 * declared is a well-formedness constraint only if
7186 * standalone='yes'.
7187 */
7188 if (ent == NULL) {
7189 if ((ctxt->standalone == 1) ||
7190 ((ctxt->hasExternalSubset == 0) &&
7191 (ctxt->hasPErefs == 0))) {
7192 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7193 "Entity '%s' not defined\n", name);
7194 } else {
7195 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7196 "Entity '%s' not defined\n", name);
7197 if ((ctxt->inSubset == 0) &&
7198 (ctxt->sax != NULL) &&
7199 (ctxt->sax->reference != NULL)) {
7200 ctxt->sax->reference(ctxt->userData, name);
7201 }
7202 }
7203 ctxt->valid = 0;
7204 }
7205
7206 /*
7207 * [ WFC: Parsed Entity ]
7208 * An entity reference must not contain the name of an
7209 * unparsed entity
7210 */
7211 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7212 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7213 "Entity reference to unparsed entity %s\n", name);
7214 }
7215
7216 /*
7217 * [ WFC: No External Entity References ]
7218 * Attribute values cannot contain direct or indirect
7219 * entity references to external entities.
7220 */
7221 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7222 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
7223 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7224 "Attribute references external entity '%s'\n", name);
7225 }
7226 /*
7227 * [ WFC: No < in Attribute Values ]
7228 * The replacement text of any entity referred to directly or
7229 * indirectly in an attribute value (other than "&lt;") must
7230 * not contain a <.
7231 */
7232 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7233 (ent != NULL) && (ent->content != NULL) &&
Rob Richardsb9ed0172009-01-05 17:28:50 +00007234 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
Daniel Veillard0161e632008-08-28 15:36:32 +00007235 (xmlStrchr(ent->content, '<'))) {
7236 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
7237 "'<' in entity '%s' is not allowed in attributes values\n", name);
7238 }
7239
7240 /*
7241 * Internal check, no parameter entities here ...
7242 */
7243 else {
7244 switch (ent->etype) {
7245 case XML_INTERNAL_PARAMETER_ENTITY:
7246 case XML_EXTERNAL_PARAMETER_ENTITY:
7247 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
7248 "Attempt to reference the parameter entity '%s'\n",
7249 name);
7250 break;
7251 default:
7252 break;
7253 }
7254 }
7255
7256 /*
7257 * [ WFC: No Recursion ]
7258 * A parsed entity must not contain a recursive reference
7259 * to itself, either directly or indirectly.
7260 * Done somewhere else
7261 */
Owen Taylor3473f882001-02-23 17:55:21 +00007262 return(ent);
7263}
7264
7265/**
7266 * xmlParseStringEntityRef:
7267 * @ctxt: an XML parser context
7268 * @str: a pointer to an index in the string
7269 *
7270 * parse ENTITY references declarations, but this version parses it from
7271 * a string value.
7272 *
7273 * [68] EntityRef ::= '&' Name ';'
7274 *
7275 * [ WFC: Entity Declared ]
7276 * In a document without any DTD, a document with only an internal DTD
7277 * subset which contains no parameter entity references, or a document
7278 * with "standalone='yes'", the Name given in the entity reference
7279 * must match that in an entity declaration, except that well-formed
7280 * documents need not declare any of the following entities: amp, lt,
7281 * gt, apos, quot. The declaration of a parameter entity must precede
7282 * any reference to it. Similarly, the declaration of a general entity
7283 * must precede any reference to it which appears in a default value in an
7284 * attribute-list declaration. Note that if entities are declared in the
7285 * external subset or in external parameter entities, a non-validating
7286 * processor is not obligated to read and process their declarations;
7287 * for such documents, the rule that an entity must be declared is a
7288 * well-formedness constraint only if standalone='yes'.
7289 *
7290 * [ WFC: Parsed Entity ]
7291 * An entity reference must not contain the name of an unparsed entity
7292 *
7293 * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
7294 * is updated to the current location in the string.
7295 */
Daniel Veillard8ed10722009-08-20 19:17:36 +02007296static xmlEntityPtr
Owen Taylor3473f882001-02-23 17:55:21 +00007297xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
7298 xmlChar *name;
7299 const xmlChar *ptr;
7300 xmlChar cur;
7301 xmlEntityPtr ent = NULL;
7302
7303 if ((str == NULL) || (*str == NULL))
7304 return(NULL);
7305 ptr = *str;
7306 cur = *ptr;
Daniel Veillard0161e632008-08-28 15:36:32 +00007307 if (cur != '&')
7308 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007309
Daniel Veillard0161e632008-08-28 15:36:32 +00007310 ptr++;
Daniel Veillard0161e632008-08-28 15:36:32 +00007311 name = xmlParseStringName(ctxt, &ptr);
7312 if (name == NULL) {
7313 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7314 "xmlParseStringEntityRef: no name\n");
7315 *str = ptr;
7316 return(NULL);
7317 }
7318 if (*ptr != ';') {
7319 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
Daniel Veillard7f4547c2008-10-03 07:58:23 +00007320 xmlFree(name);
Daniel Veillard0161e632008-08-28 15:36:32 +00007321 *str = ptr;
7322 return(NULL);
7323 }
7324 ptr++;
Owen Taylor3473f882001-02-23 17:55:21 +00007325
Owen Taylor3473f882001-02-23 17:55:21 +00007326
Daniel Veillard0161e632008-08-28 15:36:32 +00007327 /*
7328 * Predefined entites override any extra definition
7329 */
Rob Richardsb9ed0172009-01-05 17:28:50 +00007330 if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7331 ent = xmlGetPredefinedEntity(name);
7332 if (ent != NULL) {
7333 xmlFree(name);
7334 *str = ptr;
7335 return(ent);
7336 }
Daniel Veillard34a7fc32008-10-02 20:55:10 +00007337 }
Owen Taylor3473f882001-02-23 17:55:21 +00007338
Daniel Veillard0161e632008-08-28 15:36:32 +00007339 /*
7340 * Increate the number of entity references parsed
7341 */
7342 ctxt->nbentities++;
Owen Taylor3473f882001-02-23 17:55:21 +00007343
Daniel Veillard0161e632008-08-28 15:36:32 +00007344 /*
7345 * Ask first SAX for entity resolution, otherwise try the
7346 * entities which may have stored in the parser context.
7347 */
7348 if (ctxt->sax != NULL) {
7349 if (ctxt->sax->getEntity != NULL)
7350 ent = ctxt->sax->getEntity(ctxt->userData, name);
Rob Richardsb9ed0172009-01-05 17:28:50 +00007351 if ((ent == NULL) && (ctxt->options & XML_PARSE_OLDSAX))
7352 ent = xmlGetPredefinedEntity(name);
Daniel Veillard0161e632008-08-28 15:36:32 +00007353 if ((ent == NULL) && (ctxt->userData==ctxt)) {
7354 ent = xmlSAX2GetEntity(ctxt, name);
Owen Taylor3473f882001-02-23 17:55:21 +00007355 }
7356 }
Daniel Veillard0161e632008-08-28 15:36:32 +00007357
7358 /*
7359 * [ WFC: Entity Declared ]
7360 * In a document without any DTD, a document with only an
7361 * internal DTD subset which contains no parameter entity
7362 * references, or a document with "standalone='yes'", the
7363 * Name given in the entity reference must match that in an
7364 * entity declaration, except that well-formed documents
7365 * need not declare any of the following entities: amp, lt,
7366 * gt, apos, quot.
7367 * The declaration of a parameter entity must precede any
7368 * reference to it.
7369 * Similarly, the declaration of a general entity must
7370 * precede any reference to it which appears in a default
7371 * value in an attribute-list declaration. Note that if
7372 * entities are declared in the external subset or in
7373 * external parameter entities, a non-validating processor
7374 * is not obligated to read and process their declarations;
7375 * for such documents, the rule that an entity must be
7376 * declared is a well-formedness constraint only if
7377 * standalone='yes'.
7378 */
7379 if (ent == NULL) {
7380 if ((ctxt->standalone == 1) ||
7381 ((ctxt->hasExternalSubset == 0) &&
7382 (ctxt->hasPErefs == 0))) {
7383 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7384 "Entity '%s' not defined\n", name);
7385 } else {
7386 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7387 "Entity '%s' not defined\n",
7388 name);
7389 }
7390 /* TODO ? check regressions ctxt->valid = 0; */
7391 }
7392
7393 /*
7394 * [ WFC: Parsed Entity ]
7395 * An entity reference must not contain the name of an
7396 * unparsed entity
7397 */
7398 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7399 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7400 "Entity reference to unparsed entity %s\n", name);
7401 }
7402
7403 /*
7404 * [ WFC: No External Entity References ]
7405 * Attribute values cannot contain direct or indirect
7406 * entity references to external entities.
7407 */
7408 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7409 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
7410 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7411 "Attribute references external entity '%s'\n", name);
7412 }
7413 /*
7414 * [ WFC: No < in Attribute Values ]
7415 * The replacement text of any entity referred to directly or
7416 * indirectly in an attribute value (other than "&lt;") must
7417 * not contain a <.
7418 */
7419 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7420 (ent != NULL) && (ent->content != NULL) &&
Rob Richardsb9ed0172009-01-05 17:28:50 +00007421 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
Daniel Veillard0161e632008-08-28 15:36:32 +00007422 (xmlStrchr(ent->content, '<'))) {
7423 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
7424 "'<' in entity '%s' is not allowed in attributes values\n",
7425 name);
7426 }
7427
7428 /*
7429 * Internal check, no parameter entities here ...
7430 */
7431 else {
7432 switch (ent->etype) {
7433 case XML_INTERNAL_PARAMETER_ENTITY:
7434 case XML_EXTERNAL_PARAMETER_ENTITY:
7435 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
7436 "Attempt to reference the parameter entity '%s'\n",
7437 name);
7438 break;
7439 default:
7440 break;
7441 }
7442 }
7443
7444 /*
7445 * [ WFC: No Recursion ]
7446 * A parsed entity must not contain a recursive reference
7447 * to itself, either directly or indirectly.
7448 * Done somewhere else
7449 */
7450
7451 xmlFree(name);
Owen Taylor3473f882001-02-23 17:55:21 +00007452 *str = ptr;
7453 return(ent);
7454}
7455
7456/**
7457 * xmlParsePEReference:
7458 * @ctxt: an XML parser context
7459 *
7460 * parse PEReference declarations
7461 * The entity content is handled directly by pushing it's content as
7462 * a new input stream.
7463 *
7464 * [69] PEReference ::= '%' Name ';'
7465 *
7466 * [ WFC: No Recursion ]
7467 * A parsed entity must not contain a recursive
7468 * reference to itself, either directly or indirectly.
7469 *
7470 * [ WFC: Entity Declared ]
7471 * In a document without any DTD, a document with only an internal DTD
7472 * subset which contains no parameter entity references, or a document
7473 * with "standalone='yes'", ... ... The declaration of a parameter
7474 * entity must precede any reference to it...
7475 *
7476 * [ VC: Entity Declared ]
7477 * In a document with an external subset or external parameter entities
7478 * with "standalone='no'", ... ... The declaration of a parameter entity
7479 * must precede any reference to it...
7480 *
7481 * [ WFC: In DTD ]
7482 * Parameter-entity references may only appear in the DTD.
7483 * NOTE: misleading but this is handled.
7484 */
7485void
Daniel Veillard8f597c32003-10-06 08:19:27 +00007486xmlParsePEReference(xmlParserCtxtPtr ctxt)
7487{
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007488 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00007489 xmlEntityPtr entity = NULL;
7490 xmlParserInputPtr input;
7491
Daniel Veillard0161e632008-08-28 15:36:32 +00007492 if (RAW != '%')
7493 return;
7494 NEXT;
7495 name = xmlParseName(ctxt);
7496 if (name == NULL) {
7497 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7498 "xmlParsePEReference: no name\n");
7499 return;
Owen Taylor3473f882001-02-23 17:55:21 +00007500 }
Daniel Veillard0161e632008-08-28 15:36:32 +00007501 if (RAW != ';') {
7502 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7503 return;
7504 }
7505
7506 NEXT;
7507
7508 /*
7509 * Increate the number of entity references parsed
7510 */
7511 ctxt->nbentities++;
7512
7513 /*
7514 * Request the entity from SAX
7515 */
7516 if ((ctxt->sax != NULL) &&
7517 (ctxt->sax->getParameterEntity != NULL))
7518 entity = ctxt->sax->getParameterEntity(ctxt->userData,
7519 name);
7520 if (entity == NULL) {
7521 /*
7522 * [ WFC: Entity Declared ]
7523 * In a document without any DTD, a document with only an
7524 * internal DTD subset which contains no parameter entity
7525 * references, or a document with "standalone='yes'", ...
7526 * ... The declaration of a parameter entity must precede
7527 * any reference to it...
7528 */
7529 if ((ctxt->standalone == 1) ||
7530 ((ctxt->hasExternalSubset == 0) &&
7531 (ctxt->hasPErefs == 0))) {
7532 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7533 "PEReference: %%%s; not found\n",
7534 name);
7535 } else {
7536 /*
7537 * [ VC: Entity Declared ]
7538 * In a document with an external subset or external
7539 * parameter entities with "standalone='no'", ...
7540 * ... The declaration of a parameter entity must
7541 * precede any reference to it...
7542 */
7543 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7544 "PEReference: %%%s; not found\n",
7545 name, NULL);
7546 ctxt->valid = 0;
7547 }
7548 } else {
7549 /*
7550 * Internal checking in case the entity quest barfed
7551 */
7552 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
7553 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
7554 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7555 "Internal: %%%s; is not a parameter entity\n",
7556 name, NULL);
7557 } else if (ctxt->input->free != deallocblankswrapper) {
7558 input = xmlNewBlanksWrapperInputStream(ctxt, entity);
7559 if (xmlPushInput(ctxt, input) < 0)
7560 return;
7561 } else {
7562 /*
7563 * TODO !!!
7564 * handle the extra spaces added before and after
7565 * c.f. http://www.w3.org/TR/REC-xml#as-PE
7566 */
7567 input = xmlNewEntityInputStream(ctxt, entity);
7568 if (xmlPushInput(ctxt, input) < 0)
7569 return;
7570 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
7571 (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
7572 (IS_BLANK_CH(NXT(5)))) {
7573 xmlParseTextDecl(ctxt);
7574 if (ctxt->errNo ==
7575 XML_ERR_UNSUPPORTED_ENCODING) {
7576 /*
7577 * The XML REC instructs us to stop parsing
7578 * right here
7579 */
7580 ctxt->instate = XML_PARSER_EOF;
7581 return;
7582 }
7583 }
7584 }
7585 }
7586 ctxt->hasPErefs = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007587}
7588
7589/**
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00007590 * xmlLoadEntityContent:
7591 * @ctxt: an XML parser context
7592 * @entity: an unloaded system entity
7593 *
7594 * Load the original content of the given system entity from the
7595 * ExternalID/SystemID given. This is to be used for Included in Literal
7596 * http://www.w3.org/TR/REC-xml/#inliteral processing of entities references
7597 *
7598 * Returns 0 in case of success and -1 in case of failure
7599 */
7600static int
7601xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
7602 xmlParserInputPtr input;
7603 xmlBufferPtr buf;
7604 int l, c;
7605 int count = 0;
7606
7607 if ((ctxt == NULL) || (entity == NULL) ||
7608 ((entity->etype != XML_EXTERNAL_PARAMETER_ENTITY) &&
7609 (entity->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY)) ||
7610 (entity->content != NULL)) {
7611 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
7612 "xmlLoadEntityContent parameter error");
7613 return(-1);
7614 }
7615
7616 if (xmlParserDebugEntities)
7617 xmlGenericError(xmlGenericErrorContext,
7618 "Reading %s entity content input\n", entity->name);
7619
7620 buf = xmlBufferCreate();
7621 if (buf == NULL) {
7622 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
7623 "xmlLoadEntityContent parameter error");
7624 return(-1);
7625 }
7626
7627 input = xmlNewEntityInputStream(ctxt, entity);
7628 if (input == NULL) {
7629 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
7630 "xmlLoadEntityContent input error");
7631 xmlBufferFree(buf);
7632 return(-1);
7633 }
7634
7635 /*
7636 * Push the entity as the current input, read char by char
7637 * saving to the buffer until the end of the entity or an error
7638 */
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00007639 if (xmlPushInput(ctxt, input) < 0) {
7640 xmlBufferFree(buf);
7641 return(-1);
7642 }
7643
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00007644 GROW;
7645 c = CUR_CHAR(l);
7646 while ((ctxt->input == input) && (ctxt->input->cur < ctxt->input->end) &&
7647 (IS_CHAR(c))) {
7648 xmlBufferAdd(buf, ctxt->input->cur, l);
7649 if (count++ > 100) {
7650 count = 0;
7651 GROW;
7652 }
7653 NEXTL(l);
7654 c = CUR_CHAR(l);
7655 }
7656
7657 if ((ctxt->input == input) && (ctxt->input->cur >= ctxt->input->end)) {
7658 xmlPopInput(ctxt);
7659 } else if (!IS_CHAR(c)) {
7660 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
7661 "xmlLoadEntityContent: invalid char value %d\n",
7662 c);
7663 xmlBufferFree(buf);
7664 return(-1);
7665 }
7666 entity->content = buf->content;
7667 buf->content = NULL;
7668 xmlBufferFree(buf);
7669
7670 return(0);
7671}
7672
7673/**
Owen Taylor3473f882001-02-23 17:55:21 +00007674 * xmlParseStringPEReference:
7675 * @ctxt: an XML parser context
7676 * @str: a pointer to an index in the string
7677 *
7678 * parse PEReference declarations
7679 *
7680 * [69] PEReference ::= '%' Name ';'
7681 *
7682 * [ WFC: No Recursion ]
7683 * A parsed entity must not contain a recursive
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00007684 * reference to itself, either directly or indirectly.
Owen Taylor3473f882001-02-23 17:55:21 +00007685 *
7686 * [ WFC: Entity Declared ]
7687 * In a document without any DTD, a document with only an internal DTD
7688 * subset which contains no parameter entity references, or a document
7689 * with "standalone='yes'", ... ... The declaration of a parameter
7690 * entity must precede any reference to it...
7691 *
7692 * [ VC: Entity Declared ]
7693 * In a document with an external subset or external parameter entities
7694 * with "standalone='no'", ... ... The declaration of a parameter entity
7695 * must precede any reference to it...
7696 *
7697 * [ WFC: In DTD ]
7698 * Parameter-entity references may only appear in the DTD.
7699 * NOTE: misleading but this is handled.
7700 *
7701 * Returns the string of the entity content.
7702 * str is updated to the current value of the index
7703 */
Daniel Veillard8ed10722009-08-20 19:17:36 +02007704static xmlEntityPtr
Owen Taylor3473f882001-02-23 17:55:21 +00007705xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
7706 const xmlChar *ptr;
7707 xmlChar cur;
7708 xmlChar *name;
7709 xmlEntityPtr entity = NULL;
7710
7711 if ((str == NULL) || (*str == NULL)) return(NULL);
7712 ptr = *str;
7713 cur = *ptr;
Daniel Veillard0161e632008-08-28 15:36:32 +00007714 if (cur != '%')
7715 return(NULL);
7716 ptr++;
Daniel Veillard0161e632008-08-28 15:36:32 +00007717 name = xmlParseStringName(ctxt, &ptr);
7718 if (name == NULL) {
7719 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7720 "xmlParseStringPEReference: no name\n");
7721 *str = ptr;
7722 return(NULL);
7723 }
7724 cur = *ptr;
7725 if (cur != ';') {
7726 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7727 xmlFree(name);
7728 *str = ptr;
7729 return(NULL);
7730 }
7731 ptr++;
7732
7733 /*
7734 * Increate the number of entity references parsed
7735 */
7736 ctxt->nbentities++;
7737
7738 /*
7739 * Request the entity from SAX
7740 */
7741 if ((ctxt->sax != NULL) &&
7742 (ctxt->sax->getParameterEntity != NULL))
7743 entity = ctxt->sax->getParameterEntity(ctxt->userData,
7744 name);
7745 if (entity == NULL) {
7746 /*
7747 * [ WFC: Entity Declared ]
7748 * In a document without any DTD, a document with only an
7749 * internal DTD subset which contains no parameter entity
7750 * references, or a document with "standalone='yes'", ...
7751 * ... The declaration of a parameter entity must precede
7752 * any reference to it...
7753 */
7754 if ((ctxt->standalone == 1) ||
7755 ((ctxt->hasExternalSubset == 0) && (ctxt->hasPErefs == 0))) {
7756 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7757 "PEReference: %%%s; not found\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00007758 } else {
Daniel Veillard0161e632008-08-28 15:36:32 +00007759 /*
7760 * [ VC: Entity Declared ]
7761 * In a document with an external subset or external
7762 * parameter entities with "standalone='no'", ...
7763 * ... The declaration of a parameter entity must
7764 * precede any reference to it...
7765 */
7766 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7767 "PEReference: %%%s; not found\n",
7768 name, NULL);
7769 ctxt->valid = 0;
7770 }
7771 } else {
7772 /*
7773 * Internal checking in case the entity quest barfed
7774 */
7775 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
7776 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
7777 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7778 "%%%s; is not a parameter entity\n",
7779 name, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007780 }
7781 }
Daniel Veillard0161e632008-08-28 15:36:32 +00007782 ctxt->hasPErefs = 1;
7783 xmlFree(name);
Owen Taylor3473f882001-02-23 17:55:21 +00007784 *str = ptr;
7785 return(entity);
7786}
7787
7788/**
7789 * xmlParseDocTypeDecl:
7790 * @ctxt: an XML parser context
7791 *
7792 * parse a DOCTYPE declaration
7793 *
7794 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
7795 * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
7796 *
7797 * [ VC: Root Element Type ]
7798 * The Name in the document type declaration must match the element
7799 * type of the root element.
7800 */
7801
7802void
7803xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007804 const xmlChar *name = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00007805 xmlChar *ExternalID = NULL;
7806 xmlChar *URI = NULL;
7807
7808 /*
7809 * We know that '<!DOCTYPE' has been detected.
7810 */
7811 SKIP(9);
7812
7813 SKIP_BLANKS;
7814
7815 /*
7816 * Parse the DOCTYPE name.
7817 */
7818 name = xmlParseName(ctxt);
7819 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007820 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7821 "xmlParseDocTypeDecl : no DOCTYPE name !\n");
Owen Taylor3473f882001-02-23 17:55:21 +00007822 }
7823 ctxt->intSubName = name;
7824
7825 SKIP_BLANKS;
7826
7827 /*
7828 * Check for SystemID and ExternalID
7829 */
7830 URI = xmlParseExternalID(ctxt, &ExternalID, 1);
7831
7832 if ((URI != NULL) || (ExternalID != NULL)) {
7833 ctxt->hasExternalSubset = 1;
7834 }
7835 ctxt->extSubURI = URI;
7836 ctxt->extSubSystem = ExternalID;
7837
7838 SKIP_BLANKS;
7839
7840 /*
7841 * Create and update the internal subset.
7842 */
7843 if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
7844 (!ctxt->disableSAX))
7845 ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
7846
7847 /*
7848 * Is there any internal subset declarations ?
7849 * they are handled separately in xmlParseInternalSubset()
7850 */
7851 if (RAW == '[')
7852 return;
7853
7854 /*
7855 * We should be at the end of the DOCTYPE declaration.
7856 */
7857 if (RAW != '>') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007858 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007859 }
7860 NEXT;
7861}
7862
7863/**
Daniel Veillardcbaf3992001-12-31 16:16:02 +00007864 * xmlParseInternalSubset:
Owen Taylor3473f882001-02-23 17:55:21 +00007865 * @ctxt: an XML parser context
7866 *
7867 * parse the internal subset declaration
7868 *
7869 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
7870 */
7871
Daniel Veillard56a4cb82001-03-24 17:00:36 +00007872static void
Owen Taylor3473f882001-02-23 17:55:21 +00007873xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
7874 /*
7875 * Is there any DTD definition ?
7876 */
7877 if (RAW == '[') {
7878 ctxt->instate = XML_PARSER_DTD;
7879 NEXT;
7880 /*
7881 * Parse the succession of Markup declarations and
7882 * PEReferences.
7883 * Subsequence (markupdecl | PEReference | S)*
7884 */
7885 while (RAW != ']') {
7886 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00007887 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00007888
7889 SKIP_BLANKS;
7890 xmlParseMarkupDecl(ctxt);
7891 xmlParsePEReference(ctxt);
7892
7893 /*
7894 * Pop-up of finished entities.
7895 */
7896 while ((RAW == 0) && (ctxt->inputNr > 1))
7897 xmlPopInput(ctxt);
7898
7899 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007900 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
Owen Taylor3473f882001-02-23 17:55:21 +00007901 "xmlParseInternalSubset: error detected in Markup declaration\n");
Owen Taylor3473f882001-02-23 17:55:21 +00007902 break;
7903 }
7904 }
7905 if (RAW == ']') {
7906 NEXT;
7907 SKIP_BLANKS;
7908 }
7909 }
7910
7911 /*
7912 * We should be at the end of the DOCTYPE declaration.
7913 */
7914 if (RAW != '>') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007915 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007916 }
7917 NEXT;
7918}
7919
Daniel Veillard81273902003-09-30 00:43:48 +00007920#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +00007921/**
7922 * xmlParseAttribute:
7923 * @ctxt: an XML parser context
7924 * @value: a xmlChar ** used to store the value of the attribute
7925 *
7926 * parse an attribute
7927 *
7928 * [41] Attribute ::= Name Eq AttValue
7929 *
7930 * [ WFC: No External Entity References ]
7931 * Attribute values cannot contain direct or indirect entity references
7932 * to external entities.
7933 *
7934 * [ WFC: No < in Attribute Values ]
7935 * The replacement text of any entity referred to directly or indirectly in
7936 * an attribute value (other than "&lt;") must not contain a <.
7937 *
7938 * [ VC: Attribute Value Type ]
7939 * The attribute must have been declared; the value must be of the type
7940 * declared for it.
7941 *
7942 * [25] Eq ::= S? '=' S?
7943 *
7944 * With namespace:
7945 *
7946 * [NS 11] Attribute ::= QName Eq AttValue
7947 *
7948 * Also the case QName == xmlns:??? is handled independently as a namespace
7949 * definition.
7950 *
7951 * Returns the attribute name, and the value in *value.
7952 */
7953
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007954const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00007955xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007956 const xmlChar *name;
7957 xmlChar *val;
Owen Taylor3473f882001-02-23 17:55:21 +00007958
7959 *value = NULL;
Daniel Veillard878eab02002-02-19 13:46:09 +00007960 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00007961 name = xmlParseName(ctxt);
7962 if (name == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00007963 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007964 "error parsing attribute name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00007965 return(NULL);
7966 }
7967
7968 /*
7969 * read the value
7970 */
7971 SKIP_BLANKS;
7972 if (RAW == '=') {
7973 NEXT;
7974 SKIP_BLANKS;
7975 val = xmlParseAttValue(ctxt);
7976 ctxt->instate = XML_PARSER_CONTENT;
7977 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00007978 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
Owen Taylor3473f882001-02-23 17:55:21 +00007979 "Specification mandate value for attribute %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00007980 return(NULL);
7981 }
7982
7983 /*
7984 * Check that xml:lang conforms to the specification
7985 * No more registered as an error, just generate a warning now
7986 * since this was deprecated in XML second edition
7987 */
7988 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
7989 if (!xmlCheckLanguageID(val)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00007990 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
7991 "Malformed value for xml:lang : %s\n",
7992 val, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007993 }
7994 }
7995
7996 /*
7997 * Check that xml:space conforms to the specification
7998 */
7999 if (xmlStrEqual(name, BAD_CAST "xml:space")) {
8000 if (xmlStrEqual(val, BAD_CAST "default"))
8001 *(ctxt->space) = 0;
8002 else if (xmlStrEqual(val, BAD_CAST "preserve"))
8003 *(ctxt->space) = 1;
8004 else {
Daniel Veillardd8925572005-06-08 22:34:55 +00008005 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
Daniel Veillard642104e2003-03-26 16:32:05 +00008006"Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
Daniel Veillardd8925572005-06-08 22:34:55 +00008007 val, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008008 }
8009 }
8010
8011 *value = val;
8012 return(name);
8013}
8014
8015/**
8016 * xmlParseStartTag:
8017 * @ctxt: an XML parser context
8018 *
8019 * parse a start of tag either for rule element or
8020 * EmptyElement. In both case we don't parse the tag closing chars.
8021 *
8022 * [40] STag ::= '<' Name (S Attribute)* S? '>'
8023 *
8024 * [ WFC: Unique Att Spec ]
8025 * No attribute name may appear more than once in the same start-tag or
8026 * empty-element tag.
8027 *
8028 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
8029 *
8030 * [ WFC: Unique Att Spec ]
8031 * No attribute name may appear more than once in the same start-tag or
8032 * empty-element tag.
8033 *
8034 * With namespace:
8035 *
8036 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
8037 *
8038 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
8039 *
8040 * Returns the element name parsed
8041 */
8042
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008043const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00008044xmlParseStartTag(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008045 const xmlChar *name;
8046 const xmlChar *attname;
Owen Taylor3473f882001-02-23 17:55:21 +00008047 xmlChar *attvalue;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008048 const xmlChar **atts = ctxt->atts;
Owen Taylor3473f882001-02-23 17:55:21 +00008049 int nbatts = 0;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008050 int maxatts = ctxt->maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00008051 int i;
8052
8053 if (RAW != '<') return(NULL);
Daniel Veillard21a0f912001-02-25 19:54:14 +00008054 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00008055
8056 name = xmlParseName(ctxt);
8057 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008058 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00008059 "xmlParseStartTag: invalid element name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008060 return(NULL);
8061 }
8062
8063 /*
8064 * Now parse the attributes, it ends up with the ending
8065 *
8066 * (S Attribute)* S?
8067 */
8068 SKIP_BLANKS;
8069 GROW;
8070
Daniel Veillard21a0f912001-02-25 19:54:14 +00008071 while ((RAW != '>') &&
8072 ((RAW != '/') || (NXT(1) != '>')) &&
Daniel Veillard73b013f2003-09-30 12:36:01 +00008073 (IS_BYTE_CHAR(RAW))) {
Owen Taylor3473f882001-02-23 17:55:21 +00008074 const xmlChar *q = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00008075 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00008076
8077 attname = xmlParseAttribute(ctxt, &attvalue);
8078 if ((attname != NULL) && (attvalue != NULL)) {
8079 /*
8080 * [ WFC: Unique Att Spec ]
8081 * No attribute name may appear more than once in the same
8082 * start-tag or empty-element tag.
8083 */
8084 for (i = 0; i < nbatts;i += 2) {
8085 if (xmlStrEqual(atts[i], attname)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008086 xmlErrAttributeDup(ctxt, NULL, attname);
Owen Taylor3473f882001-02-23 17:55:21 +00008087 xmlFree(attvalue);
8088 goto failed;
8089 }
8090 }
Owen Taylor3473f882001-02-23 17:55:21 +00008091 /*
8092 * Add the pair to atts
8093 */
8094 if (atts == NULL) {
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008095 maxatts = 22; /* allow for 10 attrs by default */
8096 atts = (const xmlChar **)
8097 xmlMalloc(maxatts * sizeof(xmlChar *));
Owen Taylor3473f882001-02-23 17:55:21 +00008098 if (atts == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008099 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008100 if (attvalue != NULL)
8101 xmlFree(attvalue);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008102 goto failed;
Owen Taylor3473f882001-02-23 17:55:21 +00008103 }
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008104 ctxt->atts = atts;
8105 ctxt->maxatts = maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00008106 } else if (nbatts + 4 > maxatts) {
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008107 const xmlChar **n;
8108
Owen Taylor3473f882001-02-23 17:55:21 +00008109 maxatts *= 2;
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008110 n = (const xmlChar **) xmlRealloc((void *) atts,
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008111 maxatts * sizeof(const xmlChar *));
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008112 if (n == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008113 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008114 if (attvalue != NULL)
8115 xmlFree(attvalue);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008116 goto failed;
Owen Taylor3473f882001-02-23 17:55:21 +00008117 }
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008118 atts = n;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008119 ctxt->atts = atts;
8120 ctxt->maxatts = maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00008121 }
8122 atts[nbatts++] = attname;
8123 atts[nbatts++] = attvalue;
8124 atts[nbatts] = NULL;
8125 atts[nbatts + 1] = NULL;
8126 } else {
Owen Taylor3473f882001-02-23 17:55:21 +00008127 if (attvalue != NULL)
8128 xmlFree(attvalue);
8129 }
8130
8131failed:
8132
Daniel Veillard3772de32002-12-17 10:31:45 +00008133 GROW
Owen Taylor3473f882001-02-23 17:55:21 +00008134 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
8135 break;
William M. Brack76e95df2003-10-18 16:20:14 +00008136 if (!IS_BLANK_CH(RAW)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008137 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
8138 "attributes construct error\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008139 }
8140 SKIP_BLANKS;
Daniel Veillard02111c12003-02-24 19:14:52 +00008141 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
8142 (attname == NULL) && (attvalue == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008143 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
8144 "xmlParseStartTag: problem parsing attributes\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008145 break;
8146 }
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008147 SHRINK;
Owen Taylor3473f882001-02-23 17:55:21 +00008148 GROW;
8149 }
8150
8151 /*
8152 * SAX: Start of Element !
8153 */
8154 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008155 (!ctxt->disableSAX)) {
8156 if (nbatts > 0)
8157 ctxt->sax->startElement(ctxt->userData, name, atts);
8158 else
8159 ctxt->sax->startElement(ctxt->userData, name, NULL);
8160 }
Owen Taylor3473f882001-02-23 17:55:21 +00008161
8162 if (atts != NULL) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008163 /* Free only the content strings */
8164 for (i = 1;i < nbatts;i+=2)
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008165 if (atts[i] != NULL)
8166 xmlFree((xmlChar *) atts[i]);
Owen Taylor3473f882001-02-23 17:55:21 +00008167 }
8168 return(name);
8169}
8170
8171/**
Daniel Veillard0fb18932003-09-07 09:14:37 +00008172 * xmlParseEndTag1:
Owen Taylor3473f882001-02-23 17:55:21 +00008173 * @ctxt: an XML parser context
Daniel Veillard0fb18932003-09-07 09:14:37 +00008174 * @line: line of the start tag
8175 * @nsNr: number of namespaces on the start tag
Owen Taylor3473f882001-02-23 17:55:21 +00008176 *
8177 * parse an end of tag
8178 *
8179 * [42] ETag ::= '</' Name S? '>'
8180 *
8181 * With namespace
8182 *
8183 * [NS 9] ETag ::= '</' QName S? '>'
8184 */
8185
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00008186static void
Daniel Veillard0fb18932003-09-07 09:14:37 +00008187xmlParseEndTag1(xmlParserCtxtPtr ctxt, int line) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008188 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00008189
8190 GROW;
8191 if ((RAW != '<') || (NXT(1) != '/')) {
Daniel Veillardf403d292003-10-05 13:51:35 +00008192 xmlFatalErrMsg(ctxt, XML_ERR_LTSLASH_REQUIRED,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008193 "xmlParseEndTag: '</' not found\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008194 return;
8195 }
8196 SKIP(2);
8197
Daniel Veillard46de64e2002-05-29 08:21:33 +00008198 name = xmlParseNameAndCompare(ctxt,ctxt->name);
Owen Taylor3473f882001-02-23 17:55:21 +00008199
8200 /*
8201 * We should definitely be at the ending "S? '>'" part
8202 */
8203 GROW;
8204 SKIP_BLANKS;
Daniel Veillard73b013f2003-09-30 12:36:01 +00008205 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008206 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008207 } else
Daniel Veillard21a0f912001-02-25 19:54:14 +00008208 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00008209
8210 /*
8211 * [ WFC: Element Type Match ]
8212 * The Name in an element's end-tag must match the element type in the
8213 * start-tag.
8214 *
8215 */
Daniel Veillard46de64e2002-05-29 08:21:33 +00008216 if (name != (xmlChar*)1) {
Daniel Veillardf403d292003-10-05 13:51:35 +00008217 if (name == NULL) name = BAD_CAST "unparseable";
8218 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00008219 "Opening and ending tag mismatch: %s line %d and %s\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00008220 ctxt->name, line, name);
Owen Taylor3473f882001-02-23 17:55:21 +00008221 }
8222
8223 /*
8224 * SAX: End of Tag
8225 */
8226 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
8227 (!ctxt->disableSAX))
Daniel Veillard46de64e2002-05-29 08:21:33 +00008228 ctxt->sax->endElement(ctxt->userData, ctxt->name);
Owen Taylor3473f882001-02-23 17:55:21 +00008229
Daniel Veillarde57ec792003-09-10 10:50:59 +00008230 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00008231 spacePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00008232 return;
8233}
8234
8235/**
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00008236 * xmlParseEndTag:
8237 * @ctxt: an XML parser context
8238 *
8239 * parse an end of tag
8240 *
8241 * [42] ETag ::= '</' Name S? '>'
8242 *
8243 * With namespace
8244 *
8245 * [NS 9] ETag ::= '</' QName S? '>'
8246 */
8247
8248void
8249xmlParseEndTag(xmlParserCtxtPtr ctxt) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00008250 xmlParseEndTag1(ctxt, 0);
8251}
Daniel Veillard81273902003-09-30 00:43:48 +00008252#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillard0fb18932003-09-07 09:14:37 +00008253
8254/************************************************************************
8255 * *
8256 * SAX 2 specific operations *
8257 * *
8258 ************************************************************************/
8259
Daniel Veillard0fb18932003-09-07 09:14:37 +00008260/*
8261 * xmlGetNamespace:
8262 * @ctxt: an XML parser context
8263 * @prefix: the prefix to lookup
8264 *
8265 * Lookup the namespace name for the @prefix (which ca be NULL)
8266 * The prefix must come from the @ctxt->dict dictionnary
8267 *
8268 * Returns the namespace name or NULL if not bound
8269 */
8270static const xmlChar *
8271xmlGetNamespace(xmlParserCtxtPtr ctxt, const xmlChar *prefix) {
8272 int i;
8273
Daniel Veillarde57ec792003-09-10 10:50:59 +00008274 if (prefix == ctxt->str_xml) return(ctxt->str_xml_ns);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008275 for (i = ctxt->nsNr - 2;i >= 0;i-=2)
Daniel Veillarde57ec792003-09-10 10:50:59 +00008276 if (ctxt->nsTab[i] == prefix) {
8277 if ((prefix == NULL) && (*ctxt->nsTab[i + 1] == 0))
8278 return(NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008279 return(ctxt->nsTab[i + 1]);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008280 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008281 return(NULL);
8282}
8283
8284/**
Daniel Veillard0fb18932003-09-07 09:14:37 +00008285 * xmlParseQName:
8286 * @ctxt: an XML parser context
8287 * @prefix: pointer to store the prefix part
8288 *
8289 * parse an XML Namespace QName
8290 *
8291 * [6] QName ::= (Prefix ':')? LocalPart
8292 * [7] Prefix ::= NCName
8293 * [8] LocalPart ::= NCName
8294 *
8295 * Returns the Name parsed or NULL
8296 */
8297
8298static const xmlChar *
8299xmlParseQName(xmlParserCtxtPtr ctxt, const xmlChar **prefix) {
8300 const xmlChar *l, *p;
8301
8302 GROW;
8303
8304 l = xmlParseNCName(ctxt);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008305 if (l == NULL) {
8306 if (CUR == ':') {
8307 l = xmlParseName(ctxt);
8308 if (l != NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008309 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8310 "Failed to parse QName '%s'\n", l, NULL, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008311 *prefix = NULL;
8312 return(l);
8313 }
8314 }
8315 return(NULL);
8316 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008317 if (CUR == ':') {
8318 NEXT;
8319 p = l;
8320 l = xmlParseNCName(ctxt);
8321 if (l == NULL) {
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008322 xmlChar *tmp;
8323
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008324 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8325 "Failed to parse QName '%s:'\n", p, NULL, NULL);
Daniel Veillardae0765b2008-07-31 19:54:59 +00008326 l = xmlParseNmtoken(ctxt);
8327 if (l == NULL)
8328 tmp = xmlBuildQName(BAD_CAST "", p, NULL, 0);
8329 else {
8330 tmp = xmlBuildQName(l, p, NULL, 0);
8331 xmlFree((char *)l);
8332 }
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008333 p = xmlDictLookup(ctxt->dict, tmp, -1);
8334 if (tmp != NULL) xmlFree(tmp);
8335 *prefix = NULL;
8336 return(p);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008337 }
8338 if (CUR == ':') {
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008339 xmlChar *tmp;
8340
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008341 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8342 "Failed to parse QName '%s:%s:'\n", p, l, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008343 NEXT;
8344 tmp = (xmlChar *) xmlParseName(ctxt);
8345 if (tmp != NULL) {
8346 tmp = xmlBuildQName(tmp, l, NULL, 0);
8347 l = xmlDictLookup(ctxt->dict, tmp, -1);
8348 if (tmp != NULL) xmlFree(tmp);
8349 *prefix = p;
8350 return(l);
8351 }
8352 tmp = xmlBuildQName(BAD_CAST "", l, NULL, 0);
8353 l = xmlDictLookup(ctxt->dict, tmp, -1);
8354 if (tmp != NULL) xmlFree(tmp);
8355 *prefix = p;
8356 return(l);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008357 }
8358 *prefix = p;
8359 } else
8360 *prefix = NULL;
8361 return(l);
8362}
8363
8364/**
8365 * xmlParseQNameAndCompare:
8366 * @ctxt: an XML parser context
8367 * @name: the localname
8368 * @prefix: the prefix, if any.
8369 *
8370 * parse an XML name and compares for match
8371 * (specialized for endtag parsing)
8372 *
8373 * Returns NULL for an illegal name, (xmlChar*) 1 for success
8374 * and the name for mismatch
8375 */
8376
8377static const xmlChar *
8378xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *name,
8379 xmlChar const *prefix) {
Daniel Veillardd44b9362009-09-07 12:15:08 +02008380 const xmlChar *cmp;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008381 const xmlChar *in;
8382 const xmlChar *ret;
8383 const xmlChar *prefix2;
8384
8385 if (prefix == NULL) return(xmlParseNameAndCompare(ctxt, name));
8386
8387 GROW;
8388 in = ctxt->input->cur;
Daniel Veillardd44b9362009-09-07 12:15:08 +02008389
Daniel Veillard0fb18932003-09-07 09:14:37 +00008390 cmp = prefix;
8391 while (*in != 0 && *in == *cmp) {
8392 ++in;
8393 ++cmp;
8394 }
8395 if ((*cmp == 0) && (*in == ':')) {
8396 in++;
8397 cmp = name;
8398 while (*in != 0 && *in == *cmp) {
8399 ++in;
8400 ++cmp;
8401 }
William M. Brack76e95df2003-10-18 16:20:14 +00008402 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00008403 /* success */
8404 ctxt->input->cur = in;
8405 return((const xmlChar*) 1);
8406 }
8407 }
8408 /*
8409 * all strings coms from the dictionary, equality can be done directly
8410 */
8411 ret = xmlParseQName (ctxt, &prefix2);
8412 if ((ret == name) && (prefix == prefix2))
8413 return((const xmlChar*) 1);
8414 return ret;
8415}
8416
8417/**
8418 * xmlParseAttValueInternal:
8419 * @ctxt: an XML parser context
8420 * @len: attribute len result
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008421 * @alloc: whether the attribute was reallocated as a new string
8422 * @normalize: if 1 then further non-CDATA normalization must be done
Daniel Veillard0fb18932003-09-07 09:14:37 +00008423 *
8424 * parse a value for an attribute.
8425 * NOTE: if no normalization is needed, the routine will return pointers
8426 * directly from the data buffer.
8427 *
8428 * 3.3.3 Attribute-Value Normalization:
8429 * Before the value of an attribute is passed to the application or
8430 * checked for validity, the XML processor must normalize it as follows:
8431 * - a character reference is processed by appending the referenced
8432 * character to the attribute value
8433 * - an entity reference is processed by recursively processing the
8434 * replacement text of the entity
8435 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
8436 * appending #x20 to the normalized value, except that only a single
8437 * #x20 is appended for a "#xD#xA" sequence that is part of an external
8438 * parsed entity or the literal entity value of an internal parsed entity
8439 * - other characters are processed by appending them to the normalized value
8440 * If the declared value is not CDATA, then the XML processor must further
8441 * process the normalized attribute value by discarding any leading and
8442 * trailing space (#x20) characters, and by replacing sequences of space
8443 * (#x20) characters by a single space (#x20) character.
8444 * All attributes for which no declaration has been read should be treated
8445 * by a non-validating parser as if declared CDATA.
8446 *
8447 * Returns the AttValue parsed or NULL. The value has to be freed by the
8448 * caller if it was copied, this can be detected by val[*len] == 0.
8449 */
8450
8451static xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008452xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *len, int *alloc,
8453 int normalize)
Daniel Veillarde57ec792003-09-10 10:50:59 +00008454{
Daniel Veillard0fb18932003-09-07 09:14:37 +00008455 xmlChar limit = 0;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008456 const xmlChar *in = NULL, *start, *end, *last;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008457 xmlChar *ret = NULL;
8458
8459 GROW;
8460 in = (xmlChar *) CUR_PTR;
8461 if (*in != '"' && *in != '\'') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008462 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008463 return (NULL);
8464 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008465 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008466
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008467 /*
8468 * try to handle in this routine the most common case where no
8469 * allocation of a new string is required and where content is
8470 * pure ASCII.
8471 */
8472 limit = *in++;
8473 end = ctxt->input->end;
8474 start = in;
8475 if (in >= end) {
8476 const xmlChar *oldbase = ctxt->input->base;
8477 GROW;
8478 if (oldbase != ctxt->input->base) {
8479 long delta = ctxt->input->base - oldbase;
8480 start = start + delta;
8481 in = in + delta;
8482 }
8483 end = ctxt->input->end;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008484 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008485 if (normalize) {
8486 /*
8487 * Skip any leading spaces
8488 */
8489 while ((in < end) && (*in != limit) &&
8490 ((*in == 0x20) || (*in == 0x9) ||
8491 (*in == 0xA) || (*in == 0xD))) {
8492 in++;
8493 start = in;
8494 if (in >= end) {
8495 const xmlChar *oldbase = ctxt->input->base;
8496 GROW;
8497 if (oldbase != ctxt->input->base) {
8498 long delta = ctxt->input->base - oldbase;
8499 start = start + delta;
8500 in = in + delta;
8501 }
8502 end = ctxt->input->end;
8503 }
8504 }
8505 while ((in < end) && (*in != limit) && (*in >= 0x20) &&
8506 (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
8507 if ((*in++ == 0x20) && (*in == 0x20)) break;
8508 if (in >= end) {
8509 const xmlChar *oldbase = ctxt->input->base;
8510 GROW;
8511 if (oldbase != ctxt->input->base) {
8512 long delta = ctxt->input->base - oldbase;
8513 start = start + delta;
8514 in = in + delta;
8515 }
8516 end = ctxt->input->end;
8517 }
8518 }
8519 last = in;
8520 /*
8521 * skip the trailing blanks
8522 */
Daniel Veillardc6e20e42003-09-11 16:30:26 +00008523 while ((last[-1] == 0x20) && (last > start)) last--;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008524 while ((in < end) && (*in != limit) &&
8525 ((*in == 0x20) || (*in == 0x9) ||
8526 (*in == 0xA) || (*in == 0xD))) {
8527 in++;
8528 if (in >= end) {
8529 const xmlChar *oldbase = ctxt->input->base;
8530 GROW;
8531 if (oldbase != ctxt->input->base) {
8532 long delta = ctxt->input->base - oldbase;
8533 start = start + delta;
8534 in = in + delta;
8535 last = last + delta;
8536 }
8537 end = ctxt->input->end;
8538 }
8539 }
8540 if (*in != limit) goto need_complex;
8541 } else {
8542 while ((in < end) && (*in != limit) && (*in >= 0x20) &&
8543 (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
8544 in++;
8545 if (in >= end) {
8546 const xmlChar *oldbase = ctxt->input->base;
8547 GROW;
8548 if (oldbase != ctxt->input->base) {
8549 long delta = ctxt->input->base - oldbase;
8550 start = start + delta;
8551 in = in + delta;
8552 }
8553 end = ctxt->input->end;
8554 }
8555 }
8556 last = in;
8557 if (*in != limit) goto need_complex;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008558 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008559 in++;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008560 if (len != NULL) {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008561 *len = last - start;
8562 ret = (xmlChar *) start;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008563 } else {
Daniel Veillarde57ec792003-09-10 10:50:59 +00008564 if (alloc) *alloc = 1;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008565 ret = xmlStrndup(start, last - start);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008566 }
8567 CUR_PTR = in;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008568 if (alloc) *alloc = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008569 return ret;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008570need_complex:
8571 if (alloc) *alloc = 1;
8572 return xmlParseAttValueComplex(ctxt, len, normalize);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008573}
8574
8575/**
8576 * xmlParseAttribute2:
8577 * @ctxt: an XML parser context
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008578 * @pref: the element prefix
8579 * @elem: the element name
8580 * @prefix: a xmlChar ** used to store the value of the attribute prefix
Daniel Veillard0fb18932003-09-07 09:14:37 +00008581 * @value: a xmlChar ** used to store the value of the attribute
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008582 * @len: an int * to save the length of the attribute
8583 * @alloc: an int * to indicate if the attribute was allocated
Daniel Veillard0fb18932003-09-07 09:14:37 +00008584 *
8585 * parse an attribute in the new SAX2 framework.
8586 *
8587 * Returns the attribute name, and the value in *value, .
8588 */
8589
8590static const xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008591xmlParseAttribute2(xmlParserCtxtPtr ctxt,
Daniel Veillard97c9ce22008-03-25 16:52:41 +00008592 const xmlChar * pref, const xmlChar * elem,
8593 const xmlChar ** prefix, xmlChar ** value,
8594 int *len, int *alloc)
8595{
Daniel Veillard0fb18932003-09-07 09:14:37 +00008596 const xmlChar *name;
Daniel Veillardd8925572005-06-08 22:34:55 +00008597 xmlChar *val, *internal_val = NULL;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008598 int normalize = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008599
8600 *value = NULL;
8601 GROW;
8602 name = xmlParseQName(ctxt, prefix);
8603 if (name == NULL) {
Daniel Veillard97c9ce22008-03-25 16:52:41 +00008604 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8605 "error parsing attribute name\n");
8606 return (NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008607 }
8608
8609 /*
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008610 * get the type if needed
8611 */
8612 if (ctxt->attsSpecial != NULL) {
8613 int type;
8614
8615 type = (int) (long) xmlHashQLookup2(ctxt->attsSpecial,
Daniel Veillard97c9ce22008-03-25 16:52:41 +00008616 pref, elem, *prefix, name);
8617 if (type != 0)
8618 normalize = 1;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008619 }
8620
8621 /*
Daniel Veillard0fb18932003-09-07 09:14:37 +00008622 * read the value
8623 */
8624 SKIP_BLANKS;
8625 if (RAW == '=') {
8626 NEXT;
Daniel Veillard97c9ce22008-03-25 16:52:41 +00008627 SKIP_BLANKS;
8628 val = xmlParseAttValueInternal(ctxt, len, alloc, normalize);
8629 if (normalize) {
8630 /*
8631 * Sometimes a second normalisation pass for spaces is needed
8632 * but that only happens if charrefs or entities refernces
8633 * have been used in the attribute value, i.e. the attribute
8634 * value have been extracted in an allocated string already.
8635 */
8636 if (*alloc) {
8637 const xmlChar *val2;
8638
8639 val2 = xmlAttrNormalizeSpace2(ctxt, val, len);
Daniel Veillardae0765b2008-07-31 19:54:59 +00008640 if ((val2 != NULL) && (val2 != val)) {
Daniel Veillard97c9ce22008-03-25 16:52:41 +00008641 xmlFree(val);
Daniel Veillard6a31b832008-03-26 14:06:44 +00008642 val = (xmlChar *) val2;
Daniel Veillard97c9ce22008-03-25 16:52:41 +00008643 }
8644 }
8645 }
8646 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008647 } else {
Daniel Veillard97c9ce22008-03-25 16:52:41 +00008648 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
8649 "Specification mandate value for attribute %s\n",
8650 name);
8651 return (NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008652 }
8653
Daniel Veillard97c9ce22008-03-25 16:52:41 +00008654 if (*prefix == ctxt->str_xml) {
8655 /*
8656 * Check that xml:lang conforms to the specification
8657 * No more registered as an error, just generate a warning now
8658 * since this was deprecated in XML second edition
8659 */
8660 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "lang"))) {
8661 internal_val = xmlStrndup(val, *len);
8662 if (!xmlCheckLanguageID(internal_val)) {
8663 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
8664 "Malformed value for xml:lang : %s\n",
8665 internal_val, NULL);
8666 }
8667 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008668
Daniel Veillard97c9ce22008-03-25 16:52:41 +00008669 /*
8670 * Check that xml:space conforms to the specification
8671 */
8672 if (xmlStrEqual(name, BAD_CAST "space")) {
8673 internal_val = xmlStrndup(val, *len);
8674 if (xmlStrEqual(internal_val, BAD_CAST "default"))
8675 *(ctxt->space) = 0;
8676 else if (xmlStrEqual(internal_val, BAD_CAST "preserve"))
8677 *(ctxt->space) = 1;
8678 else {
8679 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
8680 "Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
8681 internal_val, NULL);
8682 }
8683 }
8684 if (internal_val) {
8685 xmlFree(internal_val);
8686 }
8687 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008688
8689 *value = val;
Daniel Veillard97c9ce22008-03-25 16:52:41 +00008690 return (name);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008691}
Daniel Veillard0fb18932003-09-07 09:14:37 +00008692/**
8693 * xmlParseStartTag2:
8694 * @ctxt: an XML parser context
8695 *
8696 * parse a start of tag either for rule element or
8697 * EmptyElement. In both case we don't parse the tag closing chars.
8698 * This routine is called when running SAX2 parsing
8699 *
8700 * [40] STag ::= '<' Name (S Attribute)* S? '>'
8701 *
8702 * [ WFC: Unique Att Spec ]
8703 * No attribute name may appear more than once in the same start-tag or
8704 * empty-element tag.
8705 *
8706 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
8707 *
8708 * [ WFC: Unique Att Spec ]
8709 * No attribute name may appear more than once in the same start-tag or
8710 * empty-element tag.
8711 *
8712 * With namespace:
8713 *
8714 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
8715 *
8716 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
8717 *
8718 * Returns the element name parsed
8719 */
8720
8721static const xmlChar *
8722xmlParseStartTag2(xmlParserCtxtPtr ctxt, const xmlChar **pref,
Daniel Veillardc82c57e2004-01-12 16:24:34 +00008723 const xmlChar **URI, int *tlen) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00008724 const xmlChar *localname;
8725 const xmlChar *prefix;
8726 const xmlChar *attname;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008727 const xmlChar *aprefix;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008728 const xmlChar *nsname;
8729 xmlChar *attvalue;
8730 const xmlChar **atts = ctxt->atts;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008731 int maxatts = ctxt->maxatts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008732 int nratts, nbatts, nbdef;
Daniel Veillarddcec6722006-10-15 20:32:53 +00008733 int i, j, nbNs, attval, oldline, oldcol;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008734 const xmlChar *base;
8735 unsigned long cur;
Daniel Veillard365cf672005-06-09 08:18:24 +00008736 int nsNr = ctxt->nsNr;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008737
8738 if (RAW != '<') return(NULL);
8739 NEXT1;
8740
8741 /*
8742 * NOTE: it is crucial with the SAX2 API to never call SHRINK beyond that
8743 * point since the attribute values may be stored as pointers to
8744 * the buffer and calling SHRINK would destroy them !
8745 * The Shrinking is only possible once the full set of attribute
8746 * callbacks have been done.
8747 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00008748reparse:
Daniel Veillard0fb18932003-09-07 09:14:37 +00008749 SHRINK;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008750 base = ctxt->input->base;
8751 cur = ctxt->input->cur - ctxt->input->base;
Daniel Veillarddcec6722006-10-15 20:32:53 +00008752 oldline = ctxt->input->line;
8753 oldcol = ctxt->input->col;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008754 nbatts = 0;
8755 nratts = 0;
8756 nbdef = 0;
8757 nbNs = 0;
8758 attval = 0;
Daniel Veillard365cf672005-06-09 08:18:24 +00008759 /* Forget any namespaces added during an earlier parse of this element. */
8760 ctxt->nsNr = nsNr;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008761
8762 localname = xmlParseQName(ctxt, &prefix);
8763 if (localname == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008764 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8765 "StartTag: invalid element name\n");
Daniel Veillard0fb18932003-09-07 09:14:37 +00008766 return(NULL);
8767 }
Daniel Veillardc82c57e2004-01-12 16:24:34 +00008768 *tlen = ctxt->input->cur - ctxt->input->base - cur;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008769
8770 /*
8771 * Now parse the attributes, it ends up with the ending
8772 *
8773 * (S Attribute)* S?
8774 */
8775 SKIP_BLANKS;
8776 GROW;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008777 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008778
8779 while ((RAW != '>') &&
8780 ((RAW != '/') || (NXT(1) != '>')) &&
Daniel Veillard73b013f2003-09-30 12:36:01 +00008781 (IS_BYTE_CHAR(RAW))) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00008782 const xmlChar *q = CUR_PTR;
8783 unsigned int cons = ctxt->input->consumed;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008784 int len = -1, alloc = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008785
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008786 attname = xmlParseAttribute2(ctxt, prefix, localname,
8787 &aprefix, &attvalue, &len, &alloc);
Daniel Veillarddcec6722006-10-15 20:32:53 +00008788 if (ctxt->input->base != base) {
8789 if ((attvalue != NULL) && (alloc != 0))
8790 xmlFree(attvalue);
8791 attvalue = NULL;
8792 goto base_changed;
8793 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008794 if ((attname != NULL) && (attvalue != NULL)) {
8795 if (len < 0) len = xmlStrlen(attvalue);
8796 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00008797 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
8798 xmlURIPtr uri;
8799
8800 if (*URL != 0) {
8801 uri = xmlParseURI((const char *) URL);
8802 if (uri == NULL) {
Daniel Veillard37334572008-07-31 08:20:02 +00008803 xmlNsErr(ctxt, XML_WAR_NS_URI,
8804 "xmlns: '%s' is not a valid URI\n",
8805 URL, NULL, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008806 } else {
Daniel Veillardda3fee42008-09-01 13:08:57 +00008807 if (uri->scheme == NULL) {
Daniel Veillard37334572008-07-31 08:20:02 +00008808 xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
8809 "xmlns: URI %s is not absolute\n",
8810 URL, NULL, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008811 }
8812 xmlFreeURI(uri);
8813 }
Daniel Veillard37334572008-07-31 08:20:02 +00008814 if (URL == ctxt->str_xml_ns) {
8815 if (attname != ctxt->str_xml) {
8816 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8817 "xml namespace URI cannot be the default namespace\n",
8818 NULL, NULL, NULL);
8819 }
8820 goto skip_default_ns;
8821 }
8822 if ((len == 29) &&
8823 (xmlStrEqual(URL,
8824 BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
8825 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8826 "reuse of the xmlns namespace name is forbidden\n",
8827 NULL, NULL, NULL);
8828 goto skip_default_ns;
8829 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00008830 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008831 /*
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008832 * check that it's not a defined namespace
Daniel Veillard0fb18932003-09-07 09:14:37 +00008833 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008834 for (j = 1;j <= nbNs;j++)
8835 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
8836 break;
8837 if (j <= nbNs)
8838 xmlErrAttributeDup(ctxt, NULL, attname);
8839 else
8840 if (nsPush(ctxt, NULL, URL) > 0) nbNs++;
Daniel Veillard37334572008-07-31 08:20:02 +00008841skip_default_ns:
Daniel Veillarde57ec792003-09-10 10:50:59 +00008842 if (alloc != 0) xmlFree(attvalue);
8843 SKIP_BLANKS;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008844 continue;
8845 }
8846 if (aprefix == ctxt->str_xmlns) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00008847 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
8848 xmlURIPtr uri;
8849
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008850 if (attname == ctxt->str_xml) {
8851 if (URL != ctxt->str_xml_ns) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008852 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8853 "xml namespace prefix mapped to wrong URI\n",
8854 NULL, NULL, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008855 }
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008856 /*
8857 * Do not keep a namespace definition node
8858 */
Daniel Veillard37334572008-07-31 08:20:02 +00008859 goto skip_ns;
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008860 }
Daniel Veillard37334572008-07-31 08:20:02 +00008861 if (URL == ctxt->str_xml_ns) {
8862 if (attname != ctxt->str_xml) {
8863 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8864 "xml namespace URI mapped to wrong prefix\n",
8865 NULL, NULL, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008866 }
Daniel Veillard37334572008-07-31 08:20:02 +00008867 goto skip_ns;
8868 }
8869 if (attname == ctxt->str_xmlns) {
8870 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8871 "redefinition of the xmlns prefix is forbidden\n",
8872 NULL, NULL, NULL);
8873 goto skip_ns;
8874 }
8875 if ((len == 29) &&
8876 (xmlStrEqual(URL,
8877 BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
8878 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8879 "reuse of the xmlns namespace name is forbidden\n",
8880 NULL, NULL, NULL);
8881 goto skip_ns;
8882 }
8883 if ((URL == NULL) || (URL[0] == 0)) {
8884 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8885 "xmlns:%s: Empty XML namespace is not allowed\n",
8886 attname, NULL, NULL);
8887 goto skip_ns;
8888 } else {
8889 uri = xmlParseURI((const char *) URL);
8890 if (uri == NULL) {
8891 xmlNsErr(ctxt, XML_WAR_NS_URI,
8892 "xmlns:%s: '%s' is not a valid URI\n",
8893 attname, URL, NULL);
8894 } else {
8895 if ((ctxt->pedantic) && (uri->scheme == NULL)) {
8896 xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
8897 "xmlns:%s: URI %s is not absolute\n",
8898 attname, URL, NULL);
8899 }
8900 xmlFreeURI(uri);
8901 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00008902 }
8903
Daniel Veillard0fb18932003-09-07 09:14:37 +00008904 /*
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008905 * check that it's not a defined namespace
Daniel Veillard0fb18932003-09-07 09:14:37 +00008906 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008907 for (j = 1;j <= nbNs;j++)
8908 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
8909 break;
8910 if (j <= nbNs)
8911 xmlErrAttributeDup(ctxt, aprefix, attname);
8912 else
8913 if (nsPush(ctxt, attname, URL) > 0) nbNs++;
Daniel Veillard37334572008-07-31 08:20:02 +00008914skip_ns:
Daniel Veillarde57ec792003-09-10 10:50:59 +00008915 if (alloc != 0) xmlFree(attvalue);
8916 SKIP_BLANKS;
Daniel Veillardd3999c72004-03-10 16:27:03 +00008917 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008918 continue;
8919 }
8920
8921 /*
8922 * Add the pair to atts
8923 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00008924 if ((atts == NULL) || (nbatts + 5 > maxatts)) {
8925 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00008926 if (attvalue[len] == 0)
8927 xmlFree(attvalue);
8928 goto failed;
8929 }
8930 maxatts = ctxt->maxatts;
8931 atts = ctxt->atts;
8932 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00008933 ctxt->attallocs[nratts++] = alloc;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008934 atts[nbatts++] = attname;
8935 atts[nbatts++] = aprefix;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008936 atts[nbatts++] = NULL; /* the URI will be fetched later */
Daniel Veillard0fb18932003-09-07 09:14:37 +00008937 atts[nbatts++] = attvalue;
8938 attvalue += len;
8939 atts[nbatts++] = attvalue;
8940 /*
8941 * tag if some deallocation is needed
8942 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00008943 if (alloc != 0) attval = 1;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008944 } else {
8945 if ((attvalue != NULL) && (attvalue[len] == 0))
8946 xmlFree(attvalue);
8947 }
8948
Daniel Veillard37334572008-07-31 08:20:02 +00008949failed:
Daniel Veillard0fb18932003-09-07 09:14:37 +00008950
8951 GROW
Daniel Veillarde57ec792003-09-10 10:50:59 +00008952 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008953 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
8954 break;
William M. Brack76e95df2003-10-18 16:20:14 +00008955 if (!IS_BLANK_CH(RAW)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008956 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
8957 "attributes construct error\n");
William M. Brack13dfa872004-09-18 04:52:08 +00008958 break;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008959 }
8960 SKIP_BLANKS;
8961 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
8962 (attname == NULL) && (attvalue == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008963 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
Daniel Veillard0fb18932003-09-07 09:14:37 +00008964 "xmlParseStartTag: problem parsing attributes\n");
Daniel Veillard0fb18932003-09-07 09:14:37 +00008965 break;
8966 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008967 GROW;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008968 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008969 }
8970
Daniel Veillard0fb18932003-09-07 09:14:37 +00008971 /*
Daniel Veillarde57ec792003-09-10 10:50:59 +00008972 * The attributes defaulting
8973 */
8974 if (ctxt->attsDefault != NULL) {
8975 xmlDefAttrsPtr defaults;
8976
8977 defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix);
8978 if (defaults != NULL) {
8979 for (i = 0;i < defaults->nbAttrs;i++) {
Daniel Veillardae0765b2008-07-31 19:54:59 +00008980 attname = defaults->values[5 * i];
8981 aprefix = defaults->values[5 * i + 1];
Daniel Veillarde57ec792003-09-10 10:50:59 +00008982
8983 /*
8984 * special work for namespaces defaulted defs
8985 */
8986 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
8987 /*
8988 * check that it's not a defined namespace
8989 */
8990 for (j = 1;j <= nbNs;j++)
8991 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
8992 break;
8993 if (j <= nbNs) continue;
8994
8995 nsname = xmlGetNamespace(ctxt, NULL);
Daniel Veillardae0765b2008-07-31 19:54:59 +00008996 if (nsname != defaults->values[5 * i + 2]) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00008997 if (nsPush(ctxt, NULL,
Daniel Veillardae0765b2008-07-31 19:54:59 +00008998 defaults->values[5 * i + 2]) > 0)
Daniel Veillarde57ec792003-09-10 10:50:59 +00008999 nbNs++;
9000 }
9001 } else if (aprefix == ctxt->str_xmlns) {
9002 /*
9003 * check that it's not a defined namespace
9004 */
9005 for (j = 1;j <= nbNs;j++)
9006 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
9007 break;
9008 if (j <= nbNs) continue;
9009
9010 nsname = xmlGetNamespace(ctxt, attname);
9011 if (nsname != defaults->values[2]) {
9012 if (nsPush(ctxt, attname,
Daniel Veillardae0765b2008-07-31 19:54:59 +00009013 defaults->values[5 * i + 2]) > 0)
Daniel Veillarde57ec792003-09-10 10:50:59 +00009014 nbNs++;
9015 }
9016 } else {
9017 /*
9018 * check that it's not a defined attribute
9019 */
9020 for (j = 0;j < nbatts;j+=5) {
9021 if ((attname == atts[j]) && (aprefix == atts[j+1]))
9022 break;
9023 }
9024 if (j < nbatts) continue;
9025
9026 if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9027 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
Daniel Veillard9ee35f32003-09-28 00:19:54 +00009028 return(NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00009029 }
9030 maxatts = ctxt->maxatts;
9031 atts = ctxt->atts;
9032 }
9033 atts[nbatts++] = attname;
9034 atts[nbatts++] = aprefix;
9035 if (aprefix == NULL)
9036 atts[nbatts++] = NULL;
9037 else
9038 atts[nbatts++] = xmlGetNamespace(ctxt, aprefix);
Daniel Veillardae0765b2008-07-31 19:54:59 +00009039 atts[nbatts++] = defaults->values[5 * i + 2];
9040 atts[nbatts++] = defaults->values[5 * i + 3];
9041 if ((ctxt->standalone == 1) &&
9042 (defaults->values[5 * i + 4] != NULL)) {
9043 xmlValidityError(ctxt, XML_DTD_STANDALONE_DEFAULTED,
9044 "standalone: attribute %s on %s defaulted from external subset\n",
9045 attname, localname);
9046 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00009047 nbdef++;
9048 }
9049 }
9050 }
9051 }
9052
Daniel Veillarde70c8772003-11-25 07:21:18 +00009053 /*
9054 * The attributes checkings
9055 */
9056 for (i = 0; i < nbatts;i += 5) {
Kasimier T. Buchcik455472f2005-04-29 10:04:43 +00009057 /*
9058 * The default namespace does not apply to attribute names.
9059 */
9060 if (atts[i + 1] != NULL) {
9061 nsname = xmlGetNamespace(ctxt, atts[i + 1]);
9062 if (nsname == NULL) {
9063 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9064 "Namespace prefix %s for %s on %s is not defined\n",
9065 atts[i + 1], atts[i], localname);
9066 }
9067 atts[i + 2] = nsname;
9068 } else
9069 nsname = NULL;
Daniel Veillarde70c8772003-11-25 07:21:18 +00009070 /*
9071 * [ WFC: Unique Att Spec ]
9072 * No attribute name may appear more than once in the same
9073 * start-tag or empty-element tag.
9074 * As extended by the Namespace in XML REC.
9075 */
9076 for (j = 0; j < i;j += 5) {
9077 if (atts[i] == atts[j]) {
9078 if (atts[i+1] == atts[j+1]) {
9079 xmlErrAttributeDup(ctxt, atts[i+1], atts[i]);
9080 break;
9081 }
9082 if ((nsname != NULL) && (atts[j + 2] == nsname)) {
9083 xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED,
9084 "Namespaced Attribute %s in '%s' redefined\n",
9085 atts[i], nsname, NULL);
9086 break;
9087 }
9088 }
9089 }
9090 }
9091
Daniel Veillarde57ec792003-09-10 10:50:59 +00009092 nsname = xmlGetNamespace(ctxt, prefix);
9093 if ((prefix != NULL) && (nsname == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009094 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9095 "Namespace prefix %s on %s is not defined\n",
9096 prefix, localname, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00009097 }
9098 *pref = prefix;
9099 *URI = nsname;
9100
9101 /*
9102 * SAX: Start of Element !
9103 */
9104 if ((ctxt->sax != NULL) && (ctxt->sax->startElementNs != NULL) &&
9105 (!ctxt->disableSAX)) {
9106 if (nbNs > 0)
9107 ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
9108 nsname, nbNs, &ctxt->nsTab[ctxt->nsNr - 2 * nbNs],
9109 nbatts / 5, nbdef, atts);
9110 else
9111 ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
9112 nsname, 0, NULL, nbatts / 5, nbdef, atts);
9113 }
9114
9115 /*
9116 * Free up attribute allocated strings if needed
9117 */
9118 if (attval != 0) {
9119 for (i = 3,j = 0; j < nratts;i += 5,j++)
9120 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
9121 xmlFree((xmlChar *) atts[i]);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009122 }
9123
9124 return(localname);
Daniel Veillarde57ec792003-09-10 10:50:59 +00009125
9126base_changed:
9127 /*
9128 * the attribute strings are valid iif the base didn't changed
9129 */
9130 if (attval != 0) {
9131 for (i = 3,j = 0; j < nratts;i += 5,j++)
9132 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
9133 xmlFree((xmlChar *) atts[i]);
9134 }
9135 ctxt->input->cur = ctxt->input->base + cur;
Daniel Veillarddcec6722006-10-15 20:32:53 +00009136 ctxt->input->line = oldline;
9137 ctxt->input->col = oldcol;
Daniel Veillarde57ec792003-09-10 10:50:59 +00009138 if (ctxt->wellFormed == 1) {
9139 goto reparse;
9140 }
9141 return(NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009142}
9143
9144/**
9145 * xmlParseEndTag2:
9146 * @ctxt: an XML parser context
9147 * @line: line of the start tag
9148 * @nsNr: number of namespaces on the start tag
9149 *
9150 * parse an end of tag
9151 *
9152 * [42] ETag ::= '</' Name S? '>'
9153 *
9154 * With namespace
9155 *
9156 * [NS 9] ETag ::= '</' QName S? '>'
9157 */
9158
9159static void
9160xmlParseEndTag2(xmlParserCtxtPtr ctxt, const xmlChar *prefix,
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009161 const xmlChar *URI, int line, int nsNr, int tlen) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00009162 const xmlChar *name;
9163
9164 GROW;
9165 if ((RAW != '<') || (NXT(1) != '/')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009166 xmlFatalErr(ctxt, XML_ERR_LTSLASH_REQUIRED, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009167 return;
9168 }
9169 SKIP(2);
9170
William M. Brack13dfa872004-09-18 04:52:08 +00009171 if ((tlen > 0) && (xmlStrncmp(ctxt->input->cur, ctxt->name, tlen) == 0)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009172 if (ctxt->input->cur[tlen] == '>') {
9173 ctxt->input->cur += tlen + 1;
9174 goto done;
9175 }
9176 ctxt->input->cur += tlen;
9177 name = (xmlChar*)1;
9178 } else {
9179 if (prefix == NULL)
9180 name = xmlParseNameAndCompare(ctxt, ctxt->name);
9181 else
9182 name = xmlParseQNameAndCompare(ctxt, ctxt->name, prefix);
9183 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00009184
9185 /*
9186 * We should definitely be at the ending "S? '>'" part
9187 */
9188 GROW;
9189 SKIP_BLANKS;
Daniel Veillard73b013f2003-09-30 12:36:01 +00009190 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009191 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009192 } else
9193 NEXT1;
9194
9195 /*
9196 * [ WFC: Element Type Match ]
9197 * The Name in an element's end-tag must match the element type in the
9198 * start-tag.
9199 *
9200 */
9201 if (name != (xmlChar*)1) {
Daniel Veillardf403d292003-10-05 13:51:35 +00009202 if (name == NULL) name = BAD_CAST "unparseable";
Daniel Veillard852505b2009-08-23 15:44:48 +02009203 if ((line == 0) && (ctxt->node != NULL))
9204 line = ctxt->node->line;
Daniel Veillardf403d292003-10-05 13:51:35 +00009205 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
Daniel Veillard0fb18932003-09-07 09:14:37 +00009206 "Opening and ending tag mismatch: %s line %d and %s\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00009207 ctxt->name, line, name);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009208 }
9209
9210 /*
9211 * SAX: End of Tag
9212 */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009213done:
Daniel Veillard0fb18932003-09-07 09:14:37 +00009214 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
9215 (!ctxt->disableSAX))
9216 ctxt->sax->endElementNs(ctxt->userData, ctxt->name, prefix, URI);
9217
Daniel Veillard0fb18932003-09-07 09:14:37 +00009218 spacePop(ctxt);
9219 if (nsNr != 0)
9220 nsPop(ctxt, nsNr);
9221 return;
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00009222}
9223
9224/**
Owen Taylor3473f882001-02-23 17:55:21 +00009225 * xmlParseCDSect:
9226 * @ctxt: an XML parser context
9227 *
9228 * Parse escaped pure raw content.
9229 *
9230 * [18] CDSect ::= CDStart CData CDEnd
9231 *
9232 * [19] CDStart ::= '<![CDATA['
9233 *
9234 * [20] Data ::= (Char* - (Char* ']]>' Char*))
9235 *
9236 * [21] CDEnd ::= ']]>'
9237 */
9238void
9239xmlParseCDSect(xmlParserCtxtPtr ctxt) {
9240 xmlChar *buf = NULL;
9241 int len = 0;
9242 int size = XML_PARSER_BUFFER_SIZE;
9243 int r, rl;
9244 int s, sl;
9245 int cur, l;
9246 int count = 0;
9247
Daniel Veillard8f597c32003-10-06 08:19:27 +00009248 /* Check 2.6.0 was NXT(0) not RAW */
Daniel Veillarda07050d2003-10-19 14:46:32 +00009249 if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009250 SKIP(9);
9251 } else
9252 return;
9253
9254 ctxt->instate = XML_PARSER_CDATA_SECTION;
9255 r = CUR_CHAR(rl);
William M. Brack871611b2003-10-18 04:53:14 +00009256 if (!IS_CHAR(r)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009257 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009258 ctxt->instate = XML_PARSER_CONTENT;
9259 return;
9260 }
9261 NEXTL(rl);
9262 s = CUR_CHAR(sl);
William M. Brack871611b2003-10-18 04:53:14 +00009263 if (!IS_CHAR(s)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009264 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009265 ctxt->instate = XML_PARSER_CONTENT;
9266 return;
9267 }
9268 NEXTL(sl);
9269 cur = CUR_CHAR(l);
Daniel Veillard3c908dc2003-04-19 00:07:51 +00009270 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00009271 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009272 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009273 return;
9274 }
William M. Brack871611b2003-10-18 04:53:14 +00009275 while (IS_CHAR(cur) &&
Owen Taylor3473f882001-02-23 17:55:21 +00009276 ((r != ']') || (s != ']') || (cur != '>'))) {
9277 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00009278 xmlChar *tmp;
9279
Owen Taylor3473f882001-02-23 17:55:21 +00009280 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00009281 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
9282 if (tmp == NULL) {
9283 xmlFree(buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009284 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009285 return;
9286 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00009287 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00009288 }
9289 COPY_BUF(rl,buf,len,r);
9290 r = s;
9291 rl = sl;
9292 s = cur;
9293 sl = l;
9294 count++;
9295 if (count > 50) {
9296 GROW;
9297 count = 0;
9298 }
9299 NEXTL(l);
9300 cur = CUR_CHAR(l);
9301 }
9302 buf[len] = 0;
9303 ctxt->instate = XML_PARSER_CONTENT;
9304 if (cur != '>') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00009305 xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00009306 "CData section not finished\n%.50s\n", buf);
Owen Taylor3473f882001-02-23 17:55:21 +00009307 xmlFree(buf);
9308 return;
9309 }
9310 NEXTL(l);
9311
9312 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00009313 * OK the buffer is to be consumed as cdata.
Owen Taylor3473f882001-02-23 17:55:21 +00009314 */
9315 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
9316 if (ctxt->sax->cdataBlock != NULL)
9317 ctxt->sax->cdataBlock(ctxt->userData, buf, len);
Daniel Veillard7583a592001-07-08 13:15:55 +00009318 else if (ctxt->sax->characters != NULL)
9319 ctxt->sax->characters(ctxt->userData, buf, len);
Owen Taylor3473f882001-02-23 17:55:21 +00009320 }
9321 xmlFree(buf);
9322}
9323
9324/**
9325 * xmlParseContent:
9326 * @ctxt: an XML parser context
9327 *
9328 * Parse a content:
9329 *
9330 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
9331 */
9332
9333void
9334xmlParseContent(xmlParserCtxtPtr ctxt) {
9335 GROW;
Daniel Veillardfdc91562002-07-01 21:52:03 +00009336 while ((RAW != 0) &&
Daniel Veillard4a9fe382006-09-19 12:44:35 +00009337 ((RAW != '<') || (NXT(1) != '/')) &&
9338 (ctxt->instate != XML_PARSER_EOF)) {
Owen Taylor3473f882001-02-23 17:55:21 +00009339 const xmlChar *test = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00009340 unsigned int cons = ctxt->input->consumed;
Daniel Veillard21a0f912001-02-25 19:54:14 +00009341 const xmlChar *cur = ctxt->input->cur;
Owen Taylor3473f882001-02-23 17:55:21 +00009342
9343 /*
Owen Taylor3473f882001-02-23 17:55:21 +00009344 * First case : a Processing Instruction.
9345 */
Daniel Veillardfdc91562002-07-01 21:52:03 +00009346 if ((*cur == '<') && (cur[1] == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009347 xmlParsePI(ctxt);
9348 }
9349
9350 /*
9351 * Second case : a CDSection
9352 */
Daniel Veillard8f597c32003-10-06 08:19:27 +00009353 /* 2.6.0 test was *cur not RAW */
Daniel Veillarda07050d2003-10-19 14:46:32 +00009354 else if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009355 xmlParseCDSect(ctxt);
9356 }
9357
9358 /*
9359 * Third case : a comment
9360 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00009361 else if ((*cur == '<') && (NXT(1) == '!') &&
Owen Taylor3473f882001-02-23 17:55:21 +00009362 (NXT(2) == '-') && (NXT(3) == '-')) {
9363 xmlParseComment(ctxt);
9364 ctxt->instate = XML_PARSER_CONTENT;
9365 }
9366
9367 /*
9368 * Fourth case : a sub-element.
9369 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00009370 else if (*cur == '<') {
Owen Taylor3473f882001-02-23 17:55:21 +00009371 xmlParseElement(ctxt);
9372 }
9373
9374 /*
9375 * Fifth case : a reference. If if has not been resolved,
9376 * parsing returns it's Name, create the node
9377 */
9378
Daniel Veillard21a0f912001-02-25 19:54:14 +00009379 else if (*cur == '&') {
Owen Taylor3473f882001-02-23 17:55:21 +00009380 xmlParseReference(ctxt);
9381 }
9382
9383 /*
9384 * Last case, text. Note that References are handled directly.
9385 */
9386 else {
9387 xmlParseCharData(ctxt, 0);
9388 }
9389
9390 GROW;
9391 /*
9392 * Pop-up of finished entities.
9393 */
Daniel Veillard561b7f82002-03-20 21:55:57 +00009394 while ((RAW == 0) && (ctxt->inputNr > 1))
Owen Taylor3473f882001-02-23 17:55:21 +00009395 xmlPopInput(ctxt);
9396 SHRINK;
9397
Daniel Veillardfdc91562002-07-01 21:52:03 +00009398 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009399 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
9400 "detected an error in element content\n");
Owen Taylor3473f882001-02-23 17:55:21 +00009401 ctxt->instate = XML_PARSER_EOF;
9402 break;
9403 }
9404 }
9405}
9406
9407/**
9408 * xmlParseElement:
9409 * @ctxt: an XML parser context
9410 *
9411 * parse an XML element, this is highly recursive
9412 *
9413 * [39] element ::= EmptyElemTag | STag content ETag
9414 *
9415 * [ WFC: Element Type Match ]
9416 * The Name in an element's end-tag must match the element type in the
9417 * start-tag.
9418 *
Owen Taylor3473f882001-02-23 17:55:21 +00009419 */
9420
9421void
9422xmlParseElement(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00009423 const xmlChar *name;
Daniel Veillard15495612009-09-05 15:04:41 +02009424 const xmlChar *prefix = NULL;
9425 const xmlChar *URI = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00009426 xmlParserNodeInfo node_info;
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009427 int line, tlen;
Owen Taylor3473f882001-02-23 17:55:21 +00009428 xmlNodePtr ret;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009429 int nsNr = ctxt->nsNr;
Owen Taylor3473f882001-02-23 17:55:21 +00009430
Daniel Veillard8915c152008-08-26 13:05:34 +00009431 if (((unsigned int) ctxt->nameNr > xmlParserMaxDepth) &&
9432 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9433 xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
9434 "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
9435 xmlParserMaxDepth);
Daniel Veillard4a9fe382006-09-19 12:44:35 +00009436 ctxt->instate = XML_PARSER_EOF;
9437 return;
9438 }
9439
Owen Taylor3473f882001-02-23 17:55:21 +00009440 /* Capture start position */
9441 if (ctxt->record_info) {
9442 node_info.begin_pos = ctxt->input->consumed +
9443 (CUR_PTR - ctxt->input->base);
9444 node_info.begin_line = ctxt->input->line;
9445 }
9446
9447 if (ctxt->spaceNr == 0)
9448 spacePush(ctxt, -1);
Daniel Veillard1114d002006-10-12 16:24:35 +00009449 else if (*ctxt->space == -2)
9450 spacePush(ctxt, -1);
Owen Taylor3473f882001-02-23 17:55:21 +00009451 else
9452 spacePush(ctxt, *ctxt->space);
9453
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00009454 line = ctxt->input->line;
Daniel Veillard81273902003-09-30 00:43:48 +00009455#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +00009456 if (ctxt->sax2)
Daniel Veillard81273902003-09-30 00:43:48 +00009457#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009458 name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
Daniel Veillard81273902003-09-30 00:43:48 +00009459#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +00009460 else
9461 name = xmlParseStartTag(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +00009462#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00009463 if (name == NULL) {
9464 spacePop(ctxt);
9465 return;
9466 }
9467 namePush(ctxt, name);
9468 ret = ctxt->node;
9469
Daniel Veillard4432df22003-09-28 18:58:27 +00009470#ifdef LIBXML_VALID_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +00009471 /*
9472 * [ VC: Root Element Type ]
9473 * The Name in the document type declaration must match the element
9474 * type of the root element.
9475 */
9476 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
9477 ctxt->node && (ctxt->node == ctxt->myDoc->children))
9478 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
Daniel Veillard4432df22003-09-28 18:58:27 +00009479#endif /* LIBXML_VALID_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00009480
9481 /*
9482 * Check for an Empty Element.
9483 */
9484 if ((RAW == '/') && (NXT(1) == '>')) {
9485 SKIP(2);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009486 if (ctxt->sax2) {
9487 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
9488 (!ctxt->disableSAX))
9489 ctxt->sax->endElementNs(ctxt->userData, name, prefix, URI);
Daniel Veillard81273902003-09-30 00:43:48 +00009490#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +00009491 } else {
9492 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
9493 (!ctxt->disableSAX))
9494 ctxt->sax->endElement(ctxt->userData, name);
Daniel Veillard81273902003-09-30 00:43:48 +00009495#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00009496 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00009497 namePop(ctxt);
9498 spacePop(ctxt);
9499 if (nsNr != ctxt->nsNr)
9500 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +00009501 if ( ret != NULL && ctxt->record_info ) {
9502 node_info.end_pos = ctxt->input->consumed +
9503 (CUR_PTR - ctxt->input->base);
9504 node_info.end_line = ctxt->input->line;
9505 node_info.node = ret;
9506 xmlParserAddNodeInfo(ctxt, &node_info);
9507 }
9508 return;
9509 }
9510 if (RAW == '>') {
Daniel Veillard21a0f912001-02-25 19:54:14 +00009511 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00009512 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00009513 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_GT_REQUIRED,
9514 "Couldn't find end of Start Tag %s line %d\n",
9515 name, line, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009516
9517 /*
9518 * end of parsing of this node.
9519 */
9520 nodePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009521 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00009522 spacePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009523 if (nsNr != ctxt->nsNr)
9524 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +00009525
9526 /*
9527 * Capture end position and add node
9528 */
9529 if ( ret != NULL && ctxt->record_info ) {
9530 node_info.end_pos = ctxt->input->consumed +
9531 (CUR_PTR - ctxt->input->base);
9532 node_info.end_line = ctxt->input->line;
9533 node_info.node = ret;
9534 xmlParserAddNodeInfo(ctxt, &node_info);
9535 }
9536 return;
9537 }
9538
9539 /*
9540 * Parse the content of the element:
9541 */
9542 xmlParseContent(ctxt);
Daniel Veillard73b013f2003-09-30 12:36:01 +00009543 if (!IS_BYTE_CHAR(RAW)) {
Daniel Veillardf403d292003-10-05 13:51:35 +00009544 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
Daniel Veillard2b0f8792003-10-10 19:36:36 +00009545 "Premature end of data in tag %s line %d\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00009546 name, line, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009547
9548 /*
9549 * end of parsing of this node.
9550 */
9551 nodePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009552 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00009553 spacePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009554 if (nsNr != ctxt->nsNr)
9555 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +00009556 return;
9557 }
9558
9559 /*
9560 * parse the end of tag: '</' should be here.
9561 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00009562 if (ctxt->sax2) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009563 xmlParseEndTag2(ctxt, prefix, URI, line, ctxt->nsNr - nsNr, tlen);
Daniel Veillarde57ec792003-09-10 10:50:59 +00009564 namePop(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +00009565 }
9566#ifdef LIBXML_SAX1_ENABLED
9567 else
Daniel Veillard0fb18932003-09-07 09:14:37 +00009568 xmlParseEndTag1(ctxt, line);
Daniel Veillard81273902003-09-30 00:43:48 +00009569#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00009570
9571 /*
9572 * Capture end position and add node
9573 */
9574 if ( ret != NULL && ctxt->record_info ) {
9575 node_info.end_pos = ctxt->input->consumed +
9576 (CUR_PTR - ctxt->input->base);
9577 node_info.end_line = ctxt->input->line;
9578 node_info.node = ret;
9579 xmlParserAddNodeInfo(ctxt, &node_info);
9580 }
9581}
9582
9583/**
9584 * xmlParseVersionNum:
9585 * @ctxt: an XML parser context
9586 *
9587 * parse the XML version value.
9588 *
Daniel Veillard34e3f642008-07-29 09:02:27 +00009589 * [26] VersionNum ::= '1.' [0-9]+
9590 *
9591 * In practice allow [0-9].[0-9]+ at that level
Owen Taylor3473f882001-02-23 17:55:21 +00009592 *
9593 * Returns the string giving the XML version number, or NULL
9594 */
9595xmlChar *
9596xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
9597 xmlChar *buf = NULL;
9598 int len = 0;
9599 int size = 10;
9600 xmlChar cur;
9601
Daniel Veillard3c908dc2003-04-19 00:07:51 +00009602 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00009603 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009604 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009605 return(NULL);
9606 }
9607 cur = CUR;
Daniel Veillard34e3f642008-07-29 09:02:27 +00009608 if (!((cur >= '0') && (cur <= '9'))) {
Daniel Veillard7e5c3f42008-07-29 16:12:31 +00009609 xmlFree(buf);
Daniel Veillard34e3f642008-07-29 09:02:27 +00009610 return(NULL);
9611 }
9612 buf[len++] = cur;
9613 NEXT;
9614 cur=CUR;
9615 if (cur != '.') {
Daniel Veillard7e5c3f42008-07-29 16:12:31 +00009616 xmlFree(buf);
Daniel Veillard34e3f642008-07-29 09:02:27 +00009617 return(NULL);
9618 }
9619 buf[len++] = cur;
9620 NEXT;
9621 cur=CUR;
9622 while ((cur >= '0') && (cur <= '9')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009623 if (len + 1 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00009624 xmlChar *tmp;
9625
Owen Taylor3473f882001-02-23 17:55:21 +00009626 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00009627 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
9628 if (tmp == NULL) {
Daniel Veillard68b6e022008-03-31 09:26:00 +00009629 xmlFree(buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009630 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009631 return(NULL);
9632 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00009633 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00009634 }
9635 buf[len++] = cur;
9636 NEXT;
9637 cur=CUR;
9638 }
9639 buf[len] = 0;
9640 return(buf);
9641}
9642
9643/**
9644 * xmlParseVersionInfo:
9645 * @ctxt: an XML parser context
Daniel Veillard68b6e022008-03-31 09:26:00 +00009646 *
Owen Taylor3473f882001-02-23 17:55:21 +00009647 * parse the XML version.
9648 *
9649 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
Daniel Veillard68b6e022008-03-31 09:26:00 +00009650 *
Owen Taylor3473f882001-02-23 17:55:21 +00009651 * [25] Eq ::= S? '=' S?
9652 *
9653 * Returns the version string, e.g. "1.0"
9654 */
9655
9656xmlChar *
9657xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
9658 xmlChar *version = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00009659
Daniel Veillarda07050d2003-10-19 14:46:32 +00009660 if (CMP7(CUR_PTR, 'v', 'e', 'r', 's', 'i', 'o', 'n')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009661 SKIP(7);
9662 SKIP_BLANKS;
9663 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009664 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009665 return(NULL);
9666 }
9667 NEXT;
9668 SKIP_BLANKS;
9669 if (RAW == '"') {
9670 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00009671 version = xmlParseVersionNum(ctxt);
9672 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009673 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009674 } else
9675 NEXT;
9676 } else if (RAW == '\''){
9677 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00009678 version = xmlParseVersionNum(ctxt);
9679 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009680 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009681 } else
9682 NEXT;
9683 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009684 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009685 }
9686 }
9687 return(version);
9688}
9689
9690/**
9691 * xmlParseEncName:
9692 * @ctxt: an XML parser context
9693 *
9694 * parse the XML encoding name
9695 *
9696 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
9697 *
9698 * Returns the encoding name value or NULL
9699 */
9700xmlChar *
9701xmlParseEncName(xmlParserCtxtPtr ctxt) {
9702 xmlChar *buf = NULL;
9703 int len = 0;
9704 int size = 10;
9705 xmlChar cur;
9706
9707 cur = CUR;
9708 if (((cur >= 'a') && (cur <= 'z')) ||
9709 ((cur >= 'A') && (cur <= 'Z'))) {
Daniel Veillard3c908dc2003-04-19 00:07:51 +00009710 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00009711 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009712 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009713 return(NULL);
9714 }
Daniel Veillard34e3f642008-07-29 09:02:27 +00009715
Owen Taylor3473f882001-02-23 17:55:21 +00009716 buf[len++] = cur;
9717 NEXT;
9718 cur = CUR;
9719 while (((cur >= 'a') && (cur <= 'z')) ||
9720 ((cur >= 'A') && (cur <= 'Z')) ||
9721 ((cur >= '0') && (cur <= '9')) ||
9722 (cur == '.') || (cur == '_') ||
9723 (cur == '-')) {
9724 if (len + 1 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00009725 xmlChar *tmp;
9726
Owen Taylor3473f882001-02-23 17:55:21 +00009727 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00009728 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
9729 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009730 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00009731 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00009732 return(NULL);
9733 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00009734 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00009735 }
9736 buf[len++] = cur;
9737 NEXT;
9738 cur = CUR;
9739 if (cur == 0) {
9740 SHRINK;
9741 GROW;
9742 cur = CUR;
9743 }
9744 }
9745 buf[len] = 0;
9746 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009747 xmlFatalErr(ctxt, XML_ERR_ENCODING_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009748 }
9749 return(buf);
9750}
9751
9752/**
9753 * xmlParseEncodingDecl:
9754 * @ctxt: an XML parser context
9755 *
9756 * parse the XML encoding declaration
9757 *
9758 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
9759 *
9760 * this setups the conversion filters.
9761 *
9762 * Returns the encoding value or NULL
9763 */
9764
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00009765const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00009766xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
9767 xmlChar *encoding = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00009768
9769 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00009770 if (CMP8(CUR_PTR, 'e', 'n', 'c', 'o', 'd', 'i', 'n', 'g')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009771 SKIP(8);
9772 SKIP_BLANKS;
9773 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009774 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009775 return(NULL);
9776 }
9777 NEXT;
9778 SKIP_BLANKS;
9779 if (RAW == '"') {
9780 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00009781 encoding = xmlParseEncName(ctxt);
9782 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009783 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009784 } else
9785 NEXT;
9786 } else if (RAW == '\''){
9787 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00009788 encoding = xmlParseEncName(ctxt);
9789 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009790 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009791 } else
9792 NEXT;
Daniel Veillard82ac6b02002-02-17 23:18:55 +00009793 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009794 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009795 }
Daniel Veillard6b621b82003-08-11 15:03:34 +00009796 /*
9797 * UTF-16 encoding stwich has already taken place at this stage,
9798 * more over the little-endian/big-endian selection is already done
9799 */
9800 if ((encoding != NULL) &&
9801 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-16")) ||
9802 (!xmlStrcasecmp(encoding, BAD_CAST "UTF16")))) {
Daniel Veillard37334572008-07-31 08:20:02 +00009803 /*
9804 * If no encoding was passed to the parser, that we are
9805 * using UTF-16 and no decoder is present i.e. the
9806 * document is apparently UTF-8 compatible, then raise an
9807 * encoding mismatch fatal error
9808 */
9809 if ((ctxt->encoding == NULL) &&
9810 (ctxt->input->buf != NULL) &&
9811 (ctxt->input->buf->encoder == NULL)) {
9812 xmlFatalErrMsg(ctxt, XML_ERR_INVALID_ENCODING,
9813 "Document labelled UTF-16 but has UTF-8 content\n");
9814 }
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00009815 if (ctxt->encoding != NULL)
9816 xmlFree((xmlChar *) ctxt->encoding);
9817 ctxt->encoding = encoding;
Daniel Veillardb19ba832003-08-14 00:33:46 +00009818 }
9819 /*
9820 * UTF-8 encoding is handled natively
9821 */
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00009822 else if ((encoding != NULL) &&
Daniel Veillardb19ba832003-08-14 00:33:46 +00009823 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-8")) ||
9824 (!xmlStrcasecmp(encoding, BAD_CAST "UTF8")))) {
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00009825 if (ctxt->encoding != NULL)
9826 xmlFree((xmlChar *) ctxt->encoding);
9827 ctxt->encoding = encoding;
Daniel Veillard6b621b82003-08-11 15:03:34 +00009828 }
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00009829 else if (encoding != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +00009830 xmlCharEncodingHandlerPtr handler;
9831
9832 if (ctxt->input->encoding != NULL)
9833 xmlFree((xmlChar *) ctxt->input->encoding);
9834 ctxt->input->encoding = encoding;
9835
Daniel Veillarda6874ca2003-07-29 16:47:24 +00009836 handler = xmlFindCharEncodingHandler((const char *) encoding);
9837 if (handler != NULL) {
9838 xmlSwitchToEncoding(ctxt, handler);
Owen Taylor3473f882001-02-23 17:55:21 +00009839 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00009840 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
Daniel Veillarda6874ca2003-07-29 16:47:24 +00009841 "Unsupported encoding %s\n", encoding);
9842 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009843 }
9844 }
9845 }
9846 return(encoding);
9847}
9848
9849/**
9850 * xmlParseSDDecl:
9851 * @ctxt: an XML parser context
9852 *
9853 * parse the XML standalone declaration
9854 *
9855 * [32] SDDecl ::= S 'standalone' Eq
9856 * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
9857 *
9858 * [ VC: Standalone Document Declaration ]
9859 * TODO The standalone document declaration must have the value "no"
9860 * if any external markup declarations contain declarations of:
9861 * - attributes with default values, if elements to which these
9862 * attributes apply appear in the document without specifications
9863 * of values for these attributes, or
9864 * - entities (other than amp, lt, gt, apos, quot), if references
9865 * to those entities appear in the document, or
9866 * - attributes with values subject to normalization, where the
9867 * attribute appears in the document with a value which will change
9868 * as a result of normalization, or
9869 * - element types with element content, if white space occurs directly
9870 * within any instance of those types.
9871 *
Daniel Veillard602f2bd2006-12-04 09:26:04 +00009872 * Returns:
9873 * 1 if standalone="yes"
9874 * 0 if standalone="no"
9875 * -2 if standalone attribute is missing or invalid
9876 * (A standalone value of -2 means that the XML declaration was found,
9877 * but no value was specified for the standalone attribute).
Owen Taylor3473f882001-02-23 17:55:21 +00009878 */
9879
9880int
9881xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard602f2bd2006-12-04 09:26:04 +00009882 int standalone = -2;
Owen Taylor3473f882001-02-23 17:55:21 +00009883
9884 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00009885 if (CMP10(CUR_PTR, 's', 't', 'a', 'n', 'd', 'a', 'l', 'o', 'n', 'e')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009886 SKIP(10);
9887 SKIP_BLANKS;
9888 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009889 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009890 return(standalone);
9891 }
9892 NEXT;
9893 SKIP_BLANKS;
9894 if (RAW == '\''){
9895 NEXT;
9896 if ((RAW == 'n') && (NXT(1) == 'o')) {
9897 standalone = 0;
9898 SKIP(2);
9899 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
9900 (NXT(2) == 's')) {
9901 standalone = 1;
9902 SKIP(3);
9903 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009904 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009905 }
9906 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009907 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009908 } else
9909 NEXT;
9910 } else if (RAW == '"'){
9911 NEXT;
9912 if ((RAW == 'n') && (NXT(1) == 'o')) {
9913 standalone = 0;
9914 SKIP(2);
9915 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
9916 (NXT(2) == 's')) {
9917 standalone = 1;
9918 SKIP(3);
9919 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009920 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009921 }
9922 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009923 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009924 } else
9925 NEXT;
9926 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009927 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009928 }
9929 }
9930 return(standalone);
9931}
9932
9933/**
9934 * xmlParseXMLDecl:
9935 * @ctxt: an XML parser context
9936 *
9937 * parse an XML declaration header
9938 *
9939 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
9940 */
9941
9942void
9943xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
9944 xmlChar *version;
9945
9946 /*
Daniel Veillardae487ba2005-11-17 07:25:52 +00009947 * This value for standalone indicates that the document has an
9948 * XML declaration but it does not have a standalone attribute.
9949 * It will be overwritten later if a standalone attribute is found.
9950 */
9951 ctxt->input->standalone = -2;
9952
9953 /*
Owen Taylor3473f882001-02-23 17:55:21 +00009954 * We know that '<?xml' is here.
9955 */
9956 SKIP(5);
9957
William M. Brack76e95df2003-10-18 16:20:14 +00009958 if (!IS_BLANK_CH(RAW)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009959 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
9960 "Blank needed after '<?xml'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00009961 }
9962 SKIP_BLANKS;
9963
9964 /*
Daniel Veillard19840942001-11-29 16:11:38 +00009965 * We must have the VersionInfo here.
Owen Taylor3473f882001-02-23 17:55:21 +00009966 */
9967 version = xmlParseVersionInfo(ctxt);
Daniel Veillard19840942001-11-29 16:11:38 +00009968 if (version == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009969 xmlFatalErr(ctxt, XML_ERR_VERSION_MISSING, NULL);
Daniel Veillard19840942001-11-29 16:11:38 +00009970 } else {
9971 if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) {
9972 /*
Daniel Veillard34e3f642008-07-29 09:02:27 +00009973 * Changed here for XML-1.0 5th edition
Daniel Veillard19840942001-11-29 16:11:38 +00009974 */
Daniel Veillard34e3f642008-07-29 09:02:27 +00009975 if (ctxt->options & XML_PARSE_OLD10) {
9976 xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
9977 "Unsupported version '%s'\n",
9978 version);
9979 } else {
9980 if ((version[0] == '1') && ((version[1] == '.'))) {
9981 xmlWarningMsg(ctxt, XML_WAR_UNKNOWN_VERSION,
9982 "Unsupported version '%s'\n",
9983 version, NULL);
9984 } else {
9985 xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
9986 "Unsupported version '%s'\n",
9987 version);
9988 }
9989 }
Daniel Veillard19840942001-11-29 16:11:38 +00009990 }
9991 if (ctxt->version != NULL)
Daniel Veillardd3b08822001-12-05 12:03:33 +00009992 xmlFree((void *) ctxt->version);
Daniel Veillard19840942001-11-29 16:11:38 +00009993 ctxt->version = version;
Daniel Veillarda050d232001-09-05 15:51:05 +00009994 }
Owen Taylor3473f882001-02-23 17:55:21 +00009995
9996 /*
9997 * We may have the encoding declaration
9998 */
William M. Brack76e95df2003-10-18 16:20:14 +00009999 if (!IS_BLANK_CH(RAW)) {
Owen Taylor3473f882001-02-23 17:55:21 +000010000 if ((RAW == '?') && (NXT(1) == '>')) {
10001 SKIP(2);
10002 return;
10003 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +000010004 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
Owen Taylor3473f882001-02-23 17:55:21 +000010005 }
10006 xmlParseEncodingDecl(ctxt);
10007 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
10008 /*
10009 * The XML REC instructs us to stop parsing right here
10010 */
10011 return;
10012 }
10013
10014 /*
10015 * We may have the standalone status.
10016 */
William M. Brack76e95df2003-10-18 16:20:14 +000010017 if ((ctxt->input->encoding != NULL) && (!IS_BLANK_CH(RAW))) {
Owen Taylor3473f882001-02-23 17:55:21 +000010018 if ((RAW == '?') && (NXT(1) == '>')) {
10019 SKIP(2);
10020 return;
10021 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +000010022 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
Owen Taylor3473f882001-02-23 17:55:21 +000010023 }
Daniel Veillarda6c76a22009-08-26 14:37:00 +020010024
10025 /*
10026 * We can grow the input buffer freely at that point
10027 */
10028 GROW;
10029
Owen Taylor3473f882001-02-23 17:55:21 +000010030 SKIP_BLANKS;
10031 ctxt->input->standalone = xmlParseSDDecl(ctxt);
10032
10033 SKIP_BLANKS;
10034 if ((RAW == '?') && (NXT(1) == '>')) {
10035 SKIP(2);
10036 } else if (RAW == '>') {
10037 /* Deprecated old WD ... */
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010038 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010039 NEXT;
10040 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010041 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010042 MOVETO_ENDTAG(CUR_PTR);
10043 NEXT;
10044 }
10045}
10046
10047/**
10048 * xmlParseMisc:
10049 * @ctxt: an XML parser context
10050 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +000010051 * parse an XML Misc* optional field.
Owen Taylor3473f882001-02-23 17:55:21 +000010052 *
10053 * [27] Misc ::= Comment | PI | S
10054 */
10055
10056void
10057xmlParseMisc(xmlParserCtxtPtr ctxt) {
Daniel Veillard561b7f82002-03-20 21:55:57 +000010058 while (((RAW == '<') && (NXT(1) == '?')) ||
Daniel Veillarda07050d2003-10-19 14:46:32 +000010059 (CMP4(CUR_PTR, '<', '!', '-', '-')) ||
William M. Brack76e95df2003-10-18 16:20:14 +000010060 IS_BLANK_CH(CUR)) {
Daniel Veillard561b7f82002-03-20 21:55:57 +000010061 if ((RAW == '<') && (NXT(1) == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +000010062 xmlParsePI(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +000010063 } else if (IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +000010064 NEXT;
10065 } else
10066 xmlParseComment(ctxt);
10067 }
10068}
10069
10070/**
10071 * xmlParseDocument:
10072 * @ctxt: an XML parser context
10073 *
10074 * parse an XML document (and build a tree if using the standard SAX
10075 * interface).
10076 *
10077 * [1] document ::= prolog element Misc*
10078 *
10079 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
10080 *
10081 * Returns 0, -1 in case of error. the parser context is augmented
10082 * as a result of the parsing.
10083 */
10084
10085int
10086xmlParseDocument(xmlParserCtxtPtr ctxt) {
10087 xmlChar start[4];
10088 xmlCharEncoding enc;
10089
10090 xmlInitParser();
10091
Daniel Veillard36e5cd52004-11-02 14:52:23 +000010092 if ((ctxt == NULL) || (ctxt->input == NULL))
10093 return(-1);
10094
Owen Taylor3473f882001-02-23 17:55:21 +000010095 GROW;
10096
10097 /*
Daniel Veillard0fb18932003-09-07 09:14:37 +000010098 * SAX: detecting the level.
10099 */
Daniel Veillarde57ec792003-09-10 10:50:59 +000010100 xmlDetectSAX2(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +000010101
10102 /*
Owen Taylor3473f882001-02-23 17:55:21 +000010103 * SAX: beginning of the document processing.
10104 */
10105 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10106 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
10107
Daniel Veillard4aede2e2003-10-17 12:43:59 +000010108 if ((ctxt->encoding == (const xmlChar *)XML_CHAR_ENCODING_NONE) &&
10109 ((ctxt->input->end - ctxt->input->cur) >= 4)) {
Daniel Veillard4aafa792001-07-28 17:21:12 +000010110 /*
10111 * Get the 4 first bytes and decode the charset
10112 * if enc != XML_CHAR_ENCODING_NONE
10113 * plug some encoding conversion routines.
10114 */
10115 start[0] = RAW;
10116 start[1] = NXT(1);
10117 start[2] = NXT(2);
10118 start[3] = NXT(3);
Daniel Veillard4aede2e2003-10-17 12:43:59 +000010119 enc = xmlDetectCharEncoding(&start[0], 4);
Daniel Veillard4aafa792001-07-28 17:21:12 +000010120 if (enc != XML_CHAR_ENCODING_NONE) {
10121 xmlSwitchEncoding(ctxt, enc);
10122 }
Owen Taylor3473f882001-02-23 17:55:21 +000010123 }
10124
10125
10126 if (CUR == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010127 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010128 }
10129
10130 /*
10131 * Check for the XMLDecl in the Prolog.
Daniel Veillard7e385bd2009-08-26 11:38:49 +020010132 * do not GROW here to avoid the detected encoder to decode more
Daniel Veillard9d3d1412009-09-15 18:41:30 +020010133 * than just the first line, unless the amount of data is really
10134 * too small to hold "<?xml version="1.0" encoding="foo"
Owen Taylor3473f882001-02-23 17:55:21 +000010135 */
Daniel Veillard9d3d1412009-09-15 18:41:30 +020010136 if ((ctxt->input->end - ctxt->input->cur) < 35) {
10137 GROW;
10138 }
Daniel Veillarda07050d2003-10-19 14:46:32 +000010139 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +000010140
10141 /*
10142 * Note that we will switch encoding on the fly.
10143 */
10144 xmlParseXMLDecl(ctxt);
10145 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
10146 /*
10147 * The XML REC instructs us to stop parsing right here
10148 */
10149 return(-1);
10150 }
10151 ctxt->standalone = ctxt->input->standalone;
10152 SKIP_BLANKS;
10153 } else {
10154 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10155 }
10156 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
10157 ctxt->sax->startDocument(ctxt->userData);
10158
10159 /*
10160 * The Misc part of the Prolog
10161 */
10162 GROW;
10163 xmlParseMisc(ctxt);
10164
10165 /*
10166 * Then possibly doc type declaration(s) and more Misc
10167 * (doctypedecl Misc*)?
10168 */
10169 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +000010170 if (CMP9(CUR_PTR, '<', '!', 'D', 'O', 'C', 'T', 'Y', 'P', 'E')) {
Owen Taylor3473f882001-02-23 17:55:21 +000010171
10172 ctxt->inSubset = 1;
10173 xmlParseDocTypeDecl(ctxt);
10174 if (RAW == '[') {
10175 ctxt->instate = XML_PARSER_DTD;
10176 xmlParseInternalSubset(ctxt);
10177 }
10178
10179 /*
10180 * Create and update the external subset.
10181 */
10182 ctxt->inSubset = 2;
10183 if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
10184 (!ctxt->disableSAX))
10185 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
10186 ctxt->extSubSystem, ctxt->extSubURI);
10187 ctxt->inSubset = 0;
10188
Daniel Veillardac4118d2008-01-11 05:27:32 +000010189 xmlCleanSpecialAttr(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010190
10191 ctxt->instate = XML_PARSER_PROLOG;
10192 xmlParseMisc(ctxt);
10193 }
10194
10195 /*
10196 * Time to start parsing the tree itself
10197 */
10198 GROW;
10199 if (RAW != '<') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +000010200 xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
10201 "Start tag expected, '<' not found\n");
Owen Taylor3473f882001-02-23 17:55:21 +000010202 } else {
10203 ctxt->instate = XML_PARSER_CONTENT;
10204 xmlParseElement(ctxt);
10205 ctxt->instate = XML_PARSER_EPILOG;
10206
10207
10208 /*
10209 * The Misc part at the end
10210 */
10211 xmlParseMisc(ctxt);
10212
Daniel Veillard561b7f82002-03-20 21:55:57 +000010213 if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010214 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010215 }
10216 ctxt->instate = XML_PARSER_EOF;
10217 }
10218
10219 /*
10220 * SAX: end of the document processing.
10221 */
Daniel Veillard8d24cc12002-03-05 15:41:29 +000010222 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +000010223 ctxt->sax->endDocument(ctxt->userData);
10224
Daniel Veillard5997aca2002-03-18 18:36:20 +000010225 /*
10226 * Remove locally kept entity definitions if the tree was not built
10227 */
10228 if ((ctxt->myDoc != NULL) &&
10229 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
10230 xmlFreeDoc(ctxt->myDoc);
10231 ctxt->myDoc = NULL;
10232 }
10233
Daniel Veillardae0765b2008-07-31 19:54:59 +000010234 if ((ctxt->wellFormed) && (ctxt->myDoc != NULL)) {
10235 ctxt->myDoc->properties |= XML_DOC_WELLFORMED;
10236 if (ctxt->valid)
10237 ctxt->myDoc->properties |= XML_DOC_DTDVALID;
10238 if (ctxt->nsWellFormed)
10239 ctxt->myDoc->properties |= XML_DOC_NSVALID;
10240 if (ctxt->options & XML_PARSE_OLD10)
10241 ctxt->myDoc->properties |= XML_DOC_OLD10;
10242 }
Daniel Veillardc7612992002-02-17 22:47:37 +000010243 if (! ctxt->wellFormed) {
10244 ctxt->valid = 0;
10245 return(-1);
10246 }
Owen Taylor3473f882001-02-23 17:55:21 +000010247 return(0);
10248}
10249
10250/**
10251 * xmlParseExtParsedEnt:
10252 * @ctxt: an XML parser context
10253 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +000010254 * parse a general parsed entity
Owen Taylor3473f882001-02-23 17:55:21 +000010255 * An external general parsed entity is well-formed if it matches the
10256 * production labeled extParsedEnt.
10257 *
10258 * [78] extParsedEnt ::= TextDecl? content
10259 *
10260 * Returns 0, -1 in case of error. the parser context is augmented
10261 * as a result of the parsing.
10262 */
10263
10264int
10265xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
10266 xmlChar start[4];
10267 xmlCharEncoding enc;
10268
Daniel Veillard36e5cd52004-11-02 14:52:23 +000010269 if ((ctxt == NULL) || (ctxt->input == NULL))
10270 return(-1);
10271
Owen Taylor3473f882001-02-23 17:55:21 +000010272 xmlDefaultSAXHandlerInit();
10273
Daniel Veillard309f81d2003-09-23 09:02:53 +000010274 xmlDetectSAX2(ctxt);
10275
Owen Taylor3473f882001-02-23 17:55:21 +000010276 GROW;
10277
10278 /*
10279 * SAX: beginning of the document processing.
10280 */
10281 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10282 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
10283
10284 /*
10285 * Get the 4 first bytes and decode the charset
10286 * if enc != XML_CHAR_ENCODING_NONE
10287 * plug some encoding conversion routines.
10288 */
Daniel Veillard4aede2e2003-10-17 12:43:59 +000010289 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
10290 start[0] = RAW;
10291 start[1] = NXT(1);
10292 start[2] = NXT(2);
10293 start[3] = NXT(3);
10294 enc = xmlDetectCharEncoding(start, 4);
10295 if (enc != XML_CHAR_ENCODING_NONE) {
10296 xmlSwitchEncoding(ctxt, enc);
10297 }
Owen Taylor3473f882001-02-23 17:55:21 +000010298 }
10299
10300
10301 if (CUR == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010302 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010303 }
10304
10305 /*
10306 * Check for the XMLDecl in the Prolog.
10307 */
10308 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +000010309 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +000010310
10311 /*
10312 * Note that we will switch encoding on the fly.
10313 */
10314 xmlParseXMLDecl(ctxt);
10315 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
10316 /*
10317 * The XML REC instructs us to stop parsing right here
10318 */
10319 return(-1);
10320 }
10321 SKIP_BLANKS;
10322 } else {
10323 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10324 }
10325 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
10326 ctxt->sax->startDocument(ctxt->userData);
10327
10328 /*
10329 * Doing validity checking on chunk doesn't make sense
10330 */
10331 ctxt->instate = XML_PARSER_CONTENT;
10332 ctxt->validate = 0;
10333 ctxt->loadsubset = 0;
10334 ctxt->depth = 0;
10335
10336 xmlParseContent(ctxt);
10337
10338 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010339 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010340 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010341 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010342 }
10343
10344 /*
10345 * SAX: end of the document processing.
10346 */
Daniel Veillard8d24cc12002-03-05 15:41:29 +000010347 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +000010348 ctxt->sax->endDocument(ctxt->userData);
10349
10350 if (! ctxt->wellFormed) return(-1);
10351 return(0);
10352}
10353
Daniel Veillard73b013f2003-09-30 12:36:01 +000010354#ifdef LIBXML_PUSH_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000010355/************************************************************************
10356 * *
10357 * Progressive parsing interfaces *
10358 * *
10359 ************************************************************************/
10360
10361/**
10362 * xmlParseLookupSequence:
10363 * @ctxt: an XML parser context
10364 * @first: the first char to lookup
10365 * @next: the next char to lookup or zero
10366 * @third: the next char to lookup or zero
10367 *
10368 * Try to find if a sequence (first, next, third) or just (first next) or
10369 * (first) is available in the input stream.
10370 * This function has a side effect of (possibly) incrementing ctxt->checkIndex
10371 * to avoid rescanning sequences of bytes, it DOES change the state of the
10372 * parser, do not use liberally.
10373 *
10374 * Returns the index to the current parsing point if the full sequence
10375 * is available, -1 otherwise.
10376 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +000010377static int
Owen Taylor3473f882001-02-23 17:55:21 +000010378xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
10379 xmlChar next, xmlChar third) {
10380 int base, len;
10381 xmlParserInputPtr in;
10382 const xmlChar *buf;
10383
10384 in = ctxt->input;
10385 if (in == NULL) return(-1);
10386 base = in->cur - in->base;
10387 if (base < 0) return(-1);
10388 if (ctxt->checkIndex > base)
10389 base = ctxt->checkIndex;
10390 if (in->buf == NULL) {
10391 buf = in->base;
10392 len = in->length;
10393 } else {
10394 buf = in->buf->buffer->content;
10395 len = in->buf->buffer->use;
10396 }
10397 /* take into account the sequence length */
10398 if (third) len -= 2;
10399 else if (next) len --;
10400 for (;base < len;base++) {
10401 if (buf[base] == first) {
10402 if (third != 0) {
10403 if ((buf[base + 1] != next) ||
10404 (buf[base + 2] != third)) continue;
10405 } else if (next != 0) {
10406 if (buf[base + 1] != next) continue;
10407 }
10408 ctxt->checkIndex = 0;
10409#ifdef DEBUG_PUSH
10410 if (next == 0)
10411 xmlGenericError(xmlGenericErrorContext,
10412 "PP: lookup '%c' found at %d\n",
10413 first, base);
10414 else if (third == 0)
10415 xmlGenericError(xmlGenericErrorContext,
10416 "PP: lookup '%c%c' found at %d\n",
10417 first, next, base);
10418 else
10419 xmlGenericError(xmlGenericErrorContext,
10420 "PP: lookup '%c%c%c' found at %d\n",
10421 first, next, third, base);
10422#endif
10423 return(base - (in->cur - in->base));
10424 }
10425 }
10426 ctxt->checkIndex = base;
10427#ifdef DEBUG_PUSH
10428 if (next == 0)
10429 xmlGenericError(xmlGenericErrorContext,
10430 "PP: lookup '%c' failed\n", first);
10431 else if (third == 0)
10432 xmlGenericError(xmlGenericErrorContext,
10433 "PP: lookup '%c%c' failed\n", first, next);
10434 else
10435 xmlGenericError(xmlGenericErrorContext,
10436 "PP: lookup '%c%c%c' failed\n", first, next, third);
10437#endif
10438 return(-1);
10439}
10440
10441/**
Daniel Veillarda880b122003-04-21 21:36:41 +000010442 * xmlParseGetLasts:
10443 * @ctxt: an XML parser context
10444 * @lastlt: pointer to store the last '<' from the input
10445 * @lastgt: pointer to store the last '>' from the input
10446 *
10447 * Lookup the last < and > in the current chunk
10448 */
10449static void
10450xmlParseGetLasts(xmlParserCtxtPtr ctxt, const xmlChar **lastlt,
10451 const xmlChar **lastgt) {
10452 const xmlChar *tmp;
10453
10454 if ((ctxt == NULL) || (lastlt == NULL) || (lastgt == NULL)) {
10455 xmlGenericError(xmlGenericErrorContext,
10456 "Internal error: xmlParseGetLasts\n");
10457 return;
10458 }
Daniel Veillard0df3bc32004-06-08 12:03:41 +000010459 if ((ctxt->progressive != 0) && (ctxt->inputNr == 1)) {
Daniel Veillarda880b122003-04-21 21:36:41 +000010460 tmp = ctxt->input->end;
10461 tmp--;
Daniel Veillardeb70f932004-07-05 16:46:09 +000010462 while ((tmp >= ctxt->input->base) && (*tmp != '<')) tmp--;
Daniel Veillarda880b122003-04-21 21:36:41 +000010463 if (tmp < ctxt->input->base) {
10464 *lastlt = NULL;
10465 *lastgt = NULL;
Daniel Veillarda880b122003-04-21 21:36:41 +000010466 } else {
Daniel Veillardeb70f932004-07-05 16:46:09 +000010467 *lastlt = tmp;
10468 tmp++;
10469 while ((tmp < ctxt->input->end) && (*tmp != '>')) {
10470 if (*tmp == '\'') {
10471 tmp++;
10472 while ((tmp < ctxt->input->end) && (*tmp != '\'')) tmp++;
10473 if (tmp < ctxt->input->end) tmp++;
10474 } else if (*tmp == '"') {
10475 tmp++;
10476 while ((tmp < ctxt->input->end) && (*tmp != '"')) tmp++;
10477 if (tmp < ctxt->input->end) tmp++;
10478 } else
10479 tmp++;
10480 }
10481 if (tmp < ctxt->input->end)
10482 *lastgt = tmp;
10483 else {
10484 tmp = *lastlt;
10485 tmp--;
10486 while ((tmp >= ctxt->input->base) && (*tmp != '>')) tmp--;
10487 if (tmp >= ctxt->input->base)
10488 *lastgt = tmp;
10489 else
10490 *lastgt = NULL;
10491 }
Daniel Veillarda880b122003-04-21 21:36:41 +000010492 }
Daniel Veillarda880b122003-04-21 21:36:41 +000010493 } else {
10494 *lastlt = NULL;
10495 *lastgt = NULL;
10496 }
10497}
10498/**
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000010499 * xmlCheckCdataPush:
10500 * @cur: pointer to the bock of characters
10501 * @len: length of the block in bytes
10502 *
10503 * Check that the block of characters is okay as SCdata content [20]
10504 *
10505 * Returns the number of bytes to pass if okay, a negative index where an
10506 * UTF-8 error occured otherwise
10507 */
10508static int
10509xmlCheckCdataPush(const xmlChar *utf, int len) {
10510 int ix;
10511 unsigned char c;
10512 int codepoint;
10513
10514 if ((utf == NULL) || (len <= 0))
10515 return(0);
10516
10517 for (ix = 0; ix < len;) { /* string is 0-terminated */
10518 c = utf[ix];
10519 if ((c & 0x80) == 0x00) { /* 1-byte code, starts with 10 */
10520 if (c >= 0x20)
10521 ix++;
10522 else if ((c == 0xA) || (c == 0xD) || (c == 0x9))
10523 ix++;
10524 else
10525 return(-ix);
10526 } else if ((c & 0xe0) == 0xc0) {/* 2-byte code, starts with 110 */
10527 if (ix + 2 > len) return(ix);
10528 if ((utf[ix+1] & 0xc0 ) != 0x80)
10529 return(-ix);
Daniel Veillardeca59a22005-09-09 10:56:28 +000010530 codepoint = (utf[ix] & 0x1f) << 6;
10531 codepoint |= utf[ix+1] & 0x3f;
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000010532 if (!xmlIsCharQ(codepoint))
10533 return(-ix);
10534 ix += 2;
10535 } else if ((c & 0xf0) == 0xe0) {/* 3-byte code, starts with 1110 */
10536 if (ix + 3 > len) return(ix);
10537 if (((utf[ix+1] & 0xc0) != 0x80) ||
10538 ((utf[ix+2] & 0xc0) != 0x80))
10539 return(-ix);
Daniel Veillardeca59a22005-09-09 10:56:28 +000010540 codepoint = (utf[ix] & 0xf) << 12;
10541 codepoint |= (utf[ix+1] & 0x3f) << 6;
10542 codepoint |= utf[ix+2] & 0x3f;
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000010543 if (!xmlIsCharQ(codepoint))
10544 return(-ix);
10545 ix += 3;
10546 } else if ((c & 0xf8) == 0xf0) {/* 4-byte code, starts with 11110 */
10547 if (ix + 4 > len) return(ix);
10548 if (((utf[ix+1] & 0xc0) != 0x80) ||
10549 ((utf[ix+2] & 0xc0) != 0x80) ||
10550 ((utf[ix+3] & 0xc0) != 0x80))
10551 return(-ix);
Daniel Veillardeca59a22005-09-09 10:56:28 +000010552 codepoint = (utf[ix] & 0x7) << 18;
10553 codepoint |= (utf[ix+1] & 0x3f) << 12;
10554 codepoint |= (utf[ix+2] & 0x3f) << 6;
10555 codepoint |= utf[ix+3] & 0x3f;
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000010556 if (!xmlIsCharQ(codepoint))
10557 return(-ix);
10558 ix += 4;
10559 } else /* unknown encoding */
10560 return(-ix);
10561 }
10562 return(ix);
10563}
10564
10565/**
Owen Taylor3473f882001-02-23 17:55:21 +000010566 * xmlParseTryOrFinish:
10567 * @ctxt: an XML parser context
10568 * @terminate: last chunk indicator
10569 *
10570 * Try to progress on parsing
10571 *
10572 * Returns zero if no parsing was possible
10573 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +000010574static int
Owen Taylor3473f882001-02-23 17:55:21 +000010575xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
10576 int ret = 0;
Daniel Veillardc82c57e2004-01-12 16:24:34 +000010577 int avail, tlen;
Owen Taylor3473f882001-02-23 17:55:21 +000010578 xmlChar cur, next;
Daniel Veillarda880b122003-04-21 21:36:41 +000010579 const xmlChar *lastlt, *lastgt;
Owen Taylor3473f882001-02-23 17:55:21 +000010580
Daniel Veillard36e5cd52004-11-02 14:52:23 +000010581 if (ctxt->input == NULL)
10582 return(0);
10583
Owen Taylor3473f882001-02-23 17:55:21 +000010584#ifdef DEBUG_PUSH
10585 switch (ctxt->instate) {
10586 case XML_PARSER_EOF:
10587 xmlGenericError(xmlGenericErrorContext,
10588 "PP: try EOF\n"); break;
10589 case XML_PARSER_START:
10590 xmlGenericError(xmlGenericErrorContext,
10591 "PP: try START\n"); break;
10592 case XML_PARSER_MISC:
10593 xmlGenericError(xmlGenericErrorContext,
10594 "PP: try MISC\n");break;
10595 case XML_PARSER_COMMENT:
10596 xmlGenericError(xmlGenericErrorContext,
10597 "PP: try COMMENT\n");break;
10598 case XML_PARSER_PROLOG:
10599 xmlGenericError(xmlGenericErrorContext,
10600 "PP: try PROLOG\n");break;
10601 case XML_PARSER_START_TAG:
10602 xmlGenericError(xmlGenericErrorContext,
10603 "PP: try START_TAG\n");break;
10604 case XML_PARSER_CONTENT:
10605 xmlGenericError(xmlGenericErrorContext,
10606 "PP: try CONTENT\n");break;
10607 case XML_PARSER_CDATA_SECTION:
10608 xmlGenericError(xmlGenericErrorContext,
10609 "PP: try CDATA_SECTION\n");break;
10610 case XML_PARSER_END_TAG:
10611 xmlGenericError(xmlGenericErrorContext,
10612 "PP: try END_TAG\n");break;
10613 case XML_PARSER_ENTITY_DECL:
10614 xmlGenericError(xmlGenericErrorContext,
10615 "PP: try ENTITY_DECL\n");break;
10616 case XML_PARSER_ENTITY_VALUE:
10617 xmlGenericError(xmlGenericErrorContext,
10618 "PP: try ENTITY_VALUE\n");break;
10619 case XML_PARSER_ATTRIBUTE_VALUE:
10620 xmlGenericError(xmlGenericErrorContext,
10621 "PP: try ATTRIBUTE_VALUE\n");break;
10622 case XML_PARSER_DTD:
10623 xmlGenericError(xmlGenericErrorContext,
10624 "PP: try DTD\n");break;
10625 case XML_PARSER_EPILOG:
10626 xmlGenericError(xmlGenericErrorContext,
10627 "PP: try EPILOG\n");break;
10628 case XML_PARSER_PI:
10629 xmlGenericError(xmlGenericErrorContext,
10630 "PP: try PI\n");break;
10631 case XML_PARSER_IGNORE:
10632 xmlGenericError(xmlGenericErrorContext,
10633 "PP: try IGNORE\n");break;
10634 }
10635#endif
10636
Daniel Veillard198c1bf2003-10-20 17:07:41 +000010637 if ((ctxt->input != NULL) &&
10638 (ctxt->input->cur - ctxt->input->base > 4096)) {
Daniel Veillarda880b122003-04-21 21:36:41 +000010639 xmlSHRINK(ctxt);
10640 ctxt->checkIndex = 0;
10641 }
10642 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Aleksey Sanine48a3182002-05-09 18:20:01 +000010643
Daniel Veillarda880b122003-04-21 21:36:41 +000010644 while (1) {
Daniel Veillard14412512005-01-21 23:53:26 +000010645 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000010646 return(0);
10647
10648
Owen Taylor3473f882001-02-23 17:55:21 +000010649 /*
10650 * Pop-up of finished entities.
10651 */
10652 while ((RAW == 0) && (ctxt->inputNr > 1))
10653 xmlPopInput(ctxt);
10654
Daniel Veillard198c1bf2003-10-20 17:07:41 +000010655 if (ctxt->input == NULL) break;
Owen Taylor3473f882001-02-23 17:55:21 +000010656 if (ctxt->input->buf == NULL)
Daniel Veillarda880b122003-04-21 21:36:41 +000010657 avail = ctxt->input->length -
10658 (ctxt->input->cur - ctxt->input->base);
Daniel Veillard158a4d22002-02-20 22:17:58 +000010659 else {
10660 /*
10661 * If we are operating on converted input, try to flush
10662 * remainng chars to avoid them stalling in the non-converted
10663 * buffer.
10664 */
10665 if ((ctxt->input->buf->raw != NULL) &&
10666 (ctxt->input->buf->raw->use > 0)) {
10667 int base = ctxt->input->base -
10668 ctxt->input->buf->buffer->content;
10669 int current = ctxt->input->cur - ctxt->input->base;
10670
10671 xmlParserInputBufferPush(ctxt->input->buf, 0, "");
10672 ctxt->input->base = ctxt->input->buf->buffer->content + base;
10673 ctxt->input->cur = ctxt->input->base + current;
10674 ctxt->input->end =
10675 &ctxt->input->buf->buffer->content[
10676 ctxt->input->buf->buffer->use];
10677 }
10678 avail = ctxt->input->buf->buffer->use -
10679 (ctxt->input->cur - ctxt->input->base);
10680 }
Owen Taylor3473f882001-02-23 17:55:21 +000010681 if (avail < 1)
10682 goto done;
10683 switch (ctxt->instate) {
10684 case XML_PARSER_EOF:
10685 /*
10686 * Document parsing is done !
10687 */
10688 goto done;
10689 case XML_PARSER_START:
Daniel Veillard0e4cd172001-06-28 12:13:56 +000010690 if (ctxt->charset == XML_CHAR_ENCODING_NONE) {
10691 xmlChar start[4];
10692 xmlCharEncoding enc;
10693
10694 /*
10695 * Very first chars read from the document flow.
10696 */
10697 if (avail < 4)
10698 goto done;
10699
10700 /*
10701 * Get the 4 first bytes and decode the charset
10702 * if enc != XML_CHAR_ENCODING_NONE
William M. Brack3a1cd212005-02-11 14:35:54 +000010703 * plug some encoding conversion routines,
10704 * else xmlSwitchEncoding will set to (default)
10705 * UTF8.
Daniel Veillard0e4cd172001-06-28 12:13:56 +000010706 */
10707 start[0] = RAW;
10708 start[1] = NXT(1);
10709 start[2] = NXT(2);
10710 start[3] = NXT(3);
10711 enc = xmlDetectCharEncoding(start, 4);
William M. Brack3a1cd212005-02-11 14:35:54 +000010712 xmlSwitchEncoding(ctxt, enc);
Daniel Veillard0e4cd172001-06-28 12:13:56 +000010713 break;
10714 }
Owen Taylor3473f882001-02-23 17:55:21 +000010715
Daniel Veillard2b8c4a12003-10-02 22:28:19 +000010716 if (avail < 2)
10717 goto done;
Owen Taylor3473f882001-02-23 17:55:21 +000010718 cur = ctxt->input->cur[0];
10719 next = ctxt->input->cur[1];
10720 if (cur == 0) {
10721 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10722 ctxt->sax->setDocumentLocator(ctxt->userData,
10723 &xmlDefaultSAXLocator);
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010724 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010725 ctxt->instate = XML_PARSER_EOF;
10726#ifdef DEBUG_PUSH
10727 xmlGenericError(xmlGenericErrorContext,
10728 "PP: entering EOF\n");
10729#endif
10730 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10731 ctxt->sax->endDocument(ctxt->userData);
10732 goto done;
10733 }
10734 if ((cur == '<') && (next == '?')) {
10735 /* PI or XML decl */
10736 if (avail < 5) return(ret);
10737 if ((!terminate) &&
10738 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
10739 return(ret);
10740 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10741 ctxt->sax->setDocumentLocator(ctxt->userData,
10742 &xmlDefaultSAXLocator);
10743 if ((ctxt->input->cur[2] == 'x') &&
10744 (ctxt->input->cur[3] == 'm') &&
10745 (ctxt->input->cur[4] == 'l') &&
William M. Brack76e95df2003-10-18 16:20:14 +000010746 (IS_BLANK_CH(ctxt->input->cur[5]))) {
Owen Taylor3473f882001-02-23 17:55:21 +000010747 ret += 5;
10748#ifdef DEBUG_PUSH
10749 xmlGenericError(xmlGenericErrorContext,
10750 "PP: Parsing XML Decl\n");
10751#endif
10752 xmlParseXMLDecl(ctxt);
10753 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
10754 /*
10755 * The XML REC instructs us to stop parsing right
10756 * here
10757 */
10758 ctxt->instate = XML_PARSER_EOF;
10759 return(0);
10760 }
10761 ctxt->standalone = ctxt->input->standalone;
10762 if ((ctxt->encoding == NULL) &&
10763 (ctxt->input->encoding != NULL))
10764 ctxt->encoding = xmlStrdup(ctxt->input->encoding);
10765 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
10766 (!ctxt->disableSAX))
10767 ctxt->sax->startDocument(ctxt->userData);
10768 ctxt->instate = XML_PARSER_MISC;
10769#ifdef DEBUG_PUSH
10770 xmlGenericError(xmlGenericErrorContext,
10771 "PP: entering MISC\n");
10772#endif
10773 } else {
10774 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10775 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
10776 (!ctxt->disableSAX))
10777 ctxt->sax->startDocument(ctxt->userData);
10778 ctxt->instate = XML_PARSER_MISC;
10779#ifdef DEBUG_PUSH
10780 xmlGenericError(xmlGenericErrorContext,
10781 "PP: entering MISC\n");
10782#endif
10783 }
10784 } else {
10785 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10786 ctxt->sax->setDocumentLocator(ctxt->userData,
10787 &xmlDefaultSAXLocator);
10788 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
William M. Bracka3215c72004-07-31 16:24:01 +000010789 if (ctxt->version == NULL) {
10790 xmlErrMemory(ctxt, NULL);
10791 break;
10792 }
Owen Taylor3473f882001-02-23 17:55:21 +000010793 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
10794 (!ctxt->disableSAX))
10795 ctxt->sax->startDocument(ctxt->userData);
10796 ctxt->instate = XML_PARSER_MISC;
10797#ifdef DEBUG_PUSH
10798 xmlGenericError(xmlGenericErrorContext,
10799 "PP: entering MISC\n");
10800#endif
10801 }
10802 break;
Daniel Veillarda880b122003-04-21 21:36:41 +000010803 case XML_PARSER_START_TAG: {
Daniel Veillarde57ec792003-09-10 10:50:59 +000010804 const xmlChar *name;
Daniel Veillard15495612009-09-05 15:04:41 +020010805 const xmlChar *prefix = NULL;
10806 const xmlChar *URI = NULL;
Daniel Veillarde57ec792003-09-10 10:50:59 +000010807 int nsNr = ctxt->nsNr;
Daniel Veillarda880b122003-04-21 21:36:41 +000010808
10809 if ((avail < 2) && (ctxt->inputNr == 1))
10810 goto done;
10811 cur = ctxt->input->cur[0];
10812 if (cur != '<') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010813 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Daniel Veillarda880b122003-04-21 21:36:41 +000010814 ctxt->instate = XML_PARSER_EOF;
Daniel Veillarda880b122003-04-21 21:36:41 +000010815 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10816 ctxt->sax->endDocument(ctxt->userData);
10817 goto done;
10818 }
10819 if (!terminate) {
10820 if (ctxt->progressive) {
Daniel Veillardb3744002004-02-18 14:28:22 +000010821 /* > can be found unescaped in attribute values */
Daniel Veillardeb70f932004-07-05 16:46:09 +000010822 if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
Daniel Veillarda880b122003-04-21 21:36:41 +000010823 goto done;
10824 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
10825 goto done;
10826 }
10827 }
10828 if (ctxt->spaceNr == 0)
10829 spacePush(ctxt, -1);
Daniel Veillard1114d002006-10-12 16:24:35 +000010830 else if (*ctxt->space == -2)
10831 spacePush(ctxt, -1);
Daniel Veillarda880b122003-04-21 21:36:41 +000010832 else
10833 spacePush(ctxt, *ctxt->space);
Daniel Veillard81273902003-09-30 00:43:48 +000010834#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +000010835 if (ctxt->sax2)
Daniel Veillard81273902003-09-30 00:43:48 +000010836#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillardc82c57e2004-01-12 16:24:34 +000010837 name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
Daniel Veillard81273902003-09-30 00:43:48 +000010838#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +000010839 else
10840 name = xmlParseStartTag(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +000010841#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarda880b122003-04-21 21:36:41 +000010842 if (name == NULL) {
10843 spacePop(ctxt);
10844 ctxt->instate = XML_PARSER_EOF;
Daniel Veillarda880b122003-04-21 21:36:41 +000010845 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10846 ctxt->sax->endDocument(ctxt->userData);
10847 goto done;
10848 }
Daniel Veillard4432df22003-09-28 18:58:27 +000010849#ifdef LIBXML_VALID_ENABLED
Daniel Veillarda880b122003-04-21 21:36:41 +000010850 /*
10851 * [ VC: Root Element Type ]
10852 * The Name in the document type declaration must match
10853 * the element type of the root element.
10854 */
10855 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
10856 ctxt->node && (ctxt->node == ctxt->myDoc->children))
10857 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
Daniel Veillard4432df22003-09-28 18:58:27 +000010858#endif /* LIBXML_VALID_ENABLED */
Daniel Veillarda880b122003-04-21 21:36:41 +000010859
10860 /*
10861 * Check for an Empty Element.
10862 */
10863 if ((RAW == '/') && (NXT(1) == '>')) {
10864 SKIP(2);
Daniel Veillarde57ec792003-09-10 10:50:59 +000010865
10866 if (ctxt->sax2) {
10867 if ((ctxt->sax != NULL) &&
10868 (ctxt->sax->endElementNs != NULL) &&
10869 (!ctxt->disableSAX))
10870 ctxt->sax->endElementNs(ctxt->userData, name,
10871 prefix, URI);
Daniel Veillard48df9612005-01-04 21:50:05 +000010872 if (ctxt->nsNr - nsNr > 0)
10873 nsPop(ctxt, ctxt->nsNr - nsNr);
Daniel Veillard81273902003-09-30 00:43:48 +000010874#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +000010875 } else {
10876 if ((ctxt->sax != NULL) &&
10877 (ctxt->sax->endElement != NULL) &&
10878 (!ctxt->disableSAX))
10879 ctxt->sax->endElement(ctxt->userData, name);
Daniel Veillard81273902003-09-30 00:43:48 +000010880#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarda880b122003-04-21 21:36:41 +000010881 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000010882 spacePop(ctxt);
10883 if (ctxt->nameNr == 0) {
Daniel Veillarda880b122003-04-21 21:36:41 +000010884 ctxt->instate = XML_PARSER_EPILOG;
Daniel Veillarda880b122003-04-21 21:36:41 +000010885 } else {
10886 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillarda880b122003-04-21 21:36:41 +000010887 }
10888 break;
10889 }
10890 if (RAW == '>') {
10891 NEXT;
10892 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +000010893 xmlFatalErrMsgStr(ctxt, XML_ERR_GT_REQUIRED,
Daniel Veillarda880b122003-04-21 21:36:41 +000010894 "Couldn't find end of Start Tag %s\n",
10895 name);
Daniel Veillarda880b122003-04-21 21:36:41 +000010896 nodePop(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +000010897 spacePop(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +000010898 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000010899 if (ctxt->sax2)
10900 nameNsPush(ctxt, name, prefix, URI, ctxt->nsNr - nsNr);
Daniel Veillard81273902003-09-30 00:43:48 +000010901#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +000010902 else
10903 namePush(ctxt, name);
Daniel Veillard81273902003-09-30 00:43:48 +000010904#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +000010905
Daniel Veillarda880b122003-04-21 21:36:41 +000010906 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillarda880b122003-04-21 21:36:41 +000010907 break;
10908 }
10909 case XML_PARSER_CONTENT: {
10910 const xmlChar *test;
10911 unsigned int cons;
10912 if ((avail < 2) && (ctxt->inputNr == 1))
10913 goto done;
10914 cur = ctxt->input->cur[0];
10915 next = ctxt->input->cur[1];
10916
10917 test = CUR_PTR;
10918 cons = ctxt->input->consumed;
10919 if ((cur == '<') && (next == '/')) {
10920 ctxt->instate = XML_PARSER_END_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +000010921 break;
10922 } else if ((cur == '<') && (next == '?')) {
10923 if ((!terminate) &&
10924 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
10925 goto done;
Daniel Veillarda880b122003-04-21 21:36:41 +000010926 xmlParsePI(ctxt);
10927 } else if ((cur == '<') && (next != '!')) {
10928 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +000010929 break;
10930 } else if ((cur == '<') && (next == '!') &&
10931 (ctxt->input->cur[2] == '-') &&
10932 (ctxt->input->cur[3] == '-')) {
Daniel Veillard6974feb2006-02-05 02:43:36 +000010933 int term;
10934
10935 if (avail < 4)
10936 goto done;
10937 ctxt->input->cur += 4;
10938 term = xmlParseLookupSequence(ctxt, '-', '-', '>');
10939 ctxt->input->cur -= 4;
10940 if ((!terminate) && (term < 0))
Daniel Veillarda880b122003-04-21 21:36:41 +000010941 goto done;
Daniel Veillarda880b122003-04-21 21:36:41 +000010942 xmlParseComment(ctxt);
10943 ctxt->instate = XML_PARSER_CONTENT;
10944 } else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
10945 (ctxt->input->cur[2] == '[') &&
10946 (ctxt->input->cur[3] == 'C') &&
10947 (ctxt->input->cur[4] == 'D') &&
10948 (ctxt->input->cur[5] == 'A') &&
10949 (ctxt->input->cur[6] == 'T') &&
10950 (ctxt->input->cur[7] == 'A') &&
10951 (ctxt->input->cur[8] == '[')) {
10952 SKIP(9);
10953 ctxt->instate = XML_PARSER_CDATA_SECTION;
Daniel Veillarda880b122003-04-21 21:36:41 +000010954 break;
10955 } else if ((cur == '<') && (next == '!') &&
10956 (avail < 9)) {
10957 goto done;
10958 } else if (cur == '&') {
10959 if ((!terminate) &&
10960 (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0))
10961 goto done;
Daniel Veillarda880b122003-04-21 21:36:41 +000010962 xmlParseReference(ctxt);
10963 } else {
10964 /* TODO Avoid the extra copy, handle directly !!! */
10965 /*
10966 * Goal of the following test is:
10967 * - minimize calls to the SAX 'character' callback
10968 * when they are mergeable
10969 * - handle an problem for isBlank when we only parse
10970 * a sequence of blank chars and the next one is
10971 * not available to check against '<' presence.
10972 * - tries to homogenize the differences in SAX
10973 * callbacks between the push and pull versions
10974 * of the parser.
10975 */
10976 if ((ctxt->inputNr == 1) &&
10977 (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
10978 if (!terminate) {
10979 if (ctxt->progressive) {
10980 if ((lastlt == NULL) ||
10981 (ctxt->input->cur > lastlt))
10982 goto done;
10983 } else if (xmlParseLookupSequence(ctxt,
10984 '<', 0, 0) < 0) {
10985 goto done;
10986 }
10987 }
10988 }
10989 ctxt->checkIndex = 0;
Daniel Veillarda880b122003-04-21 21:36:41 +000010990 xmlParseCharData(ctxt, 0);
10991 }
10992 /*
10993 * Pop-up of finished entities.
10994 */
10995 while ((RAW == 0) && (ctxt->inputNr > 1))
10996 xmlPopInput(ctxt);
10997 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010998 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
10999 "detected an error in element content\n");
Daniel Veillarda880b122003-04-21 21:36:41 +000011000 ctxt->instate = XML_PARSER_EOF;
11001 break;
11002 }
11003 break;
11004 }
11005 case XML_PARSER_END_TAG:
11006 if (avail < 2)
11007 goto done;
11008 if (!terminate) {
11009 if (ctxt->progressive) {
Daniel Veillardeb70f932004-07-05 16:46:09 +000011010 /* > can be found unescaped in attribute values */
11011 if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
Daniel Veillarda880b122003-04-21 21:36:41 +000011012 goto done;
11013 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
11014 goto done;
11015 }
11016 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000011017 if (ctxt->sax2) {
11018 xmlParseEndTag2(ctxt,
11019 (void *) ctxt->pushTab[ctxt->nameNr * 3 - 3],
11020 (void *) ctxt->pushTab[ctxt->nameNr * 3 - 2], 0,
Daniel Veillardc82c57e2004-01-12 16:24:34 +000011021 (int) (long) ctxt->pushTab[ctxt->nameNr * 3 - 1], 0);
Daniel Veillarde57ec792003-09-10 10:50:59 +000011022 nameNsPop(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +000011023 }
11024#ifdef LIBXML_SAX1_ENABLED
11025 else
Daniel Veillarde57ec792003-09-10 10:50:59 +000011026 xmlParseEndTag1(ctxt, 0);
Daniel Veillard81273902003-09-30 00:43:48 +000011027#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +000011028 if (ctxt->nameNr == 0) {
Daniel Veillarda880b122003-04-21 21:36:41 +000011029 ctxt->instate = XML_PARSER_EPILOG;
Daniel Veillarda880b122003-04-21 21:36:41 +000011030 } else {
11031 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillarda880b122003-04-21 21:36:41 +000011032 }
11033 break;
11034 case XML_PARSER_CDATA_SECTION: {
11035 /*
11036 * The Push mode need to have the SAX callback for
11037 * cdataBlock merge back contiguous callbacks.
11038 */
11039 int base;
11040
11041 base = xmlParseLookupSequence(ctxt, ']', ']', '>');
11042 if (base < 0) {
11043 if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) {
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011044 int tmp;
11045
11046 tmp = xmlCheckCdataPush(ctxt->input->cur,
11047 XML_PARSER_BIG_BUFFER_SIZE);
11048 if (tmp < 0) {
11049 tmp = -tmp;
11050 ctxt->input->cur += tmp;
11051 goto encoding_error;
11052 }
Daniel Veillarda880b122003-04-21 21:36:41 +000011053 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
11054 if (ctxt->sax->cdataBlock != NULL)
Daniel Veillardd9d32ae2003-07-05 20:32:43 +000011055 ctxt->sax->cdataBlock(ctxt->userData,
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011056 ctxt->input->cur, tmp);
Daniel Veillardd9d32ae2003-07-05 20:32:43 +000011057 else if (ctxt->sax->characters != NULL)
11058 ctxt->sax->characters(ctxt->userData,
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011059 ctxt->input->cur, tmp);
Daniel Veillarda880b122003-04-21 21:36:41 +000011060 }
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011061 SKIPL(tmp);
Daniel Veillarda880b122003-04-21 21:36:41 +000011062 ctxt->checkIndex = 0;
11063 }
11064 goto done;
11065 } else {
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011066 int tmp;
11067
11068 tmp = xmlCheckCdataPush(ctxt->input->cur, base);
11069 if ((tmp < 0) || (tmp != base)) {
11070 tmp = -tmp;
11071 ctxt->input->cur += tmp;
11072 goto encoding_error;
11073 }
Daniel Veillardd0d2f092008-03-07 16:50:21 +000011074 if ((ctxt->sax != NULL) && (base == 0) &&
11075 (ctxt->sax->cdataBlock != NULL) &&
11076 (!ctxt->disableSAX)) {
11077 /*
11078 * Special case to provide identical behaviour
11079 * between pull and push parsers on enpty CDATA
11080 * sections
11081 */
11082 if ((ctxt->input->cur - ctxt->input->base >= 9) &&
11083 (!strncmp((const char *)&ctxt->input->cur[-9],
11084 "<![CDATA[", 9)))
11085 ctxt->sax->cdataBlock(ctxt->userData,
11086 BAD_CAST "", 0);
11087 } else if ((ctxt->sax != NULL) && (base > 0) &&
Daniel Veillarda880b122003-04-21 21:36:41 +000011088 (!ctxt->disableSAX)) {
11089 if (ctxt->sax->cdataBlock != NULL)
11090 ctxt->sax->cdataBlock(ctxt->userData,
11091 ctxt->input->cur, base);
Daniel Veillardd9d32ae2003-07-05 20:32:43 +000011092 else if (ctxt->sax->characters != NULL)
11093 ctxt->sax->characters(ctxt->userData,
11094 ctxt->input->cur, base);
Daniel Veillarda880b122003-04-21 21:36:41 +000011095 }
Daniel Veillard0b787f32004-03-26 17:29:53 +000011096 SKIPL(base + 3);
Daniel Veillarda880b122003-04-21 21:36:41 +000011097 ctxt->checkIndex = 0;
11098 ctxt->instate = XML_PARSER_CONTENT;
11099#ifdef DEBUG_PUSH
11100 xmlGenericError(xmlGenericErrorContext,
11101 "PP: entering CONTENT\n");
11102#endif
11103 }
11104 break;
11105 }
Owen Taylor3473f882001-02-23 17:55:21 +000011106 case XML_PARSER_MISC:
11107 SKIP_BLANKS;
11108 if (ctxt->input->buf == NULL)
Daniel Veillarda880b122003-04-21 21:36:41 +000011109 avail = ctxt->input->length -
11110 (ctxt->input->cur - ctxt->input->base);
Owen Taylor3473f882001-02-23 17:55:21 +000011111 else
Daniel Veillarda880b122003-04-21 21:36:41 +000011112 avail = ctxt->input->buf->buffer->use -
11113 (ctxt->input->cur - ctxt->input->base);
Owen Taylor3473f882001-02-23 17:55:21 +000011114 if (avail < 2)
11115 goto done;
11116 cur = ctxt->input->cur[0];
11117 next = ctxt->input->cur[1];
11118 if ((cur == '<') && (next == '?')) {
11119 if ((!terminate) &&
11120 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
11121 goto done;
11122#ifdef DEBUG_PUSH
11123 xmlGenericError(xmlGenericErrorContext,
11124 "PP: Parsing PI\n");
11125#endif
11126 xmlParsePI(ctxt);
Daniel Veillard40e4b212007-06-12 14:46:40 +000011127 ctxt->checkIndex = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000011128 } else if ((cur == '<') && (next == '!') &&
Daniel Veillarda880b122003-04-21 21:36:41 +000011129 (ctxt->input->cur[2] == '-') &&
11130 (ctxt->input->cur[3] == '-')) {
Owen Taylor3473f882001-02-23 17:55:21 +000011131 if ((!terminate) &&
11132 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
11133 goto done;
11134#ifdef DEBUG_PUSH
11135 xmlGenericError(xmlGenericErrorContext,
11136 "PP: Parsing Comment\n");
11137#endif
11138 xmlParseComment(ctxt);
11139 ctxt->instate = XML_PARSER_MISC;
Daniel Veillarddfac9462007-06-12 14:44:32 +000011140 ctxt->checkIndex = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000011141 } else if ((cur == '<') && (next == '!') &&
Daniel Veillarda880b122003-04-21 21:36:41 +000011142 (ctxt->input->cur[2] == 'D') &&
11143 (ctxt->input->cur[3] == 'O') &&
11144 (ctxt->input->cur[4] == 'C') &&
11145 (ctxt->input->cur[5] == 'T') &&
11146 (ctxt->input->cur[6] == 'Y') &&
11147 (ctxt->input->cur[7] == 'P') &&
Owen Taylor3473f882001-02-23 17:55:21 +000011148 (ctxt->input->cur[8] == 'E')) {
11149 if ((!terminate) &&
11150 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
11151 goto done;
11152#ifdef DEBUG_PUSH
11153 xmlGenericError(xmlGenericErrorContext,
11154 "PP: Parsing internal subset\n");
11155#endif
11156 ctxt->inSubset = 1;
11157 xmlParseDocTypeDecl(ctxt);
11158 if (RAW == '[') {
11159 ctxt->instate = XML_PARSER_DTD;
11160#ifdef DEBUG_PUSH
11161 xmlGenericError(xmlGenericErrorContext,
11162 "PP: entering DTD\n");
11163#endif
11164 } else {
11165 /*
11166 * Create and update the external subset.
11167 */
11168 ctxt->inSubset = 2;
11169 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
11170 (ctxt->sax->externalSubset != NULL))
11171 ctxt->sax->externalSubset(ctxt->userData,
11172 ctxt->intSubName, ctxt->extSubSystem,
11173 ctxt->extSubURI);
11174 ctxt->inSubset = 0;
Daniel Veillardac4118d2008-01-11 05:27:32 +000011175 xmlCleanSpecialAttr(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011176 ctxt->instate = XML_PARSER_PROLOG;
11177#ifdef DEBUG_PUSH
11178 xmlGenericError(xmlGenericErrorContext,
11179 "PP: entering PROLOG\n");
11180#endif
11181 }
11182 } else if ((cur == '<') && (next == '!') &&
11183 (avail < 9)) {
11184 goto done;
11185 } else {
11186 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +000011187 ctxt->progressive = 1;
11188 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Owen Taylor3473f882001-02-23 17:55:21 +000011189#ifdef DEBUG_PUSH
11190 xmlGenericError(xmlGenericErrorContext,
11191 "PP: entering START_TAG\n");
11192#endif
11193 }
11194 break;
Owen Taylor3473f882001-02-23 17:55:21 +000011195 case XML_PARSER_PROLOG:
11196 SKIP_BLANKS;
11197 if (ctxt->input->buf == NULL)
11198 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
11199 else
11200 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
11201 if (avail < 2)
11202 goto done;
11203 cur = ctxt->input->cur[0];
11204 next = ctxt->input->cur[1];
11205 if ((cur == '<') && (next == '?')) {
11206 if ((!terminate) &&
11207 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
11208 goto done;
11209#ifdef DEBUG_PUSH
11210 xmlGenericError(xmlGenericErrorContext,
11211 "PP: Parsing PI\n");
11212#endif
11213 xmlParsePI(ctxt);
11214 } else if ((cur == '<') && (next == '!') &&
11215 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
11216 if ((!terminate) &&
11217 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
11218 goto done;
11219#ifdef DEBUG_PUSH
11220 xmlGenericError(xmlGenericErrorContext,
11221 "PP: Parsing Comment\n");
11222#endif
11223 xmlParseComment(ctxt);
11224 ctxt->instate = XML_PARSER_PROLOG;
11225 } else if ((cur == '<') && (next == '!') &&
11226 (avail < 4)) {
11227 goto done;
11228 } else {
11229 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillard0df3bc32004-06-08 12:03:41 +000011230 if (ctxt->progressive == 0)
11231 ctxt->progressive = 1;
Daniel Veillarda880b122003-04-21 21:36:41 +000011232 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Owen Taylor3473f882001-02-23 17:55:21 +000011233#ifdef DEBUG_PUSH
11234 xmlGenericError(xmlGenericErrorContext,
11235 "PP: entering START_TAG\n");
11236#endif
11237 }
11238 break;
11239 case XML_PARSER_EPILOG:
11240 SKIP_BLANKS;
11241 if (ctxt->input->buf == NULL)
11242 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
11243 else
11244 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
11245 if (avail < 2)
11246 goto done;
11247 cur = ctxt->input->cur[0];
11248 next = ctxt->input->cur[1];
11249 if ((cur == '<') && (next == '?')) {
11250 if ((!terminate) &&
11251 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
11252 goto done;
11253#ifdef DEBUG_PUSH
11254 xmlGenericError(xmlGenericErrorContext,
11255 "PP: Parsing PI\n");
11256#endif
11257 xmlParsePI(ctxt);
11258 ctxt->instate = XML_PARSER_EPILOG;
11259 } else if ((cur == '<') && (next == '!') &&
11260 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
11261 if ((!terminate) &&
11262 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
11263 goto done;
11264#ifdef DEBUG_PUSH
11265 xmlGenericError(xmlGenericErrorContext,
11266 "PP: Parsing Comment\n");
11267#endif
11268 xmlParseComment(ctxt);
11269 ctxt->instate = XML_PARSER_EPILOG;
11270 } else if ((cur == '<') && (next == '!') &&
11271 (avail < 4)) {
11272 goto done;
11273 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011274 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011275 ctxt->instate = XML_PARSER_EOF;
11276#ifdef DEBUG_PUSH
11277 xmlGenericError(xmlGenericErrorContext,
11278 "PP: entering EOF\n");
11279#endif
Daniel Veillard8d24cc12002-03-05 15:41:29 +000011280 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +000011281 ctxt->sax->endDocument(ctxt->userData);
11282 goto done;
11283 }
11284 break;
Owen Taylor3473f882001-02-23 17:55:21 +000011285 case XML_PARSER_DTD: {
11286 /*
11287 * Sorry but progressive parsing of the internal subset
11288 * is not expected to be supported. We first check that
11289 * the full content of the internal subset is available and
11290 * the parsing is launched only at that point.
11291 * Internal subset ends up with "']' S? '>'" in an unescaped
11292 * section and not in a ']]>' sequence which are conditional
11293 * sections (whoever argued to keep that crap in XML deserve
11294 * a place in hell !).
11295 */
11296 int base, i;
11297 xmlChar *buf;
11298 xmlChar quote = 0;
11299
11300 base = ctxt->input->cur - ctxt->input->base;
11301 if (base < 0) return(0);
11302 if (ctxt->checkIndex > base)
11303 base = ctxt->checkIndex;
11304 buf = ctxt->input->buf->buffer->content;
11305 for (;(unsigned int) base < ctxt->input->buf->buffer->use;
11306 base++) {
11307 if (quote != 0) {
11308 if (buf[base] == quote)
11309 quote = 0;
11310 continue;
11311 }
Daniel Veillard036143b2004-02-12 11:57:52 +000011312 if ((quote == 0) && (buf[base] == '<')) {
11313 int found = 0;
11314 /* special handling of comments */
11315 if (((unsigned int) base + 4 <
11316 ctxt->input->buf->buffer->use) &&
11317 (buf[base + 1] == '!') &&
11318 (buf[base + 2] == '-') &&
11319 (buf[base + 3] == '-')) {
11320 for (;(unsigned int) base + 3 <
11321 ctxt->input->buf->buffer->use; base++) {
11322 if ((buf[base] == '-') &&
11323 (buf[base + 1] == '-') &&
11324 (buf[base + 2] == '>')) {
11325 found = 1;
11326 base += 2;
11327 break;
11328 }
11329 }
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000011330 if (!found) {
11331#if 0
11332 fprintf(stderr, "unfinished comment\n");
11333#endif
11334 break; /* for */
11335 }
Daniel Veillard036143b2004-02-12 11:57:52 +000011336 continue;
11337 }
11338 }
Owen Taylor3473f882001-02-23 17:55:21 +000011339 if (buf[base] == '"') {
11340 quote = '"';
11341 continue;
11342 }
11343 if (buf[base] == '\'') {
11344 quote = '\'';
11345 continue;
11346 }
11347 if (buf[base] == ']') {
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000011348#if 0
11349 fprintf(stderr, "%c%c%c%c: ", buf[base],
11350 buf[base + 1], buf[base + 2], buf[base + 3]);
11351#endif
Owen Taylor3473f882001-02-23 17:55:21 +000011352 if ((unsigned int) base +1 >=
11353 ctxt->input->buf->buffer->use)
11354 break;
11355 if (buf[base + 1] == ']') {
11356 /* conditional crap, skip both ']' ! */
11357 base++;
11358 continue;
11359 }
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000011360 for (i = 1;
Owen Taylor3473f882001-02-23 17:55:21 +000011361 (unsigned int) base + i < ctxt->input->buf->buffer->use;
11362 i++) {
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000011363 if (buf[base + i] == '>') {
11364#if 0
11365 fprintf(stderr, "found\n");
11366#endif
Owen Taylor3473f882001-02-23 17:55:21 +000011367 goto found_end_int_subset;
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000011368 }
11369 if (!IS_BLANK_CH(buf[base + i])) {
11370#if 0
11371 fprintf(stderr, "not found\n");
11372#endif
11373 goto not_end_of_int_subset;
11374 }
Owen Taylor3473f882001-02-23 17:55:21 +000011375 }
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000011376#if 0
11377 fprintf(stderr, "end of stream\n");
11378#endif
Owen Taylor3473f882001-02-23 17:55:21 +000011379 break;
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000011380
Owen Taylor3473f882001-02-23 17:55:21 +000011381 }
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000011382not_end_of_int_subset:
11383 continue; /* for */
Owen Taylor3473f882001-02-23 17:55:21 +000011384 }
11385 /*
11386 * We didn't found the end of the Internal subset
11387 */
Owen Taylor3473f882001-02-23 17:55:21 +000011388#ifdef DEBUG_PUSH
11389 if (next == 0)
11390 xmlGenericError(xmlGenericErrorContext,
11391 "PP: lookup of int subset end filed\n");
11392#endif
11393 goto done;
11394
11395found_end_int_subset:
11396 xmlParseInternalSubset(ctxt);
11397 ctxt->inSubset = 2;
11398 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
11399 (ctxt->sax->externalSubset != NULL))
11400 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
11401 ctxt->extSubSystem, ctxt->extSubURI);
11402 ctxt->inSubset = 0;
Daniel Veillardac4118d2008-01-11 05:27:32 +000011403 xmlCleanSpecialAttr(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011404 ctxt->instate = XML_PARSER_PROLOG;
11405 ctxt->checkIndex = 0;
11406#ifdef DEBUG_PUSH
11407 xmlGenericError(xmlGenericErrorContext,
11408 "PP: entering PROLOG\n");
11409#endif
11410 break;
11411 }
11412 case XML_PARSER_COMMENT:
11413 xmlGenericError(xmlGenericErrorContext,
11414 "PP: internal error, state == COMMENT\n");
11415 ctxt->instate = XML_PARSER_CONTENT;
11416#ifdef DEBUG_PUSH
11417 xmlGenericError(xmlGenericErrorContext,
11418 "PP: entering CONTENT\n");
11419#endif
11420 break;
Daniel Veillarda880b122003-04-21 21:36:41 +000011421 case XML_PARSER_IGNORE:
11422 xmlGenericError(xmlGenericErrorContext,
11423 "PP: internal error, state == IGNORE");
11424 ctxt->instate = XML_PARSER_DTD;
11425#ifdef DEBUG_PUSH
11426 xmlGenericError(xmlGenericErrorContext,
11427 "PP: entering DTD\n");
11428#endif
11429 break;
Owen Taylor3473f882001-02-23 17:55:21 +000011430 case XML_PARSER_PI:
11431 xmlGenericError(xmlGenericErrorContext,
11432 "PP: internal error, state == PI\n");
11433 ctxt->instate = XML_PARSER_CONTENT;
11434#ifdef DEBUG_PUSH
11435 xmlGenericError(xmlGenericErrorContext,
11436 "PP: entering CONTENT\n");
11437#endif
11438 break;
11439 case XML_PARSER_ENTITY_DECL:
11440 xmlGenericError(xmlGenericErrorContext,
11441 "PP: internal error, state == ENTITY_DECL\n");
11442 ctxt->instate = XML_PARSER_DTD;
11443#ifdef DEBUG_PUSH
11444 xmlGenericError(xmlGenericErrorContext,
11445 "PP: entering DTD\n");
11446#endif
11447 break;
11448 case XML_PARSER_ENTITY_VALUE:
11449 xmlGenericError(xmlGenericErrorContext,
11450 "PP: internal error, state == ENTITY_VALUE\n");
11451 ctxt->instate = XML_PARSER_CONTENT;
11452#ifdef DEBUG_PUSH
11453 xmlGenericError(xmlGenericErrorContext,
11454 "PP: entering DTD\n");
11455#endif
11456 break;
11457 case XML_PARSER_ATTRIBUTE_VALUE:
11458 xmlGenericError(xmlGenericErrorContext,
11459 "PP: internal error, state == ATTRIBUTE_VALUE\n");
11460 ctxt->instate = XML_PARSER_START_TAG;
11461#ifdef DEBUG_PUSH
11462 xmlGenericError(xmlGenericErrorContext,
11463 "PP: entering START_TAG\n");
11464#endif
11465 break;
11466 case XML_PARSER_SYSTEM_LITERAL:
11467 xmlGenericError(xmlGenericErrorContext,
11468 "PP: internal error, state == SYSTEM_LITERAL\n");
11469 ctxt->instate = XML_PARSER_START_TAG;
11470#ifdef DEBUG_PUSH
11471 xmlGenericError(xmlGenericErrorContext,
11472 "PP: entering START_TAG\n");
11473#endif
11474 break;
Daniel Veillard4a7ae502002-02-18 19:18:17 +000011475 case XML_PARSER_PUBLIC_LITERAL:
11476 xmlGenericError(xmlGenericErrorContext,
11477 "PP: internal error, state == PUBLIC_LITERAL\n");
11478 ctxt->instate = XML_PARSER_START_TAG;
11479#ifdef DEBUG_PUSH
11480 xmlGenericError(xmlGenericErrorContext,
11481 "PP: entering START_TAG\n");
11482#endif
11483 break;
Owen Taylor3473f882001-02-23 17:55:21 +000011484 }
11485 }
11486done:
11487#ifdef DEBUG_PUSH
11488 xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret);
11489#endif
11490 return(ret);
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011491encoding_error:
11492 {
11493 char buffer[150];
11494
11495 snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
11496 ctxt->input->cur[0], ctxt->input->cur[1],
11497 ctxt->input->cur[2], ctxt->input->cur[3]);
11498 __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
11499 "Input is not proper UTF-8, indicate encoding !\n%s",
11500 BAD_CAST buffer, NULL);
11501 }
11502 return(0);
Owen Taylor3473f882001-02-23 17:55:21 +000011503}
11504
11505/**
Owen Taylor3473f882001-02-23 17:55:21 +000011506 * xmlParseChunk:
11507 * @ctxt: an XML parser context
11508 * @chunk: an char array
11509 * @size: the size in byte of the chunk
11510 * @terminate: last chunk indicator
11511 *
11512 * Parse a Chunk of memory
11513 *
11514 * Returns zero if no error, the xmlParserErrors otherwise.
11515 */
11516int
11517xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
11518 int terminate) {
Daniel Veillarda617e242006-01-09 14:38:44 +000011519 int end_in_lf = 0;
Daniel Veillarda6c76a22009-08-26 14:37:00 +020011520 int remain = 0;
Daniel Veillarda617e242006-01-09 14:38:44 +000011521
Daniel Veillard36e5cd52004-11-02 14:52:23 +000011522 if (ctxt == NULL)
11523 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard14412512005-01-21 23:53:26 +000011524 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000011525 return(ctxt->errNo);
Daniel Veillard309f81d2003-09-23 09:02:53 +000011526 if (ctxt->instate == XML_PARSER_START)
11527 xmlDetectSAX2(ctxt);
Daniel Veillarda617e242006-01-09 14:38:44 +000011528 if ((size > 0) && (chunk != NULL) && (!terminate) &&
11529 (chunk[size - 1] == '\r')) {
11530 end_in_lf = 1;
11531 size--;
11532 }
Daniel Veillarda6c76a22009-08-26 14:37:00 +020011533
11534xmldecl_done:
11535
Owen Taylor3473f882001-02-23 17:55:21 +000011536 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
11537 (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) {
11538 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
11539 int cur = ctxt->input->cur - ctxt->input->base;
William M. Bracka3215c72004-07-31 16:24:01 +000011540 int res;
Daniel Veillarda6c76a22009-08-26 14:37:00 +020011541
11542 /*
11543 * Specific handling if we autodetected an encoding, we should not
11544 * push more than the first line ... which depend on the encoding
11545 * And only push the rest once the final encoding was detected
11546 */
11547 if ((ctxt->instate == XML_PARSER_START) && (ctxt->input != NULL) &&
11548 (ctxt->input->buf != NULL) && (ctxt->input->buf->encoder != NULL)) {
Nikolay Sivov73046832010-01-19 15:38:05 +010011549 unsigned int len = 45;
Daniel Veillarda6c76a22009-08-26 14:37:00 +020011550
11551 if ((xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
11552 BAD_CAST "UTF-16")) ||
11553 (xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
11554 BAD_CAST "UTF16")))
11555 len = 90;
11556 else if ((xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
11557 BAD_CAST "UCS-4")) ||
11558 (xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
11559 BAD_CAST "UCS4")))
11560 len = 180;
11561
11562 if (ctxt->input->buf->rawconsumed < len)
11563 len -= ctxt->input->buf->rawconsumed;
11564
Raul Hudeaba9716a2010-03-15 10:13:29 +010011565 /*
11566 * Change size for reading the initial declaration only
11567 * if size is greater than len. Otherwise, memmove in xmlBufferAdd
11568 * will blindly copy extra bytes from memory.
11569 */
11570 if (size > len) {
11571 remain = size - len;
11572 size = len;
11573 } else {
11574 remain = 0;
11575 }
Daniel Veillarda6c76a22009-08-26 14:37:00 +020011576 }
William M. Bracka3215c72004-07-31 16:24:01 +000011577 res =xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
11578 if (res < 0) {
11579 ctxt->errNo = XML_PARSER_EOF;
11580 ctxt->disableSAX = 1;
11581 return (XML_PARSER_EOF);
11582 }
Owen Taylor3473f882001-02-23 17:55:21 +000011583 ctxt->input->base = ctxt->input->buf->buffer->content + base;
11584 ctxt->input->cur = ctxt->input->base + cur;
Daniel Veillard48b2f892001-02-25 16:11:03 +000011585 ctxt->input->end =
11586 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000011587#ifdef DEBUG_PUSH
11588 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
11589#endif
11590
Owen Taylor3473f882001-02-23 17:55:21 +000011591 } else if (ctxt->instate != XML_PARSER_EOF) {
11592 if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
11593 xmlParserInputBufferPtr in = ctxt->input->buf;
11594 if ((in->encoder != NULL) && (in->buffer != NULL) &&
11595 (in->raw != NULL)) {
11596 int nbchars;
Daniel Veillarda6c76a22009-08-26 14:37:00 +020011597
Owen Taylor3473f882001-02-23 17:55:21 +000011598 nbchars = xmlCharEncInFunc(in->encoder, in->buffer, in->raw);
11599 if (nbchars < 0) {
Daniel Veillard24eb9782003-10-04 21:08:09 +000011600 /* TODO 2.6.0 */
Owen Taylor3473f882001-02-23 17:55:21 +000011601 xmlGenericError(xmlGenericErrorContext,
11602 "xmlParseChunk: encoder error\n");
11603 return(XML_ERR_INVALID_ENCODING);
11604 }
11605 }
11606 }
11607 }
Daniel Veillarda6c76a22009-08-26 14:37:00 +020011608 if (remain != 0)
11609 xmlParseTryOrFinish(ctxt, 0);
11610 else
11611 xmlParseTryOrFinish(ctxt, terminate);
11612 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
11613 return(ctxt->errNo);
11614
11615 if (remain != 0) {
11616 chunk += size;
11617 size = remain;
11618 remain = 0;
11619 goto xmldecl_done;
11620 }
Daniel Veillarda617e242006-01-09 14:38:44 +000011621 if ((end_in_lf == 1) && (ctxt->input != NULL) &&
11622 (ctxt->input->buf != NULL)) {
11623 xmlParserInputBufferPush(ctxt->input->buf, 1, "\r");
11624 }
Owen Taylor3473f882001-02-23 17:55:21 +000011625 if (terminate) {
11626 /*
11627 * Check for termination
11628 */
Daniel Veillard36e5cd52004-11-02 14:52:23 +000011629 int avail = 0;
11630
11631 if (ctxt->input != NULL) {
Daniel Veillard819d5cb2002-10-14 11:15:18 +000011632 if (ctxt->input->buf == NULL)
Daniel Veillard36e5cd52004-11-02 14:52:23 +000011633 avail = ctxt->input->length -
11634 (ctxt->input->cur - ctxt->input->base);
11635 else
11636 avail = ctxt->input->buf->buffer->use -
11637 (ctxt->input->cur - ctxt->input->base);
11638 }
Daniel Veillard819d5cb2002-10-14 11:15:18 +000011639
Owen Taylor3473f882001-02-23 17:55:21 +000011640 if ((ctxt->instate != XML_PARSER_EOF) &&
11641 (ctxt->instate != XML_PARSER_EPILOG)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011642 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011643 }
Daniel Veillard819d5cb2002-10-14 11:15:18 +000011644 if ((ctxt->instate == XML_PARSER_EPILOG) && (avail > 0)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011645 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Daniel Veillard819d5cb2002-10-14 11:15:18 +000011646 }
Owen Taylor3473f882001-02-23 17:55:21 +000011647 if (ctxt->instate != XML_PARSER_EOF) {
Daniel Veillard8d24cc12002-03-05 15:41:29 +000011648 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +000011649 ctxt->sax->endDocument(ctxt->userData);
11650 }
11651 ctxt->instate = XML_PARSER_EOF;
11652 }
11653 return((xmlParserErrors) ctxt->errNo);
11654}
11655
11656/************************************************************************
11657 * *
11658 * I/O front end functions to the parser *
11659 * *
11660 ************************************************************************/
11661
11662/**
Owen Taylor3473f882001-02-23 17:55:21 +000011663 * xmlCreatePushParserCtxt:
11664 * @sax: a SAX handler
11665 * @user_data: The user data returned on SAX callbacks
11666 * @chunk: a pointer to an array of chars
11667 * @size: number of chars in the array
11668 * @filename: an optional file name or URI
11669 *
Daniel Veillard176d99f2002-07-06 19:22:28 +000011670 * Create a parser context for using the XML parser in push mode.
11671 * If @buffer and @size are non-NULL, the data is used to detect
11672 * the encoding. The remaining characters will be parsed so they
11673 * don't need to be fed in again through xmlParseChunk.
Owen Taylor3473f882001-02-23 17:55:21 +000011674 * To allow content encoding detection, @size should be >= 4
11675 * The value of @filename is used for fetching external entities
11676 * and error/warning reports.
11677 *
11678 * Returns the new parser context or NULL
11679 */
Daniel Veillard176d99f2002-07-06 19:22:28 +000011680
Owen Taylor3473f882001-02-23 17:55:21 +000011681xmlParserCtxtPtr
11682xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
11683 const char *chunk, int size, const char *filename) {
11684 xmlParserCtxtPtr ctxt;
11685 xmlParserInputPtr inputStream;
11686 xmlParserInputBufferPtr buf;
11687 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
11688
11689 /*
11690 * plug some encoding conversion routines
11691 */
11692 if ((chunk != NULL) && (size >= 4))
11693 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
11694
11695 buf = xmlAllocParserInputBuffer(enc);
11696 if (buf == NULL) return(NULL);
11697
11698 ctxt = xmlNewParserCtxt();
11699 if (ctxt == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +000011700 xmlErrMemory(NULL, "creating parser: out of memory\n");
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000011701 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000011702 return(NULL);
11703 }
Daniel Veillard03a53c32004-10-26 16:06:51 +000011704 ctxt->dictNames = 1;
Daniel Veillarde57ec792003-09-10 10:50:59 +000011705 ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 * sizeof(xmlChar *));
11706 if (ctxt->pushTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011707 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +000011708 xmlFreeParserInputBuffer(buf);
11709 xmlFreeParserCtxt(ctxt);
11710 return(NULL);
11711 }
Owen Taylor3473f882001-02-23 17:55:21 +000011712 if (sax != NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +000011713#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard092643b2003-09-25 14:29:29 +000011714 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Daniel Veillard81273902003-09-30 00:43:48 +000011715#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000011716 xmlFree(ctxt->sax);
11717 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
11718 if (ctxt->sax == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011719 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000011720 xmlFreeParserInputBuffer(buf);
11721 xmlFreeParserCtxt(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011722 return(NULL);
11723 }
Daniel Veillard5ea30d72004-11-08 11:54:28 +000011724 memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
11725 if (sax->initialized == XML_SAX2_MAGIC)
11726 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
11727 else
11728 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
Owen Taylor3473f882001-02-23 17:55:21 +000011729 if (user_data != NULL)
11730 ctxt->userData = user_data;
11731 }
11732 if (filename == NULL) {
11733 ctxt->directory = NULL;
11734 } else {
11735 ctxt->directory = xmlParserGetDirectory(filename);
11736 }
11737
11738 inputStream = xmlNewInputStream(ctxt);
11739 if (inputStream == NULL) {
11740 xmlFreeParserCtxt(ctxt);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000011741 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000011742 return(NULL);
11743 }
11744
11745 if (filename == NULL)
11746 inputStream->filename = NULL;
William M. Bracka3215c72004-07-31 16:24:01 +000011747 else {
Daniel Veillardf4862f02002-09-10 11:13:43 +000011748 inputStream->filename = (char *)
Igor Zlatkovic5f9fada2003-02-19 14:51:00 +000011749 xmlCanonicPath((const xmlChar *) filename);
William M. Bracka3215c72004-07-31 16:24:01 +000011750 if (inputStream->filename == NULL) {
11751 xmlFreeParserCtxt(ctxt);
11752 xmlFreeParserInputBuffer(buf);
11753 return(NULL);
11754 }
11755 }
Owen Taylor3473f882001-02-23 17:55:21 +000011756 inputStream->buf = buf;
11757 inputStream->base = inputStream->buf->buffer->content;
11758 inputStream->cur = inputStream->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +000011759 inputStream->end =
11760 &inputStream->buf->buffer->content[inputStream->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000011761
11762 inputPush(ctxt, inputStream);
11763
William M. Brack3a1cd212005-02-11 14:35:54 +000011764 /*
11765 * If the caller didn't provide an initial 'chunk' for determining
11766 * the encoding, we set the context to XML_CHAR_ENCODING_NONE so
11767 * that it can be automatically determined later
11768 */
11769 if ((size == 0) || (chunk == NULL)) {
11770 ctxt->charset = XML_CHAR_ENCODING_NONE;
11771 } else if ((ctxt->input != NULL) && (ctxt->input->buf != NULL)) {
Daniel Veillardaa39a0f2002-01-06 12:47:22 +000011772 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
11773 int cur = ctxt->input->cur - ctxt->input->base;
11774
Owen Taylor3473f882001-02-23 17:55:21 +000011775 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
Daniel Veillardaa39a0f2002-01-06 12:47:22 +000011776
11777 ctxt->input->base = ctxt->input->buf->buffer->content + base;
11778 ctxt->input->cur = ctxt->input->base + cur;
11779 ctxt->input->end =
11780 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000011781#ifdef DEBUG_PUSH
11782 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
11783#endif
11784 }
11785
Daniel Veillard0e4cd172001-06-28 12:13:56 +000011786 if (enc != XML_CHAR_ENCODING_NONE) {
11787 xmlSwitchEncoding(ctxt, enc);
11788 }
11789
Owen Taylor3473f882001-02-23 17:55:21 +000011790 return(ctxt);
11791}
Daniel Veillard73b013f2003-09-30 12:36:01 +000011792#endif /* LIBXML_PUSH_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000011793
11794/**
Daniel Veillard39e5c892005-07-03 22:48:50 +000011795 * xmlStopParser:
11796 * @ctxt: an XML parser context
11797 *
11798 * Blocks further parser processing
11799 */
11800void
11801xmlStopParser(xmlParserCtxtPtr ctxt) {
11802 if (ctxt == NULL)
11803 return;
11804 ctxt->instate = XML_PARSER_EOF;
11805 ctxt->disableSAX = 1;
11806 if (ctxt->input != NULL) {
11807 ctxt->input->cur = BAD_CAST"";
11808 ctxt->input->base = ctxt->input->cur;
11809 }
11810}
11811
11812/**
Owen Taylor3473f882001-02-23 17:55:21 +000011813 * xmlCreateIOParserCtxt:
11814 * @sax: a SAX handler
11815 * @user_data: The user data returned on SAX callbacks
11816 * @ioread: an I/O read function
11817 * @ioclose: an I/O close function
11818 * @ioctx: an I/O handler
11819 * @enc: the charset encoding if known
11820 *
11821 * Create a parser context for using the XML parser with an existing
11822 * I/O stream
11823 *
11824 * Returns the new parser context or NULL
11825 */
11826xmlParserCtxtPtr
11827xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
11828 xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
11829 void *ioctx, xmlCharEncoding enc) {
11830 xmlParserCtxtPtr ctxt;
11831 xmlParserInputPtr inputStream;
11832 xmlParserInputBufferPtr buf;
Daniel Veillard42595322004-11-08 10:52:06 +000011833
11834 if (ioread == NULL) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011835
11836 buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
11837 if (buf == NULL) return(NULL);
11838
11839 ctxt = xmlNewParserCtxt();
11840 if (ctxt == NULL) {
Daniel Veillardf0af8ec2005-07-08 17:27:33 +000011841 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000011842 return(NULL);
11843 }
11844 if (sax != NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +000011845#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard092643b2003-09-25 14:29:29 +000011846 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Daniel Veillard81273902003-09-30 00:43:48 +000011847#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000011848 xmlFree(ctxt->sax);
11849 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
11850 if (ctxt->sax == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011851 xmlErrMemory(ctxt, NULL);
Daniel Veillardf0af8ec2005-07-08 17:27:33 +000011852 xmlFreeParserCtxt(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011853 return(NULL);
11854 }
Daniel Veillard5ea30d72004-11-08 11:54:28 +000011855 memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
11856 if (sax->initialized == XML_SAX2_MAGIC)
11857 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
11858 else
11859 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
Owen Taylor3473f882001-02-23 17:55:21 +000011860 if (user_data != NULL)
11861 ctxt->userData = user_data;
11862 }
11863
11864 inputStream = xmlNewIOInputStream(ctxt, buf, enc);
11865 if (inputStream == NULL) {
11866 xmlFreeParserCtxt(ctxt);
11867 return(NULL);
11868 }
11869 inputPush(ctxt, inputStream);
11870
11871 return(ctxt);
11872}
11873
Daniel Veillard4432df22003-09-28 18:58:27 +000011874#ifdef LIBXML_VALID_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000011875/************************************************************************
11876 * *
Daniel Veillardcbaf3992001-12-31 16:16:02 +000011877 * Front ends when parsing a DTD *
Owen Taylor3473f882001-02-23 17:55:21 +000011878 * *
11879 ************************************************************************/
11880
11881/**
11882 * xmlIOParseDTD:
11883 * @sax: the SAX handler block or NULL
11884 * @input: an Input Buffer
11885 * @enc: the charset encoding if known
11886 *
11887 * Load and parse a DTD
11888 *
11889 * Returns the resulting xmlDtdPtr or NULL in case of error.
Daniel Veillard402b3442006-10-13 10:28:21 +000011890 * @input will be freed by the function in any case.
Owen Taylor3473f882001-02-23 17:55:21 +000011891 */
11892
11893xmlDtdPtr
11894xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
11895 xmlCharEncoding enc) {
11896 xmlDtdPtr ret = NULL;
11897 xmlParserCtxtPtr ctxt;
11898 xmlParserInputPtr pinput = NULL;
Daniel Veillard87a764e2001-06-20 17:41:10 +000011899 xmlChar start[4];
Owen Taylor3473f882001-02-23 17:55:21 +000011900
11901 if (input == NULL)
11902 return(NULL);
11903
11904 ctxt = xmlNewParserCtxt();
11905 if (ctxt == NULL) {
Daniel Veillard402b3442006-10-13 10:28:21 +000011906 xmlFreeParserInputBuffer(input);
Owen Taylor3473f882001-02-23 17:55:21 +000011907 return(NULL);
11908 }
11909
11910 /*
11911 * Set-up the SAX context
11912 */
11913 if (sax != NULL) {
11914 if (ctxt->sax != NULL)
11915 xmlFree(ctxt->sax);
11916 ctxt->sax = sax;
Daniel Veillard500a1de2004-03-22 15:22:58 +000011917 ctxt->userData = ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000011918 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000011919 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011920
11921 /*
11922 * generate a parser input from the I/O handler
11923 */
11924
Daniel Veillard43caefb2003-12-07 19:32:22 +000011925 pinput = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
Owen Taylor3473f882001-02-23 17:55:21 +000011926 if (pinput == NULL) {
11927 if (sax != NULL) ctxt->sax = NULL;
Daniel Veillard402b3442006-10-13 10:28:21 +000011928 xmlFreeParserInputBuffer(input);
Owen Taylor3473f882001-02-23 17:55:21 +000011929 xmlFreeParserCtxt(ctxt);
11930 return(NULL);
11931 }
11932
11933 /*
11934 * plug some encoding conversion routines here.
11935 */
Daniel Veillarda8f09ce2008-08-27 13:02:01 +000011936 if (xmlPushInput(ctxt, pinput) < 0) {
11937 if (sax != NULL) ctxt->sax = NULL;
11938 xmlFreeParserCtxt(ctxt);
11939 return(NULL);
11940 }
Daniel Veillard43caefb2003-12-07 19:32:22 +000011941 if (enc != XML_CHAR_ENCODING_NONE) {
11942 xmlSwitchEncoding(ctxt, enc);
11943 }
Owen Taylor3473f882001-02-23 17:55:21 +000011944
11945 pinput->filename = NULL;
11946 pinput->line = 1;
11947 pinput->col = 1;
11948 pinput->base = ctxt->input->cur;
11949 pinput->cur = ctxt->input->cur;
11950 pinput->free = NULL;
11951
11952 /*
11953 * let's parse that entity knowing it's an external subset.
11954 */
11955 ctxt->inSubset = 2;
11956 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
Daniel Veillardae0765b2008-07-31 19:54:59 +000011957 if (ctxt->myDoc == NULL) {
11958 xmlErrMemory(ctxt, "New Doc failed");
11959 return(NULL);
11960 }
11961 ctxt->myDoc->properties = XML_DOC_INTERNAL;
Owen Taylor3473f882001-02-23 17:55:21 +000011962 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
11963 BAD_CAST "none", BAD_CAST "none");
Daniel Veillard87a764e2001-06-20 17:41:10 +000011964
Daniel Veillard4aede2e2003-10-17 12:43:59 +000011965 if ((enc == XML_CHAR_ENCODING_NONE) &&
11966 ((ctxt->input->end - ctxt->input->cur) >= 4)) {
Daniel Veillard87a764e2001-06-20 17:41:10 +000011967 /*
11968 * Get the 4 first bytes and decode the charset
11969 * if enc != XML_CHAR_ENCODING_NONE
11970 * plug some encoding conversion routines.
11971 */
11972 start[0] = RAW;
11973 start[1] = NXT(1);
11974 start[2] = NXT(2);
11975 start[3] = NXT(3);
11976 enc = xmlDetectCharEncoding(start, 4);
11977 if (enc != XML_CHAR_ENCODING_NONE) {
11978 xmlSwitchEncoding(ctxt, enc);
11979 }
11980 }
11981
Owen Taylor3473f882001-02-23 17:55:21 +000011982 xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
11983
11984 if (ctxt->myDoc != NULL) {
11985 if (ctxt->wellFormed) {
11986 ret = ctxt->myDoc->extSubset;
11987 ctxt->myDoc->extSubset = NULL;
Daniel Veillard329456a2003-04-26 21:21:00 +000011988 if (ret != NULL) {
11989 xmlNodePtr tmp;
11990
11991 ret->doc = NULL;
11992 tmp = ret->children;
11993 while (tmp != NULL) {
11994 tmp->doc = NULL;
11995 tmp = tmp->next;
11996 }
11997 }
Owen Taylor3473f882001-02-23 17:55:21 +000011998 } else {
11999 ret = NULL;
12000 }
12001 xmlFreeDoc(ctxt->myDoc);
12002 ctxt->myDoc = NULL;
12003 }
12004 if (sax != NULL) ctxt->sax = NULL;
12005 xmlFreeParserCtxt(ctxt);
12006
12007 return(ret);
12008}
12009
12010/**
12011 * xmlSAXParseDTD:
12012 * @sax: the SAX handler block
12013 * @ExternalID: a NAME* containing the External ID of the DTD
12014 * @SystemID: a NAME* containing the URL to the DTD
12015 *
12016 * Load and parse an external subset.
12017 *
12018 * Returns the resulting xmlDtdPtr or NULL in case of error.
12019 */
12020
12021xmlDtdPtr
12022xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
12023 const xmlChar *SystemID) {
12024 xmlDtdPtr ret = NULL;
12025 xmlParserCtxtPtr ctxt;
12026 xmlParserInputPtr input = NULL;
12027 xmlCharEncoding enc;
Igor Zlatkovic07d59762004-08-24 19:12:51 +000012028 xmlChar* systemIdCanonic;
Owen Taylor3473f882001-02-23 17:55:21 +000012029
12030 if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
12031
12032 ctxt = xmlNewParserCtxt();
12033 if (ctxt == NULL) {
12034 return(NULL);
12035 }
12036
12037 /*
12038 * Set-up the SAX context
12039 */
12040 if (sax != NULL) {
12041 if (ctxt->sax != NULL)
12042 xmlFree(ctxt->sax);
12043 ctxt->sax = sax;
Daniel Veillardbf1e3d82003-08-14 23:57:26 +000012044 ctxt->userData = ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000012045 }
Igor Zlatkovic07d59762004-08-24 19:12:51 +000012046
12047 /*
12048 * Canonicalise the system ID
12049 */
12050 systemIdCanonic = xmlCanonicPath(SystemID);
Daniel Veillardc93a19f2004-10-04 11:53:20 +000012051 if ((SystemID != NULL) && (systemIdCanonic == NULL)) {
Igor Zlatkovic07d59762004-08-24 19:12:51 +000012052 xmlFreeParserCtxt(ctxt);
12053 return(NULL);
12054 }
Owen Taylor3473f882001-02-23 17:55:21 +000012055
12056 /*
12057 * Ask the Entity resolver to load the damn thing
12058 */
12059
12060 if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
Daniel Veillarda9557952006-10-12 12:53:15 +000012061 input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID,
12062 systemIdCanonic);
Owen Taylor3473f882001-02-23 17:55:21 +000012063 if (input == NULL) {
12064 if (sax != NULL) ctxt->sax = NULL;
12065 xmlFreeParserCtxt(ctxt);
Daniel Veillard34099b42004-11-04 17:34:35 +000012066 if (systemIdCanonic != NULL)
12067 xmlFree(systemIdCanonic);
Owen Taylor3473f882001-02-23 17:55:21 +000012068 return(NULL);
12069 }
12070
12071 /*
12072 * plug some encoding conversion routines here.
12073 */
Daniel Veillarda8f09ce2008-08-27 13:02:01 +000012074 if (xmlPushInput(ctxt, input) < 0) {
12075 if (sax != NULL) ctxt->sax = NULL;
12076 xmlFreeParserCtxt(ctxt);
12077 if (systemIdCanonic != NULL)
12078 xmlFree(systemIdCanonic);
12079 return(NULL);
12080 }
Daniel Veillard4aede2e2003-10-17 12:43:59 +000012081 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
12082 enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
12083 xmlSwitchEncoding(ctxt, enc);
12084 }
Owen Taylor3473f882001-02-23 17:55:21 +000012085
12086 if (input->filename == NULL)
Igor Zlatkovic07d59762004-08-24 19:12:51 +000012087 input->filename = (char *) systemIdCanonic;
12088 else
12089 xmlFree(systemIdCanonic);
Owen Taylor3473f882001-02-23 17:55:21 +000012090 input->line = 1;
12091 input->col = 1;
12092 input->base = ctxt->input->cur;
12093 input->cur = ctxt->input->cur;
12094 input->free = NULL;
12095
12096 /*
12097 * let's parse that entity knowing it's an external subset.
12098 */
12099 ctxt->inSubset = 2;
12100 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
Daniel Veillardae0765b2008-07-31 19:54:59 +000012101 if (ctxt->myDoc == NULL) {
12102 xmlErrMemory(ctxt, "New Doc failed");
12103 if (sax != NULL) ctxt->sax = NULL;
12104 xmlFreeParserCtxt(ctxt);
12105 return(NULL);
12106 }
12107 ctxt->myDoc->properties = XML_DOC_INTERNAL;
Owen Taylor3473f882001-02-23 17:55:21 +000012108 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
12109 ExternalID, SystemID);
12110 xmlParseExternalSubset(ctxt, ExternalID, SystemID);
12111
12112 if (ctxt->myDoc != NULL) {
12113 if (ctxt->wellFormed) {
12114 ret = ctxt->myDoc->extSubset;
12115 ctxt->myDoc->extSubset = NULL;
Daniel Veillardc5573462003-04-25 16:43:49 +000012116 if (ret != NULL) {
12117 xmlNodePtr tmp;
12118
12119 ret->doc = NULL;
12120 tmp = ret->children;
12121 while (tmp != NULL) {
12122 tmp->doc = NULL;
12123 tmp = tmp->next;
12124 }
12125 }
Owen Taylor3473f882001-02-23 17:55:21 +000012126 } else {
12127 ret = NULL;
12128 }
12129 xmlFreeDoc(ctxt->myDoc);
12130 ctxt->myDoc = NULL;
12131 }
12132 if (sax != NULL) ctxt->sax = NULL;
12133 xmlFreeParserCtxt(ctxt);
Daniel Veillard0161e632008-08-28 15:36:32 +000012134
Owen Taylor3473f882001-02-23 17:55:21 +000012135 return(ret);
12136}
12137
Daniel Veillard4432df22003-09-28 18:58:27 +000012138
Owen Taylor3473f882001-02-23 17:55:21 +000012139/**
12140 * xmlParseDTD:
12141 * @ExternalID: a NAME* containing the External ID of the DTD
12142 * @SystemID: a NAME* containing the URL to the DTD
12143 *
12144 * Load and parse an external subset.
Daniel Veillard0161e632008-08-28 15:36:32 +000012145 *
Owen Taylor3473f882001-02-23 17:55:21 +000012146 * Returns the resulting xmlDtdPtr or NULL in case of error.
12147 */
12148
12149xmlDtdPtr
12150xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
12151 return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
12152}
Daniel Veillard4432df22003-09-28 18:58:27 +000012153#endif /* LIBXML_VALID_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012154
12155/************************************************************************
12156 * *
12157 * Front ends when parsing an Entity *
12158 * *
12159 ************************************************************************/
12160
12161/**
Owen Taylor3473f882001-02-23 17:55:21 +000012162 * xmlParseCtxtExternalEntity:
12163 * @ctx: the existing parsing context
12164 * @URL: the URL for the entity to load
12165 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +000012166 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +000012167 *
12168 * Parse an external general entity within an existing parsing context
12169 * An external general parsed entity is well-formed if it matches the
12170 * production labeled extParsedEnt.
12171 *
12172 * [78] extParsedEnt ::= TextDecl? content
12173 *
12174 * Returns 0 if the entity is well formed, -1 in case of args problem and
12175 * the parser error code otherwise
12176 */
12177
12178int
12179xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
Daniel Veillardcda96922001-08-21 10:56:31 +000012180 const xmlChar *ID, xmlNodePtr *lst) {
Owen Taylor3473f882001-02-23 17:55:21 +000012181 xmlParserCtxtPtr ctxt;
12182 xmlDocPtr newDoc;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012183 xmlNodePtr newRoot;
Owen Taylor3473f882001-02-23 17:55:21 +000012184 xmlSAXHandlerPtr oldsax = NULL;
12185 int ret = 0;
Daniel Veillard87a764e2001-06-20 17:41:10 +000012186 xmlChar start[4];
12187 xmlCharEncoding enc;
Owen Taylor3473f882001-02-23 17:55:21 +000012188
Daniel Veillardce682bc2004-11-05 17:22:25 +000012189 if (ctx == NULL) return(-1);
12190
Daniel Veillard0161e632008-08-28 15:36:32 +000012191 if (((ctx->depth > 40) && ((ctx->options & XML_PARSE_HUGE) == 0)) ||
12192 (ctx->depth > 1024)) {
Owen Taylor3473f882001-02-23 17:55:21 +000012193 return(XML_ERR_ENTITY_LOOP);
12194 }
12195
Daniel Veillardcda96922001-08-21 10:56:31 +000012196 if (lst != NULL)
12197 *lst = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000012198 if ((URL == NULL) && (ID == NULL))
12199 return(-1);
12200 if (ctx->myDoc == NULL) /* @@ relax but check for dereferences */
12201 return(-1);
12202
Rob Richards798743a2009-06-19 13:54:25 -040012203 ctxt = xmlCreateEntityParserCtxtInternal(URL, ID, NULL, ctx);
Daniel Veillard2937b3a2006-10-10 08:52:34 +000012204 if (ctxt == NULL) {
12205 return(-1);
12206 }
Daniel Veillard40ec29a2008-07-30 12:35:40 +000012207
Owen Taylor3473f882001-02-23 17:55:21 +000012208 oldsax = ctxt->sax;
12209 ctxt->sax = ctx->sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000012210 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000012211 newDoc = xmlNewDoc(BAD_CAST "1.0");
12212 if (newDoc == NULL) {
12213 xmlFreeParserCtxt(ctxt);
12214 return(-1);
12215 }
Daniel Veillardae0765b2008-07-31 19:54:59 +000012216 newDoc->properties = XML_DOC_INTERNAL;
Daniel Veillard03a53c32004-10-26 16:06:51 +000012217 if (ctx->myDoc->dict) {
12218 newDoc->dict = ctx->myDoc->dict;
12219 xmlDictReference(newDoc->dict);
12220 }
Owen Taylor3473f882001-02-23 17:55:21 +000012221 if (ctx->myDoc != NULL) {
12222 newDoc->intSubset = ctx->myDoc->intSubset;
12223 newDoc->extSubset = ctx->myDoc->extSubset;
12224 }
12225 if (ctx->myDoc->URL != NULL) {
12226 newDoc->URL = xmlStrdup(ctx->myDoc->URL);
12227 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012228 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
12229 if (newRoot == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000012230 ctxt->sax = oldsax;
12231 xmlFreeParserCtxt(ctxt);
12232 newDoc->intSubset = NULL;
12233 newDoc->extSubset = NULL;
12234 xmlFreeDoc(newDoc);
12235 return(-1);
12236 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012237 xmlAddChild((xmlNodePtr) newDoc, newRoot);
Owen Taylor3473f882001-02-23 17:55:21 +000012238 nodePush(ctxt, newDoc->children);
12239 if (ctx->myDoc == NULL) {
12240 ctxt->myDoc = newDoc;
12241 } else {
12242 ctxt->myDoc = ctx->myDoc;
12243 newDoc->children->doc = ctx->myDoc;
12244 }
12245
Daniel Veillard40ec29a2008-07-30 12:35:40 +000012246 /*
Daniel Veillard87a764e2001-06-20 17:41:10 +000012247 * Get the 4 first bytes and decode the charset
12248 * if enc != XML_CHAR_ENCODING_NONE
12249 * plug some encoding conversion routines.
12250 */
12251 GROW
Daniel Veillard4aede2e2003-10-17 12:43:59 +000012252 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
12253 start[0] = RAW;
12254 start[1] = NXT(1);
12255 start[2] = NXT(2);
12256 start[3] = NXT(3);
12257 enc = xmlDetectCharEncoding(start, 4);
12258 if (enc != XML_CHAR_ENCODING_NONE) {
12259 xmlSwitchEncoding(ctxt, enc);
12260 }
Daniel Veillard87a764e2001-06-20 17:41:10 +000012261 }
12262
Owen Taylor3473f882001-02-23 17:55:21 +000012263 /*
12264 * Parse a possible text declaration first
12265 */
Daniel Veillarda07050d2003-10-19 14:46:32 +000012266 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +000012267 xmlParseTextDecl(ctxt);
Daniel Veillard40ec29a2008-07-30 12:35:40 +000012268 /*
12269 * An XML-1.0 document can't reference an entity not XML-1.0
12270 */
12271 if ((xmlStrEqual(ctx->version, BAD_CAST "1.0")) &&
12272 (!xmlStrEqual(ctxt->input->version, BAD_CAST "1.0"))) {
12273 xmlFatalErrMsg(ctxt, XML_ERR_VERSION_MISMATCH,
12274 "Version mismatch between document and entity\n");
12275 }
Owen Taylor3473f882001-02-23 17:55:21 +000012276 }
12277
12278 /*
12279 * Doing validity checking on chunk doesn't make sense
12280 */
12281 ctxt->instate = XML_PARSER_CONTENT;
12282 ctxt->validate = ctx->validate;
Daniel Veillard5f8d1a32003-03-23 21:02:00 +000012283 ctxt->valid = ctx->valid;
Owen Taylor3473f882001-02-23 17:55:21 +000012284 ctxt->loadsubset = ctx->loadsubset;
12285 ctxt->depth = ctx->depth + 1;
12286 ctxt->replaceEntities = ctx->replaceEntities;
12287 if (ctxt->validate) {
12288 ctxt->vctxt.error = ctx->vctxt.error;
12289 ctxt->vctxt.warning = ctx->vctxt.warning;
Owen Taylor3473f882001-02-23 17:55:21 +000012290 } else {
12291 ctxt->vctxt.error = NULL;
12292 ctxt->vctxt.warning = NULL;
12293 }
Daniel Veillarda9142e72001-06-19 11:07:54 +000012294 ctxt->vctxt.nodeTab = NULL;
12295 ctxt->vctxt.nodeNr = 0;
12296 ctxt->vctxt.nodeMax = 0;
12297 ctxt->vctxt.node = NULL;
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000012298 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
12299 ctxt->dict = ctx->dict;
Daniel Veillardfd343dc2003-10-31 10:55:22 +000012300 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
12301 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
12302 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000012303 ctxt->dictNames = ctx->dictNames;
12304 ctxt->attsDefault = ctx->attsDefault;
12305 ctxt->attsSpecial = ctx->attsSpecial;
William M. Brack503b6102004-08-19 02:17:27 +000012306 ctxt->linenumbers = ctx->linenumbers;
Owen Taylor3473f882001-02-23 17:55:21 +000012307
12308 xmlParseContent(ctxt);
Daniel Veillard40ec29a2008-07-30 12:35:40 +000012309
Daniel Veillard5f8d1a32003-03-23 21:02:00 +000012310 ctx->validate = ctxt->validate;
12311 ctx->valid = ctxt->valid;
Owen Taylor3473f882001-02-23 17:55:21 +000012312 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012313 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000012314 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012315 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000012316 }
12317 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012318 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000012319 }
12320
12321 if (!ctxt->wellFormed) {
12322 if (ctxt->errNo == 0)
12323 ret = 1;
12324 else
12325 ret = ctxt->errNo;
12326 } else {
Daniel Veillardcda96922001-08-21 10:56:31 +000012327 if (lst != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000012328 xmlNodePtr cur;
12329
12330 /*
12331 * Return the newly created nodeset after unlinking it from
12332 * they pseudo parent.
12333 */
12334 cur = newDoc->children->children;
Daniel Veillardcda96922001-08-21 10:56:31 +000012335 *lst = cur;
Owen Taylor3473f882001-02-23 17:55:21 +000012336 while (cur != NULL) {
12337 cur->parent = NULL;
12338 cur = cur->next;
12339 }
12340 newDoc->children->children = NULL;
12341 }
12342 ret = 0;
12343 }
12344 ctxt->sax = oldsax;
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000012345 ctxt->dict = NULL;
12346 ctxt->attsDefault = NULL;
12347 ctxt->attsSpecial = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000012348 xmlFreeParserCtxt(ctxt);
12349 newDoc->intSubset = NULL;
12350 newDoc->extSubset = NULL;
12351 xmlFreeDoc(newDoc);
Daniel Veillard40ec29a2008-07-30 12:35:40 +000012352
Owen Taylor3473f882001-02-23 17:55:21 +000012353 return(ret);
12354}
12355
12356/**
Daniel Veillard257d9102001-05-08 10:41:44 +000012357 * xmlParseExternalEntityPrivate:
Owen Taylor3473f882001-02-23 17:55:21 +000012358 * @doc: the document the chunk pertains to
Daniel Veillarda97a19b2001-05-20 13:19:52 +000012359 * @oldctxt: the previous parser context if available
Owen Taylor3473f882001-02-23 17:55:21 +000012360 * @sax: the SAX handler bloc (possibly NULL)
12361 * @user_data: The user data returned on SAX callbacks (possibly NULL)
12362 * @depth: Used for loop detection, use 0
12363 * @URL: the URL for the entity to load
12364 * @ID: the System ID for the entity to load
12365 * @list: the return value for the set of parsed nodes
12366 *
Daniel Veillard257d9102001-05-08 10:41:44 +000012367 * Private version of xmlParseExternalEntity()
Owen Taylor3473f882001-02-23 17:55:21 +000012368 *
12369 * Returns 0 if the entity is well formed, -1 in case of args problem and
12370 * the parser error code otherwise
12371 */
12372
Daniel Veillard7d515752003-09-26 19:12:37 +000012373static xmlParserErrors
Daniel Veillarda97a19b2001-05-20 13:19:52 +000012374xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
12375 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +000012376 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +000012377 const xmlChar *ID, xmlNodePtr *list) {
Owen Taylor3473f882001-02-23 17:55:21 +000012378 xmlParserCtxtPtr ctxt;
12379 xmlDocPtr newDoc;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012380 xmlNodePtr newRoot;
Owen Taylor3473f882001-02-23 17:55:21 +000012381 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillard7d515752003-09-26 19:12:37 +000012382 xmlParserErrors ret = XML_ERR_OK;
Daniel Veillard87a764e2001-06-20 17:41:10 +000012383 xmlChar start[4];
12384 xmlCharEncoding enc;
Owen Taylor3473f882001-02-23 17:55:21 +000012385
Daniel Veillard0161e632008-08-28 15:36:32 +000012386 if (((depth > 40) &&
12387 ((oldctxt == NULL) || (oldctxt->options & XML_PARSE_HUGE) == 0)) ||
12388 (depth > 1024)) {
Owen Taylor3473f882001-02-23 17:55:21 +000012389 return(XML_ERR_ENTITY_LOOP);
12390 }
12391
Owen Taylor3473f882001-02-23 17:55:21 +000012392 if (list != NULL)
12393 *list = NULL;
12394 if ((URL == NULL) && (ID == NULL))
Daniel Veillard7d515752003-09-26 19:12:37 +000012395 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard30e76072006-03-09 14:13:55 +000012396 if (doc == NULL)
Daniel Veillard7d515752003-09-26 19:12:37 +000012397 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000012398
12399
Rob Richards9c0aa472009-03-26 18:10:19 +000012400 ctxt = xmlCreateEntityParserCtxtInternal(URL, ID, NULL, oldctxt);
William M. Brackb670e2e2003-09-27 01:05:55 +000012401 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
Owen Taylor3473f882001-02-23 17:55:21 +000012402 ctxt->userData = ctxt;
Daniel Veillarda97a19b2001-05-20 13:19:52 +000012403 if (oldctxt != NULL) {
12404 ctxt->_private = oldctxt->_private;
12405 ctxt->loadsubset = oldctxt->loadsubset;
12406 ctxt->validate = oldctxt->validate;
12407 ctxt->external = oldctxt->external;
Daniel Veillard44e1dd02003-02-21 23:23:28 +000012408 ctxt->record_info = oldctxt->record_info;
12409 ctxt->node_seq.maximum = oldctxt->node_seq.maximum;
12410 ctxt->node_seq.length = oldctxt->node_seq.length;
12411 ctxt->node_seq.buffer = oldctxt->node_seq.buffer;
Daniel Veillarda97a19b2001-05-20 13:19:52 +000012412 } else {
12413 /*
12414 * Doing validity checking on chunk without context
12415 * doesn't make sense
12416 */
12417 ctxt->_private = NULL;
12418 ctxt->validate = 0;
12419 ctxt->external = 2;
12420 ctxt->loadsubset = 0;
12421 }
Owen Taylor3473f882001-02-23 17:55:21 +000012422 if (sax != NULL) {
12423 oldsax = ctxt->sax;
12424 ctxt->sax = sax;
12425 if (user_data != NULL)
12426 ctxt->userData = user_data;
12427 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000012428 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000012429 newDoc = xmlNewDoc(BAD_CAST "1.0");
12430 if (newDoc == NULL) {
Daniel Veillard44e1dd02003-02-21 23:23:28 +000012431 ctxt->node_seq.maximum = 0;
12432 ctxt->node_seq.length = 0;
12433 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000012434 xmlFreeParserCtxt(ctxt);
Daniel Veillard7d515752003-09-26 19:12:37 +000012435 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000012436 }
Daniel Veillardae0765b2008-07-31 19:54:59 +000012437 newDoc->properties = XML_DOC_INTERNAL;
Daniel Veillard30e76072006-03-09 14:13:55 +000012438 newDoc->intSubset = doc->intSubset;
12439 newDoc->extSubset = doc->extSubset;
12440 newDoc->dict = doc->dict;
Daniel Veillard03a53c32004-10-26 16:06:51 +000012441 xmlDictReference(newDoc->dict);
12442
Owen Taylor3473f882001-02-23 17:55:21 +000012443 if (doc->URL != NULL) {
12444 newDoc->URL = xmlStrdup(doc->URL);
12445 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012446 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
12447 if (newRoot == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000012448 if (sax != NULL)
12449 ctxt->sax = oldsax;
Daniel Veillard44e1dd02003-02-21 23:23:28 +000012450 ctxt->node_seq.maximum = 0;
12451 ctxt->node_seq.length = 0;
12452 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000012453 xmlFreeParserCtxt(ctxt);
12454 newDoc->intSubset = NULL;
12455 newDoc->extSubset = NULL;
12456 xmlFreeDoc(newDoc);
Daniel Veillard7d515752003-09-26 19:12:37 +000012457 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000012458 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012459 xmlAddChild((xmlNodePtr) newDoc, newRoot);
Owen Taylor3473f882001-02-23 17:55:21 +000012460 nodePush(ctxt, newDoc->children);
Daniel Veillard30e76072006-03-09 14:13:55 +000012461 ctxt->myDoc = doc;
12462 newRoot->doc = doc;
Owen Taylor3473f882001-02-23 17:55:21 +000012463
Daniel Veillard0161e632008-08-28 15:36:32 +000012464 /*
Daniel Veillard87a764e2001-06-20 17:41:10 +000012465 * Get the 4 first bytes and decode the charset
12466 * if enc != XML_CHAR_ENCODING_NONE
12467 * plug some encoding conversion routines.
12468 */
12469 GROW;
Daniel Veillard4aede2e2003-10-17 12:43:59 +000012470 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
12471 start[0] = RAW;
12472 start[1] = NXT(1);
12473 start[2] = NXT(2);
12474 start[3] = NXT(3);
12475 enc = xmlDetectCharEncoding(start, 4);
12476 if (enc != XML_CHAR_ENCODING_NONE) {
12477 xmlSwitchEncoding(ctxt, enc);
12478 }
Daniel Veillard87a764e2001-06-20 17:41:10 +000012479 }
12480
Owen Taylor3473f882001-02-23 17:55:21 +000012481 /*
12482 * Parse a possible text declaration first
12483 */
Daniel Veillarda07050d2003-10-19 14:46:32 +000012484 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +000012485 xmlParseTextDecl(ctxt);
12486 }
12487
Owen Taylor3473f882001-02-23 17:55:21 +000012488 ctxt->instate = XML_PARSER_CONTENT;
Owen Taylor3473f882001-02-23 17:55:21 +000012489 ctxt->depth = depth;
12490
12491 xmlParseContent(ctxt);
Daniel Veillard0161e632008-08-28 15:36:32 +000012492
Daniel Veillard561b7f82002-03-20 21:55:57 +000012493 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012494 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard561b7f82002-03-20 21:55:57 +000012495 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012496 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000012497 }
12498 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012499 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000012500 }
12501
12502 if (!ctxt->wellFormed) {
12503 if (ctxt->errNo == 0)
Daniel Veillard7d515752003-09-26 19:12:37 +000012504 ret = XML_ERR_INTERNAL_ERROR;
Owen Taylor3473f882001-02-23 17:55:21 +000012505 else
William M. Brack7b9154b2003-09-27 19:23:50 +000012506 ret = (xmlParserErrors)ctxt->errNo;
Owen Taylor3473f882001-02-23 17:55:21 +000012507 } else {
12508 if (list != NULL) {
12509 xmlNodePtr cur;
12510
12511 /*
12512 * Return the newly created nodeset after unlinking it from
12513 * they pseudo parent.
12514 */
12515 cur = newDoc->children->children;
12516 *list = cur;
12517 while (cur != NULL) {
12518 cur->parent = NULL;
12519 cur = cur->next;
12520 }
12521 newDoc->children->children = NULL;
12522 }
Daniel Veillard7d515752003-09-26 19:12:37 +000012523 ret = XML_ERR_OK;
Owen Taylor3473f882001-02-23 17:55:21 +000012524 }
Daniel Veillard0161e632008-08-28 15:36:32 +000012525
12526 /*
12527 * Record in the parent context the number of entities replacement
12528 * done when parsing that reference.
12529 */
Daniel Veillard76d36452009-09-07 11:19:33 +020012530 if (oldctxt != NULL)
12531 oldctxt->nbentities += ctxt->nbentities;
12532
Daniel Veillard0161e632008-08-28 15:36:32 +000012533 /*
12534 * Also record the size of the entity parsed
12535 */
12536 if (ctxt->input != NULL) {
12537 oldctxt->sizeentities += ctxt->input->consumed;
12538 oldctxt->sizeentities += (ctxt->input->cur - ctxt->input->base);
12539 }
12540 /*
12541 * And record the last error if any
12542 */
12543 if (ctxt->lastError.code != XML_ERR_OK)
12544 xmlCopyError(&ctxt->lastError, &oldctxt->lastError);
12545
Owen Taylor3473f882001-02-23 17:55:21 +000012546 if (sax != NULL)
12547 ctxt->sax = oldsax;
Daniel Veillard0046c0f2003-02-23 13:52:30 +000012548 oldctxt->node_seq.maximum = ctxt->node_seq.maximum;
12549 oldctxt->node_seq.length = ctxt->node_seq.length;
12550 oldctxt->node_seq.buffer = ctxt->node_seq.buffer;
Daniel Veillard44e1dd02003-02-21 23:23:28 +000012551 ctxt->node_seq.maximum = 0;
12552 ctxt->node_seq.length = 0;
12553 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000012554 xmlFreeParserCtxt(ctxt);
12555 newDoc->intSubset = NULL;
12556 newDoc->extSubset = NULL;
12557 xmlFreeDoc(newDoc);
Daniel Veillard0161e632008-08-28 15:36:32 +000012558
Owen Taylor3473f882001-02-23 17:55:21 +000012559 return(ret);
12560}
12561
Daniel Veillard81273902003-09-30 00:43:48 +000012562#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000012563/**
Daniel Veillard257d9102001-05-08 10:41:44 +000012564 * xmlParseExternalEntity:
12565 * @doc: the document the chunk pertains to
12566 * @sax: the SAX handler bloc (possibly NULL)
12567 * @user_data: The user data returned on SAX callbacks (possibly NULL)
12568 * @depth: Used for loop detection, use 0
12569 * @URL: the URL for the entity to load
12570 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +000012571 * @lst: the return value for the set of parsed nodes
Daniel Veillard257d9102001-05-08 10:41:44 +000012572 *
12573 * Parse an external general entity
12574 * An external general parsed entity is well-formed if it matches the
12575 * production labeled extParsedEnt.
12576 *
12577 * [78] extParsedEnt ::= TextDecl? content
12578 *
12579 * Returns 0 if the entity is well formed, -1 in case of args problem and
12580 * the parser error code otherwise
12581 */
12582
12583int
12584xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardcda96922001-08-21 10:56:31 +000012585 int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *lst) {
Daniel Veillarda97a19b2001-05-20 13:19:52 +000012586 return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL,
Daniel Veillardcda96922001-08-21 10:56:31 +000012587 ID, lst));
Daniel Veillard257d9102001-05-08 10:41:44 +000012588}
12589
12590/**
Daniel Veillarde020c3a2001-03-21 18:06:15 +000012591 * xmlParseBalancedChunkMemory:
Owen Taylor3473f882001-02-23 17:55:21 +000012592 * @doc: the document the chunk pertains to
12593 * @sax: the SAX handler bloc (possibly NULL)
12594 * @user_data: The user data returned on SAX callbacks (possibly NULL)
12595 * @depth: Used for loop detection, use 0
12596 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
Daniel Veillardcda96922001-08-21 10:56:31 +000012597 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +000012598 *
12599 * Parse a well-balanced chunk of an XML document
12600 * called by the parser
12601 * The allowed sequence for the Well Balanced Chunk is the one defined by
12602 * the content production in the XML grammar:
12603 *
12604 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
12605 *
12606 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
12607 * the parser error code otherwise
12608 */
12609
12610int
12611xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
Daniel Veillardcda96922001-08-21 10:56:31 +000012612 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) {
Daniel Veillard58e44c92002-08-02 22:19:49 +000012613 return xmlParseBalancedChunkMemoryRecover( doc, sax, user_data,
12614 depth, string, lst, 0 );
12615}
Daniel Veillard81273902003-09-30 00:43:48 +000012616#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillard58e44c92002-08-02 22:19:49 +000012617
12618/**
Daniel Veillard328f48c2002-11-15 15:24:34 +000012619 * xmlParseBalancedChunkMemoryInternal:
12620 * @oldctxt: the existing parsing context
12621 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
12622 * @user_data: the user data field for the parser context
12623 * @lst: the return value for the set of parsed nodes
12624 *
12625 *
12626 * Parse a well-balanced chunk of an XML document
12627 * called by the parser
12628 * The allowed sequence for the Well Balanced Chunk is the one defined by
12629 * the content production in the XML grammar:
12630 *
12631 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
12632 *
Daniel Veillard7d515752003-09-26 19:12:37 +000012633 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
12634 * error code otherwise
Daniel Veillard0161e632008-08-28 15:36:32 +000012635 *
Daniel Veillard328f48c2002-11-15 15:24:34 +000012636 * In case recover is set to 1, the nodelist will not be empty even if
Daniel Veillard0161e632008-08-28 15:36:32 +000012637 * the parsed chunk is not well balanced.
Daniel Veillard328f48c2002-11-15 15:24:34 +000012638 */
Daniel Veillard7d515752003-09-26 19:12:37 +000012639static xmlParserErrors
Daniel Veillard328f48c2002-11-15 15:24:34 +000012640xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
12641 const xmlChar *string, void *user_data, xmlNodePtr *lst) {
12642 xmlParserCtxtPtr ctxt;
Daniel Veillard68e9e742002-11-16 15:35:11 +000012643 xmlDocPtr newDoc = NULL;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012644 xmlNodePtr newRoot;
Daniel Veillard328f48c2002-11-15 15:24:34 +000012645 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000012646 xmlNodePtr content = NULL;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012647 xmlNodePtr last = NULL;
Daniel Veillard328f48c2002-11-15 15:24:34 +000012648 int size;
Daniel Veillard7d515752003-09-26 19:12:37 +000012649 xmlParserErrors ret = XML_ERR_OK;
Daniel Veillard74eaec12009-08-26 15:57:20 +020012650#ifdef SAX2
12651 int i;
12652#endif
Daniel Veillard328f48c2002-11-15 15:24:34 +000012653
Daniel Veillard0161e632008-08-28 15:36:32 +000012654 if (((oldctxt->depth > 40) && ((oldctxt->options & XML_PARSE_HUGE) == 0)) ||
12655 (oldctxt->depth > 1024)) {
Daniel Veillard328f48c2002-11-15 15:24:34 +000012656 return(XML_ERR_ENTITY_LOOP);
12657 }
12658
12659
12660 if (lst != NULL)
12661 *lst = NULL;
12662 if (string == NULL)
William M. Brack7b9154b2003-09-27 19:23:50 +000012663 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard328f48c2002-11-15 15:24:34 +000012664
12665 size = xmlStrlen(string);
12666
12667 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
William M. Brack7b9154b2003-09-27 19:23:50 +000012668 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
Daniel Veillard328f48c2002-11-15 15:24:34 +000012669 if (user_data != NULL)
12670 ctxt->userData = user_data;
12671 else
12672 ctxt->userData = ctxt;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000012673 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
12674 ctxt->dict = oldctxt->dict;
Daniel Veillardfd343dc2003-10-31 10:55:22 +000012675 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
12676 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
12677 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
Daniel Veillard328f48c2002-11-15 15:24:34 +000012678
Daniel Veillard74eaec12009-08-26 15:57:20 +020012679#ifdef SAX2
12680 /* propagate namespaces down the entity */
12681 for (i = 0;i < oldctxt->nsNr;i += 2) {
12682 nsPush(ctxt, oldctxt->nsTab[i], oldctxt->nsTab[i+1]);
12683 }
12684#endif
12685
Daniel Veillard328f48c2002-11-15 15:24:34 +000012686 oldsax = ctxt->sax;
12687 ctxt->sax = oldctxt->sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000012688 xmlDetectSAX2(ctxt);
Daniel Veillard87ab1c12003-12-21 13:01:56 +000012689 ctxt->replaceEntities = oldctxt->replaceEntities;
12690 ctxt->options = oldctxt->options;
Daniel Veillard0161e632008-08-28 15:36:32 +000012691
Daniel Veillarde1ca5032002-12-09 14:13:43 +000012692 ctxt->_private = oldctxt->_private;
Daniel Veillard328f48c2002-11-15 15:24:34 +000012693 if (oldctxt->myDoc == NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000012694 newDoc = xmlNewDoc(BAD_CAST "1.0");
12695 if (newDoc == NULL) {
12696 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000012697 ctxt->dict = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000012698 xmlFreeParserCtxt(ctxt);
William M. Brack7b9154b2003-09-27 19:23:50 +000012699 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard68e9e742002-11-16 15:35:11 +000012700 }
Daniel Veillardae0765b2008-07-31 19:54:59 +000012701 newDoc->properties = XML_DOC_INTERNAL;
Daniel Veillard03a53c32004-10-26 16:06:51 +000012702 newDoc->dict = ctxt->dict;
12703 xmlDictReference(newDoc->dict);
Daniel Veillard328f48c2002-11-15 15:24:34 +000012704 ctxt->myDoc = newDoc;
Daniel Veillard68e9e742002-11-16 15:35:11 +000012705 } else {
12706 ctxt->myDoc = oldctxt->myDoc;
12707 content = ctxt->myDoc->children;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012708 last = ctxt->myDoc->last;
Daniel Veillard328f48c2002-11-15 15:24:34 +000012709 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012710 newRoot = xmlNewDocNode(ctxt->myDoc, NULL, BAD_CAST "pseudoroot", NULL);
12711 if (newRoot == NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000012712 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000012713 ctxt->dict = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000012714 xmlFreeParserCtxt(ctxt);
Daniel Veillard03a53c32004-10-26 16:06:51 +000012715 if (newDoc != NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000012716 xmlFreeDoc(newDoc);
Daniel Veillard03a53c32004-10-26 16:06:51 +000012717 }
William M. Brack7b9154b2003-09-27 19:23:50 +000012718 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard68e9e742002-11-16 15:35:11 +000012719 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012720 ctxt->myDoc->children = NULL;
12721 ctxt->myDoc->last = NULL;
12722 xmlAddChild((xmlNodePtr) ctxt->myDoc, newRoot);
Daniel Veillard68e9e742002-11-16 15:35:11 +000012723 nodePush(ctxt, ctxt->myDoc->children);
Daniel Veillard328f48c2002-11-15 15:24:34 +000012724 ctxt->instate = XML_PARSER_CONTENT;
12725 ctxt->depth = oldctxt->depth + 1;
12726
Daniel Veillard328f48c2002-11-15 15:24:34 +000012727 ctxt->validate = 0;
12728 ctxt->loadsubset = oldctxt->loadsubset;
Daniel Veillardef8dd7b2003-03-23 12:02:56 +000012729 if ((oldctxt->validate) || (oldctxt->replaceEntities != 0)) {
12730 /*
12731 * ID/IDREF registration will be done in xmlValidateElement below
12732 */
12733 ctxt->loadsubset |= XML_SKIP_IDS;
12734 }
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000012735 ctxt->dictNames = oldctxt->dictNames;
Daniel Veillard95d2d5b2003-10-27 14:54:49 +000012736 ctxt->attsDefault = oldctxt->attsDefault;
12737 ctxt->attsSpecial = oldctxt->attsSpecial;
Daniel Veillard328f48c2002-11-15 15:24:34 +000012738
Daniel Veillard68e9e742002-11-16 15:35:11 +000012739 xmlParseContent(ctxt);
Daniel Veillard328f48c2002-11-15 15:24:34 +000012740 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012741 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000012742 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012743 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000012744 }
Daniel Veillard68e9e742002-11-16 15:35:11 +000012745 if (ctxt->node != ctxt->myDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012746 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000012747 }
12748
12749 if (!ctxt->wellFormed) {
12750 if (ctxt->errNo == 0)
Daniel Veillard7d515752003-09-26 19:12:37 +000012751 ret = XML_ERR_INTERNAL_ERROR;
Daniel Veillard328f48c2002-11-15 15:24:34 +000012752 else
William M. Brack7b9154b2003-09-27 19:23:50 +000012753 ret = (xmlParserErrors)ctxt->errNo;
Daniel Veillard328f48c2002-11-15 15:24:34 +000012754 } else {
William M. Brack7b9154b2003-09-27 19:23:50 +000012755 ret = XML_ERR_OK;
Daniel Veillard328f48c2002-11-15 15:24:34 +000012756 }
Daniel Veillard0161e632008-08-28 15:36:32 +000012757
William M. Brack7b9154b2003-09-27 19:23:50 +000012758 if ((lst != NULL) && (ret == XML_ERR_OK)) {
Daniel Veillard328f48c2002-11-15 15:24:34 +000012759 xmlNodePtr cur;
12760
12761 /*
12762 * Return the newly created nodeset after unlinking it from
12763 * they pseudo parent.
12764 */
Daniel Veillard68e9e742002-11-16 15:35:11 +000012765 cur = ctxt->myDoc->children->children;
Daniel Veillard328f48c2002-11-15 15:24:34 +000012766 *lst = cur;
12767 while (cur != NULL) {
Daniel Veillard4432df22003-09-28 18:58:27 +000012768#ifdef LIBXML_VALID_ENABLED
Daniel Veillard8874b942005-08-25 13:19:21 +000012769 if ((oldctxt->validate) && (oldctxt->wellFormed) &&
12770 (oldctxt->myDoc) && (oldctxt->myDoc->intSubset) &&
12771 (cur->type == XML_ELEMENT_NODE)) {
Daniel Veillard8d589042003-02-04 15:07:21 +000012772 oldctxt->valid &= xmlValidateElement(&oldctxt->vctxt,
12773 oldctxt->myDoc, cur);
12774 }
Daniel Veillard4432df22003-09-28 18:58:27 +000012775#endif /* LIBXML_VALID_ENABLED */
Daniel Veillard328f48c2002-11-15 15:24:34 +000012776 cur->parent = NULL;
12777 cur = cur->next;
12778 }
Daniel Veillard68e9e742002-11-16 15:35:11 +000012779 ctxt->myDoc->children->children = NULL;
12780 }
12781 if (ctxt->myDoc != NULL) {
12782 xmlFreeNode(ctxt->myDoc->children);
12783 ctxt->myDoc->children = content;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012784 ctxt->myDoc->last = last;
Daniel Veillard328f48c2002-11-15 15:24:34 +000012785 }
Daniel Veillard0161e632008-08-28 15:36:32 +000012786
12787 /*
12788 * Record in the parent context the number of entities replacement
12789 * done when parsing that reference.
12790 */
Daniel Veillardd44b9362009-09-07 12:15:08 +020012791 if (oldctxt != NULL)
12792 oldctxt->nbentities += ctxt->nbentities;
12793
Daniel Veillard0161e632008-08-28 15:36:32 +000012794 /*
12795 * Also record the last error if any
12796 */
12797 if (ctxt->lastError.code != XML_ERR_OK)
12798 xmlCopyError(&ctxt->lastError, &oldctxt->lastError);
12799
Daniel Veillard328f48c2002-11-15 15:24:34 +000012800 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000012801 ctxt->dict = NULL;
Daniel Veillard95d2d5b2003-10-27 14:54:49 +000012802 ctxt->attsDefault = NULL;
12803 ctxt->attsSpecial = NULL;
Daniel Veillard328f48c2002-11-15 15:24:34 +000012804 xmlFreeParserCtxt(ctxt);
Daniel Veillard03a53c32004-10-26 16:06:51 +000012805 if (newDoc != NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000012806 xmlFreeDoc(newDoc);
Daniel Veillard03a53c32004-10-26 16:06:51 +000012807 }
Daniel Veillard0161e632008-08-28 15:36:32 +000012808
Daniel Veillard328f48c2002-11-15 15:24:34 +000012809 return(ret);
12810}
12811
Daniel Veillard29b17482004-08-16 00:39:03 +000012812/**
12813 * xmlParseInNodeContext:
12814 * @node: the context node
12815 * @data: the input string
12816 * @datalen: the input string length in bytes
12817 * @options: a combination of xmlParserOption
12818 * @lst: the return value for the set of parsed nodes
12819 *
12820 * Parse a well-balanced chunk of an XML document
12821 * within the context (DTD, namespaces, etc ...) of the given node.
12822 *
12823 * The allowed sequence for the data is a Well Balanced Chunk defined by
12824 * the content production in the XML grammar:
12825 *
12826 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
12827 *
12828 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
12829 * error code otherwise
12830 */
12831xmlParserErrors
12832xmlParseInNodeContext(xmlNodePtr node, const char *data, int datalen,
12833 int options, xmlNodePtr *lst) {
12834#ifdef SAX2
12835 xmlParserCtxtPtr ctxt;
12836 xmlDocPtr doc = NULL;
12837 xmlNodePtr fake, cur;
12838 int nsnr = 0;
12839
12840 xmlParserErrors ret = XML_ERR_OK;
12841
12842 /*
12843 * check all input parameters, grab the document
12844 */
12845 if ((lst == NULL) || (node == NULL) || (data == NULL) || (datalen < 0))
12846 return(XML_ERR_INTERNAL_ERROR);
12847 switch (node->type) {
12848 case XML_ELEMENT_NODE:
12849 case XML_ATTRIBUTE_NODE:
12850 case XML_TEXT_NODE:
12851 case XML_CDATA_SECTION_NODE:
12852 case XML_ENTITY_REF_NODE:
12853 case XML_PI_NODE:
12854 case XML_COMMENT_NODE:
12855 case XML_DOCUMENT_NODE:
12856 case XML_HTML_DOCUMENT_NODE:
12857 break;
12858 default:
12859 return(XML_ERR_INTERNAL_ERROR);
12860
12861 }
12862 while ((node != NULL) && (node->type != XML_ELEMENT_NODE) &&
12863 (node->type != XML_DOCUMENT_NODE) &&
12864 (node->type != XML_HTML_DOCUMENT_NODE))
12865 node = node->parent;
12866 if (node == NULL)
12867 return(XML_ERR_INTERNAL_ERROR);
12868 if (node->type == XML_ELEMENT_NODE)
12869 doc = node->doc;
12870 else
12871 doc = (xmlDocPtr) node;
12872 if (doc == NULL)
12873 return(XML_ERR_INTERNAL_ERROR);
12874
12875 /*
12876 * allocate a context and set-up everything not related to the
12877 * node position in the tree
12878 */
12879 if (doc->type == XML_DOCUMENT_NODE)
12880 ctxt = xmlCreateMemoryParserCtxt((char *) data, datalen);
12881#ifdef LIBXML_HTML_ENABLED
Daniel Veillarde20fb5a2010-01-29 20:47:08 +010012882 else if (doc->type == XML_HTML_DOCUMENT_NODE) {
Daniel Veillard29b17482004-08-16 00:39:03 +000012883 ctxt = htmlCreateMemoryParserCtxt((char *) data, datalen);
Daniel Veillarde20fb5a2010-01-29 20:47:08 +010012884 /*
12885 * When parsing in context, it makes no sense to add implied
12886 * elements like html/body/etc...
12887 */
12888 options |= HTML_PARSE_NOIMPLIED;
12889 }
Daniel Veillard29b17482004-08-16 00:39:03 +000012890#endif
12891 else
12892 return(XML_ERR_INTERNAL_ERROR);
12893
12894 if (ctxt == NULL)
12895 return(XML_ERR_NO_MEMORY);
William M. Brackc3f81342004-10-03 01:22:44 +000012896
Daniel Veillard47cd14e2010-02-04 18:49:01 +010012897 /*
William M. Brackc3f81342004-10-03 01:22:44 +000012898 * Use input doc's dict if present, else assure XML_PARSE_NODICT is set.
12899 * We need a dictionary for xmlDetectSAX2, so if there's no doc dict
12900 * we must wait until the last moment to free the original one.
12901 */
Daniel Veillard29b17482004-08-16 00:39:03 +000012902 if (doc->dict != NULL) {
William M. Brackc3f81342004-10-03 01:22:44 +000012903 if (ctxt->dict != NULL)
Daniel Veillard29b17482004-08-16 00:39:03 +000012904 xmlDictFree(ctxt->dict);
12905 ctxt->dict = doc->dict;
William M. Brackc3f81342004-10-03 01:22:44 +000012906 } else
12907 options |= XML_PARSE_NODICT;
12908
Daniel Veillard47cd14e2010-02-04 18:49:01 +010012909 if (doc->encoding != NULL) {
12910 xmlCharEncodingHandlerPtr hdlr;
12911
12912 if (ctxt->encoding != NULL)
12913 xmlFree((xmlChar *) ctxt->encoding);
12914 ctxt->encoding = xmlStrdup((const xmlChar *) doc->encoding);
12915
12916 hdlr = xmlFindCharEncodingHandler(doc->encoding);
12917 if (hdlr != NULL) {
12918 xmlSwitchToEncoding(ctxt, hdlr);
12919 } else {
12920 return(XML_ERR_UNSUPPORTED_ENCODING);
12921 }
12922 }
12923
Daniel Veillard37334572008-07-31 08:20:02 +000012924 xmlCtxtUseOptionsInternal(ctxt, options, NULL);
Daniel Veillard29b17482004-08-16 00:39:03 +000012925 xmlDetectSAX2(ctxt);
12926 ctxt->myDoc = doc;
12927
Daniel Veillard47cd14e2010-02-04 18:49:01 +010012928 fake = xmlNewComment(NULL);
12929 if (fake == NULL) {
12930 xmlFreeParserCtxt(ctxt);
12931 return(XML_ERR_NO_MEMORY);
12932 }
12933 xmlAddChild(node, fake);
12934
Daniel Veillard29b17482004-08-16 00:39:03 +000012935 if (node->type == XML_ELEMENT_NODE) {
12936 nodePush(ctxt, node);
12937 /*
12938 * initialize the SAX2 namespaces stack
12939 */
12940 cur = node;
12941 while ((cur != NULL) && (cur->type == XML_ELEMENT_NODE)) {
12942 xmlNsPtr ns = cur->nsDef;
12943 const xmlChar *iprefix, *ihref;
12944
12945 while (ns != NULL) {
12946 if (ctxt->dict) {
12947 iprefix = xmlDictLookup(ctxt->dict, ns->prefix, -1);
12948 ihref = xmlDictLookup(ctxt->dict, ns->href, -1);
12949 } else {
12950 iprefix = ns->prefix;
12951 ihref = ns->href;
12952 }
12953
12954 if (xmlGetNamespace(ctxt, iprefix) == NULL) {
12955 nsPush(ctxt, iprefix, ihref);
12956 nsnr++;
12957 }
12958 ns = ns->next;
12959 }
12960 cur = cur->parent;
12961 }
12962 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillard0161e632008-08-28 15:36:32 +000012963 }
Daniel Veillard29b17482004-08-16 00:39:03 +000012964
12965 if ((ctxt->validate) || (ctxt->replaceEntities != 0)) {
12966 /*
12967 * ID/IDREF registration will be done in xmlValidateElement below
12968 */
12969 ctxt->loadsubset |= XML_SKIP_IDS;
12970 }
12971
Daniel Veillard499cc922006-01-18 17:22:35 +000012972#ifdef LIBXML_HTML_ENABLED
12973 if (doc->type == XML_HTML_DOCUMENT_NODE)
12974 __htmlParseContent(ctxt);
12975 else
12976#endif
12977 xmlParseContent(ctxt);
12978
Daniel Veillard29b17482004-08-16 00:39:03 +000012979 nsPop(ctxt, nsnr);
12980 if ((RAW == '<') && (NXT(1) == '/')) {
12981 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
12982 } else if (RAW != 0) {
12983 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
12984 }
12985 if ((ctxt->node != NULL) && (ctxt->node != node)) {
12986 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
12987 ctxt->wellFormed = 0;
12988 }
12989
12990 if (!ctxt->wellFormed) {
12991 if (ctxt->errNo == 0)
12992 ret = XML_ERR_INTERNAL_ERROR;
12993 else
12994 ret = (xmlParserErrors)ctxt->errNo;
12995 } else {
12996 ret = XML_ERR_OK;
12997 }
Daniel Veillard0161e632008-08-28 15:36:32 +000012998
Daniel Veillard29b17482004-08-16 00:39:03 +000012999 /*
13000 * Return the newly created nodeset after unlinking it from
13001 * the pseudo sibling.
13002 */
Daniel Veillard0161e632008-08-28 15:36:32 +000013003
Daniel Veillard29b17482004-08-16 00:39:03 +000013004 cur = fake->next;
13005 fake->next = NULL;
13006 node->last = fake;
13007
13008 if (cur != NULL) {
13009 cur->prev = NULL;
13010 }
13011
13012 *lst = cur;
13013
13014 while (cur != NULL) {
13015 cur->parent = NULL;
13016 cur = cur->next;
13017 }
13018
13019 xmlUnlinkNode(fake);
13020 xmlFreeNode(fake);
13021
13022
13023 if (ret != XML_ERR_OK) {
13024 xmlFreeNodeList(*lst);
13025 *lst = NULL;
13026 }
William M. Brackc3f81342004-10-03 01:22:44 +000013027
William M. Brackb7b54de2004-10-06 16:38:01 +000013028 if (doc->dict != NULL)
13029 ctxt->dict = NULL;
Daniel Veillard29b17482004-08-16 00:39:03 +000013030 xmlFreeParserCtxt(ctxt);
Daniel Veillard0161e632008-08-28 15:36:32 +000013031
Daniel Veillard29b17482004-08-16 00:39:03 +000013032 return(ret);
13033#else /* !SAX2 */
13034 return(XML_ERR_INTERNAL_ERROR);
13035#endif
13036}
13037
Daniel Veillard81273902003-09-30 00:43:48 +000013038#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard328f48c2002-11-15 15:24:34 +000013039/**
Daniel Veillard58e44c92002-08-02 22:19:49 +000013040 * xmlParseBalancedChunkMemoryRecover:
13041 * @doc: the document the chunk pertains to
13042 * @sax: the SAX handler bloc (possibly NULL)
13043 * @user_data: The user data returned on SAX callbacks (possibly NULL)
13044 * @depth: Used for loop detection, use 0
13045 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
13046 * @lst: the return value for the set of parsed nodes
13047 * @recover: return nodes even if the data is broken (use 0)
13048 *
13049 *
13050 * Parse a well-balanced chunk of an XML document
13051 * called by the parser
13052 * The allowed sequence for the Well Balanced Chunk is the one defined by
13053 * the content production in the XML grammar:
13054 *
13055 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13056 *
13057 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
13058 * the parser error code otherwise
Daniel Veillard2135fc22008-04-04 16:10:51 +000013059 *
Daniel Veillard58e44c92002-08-02 22:19:49 +000013060 * In case recover is set to 1, the nodelist will not be empty even if
Daniel Veillard2135fc22008-04-04 16:10:51 +000013061 * the parsed chunk is not well balanced, assuming the parsing succeeded to
13062 * some extent.
Daniel Veillard58e44c92002-08-02 22:19:49 +000013063 */
13064int
13065xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax,
Daniel Veillard2135fc22008-04-04 16:10:51 +000013066 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst,
Daniel Veillard58e44c92002-08-02 22:19:49 +000013067 int recover) {
Owen Taylor3473f882001-02-23 17:55:21 +000013068 xmlParserCtxtPtr ctxt;
13069 xmlDocPtr newDoc;
13070 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000013071 xmlNodePtr content, newRoot;
Owen Taylor3473f882001-02-23 17:55:21 +000013072 int size;
13073 int ret = 0;
13074
Daniel Veillard0161e632008-08-28 15:36:32 +000013075 if (depth > 40) {
Owen Taylor3473f882001-02-23 17:55:21 +000013076 return(XML_ERR_ENTITY_LOOP);
13077 }
13078
13079
Daniel Veillardcda96922001-08-21 10:56:31 +000013080 if (lst != NULL)
13081 *lst = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000013082 if (string == NULL)
13083 return(-1);
13084
13085 size = xmlStrlen(string);
13086
13087 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
13088 if (ctxt == NULL) return(-1);
13089 ctxt->userData = ctxt;
13090 if (sax != NULL) {
13091 oldsax = ctxt->sax;
13092 ctxt->sax = sax;
13093 if (user_data != NULL)
13094 ctxt->userData = user_data;
13095 }
13096 newDoc = xmlNewDoc(BAD_CAST "1.0");
13097 if (newDoc == NULL) {
13098 xmlFreeParserCtxt(ctxt);
13099 return(-1);
13100 }
Daniel Veillardae0765b2008-07-31 19:54:59 +000013101 newDoc->properties = XML_DOC_INTERNAL;
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000013102 if ((doc != NULL) && (doc->dict != NULL)) {
13103 xmlDictFree(ctxt->dict);
13104 ctxt->dict = doc->dict;
13105 xmlDictReference(ctxt->dict);
13106 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
13107 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
13108 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
13109 ctxt->dictNames = 1;
13110 } else {
Daniel Veillard37334572008-07-31 08:20:02 +000013111 xmlCtxtUseOptionsInternal(ctxt, XML_PARSE_NODICT, NULL);
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000013112 }
Owen Taylor3473f882001-02-23 17:55:21 +000013113 if (doc != NULL) {
13114 newDoc->intSubset = doc->intSubset;
13115 newDoc->extSubset = doc->extSubset;
13116 }
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000013117 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
13118 if (newRoot == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000013119 if (sax != NULL)
13120 ctxt->sax = oldsax;
13121 xmlFreeParserCtxt(ctxt);
13122 newDoc->intSubset = NULL;
13123 newDoc->extSubset = NULL;
13124 xmlFreeDoc(newDoc);
13125 return(-1);
13126 }
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000013127 xmlAddChild((xmlNodePtr) newDoc, newRoot);
13128 nodePush(ctxt, newRoot);
Owen Taylor3473f882001-02-23 17:55:21 +000013129 if (doc == NULL) {
13130 ctxt->myDoc = newDoc;
13131 } else {
Daniel Veillard42766c02002-08-22 20:52:17 +000013132 ctxt->myDoc = newDoc;
Owen Taylor3473f882001-02-23 17:55:21 +000013133 newDoc->children->doc = doc;
Rob Richardsa02f1992006-09-16 14:04:26 +000013134 /* Ensure that doc has XML spec namespace */
13135 xmlSearchNsByHref(doc, (xmlNodePtr)doc, XML_XML_NAMESPACE);
13136 newDoc->oldNs = doc->oldNs;
Owen Taylor3473f882001-02-23 17:55:21 +000013137 }
13138 ctxt->instate = XML_PARSER_CONTENT;
13139 ctxt->depth = depth;
13140
13141 /*
13142 * Doing validity checking on chunk doesn't make sense
13143 */
13144 ctxt->validate = 0;
13145 ctxt->loadsubset = 0;
Daniel Veillarde57ec792003-09-10 10:50:59 +000013146 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000013147
Daniel Veillardb39bc392002-10-26 19:29:51 +000013148 if ( doc != NULL ){
13149 content = doc->children;
13150 doc->children = NULL;
13151 xmlParseContent(ctxt);
13152 doc->children = content;
13153 }
13154 else {
13155 xmlParseContent(ctxt);
13156 }
Owen Taylor3473f882001-02-23 17:55:21 +000013157 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000013158 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000013159 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000013160 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000013161 }
13162 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000013163 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000013164 }
13165
13166 if (!ctxt->wellFormed) {
13167 if (ctxt->errNo == 0)
13168 ret = 1;
13169 else
13170 ret = ctxt->errNo;
13171 } else {
Daniel Veillard58e44c92002-08-02 22:19:49 +000013172 ret = 0;
13173 }
Daniel Veillard0161e632008-08-28 15:36:32 +000013174
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000013175 if ((lst != NULL) && ((ret == 0) || (recover == 1))) {
13176 xmlNodePtr cur;
Owen Taylor3473f882001-02-23 17:55:21 +000013177
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000013178 /*
13179 * Return the newly created nodeset after unlinking it from
13180 * they pseudo parent.
13181 */
13182 cur = newDoc->children->children;
13183 *lst = cur;
13184 while (cur != NULL) {
13185 xmlSetTreeDoc(cur, doc);
13186 cur->parent = NULL;
13187 cur = cur->next;
Owen Taylor3473f882001-02-23 17:55:21 +000013188 }
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000013189 newDoc->children->children = NULL;
13190 }
Daniel Veillard0161e632008-08-28 15:36:32 +000013191
13192 if (sax != NULL)
Owen Taylor3473f882001-02-23 17:55:21 +000013193 ctxt->sax = oldsax;
13194 xmlFreeParserCtxt(ctxt);
13195 newDoc->intSubset = NULL;
13196 newDoc->extSubset = NULL;
Rob Richardsa02f1992006-09-16 14:04:26 +000013197 newDoc->oldNs = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000013198 xmlFreeDoc(newDoc);
Daniel Veillard0161e632008-08-28 15:36:32 +000013199
Owen Taylor3473f882001-02-23 17:55:21 +000013200 return(ret);
13201}
13202
13203/**
13204 * xmlSAXParseEntity:
13205 * @sax: the SAX handler block
13206 * @filename: the filename
13207 *
13208 * parse an XML external entity out of context and build a tree.
13209 * It use the given SAX function block to handle the parsing callback.
13210 * If sax is NULL, fallback to the default DOM tree building routines.
13211 *
13212 * [78] extParsedEnt ::= TextDecl? content
13213 *
13214 * This correspond to a "Well Balanced" chunk
13215 *
13216 * Returns the resulting document tree
13217 */
13218
13219xmlDocPtr
13220xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
13221 xmlDocPtr ret;
13222 xmlParserCtxtPtr ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000013223
13224 ctxt = xmlCreateFileParserCtxt(filename);
13225 if (ctxt == NULL) {
13226 return(NULL);
13227 }
13228 if (sax != NULL) {
13229 if (ctxt->sax != NULL)
13230 xmlFree(ctxt->sax);
13231 ctxt->sax = sax;
13232 ctxt->userData = NULL;
13233 }
13234
Owen Taylor3473f882001-02-23 17:55:21 +000013235 xmlParseExtParsedEnt(ctxt);
13236
13237 if (ctxt->wellFormed)
13238 ret = ctxt->myDoc;
13239 else {
13240 ret = NULL;
13241 xmlFreeDoc(ctxt->myDoc);
13242 ctxt->myDoc = NULL;
13243 }
13244 if (sax != NULL)
13245 ctxt->sax = NULL;
13246 xmlFreeParserCtxt(ctxt);
Daniel Veillard0161e632008-08-28 15:36:32 +000013247
Owen Taylor3473f882001-02-23 17:55:21 +000013248 return(ret);
13249}
13250
13251/**
13252 * xmlParseEntity:
13253 * @filename: the filename
13254 *
13255 * parse an XML external entity out of context and build a tree.
13256 *
13257 * [78] extParsedEnt ::= TextDecl? content
13258 *
13259 * This correspond to a "Well Balanced" chunk
13260 *
13261 * Returns the resulting document tree
13262 */
13263
13264xmlDocPtr
13265xmlParseEntity(const char *filename) {
13266 return(xmlSAXParseEntity(NULL, filename));
13267}
Daniel Veillard81273902003-09-30 00:43:48 +000013268#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000013269
13270/**
Rob Richards9c0aa472009-03-26 18:10:19 +000013271 * xmlCreateEntityParserCtxtInternal:
Owen Taylor3473f882001-02-23 17:55:21 +000013272 * @URL: the entity URL
13273 * @ID: the entity PUBLIC ID
Daniel Veillardcbaf3992001-12-31 16:16:02 +000013274 * @base: a possible base for the target URI
Rob Richards9c0aa472009-03-26 18:10:19 +000013275 * @pctx: parser context used to set options on new context
Owen Taylor3473f882001-02-23 17:55:21 +000013276 *
13277 * Create a parser context for an external entity
13278 * Automatic support for ZLIB/Compress compressed document is provided
13279 * by default if found at compile-time.
13280 *
13281 * Returns the new parser context or NULL
13282 */
Rob Richards9c0aa472009-03-26 18:10:19 +000013283static xmlParserCtxtPtr
13284xmlCreateEntityParserCtxtInternal(const xmlChar *URL, const xmlChar *ID,
13285 const xmlChar *base, xmlParserCtxtPtr pctx) {
Owen Taylor3473f882001-02-23 17:55:21 +000013286 xmlParserCtxtPtr ctxt;
13287 xmlParserInputPtr inputStream;
13288 char *directory = NULL;
13289 xmlChar *uri;
Daniel Veillard0161e632008-08-28 15:36:32 +000013290
Owen Taylor3473f882001-02-23 17:55:21 +000013291 ctxt = xmlNewParserCtxt();
13292 if (ctxt == NULL) {
13293 return(NULL);
13294 }
13295
Daniel Veillard48247b42009-07-10 16:12:46 +020013296 if (pctx != NULL) {
13297 ctxt->options = pctx->options;
13298 ctxt->_private = pctx->_private;
Rob Richards9c0aa472009-03-26 18:10:19 +000013299 }
13300
Owen Taylor3473f882001-02-23 17:55:21 +000013301 uri = xmlBuildURI(URL, base);
13302
13303 if (uri == NULL) {
13304 inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
13305 if (inputStream == NULL) {
13306 xmlFreeParserCtxt(ctxt);
13307 return(NULL);
13308 }
13309
13310 inputPush(ctxt, inputStream);
13311
13312 if ((ctxt->directory == NULL) && (directory == NULL))
13313 directory = xmlParserGetDirectory((char *)URL);
13314 if ((ctxt->directory == NULL) && (directory != NULL))
13315 ctxt->directory = directory;
13316 } else {
13317 inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt);
13318 if (inputStream == NULL) {
13319 xmlFree(uri);
13320 xmlFreeParserCtxt(ctxt);
13321 return(NULL);
13322 }
13323
13324 inputPush(ctxt, inputStream);
13325
13326 if ((ctxt->directory == NULL) && (directory == NULL))
13327 directory = xmlParserGetDirectory((char *)uri);
13328 if ((ctxt->directory == NULL) && (directory != NULL))
13329 ctxt->directory = directory;
13330 xmlFree(uri);
13331 }
Owen Taylor3473f882001-02-23 17:55:21 +000013332 return(ctxt);
13333}
13334
Rob Richards9c0aa472009-03-26 18:10:19 +000013335/**
13336 * xmlCreateEntityParserCtxt:
13337 * @URL: the entity URL
13338 * @ID: the entity PUBLIC ID
13339 * @base: a possible base for the target URI
13340 *
13341 * Create a parser context for an external entity
13342 * Automatic support for ZLIB/Compress compressed document is provided
13343 * by default if found at compile-time.
13344 *
13345 * Returns the new parser context or NULL
13346 */
13347xmlParserCtxtPtr
13348xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
13349 const xmlChar *base) {
13350 return xmlCreateEntityParserCtxtInternal(URL, ID, base, NULL);
13351
13352}
13353
Owen Taylor3473f882001-02-23 17:55:21 +000013354/************************************************************************
13355 * *
Daniel Veillard0161e632008-08-28 15:36:32 +000013356 * Front ends when parsing from a file *
Owen Taylor3473f882001-02-23 17:55:21 +000013357 * *
13358 ************************************************************************/
13359
13360/**
Daniel Veillard61b93382003-11-03 14:28:31 +000013361 * xmlCreateURLParserCtxt:
13362 * @filename: the filename or URL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013363 * @options: a combination of xmlParserOption
Owen Taylor3473f882001-02-23 17:55:21 +000013364 *
Daniel Veillard61b93382003-11-03 14:28:31 +000013365 * Create a parser context for a file or URL content.
Owen Taylor3473f882001-02-23 17:55:21 +000013366 * Automatic support for ZLIB/Compress compressed document is provided
Daniel Veillard61b93382003-11-03 14:28:31 +000013367 * by default if found at compile-time and for file accesses
Owen Taylor3473f882001-02-23 17:55:21 +000013368 *
13369 * Returns the new parser context or NULL
13370 */
13371xmlParserCtxtPtr
Daniel Veillard61b93382003-11-03 14:28:31 +000013372xmlCreateURLParserCtxt(const char *filename, int options)
Owen Taylor3473f882001-02-23 17:55:21 +000013373{
13374 xmlParserCtxtPtr ctxt;
13375 xmlParserInputPtr inputStream;
Owen Taylor3473f882001-02-23 17:55:21 +000013376 char *directory = NULL;
13377
Owen Taylor3473f882001-02-23 17:55:21 +000013378 ctxt = xmlNewParserCtxt();
13379 if (ctxt == NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +000013380 xmlErrMemory(NULL, "cannot allocate parser context");
Owen Taylor3473f882001-02-23 17:55:21 +000013381 return(NULL);
13382 }
13383
Daniel Veillarddf292f72005-01-16 19:00:15 +000013384 if (options)
Daniel Veillard37334572008-07-31 08:20:02 +000013385 xmlCtxtUseOptionsInternal(ctxt, options, NULL);
Daniel Veillarddf292f72005-01-16 19:00:15 +000013386 ctxt->linenumbers = 1;
Daniel Veillard37334572008-07-31 08:20:02 +000013387
Daniel Veillard4e9b1bc2003-06-09 10:30:33 +000013388 inputStream = xmlLoadExternalEntity(filename, NULL, ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000013389 if (inputStream == NULL) {
13390 xmlFreeParserCtxt(ctxt);
13391 return(NULL);
13392 }
13393
Owen Taylor3473f882001-02-23 17:55:21 +000013394 inputPush(ctxt, inputStream);
13395 if ((ctxt->directory == NULL) && (directory == NULL))
Igor Zlatkovic5f9fada2003-02-19 14:51:00 +000013396 directory = xmlParserGetDirectory(filename);
Owen Taylor3473f882001-02-23 17:55:21 +000013397 if ((ctxt->directory == NULL) && (directory != NULL))
13398 ctxt->directory = directory;
13399
13400 return(ctxt);
13401}
13402
Daniel Veillard61b93382003-11-03 14:28:31 +000013403/**
13404 * xmlCreateFileParserCtxt:
13405 * @filename: the filename
13406 *
13407 * Create a parser context for a file content.
13408 * Automatic support for ZLIB/Compress compressed document is provided
13409 * by default if found at compile-time.
13410 *
13411 * Returns the new parser context or NULL
13412 */
13413xmlParserCtxtPtr
13414xmlCreateFileParserCtxt(const char *filename)
13415{
13416 return(xmlCreateURLParserCtxt(filename, 0));
13417}
13418
Daniel Veillard81273902003-09-30 00:43:48 +000013419#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000013420/**
Daniel Veillarda293c322001-10-02 13:54:14 +000013421 * xmlSAXParseFileWithData:
Owen Taylor3473f882001-02-23 17:55:21 +000013422 * @sax: the SAX handler block
13423 * @filename: the filename
13424 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
13425 * documents
Daniel Veillarda293c322001-10-02 13:54:14 +000013426 * @data: the userdata
Owen Taylor3473f882001-02-23 17:55:21 +000013427 *
13428 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
13429 * compressed document is provided by default if found at compile-time.
13430 * It use the given SAX function block to handle the parsing callback.
13431 * If sax is NULL, fallback to the default DOM tree building routines.
13432 *
Daniel Veillarde19fc232002-04-22 16:01:24 +000013433 * User data (void *) is stored within the parser context in the
13434 * context's _private member, so it is available nearly everywhere in libxml
Daniel Veillarda293c322001-10-02 13:54:14 +000013435 *
Owen Taylor3473f882001-02-23 17:55:21 +000013436 * Returns the resulting document tree
13437 */
13438
13439xmlDocPtr
Daniel Veillarda293c322001-10-02 13:54:14 +000013440xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename,
13441 int recovery, void *data) {
Owen Taylor3473f882001-02-23 17:55:21 +000013442 xmlDocPtr ret;
13443 xmlParserCtxtPtr ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000013444
Daniel Veillard635ef722001-10-29 11:48:19 +000013445 xmlInitParser();
13446
Owen Taylor3473f882001-02-23 17:55:21 +000013447 ctxt = xmlCreateFileParserCtxt(filename);
13448 if (ctxt == NULL) {
13449 return(NULL);
13450 }
13451 if (sax != NULL) {
13452 if (ctxt->sax != NULL)
13453 xmlFree(ctxt->sax);
13454 ctxt->sax = sax;
Owen Taylor3473f882001-02-23 17:55:21 +000013455 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000013456 xmlDetectSAX2(ctxt);
Daniel Veillarda293c322001-10-02 13:54:14 +000013457 if (data!=NULL) {
Daniel Veillardc790bf42003-10-11 10:50:10 +000013458 ctxt->_private = data;
Daniel Veillarda293c322001-10-02 13:54:14 +000013459 }
Owen Taylor3473f882001-02-23 17:55:21 +000013460
Daniel Veillard37d2d162008-03-14 10:54:00 +000013461 if (ctxt->directory == NULL)
13462 ctxt->directory = xmlParserGetDirectory(filename);
Owen Taylor3473f882001-02-23 17:55:21 +000013463
Daniel Veillarddad3f682002-11-17 16:47:27 +000013464 ctxt->recovery = recovery;
13465
Owen Taylor3473f882001-02-23 17:55:21 +000013466 xmlParseDocument(ctxt);
13467
William M. Brackc07329e2003-09-08 01:57:30 +000013468 if ((ctxt->wellFormed) || recovery) {
13469 ret = ctxt->myDoc;
Daniel Veillardb65e12e2003-10-08 21:33:28 +000013470 if (ret != NULL) {
13471 if (ctxt->input->buf->compressed > 0)
13472 ret->compression = 9;
13473 else
13474 ret->compression = ctxt->input->buf->compressed;
13475 }
William M. Brackc07329e2003-09-08 01:57:30 +000013476 }
Owen Taylor3473f882001-02-23 17:55:21 +000013477 else {
13478 ret = NULL;
13479 xmlFreeDoc(ctxt->myDoc);
13480 ctxt->myDoc = NULL;
13481 }
13482 if (sax != NULL)
13483 ctxt->sax = NULL;
13484 xmlFreeParserCtxt(ctxt);
13485
13486 return(ret);
13487}
13488
13489/**
Daniel Veillarda293c322001-10-02 13:54:14 +000013490 * xmlSAXParseFile:
13491 * @sax: the SAX handler block
13492 * @filename: the filename
13493 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
13494 * documents
13495 *
13496 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
13497 * compressed document is provided by default if found at compile-time.
13498 * It use the given SAX function block to handle the parsing callback.
13499 * If sax is NULL, fallback to the default DOM tree building routines.
13500 *
13501 * Returns the resulting document tree
13502 */
13503
13504xmlDocPtr
13505xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
13506 int recovery) {
13507 return(xmlSAXParseFileWithData(sax,filename,recovery,NULL));
13508}
13509
13510/**
Owen Taylor3473f882001-02-23 17:55:21 +000013511 * xmlRecoverDoc:
13512 * @cur: a pointer to an array of xmlChar
13513 *
13514 * parse an XML in-memory document and build a tree.
Daniel Veillard2135fc22008-04-04 16:10:51 +000013515 * In the case the document is not Well Formed, a attempt to build a
13516 * tree is tried anyway
13517 *
13518 * Returns the resulting document tree or NULL in case of failure
Owen Taylor3473f882001-02-23 17:55:21 +000013519 */
13520
13521xmlDocPtr
Daniel Veillardf39eafa2009-08-20 19:15:08 +020013522xmlRecoverDoc(const xmlChar *cur) {
Owen Taylor3473f882001-02-23 17:55:21 +000013523 return(xmlSAXParseDoc(NULL, cur, 1));
13524}
13525
13526/**
13527 * xmlParseFile:
13528 * @filename: the filename
13529 *
13530 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
13531 * compressed document is provided by default if found at compile-time.
13532 *
Daniel Veillard5d96fff2001-08-31 14:55:30 +000013533 * Returns the resulting document tree if the file was wellformed,
13534 * NULL otherwise.
Owen Taylor3473f882001-02-23 17:55:21 +000013535 */
13536
13537xmlDocPtr
13538xmlParseFile(const char *filename) {
13539 return(xmlSAXParseFile(NULL, filename, 0));
13540}
13541
13542/**
13543 * xmlRecoverFile:
13544 * @filename: the filename
13545 *
13546 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
13547 * compressed document is provided by default if found at compile-time.
Daniel Veillard2135fc22008-04-04 16:10:51 +000013548 * In the case the document is not Well Formed, it attempts to build
13549 * a tree anyway
Owen Taylor3473f882001-02-23 17:55:21 +000013550 *
Daniel Veillard2135fc22008-04-04 16:10:51 +000013551 * Returns the resulting document tree or NULL in case of failure
Owen Taylor3473f882001-02-23 17:55:21 +000013552 */
13553
13554xmlDocPtr
13555xmlRecoverFile(const char *filename) {
13556 return(xmlSAXParseFile(NULL, filename, 1));
13557}
13558
13559
13560/**
13561 * xmlSetupParserForBuffer:
13562 * @ctxt: an XML parser context
13563 * @buffer: a xmlChar * buffer
13564 * @filename: a file name
13565 *
13566 * Setup the parser context to parse a new buffer; Clears any prior
13567 * contents from the parser context. The buffer parameter must not be
13568 * NULL, but the filename parameter can be
13569 */
13570void
13571xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
13572 const char* filename)
13573{
13574 xmlParserInputPtr input;
13575
Daniel Veillard36e5cd52004-11-02 14:52:23 +000013576 if ((ctxt == NULL) || (buffer == NULL))
13577 return;
13578
Owen Taylor3473f882001-02-23 17:55:21 +000013579 input = xmlNewInputStream(ctxt);
13580 if (input == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +000013581 xmlErrMemory(NULL, "parsing new buffer: out of memory\n");
Daniel Veillard36e5cd52004-11-02 14:52:23 +000013582 xmlClearParserCtxt(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000013583 return;
13584 }
13585
13586 xmlClearParserCtxt(ctxt);
13587 if (filename != NULL)
Daniel Veillardc3ca5ba2003-05-09 22:26:28 +000013588 input->filename = (char *) xmlCanonicPath((const xmlChar *)filename);
Owen Taylor3473f882001-02-23 17:55:21 +000013589 input->base = buffer;
13590 input->cur = buffer;
Daniel Veillard48b2f892001-02-25 16:11:03 +000013591 input->end = &buffer[xmlStrlen(buffer)];
Owen Taylor3473f882001-02-23 17:55:21 +000013592 inputPush(ctxt, input);
13593}
13594
13595/**
13596 * xmlSAXUserParseFile:
13597 * @sax: a SAX handler
13598 * @user_data: The user data returned on SAX callbacks
13599 * @filename: a file name
13600 *
13601 * parse an XML file and call the given SAX handler routines.
13602 * Automatic support for ZLIB/Compress compressed document is provided
13603 *
13604 * Returns 0 in case of success or a error number otherwise
13605 */
13606int
13607xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
13608 const char *filename) {
13609 int ret = 0;
13610 xmlParserCtxtPtr ctxt;
13611
13612 ctxt = xmlCreateFileParserCtxt(filename);
13613 if (ctxt == NULL) return -1;
Daniel Veillard092643b2003-09-25 14:29:29 +000013614 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Owen Taylor3473f882001-02-23 17:55:21 +000013615 xmlFree(ctxt->sax);
13616 ctxt->sax = sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000013617 xmlDetectSAX2(ctxt);
13618
Owen Taylor3473f882001-02-23 17:55:21 +000013619 if (user_data != NULL)
13620 ctxt->userData = user_data;
13621
13622 xmlParseDocument(ctxt);
13623
13624 if (ctxt->wellFormed)
13625 ret = 0;
13626 else {
13627 if (ctxt->errNo != 0)
13628 ret = ctxt->errNo;
13629 else
13630 ret = -1;
13631 }
13632 if (sax != NULL)
13633 ctxt->sax = NULL;
Daniel Veillard34099b42004-11-04 17:34:35 +000013634 if (ctxt->myDoc != NULL) {
13635 xmlFreeDoc(ctxt->myDoc);
13636 ctxt->myDoc = NULL;
13637 }
Owen Taylor3473f882001-02-23 17:55:21 +000013638 xmlFreeParserCtxt(ctxt);
13639
13640 return ret;
13641}
Daniel Veillard81273902003-09-30 00:43:48 +000013642#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000013643
13644/************************************************************************
13645 * *
13646 * Front ends when parsing from memory *
13647 * *
13648 ************************************************************************/
13649
13650/**
13651 * xmlCreateMemoryParserCtxt:
13652 * @buffer: a pointer to a char array
13653 * @size: the size of the array
13654 *
13655 * Create a parser context for an XML in-memory document.
13656 *
13657 * Returns the new parser context or NULL
13658 */
13659xmlParserCtxtPtr
Daniel Veillardfd7ddca2001-05-16 10:57:35 +000013660xmlCreateMemoryParserCtxt(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000013661 xmlParserCtxtPtr ctxt;
13662 xmlParserInputPtr input;
13663 xmlParserInputBufferPtr buf;
13664
13665 if (buffer == NULL)
13666 return(NULL);
13667 if (size <= 0)
13668 return(NULL);
13669
13670 ctxt = xmlNewParserCtxt();
13671 if (ctxt == NULL)
13672 return(NULL);
13673
Daniel Veillard53350552003-09-18 13:35:51 +000013674 /* TODO: xmlParserInputBufferCreateStatic, requires some serious changes */
Owen Taylor3473f882001-02-23 17:55:21 +000013675 buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
Daniel Veillarda7e05b42002-11-19 08:11:14 +000013676 if (buf == NULL) {
13677 xmlFreeParserCtxt(ctxt);
13678 return(NULL);
13679 }
Owen Taylor3473f882001-02-23 17:55:21 +000013680
13681 input = xmlNewInputStream(ctxt);
13682 if (input == NULL) {
Daniel Veillarda7e05b42002-11-19 08:11:14 +000013683 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000013684 xmlFreeParserCtxt(ctxt);
13685 return(NULL);
13686 }
13687
13688 input->filename = NULL;
13689 input->buf = buf;
13690 input->base = input->buf->buffer->content;
13691 input->cur = input->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +000013692 input->end = &input->buf->buffer->content[input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000013693
13694 inputPush(ctxt, input);
13695 return(ctxt);
13696}
13697
Daniel Veillard81273902003-09-30 00:43:48 +000013698#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000013699/**
Daniel Veillard8606bbb2002-11-12 12:36:52 +000013700 * xmlSAXParseMemoryWithData:
13701 * @sax: the SAX handler block
13702 * @buffer: an pointer to a char array
13703 * @size: the size of the array
13704 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
13705 * documents
13706 * @data: the userdata
13707 *
13708 * parse an XML in-memory block and use the given SAX function block
13709 * to handle the parsing callback. If sax is NULL, fallback to the default
13710 * DOM tree building routines.
13711 *
13712 * User data (void *) is stored within the parser context in the
13713 * context's _private member, so it is available nearly everywhere in libxml
13714 *
13715 * Returns the resulting document tree
13716 */
13717
13718xmlDocPtr
13719xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer,
13720 int size, int recovery, void *data) {
13721 xmlDocPtr ret;
13722 xmlParserCtxtPtr ctxt;
13723
Daniel Veillardab2a7632009-07-09 08:45:03 +020013724 xmlInitParser();
13725
Daniel Veillard8606bbb2002-11-12 12:36:52 +000013726 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
13727 if (ctxt == NULL) return(NULL);
13728 if (sax != NULL) {
13729 if (ctxt->sax != NULL)
13730 xmlFree(ctxt->sax);
13731 ctxt->sax = sax;
13732 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000013733 xmlDetectSAX2(ctxt);
Daniel Veillard8606bbb2002-11-12 12:36:52 +000013734 if (data!=NULL) {
13735 ctxt->_private=data;
13736 }
13737
Daniel Veillardadba5f12003-04-04 16:09:01 +000013738 ctxt->recovery = recovery;
13739
Daniel Veillard8606bbb2002-11-12 12:36:52 +000013740 xmlParseDocument(ctxt);
13741
13742 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
13743 else {
13744 ret = NULL;
13745 xmlFreeDoc(ctxt->myDoc);
13746 ctxt->myDoc = NULL;
13747 }
13748 if (sax != NULL)
13749 ctxt->sax = NULL;
13750 xmlFreeParserCtxt(ctxt);
Daniel Veillardab2a7632009-07-09 08:45:03 +020013751
Daniel Veillard8606bbb2002-11-12 12:36:52 +000013752 return(ret);
13753}
13754
13755/**
Owen Taylor3473f882001-02-23 17:55:21 +000013756 * xmlSAXParseMemory:
13757 * @sax: the SAX handler block
13758 * @buffer: an pointer to a char array
13759 * @size: the size of the array
13760 * @recovery: work in recovery mode, i.e. tries to read not Well Formed
13761 * documents
13762 *
13763 * parse an XML in-memory block and use the given SAX function block
13764 * to handle the parsing callback. If sax is NULL, fallback to the default
13765 * DOM tree building routines.
13766 *
13767 * Returns the resulting document tree
13768 */
13769xmlDocPtr
Daniel Veillard50822cb2001-07-26 20:05:51 +000013770xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer,
13771 int size, int recovery) {
Daniel Veillard8606bbb2002-11-12 12:36:52 +000013772 return xmlSAXParseMemoryWithData(sax, buffer, size, recovery, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000013773}
13774
13775/**
13776 * xmlParseMemory:
13777 * @buffer: an pointer to a char array
13778 * @size: the size of the array
13779 *
13780 * parse an XML in-memory block and build a tree.
13781 *
13782 * Returns the resulting document tree
13783 */
13784
Daniel Veillard50822cb2001-07-26 20:05:51 +000013785xmlDocPtr xmlParseMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000013786 return(xmlSAXParseMemory(NULL, buffer, size, 0));
13787}
13788
13789/**
13790 * xmlRecoverMemory:
13791 * @buffer: an pointer to a char array
13792 * @size: the size of the array
13793 *
13794 * parse an XML in-memory block and build a tree.
Daniel Veillard2135fc22008-04-04 16:10:51 +000013795 * In the case the document is not Well Formed, an attempt to
13796 * build a tree is tried anyway
13797 *
13798 * Returns the resulting document tree or NULL in case of error
Owen Taylor3473f882001-02-23 17:55:21 +000013799 */
13800
Daniel Veillard50822cb2001-07-26 20:05:51 +000013801xmlDocPtr xmlRecoverMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000013802 return(xmlSAXParseMemory(NULL, buffer, size, 1));
13803}
13804
13805/**
13806 * xmlSAXUserParseMemory:
13807 * @sax: a SAX handler
13808 * @user_data: The user data returned on SAX callbacks
13809 * @buffer: an in-memory XML document input
13810 * @size: the length of the XML document in bytes
13811 *
13812 * A better SAX parsing routine.
13813 * parse an XML in-memory buffer and call the given SAX handler routines.
Daniel Veillardab2a7632009-07-09 08:45:03 +020013814 *
Owen Taylor3473f882001-02-23 17:55:21 +000013815 * Returns 0 in case of success or a error number otherwise
13816 */
13817int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardfd7ddca2001-05-16 10:57:35 +000013818 const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000013819 int ret = 0;
13820 xmlParserCtxtPtr ctxt;
Daniel Veillardab2a7632009-07-09 08:45:03 +020013821
13822 xmlInitParser();
13823
Owen Taylor3473f882001-02-23 17:55:21 +000013824 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
13825 if (ctxt == NULL) return -1;
Daniel Veillard3dcd3192007-08-14 13:46:54 +000013826 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
13827 xmlFree(ctxt->sax);
Daniel Veillard9e923512002-08-14 08:48:52 +000013828 ctxt->sax = sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000013829 xmlDetectSAX2(ctxt);
Daniel Veillard3dcd3192007-08-14 13:46:54 +000013830
Daniel Veillard30211a02001-04-26 09:33:18 +000013831 if (user_data != NULL)
13832 ctxt->userData = user_data;
Daniel Veillardab2a7632009-07-09 08:45:03 +020013833
Owen Taylor3473f882001-02-23 17:55:21 +000013834 xmlParseDocument(ctxt);
13835
13836 if (ctxt->wellFormed)
13837 ret = 0;
13838 else {
13839 if (ctxt->errNo != 0)
13840 ret = ctxt->errNo;
13841 else
13842 ret = -1;
13843 }
Daniel Veillard3dcd3192007-08-14 13:46:54 +000013844 if (sax != NULL)
13845 ctxt->sax = NULL;
Daniel Veillard34099b42004-11-04 17:34:35 +000013846 if (ctxt->myDoc != NULL) {
13847 xmlFreeDoc(ctxt->myDoc);
13848 ctxt->myDoc = NULL;
13849 }
Owen Taylor3473f882001-02-23 17:55:21 +000013850 xmlFreeParserCtxt(ctxt);
13851
13852 return ret;
13853}
Daniel Veillard81273902003-09-30 00:43:48 +000013854#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000013855
13856/**
13857 * xmlCreateDocParserCtxt:
13858 * @cur: a pointer to an array of xmlChar
13859 *
13860 * Creates a parser context for an XML in-memory document.
13861 *
13862 * Returns the new parser context or NULL
13863 */
13864xmlParserCtxtPtr
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013865xmlCreateDocParserCtxt(const xmlChar *cur) {
Owen Taylor3473f882001-02-23 17:55:21 +000013866 int len;
13867
13868 if (cur == NULL)
13869 return(NULL);
13870 len = xmlStrlen(cur);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013871 return(xmlCreateMemoryParserCtxt((const char *)cur, len));
Owen Taylor3473f882001-02-23 17:55:21 +000013872}
13873
Daniel Veillard81273902003-09-30 00:43:48 +000013874#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000013875/**
13876 * xmlSAXParseDoc:
13877 * @sax: the SAX handler block
13878 * @cur: a pointer to an array of xmlChar
13879 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
13880 * documents
13881 *
13882 * parse an XML in-memory document and build a tree.
13883 * It use the given SAX function block to handle the parsing callback.
13884 * If sax is NULL, fallback to the default DOM tree building routines.
13885 *
13886 * Returns the resulting document tree
13887 */
13888
13889xmlDocPtr
Daniel Veillard7331e5c2005-03-31 14:59:00 +000013890xmlSAXParseDoc(xmlSAXHandlerPtr sax, const xmlChar *cur, int recovery) {
Owen Taylor3473f882001-02-23 17:55:21 +000013891 xmlDocPtr ret;
13892 xmlParserCtxtPtr ctxt;
Daniel Veillard38936062004-11-04 17:45:11 +000013893 xmlSAXHandlerPtr oldsax = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000013894
Daniel Veillard38936062004-11-04 17:45:11 +000013895 if (cur == NULL) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000013896
13897
13898 ctxt = xmlCreateDocParserCtxt(cur);
13899 if (ctxt == NULL) return(NULL);
13900 if (sax != NULL) {
Daniel Veillard38936062004-11-04 17:45:11 +000013901 oldsax = ctxt->sax;
Owen Taylor3473f882001-02-23 17:55:21 +000013902 ctxt->sax = sax;
13903 ctxt->userData = NULL;
13904 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000013905 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000013906
13907 xmlParseDocument(ctxt);
13908 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
13909 else {
13910 ret = NULL;
13911 xmlFreeDoc(ctxt->myDoc);
13912 ctxt->myDoc = NULL;
13913 }
Daniel Veillard34099b42004-11-04 17:34:35 +000013914 if (sax != NULL)
Daniel Veillard38936062004-11-04 17:45:11 +000013915 ctxt->sax = oldsax;
Owen Taylor3473f882001-02-23 17:55:21 +000013916 xmlFreeParserCtxt(ctxt);
13917
13918 return(ret);
13919}
13920
13921/**
13922 * xmlParseDoc:
13923 * @cur: a pointer to an array of xmlChar
13924 *
13925 * parse an XML in-memory document and build a tree.
13926 *
13927 * Returns the resulting document tree
13928 */
13929
13930xmlDocPtr
Daniel Veillard7331e5c2005-03-31 14:59:00 +000013931xmlParseDoc(const xmlChar *cur) {
Owen Taylor3473f882001-02-23 17:55:21 +000013932 return(xmlSAXParseDoc(NULL, cur, 0));
13933}
Daniel Veillard81273902003-09-30 00:43:48 +000013934#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000013935
Daniel Veillard81273902003-09-30 00:43:48 +000013936#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +000013937/************************************************************************
13938 * *
13939 * Specific function to keep track of entities references *
13940 * and used by the XSLT debugger *
13941 * *
13942 ************************************************************************/
13943
13944static xmlEntityReferenceFunc xmlEntityRefFunc = NULL;
13945
13946/**
13947 * xmlAddEntityReference:
13948 * @ent : A valid entity
13949 * @firstNode : A valid first node for children of entity
13950 * @lastNode : A valid last node of children entity
13951 *
13952 * Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY
13953 */
13954static void
13955xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
13956 xmlNodePtr lastNode)
13957{
13958 if (xmlEntityRefFunc != NULL) {
13959 (*xmlEntityRefFunc) (ent, firstNode, lastNode);
13960 }
13961}
13962
13963
13964/**
13965 * xmlSetEntityReferenceFunc:
Daniel Veillard01c13b52002-12-10 15:19:08 +000013966 * @func: A valid function
Daniel Veillard8107a222002-01-13 14:10:10 +000013967 *
13968 * Set the function to call call back when a xml reference has been made
13969 */
13970void
13971xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func)
13972{
13973 xmlEntityRefFunc = func;
13974}
Daniel Veillard81273902003-09-30 00:43:48 +000013975#endif /* LIBXML_LEGACY_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000013976
13977/************************************************************************
13978 * *
13979 * Miscellaneous *
13980 * *
13981 ************************************************************************/
13982
13983#ifdef LIBXML_XPATH_ENABLED
13984#include <libxml/xpath.h>
13985#endif
13986
Daniel Veillardffa3c742005-07-21 13:24:09 +000013987extern void XMLCDECL xmlGenericErrorDefaultFunc(void *ctx, const char *msg, ...);
Owen Taylor3473f882001-02-23 17:55:21 +000013988static int xmlParserInitialized = 0;
13989
13990/**
13991 * xmlInitParser:
13992 *
13993 * Initialization function for the XML parser.
13994 * This is not reentrant. Call once before processing in case of
13995 * use in multithreaded programs.
13996 */
13997
13998void
13999xmlInitParser(void) {
Daniel Veillard3c01b1d2001-10-17 15:58:35 +000014000 if (xmlParserInitialized != 0)
14001 return;
Owen Taylor3473f882001-02-23 17:55:21 +000014002
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000014003#ifdef LIBXML_THREAD_ENABLED
14004 __xmlGlobalInitMutexLock();
14005 if (xmlParserInitialized == 0) {
14006#endif
Daniel Veillard7dd70802009-06-04 11:08:39 +020014007 xmlInitGlobals();
14008 xmlInitThreads();
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000014009 if ((xmlGenericError == xmlGenericErrorDefaultFunc) ||
14010 (xmlGenericError == NULL))
14011 initGenericErrorDefaultFunc(NULL);
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000014012 xmlInitMemory();
14013 xmlInitCharEncodingHandlers();
14014 xmlDefaultSAXHandlerInit();
14015 xmlRegisterDefaultInputCallbacks();
Daniel Veillarda9cce9c2003-09-29 13:20:24 +000014016#ifdef LIBXML_OUTPUT_ENABLED
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000014017 xmlRegisterDefaultOutputCallbacks();
Daniel Veillarda9cce9c2003-09-29 13:20:24 +000014018#endif /* LIBXML_OUTPUT_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000014019#ifdef LIBXML_HTML_ENABLED
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000014020 htmlInitAutoClose();
14021 htmlDefaultSAXHandlerInit();
Owen Taylor3473f882001-02-23 17:55:21 +000014022#endif
14023#ifdef LIBXML_XPATH_ENABLED
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000014024 xmlXPathInit();
Owen Taylor3473f882001-02-23 17:55:21 +000014025#endif
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000014026 xmlParserInitialized = 1;
14027#ifdef LIBXML_THREAD_ENABLED
14028 }
14029 __xmlGlobalInitMutexUnlock();
14030#endif
Owen Taylor3473f882001-02-23 17:55:21 +000014031}
14032
14033/**
14034 * xmlCleanupParser:
14035 *
Daniel Veillard05b37c62008-03-31 08:27:07 +000014036 * This function name is somewhat misleading. It does not clean up
14037 * parser state, it cleans up memory allocated by the library itself.
14038 * It is a cleanup function for the XML library. It tries to reclaim all
14039 * related global memory allocated for the library processing.
14040 * It doesn't deallocate any document related memory. One should
14041 * call xmlCleanupParser() only when the process has finished using
14042 * the library and all XML/HTML documents built with it.
14043 * See also xmlInitParser() which has the opposite function of preparing
14044 * the library for operations.
Daniel Veillard01101202009-02-21 09:22:04 +000014045 *
14046 * WARNING: if your application is multithreaded or has plugin support
14047 * calling this may crash the application if another thread or
14048 * a plugin is still using libxml2. It's sometimes very hard to
14049 * guess if libxml2 is in use in the application, some libraries
14050 * or plugins may use it without notice. In case of doubt abstain
14051 * from calling this function or do it just before calling exit()
14052 * to avoid leak reports from valgrind !
Owen Taylor3473f882001-02-23 17:55:21 +000014053 */
14054
14055void
14056xmlCleanupParser(void) {
Daniel Veillard7fb801f2003-08-17 21:07:26 +000014057 if (!xmlParserInitialized)
14058 return;
14059
Owen Taylor3473f882001-02-23 17:55:21 +000014060 xmlCleanupCharEncodingHandlers();
Daniel Veillarde2940dd2001-08-22 00:06:49 +000014061#ifdef LIBXML_CATALOG_ENABLED
14062 xmlCatalogCleanup();
14063#endif
Daniel Veillard14412512005-01-21 23:53:26 +000014064 xmlDictCleanup();
Daniel Veillard04054be2003-10-15 10:48:54 +000014065 xmlCleanupInputCallbacks();
14066#ifdef LIBXML_OUTPUT_ENABLED
14067 xmlCleanupOutputCallbacks();
14068#endif
Daniel Veillard36e5cd52004-11-02 14:52:23 +000014069#ifdef LIBXML_SCHEMAS_ENABLED
14070 xmlSchemaCleanupTypes();
Daniel Veillarddd6d3002004-11-03 14:20:29 +000014071 xmlRelaxNGCleanupTypes();
Daniel Veillard36e5cd52004-11-02 14:52:23 +000014072#endif
Daniel Veillard781ac8b2003-05-15 22:11:36 +000014073 xmlCleanupGlobals();
Daniel Veillard2b8c4a12003-10-02 22:28:19 +000014074 xmlResetLastError();
Daniel Veillard74c0e592003-11-25 07:01:38 +000014075 xmlCleanupThreads(); /* must be last if called not from the main thread */
William M. Brack72ee48d2003-12-30 08:30:19 +000014076 xmlCleanupMemory();
Daniel Veillardd0463562001-10-13 09:15:48 +000014077 xmlParserInitialized = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000014078}
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014079
14080/************************************************************************
14081 * *
14082 * New set (2.6.0) of simpler and more flexible APIs *
14083 * *
14084 ************************************************************************/
14085
/**
 * DICT_FREE:
 * @str:  a string
 *
 * Free a string if it is not owned by the "dict" dictionary in the
 * current scope. A variable named dict must be visible where the macro
 * is used; a NULL @str is ignored.
 *
 * The body is wrapped in do { } while (0) so that the macro expands to
 * a single statement and can safely be used as the body of an if/else.
 * Note that @str is evaluated more than once.
 */
#define DICT_FREE(str)						\
    do {							\
        if ((str) && ((!dict) ||				\
            (xmlDictOwns(dict, (const xmlChar *)(str)) == 0)))	\
            xmlFree((char *)(str));				\
    } while (0)
14097
14098/**
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014099 * xmlCtxtReset:
14100 * @ctxt: an XML parser context
14101 *
14102 * Reset a parser context
14103 */
14104void
14105xmlCtxtReset(xmlParserCtxtPtr ctxt)
14106{
14107 xmlParserInputPtr input;
Daniel Veillard36e5cd52004-11-02 14:52:23 +000014108 xmlDictPtr dict;
14109
14110 if (ctxt == NULL)
14111 return;
14112
14113 dict = ctxt->dict;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014114
14115 while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
14116 xmlFreeInputStream(input);
14117 }
14118 ctxt->inputNr = 0;
14119 ctxt->input = NULL;
14120
14121 ctxt->spaceNr = 0;
Daniel Veillard2e620862007-06-12 08:18:21 +000014122 if (ctxt->spaceTab != NULL) {
14123 ctxt->spaceTab[0] = -1;
14124 ctxt->space = &ctxt->spaceTab[0];
14125 } else {
14126 ctxt->space = NULL;
14127 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014128
14129
14130 ctxt->nodeNr = 0;
14131 ctxt->node = NULL;
14132
14133 ctxt->nameNr = 0;
14134 ctxt->name = NULL;
14135
Daniel Veillarde96a2a42003-09-24 21:23:56 +000014136 DICT_FREE(ctxt->version);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014137 ctxt->version = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000014138 DICT_FREE(ctxt->encoding);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014139 ctxt->encoding = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000014140 DICT_FREE(ctxt->directory);
14141 ctxt->directory = NULL;
14142 DICT_FREE(ctxt->extSubURI);
14143 ctxt->extSubURI = NULL;
14144 DICT_FREE(ctxt->extSubSystem);
14145 ctxt->extSubSystem = NULL;
14146 if (ctxt->myDoc != NULL)
14147 xmlFreeDoc(ctxt->myDoc);
14148 ctxt->myDoc = NULL;
14149
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014150 ctxt->standalone = -1;
14151 ctxt->hasExternalSubset = 0;
14152 ctxt->hasPErefs = 0;
14153 ctxt->html = 0;
14154 ctxt->external = 0;
14155 ctxt->instate = XML_PARSER_START;
14156 ctxt->token = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014157
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014158 ctxt->wellFormed = 1;
14159 ctxt->nsWellFormed = 1;
Daniel Veillardae289182004-01-21 16:00:43 +000014160 ctxt->disableSAX = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014161 ctxt->valid = 1;
Daniel Veillard766c4f92004-03-26 10:48:29 +000014162#if 0
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014163 ctxt->vctxt.userData = ctxt;
14164 ctxt->vctxt.error = xmlParserValidityError;
14165 ctxt->vctxt.warning = xmlParserValidityWarning;
Daniel Veillard766c4f92004-03-26 10:48:29 +000014166#endif
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014167 ctxt->record_info = 0;
14168 ctxt->nbChars = 0;
14169 ctxt->checkIndex = 0;
14170 ctxt->inSubset = 0;
14171 ctxt->errNo = XML_ERR_OK;
14172 ctxt->depth = 0;
14173 ctxt->charset = XML_CHAR_ENCODING_UTF8;
14174 ctxt->catalogs = NULL;
Daniel Veillard4bf899b2008-08-20 17:04:30 +000014175 ctxt->nbentities = 0;
Daniel Veillard0161e632008-08-28 15:36:32 +000014176 ctxt->sizeentities = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014177 xmlInitNodeInfoSeq(&ctxt->node_seq);
14178
14179 if (ctxt->attsDefault != NULL) {
14180 xmlHashFree(ctxt->attsDefault, (xmlHashDeallocator) xmlFree);
14181 ctxt->attsDefault = NULL;
14182 }
14183 if (ctxt->attsSpecial != NULL) {
14184 xmlHashFree(ctxt->attsSpecial, NULL);
14185 ctxt->attsSpecial = NULL;
14186 }
14187
Daniel Veillard4432df22003-09-28 18:58:27 +000014188#ifdef LIBXML_CATALOG_ENABLED
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014189 if (ctxt->catalogs != NULL)
14190 xmlCatalogFreeLocal(ctxt->catalogs);
Daniel Veillard4432df22003-09-28 18:58:27 +000014191#endif
Daniel Veillardcc199e02003-10-24 21:11:48 +000014192 if (ctxt->lastError.code != XML_ERR_OK)
14193 xmlResetError(&ctxt->lastError);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014194}
14195
14196/**
Daniel Veillard9ba8e382003-10-28 21:31:45 +000014197 * xmlCtxtResetPush:
14198 * @ctxt: an XML parser context
14199 * @chunk: a pointer to an array of chars
14200 * @size: number of chars in the array
14201 * @filename: an optional file name or URI
14202 * @encoding: the document encoding, or NULL
14203 *
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000014204 * Reset a push parser context
14205 *
14206 * Returns 0 in case of success and 1 in case of error
Daniel Veillard9ba8e382003-10-28 21:31:45 +000014207 */
14208int
14209xmlCtxtResetPush(xmlParserCtxtPtr ctxt, const char *chunk,
14210 int size, const char *filename, const char *encoding)
14211{
14212 xmlParserInputPtr inputStream;
14213 xmlParserInputBufferPtr buf;
14214 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
14215
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000014216 if (ctxt == NULL)
14217 return(1);
14218
Daniel Veillard9ba8e382003-10-28 21:31:45 +000014219 if ((encoding == NULL) && (chunk != NULL) && (size >= 4))
14220 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
14221
14222 buf = xmlAllocParserInputBuffer(enc);
14223 if (buf == NULL)
14224 return(1);
14225
14226 if (ctxt == NULL) {
14227 xmlFreeParserInputBuffer(buf);
14228 return(1);
14229 }
14230
14231 xmlCtxtReset(ctxt);
14232
14233 if (ctxt->pushTab == NULL) {
14234 ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 *
14235 sizeof(xmlChar *));
14236 if (ctxt->pushTab == NULL) {
14237 xmlErrMemory(ctxt, NULL);
14238 xmlFreeParserInputBuffer(buf);
14239 return(1);
14240 }
14241 }
14242
14243 if (filename == NULL) {
14244 ctxt->directory = NULL;
14245 } else {
14246 ctxt->directory = xmlParserGetDirectory(filename);
14247 }
14248
14249 inputStream = xmlNewInputStream(ctxt);
14250 if (inputStream == NULL) {
14251 xmlFreeParserInputBuffer(buf);
14252 return(1);
14253 }
14254
14255 if (filename == NULL)
14256 inputStream->filename = NULL;
14257 else
14258 inputStream->filename = (char *)
14259 xmlCanonicPath((const xmlChar *) filename);
14260 inputStream->buf = buf;
14261 inputStream->base = inputStream->buf->buffer->content;
14262 inputStream->cur = inputStream->buf->buffer->content;
14263 inputStream->end =
14264 &inputStream->buf->buffer->content[inputStream->buf->buffer->use];
14265
14266 inputPush(ctxt, inputStream);
14267
14268 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
14269 (ctxt->input->buf != NULL)) {
14270 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
14271 int cur = ctxt->input->cur - ctxt->input->base;
14272
14273 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
14274
14275 ctxt->input->base = ctxt->input->buf->buffer->content + base;
14276 ctxt->input->cur = ctxt->input->base + cur;
14277 ctxt->input->end =
14278 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->
14279 use];
14280#ifdef DEBUG_PUSH
14281 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
14282#endif
14283 }
14284
14285 if (encoding != NULL) {
14286 xmlCharEncodingHandlerPtr hdlr;
14287
Daniel Veillard37334572008-07-31 08:20:02 +000014288 if (ctxt->encoding != NULL)
14289 xmlFree((xmlChar *) ctxt->encoding);
14290 ctxt->encoding = xmlStrdup((const xmlChar *) encoding);
14291
Daniel Veillard9ba8e382003-10-28 21:31:45 +000014292 hdlr = xmlFindCharEncodingHandler(encoding);
14293 if (hdlr != NULL) {
14294 xmlSwitchToEncoding(ctxt, hdlr);
14295 } else {
14296 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
14297 "Unsupported encoding %s\n", BAD_CAST encoding);
14298 }
14299 } else if (enc != XML_CHAR_ENCODING_NONE) {
14300 xmlSwitchEncoding(ctxt, enc);
14301 }
14302
14303 return(0);
14304}
14305
Daniel Veillard37334572008-07-31 08:20:02 +000014306
Daniel Veillard9ba8e382003-10-28 21:31:45 +000014307/**
Daniel Veillard37334572008-07-31 08:20:02 +000014308 * xmlCtxtUseOptionsInternal:
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014309 * @ctxt: an XML parser context
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014310 * @options: a combination of xmlParserOption
Daniel Veillard37334572008-07-31 08:20:02 +000014311 * @encoding: the user provided encoding to use
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014312 *
14313 * Applies the options to the parser context
14314 *
14315 * Returns 0 in case of success, the set of unknown or unimplemented options
14316 * in case of error.
14317 */
Daniel Veillard37334572008-07-31 08:20:02 +000014318static int
14319xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options, const char *encoding)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014320{
Daniel Veillard36e5cd52004-11-02 14:52:23 +000014321 if (ctxt == NULL)
14322 return(-1);
Daniel Veillard37334572008-07-31 08:20:02 +000014323 if (encoding != NULL) {
14324 if (ctxt->encoding != NULL)
14325 xmlFree((xmlChar *) ctxt->encoding);
14326 ctxt->encoding = xmlStrdup((const xmlChar *) encoding);
14327 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014328 if (options & XML_PARSE_RECOVER) {
14329 ctxt->recovery = 1;
14330 options -= XML_PARSE_RECOVER;
Daniel Veillardae0765b2008-07-31 19:54:59 +000014331 ctxt->options |= XML_PARSE_RECOVER;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014332 } else
14333 ctxt->recovery = 0;
14334 if (options & XML_PARSE_DTDLOAD) {
14335 ctxt->loadsubset = XML_DETECT_IDS;
14336 options -= XML_PARSE_DTDLOAD;
Daniel Veillardae0765b2008-07-31 19:54:59 +000014337 ctxt->options |= XML_PARSE_DTDLOAD;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014338 } else
14339 ctxt->loadsubset = 0;
14340 if (options & XML_PARSE_DTDATTR) {
14341 ctxt->loadsubset |= XML_COMPLETE_ATTRS;
14342 options -= XML_PARSE_DTDATTR;
Daniel Veillardae0765b2008-07-31 19:54:59 +000014343 ctxt->options |= XML_PARSE_DTDATTR;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014344 }
14345 if (options & XML_PARSE_NOENT) {
14346 ctxt->replaceEntities = 1;
14347 /* ctxt->loadsubset |= XML_DETECT_IDS; */
14348 options -= XML_PARSE_NOENT;
Daniel Veillardae0765b2008-07-31 19:54:59 +000014349 ctxt->options |= XML_PARSE_NOENT;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014350 } else
14351 ctxt->replaceEntities = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014352 if (options & XML_PARSE_PEDANTIC) {
14353 ctxt->pedantic = 1;
14354 options -= XML_PARSE_PEDANTIC;
Daniel Veillardae0765b2008-07-31 19:54:59 +000014355 ctxt->options |= XML_PARSE_PEDANTIC;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014356 } else
14357 ctxt->pedantic = 0;
14358 if (options & XML_PARSE_NOBLANKS) {
14359 ctxt->keepBlanks = 0;
14360 ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace;
14361 options -= XML_PARSE_NOBLANKS;
Daniel Veillardae0765b2008-07-31 19:54:59 +000014362 ctxt->options |= XML_PARSE_NOBLANKS;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014363 } else
14364 ctxt->keepBlanks = 1;
14365 if (options & XML_PARSE_DTDVALID) {
14366 ctxt->validate = 1;
14367 if (options & XML_PARSE_NOWARNING)
14368 ctxt->vctxt.warning = NULL;
14369 if (options & XML_PARSE_NOERROR)
14370 ctxt->vctxt.error = NULL;
14371 options -= XML_PARSE_DTDVALID;
Daniel Veillardae0765b2008-07-31 19:54:59 +000014372 ctxt->options |= XML_PARSE_DTDVALID;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014373 } else
14374 ctxt->validate = 0;
Daniel Veillard971771e2005-07-09 17:32:57 +000014375 if (options & XML_PARSE_NOWARNING) {
14376 ctxt->sax->warning = NULL;
14377 options -= XML_PARSE_NOWARNING;
14378 }
14379 if (options & XML_PARSE_NOERROR) {
14380 ctxt->sax->error = NULL;
14381 ctxt->sax->fatalError = NULL;
14382 options -= XML_PARSE_NOERROR;
14383 }
Daniel Veillard81273902003-09-30 00:43:48 +000014384#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014385 if (options & XML_PARSE_SAX1) {
14386 ctxt->sax->startElement = xmlSAX2StartElement;
14387 ctxt->sax->endElement = xmlSAX2EndElement;
14388 ctxt->sax->startElementNs = NULL;
14389 ctxt->sax->endElementNs = NULL;
14390 ctxt->sax->initialized = 1;
14391 options -= XML_PARSE_SAX1;
Daniel Veillardae0765b2008-07-31 19:54:59 +000014392 ctxt->options |= XML_PARSE_SAX1;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014393 }
Daniel Veillard81273902003-09-30 00:43:48 +000014394#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde96a2a42003-09-24 21:23:56 +000014395 if (options & XML_PARSE_NODICT) {
14396 ctxt->dictNames = 0;
14397 options -= XML_PARSE_NODICT;
Daniel Veillardae0765b2008-07-31 19:54:59 +000014398 ctxt->options |= XML_PARSE_NODICT;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000014399 } else {
14400 ctxt->dictNames = 1;
14401 }
Daniel Veillarddca8cc72003-09-26 13:53:14 +000014402 if (options & XML_PARSE_NOCDATA) {
14403 ctxt->sax->cdataBlock = NULL;
14404 options -= XML_PARSE_NOCDATA;
Daniel Veillardae0765b2008-07-31 19:54:59 +000014405 ctxt->options |= XML_PARSE_NOCDATA;
Daniel Veillarddca8cc72003-09-26 13:53:14 +000014406 }
14407 if (options & XML_PARSE_NSCLEAN) {
14408 ctxt->options |= XML_PARSE_NSCLEAN;
14409 options -= XML_PARSE_NSCLEAN;
14410 }
Daniel Veillard61b93382003-11-03 14:28:31 +000014411 if (options & XML_PARSE_NONET) {
14412 ctxt->options |= XML_PARSE_NONET;
14413 options -= XML_PARSE_NONET;
14414 }
Daniel Veillard8874b942005-08-25 13:19:21 +000014415 if (options & XML_PARSE_COMPACT) {
14416 ctxt->options |= XML_PARSE_COMPACT;
14417 options -= XML_PARSE_COMPACT;
14418 }
Daniel Veillard7e5c3f42008-07-29 16:12:31 +000014419 if (options & XML_PARSE_OLD10) {
14420 ctxt->options |= XML_PARSE_OLD10;
14421 options -= XML_PARSE_OLD10;
14422 }
Daniel Veillard8915c152008-08-26 13:05:34 +000014423 if (options & XML_PARSE_NOBASEFIX) {
14424 ctxt->options |= XML_PARSE_NOBASEFIX;
14425 options -= XML_PARSE_NOBASEFIX;
14426 }
14427 if (options & XML_PARSE_HUGE) {
14428 ctxt->options |= XML_PARSE_HUGE;
14429 options -= XML_PARSE_HUGE;
14430 }
Rob Richardsb9ed0172009-01-05 17:28:50 +000014431 if (options & XML_PARSE_OLDSAX) {
14432 ctxt->options |= XML_PARSE_OLDSAX;
14433 options -= XML_PARSE_OLDSAX;
14434 }
Daniel Veillard7ec29972003-10-31 14:36:36 +000014435 ctxt->linenumbers = 1;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014436 return (options);
14437}
14438
14439/**
Daniel Veillard37334572008-07-31 08:20:02 +000014440 * xmlCtxtUseOptions:
14441 * @ctxt: an XML parser context
14442 * @options: a combination of xmlParserOption
14443 *
14444 * Applies the options to the parser context
14445 *
14446 * Returns 0 in case of success, the set of unknown or unimplemented options
14447 * in case of error.
14448 */
14449int
14450xmlCtxtUseOptions(xmlParserCtxtPtr ctxt, int options)
14451{
14452 return(xmlCtxtUseOptionsInternal(ctxt, options, NULL));
14453}
14454
14455/**
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014456 * xmlDoRead:
14457 * @ctxt: an XML parser context
Daniel Veillard60942de2003-09-25 21:05:58 +000014458 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014459 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014460 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014461 * @reuse: keep the context for reuse
14462 *
14463 * Common front-end for the xmlRead functions
Daniel Veillard37334572008-07-31 08:20:02 +000014464 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014465 * Returns the resulting document tree or NULL
14466 */
14467static xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000014468xmlDoRead(xmlParserCtxtPtr ctxt, const char *URL, const char *encoding,
14469 int options, int reuse)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014470{
14471 xmlDocPtr ret;
Daniel Veillard37334572008-07-31 08:20:02 +000014472
14473 xmlCtxtUseOptionsInternal(ctxt, options, encoding);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014474 if (encoding != NULL) {
14475 xmlCharEncodingHandlerPtr hdlr;
14476
14477 hdlr = xmlFindCharEncodingHandler(encoding);
14478 if (hdlr != NULL)
14479 xmlSwitchToEncoding(ctxt, hdlr);
14480 }
Daniel Veillard60942de2003-09-25 21:05:58 +000014481 if ((URL != NULL) && (ctxt->input != NULL) &&
14482 (ctxt->input->filename == NULL))
14483 ctxt->input->filename = (char *) xmlStrdup((const xmlChar *) URL);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014484 xmlParseDocument(ctxt);
14485 if ((ctxt->wellFormed) || ctxt->recovery)
14486 ret = ctxt->myDoc;
14487 else {
14488 ret = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000014489 if (ctxt->myDoc != NULL) {
Daniel Veillarde96a2a42003-09-24 21:23:56 +000014490 xmlFreeDoc(ctxt->myDoc);
14491 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014492 }
Daniel Veillarde96a2a42003-09-24 21:23:56 +000014493 ctxt->myDoc = NULL;
14494 if (!reuse) {
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014495 xmlFreeParserCtxt(ctxt);
Daniel Veillarde96a2a42003-09-24 21:23:56 +000014496 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014497
14498 return (ret);
14499}
14500
14501/**
14502 * xmlReadDoc:
14503 * @cur: a pointer to a zero terminated string
Daniel Veillard60942de2003-09-25 21:05:58 +000014504 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014505 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014506 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014507 *
14508 * parse an XML in-memory document and build a tree.
14509 *
14510 * Returns the resulting document tree
14511 */
14512xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000014513xmlReadDoc(const xmlChar * cur, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014514{
14515 xmlParserCtxtPtr ctxt;
14516
14517 if (cur == NULL)
14518 return (NULL);
14519
14520 ctxt = xmlCreateDocParserCtxt(cur);
14521 if (ctxt == NULL)
14522 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000014523 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014524}
14525
14526/**
14527 * xmlReadFile:
14528 * @filename: a file or URL
14529 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014530 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014531 *
14532 * parse an XML file from the filesystem or the network.
14533 *
14534 * Returns the resulting document tree
14535 */
14536xmlDocPtr
14537xmlReadFile(const char *filename, const char *encoding, int options)
14538{
14539 xmlParserCtxtPtr ctxt;
14540
Daniel Veillard61b93382003-11-03 14:28:31 +000014541 ctxt = xmlCreateURLParserCtxt(filename, options);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014542 if (ctxt == NULL)
14543 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000014544 return (xmlDoRead(ctxt, NULL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014545}
14546
14547/**
14548 * xmlReadMemory:
14549 * @buffer: a pointer to a char array
14550 * @size: the size of the array
Daniel Veillard60942de2003-09-25 21:05:58 +000014551 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014552 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014553 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014554 *
14555 * parse an XML in-memory document and build a tree.
14556 *
14557 * Returns the resulting document tree
14558 */
14559xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000014560xmlReadMemory(const char *buffer, int size, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014561{
14562 xmlParserCtxtPtr ctxt;
14563
14564 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
14565 if (ctxt == NULL)
14566 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000014567 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014568}
14569
14570/**
14571 * xmlReadFd:
14572 * @fd: an open file descriptor
Daniel Veillard60942de2003-09-25 21:05:58 +000014573 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014574 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014575 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014576 *
14577 * parse an XML from a file descriptor and build a tree.
Daniel Veillard4bc5f432003-12-22 18:13:12 +000014578 * NOTE that the file descriptor will not be closed when the
14579 * reader is closed or reset.
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014580 *
14581 * Returns the resulting document tree
14582 */
14583xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000014584xmlReadFd(int fd, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014585{
14586 xmlParserCtxtPtr ctxt;
14587 xmlParserInputBufferPtr input;
14588 xmlParserInputPtr stream;
14589
14590 if (fd < 0)
14591 return (NULL);
14592
14593 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
14594 if (input == NULL)
14595 return (NULL);
Daniel Veillard4bc5f432003-12-22 18:13:12 +000014596 input->closecallback = NULL;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014597 ctxt = xmlNewParserCtxt();
14598 if (ctxt == NULL) {
14599 xmlFreeParserInputBuffer(input);
14600 return (NULL);
14601 }
14602 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
14603 if (stream == NULL) {
14604 xmlFreeParserInputBuffer(input);
14605 xmlFreeParserCtxt(ctxt);
14606 return (NULL);
14607 }
14608 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000014609 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014610}
14611
14612/**
14613 * xmlReadIO:
14614 * @ioread: an I/O read function
14615 * @ioclose: an I/O close function
14616 * @ioctx: an I/O handler
Daniel Veillard60942de2003-09-25 21:05:58 +000014617 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014618 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014619 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014620 *
14621 * parse an XML document from I/O functions and source and build a tree.
14622 *
14623 * Returns the resulting document tree
14624 */
14625xmlDocPtr
14626xmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
Daniel Veillard60942de2003-09-25 21:05:58 +000014627 void *ioctx, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014628{
14629 xmlParserCtxtPtr ctxt;
14630 xmlParserInputBufferPtr input;
14631 xmlParserInputPtr stream;
14632
14633 if (ioread == NULL)
14634 return (NULL);
14635
14636 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
14637 XML_CHAR_ENCODING_NONE);
14638 if (input == NULL)
14639 return (NULL);
14640 ctxt = xmlNewParserCtxt();
14641 if (ctxt == NULL) {
14642 xmlFreeParserInputBuffer(input);
14643 return (NULL);
14644 }
14645 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
14646 if (stream == NULL) {
14647 xmlFreeParserInputBuffer(input);
14648 xmlFreeParserCtxt(ctxt);
14649 return (NULL);
14650 }
14651 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000014652 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014653}
14654
14655/**
14656 * xmlCtxtReadDoc:
14657 * @ctxt: an XML parser context
14658 * @cur: a pointer to a zero terminated string
Daniel Veillard60942de2003-09-25 21:05:58 +000014659 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014660 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014661 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014662 *
14663 * parse an XML in-memory document and build a tree.
14664 * This reuses the existing @ctxt parser context
14665 *
14666 * Returns the resulting document tree
14667 */
14668xmlDocPtr
14669xmlCtxtReadDoc(xmlParserCtxtPtr ctxt, const xmlChar * cur,
Daniel Veillard60942de2003-09-25 21:05:58 +000014670 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014671{
14672 xmlParserInputPtr stream;
14673
14674 if (cur == NULL)
14675 return (NULL);
14676 if (ctxt == NULL)
14677 return (NULL);
14678
14679 xmlCtxtReset(ctxt);
14680
14681 stream = xmlNewStringInputStream(ctxt, cur);
14682 if (stream == NULL) {
14683 return (NULL);
14684 }
14685 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000014686 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014687}
14688
14689/**
14690 * xmlCtxtReadFile:
14691 * @ctxt: an XML parser context
14692 * @filename: a file or URL
14693 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014694 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014695 *
14696 * parse an XML file from the filesystem or the network.
14697 * This reuses the existing @ctxt parser context
14698 *
14699 * Returns the resulting document tree
14700 */
14701xmlDocPtr
14702xmlCtxtReadFile(xmlParserCtxtPtr ctxt, const char *filename,
14703 const char *encoding, int options)
14704{
14705 xmlParserInputPtr stream;
14706
14707 if (filename == NULL)
14708 return (NULL);
14709 if (ctxt == NULL)
14710 return (NULL);
14711
14712 xmlCtxtReset(ctxt);
14713
Daniel Veillard29614c72004-11-26 10:47:26 +000014714 stream = xmlLoadExternalEntity(filename, NULL, ctxt);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014715 if (stream == NULL) {
14716 return (NULL);
14717 }
14718 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000014719 return (xmlDoRead(ctxt, NULL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014720}
14721
14722/**
14723 * xmlCtxtReadMemory:
14724 * @ctxt: an XML parser context
14725 * @buffer: a pointer to a char array
14726 * @size: the size of the array
Daniel Veillard60942de2003-09-25 21:05:58 +000014727 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014728 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014729 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014730 *
14731 * parse an XML in-memory document and build a tree.
14732 * This reuses the existing @ctxt parser context
14733 *
14734 * Returns the resulting document tree
14735 */
14736xmlDocPtr
14737xmlCtxtReadMemory(xmlParserCtxtPtr ctxt, const char *buffer, int size,
Daniel Veillard60942de2003-09-25 21:05:58 +000014738 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014739{
14740 xmlParserInputBufferPtr input;
14741 xmlParserInputPtr stream;
14742
14743 if (ctxt == NULL)
14744 return (NULL);
14745 if (buffer == NULL)
14746 return (NULL);
14747
14748 xmlCtxtReset(ctxt);
14749
14750 input = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
14751 if (input == NULL) {
14752 return(NULL);
14753 }
14754
14755 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
14756 if (stream == NULL) {
14757 xmlFreeParserInputBuffer(input);
14758 return(NULL);
14759 }
14760
14761 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000014762 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014763}
14764
14765/**
14766 * xmlCtxtReadFd:
14767 * @ctxt: an XML parser context
14768 * @fd: an open file descriptor
Daniel Veillard60942de2003-09-25 21:05:58 +000014769 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014770 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014771 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014772 *
14773 * parse an XML from a file descriptor and build a tree.
14774 * This reuses the existing @ctxt parser context
Daniel Veillard4bc5f432003-12-22 18:13:12 +000014775 * NOTE that the file descriptor will not be closed when the
14776 * reader is closed or reset.
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014777 *
14778 * Returns the resulting document tree
14779 */
14780xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000014781xmlCtxtReadFd(xmlParserCtxtPtr ctxt, int fd,
14782 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014783{
14784 xmlParserInputBufferPtr input;
14785 xmlParserInputPtr stream;
14786
14787 if (fd < 0)
14788 return (NULL);
14789 if (ctxt == NULL)
14790 return (NULL);
14791
14792 xmlCtxtReset(ctxt);
14793
14794
14795 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
14796 if (input == NULL)
14797 return (NULL);
Daniel Veillard4bc5f432003-12-22 18:13:12 +000014798 input->closecallback = NULL;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014799 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
14800 if (stream == NULL) {
14801 xmlFreeParserInputBuffer(input);
14802 return (NULL);
14803 }
14804 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000014805 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014806}
14807
14808/**
14809 * xmlCtxtReadIO:
14810 * @ctxt: an XML parser context
14811 * @ioread: an I/O read function
14812 * @ioclose: an I/O close function
14813 * @ioctx: an I/O handler
Daniel Veillard60942de2003-09-25 21:05:58 +000014814 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014815 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014816 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014817 *
14818 * parse an XML document from I/O functions and source and build a tree.
14819 * This reuses the existing @ctxt parser context
14820 *
14821 * Returns the resulting document tree
14822 */
14823xmlDocPtr
14824xmlCtxtReadIO(xmlParserCtxtPtr ctxt, xmlInputReadCallback ioread,
14825 xmlInputCloseCallback ioclose, void *ioctx,
Daniel Veillard60942de2003-09-25 21:05:58 +000014826 const char *URL,
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014827 const char *encoding, int options)
14828{
14829 xmlParserInputBufferPtr input;
14830 xmlParserInputPtr stream;
14831
14832 if (ioread == NULL)
14833 return (NULL);
14834 if (ctxt == NULL)
14835 return (NULL);
14836
14837 xmlCtxtReset(ctxt);
14838
14839 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
14840 XML_CHAR_ENCODING_NONE);
14841 if (input == NULL)
14842 return (NULL);
14843 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
14844 if (stream == NULL) {
14845 xmlFreeParserInputBuffer(input);
14846 return (NULL);
14847 }
14848 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000014849 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014850}
Daniel Veillard5d4644e2005-04-01 13:11:58 +000014851
14852#define bottom_parser
14853#include "elfgcchack.h"