blob: 1e4164ae047e4cbd4e1d42375bac5959db043adc [file] [log] [blame]
Owen Taylor3473f882001-02-23 17:55:21 +00001/*
2 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
3 * implemented on top of the SAX interfaces
4 *
5 * References:
6 * The XML specification:
7 * http://www.w3.org/TR/REC-xml
8 * Original 1.0 version:
9 * http://www.w3.org/TR/1998/REC-xml-19980210
10 * XML second edition working draft
11 * http://www.w3.org/TR/2000/WD-xml-2e-20000814
12 *
13 * Okay this is a big file, the parser core is around 7000 lines, then it
14 * is followed by the progressive parser top routines, then the various
Daniel Veillardcbaf3992001-12-31 16:16:02 +000015 * high level APIs to call the parser and a few miscellaneous functions.
Owen Taylor3473f882001-02-23 17:55:21 +000016 * A number of helper functions and deprecated ones have been moved to
17 * parserInternals.c to reduce this file size.
18 * As much as possible the functions are associated with their relative
19 * production in the XML specification. A few productions defining the
20 * different ranges of character are actually implanted either in
21 * parserInternals.h or parserInternals.c
22 * The DOM tree build is realized from the default SAX callbacks in
23 * the module SAX.c.
24 * The routines doing the validation checks are in valid.c and called either
Daniel Veillardcbaf3992001-12-31 16:16:02 +000025 * from the SAX callbacks or as standalone functions using a preparsed
Owen Taylor3473f882001-02-23 17:55:21 +000026 * document.
27 *
28 * See Copyright for the status of this software.
29 *
Daniel Veillardc5d64342001-06-24 12:13:24 +000030 * daniel@veillard.com
Owen Taylor3473f882001-02-23 17:55:21 +000031 */
32
Daniel Veillard34ce8be2002-03-18 19:37:11 +000033#define IN_LIBXML
Bjorn Reese70a9da52001-04-21 16:57:29 +000034#include "libxml.h"
35
Daniel Veillard3c5ed912002-01-08 10:36:16 +000036#if defined(WIN32) && !defined (__CYGWIN__)
Owen Taylor3473f882001-02-23 17:55:21 +000037#define XML_DIR_SEP '\\'
38#else
Owen Taylor3473f882001-02-23 17:55:21 +000039#define XML_DIR_SEP '/'
40#endif
41
Owen Taylor3473f882001-02-23 17:55:21 +000042#include <stdlib.h>
43#include <string.h>
Aleksey Sanine7acf432003-10-02 20:05:27 +000044#include <stdarg.h>
Owen Taylor3473f882001-02-23 17:55:21 +000045#include <libxml/xmlmemory.h>
Daniel Veillardd0463562001-10-13 09:15:48 +000046#include <libxml/threads.h>
47#include <libxml/globals.h>
Owen Taylor3473f882001-02-23 17:55:21 +000048#include <libxml/tree.h>
49#include <libxml/parser.h>
50#include <libxml/parserInternals.h>
51#include <libxml/valid.h>
52#include <libxml/entities.h>
53#include <libxml/xmlerror.h>
54#include <libxml/encoding.h>
55#include <libxml/xmlIO.h>
56#include <libxml/uri.h>
Daniel Veillard5d90b6c2001-08-22 14:29:45 +000057#ifdef LIBXML_CATALOG_ENABLED
58#include <libxml/catalog.h>
59#endif
William M. Brack1d8c9b22004-12-25 10:14:57 +000060#ifdef LIBXML_SCHEMAS_ENABLED
61#include <libxml/xmlschemastypes.h>
62#include <libxml/relaxng.h>
63#endif
Owen Taylor3473f882001-02-23 17:55:21 +000064#ifdef HAVE_CTYPE_H
65#include <ctype.h>
66#endif
67#ifdef HAVE_STDLIB_H
68#include <stdlib.h>
69#endif
70#ifdef HAVE_SYS_STAT_H
71#include <sys/stat.h>
72#endif
73#ifdef HAVE_FCNTL_H
74#include <fcntl.h>
75#endif
76#ifdef HAVE_UNISTD_H
77#include <unistd.h>
78#endif
79#ifdef HAVE_ZLIB_H
80#include <zlib.h>
81#endif
82
Daniel Veillard0161e632008-08-28 15:36:32 +000083static void
84xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info);
85
Rob Richards9c0aa472009-03-26 18:10:19 +000086static xmlParserCtxtPtr
87xmlCreateEntityParserCtxtInternal(const xmlChar *URL, const xmlChar *ID,
88 const xmlChar *base, xmlParserCtxtPtr pctx);
89
Daniel Veillard0161e632008-08-28 15:36:32 +000090/************************************************************************
91 * *
92 * Arbitrary limits set in the parser. See XML_PARSE_HUGE *
93 * *
94 ************************************************************************/
95
96#define XML_PARSER_BIG_ENTITY 1000
97#define XML_PARSER_LOT_ENTITY 5000
98
/*
 * XML_PARSER_NON_LINEAR is the threshold where the ratio of parsed entity
 * replacement over the size in bytes of the input indicates that you have
 * an exponential behaviour. A value of 10 corresponds to at least 3 entity
 * replacements per byte of input.
 */
105#define XML_PARSER_NON_LINEAR 10
106
107/*
108 * xmlParserEntityCheck
109 *
110 * Function to check non-linear entity expansion behaviour
111 * This is here to detect and stop exponential linear entity expansion
112 * This is not a limitation of the parser but a safety
113 * boundary feature. It can be disabled with the XML_PARSE_HUGE
114 * parser option.
115 */
116static int
117xmlParserEntityCheck(xmlParserCtxtPtr ctxt, unsigned long size,
118 xmlEntityPtr ent)
119{
Daniel Veillardcba68392008-08-29 12:43:40 +0000120 unsigned long consumed = 0;
Daniel Veillard0161e632008-08-28 15:36:32 +0000121
122 if ((ctxt == NULL) || (ctxt->options & XML_PARSE_HUGE))
123 return (0);
124 if (ctxt->lastError.code == XML_ERR_ENTITY_LOOP)
125 return (1);
126 if (size != 0) {
127 /*
128 * Do the check based on the replacement size of the entity
129 */
130 if (size < XML_PARSER_BIG_ENTITY)
131 return(0);
132
133 /*
134 * A limit on the amount of text data reasonably used
135 */
136 if (ctxt->input != NULL) {
137 consumed = ctxt->input->consumed +
138 (ctxt->input->cur - ctxt->input->base);
139 }
140 consumed += ctxt->sizeentities;
141
142 if ((size < XML_PARSER_NON_LINEAR * consumed) &&
143 (ctxt->nbentities * 3 < XML_PARSER_NON_LINEAR * consumed))
144 return (0);
145 } else if (ent != NULL) {
146 /*
147 * use the number of parsed entities in the replacement
148 */
149 size = ent->checked;
150
151 /*
152 * The amount of data parsed counting entities size only once
153 */
154 if (ctxt->input != NULL) {
155 consumed = ctxt->input->consumed +
156 (ctxt->input->cur - ctxt->input->base);
157 }
158 consumed += ctxt->sizeentities;
159
160 /*
161 * Check the density of entities for the amount of data
162 * knowing an entity reference will take at least 3 bytes
163 */
164 if (size * 3 < consumed * XML_PARSER_NON_LINEAR)
165 return (0);
166 } else {
167 /*
168 * strange we got no data for checking just return
169 */
170 return (0);
171 }
172
173 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
174 return (1);
175}
176
Daniel Veillard3b2e4e12003-02-03 08:52:58 +0000177/**
Daniel Veillard4aede2e2003-10-17 12:43:59 +0000178 * xmlParserMaxDepth:
Daniel Veillard3b2e4e12003-02-03 08:52:58 +0000179 *
Daniel Veillard8915c152008-08-26 13:05:34 +0000180 * arbitrary depth limit for the XML documents that we allow to
181 * process. This is not a limitation of the parser but a safety
182 * boundary feature. It can be disabled with the XML_PARSE_HUGE
183 * parser option.
Daniel Veillard3b2e4e12003-02-03 08:52:58 +0000184 */
Daniel Veillard8915c152008-08-26 13:05:34 +0000185unsigned int xmlParserMaxDepth = 256;
Owen Taylor3473f882001-02-23 17:55:21 +0000186
Daniel Veillard0fb18932003-09-07 09:14:37 +0000187
Daniel Veillard0161e632008-08-28 15:36:32 +0000188
189#define SAX2 1
Daniel Veillard21a0f912001-02-25 19:54:14 +0000190#define XML_PARSER_BIG_BUFFER_SIZE 300
Owen Taylor3473f882001-02-23 17:55:21 +0000191#define XML_PARSER_BUFFER_SIZE 100
Daniel Veillard5997aca2002-03-18 18:36:20 +0000192#define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"
193
/*
 * NULL-terminated list of the XML prefixed PI targets allowed by the
 * W3C specs. Both the strings and the table slots are read-only.
 */

static const char *const xmlW3CPIs[] = {
    "xml-stylesheet",
    NULL
};
202
Daniel Veillarda07050d2003-10-19 14:46:32 +0000203
Owen Taylor3473f882001-02-23 17:55:21 +0000204/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
Daniel Veillard8ed10722009-08-20 19:17:36 +0200205static xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
206 const xmlChar **str);
Owen Taylor3473f882001-02-23 17:55:21 +0000207
Daniel Veillard7d515752003-09-26 19:12:37 +0000208static xmlParserErrors
Daniel Veillarda97a19b2001-05-20 13:19:52 +0000209xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
210 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +0000211 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +0000212 const xmlChar *ID, xmlNodePtr *list);
Owen Taylor3473f882001-02-23 17:55:21 +0000213
Daniel Veillard37334572008-07-31 08:20:02 +0000214static int
215xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options,
216 const char *encoding);
Daniel Veillard81273902003-09-30 00:43:48 +0000217#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +0000218static void
219xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
220 xmlNodePtr lastNode);
Daniel Veillard81273902003-09-30 00:43:48 +0000221#endif /* LIBXML_LEGACY_ENABLED */
Daniel Veillard8107a222002-01-13 14:10:10 +0000222
Daniel Veillard7d515752003-09-26 19:12:37 +0000223static xmlParserErrors
Daniel Veillard328f48c2002-11-15 15:24:34 +0000224xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
225 const xmlChar *string, void *user_data, xmlNodePtr *lst);
Daniel Veillarde57ec792003-09-10 10:50:59 +0000226
Daniel Veillard8bf64ae2008-03-24 20:45:21 +0000227static int
228xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity);
229
Daniel Veillarde57ec792003-09-10 10:50:59 +0000230/************************************************************************
231 * *
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000232 * Some factorized error routines *
233 * *
234 ************************************************************************/
235
236/**
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000237 * xmlErrAttributeDup:
238 * @ctxt: an XML parser context
239 * @prefix: the attribute prefix
240 * @localname: the attribute localname
241 *
242 * Handle a redefinition of attribute error
243 */
244static void
245xmlErrAttributeDup(xmlParserCtxtPtr ctxt, const xmlChar * prefix,
246 const xmlChar * localname)
247{
Daniel Veillard157fee02003-10-31 10:36:03 +0000248 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
249 (ctxt->instate == XML_PARSER_EOF))
250 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000251 if (ctxt != NULL)
252 ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
Daniel Veillard76d36452009-09-07 11:19:33 +0200253
Daniel Veillard2b8c4a12003-10-02 22:28:19 +0000254 if (prefix == NULL)
Daniel Veillard659e71e2003-10-10 14:10:40 +0000255 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
Daniel Veillard76d36452009-09-07 11:19:33 +0200256 XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000257 (const char *) localname, NULL, NULL, 0, 0,
258 "Attribute %s redefined\n", localname);
Daniel Veillard2b8c4a12003-10-02 22:28:19 +0000259 else
Daniel Veillard659e71e2003-10-10 14:10:40 +0000260 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
Daniel Veillard76d36452009-09-07 11:19:33 +0200261 XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000262 (const char *) prefix, (const char *) localname,
263 NULL, 0, 0, "Attribute %s:%s redefined\n", prefix,
264 localname);
Daniel Veillard30e76072006-03-09 14:13:55 +0000265 if (ctxt != NULL) {
266 ctxt->wellFormed = 0;
267 if (ctxt->recovery == 0)
268 ctxt->disableSAX = 1;
269 }
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000270}
271
272/**
273 * xmlFatalErr:
274 * @ctxt: an XML parser context
275 * @error: the error number
276 * @extra: extra information string
277 *
278 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
279 */
280static void
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000281xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info)
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000282{
283 const char *errmsg;
284
Daniel Veillard157fee02003-10-31 10:36:03 +0000285 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
286 (ctxt->instate == XML_PARSER_EOF))
287 return;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000288 switch (error) {
289 case XML_ERR_INVALID_HEX_CHARREF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000290 errmsg = "CharRef: invalid hexadecimal value\n";
291 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000292 case XML_ERR_INVALID_DEC_CHARREF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000293 errmsg = "CharRef: invalid decimal value\n";
294 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000295 case XML_ERR_INVALID_CHARREF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000296 errmsg = "CharRef: invalid value\n";
297 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000298 case XML_ERR_INTERNAL_ERROR:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000299 errmsg = "internal error";
300 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000301 case XML_ERR_PEREF_AT_EOF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000302 errmsg = "PEReference at end of document\n";
303 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000304 case XML_ERR_PEREF_IN_PROLOG:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000305 errmsg = "PEReference in prolog\n";
306 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000307 case XML_ERR_PEREF_IN_EPILOG:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000308 errmsg = "PEReference in epilog\n";
309 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000310 case XML_ERR_PEREF_NO_NAME:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000311 errmsg = "PEReference: no name\n";
312 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000313 case XML_ERR_PEREF_SEMICOL_MISSING:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000314 errmsg = "PEReference: expecting ';'\n";
315 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000316 case XML_ERR_ENTITY_LOOP:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000317 errmsg = "Detected an entity reference loop\n";
318 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000319 case XML_ERR_ENTITY_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000320 errmsg = "EntityValue: \" or ' expected\n";
321 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000322 case XML_ERR_ENTITY_PE_INTERNAL:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000323 errmsg = "PEReferences forbidden in internal subset\n";
324 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000325 case XML_ERR_ENTITY_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000326 errmsg = "EntityValue: \" or ' expected\n";
327 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000328 case XML_ERR_ATTRIBUTE_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000329 errmsg = "AttValue: \" or ' expected\n";
330 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000331 case XML_ERR_LT_IN_ATTRIBUTE:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000332 errmsg = "Unescaped '<' not allowed in attributes values\n";
333 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000334 case XML_ERR_LITERAL_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000335 errmsg = "SystemLiteral \" or ' expected\n";
336 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000337 case XML_ERR_LITERAL_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000338 errmsg = "Unfinished System or Public ID \" or ' expected\n";
339 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000340 case XML_ERR_MISPLACED_CDATA_END:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000341 errmsg = "Sequence ']]>' not allowed in content\n";
342 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000343 case XML_ERR_URI_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000344 errmsg = "SYSTEM or PUBLIC, the URI is missing\n";
345 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000346 case XML_ERR_PUBID_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000347 errmsg = "PUBLIC, the Public Identifier is missing\n";
348 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000349 case XML_ERR_HYPHEN_IN_COMMENT:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000350 errmsg = "Comment must not contain '--' (double-hyphen)\n";
351 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000352 case XML_ERR_PI_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000353 errmsg = "xmlParsePI : no target name\n";
354 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000355 case XML_ERR_RESERVED_XML_NAME:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000356 errmsg = "Invalid PI name\n";
357 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000358 case XML_ERR_NOTATION_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000359 errmsg = "NOTATION: Name expected here\n";
360 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000361 case XML_ERR_NOTATION_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000362 errmsg = "'>' required to close NOTATION declaration\n";
363 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000364 case XML_ERR_VALUE_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000365 errmsg = "Entity value required\n";
366 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000367 case XML_ERR_URI_FRAGMENT:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000368 errmsg = "Fragment not allowed";
369 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000370 case XML_ERR_ATTLIST_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000371 errmsg = "'(' required to start ATTLIST enumeration\n";
372 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000373 case XML_ERR_NMTOKEN_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000374 errmsg = "NmToken expected in ATTLIST enumeration\n";
375 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000376 case XML_ERR_ATTLIST_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000377 errmsg = "')' required to finish ATTLIST enumeration\n";
378 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000379 case XML_ERR_MIXED_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000380 errmsg = "MixedContentDecl : '|' or ')*' expected\n";
381 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000382 case XML_ERR_PCDATA_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000383 errmsg = "MixedContentDecl : '#PCDATA' expected\n";
384 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000385 case XML_ERR_ELEMCONTENT_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000386 errmsg = "ContentDecl : Name or '(' expected\n";
387 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000388 case XML_ERR_ELEMCONTENT_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000389 errmsg = "ContentDecl : ',' '|' or ')' expected\n";
390 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000391 case XML_ERR_PEREF_IN_INT_SUBSET:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000392 errmsg =
393 "PEReference: forbidden within markup decl in internal subset\n";
394 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000395 case XML_ERR_GT_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000396 errmsg = "expected '>'\n";
397 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000398 case XML_ERR_CONDSEC_INVALID:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000399 errmsg = "XML conditional section '[' expected\n";
400 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000401 case XML_ERR_EXT_SUBSET_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000402 errmsg = "Content error in the external subset\n";
403 break;
404 case XML_ERR_CONDSEC_INVALID_KEYWORD:
405 errmsg =
406 "conditional section INCLUDE or IGNORE keyword expected\n";
407 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000408 case XML_ERR_CONDSEC_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000409 errmsg = "XML conditional section not closed\n";
410 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000411 case XML_ERR_XMLDECL_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000412 errmsg = "Text declaration '<?xml' required\n";
413 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000414 case XML_ERR_XMLDECL_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000415 errmsg = "parsing XML declaration: '?>' expected\n";
416 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000417 case XML_ERR_EXT_ENTITY_STANDALONE:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000418 errmsg = "external parsed entities cannot be standalone\n";
419 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000420 case XML_ERR_ENTITYREF_SEMICOL_MISSING:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000421 errmsg = "EntityRef: expecting ';'\n";
422 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000423 case XML_ERR_DOCTYPE_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000424 errmsg = "DOCTYPE improperly terminated\n";
425 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000426 case XML_ERR_LTSLASH_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000427 errmsg = "EndTag: '</' not found\n";
428 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000429 case XML_ERR_EQUAL_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000430 errmsg = "expected '='\n";
431 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000432 case XML_ERR_STRING_NOT_CLOSED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000433 errmsg = "String not closed expecting \" or '\n";
434 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000435 case XML_ERR_STRING_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000436 errmsg = "String not started expecting ' or \"\n";
437 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000438 case XML_ERR_ENCODING_NAME:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000439 errmsg = "Invalid XML encoding name\n";
440 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000441 case XML_ERR_STANDALONE_VALUE:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000442 errmsg = "standalone accepts only 'yes' or 'no'\n";
443 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000444 case XML_ERR_DOCUMENT_EMPTY:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000445 errmsg = "Document is empty\n";
446 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000447 case XML_ERR_DOCUMENT_END:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000448 errmsg = "Extra content at the end of the document\n";
449 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000450 case XML_ERR_NOT_WELL_BALANCED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000451 errmsg = "chunk is not well balanced\n";
452 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000453 case XML_ERR_EXTRA_CONTENT:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000454 errmsg = "extra content at the end of well balanced chunk\n";
455 break;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000456 case XML_ERR_VERSION_MISSING:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000457 errmsg = "Malformed declaration expecting version\n";
458 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000459#if 0
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000460 case:
461 errmsg = "\n";
462 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000463#endif
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000464 default:
465 errmsg = "Unregistered error message\n";
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000466 }
Daniel Veillard30e76072006-03-09 14:13:55 +0000467 if (ctxt != NULL)
468 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000469 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000470 XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, errmsg,
471 info);
Daniel Veillard30e76072006-03-09 14:13:55 +0000472 if (ctxt != NULL) {
473 ctxt->wellFormed = 0;
474 if (ctxt->recovery == 0)
475 ctxt->disableSAX = 1;
476 }
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000477}
478
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000479/**
480 * xmlFatalErrMsg:
481 * @ctxt: an XML parser context
482 * @error: the error number
483 * @msg: the error message
484 *
485 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
486 */
487static void
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000488xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
489 const char *msg)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000490{
Daniel Veillard157fee02003-10-31 10:36:03 +0000491 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
492 (ctxt->instate == XML_PARSER_EOF))
493 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000494 if (ctxt != NULL)
495 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000496 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
Daniel Veillardbccae2d2009-06-04 11:22:45 +0200497 XML_ERR_FATAL, NULL, 0, NULL, NULL, NULL, 0, 0, "%s", msg);
Daniel Veillard30e76072006-03-09 14:13:55 +0000498 if (ctxt != NULL) {
499 ctxt->wellFormed = 0;
500 if (ctxt->recovery == 0)
501 ctxt->disableSAX = 1;
502 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000503}
504
505/**
Daniel Veillard24eb9782003-10-04 21:08:09 +0000506 * xmlWarningMsg:
507 * @ctxt: an XML parser context
508 * @error: the error number
509 * @msg: the error message
510 * @str1: extra data
511 * @str2: extra data
512 *
513 * Handle a warning.
514 */
515static void
516xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
517 const char *msg, const xmlChar *str1, const xmlChar *str2)
518{
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000519 xmlStructuredErrorFunc schannel = NULL;
Daniel Veillard34e3f642008-07-29 09:02:27 +0000520
Daniel Veillard157fee02003-10-31 10:36:03 +0000521 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
522 (ctxt->instate == XML_PARSER_EOF))
523 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000524 if ((ctxt != NULL) && (ctxt->sax != NULL) &&
525 (ctxt->sax->initialized == XML_SAX2_MAGIC))
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000526 schannel = ctxt->sax->serror;
Daniel Veillardd44b9362009-09-07 12:15:08 +0200527 if (ctxt != NULL) {
528 __xmlRaiseError(schannel,
Daniel Veillard659e71e2003-10-10 14:10:40 +0000529 (ctxt->sax) ? ctxt->sax->warning : NULL,
530 ctxt->userData,
Daniel Veillard24eb9782003-10-04 21:08:09 +0000531 ctxt, NULL, XML_FROM_PARSER, error,
532 XML_ERR_WARNING, NULL, 0,
533 (const char *) str1, (const char *) str2, NULL, 0, 0,
534 msg, (const char *) str1, (const char *) str2);
Daniel Veillardd44b9362009-09-07 12:15:08 +0200535 } else {
536 __xmlRaiseError(schannel, NULL, NULL,
537 ctxt, NULL, XML_FROM_PARSER, error,
538 XML_ERR_WARNING, NULL, 0,
539 (const char *) str1, (const char *) str2, NULL, 0, 0,
540 msg, (const char *) str1, (const char *) str2);
541 }
Daniel Veillard24eb9782003-10-04 21:08:09 +0000542}
543
544/**
545 * xmlValidityError:
546 * @ctxt: an XML parser context
547 * @error: the error number
548 * @msg: the error message
549 * @str1: extra data
550 *
Daniel Veillardf88d8cf2003-12-08 10:25:02 +0000551 * Handle a validity error.
Daniel Veillard24eb9782003-10-04 21:08:09 +0000552 */
553static void
554xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error,
Daniel Veillardae0765b2008-07-31 19:54:59 +0000555 const char *msg, const xmlChar *str1, const xmlChar *str2)
Daniel Veillard24eb9782003-10-04 21:08:09 +0000556{
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000557 xmlStructuredErrorFunc schannel = NULL;
Daniel Veillard157fee02003-10-31 10:36:03 +0000558
559 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
560 (ctxt->instate == XML_PARSER_EOF))
561 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000562 if (ctxt != NULL) {
563 ctxt->errNo = error;
564 if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC))
565 schannel = ctxt->sax->serror;
566 }
Daniel Veillard76d36452009-09-07 11:19:33 +0200567 if (ctxt != NULL) {
568 __xmlRaiseError(schannel,
Daniel Veillard659e71e2003-10-10 14:10:40 +0000569 ctxt->vctxt.error, ctxt->vctxt.userData,
Daniel Veillard24eb9782003-10-04 21:08:09 +0000570 ctxt, NULL, XML_FROM_DTD, error,
571 XML_ERR_ERROR, NULL, 0, (const char *) str1,
Daniel Veillardae0765b2008-07-31 19:54:59 +0000572 (const char *) str2, NULL, 0, 0,
573 msg, (const char *) str1, (const char *) str2);
Daniel Veillard30e76072006-03-09 14:13:55 +0000574 ctxt->valid = 0;
Daniel Veillard76d36452009-09-07 11:19:33 +0200575 } else {
576 __xmlRaiseError(schannel, NULL, NULL,
577 ctxt, NULL, XML_FROM_DTD, error,
578 XML_ERR_ERROR, NULL, 0, (const char *) str1,
579 (const char *) str2, NULL, 0, 0,
580 msg, (const char *) str1, (const char *) str2);
Daniel Veillard30e76072006-03-09 14:13:55 +0000581 }
Daniel Veillard24eb9782003-10-04 21:08:09 +0000582}
583
584/**
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000585 * xmlFatalErrMsgInt:
586 * @ctxt: an XML parser context
587 * @error: the error number
588 * @msg: the error message
589 * @val: an integer value
590 *
591 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
592 */
593static void
594xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000595 const char *msg, int val)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000596{
Daniel Veillard157fee02003-10-31 10:36:03 +0000597 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
598 (ctxt->instate == XML_PARSER_EOF))
599 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000600 if (ctxt != NULL)
601 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000602 __xmlRaiseError(NULL, NULL, NULL,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000603 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
604 NULL, 0, NULL, NULL, NULL, val, 0, msg, val);
Daniel Veillard30e76072006-03-09 14:13:55 +0000605 if (ctxt != NULL) {
606 ctxt->wellFormed = 0;
607 if (ctxt->recovery == 0)
608 ctxt->disableSAX = 1;
609 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000610}
611
612/**
Daniel Veillardf403d292003-10-05 13:51:35 +0000613 * xmlFatalErrMsgStrIntStr:
614 * @ctxt: an XML parser context
615 * @error: the error number
616 * @msg: the error message
617 * @str1: an string info
618 * @val: an integer value
619 * @str2: an string info
620 *
621 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
622 */
623static void
624xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
625 const char *msg, const xmlChar *str1, int val,
626 const xmlChar *str2)
627{
Daniel Veillard157fee02003-10-31 10:36:03 +0000628 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
629 (ctxt->instate == XML_PARSER_EOF))
630 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000631 if (ctxt != NULL)
632 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000633 __xmlRaiseError(NULL, NULL, NULL,
Daniel Veillardf403d292003-10-05 13:51:35 +0000634 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
635 NULL, 0, (const char *) str1, (const char *) str2,
636 NULL, val, 0, msg, str1, val, str2);
Daniel Veillard30e76072006-03-09 14:13:55 +0000637 if (ctxt != NULL) {
638 ctxt->wellFormed = 0;
639 if (ctxt->recovery == 0)
640 ctxt->disableSAX = 1;
641 }
Daniel Veillardf403d292003-10-05 13:51:35 +0000642}
643
644/**
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000645 * xmlFatalErrMsgStr:
646 * @ctxt: an XML parser context
647 * @error: the error number
648 * @msg: the error message
649 * @val: a string value
650 *
651 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
652 */
653static void
654xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000655 const char *msg, const xmlChar * val)
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000656{
Daniel Veillard157fee02003-10-31 10:36:03 +0000657 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
658 (ctxt->instate == XML_PARSER_EOF))
659 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000660 if (ctxt != NULL)
661 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000662 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000663 XML_FROM_PARSER, error, XML_ERR_FATAL,
664 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
665 val);
Daniel Veillard30e76072006-03-09 14:13:55 +0000666 if (ctxt != NULL) {
667 ctxt->wellFormed = 0;
668 if (ctxt->recovery == 0)
669 ctxt->disableSAX = 1;
670 }
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000671}
672
673/**
Daniel Veillardf403d292003-10-05 13:51:35 +0000674 * xmlErrMsgStr:
675 * @ctxt: an XML parser context
676 * @error: the error number
677 * @msg: the error message
678 * @val: a string value
679 *
680 * Handle a non fatal parser error
681 */
682static void
683xmlErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
684 const char *msg, const xmlChar * val)
685{
Daniel Veillard157fee02003-10-31 10:36:03 +0000686 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
687 (ctxt->instate == XML_PARSER_EOF))
688 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000689 if (ctxt != NULL)
690 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000691 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
Daniel Veillardf403d292003-10-05 13:51:35 +0000692 XML_FROM_PARSER, error, XML_ERR_ERROR,
693 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
694 val);
695}
696
697/**
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000698 * xmlNsErr:
699 * @ctxt: an XML parser context
700 * @error: the error number
701 * @msg: the message
702 * @info1: extra information string
703 * @info2: extra information string
704 *
705 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
706 */
707static void
708xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
709 const char *msg,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000710 const xmlChar * info1, const xmlChar * info2,
711 const xmlChar * info3)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000712{
Daniel Veillard157fee02003-10-31 10:36:03 +0000713 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
714 (ctxt->instate == XML_PARSER_EOF))
715 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000716 if (ctxt != NULL)
717 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000718 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000719 XML_ERR_ERROR, NULL, 0, (const char *) info1,
720 (const char *) info2, (const char *) info3, 0, 0, msg,
721 info1, info2, info3);
Daniel Veillard30e76072006-03-09 14:13:55 +0000722 if (ctxt != NULL)
723 ctxt->nsWellFormed = 0;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000724}
725
Daniel Veillard37334572008-07-31 08:20:02 +0000726/**
727 * xmlNsWarn
728 * @ctxt: an XML parser context
729 * @error: the error number
730 * @msg: the message
731 * @info1: extra information string
732 * @info2: extra information string
733 *
734 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
735 */
736static void
737xmlNsWarn(xmlParserCtxtPtr ctxt, xmlParserErrors error,
738 const char *msg,
739 const xmlChar * info1, const xmlChar * info2,
740 const xmlChar * info3)
741{
742 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
743 (ctxt->instate == XML_PARSER_EOF))
744 return;
Daniel Veillard37334572008-07-31 08:20:02 +0000745 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
746 XML_ERR_WARNING, NULL, 0, (const char *) info1,
747 (const char *) info2, (const char *) info3, 0, 0, msg,
748 info1, info2, info3);
749}
750
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000751/************************************************************************
752 * *
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000753 * Library wide options *
754 * *
755 ************************************************************************/
756
757/**
758 * xmlHasFeature:
759 * @feature: the feature to be examined
760 *
761 * Examines if the library has been compiled with a given feature.
762 *
763 * Returns a non-zero value if the feature exist, otherwise zero.
764 * Returns zero (0) if the feature does not exist or an unknown
765 * unknown feature is requested, non-zero otherwise.
766 */
767int
768xmlHasFeature(xmlFeature feature)
769{
770 switch (feature) {
Daniel Veillard602434d2005-09-12 09:20:31 +0000771 case XML_WITH_THREAD:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000772#ifdef LIBXML_THREAD_ENABLED
773 return(1);
774#else
775 return(0);
776#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000777 case XML_WITH_TREE:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000778#ifdef LIBXML_TREE_ENABLED
779 return(1);
780#else
781 return(0);
782#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000783 case XML_WITH_OUTPUT:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000784#ifdef LIBXML_OUTPUT_ENABLED
785 return(1);
786#else
787 return(0);
788#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000789 case XML_WITH_PUSH:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000790#ifdef LIBXML_PUSH_ENABLED
791 return(1);
792#else
793 return(0);
794#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000795 case XML_WITH_READER:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000796#ifdef LIBXML_READER_ENABLED
797 return(1);
798#else
799 return(0);
800#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000801 case XML_WITH_PATTERN:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000802#ifdef LIBXML_PATTERN_ENABLED
803 return(1);
804#else
805 return(0);
806#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000807 case XML_WITH_WRITER:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000808#ifdef LIBXML_WRITER_ENABLED
809 return(1);
810#else
811 return(0);
812#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000813 case XML_WITH_SAX1:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000814#ifdef LIBXML_SAX1_ENABLED
815 return(1);
816#else
817 return(0);
818#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000819 case XML_WITH_FTP:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000820#ifdef LIBXML_FTP_ENABLED
821 return(1);
822#else
823 return(0);
824#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000825 case XML_WITH_HTTP:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000826#ifdef LIBXML_HTTP_ENABLED
827 return(1);
828#else
829 return(0);
830#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000831 case XML_WITH_VALID:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000832#ifdef LIBXML_VALID_ENABLED
833 return(1);
834#else
835 return(0);
836#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000837 case XML_WITH_HTML:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000838#ifdef LIBXML_HTML_ENABLED
839 return(1);
840#else
841 return(0);
842#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000843 case XML_WITH_LEGACY:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000844#ifdef LIBXML_LEGACY_ENABLED
845 return(1);
846#else
847 return(0);
848#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000849 case XML_WITH_C14N:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000850#ifdef LIBXML_C14N_ENABLED
851 return(1);
852#else
853 return(0);
854#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000855 case XML_WITH_CATALOG:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000856#ifdef LIBXML_CATALOG_ENABLED
857 return(1);
858#else
859 return(0);
860#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000861 case XML_WITH_XPATH:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000862#ifdef LIBXML_XPATH_ENABLED
863 return(1);
864#else
865 return(0);
866#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000867 case XML_WITH_XPTR:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000868#ifdef LIBXML_XPTR_ENABLED
869 return(1);
870#else
871 return(0);
872#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000873 case XML_WITH_XINCLUDE:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000874#ifdef LIBXML_XINCLUDE_ENABLED
875 return(1);
876#else
877 return(0);
878#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000879 case XML_WITH_ICONV:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000880#ifdef LIBXML_ICONV_ENABLED
881 return(1);
882#else
883 return(0);
884#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000885 case XML_WITH_ISO8859X:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000886#ifdef LIBXML_ISO8859X_ENABLED
887 return(1);
888#else
889 return(0);
890#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000891 case XML_WITH_UNICODE:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000892#ifdef LIBXML_UNICODE_ENABLED
893 return(1);
894#else
895 return(0);
896#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000897 case XML_WITH_REGEXP:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000898#ifdef LIBXML_REGEXP_ENABLED
899 return(1);
900#else
901 return(0);
902#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000903 case XML_WITH_AUTOMATA:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000904#ifdef LIBXML_AUTOMATA_ENABLED
905 return(1);
906#else
907 return(0);
908#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000909 case XML_WITH_EXPR:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000910#ifdef LIBXML_EXPR_ENABLED
911 return(1);
912#else
913 return(0);
914#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000915 case XML_WITH_SCHEMAS:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000916#ifdef LIBXML_SCHEMAS_ENABLED
917 return(1);
918#else
919 return(0);
920#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000921 case XML_WITH_SCHEMATRON:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000922#ifdef LIBXML_SCHEMATRON_ENABLED
923 return(1);
924#else
925 return(0);
926#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000927 case XML_WITH_MODULES:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000928#ifdef LIBXML_MODULES_ENABLED
929 return(1);
930#else
931 return(0);
932#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000933 case XML_WITH_DEBUG:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000934#ifdef LIBXML_DEBUG_ENABLED
935 return(1);
936#else
937 return(0);
938#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000939 case XML_WITH_DEBUG_MEM:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000940#ifdef DEBUG_MEMORY_LOCATION
941 return(1);
942#else
943 return(0);
944#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000945 case XML_WITH_DEBUG_RUN:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000946#ifdef LIBXML_DEBUG_RUNTIME
947 return(1);
948#else
949 return(0);
Daniel Veillard34e3f642008-07-29 09:02:27 +0000950#endif
Daniel Veillard75acfee2006-07-13 06:29:56 +0000951 case XML_WITH_ZLIB:
952#ifdef LIBXML_ZLIB_ENABLED
953 return(1);
954#else
955 return(0);
956#endif
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000957 default:
958 break;
959 }
960 return(0);
961}
962
963/************************************************************************
964 * *
Daniel Veillarde57ec792003-09-10 10:50:59 +0000965 * SAX2 defaulted attributes handling *
966 * *
967 ************************************************************************/
968
969/**
970 * xmlDetectSAX2:
971 * @ctxt: an XML parser context
972 *
973 * Do the SAX2 detection and specific intialization
974 */
975static void
976xmlDetectSAX2(xmlParserCtxtPtr ctxt) {
977 if (ctxt == NULL) return;
Daniel Veillard81273902003-09-30 00:43:48 +0000978#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +0000979 if ((ctxt->sax) && (ctxt->sax->initialized == XML_SAX2_MAGIC) &&
980 ((ctxt->sax->startElementNs != NULL) ||
981 (ctxt->sax->endElementNs != NULL))) ctxt->sax2 = 1;
Daniel Veillard81273902003-09-30 00:43:48 +0000982#else
983 ctxt->sax2 = 1;
984#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +0000985
986 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
987 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
988 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
William M. Brack9f797ab2004-07-28 07:40:12 +0000989 if ((ctxt->str_xml==NULL) || (ctxt->str_xmlns==NULL) ||
990 (ctxt->str_xml_ns == NULL)) {
991 xmlErrMemory(ctxt, NULL);
992 }
Daniel Veillarde57ec792003-09-10 10:50:59 +0000993}
994
Daniel Veillarde57ec792003-09-10 10:50:59 +0000995typedef struct _xmlDefAttrs xmlDefAttrs;
996typedef xmlDefAttrs *xmlDefAttrsPtr;
997struct _xmlDefAttrs {
998 int nbAttrs; /* number of defaulted attributes on that element */
999 int maxAttrs; /* the size of the array */
Daniel Veillardae0765b2008-07-31 19:54:59 +00001000 const xmlChar *values[5]; /* array of localname/prefix/values/external */
Daniel Veillarde57ec792003-09-10 10:50:59 +00001001};
Daniel Veillarde57ec792003-09-10 10:50:59 +00001002
1003/**
Daniel Veillard97c9ce22008-03-25 16:52:41 +00001004 * xmlAttrNormalizeSpace:
1005 * @src: the source string
1006 * @dst: the target string
1007 *
1008 * Normalize the space in non CDATA attribute values:
1009 * If the attribute type is not CDATA, then the XML processor MUST further
1010 * process the normalized attribute value by discarding any leading and
1011 * trailing space (#x20) characters, and by replacing sequences of space
1012 * (#x20) characters by a single space (#x20) character.
1013 * Note that the size of dst need to be at least src, and if one doesn't need
1014 * to preserve dst (and it doesn't come from a dictionary or read-only) then
1015 * passing src as dst is just fine.
1016 *
1017 * Returns a pointer to the normalized value (dst) or NULL if no conversion
1018 * is needed.
1019 */
1020static xmlChar *
1021xmlAttrNormalizeSpace(const xmlChar *src, xmlChar *dst)
1022{
1023 if ((src == NULL) || (dst == NULL))
1024 return(NULL);
1025
1026 while (*src == 0x20) src++;
1027 while (*src != 0) {
1028 if (*src == 0x20) {
1029 while (*src == 0x20) src++;
1030 if (*src != 0)
1031 *dst++ = 0x20;
1032 } else {
1033 *dst++ = *src++;
1034 }
1035 }
1036 *dst = 0;
1037 if (dst == src)
1038 return(NULL);
1039 return(dst);
1040}
1041
1042/**
1043 * xmlAttrNormalizeSpace2:
1044 * @src: the source string
1045 *
1046 * Normalize the space in non CDATA attribute values, a slightly more complex
1047 * front end to avoid allocation problems when running on attribute values
1048 * coming from the input.
1049 *
1050 * Returns a pointer to the normalized value (dst) or NULL if no conversion
1051 * is needed.
1052 */
1053static const xmlChar *
Daniel Veillardae0765b2008-07-31 19:54:59 +00001054xmlAttrNormalizeSpace2(xmlParserCtxtPtr ctxt, xmlChar *src, int *len)
Daniel Veillard97c9ce22008-03-25 16:52:41 +00001055{
1056 int i;
1057 int remove_head = 0;
1058 int need_realloc = 0;
1059 const xmlChar *cur;
1060
1061 if ((ctxt == NULL) || (src == NULL) || (len == NULL))
1062 return(NULL);
1063 i = *len;
1064 if (i <= 0)
1065 return(NULL);
1066
1067 cur = src;
1068 while (*cur == 0x20) {
1069 cur++;
1070 remove_head++;
1071 }
1072 while (*cur != 0) {
1073 if (*cur == 0x20) {
1074 cur++;
1075 if ((*cur == 0x20) || (*cur == 0)) {
1076 need_realloc = 1;
1077 break;
1078 }
1079 } else
1080 cur++;
1081 }
1082 if (need_realloc) {
1083 xmlChar *ret;
1084
1085 ret = xmlStrndup(src + remove_head, i - remove_head + 1);
1086 if (ret == NULL) {
1087 xmlErrMemory(ctxt, NULL);
1088 return(NULL);
1089 }
1090 xmlAttrNormalizeSpace(ret, ret);
1091 *len = (int) strlen((const char *)ret);
1092 return(ret);
1093 } else if (remove_head) {
1094 *len -= remove_head;
Daniel Veillardae0765b2008-07-31 19:54:59 +00001095 memmove(src, src + remove_head, 1 + *len);
1096 return(src);
Daniel Veillard97c9ce22008-03-25 16:52:41 +00001097 }
1098 return(NULL);
1099}
1100
1101/**
Daniel Veillarde57ec792003-09-10 10:50:59 +00001102 * xmlAddDefAttrs:
1103 * @ctxt: an XML parser context
1104 * @fullname: the element fullname
1105 * @fullattr: the attribute fullname
1106 * @value: the attribute value
1107 *
1108 * Add a defaulted attribute for an element
1109 */
1110static void
1111xmlAddDefAttrs(xmlParserCtxtPtr ctxt,
1112 const xmlChar *fullname,
1113 const xmlChar *fullattr,
1114 const xmlChar *value) {
1115 xmlDefAttrsPtr defaults;
1116 int len;
1117 const xmlChar *name;
1118 const xmlChar *prefix;
1119
Daniel Veillard6a31b832008-03-26 14:06:44 +00001120 /*
1121 * Allows to detect attribute redefinitions
1122 */
1123 if (ctxt->attsSpecial != NULL) {
1124 if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1125 return;
1126 }
1127
Daniel Veillarde57ec792003-09-10 10:50:59 +00001128 if (ctxt->attsDefault == NULL) {
Daniel Veillard316a5c32005-01-23 22:56:39 +00001129 ctxt->attsDefault = xmlHashCreateDict(10, ctxt->dict);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001130 if (ctxt->attsDefault == NULL)
1131 goto mem_error;
1132 }
1133
1134 /*
Daniel Veillard079f6a72004-09-23 13:15:03 +00001135 * split the element name into prefix:localname , the string found
1136 * are within the DTD and then not associated to namespace names.
Daniel Veillarde57ec792003-09-10 10:50:59 +00001137 */
1138 name = xmlSplitQName3(fullname, &len);
1139 if (name == NULL) {
1140 name = xmlDictLookup(ctxt->dict, fullname, -1);
1141 prefix = NULL;
1142 } else {
1143 name = xmlDictLookup(ctxt->dict, name, -1);
1144 prefix = xmlDictLookup(ctxt->dict, fullname, len);
1145 }
1146
1147 /*
1148 * make sure there is some storage
1149 */
1150 defaults = xmlHashLookup2(ctxt->attsDefault, name, prefix);
1151 if (defaults == NULL) {
1152 defaults = (xmlDefAttrsPtr) xmlMalloc(sizeof(xmlDefAttrs) +
Daniel Veillardae0765b2008-07-31 19:54:59 +00001153 (4 * 5) * sizeof(const xmlChar *));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001154 if (defaults == NULL)
1155 goto mem_error;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001156 defaults->nbAttrs = 0;
Daniel Veillard079f6a72004-09-23 13:15:03 +00001157 defaults->maxAttrs = 4;
Daniel Veillard68b6e022008-03-31 09:26:00 +00001158 if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1159 defaults, NULL) < 0) {
1160 xmlFree(defaults);
1161 goto mem_error;
1162 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00001163 } else if (defaults->nbAttrs >= defaults->maxAttrs) {
Daniel Veillard079f6a72004-09-23 13:15:03 +00001164 xmlDefAttrsPtr temp;
1165
1166 temp = (xmlDefAttrsPtr) xmlRealloc(defaults, sizeof(xmlDefAttrs) +
Daniel Veillardae0765b2008-07-31 19:54:59 +00001167 (2 * defaults->maxAttrs * 5) * sizeof(const xmlChar *));
Daniel Veillard079f6a72004-09-23 13:15:03 +00001168 if (temp == NULL)
Daniel Veillarde57ec792003-09-10 10:50:59 +00001169 goto mem_error;
Daniel Veillard079f6a72004-09-23 13:15:03 +00001170 defaults = temp;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001171 defaults->maxAttrs *= 2;
Daniel Veillard68b6e022008-03-31 09:26:00 +00001172 if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1173 defaults, NULL) < 0) {
1174 xmlFree(defaults);
1175 goto mem_error;
1176 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00001177 }
1178
1179 /*
Daniel Veillard8874b942005-08-25 13:19:21 +00001180 * Split the element name into prefix:localname , the string found
Daniel Veillarde57ec792003-09-10 10:50:59 +00001181 * are within the DTD and hen not associated to namespace names.
1182 */
1183 name = xmlSplitQName3(fullattr, &len);
1184 if (name == NULL) {
1185 name = xmlDictLookup(ctxt->dict, fullattr, -1);
1186 prefix = NULL;
1187 } else {
1188 name = xmlDictLookup(ctxt->dict, name, -1);
1189 prefix = xmlDictLookup(ctxt->dict, fullattr, len);
1190 }
1191
Daniel Veillardae0765b2008-07-31 19:54:59 +00001192 defaults->values[5 * defaults->nbAttrs] = name;
1193 defaults->values[5 * defaults->nbAttrs + 1] = prefix;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001194 /* intern the string and precompute the end */
1195 len = xmlStrlen(value);
1196 value = xmlDictLookup(ctxt->dict, value, len);
Daniel Veillardae0765b2008-07-31 19:54:59 +00001197 defaults->values[5 * defaults->nbAttrs + 2] = value;
1198 defaults->values[5 * defaults->nbAttrs + 3] = value + len;
1199 if (ctxt->external)
1200 defaults->values[5 * defaults->nbAttrs + 4] = BAD_CAST "external";
1201 else
1202 defaults->values[5 * defaults->nbAttrs + 4] = NULL;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001203 defaults->nbAttrs++;
1204
1205 return;
1206
1207mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001208 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001209 return;
1210}
1211
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001212/**
1213 * xmlAddSpecialAttr:
1214 * @ctxt: an XML parser context
1215 * @fullname: the element fullname
1216 * @fullattr: the attribute fullname
1217 * @type: the attribute type
1218 *
Daniel Veillardac4118d2008-01-11 05:27:32 +00001219 * Register this attribute type
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001220 */
1221static void
1222xmlAddSpecialAttr(xmlParserCtxtPtr ctxt,
1223 const xmlChar *fullname,
1224 const xmlChar *fullattr,
1225 int type)
1226{
1227 if (ctxt->attsSpecial == NULL) {
Daniel Veillard316a5c32005-01-23 22:56:39 +00001228 ctxt->attsSpecial = xmlHashCreateDict(10, ctxt->dict);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001229 if (ctxt->attsSpecial == NULL)
1230 goto mem_error;
1231 }
1232
Daniel Veillardac4118d2008-01-11 05:27:32 +00001233 if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1234 return;
1235
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +00001236 xmlHashAddEntry2(ctxt->attsSpecial, fullname, fullattr,
1237 (void *) (long) type);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001238 return;
1239
1240mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001241 xmlErrMemory(ctxt, NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001242 return;
1243}
1244
Daniel Veillard4432df22003-09-28 18:58:27 +00001245/**
Daniel Veillardac4118d2008-01-11 05:27:32 +00001246 * xmlCleanSpecialAttrCallback:
1247 *
1248 * Removes CDATA attributes from the special attribute table
1249 */
1250static void
1251xmlCleanSpecialAttrCallback(void *payload, void *data,
1252 const xmlChar *fullname, const xmlChar *fullattr,
1253 const xmlChar *unused ATTRIBUTE_UNUSED) {
1254 xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) data;
1255
Daniel Veillardb3edafd2008-01-11 08:00:57 +00001256 if (((long) payload) == XML_ATTRIBUTE_CDATA) {
Daniel Veillardac4118d2008-01-11 05:27:32 +00001257 xmlHashRemoveEntry2(ctxt->attsSpecial, fullname, fullattr, NULL);
1258 }
1259}
1260
1261/**
1262 * xmlCleanSpecialAttr:
1263 * @ctxt: an XML parser context
1264 *
1265 * Trim the list of attributes defined to remove all those of type
1266 * CDATA as they are not special. This call should be done when finishing
1267 * to parse the DTD and before starting to parse the document root.
1268 */
1269static void
1270xmlCleanSpecialAttr(xmlParserCtxtPtr ctxt)
1271{
1272 if (ctxt->attsSpecial == NULL)
1273 return;
1274
1275 xmlHashScanFull(ctxt->attsSpecial, xmlCleanSpecialAttrCallback, ctxt);
1276
1277 if (xmlHashSize(ctxt->attsSpecial) == 0) {
1278 xmlHashFree(ctxt->attsSpecial, NULL);
1279 ctxt->attsSpecial = NULL;
1280 }
1281 return;
1282}
1283
1284/**
Daniel Veillard4432df22003-09-28 18:58:27 +00001285 * xmlCheckLanguageID:
1286 * @lang: pointer to the string value
1287 *
1288 * Checks that the value conforms to the LanguageID production:
1289 *
1290 * NOTE: this is somewhat deprecated, those productions were removed from
1291 * the XML Second edition.
1292 *
1293 * [33] LanguageID ::= Langcode ('-' Subcode)*
1294 * [34] Langcode ::= ISO639Code | IanaCode | UserCode
1295 * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
1296 * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
1297 * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
1298 * [38] Subcode ::= ([a-z] | [A-Z])+
1299 *
1300 * Returns 1 if correct 0 otherwise
1301 **/
1302int
1303xmlCheckLanguageID(const xmlChar * lang)
1304{
1305 const xmlChar *cur = lang;
1306
1307 if (cur == NULL)
1308 return (0);
1309 if (((cur[0] == 'i') && (cur[1] == '-')) ||
1310 ((cur[0] == 'I') && (cur[1] == '-'))) {
1311 /*
1312 * IANA code
1313 */
1314 cur += 2;
1315 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
1316 ((cur[0] >= 'a') && (cur[0] <= 'z')))
1317 cur++;
1318 } else if (((cur[0] == 'x') && (cur[1] == '-')) ||
1319 ((cur[0] == 'X') && (cur[1] == '-'))) {
1320 /*
1321 * User code
1322 */
1323 cur += 2;
1324 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
1325 ((cur[0] >= 'a') && (cur[0] <= 'z')))
1326 cur++;
1327 } else if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
1328 ((cur[0] >= 'a') && (cur[0] <= 'z'))) {
1329 /*
1330 * ISO639
1331 */
1332 cur++;
1333 if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
1334 ((cur[0] >= 'a') && (cur[0] <= 'z')))
1335 cur++;
1336 else
1337 return (0);
1338 } else
1339 return (0);
1340 while (cur[0] != 0) { /* non input consuming */
1341 if (cur[0] != '-')
1342 return (0);
1343 cur++;
1344 if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
1345 ((cur[0] >= 'a') && (cur[0] <= 'z')))
1346 cur++;
1347 else
1348 return (0);
1349 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
1350 ((cur[0] >= 'a') && (cur[0] <= 'z')))
1351 cur++;
1352 }
1353 return (1);
1354}
1355
Owen Taylor3473f882001-02-23 17:55:21 +00001356/************************************************************************
1357 * *
Daniel Veillard0161e632008-08-28 15:36:32 +00001358 * Parser stacks related functions and macros *
Owen Taylor3473f882001-02-23 17:55:21 +00001359 * *
1360 ************************************************************************/
1361
Daniel Veillard8ed10722009-08-20 19:17:36 +02001362static xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
1363 const xmlChar ** str);
Owen Taylor3473f882001-02-23 17:55:21 +00001364
Daniel Veillard0fb18932003-09-07 09:14:37 +00001365#ifdef SAX2
1366/**
1367 * nsPush:
1368 * @ctxt: an XML parser context
1369 * @prefix: the namespace prefix or NULL
1370 * @URL: the namespace name
1371 *
1372 * Pushes a new parser namespace on top of the ns stack
1373 *
William M. Brack7b9154b2003-09-27 19:23:50 +00001374 * Returns -1 in case of error, -2 if the namespace should be discarded
1375 * and the index in the stack otherwise.
Daniel Veillard0fb18932003-09-07 09:14:37 +00001376 */
1377static int
1378nsPush(xmlParserCtxtPtr ctxt, const xmlChar *prefix, const xmlChar *URL)
1379{
Daniel Veillarddca8cc72003-09-26 13:53:14 +00001380 if (ctxt->options & XML_PARSE_NSCLEAN) {
1381 int i;
1382 for (i = 0;i < ctxt->nsNr;i += 2) {
1383 if (ctxt->nsTab[i] == prefix) {
1384 /* in scope */
1385 if (ctxt->nsTab[i + 1] == URL)
1386 return(-2);
1387 /* out of scope keep it */
1388 break;
1389 }
1390 }
1391 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00001392 if ((ctxt->nsMax == 0) || (ctxt->nsTab == NULL)) {
1393 ctxt->nsMax = 10;
1394 ctxt->nsNr = 0;
1395 ctxt->nsTab = (const xmlChar **)
1396 xmlMalloc(ctxt->nsMax * sizeof(xmlChar *));
1397 if (ctxt->nsTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001398 xmlErrMemory(ctxt, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00001399 ctxt->nsMax = 0;
1400 return (-1);
1401 }
1402 } else if (ctxt->nsNr >= ctxt->nsMax) {
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00001403 const xmlChar ** tmp;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001404 ctxt->nsMax *= 2;
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00001405 tmp = (const xmlChar **) xmlRealloc((char *) ctxt->nsTab,
1406 ctxt->nsMax * sizeof(ctxt->nsTab[0]));
1407 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001408 xmlErrMemory(ctxt, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00001409 ctxt->nsMax /= 2;
1410 return (-1);
1411 }
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00001412 ctxt->nsTab = tmp;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001413 }
1414 ctxt->nsTab[ctxt->nsNr++] = prefix;
1415 ctxt->nsTab[ctxt->nsNr++] = URL;
1416 return (ctxt->nsNr);
1417}
1418/**
1419 * nsPop:
1420 * @ctxt: an XML parser context
1421 * @nr: the number to pop
1422 *
1423 * Pops the top @nr parser prefix/namespace from the ns stack
1424 *
1425 * Returns the number of namespaces removed
1426 */
1427static int
1428nsPop(xmlParserCtxtPtr ctxt, int nr)
1429{
1430 int i;
1431
1432 if (ctxt->nsTab == NULL) return(0);
1433 if (ctxt->nsNr < nr) {
1434 xmlGenericError(xmlGenericErrorContext, "Pbm popping %d NS\n", nr);
1435 nr = ctxt->nsNr;
1436 }
1437 if (ctxt->nsNr <= 0)
1438 return (0);
Daniel Veillard34e3f642008-07-29 09:02:27 +00001439
Daniel Veillard0fb18932003-09-07 09:14:37 +00001440 for (i = 0;i < nr;i++) {
1441 ctxt->nsNr--;
1442 ctxt->nsTab[ctxt->nsNr] = NULL;
1443 }
1444 return(nr);
1445}
1446#endif
1447
1448static int
1449xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt, int nr) {
1450 const xmlChar **atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001451 int *attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001452 int maxatts;
1453
1454 if (ctxt->atts == NULL) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00001455 maxatts = 55; /* allow for 10 attrs by default */
Daniel Veillard0fb18932003-09-07 09:14:37 +00001456 atts = (const xmlChar **)
1457 xmlMalloc(maxatts * sizeof(xmlChar *));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001458 if (atts == NULL) goto mem_error;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001459 ctxt->atts = atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001460 attallocs = (int *) xmlMalloc((maxatts / 5) * sizeof(int));
1461 if (attallocs == NULL) goto mem_error;
1462 ctxt->attallocs = attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001463 ctxt->maxatts = maxatts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001464 } else if (nr + 5 > ctxt->maxatts) {
1465 maxatts = (nr + 5) * 2;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001466 atts = (const xmlChar **) xmlRealloc((void *) ctxt->atts,
1467 maxatts * sizeof(const xmlChar *));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001468 if (atts == NULL) goto mem_error;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001469 ctxt->atts = atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001470 attallocs = (int *) xmlRealloc((void *) ctxt->attallocs,
1471 (maxatts / 5) * sizeof(int));
1472 if (attallocs == NULL) goto mem_error;
1473 ctxt->attallocs = attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001474 ctxt->maxatts = maxatts;
1475 }
1476 return(ctxt->maxatts);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001477mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001478 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001479 return(-1);
Daniel Veillard0fb18932003-09-07 09:14:37 +00001480}
1481
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001482/**
1483 * inputPush:
1484 * @ctxt: an XML parser context
Daniel Veillard9d06d302002-01-22 18:15:52 +00001485 * @value: the parser input
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001486 *
1487 * Pushes a new parser input on top of the input stack
Daniel Veillard9d06d302002-01-22 18:15:52 +00001488 *
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001489 * Returns -1 in case of error, the index in the stack otherwise
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001490 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001491int
Daniel Veillard1c732d22002-11-30 11:22:59 +00001492inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value)
1493{
Daniel Veillard36e5cd52004-11-02 14:52:23 +00001494 if ((ctxt == NULL) || (value == NULL))
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001495 return(-1);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001496 if (ctxt->inputNr >= ctxt->inputMax) {
1497 ctxt->inputMax *= 2;
1498 ctxt->inputTab =
1499 (xmlParserInputPtr *) xmlRealloc(ctxt->inputTab,
1500 ctxt->inputMax *
1501 sizeof(ctxt->inputTab[0]));
1502 if (ctxt->inputTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001503 xmlErrMemory(ctxt, NULL);
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001504 xmlFreeInputStream(value);
1505 ctxt->inputMax /= 2;
1506 value = NULL;
1507 return (-1);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001508 }
1509 }
1510 ctxt->inputTab[ctxt->inputNr] = value;
1511 ctxt->input = value;
1512 return (ctxt->inputNr++);
1513}
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001514/**
Daniel Veillard1c732d22002-11-30 11:22:59 +00001515 * inputPop:
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001516 * @ctxt: an XML parser context
1517 *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001518 * Pops the top parser input from the input stack
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001519 *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001520 * Returns the input just removed
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001521 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001522xmlParserInputPtr
Daniel Veillard1c732d22002-11-30 11:22:59 +00001523inputPop(xmlParserCtxtPtr ctxt)
1524{
1525 xmlParserInputPtr ret;
1526
Daniel Veillard36e5cd52004-11-02 14:52:23 +00001527 if (ctxt == NULL)
1528 return(NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001529 if (ctxt->inputNr <= 0)
Daniel Veillard24505b02005-07-28 23:49:35 +00001530 return (NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001531 ctxt->inputNr--;
1532 if (ctxt->inputNr > 0)
1533 ctxt->input = ctxt->inputTab[ctxt->inputNr - 1];
1534 else
1535 ctxt->input = NULL;
1536 ret = ctxt->inputTab[ctxt->inputNr];
Daniel Veillard24505b02005-07-28 23:49:35 +00001537 ctxt->inputTab[ctxt->inputNr] = NULL;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001538 return (ret);
1539}
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001540/**
1541 * nodePush:
1542 * @ctxt: an XML parser context
Daniel Veillard9d06d302002-01-22 18:15:52 +00001543 * @value: the element node
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001544 *
1545 * Pushes a new element node on top of the node stack
Daniel Veillard9d06d302002-01-22 18:15:52 +00001546 *
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001547 * Returns -1 in case of error, the index in the stack otherwise
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001548 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001549int
Daniel Veillard1c732d22002-11-30 11:22:59 +00001550nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value)
1551{
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001552 if (ctxt == NULL) return(0);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001553 if (ctxt->nodeNr >= ctxt->nodeMax) {
Daniel Veillardd0cf7f62004-11-09 16:17:02 +00001554 xmlNodePtr *tmp;
1555
1556 tmp = (xmlNodePtr *) xmlRealloc(ctxt->nodeTab,
1557 ctxt->nodeMax * 2 *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001558 sizeof(ctxt->nodeTab[0]));
Daniel Veillardd0cf7f62004-11-09 16:17:02 +00001559 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001560 xmlErrMemory(ctxt, NULL);
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001561 return (-1);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001562 }
Daniel Veillardd0cf7f62004-11-09 16:17:02 +00001563 ctxt->nodeTab = tmp;
1564 ctxt->nodeMax *= 2;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001565 }
Daniel Veillard8915c152008-08-26 13:05:34 +00001566 if ((((unsigned int) ctxt->nodeNr) > xmlParserMaxDepth) &&
1567 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00001568 xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
Daniel Veillard8915c152008-08-26 13:05:34 +00001569 "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
Daniel Veillard4aede2e2003-10-17 12:43:59 +00001570 xmlParserMaxDepth);
Daniel Veillard3b2e4e12003-02-03 08:52:58 +00001571 ctxt->instate = XML_PARSER_EOF;
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001572 return(-1);
Daniel Veillard3b2e4e12003-02-03 08:52:58 +00001573 }
Daniel Veillard1c732d22002-11-30 11:22:59 +00001574 ctxt->nodeTab[ctxt->nodeNr] = value;
1575 ctxt->node = value;
1576 return (ctxt->nodeNr++);
1577}
Daniel Veillard8915c152008-08-26 13:05:34 +00001578
Daniel Veillard1c732d22002-11-30 11:22:59 +00001579/**
1580 * nodePop:
1581 * @ctxt: an XML parser context
1582 *
1583 * Pops the top element node from the node stack
1584 *
1585 * Returns the node just removed
Owen Taylor3473f882001-02-23 17:55:21 +00001586 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001587xmlNodePtr
Daniel Veillard1c732d22002-11-30 11:22:59 +00001588nodePop(xmlParserCtxtPtr ctxt)
1589{
1590 xmlNodePtr ret;
1591
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001592 if (ctxt == NULL) return(NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001593 if (ctxt->nodeNr <= 0)
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001594 return (NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001595 ctxt->nodeNr--;
1596 if (ctxt->nodeNr > 0)
1597 ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1];
1598 else
1599 ctxt->node = NULL;
1600 ret = ctxt->nodeTab[ctxt->nodeNr];
Daniel Veillard24505b02005-07-28 23:49:35 +00001601 ctxt->nodeTab[ctxt->nodeNr] = NULL;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001602 return (ret);
1603}
Daniel Veillarda2351322004-06-27 12:08:10 +00001604
1605#ifdef LIBXML_PUSH_ENABLED
Daniel Veillard1c732d22002-11-30 11:22:59 +00001606/**
Daniel Veillarde57ec792003-09-10 10:50:59 +00001607 * nameNsPush:
1608 * @ctxt: an XML parser context
1609 * @value: the element name
1610 * @prefix: the element prefix
1611 * @URI: the element namespace name
1612 *
1613 * Pushes a new element name/prefix/URL on top of the name stack
1614 *
1615 * Returns -1 in case of error, the index in the stack otherwise
1616 */
1617static int
1618nameNsPush(xmlParserCtxtPtr ctxt, const xmlChar * value,
1619 const xmlChar *prefix, const xmlChar *URI, int nsNr)
1620{
1621 if (ctxt->nameNr >= ctxt->nameMax) {
1622 const xmlChar * *tmp;
1623 void **tmp2;
1624 ctxt->nameMax *= 2;
1625 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1626 ctxt->nameMax *
1627 sizeof(ctxt->nameTab[0]));
1628 if (tmp == NULL) {
1629 ctxt->nameMax /= 2;
1630 goto mem_error;
1631 }
1632 ctxt->nameTab = tmp;
1633 tmp2 = (void **) xmlRealloc((void * *)ctxt->pushTab,
1634 ctxt->nameMax * 3 *
1635 sizeof(ctxt->pushTab[0]));
1636 if (tmp2 == NULL) {
1637 ctxt->nameMax /= 2;
1638 goto mem_error;
1639 }
1640 ctxt->pushTab = tmp2;
1641 }
1642 ctxt->nameTab[ctxt->nameNr] = value;
1643 ctxt->name = value;
1644 ctxt->pushTab[ctxt->nameNr * 3] = (void *) prefix;
1645 ctxt->pushTab[ctxt->nameNr * 3 + 1] = (void *) URI;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +00001646 ctxt->pushTab[ctxt->nameNr * 3 + 2] = (void *) (long) nsNr;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001647 return (ctxt->nameNr++);
1648mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001649 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001650 return (-1);
1651}
1652/**
1653 * nameNsPop:
1654 * @ctxt: an XML parser context
1655 *
1656 * Pops the top element/prefix/URI name from the name stack
1657 *
1658 * Returns the name just removed
1659 */
1660static const xmlChar *
1661nameNsPop(xmlParserCtxtPtr ctxt)
1662{
1663 const xmlChar *ret;
1664
1665 if (ctxt->nameNr <= 0)
Daniel Veillard24505b02005-07-28 23:49:35 +00001666 return (NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001667 ctxt->nameNr--;
1668 if (ctxt->nameNr > 0)
1669 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1670 else
1671 ctxt->name = NULL;
1672 ret = ctxt->nameTab[ctxt->nameNr];
1673 ctxt->nameTab[ctxt->nameNr] = NULL;
1674 return (ret);
1675}
Daniel Veillarda2351322004-06-27 12:08:10 +00001676#endif /* LIBXML_PUSH_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +00001677
1678/**
Daniel Veillard1c732d22002-11-30 11:22:59 +00001679 * namePush:
1680 * @ctxt: an XML parser context
1681 * @value: the element name
1682 *
1683 * Pushes a new element name on top of the name stack
1684 *
Daniel Veillarde57ec792003-09-10 10:50:59 +00001685 * Returns -1 in case of error, the index in the stack otherwise
Daniel Veillard1c732d22002-11-30 11:22:59 +00001686 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001687int
Daniel Veillard2fdbd322003-08-18 12:15:38 +00001688namePush(xmlParserCtxtPtr ctxt, const xmlChar * value)
Daniel Veillard1c732d22002-11-30 11:22:59 +00001689{
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001690 if (ctxt == NULL) return (-1);
1691
Daniel Veillard1c732d22002-11-30 11:22:59 +00001692 if (ctxt->nameNr >= ctxt->nameMax) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00001693 const xmlChar * *tmp;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001694 ctxt->nameMax *= 2;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001695 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
Daniel Veillard1c732d22002-11-30 11:22:59 +00001696 ctxt->nameMax *
1697 sizeof(ctxt->nameTab[0]));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001698 if (tmp == NULL) {
1699 ctxt->nameMax /= 2;
1700 goto mem_error;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001701 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00001702 ctxt->nameTab = tmp;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001703 }
1704 ctxt->nameTab[ctxt->nameNr] = value;
1705 ctxt->name = value;
1706 return (ctxt->nameNr++);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001707mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001708 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001709 return (-1);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001710}
1711/**
1712 * namePop:
1713 * @ctxt: an XML parser context
1714 *
1715 * Pops the top element name from the name stack
1716 *
1717 * Returns the name just removed
1718 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001719const xmlChar *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001720namePop(xmlParserCtxtPtr ctxt)
1721{
Daniel Veillard2fdbd322003-08-18 12:15:38 +00001722 const xmlChar *ret;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001723
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001724 if ((ctxt == NULL) || (ctxt->nameNr <= 0))
1725 return (NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001726 ctxt->nameNr--;
1727 if (ctxt->nameNr > 0)
1728 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1729 else
1730 ctxt->name = NULL;
1731 ret = ctxt->nameTab[ctxt->nameNr];
Daniel Veillard24505b02005-07-28 23:49:35 +00001732 ctxt->nameTab[ctxt->nameNr] = NULL;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001733 return (ret);
1734}
Owen Taylor3473f882001-02-23 17:55:21 +00001735
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001736static int spacePush(xmlParserCtxtPtr ctxt, int val) {
Owen Taylor3473f882001-02-23 17:55:21 +00001737 if (ctxt->spaceNr >= ctxt->spaceMax) {
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00001738 int *tmp;
1739
Owen Taylor3473f882001-02-23 17:55:21 +00001740 ctxt->spaceMax *= 2;
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00001741 tmp = (int *) xmlRealloc(ctxt->spaceTab,
1742 ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
1743 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001744 xmlErrMemory(ctxt, NULL);
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001745 ctxt->spaceMax /=2;
1746 return(-1);
Owen Taylor3473f882001-02-23 17:55:21 +00001747 }
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00001748 ctxt->spaceTab = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00001749 }
1750 ctxt->spaceTab[ctxt->spaceNr] = val;
1751 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr];
1752 return(ctxt->spaceNr++);
1753}
1754
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001755static int spacePop(xmlParserCtxtPtr ctxt) {
Owen Taylor3473f882001-02-23 17:55:21 +00001756 int ret;
1757 if (ctxt->spaceNr <= 0) return(0);
1758 ctxt->spaceNr--;
1759 if (ctxt->spaceNr > 0)
1760 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1];
1761 else
Daniel Veillarddbcbbd22006-06-18 19:55:20 +00001762 ctxt->space = &ctxt->spaceTab[0];
Owen Taylor3473f882001-02-23 17:55:21 +00001763 ret = ctxt->spaceTab[ctxt->spaceNr];
1764 ctxt->spaceTab[ctxt->spaceNr] = -1;
1765 return(ret);
1766}
1767
1768/*
1769 * Macros for accessing the content. Those should be used only by the parser,
1770 * and not exported.
1771 *
1772 * Dirty macros, i.e. one often need to make assumption on the context to
1773 * use them
1774 *
1775 * CUR_PTR return the current pointer to the xmlChar to be parsed.
1776 * To be used with extreme caution since operations consuming
1777 * characters may move the input buffer to a different location !
1778 * CUR returns the current xmlChar value, i.e. a 8 bit value if compiled
1779 * This should be used internally by the parser
1780 * only to compare to ASCII values otherwise it would break when
1781 * running with UTF-8 encoding.
1782 * RAW same as CUR but in the input buffer, bypass any token
1783 * extraction that may have been done
 *   NXT(n)  returns the n'th next xmlChar. Same as CUR it should be used only
 *           to compare on ASCII based substring.
1786 * SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
Daniel Veillard77a90a72003-03-22 00:04:05 +00001787 * strings without newlines within the parser.
1788 * NEXT1(l) Skip 1 xmlChar, and must also be used only to skip 1 non-newline ASCII
1789 * defined char within the parser.
Owen Taylor3473f882001-02-23 17:55:21 +00001790 * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
1791 *
1792 * NEXT Skip to the next character, this does the proper decoding
1793 * in UTF-8 mode. It also pop-up unfinished entities on the fly.
Daniel Veillard77a90a72003-03-22 00:04:05 +00001794 * NEXTL(l) Skip the current unicode character of l xmlChars long.
Owen Taylor3473f882001-02-23 17:55:21 +00001795 * CUR_CHAR(l) returns the current unicode character (int), set l
1796 * to the number of xmlChars used for the encoding [0-5].
1797 * CUR_SCHAR same but operate on a string instead of the context
1798 * COPY_BUF copy the current unicode char to the target buffer, increment
1799 * the index
1800 * GROW, SHRINK handling of input buffers
1801 */
1802
/*
 * Accessors on the current input of the "ctxt" variable in scope.
 * RAW and CUR expand to the same thing: the xmlChar at the current position.
 */
#define RAW (*(ctxt->input->cur))
#define CUR (*(ctxt->input->cur))
/* the xmlChar located (val) positions after the current one */
#define NXT(val) (ctxt->input->cur[(val)])
/* the current position pointer itself; usable as an lvalue */
#define CUR_PTR (ctxt->input->cur)
1807
/*
 * CMPn(s, c1, ..., cn): true when the first n bytes found at s are exactly
 * c1 ... cn, compared as unsigned char values.
 *
 * Every argument is parenthesized so that an expression argument (for
 * instance "ptr + 1") is evaluated as a whole before the cast or the
 * comparison applies, and the cast keeps the const qualifier of s.
 * s is still evaluated once per compared byte: pass an expression
 * without side effects.
 */
#define CMP4( s, c1, c2, c3, c4 ) \
  ( ((const unsigned char *) (s))[ 0 ] == (c1) && \
    ((const unsigned char *) (s))[ 1 ] == (c2) && \
    ((const unsigned char *) (s))[ 2 ] == (c3) && \
    ((const unsigned char *) (s))[ 3 ] == (c4) )
#define CMP5( s, c1, c2, c3, c4, c5 ) \
  ( CMP4( s, c1, c2, c3, c4 ) && \
    ((const unsigned char *) (s))[ 4 ] == (c5) )
#define CMP6( s, c1, c2, c3, c4, c5, c6 ) \
  ( CMP5( s, c1, c2, c3, c4, c5 ) && \
    ((const unsigned char *) (s))[ 5 ] == (c6) )
#define CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) \
  ( CMP6( s, c1, c2, c3, c4, c5, c6 ) && \
    ((const unsigned char *) (s))[ 6 ] == (c7) )
#define CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) \
  ( CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) && \
    ((const unsigned char *) (s))[ 7 ] == (c8) )
#define CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) \
  ( CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) && \
    ((const unsigned char *) (s))[ 8 ] == (c9) )
#define CMP10( s, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 ) \
  ( CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) && \
    ((const unsigned char *) (s))[ 9 ] == (c10) )
1825
/*
 * SKIP(val): advance the current input by (val) xmlChars, adding (val) to
 * ctxt->nbChars and to the column as well. The line counter is not touched.
 * Afterwards a '%' at the new position is passed to
 * xmlParserHandlePEReference(), and the input is popped when the new
 * position holds 0 and xmlParserInputGrow() returns <= 0.
 * val is evaluated three times.
 */
#define SKIP(val) do { \
    ctxt->nbChars += (val); \
    ctxt->input->cur += (val); \
    ctxt->input->col += (val); \
    if (*ctxt->input->cur == '%') \
        xmlParserHandlePEReference(ctxt); \
    if ((*ctxt->input->cur == 0) && \
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) \
        xmlPopInput(ctxt); \
  } while (0)
1833
/*
 * SKIPL(val): advance the current input by (val) xmlChars one at a time,
 * updating line and column for each: a '\n' starts a new line at column 1,
 * anything else adds one to the column. ctxt->nbChars grows by one per
 * xmlChar skipped.
 * Afterwards a '%' at the new position is passed to
 * xmlParserHandlePEReference(), and the input is popped when the new
 * position holds 0 and xmlParserInputGrow() returns <= 0.
 *
 * val is parenthesized in the loop test so that an expression argument
 * (e.g. a conditional expression) is compared as a whole. It is still
 * re-evaluated on every iteration.
 */
#define SKIPL(val) do { \
    int skipl; \
    for (skipl = 0; skipl < (val); skipl++) { \
        if (*(ctxt->input->cur) == '\n') { \
            ctxt->input->line++; ctxt->input->col = 1; \
        } else ctxt->input->col++; \
        ctxt->nbChars++; \
        ctxt->input->cur++; \
    } \
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt); \
    if ((*ctxt->input->cur == 0) && \
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) \
        xmlPopInput(ctxt); \
  } while (0)
1848
Daniel Veillarda880b122003-04-21 21:36:41 +00001849#define SHRINK if ((ctxt->progressive == 0) && \
Daniel Veillard6155d8a2003-08-19 15:01:28 +00001850 (ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \
1851 (ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \
Daniel Veillard46de64e2002-05-29 08:21:33 +00001852 xmlSHRINK (ctxt);
1853
1854static void xmlSHRINK (xmlParserCtxtPtr ctxt) {
1855 xmlParserInputShrink(ctxt->input);
1856 if ((*ctxt->input->cur == 0) &&
1857 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1858 xmlPopInput(ctxt);
Daniel Veillard48b2f892001-02-25 16:11:03 +00001859 }
Owen Taylor3473f882001-02-23 17:55:21 +00001860
Daniel Veillarda880b122003-04-21 21:36:41 +00001861#define GROW if ((ctxt->progressive == 0) && \
1862 (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK)) \
Daniel Veillard46de64e2002-05-29 08:21:33 +00001863 xmlGROW (ctxt);
1864
1865static void xmlGROW (xmlParserCtxtPtr ctxt) {
1866 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1867 if ((*ctxt->input->cur == 0) &&
1868 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1869 xmlPopInput(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +00001870}
Owen Taylor3473f882001-02-23 17:55:21 +00001871
/* SKIP_BLANKS: shorthand for xmlSkipBlankChars() on the context in scope */
#define SKIP_BLANKS xmlSkipBlankChars(ctxt)

/* NEXT: shorthand for xmlNextChar() on the context in scope */
#define NEXT xmlNextChar(ctxt)

/*
 * NEXT1: step over exactly one xmlChar, adding one to the column and to
 * ctxt->nbChars; the line counter is not touched. When the new position
 * holds 0, xmlParserInputGrow() is called on the current input.
 * The expansion is a brace block, not a do/while(0) statement.
 */
#define NEXT1 { \
        ctxt->nbChars++; \
        ctxt->input->col++; \
        ctxt->input->cur++; \
        if (*ctxt->input->cur == 0) \
            xmlParserInputGrow(ctxt->input, INPUT_CHUNK); \
    }
1883
/*
 * NEXTL(l): step over the current character, which occupies l xmlChars.
 * A '\n' at the current position starts a new line at column 1, anything
 * else adds one to the column; the position then moves forward by l.
 * A '%' at the new position is passed to xmlParserHandlePEReference().
 * ctxt->nbChars is not updated by this macro.
 */
#define NEXTL(l) do { \
    if (*(ctxt->input->cur) != '\n') { \
        ctxt->input->col++; \
    } else { \
        ctxt->input->line++; \
        ctxt->input->col = 1; \
    } \
    ctxt->input->cur += (l); \
    if (*ctxt->input->cur == '%') \
        xmlParserHandlePEReference(ctxt); \
  } while (0)
1891
/*
 * CUR_CHAR(l): xmlCurrentChar() on the context in scope; l is the variable
 * whose address is passed as the second argument.
 * CUR_SCHAR(s, l): same with xmlStringCurrentChar() on the string s.
 */
#define CUR_CHAR(l) xmlCurrentChar(ctxt, &(l))
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, (s), &(l))

/*
 * COPY_BUF(l, b, i, v): append character v to buffer b at index i.
 * When l is 1 the value is stored as a single xmlChar and i grows by one;
 * otherwise xmlCopyCharMultiByte() writes it at &b[i] and i grows by the
 * value that call returns.
 *
 * The body is wrapped in do/while(0) and the arguments are parenthesized
 * so the macro behaves as one statement and expression arguments are
 * taken as a whole. i is evaluated more than once and must be an lvalue
 * without side effects.
 */
#define COPY_BUF(l,b,i,v) \
    do { \
        if ((l) == 1) \
            (b)[(i)++] = (xmlChar) (v); \
        else \
            (i) += xmlCopyCharMultiByte(&(b)[i], (v)); \
    } while (0)
Owen Taylor3473f882001-02-23 17:55:21 +00001898
1899/**
1900 * xmlSkipBlankChars:
1901 * @ctxt: the XML parser context
1902 *
1903 * skip all blanks character found at that point in the input streams.
1904 * It pops up finished entities in the process if allowable at that point.
1905 *
1906 * Returns the number of space chars skipped
1907 */
1908
1909int
1910xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
Daniel Veillard02141ea2001-04-30 11:46:40 +00001911 int res = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001912
1913 /*
1914 * It's Okay to use CUR/NEXT here since all the blanks are on
1915 * the ASCII range.
1916 */
Daniel Veillard02141ea2001-04-30 11:46:40 +00001917 if ((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) {
1918 const xmlChar *cur;
Owen Taylor3473f882001-02-23 17:55:21 +00001919 /*
Daniel Veillard02141ea2001-04-30 11:46:40 +00001920 * if we are in the document content, go really fast
Owen Taylor3473f882001-02-23 17:55:21 +00001921 */
Daniel Veillard02141ea2001-04-30 11:46:40 +00001922 cur = ctxt->input->cur;
William M. Brack76e95df2003-10-18 16:20:14 +00001923 while (IS_BLANK_CH(*cur)) {
Daniel Veillard02141ea2001-04-30 11:46:40 +00001924 if (*cur == '\n') {
1925 ctxt->input->line++; ctxt->input->col = 1;
1926 }
1927 cur++;
1928 res++;
1929 if (*cur == 0) {
1930 ctxt->input->cur = cur;
1931 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1932 cur = ctxt->input->cur;
1933 }
1934 }
1935 ctxt->input->cur = cur;
1936 } else {
1937 int cur;
1938 do {
1939 cur = CUR;
Daniel Veillard7da92702005-01-23 20:15:53 +00001940 while (IS_BLANK_CH(cur)) { /* CHECKED tstblanks.xml */
Daniel Veillard02141ea2001-04-30 11:46:40 +00001941 NEXT;
1942 cur = CUR;
1943 res++;
1944 }
1945 while ((cur == 0) && (ctxt->inputNr > 1) &&
1946 (ctxt->instate != XML_PARSER_COMMENT)) {
1947 xmlPopInput(ctxt);
1948 cur = CUR;
1949 }
1950 /*
1951 * Need to handle support of entities branching here
1952 */
1953 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);
1954 } while (IS_BLANK(cur)); /* CHECKED tstblanks.xml */
1955 }
Owen Taylor3473f882001-02-23 17:55:21 +00001956 return(res);
1957}
1958
1959/************************************************************************
1960 * *
1961 * Commodity functions to handle entities *
1962 * *
1963 ************************************************************************/
1964
1965/**
1966 * xmlPopInput:
1967 * @ctxt: an XML parser context
1968 *
1969 * xmlPopInput: the current input pointed by ctxt->input came to an end
1970 * pop it and return the next char.
1971 *
1972 * Returns the current xmlChar in the parser context
1973 */
1974xmlChar
1975xmlPopInput(xmlParserCtxtPtr ctxt) {
Daniel Veillarda82b1822004-11-08 16:24:57 +00001976 if ((ctxt == NULL) || (ctxt->inputNr <= 1)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00001977 if (xmlParserDebugEntities)
1978 xmlGenericError(xmlGenericErrorContext,
1979 "Popping input %d\n", ctxt->inputNr);
1980 xmlFreeInputStream(inputPop(ctxt));
Daniel Veillard561b7f82002-03-20 21:55:57 +00001981 if ((*ctxt->input->cur == 0) &&
Owen Taylor3473f882001-02-23 17:55:21 +00001982 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1983 return(xmlPopInput(ctxt));
1984 return(CUR);
1985}
1986
1987/**
1988 * xmlPushInput:
1989 * @ctxt: an XML parser context
1990 * @input: an XML parser input fragment (entity, XML fragment ...).
1991 *
1992 * xmlPushInput: switch to a new input stream which is stacked on top
1993 * of the previous one(s).
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001994 * Returns -1 in case of error or the index in the input stack
Owen Taylor3473f882001-02-23 17:55:21 +00001995 */
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001996int
Owen Taylor3473f882001-02-23 17:55:21 +00001997xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001998 int ret;
1999 if (input == NULL) return(-1);
Owen Taylor3473f882001-02-23 17:55:21 +00002000
2001 if (xmlParserDebugEntities) {
2002 if ((ctxt->input != NULL) && (ctxt->input->filename))
2003 xmlGenericError(xmlGenericErrorContext,
2004 "%s(%d): ", ctxt->input->filename,
2005 ctxt->input->line);
2006 xmlGenericError(xmlGenericErrorContext,
2007 "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
2008 }
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00002009 ret = inputPush(ctxt, input);
Owen Taylor3473f882001-02-23 17:55:21 +00002010 GROW;
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00002011 return(ret);
Owen Taylor3473f882001-02-23 17:55:21 +00002012}
2013
2014/**
2015 * xmlParseCharRef:
2016 * @ctxt: an XML parser context
2017 *
2018 * parse Reference declarations
2019 *
2020 * [66] CharRef ::= '&#' [0-9]+ ';' |
2021 * '&#x' [0-9a-fA-F]+ ';'
2022 *
2023 * [ WFC: Legal Character ]
2024 * Characters referred to using character references must match the
2025 * production for Char.
2026 *
2027 * Returns the value parsed (as an int), 0 in case of error
2028 */
2029int
2030xmlParseCharRef(xmlParserCtxtPtr ctxt) {
Daniel Veillard50582112001-03-26 22:52:16 +00002031 unsigned int val = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002032 int count = 0;
Daniel Veillard37fd3072004-06-03 11:22:31 +00002033 unsigned int outofrange = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002034
Owen Taylor3473f882001-02-23 17:55:21 +00002035 /*
2036 * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
2037 */
Daniel Veillard561b7f82002-03-20 21:55:57 +00002038 if ((RAW == '&') && (NXT(1) == '#') &&
Owen Taylor3473f882001-02-23 17:55:21 +00002039 (NXT(2) == 'x')) {
2040 SKIP(3);
2041 GROW;
2042 while (RAW != ';') { /* loop blocked by count */
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00002043 if (count++ > 20) {
2044 count = 0;
2045 GROW;
2046 }
2047 if ((RAW >= '0') && (RAW <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +00002048 val = val * 16 + (CUR - '0');
2049 else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
2050 val = val * 16 + (CUR - 'a') + 10;
2051 else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
2052 val = val * 16 + (CUR - 'A') + 10;
2053 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002054 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002055 val = 0;
2056 break;
2057 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00002058 if (val > 0x10FFFF)
2059 outofrange = val;
2060
Owen Taylor3473f882001-02-23 17:55:21 +00002061 NEXT;
2062 count++;
2063 }
2064 if (RAW == ';') {
2065 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
Daniel Veillard77a90a72003-03-22 00:04:05 +00002066 ctxt->input->col++;
Owen Taylor3473f882001-02-23 17:55:21 +00002067 ctxt->nbChars ++;
2068 ctxt->input->cur++;
2069 }
Daniel Veillard561b7f82002-03-20 21:55:57 +00002070 } else if ((RAW == '&') && (NXT(1) == '#')) {
Owen Taylor3473f882001-02-23 17:55:21 +00002071 SKIP(2);
2072 GROW;
2073 while (RAW != ';') { /* loop blocked by count */
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00002074 if (count++ > 20) {
2075 count = 0;
2076 GROW;
2077 }
2078 if ((RAW >= '0') && (RAW <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +00002079 val = val * 10 + (CUR - '0');
2080 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002081 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002082 val = 0;
2083 break;
2084 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00002085 if (val > 0x10FFFF)
2086 outofrange = val;
2087
Owen Taylor3473f882001-02-23 17:55:21 +00002088 NEXT;
2089 count++;
2090 }
2091 if (RAW == ';') {
2092 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
Daniel Veillard77a90a72003-03-22 00:04:05 +00002093 ctxt->input->col++;
Owen Taylor3473f882001-02-23 17:55:21 +00002094 ctxt->nbChars ++;
2095 ctxt->input->cur++;
2096 }
2097 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002098 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002099 }
2100
2101 /*
2102 * [ WFC: Legal Character ]
2103 * Characters referred to using character references must match the
2104 * production for Char.
2105 */
Daniel Veillard37fd3072004-06-03 11:22:31 +00002106 if ((IS_CHAR(val) && (outofrange == 0))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002107 return(val);
2108 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00002109 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2110 "xmlParseCharRef: invalid xmlChar value %d\n",
2111 val);
Owen Taylor3473f882001-02-23 17:55:21 +00002112 }
2113 return(0);
2114}
2115
2116/**
2117 * xmlParseStringCharRef:
2118 * @ctxt: an XML parser context
2119 * @str: a pointer to an index in the string
2120 *
2121 * parse Reference declarations, variant parsing from a string rather
2122 * than an an input flow.
2123 *
2124 * [66] CharRef ::= '&#' [0-9]+ ';' |
2125 * '&#x' [0-9a-fA-F]+ ';'
2126 *
2127 * [ WFC: Legal Character ]
2128 * Characters referred to using character references must match the
2129 * production for Char.
2130 *
2131 * Returns the value parsed (as an int), 0 in case of error, str will be
2132 * updated to the current value of the index
2133 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00002134static int
Owen Taylor3473f882001-02-23 17:55:21 +00002135xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
2136 const xmlChar *ptr;
2137 xmlChar cur;
Daniel Veillard37fd3072004-06-03 11:22:31 +00002138 unsigned int val = 0;
2139 unsigned int outofrange = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002140
2141 if ((str == NULL) || (*str == NULL)) return(0);
2142 ptr = *str;
2143 cur = *ptr;
2144 if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
2145 ptr += 3;
2146 cur = *ptr;
2147 while (cur != ';') { /* Non input consuming loop */
2148 if ((cur >= '0') && (cur <= '9'))
2149 val = val * 16 + (cur - '0');
2150 else if ((cur >= 'a') && (cur <= 'f'))
2151 val = val * 16 + (cur - 'a') + 10;
2152 else if ((cur >= 'A') && (cur <= 'F'))
2153 val = val * 16 + (cur - 'A') + 10;
2154 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002155 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002156 val = 0;
2157 break;
2158 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00002159 if (val > 0x10FFFF)
2160 outofrange = val;
2161
Owen Taylor3473f882001-02-23 17:55:21 +00002162 ptr++;
2163 cur = *ptr;
2164 }
2165 if (cur == ';')
2166 ptr++;
2167 } else if ((cur == '&') && (ptr[1] == '#')){
2168 ptr += 2;
2169 cur = *ptr;
2170 while (cur != ';') { /* Non input consuming loops */
2171 if ((cur >= '0') && (cur <= '9'))
2172 val = val * 10 + (cur - '0');
2173 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002174 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002175 val = 0;
2176 break;
2177 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00002178 if (val > 0x10FFFF)
2179 outofrange = val;
2180
Owen Taylor3473f882001-02-23 17:55:21 +00002181 ptr++;
2182 cur = *ptr;
2183 }
2184 if (cur == ';')
2185 ptr++;
2186 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002187 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002188 return(0);
2189 }
2190 *str = ptr;
2191
2192 /*
2193 * [ WFC: Legal Character ]
2194 * Characters referred to using character references must match the
2195 * production for Char.
2196 */
Daniel Veillard37fd3072004-06-03 11:22:31 +00002197 if ((IS_CHAR(val) && (outofrange == 0))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002198 return(val);
2199 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00002200 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2201 "xmlParseStringCharRef: invalid xmlChar value %d\n",
2202 val);
Owen Taylor3473f882001-02-23 17:55:21 +00002203 }
2204 return(0);
2205}
2206
2207/**
Daniel Veillardf5582f12002-06-11 10:08:16 +00002208 * xmlNewBlanksWrapperInputStream:
2209 * @ctxt: an XML parser context
2210 * @entity: an Entity pointer
2211 *
2212 * Create a new input stream for wrapping
2213 * blanks around a PEReference
2214 *
2215 * Returns the new input stream or NULL
2216 */
2217
2218static void deallocblankswrapper (xmlChar *str) {xmlFree(str);}
2219
Daniel Veillardf4862f02002-09-10 11:13:43 +00002220static xmlParserInputPtr
Daniel Veillardf5582f12002-06-11 10:08:16 +00002221xmlNewBlanksWrapperInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
2222 xmlParserInputPtr input;
2223 xmlChar *buffer;
2224 size_t length;
2225 if (entity == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002226 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
2227 "xmlNewBlanksWrapperInputStream entity\n");
Daniel Veillardf5582f12002-06-11 10:08:16 +00002228 return(NULL);
2229 }
2230 if (xmlParserDebugEntities)
2231 xmlGenericError(xmlGenericErrorContext,
2232 "new blanks wrapper for entity: %s\n", entity->name);
2233 input = xmlNewInputStream(ctxt);
2234 if (input == NULL) {
2235 return(NULL);
2236 }
2237 length = xmlStrlen(entity->name) + 5;
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002238 buffer = xmlMallocAtomic(length);
Daniel Veillardf5582f12002-06-11 10:08:16 +00002239 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002240 xmlErrMemory(ctxt, NULL);
Daniel Veillard68b6e022008-03-31 09:26:00 +00002241 xmlFree(input);
Daniel Veillardf5582f12002-06-11 10:08:16 +00002242 return(NULL);
2243 }
2244 buffer [0] = ' ';
2245 buffer [1] = '%';
2246 buffer [length-3] = ';';
2247 buffer [length-2] = ' ';
2248 buffer [length-1] = 0;
2249 memcpy(buffer + 2, entity->name, length - 5);
2250 input->free = deallocblankswrapper;
2251 input->base = buffer;
2252 input->cur = buffer;
2253 input->length = length;
2254 input->end = &buffer[length];
2255 return(input);
2256}
2257
2258/**
Owen Taylor3473f882001-02-23 17:55:21 +00002259 * xmlParserHandlePEReference:
2260 * @ctxt: the parser context
2261 *
2262 * [69] PEReference ::= '%' Name ';'
2263 *
2264 * [ WFC: No Recursion ]
2265 * A parsed entity must not contain a recursive
2266 * reference to itself, either directly or indirectly.
2267 *
2268 * [ WFC: Entity Declared ]
2269 * In a document without any DTD, a document with only an internal DTD
2270 * subset which contains no parameter entity references, or a document
2271 * with "standalone='yes'", ... ... The declaration of a parameter
2272 * entity must precede any reference to it...
2273 *
2274 * [ VC: Entity Declared ]
2275 * In a document with an external subset or external parameter entities
2276 * with "standalone='no'", ... ... The declaration of a parameter entity
2277 * must precede any reference to it...
2278 *
2279 * [ WFC: In DTD ]
2280 * Parameter-entity references may only appear in the DTD.
2281 * NOTE: misleading but this is handled.
2282 *
2283 * A PEReference may have been detected in the current input stream
2284 * the handling is done accordingly to
2285 * http://www.w3.org/TR/REC-xml#entproc
2286 * i.e.
2287 * - Included in literal in entity values
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002288 * - Included as Parameter Entity reference within DTDs
Owen Taylor3473f882001-02-23 17:55:21 +00002289 */
2290void
2291xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002292 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00002293 xmlEntityPtr entity = NULL;
2294 xmlParserInputPtr input;
2295
Owen Taylor3473f882001-02-23 17:55:21 +00002296 if (RAW != '%') return;
2297 switch(ctxt->instate) {
2298 case XML_PARSER_CDATA_SECTION:
2299 return;
2300 case XML_PARSER_COMMENT:
2301 return;
2302 case XML_PARSER_START_TAG:
2303 return;
2304 case XML_PARSER_END_TAG:
2305 return;
2306 case XML_PARSER_EOF:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002307 xmlFatalErr(ctxt, XML_ERR_PEREF_AT_EOF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002308 return;
2309 case XML_PARSER_PROLOG:
2310 case XML_PARSER_START:
2311 case XML_PARSER_MISC:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002312 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_PROLOG, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002313 return;
2314 case XML_PARSER_ENTITY_DECL:
2315 case XML_PARSER_CONTENT:
2316 case XML_PARSER_ATTRIBUTE_VALUE:
2317 case XML_PARSER_PI:
2318 case XML_PARSER_SYSTEM_LITERAL:
Daniel Veillard4a7ae502002-02-18 19:18:17 +00002319 case XML_PARSER_PUBLIC_LITERAL:
Owen Taylor3473f882001-02-23 17:55:21 +00002320 /* we just ignore it there */
2321 return;
2322 case XML_PARSER_EPILOG:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002323 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_EPILOG, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002324 return;
2325 case XML_PARSER_ENTITY_VALUE:
2326 /*
2327 * NOTE: in the case of entity values, we don't do the
2328 * substitution here since we need the literal
2329 * entity value to be able to save the internal
2330 * subset of the document.
2331 * This will be handled by xmlStringDecodeEntities
2332 */
2333 return;
2334 case XML_PARSER_DTD:
2335 /*
2336 * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
2337 * In the internal DTD subset, parameter-entity references
2338 * can occur only where markup declarations can occur, not
2339 * within markup declarations.
2340 * In that case this is handled in xmlParseMarkupDecl
2341 */
2342 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
2343 return;
William M. Brack76e95df2003-10-18 16:20:14 +00002344 if (IS_BLANK_CH(NXT(1)) || NXT(1) == 0)
Daniel Veillardf5582f12002-06-11 10:08:16 +00002345 return;
Owen Taylor3473f882001-02-23 17:55:21 +00002346 break;
2347 case XML_PARSER_IGNORE:
2348 return;
2349 }
2350
2351 NEXT;
2352 name = xmlParseName(ctxt);
2353 if (xmlParserDebugEntities)
2354 xmlGenericError(xmlGenericErrorContext,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002355 "PEReference: %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00002356 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002357 xmlFatalErr(ctxt, XML_ERR_PEREF_NO_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002358 } else {
2359 if (RAW == ';') {
2360 NEXT;
2361 if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL))
2362 entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
2363 if (entity == NULL) {
2364
2365 /*
2366 * [ WFC: Entity Declared ]
2367 * In a document without any DTD, a document with only an
2368 * internal DTD subset which contains no parameter entity
2369 * references, or a document with "standalone='yes'", ...
2370 * ... The declaration of a parameter entity must precede
2371 * any reference to it...
2372 */
2373 if ((ctxt->standalone == 1) ||
2374 ((ctxt->hasExternalSubset == 0) &&
2375 (ctxt->hasPErefs == 0))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00002376 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00002377 "PEReference: %%%s; not found\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00002378 } else {
2379 /*
2380 * [ VC: Entity Declared ]
2381 * In a document with an external subset or external
2382 * parameter entities with "standalone='no'", ...
2383 * ... The declaration of a parameter entity must precede
2384 * any reference to it...
2385 */
Daniel Veillard24eb9782003-10-04 21:08:09 +00002386 if ((ctxt->validate) && (ctxt->vctxt.error != NULL)) {
2387 xmlValidityError(ctxt, XML_WAR_UNDECLARED_ENTITY,
2388 "PEReference: %%%s; not found\n",
Daniel Veillardae0765b2008-07-31 19:54:59 +00002389 name, NULL);
Daniel Veillard24eb9782003-10-04 21:08:09 +00002390 } else
2391 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
2392 "PEReference: %%%s; not found\n",
2393 name, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002394 ctxt->valid = 0;
2395 }
Daniel Veillardf5582f12002-06-11 10:08:16 +00002396 } else if (ctxt->input->free != deallocblankswrapper) {
2397 input = xmlNewBlanksWrapperInputStream(ctxt, entity);
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00002398 if (xmlPushInput(ctxt, input) < 0)
2399 return;
Owen Taylor3473f882001-02-23 17:55:21 +00002400 } else {
2401 if ((entity->etype == XML_INTERNAL_PARAMETER_ENTITY) ||
2402 (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)) {
Daniel Veillard87a764e2001-06-20 17:41:10 +00002403 xmlChar start[4];
2404 xmlCharEncoding enc;
2405
Owen Taylor3473f882001-02-23 17:55:21 +00002406 /*
2407 * handle the extra spaces added before and after
2408 * c.f. http://www.w3.org/TR/REC-xml#as-PE
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002409 * this is done independently.
Owen Taylor3473f882001-02-23 17:55:21 +00002410 */
2411 input = xmlNewEntityInputStream(ctxt, entity);
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00002412 if (xmlPushInput(ctxt, input) < 0)
2413 return;
Daniel Veillard87a764e2001-06-20 17:41:10 +00002414
2415 /*
2416 * Get the 4 first bytes and decode the charset
2417 * if enc != XML_CHAR_ENCODING_NONE
2418 * plug some encoding conversion routines.
William M. Bracka0c48ad2004-04-16 15:58:29 +00002419 * Note that, since we may have some non-UTF8
2420 * encoding (like UTF16, bug 135229), the 'length'
2421 * is not known, but we can calculate based upon
2422 * the amount of data in the buffer.
Daniel Veillard87a764e2001-06-20 17:41:10 +00002423 */
2424 GROW
William M. Bracka0c48ad2004-04-16 15:58:29 +00002425 if ((ctxt->input->end - ctxt->input->cur)>=4) {
Daniel Veillarde059b892002-06-13 15:32:10 +00002426 start[0] = RAW;
2427 start[1] = NXT(1);
2428 start[2] = NXT(2);
2429 start[3] = NXT(3);
2430 enc = xmlDetectCharEncoding(start, 4);
2431 if (enc != XML_CHAR_ENCODING_NONE) {
2432 xmlSwitchEncoding(ctxt, enc);
2433 }
Daniel Veillard87a764e2001-06-20 17:41:10 +00002434 }
2435
Owen Taylor3473f882001-02-23 17:55:21 +00002436 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
Daniel Veillarda07050d2003-10-19 14:46:32 +00002437 (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l' )) &&
2438 (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002439 xmlParseTextDecl(ctxt);
2440 }
Owen Taylor3473f882001-02-23 17:55:21 +00002441 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00002442 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
2443 "PEReference: %s is not a parameter entity\n",
2444 name);
Owen Taylor3473f882001-02-23 17:55:21 +00002445 }
2446 }
2447 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002448 xmlFatalErr(ctxt, XML_ERR_PEREF_SEMICOL_MISSING, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002449 }
Owen Taylor3473f882001-02-23 17:55:21 +00002450 }
2451}
2452
/*
 * Macro used to grow the current buffer.
 *
 * @buffer: name of an xmlChar * variable; a companion integer variable
 *          named <buffer>_size must be in scope and holds the capacity.
 * @n:      number of extra bytes added on top of doubling the capacity.
 *
 * On reallocation failure control jumps to a "mem_error" label that the
 * enclosing function must provide; @buffer itself is left untouched in
 * that case so the error path can still release it.
 */
#define growBuffer(buffer, n) {                                         \
    xmlChar *tmp;                                                       \
    buffer##_size *= 2;                                                 \
    buffer##_size += (n);                                               \
    tmp = (xmlChar *)                                                   \
            xmlRealloc(buffer, buffer##_size * sizeof(xmlChar));        \
    if (tmp == NULL) goto mem_error;                                    \
    buffer = tmp;                                                       \
}
2465
2466/**
Daniel Veillard7a02cfe2003-09-25 12:18:34 +00002467 * xmlStringLenDecodeEntities:
Owen Taylor3473f882001-02-23 17:55:21 +00002468 * @ctxt: the parser context
2469 * @str: the input string
Daniel Veillarde57ec792003-09-10 10:50:59 +00002470 * @len: the string length
Owen Taylor3473f882001-02-23 17:55:21 +00002471 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2472 * @end: an end marker xmlChar, 0 if none
2473 * @end2: an end marker xmlChar, 0 if none
2474 * @end3: an end marker xmlChar, 0 if none
2475 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002476 * Takes a entity string content and process to do the adequate substitutions.
Owen Taylor3473f882001-02-23 17:55:21 +00002477 *
2478 * [67] Reference ::= EntityRef | CharRef
2479 *
2480 * [69] PEReference ::= '%' Name ';'
2481 *
2482 * Returns A newly allocated string with the substitution done. The caller
2483 * must deallocate it !
2484 */
2485xmlChar *
Daniel Veillarde57ec792003-09-10 10:50:59 +00002486xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2487 int what, xmlChar end, xmlChar end2, xmlChar end3) {
Owen Taylor3473f882001-02-23 17:55:21 +00002488 xmlChar *buffer = NULL;
2489 int buffer_size = 0;
2490
2491 xmlChar *current = NULL;
Daniel Veillard68b6e022008-03-31 09:26:00 +00002492 xmlChar *rep = NULL;
Daniel Veillarde57ec792003-09-10 10:50:59 +00002493 const xmlChar *last;
Owen Taylor3473f882001-02-23 17:55:21 +00002494 xmlEntityPtr ent;
2495 int c,l;
2496 int nbchars = 0;
2497
Daniel Veillarda82b1822004-11-08 16:24:57 +00002498 if ((ctxt == NULL) || (str == NULL) || (len < 0))
Owen Taylor3473f882001-02-23 17:55:21 +00002499 return(NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00002500 last = str + len;
Owen Taylor3473f882001-02-23 17:55:21 +00002501
Daniel Veillard0161e632008-08-28 15:36:32 +00002502 if (((ctxt->depth > 40) &&
2503 ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
2504 (ctxt->depth > 1024)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002505 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002506 return(NULL);
2507 }
2508
2509 /*
2510 * allocate a translation buffer.
2511 */
2512 buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002513 buffer = (xmlChar *) xmlMallocAtomic(buffer_size * sizeof(xmlChar));
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002514 if (buffer == NULL) goto mem_error;
Owen Taylor3473f882001-02-23 17:55:21 +00002515
2516 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002517 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +00002518 * we are operating on already parsed values.
2519 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00002520 if (str < last)
2521 c = CUR_SCHAR(str, l);
2522 else
2523 c = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002524 while ((c != 0) && (c != end) && /* non input consuming loop */
2525 (c != end2) && (c != end3)) {
2526
2527 if (c == 0) break;
2528 if ((c == '&') && (str[1] == '#')) {
2529 int val = xmlParseStringCharRef(ctxt, &str);
2530 if (val != 0) {
2531 COPY_BUF(0,buffer,nbchars,val);
2532 }
Daniel Veillardbedc9772005-09-28 21:42:15 +00002533 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
Daniel Veillard0161e632008-08-28 15:36:32 +00002534 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
Daniel Veillardbedc9772005-09-28 21:42:15 +00002535 }
Owen Taylor3473f882001-02-23 17:55:21 +00002536 } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
2537 if (xmlParserDebugEntities)
2538 xmlGenericError(xmlGenericErrorContext,
2539 "String decoding Entity Reference: %.30s\n",
2540 str);
2541 ent = xmlParseStringEntityRef(ctxt, &str);
Daniel Veillard8915c152008-08-26 13:05:34 +00002542 if ((ctxt->lastError.code == XML_ERR_ENTITY_LOOP) ||
2543 (ctxt->lastError.code == XML_ERR_INTERNAL_ERROR))
Daniel Veillard4bf899b2008-08-20 17:04:30 +00002544 goto int_error;
Daniel Veillard4bf899b2008-08-20 17:04:30 +00002545 if (ent != NULL)
Daniel Veillardf4f4e482008-08-25 08:57:48 +00002546 ctxt->nbentities += ent->checked;
Owen Taylor3473f882001-02-23 17:55:21 +00002547 if ((ent != NULL) &&
2548 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
2549 if (ent->content != NULL) {
2550 COPY_BUF(0,buffer,nbchars,ent->content[0]);
Daniel Veillardbedc9772005-09-28 21:42:15 +00002551 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
Daniel Veillard0161e632008-08-28 15:36:32 +00002552 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
Daniel Veillardbedc9772005-09-28 21:42:15 +00002553 }
Owen Taylor3473f882001-02-23 17:55:21 +00002554 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00002555 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
2556 "predefined entity has no content\n");
Owen Taylor3473f882001-02-23 17:55:21 +00002557 }
2558 } else if ((ent != NULL) && (ent->content != NULL)) {
Owen Taylor3473f882001-02-23 17:55:21 +00002559 ctxt->depth++;
2560 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2561 0, 0, 0);
2562 ctxt->depth--;
Daniel Veillard0161e632008-08-28 15:36:32 +00002563
Owen Taylor3473f882001-02-23 17:55:21 +00002564 if (rep != NULL) {
2565 current = rep;
2566 while (*current != 0) { /* non input consuming loop */
2567 buffer[nbchars++] = *current++;
2568 if (nbchars >
2569 buffer_size - XML_PARSER_BUFFER_SIZE) {
Daniel Veillard0161e632008-08-28 15:36:32 +00002570 if (xmlParserEntityCheck(ctxt, nbchars, ent))
2571 goto int_error;
2572 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
Owen Taylor3473f882001-02-23 17:55:21 +00002573 }
2574 }
2575 xmlFree(rep);
Daniel Veillard68b6e022008-03-31 09:26:00 +00002576 rep = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00002577 }
2578 } else if (ent != NULL) {
2579 int i = xmlStrlen(ent->name);
2580 const xmlChar *cur = ent->name;
2581
2582 buffer[nbchars++] = '&';
2583 if (nbchars > buffer_size - i - XML_PARSER_BUFFER_SIZE) {
Daniel Veillard0161e632008-08-28 15:36:32 +00002584 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
Owen Taylor3473f882001-02-23 17:55:21 +00002585 }
2586 for (;i > 0;i--)
2587 buffer[nbchars++] = *cur++;
2588 buffer[nbchars++] = ';';
2589 }
2590 } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
2591 if (xmlParserDebugEntities)
2592 xmlGenericError(xmlGenericErrorContext,
2593 "String decoding PE Reference: %.30s\n", str);
2594 ent = xmlParseStringPEReference(ctxt, &str);
Daniel Veillard4bf899b2008-08-20 17:04:30 +00002595 if (ctxt->lastError.code == XML_ERR_ENTITY_LOOP)
2596 goto int_error;
Daniel Veillard4bf899b2008-08-20 17:04:30 +00002597 if (ent != NULL)
Daniel Veillardf4f4e482008-08-25 08:57:48 +00002598 ctxt->nbentities += ent->checked;
Owen Taylor3473f882001-02-23 17:55:21 +00002599 if (ent != NULL) {
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00002600 if (ent->content == NULL) {
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00002601 xmlLoadEntityContent(ctxt, ent);
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00002602 }
Owen Taylor3473f882001-02-23 17:55:21 +00002603 ctxt->depth++;
2604 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2605 0, 0, 0);
2606 ctxt->depth--;
2607 if (rep != NULL) {
2608 current = rep;
2609 while (*current != 0) { /* non input consuming loop */
2610 buffer[nbchars++] = *current++;
2611 if (nbchars >
2612 buffer_size - XML_PARSER_BUFFER_SIZE) {
Daniel Veillard0161e632008-08-28 15:36:32 +00002613 if (xmlParserEntityCheck(ctxt, nbchars, ent))
2614 goto int_error;
2615 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
Owen Taylor3473f882001-02-23 17:55:21 +00002616 }
2617 }
2618 xmlFree(rep);
Daniel Veillard68b6e022008-03-31 09:26:00 +00002619 rep = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00002620 }
2621 }
2622 } else {
2623 COPY_BUF(l,buffer,nbchars,c);
2624 str += l;
2625 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
Daniel Veillard0161e632008-08-28 15:36:32 +00002626 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
Owen Taylor3473f882001-02-23 17:55:21 +00002627 }
2628 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00002629 if (str < last)
2630 c = CUR_SCHAR(str, l);
2631 else
2632 c = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002633 }
Daniel Veillard13cee4e2009-09-05 14:52:55 +02002634 buffer[nbchars] = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002635 return(buffer);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002636
2637mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002638 xmlErrMemory(ctxt, NULL);
Daniel Veillard4bf899b2008-08-20 17:04:30 +00002639int_error:
Daniel Veillard68b6e022008-03-31 09:26:00 +00002640 if (rep != NULL)
2641 xmlFree(rep);
2642 if (buffer != NULL)
2643 xmlFree(buffer);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002644 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002645}
2646
Daniel Veillarde57ec792003-09-10 10:50:59 +00002647/**
2648 * xmlStringDecodeEntities:
2649 * @ctxt: the parser context
2650 * @str: the input string
2651 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2652 * @end: an end marker xmlChar, 0 if none
2653 * @end2: an end marker xmlChar, 0 if none
2654 * @end3: an end marker xmlChar, 0 if none
2655 *
2656 * Takes a entity string content and process to do the adequate substitutions.
2657 *
2658 * [67] Reference ::= EntityRef | CharRef
2659 *
2660 * [69] PEReference ::= '%' Name ';'
2661 *
2662 * Returns A newly allocated string with the substitution done. The caller
2663 * must deallocate it !
2664 */
2665xmlChar *
2666xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
2667 xmlChar end, xmlChar end2, xmlChar end3) {
Daniel Veillarda82b1822004-11-08 16:24:57 +00002668 if ((ctxt == NULL) || (str == NULL)) return(NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00002669 return(xmlStringLenDecodeEntities(ctxt, str, xmlStrlen(str), what,
2670 end, end2, end3));
2671}
Owen Taylor3473f882001-02-23 17:55:21 +00002672
2673/************************************************************************
2674 * *
Owen Taylor3473f882001-02-23 17:55:21 +00002675 * Commodity functions, cleanup needed ? *
2676 * *
2677 ************************************************************************/
2678
2679/**
2680 * areBlanks:
2681 * @ctxt: an XML parser context
2682 * @str: a xmlChar *
2683 * @len: the size of @str
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002684 * @blank_chars: we know the chars are blanks
Owen Taylor3473f882001-02-23 17:55:21 +00002685 *
2686 * Is this a sequence of blank chars that one can ignore ?
2687 *
2688 * Returns 1 if ignorable 0 otherwise.
2689 */
2690
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002691static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2692 int blank_chars) {
Owen Taylor3473f882001-02-23 17:55:21 +00002693 int i, ret;
2694 xmlNodePtr lastChild;
2695
Daniel Veillard05c13a22001-09-09 08:38:09 +00002696 /*
2697 * Don't spend time trying to differentiate them, the same callback is
2698 * used !
2699 */
2700 if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters)
Daniel Veillard2f362242001-03-02 17:36:21 +00002701 return(0);
2702
Owen Taylor3473f882001-02-23 17:55:21 +00002703 /*
2704 * Check for xml:space value.
2705 */
Daniel Veillard1114d002006-10-12 16:24:35 +00002706 if ((ctxt->space == NULL) || (*(ctxt->space) == 1) ||
2707 (*(ctxt->space) == -2))
Owen Taylor3473f882001-02-23 17:55:21 +00002708 return(0);
2709
2710 /*
2711 * Check that the string is made of blanks
2712 */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002713 if (blank_chars == 0) {
2714 for (i = 0;i < len;i++)
2715 if (!(IS_BLANK_CH(str[i]))) return(0);
2716 }
Owen Taylor3473f882001-02-23 17:55:21 +00002717
2718 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002719 * Look if the element is mixed content in the DTD if available
Owen Taylor3473f882001-02-23 17:55:21 +00002720 */
Daniel Veillard6dd398f2001-07-25 22:41:03 +00002721 if (ctxt->node == NULL) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002722 if (ctxt->myDoc != NULL) {
2723 ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
2724 if (ret == 0) return(1);
2725 if (ret == 1) return(0);
2726 }
2727
2728 /*
2729 * Otherwise, heuristic :-\
2730 */
Daniel Veillardabac41e2005-07-06 15:17:38 +00002731 if ((RAW != '<') && (RAW != 0xD)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002732 if ((ctxt->node->children == NULL) &&
2733 (RAW == '<') && (NXT(1) == '/')) return(0);
2734
2735 lastChild = xmlGetLastChild(ctxt->node);
2736 if (lastChild == NULL) {
Daniel Veillard7db37732001-07-12 01:20:08 +00002737 if ((ctxt->node->type != XML_ELEMENT_NODE) &&
2738 (ctxt->node->content != NULL)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002739 } else if (xmlNodeIsText(lastChild))
2740 return(0);
2741 else if ((ctxt->node->children != NULL) &&
2742 (xmlNodeIsText(ctxt->node->children)))
2743 return(0);
2744 return(1);
2745}
2746
Owen Taylor3473f882001-02-23 17:55:21 +00002747/************************************************************************
2748 * *
2749 * Extra stuff for namespace support *
2750 * Relates to http://www.w3.org/TR/WD-xml-names *
2751 * *
2752 ************************************************************************/
2753
2754/**
2755 * xmlSplitQName:
2756 * @ctxt: an XML parser context
2757 * @name: an XML parser context
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00002758 * @prefix: a xmlChar **
Owen Taylor3473f882001-02-23 17:55:21 +00002759 *
2760 * parse an UTF8 encoded XML qualified name string
2761 *
2762 * [NS 5] QName ::= (Prefix ':')? LocalPart
2763 *
2764 * [NS 6] Prefix ::= NCName
2765 *
2766 * [NS 7] LocalPart ::= NCName
2767 *
2768 * Returns the local part, and prefix is updated
2769 * to get the Prefix if any.
2770 */
2771
2772xmlChar *
2773xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
2774 xmlChar buf[XML_MAX_NAMELEN + 5];
2775 xmlChar *buffer = NULL;
2776 int len = 0;
2777 int max = XML_MAX_NAMELEN;
2778 xmlChar *ret = NULL;
2779 const xmlChar *cur = name;
2780 int c;
2781
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00002782 if (prefix == NULL) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002783 *prefix = NULL;
2784
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00002785 if (cur == NULL) return(NULL);
2786
Owen Taylor3473f882001-02-23 17:55:21 +00002787#ifndef XML_XML_NAMESPACE
2788 /* xml: prefix is not really a namespace */
2789 if ((cur[0] == 'x') && (cur[1] == 'm') &&
2790 (cur[2] == 'l') && (cur[3] == ':'))
2791 return(xmlStrdup(name));
2792#endif
2793
Daniel Veillard597bc482003-07-24 16:08:28 +00002794 /* nasty but well=formed */
Owen Taylor3473f882001-02-23 17:55:21 +00002795 if (cur[0] == ':')
2796 return(xmlStrdup(name));
2797
2798 c = *cur++;
2799 while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
2800 buf[len++] = c;
2801 c = *cur++;
2802 }
2803 if (len >= max) {
2804 /*
2805 * Okay someone managed to make a huge name, so he's ready to pay
2806 * for the processing speed.
2807 */
2808 max = len * 2;
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00002809
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002810 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002811 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002812 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002813 return(NULL);
2814 }
2815 memcpy(buffer, buf, len);
2816 while ((c != 0) && (c != ':')) { /* tested bigname.xml */
2817 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00002818 xmlChar *tmp;
2819
Owen Taylor3473f882001-02-23 17:55:21 +00002820 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00002821 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00002822 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00002823 if (tmp == NULL) {
Daniel Veillard68b6e022008-03-31 09:26:00 +00002824 xmlFree(buffer);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002825 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002826 return(NULL);
2827 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00002828 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00002829 }
2830 buffer[len++] = c;
2831 c = *cur++;
2832 }
2833 buffer[len] = 0;
2834 }
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00002835
Daniel Veillard597bc482003-07-24 16:08:28 +00002836 if ((c == ':') && (*cur == 0)) {
Daniel Veillard02a49632006-10-13 12:42:31 +00002837 if (buffer != NULL)
2838 xmlFree(buffer);
2839 *prefix = NULL;
Daniel Veillard597bc482003-07-24 16:08:28 +00002840 return(xmlStrdup(name));
Daniel Veillard02a49632006-10-13 12:42:31 +00002841 }
Daniel Veillard597bc482003-07-24 16:08:28 +00002842
Owen Taylor3473f882001-02-23 17:55:21 +00002843 if (buffer == NULL)
2844 ret = xmlStrndup(buf, len);
2845 else {
2846 ret = buffer;
2847 buffer = NULL;
2848 max = XML_MAX_NAMELEN;
2849 }
2850
2851
2852 if (c == ':') {
Daniel Veillardbb284f42002-10-16 18:02:47 +00002853 c = *cur;
Owen Taylor3473f882001-02-23 17:55:21 +00002854 *prefix = ret;
Daniel Veillard597bc482003-07-24 16:08:28 +00002855 if (c == 0) {
Daniel Veillard8d73bcb2003-08-04 01:06:15 +00002856 return(xmlStrndup(BAD_CAST "", 0));
Daniel Veillard597bc482003-07-24 16:08:28 +00002857 }
Owen Taylor3473f882001-02-23 17:55:21 +00002858 len = 0;
2859
Daniel Veillardbb284f42002-10-16 18:02:47 +00002860 /*
2861 * Check that the first character is proper to start
2862 * a new name
2863 */
2864 if (!(((c >= 0x61) && (c <= 0x7A)) ||
2865 ((c >= 0x41) && (c <= 0x5A)) ||
2866 (c == '_') || (c == ':'))) {
2867 int l;
2868 int first = CUR_SCHAR(cur, l);
2869
2870 if (!IS_LETTER(first) && (first != '_')) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00002871 xmlFatalErrMsgStr(ctxt, XML_NS_ERR_QNAME,
Daniel Veillardbb284f42002-10-16 18:02:47 +00002872 "Name %s is not XML Namespace compliant\n",
Daniel Veillardbc92eca2003-09-15 09:48:06 +00002873 name);
Daniel Veillardbb284f42002-10-16 18:02:47 +00002874 }
2875 }
2876 cur++;
2877
Owen Taylor3473f882001-02-23 17:55:21 +00002878 while ((c != 0) && (len < max)) { /* tested bigname2.xml */
2879 buf[len++] = c;
2880 c = *cur++;
2881 }
2882 if (len >= max) {
2883 /*
2884 * Okay someone managed to make a huge name, so he's ready to pay
2885 * for the processing speed.
2886 */
2887 max = len * 2;
Daniel Veillard68b6e022008-03-31 09:26:00 +00002888
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002889 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002890 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002891 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002892 return(NULL);
2893 }
2894 memcpy(buffer, buf, len);
2895 while (c != 0) { /* tested bigname2.xml */
2896 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00002897 xmlChar *tmp;
2898
Owen Taylor3473f882001-02-23 17:55:21 +00002899 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00002900 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00002901 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00002902 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002903 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00002904 xmlFree(buffer);
Owen Taylor3473f882001-02-23 17:55:21 +00002905 return(NULL);
2906 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00002907 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00002908 }
2909 buffer[len++] = c;
2910 c = *cur++;
2911 }
2912 buffer[len] = 0;
2913 }
Daniel Veillard68b6e022008-03-31 09:26:00 +00002914
Owen Taylor3473f882001-02-23 17:55:21 +00002915 if (buffer == NULL)
2916 ret = xmlStrndup(buf, len);
2917 else {
2918 ret = buffer;
2919 }
2920 }
2921
2922 return(ret);
2923}
2924
2925/************************************************************************
2926 * *
2927 * The parser itself *
2928 * Relates to http://www.w3.org/TR/REC-xml *
2929 * *
2930 ************************************************************************/
2931
Daniel Veillard34e3f642008-07-29 09:02:27 +00002932/************************************************************************
2933 * *
2934 * Routines to parse Name, NCName and NmToken *
2935 * *
2936 ************************************************************************/
#ifdef DEBUG
/*
 * Statistics counters, only compiled in DEBUG builds; e.g.
 * xmlParseNameComplex() bumps nbParseNameComplex on each call.
 */
static unsigned long nbParseName = 0;
static unsigned long nbParseNameComplex = 0;
static unsigned long nbParseNCName = 0;
static unsigned long nbParseNCNameComplex = 0;
static unsigned long nbParseNmToken = 0;
static unsigned long nbParseStringName = 0;
#endif
2945
Daniel Veillard34e3f642008-07-29 09:02:27 +00002946/*
2947 * The two following functions are related to the change of accepted
2948 * characters for Name and NmToken in the Revision 5 of XML-1.0
2949 * They correspond to the modified production [4] and the new production [4a]
2950 * changes in that revision. Also note that the macros used for the
2951 * productions Letter, Digit, CombiningChar and Extender are not needed
2952 * anymore.
2953 * We still keep compatibility to pre-revision5 parsing semantic if the
2954 * new XML_PARSE_OLD10 option is given to the parser.
2955 */
2956static int
2957xmlIsNameStartChar(xmlParserCtxtPtr ctxt, int c) {
2958 if ((ctxt->options & XML_PARSE_OLD10) == 0) {
2959 /*
2960 * Use the new checks of production [4] [4a] amd [5] of the
2961 * Update 5 of XML-1.0
2962 */
2963 if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
2964 (((c >= 'a') && (c <= 'z')) ||
2965 ((c >= 'A') && (c <= 'Z')) ||
2966 (c == '_') || (c == ':') ||
2967 ((c >= 0xC0) && (c <= 0xD6)) ||
2968 ((c >= 0xD8) && (c <= 0xF6)) ||
2969 ((c >= 0xF8) && (c <= 0x2FF)) ||
2970 ((c >= 0x370) && (c <= 0x37D)) ||
2971 ((c >= 0x37F) && (c <= 0x1FFF)) ||
2972 ((c >= 0x200C) && (c <= 0x200D)) ||
2973 ((c >= 0x2070) && (c <= 0x218F)) ||
2974 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
2975 ((c >= 0x3001) && (c <= 0xD7FF)) ||
2976 ((c >= 0xF900) && (c <= 0xFDCF)) ||
2977 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
2978 ((c >= 0x10000) && (c <= 0xEFFFF))))
2979 return(1);
2980 } else {
2981 if (IS_LETTER(c) || (c == '_') || (c == ':'))
2982 return(1);
2983 }
2984 return(0);
2985}
2986
2987static int
2988xmlIsNameChar(xmlParserCtxtPtr ctxt, int c) {
2989 if ((ctxt->options & XML_PARSE_OLD10) == 0) {
2990 /*
2991 * Use the new checks of production [4] [4a] amd [5] of the
2992 * Update 5 of XML-1.0
2993 */
2994 if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
2995 (((c >= 'a') && (c <= 'z')) ||
2996 ((c >= 'A') && (c <= 'Z')) ||
2997 ((c >= '0') && (c <= '9')) || /* !start */
2998 (c == '_') || (c == ':') ||
2999 (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3000 ((c >= 0xC0) && (c <= 0xD6)) ||
3001 ((c >= 0xD8) && (c <= 0xF6)) ||
3002 ((c >= 0xF8) && (c <= 0x2FF)) ||
3003 ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3004 ((c >= 0x370) && (c <= 0x37D)) ||
3005 ((c >= 0x37F) && (c <= 0x1FFF)) ||
3006 ((c >= 0x200C) && (c <= 0x200D)) ||
3007 ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3008 ((c >= 0x2070) && (c <= 0x218F)) ||
3009 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3010 ((c >= 0x3001) && (c <= 0xD7FF)) ||
3011 ((c >= 0xF900) && (c <= 0xFDCF)) ||
3012 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3013 ((c >= 0x10000) && (c <= 0xEFFFF))))
3014 return(1);
3015 } else {
3016 if ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3017 (c == '.') || (c == '-') ||
3018 (c == '_') || (c == ':') ||
3019 (IS_COMBINING(c)) ||
3020 (IS_EXTENDER(c)))
3021 return(1);
3022 }
3023 return(0);
3024}
3025
Daniel Veillarde57ec792003-09-10 10:50:59 +00003026static xmlChar * xmlParseAttValueInternal(xmlParserCtxtPtr ctxt,
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003027 int *len, int *alloc, int normalize);
Daniel Veillard0fb18932003-09-07 09:14:37 +00003028
Daniel Veillard34e3f642008-07-29 09:02:27 +00003029static const xmlChar *
3030xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
3031 int len = 0, l;
3032 int c;
3033 int count = 0;
3034
Daniel Veillardc6561462009-03-25 10:22:31 +00003035#ifdef DEBUG
Daniel Veillard34e3f642008-07-29 09:02:27 +00003036 nbParseNameComplex++;
Daniel Veillardc6561462009-03-25 10:22:31 +00003037#endif
Daniel Veillard34e3f642008-07-29 09:02:27 +00003038
3039 /*
3040 * Handler for more complex cases
3041 */
3042 GROW;
3043 c = CUR_CHAR(l);
3044 if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3045 /*
3046 * Use the new checks of production [4] [4a] amd [5] of the
3047 * Update 5 of XML-1.0
3048 */
3049 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3050 (!(((c >= 'a') && (c <= 'z')) ||
3051 ((c >= 'A') && (c <= 'Z')) ||
3052 (c == '_') || (c == ':') ||
3053 ((c >= 0xC0) && (c <= 0xD6)) ||
3054 ((c >= 0xD8) && (c <= 0xF6)) ||
3055 ((c >= 0xF8) && (c <= 0x2FF)) ||
3056 ((c >= 0x370) && (c <= 0x37D)) ||
3057 ((c >= 0x37F) && (c <= 0x1FFF)) ||
3058 ((c >= 0x200C) && (c <= 0x200D)) ||
3059 ((c >= 0x2070) && (c <= 0x218F)) ||
3060 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3061 ((c >= 0x3001) && (c <= 0xD7FF)) ||
3062 ((c >= 0xF900) && (c <= 0xFDCF)) ||
3063 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3064 ((c >= 0x10000) && (c <= 0xEFFFF))))) {
3065 return(NULL);
3066 }
3067 len += l;
3068 NEXTL(l);
3069 c = CUR_CHAR(l);
3070 while ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3071 (((c >= 'a') && (c <= 'z')) ||
3072 ((c >= 'A') && (c <= 'Z')) ||
3073 ((c >= '0') && (c <= '9')) || /* !start */
3074 (c == '_') || (c == ':') ||
3075 (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3076 ((c >= 0xC0) && (c <= 0xD6)) ||
3077 ((c >= 0xD8) && (c <= 0xF6)) ||
3078 ((c >= 0xF8) && (c <= 0x2FF)) ||
3079 ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3080 ((c >= 0x370) && (c <= 0x37D)) ||
3081 ((c >= 0x37F) && (c <= 0x1FFF)) ||
3082 ((c >= 0x200C) && (c <= 0x200D)) ||
3083 ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3084 ((c >= 0x2070) && (c <= 0x218F)) ||
3085 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3086 ((c >= 0x3001) && (c <= 0xD7FF)) ||
3087 ((c >= 0xF900) && (c <= 0xFDCF)) ||
3088 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3089 ((c >= 0x10000) && (c <= 0xEFFFF))
3090 )) {
3091 if (count++ > 100) {
3092 count = 0;
3093 GROW;
3094 }
3095 len += l;
3096 NEXTL(l);
3097 c = CUR_CHAR(l);
3098 }
3099 } else {
3100 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3101 (!IS_LETTER(c) && (c != '_') &&
3102 (c != ':'))) {
3103 return(NULL);
3104 }
3105 len += l;
3106 NEXTL(l);
3107 c = CUR_CHAR(l);
3108
3109 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3110 ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3111 (c == '.') || (c == '-') ||
3112 (c == '_') || (c == ':') ||
3113 (IS_COMBINING(c)) ||
3114 (IS_EXTENDER(c)))) {
3115 if (count++ > 100) {
3116 count = 0;
3117 GROW;
3118 }
3119 len += l;
3120 NEXTL(l);
3121 c = CUR_CHAR(l);
3122 }
3123 }
3124 if ((*ctxt->input->cur == '\n') && (ctxt->input->cur[-1] == '\r'))
3125 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - (len + 1), len));
3126 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
3127}
3128
Owen Taylor3473f882001-02-23 17:55:21 +00003129/**
3130 * xmlParseName:
3131 * @ctxt: an XML parser context
3132 *
3133 * parse an XML name.
3134 *
3135 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3136 * CombiningChar | Extender
3137 *
3138 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3139 *
Daniel Veillard807b4de2004-09-26 14:42:56 +00003140 * [6] Names ::= Name (#x20 Name)*
Owen Taylor3473f882001-02-23 17:55:21 +00003141 *
3142 * Returns the Name parsed or NULL
3143 */
3144
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003145const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00003146xmlParseName(xmlParserCtxtPtr ctxt) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00003147 const xmlChar *in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003148 const xmlChar *ret;
Owen Taylor3473f882001-02-23 17:55:21 +00003149 int count = 0;
3150
3151 GROW;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003152
Daniel Veillardc6561462009-03-25 10:22:31 +00003153#ifdef DEBUG
Daniel Veillard34e3f642008-07-29 09:02:27 +00003154 nbParseName++;
Daniel Veillardc6561462009-03-25 10:22:31 +00003155#endif
Daniel Veillard34e3f642008-07-29 09:02:27 +00003156
Daniel Veillard48b2f892001-02-25 16:11:03 +00003157 /*
3158 * Accelerator for simple ASCII names
3159 */
3160 in = ctxt->input->cur;
3161 if (((*in >= 0x61) && (*in <= 0x7A)) ||
3162 ((*in >= 0x41) && (*in <= 0x5A)) ||
3163 (*in == '_') || (*in == ':')) {
3164 in++;
3165 while (((*in >= 0x61) && (*in <= 0x7A)) ||
3166 ((*in >= 0x41) && (*in <= 0x5A)) ||
3167 ((*in >= 0x30) && (*in <= 0x39)) ||
Daniel Veillard76d66f42001-05-16 21:05:17 +00003168 (*in == '_') || (*in == '-') ||
3169 (*in == ':') || (*in == '.'))
Daniel Veillard48b2f892001-02-25 16:11:03 +00003170 in++;
Daniel Veillard76d66f42001-05-16 21:05:17 +00003171 if ((*in > 0) && (*in < 0x80)) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00003172 count = in - ctxt->input->cur;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003173 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
Daniel Veillard48b2f892001-02-25 16:11:03 +00003174 ctxt->input->cur = in;
Daniel Veillard77a90a72003-03-22 00:04:05 +00003175 ctxt->nbChars += count;
3176 ctxt->input->col += count;
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003177 if (ret == NULL)
3178 xmlErrMemory(ctxt, NULL);
Daniel Veillard48b2f892001-02-25 16:11:03 +00003179 return(ret);
3180 }
3181 }
Daniel Veillard34e3f642008-07-29 09:02:27 +00003182 /* accelerator for special cases */
Daniel Veillard2f362242001-03-02 17:36:21 +00003183 return(xmlParseNameComplex(ctxt));
Daniel Veillard21a0f912001-02-25 19:54:14 +00003184}
Daniel Veillard48b2f892001-02-25 16:11:03 +00003185
Daniel Veillard34e3f642008-07-29 09:02:27 +00003186static const xmlChar *
3187xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) {
3188 int len = 0, l;
3189 int c;
3190 int count = 0;
3191
Daniel Veillardc6561462009-03-25 10:22:31 +00003192#ifdef DEBUG
Daniel Veillard34e3f642008-07-29 09:02:27 +00003193 nbParseNCNameComplex++;
Daniel Veillardc6561462009-03-25 10:22:31 +00003194#endif
Daniel Veillard34e3f642008-07-29 09:02:27 +00003195
3196 /*
3197 * Handler for more complex cases
3198 */
3199 GROW;
3200 c = CUR_CHAR(l);
3201 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3202 (!xmlIsNameStartChar(ctxt, c) || (c == ':'))) {
3203 return(NULL);
3204 }
3205
3206 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3207 (xmlIsNameChar(ctxt, c) && (c != ':'))) {
3208 if (count++ > 100) {
3209 count = 0;
3210 GROW;
3211 }
3212 len += l;
3213 NEXTL(l);
3214 c = CUR_CHAR(l);
3215 }
3216 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
3217}
3218
3219/**
3220 * xmlParseNCName:
3221 * @ctxt: an XML parser context
3222 * @len: lenght of the string parsed
3223 *
3224 * parse an XML name.
3225 *
3226 * [4NS] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
3227 * CombiningChar | Extender
3228 *
3229 * [5NS] NCName ::= (Letter | '_') (NCNameChar)*
3230 *
3231 * Returns the Name parsed or NULL
3232 */
3233
3234static const xmlChar *
3235xmlParseNCName(xmlParserCtxtPtr ctxt) {
3236 const xmlChar *in;
3237 const xmlChar *ret;
3238 int count = 0;
3239
Daniel Veillardc6561462009-03-25 10:22:31 +00003240#ifdef DEBUG
Daniel Veillard34e3f642008-07-29 09:02:27 +00003241 nbParseNCName++;
Daniel Veillardc6561462009-03-25 10:22:31 +00003242#endif
Daniel Veillard34e3f642008-07-29 09:02:27 +00003243
3244 /*
3245 * Accelerator for simple ASCII names
3246 */
3247 in = ctxt->input->cur;
3248 if (((*in >= 0x61) && (*in <= 0x7A)) ||
3249 ((*in >= 0x41) && (*in <= 0x5A)) ||
3250 (*in == '_')) {
3251 in++;
3252 while (((*in >= 0x61) && (*in <= 0x7A)) ||
3253 ((*in >= 0x41) && (*in <= 0x5A)) ||
3254 ((*in >= 0x30) && (*in <= 0x39)) ||
3255 (*in == '_') || (*in == '-') ||
3256 (*in == '.'))
3257 in++;
3258 if ((*in > 0) && (*in < 0x80)) {
3259 count = in - ctxt->input->cur;
3260 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
3261 ctxt->input->cur = in;
3262 ctxt->nbChars += count;
3263 ctxt->input->col += count;
3264 if (ret == NULL) {
3265 xmlErrMemory(ctxt, NULL);
3266 }
3267 return(ret);
3268 }
3269 }
3270 return(xmlParseNCNameComplex(ctxt));
3271}
3272
Daniel Veillard46de64e2002-05-29 08:21:33 +00003273/**
3274 * xmlParseNameAndCompare:
3275 * @ctxt: an XML parser context
3276 *
3277 * parse an XML name and compares for match
3278 * (specialized for endtag parsing)
3279 *
Daniel Veillard46de64e2002-05-29 08:21:33 +00003280 * Returns NULL for an illegal name, (xmlChar*) 1 for success
3281 * and the name for mismatch
3282 */
3283
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003284static const xmlChar *
Daniel Veillard46de64e2002-05-29 08:21:33 +00003285xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003286 register const xmlChar *cmp = other;
3287 register const xmlChar *in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003288 const xmlChar *ret;
Daniel Veillard46de64e2002-05-29 08:21:33 +00003289
3290 GROW;
Daniel Veillard34e3f642008-07-29 09:02:27 +00003291
Daniel Veillard46de64e2002-05-29 08:21:33 +00003292 in = ctxt->input->cur;
3293 while (*in != 0 && *in == *cmp) {
Daniel Veillard34e3f642008-07-29 09:02:27 +00003294 ++in;
Daniel Veillard46de64e2002-05-29 08:21:33 +00003295 ++cmp;
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00003296 ctxt->input->col++;
Daniel Veillard46de64e2002-05-29 08:21:33 +00003297 }
William M. Brack76e95df2003-10-18 16:20:14 +00003298 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
Daniel Veillard34e3f642008-07-29 09:02:27 +00003299 /* success */
Daniel Veillard46de64e2002-05-29 08:21:33 +00003300 ctxt->input->cur = in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003301 return (const xmlChar*) 1;
Daniel Veillard46de64e2002-05-29 08:21:33 +00003302 }
3303 /* failure (or end of input buffer), check with full function */
3304 ret = xmlParseName (ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00003305 /* strings coming from the dictionnary direct compare possible */
3306 if (ret == other) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003307 return (const xmlChar*) 1;
Daniel Veillard46de64e2002-05-29 08:21:33 +00003308 }
3309 return ret;
3310}
3311
Owen Taylor3473f882001-02-23 17:55:21 +00003312/**
3313 * xmlParseStringName:
3314 * @ctxt: an XML parser context
3315 * @str: a pointer to the string pointer (IN/OUT)
3316 *
3317 * parse an XML name.
3318 *
3319 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3320 * CombiningChar | Extender
3321 *
3322 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3323 *
Daniel Veillard807b4de2004-09-26 14:42:56 +00003324 * [6] Names ::= Name (#x20 Name)*
Owen Taylor3473f882001-02-23 17:55:21 +00003325 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003326 * Returns the Name parsed or NULL. The @str pointer
Owen Taylor3473f882001-02-23 17:55:21 +00003327 * is updated to the current location in the string.
3328 */
3329
Daniel Veillard56a4cb82001-03-24 17:00:36 +00003330static xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00003331xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
3332 xmlChar buf[XML_MAX_NAMELEN + 5];
3333 const xmlChar *cur = *str;
3334 int len = 0, l;
3335 int c;
3336
Daniel Veillardc6561462009-03-25 10:22:31 +00003337#ifdef DEBUG
Daniel Veillard34e3f642008-07-29 09:02:27 +00003338 nbParseStringName++;
Daniel Veillardc6561462009-03-25 10:22:31 +00003339#endif
Daniel Veillard34e3f642008-07-29 09:02:27 +00003340
Owen Taylor3473f882001-02-23 17:55:21 +00003341 c = CUR_SCHAR(cur, l);
Daniel Veillard34e3f642008-07-29 09:02:27 +00003342 if (!xmlIsNameStartChar(ctxt, c)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003343 return(NULL);
3344 }
3345
Daniel Veillard34e3f642008-07-29 09:02:27 +00003346 COPY_BUF(l,buf,len,c);
3347 cur += l;
3348 c = CUR_SCHAR(cur, l);
3349 while (xmlIsNameChar(ctxt, c)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003350 COPY_BUF(l,buf,len,c);
3351 cur += l;
3352 c = CUR_SCHAR(cur, l);
3353 if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
3354 /*
3355 * Okay someone managed to make a huge name, so he's ready to pay
3356 * for the processing speed.
3357 */
3358 xmlChar *buffer;
3359 int max = len * 2;
Daniel Veillard34e3f642008-07-29 09:02:27 +00003360
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003361 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003362 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003363 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003364 return(NULL);
3365 }
3366 memcpy(buffer, buf, len);
Daniel Veillard34e3f642008-07-29 09:02:27 +00003367 while (xmlIsNameChar(ctxt, c)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003368 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003369 xmlChar *tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003370 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003371 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00003372 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00003373 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003374 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00003375 xmlFree(buffer);
Owen Taylor3473f882001-02-23 17:55:21 +00003376 return(NULL);
3377 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003378 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003379 }
3380 COPY_BUF(l,buffer,len,c);
3381 cur += l;
3382 c = CUR_SCHAR(cur, l);
3383 }
3384 buffer[len] = 0;
3385 *str = cur;
3386 return(buffer);
3387 }
3388 }
3389 *str = cur;
3390 return(xmlStrndup(buf, len));
3391}
3392
3393/**
3394 * xmlParseNmtoken:
3395 * @ctxt: an XML parser context
Daniel Veillard34e3f642008-07-29 09:02:27 +00003396 *
Owen Taylor3473f882001-02-23 17:55:21 +00003397 * parse an XML Nmtoken.
3398 *
3399 * [7] Nmtoken ::= (NameChar)+
3400 *
Daniel Veillard807b4de2004-09-26 14:42:56 +00003401 * [8] Nmtokens ::= Nmtoken (#x20 Nmtoken)*
Owen Taylor3473f882001-02-23 17:55:21 +00003402 *
3403 * Returns the Nmtoken parsed or NULL
3404 */
3405
3406xmlChar *
3407xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
3408 xmlChar buf[XML_MAX_NAMELEN + 5];
3409 int len = 0, l;
3410 int c;
3411 int count = 0;
3412
Daniel Veillardc6561462009-03-25 10:22:31 +00003413#ifdef DEBUG
Daniel Veillard34e3f642008-07-29 09:02:27 +00003414 nbParseNmToken++;
Daniel Veillardc6561462009-03-25 10:22:31 +00003415#endif
Daniel Veillard34e3f642008-07-29 09:02:27 +00003416
Owen Taylor3473f882001-02-23 17:55:21 +00003417 GROW;
3418 c = CUR_CHAR(l);
3419
Daniel Veillard34e3f642008-07-29 09:02:27 +00003420 while (xmlIsNameChar(ctxt, c)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003421 if (count++ > 100) {
3422 count = 0;
3423 GROW;
3424 }
3425 COPY_BUF(l,buf,len,c);
3426 NEXTL(l);
3427 c = CUR_CHAR(l);
3428 if (len >= XML_MAX_NAMELEN) {
3429 /*
3430 * Okay someone managed to make a huge token, so he's ready to pay
3431 * for the processing speed.
3432 */
3433 xmlChar *buffer;
3434 int max = len * 2;
Daniel Veillard34e3f642008-07-29 09:02:27 +00003435
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003436 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003437 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003438 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003439 return(NULL);
3440 }
3441 memcpy(buffer, buf, len);
Daniel Veillard34e3f642008-07-29 09:02:27 +00003442 while (xmlIsNameChar(ctxt, c)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003443 if (count++ > 100) {
3444 count = 0;
3445 GROW;
3446 }
3447 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003448 xmlChar *tmp;
3449
Owen Taylor3473f882001-02-23 17:55:21 +00003450 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003451 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00003452 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00003453 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003454 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00003455 xmlFree(buffer);
Owen Taylor3473f882001-02-23 17:55:21 +00003456 return(NULL);
3457 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003458 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003459 }
3460 COPY_BUF(l,buffer,len,c);
3461 NEXTL(l);
3462 c = CUR_CHAR(l);
3463 }
3464 buffer[len] = 0;
3465 return(buffer);
3466 }
3467 }
3468 if (len == 0)
3469 return(NULL);
3470 return(xmlStrndup(buf, len));
3471}
3472
3473/**
3474 * xmlParseEntityValue:
3475 * @ctxt: an XML parser context
3476 * @orig: if non-NULL store a copy of the original entity value
3477 *
3478 * parse a value for ENTITY declarations
3479 *
3480 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
3481 * "'" ([^%&'] | PEReference | Reference)* "'"
3482 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003483 * Returns the EntityValue parsed with reference substituted or NULL
Owen Taylor3473f882001-02-23 17:55:21 +00003484 */
3485
3486xmlChar *
3487xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
3488 xmlChar *buf = NULL;
3489 int len = 0;
3490 int size = XML_PARSER_BUFFER_SIZE;
3491 int c, l;
3492 xmlChar stop;
3493 xmlChar *ret = NULL;
3494 const xmlChar *cur = NULL;
3495 xmlParserInputPtr input;
3496
3497 if (RAW == '"') stop = '"';
3498 else if (RAW == '\'') stop = '\'';
3499 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003500 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003501 return(NULL);
3502 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003503 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003504 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003505 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003506 return(NULL);
3507 }
3508
3509 /*
3510 * The content of the entity definition is copied in a buffer.
3511 */
3512
3513 ctxt->instate = XML_PARSER_ENTITY_VALUE;
3514 input = ctxt->input;
3515 GROW;
3516 NEXT;
3517 c = CUR_CHAR(l);
3518 /*
3519 * NOTE: 4.4.5 Included in Literal
3520 * When a parameter entity reference appears in a literal entity
3521 * value, ... a single or double quote character in the replacement
3522 * text is always treated as a normal data character and will not
3523 * terminate the literal.
3524 * In practice it means we stop the loop only when back at parsing
3525 * the initial entity and the quote is found
3526 */
William M. Brack871611b2003-10-18 04:53:14 +00003527 while ((IS_CHAR(c)) && ((c != stop) || /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00003528 (ctxt->input != input))) {
3529 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003530 xmlChar *tmp;
3531
Owen Taylor3473f882001-02-23 17:55:21 +00003532 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003533 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3534 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003535 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00003536 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00003537 return(NULL);
3538 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003539 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003540 }
3541 COPY_BUF(l,buf,len,c);
3542 NEXTL(l);
3543 /*
3544 * Pop-up of finished entities.
3545 */
3546 while ((RAW == 0) && (ctxt->inputNr > 1)) /* non input consuming */
3547 xmlPopInput(ctxt);
3548
3549 GROW;
3550 c = CUR_CHAR(l);
3551 if (c == 0) {
3552 GROW;
3553 c = CUR_CHAR(l);
3554 }
3555 }
3556 buf[len] = 0;
3557
3558 /*
3559 * Raise problem w.r.t. '&' and '%' being used in non-entities
3560 * reference constructs. Note Charref will be handled in
3561 * xmlStringDecodeEntities()
3562 */
3563 cur = buf;
3564 while (*cur != 0) { /* non input consuming */
3565 if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
3566 xmlChar *name;
3567 xmlChar tmp = *cur;
3568
3569 cur++;
3570 name = xmlParseStringName(ctxt, &cur);
3571 if ((name == NULL) || (*cur != ';')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003572 xmlFatalErrMsgInt(ctxt, XML_ERR_ENTITY_CHAR_ERROR,
Owen Taylor3473f882001-02-23 17:55:21 +00003573 "EntityValue: '%c' forbidden except for entities references\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003574 tmp);
Owen Taylor3473f882001-02-23 17:55:21 +00003575 }
Daniel Veillard5151c062001-10-23 13:10:19 +00003576 if ((tmp == '%') && (ctxt->inSubset == 1) &&
3577 (ctxt->inputNr == 1)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003578 xmlFatalErr(ctxt, XML_ERR_ENTITY_PE_INTERNAL, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003579 }
3580 if (name != NULL)
3581 xmlFree(name);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00003582 if (*cur == 0)
3583 break;
Owen Taylor3473f882001-02-23 17:55:21 +00003584 }
3585 cur++;
3586 }
3587
3588 /*
3589 * Then PEReference entities are substituted.
3590 */
3591 if (c != stop) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003592 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003593 xmlFree(buf);
3594 } else {
3595 NEXT;
3596 /*
3597 * NOTE: 4.4.7 Bypassed
3598 * When a general entity reference appears in the EntityValue in
3599 * an entity declaration, it is bypassed and left as is.
3600 * so XML_SUBSTITUTE_REF is not set here.
3601 */
3602 ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF,
3603 0, 0, 0);
3604 if (orig != NULL)
3605 *orig = buf;
3606 else
3607 xmlFree(buf);
3608 }
3609
3610 return(ret);
3611}
3612
3613/**
Daniel Veillard01c13b52002-12-10 15:19:08 +00003614 * xmlParseAttValueComplex:
3615 * @ctxt: an XML parser context
Daniel Veillard0fb18932003-09-07 09:14:37 +00003616 * @len: the resulting attribute len
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003617 * @normalize: wether to apply the inner normalization
Daniel Veillard01c13b52002-12-10 15:19:08 +00003618 *
3619 * parse a value for an attribute, this is the fallback function
3620 * of xmlParseAttValue() when the attribute parsing requires handling
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003621 * of non-ASCII characters, or normalization compaction.
Daniel Veillard01c13b52002-12-10 15:19:08 +00003622 *
3623 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
3624 */
Daniel Veillard0fb18932003-09-07 09:14:37 +00003625static xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003626xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) {
Daniel Veillarde72c7562002-05-31 09:47:30 +00003627 xmlChar limit = 0;
3628 xmlChar *buf = NULL;
Daniel Veillard68b6e022008-03-31 09:26:00 +00003629 xmlChar *rep = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00003630 int len = 0;
3631 int buf_size = 0;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003632 int c, l, in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003633 xmlChar *current = NULL;
3634 xmlEntityPtr ent;
3635
Owen Taylor3473f882001-02-23 17:55:21 +00003636 if (NXT(0) == '"') {
3637 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3638 limit = '"';
3639 NEXT;
3640 } else if (NXT(0) == '\'') {
3641 limit = '\'';
3642 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3643 NEXT;
3644 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003645 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003646 return(NULL);
3647 }
Daniel Veillard68b6e022008-03-31 09:26:00 +00003648
Owen Taylor3473f882001-02-23 17:55:21 +00003649 /*
3650 * allocate a translation buffer.
3651 */
3652 buf_size = XML_PARSER_BUFFER_SIZE;
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003653 buf = (xmlChar *) xmlMallocAtomic(buf_size * sizeof(xmlChar));
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003654 if (buf == NULL) goto mem_error;
Owen Taylor3473f882001-02-23 17:55:21 +00003655
3656 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003657 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +00003658 */
3659 c = CUR_CHAR(l);
Daniel Veillardfdc91562002-07-01 21:52:03 +00003660 while ((NXT(0) != limit) && /* checked */
Daniel Veillardb9e5acc2007-06-12 13:43:00 +00003661 (IS_CHAR(c)) && (c != '<')) {
Owen Taylor3473f882001-02-23 17:55:21 +00003662 if (c == 0) break;
Daniel Veillardfdc91562002-07-01 21:52:03 +00003663 if (c == '&') {
Daniel Veillard62998c02003-09-15 12:56:36 +00003664 in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003665 if (NXT(1) == '#') {
3666 int val = xmlParseCharRef(ctxt);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003667
Owen Taylor3473f882001-02-23 17:55:21 +00003668 if (val == '&') {
Daniel Veillard319a7422001-09-11 09:27:09 +00003669 if (ctxt->replaceEntities) {
3670 if (len > buf_size - 10) {
Daniel Veillard0161e632008-08-28 15:36:32 +00003671 growBuffer(buf, 10);
Daniel Veillard319a7422001-09-11 09:27:09 +00003672 }
3673 buf[len++] = '&';
3674 } else {
3675 /*
3676 * The reparsing will be done in xmlStringGetNodeList()
3677 * called by the attribute() function in SAX.c
3678 */
Daniel Veillard319a7422001-09-11 09:27:09 +00003679 if (len > buf_size - 10) {
Daniel Veillard0161e632008-08-28 15:36:32 +00003680 growBuffer(buf, 10);
Daniel Veillard319a7422001-09-11 09:27:09 +00003681 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003682 buf[len++] = '&';
3683 buf[len++] = '#';
3684 buf[len++] = '3';
3685 buf[len++] = '8';
3686 buf[len++] = ';';
Owen Taylor3473f882001-02-23 17:55:21 +00003687 }
Daniel Veillarddc171602008-03-26 17:41:38 +00003688 } else if (val != 0) {
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00003689 if (len > buf_size - 10) {
Daniel Veillard0161e632008-08-28 15:36:32 +00003690 growBuffer(buf, 10);
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00003691 }
Owen Taylor3473f882001-02-23 17:55:21 +00003692 len += xmlCopyChar(0, &buf[len], val);
3693 }
3694 } else {
3695 ent = xmlParseEntityRef(ctxt);
Daniel Veillardcba68392008-08-29 12:43:40 +00003696 ctxt->nbentities++;
3697 if (ent != NULL)
3698 ctxt->nbentities += ent->owner;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003699 if ((ent != NULL) &&
3700 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
3701 if (len > buf_size - 10) {
Daniel Veillard0161e632008-08-28 15:36:32 +00003702 growBuffer(buf, 10);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003703 }
3704 if ((ctxt->replaceEntities == 0) &&
3705 (ent->content[0] == '&')) {
3706 buf[len++] = '&';
3707 buf[len++] = '#';
3708 buf[len++] = '3';
3709 buf[len++] = '8';
3710 buf[len++] = ';';
3711 } else {
3712 buf[len++] = ent->content[0];
3713 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003714 } else if ((ent != NULL) &&
3715 (ctxt->replaceEntities != 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003716 if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
3717 rep = xmlStringDecodeEntities(ctxt, ent->content,
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003718 XML_SUBSTITUTE_REF,
3719 0, 0, 0);
Owen Taylor3473f882001-02-23 17:55:21 +00003720 if (rep != NULL) {
3721 current = rep;
3722 while (*current != 0) { /* non input consuming */
Daniel Veillard283d5022009-08-25 17:18:39 +02003723 if ((*current == 0xD) || (*current == 0xA) ||
3724 (*current == 0x9)) {
3725 buf[len++] = 0x20;
3726 current++;
3727 } else
3728 buf[len++] = *current++;
Owen Taylor3473f882001-02-23 17:55:21 +00003729 if (len > buf_size - 10) {
Daniel Veillard0161e632008-08-28 15:36:32 +00003730 growBuffer(buf, 10);
Owen Taylor3473f882001-02-23 17:55:21 +00003731 }
3732 }
3733 xmlFree(rep);
Daniel Veillard68b6e022008-03-31 09:26:00 +00003734 rep = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00003735 }
3736 } else {
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00003737 if (len > buf_size - 10) {
Daniel Veillard0161e632008-08-28 15:36:32 +00003738 growBuffer(buf, 10);
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00003739 }
Owen Taylor3473f882001-02-23 17:55:21 +00003740 if (ent->content != NULL)
3741 buf[len++] = ent->content[0];
3742 }
3743 } else if (ent != NULL) {
3744 int i = xmlStrlen(ent->name);
3745 const xmlChar *cur = ent->name;
3746
3747 /*
3748 * This may look absurd but is needed to detect
3749 * entities problems
3750 */
3751 if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
3752 (ent->content != NULL)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003753 rep = xmlStringDecodeEntities(ctxt, ent->content,
Daniel Veillard68b6e022008-03-31 09:26:00 +00003754 XML_SUBSTITUTE_REF, 0, 0, 0);
3755 if (rep != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +00003756 xmlFree(rep);
Daniel Veillard68b6e022008-03-31 09:26:00 +00003757 rep = NULL;
3758 }
Owen Taylor3473f882001-02-23 17:55:21 +00003759 }
3760
3761 /*
3762 * Just output the reference
3763 */
3764 buf[len++] = '&';
Daniel Veillard0161e632008-08-28 15:36:32 +00003765 while (len > buf_size - i - 10) {
3766 growBuffer(buf, i + 10);
Owen Taylor3473f882001-02-23 17:55:21 +00003767 }
3768 for (;i > 0;i--)
3769 buf[len++] = *cur++;
3770 buf[len++] = ';';
3771 }
3772 }
3773 } else {
3774 if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003775 if ((len != 0) || (!normalize)) {
3776 if ((!normalize) || (!in_space)) {
3777 COPY_BUF(l,buf,len,0x20);
Daniel Veillard0161e632008-08-28 15:36:32 +00003778 while (len > buf_size - 10) {
3779 growBuffer(buf, 10);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003780 }
3781 }
3782 in_space = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003783 }
3784 } else {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003785 in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003786 COPY_BUF(l,buf,len,c);
3787 if (len > buf_size - 10) {
Daniel Veillard0161e632008-08-28 15:36:32 +00003788 growBuffer(buf, 10);
Owen Taylor3473f882001-02-23 17:55:21 +00003789 }
3790 }
3791 NEXTL(l);
3792 }
3793 GROW;
3794 c = CUR_CHAR(l);
3795 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003796 if ((in_space) && (normalize)) {
3797 while (buf[len - 1] == 0x20) len--;
3798 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00003799 buf[len] = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003800 if (RAW == '<') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003801 xmlFatalErr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003802 } else if (RAW != limit) {
Daniel Veillardb9e5acc2007-06-12 13:43:00 +00003803 if ((c != 0) && (!IS_CHAR(c))) {
3804 xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
3805 "invalid character in attribute value\n");
3806 } else {
3807 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
3808 "AttValue: ' expected\n");
3809 }
Owen Taylor3473f882001-02-23 17:55:21 +00003810 } else
3811 NEXT;
Daniel Veillard0fb18932003-09-07 09:14:37 +00003812 if (attlen != NULL) *attlen = len;
Owen Taylor3473f882001-02-23 17:55:21 +00003813 return(buf);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003814
3815mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003816 xmlErrMemory(ctxt, NULL);
Daniel Veillard68b6e022008-03-31 09:26:00 +00003817 if (buf != NULL)
3818 xmlFree(buf);
3819 if (rep != NULL)
3820 xmlFree(rep);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003821 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003822}
3823
3824/**
Daniel Veillard0fb18932003-09-07 09:14:37 +00003825 * xmlParseAttValue:
3826 * @ctxt: an XML parser context
3827 *
3828 * parse a value for an attribute
3829 * Note: the parser won't do substitution of entities here, this
3830 * will be handled later in xmlStringGetNodeList
3831 *
3832 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
3833 * "'" ([^<&'] | Reference)* "'"
3834 *
3835 * 3.3.3 Attribute-Value Normalization:
3836 * Before the value of an attribute is passed to the application or
3837 * checked for validity, the XML processor must normalize it as follows:
3838 * - a character reference is processed by appending the referenced
3839 * character to the attribute value
3840 * - an entity reference is processed by recursively processing the
3841 * replacement text of the entity
3842 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
3843 * appending #x20 to the normalized value, except that only a single
3844 * #x20 is appended for a "#xD#xA" sequence that is part of an external
3845 * parsed entity or the literal entity value of an internal parsed entity
3846 * - other characters are processed by appending them to the normalized value
3847 * If the declared value is not CDATA, then the XML processor must further
3848 * process the normalized attribute value by discarding any leading and
3849 * trailing space (#x20) characters, and by replacing sequences of space
3850 * (#x20) characters by a single space (#x20) character.
3851 * All attributes for which no declaration has been read should be treated
3852 * by a non-validating parser as if declared CDATA.
3853 *
3854 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
3855 */
3856
3857
3858xmlChar *
3859xmlParseAttValue(xmlParserCtxtPtr ctxt) {
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00003860 if ((ctxt == NULL) || (ctxt->input == NULL)) return(NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003861 return(xmlParseAttValueInternal(ctxt, NULL, NULL, 0));
Daniel Veillard0fb18932003-09-07 09:14:37 +00003862}
3863
3864/**
Owen Taylor3473f882001-02-23 17:55:21 +00003865 * xmlParseSystemLiteral:
3866 * @ctxt: an XML parser context
3867 *
3868 * parse an XML Literal
3869 *
3870 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
3871 *
3872 * Returns the SystemLiteral parsed or NULL
3873 */
3874
3875xmlChar *
3876xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
3877 xmlChar *buf = NULL;
3878 int len = 0;
3879 int size = XML_PARSER_BUFFER_SIZE;
3880 int cur, l;
3881 xmlChar stop;
3882 int state = ctxt->instate;
3883 int count = 0;
3884
3885 SHRINK;
3886 if (RAW == '"') {
3887 NEXT;
3888 stop = '"';
3889 } else if (RAW == '\'') {
3890 NEXT;
3891 stop = '\'';
3892 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003893 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003894 return(NULL);
3895 }
3896
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003897 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003898 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003899 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003900 return(NULL);
3901 }
3902 ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
3903 cur = CUR_CHAR(l);
William M. Brack871611b2003-10-18 04:53:14 +00003904 while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00003905 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003906 xmlChar *tmp;
3907
Owen Taylor3473f882001-02-23 17:55:21 +00003908 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003909 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3910 if (tmp == NULL) {
3911 xmlFree(buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003912 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003913 ctxt->instate = (xmlParserInputState) state;
3914 return(NULL);
3915 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003916 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003917 }
3918 count++;
3919 if (count > 50) {
3920 GROW;
3921 count = 0;
3922 }
3923 COPY_BUF(l,buf,len,cur);
3924 NEXTL(l);
3925 cur = CUR_CHAR(l);
3926 if (cur == 0) {
3927 GROW;
3928 SHRINK;
3929 cur = CUR_CHAR(l);
3930 }
3931 }
3932 buf[len] = 0;
3933 ctxt->instate = (xmlParserInputState) state;
William M. Brack871611b2003-10-18 04:53:14 +00003934 if (!IS_CHAR(cur)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003935 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003936 } else {
3937 NEXT;
3938 }
3939 return(buf);
3940}
3941
3942/**
3943 * xmlParsePubidLiteral:
3944 * @ctxt: an XML parser context
3945 *
3946 * parse an XML public literal
3947 *
3948 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
3949 *
3950 * Returns the PubidLiteral parsed or NULL.
3951 */
3952
3953xmlChar *
3954xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
3955 xmlChar *buf = NULL;
3956 int len = 0;
3957 int size = XML_PARSER_BUFFER_SIZE;
3958 xmlChar cur;
3959 xmlChar stop;
3960 int count = 0;
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003961 xmlParserInputState oldstate = ctxt->instate;
Owen Taylor3473f882001-02-23 17:55:21 +00003962
3963 SHRINK;
3964 if (RAW == '"') {
3965 NEXT;
3966 stop = '"';
3967 } else if (RAW == '\'') {
3968 NEXT;
3969 stop = '\'';
3970 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003971 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003972 return(NULL);
3973 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003974 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003975 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003976 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003977 return(NULL);
3978 }
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003979 ctxt->instate = XML_PARSER_PUBLIC_LITERAL;
Owen Taylor3473f882001-02-23 17:55:21 +00003980 cur = CUR;
William M. Brack76e95df2003-10-18 16:20:14 +00003981 while ((IS_PUBIDCHAR_CH(cur)) && (cur != stop)) { /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00003982 if (len + 1 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003983 xmlChar *tmp;
3984
Owen Taylor3473f882001-02-23 17:55:21 +00003985 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003986 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3987 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003988 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00003989 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00003990 return(NULL);
3991 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003992 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003993 }
3994 buf[len++] = cur;
3995 count++;
3996 if (count > 50) {
3997 GROW;
3998 count = 0;
3999 }
4000 NEXT;
4001 cur = CUR;
4002 if (cur == 0) {
4003 GROW;
4004 SHRINK;
4005 cur = CUR;
4006 }
4007 }
4008 buf[len] = 0;
4009 if (cur != stop) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004010 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004011 } else {
4012 NEXT;
4013 }
Daniel Veillard4a7ae502002-02-18 19:18:17 +00004014 ctxt->instate = oldstate;
Owen Taylor3473f882001-02-23 17:55:21 +00004015 return(buf);
4016}
4017
Daniel Veillard8ed10722009-08-20 19:17:36 +02004018static void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata);
Daniel Veillard0a119eb2005-07-20 13:46:00 +00004019
/*
 * Lookup table used by the inner loop of the character data fast path.
 * An entry is non-zero (equal to its own index) for the bytes that the
 * fast loop may step over directly: TAB and the range 0x20..0x7F except
 * '&', '<' and ']'.  Every other byte, including LF, CR and all bytes
 * above 0x7F, maps to 0 and stops the loop.
 */
static const unsigned char test_char_data[256] = {
    /* 0x00 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
               0x00, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* TAB only */
    /* 0x10 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x20 */ 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x00, 0x27, /* no '&' */
               0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
    /* 0x30 */ 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
               0x38, 0x39, 0x3A, 0x3B, 0x00, 0x3D, 0x3E, 0x3F, /* no '<' */
    /* 0x40 */ 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
               0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F,
    /* 0x50 */ 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
               0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x00, 0x5E, 0x5F, /* no ']' */
    /* 0x60 */ 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
               0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
    /* 0x70 */ 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
               0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F,
    /* 0x80 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* non-ascii */
               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x90 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0xA0 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0xB0 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0xC0 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0xD0 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0xE0 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0xF0 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
};
4057
Owen Taylor3473f882001-02-23 17:55:21 +00004058/**
4059 * xmlParseCharData:
4060 * @ctxt: an XML parser context
4061 * @cdata: int indicating whether we are within a CDATA section
4062 *
4063 * parse a CharData section.
4064 * if we are within a CDATA section ']]>' marks an end of section.
4065 *
4066 * The right angle bracket (>) may be represented using the string "&gt;",
4067 * and must, for compatibility, be escaped using "&gt;" or a character
4068 * reference when it appears in the string "]]>" in content, when that
4069 * string is not marking the end of a CDATA section.
4070 *
4071 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
4072 */
4073
4074void
4075xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00004076 const xmlChar *in;
Daniel Veillard48b2f892001-02-25 16:11:03 +00004077 int nbchar = 0;
Daniel Veillard50582112001-03-26 22:52:16 +00004078 int line = ctxt->input->line;
4079 int col = ctxt->input->col;
Daniel Veillard0714c5b2005-01-23 00:01:01 +00004080 int ccol;
Daniel Veillard48b2f892001-02-25 16:11:03 +00004081
4082 SHRINK;
4083 GROW;
4084 /*
4085 * Accelerated common case where input don't need to be
4086 * modified before passing it to the handler.
4087 */
Daniel Veillardfdc91562002-07-01 21:52:03 +00004088 if (!cdata) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00004089 in = ctxt->input->cur;
4090 do {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004091get_more_space:
Daniel Veillard9e264ad2008-01-11 06:10:16 +00004092 while (*in == 0x20) { in++; ctxt->input->col++; }
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004093 if (*in == 0xA) {
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004094 do {
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00004095 ctxt->input->line++; ctxt->input->col = 1;
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004096 in++;
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004097 } while (*in == 0xA);
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004098 goto get_more_space;
4099 }
4100 if (*in == '<') {
4101 nbchar = in - ctxt->input->cur;
4102 if (nbchar > 0) {
4103 const xmlChar *tmp = ctxt->input->cur;
4104 ctxt->input->cur = in;
4105
Daniel Veillard34099b42004-11-04 17:34:35 +00004106 if ((ctxt->sax != NULL) &&
4107 (ctxt->sax->ignorableWhitespace !=
4108 ctxt->sax->characters)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004109 if (areBlanks(ctxt, tmp, nbchar, 1)) {
Daniel Veillard32acf0c2005-03-31 14:12:37 +00004110 if (ctxt->sax->ignorableWhitespace != NULL)
4111 ctxt->sax->ignorableWhitespace(ctxt->userData,
4112 tmp, nbchar);
Daniel Veillard1114d002006-10-12 16:24:35 +00004113 } else {
4114 if (ctxt->sax->characters != NULL)
4115 ctxt->sax->characters(ctxt->userData,
4116 tmp, nbchar);
4117 if (*ctxt->space == -1)
4118 *ctxt->space = -2;
4119 }
Daniel Veillard34099b42004-11-04 17:34:35 +00004120 } else if ((ctxt->sax != NULL) &&
4121 (ctxt->sax->characters != NULL)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004122 ctxt->sax->characters(ctxt->userData,
4123 tmp, nbchar);
4124 }
4125 }
4126 return;
4127 }
Daniel Veillard0714c5b2005-01-23 00:01:01 +00004128
Daniel Veillard3ed155f2001-04-29 19:56:59 +00004129get_more:
Daniel Veillard0714c5b2005-01-23 00:01:01 +00004130 ccol = ctxt->input->col;
Daniel Veillard0a119eb2005-07-20 13:46:00 +00004131 while (test_char_data[*in]) {
4132 in++;
4133 ccol++;
4134 }
Daniel Veillard0714c5b2005-01-23 00:01:01 +00004135 ctxt->input->col = ccol;
Daniel Veillard561b7f82002-03-20 21:55:57 +00004136 if (*in == 0xA) {
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004137 do {
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00004138 ctxt->input->line++; ctxt->input->col = 1;
Daniel Veillard3ed155f2001-04-29 19:56:59 +00004139 in++;
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004140 } while (*in == 0xA);
Daniel Veillard3ed155f2001-04-29 19:56:59 +00004141 goto get_more;
Daniel Veillard561b7f82002-03-20 21:55:57 +00004142 }
4143 if (*in == ']') {
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00004144 if ((in[1] == ']') && (in[2] == '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004145 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00004146 ctxt->input->cur = in;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00004147 return;
4148 }
4149 in++;
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00004150 ctxt->input->col++;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00004151 goto get_more;
4152 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00004153 nbchar = in - ctxt->input->cur;
Daniel Veillard80f32572001-03-07 19:45:40 +00004154 if (nbchar > 0) {
Daniel Veillard34099b42004-11-04 17:34:35 +00004155 if ((ctxt->sax != NULL) &&
4156 (ctxt->sax->ignorableWhitespace !=
Daniel Veillard40412cd2003-09-03 13:28:32 +00004157 ctxt->sax->characters) &&
William M. Brack76e95df2003-10-18 16:20:14 +00004158 (IS_BLANK_CH(*ctxt->input->cur))) {
Daniel Veillarda7374592001-05-10 14:17:55 +00004159 const xmlChar *tmp = ctxt->input->cur;
4160 ctxt->input->cur = in;
Daniel Veillard40412cd2003-09-03 13:28:32 +00004161
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004162 if (areBlanks(ctxt, tmp, nbchar, 0)) {
Daniel Veillard32acf0c2005-03-31 14:12:37 +00004163 if (ctxt->sax->ignorableWhitespace != NULL)
4164 ctxt->sax->ignorableWhitespace(ctxt->userData,
4165 tmp, nbchar);
Daniel Veillard1114d002006-10-12 16:24:35 +00004166 } else {
4167 if (ctxt->sax->characters != NULL)
4168 ctxt->sax->characters(ctxt->userData,
4169 tmp, nbchar);
4170 if (*ctxt->space == -1)
4171 *ctxt->space = -2;
4172 }
Daniel Veillard3ed27bd2001-06-17 17:58:17 +00004173 line = ctxt->input->line;
4174 col = ctxt->input->col;
Daniel Veillard34099b42004-11-04 17:34:35 +00004175 } else if (ctxt->sax != NULL) {
Daniel Veillard80f32572001-03-07 19:45:40 +00004176 if (ctxt->sax->characters != NULL)
4177 ctxt->sax->characters(ctxt->userData,
4178 ctxt->input->cur, nbchar);
Daniel Veillard3ed27bd2001-06-17 17:58:17 +00004179 line = ctxt->input->line;
4180 col = ctxt->input->col;
Daniel Veillard80f32572001-03-07 19:45:40 +00004181 }
Daniel Veillard1dc9feb2008-11-17 15:59:21 +00004182 /* something really bad happened in the SAX callback */
4183 if (ctxt->instate != XML_PARSER_CONTENT)
4184 return;
Daniel Veillard48b2f892001-02-25 16:11:03 +00004185 }
4186 ctxt->input->cur = in;
Daniel Veillard561b7f82002-03-20 21:55:57 +00004187 if (*in == 0xD) {
4188 in++;
4189 if (*in == 0xA) {
4190 ctxt->input->cur = in;
Daniel Veillard48b2f892001-02-25 16:11:03 +00004191 in++;
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00004192 ctxt->input->line++; ctxt->input->col = 1;
Daniel Veillard561b7f82002-03-20 21:55:57 +00004193 continue; /* while */
Daniel Veillard48b2f892001-02-25 16:11:03 +00004194 }
Daniel Veillard561b7f82002-03-20 21:55:57 +00004195 in--;
4196 }
4197 if (*in == '<') {
4198 return;
4199 }
4200 if (*in == '&') {
4201 return;
Daniel Veillard48b2f892001-02-25 16:11:03 +00004202 }
4203 SHRINK;
4204 GROW;
4205 in = ctxt->input->cur;
William M. Brackc07329e2003-09-08 01:57:30 +00004206 } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
Daniel Veillard48b2f892001-02-25 16:11:03 +00004207 nbchar = 0;
4208 }
Daniel Veillard50582112001-03-26 22:52:16 +00004209 ctxt->input->line = line;
4210 ctxt->input->col = col;
Daniel Veillard48b2f892001-02-25 16:11:03 +00004211 xmlParseCharDataComplex(ctxt, cdata);
4212}
4213
Daniel Veillard01c13b52002-12-10 15:19:08 +00004214/**
4215 * xmlParseCharDataComplex:
4216 * @ctxt: an XML parser context
4217 * @cdata: int indicating whether we are within a CDATA section
4218 *
4219 * parse a CharData section.this is the fallback function
4220 * of xmlParseCharData() when the parsing requires handling
4221 * of non-ASCII characters.
4222 */
Daniel Veillard8ed10722009-08-20 19:17:36 +02004223static void
Daniel Veillard48b2f892001-02-25 16:11:03 +00004224xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) {
Owen Taylor3473f882001-02-23 17:55:21 +00004225 xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
4226 int nbchar = 0;
4227 int cur, l;
4228 int count = 0;
4229
4230 SHRINK;
4231 GROW;
4232 cur = CUR_CHAR(l);
Daniel Veillardfdc91562002-07-01 21:52:03 +00004233 while ((cur != '<') && /* checked */
4234 (cur != '&') &&
William M. Brack871611b2003-10-18 04:53:14 +00004235 (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
Owen Taylor3473f882001-02-23 17:55:21 +00004236 if ((cur == ']') && (NXT(1) == ']') &&
4237 (NXT(2) == '>')) {
4238 if (cdata) break;
4239 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004240 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004241 }
4242 }
4243 COPY_BUF(l,buf,nbchar,cur);
4244 if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
Daniel Veillard092643b2003-09-25 14:29:29 +00004245 buf[nbchar] = 0;
4246
Owen Taylor3473f882001-02-23 17:55:21 +00004247 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00004248 * OK the segment is to be consumed as chars.
Owen Taylor3473f882001-02-23 17:55:21 +00004249 */
4250 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004251 if (areBlanks(ctxt, buf, nbchar, 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00004252 if (ctxt->sax->ignorableWhitespace != NULL)
4253 ctxt->sax->ignorableWhitespace(ctxt->userData,
4254 buf, nbchar);
4255 } else {
4256 if (ctxt->sax->characters != NULL)
4257 ctxt->sax->characters(ctxt->userData, buf, nbchar);
Daniel Veillard1114d002006-10-12 16:24:35 +00004258 if ((ctxt->sax->characters !=
4259 ctxt->sax->ignorableWhitespace) &&
4260 (*ctxt->space == -1))
4261 *ctxt->space = -2;
Owen Taylor3473f882001-02-23 17:55:21 +00004262 }
4263 }
4264 nbchar = 0;
Daniel Veillard1dc9feb2008-11-17 15:59:21 +00004265 /* something really bad happened in the SAX callback */
4266 if (ctxt->instate != XML_PARSER_CONTENT)
4267 return;
Owen Taylor3473f882001-02-23 17:55:21 +00004268 }
4269 count++;
4270 if (count > 50) {
4271 GROW;
4272 count = 0;
4273 }
4274 NEXTL(l);
4275 cur = CUR_CHAR(l);
4276 }
4277 if (nbchar != 0) {
Daniel Veillard092643b2003-09-25 14:29:29 +00004278 buf[nbchar] = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00004279 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00004280 * OK the segment is to be consumed as chars.
Owen Taylor3473f882001-02-23 17:55:21 +00004281 */
4282 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004283 if (areBlanks(ctxt, buf, nbchar, 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00004284 if (ctxt->sax->ignorableWhitespace != NULL)
4285 ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
4286 } else {
4287 if (ctxt->sax->characters != NULL)
4288 ctxt->sax->characters(ctxt->userData, buf, nbchar);
Daniel Veillard1114d002006-10-12 16:24:35 +00004289 if ((ctxt->sax->characters != ctxt->sax->ignorableWhitespace) &&
4290 (*ctxt->space == -1))
4291 *ctxt->space = -2;
Owen Taylor3473f882001-02-23 17:55:21 +00004292 }
4293 }
4294 }
Daniel Veillard3b1478b2006-04-24 08:50:10 +00004295 if ((cur != 0) && (!IS_CHAR(cur))) {
4296 /* Generate the error and skip the offending character */
4297 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4298 "PCDATA invalid Char value %d\n",
4299 cur);
4300 NEXTL(l);
4301 }
Owen Taylor3473f882001-02-23 17:55:21 +00004302}
4303
4304/**
4305 * xmlParseExternalID:
4306 * @ctxt: an XML parser context
4307 * @publicID: a xmlChar** receiving PubidLiteral
4308 * @strict: indicate whether we should restrict parsing to only
4309 * production [75], see NOTE below
4310 *
4311 * Parse an External ID or a Public ID
4312 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00004313 * NOTE: Productions [75] and [83] interact badly since [75] can generate
Owen Taylor3473f882001-02-23 17:55:21 +00004314 * 'PUBLIC' S PubidLiteral S SystemLiteral
4315 *
4316 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
4317 * | 'PUBLIC' S PubidLiteral S SystemLiteral
4318 *
4319 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
4320 *
4321 * Returns the function returns SystemLiteral and in the second
4322 * case publicID receives PubidLiteral, is strict is off
4323 * it is possible to return NULL and have publicID set.
4324 */
4325
4326xmlChar *
4327xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
4328 xmlChar *URI = NULL;
4329
4330 SHRINK;
Daniel Veillard146c9122001-03-22 15:22:27 +00004331
4332 *publicID = NULL;
Daniel Veillarda07050d2003-10-19 14:46:32 +00004333 if (CMP6(CUR_PTR, 'S', 'Y', 'S', 'T', 'E', 'M')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004334 SKIP(6);
William M. Brack76e95df2003-10-18 16:20:14 +00004335 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004336 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4337 "Space required after 'SYSTEM'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004338 }
4339 SKIP_BLANKS;
4340 URI = xmlParseSystemLiteral(ctxt);
4341 if (URI == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004342 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004343 }
Daniel Veillarda07050d2003-10-19 14:46:32 +00004344 } else if (CMP6(CUR_PTR, 'P', 'U', 'B', 'L', 'I', 'C')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004345 SKIP(6);
William M. Brack76e95df2003-10-18 16:20:14 +00004346 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004347 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004348 "Space required after 'PUBLIC'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004349 }
4350 SKIP_BLANKS;
4351 *publicID = xmlParsePubidLiteral(ctxt);
4352 if (*publicID == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004353 xmlFatalErr(ctxt, XML_ERR_PUBID_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004354 }
4355 if (strict) {
4356 /*
4357 * We don't handle [83] so "S SystemLiteral" is required.
4358 */
William M. Brack76e95df2003-10-18 16:20:14 +00004359 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004360 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004361 "Space required after the Public Identifier\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004362 }
4363 } else {
4364 /*
4365 * We handle [83] so we return immediately, if
4366 * "S SystemLiteral" is not detected. From a purely parsing
4367 * point of view that's a nice mess.
4368 */
4369 const xmlChar *ptr;
4370 GROW;
4371
4372 ptr = CUR_PTR;
William M. Brack76e95df2003-10-18 16:20:14 +00004373 if (!IS_BLANK_CH(*ptr)) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004374
William M. Brack76e95df2003-10-18 16:20:14 +00004375 while (IS_BLANK_CH(*ptr)) ptr++; /* TODO: dangerous, fix ! */
Owen Taylor3473f882001-02-23 17:55:21 +00004376 if ((*ptr != '\'') && (*ptr != '"')) return(NULL);
4377 }
4378 SKIP_BLANKS;
4379 URI = xmlParseSystemLiteral(ctxt);
4380 if (URI == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004381 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004382 }
4383 }
4384 return(URI);
4385}
4386
4387/**
Daniel Veillard4c778d82005-01-23 17:37:44 +00004388 * xmlParseCommentComplex:
Owen Taylor3473f882001-02-23 17:55:21 +00004389 * @ctxt: an XML parser context
Daniel Veillard4c778d82005-01-23 17:37:44 +00004390 * @buf: the already parsed part of the buffer
4391 * @len: number of bytes filles in the buffer
4392 * @size: allocated size of the buffer
Owen Taylor3473f882001-02-23 17:55:21 +00004393 *
4394 * Skip an XML (SGML) comment <!-- .... -->
4395 * The spec says that "For compatibility, the string "--" (double-hyphen)
4396 * must not occur within comments. "
Daniel Veillard4c778d82005-01-23 17:37:44 +00004397 * This is the slow routine in case the accelerator for ascii didn't work
Owen Taylor3473f882001-02-23 17:55:21 +00004398 *
4399 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4400 */
Daniel Veillard4c778d82005-01-23 17:37:44 +00004401static void
4402xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar *buf, int len, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +00004403 int q, ql;
4404 int r, rl;
4405 int cur, l;
Owen Taylor3473f882001-02-23 17:55:21 +00004406 int count = 0;
Daniel Veillard051d52c2008-07-29 16:44:59 +00004407 int inputid;
4408
4409 inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00004410
Owen Taylor3473f882001-02-23 17:55:21 +00004411 if (buf == NULL) {
Daniel Veillard4c778d82005-01-23 17:37:44 +00004412 len = 0;
4413 size = XML_PARSER_BUFFER_SIZE;
4414 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4415 if (buf == NULL) {
4416 xmlErrMemory(ctxt, NULL);
4417 return;
4418 }
Owen Taylor3473f882001-02-23 17:55:21 +00004419 }
William M. Brackbf9a73d2007-02-09 00:07:07 +00004420 GROW; /* Assure there's enough input data */
Owen Taylor3473f882001-02-23 17:55:21 +00004421 q = CUR_CHAR(ql);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00004422 if (q == 0)
4423 goto not_terminated;
Daniel Veillardda629342007-08-01 07:49:06 +00004424 if (!IS_CHAR(q)) {
4425 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4426 "xmlParseComment: invalid xmlChar value %d\n",
4427 q);
4428 xmlFree (buf);
4429 return;
4430 }
Owen Taylor3473f882001-02-23 17:55:21 +00004431 NEXTL(ql);
4432 r = CUR_CHAR(rl);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00004433 if (r == 0)
4434 goto not_terminated;
Daniel Veillardda629342007-08-01 07:49:06 +00004435 if (!IS_CHAR(r)) {
4436 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4437 "xmlParseComment: invalid xmlChar value %d\n",
4438 q);
4439 xmlFree (buf);
4440 return;
4441 }
Owen Taylor3473f882001-02-23 17:55:21 +00004442 NEXTL(rl);
4443 cur = CUR_CHAR(l);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00004444 if (cur == 0)
4445 goto not_terminated;
William M. Brack871611b2003-10-18 04:53:14 +00004446 while (IS_CHAR(cur) && /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00004447 ((cur != '>') ||
4448 (r != '-') || (q != '-'))) {
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00004449 if ((r == '-') && (q == '-')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004450 xmlFatalErr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004451 }
4452 if (len + 5 >= size) {
William M. Bracka3215c72004-07-31 16:24:01 +00004453 xmlChar *new_buf;
Owen Taylor3473f882001-02-23 17:55:21 +00004454 size *= 2;
William M. Bracka3215c72004-07-31 16:24:01 +00004455 new_buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4456 if (new_buf == NULL) {
4457 xmlFree (buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004458 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004459 return;
4460 }
William M. Bracka3215c72004-07-31 16:24:01 +00004461 buf = new_buf;
Owen Taylor3473f882001-02-23 17:55:21 +00004462 }
4463 COPY_BUF(ql,buf,len,q);
4464 q = r;
4465 ql = rl;
4466 r = cur;
4467 rl = l;
4468
4469 count++;
4470 if (count > 50) {
4471 GROW;
4472 count = 0;
4473 }
4474 NEXTL(l);
4475 cur = CUR_CHAR(l);
4476 if (cur == 0) {
4477 SHRINK;
4478 GROW;
4479 cur = CUR_CHAR(l);
4480 }
4481 }
4482 buf[len] = 0;
Daniel Veillardda629342007-08-01 07:49:06 +00004483 if (cur == 0) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00004484 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00004485 "Comment not terminated \n<!--%.50s\n", buf);
Daniel Veillardda629342007-08-01 07:49:06 +00004486 } else if (!IS_CHAR(cur)) {
4487 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4488 "xmlParseComment: invalid xmlChar value %d\n",
4489 cur);
Owen Taylor3473f882001-02-23 17:55:21 +00004490 } else {
Daniel Veillard051d52c2008-07-29 16:44:59 +00004491 if (inputid != ctxt->input->id) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004492 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4493 "Comment doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004494 }
4495 NEXT;
4496 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
4497 (!ctxt->disableSAX))
4498 ctxt->sax->comment(ctxt->userData, buf);
Owen Taylor3473f882001-02-23 17:55:21 +00004499 }
Daniel Veillardda629342007-08-01 07:49:06 +00004500 xmlFree(buf);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00004501 return;
4502not_terminated:
4503 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4504 "Comment not terminated\n", NULL);
4505 xmlFree(buf);
Daniel Veillardda629342007-08-01 07:49:06 +00004506 return;
Owen Taylor3473f882001-02-23 17:55:21 +00004507}
Daniel Veillardda629342007-08-01 07:49:06 +00004508
Daniel Veillard4c778d82005-01-23 17:37:44 +00004509/**
4510 * xmlParseComment:
4511 * @ctxt: an XML parser context
4512 *
4513 * Skip an XML (SGML) comment <!-- .... -->
4514 * The spec says that "For compatibility, the string "--" (double-hyphen)
4515 * must not occur within comments. "
4516 *
4517 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4518 */
void
xmlParseComment(xmlParserCtxtPtr ctxt) {
    xmlChar *buf = NULL;               /* comment text accumulated for SAX */
    int size = XML_PARSER_BUFFER_SIZE; /* allocated size of buf */
    int len = 0;                       /* number of bytes stored in buf */
    xmlParserInputState state;         /* parser state restored on exit */
    const xmlChar *in;                 /* scan pointer, runs ahead of input->cur */
    int nbchar = 0, ccol;
    int inputid;                       /* id of the input the comment began in */

    /*
     * Check that there is a comment right here.
     */
    if ((RAW != '<') || (NXT(1) != '!') ||
        (NXT(2) != '-') || (NXT(3) != '-')) return;
    state = ctxt->instate;
    ctxt->instate = XML_PARSER_COMMENT;
    inputid = ctxt->input->id;
    SKIP(4);
    SHRINK;
    GROW;

    /*
     * Accelerated common case where the input doesn't need to be
     * modified before passing it to the handler.
     */
    in = ctxt->input->cur;
    do {
        /* Leading run of line feeds: only line/column tracking changes. */
        if (*in == 0xA) {
            do {
                ctxt->input->line++; ctxt->input->col = 1;
                in++;
            } while (*in == 0xA);
        }
get_more:
        ccol = ctxt->input->col;
        /*
         * Consume a run of plain bytes: 0x20..0x7F except '-', plus TAB.
         * Any other byte (including '-', LF, CR, 0 and bytes above 0x7F)
         * stops the run.
         */
        while (((*in > '-') && (*in <= 0x7F)) ||
               ((*in >= 0x20) && (*in < '-')) ||
               (*in == 0x09)) {
            in++;
            ccol++;
        }
        ctxt->input->col = ccol;
        if (*in == 0xA) {
            do {
                ctxt->input->line++; ctxt->input->col = 1;
                in++;
            } while (*in == 0xA);
            goto get_more;
        }
        /* Number of bytes scanned since the last committed position. */
        nbchar = in - ctxt->input->cur;
        /*
         * save current set of data
         */
        if (nbchar > 0) {
            /* The text is only buffered when a SAX comment handler exists. */
            if ((ctxt->sax != NULL) &&
                (ctxt->sax->comment != NULL)) {
                if (buf == NULL) {
                    /*
                     * First chunk: when "--" follows immediately the
                     * allocation is sized exactly (text + terminator),
                     * otherwise extra headroom is reserved.
                     */
                    if ((*in == '-') && (in[1] == '-'))
                        size = nbchar + 1;
                    else
                        size = XML_PARSER_BUFFER_SIZE + nbchar;
                    buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
                    if (buf == NULL) {
                        xmlErrMemory(ctxt, NULL);
                        ctxt->instate = state;
                        return;
                    }
                    len = 0;
                } else if (len + nbchar + 1 >= size) {
                    /* Not enough room for this chunk plus the terminator. */
                    xmlChar *new_buf;
                    size  += len + nbchar + XML_PARSER_BUFFER_SIZE;
                    new_buf = (xmlChar *) xmlRealloc(buf,
                                                     size * sizeof(xmlChar));
                    if (new_buf == NULL) {
                        xmlFree (buf);
                        xmlErrMemory(ctxt, NULL);
                        ctxt->instate = state;
                        return;
                    }
                    buf = new_buf;
                }
                memcpy(&buf[len], ctxt->input->cur, nbchar);
                len += nbchar;
                buf[len] = 0;
            }
        }
        /* Commit the scanned bytes. */
        ctxt->input->cur = in;
        if (*in == 0xA) {
            in++;
            ctxt->input->line++; ctxt->input->col = 1;
        }
        if (*in == 0xD) {
            in++;
            if (*in == 0xA) {
                /*
                 * CR LF pair: input->cur is moved onto the LF, so the CR
                 * is left out of the next copied chunk, while `in` steps
                 * past the LF. `continue` jumps to the loop condition.
                 */
                ctxt->input->cur = in;
                in++;
                ctxt->input->line++; ctxt->input->col = 1;
                continue; /* while */
            }
            in--;
        }
        SHRINK;
        GROW;
        /*
         * Reload the scan pointer from the context after SHRINK/GROW
         * (presumably because they may move the input buffer - confirm
         * against the macro definitions).
         */
        in = ctxt->input->cur;
        if (*in == '-') {
            if (in[1] == '-') {
                if (in[2] == '>') {
                    /* "-->" found: the comment is complete. */
                    if (ctxt->input->id != inputid) {
                        xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
                        "comment doesn't start and stop in the same entity\n");
                    }
                    SKIP(3);
                    if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
                        (!ctxt->disableSAX)) {
                        if (buf != NULL)
                            ctxt->sax->comment(ctxt->userData, buf);
                        else
                            ctxt->sax->comment(ctxt->userData, BAD_CAST "");
                    }
                    if (buf != NULL)
                        xmlFree(buf);
                    ctxt->instate = state;
                    return;
                }
                /*
                 * "--" not followed by '>': report the error, then step
                 * over the first hyphen here and the second one below and
                 * keep scanning.
                 */
                if (buf != NULL)
                    xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
                                      "Comment not terminated \n<!--%.50s\n",
                                      buf);
                else
                    xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
                                      "Comment not terminated \n", NULL);
                in++;
                ctxt->input->col++;
            }
            /*
             * Step over a hyphen. input->cur is not advanced here, so the
             * skipped hyphen(s) are part of the next chunk measured from
             * input->cur.
             */
            in++;
            ctxt->input->col++;
            goto get_more;
        }
    } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
    /*
     * The fast path stopped on a byte it does not handle: hand the
     * buffer state collected so far to xmlParseCommentComplex(). buf is
     * not freed here, so the callee presumably takes ownership of it
     * (verify against its definition).
     */
    xmlParseCommentComplex(ctxt, buf, len, size);
    ctxt->instate = state;
    return;
}
4663
Owen Taylor3473f882001-02-23 17:55:21 +00004664
4665/**
4666 * xmlParsePITarget:
4667 * @ctxt: an XML parser context
4668 *
4669 * parse the name of a PI
4670 *
4671 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
4672 *
4673 * Returns the PITarget name or NULL
4674 */
4675
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004676const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00004677xmlParsePITarget(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004678 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00004679
4680 name = xmlParseName(ctxt);
4681 if ((name != NULL) &&
4682 ((name[0] == 'x') || (name[0] == 'X')) &&
4683 ((name[1] == 'm') || (name[1] == 'M')) &&
4684 ((name[2] == 'l') || (name[2] == 'L'))) {
4685 int i;
4686 if ((name[0] == 'x') && (name[1] == 'm') &&
4687 (name[2] == 'l') && (name[3] == 0)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004688 xmlFatalErrMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
Owen Taylor3473f882001-02-23 17:55:21 +00004689 "XML declaration allowed only at the start of the document\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004690 return(name);
4691 } else if (name[3] == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004692 xmlFatalErr(ctxt, XML_ERR_RESERVED_XML_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004693 return(name);
4694 }
4695 for (i = 0;;i++) {
4696 if (xmlW3CPIs[i] == NULL) break;
4697 if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
4698 return(name);
4699 }
Daniel Veillard24eb9782003-10-04 21:08:09 +00004700 xmlWarningMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
4701 "xmlParsePITarget: invalid name prefix 'xml'\n",
4702 NULL, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004703 }
Daniel Veillard37334572008-07-31 08:20:02 +00004704 if ((name != NULL) && (xmlStrchr(name, ':') != NULL)) {
4705 xmlNsErr(ctxt, XML_NS_ERR_COLON,
4706 "colon are forbidden from PI names '%s'\n", name, NULL, NULL);
4707 }
Owen Taylor3473f882001-02-23 17:55:21 +00004708 return(name);
4709}
4710
#ifdef LIBXML_CATALOG_ENABLED
/**
 * xmlParseCatalogPI:
 * @ctxt:  an XML parser context
 * @catalog:  the PI value string
 *
 * parse an XML Catalog Processing Instruction.
 *
 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
 *
 * Occurs only if allowed by the user and if happening in the Misc
 * part of the document before any doctype information.
 * This will add the given catalog to the parsing context in order
 * to be used if there is a resolution need further down in the document.
 *
 * The expected content is: optional blanks, the word "catalog", optional
 * blanks, '=', optional blanks, a single- or double-quoted URL, optional
 * blanks, end of string. Any deviation emits an XML_WAR_CATALOG_PI
 * warning and leaves ctxt->catalogs untouched.
 */

static void
xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
    xmlChar *URL = NULL;
    const xmlChar *tmp, *base;
    xmlChar marker;

    tmp = catalog;
    while (IS_BLANK_CH(*tmp)) tmp++;
    if (xmlStrncmp(tmp, BAD_CAST"catalog", 7))
        goto error;
    tmp += 7;
    while (IS_BLANK_CH(*tmp)) tmp++;
    /*
     * A missing '=' is a syntax error like any other: report it instead
     * of returning silently.
     */
    if (*tmp != '=')
        goto error;
    tmp++;
    while (IS_BLANK_CH(*tmp)) tmp++;
    marker = *tmp;
    if ((marker != '\'') && (marker != '"'))
        goto error;
    tmp++;
    base = tmp;
    /* Scan up to the matching closing quote. */
    while ((*tmp != 0) && (*tmp != marker)) tmp++;
    if (*tmp == 0)
        goto error;
    URL = xmlStrndup(base, tmp - base);
    tmp++;
    /* Only blanks may follow the quoted value. */
    while (IS_BLANK_CH(*tmp)) tmp++;
    if (*tmp != 0)
        goto error;

    if (URL != NULL) {
        ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, URL);
        xmlFree(URL);
    }
    return;

error:
    xmlWarningMsg(ctxt, XML_WAR_CATALOG_PI,
                  "Catalog PI syntax error: %s\n",
                  catalog, NULL);
    if (URL != NULL)
        xmlFree(URL);
}
#endif
4772
Owen Taylor3473f882001-02-23 17:55:21 +00004773/**
4774 * xmlParsePI:
4775 * @ctxt: an XML parser context
4776 *
4777 * parse an XML Processing Instruction.
4778 *
4779 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
4780 *
4781 * The processing is transfered to SAX once parsed.
4782 */
4783
4784void
4785xmlParsePI(xmlParserCtxtPtr ctxt) {
4786 xmlChar *buf = NULL;
4787 int len = 0;
4788 int size = XML_PARSER_BUFFER_SIZE;
4789 int cur, l;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004790 const xmlChar *target;
Owen Taylor3473f882001-02-23 17:55:21 +00004791 xmlParserInputState state;
4792 int count = 0;
4793
4794 if ((RAW == '<') && (NXT(1) == '?')) {
4795 xmlParserInputPtr input = ctxt->input;
4796 state = ctxt->instate;
4797 ctxt->instate = XML_PARSER_PI;
4798 /*
4799 * this is a Processing Instruction.
4800 */
4801 SKIP(2);
4802 SHRINK;
4803
4804 /*
4805 * Parse the target name and check for special support like
4806 * namespace.
4807 */
4808 target = xmlParsePITarget(ctxt);
4809 if (target != NULL) {
4810 if ((RAW == '?') && (NXT(1) == '>')) {
4811 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004812 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4813 "PI declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004814 }
4815 SKIP(2);
4816
4817 /*
4818 * SAX: PI detected.
4819 */
4820 if ((ctxt->sax) && (!ctxt->disableSAX) &&
4821 (ctxt->sax->processingInstruction != NULL))
4822 ctxt->sax->processingInstruction(ctxt->userData,
4823 target, NULL);
4824 ctxt->instate = state;
Owen Taylor3473f882001-02-23 17:55:21 +00004825 return;
4826 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00004827 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00004828 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004829 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004830 ctxt->instate = state;
4831 return;
4832 }
4833 cur = CUR;
4834 if (!IS_BLANK(cur)) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00004835 xmlFatalErrMsgStr(ctxt, XML_ERR_SPACE_REQUIRED,
4836 "ParsePI: PI %s space expected\n", target);
Owen Taylor3473f882001-02-23 17:55:21 +00004837 }
4838 SKIP_BLANKS;
4839 cur = CUR_CHAR(l);
William M. Brack871611b2003-10-18 04:53:14 +00004840 while (IS_CHAR(cur) && /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00004841 ((cur != '?') || (NXT(1) != '>'))) {
4842 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00004843 xmlChar *tmp;
4844
Owen Taylor3473f882001-02-23 17:55:21 +00004845 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00004846 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4847 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004848 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00004849 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00004850 ctxt->instate = state;
4851 return;
4852 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00004853 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00004854 }
4855 count++;
4856 if (count > 50) {
4857 GROW;
4858 count = 0;
4859 }
4860 COPY_BUF(l,buf,len,cur);
4861 NEXTL(l);
4862 cur = CUR_CHAR(l);
4863 if (cur == 0) {
4864 SHRINK;
4865 GROW;
4866 cur = CUR_CHAR(l);
4867 }
4868 }
4869 buf[len] = 0;
4870 if (cur != '?') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00004871 xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
4872 "ParsePI: PI %s never end ...\n", target);
Owen Taylor3473f882001-02-23 17:55:21 +00004873 } else {
4874 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004875 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4876 "PI declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004877 }
4878 SKIP(2);
4879
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00004880#ifdef LIBXML_CATALOG_ENABLED
4881 if (((state == XML_PARSER_MISC) ||
4882 (state == XML_PARSER_START)) &&
4883 (xmlStrEqual(target, XML_CATALOG_PI))) {
4884 xmlCatalogAllow allow = xmlCatalogGetDefaults();
4885 if ((allow == XML_CATA_ALLOW_DOCUMENT) ||
4886 (allow == XML_CATA_ALLOW_ALL))
4887 xmlParseCatalogPI(ctxt, buf);
4888 }
4889#endif
4890
4891
Owen Taylor3473f882001-02-23 17:55:21 +00004892 /*
4893 * SAX: PI detected.
4894 */
4895 if ((ctxt->sax) && (!ctxt->disableSAX) &&
4896 (ctxt->sax->processingInstruction != NULL))
4897 ctxt->sax->processingInstruction(ctxt->userData,
4898 target, buf);
4899 }
4900 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00004901 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004902 xmlFatalErr(ctxt, XML_ERR_PI_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004903 }
4904 ctxt->instate = state;
4905 }
4906}
4907
4908/**
4909 * xmlParseNotationDecl:
4910 * @ctxt: an XML parser context
4911 *
4912 * parse a notation declaration
4913 *
4914 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
4915 *
4916 * Hence there is actually 3 choices:
4917 * 'PUBLIC' S PubidLiteral
4918 * 'PUBLIC' S PubidLiteral S SystemLiteral
4919 * and 'SYSTEM' S SystemLiteral
4920 *
4921 * See the NOTE on xmlParseExternalID().
4922 */
4923
4924void
4925xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004926 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00004927 xmlChar *Pubid;
4928 xmlChar *Systemid;
4929
Daniel Veillarda07050d2003-10-19 14:46:32 +00004930 if (CMP10(CUR_PTR, '<', '!', 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004931 xmlParserInputPtr input = ctxt->input;
4932 SHRINK;
4933 SKIP(10);
William M. Brack76e95df2003-10-18 16:20:14 +00004934 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004935 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4936 "Space required after '<!NOTATION'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004937 return;
4938 }
4939 SKIP_BLANKS;
4940
Daniel Veillard76d66f42001-05-16 21:05:17 +00004941 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004942 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004943 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004944 return;
4945 }
William M. Brack76e95df2003-10-18 16:20:14 +00004946 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004947 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004948 "Space required after the NOTATION name'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004949 return;
4950 }
Daniel Veillard37334572008-07-31 08:20:02 +00004951 if (xmlStrchr(name, ':') != NULL) {
4952 xmlNsErr(ctxt, XML_NS_ERR_COLON,
4953 "colon are forbidden from notation names '%s'\n",
4954 name, NULL, NULL);
4955 }
Owen Taylor3473f882001-02-23 17:55:21 +00004956 SKIP_BLANKS;
4957
4958 /*
4959 * Parse the IDs.
4960 */
4961 Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
4962 SKIP_BLANKS;
4963
4964 if (RAW == '>') {
4965 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004966 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4967 "Notation declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004968 }
4969 NEXT;
4970 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4971 (ctxt->sax->notationDecl != NULL))
4972 ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
4973 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004974 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004975 }
Owen Taylor3473f882001-02-23 17:55:21 +00004976 if (Systemid != NULL) xmlFree(Systemid);
4977 if (Pubid != NULL) xmlFree(Pubid);
4978 }
4979}
4980
4981/**
4982 * xmlParseEntityDecl:
4983 * @ctxt: an XML parser context
4984 *
4985 * parse <!ENTITY declarations
4986 *
4987 * [70] EntityDecl ::= GEDecl | PEDecl
4988 *
4989 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
4990 *
4991 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
4992 *
4993 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
4994 *
4995 * [74] PEDef ::= EntityValue | ExternalID
4996 *
4997 * [76] NDataDecl ::= S 'NDATA' S Name
4998 *
4999 * [ VC: Notation Declared ]
5000 * The Name must match the declared name of a notation.
5001 */
5002
5003void
5004xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005005 const xmlChar *name = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005006 xmlChar *value = NULL;
5007 xmlChar *URI = NULL, *literal = NULL;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005008 const xmlChar *ndata = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005009 int isParameter = 0;
5010 xmlChar *orig = NULL;
Daniel Veillardf5582f12002-06-11 10:08:16 +00005011 int skipped;
Owen Taylor3473f882001-02-23 17:55:21 +00005012
Daniel Veillard4c778d82005-01-23 17:37:44 +00005013 /* GROW; done in the caller */
Daniel Veillarda07050d2003-10-19 14:46:32 +00005014 if (CMP8(CUR_PTR, '<', '!', 'E', 'N', 'T', 'I', 'T', 'Y')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005015 xmlParserInputPtr input = ctxt->input;
Owen Taylor3473f882001-02-23 17:55:21 +00005016 SHRINK;
5017 SKIP(8);
Daniel Veillardf5582f12002-06-11 10:08:16 +00005018 skipped = SKIP_BLANKS;
5019 if (skipped == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005020 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5021 "Space required after '<!ENTITY'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005022 }
Owen Taylor3473f882001-02-23 17:55:21 +00005023
5024 if (RAW == '%') {
5025 NEXT;
Daniel Veillardf5582f12002-06-11 10:08:16 +00005026 skipped = SKIP_BLANKS;
5027 if (skipped == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005028 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5029 "Space required after '%'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005030 }
Owen Taylor3473f882001-02-23 17:55:21 +00005031 isParameter = 1;
5032 }
5033
Daniel Veillard76d66f42001-05-16 21:05:17 +00005034 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005035 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005036 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5037 "xmlParseEntityDecl: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005038 return;
5039 }
Daniel Veillard37334572008-07-31 08:20:02 +00005040 if (xmlStrchr(name, ':') != NULL) {
5041 xmlNsErr(ctxt, XML_NS_ERR_COLON,
5042 "colon are forbidden from entities names '%s'\n",
5043 name, NULL, NULL);
5044 }
Daniel Veillardf5582f12002-06-11 10:08:16 +00005045 skipped = SKIP_BLANKS;
5046 if (skipped == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005047 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5048 "Space required after the entity name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005049 }
Owen Taylor3473f882001-02-23 17:55:21 +00005050
Daniel Veillardf5582f12002-06-11 10:08:16 +00005051 ctxt->instate = XML_PARSER_ENTITY_DECL;
Owen Taylor3473f882001-02-23 17:55:21 +00005052 /*
5053 * handle the various case of definitions...
5054 */
5055 if (isParameter) {
5056 if ((RAW == '"') || (RAW == '\'')) {
5057 value = xmlParseEntityValue(ctxt, &orig);
5058 if (value) {
5059 if ((ctxt->sax != NULL) &&
5060 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5061 ctxt->sax->entityDecl(ctxt->userData, name,
5062 XML_INTERNAL_PARAMETER_ENTITY,
5063 NULL, NULL, value);
5064 }
5065 } else {
5066 URI = xmlParseExternalID(ctxt, &literal, 1);
5067 if ((URI == NULL) && (literal == NULL)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005068 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005069 }
5070 if (URI) {
5071 xmlURIPtr uri;
5072
5073 uri = xmlParseURI((const char *) URI);
5074 if (uri == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00005075 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
5076 "Invalid URI: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00005077 /*
5078 * This really ought to be a well formedness error
5079 * but the XML Core WG decided otherwise c.f. issue
5080 * E26 of the XML erratas.
5081 */
Owen Taylor3473f882001-02-23 17:55:21 +00005082 } else {
5083 if (uri->fragment != NULL) {
Daniel Veillard4a7ae502002-02-18 19:18:17 +00005084 /*
5085 * Okay this is foolish to block those but not
5086 * invalid URIs.
5087 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005088 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005089 } else {
5090 if ((ctxt->sax != NULL) &&
5091 (!ctxt->disableSAX) &&
5092 (ctxt->sax->entityDecl != NULL))
5093 ctxt->sax->entityDecl(ctxt->userData, name,
5094 XML_EXTERNAL_PARAMETER_ENTITY,
5095 literal, URI, NULL);
5096 }
5097 xmlFreeURI(uri);
5098 }
5099 }
5100 }
5101 } else {
5102 if ((RAW == '"') || (RAW == '\'')) {
5103 value = xmlParseEntityValue(ctxt, &orig);
5104 if ((ctxt->sax != NULL) &&
5105 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5106 ctxt->sax->entityDecl(ctxt->userData, name,
5107 XML_INTERNAL_GENERAL_ENTITY,
5108 NULL, NULL, value);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005109 /*
5110 * For expat compatibility in SAX mode.
5111 */
5112 if ((ctxt->myDoc == NULL) ||
5113 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
5114 if (ctxt->myDoc == NULL) {
5115 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
Daniel Veillard68b6e022008-03-31 09:26:00 +00005116 if (ctxt->myDoc == NULL) {
5117 xmlErrMemory(ctxt, "New Doc failed");
5118 return;
5119 }
Daniel Veillardae0765b2008-07-31 19:54:59 +00005120 ctxt->myDoc->properties = XML_DOC_INTERNAL;
Daniel Veillard5997aca2002-03-18 18:36:20 +00005121 }
5122 if (ctxt->myDoc->intSubset == NULL)
5123 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5124 BAD_CAST "fake", NULL, NULL);
5125
Daniel Veillard1af9a412003-08-20 22:54:39 +00005126 xmlSAX2EntityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY,
5127 NULL, NULL, value);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005128 }
Owen Taylor3473f882001-02-23 17:55:21 +00005129 } else {
5130 URI = xmlParseExternalID(ctxt, &literal, 1);
5131 if ((URI == NULL) && (literal == NULL)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005132 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005133 }
5134 if (URI) {
5135 xmlURIPtr uri;
5136
5137 uri = xmlParseURI((const char *)URI);
5138 if (uri == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00005139 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
5140 "Invalid URI: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00005141 /*
5142 * This really ought to be a well formedness error
5143 * but the XML Core WG decided otherwise c.f. issue
5144 * E26 of the XML erratas.
5145 */
Owen Taylor3473f882001-02-23 17:55:21 +00005146 } else {
5147 if (uri->fragment != NULL) {
Daniel Veillard4a7ae502002-02-18 19:18:17 +00005148 /*
5149 * Okay this is foolish to block those but not
5150 * invalid URIs.
5151 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005152 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005153 }
5154 xmlFreeURI(uri);
5155 }
5156 }
William M. Brack76e95df2003-10-18 16:20:14 +00005157 if ((RAW != '>') && (!IS_BLANK_CH(CUR))) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005158 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5159 "Space required before 'NDATA'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005160 }
5161 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005162 if (CMP5(CUR_PTR, 'N', 'D', 'A', 'T', 'A')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005163 SKIP(5);
William M. Brack76e95df2003-10-18 16:20:14 +00005164 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005165 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5166 "Space required after 'NDATA'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005167 }
5168 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00005169 ndata = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005170 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5171 (ctxt->sax->unparsedEntityDecl != NULL))
5172 ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
5173 literal, URI, ndata);
5174 } else {
5175 if ((ctxt->sax != NULL) &&
5176 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5177 ctxt->sax->entityDecl(ctxt->userData, name,
5178 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5179 literal, URI, NULL);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005180 /*
5181 * For expat compatibility in SAX mode.
5182 * assuming the entity repalcement was asked for
5183 */
5184 if ((ctxt->replaceEntities != 0) &&
5185 ((ctxt->myDoc == NULL) ||
5186 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) {
5187 if (ctxt->myDoc == NULL) {
5188 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
Daniel Veillard68b6e022008-03-31 09:26:00 +00005189 if (ctxt->myDoc == NULL) {
5190 xmlErrMemory(ctxt, "New Doc failed");
5191 return;
5192 }
Daniel Veillardae0765b2008-07-31 19:54:59 +00005193 ctxt->myDoc->properties = XML_DOC_INTERNAL;
Daniel Veillard5997aca2002-03-18 18:36:20 +00005194 }
5195
5196 if (ctxt->myDoc->intSubset == NULL)
5197 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5198 BAD_CAST "fake", NULL, NULL);
Daniel Veillard1af9a412003-08-20 22:54:39 +00005199 xmlSAX2EntityDecl(ctxt, name,
5200 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5201 literal, URI, NULL);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005202 }
Owen Taylor3473f882001-02-23 17:55:21 +00005203 }
5204 }
5205 }
5206 SKIP_BLANKS;
5207 if (RAW != '>') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00005208 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00005209 "xmlParseEntityDecl: entity %s not terminated\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00005210 } else {
5211 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005212 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5213 "Entity declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005214 }
5215 NEXT;
5216 }
5217 if (orig != NULL) {
5218 /*
5219 * Ugly mechanism to save the raw entity value.
5220 */
5221 xmlEntityPtr cur = NULL;
5222
5223 if (isParameter) {
5224 if ((ctxt->sax != NULL) &&
5225 (ctxt->sax->getParameterEntity != NULL))
5226 cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
5227 } else {
5228 if ((ctxt->sax != NULL) &&
5229 (ctxt->sax->getEntity != NULL))
5230 cur = ctxt->sax->getEntity(ctxt->userData, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005231 if ((cur == NULL) && (ctxt->userData==ctxt)) {
Daniel Veillard1af9a412003-08-20 22:54:39 +00005232 cur = xmlSAX2GetEntity(ctxt, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005233 }
Owen Taylor3473f882001-02-23 17:55:21 +00005234 }
5235 if (cur != NULL) {
5236 if (cur->orig != NULL)
5237 xmlFree(orig);
5238 else
5239 cur->orig = orig;
5240 } else
5241 xmlFree(orig);
5242 }
Owen Taylor3473f882001-02-23 17:55:21 +00005243 if (value != NULL) xmlFree(value);
5244 if (URI != NULL) xmlFree(URI);
5245 if (literal != NULL) xmlFree(literal);
Owen Taylor3473f882001-02-23 17:55:21 +00005246 }
5247}
5248
5249/**
5250 * xmlParseDefaultDecl:
5251 * @ctxt: an XML parser context
5252 * @value: Receive a possible fixed default value for the attribute
5253 *
5254 * Parse an attribute default declaration
5255 *
5256 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
5257 *
5258 * [ VC: Required Attribute ]
5259 * if the default declaration is the keyword #REQUIRED, then the
5260 * attribute must be specified for all elements of the type in the
5261 * attribute-list declaration.
5262 *
5263 * [ VC: Attribute Default Legal ]
5264 * The declared default value must meet the lexical constraints of
5265 * the declared attribute type c.f. xmlValidateAttributeDecl()
5266 *
5267 * [ VC: Fixed Attribute Default ]
5268 * if an attribute has a default value declared with the #FIXED
5269 * keyword, instances of that attribute must match the default value.
5270 *
5271 * [ WFC: No < in Attribute Values ]
5272 * handled in xmlParseAttValue()
5273 *
5274 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
5275 * or XML_ATTRIBUTE_FIXED.
5276 */
5277
5278int
5279xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
5280 int val;
5281 xmlChar *ret;
5282
5283 *value = NULL;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005284 if (CMP9(CUR_PTR, '#', 'R', 'E', 'Q', 'U', 'I', 'R', 'E', 'D')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005285 SKIP(9);
5286 return(XML_ATTRIBUTE_REQUIRED);
5287 }
Daniel Veillarda07050d2003-10-19 14:46:32 +00005288 if (CMP8(CUR_PTR, '#', 'I', 'M', 'P', 'L', 'I', 'E', 'D')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005289 SKIP(8);
5290 return(XML_ATTRIBUTE_IMPLIED);
5291 }
5292 val = XML_ATTRIBUTE_NONE;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005293 if (CMP6(CUR_PTR, '#', 'F', 'I', 'X', 'E', 'D')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005294 SKIP(6);
5295 val = XML_ATTRIBUTE_FIXED;
William M. Brack76e95df2003-10-18 16:20:14 +00005296 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005297 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5298 "Space required after '#FIXED'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005299 }
5300 SKIP_BLANKS;
5301 }
5302 ret = xmlParseAttValue(ctxt);
5303 ctxt->instate = XML_PARSER_DTD;
5304 if (ret == NULL) {
William M. Brack7b9154b2003-09-27 19:23:50 +00005305 xmlFatalErrMsg(ctxt, (xmlParserErrors)ctxt->errNo,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005306 "Attribute default value declaration error\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005307 } else
5308 *value = ret;
5309 return(val);
5310}
5311
5312/**
5313 * xmlParseNotationType:
5314 * @ctxt: an XML parser context
5315 *
5316 * parse an Notation attribute type.
5317 *
5318 * Note: the leading 'NOTATION' S part has already being parsed...
5319 *
5320 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5321 *
5322 * [ VC: Notation Attributes ]
5323 * Values of this type must match one of the notation names included
5324 * in the declaration; all notation names in the declaration must be declared.
5325 *
5326 * Returns: the notation attribute tree built while parsing
5327 */
5328
5329xmlEnumerationPtr
5330xmlParseNotationType(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005331 const xmlChar *name;
Daniel Veillard49d44052008-08-27 19:57:06 +00005332 xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00005333
5334 if (RAW != '(') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005335 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005336 return(NULL);
5337 }
5338 SHRINK;
5339 do {
5340 NEXT;
5341 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00005342 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005343 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005344 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5345 "Name expected in NOTATION declaration\n");
Daniel Veillard489f9672009-08-10 16:49:30 +02005346 xmlFreeEnumeration(ret);
5347 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005348 }
Daniel Veillard49d44052008-08-27 19:57:06 +00005349 tmp = ret;
5350 while (tmp != NULL) {
5351 if (xmlStrEqual(name, tmp->name)) {
5352 xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5353 "standalone: attribute notation value token %s duplicated\n",
5354 name, NULL);
5355 if (!xmlDictOwns(ctxt->dict, name))
5356 xmlFree((xmlChar *) name);
5357 break;
5358 }
5359 tmp = tmp->next;
5360 }
5361 if (tmp == NULL) {
5362 cur = xmlCreateEnumeration(name);
Daniel Veillard489f9672009-08-10 16:49:30 +02005363 if (cur == NULL) {
5364 xmlFreeEnumeration(ret);
5365 return(NULL);
5366 }
Daniel Veillard49d44052008-08-27 19:57:06 +00005367 if (last == NULL) ret = last = cur;
5368 else {
5369 last->next = cur;
5370 last = cur;
5371 }
Owen Taylor3473f882001-02-23 17:55:21 +00005372 }
5373 SKIP_BLANKS;
5374 } while (RAW == '|');
5375 if (RAW != ')') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005376 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
Daniel Veillard489f9672009-08-10 16:49:30 +02005377 xmlFreeEnumeration(ret);
5378 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005379 }
5380 NEXT;
5381 return(ret);
5382}
5383
5384/**
5385 * xmlParseEnumerationType:
5386 * @ctxt: an XML parser context
5387 *
5388 * parse an Enumeration attribute type.
5389 *
5390 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
5391 *
5392 * [ VC: Enumeration ]
5393 * Values of this type must match one of the Nmtoken tokens in
5394 * the declaration
5395 *
5396 * Returns: the enumeration attribute tree built while parsing
5397 */
5398
5399xmlEnumerationPtr
5400xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
5401 xmlChar *name;
Daniel Veillard49d44052008-08-27 19:57:06 +00005402 xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00005403
5404 if (RAW != '(') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005405 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005406 return(NULL);
5407 }
5408 SHRINK;
5409 do {
5410 NEXT;
5411 SKIP_BLANKS;
5412 name = xmlParseNmtoken(ctxt);
5413 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005414 xmlFatalErr(ctxt, XML_ERR_NMTOKEN_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005415 return(ret);
5416 }
Daniel Veillard49d44052008-08-27 19:57:06 +00005417 tmp = ret;
5418 while (tmp != NULL) {
5419 if (xmlStrEqual(name, tmp->name)) {
5420 xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5421 "standalone: attribute enumeration value token %s duplicated\n",
5422 name, NULL);
5423 if (!xmlDictOwns(ctxt->dict, name))
5424 xmlFree(name);
5425 break;
5426 }
5427 tmp = tmp->next;
5428 }
5429 if (tmp == NULL) {
5430 cur = xmlCreateEnumeration(name);
5431 if (!xmlDictOwns(ctxt->dict, name))
5432 xmlFree(name);
Daniel Veillard489f9672009-08-10 16:49:30 +02005433 if (cur == NULL) {
5434 xmlFreeEnumeration(ret);
5435 return(NULL);
5436 }
Daniel Veillard49d44052008-08-27 19:57:06 +00005437 if (last == NULL) ret = last = cur;
5438 else {
5439 last->next = cur;
5440 last = cur;
5441 }
Owen Taylor3473f882001-02-23 17:55:21 +00005442 }
5443 SKIP_BLANKS;
5444 } while (RAW == '|');
5445 if (RAW != ')') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005446 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005447 return(ret);
5448 }
5449 NEXT;
5450 return(ret);
5451}
5452
5453/**
5454 * xmlParseEnumeratedType:
5455 * @ctxt: an XML parser context
5456 * @tree: the enumeration tree built while parsing
5457 *
5458 * parse an Enumerated attribute type.
5459 *
5460 * [57] EnumeratedType ::= NotationType | Enumeration
5461 *
5462 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5463 *
5464 *
5465 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
5466 */
5467
5468int
5469xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
Daniel Veillarda07050d2003-10-19 14:46:32 +00005470 if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005471 SKIP(8);
William M. Brack76e95df2003-10-18 16:20:14 +00005472 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005473 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5474 "Space required after 'NOTATION'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005475 return(0);
5476 }
5477 SKIP_BLANKS;
5478 *tree = xmlParseNotationType(ctxt);
5479 if (*tree == NULL) return(0);
5480 return(XML_ATTRIBUTE_NOTATION);
5481 }
5482 *tree = xmlParseEnumerationType(ctxt);
5483 if (*tree == NULL) return(0);
5484 return(XML_ATTRIBUTE_ENUMERATION);
5485}
5486
5487/**
5488 * xmlParseAttributeType:
5489 * @ctxt: an XML parser context
5490 * @tree: the enumeration tree built while parsing
5491 *
5492 * parse the Attribute list def for an element
5493 *
5494 * [54] AttType ::= StringType | TokenizedType | EnumeratedType
5495 *
5496 * [55] StringType ::= 'CDATA'
5497 *
5498 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
5499 * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
5500 *
5501 * Validity constraints for attribute values syntax are checked in
5502 * xmlValidateAttributeValue()
5503 *
5504 * [ VC: ID ]
5505 * Values of type ID must match the Name production. A name must not
5506 * appear more than once in an XML document as a value of this type;
5507 * i.e., ID values must uniquely identify the elements which bear them.
5508 *
5509 * [ VC: One ID per Element Type ]
5510 * No element type may have more than one ID attribute specified.
5511 *
5512 * [ VC: ID Attribute Default ]
5513 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
5514 *
5515 * [ VC: IDREF ]
5516 * Values of type IDREF must match the Name production, and values
5517 * of type IDREFS must match Names; each IDREF Name must match the value
5518 * of an ID attribute on some element in the XML document; i.e. IDREF
5519 * values must match the value of some ID attribute.
5520 *
5521 * [ VC: Entity Name ]
5522 * Values of type ENTITY must match the Name production, values
5523 * of type ENTITIES must match Names; each Entity Name must match the
5524 * name of an unparsed entity declared in the DTD.
5525 *
5526 * [ VC: Name Token ]
5527 * Values of type NMTOKEN must match the Nmtoken production; values
5528 * of type NMTOKENS must match Nmtokens.
5529 *
5530 * Returns the attribute type
5531 */
5532int
5533xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
5534 SHRINK;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005535 if (CMP5(CUR_PTR, 'C', 'D', 'A', 'T', 'A')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005536 SKIP(5);
5537 return(XML_ATTRIBUTE_CDATA);
Daniel Veillarda07050d2003-10-19 14:46:32 +00005538 } else if (CMP6(CUR_PTR, 'I', 'D', 'R', 'E', 'F', 'S')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005539 SKIP(6);
5540 return(XML_ATTRIBUTE_IDREFS);
Daniel Veillarda07050d2003-10-19 14:46:32 +00005541 } else if (CMP5(CUR_PTR, 'I', 'D', 'R', 'E', 'F')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005542 SKIP(5);
5543 return(XML_ATTRIBUTE_IDREF);
5544 } else if ((RAW == 'I') && (NXT(1) == 'D')) {
5545 SKIP(2);
5546 return(XML_ATTRIBUTE_ID);
Daniel Veillarda07050d2003-10-19 14:46:32 +00005547 } else if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005548 SKIP(6);
5549 return(XML_ATTRIBUTE_ENTITY);
Daniel Veillarda07050d2003-10-19 14:46:32 +00005550 } else if (CMP8(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'I', 'E', 'S')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005551 SKIP(8);
5552 return(XML_ATTRIBUTE_ENTITIES);
Daniel Veillarda07050d2003-10-19 14:46:32 +00005553 } else if (CMP8(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N', 'S')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005554 SKIP(8);
5555 return(XML_ATTRIBUTE_NMTOKENS);
Daniel Veillarda07050d2003-10-19 14:46:32 +00005556 } else if (CMP7(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005557 SKIP(7);
5558 return(XML_ATTRIBUTE_NMTOKEN);
5559 }
5560 return(xmlParseEnumeratedType(ctxt, tree));
5561}
5562
5563/**
5564 * xmlParseAttributeListDecl:
5565 * @ctxt: an XML parser context
5566 *
5567 * : parse the Attribute list def for an element
5568 *
5569 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
5570 *
5571 * [53] AttDef ::= S Name S AttType S DefaultDecl
5572 *
5573 */
5574void
5575xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005576 const xmlChar *elemName;
5577 const xmlChar *attrName;
Owen Taylor3473f882001-02-23 17:55:21 +00005578 xmlEnumerationPtr tree;
5579
Daniel Veillarda07050d2003-10-19 14:46:32 +00005580 if (CMP9(CUR_PTR, '<', '!', 'A', 'T', 'T', 'L', 'I', 'S', 'T')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005581 xmlParserInputPtr input = ctxt->input;
5582
5583 SKIP(9);
William M. Brack76e95df2003-10-18 16:20:14 +00005584 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005585 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005586 "Space required after '<!ATTLIST'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005587 }
5588 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00005589 elemName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005590 if (elemName == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005591 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5592 "ATTLIST: no name for Element\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005593 return;
5594 }
5595 SKIP_BLANKS;
5596 GROW;
5597 while (RAW != '>') {
5598 const xmlChar *check = CUR_PTR;
5599 int type;
5600 int def;
5601 xmlChar *defaultValue = NULL;
5602
5603 GROW;
5604 tree = NULL;
Daniel Veillard76d66f42001-05-16 21:05:17 +00005605 attrName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005606 if (attrName == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005607 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5608 "ATTLIST: no name for Attribute\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005609 break;
5610 }
5611 GROW;
William M. Brack76e95df2003-10-18 16:20:14 +00005612 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005613 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005614 "Space required after the attribute name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005615 break;
5616 }
5617 SKIP_BLANKS;
5618
5619 type = xmlParseAttributeType(ctxt, &tree);
5620 if (type <= 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00005621 break;
5622 }
5623
5624 GROW;
William M. Brack76e95df2003-10-18 16:20:14 +00005625 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005626 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5627 "Space required after the attribute type\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005628 if (tree != NULL)
5629 xmlFreeEnumeration(tree);
5630 break;
5631 }
5632 SKIP_BLANKS;
5633
5634 def = xmlParseDefaultDecl(ctxt, &defaultValue);
5635 if (def <= 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00005636 if (defaultValue != NULL)
5637 xmlFree(defaultValue);
5638 if (tree != NULL)
5639 xmlFreeEnumeration(tree);
5640 break;
5641 }
Daniel Veillard97c9ce22008-03-25 16:52:41 +00005642 if ((type != XML_ATTRIBUTE_CDATA) && (defaultValue != NULL))
5643 xmlAttrNormalizeSpace(defaultValue, defaultValue);
Owen Taylor3473f882001-02-23 17:55:21 +00005644
5645 GROW;
5646 if (RAW != '>') {
William M. Brack76e95df2003-10-18 16:20:14 +00005647 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005648 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005649 "Space required after the attribute default value\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005650 if (defaultValue != NULL)
5651 xmlFree(defaultValue);
5652 if (tree != NULL)
5653 xmlFreeEnumeration(tree);
5654 break;
5655 }
5656 SKIP_BLANKS;
5657 }
5658 if (check == CUR_PTR) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005659 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
5660 "in xmlParseAttributeListDecl\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005661 if (defaultValue != NULL)
5662 xmlFree(defaultValue);
5663 if (tree != NULL)
5664 xmlFreeEnumeration(tree);
5665 break;
5666 }
5667 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5668 (ctxt->sax->attributeDecl != NULL))
5669 ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
5670 type, def, defaultValue, tree);
Daniel Veillarde57ec792003-09-10 10:50:59 +00005671 else if (tree != NULL)
5672 xmlFreeEnumeration(tree);
5673
5674 if ((ctxt->sax2) && (defaultValue != NULL) &&
5675 (def != XML_ATTRIBUTE_IMPLIED) &&
5676 (def != XML_ATTRIBUTE_REQUIRED)) {
5677 xmlAddDefAttrs(ctxt, elemName, attrName, defaultValue);
5678 }
Daniel Veillardac4118d2008-01-11 05:27:32 +00005679 if (ctxt->sax2) {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00005680 xmlAddSpecialAttr(ctxt, elemName, attrName, type);
5681 }
Owen Taylor3473f882001-02-23 17:55:21 +00005682 if (defaultValue != NULL)
5683 xmlFree(defaultValue);
5684 GROW;
5685 }
5686 if (RAW == '>') {
5687 if (input != ctxt->input) {
Daniel Veillard49d44052008-08-27 19:57:06 +00005688 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
5689 "Attribute list declaration doesn't start and stop in the same entity\n",
5690 NULL, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005691 }
5692 NEXT;
5693 }
Owen Taylor3473f882001-02-23 17:55:21 +00005694 }
5695}
5696
5697/**
5698 * xmlParseElementMixedContentDecl:
5699 * @ctxt: an XML parser context
Daniel Veillarda9b66d02002-12-11 14:23:49 +00005700 * @inputchk: the input used for the current entity, needed for boundary checks
Owen Taylor3473f882001-02-23 17:55:21 +00005701 *
5702 * parse the declaration for a Mixed Element content
5703 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
5704 *
5705 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
5706 * '(' S? '#PCDATA' S? ')'
5707 *
5708 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
5709 *
5710 * [ VC: No Duplicate Types ]
5711 * The same name must not appear more than once in a single
5712 * mixed-content declaration.
5713 *
5714 * returns: the list of the xmlElementContentPtr describing the element choices
5715 */
5716xmlElementContentPtr
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005717xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
Owen Taylor3473f882001-02-23 17:55:21 +00005718 xmlElementContentPtr ret = NULL, cur = NULL, n;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005719 const xmlChar *elem = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005720
5721 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005722 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005723 SKIP(7);
5724 SKIP_BLANKS;
5725 SHRINK;
5726 if (RAW == ')') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005727 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00005728 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
5729"Element content declaration doesn't start and stop in the same entity\n",
Daniel Veillardae0765b2008-07-31 19:54:59 +00005730 NULL, NULL);
Daniel Veillard8dc16a62002-02-19 21:08:48 +00005731 }
Owen Taylor3473f882001-02-23 17:55:21 +00005732 NEXT;
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005733 ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
Daniel Veillard68b6e022008-03-31 09:26:00 +00005734 if (ret == NULL)
5735 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005736 if (RAW == '*') {
5737 ret->ocur = XML_ELEMENT_CONTENT_MULT;
5738 NEXT;
5739 }
5740 return(ret);
5741 }
5742 if ((RAW == '(') || (RAW == '|')) {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005743 ret = cur = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
Owen Taylor3473f882001-02-23 17:55:21 +00005744 if (ret == NULL) return(NULL);
5745 }
5746 while (RAW == '|') {
5747 NEXT;
5748 if (elem == NULL) {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005749 ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
Owen Taylor3473f882001-02-23 17:55:21 +00005750 if (ret == NULL) return(NULL);
5751 ret->c1 = cur;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005752 if (cur != NULL)
5753 cur->parent = ret;
Owen Taylor3473f882001-02-23 17:55:21 +00005754 cur = ret;
5755 } else {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005756 n = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
Owen Taylor3473f882001-02-23 17:55:21 +00005757 if (n == NULL) return(NULL);
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005758 n->c1 = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005759 if (n->c1 != NULL)
5760 n->c1->parent = n;
Owen Taylor3473f882001-02-23 17:55:21 +00005761 cur->c2 = n;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005762 if (n != NULL)
5763 n->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00005764 cur = n;
Owen Taylor3473f882001-02-23 17:55:21 +00005765 }
5766 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00005767 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005768 if (elem == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005769 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005770 "xmlParseElementMixedContentDecl : Name expected\n");
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005771 xmlFreeDocElementContent(ctxt->myDoc, cur);
Owen Taylor3473f882001-02-23 17:55:21 +00005772 return(NULL);
5773 }
5774 SKIP_BLANKS;
5775 GROW;
5776 }
5777 if ((RAW == ')') && (NXT(1) == '*')) {
5778 if (elem != NULL) {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005779 cur->c2 = xmlNewDocElementContent(ctxt->myDoc, elem,
Owen Taylor3473f882001-02-23 17:55:21 +00005780 XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005781 if (cur->c2 != NULL)
5782 cur->c2->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00005783 }
Daniel Veillardd44b9362009-09-07 12:15:08 +02005784 if (ret != NULL)
5785 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005786 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00005787 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
5788"Element content declaration doesn't start and stop in the same entity\n",
Daniel Veillardae0765b2008-07-31 19:54:59 +00005789 NULL, NULL);
Daniel Veillard8dc16a62002-02-19 21:08:48 +00005790 }
Owen Taylor3473f882001-02-23 17:55:21 +00005791 SKIP(2);
5792 } else {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005793 xmlFreeDocElementContent(ctxt->myDoc, ret);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005794 xmlFatalErr(ctxt, XML_ERR_MIXED_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005795 return(NULL);
5796 }
5797
5798 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005799 xmlFatalErr(ctxt, XML_ERR_PCDATA_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005800 }
5801 return(ret);
5802}
5803
5804/**
Daniel Veillard489f9672009-08-10 16:49:30 +02005805 * xmlParseElementChildrenContentDeclPriv:
Owen Taylor3473f882001-02-23 17:55:21 +00005806 * @ctxt: an XML parser context
Daniel Veillarda9b66d02002-12-11 14:23:49 +00005807 * @inputchk: the input used for the current entity, needed for boundary checks
Daniel Veillard489f9672009-08-10 16:49:30 +02005808 * @depth: the level of recursion
Owen Taylor3473f882001-02-23 17:55:21 +00005809 *
5810 * parse the declaration for a Mixed Element content
5811 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
5812 *
5813 *
5814 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
5815 *
5816 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
5817 *
5818 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
5819 *
5820 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
5821 *
5822 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
5823 * TODO Parameter-entity replacement text must be properly nested
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005824 * with parenthesized groups. That is to say, if either of the
Owen Taylor3473f882001-02-23 17:55:21 +00005825 * opening or closing parentheses in a choice, seq, or Mixed
5826 * construct is contained in the replacement text for a parameter
5827 * entity, both must be contained in the same replacement text. For
5828 * interoperability, if a parameter-entity reference appears in a
5829 * choice, seq, or Mixed construct, its replacement text should not
5830 * be empty, and neither the first nor last non-blank character of
5831 * the replacement text should be a connector (| or ,).
5832 *
Daniel Veillard5e2dace2001-07-18 19:30:27 +00005833 * Returns the tree of xmlElementContentPtr describing the element
Owen Taylor3473f882001-02-23 17:55:21 +00005834 * hierarchy.
5835 */
Daniel Veillard489f9672009-08-10 16:49:30 +02005836static xmlElementContentPtr
5837xmlParseElementChildrenContentDeclPriv(xmlParserCtxtPtr ctxt, int inputchk,
5838 int depth) {
Owen Taylor3473f882001-02-23 17:55:21 +00005839 xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005840 const xmlChar *elem;
Owen Taylor3473f882001-02-23 17:55:21 +00005841 xmlChar type = 0;
5842
Daniel Veillard489f9672009-08-10 16:49:30 +02005843 if (((depth > 128) && ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
5844 (depth > 2048)) {
5845 xmlFatalErrMsgInt(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED,
5846"xmlParseElementChildrenContentDecl : depth %d too deep, use XML_PARSE_HUGE\n",
5847 depth);
5848 return(NULL);
5849 }
Owen Taylor3473f882001-02-23 17:55:21 +00005850 SKIP_BLANKS;
5851 GROW;
5852 if (RAW == '(') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005853 int inputid = ctxt->input->id;
Daniel Veillard8dc16a62002-02-19 21:08:48 +00005854
Owen Taylor3473f882001-02-23 17:55:21 +00005855 /* Recurse on first child */
5856 NEXT;
5857 SKIP_BLANKS;
Daniel Veillard489f9672009-08-10 16:49:30 +02005858 cur = ret = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
5859 depth + 1);
Owen Taylor3473f882001-02-23 17:55:21 +00005860 SKIP_BLANKS;
5861 GROW;
5862 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00005863 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005864 if (elem == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005865 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005866 return(NULL);
5867 }
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005868 cur = ret = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00005869 if (cur == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005870 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00005871 return(NULL);
5872 }
Owen Taylor3473f882001-02-23 17:55:21 +00005873 GROW;
5874 if (RAW == '?') {
5875 cur->ocur = XML_ELEMENT_CONTENT_OPT;
5876 NEXT;
5877 } else if (RAW == '*') {
5878 cur->ocur = XML_ELEMENT_CONTENT_MULT;
5879 NEXT;
5880 } else if (RAW == '+') {
5881 cur->ocur = XML_ELEMENT_CONTENT_PLUS;
5882 NEXT;
5883 } else {
5884 cur->ocur = XML_ELEMENT_CONTENT_ONCE;
5885 }
Owen Taylor3473f882001-02-23 17:55:21 +00005886 GROW;
5887 }
5888 SKIP_BLANKS;
5889 SHRINK;
5890 while (RAW != ')') {
5891 /*
5892 * Each loop we parse one separator and one element.
5893 */
5894 if (RAW == ',') {
5895 if (type == 0) type = CUR;
5896
5897 /*
5898 * Detect "Name | Name , Name" error
5899 */
5900 else if (type != CUR) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005901 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005902 "xmlParseElementChildrenContentDecl : '%c' expected\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005903 type);
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00005904 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005905 xmlFreeDocElementContent(ctxt->myDoc, last);
Owen Taylor3473f882001-02-23 17:55:21 +00005906 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005907 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00005908 return(NULL);
5909 }
5910 NEXT;
5911
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005912 op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_SEQ);
Owen Taylor3473f882001-02-23 17:55:21 +00005913 if (op == NULL) {
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00005914 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005915 xmlFreeDocElementContent(ctxt->myDoc, last);
5916 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00005917 return(NULL);
5918 }
5919 if (last == NULL) {
5920 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005921 if (ret != NULL)
5922 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00005923 ret = cur = op;
5924 } else {
5925 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005926 if (op != NULL)
5927 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00005928 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005929 if (last != NULL)
5930 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00005931 cur =op;
5932 last = NULL;
5933 }
5934 } else if (RAW == '|') {
5935 if (type == 0) type = CUR;
5936
5937 /*
5938 * Detect "Name , Name | Name" error
5939 */
5940 else if (type != CUR) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005941 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005942 "xmlParseElementChildrenContentDecl : '%c' expected\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005943 type);
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00005944 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005945 xmlFreeDocElementContent(ctxt->myDoc, last);
Owen Taylor3473f882001-02-23 17:55:21 +00005946 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005947 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00005948 return(NULL);
5949 }
5950 NEXT;
5951
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005952 op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
Owen Taylor3473f882001-02-23 17:55:21 +00005953 if (op == NULL) {
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00005954 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005955 xmlFreeDocElementContent(ctxt->myDoc, last);
Owen Taylor3473f882001-02-23 17:55:21 +00005956 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005957 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00005958 return(NULL);
5959 }
5960 if (last == NULL) {
5961 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005962 if (ret != NULL)
5963 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00005964 ret = cur = op;
5965 } else {
5966 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005967 if (op != NULL)
5968 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00005969 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005970 if (last != NULL)
5971 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00005972 cur =op;
5973 last = NULL;
5974 }
5975 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005976 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED, NULL);
Daniel Veillardc707d0b2008-01-24 14:48:54 +00005977 if ((last != NULL) && (last != ret))
5978 xmlFreeDocElementContent(ctxt->myDoc, last);
Owen Taylor3473f882001-02-23 17:55:21 +00005979 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005980 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00005981 return(NULL);
5982 }
5983 GROW;
5984 SKIP_BLANKS;
5985 GROW;
5986 if (RAW == '(') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005987 int inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00005988 /* Recurse on second child */
5989 NEXT;
5990 SKIP_BLANKS;
Daniel Veillard489f9672009-08-10 16:49:30 +02005991 last = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
5992 depth + 1);
Owen Taylor3473f882001-02-23 17:55:21 +00005993 SKIP_BLANKS;
5994 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00005995 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005996 if (elem == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005997 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005998 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005999 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00006000 return(NULL);
6001 }
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006002 last = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillard68b6e022008-03-31 09:26:00 +00006003 if (last == NULL) {
6004 if (ret != NULL)
6005 xmlFreeDocElementContent(ctxt->myDoc, ret);
6006 return(NULL);
6007 }
Owen Taylor3473f882001-02-23 17:55:21 +00006008 if (RAW == '?') {
6009 last->ocur = XML_ELEMENT_CONTENT_OPT;
6010 NEXT;
6011 } else if (RAW == '*') {
6012 last->ocur = XML_ELEMENT_CONTENT_MULT;
6013 NEXT;
6014 } else if (RAW == '+') {
6015 last->ocur = XML_ELEMENT_CONTENT_PLUS;
6016 NEXT;
6017 } else {
6018 last->ocur = XML_ELEMENT_CONTENT_ONCE;
6019 }
6020 }
6021 SKIP_BLANKS;
6022 GROW;
6023 }
6024 if ((cur != NULL) && (last != NULL)) {
6025 cur->c2 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006026 if (last != NULL)
6027 last->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00006028 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006029 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00006030 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6031"Element content declaration doesn't start and stop in the same entity\n",
Daniel Veillardae0765b2008-07-31 19:54:59 +00006032 NULL, NULL);
Daniel Veillard8dc16a62002-02-19 21:08:48 +00006033 }
Owen Taylor3473f882001-02-23 17:55:21 +00006034 NEXT;
6035 if (RAW == '?') {
William M. Brackf8f2e8f2004-05-14 04:37:41 +00006036 if (ret != NULL) {
6037 if ((ret->ocur == XML_ELEMENT_CONTENT_PLUS) ||
6038 (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6039 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6040 else
6041 ret->ocur = XML_ELEMENT_CONTENT_OPT;
6042 }
Owen Taylor3473f882001-02-23 17:55:21 +00006043 NEXT;
6044 } else if (RAW == '*') {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00006045 if (ret != NULL) {
Daniel Veillarde470df72001-04-18 21:41:07 +00006046 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Daniel Veillardce2c2f02001-10-18 14:57:24 +00006047 cur = ret;
6048 /*
6049 * Some normalization:
6050 * (a | b* | c?)* == (a | b | c)*
6051 */
Daniel Veillard30e76072006-03-09 14:13:55 +00006052 while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00006053 if ((cur->c1 != NULL) &&
6054 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6055 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT)))
6056 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6057 if ((cur->c2 != NULL) &&
6058 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6059 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT)))
6060 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6061 cur = cur->c2;
6062 }
6063 }
Owen Taylor3473f882001-02-23 17:55:21 +00006064 NEXT;
6065 } else if (RAW == '+') {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00006066 if (ret != NULL) {
6067 int found = 0;
6068
William M. Brackf8f2e8f2004-05-14 04:37:41 +00006069 if ((ret->ocur == XML_ELEMENT_CONTENT_OPT) ||
6070 (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6071 ret->ocur = XML_ELEMENT_CONTENT_MULT;
William M. Brackeb8509c2004-05-14 03:48:02 +00006072 else
6073 ret->ocur = XML_ELEMENT_CONTENT_PLUS;
Daniel Veillardce2c2f02001-10-18 14:57:24 +00006074 /*
6075 * Some normalization:
6076 * (a | b*)+ == (a | b)*
6077 * (a | b?)+ == (a | b)*
6078 */
Daniel Veillard30e76072006-03-09 14:13:55 +00006079 while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00006080 if ((cur->c1 != NULL) &&
6081 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6082 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) {
6083 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6084 found = 1;
6085 }
6086 if ((cur->c2 != NULL) &&
6087 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6088 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) {
6089 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6090 found = 1;
6091 }
6092 cur = cur->c2;
6093 }
6094 if (found)
6095 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6096 }
Owen Taylor3473f882001-02-23 17:55:21 +00006097 NEXT;
6098 }
6099 return(ret);
6100}
6101
6102/**
Daniel Veillard489f9672009-08-10 16:49:30 +02006103 * xmlParseElementChildrenContentDecl:
6104 * @ctxt: an XML parser context
6105 * @inputchk: the input used for the current entity, needed for boundary checks
Daniel Veillard489f9672009-08-10 16:49:30 +02006106 *
6107 * parse the declaration for a Mixed Element content
6108 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6109 *
6110 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
6111 *
6112 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6113 *
6114 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6115 *
6116 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6117 *
6118 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6119 * TODO Parameter-entity replacement text must be properly nested
6120 * with parenthesized groups. That is to say, if either of the
6121 * opening or closing parentheses in a choice, seq, or Mixed
6122 * construct is contained in the replacement text for a parameter
6123 * entity, both must be contained in the same replacement text. For
6124 * interoperability, if a parameter-entity reference appears in a
6125 * choice, seq, or Mixed construct, its replacement text should not
6126 * be empty, and neither the first nor last non-blank character of
6127 * the replacement text should be a connector (| or ,).
6128 *
6129 * Returns the tree of xmlElementContentPtr describing the element
6130 * hierarchy.
6131 */
6132xmlElementContentPtr
6133xmlParseElementChildrenContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
6134 /* stub left for API/ABI compat */
6135 return(xmlParseElementChildrenContentDeclPriv(ctxt, inputchk, 1));
6136}
6137
6138/**
Owen Taylor3473f882001-02-23 17:55:21 +00006139 * xmlParseElementContentDecl:
6140 * @ctxt: an XML parser context
6141 * @name: the name of the element being defined.
6142 * @result: the Element Content pointer will be stored here if any
6143 *
6144 * parse the declaration for an Element content either Mixed or Children,
6145 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
6146 *
6147 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
6148 *
6149 * returns: the type of element content XML_ELEMENT_TYPE_xxx
6150 */
6151
6152int
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006153xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, const xmlChar *name,
Owen Taylor3473f882001-02-23 17:55:21 +00006154 xmlElementContentPtr *result) {
6155
6156 xmlElementContentPtr tree = NULL;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006157 int inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00006158 int res;
6159
6160 *result = NULL;
6161
6162 if (RAW != '(') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006163 xmlFatalErrMsgStr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
Daniel Veillard56a4cb82001-03-24 17:00:36 +00006164 "xmlParseElementContentDecl : %s '(' expected\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006165 return(-1);
6166 }
6167 NEXT;
6168 GROW;
6169 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00006170 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006171 tree = xmlParseElementMixedContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00006172 res = XML_ELEMENT_TYPE_MIXED;
6173 } else {
Daniel Veillard489f9672009-08-10 16:49:30 +02006174 tree = xmlParseElementChildrenContentDeclPriv(ctxt, inputid, 1);
Owen Taylor3473f882001-02-23 17:55:21 +00006175 res = XML_ELEMENT_TYPE_ELEMENT;
6176 }
Owen Taylor3473f882001-02-23 17:55:21 +00006177 SKIP_BLANKS;
6178 *result = tree;
6179 return(res);
6180}
6181
6182/**
6183 * xmlParseElementDecl:
6184 * @ctxt: an XML parser context
6185 *
6186 * parse an Element declaration.
6187 *
6188 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
6189 *
6190 * [ VC: Unique Element Type Declaration ]
6191 * No element type may be declared more than once
6192 *
6193 * Returns the type of the element, or -1 in case of error
6194 */
6195int
6196xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006197 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00006198 int ret = -1;
6199 xmlElementContentPtr content = NULL;
6200
Daniel Veillard4c778d82005-01-23 17:37:44 +00006201 /* GROW; done in the caller */
Daniel Veillarda07050d2003-10-19 14:46:32 +00006202 if (CMP9(CUR_PTR, '<', '!', 'E', 'L', 'E', 'M', 'E', 'N', 'T')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006203 xmlParserInputPtr input = ctxt->input;
6204
6205 SKIP(9);
William M. Brack76e95df2003-10-18 16:20:14 +00006206 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006207 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6208 "Space required after 'ELEMENT'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006209 }
6210 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00006211 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006212 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006213 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6214 "xmlParseElementDecl: no name for Element\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006215 return(-1);
6216 }
6217 while ((RAW == 0) && (ctxt->inputNr > 1))
6218 xmlPopInput(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00006219 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006220 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6221 "Space required after the element name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006222 }
6223 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00006224 if (CMP5(CUR_PTR, 'E', 'M', 'P', 'T', 'Y')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006225 SKIP(5);
6226 /*
6227 * Element must always be empty.
6228 */
6229 ret = XML_ELEMENT_TYPE_EMPTY;
6230 } else if ((RAW == 'A') && (NXT(1) == 'N') &&
6231 (NXT(2) == 'Y')) {
6232 SKIP(3);
6233 /*
6234 * Element is a generic container.
6235 */
6236 ret = XML_ELEMENT_TYPE_ANY;
6237 } else if (RAW == '(') {
6238 ret = xmlParseElementContentDecl(ctxt, name, &content);
6239 } else {
6240 /*
6241 * [ WFC: PEs in Internal Subset ] error handling.
6242 */
6243 if ((RAW == '%') && (ctxt->external == 0) &&
6244 (ctxt->inputNr == 1)) {
Daniel Veillardf403d292003-10-05 13:51:35 +00006245 xmlFatalErrMsg(ctxt, XML_ERR_PEREF_IN_INT_SUBSET,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006246 "PEReference: forbidden within markup decl in internal subset\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006247 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00006248 xmlFatalErrMsg(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
Owen Taylor3473f882001-02-23 17:55:21 +00006249 "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
6250 }
Owen Taylor3473f882001-02-23 17:55:21 +00006251 return(-1);
6252 }
6253
6254 SKIP_BLANKS;
6255 /*
6256 * Pop-up of finished entities.
6257 */
6258 while ((RAW == 0) && (ctxt->inputNr > 1))
6259 xmlPopInput(ctxt);
6260 SKIP_BLANKS;
6261
6262 if (RAW != '>') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006263 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006264 if (content != NULL) {
6265 xmlFreeDocElementContent(ctxt->myDoc, content);
6266 }
Owen Taylor3473f882001-02-23 17:55:21 +00006267 } else {
6268 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006269 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6270 "Element declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006271 }
6272
6273 NEXT;
6274 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006275 (ctxt->sax->elementDecl != NULL)) {
6276 if (content != NULL)
6277 content->parent = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00006278 ctxt->sax->elementDecl(ctxt->userData, name, ret,
6279 content);
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006280 if ((content != NULL) && (content->parent == NULL)) {
6281 /*
6282 * this is a trick: if xmlAddElementDecl is called,
6283 * instead of copying the full tree it is plugged directly
6284 * if called from the parser. Avoid duplicating the
6285 * interfaces or change the API/ABI
6286 */
6287 xmlFreeDocElementContent(ctxt->myDoc, content);
6288 }
6289 } else if (content != NULL) {
6290 xmlFreeDocElementContent(ctxt->myDoc, content);
6291 }
Owen Taylor3473f882001-02-23 17:55:21 +00006292 }
Owen Taylor3473f882001-02-23 17:55:21 +00006293 }
6294 return(ret);
6295}
6296
6297/**
Owen Taylor3473f882001-02-23 17:55:21 +00006298 * xmlParseConditionalSections
6299 * @ctxt: an XML parser context
6300 *
6301 * [61] conditionalSect ::= includeSect | ignoreSect
6302 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
6303 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
6304 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
6305 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
6306 */
6307
Daniel Veillard56a4cb82001-03-24 17:00:36 +00006308static void
Owen Taylor3473f882001-02-23 17:55:21 +00006309xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
Daniel Veillard49d44052008-08-27 19:57:06 +00006310 int id = ctxt->input->id;
6311
Owen Taylor3473f882001-02-23 17:55:21 +00006312 SKIP(3);
6313 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00006314 if (CMP7(CUR_PTR, 'I', 'N', 'C', 'L', 'U', 'D', 'E')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006315 SKIP(7);
6316 SKIP_BLANKS;
6317 if (RAW != '[') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006318 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006319 } else {
Daniel Veillard49d44052008-08-27 19:57:06 +00006320 if (ctxt->input->id != id) {
6321 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6322 "All markup of the conditional section is not in the same entity\n",
6323 NULL, NULL);
6324 }
Owen Taylor3473f882001-02-23 17:55:21 +00006325 NEXT;
6326 }
6327 if (xmlParserDebugEntities) {
6328 if ((ctxt->input != NULL) && (ctxt->input->filename))
6329 xmlGenericError(xmlGenericErrorContext,
6330 "%s(%d): ", ctxt->input->filename,
6331 ctxt->input->line);
6332 xmlGenericError(xmlGenericErrorContext,
6333 "Entering INCLUDE Conditional Section\n");
6334 }
6335
6336 while ((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') ||
6337 (NXT(2) != '>'))) {
6338 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00006339 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00006340
6341 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6342 xmlParseConditionalSections(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00006343 } else if (IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00006344 NEXT;
6345 } else if (RAW == '%') {
6346 xmlParsePEReference(ctxt);
6347 } else
6348 xmlParseMarkupDecl(ctxt);
6349
6350 /*
6351 * Pop-up of finished entities.
6352 */
6353 while ((RAW == 0) && (ctxt->inputNr > 1))
6354 xmlPopInput(ctxt);
6355
Daniel Veillardfdc91562002-07-01 21:52:03 +00006356 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006357 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006358 break;
6359 }
6360 }
6361 if (xmlParserDebugEntities) {
6362 if ((ctxt->input != NULL) && (ctxt->input->filename))
6363 xmlGenericError(xmlGenericErrorContext,
6364 "%s(%d): ", ctxt->input->filename,
6365 ctxt->input->line);
6366 xmlGenericError(xmlGenericErrorContext,
6367 "Leaving INCLUDE Conditional Section\n");
6368 }
6369
Daniel Veillarda07050d2003-10-19 14:46:32 +00006370 } else if (CMP6(CUR_PTR, 'I', 'G', 'N', 'O', 'R', 'E')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006371 int state;
William M. Brack78637da2003-07-31 14:47:38 +00006372 xmlParserInputState instate;
Owen Taylor3473f882001-02-23 17:55:21 +00006373 int depth = 0;
6374
6375 SKIP(6);
6376 SKIP_BLANKS;
6377 if (RAW != '[') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006378 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006379 } else {
Daniel Veillard49d44052008-08-27 19:57:06 +00006380 if (ctxt->input->id != id) {
6381 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6382 "All markup of the conditional section is not in the same entity\n",
6383 NULL, NULL);
6384 }
Owen Taylor3473f882001-02-23 17:55:21 +00006385 NEXT;
6386 }
6387 if (xmlParserDebugEntities) {
6388 if ((ctxt->input != NULL) && (ctxt->input->filename))
6389 xmlGenericError(xmlGenericErrorContext,
6390 "%s(%d): ", ctxt->input->filename,
6391 ctxt->input->line);
6392 xmlGenericError(xmlGenericErrorContext,
6393 "Entering IGNORE Conditional Section\n");
6394 }
6395
6396 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006397 * Parse up to the end of the conditional section
Owen Taylor3473f882001-02-23 17:55:21 +00006398 * But disable SAX event generating DTD building in the meantime
6399 */
6400 state = ctxt->disableSAX;
6401 instate = ctxt->instate;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006402 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006403 ctxt->instate = XML_PARSER_IGNORE;
6404
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00006405 while ((depth >= 0) && (RAW != 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00006406 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6407 depth++;
6408 SKIP(3);
6409 continue;
6410 }
6411 if ((RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
6412 if (--depth >= 0) SKIP(3);
6413 continue;
6414 }
6415 NEXT;
6416 continue;
6417 }
6418
6419 ctxt->disableSAX = state;
6420 ctxt->instate = instate;
6421
6422 if (xmlParserDebugEntities) {
6423 if ((ctxt->input != NULL) && (ctxt->input->filename))
6424 xmlGenericError(xmlGenericErrorContext,
6425 "%s(%d): ", ctxt->input->filename,
6426 ctxt->input->line);
6427 xmlGenericError(xmlGenericErrorContext,
6428 "Leaving IGNORE Conditional Section\n");
6429 }
6430
6431 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006432 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID_KEYWORD, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006433 }
6434
6435 if (RAW == 0)
6436 SHRINK;
6437
6438 if (RAW == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006439 xmlFatalErr(ctxt, XML_ERR_CONDSEC_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006440 } else {
Daniel Veillard49d44052008-08-27 19:57:06 +00006441 if (ctxt->input->id != id) {
6442 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6443 "All markup of the conditional section is not in the same entity\n",
6444 NULL, NULL);
6445 }
Owen Taylor3473f882001-02-23 17:55:21 +00006446 SKIP(3);
6447 }
6448}
6449
6450/**
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006451 * xmlParseMarkupDecl:
6452 * @ctxt: an XML parser context
6453 *
6454 * parse Markup declarations
6455 *
6456 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
6457 * NotationDecl | PI | Comment
6458 *
6459 * [ VC: Proper Declaration/PE Nesting ]
6460 * Parameter-entity replacement text must be properly nested with
6461 * markup declarations. That is to say, if either the first character
6462 * or the last character of a markup declaration (markupdecl above) is
6463 * contained in the replacement text for a parameter-entity reference,
6464 * both must be contained in the same replacement text.
6465 *
6466 * [ WFC: PEs in Internal Subset ]
6467 * In the internal DTD subset, parameter-entity references can occur
6468 * only where markup declarations can occur, not within markup declarations.
6469 * (This does not apply to references that occur in external parameter
6470 * entities or to the external subset.)
6471 */
6472void
6473xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
6474 GROW;
Daniel Veillard4c778d82005-01-23 17:37:44 +00006475 if (CUR == '<') {
6476 if (NXT(1) == '!') {
6477 switch (NXT(2)) {
6478 case 'E':
6479 if (NXT(3) == 'L')
6480 xmlParseElementDecl(ctxt);
6481 else if (NXT(3) == 'N')
6482 xmlParseEntityDecl(ctxt);
6483 break;
6484 case 'A':
6485 xmlParseAttributeListDecl(ctxt);
6486 break;
6487 case 'N':
6488 xmlParseNotationDecl(ctxt);
6489 break;
6490 case '-':
6491 xmlParseComment(ctxt);
6492 break;
6493 default:
6494 /* there is an error but it will be detected later */
6495 break;
6496 }
6497 } else if (NXT(1) == '?') {
6498 xmlParsePI(ctxt);
6499 }
6500 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006501 /*
6502 * This is only for internal subset. On external entities,
6503 * the replacement is done before parsing stage
6504 */
6505 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
6506 xmlParsePEReference(ctxt);
6507
6508 /*
6509 * Conditional sections are allowed from entities included
6510 * by PE References in the internal subset.
6511 */
6512 if ((ctxt->external == 0) && (ctxt->inputNr > 1)) {
6513 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6514 xmlParseConditionalSections(ctxt);
6515 }
6516 }
6517
6518 ctxt->instate = XML_PARSER_DTD;
6519}
6520
6521/**
6522 * xmlParseTextDecl:
6523 * @ctxt: an XML parser context
Daniel Veillard40ec29a2008-07-30 12:35:40 +00006524 *
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006525 * parse an XML declaration header for external entities
6526 *
6527 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006528 */
6529
6530void
6531xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
6532 xmlChar *version;
Daniel Veillardf5cb3cd2003-10-28 13:58:13 +00006533 const xmlChar *encoding;
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006534
6535 /*
6536 * We know that '<?xml' is here.
6537 */
Daniel Veillarda07050d2003-10-19 14:46:32 +00006538 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006539 SKIP(5);
6540 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006541 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_STARTED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006542 return;
6543 }
6544
William M. Brack76e95df2003-10-18 16:20:14 +00006545 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006546 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6547 "Space needed after '<?xml'\n");
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006548 }
6549 SKIP_BLANKS;
6550
6551 /*
6552 * We may have the VersionInfo here.
6553 */
6554 version = xmlParseVersionInfo(ctxt);
6555 if (version == NULL)
6556 version = xmlCharStrdup(XML_DEFAULT_VERSION);
Daniel Veillard401c2112002-01-07 16:54:10 +00006557 else {
William M. Brack76e95df2003-10-18 16:20:14 +00006558 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006559 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6560 "Space needed here\n");
Daniel Veillard401c2112002-01-07 16:54:10 +00006561 }
6562 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006563 ctxt->input->version = version;
6564
6565 /*
6566 * We must have the encoding declaration
6567 */
Daniel Veillardf5cb3cd2003-10-28 13:58:13 +00006568 encoding = xmlParseEncodingDecl(ctxt);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006569 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
6570 /*
6571 * The XML REC instructs us to stop parsing right here
6572 */
6573 return;
6574 }
Daniel Veillardf5cb3cd2003-10-28 13:58:13 +00006575 if ((encoding == NULL) && (ctxt->errNo == XML_ERR_OK)) {
6576 xmlFatalErrMsg(ctxt, XML_ERR_MISSING_ENCODING,
6577 "Missing encoding in text declaration\n");
6578 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006579
6580 SKIP_BLANKS;
6581 if ((RAW == '?') && (NXT(1) == '>')) {
6582 SKIP(2);
6583 } else if (RAW == '>') {
6584 /* Deprecated old WD ... */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006585 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006586 NEXT;
6587 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006588 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006589 MOVETO_ENDTAG(CUR_PTR);
6590 NEXT;
6591 }
6592}
6593
6594/**
Owen Taylor3473f882001-02-23 17:55:21 +00006595 * xmlParseExternalSubset:
6596 * @ctxt: an XML parser context
6597 * @ExternalID: the external identifier
6598 * @SystemID: the system identifier (or URL)
6599 *
6600 * parse Markup declarations from an external subset
6601 *
6602 * [30] extSubset ::= textDecl? extSubsetDecl
6603 *
6604 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
6605 */
6606void
6607xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
6608 const xmlChar *SystemID) {
Daniel Veillard309f81d2003-09-23 09:02:53 +00006609 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006610 GROW;
Daniel Veillard6ccc56d2008-04-03 12:59:06 +00006611
6612 if ((ctxt->encoding == (const xmlChar *)XML_CHAR_ENCODING_NONE) &&
6613 (ctxt->input->end - ctxt->input->cur >= 4)) {
6614 xmlChar start[4];
6615 xmlCharEncoding enc;
6616
6617 start[0] = RAW;
6618 start[1] = NXT(1);
6619 start[2] = NXT(2);
6620 start[3] = NXT(3);
6621 enc = xmlDetectCharEncoding(start, 4);
6622 if (enc != XML_CHAR_ENCODING_NONE)
6623 xmlSwitchEncoding(ctxt, enc);
6624 }
6625
Daniel Veillarda07050d2003-10-19 14:46:32 +00006626 if (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006627 xmlParseTextDecl(ctxt);
6628 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
6629 /*
6630 * The XML REC instructs us to stop parsing right here
6631 */
6632 ctxt->instate = XML_PARSER_EOF;
6633 return;
6634 }
6635 }
6636 if (ctxt->myDoc == NULL) {
6637 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
Daniel Veillardae0765b2008-07-31 19:54:59 +00006638 if (ctxt->myDoc == NULL) {
6639 xmlErrMemory(ctxt, "New Doc failed");
6640 return;
6641 }
6642 ctxt->myDoc->properties = XML_DOC_INTERNAL;
Owen Taylor3473f882001-02-23 17:55:21 +00006643 }
6644 if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
6645 xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
6646
6647 ctxt->instate = XML_PARSER_DTD;
6648 ctxt->external = 1;
6649 while (((RAW == '<') && (NXT(1) == '?')) ||
6650 ((RAW == '<') && (NXT(1) == '!')) ||
William M. Brack76e95df2003-10-18 16:20:14 +00006651 (RAW == '%') || IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00006652 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00006653 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00006654
6655 GROW;
6656 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6657 xmlParseConditionalSections(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00006658 } else if (IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00006659 NEXT;
6660 } else if (RAW == '%') {
6661 xmlParsePEReference(ctxt);
6662 } else
6663 xmlParseMarkupDecl(ctxt);
6664
6665 /*
6666 * Pop-up of finished entities.
6667 */
6668 while ((RAW == 0) && (ctxt->inputNr > 1))
6669 xmlPopInput(ctxt);
6670
Daniel Veillardfdc91562002-07-01 21:52:03 +00006671 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006672 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006673 break;
6674 }
6675 }
6676
6677 if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006678 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006679 }
6680
6681}
6682
6683/**
6684 * xmlParseReference:
6685 * @ctxt: an XML parser context
Daniel Veillard0161e632008-08-28 15:36:32 +00006686 *
Owen Taylor3473f882001-02-23 17:55:21 +00006687 * parse and handle entity references in content, depending on the SAX
6688 * interface, this may end-up in a call to character() if this is a
6689 * CharRef, a predefined entity, if there is no reference() callback.
6690 * or if the parser was asked to switch to that mode.
6691 *
6692 * [67] Reference ::= EntityRef | CharRef
6693 */
6694void
6695xmlParseReference(xmlParserCtxtPtr ctxt) {
6696 xmlEntityPtr ent;
6697 xmlChar *val;
Daniel Veillard0161e632008-08-28 15:36:32 +00006698 int was_checked;
6699 xmlNodePtr list = NULL;
6700 xmlParserErrors ret = XML_ERR_OK;
Owen Taylor3473f882001-02-23 17:55:21 +00006701
Daniel Veillard0161e632008-08-28 15:36:32 +00006702
6703 if (RAW != '&')
6704 return;
6705
6706 /*
6707 * Simple case of a CharRef
6708 */
Owen Taylor3473f882001-02-23 17:55:21 +00006709 if (NXT(1) == '#') {
6710 int i = 0;
6711 xmlChar out[10];
6712 int hex = NXT(2);
Daniel Veillard56a4cb82001-03-24 17:00:36 +00006713 int value = xmlParseCharRef(ctxt);
Daniel Veillard0161e632008-08-28 15:36:32 +00006714
Daniel Veillarddc171602008-03-26 17:41:38 +00006715 if (value == 0)
6716 return;
Owen Taylor3473f882001-02-23 17:55:21 +00006717 if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
6718 /*
6719 * So we are using non-UTF-8 buffers
6720 * Check that the char fit on 8bits, if not
6721 * generate a CharRef.
6722 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00006723 if (value <= 0xFF) {
6724 out[0] = value;
Owen Taylor3473f882001-02-23 17:55:21 +00006725 out[1] = 0;
6726 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
6727 (!ctxt->disableSAX))
6728 ctxt->sax->characters(ctxt->userData, out, 1);
6729 } else {
6730 if ((hex == 'x') || (hex == 'X'))
Aleksey Sanin49cc9752002-06-14 17:07:10 +00006731 snprintf((char *)out, sizeof(out), "#x%X", value);
Owen Taylor3473f882001-02-23 17:55:21 +00006732 else
Aleksey Sanin49cc9752002-06-14 17:07:10 +00006733 snprintf((char *)out, sizeof(out), "#%d", value);
Owen Taylor3473f882001-02-23 17:55:21 +00006734 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
6735 (!ctxt->disableSAX))
6736 ctxt->sax->reference(ctxt->userData, out);
6737 }
6738 } else {
6739 /*
6740 * Just encode the value in UTF-8
6741 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00006742 COPY_BUF(0 ,out, i, value);
Owen Taylor3473f882001-02-23 17:55:21 +00006743 out[i] = 0;
6744 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
6745 (!ctxt->disableSAX))
6746 ctxt->sax->characters(ctxt->userData, out, i);
6747 }
Daniel Veillard0161e632008-08-28 15:36:32 +00006748 return;
6749 }
Daniel Veillarda37a6ad2006-10-10 20:05:45 +00006750
Daniel Veillard0161e632008-08-28 15:36:32 +00006751 /*
6752 * We are seeing an entity reference
6753 */
6754 ent = xmlParseEntityRef(ctxt);
6755 if (ent == NULL) return;
6756 if (!ctxt->wellFormed)
6757 return;
6758 was_checked = ent->checked;
6759
6760 /* special case of predefined entities */
6761 if ((ent->name == NULL) ||
6762 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
6763 val = ent->content;
6764 if (val == NULL) return;
6765 /*
6766 * inline the entity.
6767 */
6768 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
6769 (!ctxt->disableSAX))
6770 ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
6771 return;
6772 }
6773
6774 /*
6775 * The first reference to the entity trigger a parsing phase
6776 * where the ent->children is filled with the result from
6777 * the parsing.
6778 */
6779 if (ent->checked == 0) {
6780 unsigned long oldnbent = ctxt->nbentities;
6781
6782 /*
6783 * This is a bit hackish but this seems the best
6784 * way to make sure both SAX and DOM entity support
6785 * behaves okay.
6786 */
6787 void *user_data;
6788 if (ctxt->userData == ctxt)
6789 user_data = NULL;
6790 else
6791 user_data = ctxt->userData;
6792
6793 /*
6794 * Check that this entity is well formed
6795 * 4.3.2: An internal general parsed entity is well-formed
6796 * if its replacement text matches the production labeled
6797 * content.
6798 */
6799 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
6800 ctxt->depth++;
6801 ret = xmlParseBalancedChunkMemoryInternal(ctxt, ent->content,
6802 user_data, &list);
6803 ctxt->depth--;
6804
6805 } else if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
6806 ctxt->depth++;
6807 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt, ctxt->sax,
6808 user_data, ctxt->depth, ent->URI,
6809 ent->ExternalID, &list);
6810 ctxt->depth--;
6811 } else {
6812 ret = XML_ERR_ENTITY_PE_INTERNAL;
6813 xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
6814 "invalid entity type found\n", NULL);
6815 }
6816
6817 /*
6818 * Store the number of entities needing parsing for this entity
6819 * content and do checkings
6820 */
6821 ent->checked = ctxt->nbentities - oldnbent;
6822 if (ret == XML_ERR_ENTITY_LOOP) {
Daniel Veillard4bf899b2008-08-20 17:04:30 +00006823 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
Daniel Veillard0161e632008-08-28 15:36:32 +00006824 xmlFreeNodeList(list);
Daniel Veillard4bf899b2008-08-20 17:04:30 +00006825 return;
6826 }
Daniel Veillard0161e632008-08-28 15:36:32 +00006827 if (xmlParserEntityCheck(ctxt, 0, ent)) {
6828 xmlFreeNodeList(list);
6829 return;
6830 }
Owen Taylor3473f882001-02-23 17:55:21 +00006831
Daniel Veillard0161e632008-08-28 15:36:32 +00006832 if ((ret == XML_ERR_OK) && (list != NULL)) {
6833 if (((ent->etype == XML_INTERNAL_GENERAL_ENTITY) ||
6834 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY))&&
6835 (ent->children == NULL)) {
6836 ent->children = list;
6837 if (ctxt->replaceEntities) {
Owen Taylor3473f882001-02-23 17:55:21 +00006838 /*
Daniel Veillard0161e632008-08-28 15:36:32 +00006839 * Prune it directly in the generated document
6840 * except for single text nodes.
Owen Taylor3473f882001-02-23 17:55:21 +00006841 */
Daniel Veillard0161e632008-08-28 15:36:32 +00006842 if (((list->type == XML_TEXT_NODE) &&
6843 (list->next == NULL)) ||
6844 (ctxt->parseMode == XML_PARSE_READER)) {
6845 list->parent = (xmlNodePtr) ent;
6846 list = NULL;
6847 ent->owner = 1;
6848 } else {
6849 ent->owner = 0;
6850 while (list != NULL) {
6851 list->parent = (xmlNodePtr) ctxt->node;
6852 list->doc = ctxt->myDoc;
6853 if (list->next == NULL)
6854 ent->last = list;
6855 list = list->next;
Owen Taylor3473f882001-02-23 17:55:21 +00006856 }
Daniel Veillard0161e632008-08-28 15:36:32 +00006857 list = ent->children;
6858#ifdef LIBXML_LEGACY_ENABLED
6859 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
6860 xmlAddEntityReference(ent, list, NULL);
6861#endif /* LIBXML_LEGACY_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00006862 }
6863 } else {
Daniel Veillard0161e632008-08-28 15:36:32 +00006864 ent->owner = 1;
6865 while (list != NULL) {
6866 list->parent = (xmlNodePtr) ent;
6867 if (list->next == NULL)
6868 ent->last = list;
6869 list = list->next;
Owen Taylor3473f882001-02-23 17:55:21 +00006870 }
6871 }
Daniel Veillard0161e632008-08-28 15:36:32 +00006872 } else {
6873 xmlFreeNodeList(list);
6874 list = NULL;
Daniel Veillarda37a6ad2006-10-10 20:05:45 +00006875 }
Daniel Veillard0161e632008-08-28 15:36:32 +00006876 } else if ((ret != XML_ERR_OK) &&
6877 (ret != XML_WAR_UNDECLARED_ENTITY)) {
6878 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
6879 "Entity '%s' failed to parse\n", ent->name);
6880 } else if (list != NULL) {
6881 xmlFreeNodeList(list);
6882 list = NULL;
6883 }
6884 if (ent->checked == 0)
6885 ent->checked = 1;
6886 } else if (ent->checked != 1) {
6887 ctxt->nbentities += ent->checked;
6888 }
Daniel Veillarda37a6ad2006-10-10 20:05:45 +00006889
Daniel Veillard0161e632008-08-28 15:36:32 +00006890 /*
6891 * Now that the entity content has been gathered
6892 * provide it to the application, this can take different forms based
6893 * on the parsing modes.
6894 */
6895 if (ent->children == NULL) {
6896 /*
6897 * Probably running in SAX mode and the callbacks don't
6898 * build the entity content. So unless we already went
6899 * though parsing for first checking go though the entity
6900 * content to generate callbacks associated to the entity
6901 */
6902 if (was_checked != 0) {
6903 void *user_data;
Owen Taylor3473f882001-02-23 17:55:21 +00006904 /*
Daniel Veillard0161e632008-08-28 15:36:32 +00006905 * This is a bit hackish but this seems the best
6906 * way to make sure both SAX and DOM entity support
6907 * behaves okay.
Owen Taylor3473f882001-02-23 17:55:21 +00006908 */
Daniel Veillard0161e632008-08-28 15:36:32 +00006909 if (ctxt->userData == ctxt)
6910 user_data = NULL;
6911 else
6912 user_data = ctxt->userData;
6913
6914 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
6915 ctxt->depth++;
6916 ret = xmlParseBalancedChunkMemoryInternal(ctxt,
6917 ent->content, user_data, NULL);
6918 ctxt->depth--;
6919 } else if (ent->etype ==
6920 XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
6921 ctxt->depth++;
6922 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
6923 ctxt->sax, user_data, ctxt->depth,
6924 ent->URI, ent->ExternalID, NULL);
6925 ctxt->depth--;
6926 } else {
6927 ret = XML_ERR_ENTITY_PE_INTERNAL;
6928 xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
6929 "invalid entity type found\n", NULL);
6930 }
6931 if (ret == XML_ERR_ENTITY_LOOP) {
6932 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
6933 return;
6934 }
6935 }
6936 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
6937 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
6938 /*
6939 * Entity reference callback comes second, it's somewhat
6940 * superfluous but a compatibility to historical behaviour
6941 */
6942 ctxt->sax->reference(ctxt->userData, ent->name);
6943 }
6944 return;
6945 }
6946
6947 /*
6948 * If we didn't get any children for the entity being built
6949 */
6950 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
6951 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
6952 /*
6953 * Create a node.
6954 */
6955 ctxt->sax->reference(ctxt->userData, ent->name);
6956 return;
6957 }
6958
6959 if ((ctxt->replaceEntities) || (ent->children == NULL)) {
6960 /*
6961 * There is a problem on the handling of _private for entities
6962 * (bug 155816): Should we copy the content of the field from
6963 * the entity (possibly overwriting some value set by the user
6964 * when a copy is created), should we leave it alone, or should
6965 * we try to take care of different situations? The problem
6966 * is exacerbated by the usage of this field by the xmlReader.
6967 * To fix this bug, we look at _private on the created node
6968 * and, if it's NULL, we copy in whatever was in the entity.
6969 * If it's not NULL we leave it alone. This is somewhat of a
6970 * hack - maybe we should have further tests to determine
6971 * what to do.
6972 */
6973 if ((ctxt->node != NULL) && (ent->children != NULL)) {
6974 /*
6975 * Seems we are generating the DOM content, do
6976 * a simple tree copy for all references except the first
6977 * In the first occurrence list contains the replacement.
6978 * progressive == 2 means we are operating on the Reader
6979 * and since nodes are discarded we must copy all the time.
6980 */
6981 if (((list == NULL) && (ent->owner == 0)) ||
6982 (ctxt->parseMode == XML_PARSE_READER)) {
6983 xmlNodePtr nw = NULL, cur, firstChild = NULL;
6984
6985 /*
6986 * when operating on a reader, the entities definitions
6987 * are always owning the entities subtree.
6988 if (ctxt->parseMode == XML_PARSE_READER)
6989 ent->owner = 1;
6990 */
6991
6992 cur = ent->children;
6993 while (cur != NULL) {
6994 nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
6995 if (nw != NULL) {
6996 if (nw->_private == NULL)
6997 nw->_private = cur->_private;
6998 if (firstChild == NULL){
6999 firstChild = nw;
7000 }
7001 nw = xmlAddChild(ctxt->node, nw);
7002 }
7003 if (cur == ent->last) {
7004 /*
7005 * needed to detect some strange empty
7006 * node cases in the reader tests
7007 */
7008 if ((ctxt->parseMode == XML_PARSE_READER) &&
7009 (nw != NULL) &&
7010 (nw->type == XML_ELEMENT_NODE) &&
7011 (nw->children == NULL))
7012 nw->extra = 1;
7013
7014 break;
7015 }
7016 cur = cur->next;
7017 }
7018#ifdef LIBXML_LEGACY_ENABLED
7019 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7020 xmlAddEntityReference(ent, firstChild, nw);
7021#endif /* LIBXML_LEGACY_ENABLED */
7022 } else if (list == NULL) {
7023 xmlNodePtr nw = NULL, cur, next, last,
7024 firstChild = NULL;
7025 /*
7026 * Copy the entity child list and make it the new
7027 * entity child list. The goal is to make sure any
7028 * ID or REF referenced will be the one from the
7029 * document content and not the entity copy.
7030 */
7031 cur = ent->children;
7032 ent->children = NULL;
7033 last = ent->last;
7034 ent->last = NULL;
7035 while (cur != NULL) {
7036 next = cur->next;
7037 cur->next = NULL;
7038 cur->parent = NULL;
7039 nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
7040 if (nw != NULL) {
7041 if (nw->_private == NULL)
7042 nw->_private = cur->_private;
7043 if (firstChild == NULL){
7044 firstChild = cur;
7045 }
7046 xmlAddChild((xmlNodePtr) ent, nw);
7047 xmlAddChild(ctxt->node, cur);
7048 }
7049 if (cur == last)
7050 break;
7051 cur = next;
7052 }
Daniel Veillardcba68392008-08-29 12:43:40 +00007053 if (ent->owner == 0)
7054 ent->owner = 1;
Daniel Veillard0161e632008-08-28 15:36:32 +00007055#ifdef LIBXML_LEGACY_ENABLED
7056 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7057 xmlAddEntityReference(ent, firstChild, nw);
7058#endif /* LIBXML_LEGACY_ENABLED */
7059 } else {
7060 const xmlChar *nbktext;
7061
7062 /*
7063 * the name change is to avoid coalescing of the
7064 * node with a possible previous text one which
7065 * would make ent->children a dangling pointer
7066 */
7067 nbktext = xmlDictLookup(ctxt->dict, BAD_CAST "nbktext",
7068 -1);
7069 if (ent->children->type == XML_TEXT_NODE)
7070 ent->children->name = nbktext;
7071 if ((ent->last != ent->children) &&
7072 (ent->last->type == XML_TEXT_NODE))
7073 ent->last->name = nbktext;
7074 xmlAddChildList(ctxt->node, ent->children);
7075 }
7076
7077 /*
7078 * This is to avoid a nasty side effect, see
7079 * characters() in SAX.c
7080 */
7081 ctxt->nodemem = 0;
7082 ctxt->nodelen = 0;
7083 return;
Owen Taylor3473f882001-02-23 17:55:21 +00007084 }
7085 }
7086}
7087
7088/**
7089 * xmlParseEntityRef:
7090 * @ctxt: an XML parser context
7091 *
7092 * parse ENTITY references declarations
7093 *
7094 * [68] EntityRef ::= '&' Name ';'
7095 *
7096 * [ WFC: Entity Declared ]
7097 * In a document without any DTD, a document with only an internal DTD
7098 * subset which contains no parameter entity references, or a document
7099 * with "standalone='yes'", the Name given in the entity reference
7100 * must match that in an entity declaration, except that well-formed
7101 * documents need not declare any of the following entities: amp, lt,
7102 * gt, apos, quot. The declaration of a parameter entity must precede
7103 * any reference to it. Similarly, the declaration of a general entity
7104 * must precede any reference to it which appears in a default value in an
7105 * attribute-list declaration. Note that if entities are declared in the
7106 * external subset or in external parameter entities, a non-validating
7107 * processor is not obligated to read and process their declarations;
7108 * for such documents, the rule that an entity must be declared is a
7109 * well-formedness constraint only if standalone='yes'.
7110 *
7111 * [ WFC: Parsed Entity ]
7112 * An entity reference must not contain the name of an unparsed entity
7113 *
7114 * Returns the xmlEntityPtr if found, or NULL otherwise.
7115 */
7116xmlEntityPtr
7117xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007118 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00007119 xmlEntityPtr ent = NULL;
7120
7121 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00007122
Daniel Veillard0161e632008-08-28 15:36:32 +00007123 if (RAW != '&')
7124 return(NULL);
7125 NEXT;
7126 name = xmlParseName(ctxt);
7127 if (name == NULL) {
7128 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7129 "xmlParseEntityRef: no name\n");
7130 return(NULL);
7131 }
7132 if (RAW != ';') {
7133 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7134 return(NULL);
7135 }
7136 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00007137
Daniel Veillard0161e632008-08-28 15:36:32 +00007138 /*
7139 * Predefined entites override any extra definition
7140 */
Rob Richardsb9ed0172009-01-05 17:28:50 +00007141 if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7142 ent = xmlGetPredefinedEntity(name);
7143 if (ent != NULL)
7144 return(ent);
7145 }
Owen Taylor3473f882001-02-23 17:55:21 +00007146
Daniel Veillard0161e632008-08-28 15:36:32 +00007147 /*
7148 * Increate the number of entity references parsed
7149 */
7150 ctxt->nbentities++;
Owen Taylor3473f882001-02-23 17:55:21 +00007151
Daniel Veillard0161e632008-08-28 15:36:32 +00007152 /*
7153 * Ask first SAX for entity resolution, otherwise try the
7154 * entities which may have stored in the parser context.
7155 */
7156 if (ctxt->sax != NULL) {
7157 if (ctxt->sax->getEntity != NULL)
7158 ent = ctxt->sax->getEntity(ctxt->userData, name);
Rob Richardsb9ed0172009-01-05 17:28:50 +00007159 if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7160 (ctxt->options & XML_PARSE_OLDSAX))
7161 ent = xmlGetPredefinedEntity(name);
Daniel Veillard0161e632008-08-28 15:36:32 +00007162 if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7163 (ctxt->userData==ctxt)) {
7164 ent = xmlSAX2GetEntity(ctxt, name);
Owen Taylor3473f882001-02-23 17:55:21 +00007165 }
7166 }
Daniel Veillard0161e632008-08-28 15:36:32 +00007167 /*
7168 * [ WFC: Entity Declared ]
7169 * In a document without any DTD, a document with only an
7170 * internal DTD subset which contains no parameter entity
7171 * references, or a document with "standalone='yes'", the
7172 * Name given in the entity reference must match that in an
7173 * entity declaration, except that well-formed documents
7174 * need not declare any of the following entities: amp, lt,
7175 * gt, apos, quot.
7176 * The declaration of a parameter entity must precede any
7177 * reference to it.
7178 * Similarly, the declaration of a general entity must
7179 * precede any reference to it which appears in a default
7180 * value in an attribute-list declaration. Note that if
7181 * entities are declared in the external subset or in
7182 * external parameter entities, a non-validating processor
7183 * is not obligated to read and process their declarations;
7184 * for such documents, the rule that an entity must be
7185 * declared is a well-formedness constraint only if
7186 * standalone='yes'.
7187 */
7188 if (ent == NULL) {
7189 if ((ctxt->standalone == 1) ||
7190 ((ctxt->hasExternalSubset == 0) &&
7191 (ctxt->hasPErefs == 0))) {
7192 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7193 "Entity '%s' not defined\n", name);
7194 } else {
7195 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7196 "Entity '%s' not defined\n", name);
7197 if ((ctxt->inSubset == 0) &&
7198 (ctxt->sax != NULL) &&
7199 (ctxt->sax->reference != NULL)) {
7200 ctxt->sax->reference(ctxt->userData, name);
7201 }
7202 }
7203 ctxt->valid = 0;
7204 }
7205
7206 /*
7207 * [ WFC: Parsed Entity ]
7208 * An entity reference must not contain the name of an
7209 * unparsed entity
7210 */
7211 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7212 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7213 "Entity reference to unparsed entity %s\n", name);
7214 }
7215
7216 /*
7217 * [ WFC: No External Entity References ]
7218 * Attribute values cannot contain direct or indirect
7219 * entity references to external entities.
7220 */
7221 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7222 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
7223 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7224 "Attribute references external entity '%s'\n", name);
7225 }
7226 /*
7227 * [ WFC: No < in Attribute Values ]
7228 * The replacement text of any entity referred to directly or
7229 * indirectly in an attribute value (other than "&lt;") must
7230 * not contain a <.
7231 */
7232 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7233 (ent != NULL) && (ent->content != NULL) &&
Rob Richardsb9ed0172009-01-05 17:28:50 +00007234 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
Daniel Veillard0161e632008-08-28 15:36:32 +00007235 (xmlStrchr(ent->content, '<'))) {
7236 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
7237 "'<' in entity '%s' is not allowed in attributes values\n", name);
7238 }
7239
7240 /*
7241 * Internal check, no parameter entities here ...
7242 */
7243 else {
7244 switch (ent->etype) {
7245 case XML_INTERNAL_PARAMETER_ENTITY:
7246 case XML_EXTERNAL_PARAMETER_ENTITY:
7247 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
7248 "Attempt to reference the parameter entity '%s'\n",
7249 name);
7250 break;
7251 default:
7252 break;
7253 }
7254 }
7255
7256 /*
7257 * [ WFC: No Recursion ]
7258 * A parsed entity must not contain a recursive reference
7259 * to itself, either directly or indirectly.
7260 * Done somewhere else
7261 */
Owen Taylor3473f882001-02-23 17:55:21 +00007262 return(ent);
7263}
7264
7265/**
7266 * xmlParseStringEntityRef:
7267 * @ctxt: an XML parser context
7268 * @str: a pointer to an index in the string
7269 *
7270 * parse ENTITY references declarations, but this version parses it from
7271 * a string value.
7272 *
7273 * [68] EntityRef ::= '&' Name ';'
7274 *
7275 * [ WFC: Entity Declared ]
7276 * In a document without any DTD, a document with only an internal DTD
7277 * subset which contains no parameter entity references, or a document
7278 * with "standalone='yes'", the Name given in the entity reference
7279 * must match that in an entity declaration, except that well-formed
7280 * documents need not declare any of the following entities: amp, lt,
7281 * gt, apos, quot. The declaration of a parameter entity must precede
7282 * any reference to it. Similarly, the declaration of a general entity
7283 * must precede any reference to it which appears in a default value in an
7284 * attribute-list declaration. Note that if entities are declared in the
7285 * external subset or in external parameter entities, a non-validating
7286 * processor is not obligated to read and process their declarations;
7287 * for such documents, the rule that an entity must be declared is a
7288 * well-formedness constraint only if standalone='yes'.
7289 *
7290 * [ WFC: Parsed Entity ]
7291 * An entity reference must not contain the name of an unparsed entity
7292 *
7293 * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
7294 * is updated to the current location in the string.
7295 */
Daniel Veillard8ed10722009-08-20 19:17:36 +02007296static xmlEntityPtr
Owen Taylor3473f882001-02-23 17:55:21 +00007297xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
7298 xmlChar *name;
7299 const xmlChar *ptr;
7300 xmlChar cur;
7301 xmlEntityPtr ent = NULL;
7302
7303 if ((str == NULL) || (*str == NULL))
7304 return(NULL);
7305 ptr = *str;
7306 cur = *ptr;
Daniel Veillard0161e632008-08-28 15:36:32 +00007307 if (cur != '&')
7308 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007309
Daniel Veillard0161e632008-08-28 15:36:32 +00007310 ptr++;
7311 cur = *ptr;
7312 name = xmlParseStringName(ctxt, &ptr);
7313 if (name == NULL) {
7314 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7315 "xmlParseStringEntityRef: no name\n");
7316 *str = ptr;
7317 return(NULL);
7318 }
7319 if (*ptr != ';') {
7320 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
Daniel Veillard7f4547c2008-10-03 07:58:23 +00007321 xmlFree(name);
Daniel Veillard0161e632008-08-28 15:36:32 +00007322 *str = ptr;
7323 return(NULL);
7324 }
7325 ptr++;
Owen Taylor3473f882001-02-23 17:55:21 +00007326
Owen Taylor3473f882001-02-23 17:55:21 +00007327
Daniel Veillard0161e632008-08-28 15:36:32 +00007328 /*
7329 * Predefined entites override any extra definition
7330 */
Rob Richardsb9ed0172009-01-05 17:28:50 +00007331 if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7332 ent = xmlGetPredefinedEntity(name);
7333 if (ent != NULL) {
7334 xmlFree(name);
7335 *str = ptr;
7336 return(ent);
7337 }
Daniel Veillard34a7fc32008-10-02 20:55:10 +00007338 }
Owen Taylor3473f882001-02-23 17:55:21 +00007339
Daniel Veillard0161e632008-08-28 15:36:32 +00007340 /*
7341 * Increate the number of entity references parsed
7342 */
7343 ctxt->nbentities++;
Owen Taylor3473f882001-02-23 17:55:21 +00007344
Daniel Veillard0161e632008-08-28 15:36:32 +00007345 /*
7346 * Ask first SAX for entity resolution, otherwise try the
7347 * entities which may have stored in the parser context.
7348 */
7349 if (ctxt->sax != NULL) {
7350 if (ctxt->sax->getEntity != NULL)
7351 ent = ctxt->sax->getEntity(ctxt->userData, name);
Rob Richardsb9ed0172009-01-05 17:28:50 +00007352 if ((ent == NULL) && (ctxt->options & XML_PARSE_OLDSAX))
7353 ent = xmlGetPredefinedEntity(name);
Daniel Veillard0161e632008-08-28 15:36:32 +00007354 if ((ent == NULL) && (ctxt->userData==ctxt)) {
7355 ent = xmlSAX2GetEntity(ctxt, name);
Owen Taylor3473f882001-02-23 17:55:21 +00007356 }
7357 }
Daniel Veillard0161e632008-08-28 15:36:32 +00007358
7359 /*
7360 * [ WFC: Entity Declared ]
7361 * In a document without any DTD, a document with only an
7362 * internal DTD subset which contains no parameter entity
7363 * references, or a document with "standalone='yes'", the
7364 * Name given in the entity reference must match that in an
7365 * entity declaration, except that well-formed documents
7366 * need not declare any of the following entities: amp, lt,
7367 * gt, apos, quot.
7368 * The declaration of a parameter entity must precede any
7369 * reference to it.
7370 * Similarly, the declaration of a general entity must
7371 * precede any reference to it which appears in a default
7372 * value in an attribute-list declaration. Note that if
7373 * entities are declared in the external subset or in
7374 * external parameter entities, a non-validating processor
7375 * is not obligated to read and process their declarations;
7376 * for such documents, the rule that an entity must be
7377 * declared is a well-formedness constraint only if
7378 * standalone='yes'.
7379 */
7380 if (ent == NULL) {
7381 if ((ctxt->standalone == 1) ||
7382 ((ctxt->hasExternalSubset == 0) &&
7383 (ctxt->hasPErefs == 0))) {
7384 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7385 "Entity '%s' not defined\n", name);
7386 } else {
7387 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7388 "Entity '%s' not defined\n",
7389 name);
7390 }
7391 /* TODO ? check regressions ctxt->valid = 0; */
7392 }
7393
7394 /*
7395 * [ WFC: Parsed Entity ]
7396 * An entity reference must not contain the name of an
7397 * unparsed entity
7398 */
7399 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7400 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7401 "Entity reference to unparsed entity %s\n", name);
7402 }
7403
7404 /*
7405 * [ WFC: No External Entity References ]
7406 * Attribute values cannot contain direct or indirect
7407 * entity references to external entities.
7408 */
7409 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7410 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
7411 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7412 "Attribute references external entity '%s'\n", name);
7413 }
7414 /*
7415 * [ WFC: No < in Attribute Values ]
7416 * The replacement text of any entity referred to directly or
7417 * indirectly in an attribute value (other than "&lt;") must
7418 * not contain a <.
7419 */
7420 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7421 (ent != NULL) && (ent->content != NULL) &&
Rob Richardsb9ed0172009-01-05 17:28:50 +00007422 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
Daniel Veillard0161e632008-08-28 15:36:32 +00007423 (xmlStrchr(ent->content, '<'))) {
7424 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
7425 "'<' in entity '%s' is not allowed in attributes values\n",
7426 name);
7427 }
7428
7429 /*
7430 * Internal check, no parameter entities here ...
7431 */
7432 else {
7433 switch (ent->etype) {
7434 case XML_INTERNAL_PARAMETER_ENTITY:
7435 case XML_EXTERNAL_PARAMETER_ENTITY:
7436 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
7437 "Attempt to reference the parameter entity '%s'\n",
7438 name);
7439 break;
7440 default:
7441 break;
7442 }
7443 }
7444
7445 /*
7446 * [ WFC: No Recursion ]
7447 * A parsed entity must not contain a recursive reference
7448 * to itself, either directly or indirectly.
7449 * Done somewhere else
7450 */
7451
7452 xmlFree(name);
Owen Taylor3473f882001-02-23 17:55:21 +00007453 *str = ptr;
7454 return(ent);
7455}
7456
7457/**
7458 * xmlParsePEReference:
7459 * @ctxt: an XML parser context
7460 *
7461 * parse PEReference declarations
7462 * The entity content is handled directly by pushing it's content as
7463 * a new input stream.
7464 *
7465 * [69] PEReference ::= '%' Name ';'
7466 *
7467 * [ WFC: No Recursion ]
7468 * A parsed entity must not contain a recursive
7469 * reference to itself, either directly or indirectly.
7470 *
7471 * [ WFC: Entity Declared ]
7472 * In a document without any DTD, a document with only an internal DTD
7473 * subset which contains no parameter entity references, or a document
7474 * with "standalone='yes'", ... ... The declaration of a parameter
7475 * entity must precede any reference to it...
7476 *
7477 * [ VC: Entity Declared ]
7478 * In a document with an external subset or external parameter entities
7479 * with "standalone='no'", ... ... The declaration of a parameter entity
7480 * must precede any reference to it...
7481 *
7482 * [ WFC: In DTD ]
7483 * Parameter-entity references may only appear in the DTD.
7484 * NOTE: misleading but this is handled.
7485 */
7486void
Daniel Veillard8f597c32003-10-06 08:19:27 +00007487xmlParsePEReference(xmlParserCtxtPtr ctxt)
7488{
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007489 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00007490 xmlEntityPtr entity = NULL;
7491 xmlParserInputPtr input;
7492
Daniel Veillard0161e632008-08-28 15:36:32 +00007493 if (RAW != '%')
7494 return;
7495 NEXT;
7496 name = xmlParseName(ctxt);
7497 if (name == NULL) {
7498 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7499 "xmlParsePEReference: no name\n");
7500 return;
Owen Taylor3473f882001-02-23 17:55:21 +00007501 }
Daniel Veillard0161e632008-08-28 15:36:32 +00007502 if (RAW != ';') {
7503 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7504 return;
7505 }
7506
7507 NEXT;
7508
7509 /*
7510 * Increate the number of entity references parsed
7511 */
7512 ctxt->nbentities++;
7513
7514 /*
7515 * Request the entity from SAX
7516 */
7517 if ((ctxt->sax != NULL) &&
7518 (ctxt->sax->getParameterEntity != NULL))
7519 entity = ctxt->sax->getParameterEntity(ctxt->userData,
7520 name);
7521 if (entity == NULL) {
7522 /*
7523 * [ WFC: Entity Declared ]
7524 * In a document without any DTD, a document with only an
7525 * internal DTD subset which contains no parameter entity
7526 * references, or a document with "standalone='yes'", ...
7527 * ... The declaration of a parameter entity must precede
7528 * any reference to it...
7529 */
7530 if ((ctxt->standalone == 1) ||
7531 ((ctxt->hasExternalSubset == 0) &&
7532 (ctxt->hasPErefs == 0))) {
7533 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7534 "PEReference: %%%s; not found\n",
7535 name);
7536 } else {
7537 /*
7538 * [ VC: Entity Declared ]
7539 * In a document with an external subset or external
7540 * parameter entities with "standalone='no'", ...
7541 * ... The declaration of a parameter entity must
7542 * precede any reference to it...
7543 */
7544 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7545 "PEReference: %%%s; not found\n",
7546 name, NULL);
7547 ctxt->valid = 0;
7548 }
7549 } else {
7550 /*
7551 * Internal checking in case the entity quest barfed
7552 */
7553 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
7554 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
7555 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7556 "Internal: %%%s; is not a parameter entity\n",
7557 name, NULL);
7558 } else if (ctxt->input->free != deallocblankswrapper) {
7559 input = xmlNewBlanksWrapperInputStream(ctxt, entity);
7560 if (xmlPushInput(ctxt, input) < 0)
7561 return;
7562 } else {
7563 /*
7564 * TODO !!!
7565 * handle the extra spaces added before and after
7566 * c.f. http://www.w3.org/TR/REC-xml#as-PE
7567 */
7568 input = xmlNewEntityInputStream(ctxt, entity);
7569 if (xmlPushInput(ctxt, input) < 0)
7570 return;
7571 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
7572 (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
7573 (IS_BLANK_CH(NXT(5)))) {
7574 xmlParseTextDecl(ctxt);
7575 if (ctxt->errNo ==
7576 XML_ERR_UNSUPPORTED_ENCODING) {
7577 /*
7578 * The XML REC instructs us to stop parsing
7579 * right here
7580 */
7581 ctxt->instate = XML_PARSER_EOF;
7582 return;
7583 }
7584 }
7585 }
7586 }
7587 ctxt->hasPErefs = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007588}
7589
7590/**
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00007591 * xmlLoadEntityContent:
7592 * @ctxt: an XML parser context
7593 * @entity: an unloaded system entity
7594 *
7595 * Load the original content of the given system entity from the
7596 * ExternalID/SystemID given. This is to be used for Included in Literal
7597 * http://www.w3.org/TR/REC-xml/#inliteral processing of entities references
7598 *
7599 * Returns 0 in case of success and -1 in case of failure
7600 */
7601static int
7602xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
7603 xmlParserInputPtr input;
7604 xmlBufferPtr buf;
7605 int l, c;
7606 int count = 0;
7607
7608 if ((ctxt == NULL) || (entity == NULL) ||
7609 ((entity->etype != XML_EXTERNAL_PARAMETER_ENTITY) &&
7610 (entity->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY)) ||
7611 (entity->content != NULL)) {
7612 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
7613 "xmlLoadEntityContent parameter error");
7614 return(-1);
7615 }
7616
7617 if (xmlParserDebugEntities)
7618 xmlGenericError(xmlGenericErrorContext,
7619 "Reading %s entity content input\n", entity->name);
7620
7621 buf = xmlBufferCreate();
7622 if (buf == NULL) {
7623 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
7624 "xmlLoadEntityContent parameter error");
7625 return(-1);
7626 }
7627
7628 input = xmlNewEntityInputStream(ctxt, entity);
7629 if (input == NULL) {
7630 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
7631 "xmlLoadEntityContent input error");
7632 xmlBufferFree(buf);
7633 return(-1);
7634 }
7635
7636 /*
7637 * Push the entity as the current input, read char by char
7638 * saving to the buffer until the end of the entity or an error
7639 */
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00007640 if (xmlPushInput(ctxt, input) < 0) {
7641 xmlBufferFree(buf);
7642 return(-1);
7643 }
7644
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00007645 GROW;
7646 c = CUR_CHAR(l);
7647 while ((ctxt->input == input) && (ctxt->input->cur < ctxt->input->end) &&
7648 (IS_CHAR(c))) {
7649 xmlBufferAdd(buf, ctxt->input->cur, l);
7650 if (count++ > 100) {
7651 count = 0;
7652 GROW;
7653 }
7654 NEXTL(l);
7655 c = CUR_CHAR(l);
7656 }
7657
7658 if ((ctxt->input == input) && (ctxt->input->cur >= ctxt->input->end)) {
7659 xmlPopInput(ctxt);
7660 } else if (!IS_CHAR(c)) {
7661 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
7662 "xmlLoadEntityContent: invalid char value %d\n",
7663 c);
7664 xmlBufferFree(buf);
7665 return(-1);
7666 }
7667 entity->content = buf->content;
7668 buf->content = NULL;
7669 xmlBufferFree(buf);
7670
7671 return(0);
7672}
7673
7674/**
Owen Taylor3473f882001-02-23 17:55:21 +00007675 * xmlParseStringPEReference:
7676 * @ctxt: an XML parser context
7677 * @str: a pointer to an index in the string
7678 *
7679 * parse PEReference declarations
7680 *
7681 * [69] PEReference ::= '%' Name ';'
7682 *
7683 * [ WFC: No Recursion ]
7684 * A parsed entity must not contain a recursive
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00007685 * reference to itself, either directly or indirectly.
Owen Taylor3473f882001-02-23 17:55:21 +00007686 *
7687 * [ WFC: Entity Declared ]
7688 * In a document without any DTD, a document with only an internal DTD
7689 * subset which contains no parameter entity references, or a document
7690 * with "standalone='yes'", ... ... The declaration of a parameter
7691 * entity must precede any reference to it...
7692 *
7693 * [ VC: Entity Declared ]
7694 * In a document with an external subset or external parameter entities
7695 * with "standalone='no'", ... ... The declaration of a parameter entity
7696 * must precede any reference to it...
7697 *
7698 * [ WFC: In DTD ]
7699 * Parameter-entity references may only appear in the DTD.
7700 * NOTE: misleading but this is handled.
7701 *
7702 * Returns the string of the entity content.
7703 * str is updated to the current value of the index
7704 */
Daniel Veillard8ed10722009-08-20 19:17:36 +02007705static xmlEntityPtr
Owen Taylor3473f882001-02-23 17:55:21 +00007706xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
7707 const xmlChar *ptr;
7708 xmlChar cur;
7709 xmlChar *name;
7710 xmlEntityPtr entity = NULL;
7711
7712 if ((str == NULL) || (*str == NULL)) return(NULL);
7713 ptr = *str;
7714 cur = *ptr;
Daniel Veillard0161e632008-08-28 15:36:32 +00007715 if (cur != '%')
7716 return(NULL);
7717 ptr++;
7718 cur = *ptr;
7719 name = xmlParseStringName(ctxt, &ptr);
7720 if (name == NULL) {
7721 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7722 "xmlParseStringPEReference: no name\n");
7723 *str = ptr;
7724 return(NULL);
7725 }
7726 cur = *ptr;
7727 if (cur != ';') {
7728 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7729 xmlFree(name);
7730 *str = ptr;
7731 return(NULL);
7732 }
7733 ptr++;
7734
7735 /*
7736 * Increate the number of entity references parsed
7737 */
7738 ctxt->nbentities++;
7739
7740 /*
7741 * Request the entity from SAX
7742 */
7743 if ((ctxt->sax != NULL) &&
7744 (ctxt->sax->getParameterEntity != NULL))
7745 entity = ctxt->sax->getParameterEntity(ctxt->userData,
7746 name);
7747 if (entity == NULL) {
7748 /*
7749 * [ WFC: Entity Declared ]
7750 * In a document without any DTD, a document with only an
7751 * internal DTD subset which contains no parameter entity
7752 * references, or a document with "standalone='yes'", ...
7753 * ... The declaration of a parameter entity must precede
7754 * any reference to it...
7755 */
7756 if ((ctxt->standalone == 1) ||
7757 ((ctxt->hasExternalSubset == 0) && (ctxt->hasPErefs == 0))) {
7758 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7759 "PEReference: %%%s; not found\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00007760 } else {
Daniel Veillard0161e632008-08-28 15:36:32 +00007761 /*
7762 * [ VC: Entity Declared ]
7763 * In a document with an external subset or external
7764 * parameter entities with "standalone='no'", ...
7765 * ... The declaration of a parameter entity must
7766 * precede any reference to it...
7767 */
7768 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7769 "PEReference: %%%s; not found\n",
7770 name, NULL);
7771 ctxt->valid = 0;
7772 }
7773 } else {
7774 /*
7775 * Internal checking in case the entity quest barfed
7776 */
7777 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
7778 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
7779 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7780 "%%%s; is not a parameter entity\n",
7781 name, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007782 }
7783 }
Daniel Veillard0161e632008-08-28 15:36:32 +00007784 ctxt->hasPErefs = 1;
7785 xmlFree(name);
Owen Taylor3473f882001-02-23 17:55:21 +00007786 *str = ptr;
7787 return(entity);
7788}
7789
7790/**
7791 * xmlParseDocTypeDecl:
7792 * @ctxt: an XML parser context
7793 *
7794 * parse a DOCTYPE declaration
7795 *
7796 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
7797 * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
7798 *
7799 * [ VC: Root Element Type ]
7800 * The Name in the document type declaration must match the element
7801 * type of the root element.
7802 */
7803
7804void
7805xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007806 const xmlChar *name = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00007807 xmlChar *ExternalID = NULL;
7808 xmlChar *URI = NULL;
7809
7810 /*
7811 * We know that '<!DOCTYPE' has been detected.
7812 */
7813 SKIP(9);
7814
7815 SKIP_BLANKS;
7816
7817 /*
7818 * Parse the DOCTYPE name.
7819 */
7820 name = xmlParseName(ctxt);
7821 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007822 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7823 "xmlParseDocTypeDecl : no DOCTYPE name !\n");
Owen Taylor3473f882001-02-23 17:55:21 +00007824 }
7825 ctxt->intSubName = name;
7826
7827 SKIP_BLANKS;
7828
7829 /*
7830 * Check for SystemID and ExternalID
7831 */
7832 URI = xmlParseExternalID(ctxt, &ExternalID, 1);
7833
7834 if ((URI != NULL) || (ExternalID != NULL)) {
7835 ctxt->hasExternalSubset = 1;
7836 }
7837 ctxt->extSubURI = URI;
7838 ctxt->extSubSystem = ExternalID;
7839
7840 SKIP_BLANKS;
7841
7842 /*
7843 * Create and update the internal subset.
7844 */
7845 if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
7846 (!ctxt->disableSAX))
7847 ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
7848
7849 /*
7850 * Is there any internal subset declarations ?
7851 * they are handled separately in xmlParseInternalSubset()
7852 */
7853 if (RAW == '[')
7854 return;
7855
7856 /*
7857 * We should be at the end of the DOCTYPE declaration.
7858 */
7859 if (RAW != '>') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007860 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007861 }
7862 NEXT;
7863}
7864
7865/**
Daniel Veillardcbaf3992001-12-31 16:16:02 +00007866 * xmlParseInternalSubset:
Owen Taylor3473f882001-02-23 17:55:21 +00007867 * @ctxt: an XML parser context
7868 *
7869 * parse the internal subset declaration
7870 *
7871 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
7872 */
7873
Daniel Veillard56a4cb82001-03-24 17:00:36 +00007874static void
Owen Taylor3473f882001-02-23 17:55:21 +00007875xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
7876 /*
7877 * Is there any DTD definition ?
7878 */
7879 if (RAW == '[') {
7880 ctxt->instate = XML_PARSER_DTD;
7881 NEXT;
7882 /*
7883 * Parse the succession of Markup declarations and
7884 * PEReferences.
7885 * Subsequence (markupdecl | PEReference | S)*
7886 */
7887 while (RAW != ']') {
7888 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00007889 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00007890
7891 SKIP_BLANKS;
7892 xmlParseMarkupDecl(ctxt);
7893 xmlParsePEReference(ctxt);
7894
7895 /*
7896 * Pop-up of finished entities.
7897 */
7898 while ((RAW == 0) && (ctxt->inputNr > 1))
7899 xmlPopInput(ctxt);
7900
7901 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007902 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
Owen Taylor3473f882001-02-23 17:55:21 +00007903 "xmlParseInternalSubset: error detected in Markup declaration\n");
Owen Taylor3473f882001-02-23 17:55:21 +00007904 break;
7905 }
7906 }
7907 if (RAW == ']') {
7908 NEXT;
7909 SKIP_BLANKS;
7910 }
7911 }
7912
7913 /*
7914 * We should be at the end of the DOCTYPE declaration.
7915 */
7916 if (RAW != '>') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007917 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007918 }
7919 NEXT;
7920}
7921
Daniel Veillard81273902003-09-30 00:43:48 +00007922#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +00007923/**
7924 * xmlParseAttribute:
7925 * @ctxt: an XML parser context
7926 * @value: a xmlChar ** used to store the value of the attribute
7927 *
7928 * parse an attribute
7929 *
7930 * [41] Attribute ::= Name Eq AttValue
7931 *
7932 * [ WFC: No External Entity References ]
7933 * Attribute values cannot contain direct or indirect entity references
7934 * to external entities.
7935 *
7936 * [ WFC: No < in Attribute Values ]
7937 * The replacement text of any entity referred to directly or indirectly in
7938 * an attribute value (other than "&lt;") must not contain a <.
7939 *
7940 * [ VC: Attribute Value Type ]
7941 * The attribute must have been declared; the value must be of the type
7942 * declared for it.
7943 *
7944 * [25] Eq ::= S? '=' S?
7945 *
7946 * With namespace:
7947 *
7948 * [NS 11] Attribute ::= QName Eq AttValue
7949 *
7950 * Also the case QName == xmlns:??? is handled independently as a namespace
7951 * definition.
7952 *
7953 * Returns the attribute name, and the value in *value.
7954 */
7955
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007956const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00007957xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007958 const xmlChar *name;
7959 xmlChar *val;
Owen Taylor3473f882001-02-23 17:55:21 +00007960
7961 *value = NULL;
Daniel Veillard878eab02002-02-19 13:46:09 +00007962 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00007963 name = xmlParseName(ctxt);
7964 if (name == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00007965 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007966 "error parsing attribute name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00007967 return(NULL);
7968 }
7969
7970 /*
7971 * read the value
7972 */
7973 SKIP_BLANKS;
7974 if (RAW == '=') {
7975 NEXT;
7976 SKIP_BLANKS;
7977 val = xmlParseAttValue(ctxt);
7978 ctxt->instate = XML_PARSER_CONTENT;
7979 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00007980 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
Owen Taylor3473f882001-02-23 17:55:21 +00007981 "Specification mandate value for attribute %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00007982 return(NULL);
7983 }
7984
7985 /*
7986 * Check that xml:lang conforms to the specification
7987 * No more registered as an error, just generate a warning now
7988 * since this was deprecated in XML second edition
7989 */
7990 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
7991 if (!xmlCheckLanguageID(val)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00007992 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
7993 "Malformed value for xml:lang : %s\n",
7994 val, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007995 }
7996 }
7997
7998 /*
7999 * Check that xml:space conforms to the specification
8000 */
8001 if (xmlStrEqual(name, BAD_CAST "xml:space")) {
8002 if (xmlStrEqual(val, BAD_CAST "default"))
8003 *(ctxt->space) = 0;
8004 else if (xmlStrEqual(val, BAD_CAST "preserve"))
8005 *(ctxt->space) = 1;
8006 else {
Daniel Veillardd8925572005-06-08 22:34:55 +00008007 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
Daniel Veillard642104e2003-03-26 16:32:05 +00008008"Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
Daniel Veillardd8925572005-06-08 22:34:55 +00008009 val, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008010 }
8011 }
8012
8013 *value = val;
8014 return(name);
8015}
8016
8017/**
8018 * xmlParseStartTag:
8019 * @ctxt: an XML parser context
8020 *
8021 * parse a start of tag either for rule element or
8022 * EmptyElement. In both case we don't parse the tag closing chars.
8023 *
8024 * [40] STag ::= '<' Name (S Attribute)* S? '>'
8025 *
8026 * [ WFC: Unique Att Spec ]
8027 * No attribute name may appear more than once in the same start-tag or
8028 * empty-element tag.
8029 *
8030 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
8031 *
8032 * [ WFC: Unique Att Spec ]
8033 * No attribute name may appear more than once in the same start-tag or
8034 * empty-element tag.
8035 *
8036 * With namespace:
8037 *
8038 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
8039 *
8040 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
8041 *
8042 * Returns the element name parsed
8043 */
8044
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008045const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00008046xmlParseStartTag(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008047 const xmlChar *name;
8048 const xmlChar *attname;
Owen Taylor3473f882001-02-23 17:55:21 +00008049 xmlChar *attvalue;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008050 const xmlChar **atts = ctxt->atts;
Owen Taylor3473f882001-02-23 17:55:21 +00008051 int nbatts = 0;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008052 int maxatts = ctxt->maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00008053 int i;
8054
8055 if (RAW != '<') return(NULL);
Daniel Veillard21a0f912001-02-25 19:54:14 +00008056 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00008057
8058 name = xmlParseName(ctxt);
8059 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008060 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00008061 "xmlParseStartTag: invalid element name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008062 return(NULL);
8063 }
8064
8065 /*
8066 * Now parse the attributes, it ends up with the ending
8067 *
8068 * (S Attribute)* S?
8069 */
8070 SKIP_BLANKS;
8071 GROW;
8072
Daniel Veillard21a0f912001-02-25 19:54:14 +00008073 while ((RAW != '>') &&
8074 ((RAW != '/') || (NXT(1) != '>')) &&
Daniel Veillard73b013f2003-09-30 12:36:01 +00008075 (IS_BYTE_CHAR(RAW))) {
Owen Taylor3473f882001-02-23 17:55:21 +00008076 const xmlChar *q = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00008077 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00008078
8079 attname = xmlParseAttribute(ctxt, &attvalue);
8080 if ((attname != NULL) && (attvalue != NULL)) {
8081 /*
8082 * [ WFC: Unique Att Spec ]
8083 * No attribute name may appear more than once in the same
8084 * start-tag or empty-element tag.
8085 */
8086 for (i = 0; i < nbatts;i += 2) {
8087 if (xmlStrEqual(atts[i], attname)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008088 xmlErrAttributeDup(ctxt, NULL, attname);
Owen Taylor3473f882001-02-23 17:55:21 +00008089 xmlFree(attvalue);
8090 goto failed;
8091 }
8092 }
Owen Taylor3473f882001-02-23 17:55:21 +00008093 /*
8094 * Add the pair to atts
8095 */
8096 if (atts == NULL) {
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008097 maxatts = 22; /* allow for 10 attrs by default */
8098 atts = (const xmlChar **)
8099 xmlMalloc(maxatts * sizeof(xmlChar *));
Owen Taylor3473f882001-02-23 17:55:21 +00008100 if (atts == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008101 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008102 if (attvalue != NULL)
8103 xmlFree(attvalue);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008104 goto failed;
Owen Taylor3473f882001-02-23 17:55:21 +00008105 }
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008106 ctxt->atts = atts;
8107 ctxt->maxatts = maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00008108 } else if (nbatts + 4 > maxatts) {
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008109 const xmlChar **n;
8110
Owen Taylor3473f882001-02-23 17:55:21 +00008111 maxatts *= 2;
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008112 n = (const xmlChar **) xmlRealloc((void *) atts,
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008113 maxatts * sizeof(const xmlChar *));
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008114 if (n == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008115 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008116 if (attvalue != NULL)
8117 xmlFree(attvalue);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008118 goto failed;
Owen Taylor3473f882001-02-23 17:55:21 +00008119 }
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008120 atts = n;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008121 ctxt->atts = atts;
8122 ctxt->maxatts = maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00008123 }
8124 atts[nbatts++] = attname;
8125 atts[nbatts++] = attvalue;
8126 atts[nbatts] = NULL;
8127 atts[nbatts + 1] = NULL;
8128 } else {
Owen Taylor3473f882001-02-23 17:55:21 +00008129 if (attvalue != NULL)
8130 xmlFree(attvalue);
8131 }
8132
8133failed:
8134
Daniel Veillard3772de32002-12-17 10:31:45 +00008135 GROW
Owen Taylor3473f882001-02-23 17:55:21 +00008136 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
8137 break;
William M. Brack76e95df2003-10-18 16:20:14 +00008138 if (!IS_BLANK_CH(RAW)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008139 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
8140 "attributes construct error\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008141 }
8142 SKIP_BLANKS;
Daniel Veillard02111c12003-02-24 19:14:52 +00008143 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
8144 (attname == NULL) && (attvalue == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008145 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
8146 "xmlParseStartTag: problem parsing attributes\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008147 break;
8148 }
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008149 SHRINK;
Owen Taylor3473f882001-02-23 17:55:21 +00008150 GROW;
8151 }
8152
8153 /*
8154 * SAX: Start of Element !
8155 */
8156 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008157 (!ctxt->disableSAX)) {
8158 if (nbatts > 0)
8159 ctxt->sax->startElement(ctxt->userData, name, atts);
8160 else
8161 ctxt->sax->startElement(ctxt->userData, name, NULL);
8162 }
Owen Taylor3473f882001-02-23 17:55:21 +00008163
8164 if (atts != NULL) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008165 /* Free only the content strings */
8166 for (i = 1;i < nbatts;i+=2)
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008167 if (atts[i] != NULL)
8168 xmlFree((xmlChar *) atts[i]);
Owen Taylor3473f882001-02-23 17:55:21 +00008169 }
8170 return(name);
8171}
8172
8173/**
Daniel Veillard0fb18932003-09-07 09:14:37 +00008174 * xmlParseEndTag1:
Owen Taylor3473f882001-02-23 17:55:21 +00008175 * @ctxt: an XML parser context
Daniel Veillard0fb18932003-09-07 09:14:37 +00008176 * @line: line of the start tag
8177 * @nsNr: number of namespaces on the start tag
Owen Taylor3473f882001-02-23 17:55:21 +00008178 *
8179 * parse an end of tag
8180 *
8181 * [42] ETag ::= '</' Name S? '>'
8182 *
8183 * With namespace
8184 *
8185 * [NS 9] ETag ::= '</' QName S? '>'
8186 */
8187
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00008188static void
Daniel Veillard0fb18932003-09-07 09:14:37 +00008189xmlParseEndTag1(xmlParserCtxtPtr ctxt, int line) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008190 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00008191
8192 GROW;
8193 if ((RAW != '<') || (NXT(1) != '/')) {
Daniel Veillardf403d292003-10-05 13:51:35 +00008194 xmlFatalErrMsg(ctxt, XML_ERR_LTSLASH_REQUIRED,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008195 "xmlParseEndTag: '</' not found\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008196 return;
8197 }
8198 SKIP(2);
8199
Daniel Veillard46de64e2002-05-29 08:21:33 +00008200 name = xmlParseNameAndCompare(ctxt,ctxt->name);
Owen Taylor3473f882001-02-23 17:55:21 +00008201
8202 /*
8203 * We should definitely be at the ending "S? '>'" part
8204 */
8205 GROW;
8206 SKIP_BLANKS;
Daniel Veillard73b013f2003-09-30 12:36:01 +00008207 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008208 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008209 } else
Daniel Veillard21a0f912001-02-25 19:54:14 +00008210 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00008211
8212 /*
8213 * [ WFC: Element Type Match ]
8214 * The Name in an element's end-tag must match the element type in the
8215 * start-tag.
8216 *
8217 */
Daniel Veillard46de64e2002-05-29 08:21:33 +00008218 if (name != (xmlChar*)1) {
Daniel Veillardf403d292003-10-05 13:51:35 +00008219 if (name == NULL) name = BAD_CAST "unparseable";
8220 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00008221 "Opening and ending tag mismatch: %s line %d and %s\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00008222 ctxt->name, line, name);
Owen Taylor3473f882001-02-23 17:55:21 +00008223 }
8224
8225 /*
8226 * SAX: End of Tag
8227 */
8228 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
8229 (!ctxt->disableSAX))
Daniel Veillard46de64e2002-05-29 08:21:33 +00008230 ctxt->sax->endElement(ctxt->userData, ctxt->name);
Owen Taylor3473f882001-02-23 17:55:21 +00008231
Daniel Veillarde57ec792003-09-10 10:50:59 +00008232 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00008233 spacePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00008234 return;
8235}
8236
8237/**
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00008238 * xmlParseEndTag:
8239 * @ctxt: an XML parser context
8240 *
8241 * parse an end of tag
8242 *
8243 * [42] ETag ::= '</' Name S? '>'
8244 *
8245 * With namespace
8246 *
8247 * [NS 9] ETag ::= '</' QName S? '>'
8248 */
8249
8250void
8251xmlParseEndTag(xmlParserCtxtPtr ctxt) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00008252 xmlParseEndTag1(ctxt, 0);
8253}
Daniel Veillard81273902003-09-30 00:43:48 +00008254#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillard0fb18932003-09-07 09:14:37 +00008255
8256/************************************************************************
8257 * *
8258 * SAX 2 specific operations *
8259 * *
8260 ************************************************************************/
8261
Daniel Veillard0fb18932003-09-07 09:14:37 +00008262/*
8263 * xmlGetNamespace:
8264 * @ctxt: an XML parser context
8265 * @prefix: the prefix to lookup
8266 *
8267 * Lookup the namespace name for the @prefix (which ca be NULL)
8268 * The prefix must come from the @ctxt->dict dictionnary
8269 *
8270 * Returns the namespace name or NULL if not bound
8271 */
8272static const xmlChar *
8273xmlGetNamespace(xmlParserCtxtPtr ctxt, const xmlChar *prefix) {
8274 int i;
8275
Daniel Veillarde57ec792003-09-10 10:50:59 +00008276 if (prefix == ctxt->str_xml) return(ctxt->str_xml_ns);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008277 for (i = ctxt->nsNr - 2;i >= 0;i-=2)
Daniel Veillarde57ec792003-09-10 10:50:59 +00008278 if (ctxt->nsTab[i] == prefix) {
8279 if ((prefix == NULL) && (*ctxt->nsTab[i + 1] == 0))
8280 return(NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008281 return(ctxt->nsTab[i + 1]);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008282 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008283 return(NULL);
8284}
8285
8286/**
Daniel Veillard0fb18932003-09-07 09:14:37 +00008287 * xmlParseQName:
8288 * @ctxt: an XML parser context
8289 * @prefix: pointer to store the prefix part
8290 *
8291 * parse an XML Namespace QName
8292 *
8293 * [6] QName ::= (Prefix ':')? LocalPart
8294 * [7] Prefix ::= NCName
8295 * [8] LocalPart ::= NCName
8296 *
8297 * Returns the Name parsed or NULL
8298 */
8299
8300static const xmlChar *
8301xmlParseQName(xmlParserCtxtPtr ctxt, const xmlChar **prefix) {
8302 const xmlChar *l, *p;
8303
8304 GROW;
8305
8306 l = xmlParseNCName(ctxt);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008307 if (l == NULL) {
8308 if (CUR == ':') {
8309 l = xmlParseName(ctxt);
8310 if (l != NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008311 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8312 "Failed to parse QName '%s'\n", l, NULL, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008313 *prefix = NULL;
8314 return(l);
8315 }
8316 }
8317 return(NULL);
8318 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008319 if (CUR == ':') {
8320 NEXT;
8321 p = l;
8322 l = xmlParseNCName(ctxt);
8323 if (l == NULL) {
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008324 xmlChar *tmp;
8325
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008326 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8327 "Failed to parse QName '%s:'\n", p, NULL, NULL);
Daniel Veillardae0765b2008-07-31 19:54:59 +00008328 l = xmlParseNmtoken(ctxt);
8329 if (l == NULL)
8330 tmp = xmlBuildQName(BAD_CAST "", p, NULL, 0);
8331 else {
8332 tmp = xmlBuildQName(l, p, NULL, 0);
8333 xmlFree((char *)l);
8334 }
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008335 p = xmlDictLookup(ctxt->dict, tmp, -1);
8336 if (tmp != NULL) xmlFree(tmp);
8337 *prefix = NULL;
8338 return(p);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008339 }
8340 if (CUR == ':') {
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008341 xmlChar *tmp;
8342
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008343 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8344 "Failed to parse QName '%s:%s:'\n", p, l, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008345 NEXT;
8346 tmp = (xmlChar *) xmlParseName(ctxt);
8347 if (tmp != NULL) {
8348 tmp = xmlBuildQName(tmp, l, NULL, 0);
8349 l = xmlDictLookup(ctxt->dict, tmp, -1);
8350 if (tmp != NULL) xmlFree(tmp);
8351 *prefix = p;
8352 return(l);
8353 }
8354 tmp = xmlBuildQName(BAD_CAST "", l, NULL, 0);
8355 l = xmlDictLookup(ctxt->dict, tmp, -1);
8356 if (tmp != NULL) xmlFree(tmp);
8357 *prefix = p;
8358 return(l);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008359 }
8360 *prefix = p;
8361 } else
8362 *prefix = NULL;
8363 return(l);
8364}
8365
8366/**
8367 * xmlParseQNameAndCompare:
8368 * @ctxt: an XML parser context
8369 * @name: the localname
8370 * @prefix: the prefix, if any.
8371 *
8372 * parse an XML name and compares for match
8373 * (specialized for endtag parsing)
8374 *
8375 * Returns NULL for an illegal name, (xmlChar*) 1 for success
8376 * and the name for mismatch
8377 */
8378
8379static const xmlChar *
8380xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *name,
8381 xmlChar const *prefix) {
Daniel Veillardd44b9362009-09-07 12:15:08 +02008382 const xmlChar *cmp;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008383 const xmlChar *in;
8384 const xmlChar *ret;
8385 const xmlChar *prefix2;
8386
8387 if (prefix == NULL) return(xmlParseNameAndCompare(ctxt, name));
8388
8389 GROW;
8390 in = ctxt->input->cur;
Daniel Veillardd44b9362009-09-07 12:15:08 +02008391
Daniel Veillard0fb18932003-09-07 09:14:37 +00008392 cmp = prefix;
8393 while (*in != 0 && *in == *cmp) {
8394 ++in;
8395 ++cmp;
8396 }
8397 if ((*cmp == 0) && (*in == ':')) {
8398 in++;
8399 cmp = name;
8400 while (*in != 0 && *in == *cmp) {
8401 ++in;
8402 ++cmp;
8403 }
William M. Brack76e95df2003-10-18 16:20:14 +00008404 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00008405 /* success */
8406 ctxt->input->cur = in;
8407 return((const xmlChar*) 1);
8408 }
8409 }
8410 /*
8411 * all strings coms from the dictionary, equality can be done directly
8412 */
8413 ret = xmlParseQName (ctxt, &prefix2);
8414 if ((ret == name) && (prefix == prefix2))
8415 return((const xmlChar*) 1);
8416 return ret;
8417}
8418
8419/**
8420 * xmlParseAttValueInternal:
8421 * @ctxt: an XML parser context
8422 * @len: attribute len result
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008423 * @alloc: whether the attribute was reallocated as a new string
8424 * @normalize: if 1 then further non-CDATA normalization must be done
Daniel Veillard0fb18932003-09-07 09:14:37 +00008425 *
8426 * parse a value for an attribute.
8427 * NOTE: if no normalization is needed, the routine will return pointers
8428 * directly from the data buffer.
8429 *
8430 * 3.3.3 Attribute-Value Normalization:
8431 * Before the value of an attribute is passed to the application or
8432 * checked for validity, the XML processor must normalize it as follows:
8433 * - a character reference is processed by appending the referenced
8434 * character to the attribute value
8435 * - an entity reference is processed by recursively processing the
8436 * replacement text of the entity
8437 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
8438 * appending #x20 to the normalized value, except that only a single
8439 * #x20 is appended for a "#xD#xA" sequence that is part of an external
8440 * parsed entity or the literal entity value of an internal parsed entity
8441 * - other characters are processed by appending them to the normalized value
8442 * If the declared value is not CDATA, then the XML processor must further
8443 * process the normalized attribute value by discarding any leading and
8444 * trailing space (#x20) characters, and by replacing sequences of space
8445 * (#x20) characters by a single space (#x20) character.
8446 * All attributes for which no declaration has been read should be treated
8447 * by a non-validating parser as if declared CDATA.
8448 *
8449 * Returns the AttValue parsed or NULL. The value has to be freed by the
8450 * caller if it was copied, this can be detected by val[*len] == 0.
8451 */
8452
8453static xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008454xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *len, int *alloc,
8455 int normalize)
Daniel Veillarde57ec792003-09-10 10:50:59 +00008456{
Daniel Veillard0fb18932003-09-07 09:14:37 +00008457 xmlChar limit = 0;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008458 const xmlChar *in = NULL, *start, *end, *last;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008459 xmlChar *ret = NULL;
8460
8461 GROW;
8462 in = (xmlChar *) CUR_PTR;
8463 if (*in != '"' && *in != '\'') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008464 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008465 return (NULL);
8466 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008467 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008468
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008469 /*
8470 * try to handle in this routine the most common case where no
8471 * allocation of a new string is required and where content is
8472 * pure ASCII.
8473 */
8474 limit = *in++;
8475 end = ctxt->input->end;
8476 start = in;
8477 if (in >= end) {
8478 const xmlChar *oldbase = ctxt->input->base;
8479 GROW;
8480 if (oldbase != ctxt->input->base) {
8481 long delta = ctxt->input->base - oldbase;
8482 start = start + delta;
8483 in = in + delta;
8484 }
8485 end = ctxt->input->end;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008486 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008487 if (normalize) {
8488 /*
8489 * Skip any leading spaces
8490 */
8491 while ((in < end) && (*in != limit) &&
8492 ((*in == 0x20) || (*in == 0x9) ||
8493 (*in == 0xA) || (*in == 0xD))) {
8494 in++;
8495 start = in;
8496 if (in >= end) {
8497 const xmlChar *oldbase = ctxt->input->base;
8498 GROW;
8499 if (oldbase != ctxt->input->base) {
8500 long delta = ctxt->input->base - oldbase;
8501 start = start + delta;
8502 in = in + delta;
8503 }
8504 end = ctxt->input->end;
8505 }
8506 }
8507 while ((in < end) && (*in != limit) && (*in >= 0x20) &&
8508 (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
8509 if ((*in++ == 0x20) && (*in == 0x20)) break;
8510 if (in >= end) {
8511 const xmlChar *oldbase = ctxt->input->base;
8512 GROW;
8513 if (oldbase != ctxt->input->base) {
8514 long delta = ctxt->input->base - oldbase;
8515 start = start + delta;
8516 in = in + delta;
8517 }
8518 end = ctxt->input->end;
8519 }
8520 }
8521 last = in;
8522 /*
8523 * skip the trailing blanks
8524 */
Daniel Veillardc6e20e42003-09-11 16:30:26 +00008525 while ((last[-1] == 0x20) && (last > start)) last--;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008526 while ((in < end) && (*in != limit) &&
8527 ((*in == 0x20) || (*in == 0x9) ||
8528 (*in == 0xA) || (*in == 0xD))) {
8529 in++;
8530 if (in >= end) {
8531 const xmlChar *oldbase = ctxt->input->base;
8532 GROW;
8533 if (oldbase != ctxt->input->base) {
8534 long delta = ctxt->input->base - oldbase;
8535 start = start + delta;
8536 in = in + delta;
8537 last = last + delta;
8538 }
8539 end = ctxt->input->end;
8540 }
8541 }
8542 if (*in != limit) goto need_complex;
8543 } else {
8544 while ((in < end) && (*in != limit) && (*in >= 0x20) &&
8545 (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
8546 in++;
8547 if (in >= end) {
8548 const xmlChar *oldbase = ctxt->input->base;
8549 GROW;
8550 if (oldbase != ctxt->input->base) {
8551 long delta = ctxt->input->base - oldbase;
8552 start = start + delta;
8553 in = in + delta;
8554 }
8555 end = ctxt->input->end;
8556 }
8557 }
8558 last = in;
8559 if (*in != limit) goto need_complex;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008560 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008561 in++;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008562 if (len != NULL) {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008563 *len = last - start;
8564 ret = (xmlChar *) start;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008565 } else {
Daniel Veillarde57ec792003-09-10 10:50:59 +00008566 if (alloc) *alloc = 1;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008567 ret = xmlStrndup(start, last - start);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008568 }
8569 CUR_PTR = in;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008570 if (alloc) *alloc = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008571 return ret;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008572need_complex:
8573 if (alloc) *alloc = 1;
8574 return xmlParseAttValueComplex(ctxt, len, normalize);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008575}
8576
8577/**
8578 * xmlParseAttribute2:
8579 * @ctxt: an XML parser context
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008580 * @pref: the element prefix
8581 * @elem: the element name
8582 * @prefix: a xmlChar ** used to store the value of the attribute prefix
Daniel Veillard0fb18932003-09-07 09:14:37 +00008583 * @value: a xmlChar ** used to store the value of the attribute
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008584 * @len: an int * to save the length of the attribute
8585 * @alloc: an int * to indicate if the attribute was allocated
Daniel Veillard0fb18932003-09-07 09:14:37 +00008586 *
8587 * parse an attribute in the new SAX2 framework.
8588 *
8589 * Returns the attribute name, and the value in *value, .
8590 */
8591
8592static const xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008593xmlParseAttribute2(xmlParserCtxtPtr ctxt,
Daniel Veillard97c9ce22008-03-25 16:52:41 +00008594 const xmlChar * pref, const xmlChar * elem,
8595 const xmlChar ** prefix, xmlChar ** value,
8596 int *len, int *alloc)
8597{
Daniel Veillard0fb18932003-09-07 09:14:37 +00008598 const xmlChar *name;
Daniel Veillardd8925572005-06-08 22:34:55 +00008599 xmlChar *val, *internal_val = NULL;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008600 int normalize = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008601
8602 *value = NULL;
8603 GROW;
8604 name = xmlParseQName(ctxt, prefix);
8605 if (name == NULL) {
Daniel Veillard97c9ce22008-03-25 16:52:41 +00008606 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8607 "error parsing attribute name\n");
8608 return (NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008609 }
8610
8611 /*
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008612 * get the type if needed
8613 */
8614 if (ctxt->attsSpecial != NULL) {
8615 int type;
8616
8617 type = (int) (long) xmlHashQLookup2(ctxt->attsSpecial,
Daniel Veillard97c9ce22008-03-25 16:52:41 +00008618 pref, elem, *prefix, name);
8619 if (type != 0)
8620 normalize = 1;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008621 }
8622
8623 /*
Daniel Veillard0fb18932003-09-07 09:14:37 +00008624 * read the value
8625 */
8626 SKIP_BLANKS;
8627 if (RAW == '=') {
8628 NEXT;
Daniel Veillard97c9ce22008-03-25 16:52:41 +00008629 SKIP_BLANKS;
8630 val = xmlParseAttValueInternal(ctxt, len, alloc, normalize);
8631 if (normalize) {
8632 /*
8633 * Sometimes a second normalisation pass for spaces is needed
8634 * but that only happens if charrefs or entities refernces
8635 * have been used in the attribute value, i.e. the attribute
8636 * value have been extracted in an allocated string already.
8637 */
8638 if (*alloc) {
8639 const xmlChar *val2;
8640
8641 val2 = xmlAttrNormalizeSpace2(ctxt, val, len);
Daniel Veillardae0765b2008-07-31 19:54:59 +00008642 if ((val2 != NULL) && (val2 != val)) {
Daniel Veillard97c9ce22008-03-25 16:52:41 +00008643 xmlFree(val);
Daniel Veillard6a31b832008-03-26 14:06:44 +00008644 val = (xmlChar *) val2;
Daniel Veillard97c9ce22008-03-25 16:52:41 +00008645 }
8646 }
8647 }
8648 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008649 } else {
Daniel Veillard97c9ce22008-03-25 16:52:41 +00008650 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
8651 "Specification mandate value for attribute %s\n",
8652 name);
8653 return (NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008654 }
8655
Daniel Veillard97c9ce22008-03-25 16:52:41 +00008656 if (*prefix == ctxt->str_xml) {
8657 /*
8658 * Check that xml:lang conforms to the specification
8659 * No more registered as an error, just generate a warning now
8660 * since this was deprecated in XML second edition
8661 */
8662 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "lang"))) {
8663 internal_val = xmlStrndup(val, *len);
8664 if (!xmlCheckLanguageID(internal_val)) {
8665 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
8666 "Malformed value for xml:lang : %s\n",
8667 internal_val, NULL);
8668 }
8669 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008670
Daniel Veillard97c9ce22008-03-25 16:52:41 +00008671 /*
8672 * Check that xml:space conforms to the specification
8673 */
8674 if (xmlStrEqual(name, BAD_CAST "space")) {
8675 internal_val = xmlStrndup(val, *len);
8676 if (xmlStrEqual(internal_val, BAD_CAST "default"))
8677 *(ctxt->space) = 0;
8678 else if (xmlStrEqual(internal_val, BAD_CAST "preserve"))
8679 *(ctxt->space) = 1;
8680 else {
8681 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
8682 "Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
8683 internal_val, NULL);
8684 }
8685 }
8686 if (internal_val) {
8687 xmlFree(internal_val);
8688 }
8689 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008690
8691 *value = val;
Daniel Veillard97c9ce22008-03-25 16:52:41 +00008692 return (name);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008693}
Daniel Veillard0fb18932003-09-07 09:14:37 +00008694/**
8695 * xmlParseStartTag2:
8696 * @ctxt: an XML parser context
8697 *
8698 * parse a start of tag either for rule element or
8699 * EmptyElement. In both case we don't parse the tag closing chars.
8700 * This routine is called when running SAX2 parsing
8701 *
8702 * [40] STag ::= '<' Name (S Attribute)* S? '>'
8703 *
8704 * [ WFC: Unique Att Spec ]
8705 * No attribute name may appear more than once in the same start-tag or
8706 * empty-element tag.
8707 *
8708 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
8709 *
8710 * [ WFC: Unique Att Spec ]
8711 * No attribute name may appear more than once in the same start-tag or
8712 * empty-element tag.
8713 *
8714 * With namespace:
8715 *
8716 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
8717 *
8718 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
8719 *
8720 * Returns the element name parsed
8721 */
8722
8723static const xmlChar *
8724xmlParseStartTag2(xmlParserCtxtPtr ctxt, const xmlChar **pref,
Daniel Veillardc82c57e2004-01-12 16:24:34 +00008725 const xmlChar **URI, int *tlen) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00008726 const xmlChar *localname;
8727 const xmlChar *prefix;
8728 const xmlChar *attname;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008729 const xmlChar *aprefix;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008730 const xmlChar *nsname;
8731 xmlChar *attvalue;
8732 const xmlChar **atts = ctxt->atts;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008733 int maxatts = ctxt->maxatts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008734 int nratts, nbatts, nbdef;
Daniel Veillarddcec6722006-10-15 20:32:53 +00008735 int i, j, nbNs, attval, oldline, oldcol;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008736 const xmlChar *base;
8737 unsigned long cur;
Daniel Veillard365cf672005-06-09 08:18:24 +00008738 int nsNr = ctxt->nsNr;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008739
8740 if (RAW != '<') return(NULL);
8741 NEXT1;
8742
8743 /*
8744 * NOTE: it is crucial with the SAX2 API to never call SHRINK beyond that
8745 * point since the attribute values may be stored as pointers to
8746 * the buffer and calling SHRINK would destroy them !
8747 * The Shrinking is only possible once the full set of attribute
8748 * callbacks have been done.
8749 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00008750reparse:
Daniel Veillard0fb18932003-09-07 09:14:37 +00008751 SHRINK;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008752 base = ctxt->input->base;
8753 cur = ctxt->input->cur - ctxt->input->base;
Daniel Veillarddcec6722006-10-15 20:32:53 +00008754 oldline = ctxt->input->line;
8755 oldcol = ctxt->input->col;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008756 nbatts = 0;
8757 nratts = 0;
8758 nbdef = 0;
8759 nbNs = 0;
8760 attval = 0;
Daniel Veillard365cf672005-06-09 08:18:24 +00008761 /* Forget any namespaces added during an earlier parse of this element. */
8762 ctxt->nsNr = nsNr;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008763
8764 localname = xmlParseQName(ctxt, &prefix);
8765 if (localname == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008766 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8767 "StartTag: invalid element name\n");
Daniel Veillard0fb18932003-09-07 09:14:37 +00008768 return(NULL);
8769 }
Daniel Veillardc82c57e2004-01-12 16:24:34 +00008770 *tlen = ctxt->input->cur - ctxt->input->base - cur;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008771
8772 /*
8773 * Now parse the attributes, it ends up with the ending
8774 *
8775 * (S Attribute)* S?
8776 */
8777 SKIP_BLANKS;
8778 GROW;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008779 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008780
8781 while ((RAW != '>') &&
8782 ((RAW != '/') || (NXT(1) != '>')) &&
Daniel Veillard73b013f2003-09-30 12:36:01 +00008783 (IS_BYTE_CHAR(RAW))) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00008784 const xmlChar *q = CUR_PTR;
8785 unsigned int cons = ctxt->input->consumed;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008786 int len = -1, alloc = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008787
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008788 attname = xmlParseAttribute2(ctxt, prefix, localname,
8789 &aprefix, &attvalue, &len, &alloc);
Daniel Veillarddcec6722006-10-15 20:32:53 +00008790 if (ctxt->input->base != base) {
8791 if ((attvalue != NULL) && (alloc != 0))
8792 xmlFree(attvalue);
8793 attvalue = NULL;
8794 goto base_changed;
8795 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008796 if ((attname != NULL) && (attvalue != NULL)) {
8797 if (len < 0) len = xmlStrlen(attvalue);
8798 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00008799 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
8800 xmlURIPtr uri;
8801
8802 if (*URL != 0) {
8803 uri = xmlParseURI((const char *) URL);
8804 if (uri == NULL) {
Daniel Veillard37334572008-07-31 08:20:02 +00008805 xmlNsErr(ctxt, XML_WAR_NS_URI,
8806 "xmlns: '%s' is not a valid URI\n",
8807 URL, NULL, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008808 } else {
Daniel Veillardda3fee42008-09-01 13:08:57 +00008809 if (uri->scheme == NULL) {
Daniel Veillard37334572008-07-31 08:20:02 +00008810 xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
8811 "xmlns: URI %s is not absolute\n",
8812 URL, NULL, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008813 }
8814 xmlFreeURI(uri);
8815 }
Daniel Veillard37334572008-07-31 08:20:02 +00008816 if (URL == ctxt->str_xml_ns) {
8817 if (attname != ctxt->str_xml) {
8818 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8819 "xml namespace URI cannot be the default namespace\n",
8820 NULL, NULL, NULL);
8821 }
8822 goto skip_default_ns;
8823 }
8824 if ((len == 29) &&
8825 (xmlStrEqual(URL,
8826 BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
8827 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8828 "reuse of the xmlns namespace name is forbidden\n",
8829 NULL, NULL, NULL);
8830 goto skip_default_ns;
8831 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00008832 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008833 /*
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008834 * check that it's not a defined namespace
Daniel Veillard0fb18932003-09-07 09:14:37 +00008835 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008836 for (j = 1;j <= nbNs;j++)
8837 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
8838 break;
8839 if (j <= nbNs)
8840 xmlErrAttributeDup(ctxt, NULL, attname);
8841 else
8842 if (nsPush(ctxt, NULL, URL) > 0) nbNs++;
Daniel Veillard37334572008-07-31 08:20:02 +00008843skip_default_ns:
Daniel Veillarde57ec792003-09-10 10:50:59 +00008844 if (alloc != 0) xmlFree(attvalue);
8845 SKIP_BLANKS;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008846 continue;
8847 }
8848 if (aprefix == ctxt->str_xmlns) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00008849 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
8850 xmlURIPtr uri;
8851
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008852 if (attname == ctxt->str_xml) {
8853 if (URL != ctxt->str_xml_ns) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008854 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8855 "xml namespace prefix mapped to wrong URI\n",
8856 NULL, NULL, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008857 }
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008858 /*
8859 * Do not keep a namespace definition node
8860 */
Daniel Veillard37334572008-07-31 08:20:02 +00008861 goto skip_ns;
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008862 }
Daniel Veillard37334572008-07-31 08:20:02 +00008863 if (URL == ctxt->str_xml_ns) {
8864 if (attname != ctxt->str_xml) {
8865 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8866 "xml namespace URI mapped to wrong prefix\n",
8867 NULL, NULL, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008868 }
Daniel Veillard37334572008-07-31 08:20:02 +00008869 goto skip_ns;
8870 }
8871 if (attname == ctxt->str_xmlns) {
8872 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8873 "redefinition of the xmlns prefix is forbidden\n",
8874 NULL, NULL, NULL);
8875 goto skip_ns;
8876 }
8877 if ((len == 29) &&
8878 (xmlStrEqual(URL,
8879 BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
8880 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8881 "reuse of the xmlns namespace name is forbidden\n",
8882 NULL, NULL, NULL);
8883 goto skip_ns;
8884 }
8885 if ((URL == NULL) || (URL[0] == 0)) {
8886 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8887 "xmlns:%s: Empty XML namespace is not allowed\n",
8888 attname, NULL, NULL);
8889 goto skip_ns;
8890 } else {
8891 uri = xmlParseURI((const char *) URL);
8892 if (uri == NULL) {
8893 xmlNsErr(ctxt, XML_WAR_NS_URI,
8894 "xmlns:%s: '%s' is not a valid URI\n",
8895 attname, URL, NULL);
8896 } else {
8897 if ((ctxt->pedantic) && (uri->scheme == NULL)) {
8898 xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
8899 "xmlns:%s: URI %s is not absolute\n",
8900 attname, URL, NULL);
8901 }
8902 xmlFreeURI(uri);
8903 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00008904 }
8905
Daniel Veillard0fb18932003-09-07 09:14:37 +00008906 /*
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008907 * check that it's not a defined namespace
Daniel Veillard0fb18932003-09-07 09:14:37 +00008908 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008909 for (j = 1;j <= nbNs;j++)
8910 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
8911 break;
8912 if (j <= nbNs)
8913 xmlErrAttributeDup(ctxt, aprefix, attname);
8914 else
8915 if (nsPush(ctxt, attname, URL) > 0) nbNs++;
Daniel Veillard37334572008-07-31 08:20:02 +00008916skip_ns:
Daniel Veillarde57ec792003-09-10 10:50:59 +00008917 if (alloc != 0) xmlFree(attvalue);
8918 SKIP_BLANKS;
Daniel Veillardd3999c72004-03-10 16:27:03 +00008919 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008920 continue;
8921 }
8922
8923 /*
8924 * Add the pair to atts
8925 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00008926 if ((atts == NULL) || (nbatts + 5 > maxatts)) {
8927 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00008928 if (attvalue[len] == 0)
8929 xmlFree(attvalue);
8930 goto failed;
8931 }
8932 maxatts = ctxt->maxatts;
8933 atts = ctxt->atts;
8934 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00008935 ctxt->attallocs[nratts++] = alloc;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008936 atts[nbatts++] = attname;
8937 atts[nbatts++] = aprefix;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008938 atts[nbatts++] = NULL; /* the URI will be fetched later */
Daniel Veillard0fb18932003-09-07 09:14:37 +00008939 atts[nbatts++] = attvalue;
8940 attvalue += len;
8941 atts[nbatts++] = attvalue;
8942 /*
8943 * tag if some deallocation is needed
8944 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00008945 if (alloc != 0) attval = 1;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008946 } else {
8947 if ((attvalue != NULL) && (attvalue[len] == 0))
8948 xmlFree(attvalue);
8949 }
8950
Daniel Veillard37334572008-07-31 08:20:02 +00008951failed:
Daniel Veillard0fb18932003-09-07 09:14:37 +00008952
8953 GROW
Daniel Veillarde57ec792003-09-10 10:50:59 +00008954 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008955 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
8956 break;
William M. Brack76e95df2003-10-18 16:20:14 +00008957 if (!IS_BLANK_CH(RAW)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008958 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
8959 "attributes construct error\n");
William M. Brack13dfa872004-09-18 04:52:08 +00008960 break;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008961 }
8962 SKIP_BLANKS;
8963 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
8964 (attname == NULL) && (attvalue == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008965 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
Daniel Veillard0fb18932003-09-07 09:14:37 +00008966 "xmlParseStartTag: problem parsing attributes\n");
Daniel Veillard0fb18932003-09-07 09:14:37 +00008967 break;
8968 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008969 GROW;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008970 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008971 }
8972
Daniel Veillard0fb18932003-09-07 09:14:37 +00008973 /*
Daniel Veillarde57ec792003-09-10 10:50:59 +00008974 * The attributes defaulting
8975 */
8976 if (ctxt->attsDefault != NULL) {
8977 xmlDefAttrsPtr defaults;
8978
8979 defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix);
8980 if (defaults != NULL) {
8981 for (i = 0;i < defaults->nbAttrs;i++) {
Daniel Veillardae0765b2008-07-31 19:54:59 +00008982 attname = defaults->values[5 * i];
8983 aprefix = defaults->values[5 * i + 1];
Daniel Veillarde57ec792003-09-10 10:50:59 +00008984
8985 /*
8986 * special work for namespaces defaulted defs
8987 */
8988 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
8989 /*
8990 * check that it's not a defined namespace
8991 */
8992 for (j = 1;j <= nbNs;j++)
8993 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
8994 break;
8995 if (j <= nbNs) continue;
8996
8997 nsname = xmlGetNamespace(ctxt, NULL);
Daniel Veillardae0765b2008-07-31 19:54:59 +00008998 if (nsname != defaults->values[5 * i + 2]) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00008999 if (nsPush(ctxt, NULL,
Daniel Veillardae0765b2008-07-31 19:54:59 +00009000 defaults->values[5 * i + 2]) > 0)
Daniel Veillarde57ec792003-09-10 10:50:59 +00009001 nbNs++;
9002 }
9003 } else if (aprefix == ctxt->str_xmlns) {
9004 /*
9005 * check that it's not a defined namespace
9006 */
9007 for (j = 1;j <= nbNs;j++)
9008 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
9009 break;
9010 if (j <= nbNs) continue;
9011
9012 nsname = xmlGetNamespace(ctxt, attname);
9013 if (nsname != defaults->values[2]) {
9014 if (nsPush(ctxt, attname,
Daniel Veillardae0765b2008-07-31 19:54:59 +00009015 defaults->values[5 * i + 2]) > 0)
Daniel Veillarde57ec792003-09-10 10:50:59 +00009016 nbNs++;
9017 }
9018 } else {
9019 /*
9020 * check that it's not a defined attribute
9021 */
9022 for (j = 0;j < nbatts;j+=5) {
9023 if ((attname == atts[j]) && (aprefix == atts[j+1]))
9024 break;
9025 }
9026 if (j < nbatts) continue;
9027
9028 if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9029 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
Daniel Veillard9ee35f32003-09-28 00:19:54 +00009030 return(NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00009031 }
9032 maxatts = ctxt->maxatts;
9033 atts = ctxt->atts;
9034 }
9035 atts[nbatts++] = attname;
9036 atts[nbatts++] = aprefix;
9037 if (aprefix == NULL)
9038 atts[nbatts++] = NULL;
9039 else
9040 atts[nbatts++] = xmlGetNamespace(ctxt, aprefix);
Daniel Veillardae0765b2008-07-31 19:54:59 +00009041 atts[nbatts++] = defaults->values[5 * i + 2];
9042 atts[nbatts++] = defaults->values[5 * i + 3];
9043 if ((ctxt->standalone == 1) &&
9044 (defaults->values[5 * i + 4] != NULL)) {
9045 xmlValidityError(ctxt, XML_DTD_STANDALONE_DEFAULTED,
9046 "standalone: attribute %s on %s defaulted from external subset\n",
9047 attname, localname);
9048 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00009049 nbdef++;
9050 }
9051 }
9052 }
9053 }
9054
Daniel Veillarde70c8772003-11-25 07:21:18 +00009055 /*
9056 * The attributes checkings
9057 */
9058 for (i = 0; i < nbatts;i += 5) {
Kasimier T. Buchcik455472f2005-04-29 10:04:43 +00009059 /*
9060 * The default namespace does not apply to attribute names.
9061 */
9062 if (atts[i + 1] != NULL) {
9063 nsname = xmlGetNamespace(ctxt, atts[i + 1]);
9064 if (nsname == NULL) {
9065 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9066 "Namespace prefix %s for %s on %s is not defined\n",
9067 atts[i + 1], atts[i], localname);
9068 }
9069 atts[i + 2] = nsname;
9070 } else
9071 nsname = NULL;
Daniel Veillarde70c8772003-11-25 07:21:18 +00009072 /*
9073 * [ WFC: Unique Att Spec ]
9074 * No attribute name may appear more than once in the same
9075 * start-tag or empty-element tag.
9076 * As extended by the Namespace in XML REC.
9077 */
9078 for (j = 0; j < i;j += 5) {
9079 if (atts[i] == atts[j]) {
9080 if (atts[i+1] == atts[j+1]) {
9081 xmlErrAttributeDup(ctxt, atts[i+1], atts[i]);
9082 break;
9083 }
9084 if ((nsname != NULL) && (atts[j + 2] == nsname)) {
9085 xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED,
9086 "Namespaced Attribute %s in '%s' redefined\n",
9087 atts[i], nsname, NULL);
9088 break;
9089 }
9090 }
9091 }
9092 }
9093
Daniel Veillarde57ec792003-09-10 10:50:59 +00009094 nsname = xmlGetNamespace(ctxt, prefix);
9095 if ((prefix != NULL) && (nsname == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009096 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9097 "Namespace prefix %s on %s is not defined\n",
9098 prefix, localname, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00009099 }
9100 *pref = prefix;
9101 *URI = nsname;
9102
9103 /*
9104 * SAX: Start of Element !
9105 */
9106 if ((ctxt->sax != NULL) && (ctxt->sax->startElementNs != NULL) &&
9107 (!ctxt->disableSAX)) {
9108 if (nbNs > 0)
9109 ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
9110 nsname, nbNs, &ctxt->nsTab[ctxt->nsNr - 2 * nbNs],
9111 nbatts / 5, nbdef, atts);
9112 else
9113 ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
9114 nsname, 0, NULL, nbatts / 5, nbdef, atts);
9115 }
9116
9117 /*
9118 * Free up attribute allocated strings if needed
9119 */
9120 if (attval != 0) {
9121 for (i = 3,j = 0; j < nratts;i += 5,j++)
9122 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
9123 xmlFree((xmlChar *) atts[i]);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009124 }
9125
9126 return(localname);
Daniel Veillarde57ec792003-09-10 10:50:59 +00009127
9128base_changed:
9129 /*
9130 * the attribute strings are valid iif the base didn't changed
9131 */
9132 if (attval != 0) {
9133 for (i = 3,j = 0; j < nratts;i += 5,j++)
9134 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
9135 xmlFree((xmlChar *) atts[i]);
9136 }
9137 ctxt->input->cur = ctxt->input->base + cur;
Daniel Veillarddcec6722006-10-15 20:32:53 +00009138 ctxt->input->line = oldline;
9139 ctxt->input->col = oldcol;
Daniel Veillarde57ec792003-09-10 10:50:59 +00009140 if (ctxt->wellFormed == 1) {
9141 goto reparse;
9142 }
9143 return(NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009144}
9145
9146/**
9147 * xmlParseEndTag2:
9148 * @ctxt: an XML parser context
9149 * @line: line of the start tag
9150 * @nsNr: number of namespaces on the start tag
9151 *
9152 * parse an end of tag
9153 *
9154 * [42] ETag ::= '</' Name S? '>'
9155 *
9156 * With namespace
9157 *
9158 * [NS 9] ETag ::= '</' QName S? '>'
9159 */
9160
9161static void
9162xmlParseEndTag2(xmlParserCtxtPtr ctxt, const xmlChar *prefix,
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009163 const xmlChar *URI, int line, int nsNr, int tlen) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00009164 const xmlChar *name;
9165
9166 GROW;
9167 if ((RAW != '<') || (NXT(1) != '/')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009168 xmlFatalErr(ctxt, XML_ERR_LTSLASH_REQUIRED, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009169 return;
9170 }
9171 SKIP(2);
9172
William M. Brack13dfa872004-09-18 04:52:08 +00009173 if ((tlen > 0) && (xmlStrncmp(ctxt->input->cur, ctxt->name, tlen) == 0)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009174 if (ctxt->input->cur[tlen] == '>') {
9175 ctxt->input->cur += tlen + 1;
9176 goto done;
9177 }
9178 ctxt->input->cur += tlen;
9179 name = (xmlChar*)1;
9180 } else {
9181 if (prefix == NULL)
9182 name = xmlParseNameAndCompare(ctxt, ctxt->name);
9183 else
9184 name = xmlParseQNameAndCompare(ctxt, ctxt->name, prefix);
9185 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00009186
9187 /*
9188 * We should definitely be at the ending "S? '>'" part
9189 */
9190 GROW;
9191 SKIP_BLANKS;
Daniel Veillard73b013f2003-09-30 12:36:01 +00009192 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009193 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009194 } else
9195 NEXT1;
9196
9197 /*
9198 * [ WFC: Element Type Match ]
9199 * The Name in an element's end-tag must match the element type in the
9200 * start-tag.
9201 *
9202 */
9203 if (name != (xmlChar*)1) {
Daniel Veillardf403d292003-10-05 13:51:35 +00009204 if (name == NULL) name = BAD_CAST "unparseable";
Daniel Veillard852505b2009-08-23 15:44:48 +02009205 if ((line == 0) && (ctxt->node != NULL))
9206 line = ctxt->node->line;
Daniel Veillardf403d292003-10-05 13:51:35 +00009207 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
Daniel Veillard0fb18932003-09-07 09:14:37 +00009208 "Opening and ending tag mismatch: %s line %d and %s\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00009209 ctxt->name, line, name);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009210 }
9211
9212 /*
9213 * SAX: End of Tag
9214 */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009215done:
Daniel Veillard0fb18932003-09-07 09:14:37 +00009216 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
9217 (!ctxt->disableSAX))
9218 ctxt->sax->endElementNs(ctxt->userData, ctxt->name, prefix, URI);
9219
Daniel Veillard0fb18932003-09-07 09:14:37 +00009220 spacePop(ctxt);
9221 if (nsNr != 0)
9222 nsPop(ctxt, nsNr);
9223 return;
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00009224}
9225
9226/**
Owen Taylor3473f882001-02-23 17:55:21 +00009227 * xmlParseCDSect:
9228 * @ctxt: an XML parser context
9229 *
9230 * Parse escaped pure raw content.
9231 *
9232 * [18] CDSect ::= CDStart CData CDEnd
9233 *
9234 * [19] CDStart ::= '<![CDATA['
9235 *
9236 * [20] Data ::= (Char* - (Char* ']]>' Char*))
9237 *
9238 * [21] CDEnd ::= ']]>'
9239 */
9240void
9241xmlParseCDSect(xmlParserCtxtPtr ctxt) {
9242 xmlChar *buf = NULL;
9243 int len = 0;
9244 int size = XML_PARSER_BUFFER_SIZE;
9245 int r, rl;
9246 int s, sl;
9247 int cur, l;
9248 int count = 0;
9249
Daniel Veillard8f597c32003-10-06 08:19:27 +00009250 /* Check 2.6.0 was NXT(0) not RAW */
Daniel Veillarda07050d2003-10-19 14:46:32 +00009251 if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009252 SKIP(9);
9253 } else
9254 return;
9255
9256 ctxt->instate = XML_PARSER_CDATA_SECTION;
9257 r = CUR_CHAR(rl);
William M. Brack871611b2003-10-18 04:53:14 +00009258 if (!IS_CHAR(r)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009259 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009260 ctxt->instate = XML_PARSER_CONTENT;
9261 return;
9262 }
9263 NEXTL(rl);
9264 s = CUR_CHAR(sl);
William M. Brack871611b2003-10-18 04:53:14 +00009265 if (!IS_CHAR(s)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009266 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009267 ctxt->instate = XML_PARSER_CONTENT;
9268 return;
9269 }
9270 NEXTL(sl);
9271 cur = CUR_CHAR(l);
Daniel Veillard3c908dc2003-04-19 00:07:51 +00009272 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00009273 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009274 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009275 return;
9276 }
William M. Brack871611b2003-10-18 04:53:14 +00009277 while (IS_CHAR(cur) &&
Owen Taylor3473f882001-02-23 17:55:21 +00009278 ((r != ']') || (s != ']') || (cur != '>'))) {
9279 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00009280 xmlChar *tmp;
9281
Owen Taylor3473f882001-02-23 17:55:21 +00009282 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00009283 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
9284 if (tmp == NULL) {
9285 xmlFree(buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009286 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009287 return;
9288 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00009289 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00009290 }
9291 COPY_BUF(rl,buf,len,r);
9292 r = s;
9293 rl = sl;
9294 s = cur;
9295 sl = l;
9296 count++;
9297 if (count > 50) {
9298 GROW;
9299 count = 0;
9300 }
9301 NEXTL(l);
9302 cur = CUR_CHAR(l);
9303 }
9304 buf[len] = 0;
9305 ctxt->instate = XML_PARSER_CONTENT;
9306 if (cur != '>') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00009307 xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00009308 "CData section not finished\n%.50s\n", buf);
Owen Taylor3473f882001-02-23 17:55:21 +00009309 xmlFree(buf);
9310 return;
9311 }
9312 NEXTL(l);
9313
9314 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00009315 * OK the buffer is to be consumed as cdata.
Owen Taylor3473f882001-02-23 17:55:21 +00009316 */
9317 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
9318 if (ctxt->sax->cdataBlock != NULL)
9319 ctxt->sax->cdataBlock(ctxt->userData, buf, len);
Daniel Veillard7583a592001-07-08 13:15:55 +00009320 else if (ctxt->sax->characters != NULL)
9321 ctxt->sax->characters(ctxt->userData, buf, len);
Owen Taylor3473f882001-02-23 17:55:21 +00009322 }
9323 xmlFree(buf);
9324}
9325
9326/**
9327 * xmlParseContent:
9328 * @ctxt: an XML parser context
9329 *
9330 * Parse a content:
9331 *
9332 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
9333 */
9334
9335void
9336xmlParseContent(xmlParserCtxtPtr ctxt) {
9337 GROW;
Daniel Veillardfdc91562002-07-01 21:52:03 +00009338 while ((RAW != 0) &&
Daniel Veillard4a9fe382006-09-19 12:44:35 +00009339 ((RAW != '<') || (NXT(1) != '/')) &&
9340 (ctxt->instate != XML_PARSER_EOF)) {
Owen Taylor3473f882001-02-23 17:55:21 +00009341 const xmlChar *test = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00009342 unsigned int cons = ctxt->input->consumed;
Daniel Veillard21a0f912001-02-25 19:54:14 +00009343 const xmlChar *cur = ctxt->input->cur;
Owen Taylor3473f882001-02-23 17:55:21 +00009344
9345 /*
Owen Taylor3473f882001-02-23 17:55:21 +00009346 * First case : a Processing Instruction.
9347 */
Daniel Veillardfdc91562002-07-01 21:52:03 +00009348 if ((*cur == '<') && (cur[1] == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009349 xmlParsePI(ctxt);
9350 }
9351
9352 /*
9353 * Second case : a CDSection
9354 */
Daniel Veillard8f597c32003-10-06 08:19:27 +00009355 /* 2.6.0 test was *cur not RAW */
Daniel Veillarda07050d2003-10-19 14:46:32 +00009356 else if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009357 xmlParseCDSect(ctxt);
9358 }
9359
9360 /*
9361 * Third case : a comment
9362 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00009363 else if ((*cur == '<') && (NXT(1) == '!') &&
Owen Taylor3473f882001-02-23 17:55:21 +00009364 (NXT(2) == '-') && (NXT(3) == '-')) {
9365 xmlParseComment(ctxt);
9366 ctxt->instate = XML_PARSER_CONTENT;
9367 }
9368
9369 /*
9370 * Fourth case : a sub-element.
9371 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00009372 else if (*cur == '<') {
Owen Taylor3473f882001-02-23 17:55:21 +00009373 xmlParseElement(ctxt);
9374 }
9375
9376 /*
9377 * Fifth case : a reference. If if has not been resolved,
9378 * parsing returns it's Name, create the node
9379 */
9380
Daniel Veillard21a0f912001-02-25 19:54:14 +00009381 else if (*cur == '&') {
Owen Taylor3473f882001-02-23 17:55:21 +00009382 xmlParseReference(ctxt);
9383 }
9384
9385 /*
9386 * Last case, text. Note that References are handled directly.
9387 */
9388 else {
9389 xmlParseCharData(ctxt, 0);
9390 }
9391
9392 GROW;
9393 /*
9394 * Pop-up of finished entities.
9395 */
Daniel Veillard561b7f82002-03-20 21:55:57 +00009396 while ((RAW == 0) && (ctxt->inputNr > 1))
Owen Taylor3473f882001-02-23 17:55:21 +00009397 xmlPopInput(ctxt);
9398 SHRINK;
9399
Daniel Veillardfdc91562002-07-01 21:52:03 +00009400 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009401 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
9402 "detected an error in element content\n");
Owen Taylor3473f882001-02-23 17:55:21 +00009403 ctxt->instate = XML_PARSER_EOF;
9404 break;
9405 }
9406 }
9407}
9408
9409/**
9410 * xmlParseElement:
9411 * @ctxt: an XML parser context
9412 *
9413 * parse an XML element, this is highly recursive
9414 *
9415 * [39] element ::= EmptyElemTag | STag content ETag
9416 *
9417 * [ WFC: Element Type Match ]
9418 * The Name in an element's end-tag must match the element type in the
9419 * start-tag.
9420 *
Owen Taylor3473f882001-02-23 17:55:21 +00009421 */
9422
9423void
9424xmlParseElement(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00009425 const xmlChar *name;
Daniel Veillard15495612009-09-05 15:04:41 +02009426 const xmlChar *prefix = NULL;
9427 const xmlChar *URI = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00009428 xmlParserNodeInfo node_info;
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009429 int line, tlen;
Owen Taylor3473f882001-02-23 17:55:21 +00009430 xmlNodePtr ret;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009431 int nsNr = ctxt->nsNr;
Owen Taylor3473f882001-02-23 17:55:21 +00009432
Daniel Veillard8915c152008-08-26 13:05:34 +00009433 if (((unsigned int) ctxt->nameNr > xmlParserMaxDepth) &&
9434 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9435 xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
9436 "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
9437 xmlParserMaxDepth);
Daniel Veillard4a9fe382006-09-19 12:44:35 +00009438 ctxt->instate = XML_PARSER_EOF;
9439 return;
9440 }
9441
Owen Taylor3473f882001-02-23 17:55:21 +00009442 /* Capture start position */
9443 if (ctxt->record_info) {
9444 node_info.begin_pos = ctxt->input->consumed +
9445 (CUR_PTR - ctxt->input->base);
9446 node_info.begin_line = ctxt->input->line;
9447 }
9448
9449 if (ctxt->spaceNr == 0)
9450 spacePush(ctxt, -1);
Daniel Veillard1114d002006-10-12 16:24:35 +00009451 else if (*ctxt->space == -2)
9452 spacePush(ctxt, -1);
Owen Taylor3473f882001-02-23 17:55:21 +00009453 else
9454 spacePush(ctxt, *ctxt->space);
9455
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00009456 line = ctxt->input->line;
Daniel Veillard81273902003-09-30 00:43:48 +00009457#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +00009458 if (ctxt->sax2)
Daniel Veillard81273902003-09-30 00:43:48 +00009459#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009460 name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
Daniel Veillard81273902003-09-30 00:43:48 +00009461#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +00009462 else
9463 name = xmlParseStartTag(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +00009464#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00009465 if (name == NULL) {
9466 spacePop(ctxt);
9467 return;
9468 }
9469 namePush(ctxt, name);
9470 ret = ctxt->node;
9471
Daniel Veillard4432df22003-09-28 18:58:27 +00009472#ifdef LIBXML_VALID_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +00009473 /*
9474 * [ VC: Root Element Type ]
9475 * The Name in the document type declaration must match the element
9476 * type of the root element.
9477 */
9478 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
9479 ctxt->node && (ctxt->node == ctxt->myDoc->children))
9480 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
Daniel Veillard4432df22003-09-28 18:58:27 +00009481#endif /* LIBXML_VALID_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00009482
9483 /*
9484 * Check for an Empty Element.
9485 */
9486 if ((RAW == '/') && (NXT(1) == '>')) {
9487 SKIP(2);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009488 if (ctxt->sax2) {
9489 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
9490 (!ctxt->disableSAX))
9491 ctxt->sax->endElementNs(ctxt->userData, name, prefix, URI);
Daniel Veillard81273902003-09-30 00:43:48 +00009492#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +00009493 } else {
9494 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
9495 (!ctxt->disableSAX))
9496 ctxt->sax->endElement(ctxt->userData, name);
Daniel Veillard81273902003-09-30 00:43:48 +00009497#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00009498 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00009499 namePop(ctxt);
9500 spacePop(ctxt);
9501 if (nsNr != ctxt->nsNr)
9502 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +00009503 if ( ret != NULL && ctxt->record_info ) {
9504 node_info.end_pos = ctxt->input->consumed +
9505 (CUR_PTR - ctxt->input->base);
9506 node_info.end_line = ctxt->input->line;
9507 node_info.node = ret;
9508 xmlParserAddNodeInfo(ctxt, &node_info);
9509 }
9510 return;
9511 }
9512 if (RAW == '>') {
Daniel Veillard21a0f912001-02-25 19:54:14 +00009513 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00009514 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00009515 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_GT_REQUIRED,
9516 "Couldn't find end of Start Tag %s line %d\n",
9517 name, line, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009518
9519 /*
9520 * end of parsing of this node.
9521 */
9522 nodePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009523 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00009524 spacePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009525 if (nsNr != ctxt->nsNr)
9526 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +00009527
9528 /*
9529 * Capture end position and add node
9530 */
9531 if ( ret != NULL && ctxt->record_info ) {
9532 node_info.end_pos = ctxt->input->consumed +
9533 (CUR_PTR - ctxt->input->base);
9534 node_info.end_line = ctxt->input->line;
9535 node_info.node = ret;
9536 xmlParserAddNodeInfo(ctxt, &node_info);
9537 }
9538 return;
9539 }
9540
9541 /*
9542 * Parse the content of the element:
9543 */
9544 xmlParseContent(ctxt);
Daniel Veillard73b013f2003-09-30 12:36:01 +00009545 if (!IS_BYTE_CHAR(RAW)) {
Daniel Veillardf403d292003-10-05 13:51:35 +00009546 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
Daniel Veillard2b0f8792003-10-10 19:36:36 +00009547 "Premature end of data in tag %s line %d\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00009548 name, line, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009549
9550 /*
9551 * end of parsing of this node.
9552 */
9553 nodePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009554 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00009555 spacePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009556 if (nsNr != ctxt->nsNr)
9557 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +00009558 return;
9559 }
9560
9561 /*
9562 * parse the end of tag: '</' should be here.
9563 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00009564 if (ctxt->sax2) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009565 xmlParseEndTag2(ctxt, prefix, URI, line, ctxt->nsNr - nsNr, tlen);
Daniel Veillarde57ec792003-09-10 10:50:59 +00009566 namePop(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +00009567 }
9568#ifdef LIBXML_SAX1_ENABLED
9569 else
Daniel Veillard0fb18932003-09-07 09:14:37 +00009570 xmlParseEndTag1(ctxt, line);
Daniel Veillard81273902003-09-30 00:43:48 +00009571#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00009572
9573 /*
9574 * Capture end position and add node
9575 */
9576 if ( ret != NULL && ctxt->record_info ) {
9577 node_info.end_pos = ctxt->input->consumed +
9578 (CUR_PTR - ctxt->input->base);
9579 node_info.end_line = ctxt->input->line;
9580 node_info.node = ret;
9581 xmlParserAddNodeInfo(ctxt, &node_info);
9582 }
9583}
9584
9585/**
9586 * xmlParseVersionNum:
9587 * @ctxt: an XML parser context
9588 *
9589 * parse the XML version value.
9590 *
Daniel Veillard34e3f642008-07-29 09:02:27 +00009591 * [26] VersionNum ::= '1.' [0-9]+
9592 *
9593 * In practice allow [0-9].[0-9]+ at that level
Owen Taylor3473f882001-02-23 17:55:21 +00009594 *
9595 * Returns the string giving the XML version number, or NULL
9596 */
9597xmlChar *
9598xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
9599 xmlChar *buf = NULL;
9600 int len = 0;
9601 int size = 10;
9602 xmlChar cur;
9603
Daniel Veillard3c908dc2003-04-19 00:07:51 +00009604 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00009605 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009606 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009607 return(NULL);
9608 }
9609 cur = CUR;
Daniel Veillard34e3f642008-07-29 09:02:27 +00009610 if (!((cur >= '0') && (cur <= '9'))) {
Daniel Veillard7e5c3f42008-07-29 16:12:31 +00009611 xmlFree(buf);
Daniel Veillard34e3f642008-07-29 09:02:27 +00009612 return(NULL);
9613 }
9614 buf[len++] = cur;
9615 NEXT;
9616 cur=CUR;
9617 if (cur != '.') {
Daniel Veillard7e5c3f42008-07-29 16:12:31 +00009618 xmlFree(buf);
Daniel Veillard34e3f642008-07-29 09:02:27 +00009619 return(NULL);
9620 }
9621 buf[len++] = cur;
9622 NEXT;
9623 cur=CUR;
9624 while ((cur >= '0') && (cur <= '9')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009625 if (len + 1 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00009626 xmlChar *tmp;
9627
Owen Taylor3473f882001-02-23 17:55:21 +00009628 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00009629 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
9630 if (tmp == NULL) {
Daniel Veillard68b6e022008-03-31 09:26:00 +00009631 xmlFree(buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009632 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009633 return(NULL);
9634 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00009635 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00009636 }
9637 buf[len++] = cur;
9638 NEXT;
9639 cur=CUR;
9640 }
9641 buf[len] = 0;
9642 return(buf);
9643}
9644
9645/**
9646 * xmlParseVersionInfo:
9647 * @ctxt: an XML parser context
Daniel Veillard68b6e022008-03-31 09:26:00 +00009648 *
Owen Taylor3473f882001-02-23 17:55:21 +00009649 * parse the XML version.
9650 *
9651 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
Daniel Veillard68b6e022008-03-31 09:26:00 +00009652 *
Owen Taylor3473f882001-02-23 17:55:21 +00009653 * [25] Eq ::= S? '=' S?
9654 *
9655 * Returns the version string, e.g. "1.0"
9656 */
9657
9658xmlChar *
9659xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
9660 xmlChar *version = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00009661
Daniel Veillarda07050d2003-10-19 14:46:32 +00009662 if (CMP7(CUR_PTR, 'v', 'e', 'r', 's', 'i', 'o', 'n')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009663 SKIP(7);
9664 SKIP_BLANKS;
9665 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009666 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009667 return(NULL);
9668 }
9669 NEXT;
9670 SKIP_BLANKS;
9671 if (RAW == '"') {
9672 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00009673 version = xmlParseVersionNum(ctxt);
9674 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009675 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009676 } else
9677 NEXT;
9678 } else if (RAW == '\''){
9679 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00009680 version = xmlParseVersionNum(ctxt);
9681 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009682 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009683 } else
9684 NEXT;
9685 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009686 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009687 }
9688 }
9689 return(version);
9690}
9691
9692/**
9693 * xmlParseEncName:
9694 * @ctxt: an XML parser context
9695 *
9696 * parse the XML encoding name
9697 *
9698 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
9699 *
9700 * Returns the encoding name value or NULL
9701 */
9702xmlChar *
9703xmlParseEncName(xmlParserCtxtPtr ctxt) {
9704 xmlChar *buf = NULL;
9705 int len = 0;
9706 int size = 10;
9707 xmlChar cur;
9708
9709 cur = CUR;
9710 if (((cur >= 'a') && (cur <= 'z')) ||
9711 ((cur >= 'A') && (cur <= 'Z'))) {
Daniel Veillard3c908dc2003-04-19 00:07:51 +00009712 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00009713 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009714 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009715 return(NULL);
9716 }
Daniel Veillard34e3f642008-07-29 09:02:27 +00009717
Owen Taylor3473f882001-02-23 17:55:21 +00009718 buf[len++] = cur;
9719 NEXT;
9720 cur = CUR;
9721 while (((cur >= 'a') && (cur <= 'z')) ||
9722 ((cur >= 'A') && (cur <= 'Z')) ||
9723 ((cur >= '0') && (cur <= '9')) ||
9724 (cur == '.') || (cur == '_') ||
9725 (cur == '-')) {
9726 if (len + 1 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00009727 xmlChar *tmp;
9728
Owen Taylor3473f882001-02-23 17:55:21 +00009729 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00009730 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
9731 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009732 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00009733 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00009734 return(NULL);
9735 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00009736 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00009737 }
9738 buf[len++] = cur;
9739 NEXT;
9740 cur = CUR;
9741 if (cur == 0) {
9742 SHRINK;
9743 GROW;
9744 cur = CUR;
9745 }
9746 }
9747 buf[len] = 0;
9748 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009749 xmlFatalErr(ctxt, XML_ERR_ENCODING_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009750 }
9751 return(buf);
9752}
9753
9754/**
9755 * xmlParseEncodingDecl:
9756 * @ctxt: an XML parser context
9757 *
9758 * parse the XML encoding declaration
9759 *
9760 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
9761 *
9762 * this setups the conversion filters.
9763 *
9764 * Returns the encoding value or NULL
9765 */
9766
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00009767const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00009768xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
9769 xmlChar *encoding = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00009770
9771 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00009772 if (CMP8(CUR_PTR, 'e', 'n', 'c', 'o', 'd', 'i', 'n', 'g')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009773 SKIP(8);
9774 SKIP_BLANKS;
9775 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009776 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009777 return(NULL);
9778 }
9779 NEXT;
9780 SKIP_BLANKS;
9781 if (RAW == '"') {
9782 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00009783 encoding = xmlParseEncName(ctxt);
9784 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009785 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009786 } else
9787 NEXT;
9788 } else if (RAW == '\''){
9789 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00009790 encoding = xmlParseEncName(ctxt);
9791 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009792 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009793 } else
9794 NEXT;
Daniel Veillard82ac6b02002-02-17 23:18:55 +00009795 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009796 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009797 }
Daniel Veillard6b621b82003-08-11 15:03:34 +00009798 /*
9799 * UTF-16 encoding stwich has already taken place at this stage,
9800 * more over the little-endian/big-endian selection is already done
9801 */
9802 if ((encoding != NULL) &&
9803 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-16")) ||
9804 (!xmlStrcasecmp(encoding, BAD_CAST "UTF16")))) {
Daniel Veillard37334572008-07-31 08:20:02 +00009805 /*
9806 * If no encoding was passed to the parser, that we are
9807 * using UTF-16 and no decoder is present i.e. the
9808 * document is apparently UTF-8 compatible, then raise an
9809 * encoding mismatch fatal error
9810 */
9811 if ((ctxt->encoding == NULL) &&
9812 (ctxt->input->buf != NULL) &&
9813 (ctxt->input->buf->encoder == NULL)) {
9814 xmlFatalErrMsg(ctxt, XML_ERR_INVALID_ENCODING,
9815 "Document labelled UTF-16 but has UTF-8 content\n");
9816 }
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00009817 if (ctxt->encoding != NULL)
9818 xmlFree((xmlChar *) ctxt->encoding);
9819 ctxt->encoding = encoding;
Daniel Veillardb19ba832003-08-14 00:33:46 +00009820 }
9821 /*
9822 * UTF-8 encoding is handled natively
9823 */
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00009824 else if ((encoding != NULL) &&
Daniel Veillardb19ba832003-08-14 00:33:46 +00009825 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-8")) ||
9826 (!xmlStrcasecmp(encoding, BAD_CAST "UTF8")))) {
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00009827 if (ctxt->encoding != NULL)
9828 xmlFree((xmlChar *) ctxt->encoding);
9829 ctxt->encoding = encoding;
Daniel Veillard6b621b82003-08-11 15:03:34 +00009830 }
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00009831 else if (encoding != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +00009832 xmlCharEncodingHandlerPtr handler;
9833
9834 if (ctxt->input->encoding != NULL)
9835 xmlFree((xmlChar *) ctxt->input->encoding);
9836 ctxt->input->encoding = encoding;
9837
Daniel Veillarda6874ca2003-07-29 16:47:24 +00009838 handler = xmlFindCharEncodingHandler((const char *) encoding);
9839 if (handler != NULL) {
9840 xmlSwitchToEncoding(ctxt, handler);
Owen Taylor3473f882001-02-23 17:55:21 +00009841 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00009842 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
Daniel Veillarda6874ca2003-07-29 16:47:24 +00009843 "Unsupported encoding %s\n", encoding);
9844 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009845 }
9846 }
9847 }
9848 return(encoding);
9849}
9850
9851/**
9852 * xmlParseSDDecl:
9853 * @ctxt: an XML parser context
9854 *
9855 * parse the XML standalone declaration
9856 *
9857 * [32] SDDecl ::= S 'standalone' Eq
9858 * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
9859 *
9860 * [ VC: Standalone Document Declaration ]
9861 * TODO The standalone document declaration must have the value "no"
9862 * if any external markup declarations contain declarations of:
9863 * - attributes with default values, if elements to which these
9864 * attributes apply appear in the document without specifications
9865 * of values for these attributes, or
9866 * - entities (other than amp, lt, gt, apos, quot), if references
9867 * to those entities appear in the document, or
9868 * - attributes with values subject to normalization, where the
9869 * attribute appears in the document with a value which will change
9870 * as a result of normalization, or
9871 * - element types with element content, if white space occurs directly
9872 * within any instance of those types.
9873 *
Daniel Veillard602f2bd2006-12-04 09:26:04 +00009874 * Returns:
9875 * 1 if standalone="yes"
9876 * 0 if standalone="no"
9877 * -2 if standalone attribute is missing or invalid
9878 * (A standalone value of -2 means that the XML declaration was found,
9879 * but no value was specified for the standalone attribute).
Owen Taylor3473f882001-02-23 17:55:21 +00009880 */
9881
9882int
9883xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard602f2bd2006-12-04 09:26:04 +00009884 int standalone = -2;
Owen Taylor3473f882001-02-23 17:55:21 +00009885
9886 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00009887 if (CMP10(CUR_PTR, 's', 't', 'a', 'n', 'd', 'a', 'l', 'o', 'n', 'e')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009888 SKIP(10);
9889 SKIP_BLANKS;
9890 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009891 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009892 return(standalone);
9893 }
9894 NEXT;
9895 SKIP_BLANKS;
9896 if (RAW == '\''){
9897 NEXT;
9898 if ((RAW == 'n') && (NXT(1) == 'o')) {
9899 standalone = 0;
9900 SKIP(2);
9901 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
9902 (NXT(2) == 's')) {
9903 standalone = 1;
9904 SKIP(3);
9905 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009906 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009907 }
9908 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009909 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009910 } else
9911 NEXT;
9912 } else if (RAW == '"'){
9913 NEXT;
9914 if ((RAW == 'n') && (NXT(1) == 'o')) {
9915 standalone = 0;
9916 SKIP(2);
9917 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
9918 (NXT(2) == 's')) {
9919 standalone = 1;
9920 SKIP(3);
9921 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009922 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009923 }
9924 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009925 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009926 } else
9927 NEXT;
9928 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009929 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009930 }
9931 }
9932 return(standalone);
9933}
9934
9935/**
9936 * xmlParseXMLDecl:
9937 * @ctxt: an XML parser context
9938 *
9939 * parse an XML declaration header
9940 *
9941 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
9942 */
9943
9944void
9945xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
9946 xmlChar *version;
9947
9948 /*
Daniel Veillardae487ba2005-11-17 07:25:52 +00009949 * This value for standalone indicates that the document has an
9950 * XML declaration but it does not have a standalone attribute.
9951 * It will be overwritten later if a standalone attribute is found.
9952 */
9953 ctxt->input->standalone = -2;
9954
9955 /*
Owen Taylor3473f882001-02-23 17:55:21 +00009956 * We know that '<?xml' is here.
9957 */
9958 SKIP(5);
9959
William M. Brack76e95df2003-10-18 16:20:14 +00009960 if (!IS_BLANK_CH(RAW)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009961 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
9962 "Blank needed after '<?xml'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00009963 }
9964 SKIP_BLANKS;
9965
9966 /*
Daniel Veillard19840942001-11-29 16:11:38 +00009967 * We must have the VersionInfo here.
Owen Taylor3473f882001-02-23 17:55:21 +00009968 */
9969 version = xmlParseVersionInfo(ctxt);
Daniel Veillard19840942001-11-29 16:11:38 +00009970 if (version == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009971 xmlFatalErr(ctxt, XML_ERR_VERSION_MISSING, NULL);
Daniel Veillard19840942001-11-29 16:11:38 +00009972 } else {
9973 if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) {
9974 /*
Daniel Veillard34e3f642008-07-29 09:02:27 +00009975 * Changed here for XML-1.0 5th edition
Daniel Veillard19840942001-11-29 16:11:38 +00009976 */
Daniel Veillard34e3f642008-07-29 09:02:27 +00009977 if (ctxt->options & XML_PARSE_OLD10) {
9978 xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
9979 "Unsupported version '%s'\n",
9980 version);
9981 } else {
9982 if ((version[0] == '1') && ((version[1] == '.'))) {
9983 xmlWarningMsg(ctxt, XML_WAR_UNKNOWN_VERSION,
9984 "Unsupported version '%s'\n",
9985 version, NULL);
9986 } else {
9987 xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
9988 "Unsupported version '%s'\n",
9989 version);
9990 }
9991 }
Daniel Veillard19840942001-11-29 16:11:38 +00009992 }
9993 if (ctxt->version != NULL)
Daniel Veillardd3b08822001-12-05 12:03:33 +00009994 xmlFree((void *) ctxt->version);
Daniel Veillard19840942001-11-29 16:11:38 +00009995 ctxt->version = version;
Daniel Veillarda050d232001-09-05 15:51:05 +00009996 }
Owen Taylor3473f882001-02-23 17:55:21 +00009997
9998 /*
9999 * We may have the encoding declaration
10000 */
William M. Brack76e95df2003-10-18 16:20:14 +000010001 if (!IS_BLANK_CH(RAW)) {
Owen Taylor3473f882001-02-23 17:55:21 +000010002 if ((RAW == '?') && (NXT(1) == '>')) {
10003 SKIP(2);
10004 return;
10005 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +000010006 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
Owen Taylor3473f882001-02-23 17:55:21 +000010007 }
10008 xmlParseEncodingDecl(ctxt);
10009 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
10010 /*
10011 * The XML REC instructs us to stop parsing right here
10012 */
10013 return;
10014 }
10015
10016 /*
10017 * We may have the standalone status.
10018 */
William M. Brack76e95df2003-10-18 16:20:14 +000010019 if ((ctxt->input->encoding != NULL) && (!IS_BLANK_CH(RAW))) {
Owen Taylor3473f882001-02-23 17:55:21 +000010020 if ((RAW == '?') && (NXT(1) == '>')) {
10021 SKIP(2);
10022 return;
10023 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +000010024 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
Owen Taylor3473f882001-02-23 17:55:21 +000010025 }
Daniel Veillarda6c76a22009-08-26 14:37:00 +020010026
10027 /*
10028 * We can grow the input buffer freely at that point
10029 */
10030 GROW;
10031
Owen Taylor3473f882001-02-23 17:55:21 +000010032 SKIP_BLANKS;
10033 ctxt->input->standalone = xmlParseSDDecl(ctxt);
10034
10035 SKIP_BLANKS;
10036 if ((RAW == '?') && (NXT(1) == '>')) {
10037 SKIP(2);
10038 } else if (RAW == '>') {
10039 /* Deprecated old WD ... */
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010040 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010041 NEXT;
10042 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010043 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010044 MOVETO_ENDTAG(CUR_PTR);
10045 NEXT;
10046 }
10047}
10048
10049/**
10050 * xmlParseMisc:
10051 * @ctxt: an XML parser context
10052 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +000010053 * parse an XML Misc* optional field.
Owen Taylor3473f882001-02-23 17:55:21 +000010054 *
10055 * [27] Misc ::= Comment | PI | S
10056 */
10057
10058void
10059xmlParseMisc(xmlParserCtxtPtr ctxt) {
Daniel Veillard561b7f82002-03-20 21:55:57 +000010060 while (((RAW == '<') && (NXT(1) == '?')) ||
Daniel Veillarda07050d2003-10-19 14:46:32 +000010061 (CMP4(CUR_PTR, '<', '!', '-', '-')) ||
William M. Brack76e95df2003-10-18 16:20:14 +000010062 IS_BLANK_CH(CUR)) {
Daniel Veillard561b7f82002-03-20 21:55:57 +000010063 if ((RAW == '<') && (NXT(1) == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +000010064 xmlParsePI(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +000010065 } else if (IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +000010066 NEXT;
10067 } else
10068 xmlParseComment(ctxt);
10069 }
10070}
10071
10072/**
10073 * xmlParseDocument:
10074 * @ctxt: an XML parser context
10075 *
10076 * parse an XML document (and build a tree if using the standard SAX
10077 * interface).
10078 *
10079 * [1] document ::= prolog element Misc*
10080 *
10081 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
10082 *
10083 * Returns 0, -1 in case of error. the parser context is augmented
10084 * as a result of the parsing.
10085 */
10086
10087int
10088xmlParseDocument(xmlParserCtxtPtr ctxt) {
10089 xmlChar start[4];
10090 xmlCharEncoding enc;
10091
10092 xmlInitParser();
10093
Daniel Veillard36e5cd52004-11-02 14:52:23 +000010094 if ((ctxt == NULL) || (ctxt->input == NULL))
10095 return(-1);
10096
Owen Taylor3473f882001-02-23 17:55:21 +000010097 GROW;
10098
10099 /*
Daniel Veillard0fb18932003-09-07 09:14:37 +000010100 * SAX: detecting the level.
10101 */
Daniel Veillarde57ec792003-09-10 10:50:59 +000010102 xmlDetectSAX2(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +000010103
10104 /*
Owen Taylor3473f882001-02-23 17:55:21 +000010105 * SAX: beginning of the document processing.
10106 */
10107 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10108 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
10109
Daniel Veillard4aede2e2003-10-17 12:43:59 +000010110 if ((ctxt->encoding == (const xmlChar *)XML_CHAR_ENCODING_NONE) &&
10111 ((ctxt->input->end - ctxt->input->cur) >= 4)) {
Daniel Veillard4aafa792001-07-28 17:21:12 +000010112 /*
10113 * Get the 4 first bytes and decode the charset
10114 * if enc != XML_CHAR_ENCODING_NONE
10115 * plug some encoding conversion routines.
10116 */
10117 start[0] = RAW;
10118 start[1] = NXT(1);
10119 start[2] = NXT(2);
10120 start[3] = NXT(3);
Daniel Veillard4aede2e2003-10-17 12:43:59 +000010121 enc = xmlDetectCharEncoding(&start[0], 4);
Daniel Veillard4aafa792001-07-28 17:21:12 +000010122 if (enc != XML_CHAR_ENCODING_NONE) {
10123 xmlSwitchEncoding(ctxt, enc);
10124 }
Owen Taylor3473f882001-02-23 17:55:21 +000010125 }
10126
10127
10128 if (CUR == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010129 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010130 }
10131
10132 /*
10133 * Check for the XMLDecl in the Prolog.
Daniel Veillard7e385bd2009-08-26 11:38:49 +020010134 * do not GROW here to avoid the detected encoder to decode more
10135 * than just the first line
Owen Taylor3473f882001-02-23 17:55:21 +000010136 */
Daniel Veillarda07050d2003-10-19 14:46:32 +000010137 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +000010138
10139 /*
10140 * Note that we will switch encoding on the fly.
10141 */
10142 xmlParseXMLDecl(ctxt);
10143 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
10144 /*
10145 * The XML REC instructs us to stop parsing right here
10146 */
10147 return(-1);
10148 }
10149 ctxt->standalone = ctxt->input->standalone;
10150 SKIP_BLANKS;
10151 } else {
10152 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10153 }
10154 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
10155 ctxt->sax->startDocument(ctxt->userData);
10156
10157 /*
10158 * The Misc part of the Prolog
10159 */
10160 GROW;
10161 xmlParseMisc(ctxt);
10162
10163 /*
10164 * Then possibly doc type declaration(s) and more Misc
10165 * (doctypedecl Misc*)?
10166 */
10167 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +000010168 if (CMP9(CUR_PTR, '<', '!', 'D', 'O', 'C', 'T', 'Y', 'P', 'E')) {
Owen Taylor3473f882001-02-23 17:55:21 +000010169
10170 ctxt->inSubset = 1;
10171 xmlParseDocTypeDecl(ctxt);
10172 if (RAW == '[') {
10173 ctxt->instate = XML_PARSER_DTD;
10174 xmlParseInternalSubset(ctxt);
10175 }
10176
10177 /*
10178 * Create and update the external subset.
10179 */
10180 ctxt->inSubset = 2;
10181 if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
10182 (!ctxt->disableSAX))
10183 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
10184 ctxt->extSubSystem, ctxt->extSubURI);
10185 ctxt->inSubset = 0;
10186
Daniel Veillardac4118d2008-01-11 05:27:32 +000010187 xmlCleanSpecialAttr(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010188
10189 ctxt->instate = XML_PARSER_PROLOG;
10190 xmlParseMisc(ctxt);
10191 }
10192
10193 /*
10194 * Time to start parsing the tree itself
10195 */
10196 GROW;
10197 if (RAW != '<') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +000010198 xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
10199 "Start tag expected, '<' not found\n");
Owen Taylor3473f882001-02-23 17:55:21 +000010200 } else {
10201 ctxt->instate = XML_PARSER_CONTENT;
10202 xmlParseElement(ctxt);
10203 ctxt->instate = XML_PARSER_EPILOG;
10204
10205
10206 /*
10207 * The Misc part at the end
10208 */
10209 xmlParseMisc(ctxt);
10210
Daniel Veillard561b7f82002-03-20 21:55:57 +000010211 if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010212 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010213 }
10214 ctxt->instate = XML_PARSER_EOF;
10215 }
10216
10217 /*
10218 * SAX: end of the document processing.
10219 */
Daniel Veillard8d24cc12002-03-05 15:41:29 +000010220 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +000010221 ctxt->sax->endDocument(ctxt->userData);
10222
Daniel Veillard5997aca2002-03-18 18:36:20 +000010223 /*
10224 * Remove locally kept entity definitions if the tree was not built
10225 */
10226 if ((ctxt->myDoc != NULL) &&
10227 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
10228 xmlFreeDoc(ctxt->myDoc);
10229 ctxt->myDoc = NULL;
10230 }
10231
Daniel Veillardae0765b2008-07-31 19:54:59 +000010232 if ((ctxt->wellFormed) && (ctxt->myDoc != NULL)) {
10233 ctxt->myDoc->properties |= XML_DOC_WELLFORMED;
10234 if (ctxt->valid)
10235 ctxt->myDoc->properties |= XML_DOC_DTDVALID;
10236 if (ctxt->nsWellFormed)
10237 ctxt->myDoc->properties |= XML_DOC_NSVALID;
10238 if (ctxt->options & XML_PARSE_OLD10)
10239 ctxt->myDoc->properties |= XML_DOC_OLD10;
10240 }
Daniel Veillardc7612992002-02-17 22:47:37 +000010241 if (! ctxt->wellFormed) {
10242 ctxt->valid = 0;
10243 return(-1);
10244 }
Owen Taylor3473f882001-02-23 17:55:21 +000010245 return(0);
10246}
10247
10248/**
10249 * xmlParseExtParsedEnt:
10250 * @ctxt: an XML parser context
10251 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +000010252 * parse a general parsed entity
Owen Taylor3473f882001-02-23 17:55:21 +000010253 * An external general parsed entity is well-formed if it matches the
10254 * production labeled extParsedEnt.
10255 *
10256 * [78] extParsedEnt ::= TextDecl? content
10257 *
10258 * Returns 0, -1 in case of error. the parser context is augmented
10259 * as a result of the parsing.
10260 */
10261
10262int
10263xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
10264 xmlChar start[4];
10265 xmlCharEncoding enc;
10266
Daniel Veillard36e5cd52004-11-02 14:52:23 +000010267 if ((ctxt == NULL) || (ctxt->input == NULL))
10268 return(-1);
10269
Owen Taylor3473f882001-02-23 17:55:21 +000010270 xmlDefaultSAXHandlerInit();
10271
Daniel Veillard309f81d2003-09-23 09:02:53 +000010272 xmlDetectSAX2(ctxt);
10273
Owen Taylor3473f882001-02-23 17:55:21 +000010274 GROW;
10275
10276 /*
10277 * SAX: beginning of the document processing.
10278 */
10279 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10280 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
10281
10282 /*
10283 * Get the 4 first bytes and decode the charset
10284 * if enc != XML_CHAR_ENCODING_NONE
10285 * plug some encoding conversion routines.
10286 */
Daniel Veillard4aede2e2003-10-17 12:43:59 +000010287 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
10288 start[0] = RAW;
10289 start[1] = NXT(1);
10290 start[2] = NXT(2);
10291 start[3] = NXT(3);
10292 enc = xmlDetectCharEncoding(start, 4);
10293 if (enc != XML_CHAR_ENCODING_NONE) {
10294 xmlSwitchEncoding(ctxt, enc);
10295 }
Owen Taylor3473f882001-02-23 17:55:21 +000010296 }
10297
10298
10299 if (CUR == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010300 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010301 }
10302
10303 /*
10304 * Check for the XMLDecl in the Prolog.
10305 */
10306 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +000010307 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +000010308
10309 /*
10310 * Note that we will switch encoding on the fly.
10311 */
10312 xmlParseXMLDecl(ctxt);
10313 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
10314 /*
10315 * The XML REC instructs us to stop parsing right here
10316 */
10317 return(-1);
10318 }
10319 SKIP_BLANKS;
10320 } else {
10321 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10322 }
10323 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
10324 ctxt->sax->startDocument(ctxt->userData);
10325
10326 /*
10327 * Doing validity checking on chunk doesn't make sense
10328 */
10329 ctxt->instate = XML_PARSER_CONTENT;
10330 ctxt->validate = 0;
10331 ctxt->loadsubset = 0;
10332 ctxt->depth = 0;
10333
10334 xmlParseContent(ctxt);
10335
10336 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010337 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010338 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010339 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010340 }
10341
10342 /*
10343 * SAX: end of the document processing.
10344 */
Daniel Veillard8d24cc12002-03-05 15:41:29 +000010345 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +000010346 ctxt->sax->endDocument(ctxt->userData);
10347
10348 if (! ctxt->wellFormed) return(-1);
10349 return(0);
10350}
10351
Daniel Veillard73b013f2003-09-30 12:36:01 +000010352#ifdef LIBXML_PUSH_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000010353/************************************************************************
10354 * *
10355 * Progressive parsing interfaces *
10356 * *
10357 ************************************************************************/
10358
10359/**
10360 * xmlParseLookupSequence:
10361 * @ctxt: an XML parser context
10362 * @first: the first char to lookup
10363 * @next: the next char to lookup or zero
10364 * @third: the next char to lookup or zero
10365 *
10366 * Try to find if a sequence (first, next, third) or just (first next) or
10367 * (first) is available in the input stream.
10368 * This function has a side effect of (possibly) incrementing ctxt->checkIndex
10369 * to avoid rescanning sequences of bytes, it DOES change the state of the
10370 * parser, do not use liberally.
10371 *
10372 * Returns the index to the current parsing point if the full sequence
10373 * is available, -1 otherwise.
10374 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +000010375static int
Owen Taylor3473f882001-02-23 17:55:21 +000010376xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
10377 xmlChar next, xmlChar third) {
10378 int base, len;
10379 xmlParserInputPtr in;
10380 const xmlChar *buf;
10381
10382 in = ctxt->input;
10383 if (in == NULL) return(-1);
10384 base = in->cur - in->base;
10385 if (base < 0) return(-1);
10386 if (ctxt->checkIndex > base)
10387 base = ctxt->checkIndex;
10388 if (in->buf == NULL) {
10389 buf = in->base;
10390 len = in->length;
10391 } else {
10392 buf = in->buf->buffer->content;
10393 len = in->buf->buffer->use;
10394 }
10395 /* take into account the sequence length */
10396 if (third) len -= 2;
10397 else if (next) len --;
10398 for (;base < len;base++) {
10399 if (buf[base] == first) {
10400 if (third != 0) {
10401 if ((buf[base + 1] != next) ||
10402 (buf[base + 2] != third)) continue;
10403 } else if (next != 0) {
10404 if (buf[base + 1] != next) continue;
10405 }
10406 ctxt->checkIndex = 0;
10407#ifdef DEBUG_PUSH
10408 if (next == 0)
10409 xmlGenericError(xmlGenericErrorContext,
10410 "PP: lookup '%c' found at %d\n",
10411 first, base);
10412 else if (third == 0)
10413 xmlGenericError(xmlGenericErrorContext,
10414 "PP: lookup '%c%c' found at %d\n",
10415 first, next, base);
10416 else
10417 xmlGenericError(xmlGenericErrorContext,
10418 "PP: lookup '%c%c%c' found at %d\n",
10419 first, next, third, base);
10420#endif
10421 return(base - (in->cur - in->base));
10422 }
10423 }
10424 ctxt->checkIndex = base;
10425#ifdef DEBUG_PUSH
10426 if (next == 0)
10427 xmlGenericError(xmlGenericErrorContext,
10428 "PP: lookup '%c' failed\n", first);
10429 else if (third == 0)
10430 xmlGenericError(xmlGenericErrorContext,
10431 "PP: lookup '%c%c' failed\n", first, next);
10432 else
10433 xmlGenericError(xmlGenericErrorContext,
10434 "PP: lookup '%c%c%c' failed\n", first, next, third);
10435#endif
10436 return(-1);
10437}
10438
10439/**
Daniel Veillarda880b122003-04-21 21:36:41 +000010440 * xmlParseGetLasts:
10441 * @ctxt: an XML parser context
10442 * @lastlt: pointer to store the last '<' from the input
10443 * @lastgt: pointer to store the last '>' from the input
10444 *
10445 * Lookup the last < and > in the current chunk
10446 */
10447static void
10448xmlParseGetLasts(xmlParserCtxtPtr ctxt, const xmlChar **lastlt,
10449 const xmlChar **lastgt) {
10450 const xmlChar *tmp;
10451
10452 if ((ctxt == NULL) || (lastlt == NULL) || (lastgt == NULL)) {
10453 xmlGenericError(xmlGenericErrorContext,
10454 "Internal error: xmlParseGetLasts\n");
10455 return;
10456 }
Daniel Veillard0df3bc32004-06-08 12:03:41 +000010457 if ((ctxt->progressive != 0) && (ctxt->inputNr == 1)) {
Daniel Veillarda880b122003-04-21 21:36:41 +000010458 tmp = ctxt->input->end;
10459 tmp--;
Daniel Veillardeb70f932004-07-05 16:46:09 +000010460 while ((tmp >= ctxt->input->base) && (*tmp != '<')) tmp--;
Daniel Veillarda880b122003-04-21 21:36:41 +000010461 if (tmp < ctxt->input->base) {
10462 *lastlt = NULL;
10463 *lastgt = NULL;
Daniel Veillarda880b122003-04-21 21:36:41 +000010464 } else {
Daniel Veillardeb70f932004-07-05 16:46:09 +000010465 *lastlt = tmp;
10466 tmp++;
10467 while ((tmp < ctxt->input->end) && (*tmp != '>')) {
10468 if (*tmp == '\'') {
10469 tmp++;
10470 while ((tmp < ctxt->input->end) && (*tmp != '\'')) tmp++;
10471 if (tmp < ctxt->input->end) tmp++;
10472 } else if (*tmp == '"') {
10473 tmp++;
10474 while ((tmp < ctxt->input->end) && (*tmp != '"')) tmp++;
10475 if (tmp < ctxt->input->end) tmp++;
10476 } else
10477 tmp++;
10478 }
10479 if (tmp < ctxt->input->end)
10480 *lastgt = tmp;
10481 else {
10482 tmp = *lastlt;
10483 tmp--;
10484 while ((tmp >= ctxt->input->base) && (*tmp != '>')) tmp--;
10485 if (tmp >= ctxt->input->base)
10486 *lastgt = tmp;
10487 else
10488 *lastgt = NULL;
10489 }
Daniel Veillarda880b122003-04-21 21:36:41 +000010490 }
Daniel Veillarda880b122003-04-21 21:36:41 +000010491 } else {
10492 *lastlt = NULL;
10493 *lastgt = NULL;
10494 }
10495}
10496/**
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000010497 * xmlCheckCdataPush:
10498 * @cur: pointer to the bock of characters
10499 * @len: length of the block in bytes
10500 *
10501 * Check that the block of characters is okay as SCdata content [20]
10502 *
10503 * Returns the number of bytes to pass if okay, a negative index where an
10504 * UTF-8 error occured otherwise
10505 */
10506static int
10507xmlCheckCdataPush(const xmlChar *utf, int len) {
10508 int ix;
10509 unsigned char c;
10510 int codepoint;
10511
10512 if ((utf == NULL) || (len <= 0))
10513 return(0);
10514
10515 for (ix = 0; ix < len;) { /* string is 0-terminated */
10516 c = utf[ix];
10517 if ((c & 0x80) == 0x00) { /* 1-byte code, starts with 10 */
10518 if (c >= 0x20)
10519 ix++;
10520 else if ((c == 0xA) || (c == 0xD) || (c == 0x9))
10521 ix++;
10522 else
10523 return(-ix);
10524 } else if ((c & 0xe0) == 0xc0) {/* 2-byte code, starts with 110 */
10525 if (ix + 2 > len) return(ix);
10526 if ((utf[ix+1] & 0xc0 ) != 0x80)
10527 return(-ix);
Daniel Veillardeca59a22005-09-09 10:56:28 +000010528 codepoint = (utf[ix] & 0x1f) << 6;
10529 codepoint |= utf[ix+1] & 0x3f;
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000010530 if (!xmlIsCharQ(codepoint))
10531 return(-ix);
10532 ix += 2;
10533 } else if ((c & 0xf0) == 0xe0) {/* 3-byte code, starts with 1110 */
10534 if (ix + 3 > len) return(ix);
10535 if (((utf[ix+1] & 0xc0) != 0x80) ||
10536 ((utf[ix+2] & 0xc0) != 0x80))
10537 return(-ix);
Daniel Veillardeca59a22005-09-09 10:56:28 +000010538 codepoint = (utf[ix] & 0xf) << 12;
10539 codepoint |= (utf[ix+1] & 0x3f) << 6;
10540 codepoint |= utf[ix+2] & 0x3f;
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000010541 if (!xmlIsCharQ(codepoint))
10542 return(-ix);
10543 ix += 3;
10544 } else if ((c & 0xf8) == 0xf0) {/* 4-byte code, starts with 11110 */
10545 if (ix + 4 > len) return(ix);
10546 if (((utf[ix+1] & 0xc0) != 0x80) ||
10547 ((utf[ix+2] & 0xc0) != 0x80) ||
10548 ((utf[ix+3] & 0xc0) != 0x80))
10549 return(-ix);
Daniel Veillardeca59a22005-09-09 10:56:28 +000010550 codepoint = (utf[ix] & 0x7) << 18;
10551 codepoint |= (utf[ix+1] & 0x3f) << 12;
10552 codepoint |= (utf[ix+2] & 0x3f) << 6;
10553 codepoint |= utf[ix+3] & 0x3f;
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000010554 if (!xmlIsCharQ(codepoint))
10555 return(-ix);
10556 ix += 4;
10557 } else /* unknown encoding */
10558 return(-ix);
10559 }
10560 return(ix);
10561}
10562
10563/**
Owen Taylor3473f882001-02-23 17:55:21 +000010564 * xmlParseTryOrFinish:
10565 * @ctxt: an XML parser context
10566 * @terminate: last chunk indicator
10567 *
10568 * Try to progress on parsing
10569 *
10570 * Returns zero if no parsing was possible
10571 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +000010572static int
Owen Taylor3473f882001-02-23 17:55:21 +000010573xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
10574 int ret = 0;
Daniel Veillardc82c57e2004-01-12 16:24:34 +000010575 int avail, tlen;
Owen Taylor3473f882001-02-23 17:55:21 +000010576 xmlChar cur, next;
Daniel Veillarda880b122003-04-21 21:36:41 +000010577 const xmlChar *lastlt, *lastgt;
Owen Taylor3473f882001-02-23 17:55:21 +000010578
Daniel Veillard36e5cd52004-11-02 14:52:23 +000010579 if (ctxt->input == NULL)
10580 return(0);
10581
Owen Taylor3473f882001-02-23 17:55:21 +000010582#ifdef DEBUG_PUSH
10583 switch (ctxt->instate) {
10584 case XML_PARSER_EOF:
10585 xmlGenericError(xmlGenericErrorContext,
10586 "PP: try EOF\n"); break;
10587 case XML_PARSER_START:
10588 xmlGenericError(xmlGenericErrorContext,
10589 "PP: try START\n"); break;
10590 case XML_PARSER_MISC:
10591 xmlGenericError(xmlGenericErrorContext,
10592 "PP: try MISC\n");break;
10593 case XML_PARSER_COMMENT:
10594 xmlGenericError(xmlGenericErrorContext,
10595 "PP: try COMMENT\n");break;
10596 case XML_PARSER_PROLOG:
10597 xmlGenericError(xmlGenericErrorContext,
10598 "PP: try PROLOG\n");break;
10599 case XML_PARSER_START_TAG:
10600 xmlGenericError(xmlGenericErrorContext,
10601 "PP: try START_TAG\n");break;
10602 case XML_PARSER_CONTENT:
10603 xmlGenericError(xmlGenericErrorContext,
10604 "PP: try CONTENT\n");break;
10605 case XML_PARSER_CDATA_SECTION:
10606 xmlGenericError(xmlGenericErrorContext,
10607 "PP: try CDATA_SECTION\n");break;
10608 case XML_PARSER_END_TAG:
10609 xmlGenericError(xmlGenericErrorContext,
10610 "PP: try END_TAG\n");break;
10611 case XML_PARSER_ENTITY_DECL:
10612 xmlGenericError(xmlGenericErrorContext,
10613 "PP: try ENTITY_DECL\n");break;
10614 case XML_PARSER_ENTITY_VALUE:
10615 xmlGenericError(xmlGenericErrorContext,
10616 "PP: try ENTITY_VALUE\n");break;
10617 case XML_PARSER_ATTRIBUTE_VALUE:
10618 xmlGenericError(xmlGenericErrorContext,
10619 "PP: try ATTRIBUTE_VALUE\n");break;
10620 case XML_PARSER_DTD:
10621 xmlGenericError(xmlGenericErrorContext,
10622 "PP: try DTD\n");break;
10623 case XML_PARSER_EPILOG:
10624 xmlGenericError(xmlGenericErrorContext,
10625 "PP: try EPILOG\n");break;
10626 case XML_PARSER_PI:
10627 xmlGenericError(xmlGenericErrorContext,
10628 "PP: try PI\n");break;
10629 case XML_PARSER_IGNORE:
10630 xmlGenericError(xmlGenericErrorContext,
10631 "PP: try IGNORE\n");break;
10632 }
10633#endif
10634
Daniel Veillard198c1bf2003-10-20 17:07:41 +000010635 if ((ctxt->input != NULL) &&
10636 (ctxt->input->cur - ctxt->input->base > 4096)) {
Daniel Veillarda880b122003-04-21 21:36:41 +000010637 xmlSHRINK(ctxt);
10638 ctxt->checkIndex = 0;
10639 }
10640 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Aleksey Sanine48a3182002-05-09 18:20:01 +000010641
Daniel Veillarda880b122003-04-21 21:36:41 +000010642 while (1) {
Daniel Veillard14412512005-01-21 23:53:26 +000010643 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000010644 return(0);
10645
10646
Owen Taylor3473f882001-02-23 17:55:21 +000010647 /*
10648 * Pop-up of finished entities.
10649 */
10650 while ((RAW == 0) && (ctxt->inputNr > 1))
10651 xmlPopInput(ctxt);
10652
Daniel Veillard198c1bf2003-10-20 17:07:41 +000010653 if (ctxt->input == NULL) break;
Owen Taylor3473f882001-02-23 17:55:21 +000010654 if (ctxt->input->buf == NULL)
Daniel Veillarda880b122003-04-21 21:36:41 +000010655 avail = ctxt->input->length -
10656 (ctxt->input->cur - ctxt->input->base);
Daniel Veillard158a4d22002-02-20 22:17:58 +000010657 else {
10658 /*
10659 * If we are operating on converted input, try to flush
10660 * remainng chars to avoid them stalling in the non-converted
10661 * buffer.
10662 */
10663 if ((ctxt->input->buf->raw != NULL) &&
10664 (ctxt->input->buf->raw->use > 0)) {
10665 int base = ctxt->input->base -
10666 ctxt->input->buf->buffer->content;
10667 int current = ctxt->input->cur - ctxt->input->base;
10668
10669 xmlParserInputBufferPush(ctxt->input->buf, 0, "");
10670 ctxt->input->base = ctxt->input->buf->buffer->content + base;
10671 ctxt->input->cur = ctxt->input->base + current;
10672 ctxt->input->end =
10673 &ctxt->input->buf->buffer->content[
10674 ctxt->input->buf->buffer->use];
10675 }
10676 avail = ctxt->input->buf->buffer->use -
10677 (ctxt->input->cur - ctxt->input->base);
10678 }
Owen Taylor3473f882001-02-23 17:55:21 +000010679 if (avail < 1)
10680 goto done;
10681 switch (ctxt->instate) {
10682 case XML_PARSER_EOF:
10683 /*
10684 * Document parsing is done !
10685 */
10686 goto done;
10687 case XML_PARSER_START:
Daniel Veillard0e4cd172001-06-28 12:13:56 +000010688 if (ctxt->charset == XML_CHAR_ENCODING_NONE) {
10689 xmlChar start[4];
10690 xmlCharEncoding enc;
10691
10692 /*
10693 * Very first chars read from the document flow.
10694 */
10695 if (avail < 4)
10696 goto done;
10697
10698 /*
10699 * Get the 4 first bytes and decode the charset
10700 * if enc != XML_CHAR_ENCODING_NONE
William M. Brack3a1cd212005-02-11 14:35:54 +000010701 * plug some encoding conversion routines,
10702 * else xmlSwitchEncoding will set to (default)
10703 * UTF8.
Daniel Veillard0e4cd172001-06-28 12:13:56 +000010704 */
10705 start[0] = RAW;
10706 start[1] = NXT(1);
10707 start[2] = NXT(2);
10708 start[3] = NXT(3);
10709 enc = xmlDetectCharEncoding(start, 4);
William M. Brack3a1cd212005-02-11 14:35:54 +000010710 xmlSwitchEncoding(ctxt, enc);
Daniel Veillard0e4cd172001-06-28 12:13:56 +000010711 break;
10712 }
Owen Taylor3473f882001-02-23 17:55:21 +000010713
Daniel Veillard2b8c4a12003-10-02 22:28:19 +000010714 if (avail < 2)
10715 goto done;
Owen Taylor3473f882001-02-23 17:55:21 +000010716 cur = ctxt->input->cur[0];
10717 next = ctxt->input->cur[1];
10718 if (cur == 0) {
10719 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10720 ctxt->sax->setDocumentLocator(ctxt->userData,
10721 &xmlDefaultSAXLocator);
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010722 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010723 ctxt->instate = XML_PARSER_EOF;
10724#ifdef DEBUG_PUSH
10725 xmlGenericError(xmlGenericErrorContext,
10726 "PP: entering EOF\n");
10727#endif
10728 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10729 ctxt->sax->endDocument(ctxt->userData);
10730 goto done;
10731 }
10732 if ((cur == '<') && (next == '?')) {
10733 /* PI or XML decl */
10734 if (avail < 5) return(ret);
10735 if ((!terminate) &&
10736 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
10737 return(ret);
10738 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10739 ctxt->sax->setDocumentLocator(ctxt->userData,
10740 &xmlDefaultSAXLocator);
10741 if ((ctxt->input->cur[2] == 'x') &&
10742 (ctxt->input->cur[3] == 'm') &&
10743 (ctxt->input->cur[4] == 'l') &&
William M. Brack76e95df2003-10-18 16:20:14 +000010744 (IS_BLANK_CH(ctxt->input->cur[5]))) {
Owen Taylor3473f882001-02-23 17:55:21 +000010745 ret += 5;
10746#ifdef DEBUG_PUSH
10747 xmlGenericError(xmlGenericErrorContext,
10748 "PP: Parsing XML Decl\n");
10749#endif
10750 xmlParseXMLDecl(ctxt);
10751 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
10752 /*
10753 * The XML REC instructs us to stop parsing right
10754 * here
10755 */
10756 ctxt->instate = XML_PARSER_EOF;
10757 return(0);
10758 }
10759 ctxt->standalone = ctxt->input->standalone;
10760 if ((ctxt->encoding == NULL) &&
10761 (ctxt->input->encoding != NULL))
10762 ctxt->encoding = xmlStrdup(ctxt->input->encoding);
10763 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
10764 (!ctxt->disableSAX))
10765 ctxt->sax->startDocument(ctxt->userData);
10766 ctxt->instate = XML_PARSER_MISC;
10767#ifdef DEBUG_PUSH
10768 xmlGenericError(xmlGenericErrorContext,
10769 "PP: entering MISC\n");
10770#endif
10771 } else {
10772 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10773 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
10774 (!ctxt->disableSAX))
10775 ctxt->sax->startDocument(ctxt->userData);
10776 ctxt->instate = XML_PARSER_MISC;
10777#ifdef DEBUG_PUSH
10778 xmlGenericError(xmlGenericErrorContext,
10779 "PP: entering MISC\n");
10780#endif
10781 }
10782 } else {
10783 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10784 ctxt->sax->setDocumentLocator(ctxt->userData,
10785 &xmlDefaultSAXLocator);
10786 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
William M. Bracka3215c72004-07-31 16:24:01 +000010787 if (ctxt->version == NULL) {
10788 xmlErrMemory(ctxt, NULL);
10789 break;
10790 }
Owen Taylor3473f882001-02-23 17:55:21 +000010791 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
10792 (!ctxt->disableSAX))
10793 ctxt->sax->startDocument(ctxt->userData);
10794 ctxt->instate = XML_PARSER_MISC;
10795#ifdef DEBUG_PUSH
10796 xmlGenericError(xmlGenericErrorContext,
10797 "PP: entering MISC\n");
10798#endif
10799 }
10800 break;
Daniel Veillarda880b122003-04-21 21:36:41 +000010801 case XML_PARSER_START_TAG: {
Daniel Veillarde57ec792003-09-10 10:50:59 +000010802 const xmlChar *name;
Daniel Veillard15495612009-09-05 15:04:41 +020010803 const xmlChar *prefix = NULL;
10804 const xmlChar *URI = NULL;
Daniel Veillarde57ec792003-09-10 10:50:59 +000010805 int nsNr = ctxt->nsNr;
Daniel Veillarda880b122003-04-21 21:36:41 +000010806
10807 if ((avail < 2) && (ctxt->inputNr == 1))
10808 goto done;
10809 cur = ctxt->input->cur[0];
10810 if (cur != '<') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010811 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Daniel Veillarda880b122003-04-21 21:36:41 +000010812 ctxt->instate = XML_PARSER_EOF;
Daniel Veillarda880b122003-04-21 21:36:41 +000010813 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10814 ctxt->sax->endDocument(ctxt->userData);
10815 goto done;
10816 }
10817 if (!terminate) {
10818 if (ctxt->progressive) {
Daniel Veillardb3744002004-02-18 14:28:22 +000010819 /* > can be found unescaped in attribute values */
Daniel Veillardeb70f932004-07-05 16:46:09 +000010820 if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
Daniel Veillarda880b122003-04-21 21:36:41 +000010821 goto done;
10822 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
10823 goto done;
10824 }
10825 }
10826 if (ctxt->spaceNr == 0)
10827 spacePush(ctxt, -1);
Daniel Veillard1114d002006-10-12 16:24:35 +000010828 else if (*ctxt->space == -2)
10829 spacePush(ctxt, -1);
Daniel Veillarda880b122003-04-21 21:36:41 +000010830 else
10831 spacePush(ctxt, *ctxt->space);
Daniel Veillard81273902003-09-30 00:43:48 +000010832#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +000010833 if (ctxt->sax2)
Daniel Veillard81273902003-09-30 00:43:48 +000010834#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillardc82c57e2004-01-12 16:24:34 +000010835 name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
Daniel Veillard81273902003-09-30 00:43:48 +000010836#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +000010837 else
10838 name = xmlParseStartTag(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +000010839#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarda880b122003-04-21 21:36:41 +000010840 if (name == NULL) {
10841 spacePop(ctxt);
10842 ctxt->instate = XML_PARSER_EOF;
Daniel Veillarda880b122003-04-21 21:36:41 +000010843 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10844 ctxt->sax->endDocument(ctxt->userData);
10845 goto done;
10846 }
Daniel Veillard4432df22003-09-28 18:58:27 +000010847#ifdef LIBXML_VALID_ENABLED
Daniel Veillarda880b122003-04-21 21:36:41 +000010848 /*
10849 * [ VC: Root Element Type ]
10850 * The Name in the document type declaration must match
10851 * the element type of the root element.
10852 */
10853 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
10854 ctxt->node && (ctxt->node == ctxt->myDoc->children))
10855 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
Daniel Veillard4432df22003-09-28 18:58:27 +000010856#endif /* LIBXML_VALID_ENABLED */
Daniel Veillarda880b122003-04-21 21:36:41 +000010857
10858 /*
10859 * Check for an Empty Element.
10860 */
10861 if ((RAW == '/') && (NXT(1) == '>')) {
10862 SKIP(2);
Daniel Veillarde57ec792003-09-10 10:50:59 +000010863
10864 if (ctxt->sax2) {
10865 if ((ctxt->sax != NULL) &&
10866 (ctxt->sax->endElementNs != NULL) &&
10867 (!ctxt->disableSAX))
10868 ctxt->sax->endElementNs(ctxt->userData, name,
10869 prefix, URI);
Daniel Veillard48df9612005-01-04 21:50:05 +000010870 if (ctxt->nsNr - nsNr > 0)
10871 nsPop(ctxt, ctxt->nsNr - nsNr);
Daniel Veillard81273902003-09-30 00:43:48 +000010872#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +000010873 } else {
10874 if ((ctxt->sax != NULL) &&
10875 (ctxt->sax->endElement != NULL) &&
10876 (!ctxt->disableSAX))
10877 ctxt->sax->endElement(ctxt->userData, name);
Daniel Veillard81273902003-09-30 00:43:48 +000010878#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarda880b122003-04-21 21:36:41 +000010879 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000010880 spacePop(ctxt);
10881 if (ctxt->nameNr == 0) {
Daniel Veillarda880b122003-04-21 21:36:41 +000010882 ctxt->instate = XML_PARSER_EPILOG;
Daniel Veillarda880b122003-04-21 21:36:41 +000010883 } else {
10884 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillarda880b122003-04-21 21:36:41 +000010885 }
10886 break;
10887 }
10888 if (RAW == '>') {
10889 NEXT;
10890 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +000010891 xmlFatalErrMsgStr(ctxt, XML_ERR_GT_REQUIRED,
Daniel Veillarda880b122003-04-21 21:36:41 +000010892 "Couldn't find end of Start Tag %s\n",
10893 name);
Daniel Veillarda880b122003-04-21 21:36:41 +000010894 nodePop(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +000010895 spacePop(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +000010896 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000010897 if (ctxt->sax2)
10898 nameNsPush(ctxt, name, prefix, URI, ctxt->nsNr - nsNr);
Daniel Veillard81273902003-09-30 00:43:48 +000010899#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +000010900 else
10901 namePush(ctxt, name);
Daniel Veillard81273902003-09-30 00:43:48 +000010902#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +000010903
Daniel Veillarda880b122003-04-21 21:36:41 +000010904 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillarda880b122003-04-21 21:36:41 +000010905 break;
10906 }
10907 case XML_PARSER_CONTENT: {
10908 const xmlChar *test;
10909 unsigned int cons;
10910 if ((avail < 2) && (ctxt->inputNr == 1))
10911 goto done;
10912 cur = ctxt->input->cur[0];
10913 next = ctxt->input->cur[1];
10914
10915 test = CUR_PTR;
10916 cons = ctxt->input->consumed;
10917 if ((cur == '<') && (next == '/')) {
10918 ctxt->instate = XML_PARSER_END_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +000010919 break;
10920 } else if ((cur == '<') && (next == '?')) {
10921 if ((!terminate) &&
10922 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
10923 goto done;
Daniel Veillarda880b122003-04-21 21:36:41 +000010924 xmlParsePI(ctxt);
10925 } else if ((cur == '<') && (next != '!')) {
10926 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +000010927 break;
10928 } else if ((cur == '<') && (next == '!') &&
10929 (ctxt->input->cur[2] == '-') &&
10930 (ctxt->input->cur[3] == '-')) {
Daniel Veillard6974feb2006-02-05 02:43:36 +000010931 int term;
10932
10933 if (avail < 4)
10934 goto done;
10935 ctxt->input->cur += 4;
10936 term = xmlParseLookupSequence(ctxt, '-', '-', '>');
10937 ctxt->input->cur -= 4;
10938 if ((!terminate) && (term < 0))
Daniel Veillarda880b122003-04-21 21:36:41 +000010939 goto done;
Daniel Veillarda880b122003-04-21 21:36:41 +000010940 xmlParseComment(ctxt);
10941 ctxt->instate = XML_PARSER_CONTENT;
10942 } else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
10943 (ctxt->input->cur[2] == '[') &&
10944 (ctxt->input->cur[3] == 'C') &&
10945 (ctxt->input->cur[4] == 'D') &&
10946 (ctxt->input->cur[5] == 'A') &&
10947 (ctxt->input->cur[6] == 'T') &&
10948 (ctxt->input->cur[7] == 'A') &&
10949 (ctxt->input->cur[8] == '[')) {
10950 SKIP(9);
10951 ctxt->instate = XML_PARSER_CDATA_SECTION;
Daniel Veillarda880b122003-04-21 21:36:41 +000010952 break;
10953 } else if ((cur == '<') && (next == '!') &&
10954 (avail < 9)) {
10955 goto done;
10956 } else if (cur == '&') {
10957 if ((!terminate) &&
10958 (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0))
10959 goto done;
Daniel Veillarda880b122003-04-21 21:36:41 +000010960 xmlParseReference(ctxt);
10961 } else {
10962 /* TODO Avoid the extra copy, handle directly !!! */
10963 /*
10964 * Goal of the following test is:
10965 * - minimize calls to the SAX 'character' callback
10966 * when they are mergeable
10967 * - handle an problem for isBlank when we only parse
10968 * a sequence of blank chars and the next one is
10969 * not available to check against '<' presence.
10970 * - tries to homogenize the differences in SAX
10971 * callbacks between the push and pull versions
10972 * of the parser.
10973 */
10974 if ((ctxt->inputNr == 1) &&
10975 (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
10976 if (!terminate) {
10977 if (ctxt->progressive) {
10978 if ((lastlt == NULL) ||
10979 (ctxt->input->cur > lastlt))
10980 goto done;
10981 } else if (xmlParseLookupSequence(ctxt,
10982 '<', 0, 0) < 0) {
10983 goto done;
10984 }
10985 }
10986 }
10987 ctxt->checkIndex = 0;
Daniel Veillarda880b122003-04-21 21:36:41 +000010988 xmlParseCharData(ctxt, 0);
10989 }
10990 /*
10991 * Pop-up of finished entities.
10992 */
10993 while ((RAW == 0) && (ctxt->inputNr > 1))
10994 xmlPopInput(ctxt);
10995 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010996 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
10997 "detected an error in element content\n");
Daniel Veillarda880b122003-04-21 21:36:41 +000010998 ctxt->instate = XML_PARSER_EOF;
10999 break;
11000 }
11001 break;
11002 }
11003 case XML_PARSER_END_TAG:
11004 if (avail < 2)
11005 goto done;
11006 if (!terminate) {
11007 if (ctxt->progressive) {
Daniel Veillardeb70f932004-07-05 16:46:09 +000011008 /* > can be found unescaped in attribute values */
11009 if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
Daniel Veillarda880b122003-04-21 21:36:41 +000011010 goto done;
11011 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
11012 goto done;
11013 }
11014 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000011015 if (ctxt->sax2) {
11016 xmlParseEndTag2(ctxt,
11017 (void *) ctxt->pushTab[ctxt->nameNr * 3 - 3],
11018 (void *) ctxt->pushTab[ctxt->nameNr * 3 - 2], 0,
Daniel Veillardc82c57e2004-01-12 16:24:34 +000011019 (int) (long) ctxt->pushTab[ctxt->nameNr * 3 - 1], 0);
Daniel Veillarde57ec792003-09-10 10:50:59 +000011020 nameNsPop(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +000011021 }
11022#ifdef LIBXML_SAX1_ENABLED
11023 else
Daniel Veillarde57ec792003-09-10 10:50:59 +000011024 xmlParseEndTag1(ctxt, 0);
Daniel Veillard81273902003-09-30 00:43:48 +000011025#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +000011026 if (ctxt->nameNr == 0) {
Daniel Veillarda880b122003-04-21 21:36:41 +000011027 ctxt->instate = XML_PARSER_EPILOG;
Daniel Veillarda880b122003-04-21 21:36:41 +000011028 } else {
11029 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillarda880b122003-04-21 21:36:41 +000011030 }
11031 break;
11032 case XML_PARSER_CDATA_SECTION: {
11033 /*
11034 * The Push mode need to have the SAX callback for
11035 * cdataBlock merge back contiguous callbacks.
11036 */
11037 int base;
11038
11039 base = xmlParseLookupSequence(ctxt, ']', ']', '>');
11040 if (base < 0) {
11041 if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) {
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011042 int tmp;
11043
11044 tmp = xmlCheckCdataPush(ctxt->input->cur,
11045 XML_PARSER_BIG_BUFFER_SIZE);
11046 if (tmp < 0) {
11047 tmp = -tmp;
11048 ctxt->input->cur += tmp;
11049 goto encoding_error;
11050 }
Daniel Veillarda880b122003-04-21 21:36:41 +000011051 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
11052 if (ctxt->sax->cdataBlock != NULL)
Daniel Veillardd9d32ae2003-07-05 20:32:43 +000011053 ctxt->sax->cdataBlock(ctxt->userData,
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011054 ctxt->input->cur, tmp);
Daniel Veillardd9d32ae2003-07-05 20:32:43 +000011055 else if (ctxt->sax->characters != NULL)
11056 ctxt->sax->characters(ctxt->userData,
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011057 ctxt->input->cur, tmp);
Daniel Veillarda880b122003-04-21 21:36:41 +000011058 }
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011059 SKIPL(tmp);
Daniel Veillarda880b122003-04-21 21:36:41 +000011060 ctxt->checkIndex = 0;
11061 }
11062 goto done;
11063 } else {
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011064 int tmp;
11065
11066 tmp = xmlCheckCdataPush(ctxt->input->cur, base);
11067 if ((tmp < 0) || (tmp != base)) {
11068 tmp = -tmp;
11069 ctxt->input->cur += tmp;
11070 goto encoding_error;
11071 }
Daniel Veillardd0d2f092008-03-07 16:50:21 +000011072 if ((ctxt->sax != NULL) && (base == 0) &&
11073 (ctxt->sax->cdataBlock != NULL) &&
11074 (!ctxt->disableSAX)) {
11075 /*
11076 * Special case to provide identical behaviour
11077 * between pull and push parsers on enpty CDATA
11078 * sections
11079 */
11080 if ((ctxt->input->cur - ctxt->input->base >= 9) &&
11081 (!strncmp((const char *)&ctxt->input->cur[-9],
11082 "<![CDATA[", 9)))
11083 ctxt->sax->cdataBlock(ctxt->userData,
11084 BAD_CAST "", 0);
11085 } else if ((ctxt->sax != NULL) && (base > 0) &&
Daniel Veillarda880b122003-04-21 21:36:41 +000011086 (!ctxt->disableSAX)) {
11087 if (ctxt->sax->cdataBlock != NULL)
11088 ctxt->sax->cdataBlock(ctxt->userData,
11089 ctxt->input->cur, base);
Daniel Veillardd9d32ae2003-07-05 20:32:43 +000011090 else if (ctxt->sax->characters != NULL)
11091 ctxt->sax->characters(ctxt->userData,
11092 ctxt->input->cur, base);
Daniel Veillarda880b122003-04-21 21:36:41 +000011093 }
Daniel Veillard0b787f32004-03-26 17:29:53 +000011094 SKIPL(base + 3);
Daniel Veillarda880b122003-04-21 21:36:41 +000011095 ctxt->checkIndex = 0;
11096 ctxt->instate = XML_PARSER_CONTENT;
11097#ifdef DEBUG_PUSH
11098 xmlGenericError(xmlGenericErrorContext,
11099 "PP: entering CONTENT\n");
11100#endif
11101 }
11102 break;
11103 }
Owen Taylor3473f882001-02-23 17:55:21 +000011104 case XML_PARSER_MISC:
11105 SKIP_BLANKS;
11106 if (ctxt->input->buf == NULL)
Daniel Veillarda880b122003-04-21 21:36:41 +000011107 avail = ctxt->input->length -
11108 (ctxt->input->cur - ctxt->input->base);
Owen Taylor3473f882001-02-23 17:55:21 +000011109 else
Daniel Veillarda880b122003-04-21 21:36:41 +000011110 avail = ctxt->input->buf->buffer->use -
11111 (ctxt->input->cur - ctxt->input->base);
Owen Taylor3473f882001-02-23 17:55:21 +000011112 if (avail < 2)
11113 goto done;
11114 cur = ctxt->input->cur[0];
11115 next = ctxt->input->cur[1];
11116 if ((cur == '<') && (next == '?')) {
11117 if ((!terminate) &&
11118 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
11119 goto done;
11120#ifdef DEBUG_PUSH
11121 xmlGenericError(xmlGenericErrorContext,
11122 "PP: Parsing PI\n");
11123#endif
11124 xmlParsePI(ctxt);
Daniel Veillard40e4b212007-06-12 14:46:40 +000011125 ctxt->checkIndex = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000011126 } else if ((cur == '<') && (next == '!') &&
Daniel Veillarda880b122003-04-21 21:36:41 +000011127 (ctxt->input->cur[2] == '-') &&
11128 (ctxt->input->cur[3] == '-')) {
Owen Taylor3473f882001-02-23 17:55:21 +000011129 if ((!terminate) &&
11130 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
11131 goto done;
11132#ifdef DEBUG_PUSH
11133 xmlGenericError(xmlGenericErrorContext,
11134 "PP: Parsing Comment\n");
11135#endif
11136 xmlParseComment(ctxt);
11137 ctxt->instate = XML_PARSER_MISC;
Daniel Veillarddfac9462007-06-12 14:44:32 +000011138 ctxt->checkIndex = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000011139 } else if ((cur == '<') && (next == '!') &&
Daniel Veillarda880b122003-04-21 21:36:41 +000011140 (ctxt->input->cur[2] == 'D') &&
11141 (ctxt->input->cur[3] == 'O') &&
11142 (ctxt->input->cur[4] == 'C') &&
11143 (ctxt->input->cur[5] == 'T') &&
11144 (ctxt->input->cur[6] == 'Y') &&
11145 (ctxt->input->cur[7] == 'P') &&
Owen Taylor3473f882001-02-23 17:55:21 +000011146 (ctxt->input->cur[8] == 'E')) {
11147 if ((!terminate) &&
11148 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
11149 goto done;
11150#ifdef DEBUG_PUSH
11151 xmlGenericError(xmlGenericErrorContext,
11152 "PP: Parsing internal subset\n");
11153#endif
11154 ctxt->inSubset = 1;
11155 xmlParseDocTypeDecl(ctxt);
11156 if (RAW == '[') {
11157 ctxt->instate = XML_PARSER_DTD;
11158#ifdef DEBUG_PUSH
11159 xmlGenericError(xmlGenericErrorContext,
11160 "PP: entering DTD\n");
11161#endif
11162 } else {
11163 /*
11164 * Create and update the external subset.
11165 */
11166 ctxt->inSubset = 2;
11167 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
11168 (ctxt->sax->externalSubset != NULL))
11169 ctxt->sax->externalSubset(ctxt->userData,
11170 ctxt->intSubName, ctxt->extSubSystem,
11171 ctxt->extSubURI);
11172 ctxt->inSubset = 0;
Daniel Veillardac4118d2008-01-11 05:27:32 +000011173 xmlCleanSpecialAttr(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011174 ctxt->instate = XML_PARSER_PROLOG;
11175#ifdef DEBUG_PUSH
11176 xmlGenericError(xmlGenericErrorContext,
11177 "PP: entering PROLOG\n");
11178#endif
11179 }
11180 } else if ((cur == '<') && (next == '!') &&
11181 (avail < 9)) {
11182 goto done;
11183 } else {
11184 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +000011185 ctxt->progressive = 1;
11186 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Owen Taylor3473f882001-02-23 17:55:21 +000011187#ifdef DEBUG_PUSH
11188 xmlGenericError(xmlGenericErrorContext,
11189 "PP: entering START_TAG\n");
11190#endif
11191 }
11192 break;
Owen Taylor3473f882001-02-23 17:55:21 +000011193 case XML_PARSER_PROLOG:
11194 SKIP_BLANKS;
11195 if (ctxt->input->buf == NULL)
11196 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
11197 else
11198 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
11199 if (avail < 2)
11200 goto done;
11201 cur = ctxt->input->cur[0];
11202 next = ctxt->input->cur[1];
11203 if ((cur == '<') && (next == '?')) {
11204 if ((!terminate) &&
11205 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
11206 goto done;
11207#ifdef DEBUG_PUSH
11208 xmlGenericError(xmlGenericErrorContext,
11209 "PP: Parsing PI\n");
11210#endif
11211 xmlParsePI(ctxt);
11212 } else if ((cur == '<') && (next == '!') &&
11213 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
11214 if ((!terminate) &&
11215 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
11216 goto done;
11217#ifdef DEBUG_PUSH
11218 xmlGenericError(xmlGenericErrorContext,
11219 "PP: Parsing Comment\n");
11220#endif
11221 xmlParseComment(ctxt);
11222 ctxt->instate = XML_PARSER_PROLOG;
11223 } else if ((cur == '<') && (next == '!') &&
11224 (avail < 4)) {
11225 goto done;
11226 } else {
11227 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillard0df3bc32004-06-08 12:03:41 +000011228 if (ctxt->progressive == 0)
11229 ctxt->progressive = 1;
Daniel Veillarda880b122003-04-21 21:36:41 +000011230 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Owen Taylor3473f882001-02-23 17:55:21 +000011231#ifdef DEBUG_PUSH
11232 xmlGenericError(xmlGenericErrorContext,
11233 "PP: entering START_TAG\n");
11234#endif
11235 }
11236 break;
11237 case XML_PARSER_EPILOG:
11238 SKIP_BLANKS;
11239 if (ctxt->input->buf == NULL)
11240 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
11241 else
11242 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
11243 if (avail < 2)
11244 goto done;
11245 cur = ctxt->input->cur[0];
11246 next = ctxt->input->cur[1];
11247 if ((cur == '<') && (next == '?')) {
11248 if ((!terminate) &&
11249 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
11250 goto done;
11251#ifdef DEBUG_PUSH
11252 xmlGenericError(xmlGenericErrorContext,
11253 "PP: Parsing PI\n");
11254#endif
11255 xmlParsePI(ctxt);
11256 ctxt->instate = XML_PARSER_EPILOG;
11257 } else if ((cur == '<') && (next == '!') &&
11258 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
11259 if ((!terminate) &&
11260 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
11261 goto done;
11262#ifdef DEBUG_PUSH
11263 xmlGenericError(xmlGenericErrorContext,
11264 "PP: Parsing Comment\n");
11265#endif
11266 xmlParseComment(ctxt);
11267 ctxt->instate = XML_PARSER_EPILOG;
11268 } else if ((cur == '<') && (next == '!') &&
11269 (avail < 4)) {
11270 goto done;
11271 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011272 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011273 ctxt->instate = XML_PARSER_EOF;
11274#ifdef DEBUG_PUSH
11275 xmlGenericError(xmlGenericErrorContext,
11276 "PP: entering EOF\n");
11277#endif
Daniel Veillard8d24cc12002-03-05 15:41:29 +000011278 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +000011279 ctxt->sax->endDocument(ctxt->userData);
11280 goto done;
11281 }
11282 break;
Owen Taylor3473f882001-02-23 17:55:21 +000011283 case XML_PARSER_DTD: {
11284 /*
11285 * Sorry but progressive parsing of the internal subset
11286 * is not expected to be supported. We first check that
11287 * the full content of the internal subset is available and
11288 * the parsing is launched only at that point.
11289 * Internal subset ends up with "']' S? '>'" in an unescaped
11290 * section and not in a ']]>' sequence which are conditional
11291 * sections (whoever argued to keep that crap in XML deserve
11292 * a place in hell !).
11293 */
11294 int base, i;
11295 xmlChar *buf;
11296 xmlChar quote = 0;
11297
11298 base = ctxt->input->cur - ctxt->input->base;
11299 if (base < 0) return(0);
11300 if (ctxt->checkIndex > base)
11301 base = ctxt->checkIndex;
11302 buf = ctxt->input->buf->buffer->content;
11303 for (;(unsigned int) base < ctxt->input->buf->buffer->use;
11304 base++) {
11305 if (quote != 0) {
11306 if (buf[base] == quote)
11307 quote = 0;
11308 continue;
11309 }
Daniel Veillard036143b2004-02-12 11:57:52 +000011310 if ((quote == 0) && (buf[base] == '<')) {
11311 int found = 0;
11312 /* special handling of comments */
11313 if (((unsigned int) base + 4 <
11314 ctxt->input->buf->buffer->use) &&
11315 (buf[base + 1] == '!') &&
11316 (buf[base + 2] == '-') &&
11317 (buf[base + 3] == '-')) {
11318 for (;(unsigned int) base + 3 <
11319 ctxt->input->buf->buffer->use; base++) {
11320 if ((buf[base] == '-') &&
11321 (buf[base + 1] == '-') &&
11322 (buf[base + 2] == '>')) {
11323 found = 1;
11324 base += 2;
11325 break;
11326 }
11327 }
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000011328 if (!found) {
11329#if 0
11330 fprintf(stderr, "unfinished comment\n");
11331#endif
11332 break; /* for */
11333 }
Daniel Veillard036143b2004-02-12 11:57:52 +000011334 continue;
11335 }
11336 }
Owen Taylor3473f882001-02-23 17:55:21 +000011337 if (buf[base] == '"') {
11338 quote = '"';
11339 continue;
11340 }
11341 if (buf[base] == '\'') {
11342 quote = '\'';
11343 continue;
11344 }
11345 if (buf[base] == ']') {
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000011346#if 0
11347 fprintf(stderr, "%c%c%c%c: ", buf[base],
11348 buf[base + 1], buf[base + 2], buf[base + 3]);
11349#endif
Owen Taylor3473f882001-02-23 17:55:21 +000011350 if ((unsigned int) base +1 >=
11351 ctxt->input->buf->buffer->use)
11352 break;
11353 if (buf[base + 1] == ']') {
11354 /* conditional crap, skip both ']' ! */
11355 base++;
11356 continue;
11357 }
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000011358 for (i = 1;
Owen Taylor3473f882001-02-23 17:55:21 +000011359 (unsigned int) base + i < ctxt->input->buf->buffer->use;
11360 i++) {
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000011361 if (buf[base + i] == '>') {
11362#if 0
11363 fprintf(stderr, "found\n");
11364#endif
Owen Taylor3473f882001-02-23 17:55:21 +000011365 goto found_end_int_subset;
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000011366 }
11367 if (!IS_BLANK_CH(buf[base + i])) {
11368#if 0
11369 fprintf(stderr, "not found\n");
11370#endif
11371 goto not_end_of_int_subset;
11372 }
Owen Taylor3473f882001-02-23 17:55:21 +000011373 }
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000011374#if 0
11375 fprintf(stderr, "end of stream\n");
11376#endif
Owen Taylor3473f882001-02-23 17:55:21 +000011377 break;
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000011378
Owen Taylor3473f882001-02-23 17:55:21 +000011379 }
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000011380not_end_of_int_subset:
11381 continue; /* for */
Owen Taylor3473f882001-02-23 17:55:21 +000011382 }
11383 /*
11384 * We didn't found the end of the Internal subset
11385 */
Owen Taylor3473f882001-02-23 17:55:21 +000011386#ifdef DEBUG_PUSH
11387 if (next == 0)
11388 xmlGenericError(xmlGenericErrorContext,
11389 "PP: lookup of int subset end filed\n");
11390#endif
11391 goto done;
11392
11393found_end_int_subset:
11394 xmlParseInternalSubset(ctxt);
11395 ctxt->inSubset = 2;
11396 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
11397 (ctxt->sax->externalSubset != NULL))
11398 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
11399 ctxt->extSubSystem, ctxt->extSubURI);
11400 ctxt->inSubset = 0;
Daniel Veillardac4118d2008-01-11 05:27:32 +000011401 xmlCleanSpecialAttr(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011402 ctxt->instate = XML_PARSER_PROLOG;
11403 ctxt->checkIndex = 0;
11404#ifdef DEBUG_PUSH
11405 xmlGenericError(xmlGenericErrorContext,
11406 "PP: entering PROLOG\n");
11407#endif
11408 break;
11409 }
11410 case XML_PARSER_COMMENT:
11411 xmlGenericError(xmlGenericErrorContext,
11412 "PP: internal error, state == COMMENT\n");
11413 ctxt->instate = XML_PARSER_CONTENT;
11414#ifdef DEBUG_PUSH
11415 xmlGenericError(xmlGenericErrorContext,
11416 "PP: entering CONTENT\n");
11417#endif
11418 break;
Daniel Veillarda880b122003-04-21 21:36:41 +000011419 case XML_PARSER_IGNORE:
11420 xmlGenericError(xmlGenericErrorContext,
11421 "PP: internal error, state == IGNORE");
11422 ctxt->instate = XML_PARSER_DTD;
11423#ifdef DEBUG_PUSH
11424 xmlGenericError(xmlGenericErrorContext,
11425 "PP: entering DTD\n");
11426#endif
11427 break;
Owen Taylor3473f882001-02-23 17:55:21 +000011428 case XML_PARSER_PI:
11429 xmlGenericError(xmlGenericErrorContext,
11430 "PP: internal error, state == PI\n");
11431 ctxt->instate = XML_PARSER_CONTENT;
11432#ifdef DEBUG_PUSH
11433 xmlGenericError(xmlGenericErrorContext,
11434 "PP: entering CONTENT\n");
11435#endif
11436 break;
11437 case XML_PARSER_ENTITY_DECL:
11438 xmlGenericError(xmlGenericErrorContext,
11439 "PP: internal error, state == ENTITY_DECL\n");
11440 ctxt->instate = XML_PARSER_DTD;
11441#ifdef DEBUG_PUSH
11442 xmlGenericError(xmlGenericErrorContext,
11443 "PP: entering DTD\n");
11444#endif
11445 break;
11446 case XML_PARSER_ENTITY_VALUE:
11447 xmlGenericError(xmlGenericErrorContext,
11448 "PP: internal error, state == ENTITY_VALUE\n");
11449 ctxt->instate = XML_PARSER_CONTENT;
11450#ifdef DEBUG_PUSH
11451 xmlGenericError(xmlGenericErrorContext,
11452 "PP: entering DTD\n");
11453#endif
11454 break;
11455 case XML_PARSER_ATTRIBUTE_VALUE:
11456 xmlGenericError(xmlGenericErrorContext,
11457 "PP: internal error, state == ATTRIBUTE_VALUE\n");
11458 ctxt->instate = XML_PARSER_START_TAG;
11459#ifdef DEBUG_PUSH
11460 xmlGenericError(xmlGenericErrorContext,
11461 "PP: entering START_TAG\n");
11462#endif
11463 break;
11464 case XML_PARSER_SYSTEM_LITERAL:
11465 xmlGenericError(xmlGenericErrorContext,
11466 "PP: internal error, state == SYSTEM_LITERAL\n");
11467 ctxt->instate = XML_PARSER_START_TAG;
11468#ifdef DEBUG_PUSH
11469 xmlGenericError(xmlGenericErrorContext,
11470 "PP: entering START_TAG\n");
11471#endif
11472 break;
Daniel Veillard4a7ae502002-02-18 19:18:17 +000011473 case XML_PARSER_PUBLIC_LITERAL:
11474 xmlGenericError(xmlGenericErrorContext,
11475 "PP: internal error, state == PUBLIC_LITERAL\n");
11476 ctxt->instate = XML_PARSER_START_TAG;
11477#ifdef DEBUG_PUSH
11478 xmlGenericError(xmlGenericErrorContext,
11479 "PP: entering START_TAG\n");
11480#endif
11481 break;
Owen Taylor3473f882001-02-23 17:55:21 +000011482 }
11483 }
11484done:
11485#ifdef DEBUG_PUSH
11486 xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret);
11487#endif
11488 return(ret);
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011489encoding_error:
11490 {
11491 char buffer[150];
11492
11493 snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
11494 ctxt->input->cur[0], ctxt->input->cur[1],
11495 ctxt->input->cur[2], ctxt->input->cur[3]);
11496 __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
11497 "Input is not proper UTF-8, indicate encoding !\n%s",
11498 BAD_CAST buffer, NULL);
11499 }
11500 return(0);
Owen Taylor3473f882001-02-23 17:55:21 +000011501}
11502
11503/**
Owen Taylor3473f882001-02-23 17:55:21 +000011504 * xmlParseChunk:
11505 * @ctxt: an XML parser context
11506 * @chunk: an char array
11507 * @size: the size in byte of the chunk
11508 * @terminate: last chunk indicator
11509 *
11510 * Parse a Chunk of memory
11511 *
11512 * Returns zero if no error, the xmlParserErrors otherwise.
11513 */
11514int
11515xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
11516 int terminate) {
Daniel Veillarda617e242006-01-09 14:38:44 +000011517 int end_in_lf = 0;
Daniel Veillarda6c76a22009-08-26 14:37:00 +020011518 int remain = 0;
Daniel Veillarda617e242006-01-09 14:38:44 +000011519
Daniel Veillard36e5cd52004-11-02 14:52:23 +000011520 if (ctxt == NULL)
11521 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard14412512005-01-21 23:53:26 +000011522 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000011523 return(ctxt->errNo);
Daniel Veillard309f81d2003-09-23 09:02:53 +000011524 if (ctxt->instate == XML_PARSER_START)
11525 xmlDetectSAX2(ctxt);
Daniel Veillarda617e242006-01-09 14:38:44 +000011526 if ((size > 0) && (chunk != NULL) && (!terminate) &&
11527 (chunk[size - 1] == '\r')) {
11528 end_in_lf = 1;
11529 size--;
11530 }
Daniel Veillarda6c76a22009-08-26 14:37:00 +020011531
11532xmldecl_done:
11533
Owen Taylor3473f882001-02-23 17:55:21 +000011534 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
11535 (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) {
11536 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
11537 int cur = ctxt->input->cur - ctxt->input->base;
William M. Bracka3215c72004-07-31 16:24:01 +000011538 int res;
Daniel Veillarda6c76a22009-08-26 14:37:00 +020011539
11540 /*
11541 * Specific handling if we autodetected an encoding, we should not
11542 * push more than the first line ... which depend on the encoding
11543 * And only push the rest once the final encoding was detected
11544 */
11545 if ((ctxt->instate == XML_PARSER_START) && (ctxt->input != NULL) &&
11546 (ctxt->input->buf != NULL) && (ctxt->input->buf->encoder != NULL)) {
11547 int len = 45;
11548
11549 if ((xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
11550 BAD_CAST "UTF-16")) ||
11551 (xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
11552 BAD_CAST "UTF16")))
11553 len = 90;
11554 else if ((xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
11555 BAD_CAST "UCS-4")) ||
11556 (xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
11557 BAD_CAST "UCS4")))
11558 len = 180;
11559
11560 if (ctxt->input->buf->rawconsumed < len)
11561 len -= ctxt->input->buf->rawconsumed;
11562
11563 remain = size - len;
11564 size = len;
11565 }
William M. Bracka3215c72004-07-31 16:24:01 +000011566 res =xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
11567 if (res < 0) {
11568 ctxt->errNo = XML_PARSER_EOF;
11569 ctxt->disableSAX = 1;
11570 return (XML_PARSER_EOF);
11571 }
Owen Taylor3473f882001-02-23 17:55:21 +000011572 ctxt->input->base = ctxt->input->buf->buffer->content + base;
11573 ctxt->input->cur = ctxt->input->base + cur;
Daniel Veillard48b2f892001-02-25 16:11:03 +000011574 ctxt->input->end =
11575 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000011576#ifdef DEBUG_PUSH
11577 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
11578#endif
11579
Owen Taylor3473f882001-02-23 17:55:21 +000011580 } else if (ctxt->instate != XML_PARSER_EOF) {
11581 if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
11582 xmlParserInputBufferPtr in = ctxt->input->buf;
11583 if ((in->encoder != NULL) && (in->buffer != NULL) &&
11584 (in->raw != NULL)) {
11585 int nbchars;
Daniel Veillarda6c76a22009-08-26 14:37:00 +020011586
Owen Taylor3473f882001-02-23 17:55:21 +000011587 nbchars = xmlCharEncInFunc(in->encoder, in->buffer, in->raw);
11588 if (nbchars < 0) {
Daniel Veillard24eb9782003-10-04 21:08:09 +000011589 /* TODO 2.6.0 */
Owen Taylor3473f882001-02-23 17:55:21 +000011590 xmlGenericError(xmlGenericErrorContext,
11591 "xmlParseChunk: encoder error\n");
11592 return(XML_ERR_INVALID_ENCODING);
11593 }
11594 }
11595 }
11596 }
Daniel Veillarda6c76a22009-08-26 14:37:00 +020011597 if (remain != 0)
11598 xmlParseTryOrFinish(ctxt, 0);
11599 else
11600 xmlParseTryOrFinish(ctxt, terminate);
11601 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
11602 return(ctxt->errNo);
11603
11604 if (remain != 0) {
11605 chunk += size;
11606 size = remain;
11607 remain = 0;
11608 goto xmldecl_done;
11609 }
Daniel Veillarda617e242006-01-09 14:38:44 +000011610 if ((end_in_lf == 1) && (ctxt->input != NULL) &&
11611 (ctxt->input->buf != NULL)) {
11612 xmlParserInputBufferPush(ctxt->input->buf, 1, "\r");
11613 }
Owen Taylor3473f882001-02-23 17:55:21 +000011614 if (terminate) {
11615 /*
11616 * Check for termination
11617 */
Daniel Veillard36e5cd52004-11-02 14:52:23 +000011618 int avail = 0;
11619
11620 if (ctxt->input != NULL) {
Daniel Veillard819d5cb2002-10-14 11:15:18 +000011621 if (ctxt->input->buf == NULL)
Daniel Veillard36e5cd52004-11-02 14:52:23 +000011622 avail = ctxt->input->length -
11623 (ctxt->input->cur - ctxt->input->base);
11624 else
11625 avail = ctxt->input->buf->buffer->use -
11626 (ctxt->input->cur - ctxt->input->base);
11627 }
Daniel Veillard819d5cb2002-10-14 11:15:18 +000011628
Owen Taylor3473f882001-02-23 17:55:21 +000011629 if ((ctxt->instate != XML_PARSER_EOF) &&
11630 (ctxt->instate != XML_PARSER_EPILOG)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011631 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011632 }
Daniel Veillard819d5cb2002-10-14 11:15:18 +000011633 if ((ctxt->instate == XML_PARSER_EPILOG) && (avail > 0)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011634 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Daniel Veillard819d5cb2002-10-14 11:15:18 +000011635 }
Owen Taylor3473f882001-02-23 17:55:21 +000011636 if (ctxt->instate != XML_PARSER_EOF) {
Daniel Veillard8d24cc12002-03-05 15:41:29 +000011637 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +000011638 ctxt->sax->endDocument(ctxt->userData);
11639 }
11640 ctxt->instate = XML_PARSER_EOF;
11641 }
11642 return((xmlParserErrors) ctxt->errNo);
11643}
11644
11645/************************************************************************
11646 * *
11647 * I/O front end functions to the parser *
11648 * *
11649 ************************************************************************/
11650
11651/**
Owen Taylor3473f882001-02-23 17:55:21 +000011652 * xmlCreatePushParserCtxt:
11653 * @sax: a SAX handler
11654 * @user_data: The user data returned on SAX callbacks
11655 * @chunk: a pointer to an array of chars
11656 * @size: number of chars in the array
11657 * @filename: an optional file name or URI
11658 *
Daniel Veillard176d99f2002-07-06 19:22:28 +000011659 * Create a parser context for using the XML parser in push mode.
11660 * If @buffer and @size are non-NULL, the data is used to detect
11661 * the encoding. The remaining characters will be parsed so they
11662 * don't need to be fed in again through xmlParseChunk.
Owen Taylor3473f882001-02-23 17:55:21 +000011663 * To allow content encoding detection, @size should be >= 4
11664 * The value of @filename is used for fetching external entities
11665 * and error/warning reports.
11666 *
11667 * Returns the new parser context or NULL
11668 */
Daniel Veillard176d99f2002-07-06 19:22:28 +000011669
Owen Taylor3473f882001-02-23 17:55:21 +000011670xmlParserCtxtPtr
11671xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
11672 const char *chunk, int size, const char *filename) {
11673 xmlParserCtxtPtr ctxt;
11674 xmlParserInputPtr inputStream;
11675 xmlParserInputBufferPtr buf;
11676 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
11677
11678 /*
11679 * plug some encoding conversion routines
11680 */
11681 if ((chunk != NULL) && (size >= 4))
11682 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
11683
11684 buf = xmlAllocParserInputBuffer(enc);
11685 if (buf == NULL) return(NULL);
11686
11687 ctxt = xmlNewParserCtxt();
11688 if (ctxt == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +000011689 xmlErrMemory(NULL, "creating parser: out of memory\n");
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000011690 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000011691 return(NULL);
11692 }
Daniel Veillard03a53c32004-10-26 16:06:51 +000011693 ctxt->dictNames = 1;
Daniel Veillarde57ec792003-09-10 10:50:59 +000011694 ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 * sizeof(xmlChar *));
11695 if (ctxt->pushTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011696 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +000011697 xmlFreeParserInputBuffer(buf);
11698 xmlFreeParserCtxt(ctxt);
11699 return(NULL);
11700 }
Owen Taylor3473f882001-02-23 17:55:21 +000011701 if (sax != NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +000011702#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard092643b2003-09-25 14:29:29 +000011703 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Daniel Veillard81273902003-09-30 00:43:48 +000011704#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000011705 xmlFree(ctxt->sax);
11706 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
11707 if (ctxt->sax == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011708 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000011709 xmlFreeParserInputBuffer(buf);
11710 xmlFreeParserCtxt(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011711 return(NULL);
11712 }
Daniel Veillard5ea30d72004-11-08 11:54:28 +000011713 memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
11714 if (sax->initialized == XML_SAX2_MAGIC)
11715 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
11716 else
11717 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
Owen Taylor3473f882001-02-23 17:55:21 +000011718 if (user_data != NULL)
11719 ctxt->userData = user_data;
11720 }
11721 if (filename == NULL) {
11722 ctxt->directory = NULL;
11723 } else {
11724 ctxt->directory = xmlParserGetDirectory(filename);
11725 }
11726
11727 inputStream = xmlNewInputStream(ctxt);
11728 if (inputStream == NULL) {
11729 xmlFreeParserCtxt(ctxt);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000011730 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000011731 return(NULL);
11732 }
11733
11734 if (filename == NULL)
11735 inputStream->filename = NULL;
William M. Bracka3215c72004-07-31 16:24:01 +000011736 else {
Daniel Veillardf4862f02002-09-10 11:13:43 +000011737 inputStream->filename = (char *)
Igor Zlatkovic5f9fada2003-02-19 14:51:00 +000011738 xmlCanonicPath((const xmlChar *) filename);
William M. Bracka3215c72004-07-31 16:24:01 +000011739 if (inputStream->filename == NULL) {
11740 xmlFreeParserCtxt(ctxt);
11741 xmlFreeParserInputBuffer(buf);
11742 return(NULL);
11743 }
11744 }
Owen Taylor3473f882001-02-23 17:55:21 +000011745 inputStream->buf = buf;
11746 inputStream->base = inputStream->buf->buffer->content;
11747 inputStream->cur = inputStream->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +000011748 inputStream->end =
11749 &inputStream->buf->buffer->content[inputStream->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000011750
11751 inputPush(ctxt, inputStream);
11752
William M. Brack3a1cd212005-02-11 14:35:54 +000011753 /*
11754 * If the caller didn't provide an initial 'chunk' for determining
11755 * the encoding, we set the context to XML_CHAR_ENCODING_NONE so
11756 * that it can be automatically determined later
11757 */
11758 if ((size == 0) || (chunk == NULL)) {
11759 ctxt->charset = XML_CHAR_ENCODING_NONE;
11760 } else if ((ctxt->input != NULL) && (ctxt->input->buf != NULL)) {
Daniel Veillardaa39a0f2002-01-06 12:47:22 +000011761 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
11762 int cur = ctxt->input->cur - ctxt->input->base;
11763
Owen Taylor3473f882001-02-23 17:55:21 +000011764 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
Daniel Veillardaa39a0f2002-01-06 12:47:22 +000011765
11766 ctxt->input->base = ctxt->input->buf->buffer->content + base;
11767 ctxt->input->cur = ctxt->input->base + cur;
11768 ctxt->input->end =
11769 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000011770#ifdef DEBUG_PUSH
11771 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
11772#endif
11773 }
11774
Daniel Veillard0e4cd172001-06-28 12:13:56 +000011775 if (enc != XML_CHAR_ENCODING_NONE) {
11776 xmlSwitchEncoding(ctxt, enc);
11777 }
11778
Owen Taylor3473f882001-02-23 17:55:21 +000011779 return(ctxt);
11780}
Daniel Veillard73b013f2003-09-30 12:36:01 +000011781#endif /* LIBXML_PUSH_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000011782
11783/**
Daniel Veillard39e5c892005-07-03 22:48:50 +000011784 * xmlStopParser:
11785 * @ctxt: an XML parser context
11786 *
11787 * Blocks further parser processing
11788 */
11789void
11790xmlStopParser(xmlParserCtxtPtr ctxt) {
11791 if (ctxt == NULL)
11792 return;
11793 ctxt->instate = XML_PARSER_EOF;
11794 ctxt->disableSAX = 1;
11795 if (ctxt->input != NULL) {
11796 ctxt->input->cur = BAD_CAST"";
11797 ctxt->input->base = ctxt->input->cur;
11798 }
11799}
11800
11801/**
Owen Taylor3473f882001-02-23 17:55:21 +000011802 * xmlCreateIOParserCtxt:
11803 * @sax: a SAX handler
11804 * @user_data: The user data returned on SAX callbacks
11805 * @ioread: an I/O read function
11806 * @ioclose: an I/O close function
11807 * @ioctx: an I/O handler
11808 * @enc: the charset encoding if known
11809 *
11810 * Create a parser context for using the XML parser with an existing
11811 * I/O stream
11812 *
11813 * Returns the new parser context or NULL
11814 */
11815xmlParserCtxtPtr
11816xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
11817 xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
11818 void *ioctx, xmlCharEncoding enc) {
11819 xmlParserCtxtPtr ctxt;
11820 xmlParserInputPtr inputStream;
11821 xmlParserInputBufferPtr buf;
Daniel Veillard42595322004-11-08 10:52:06 +000011822
11823 if (ioread == NULL) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011824
11825 buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
11826 if (buf == NULL) return(NULL);
11827
11828 ctxt = xmlNewParserCtxt();
11829 if (ctxt == NULL) {
Daniel Veillardf0af8ec2005-07-08 17:27:33 +000011830 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000011831 return(NULL);
11832 }
11833 if (sax != NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +000011834#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard092643b2003-09-25 14:29:29 +000011835 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Daniel Veillard81273902003-09-30 00:43:48 +000011836#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000011837 xmlFree(ctxt->sax);
11838 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
11839 if (ctxt->sax == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011840 xmlErrMemory(ctxt, NULL);
Daniel Veillardf0af8ec2005-07-08 17:27:33 +000011841 xmlFreeParserCtxt(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011842 return(NULL);
11843 }
Daniel Veillard5ea30d72004-11-08 11:54:28 +000011844 memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
11845 if (sax->initialized == XML_SAX2_MAGIC)
11846 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
11847 else
11848 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
Owen Taylor3473f882001-02-23 17:55:21 +000011849 if (user_data != NULL)
11850 ctxt->userData = user_data;
11851 }
11852
11853 inputStream = xmlNewIOInputStream(ctxt, buf, enc);
11854 if (inputStream == NULL) {
11855 xmlFreeParserCtxt(ctxt);
11856 return(NULL);
11857 }
11858 inputPush(ctxt, inputStream);
11859
11860 return(ctxt);
11861}
11862
Daniel Veillard4432df22003-09-28 18:58:27 +000011863#ifdef LIBXML_VALID_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000011864/************************************************************************
11865 * *
Daniel Veillardcbaf3992001-12-31 16:16:02 +000011866 * Front ends when parsing a DTD *
Owen Taylor3473f882001-02-23 17:55:21 +000011867 * *
11868 ************************************************************************/
11869
11870/**
11871 * xmlIOParseDTD:
11872 * @sax: the SAX handler block or NULL
11873 * @input: an Input Buffer
11874 * @enc: the charset encoding if known
11875 *
11876 * Load and parse a DTD
11877 *
11878 * Returns the resulting xmlDtdPtr or NULL in case of error.
Daniel Veillard402b3442006-10-13 10:28:21 +000011879 * @input will be freed by the function in any case.
Owen Taylor3473f882001-02-23 17:55:21 +000011880 */
11881
11882xmlDtdPtr
11883xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
11884 xmlCharEncoding enc) {
11885 xmlDtdPtr ret = NULL;
11886 xmlParserCtxtPtr ctxt;
11887 xmlParserInputPtr pinput = NULL;
Daniel Veillard87a764e2001-06-20 17:41:10 +000011888 xmlChar start[4];
Owen Taylor3473f882001-02-23 17:55:21 +000011889
11890 if (input == NULL)
11891 return(NULL);
11892
11893 ctxt = xmlNewParserCtxt();
11894 if (ctxt == NULL) {
Daniel Veillard402b3442006-10-13 10:28:21 +000011895 xmlFreeParserInputBuffer(input);
Owen Taylor3473f882001-02-23 17:55:21 +000011896 return(NULL);
11897 }
11898
11899 /*
11900 * Set-up the SAX context
11901 */
11902 if (sax != NULL) {
11903 if (ctxt->sax != NULL)
11904 xmlFree(ctxt->sax);
11905 ctxt->sax = sax;
Daniel Veillard500a1de2004-03-22 15:22:58 +000011906 ctxt->userData = ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000011907 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000011908 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011909
11910 /*
11911 * generate a parser input from the I/O handler
11912 */
11913
Daniel Veillard43caefb2003-12-07 19:32:22 +000011914 pinput = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
Owen Taylor3473f882001-02-23 17:55:21 +000011915 if (pinput == NULL) {
11916 if (sax != NULL) ctxt->sax = NULL;
Daniel Veillard402b3442006-10-13 10:28:21 +000011917 xmlFreeParserInputBuffer(input);
Owen Taylor3473f882001-02-23 17:55:21 +000011918 xmlFreeParserCtxt(ctxt);
11919 return(NULL);
11920 }
11921
11922 /*
11923 * plug some encoding conversion routines here.
11924 */
Daniel Veillarda8f09ce2008-08-27 13:02:01 +000011925 if (xmlPushInput(ctxt, pinput) < 0) {
11926 if (sax != NULL) ctxt->sax = NULL;
11927 xmlFreeParserCtxt(ctxt);
11928 return(NULL);
11929 }
Daniel Veillard43caefb2003-12-07 19:32:22 +000011930 if (enc != XML_CHAR_ENCODING_NONE) {
11931 xmlSwitchEncoding(ctxt, enc);
11932 }
Owen Taylor3473f882001-02-23 17:55:21 +000011933
11934 pinput->filename = NULL;
11935 pinput->line = 1;
11936 pinput->col = 1;
11937 pinput->base = ctxt->input->cur;
11938 pinput->cur = ctxt->input->cur;
11939 pinput->free = NULL;
11940
11941 /*
11942 * let's parse that entity knowing it's an external subset.
11943 */
11944 ctxt->inSubset = 2;
11945 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
Daniel Veillardae0765b2008-07-31 19:54:59 +000011946 if (ctxt->myDoc == NULL) {
11947 xmlErrMemory(ctxt, "New Doc failed");
11948 return(NULL);
11949 }
11950 ctxt->myDoc->properties = XML_DOC_INTERNAL;
Owen Taylor3473f882001-02-23 17:55:21 +000011951 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
11952 BAD_CAST "none", BAD_CAST "none");
Daniel Veillard87a764e2001-06-20 17:41:10 +000011953
Daniel Veillard4aede2e2003-10-17 12:43:59 +000011954 if ((enc == XML_CHAR_ENCODING_NONE) &&
11955 ((ctxt->input->end - ctxt->input->cur) >= 4)) {
Daniel Veillard87a764e2001-06-20 17:41:10 +000011956 /*
11957 * Get the 4 first bytes and decode the charset
11958 * if enc != XML_CHAR_ENCODING_NONE
11959 * plug some encoding conversion routines.
11960 */
11961 start[0] = RAW;
11962 start[1] = NXT(1);
11963 start[2] = NXT(2);
11964 start[3] = NXT(3);
11965 enc = xmlDetectCharEncoding(start, 4);
11966 if (enc != XML_CHAR_ENCODING_NONE) {
11967 xmlSwitchEncoding(ctxt, enc);
11968 }
11969 }
11970
Owen Taylor3473f882001-02-23 17:55:21 +000011971 xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
11972
11973 if (ctxt->myDoc != NULL) {
11974 if (ctxt->wellFormed) {
11975 ret = ctxt->myDoc->extSubset;
11976 ctxt->myDoc->extSubset = NULL;
Daniel Veillard329456a2003-04-26 21:21:00 +000011977 if (ret != NULL) {
11978 xmlNodePtr tmp;
11979
11980 ret->doc = NULL;
11981 tmp = ret->children;
11982 while (tmp != NULL) {
11983 tmp->doc = NULL;
11984 tmp = tmp->next;
11985 }
11986 }
Owen Taylor3473f882001-02-23 17:55:21 +000011987 } else {
11988 ret = NULL;
11989 }
11990 xmlFreeDoc(ctxt->myDoc);
11991 ctxt->myDoc = NULL;
11992 }
11993 if (sax != NULL) ctxt->sax = NULL;
11994 xmlFreeParserCtxt(ctxt);
11995
11996 return(ret);
11997}
11998
11999/**
12000 * xmlSAXParseDTD:
12001 * @sax: the SAX handler block
12002 * @ExternalID: a NAME* containing the External ID of the DTD
12003 * @SystemID: a NAME* containing the URL to the DTD
12004 *
12005 * Load and parse an external subset.
12006 *
12007 * Returns the resulting xmlDtdPtr or NULL in case of error.
12008 */
12009
12010xmlDtdPtr
12011xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
12012 const xmlChar *SystemID) {
12013 xmlDtdPtr ret = NULL;
12014 xmlParserCtxtPtr ctxt;
12015 xmlParserInputPtr input = NULL;
12016 xmlCharEncoding enc;
Igor Zlatkovic07d59762004-08-24 19:12:51 +000012017 xmlChar* systemIdCanonic;
Owen Taylor3473f882001-02-23 17:55:21 +000012018
12019 if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
12020
12021 ctxt = xmlNewParserCtxt();
12022 if (ctxt == NULL) {
12023 return(NULL);
12024 }
12025
12026 /*
12027 * Set-up the SAX context
12028 */
12029 if (sax != NULL) {
12030 if (ctxt->sax != NULL)
12031 xmlFree(ctxt->sax);
12032 ctxt->sax = sax;
Daniel Veillardbf1e3d82003-08-14 23:57:26 +000012033 ctxt->userData = ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000012034 }
Igor Zlatkovic07d59762004-08-24 19:12:51 +000012035
12036 /*
12037 * Canonicalise the system ID
12038 */
12039 systemIdCanonic = xmlCanonicPath(SystemID);
Daniel Veillardc93a19f2004-10-04 11:53:20 +000012040 if ((SystemID != NULL) && (systemIdCanonic == NULL)) {
Igor Zlatkovic07d59762004-08-24 19:12:51 +000012041 xmlFreeParserCtxt(ctxt);
12042 return(NULL);
12043 }
Owen Taylor3473f882001-02-23 17:55:21 +000012044
12045 /*
12046 * Ask the Entity resolver to load the damn thing
12047 */
12048
12049 if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
Daniel Veillarda9557952006-10-12 12:53:15 +000012050 input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID,
12051 systemIdCanonic);
Owen Taylor3473f882001-02-23 17:55:21 +000012052 if (input == NULL) {
12053 if (sax != NULL) ctxt->sax = NULL;
12054 xmlFreeParserCtxt(ctxt);
Daniel Veillard34099b42004-11-04 17:34:35 +000012055 if (systemIdCanonic != NULL)
12056 xmlFree(systemIdCanonic);
Owen Taylor3473f882001-02-23 17:55:21 +000012057 return(NULL);
12058 }
12059
12060 /*
12061 * plug some encoding conversion routines here.
12062 */
Daniel Veillarda8f09ce2008-08-27 13:02:01 +000012063 if (xmlPushInput(ctxt, input) < 0) {
12064 if (sax != NULL) ctxt->sax = NULL;
12065 xmlFreeParserCtxt(ctxt);
12066 if (systemIdCanonic != NULL)
12067 xmlFree(systemIdCanonic);
12068 return(NULL);
12069 }
Daniel Veillard4aede2e2003-10-17 12:43:59 +000012070 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
12071 enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
12072 xmlSwitchEncoding(ctxt, enc);
12073 }
Owen Taylor3473f882001-02-23 17:55:21 +000012074
12075 if (input->filename == NULL)
Igor Zlatkovic07d59762004-08-24 19:12:51 +000012076 input->filename = (char *) systemIdCanonic;
12077 else
12078 xmlFree(systemIdCanonic);
Owen Taylor3473f882001-02-23 17:55:21 +000012079 input->line = 1;
12080 input->col = 1;
12081 input->base = ctxt->input->cur;
12082 input->cur = ctxt->input->cur;
12083 input->free = NULL;
12084
12085 /*
12086 * let's parse that entity knowing it's an external subset.
12087 */
12088 ctxt->inSubset = 2;
12089 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
Daniel Veillardae0765b2008-07-31 19:54:59 +000012090 if (ctxt->myDoc == NULL) {
12091 xmlErrMemory(ctxt, "New Doc failed");
12092 if (sax != NULL) ctxt->sax = NULL;
12093 xmlFreeParserCtxt(ctxt);
12094 return(NULL);
12095 }
12096 ctxt->myDoc->properties = XML_DOC_INTERNAL;
Owen Taylor3473f882001-02-23 17:55:21 +000012097 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
12098 ExternalID, SystemID);
12099 xmlParseExternalSubset(ctxt, ExternalID, SystemID);
12100
12101 if (ctxt->myDoc != NULL) {
12102 if (ctxt->wellFormed) {
12103 ret = ctxt->myDoc->extSubset;
12104 ctxt->myDoc->extSubset = NULL;
Daniel Veillardc5573462003-04-25 16:43:49 +000012105 if (ret != NULL) {
12106 xmlNodePtr tmp;
12107
12108 ret->doc = NULL;
12109 tmp = ret->children;
12110 while (tmp != NULL) {
12111 tmp->doc = NULL;
12112 tmp = tmp->next;
12113 }
12114 }
Owen Taylor3473f882001-02-23 17:55:21 +000012115 } else {
12116 ret = NULL;
12117 }
12118 xmlFreeDoc(ctxt->myDoc);
12119 ctxt->myDoc = NULL;
12120 }
12121 if (sax != NULL) ctxt->sax = NULL;
12122 xmlFreeParserCtxt(ctxt);
Daniel Veillard0161e632008-08-28 15:36:32 +000012123
Owen Taylor3473f882001-02-23 17:55:21 +000012124 return(ret);
12125}
12126
Daniel Veillard4432df22003-09-28 18:58:27 +000012127
Owen Taylor3473f882001-02-23 17:55:21 +000012128/**
12129 * xmlParseDTD:
12130 * @ExternalID: a NAME* containing the External ID of the DTD
12131 * @SystemID: a NAME* containing the URL to the DTD
12132 *
12133 * Load and parse an external subset.
Daniel Veillard0161e632008-08-28 15:36:32 +000012134 *
Owen Taylor3473f882001-02-23 17:55:21 +000012135 * Returns the resulting xmlDtdPtr or NULL in case of error.
12136 */
12137
12138xmlDtdPtr
12139xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
12140 return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
12141}
Daniel Veillard4432df22003-09-28 18:58:27 +000012142#endif /* LIBXML_VALID_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012143
12144/************************************************************************
12145 * *
12146 * Front ends when parsing an Entity *
12147 * *
12148 ************************************************************************/
12149
12150/**
Owen Taylor3473f882001-02-23 17:55:21 +000012151 * xmlParseCtxtExternalEntity:
12152 * @ctx: the existing parsing context
12153 * @URL: the URL for the entity to load
12154 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +000012155 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +000012156 *
12157 * Parse an external general entity within an existing parsing context
12158 * An external general parsed entity is well-formed if it matches the
12159 * production labeled extParsedEnt.
12160 *
12161 * [78] extParsedEnt ::= TextDecl? content
12162 *
12163 * Returns 0 if the entity is well formed, -1 in case of args problem and
12164 * the parser error code otherwise
12165 */
12166
12167int
12168xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
Daniel Veillardcda96922001-08-21 10:56:31 +000012169 const xmlChar *ID, xmlNodePtr *lst) {
Owen Taylor3473f882001-02-23 17:55:21 +000012170 xmlParserCtxtPtr ctxt;
12171 xmlDocPtr newDoc;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012172 xmlNodePtr newRoot;
Owen Taylor3473f882001-02-23 17:55:21 +000012173 xmlSAXHandlerPtr oldsax = NULL;
12174 int ret = 0;
Daniel Veillard87a764e2001-06-20 17:41:10 +000012175 xmlChar start[4];
12176 xmlCharEncoding enc;
Owen Taylor3473f882001-02-23 17:55:21 +000012177
Daniel Veillardce682bc2004-11-05 17:22:25 +000012178 if (ctx == NULL) return(-1);
12179
Daniel Veillard0161e632008-08-28 15:36:32 +000012180 if (((ctx->depth > 40) && ((ctx->options & XML_PARSE_HUGE) == 0)) ||
12181 (ctx->depth > 1024)) {
Owen Taylor3473f882001-02-23 17:55:21 +000012182 return(XML_ERR_ENTITY_LOOP);
12183 }
12184
Daniel Veillardcda96922001-08-21 10:56:31 +000012185 if (lst != NULL)
12186 *lst = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000012187 if ((URL == NULL) && (ID == NULL))
12188 return(-1);
12189 if (ctx->myDoc == NULL) /* @@ relax but check for dereferences */
12190 return(-1);
12191
Rob Richards798743a2009-06-19 13:54:25 -040012192 ctxt = xmlCreateEntityParserCtxtInternal(URL, ID, NULL, ctx);
Daniel Veillard2937b3a2006-10-10 08:52:34 +000012193 if (ctxt == NULL) {
12194 return(-1);
12195 }
Daniel Veillard40ec29a2008-07-30 12:35:40 +000012196
Owen Taylor3473f882001-02-23 17:55:21 +000012197 oldsax = ctxt->sax;
12198 ctxt->sax = ctx->sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000012199 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000012200 newDoc = xmlNewDoc(BAD_CAST "1.0");
12201 if (newDoc == NULL) {
12202 xmlFreeParserCtxt(ctxt);
12203 return(-1);
12204 }
Daniel Veillardae0765b2008-07-31 19:54:59 +000012205 newDoc->properties = XML_DOC_INTERNAL;
Daniel Veillard03a53c32004-10-26 16:06:51 +000012206 if (ctx->myDoc->dict) {
12207 newDoc->dict = ctx->myDoc->dict;
12208 xmlDictReference(newDoc->dict);
12209 }
Owen Taylor3473f882001-02-23 17:55:21 +000012210 if (ctx->myDoc != NULL) {
12211 newDoc->intSubset = ctx->myDoc->intSubset;
12212 newDoc->extSubset = ctx->myDoc->extSubset;
12213 }
12214 if (ctx->myDoc->URL != NULL) {
12215 newDoc->URL = xmlStrdup(ctx->myDoc->URL);
12216 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012217 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
12218 if (newRoot == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000012219 ctxt->sax = oldsax;
12220 xmlFreeParserCtxt(ctxt);
12221 newDoc->intSubset = NULL;
12222 newDoc->extSubset = NULL;
12223 xmlFreeDoc(newDoc);
12224 return(-1);
12225 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012226 xmlAddChild((xmlNodePtr) newDoc, newRoot);
Owen Taylor3473f882001-02-23 17:55:21 +000012227 nodePush(ctxt, newDoc->children);
12228 if (ctx->myDoc == NULL) {
12229 ctxt->myDoc = newDoc;
12230 } else {
12231 ctxt->myDoc = ctx->myDoc;
12232 newDoc->children->doc = ctx->myDoc;
12233 }
12234
Daniel Veillard40ec29a2008-07-30 12:35:40 +000012235 /*
Daniel Veillard87a764e2001-06-20 17:41:10 +000012236 * Get the 4 first bytes and decode the charset
12237 * if enc != XML_CHAR_ENCODING_NONE
12238 * plug some encoding conversion routines.
12239 */
12240 GROW
Daniel Veillard4aede2e2003-10-17 12:43:59 +000012241 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
12242 start[0] = RAW;
12243 start[1] = NXT(1);
12244 start[2] = NXT(2);
12245 start[3] = NXT(3);
12246 enc = xmlDetectCharEncoding(start, 4);
12247 if (enc != XML_CHAR_ENCODING_NONE) {
12248 xmlSwitchEncoding(ctxt, enc);
12249 }
Daniel Veillard87a764e2001-06-20 17:41:10 +000012250 }
12251
Owen Taylor3473f882001-02-23 17:55:21 +000012252 /*
12253 * Parse a possible text declaration first
12254 */
Daniel Veillarda07050d2003-10-19 14:46:32 +000012255 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +000012256 xmlParseTextDecl(ctxt);
Daniel Veillard40ec29a2008-07-30 12:35:40 +000012257 /*
12258 * An XML-1.0 document can't reference an entity not XML-1.0
12259 */
12260 if ((xmlStrEqual(ctx->version, BAD_CAST "1.0")) &&
12261 (!xmlStrEqual(ctxt->input->version, BAD_CAST "1.0"))) {
12262 xmlFatalErrMsg(ctxt, XML_ERR_VERSION_MISMATCH,
12263 "Version mismatch between document and entity\n");
12264 }
Owen Taylor3473f882001-02-23 17:55:21 +000012265 }
12266
12267 /*
12268 * Doing validity checking on chunk doesn't make sense
12269 */
12270 ctxt->instate = XML_PARSER_CONTENT;
12271 ctxt->validate = ctx->validate;
Daniel Veillard5f8d1a32003-03-23 21:02:00 +000012272 ctxt->valid = ctx->valid;
Owen Taylor3473f882001-02-23 17:55:21 +000012273 ctxt->loadsubset = ctx->loadsubset;
12274 ctxt->depth = ctx->depth + 1;
12275 ctxt->replaceEntities = ctx->replaceEntities;
12276 if (ctxt->validate) {
12277 ctxt->vctxt.error = ctx->vctxt.error;
12278 ctxt->vctxt.warning = ctx->vctxt.warning;
Owen Taylor3473f882001-02-23 17:55:21 +000012279 } else {
12280 ctxt->vctxt.error = NULL;
12281 ctxt->vctxt.warning = NULL;
12282 }
Daniel Veillarda9142e72001-06-19 11:07:54 +000012283 ctxt->vctxt.nodeTab = NULL;
12284 ctxt->vctxt.nodeNr = 0;
12285 ctxt->vctxt.nodeMax = 0;
12286 ctxt->vctxt.node = NULL;
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000012287 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
12288 ctxt->dict = ctx->dict;
Daniel Veillardfd343dc2003-10-31 10:55:22 +000012289 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
12290 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
12291 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000012292 ctxt->dictNames = ctx->dictNames;
12293 ctxt->attsDefault = ctx->attsDefault;
12294 ctxt->attsSpecial = ctx->attsSpecial;
William M. Brack503b6102004-08-19 02:17:27 +000012295 ctxt->linenumbers = ctx->linenumbers;
Owen Taylor3473f882001-02-23 17:55:21 +000012296
12297 xmlParseContent(ctxt);
Daniel Veillard40ec29a2008-07-30 12:35:40 +000012298
Daniel Veillard5f8d1a32003-03-23 21:02:00 +000012299 ctx->validate = ctxt->validate;
12300 ctx->valid = ctxt->valid;
Owen Taylor3473f882001-02-23 17:55:21 +000012301 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012302 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000012303 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012304 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000012305 }
12306 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012307 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000012308 }
12309
12310 if (!ctxt->wellFormed) {
12311 if (ctxt->errNo == 0)
12312 ret = 1;
12313 else
12314 ret = ctxt->errNo;
12315 } else {
Daniel Veillardcda96922001-08-21 10:56:31 +000012316 if (lst != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000012317 xmlNodePtr cur;
12318
12319 /*
12320 * Return the newly created nodeset after unlinking it from
12321 * they pseudo parent.
12322 */
12323 cur = newDoc->children->children;
Daniel Veillardcda96922001-08-21 10:56:31 +000012324 *lst = cur;
Owen Taylor3473f882001-02-23 17:55:21 +000012325 while (cur != NULL) {
12326 cur->parent = NULL;
12327 cur = cur->next;
12328 }
12329 newDoc->children->children = NULL;
12330 }
12331 ret = 0;
12332 }
12333 ctxt->sax = oldsax;
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000012334 ctxt->dict = NULL;
12335 ctxt->attsDefault = NULL;
12336 ctxt->attsSpecial = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000012337 xmlFreeParserCtxt(ctxt);
12338 newDoc->intSubset = NULL;
12339 newDoc->extSubset = NULL;
12340 xmlFreeDoc(newDoc);
Daniel Veillard40ec29a2008-07-30 12:35:40 +000012341
Owen Taylor3473f882001-02-23 17:55:21 +000012342 return(ret);
12343}
12344
12345/**
Daniel Veillard257d9102001-05-08 10:41:44 +000012346 * xmlParseExternalEntityPrivate:
Owen Taylor3473f882001-02-23 17:55:21 +000012347 * @doc: the document the chunk pertains to
Daniel Veillarda97a19b2001-05-20 13:19:52 +000012348 * @oldctxt: the previous parser context if available
Owen Taylor3473f882001-02-23 17:55:21 +000012349 * @sax: the SAX handler bloc (possibly NULL)
12350 * @user_data: The user data returned on SAX callbacks (possibly NULL)
12351 * @depth: Used for loop detection, use 0
12352 * @URL: the URL for the entity to load
12353 * @ID: the System ID for the entity to load
12354 * @list: the return value for the set of parsed nodes
12355 *
Daniel Veillard257d9102001-05-08 10:41:44 +000012356 * Private version of xmlParseExternalEntity()
Owen Taylor3473f882001-02-23 17:55:21 +000012357 *
12358 * Returns 0 if the entity is well formed, -1 in case of args problem and
12359 * the parser error code otherwise
12360 */
12361
Daniel Veillard7d515752003-09-26 19:12:37 +000012362static xmlParserErrors
Daniel Veillarda97a19b2001-05-20 13:19:52 +000012363xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
12364 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +000012365 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +000012366 const xmlChar *ID, xmlNodePtr *list) {
Owen Taylor3473f882001-02-23 17:55:21 +000012367 xmlParserCtxtPtr ctxt;
12368 xmlDocPtr newDoc;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012369 xmlNodePtr newRoot;
Owen Taylor3473f882001-02-23 17:55:21 +000012370 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillard7d515752003-09-26 19:12:37 +000012371 xmlParserErrors ret = XML_ERR_OK;
Daniel Veillard87a764e2001-06-20 17:41:10 +000012372 xmlChar start[4];
12373 xmlCharEncoding enc;
Owen Taylor3473f882001-02-23 17:55:21 +000012374
Daniel Veillard0161e632008-08-28 15:36:32 +000012375 if (((depth > 40) &&
12376 ((oldctxt == NULL) || (oldctxt->options & XML_PARSE_HUGE) == 0)) ||
12377 (depth > 1024)) {
Owen Taylor3473f882001-02-23 17:55:21 +000012378 return(XML_ERR_ENTITY_LOOP);
12379 }
12380
Owen Taylor3473f882001-02-23 17:55:21 +000012381 if (list != NULL)
12382 *list = NULL;
12383 if ((URL == NULL) && (ID == NULL))
Daniel Veillard7d515752003-09-26 19:12:37 +000012384 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard30e76072006-03-09 14:13:55 +000012385 if (doc == NULL)
Daniel Veillard7d515752003-09-26 19:12:37 +000012386 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000012387
12388
Rob Richards9c0aa472009-03-26 18:10:19 +000012389 ctxt = xmlCreateEntityParserCtxtInternal(URL, ID, NULL, oldctxt);
William M. Brackb670e2e2003-09-27 01:05:55 +000012390 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
Owen Taylor3473f882001-02-23 17:55:21 +000012391 ctxt->userData = ctxt;
Daniel Veillarda97a19b2001-05-20 13:19:52 +000012392 if (oldctxt != NULL) {
12393 ctxt->_private = oldctxt->_private;
12394 ctxt->loadsubset = oldctxt->loadsubset;
12395 ctxt->validate = oldctxt->validate;
12396 ctxt->external = oldctxt->external;
Daniel Veillard44e1dd02003-02-21 23:23:28 +000012397 ctxt->record_info = oldctxt->record_info;
12398 ctxt->node_seq.maximum = oldctxt->node_seq.maximum;
12399 ctxt->node_seq.length = oldctxt->node_seq.length;
12400 ctxt->node_seq.buffer = oldctxt->node_seq.buffer;
Daniel Veillarda97a19b2001-05-20 13:19:52 +000012401 } else {
12402 /*
12403 * Doing validity checking on chunk without context
12404 * doesn't make sense
12405 */
12406 ctxt->_private = NULL;
12407 ctxt->validate = 0;
12408 ctxt->external = 2;
12409 ctxt->loadsubset = 0;
12410 }
Owen Taylor3473f882001-02-23 17:55:21 +000012411 if (sax != NULL) {
12412 oldsax = ctxt->sax;
12413 ctxt->sax = sax;
12414 if (user_data != NULL)
12415 ctxt->userData = user_data;
12416 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000012417 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000012418 newDoc = xmlNewDoc(BAD_CAST "1.0");
12419 if (newDoc == NULL) {
Daniel Veillard44e1dd02003-02-21 23:23:28 +000012420 ctxt->node_seq.maximum = 0;
12421 ctxt->node_seq.length = 0;
12422 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000012423 xmlFreeParserCtxt(ctxt);
Daniel Veillard7d515752003-09-26 19:12:37 +000012424 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000012425 }
Daniel Veillardae0765b2008-07-31 19:54:59 +000012426 newDoc->properties = XML_DOC_INTERNAL;
Daniel Veillard30e76072006-03-09 14:13:55 +000012427 newDoc->intSubset = doc->intSubset;
12428 newDoc->extSubset = doc->extSubset;
12429 newDoc->dict = doc->dict;
Daniel Veillard03a53c32004-10-26 16:06:51 +000012430 xmlDictReference(newDoc->dict);
12431
Owen Taylor3473f882001-02-23 17:55:21 +000012432 if (doc->URL != NULL) {
12433 newDoc->URL = xmlStrdup(doc->URL);
12434 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012435 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
12436 if (newRoot == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000012437 if (sax != NULL)
12438 ctxt->sax = oldsax;
Daniel Veillard44e1dd02003-02-21 23:23:28 +000012439 ctxt->node_seq.maximum = 0;
12440 ctxt->node_seq.length = 0;
12441 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000012442 xmlFreeParserCtxt(ctxt);
12443 newDoc->intSubset = NULL;
12444 newDoc->extSubset = NULL;
12445 xmlFreeDoc(newDoc);
Daniel Veillard7d515752003-09-26 19:12:37 +000012446 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000012447 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012448 xmlAddChild((xmlNodePtr) newDoc, newRoot);
Owen Taylor3473f882001-02-23 17:55:21 +000012449 nodePush(ctxt, newDoc->children);
Daniel Veillard30e76072006-03-09 14:13:55 +000012450 ctxt->myDoc = doc;
12451 newRoot->doc = doc;
Owen Taylor3473f882001-02-23 17:55:21 +000012452
Daniel Veillard0161e632008-08-28 15:36:32 +000012453 /*
Daniel Veillard87a764e2001-06-20 17:41:10 +000012454 * Get the 4 first bytes and decode the charset
12455 * if enc != XML_CHAR_ENCODING_NONE
12456 * plug some encoding conversion routines.
12457 */
12458 GROW;
Daniel Veillard4aede2e2003-10-17 12:43:59 +000012459 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
12460 start[0] = RAW;
12461 start[1] = NXT(1);
12462 start[2] = NXT(2);
12463 start[3] = NXT(3);
12464 enc = xmlDetectCharEncoding(start, 4);
12465 if (enc != XML_CHAR_ENCODING_NONE) {
12466 xmlSwitchEncoding(ctxt, enc);
12467 }
Daniel Veillard87a764e2001-06-20 17:41:10 +000012468 }
12469
Owen Taylor3473f882001-02-23 17:55:21 +000012470 /*
12471 * Parse a possible text declaration first
12472 */
Daniel Veillarda07050d2003-10-19 14:46:32 +000012473 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +000012474 xmlParseTextDecl(ctxt);
12475 }
12476
Owen Taylor3473f882001-02-23 17:55:21 +000012477 ctxt->instate = XML_PARSER_CONTENT;
Owen Taylor3473f882001-02-23 17:55:21 +000012478 ctxt->depth = depth;
12479
12480 xmlParseContent(ctxt);
Daniel Veillard0161e632008-08-28 15:36:32 +000012481
Daniel Veillard561b7f82002-03-20 21:55:57 +000012482 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012483 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard561b7f82002-03-20 21:55:57 +000012484 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012485 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000012486 }
12487 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012488 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000012489 }
12490
12491 if (!ctxt->wellFormed) {
12492 if (ctxt->errNo == 0)
Daniel Veillard7d515752003-09-26 19:12:37 +000012493 ret = XML_ERR_INTERNAL_ERROR;
Owen Taylor3473f882001-02-23 17:55:21 +000012494 else
William M. Brack7b9154b2003-09-27 19:23:50 +000012495 ret = (xmlParserErrors)ctxt->errNo;
Owen Taylor3473f882001-02-23 17:55:21 +000012496 } else {
12497 if (list != NULL) {
12498 xmlNodePtr cur;
12499
12500 /*
12501 * Return the newly created nodeset after unlinking it from
12502 * they pseudo parent.
12503 */
12504 cur = newDoc->children->children;
12505 *list = cur;
12506 while (cur != NULL) {
12507 cur->parent = NULL;
12508 cur = cur->next;
12509 }
12510 newDoc->children->children = NULL;
12511 }
Daniel Veillard7d515752003-09-26 19:12:37 +000012512 ret = XML_ERR_OK;
Owen Taylor3473f882001-02-23 17:55:21 +000012513 }
Daniel Veillard0161e632008-08-28 15:36:32 +000012514
12515 /*
12516 * Record in the parent context the number of entities replacement
12517 * done when parsing that reference.
12518 */
Daniel Veillard76d36452009-09-07 11:19:33 +020012519 if (oldctxt != NULL)
12520 oldctxt->nbentities += ctxt->nbentities;
12521
Daniel Veillard0161e632008-08-28 15:36:32 +000012522 /*
12523 * Also record the size of the entity parsed
12524 */
12525 if (ctxt->input != NULL) {
12526 oldctxt->sizeentities += ctxt->input->consumed;
12527 oldctxt->sizeentities += (ctxt->input->cur - ctxt->input->base);
12528 }
12529 /*
12530 * And record the last error if any
12531 */
12532 if (ctxt->lastError.code != XML_ERR_OK)
12533 xmlCopyError(&ctxt->lastError, &oldctxt->lastError);
12534
Owen Taylor3473f882001-02-23 17:55:21 +000012535 if (sax != NULL)
12536 ctxt->sax = oldsax;
Daniel Veillard0046c0f2003-02-23 13:52:30 +000012537 oldctxt->node_seq.maximum = ctxt->node_seq.maximum;
12538 oldctxt->node_seq.length = ctxt->node_seq.length;
12539 oldctxt->node_seq.buffer = ctxt->node_seq.buffer;
Daniel Veillard44e1dd02003-02-21 23:23:28 +000012540 ctxt->node_seq.maximum = 0;
12541 ctxt->node_seq.length = 0;
12542 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000012543 xmlFreeParserCtxt(ctxt);
12544 newDoc->intSubset = NULL;
12545 newDoc->extSubset = NULL;
12546 xmlFreeDoc(newDoc);
Daniel Veillard0161e632008-08-28 15:36:32 +000012547
Owen Taylor3473f882001-02-23 17:55:21 +000012548 return(ret);
12549}
12550
Daniel Veillard81273902003-09-30 00:43:48 +000012551#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000012552/**
Daniel Veillard257d9102001-05-08 10:41:44 +000012553 * xmlParseExternalEntity:
12554 * @doc: the document the chunk pertains to
12555 * @sax: the SAX handler bloc (possibly NULL)
12556 * @user_data: The user data returned on SAX callbacks (possibly NULL)
12557 * @depth: Used for loop detection, use 0
12558 * @URL: the URL for the entity to load
12559 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +000012560 * @lst: the return value for the set of parsed nodes
Daniel Veillard257d9102001-05-08 10:41:44 +000012561 *
12562 * Parse an external general entity
12563 * An external general parsed entity is well-formed if it matches the
12564 * production labeled extParsedEnt.
12565 *
12566 * [78] extParsedEnt ::= TextDecl? content
12567 *
12568 * Returns 0 if the entity is well formed, -1 in case of args problem and
12569 * the parser error code otherwise
12570 */
12571
12572int
12573xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardcda96922001-08-21 10:56:31 +000012574 int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *lst) {
Daniel Veillarda97a19b2001-05-20 13:19:52 +000012575 return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL,
Daniel Veillardcda96922001-08-21 10:56:31 +000012576 ID, lst));
Daniel Veillard257d9102001-05-08 10:41:44 +000012577}
12578
12579/**
Daniel Veillarde020c3a2001-03-21 18:06:15 +000012580 * xmlParseBalancedChunkMemory:
Owen Taylor3473f882001-02-23 17:55:21 +000012581 * @doc: the document the chunk pertains to
12582 * @sax: the SAX handler bloc (possibly NULL)
12583 * @user_data: The user data returned on SAX callbacks (possibly NULL)
12584 * @depth: Used for loop detection, use 0
12585 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
Daniel Veillardcda96922001-08-21 10:56:31 +000012586 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +000012587 *
12588 * Parse a well-balanced chunk of an XML document
12589 * called by the parser
12590 * The allowed sequence for the Well Balanced Chunk is the one defined by
12591 * the content production in the XML grammar:
12592 *
12593 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
12594 *
12595 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
12596 * the parser error code otherwise
12597 */
12598
12599int
12600xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
Daniel Veillardcda96922001-08-21 10:56:31 +000012601 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) {
Daniel Veillard58e44c92002-08-02 22:19:49 +000012602 return xmlParseBalancedChunkMemoryRecover( doc, sax, user_data,
12603 depth, string, lst, 0 );
12604}
Daniel Veillard81273902003-09-30 00:43:48 +000012605#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillard58e44c92002-08-02 22:19:49 +000012606
12607/**
Daniel Veillard328f48c2002-11-15 15:24:34 +000012608 * xmlParseBalancedChunkMemoryInternal:
12609 * @oldctxt: the existing parsing context
12610 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
12611 * @user_data: the user data field for the parser context
12612 * @lst: the return value for the set of parsed nodes
12613 *
12614 *
12615 * Parse a well-balanced chunk of an XML document
12616 * called by the parser
12617 * The allowed sequence for the Well Balanced Chunk is the one defined by
12618 * the content production in the XML grammar:
12619 *
12620 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
12621 *
Daniel Veillard7d515752003-09-26 19:12:37 +000012622 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
12623 * error code otherwise
Daniel Veillard0161e632008-08-28 15:36:32 +000012624 *
Daniel Veillard328f48c2002-11-15 15:24:34 +000012625 * In case recover is set to 1, the nodelist will not be empty even if
Daniel Veillard0161e632008-08-28 15:36:32 +000012626 * the parsed chunk is not well balanced.
Daniel Veillard328f48c2002-11-15 15:24:34 +000012627 */
Daniel Veillard7d515752003-09-26 19:12:37 +000012628static xmlParserErrors
Daniel Veillard328f48c2002-11-15 15:24:34 +000012629xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
12630 const xmlChar *string, void *user_data, xmlNodePtr *lst) {
12631 xmlParserCtxtPtr ctxt;
Daniel Veillard68e9e742002-11-16 15:35:11 +000012632 xmlDocPtr newDoc = NULL;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012633 xmlNodePtr newRoot;
Daniel Veillard328f48c2002-11-15 15:24:34 +000012634 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000012635 xmlNodePtr content = NULL;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012636 xmlNodePtr last = NULL;
Daniel Veillard328f48c2002-11-15 15:24:34 +000012637 int size;
Daniel Veillard7d515752003-09-26 19:12:37 +000012638 xmlParserErrors ret = XML_ERR_OK;
Daniel Veillard74eaec12009-08-26 15:57:20 +020012639#ifdef SAX2
12640 int i;
12641#endif
Daniel Veillard328f48c2002-11-15 15:24:34 +000012642
Daniel Veillard0161e632008-08-28 15:36:32 +000012643 if (((oldctxt->depth > 40) && ((oldctxt->options & XML_PARSE_HUGE) == 0)) ||
12644 (oldctxt->depth > 1024)) {
Daniel Veillard328f48c2002-11-15 15:24:34 +000012645 return(XML_ERR_ENTITY_LOOP);
12646 }
12647
12648
12649 if (lst != NULL)
12650 *lst = NULL;
12651 if (string == NULL)
William M. Brack7b9154b2003-09-27 19:23:50 +000012652 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard328f48c2002-11-15 15:24:34 +000012653
12654 size = xmlStrlen(string);
12655
12656 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
William M. Brack7b9154b2003-09-27 19:23:50 +000012657 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
Daniel Veillard328f48c2002-11-15 15:24:34 +000012658 if (user_data != NULL)
12659 ctxt->userData = user_data;
12660 else
12661 ctxt->userData = ctxt;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000012662 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
12663 ctxt->dict = oldctxt->dict;
Daniel Veillardfd343dc2003-10-31 10:55:22 +000012664 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
12665 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
12666 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
Daniel Veillard328f48c2002-11-15 15:24:34 +000012667
Daniel Veillard74eaec12009-08-26 15:57:20 +020012668#ifdef SAX2
12669 /* propagate namespaces down the entity */
12670 for (i = 0;i < oldctxt->nsNr;i += 2) {
12671 nsPush(ctxt, oldctxt->nsTab[i], oldctxt->nsTab[i+1]);
12672 }
12673#endif
12674
Daniel Veillard328f48c2002-11-15 15:24:34 +000012675 oldsax = ctxt->sax;
12676 ctxt->sax = oldctxt->sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000012677 xmlDetectSAX2(ctxt);
Daniel Veillard87ab1c12003-12-21 13:01:56 +000012678 ctxt->replaceEntities = oldctxt->replaceEntities;
12679 ctxt->options = oldctxt->options;
Daniel Veillard0161e632008-08-28 15:36:32 +000012680
Daniel Veillarde1ca5032002-12-09 14:13:43 +000012681 ctxt->_private = oldctxt->_private;
Daniel Veillard328f48c2002-11-15 15:24:34 +000012682 if (oldctxt->myDoc == NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000012683 newDoc = xmlNewDoc(BAD_CAST "1.0");
12684 if (newDoc == NULL) {
12685 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000012686 ctxt->dict = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000012687 xmlFreeParserCtxt(ctxt);
William M. Brack7b9154b2003-09-27 19:23:50 +000012688 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard68e9e742002-11-16 15:35:11 +000012689 }
Daniel Veillardae0765b2008-07-31 19:54:59 +000012690 newDoc->properties = XML_DOC_INTERNAL;
Daniel Veillard03a53c32004-10-26 16:06:51 +000012691 newDoc->dict = ctxt->dict;
12692 xmlDictReference(newDoc->dict);
Daniel Veillard328f48c2002-11-15 15:24:34 +000012693 ctxt->myDoc = newDoc;
Daniel Veillard68e9e742002-11-16 15:35:11 +000012694 } else {
12695 ctxt->myDoc = oldctxt->myDoc;
12696 content = ctxt->myDoc->children;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012697 last = ctxt->myDoc->last;
Daniel Veillard328f48c2002-11-15 15:24:34 +000012698 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012699 newRoot = xmlNewDocNode(ctxt->myDoc, NULL, BAD_CAST "pseudoroot", NULL);
12700 if (newRoot == NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000012701 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000012702 ctxt->dict = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000012703 xmlFreeParserCtxt(ctxt);
Daniel Veillard03a53c32004-10-26 16:06:51 +000012704 if (newDoc != NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000012705 xmlFreeDoc(newDoc);
Daniel Veillard03a53c32004-10-26 16:06:51 +000012706 }
William M. Brack7b9154b2003-09-27 19:23:50 +000012707 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard68e9e742002-11-16 15:35:11 +000012708 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012709 ctxt->myDoc->children = NULL;
12710 ctxt->myDoc->last = NULL;
12711 xmlAddChild((xmlNodePtr) ctxt->myDoc, newRoot);
Daniel Veillard68e9e742002-11-16 15:35:11 +000012712 nodePush(ctxt, ctxt->myDoc->children);
Daniel Veillard328f48c2002-11-15 15:24:34 +000012713 ctxt->instate = XML_PARSER_CONTENT;
12714 ctxt->depth = oldctxt->depth + 1;
12715
Daniel Veillard328f48c2002-11-15 15:24:34 +000012716 ctxt->validate = 0;
12717 ctxt->loadsubset = oldctxt->loadsubset;
Daniel Veillardef8dd7b2003-03-23 12:02:56 +000012718 if ((oldctxt->validate) || (oldctxt->replaceEntities != 0)) {
12719 /*
12720 * ID/IDREF registration will be done in xmlValidateElement below
12721 */
12722 ctxt->loadsubset |= XML_SKIP_IDS;
12723 }
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000012724 ctxt->dictNames = oldctxt->dictNames;
Daniel Veillard95d2d5b2003-10-27 14:54:49 +000012725 ctxt->attsDefault = oldctxt->attsDefault;
12726 ctxt->attsSpecial = oldctxt->attsSpecial;
Daniel Veillard328f48c2002-11-15 15:24:34 +000012727
Daniel Veillard68e9e742002-11-16 15:35:11 +000012728 xmlParseContent(ctxt);
Daniel Veillard328f48c2002-11-15 15:24:34 +000012729 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012730 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000012731 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012732 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000012733 }
Daniel Veillard68e9e742002-11-16 15:35:11 +000012734 if (ctxt->node != ctxt->myDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012735 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000012736 }
12737
12738 if (!ctxt->wellFormed) {
12739 if (ctxt->errNo == 0)
Daniel Veillard7d515752003-09-26 19:12:37 +000012740 ret = XML_ERR_INTERNAL_ERROR;
Daniel Veillard328f48c2002-11-15 15:24:34 +000012741 else
William M. Brack7b9154b2003-09-27 19:23:50 +000012742 ret = (xmlParserErrors)ctxt->errNo;
Daniel Veillard328f48c2002-11-15 15:24:34 +000012743 } else {
William M. Brack7b9154b2003-09-27 19:23:50 +000012744 ret = XML_ERR_OK;
Daniel Veillard328f48c2002-11-15 15:24:34 +000012745 }
Daniel Veillard0161e632008-08-28 15:36:32 +000012746
William M. Brack7b9154b2003-09-27 19:23:50 +000012747 if ((lst != NULL) && (ret == XML_ERR_OK)) {
Daniel Veillard328f48c2002-11-15 15:24:34 +000012748 xmlNodePtr cur;
12749
12750 /*
12751 * Return the newly created nodeset after unlinking it from
12752 * they pseudo parent.
12753 */
Daniel Veillard68e9e742002-11-16 15:35:11 +000012754 cur = ctxt->myDoc->children->children;
Daniel Veillard328f48c2002-11-15 15:24:34 +000012755 *lst = cur;
12756 while (cur != NULL) {
Daniel Veillard4432df22003-09-28 18:58:27 +000012757#ifdef LIBXML_VALID_ENABLED
Daniel Veillard8874b942005-08-25 13:19:21 +000012758 if ((oldctxt->validate) && (oldctxt->wellFormed) &&
12759 (oldctxt->myDoc) && (oldctxt->myDoc->intSubset) &&
12760 (cur->type == XML_ELEMENT_NODE)) {
Daniel Veillard8d589042003-02-04 15:07:21 +000012761 oldctxt->valid &= xmlValidateElement(&oldctxt->vctxt,
12762 oldctxt->myDoc, cur);
12763 }
Daniel Veillard4432df22003-09-28 18:58:27 +000012764#endif /* LIBXML_VALID_ENABLED */
Daniel Veillard328f48c2002-11-15 15:24:34 +000012765 cur->parent = NULL;
12766 cur = cur->next;
12767 }
Daniel Veillard68e9e742002-11-16 15:35:11 +000012768 ctxt->myDoc->children->children = NULL;
12769 }
12770 if (ctxt->myDoc != NULL) {
12771 xmlFreeNode(ctxt->myDoc->children);
12772 ctxt->myDoc->children = content;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012773 ctxt->myDoc->last = last;
Daniel Veillard328f48c2002-11-15 15:24:34 +000012774 }
Daniel Veillard0161e632008-08-28 15:36:32 +000012775
12776 /*
12777 * Record in the parent context the number of entities replacement
12778 * done when parsing that reference.
12779 */
Daniel Veillardd44b9362009-09-07 12:15:08 +020012780 if (oldctxt != NULL)
12781 oldctxt->nbentities += ctxt->nbentities;
12782
Daniel Veillard0161e632008-08-28 15:36:32 +000012783 /*
12784 * Also record the last error if any
12785 */
12786 if (ctxt->lastError.code != XML_ERR_OK)
12787 xmlCopyError(&ctxt->lastError, &oldctxt->lastError);
12788
Daniel Veillard328f48c2002-11-15 15:24:34 +000012789 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000012790 ctxt->dict = NULL;
Daniel Veillard95d2d5b2003-10-27 14:54:49 +000012791 ctxt->attsDefault = NULL;
12792 ctxt->attsSpecial = NULL;
Daniel Veillard328f48c2002-11-15 15:24:34 +000012793 xmlFreeParserCtxt(ctxt);
Daniel Veillard03a53c32004-10-26 16:06:51 +000012794 if (newDoc != NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000012795 xmlFreeDoc(newDoc);
Daniel Veillard03a53c32004-10-26 16:06:51 +000012796 }
Daniel Veillard0161e632008-08-28 15:36:32 +000012797
Daniel Veillard328f48c2002-11-15 15:24:34 +000012798 return(ret);
12799}
12800
Daniel Veillard29b17482004-08-16 00:39:03 +000012801/**
12802 * xmlParseInNodeContext:
12803 * @node: the context node
12804 * @data: the input string
12805 * @datalen: the input string length in bytes
12806 * @options: a combination of xmlParserOption
12807 * @lst: the return value for the set of parsed nodes
12808 *
12809 * Parse a well-balanced chunk of an XML document
12810 * within the context (DTD, namespaces, etc ...) of the given node.
12811 *
12812 * The allowed sequence for the data is a Well Balanced Chunk defined by
12813 * the content production in the XML grammar:
12814 *
12815 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
12816 *
12817 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
12818 * error code otherwise
12819 */
12820xmlParserErrors
12821xmlParseInNodeContext(xmlNodePtr node, const char *data, int datalen,
12822 int options, xmlNodePtr *lst) {
12823#ifdef SAX2
12824 xmlParserCtxtPtr ctxt;
12825 xmlDocPtr doc = NULL;
12826 xmlNodePtr fake, cur;
12827 int nsnr = 0;
12828
12829 xmlParserErrors ret = XML_ERR_OK;
12830
12831 /*
12832 * check all input parameters, grab the document
12833 */
12834 if ((lst == NULL) || (node == NULL) || (data == NULL) || (datalen < 0))
12835 return(XML_ERR_INTERNAL_ERROR);
12836 switch (node->type) {
12837 case XML_ELEMENT_NODE:
12838 case XML_ATTRIBUTE_NODE:
12839 case XML_TEXT_NODE:
12840 case XML_CDATA_SECTION_NODE:
12841 case XML_ENTITY_REF_NODE:
12842 case XML_PI_NODE:
12843 case XML_COMMENT_NODE:
12844 case XML_DOCUMENT_NODE:
12845 case XML_HTML_DOCUMENT_NODE:
12846 break;
12847 default:
12848 return(XML_ERR_INTERNAL_ERROR);
12849
12850 }
12851 while ((node != NULL) && (node->type != XML_ELEMENT_NODE) &&
12852 (node->type != XML_DOCUMENT_NODE) &&
12853 (node->type != XML_HTML_DOCUMENT_NODE))
12854 node = node->parent;
12855 if (node == NULL)
12856 return(XML_ERR_INTERNAL_ERROR);
12857 if (node->type == XML_ELEMENT_NODE)
12858 doc = node->doc;
12859 else
12860 doc = (xmlDocPtr) node;
12861 if (doc == NULL)
12862 return(XML_ERR_INTERNAL_ERROR);
12863
12864 /*
12865 * allocate a context and set-up everything not related to the
12866 * node position in the tree
12867 */
12868 if (doc->type == XML_DOCUMENT_NODE)
12869 ctxt = xmlCreateMemoryParserCtxt((char *) data, datalen);
12870#ifdef LIBXML_HTML_ENABLED
12871 else if (doc->type == XML_HTML_DOCUMENT_NODE)
12872 ctxt = htmlCreateMemoryParserCtxt((char *) data, datalen);
12873#endif
12874 else
12875 return(XML_ERR_INTERNAL_ERROR);
12876
12877 if (ctxt == NULL)
12878 return(XML_ERR_NO_MEMORY);
12879 fake = xmlNewComment(NULL);
12880 if (fake == NULL) {
12881 xmlFreeParserCtxt(ctxt);
12882 return(XML_ERR_NO_MEMORY);
12883 }
12884 xmlAddChild(node, fake);
William M. Brackc3f81342004-10-03 01:22:44 +000012885
12886 /*
12887 * Use input doc's dict if present, else assure XML_PARSE_NODICT is set.
12888 * We need a dictionary for xmlDetectSAX2, so if there's no doc dict
12889 * we must wait until the last moment to free the original one.
12890 */
Daniel Veillard29b17482004-08-16 00:39:03 +000012891 if (doc->dict != NULL) {
William M. Brackc3f81342004-10-03 01:22:44 +000012892 if (ctxt->dict != NULL)
Daniel Veillard29b17482004-08-16 00:39:03 +000012893 xmlDictFree(ctxt->dict);
12894 ctxt->dict = doc->dict;
William M. Brackc3f81342004-10-03 01:22:44 +000012895 } else
12896 options |= XML_PARSE_NODICT;
12897
Daniel Veillard37334572008-07-31 08:20:02 +000012898 xmlCtxtUseOptionsInternal(ctxt, options, NULL);
Daniel Veillard29b17482004-08-16 00:39:03 +000012899 xmlDetectSAX2(ctxt);
12900 ctxt->myDoc = doc;
12901
12902 if (node->type == XML_ELEMENT_NODE) {
12903 nodePush(ctxt, node);
12904 /*
12905 * initialize the SAX2 namespaces stack
12906 */
12907 cur = node;
12908 while ((cur != NULL) && (cur->type == XML_ELEMENT_NODE)) {
12909 xmlNsPtr ns = cur->nsDef;
12910 const xmlChar *iprefix, *ihref;
12911
12912 while (ns != NULL) {
12913 if (ctxt->dict) {
12914 iprefix = xmlDictLookup(ctxt->dict, ns->prefix, -1);
12915 ihref = xmlDictLookup(ctxt->dict, ns->href, -1);
12916 } else {
12917 iprefix = ns->prefix;
12918 ihref = ns->href;
12919 }
12920
12921 if (xmlGetNamespace(ctxt, iprefix) == NULL) {
12922 nsPush(ctxt, iprefix, ihref);
12923 nsnr++;
12924 }
12925 ns = ns->next;
12926 }
12927 cur = cur->parent;
12928 }
12929 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillard0161e632008-08-28 15:36:32 +000012930 }
Daniel Veillard29b17482004-08-16 00:39:03 +000012931
12932 if ((ctxt->validate) || (ctxt->replaceEntities != 0)) {
12933 /*
12934 * ID/IDREF registration will be done in xmlValidateElement below
12935 */
12936 ctxt->loadsubset |= XML_SKIP_IDS;
12937 }
12938
Daniel Veillard499cc922006-01-18 17:22:35 +000012939#ifdef LIBXML_HTML_ENABLED
12940 if (doc->type == XML_HTML_DOCUMENT_NODE)
12941 __htmlParseContent(ctxt);
12942 else
12943#endif
12944 xmlParseContent(ctxt);
12945
Daniel Veillard29b17482004-08-16 00:39:03 +000012946 nsPop(ctxt, nsnr);
12947 if ((RAW == '<') && (NXT(1) == '/')) {
12948 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
12949 } else if (RAW != 0) {
12950 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
12951 }
12952 if ((ctxt->node != NULL) && (ctxt->node != node)) {
12953 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
12954 ctxt->wellFormed = 0;
12955 }
12956
12957 if (!ctxt->wellFormed) {
12958 if (ctxt->errNo == 0)
12959 ret = XML_ERR_INTERNAL_ERROR;
12960 else
12961 ret = (xmlParserErrors)ctxt->errNo;
12962 } else {
12963 ret = XML_ERR_OK;
12964 }
Daniel Veillard0161e632008-08-28 15:36:32 +000012965
Daniel Veillard29b17482004-08-16 00:39:03 +000012966 /*
12967 * Return the newly created nodeset after unlinking it from
12968 * the pseudo sibling.
12969 */
Daniel Veillard0161e632008-08-28 15:36:32 +000012970
Daniel Veillard29b17482004-08-16 00:39:03 +000012971 cur = fake->next;
12972 fake->next = NULL;
12973 node->last = fake;
12974
12975 if (cur != NULL) {
12976 cur->prev = NULL;
12977 }
12978
12979 *lst = cur;
12980
12981 while (cur != NULL) {
12982 cur->parent = NULL;
12983 cur = cur->next;
12984 }
12985
12986 xmlUnlinkNode(fake);
12987 xmlFreeNode(fake);
12988
12989
12990 if (ret != XML_ERR_OK) {
12991 xmlFreeNodeList(*lst);
12992 *lst = NULL;
12993 }
William M. Brackc3f81342004-10-03 01:22:44 +000012994
William M. Brackb7b54de2004-10-06 16:38:01 +000012995 if (doc->dict != NULL)
12996 ctxt->dict = NULL;
Daniel Veillard29b17482004-08-16 00:39:03 +000012997 xmlFreeParserCtxt(ctxt);
Daniel Veillard0161e632008-08-28 15:36:32 +000012998
Daniel Veillard29b17482004-08-16 00:39:03 +000012999 return(ret);
13000#else /* !SAX2 */
13001 return(XML_ERR_INTERNAL_ERROR);
13002#endif
13003}
13004
Daniel Veillard81273902003-09-30 00:43:48 +000013005#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard328f48c2002-11-15 15:24:34 +000013006/**
Daniel Veillard58e44c92002-08-02 22:19:49 +000013007 * xmlParseBalancedChunkMemoryRecover:
13008 * @doc: the document the chunk pertains to
13009 * @sax: the SAX handler bloc (possibly NULL)
13010 * @user_data: The user data returned on SAX callbacks (possibly NULL)
13011 * @depth: Used for loop detection, use 0
13012 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
13013 * @lst: the return value for the set of parsed nodes
13014 * @recover: return nodes even if the data is broken (use 0)
13015 *
13016 *
13017 * Parse a well-balanced chunk of an XML document
13018 * called by the parser
13019 * The allowed sequence for the Well Balanced Chunk is the one defined by
13020 * the content production in the XML grammar:
13021 *
13022 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13023 *
13024 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
13025 * the parser error code otherwise
Daniel Veillard2135fc22008-04-04 16:10:51 +000013026 *
Daniel Veillard58e44c92002-08-02 22:19:49 +000013027 * In case recover is set to 1, the nodelist will not be empty even if
Daniel Veillard2135fc22008-04-04 16:10:51 +000013028 * the parsed chunk is not well balanced, assuming the parsing succeeded to
13029 * some extent.
Daniel Veillard58e44c92002-08-02 22:19:49 +000013030 */
13031int
13032xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax,
Daniel Veillard2135fc22008-04-04 16:10:51 +000013033 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst,
Daniel Veillard58e44c92002-08-02 22:19:49 +000013034 int recover) {
Owen Taylor3473f882001-02-23 17:55:21 +000013035 xmlParserCtxtPtr ctxt;
13036 xmlDocPtr newDoc;
13037 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000013038 xmlNodePtr content, newRoot;
Owen Taylor3473f882001-02-23 17:55:21 +000013039 int size;
13040 int ret = 0;
13041
Daniel Veillard0161e632008-08-28 15:36:32 +000013042 if (depth > 40) {
Owen Taylor3473f882001-02-23 17:55:21 +000013043 return(XML_ERR_ENTITY_LOOP);
13044 }
13045
13046
Daniel Veillardcda96922001-08-21 10:56:31 +000013047 if (lst != NULL)
13048 *lst = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000013049 if (string == NULL)
13050 return(-1);
13051
13052 size = xmlStrlen(string);
13053
13054 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
13055 if (ctxt == NULL) return(-1);
13056 ctxt->userData = ctxt;
13057 if (sax != NULL) {
13058 oldsax = ctxt->sax;
13059 ctxt->sax = sax;
13060 if (user_data != NULL)
13061 ctxt->userData = user_data;
13062 }
13063 newDoc = xmlNewDoc(BAD_CAST "1.0");
13064 if (newDoc == NULL) {
13065 xmlFreeParserCtxt(ctxt);
13066 return(-1);
13067 }
Daniel Veillardae0765b2008-07-31 19:54:59 +000013068 newDoc->properties = XML_DOC_INTERNAL;
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000013069 if ((doc != NULL) && (doc->dict != NULL)) {
13070 xmlDictFree(ctxt->dict);
13071 ctxt->dict = doc->dict;
13072 xmlDictReference(ctxt->dict);
13073 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
13074 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
13075 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
13076 ctxt->dictNames = 1;
13077 } else {
Daniel Veillard37334572008-07-31 08:20:02 +000013078 xmlCtxtUseOptionsInternal(ctxt, XML_PARSE_NODICT, NULL);
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000013079 }
Owen Taylor3473f882001-02-23 17:55:21 +000013080 if (doc != NULL) {
13081 newDoc->intSubset = doc->intSubset;
13082 newDoc->extSubset = doc->extSubset;
13083 }
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000013084 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
13085 if (newRoot == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000013086 if (sax != NULL)
13087 ctxt->sax = oldsax;
13088 xmlFreeParserCtxt(ctxt);
13089 newDoc->intSubset = NULL;
13090 newDoc->extSubset = NULL;
13091 xmlFreeDoc(newDoc);
13092 return(-1);
13093 }
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000013094 xmlAddChild((xmlNodePtr) newDoc, newRoot);
13095 nodePush(ctxt, newRoot);
Owen Taylor3473f882001-02-23 17:55:21 +000013096 if (doc == NULL) {
13097 ctxt->myDoc = newDoc;
13098 } else {
Daniel Veillard42766c02002-08-22 20:52:17 +000013099 ctxt->myDoc = newDoc;
Owen Taylor3473f882001-02-23 17:55:21 +000013100 newDoc->children->doc = doc;
Rob Richardsa02f1992006-09-16 14:04:26 +000013101 /* Ensure that doc has XML spec namespace */
13102 xmlSearchNsByHref(doc, (xmlNodePtr)doc, XML_XML_NAMESPACE);
13103 newDoc->oldNs = doc->oldNs;
Owen Taylor3473f882001-02-23 17:55:21 +000013104 }
13105 ctxt->instate = XML_PARSER_CONTENT;
13106 ctxt->depth = depth;
13107
13108 /*
13109 * Doing validity checking on chunk doesn't make sense
13110 */
13111 ctxt->validate = 0;
13112 ctxt->loadsubset = 0;
Daniel Veillarde57ec792003-09-10 10:50:59 +000013113 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000013114
Daniel Veillardb39bc392002-10-26 19:29:51 +000013115 if ( doc != NULL ){
13116 content = doc->children;
13117 doc->children = NULL;
13118 xmlParseContent(ctxt);
13119 doc->children = content;
13120 }
13121 else {
13122 xmlParseContent(ctxt);
13123 }
Owen Taylor3473f882001-02-23 17:55:21 +000013124 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000013125 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000013126 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000013127 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000013128 }
13129 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000013130 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000013131 }
13132
13133 if (!ctxt->wellFormed) {
13134 if (ctxt->errNo == 0)
13135 ret = 1;
13136 else
13137 ret = ctxt->errNo;
13138 } else {
Daniel Veillard58e44c92002-08-02 22:19:49 +000013139 ret = 0;
13140 }
Daniel Veillard0161e632008-08-28 15:36:32 +000013141
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000013142 if ((lst != NULL) && ((ret == 0) || (recover == 1))) {
13143 xmlNodePtr cur;
Owen Taylor3473f882001-02-23 17:55:21 +000013144
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000013145 /*
13146 * Return the newly created nodeset after unlinking it from
13147 * they pseudo parent.
13148 */
13149 cur = newDoc->children->children;
13150 *lst = cur;
13151 while (cur != NULL) {
13152 xmlSetTreeDoc(cur, doc);
13153 cur->parent = NULL;
13154 cur = cur->next;
Owen Taylor3473f882001-02-23 17:55:21 +000013155 }
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000013156 newDoc->children->children = NULL;
13157 }
Daniel Veillard0161e632008-08-28 15:36:32 +000013158
13159 if (sax != NULL)
Owen Taylor3473f882001-02-23 17:55:21 +000013160 ctxt->sax = oldsax;
13161 xmlFreeParserCtxt(ctxt);
13162 newDoc->intSubset = NULL;
13163 newDoc->extSubset = NULL;
Rob Richardsa02f1992006-09-16 14:04:26 +000013164 newDoc->oldNs = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000013165 xmlFreeDoc(newDoc);
Daniel Veillard0161e632008-08-28 15:36:32 +000013166
Owen Taylor3473f882001-02-23 17:55:21 +000013167 return(ret);
13168}
13169
13170/**
13171 * xmlSAXParseEntity:
13172 * @sax: the SAX handler block
13173 * @filename: the filename
13174 *
13175 * parse an XML external entity out of context and build a tree.
13176 * It use the given SAX function block to handle the parsing callback.
13177 * If sax is NULL, fallback to the default DOM tree building routines.
13178 *
13179 * [78] extParsedEnt ::= TextDecl? content
13180 *
13181 * This correspond to a "Well Balanced" chunk
13182 *
13183 * Returns the resulting document tree
13184 */
13185
13186xmlDocPtr
13187xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
13188 xmlDocPtr ret;
13189 xmlParserCtxtPtr ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000013190
13191 ctxt = xmlCreateFileParserCtxt(filename);
13192 if (ctxt == NULL) {
13193 return(NULL);
13194 }
13195 if (sax != NULL) {
13196 if (ctxt->sax != NULL)
13197 xmlFree(ctxt->sax);
13198 ctxt->sax = sax;
13199 ctxt->userData = NULL;
13200 }
13201
Owen Taylor3473f882001-02-23 17:55:21 +000013202 xmlParseExtParsedEnt(ctxt);
13203
13204 if (ctxt->wellFormed)
13205 ret = ctxt->myDoc;
13206 else {
13207 ret = NULL;
13208 xmlFreeDoc(ctxt->myDoc);
13209 ctxt->myDoc = NULL;
13210 }
13211 if (sax != NULL)
13212 ctxt->sax = NULL;
13213 xmlFreeParserCtxt(ctxt);
Daniel Veillard0161e632008-08-28 15:36:32 +000013214
Owen Taylor3473f882001-02-23 17:55:21 +000013215 return(ret);
13216}
13217
13218/**
13219 * xmlParseEntity:
13220 * @filename: the filename
13221 *
13222 * parse an XML external entity out of context and build a tree.
13223 *
13224 * [78] extParsedEnt ::= TextDecl? content
13225 *
13226 * This correspond to a "Well Balanced" chunk
13227 *
13228 * Returns the resulting document tree
13229 */
13230
13231xmlDocPtr
13232xmlParseEntity(const char *filename) {
13233 return(xmlSAXParseEntity(NULL, filename));
13234}
Daniel Veillard81273902003-09-30 00:43:48 +000013235#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000013236
13237/**
Rob Richards9c0aa472009-03-26 18:10:19 +000013238 * xmlCreateEntityParserCtxtInternal:
Owen Taylor3473f882001-02-23 17:55:21 +000013239 * @URL: the entity URL
13240 * @ID: the entity PUBLIC ID
Daniel Veillardcbaf3992001-12-31 16:16:02 +000013241 * @base: a possible base for the target URI
Rob Richards9c0aa472009-03-26 18:10:19 +000013242 * @pctx: parser context used to set options on new context
Owen Taylor3473f882001-02-23 17:55:21 +000013243 *
13244 * Create a parser context for an external entity
13245 * Automatic support for ZLIB/Compress compressed document is provided
13246 * by default if found at compile-time.
13247 *
13248 * Returns the new parser context or NULL
13249 */
Rob Richards9c0aa472009-03-26 18:10:19 +000013250static xmlParserCtxtPtr
13251xmlCreateEntityParserCtxtInternal(const xmlChar *URL, const xmlChar *ID,
13252 const xmlChar *base, xmlParserCtxtPtr pctx) {
Owen Taylor3473f882001-02-23 17:55:21 +000013253 xmlParserCtxtPtr ctxt;
13254 xmlParserInputPtr inputStream;
13255 char *directory = NULL;
13256 xmlChar *uri;
Daniel Veillard0161e632008-08-28 15:36:32 +000013257
Owen Taylor3473f882001-02-23 17:55:21 +000013258 ctxt = xmlNewParserCtxt();
13259 if (ctxt == NULL) {
13260 return(NULL);
13261 }
13262
Daniel Veillard48247b42009-07-10 16:12:46 +020013263 if (pctx != NULL) {
13264 ctxt->options = pctx->options;
13265 ctxt->_private = pctx->_private;
Rob Richards9c0aa472009-03-26 18:10:19 +000013266 }
13267
Owen Taylor3473f882001-02-23 17:55:21 +000013268 uri = xmlBuildURI(URL, base);
13269
13270 if (uri == NULL) {
13271 inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
13272 if (inputStream == NULL) {
13273 xmlFreeParserCtxt(ctxt);
13274 return(NULL);
13275 }
13276
13277 inputPush(ctxt, inputStream);
13278
13279 if ((ctxt->directory == NULL) && (directory == NULL))
13280 directory = xmlParserGetDirectory((char *)URL);
13281 if ((ctxt->directory == NULL) && (directory != NULL))
13282 ctxt->directory = directory;
13283 } else {
13284 inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt);
13285 if (inputStream == NULL) {
13286 xmlFree(uri);
13287 xmlFreeParserCtxt(ctxt);
13288 return(NULL);
13289 }
13290
13291 inputPush(ctxt, inputStream);
13292
13293 if ((ctxt->directory == NULL) && (directory == NULL))
13294 directory = xmlParserGetDirectory((char *)uri);
13295 if ((ctxt->directory == NULL) && (directory != NULL))
13296 ctxt->directory = directory;
13297 xmlFree(uri);
13298 }
Owen Taylor3473f882001-02-23 17:55:21 +000013299 return(ctxt);
13300}
13301
Rob Richards9c0aa472009-03-26 18:10:19 +000013302/**
13303 * xmlCreateEntityParserCtxt:
13304 * @URL: the entity URL
13305 * @ID: the entity PUBLIC ID
13306 * @base: a possible base for the target URI
13307 *
13308 * Create a parser context for an external entity
13309 * Automatic support for ZLIB/Compress compressed document is provided
13310 * by default if found at compile-time.
13311 *
13312 * Returns the new parser context or NULL
13313 */
13314xmlParserCtxtPtr
13315xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
13316 const xmlChar *base) {
13317 return xmlCreateEntityParserCtxtInternal(URL, ID, base, NULL);
13318
13319}
13320
Owen Taylor3473f882001-02-23 17:55:21 +000013321/************************************************************************
13322 * *
Daniel Veillard0161e632008-08-28 15:36:32 +000013323 * Front ends when parsing from a file *
Owen Taylor3473f882001-02-23 17:55:21 +000013324 * *
13325 ************************************************************************/
13326
13327/**
Daniel Veillard61b93382003-11-03 14:28:31 +000013328 * xmlCreateURLParserCtxt:
13329 * @filename: the filename or URL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013330 * @options: a combination of xmlParserOption
Owen Taylor3473f882001-02-23 17:55:21 +000013331 *
Daniel Veillard61b93382003-11-03 14:28:31 +000013332 * Create a parser context for a file or URL content.
Owen Taylor3473f882001-02-23 17:55:21 +000013333 * Automatic support for ZLIB/Compress compressed document is provided
Daniel Veillard61b93382003-11-03 14:28:31 +000013334 * by default if found at compile-time and for file accesses
Owen Taylor3473f882001-02-23 17:55:21 +000013335 *
13336 * Returns the new parser context or NULL
13337 */
13338xmlParserCtxtPtr
Daniel Veillard61b93382003-11-03 14:28:31 +000013339xmlCreateURLParserCtxt(const char *filename, int options)
Owen Taylor3473f882001-02-23 17:55:21 +000013340{
13341 xmlParserCtxtPtr ctxt;
13342 xmlParserInputPtr inputStream;
Owen Taylor3473f882001-02-23 17:55:21 +000013343 char *directory = NULL;
13344
Owen Taylor3473f882001-02-23 17:55:21 +000013345 ctxt = xmlNewParserCtxt();
13346 if (ctxt == NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +000013347 xmlErrMemory(NULL, "cannot allocate parser context");
Owen Taylor3473f882001-02-23 17:55:21 +000013348 return(NULL);
13349 }
13350
Daniel Veillarddf292f72005-01-16 19:00:15 +000013351 if (options)
Daniel Veillard37334572008-07-31 08:20:02 +000013352 xmlCtxtUseOptionsInternal(ctxt, options, NULL);
Daniel Veillarddf292f72005-01-16 19:00:15 +000013353 ctxt->linenumbers = 1;
Daniel Veillard37334572008-07-31 08:20:02 +000013354
Daniel Veillard4e9b1bc2003-06-09 10:30:33 +000013355 inputStream = xmlLoadExternalEntity(filename, NULL, ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000013356 if (inputStream == NULL) {
13357 xmlFreeParserCtxt(ctxt);
13358 return(NULL);
13359 }
13360
Owen Taylor3473f882001-02-23 17:55:21 +000013361 inputPush(ctxt, inputStream);
13362 if ((ctxt->directory == NULL) && (directory == NULL))
Igor Zlatkovic5f9fada2003-02-19 14:51:00 +000013363 directory = xmlParserGetDirectory(filename);
Owen Taylor3473f882001-02-23 17:55:21 +000013364 if ((ctxt->directory == NULL) && (directory != NULL))
13365 ctxt->directory = directory;
13366
13367 return(ctxt);
13368}
13369
Daniel Veillard61b93382003-11-03 14:28:31 +000013370/**
13371 * xmlCreateFileParserCtxt:
13372 * @filename: the filename
13373 *
13374 * Create a parser context for a file content.
13375 * Automatic support for ZLIB/Compress compressed document is provided
13376 * by default if found at compile-time.
13377 *
13378 * Returns the new parser context or NULL
13379 */
13380xmlParserCtxtPtr
13381xmlCreateFileParserCtxt(const char *filename)
13382{
13383 return(xmlCreateURLParserCtxt(filename, 0));
13384}
13385
Daniel Veillard81273902003-09-30 00:43:48 +000013386#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000013387/**
Daniel Veillarda293c322001-10-02 13:54:14 +000013388 * xmlSAXParseFileWithData:
Owen Taylor3473f882001-02-23 17:55:21 +000013389 * @sax: the SAX handler block
13390 * @filename: the filename
13391 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
13392 * documents
Daniel Veillarda293c322001-10-02 13:54:14 +000013393 * @data: the userdata
Owen Taylor3473f882001-02-23 17:55:21 +000013394 *
13395 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
13396 * compressed document is provided by default if found at compile-time.
13397 * It use the given SAX function block to handle the parsing callback.
13398 * If sax is NULL, fallback to the default DOM tree building routines.
13399 *
Daniel Veillarde19fc232002-04-22 16:01:24 +000013400 * User data (void *) is stored within the parser context in the
13401 * context's _private member, so it is available nearly everywhere in libxml
Daniel Veillarda293c322001-10-02 13:54:14 +000013402 *
Owen Taylor3473f882001-02-23 17:55:21 +000013403 * Returns the resulting document tree
13404 */
13405
13406xmlDocPtr
Daniel Veillarda293c322001-10-02 13:54:14 +000013407xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename,
13408 int recovery, void *data) {
Owen Taylor3473f882001-02-23 17:55:21 +000013409 xmlDocPtr ret;
13410 xmlParserCtxtPtr ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000013411
Daniel Veillard635ef722001-10-29 11:48:19 +000013412 xmlInitParser();
13413
Owen Taylor3473f882001-02-23 17:55:21 +000013414 ctxt = xmlCreateFileParserCtxt(filename);
13415 if (ctxt == NULL) {
13416 return(NULL);
13417 }
13418 if (sax != NULL) {
13419 if (ctxt->sax != NULL)
13420 xmlFree(ctxt->sax);
13421 ctxt->sax = sax;
Owen Taylor3473f882001-02-23 17:55:21 +000013422 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000013423 xmlDetectSAX2(ctxt);
Daniel Veillarda293c322001-10-02 13:54:14 +000013424 if (data!=NULL) {
Daniel Veillardc790bf42003-10-11 10:50:10 +000013425 ctxt->_private = data;
Daniel Veillarda293c322001-10-02 13:54:14 +000013426 }
Owen Taylor3473f882001-02-23 17:55:21 +000013427
Daniel Veillard37d2d162008-03-14 10:54:00 +000013428 if (ctxt->directory == NULL)
13429 ctxt->directory = xmlParserGetDirectory(filename);
Owen Taylor3473f882001-02-23 17:55:21 +000013430
Daniel Veillarddad3f682002-11-17 16:47:27 +000013431 ctxt->recovery = recovery;
13432
Owen Taylor3473f882001-02-23 17:55:21 +000013433 xmlParseDocument(ctxt);
13434
William M. Brackc07329e2003-09-08 01:57:30 +000013435 if ((ctxt->wellFormed) || recovery) {
13436 ret = ctxt->myDoc;
Daniel Veillardb65e12e2003-10-08 21:33:28 +000013437 if (ret != NULL) {
13438 if (ctxt->input->buf->compressed > 0)
13439 ret->compression = 9;
13440 else
13441 ret->compression = ctxt->input->buf->compressed;
13442 }
William M. Brackc07329e2003-09-08 01:57:30 +000013443 }
Owen Taylor3473f882001-02-23 17:55:21 +000013444 else {
13445 ret = NULL;
13446 xmlFreeDoc(ctxt->myDoc);
13447 ctxt->myDoc = NULL;
13448 }
13449 if (sax != NULL)
13450 ctxt->sax = NULL;
13451 xmlFreeParserCtxt(ctxt);
13452
13453 return(ret);
13454}
13455
13456/**
Daniel Veillarda293c322001-10-02 13:54:14 +000013457 * xmlSAXParseFile:
13458 * @sax: the SAX handler block
13459 * @filename: the filename
13460 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
13461 * documents
13462 *
13463 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
13464 * compressed document is provided by default if found at compile-time.
13465 * It use the given SAX function block to handle the parsing callback.
13466 * If sax is NULL, fallback to the default DOM tree building routines.
13467 *
13468 * Returns the resulting document tree
13469 */
13470
13471xmlDocPtr
13472xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
13473 int recovery) {
13474 return(xmlSAXParseFileWithData(sax,filename,recovery,NULL));
13475}
13476
13477/**
Owen Taylor3473f882001-02-23 17:55:21 +000013478 * xmlRecoverDoc:
13479 * @cur: a pointer to an array of xmlChar
13480 *
13481 * parse an XML in-memory document and build a tree.
Daniel Veillard2135fc22008-04-04 16:10:51 +000013482 * In the case the document is not Well Formed, a attempt to build a
13483 * tree is tried anyway
13484 *
13485 * Returns the resulting document tree or NULL in case of failure
Owen Taylor3473f882001-02-23 17:55:21 +000013486 */
13487
13488xmlDocPtr
Daniel Veillardf39eafa2009-08-20 19:15:08 +020013489xmlRecoverDoc(const xmlChar *cur) {
Owen Taylor3473f882001-02-23 17:55:21 +000013490 return(xmlSAXParseDoc(NULL, cur, 1));
13491}
13492
13493/**
13494 * xmlParseFile:
13495 * @filename: the filename
13496 *
13497 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
13498 * compressed document is provided by default if found at compile-time.
13499 *
Daniel Veillard5d96fff2001-08-31 14:55:30 +000013500 * Returns the resulting document tree if the file was wellformed,
13501 * NULL otherwise.
Owen Taylor3473f882001-02-23 17:55:21 +000013502 */
13503
13504xmlDocPtr
13505xmlParseFile(const char *filename) {
13506 return(xmlSAXParseFile(NULL, filename, 0));
13507}
13508
13509/**
13510 * xmlRecoverFile:
13511 * @filename: the filename
13512 *
13513 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
13514 * compressed document is provided by default if found at compile-time.
Daniel Veillard2135fc22008-04-04 16:10:51 +000013515 * In the case the document is not Well Formed, it attempts to build
13516 * a tree anyway
Owen Taylor3473f882001-02-23 17:55:21 +000013517 *
Daniel Veillard2135fc22008-04-04 16:10:51 +000013518 * Returns the resulting document tree or NULL in case of failure
Owen Taylor3473f882001-02-23 17:55:21 +000013519 */
13520
13521xmlDocPtr
13522xmlRecoverFile(const char *filename) {
13523 return(xmlSAXParseFile(NULL, filename, 1));
13524}
13525
13526
13527/**
13528 * xmlSetupParserForBuffer:
13529 * @ctxt: an XML parser context
13530 * @buffer: a xmlChar * buffer
13531 * @filename: a file name
13532 *
13533 * Setup the parser context to parse a new buffer; Clears any prior
13534 * contents from the parser context. The buffer parameter must not be
13535 * NULL, but the filename parameter can be
13536 */
13537void
13538xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
13539 const char* filename)
13540{
13541 xmlParserInputPtr input;
13542
Daniel Veillard36e5cd52004-11-02 14:52:23 +000013543 if ((ctxt == NULL) || (buffer == NULL))
13544 return;
13545
Owen Taylor3473f882001-02-23 17:55:21 +000013546 input = xmlNewInputStream(ctxt);
13547 if (input == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +000013548 xmlErrMemory(NULL, "parsing new buffer: out of memory\n");
Daniel Veillard36e5cd52004-11-02 14:52:23 +000013549 xmlClearParserCtxt(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000013550 return;
13551 }
13552
13553 xmlClearParserCtxt(ctxt);
13554 if (filename != NULL)
Daniel Veillardc3ca5ba2003-05-09 22:26:28 +000013555 input->filename = (char *) xmlCanonicPath((const xmlChar *)filename);
Owen Taylor3473f882001-02-23 17:55:21 +000013556 input->base = buffer;
13557 input->cur = buffer;
Daniel Veillard48b2f892001-02-25 16:11:03 +000013558 input->end = &buffer[xmlStrlen(buffer)];
Owen Taylor3473f882001-02-23 17:55:21 +000013559 inputPush(ctxt, input);
13560}
13561
13562/**
13563 * xmlSAXUserParseFile:
13564 * @sax: a SAX handler
13565 * @user_data: The user data returned on SAX callbacks
13566 * @filename: a file name
13567 *
13568 * parse an XML file and call the given SAX handler routines.
13569 * Automatic support for ZLIB/Compress compressed document is provided
13570 *
13571 * Returns 0 in case of success or a error number otherwise
13572 */
13573int
13574xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
13575 const char *filename) {
13576 int ret = 0;
13577 xmlParserCtxtPtr ctxt;
13578
13579 ctxt = xmlCreateFileParserCtxt(filename);
13580 if (ctxt == NULL) return -1;
Daniel Veillard092643b2003-09-25 14:29:29 +000013581 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Owen Taylor3473f882001-02-23 17:55:21 +000013582 xmlFree(ctxt->sax);
13583 ctxt->sax = sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000013584 xmlDetectSAX2(ctxt);
13585
Owen Taylor3473f882001-02-23 17:55:21 +000013586 if (user_data != NULL)
13587 ctxt->userData = user_data;
13588
13589 xmlParseDocument(ctxt);
13590
13591 if (ctxt->wellFormed)
13592 ret = 0;
13593 else {
13594 if (ctxt->errNo != 0)
13595 ret = ctxt->errNo;
13596 else
13597 ret = -1;
13598 }
13599 if (sax != NULL)
13600 ctxt->sax = NULL;
Daniel Veillard34099b42004-11-04 17:34:35 +000013601 if (ctxt->myDoc != NULL) {
13602 xmlFreeDoc(ctxt->myDoc);
13603 ctxt->myDoc = NULL;
13604 }
Owen Taylor3473f882001-02-23 17:55:21 +000013605 xmlFreeParserCtxt(ctxt);
13606
13607 return ret;
13608}
Daniel Veillard81273902003-09-30 00:43:48 +000013609#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000013610
13611/************************************************************************
13612 * *
13613 * Front ends when parsing from memory *
13614 * *
13615 ************************************************************************/
13616
13617/**
13618 * xmlCreateMemoryParserCtxt:
13619 * @buffer: a pointer to a char array
13620 * @size: the size of the array
13621 *
13622 * Create a parser context for an XML in-memory document.
13623 *
13624 * Returns the new parser context or NULL
13625 */
13626xmlParserCtxtPtr
Daniel Veillardfd7ddca2001-05-16 10:57:35 +000013627xmlCreateMemoryParserCtxt(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000013628 xmlParserCtxtPtr ctxt;
13629 xmlParserInputPtr input;
13630 xmlParserInputBufferPtr buf;
13631
13632 if (buffer == NULL)
13633 return(NULL);
13634 if (size <= 0)
13635 return(NULL);
13636
13637 ctxt = xmlNewParserCtxt();
13638 if (ctxt == NULL)
13639 return(NULL);
13640
Daniel Veillard53350552003-09-18 13:35:51 +000013641 /* TODO: xmlParserInputBufferCreateStatic, requires some serious changes */
Owen Taylor3473f882001-02-23 17:55:21 +000013642 buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
Daniel Veillarda7e05b42002-11-19 08:11:14 +000013643 if (buf == NULL) {
13644 xmlFreeParserCtxt(ctxt);
13645 return(NULL);
13646 }
Owen Taylor3473f882001-02-23 17:55:21 +000013647
13648 input = xmlNewInputStream(ctxt);
13649 if (input == NULL) {
Daniel Veillarda7e05b42002-11-19 08:11:14 +000013650 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000013651 xmlFreeParserCtxt(ctxt);
13652 return(NULL);
13653 }
13654
13655 input->filename = NULL;
13656 input->buf = buf;
13657 input->base = input->buf->buffer->content;
13658 input->cur = input->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +000013659 input->end = &input->buf->buffer->content[input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000013660
13661 inputPush(ctxt, input);
13662 return(ctxt);
13663}
13664
Daniel Veillard81273902003-09-30 00:43:48 +000013665#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000013666/**
Daniel Veillard8606bbb2002-11-12 12:36:52 +000013667 * xmlSAXParseMemoryWithData:
13668 * @sax: the SAX handler block
13669 * @buffer: an pointer to a char array
13670 * @size: the size of the array
13671 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
13672 * documents
13673 * @data: the userdata
13674 *
13675 * parse an XML in-memory block and use the given SAX function block
13676 * to handle the parsing callback. If sax is NULL, fallback to the default
13677 * DOM tree building routines.
13678 *
13679 * User data (void *) is stored within the parser context in the
13680 * context's _private member, so it is available nearly everywhere in libxml
13681 *
13682 * Returns the resulting document tree
13683 */
13684
13685xmlDocPtr
13686xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer,
13687 int size, int recovery, void *data) {
13688 xmlDocPtr ret;
13689 xmlParserCtxtPtr ctxt;
13690
Daniel Veillardab2a7632009-07-09 08:45:03 +020013691 xmlInitParser();
13692
Daniel Veillard8606bbb2002-11-12 12:36:52 +000013693 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
13694 if (ctxt == NULL) return(NULL);
13695 if (sax != NULL) {
13696 if (ctxt->sax != NULL)
13697 xmlFree(ctxt->sax);
13698 ctxt->sax = sax;
13699 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000013700 xmlDetectSAX2(ctxt);
Daniel Veillard8606bbb2002-11-12 12:36:52 +000013701 if (data!=NULL) {
13702 ctxt->_private=data;
13703 }
13704
Daniel Veillardadba5f12003-04-04 16:09:01 +000013705 ctxt->recovery = recovery;
13706
Daniel Veillard8606bbb2002-11-12 12:36:52 +000013707 xmlParseDocument(ctxt);
13708
13709 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
13710 else {
13711 ret = NULL;
13712 xmlFreeDoc(ctxt->myDoc);
13713 ctxt->myDoc = NULL;
13714 }
13715 if (sax != NULL)
13716 ctxt->sax = NULL;
13717 xmlFreeParserCtxt(ctxt);
Daniel Veillardab2a7632009-07-09 08:45:03 +020013718
Daniel Veillard8606bbb2002-11-12 12:36:52 +000013719 return(ret);
13720}
13721
13722/**
Owen Taylor3473f882001-02-23 17:55:21 +000013723 * xmlSAXParseMemory:
13724 * @sax: the SAX handler block
13725 * @buffer: an pointer to a char array
13726 * @size: the size of the array
13727 * @recovery: work in recovery mode, i.e. tries to read not Well Formed
13728 * documents
13729 *
13730 * parse an XML in-memory block and use the given SAX function block
13731 * to handle the parsing callback. If sax is NULL, fallback to the default
13732 * DOM tree building routines.
13733 *
13734 * Returns the resulting document tree
13735 */
13736xmlDocPtr
Daniel Veillard50822cb2001-07-26 20:05:51 +000013737xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer,
13738 int size, int recovery) {
Daniel Veillard8606bbb2002-11-12 12:36:52 +000013739 return xmlSAXParseMemoryWithData(sax, buffer, size, recovery, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000013740}
13741
13742/**
13743 * xmlParseMemory:
13744 * @buffer: an pointer to a char array
13745 * @size: the size of the array
13746 *
13747 * parse an XML in-memory block and build a tree.
13748 *
13749 * Returns the resulting document tree
13750 */
13751
Daniel Veillard50822cb2001-07-26 20:05:51 +000013752xmlDocPtr xmlParseMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000013753 return(xmlSAXParseMemory(NULL, buffer, size, 0));
13754}
13755
13756/**
13757 * xmlRecoverMemory:
13758 * @buffer: an pointer to a char array
13759 * @size: the size of the array
13760 *
13761 * parse an XML in-memory block and build a tree.
Daniel Veillard2135fc22008-04-04 16:10:51 +000013762 * In the case the document is not Well Formed, an attempt to
13763 * build a tree is tried anyway
13764 *
13765 * Returns the resulting document tree or NULL in case of error
Owen Taylor3473f882001-02-23 17:55:21 +000013766 */
13767
Daniel Veillard50822cb2001-07-26 20:05:51 +000013768xmlDocPtr xmlRecoverMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000013769 return(xmlSAXParseMemory(NULL, buffer, size, 1));
13770}
13771
13772/**
13773 * xmlSAXUserParseMemory:
13774 * @sax: a SAX handler
13775 * @user_data: The user data returned on SAX callbacks
13776 * @buffer: an in-memory XML document input
13777 * @size: the length of the XML document in bytes
13778 *
13779 * A better SAX parsing routine.
13780 * parse an XML in-memory buffer and call the given SAX handler routines.
Daniel Veillardab2a7632009-07-09 08:45:03 +020013781 *
Owen Taylor3473f882001-02-23 17:55:21 +000013782 * Returns 0 in case of success or a error number otherwise
13783 */
13784int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardfd7ddca2001-05-16 10:57:35 +000013785 const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000013786 int ret = 0;
13787 xmlParserCtxtPtr ctxt;
Daniel Veillardab2a7632009-07-09 08:45:03 +020013788
13789 xmlInitParser();
13790
Owen Taylor3473f882001-02-23 17:55:21 +000013791 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
13792 if (ctxt == NULL) return -1;
Daniel Veillard3dcd3192007-08-14 13:46:54 +000013793 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
13794 xmlFree(ctxt->sax);
Daniel Veillard9e923512002-08-14 08:48:52 +000013795 ctxt->sax = sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000013796 xmlDetectSAX2(ctxt);
Daniel Veillard3dcd3192007-08-14 13:46:54 +000013797
Daniel Veillard30211a02001-04-26 09:33:18 +000013798 if (user_data != NULL)
13799 ctxt->userData = user_data;
Daniel Veillardab2a7632009-07-09 08:45:03 +020013800
Owen Taylor3473f882001-02-23 17:55:21 +000013801 xmlParseDocument(ctxt);
13802
13803 if (ctxt->wellFormed)
13804 ret = 0;
13805 else {
13806 if (ctxt->errNo != 0)
13807 ret = ctxt->errNo;
13808 else
13809 ret = -1;
13810 }
Daniel Veillard3dcd3192007-08-14 13:46:54 +000013811 if (sax != NULL)
13812 ctxt->sax = NULL;
Daniel Veillard34099b42004-11-04 17:34:35 +000013813 if (ctxt->myDoc != NULL) {
13814 xmlFreeDoc(ctxt->myDoc);
13815 ctxt->myDoc = NULL;
13816 }
Owen Taylor3473f882001-02-23 17:55:21 +000013817 xmlFreeParserCtxt(ctxt);
13818
13819 return ret;
13820}
Daniel Veillard81273902003-09-30 00:43:48 +000013821#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000013822
13823/**
13824 * xmlCreateDocParserCtxt:
13825 * @cur: a pointer to an array of xmlChar
13826 *
13827 * Creates a parser context for an XML in-memory document.
13828 *
13829 * Returns the new parser context or NULL
13830 */
13831xmlParserCtxtPtr
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013832xmlCreateDocParserCtxt(const xmlChar *cur) {
Owen Taylor3473f882001-02-23 17:55:21 +000013833 int len;
13834
13835 if (cur == NULL)
13836 return(NULL);
13837 len = xmlStrlen(cur);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013838 return(xmlCreateMemoryParserCtxt((const char *)cur, len));
Owen Taylor3473f882001-02-23 17:55:21 +000013839}
13840
Daniel Veillard81273902003-09-30 00:43:48 +000013841#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000013842/**
13843 * xmlSAXParseDoc:
13844 * @sax: the SAX handler block
13845 * @cur: a pointer to an array of xmlChar
13846 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
13847 * documents
13848 *
13849 * parse an XML in-memory document and build a tree.
13850 * It use the given SAX function block to handle the parsing callback.
13851 * If sax is NULL, fallback to the default DOM tree building routines.
13852 *
13853 * Returns the resulting document tree
13854 */
13855
13856xmlDocPtr
Daniel Veillard7331e5c2005-03-31 14:59:00 +000013857xmlSAXParseDoc(xmlSAXHandlerPtr sax, const xmlChar *cur, int recovery) {
Owen Taylor3473f882001-02-23 17:55:21 +000013858 xmlDocPtr ret;
13859 xmlParserCtxtPtr ctxt;
Daniel Veillard38936062004-11-04 17:45:11 +000013860 xmlSAXHandlerPtr oldsax = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000013861
Daniel Veillard38936062004-11-04 17:45:11 +000013862 if (cur == NULL) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000013863
13864
13865 ctxt = xmlCreateDocParserCtxt(cur);
13866 if (ctxt == NULL) return(NULL);
13867 if (sax != NULL) {
Daniel Veillard38936062004-11-04 17:45:11 +000013868 oldsax = ctxt->sax;
Owen Taylor3473f882001-02-23 17:55:21 +000013869 ctxt->sax = sax;
13870 ctxt->userData = NULL;
13871 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000013872 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000013873
13874 xmlParseDocument(ctxt);
13875 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
13876 else {
13877 ret = NULL;
13878 xmlFreeDoc(ctxt->myDoc);
13879 ctxt->myDoc = NULL;
13880 }
Daniel Veillard34099b42004-11-04 17:34:35 +000013881 if (sax != NULL)
Daniel Veillard38936062004-11-04 17:45:11 +000013882 ctxt->sax = oldsax;
Owen Taylor3473f882001-02-23 17:55:21 +000013883 xmlFreeParserCtxt(ctxt);
13884
13885 return(ret);
13886}
13887
13888/**
13889 * xmlParseDoc:
13890 * @cur: a pointer to an array of xmlChar
13891 *
13892 * parse an XML in-memory document and build a tree.
13893 *
13894 * Returns the resulting document tree
13895 */
13896
13897xmlDocPtr
Daniel Veillard7331e5c2005-03-31 14:59:00 +000013898xmlParseDoc(const xmlChar *cur) {
Owen Taylor3473f882001-02-23 17:55:21 +000013899 return(xmlSAXParseDoc(NULL, cur, 0));
13900}
Daniel Veillard81273902003-09-30 00:43:48 +000013901#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000013902
Daniel Veillard81273902003-09-30 00:43:48 +000013903#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +000013904/************************************************************************
13905 * *
13906 * Specific function to keep track of entities references *
13907 * and used by the XSLT debugger *
13908 * *
13909 ************************************************************************/
13910
13911static xmlEntityReferenceFunc xmlEntityRefFunc = NULL;
13912
13913/**
13914 * xmlAddEntityReference:
13915 * @ent : A valid entity
13916 * @firstNode : A valid first node for children of entity
13917 * @lastNode : A valid last node of children entity
13918 *
13919 * Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY
13920 */
13921static void
13922xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
13923 xmlNodePtr lastNode)
13924{
13925 if (xmlEntityRefFunc != NULL) {
13926 (*xmlEntityRefFunc) (ent, firstNode, lastNode);
13927 }
13928}
13929
13930
13931/**
13932 * xmlSetEntityReferenceFunc:
Daniel Veillard01c13b52002-12-10 15:19:08 +000013933 * @func: A valid function
Daniel Veillard8107a222002-01-13 14:10:10 +000013934 *
13935 * Set the function to call call back when a xml reference has been made
13936 */
13937void
13938xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func)
13939{
13940 xmlEntityRefFunc = func;
13941}
Daniel Veillard81273902003-09-30 00:43:48 +000013942#endif /* LIBXML_LEGACY_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000013943
13944/************************************************************************
13945 * *
13946 * Miscellaneous *
13947 * *
13948 ************************************************************************/
13949
13950#ifdef LIBXML_XPATH_ENABLED
13951#include <libxml/xpath.h>
13952#endif
13953
Daniel Veillardffa3c742005-07-21 13:24:09 +000013954extern void XMLCDECL xmlGenericErrorDefaultFunc(void *ctx, const char *msg, ...);
Owen Taylor3473f882001-02-23 17:55:21 +000013955static int xmlParserInitialized = 0;
13956
13957/**
13958 * xmlInitParser:
13959 *
13960 * Initialization function for the XML parser.
13961 * This is not reentrant. Call once before processing in case of
13962 * use in multithreaded programs.
13963 */
13964
13965void
13966xmlInitParser(void) {
Daniel Veillard3c01b1d2001-10-17 15:58:35 +000013967 if (xmlParserInitialized != 0)
13968 return;
Owen Taylor3473f882001-02-23 17:55:21 +000013969
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000013970#ifdef LIBXML_THREAD_ENABLED
13971 __xmlGlobalInitMutexLock();
13972 if (xmlParserInitialized == 0) {
13973#endif
Daniel Veillard7dd70802009-06-04 11:08:39 +020013974 xmlInitGlobals();
13975 xmlInitThreads();
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000013976 if ((xmlGenericError == xmlGenericErrorDefaultFunc) ||
13977 (xmlGenericError == NULL))
13978 initGenericErrorDefaultFunc(NULL);
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000013979 xmlInitMemory();
13980 xmlInitCharEncodingHandlers();
13981 xmlDefaultSAXHandlerInit();
13982 xmlRegisterDefaultInputCallbacks();
Daniel Veillarda9cce9c2003-09-29 13:20:24 +000013983#ifdef LIBXML_OUTPUT_ENABLED
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000013984 xmlRegisterDefaultOutputCallbacks();
Daniel Veillarda9cce9c2003-09-29 13:20:24 +000013985#endif /* LIBXML_OUTPUT_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000013986#ifdef LIBXML_HTML_ENABLED
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000013987 htmlInitAutoClose();
13988 htmlDefaultSAXHandlerInit();
Owen Taylor3473f882001-02-23 17:55:21 +000013989#endif
13990#ifdef LIBXML_XPATH_ENABLED
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000013991 xmlXPathInit();
Owen Taylor3473f882001-02-23 17:55:21 +000013992#endif
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000013993 xmlParserInitialized = 1;
13994#ifdef LIBXML_THREAD_ENABLED
13995 }
13996 __xmlGlobalInitMutexUnlock();
13997#endif
Owen Taylor3473f882001-02-23 17:55:21 +000013998}
13999
14000/**
14001 * xmlCleanupParser:
14002 *
Daniel Veillard05b37c62008-03-31 08:27:07 +000014003 * This function name is somewhat misleading. It does not clean up
14004 * parser state, it cleans up memory allocated by the library itself.
14005 * It is a cleanup function for the XML library. It tries to reclaim all
14006 * related global memory allocated for the library processing.
14007 * It doesn't deallocate any document related memory. One should
14008 * call xmlCleanupParser() only when the process has finished using
14009 * the library and all XML/HTML documents built with it.
14010 * See also xmlInitParser() which has the opposite function of preparing
14011 * the library for operations.
Daniel Veillard01101202009-02-21 09:22:04 +000014012 *
14013 * WARNING: if your application is multithreaded or has plugin support
14014 * calling this may crash the application if another thread or
14015 * a plugin is still using libxml2. It's sometimes very hard to
14016 * guess if libxml2 is in use in the application, some libraries
14017 * or plugins may use it without notice. In case of doubt abstain
14018 * from calling this function or do it just before calling exit()
14019 * to avoid leak reports from valgrind !
Owen Taylor3473f882001-02-23 17:55:21 +000014020 */
14021
14022void
14023xmlCleanupParser(void) {
Daniel Veillard7fb801f2003-08-17 21:07:26 +000014024 if (!xmlParserInitialized)
14025 return;
14026
Owen Taylor3473f882001-02-23 17:55:21 +000014027 xmlCleanupCharEncodingHandlers();
Daniel Veillarde2940dd2001-08-22 00:06:49 +000014028#ifdef LIBXML_CATALOG_ENABLED
14029 xmlCatalogCleanup();
14030#endif
Daniel Veillard14412512005-01-21 23:53:26 +000014031 xmlDictCleanup();
Daniel Veillard04054be2003-10-15 10:48:54 +000014032 xmlCleanupInputCallbacks();
14033#ifdef LIBXML_OUTPUT_ENABLED
14034 xmlCleanupOutputCallbacks();
14035#endif
Daniel Veillard36e5cd52004-11-02 14:52:23 +000014036#ifdef LIBXML_SCHEMAS_ENABLED
14037 xmlSchemaCleanupTypes();
Daniel Veillarddd6d3002004-11-03 14:20:29 +000014038 xmlRelaxNGCleanupTypes();
Daniel Veillard36e5cd52004-11-02 14:52:23 +000014039#endif
Daniel Veillard781ac8b2003-05-15 22:11:36 +000014040 xmlCleanupGlobals();
Daniel Veillard2b8c4a12003-10-02 22:28:19 +000014041 xmlResetLastError();
Daniel Veillard74c0e592003-11-25 07:01:38 +000014042 xmlCleanupThreads(); /* must be last if called not from the main thread */
William M. Brack72ee48d2003-12-30 08:30:19 +000014043 xmlCleanupMemory();
Daniel Veillardd0463562001-10-13 09:15:48 +000014044 xmlParserInitialized = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000014045}
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014046
14047/************************************************************************
14048 * *
14049 * New set (2.6.0) of simpler and more flexible APIs *
14050 * *
14051 ************************************************************************/
14052
/**
 * DICT_FREE:
 * @str:  a string
 *
 * Free a string if it is not owned by the "dict" dictionary in the
 * current scope. A NULL @str is ignored; when dict is NULL the string
 * is always freed.
 *
 * The body is wrapped in do { } while (0) so that the macro expands to
 * exactly one statement: it can be used safely as the body of an unbraced
 * if/else, and the caller writes the terminating semicolon.
 */
#define DICT_FREE(str)                                              \
    do {                                                            \
        if ((str) && ((!dict) ||                                    \
            (xmlDictOwns(dict, (const xmlChar *)(str)) == 0)))      \
            xmlFree((char *)(str));                                 \
    } while (0)
14064
14065/**
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014066 * xmlCtxtReset:
14067 * @ctxt: an XML parser context
14068 *
14069 * Reset a parser context
14070 */
14071void
14072xmlCtxtReset(xmlParserCtxtPtr ctxt)
14073{
14074 xmlParserInputPtr input;
Daniel Veillard36e5cd52004-11-02 14:52:23 +000014075 xmlDictPtr dict;
14076
14077 if (ctxt == NULL)
14078 return;
14079
14080 dict = ctxt->dict;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014081
14082 while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
14083 xmlFreeInputStream(input);
14084 }
14085 ctxt->inputNr = 0;
14086 ctxt->input = NULL;
14087
14088 ctxt->spaceNr = 0;
Daniel Veillard2e620862007-06-12 08:18:21 +000014089 if (ctxt->spaceTab != NULL) {
14090 ctxt->spaceTab[0] = -1;
14091 ctxt->space = &ctxt->spaceTab[0];
14092 } else {
14093 ctxt->space = NULL;
14094 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014095
14096
14097 ctxt->nodeNr = 0;
14098 ctxt->node = NULL;
14099
14100 ctxt->nameNr = 0;
14101 ctxt->name = NULL;
14102
Daniel Veillarde96a2a42003-09-24 21:23:56 +000014103 DICT_FREE(ctxt->version);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014104 ctxt->version = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000014105 DICT_FREE(ctxt->encoding);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014106 ctxt->encoding = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000014107 DICT_FREE(ctxt->directory);
14108 ctxt->directory = NULL;
14109 DICT_FREE(ctxt->extSubURI);
14110 ctxt->extSubURI = NULL;
14111 DICT_FREE(ctxt->extSubSystem);
14112 ctxt->extSubSystem = NULL;
14113 if (ctxt->myDoc != NULL)
14114 xmlFreeDoc(ctxt->myDoc);
14115 ctxt->myDoc = NULL;
14116
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014117 ctxt->standalone = -1;
14118 ctxt->hasExternalSubset = 0;
14119 ctxt->hasPErefs = 0;
14120 ctxt->html = 0;
14121 ctxt->external = 0;
14122 ctxt->instate = XML_PARSER_START;
14123 ctxt->token = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014124
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014125 ctxt->wellFormed = 1;
14126 ctxt->nsWellFormed = 1;
Daniel Veillardae289182004-01-21 16:00:43 +000014127 ctxt->disableSAX = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014128 ctxt->valid = 1;
Daniel Veillard766c4f92004-03-26 10:48:29 +000014129#if 0
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014130 ctxt->vctxt.userData = ctxt;
14131 ctxt->vctxt.error = xmlParserValidityError;
14132 ctxt->vctxt.warning = xmlParserValidityWarning;
Daniel Veillard766c4f92004-03-26 10:48:29 +000014133#endif
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014134 ctxt->record_info = 0;
14135 ctxt->nbChars = 0;
14136 ctxt->checkIndex = 0;
14137 ctxt->inSubset = 0;
14138 ctxt->errNo = XML_ERR_OK;
14139 ctxt->depth = 0;
14140 ctxt->charset = XML_CHAR_ENCODING_UTF8;
14141 ctxt->catalogs = NULL;
Daniel Veillard4bf899b2008-08-20 17:04:30 +000014142 ctxt->nbentities = 0;
Daniel Veillard0161e632008-08-28 15:36:32 +000014143 ctxt->sizeentities = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014144 xmlInitNodeInfoSeq(&ctxt->node_seq);
14145
14146 if (ctxt->attsDefault != NULL) {
14147 xmlHashFree(ctxt->attsDefault, (xmlHashDeallocator) xmlFree);
14148 ctxt->attsDefault = NULL;
14149 }
14150 if (ctxt->attsSpecial != NULL) {
14151 xmlHashFree(ctxt->attsSpecial, NULL);
14152 ctxt->attsSpecial = NULL;
14153 }
14154
Daniel Veillard4432df22003-09-28 18:58:27 +000014155#ifdef LIBXML_CATALOG_ENABLED
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014156 if (ctxt->catalogs != NULL)
14157 xmlCatalogFreeLocal(ctxt->catalogs);
Daniel Veillard4432df22003-09-28 18:58:27 +000014158#endif
Daniel Veillardcc199e02003-10-24 21:11:48 +000014159 if (ctxt->lastError.code != XML_ERR_OK)
14160 xmlResetError(&ctxt->lastError);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014161}
14162
14163/**
Daniel Veillard9ba8e382003-10-28 21:31:45 +000014164 * xmlCtxtResetPush:
14165 * @ctxt: an XML parser context
14166 * @chunk: a pointer to an array of chars
14167 * @size: number of chars in the array
14168 * @filename: an optional file name or URI
14169 * @encoding: the document encoding, or NULL
14170 *
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000014171 * Reset a push parser context
14172 *
14173 * Returns 0 in case of success and 1 in case of error
Daniel Veillard9ba8e382003-10-28 21:31:45 +000014174 */
14175int
14176xmlCtxtResetPush(xmlParserCtxtPtr ctxt, const char *chunk,
14177 int size, const char *filename, const char *encoding)
14178{
14179 xmlParserInputPtr inputStream;
14180 xmlParserInputBufferPtr buf;
14181 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
14182
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000014183 if (ctxt == NULL)
14184 return(1);
14185
Daniel Veillard9ba8e382003-10-28 21:31:45 +000014186 if ((encoding == NULL) && (chunk != NULL) && (size >= 4))
14187 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
14188
14189 buf = xmlAllocParserInputBuffer(enc);
14190 if (buf == NULL)
14191 return(1);
14192
14193 if (ctxt == NULL) {
14194 xmlFreeParserInputBuffer(buf);
14195 return(1);
14196 }
14197
14198 xmlCtxtReset(ctxt);
14199
14200 if (ctxt->pushTab == NULL) {
14201 ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 *
14202 sizeof(xmlChar *));
14203 if (ctxt->pushTab == NULL) {
14204 xmlErrMemory(ctxt, NULL);
14205 xmlFreeParserInputBuffer(buf);
14206 return(1);
14207 }
14208 }
14209
14210 if (filename == NULL) {
14211 ctxt->directory = NULL;
14212 } else {
14213 ctxt->directory = xmlParserGetDirectory(filename);
14214 }
14215
14216 inputStream = xmlNewInputStream(ctxt);
14217 if (inputStream == NULL) {
14218 xmlFreeParserInputBuffer(buf);
14219 return(1);
14220 }
14221
14222 if (filename == NULL)
14223 inputStream->filename = NULL;
14224 else
14225 inputStream->filename = (char *)
14226 xmlCanonicPath((const xmlChar *) filename);
14227 inputStream->buf = buf;
14228 inputStream->base = inputStream->buf->buffer->content;
14229 inputStream->cur = inputStream->buf->buffer->content;
14230 inputStream->end =
14231 &inputStream->buf->buffer->content[inputStream->buf->buffer->use];
14232
14233 inputPush(ctxt, inputStream);
14234
14235 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
14236 (ctxt->input->buf != NULL)) {
14237 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
14238 int cur = ctxt->input->cur - ctxt->input->base;
14239
14240 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
14241
14242 ctxt->input->base = ctxt->input->buf->buffer->content + base;
14243 ctxt->input->cur = ctxt->input->base + cur;
14244 ctxt->input->end =
14245 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->
14246 use];
14247#ifdef DEBUG_PUSH
14248 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
14249#endif
14250 }
14251
14252 if (encoding != NULL) {
14253 xmlCharEncodingHandlerPtr hdlr;
14254
Daniel Veillard37334572008-07-31 08:20:02 +000014255 if (ctxt->encoding != NULL)
14256 xmlFree((xmlChar *) ctxt->encoding);
14257 ctxt->encoding = xmlStrdup((const xmlChar *) encoding);
14258
Daniel Veillard9ba8e382003-10-28 21:31:45 +000014259 hdlr = xmlFindCharEncodingHandler(encoding);
14260 if (hdlr != NULL) {
14261 xmlSwitchToEncoding(ctxt, hdlr);
14262 } else {
14263 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
14264 "Unsupported encoding %s\n", BAD_CAST encoding);
14265 }
14266 } else if (enc != XML_CHAR_ENCODING_NONE) {
14267 xmlSwitchEncoding(ctxt, enc);
14268 }
14269
14270 return(0);
14271}
14272
Daniel Veillard37334572008-07-31 08:20:02 +000014273
Daniel Veillard9ba8e382003-10-28 21:31:45 +000014274/**
Daniel Veillard37334572008-07-31 08:20:02 +000014275 * xmlCtxtUseOptionsInternal:
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014276 * @ctxt: an XML parser context
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014277 * @options: a combination of xmlParserOption
Daniel Veillard37334572008-07-31 08:20:02 +000014278 * @encoding: the user provided encoding to use
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014279 *
14280 * Applies the options to the parser context
14281 *
14282 * Returns 0 in case of success, the set of unknown or unimplemented options
14283 * in case of error.
14284 */
Daniel Veillard37334572008-07-31 08:20:02 +000014285static int
14286xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options, const char *encoding)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014287{
Daniel Veillard36e5cd52004-11-02 14:52:23 +000014288 if (ctxt == NULL)
14289 return(-1);
Daniel Veillard37334572008-07-31 08:20:02 +000014290 if (encoding != NULL) {
14291 if (ctxt->encoding != NULL)
14292 xmlFree((xmlChar *) ctxt->encoding);
14293 ctxt->encoding = xmlStrdup((const xmlChar *) encoding);
14294 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014295 if (options & XML_PARSE_RECOVER) {
14296 ctxt->recovery = 1;
14297 options -= XML_PARSE_RECOVER;
Daniel Veillardae0765b2008-07-31 19:54:59 +000014298 ctxt->options |= XML_PARSE_RECOVER;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014299 } else
14300 ctxt->recovery = 0;
14301 if (options & XML_PARSE_DTDLOAD) {
14302 ctxt->loadsubset = XML_DETECT_IDS;
14303 options -= XML_PARSE_DTDLOAD;
Daniel Veillardae0765b2008-07-31 19:54:59 +000014304 ctxt->options |= XML_PARSE_DTDLOAD;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014305 } else
14306 ctxt->loadsubset = 0;
14307 if (options & XML_PARSE_DTDATTR) {
14308 ctxt->loadsubset |= XML_COMPLETE_ATTRS;
14309 options -= XML_PARSE_DTDATTR;
Daniel Veillardae0765b2008-07-31 19:54:59 +000014310 ctxt->options |= XML_PARSE_DTDATTR;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014311 }
14312 if (options & XML_PARSE_NOENT) {
14313 ctxt->replaceEntities = 1;
14314 /* ctxt->loadsubset |= XML_DETECT_IDS; */
14315 options -= XML_PARSE_NOENT;
Daniel Veillardae0765b2008-07-31 19:54:59 +000014316 ctxt->options |= XML_PARSE_NOENT;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014317 } else
14318 ctxt->replaceEntities = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014319 if (options & XML_PARSE_PEDANTIC) {
14320 ctxt->pedantic = 1;
14321 options -= XML_PARSE_PEDANTIC;
Daniel Veillardae0765b2008-07-31 19:54:59 +000014322 ctxt->options |= XML_PARSE_PEDANTIC;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014323 } else
14324 ctxt->pedantic = 0;
14325 if (options & XML_PARSE_NOBLANKS) {
14326 ctxt->keepBlanks = 0;
14327 ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace;
14328 options -= XML_PARSE_NOBLANKS;
Daniel Veillardae0765b2008-07-31 19:54:59 +000014329 ctxt->options |= XML_PARSE_NOBLANKS;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014330 } else
14331 ctxt->keepBlanks = 1;
14332 if (options & XML_PARSE_DTDVALID) {
14333 ctxt->validate = 1;
14334 if (options & XML_PARSE_NOWARNING)
14335 ctxt->vctxt.warning = NULL;
14336 if (options & XML_PARSE_NOERROR)
14337 ctxt->vctxt.error = NULL;
14338 options -= XML_PARSE_DTDVALID;
Daniel Veillardae0765b2008-07-31 19:54:59 +000014339 ctxt->options |= XML_PARSE_DTDVALID;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014340 } else
14341 ctxt->validate = 0;
Daniel Veillard971771e2005-07-09 17:32:57 +000014342 if (options & XML_PARSE_NOWARNING) {
14343 ctxt->sax->warning = NULL;
14344 options -= XML_PARSE_NOWARNING;
14345 }
14346 if (options & XML_PARSE_NOERROR) {
14347 ctxt->sax->error = NULL;
14348 ctxt->sax->fatalError = NULL;
14349 options -= XML_PARSE_NOERROR;
14350 }
Daniel Veillard81273902003-09-30 00:43:48 +000014351#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014352 if (options & XML_PARSE_SAX1) {
14353 ctxt->sax->startElement = xmlSAX2StartElement;
14354 ctxt->sax->endElement = xmlSAX2EndElement;
14355 ctxt->sax->startElementNs = NULL;
14356 ctxt->sax->endElementNs = NULL;
14357 ctxt->sax->initialized = 1;
14358 options -= XML_PARSE_SAX1;
Daniel Veillardae0765b2008-07-31 19:54:59 +000014359 ctxt->options |= XML_PARSE_SAX1;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014360 }
Daniel Veillard81273902003-09-30 00:43:48 +000014361#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde96a2a42003-09-24 21:23:56 +000014362 if (options & XML_PARSE_NODICT) {
14363 ctxt->dictNames = 0;
14364 options -= XML_PARSE_NODICT;
Daniel Veillardae0765b2008-07-31 19:54:59 +000014365 ctxt->options |= XML_PARSE_NODICT;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000014366 } else {
14367 ctxt->dictNames = 1;
14368 }
Daniel Veillarddca8cc72003-09-26 13:53:14 +000014369 if (options & XML_PARSE_NOCDATA) {
14370 ctxt->sax->cdataBlock = NULL;
14371 options -= XML_PARSE_NOCDATA;
Daniel Veillardae0765b2008-07-31 19:54:59 +000014372 ctxt->options |= XML_PARSE_NOCDATA;
Daniel Veillarddca8cc72003-09-26 13:53:14 +000014373 }
14374 if (options & XML_PARSE_NSCLEAN) {
14375 ctxt->options |= XML_PARSE_NSCLEAN;
14376 options -= XML_PARSE_NSCLEAN;
14377 }
Daniel Veillard61b93382003-11-03 14:28:31 +000014378 if (options & XML_PARSE_NONET) {
14379 ctxt->options |= XML_PARSE_NONET;
14380 options -= XML_PARSE_NONET;
14381 }
Daniel Veillard8874b942005-08-25 13:19:21 +000014382 if (options & XML_PARSE_COMPACT) {
14383 ctxt->options |= XML_PARSE_COMPACT;
14384 options -= XML_PARSE_COMPACT;
14385 }
Daniel Veillard7e5c3f42008-07-29 16:12:31 +000014386 if (options & XML_PARSE_OLD10) {
14387 ctxt->options |= XML_PARSE_OLD10;
14388 options -= XML_PARSE_OLD10;
14389 }
Daniel Veillard8915c152008-08-26 13:05:34 +000014390 if (options & XML_PARSE_NOBASEFIX) {
14391 ctxt->options |= XML_PARSE_NOBASEFIX;
14392 options -= XML_PARSE_NOBASEFIX;
14393 }
14394 if (options & XML_PARSE_HUGE) {
14395 ctxt->options |= XML_PARSE_HUGE;
14396 options -= XML_PARSE_HUGE;
14397 }
Rob Richardsb9ed0172009-01-05 17:28:50 +000014398 if (options & XML_PARSE_OLDSAX) {
14399 ctxt->options |= XML_PARSE_OLDSAX;
14400 options -= XML_PARSE_OLDSAX;
14401 }
Daniel Veillard7ec29972003-10-31 14:36:36 +000014402 ctxt->linenumbers = 1;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014403 return (options);
14404}
14405
14406/**
Daniel Veillard37334572008-07-31 08:20:02 +000014407 * xmlCtxtUseOptions:
14408 * @ctxt: an XML parser context
14409 * @options: a combination of xmlParserOption
14410 *
14411 * Applies the options to the parser context
14412 *
14413 * Returns 0 in case of success, the set of unknown or unimplemented options
14414 * in case of error.
14415 */
14416int
14417xmlCtxtUseOptions(xmlParserCtxtPtr ctxt, int options)
14418{
14419 return(xmlCtxtUseOptionsInternal(ctxt, options, NULL));
14420}
14421
14422/**
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014423 * xmlDoRead:
14424 * @ctxt: an XML parser context
Daniel Veillard60942de2003-09-25 21:05:58 +000014425 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014426 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014427 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014428 * @reuse: keep the context for reuse
14429 *
14430 * Common front-end for the xmlRead functions
Daniel Veillard37334572008-07-31 08:20:02 +000014431 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014432 * Returns the resulting document tree or NULL
14433 */
14434static xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000014435xmlDoRead(xmlParserCtxtPtr ctxt, const char *URL, const char *encoding,
14436 int options, int reuse)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014437{
14438 xmlDocPtr ret;
Daniel Veillard37334572008-07-31 08:20:02 +000014439
14440 xmlCtxtUseOptionsInternal(ctxt, options, encoding);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014441 if (encoding != NULL) {
14442 xmlCharEncodingHandlerPtr hdlr;
14443
14444 hdlr = xmlFindCharEncodingHandler(encoding);
14445 if (hdlr != NULL)
14446 xmlSwitchToEncoding(ctxt, hdlr);
14447 }
Daniel Veillard60942de2003-09-25 21:05:58 +000014448 if ((URL != NULL) && (ctxt->input != NULL) &&
14449 (ctxt->input->filename == NULL))
14450 ctxt->input->filename = (char *) xmlStrdup((const xmlChar *) URL);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014451 xmlParseDocument(ctxt);
14452 if ((ctxt->wellFormed) || ctxt->recovery)
14453 ret = ctxt->myDoc;
14454 else {
14455 ret = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000014456 if (ctxt->myDoc != NULL) {
Daniel Veillarde96a2a42003-09-24 21:23:56 +000014457 xmlFreeDoc(ctxt->myDoc);
14458 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014459 }
Daniel Veillarde96a2a42003-09-24 21:23:56 +000014460 ctxt->myDoc = NULL;
14461 if (!reuse) {
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014462 xmlFreeParserCtxt(ctxt);
Daniel Veillarde96a2a42003-09-24 21:23:56 +000014463 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014464
14465 return (ret);
14466}
14467
14468/**
14469 * xmlReadDoc:
14470 * @cur: a pointer to a zero terminated string
Daniel Veillard60942de2003-09-25 21:05:58 +000014471 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014472 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014473 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014474 *
14475 * parse an XML in-memory document and build a tree.
14476 *
14477 * Returns the resulting document tree
14478 */
14479xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000014480xmlReadDoc(const xmlChar * cur, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014481{
14482 xmlParserCtxtPtr ctxt;
14483
14484 if (cur == NULL)
14485 return (NULL);
14486
14487 ctxt = xmlCreateDocParserCtxt(cur);
14488 if (ctxt == NULL)
14489 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000014490 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014491}
14492
14493/**
14494 * xmlReadFile:
14495 * @filename: a file or URL
14496 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014497 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014498 *
14499 * parse an XML file from the filesystem or the network.
14500 *
14501 * Returns the resulting document tree
14502 */
14503xmlDocPtr
14504xmlReadFile(const char *filename, const char *encoding, int options)
14505{
14506 xmlParserCtxtPtr ctxt;
14507
Daniel Veillard61b93382003-11-03 14:28:31 +000014508 ctxt = xmlCreateURLParserCtxt(filename, options);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014509 if (ctxt == NULL)
14510 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000014511 return (xmlDoRead(ctxt, NULL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014512}
14513
14514/**
14515 * xmlReadMemory:
14516 * @buffer: a pointer to a char array
14517 * @size: the size of the array
Daniel Veillard60942de2003-09-25 21:05:58 +000014518 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014519 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014520 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014521 *
14522 * parse an XML in-memory document and build a tree.
14523 *
14524 * Returns the resulting document tree
14525 */
14526xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000014527xmlReadMemory(const char *buffer, int size, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014528{
14529 xmlParserCtxtPtr ctxt;
14530
14531 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
14532 if (ctxt == NULL)
14533 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000014534 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014535}
14536
14537/**
14538 * xmlReadFd:
14539 * @fd: an open file descriptor
Daniel Veillard60942de2003-09-25 21:05:58 +000014540 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014541 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014542 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014543 *
14544 * parse an XML from a file descriptor and build a tree.
Daniel Veillard4bc5f432003-12-22 18:13:12 +000014545 * NOTE that the file descriptor will not be closed when the
14546 * reader is closed or reset.
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014547 *
14548 * Returns the resulting document tree
14549 */
14550xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000014551xmlReadFd(int fd, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014552{
14553 xmlParserCtxtPtr ctxt;
14554 xmlParserInputBufferPtr input;
14555 xmlParserInputPtr stream;
14556
14557 if (fd < 0)
14558 return (NULL);
14559
14560 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
14561 if (input == NULL)
14562 return (NULL);
Daniel Veillard4bc5f432003-12-22 18:13:12 +000014563 input->closecallback = NULL;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014564 ctxt = xmlNewParserCtxt();
14565 if (ctxt == NULL) {
14566 xmlFreeParserInputBuffer(input);
14567 return (NULL);
14568 }
14569 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
14570 if (stream == NULL) {
14571 xmlFreeParserInputBuffer(input);
14572 xmlFreeParserCtxt(ctxt);
14573 return (NULL);
14574 }
14575 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000014576 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014577}
14578
14579/**
14580 * xmlReadIO:
14581 * @ioread: an I/O read function
14582 * @ioclose: an I/O close function
14583 * @ioctx: an I/O handler
Daniel Veillard60942de2003-09-25 21:05:58 +000014584 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014585 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014586 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014587 *
14588 * parse an XML document from I/O functions and source and build a tree.
14589 *
14590 * Returns the resulting document tree
14591 */
14592xmlDocPtr
14593xmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
Daniel Veillard60942de2003-09-25 21:05:58 +000014594 void *ioctx, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014595{
14596 xmlParserCtxtPtr ctxt;
14597 xmlParserInputBufferPtr input;
14598 xmlParserInputPtr stream;
14599
14600 if (ioread == NULL)
14601 return (NULL);
14602
14603 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
14604 XML_CHAR_ENCODING_NONE);
14605 if (input == NULL)
14606 return (NULL);
14607 ctxt = xmlNewParserCtxt();
14608 if (ctxt == NULL) {
14609 xmlFreeParserInputBuffer(input);
14610 return (NULL);
14611 }
14612 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
14613 if (stream == NULL) {
14614 xmlFreeParserInputBuffer(input);
14615 xmlFreeParserCtxt(ctxt);
14616 return (NULL);
14617 }
14618 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000014619 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014620}
14621
14622/**
14623 * xmlCtxtReadDoc:
14624 * @ctxt: an XML parser context
14625 * @cur: a pointer to a zero terminated string
Daniel Veillard60942de2003-09-25 21:05:58 +000014626 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014627 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014628 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014629 *
14630 * parse an XML in-memory document and build a tree.
14631 * This reuses the existing @ctxt parser context
14632 *
14633 * Returns the resulting document tree
14634 */
14635xmlDocPtr
14636xmlCtxtReadDoc(xmlParserCtxtPtr ctxt, const xmlChar * cur,
Daniel Veillard60942de2003-09-25 21:05:58 +000014637 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014638{
14639 xmlParserInputPtr stream;
14640
14641 if (cur == NULL)
14642 return (NULL);
14643 if (ctxt == NULL)
14644 return (NULL);
14645
14646 xmlCtxtReset(ctxt);
14647
14648 stream = xmlNewStringInputStream(ctxt, cur);
14649 if (stream == NULL) {
14650 return (NULL);
14651 }
14652 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000014653 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014654}
14655
14656/**
14657 * xmlCtxtReadFile:
14658 * @ctxt: an XML parser context
14659 * @filename: a file or URL
14660 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014661 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014662 *
14663 * parse an XML file from the filesystem or the network.
14664 * This reuses the existing @ctxt parser context
14665 *
14666 * Returns the resulting document tree
14667 */
14668xmlDocPtr
14669xmlCtxtReadFile(xmlParserCtxtPtr ctxt, const char *filename,
14670 const char *encoding, int options)
14671{
14672 xmlParserInputPtr stream;
14673
14674 if (filename == NULL)
14675 return (NULL);
14676 if (ctxt == NULL)
14677 return (NULL);
14678
14679 xmlCtxtReset(ctxt);
14680
Daniel Veillard29614c72004-11-26 10:47:26 +000014681 stream = xmlLoadExternalEntity(filename, NULL, ctxt);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014682 if (stream == NULL) {
14683 return (NULL);
14684 }
14685 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000014686 return (xmlDoRead(ctxt, NULL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014687}
14688
14689/**
14690 * xmlCtxtReadMemory:
14691 * @ctxt: an XML parser context
14692 * @buffer: a pointer to a char array
14693 * @size: the size of the array
Daniel Veillard60942de2003-09-25 21:05:58 +000014694 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014695 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014696 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014697 *
14698 * parse an XML in-memory document and build a tree.
14699 * This reuses the existing @ctxt parser context
14700 *
14701 * Returns the resulting document tree
14702 */
14703xmlDocPtr
14704xmlCtxtReadMemory(xmlParserCtxtPtr ctxt, const char *buffer, int size,
Daniel Veillard60942de2003-09-25 21:05:58 +000014705 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014706{
14707 xmlParserInputBufferPtr input;
14708 xmlParserInputPtr stream;
14709
14710 if (ctxt == NULL)
14711 return (NULL);
14712 if (buffer == NULL)
14713 return (NULL);
14714
14715 xmlCtxtReset(ctxt);
14716
14717 input = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
14718 if (input == NULL) {
14719 return(NULL);
14720 }
14721
14722 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
14723 if (stream == NULL) {
14724 xmlFreeParserInputBuffer(input);
14725 return(NULL);
14726 }
14727
14728 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000014729 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014730}
14731
14732/**
14733 * xmlCtxtReadFd:
14734 * @ctxt: an XML parser context
14735 * @fd: an open file descriptor
Daniel Veillard60942de2003-09-25 21:05:58 +000014736 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014737 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014738 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014739 *
14740 * parse an XML from a file descriptor and build a tree.
14741 * This reuses the existing @ctxt parser context
Daniel Veillard4bc5f432003-12-22 18:13:12 +000014742 * NOTE that the file descriptor will not be closed when the
14743 * reader is closed or reset.
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014744 *
14745 * Returns the resulting document tree
14746 */
14747xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000014748xmlCtxtReadFd(xmlParserCtxtPtr ctxt, int fd,
14749 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014750{
14751 xmlParserInputBufferPtr input;
14752 xmlParserInputPtr stream;
14753
14754 if (fd < 0)
14755 return (NULL);
14756 if (ctxt == NULL)
14757 return (NULL);
14758
14759 xmlCtxtReset(ctxt);
14760
14761
14762 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
14763 if (input == NULL)
14764 return (NULL);
Daniel Veillard4bc5f432003-12-22 18:13:12 +000014765 input->closecallback = NULL;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014766 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
14767 if (stream == NULL) {
14768 xmlFreeParserInputBuffer(input);
14769 return (NULL);
14770 }
14771 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000014772 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014773}
14774
14775/**
14776 * xmlCtxtReadIO:
14777 * @ctxt: an XML parser context
14778 * @ioread: an I/O read function
14779 * @ioclose: an I/O close function
14780 * @ioctx: an I/O handler
Daniel Veillard60942de2003-09-25 21:05:58 +000014781 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014782 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014783 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014784 *
14785 * parse an XML document from I/O functions and source and build a tree.
14786 * This reuses the existing @ctxt parser context
14787 *
14788 * Returns the resulting document tree
14789 */
14790xmlDocPtr
14791xmlCtxtReadIO(xmlParserCtxtPtr ctxt, xmlInputReadCallback ioread,
14792 xmlInputCloseCallback ioclose, void *ioctx,
Daniel Veillard60942de2003-09-25 21:05:58 +000014793 const char *URL,
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014794 const char *encoding, int options)
14795{
14796 xmlParserInputBufferPtr input;
14797 xmlParserInputPtr stream;
14798
14799 if (ioread == NULL)
14800 return (NULL);
14801 if (ctxt == NULL)
14802 return (NULL);
14803
14804 xmlCtxtReset(ctxt);
14805
14806 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
14807 XML_CHAR_ENCODING_NONE);
14808 if (input == NULL)
14809 return (NULL);
14810 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
14811 if (stream == NULL) {
14812 xmlFreeParserInputBuffer(input);
14813 return (NULL);
14814 }
14815 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000014816 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014817}
Daniel Veillard5d4644e2005-04-01 13:11:58 +000014818
14819#define bottom_parser
14820#include "elfgcchack.h"