blob: 533754190d9d74f2b28f67809c3f8753b1a13d30 [file] [log] [blame]
Owen Taylor3473f882001-02-23 17:55:21 +00001/*
2 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
3 * implemented on top of the SAX interfaces
4 *
5 * References:
6 * The XML specification:
7 * http://www.w3.org/TR/REC-xml
8 * Original 1.0 version:
9 * http://www.w3.org/TR/1998/REC-xml-19980210
10 * XML second edition working draft
11 * http://www.w3.org/TR/2000/WD-xml-2e-20000814
12 *
13 * Okay this is a big file, the parser core is around 7000 lines, then it
14 * is followed by the progressive parser top routines, then the various
Daniel Veillardcbaf3992001-12-31 16:16:02 +000015 * high level APIs to call the parser and a few miscellaneous functions.
Owen Taylor3473f882001-02-23 17:55:21 +000016 * A number of helper functions and deprecated ones have been moved to
17 * parserInternals.c to reduce this file size.
18 * As much as possible the functions are associated with their relative
 * production in the XML specification. A few productions defining the
 * different ranges of characters are actually implemented either in
 * parserInternals.h or parserInternals.c
22 * The DOM tree build is realized from the default SAX callbacks in
23 * the module SAX.c.
24 * The routines doing the validation checks are in valid.c and called either
Daniel Veillardcbaf3992001-12-31 16:16:02 +000025 * from the SAX callbacks or as standalone functions using a preparsed
Owen Taylor3473f882001-02-23 17:55:21 +000026 * document.
27 *
28 * See Copyright for the status of this software.
29 *
Daniel Veillardc5d64342001-06-24 12:13:24 +000030 * daniel@veillard.com
Owen Taylor3473f882001-02-23 17:55:21 +000031 */
32
Daniel Veillard34ce8be2002-03-18 19:37:11 +000033#define IN_LIBXML
Bjorn Reese70a9da52001-04-21 16:57:29 +000034#include "libxml.h"
35
Daniel Veillard3c5ed912002-01-08 10:36:16 +000036#if defined(WIN32) && !defined (__CYGWIN__)
Owen Taylor3473f882001-02-23 17:55:21 +000037#define XML_DIR_SEP '\\'
38#else
Owen Taylor3473f882001-02-23 17:55:21 +000039#define XML_DIR_SEP '/'
40#endif
41
Owen Taylor3473f882001-02-23 17:55:21 +000042#include <stdlib.h>
43#include <string.h>
Aleksey Sanine7acf432003-10-02 20:05:27 +000044#include <stdarg.h>
Owen Taylor3473f882001-02-23 17:55:21 +000045#include <libxml/xmlmemory.h>
Daniel Veillardd0463562001-10-13 09:15:48 +000046#include <libxml/threads.h>
47#include <libxml/globals.h>
Owen Taylor3473f882001-02-23 17:55:21 +000048#include <libxml/tree.h>
49#include <libxml/parser.h>
50#include <libxml/parserInternals.h>
51#include <libxml/valid.h>
52#include <libxml/entities.h>
53#include <libxml/xmlerror.h>
54#include <libxml/encoding.h>
55#include <libxml/xmlIO.h>
56#include <libxml/uri.h>
Daniel Veillard5d90b6c2001-08-22 14:29:45 +000057#ifdef LIBXML_CATALOG_ENABLED
58#include <libxml/catalog.h>
59#endif
William M. Brack1d8c9b22004-12-25 10:14:57 +000060#ifdef LIBXML_SCHEMAS_ENABLED
61#include <libxml/xmlschemastypes.h>
62#include <libxml/relaxng.h>
63#endif
Owen Taylor3473f882001-02-23 17:55:21 +000064#ifdef HAVE_CTYPE_H
65#include <ctype.h>
66#endif
67#ifdef HAVE_STDLIB_H
68#include <stdlib.h>
69#endif
70#ifdef HAVE_SYS_STAT_H
71#include <sys/stat.h>
72#endif
73#ifdef HAVE_FCNTL_H
74#include <fcntl.h>
75#endif
76#ifdef HAVE_UNISTD_H
77#include <unistd.h>
78#endif
79#ifdef HAVE_ZLIB_H
80#include <zlib.h>
81#endif
82
Daniel Veillard0161e632008-08-28 15:36:32 +000083static void
84xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info);
85
Rob Richards9c0aa472009-03-26 18:10:19 +000086static xmlParserCtxtPtr
87xmlCreateEntityParserCtxtInternal(const xmlChar *URL, const xmlChar *ID,
88 const xmlChar *base, xmlParserCtxtPtr pctx);
89
Daniel Veillard0161e632008-08-28 15:36:32 +000090/************************************************************************
91 * *
92 * Arbitrary limits set in the parser. See XML_PARSE_HUGE *
93 * *
94 ************************************************************************/
95
96#define XML_PARSER_BIG_ENTITY 1000
97#define XML_PARSER_LOT_ENTITY 5000
98
99/*
 * XML_PARSER_NON_LINEAR is the threshold where the ratio of parsed entity
 * replacement over the size in bytes of the input indicates that you have
 * an exponential behaviour. A value of 10 corresponds to at least 3 entity
 * replacements per byte of input.
104 */
105#define XML_PARSER_NON_LINEAR 10
106
107/*
108 * xmlParserEntityCheck
109 *
110 * Function to check non-linear entity expansion behaviour
111 * This is here to detect and stop exponential linear entity expansion
112 * This is not a limitation of the parser but a safety
113 * boundary feature. It can be disabled with the XML_PARSE_HUGE
114 * parser option.
115 */
116static int
117xmlParserEntityCheck(xmlParserCtxtPtr ctxt, unsigned long size,
118 xmlEntityPtr ent)
119{
Daniel Veillardcba68392008-08-29 12:43:40 +0000120 unsigned long consumed = 0;
Daniel Veillard0161e632008-08-28 15:36:32 +0000121
122 if ((ctxt == NULL) || (ctxt->options & XML_PARSE_HUGE))
123 return (0);
124 if (ctxt->lastError.code == XML_ERR_ENTITY_LOOP)
125 return (1);
126 if (size != 0) {
127 /*
128 * Do the check based on the replacement size of the entity
129 */
130 if (size < XML_PARSER_BIG_ENTITY)
131 return(0);
132
133 /*
134 * A limit on the amount of text data reasonably used
135 */
136 if (ctxt->input != NULL) {
137 consumed = ctxt->input->consumed +
138 (ctxt->input->cur - ctxt->input->base);
139 }
140 consumed += ctxt->sizeentities;
141
142 if ((size < XML_PARSER_NON_LINEAR * consumed) &&
143 (ctxt->nbentities * 3 < XML_PARSER_NON_LINEAR * consumed))
144 return (0);
145 } else if (ent != NULL) {
146 /*
147 * use the number of parsed entities in the replacement
148 */
149 size = ent->checked;
150
151 /*
152 * The amount of data parsed counting entities size only once
153 */
154 if (ctxt->input != NULL) {
155 consumed = ctxt->input->consumed +
156 (ctxt->input->cur - ctxt->input->base);
157 }
158 consumed += ctxt->sizeentities;
159
160 /*
161 * Check the density of entities for the amount of data
162 * knowing an entity reference will take at least 3 bytes
163 */
164 if (size * 3 < consumed * XML_PARSER_NON_LINEAR)
165 return (0);
166 } else {
167 /*
168 * strange we got no data for checking just return
169 */
170 return (0);
171 }
172
173 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
174 return (1);
175}
176
Daniel Veillard3b2e4e12003-02-03 08:52:58 +0000177/**
 * xmlParserMaxDepth:
 *
 * Arbitrary depth limit for the XML documents that we allow to
 * process; it is initialized to 256. This is not a limitation of the
 * parser but a safety boundary feature. It can be disabled with the
 * XML_PARSE_HUGE parser option.
 */
Daniel Veillard8915c152008-08-26 13:05:34 +0000185unsigned int xmlParserMaxDepth = 256;
Owen Taylor3473f882001-02-23 17:55:21 +0000186
Daniel Veillard0fb18932003-09-07 09:14:37 +0000187
Daniel Veillard0161e632008-08-28 15:36:32 +0000188
189#define SAX2 1
Daniel Veillard21a0f912001-02-25 19:54:14 +0000190#define XML_PARSER_BIG_BUFFER_SIZE 300
Owen Taylor3473f882001-02-23 17:55:21 +0000191#define XML_PARSER_BUFFER_SIZE 100
Daniel Veillard5997aca2002-03-18 18:36:20 +0000192#define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"
193
/*
 * List of XML prefixed PI allowed by W3C specs.
 *
 * The table is terminated by a NULL entry. Both the strings and the
 * pointer slots are const: this is a read-only lookup table.
 */
static const char *const xmlW3CPIs[] = {
    "xml-stylesheet",
    NULL
};
202
Daniel Veillarda07050d2003-10-19 14:46:32 +0000203
Owen Taylor3473f882001-02-23 17:55:21 +0000204/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
Daniel Veillard8ed10722009-08-20 19:17:36 +0200205static xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
206 const xmlChar **str);
Owen Taylor3473f882001-02-23 17:55:21 +0000207
Daniel Veillard7d515752003-09-26 19:12:37 +0000208static xmlParserErrors
Daniel Veillarda97a19b2001-05-20 13:19:52 +0000209xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
210 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +0000211 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +0000212 const xmlChar *ID, xmlNodePtr *list);
Owen Taylor3473f882001-02-23 17:55:21 +0000213
Daniel Veillard37334572008-07-31 08:20:02 +0000214static int
215xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options,
216 const char *encoding);
Daniel Veillard81273902003-09-30 00:43:48 +0000217#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +0000218static void
219xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
220 xmlNodePtr lastNode);
Daniel Veillard81273902003-09-30 00:43:48 +0000221#endif /* LIBXML_LEGACY_ENABLED */
Daniel Veillard8107a222002-01-13 14:10:10 +0000222
Daniel Veillard7d515752003-09-26 19:12:37 +0000223static xmlParserErrors
Daniel Veillard328f48c2002-11-15 15:24:34 +0000224xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
225 const xmlChar *string, void *user_data, xmlNodePtr *lst);
Daniel Veillarde57ec792003-09-10 10:50:59 +0000226
Daniel Veillard8bf64ae2008-03-24 20:45:21 +0000227static int
228xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity);
229
Daniel Veillarde57ec792003-09-10 10:50:59 +0000230/************************************************************************
231 * *
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000232 * Some factorized error routines *
233 * *
234 ************************************************************************/
235
236/**
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000237 * xmlErrAttributeDup:
238 * @ctxt: an XML parser context
239 * @prefix: the attribute prefix
240 * @localname: the attribute localname
241 *
242 * Handle a redefinition of attribute error
243 */
244static void
245xmlErrAttributeDup(xmlParserCtxtPtr ctxt, const xmlChar * prefix,
246 const xmlChar * localname)
247{
Daniel Veillard157fee02003-10-31 10:36:03 +0000248 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
249 (ctxt->instate == XML_PARSER_EOF))
250 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000251 if (ctxt != NULL)
252 ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
Daniel Veillard76d36452009-09-07 11:19:33 +0200253
Daniel Veillard2b8c4a12003-10-02 22:28:19 +0000254 if (prefix == NULL)
Daniel Veillard659e71e2003-10-10 14:10:40 +0000255 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
Daniel Veillard76d36452009-09-07 11:19:33 +0200256 XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000257 (const char *) localname, NULL, NULL, 0, 0,
258 "Attribute %s redefined\n", localname);
Daniel Veillard2b8c4a12003-10-02 22:28:19 +0000259 else
Daniel Veillard659e71e2003-10-10 14:10:40 +0000260 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
Daniel Veillard76d36452009-09-07 11:19:33 +0200261 XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000262 (const char *) prefix, (const char *) localname,
263 NULL, 0, 0, "Attribute %s:%s redefined\n", prefix,
264 localname);
Daniel Veillard30e76072006-03-09 14:13:55 +0000265 if (ctxt != NULL) {
266 ctxt->wellFormed = 0;
267 if (ctxt->recovery == 0)
268 ctxt->disableSAX = 1;
269 }
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000270}
271
272/**
273 * xmlFatalErr:
274 * @ctxt: an XML parser context
275 * @error: the error number
276 * @extra: extra information string
277 *
278 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
279 */
280static void
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000281xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info)
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000282{
283 const char *errmsg;
284
Daniel Veillard157fee02003-10-31 10:36:03 +0000285 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
286 (ctxt->instate == XML_PARSER_EOF))
287 return;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000288 switch (error) {
289 case XML_ERR_INVALID_HEX_CHARREF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000290 errmsg = "CharRef: invalid hexadecimal value\n";
291 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000292 case XML_ERR_INVALID_DEC_CHARREF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000293 errmsg = "CharRef: invalid decimal value\n";
294 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000295 case XML_ERR_INVALID_CHARREF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000296 errmsg = "CharRef: invalid value\n";
297 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000298 case XML_ERR_INTERNAL_ERROR:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000299 errmsg = "internal error";
300 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000301 case XML_ERR_PEREF_AT_EOF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000302 errmsg = "PEReference at end of document\n";
303 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000304 case XML_ERR_PEREF_IN_PROLOG:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000305 errmsg = "PEReference in prolog\n";
306 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000307 case XML_ERR_PEREF_IN_EPILOG:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000308 errmsg = "PEReference in epilog\n";
309 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000310 case XML_ERR_PEREF_NO_NAME:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000311 errmsg = "PEReference: no name\n";
312 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000313 case XML_ERR_PEREF_SEMICOL_MISSING:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000314 errmsg = "PEReference: expecting ';'\n";
315 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000316 case XML_ERR_ENTITY_LOOP:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000317 errmsg = "Detected an entity reference loop\n";
318 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000319 case XML_ERR_ENTITY_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000320 errmsg = "EntityValue: \" or ' expected\n";
321 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000322 case XML_ERR_ENTITY_PE_INTERNAL:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000323 errmsg = "PEReferences forbidden in internal subset\n";
324 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000325 case XML_ERR_ENTITY_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000326 errmsg = "EntityValue: \" or ' expected\n";
327 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000328 case XML_ERR_ATTRIBUTE_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000329 errmsg = "AttValue: \" or ' expected\n";
330 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000331 case XML_ERR_LT_IN_ATTRIBUTE:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000332 errmsg = "Unescaped '<' not allowed in attributes values\n";
333 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000334 case XML_ERR_LITERAL_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000335 errmsg = "SystemLiteral \" or ' expected\n";
336 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000337 case XML_ERR_LITERAL_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000338 errmsg = "Unfinished System or Public ID \" or ' expected\n";
339 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000340 case XML_ERR_MISPLACED_CDATA_END:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000341 errmsg = "Sequence ']]>' not allowed in content\n";
342 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000343 case XML_ERR_URI_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000344 errmsg = "SYSTEM or PUBLIC, the URI is missing\n";
345 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000346 case XML_ERR_PUBID_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000347 errmsg = "PUBLIC, the Public Identifier is missing\n";
348 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000349 case XML_ERR_HYPHEN_IN_COMMENT:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000350 errmsg = "Comment must not contain '--' (double-hyphen)\n";
351 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000352 case XML_ERR_PI_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000353 errmsg = "xmlParsePI : no target name\n";
354 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000355 case XML_ERR_RESERVED_XML_NAME:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000356 errmsg = "Invalid PI name\n";
357 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000358 case XML_ERR_NOTATION_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000359 errmsg = "NOTATION: Name expected here\n";
360 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000361 case XML_ERR_NOTATION_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000362 errmsg = "'>' required to close NOTATION declaration\n";
363 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000364 case XML_ERR_VALUE_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000365 errmsg = "Entity value required\n";
366 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000367 case XML_ERR_URI_FRAGMENT:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000368 errmsg = "Fragment not allowed";
369 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000370 case XML_ERR_ATTLIST_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000371 errmsg = "'(' required to start ATTLIST enumeration\n";
372 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000373 case XML_ERR_NMTOKEN_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000374 errmsg = "NmToken expected in ATTLIST enumeration\n";
375 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000376 case XML_ERR_ATTLIST_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000377 errmsg = "')' required to finish ATTLIST enumeration\n";
378 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000379 case XML_ERR_MIXED_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000380 errmsg = "MixedContentDecl : '|' or ')*' expected\n";
381 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000382 case XML_ERR_PCDATA_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000383 errmsg = "MixedContentDecl : '#PCDATA' expected\n";
384 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000385 case XML_ERR_ELEMCONTENT_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000386 errmsg = "ContentDecl : Name or '(' expected\n";
387 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000388 case XML_ERR_ELEMCONTENT_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000389 errmsg = "ContentDecl : ',' '|' or ')' expected\n";
390 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000391 case XML_ERR_PEREF_IN_INT_SUBSET:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000392 errmsg =
393 "PEReference: forbidden within markup decl in internal subset\n";
394 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000395 case XML_ERR_GT_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000396 errmsg = "expected '>'\n";
397 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000398 case XML_ERR_CONDSEC_INVALID:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000399 errmsg = "XML conditional section '[' expected\n";
400 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000401 case XML_ERR_EXT_SUBSET_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000402 errmsg = "Content error in the external subset\n";
403 break;
404 case XML_ERR_CONDSEC_INVALID_KEYWORD:
405 errmsg =
406 "conditional section INCLUDE or IGNORE keyword expected\n";
407 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000408 case XML_ERR_CONDSEC_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000409 errmsg = "XML conditional section not closed\n";
410 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000411 case XML_ERR_XMLDECL_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000412 errmsg = "Text declaration '<?xml' required\n";
413 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000414 case XML_ERR_XMLDECL_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000415 errmsg = "parsing XML declaration: '?>' expected\n";
416 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000417 case XML_ERR_EXT_ENTITY_STANDALONE:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000418 errmsg = "external parsed entities cannot be standalone\n";
419 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000420 case XML_ERR_ENTITYREF_SEMICOL_MISSING:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000421 errmsg = "EntityRef: expecting ';'\n";
422 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000423 case XML_ERR_DOCTYPE_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000424 errmsg = "DOCTYPE improperly terminated\n";
425 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000426 case XML_ERR_LTSLASH_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000427 errmsg = "EndTag: '</' not found\n";
428 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000429 case XML_ERR_EQUAL_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000430 errmsg = "expected '='\n";
431 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000432 case XML_ERR_STRING_NOT_CLOSED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000433 errmsg = "String not closed expecting \" or '\n";
434 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000435 case XML_ERR_STRING_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000436 errmsg = "String not started expecting ' or \"\n";
437 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000438 case XML_ERR_ENCODING_NAME:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000439 errmsg = "Invalid XML encoding name\n";
440 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000441 case XML_ERR_STANDALONE_VALUE:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000442 errmsg = "standalone accepts only 'yes' or 'no'\n";
443 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000444 case XML_ERR_DOCUMENT_EMPTY:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000445 errmsg = "Document is empty\n";
446 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000447 case XML_ERR_DOCUMENT_END:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000448 errmsg = "Extra content at the end of the document\n";
449 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000450 case XML_ERR_NOT_WELL_BALANCED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000451 errmsg = "chunk is not well balanced\n";
452 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000453 case XML_ERR_EXTRA_CONTENT:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000454 errmsg = "extra content at the end of well balanced chunk\n";
455 break;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000456 case XML_ERR_VERSION_MISSING:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000457 errmsg = "Malformed declaration expecting version\n";
458 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000459#if 0
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000460 case:
461 errmsg = "\n";
462 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000463#endif
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000464 default:
465 errmsg = "Unregistered error message\n";
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000466 }
Daniel Veillard30e76072006-03-09 14:13:55 +0000467 if (ctxt != NULL)
468 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000469 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000470 XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, errmsg,
471 info);
Daniel Veillard30e76072006-03-09 14:13:55 +0000472 if (ctxt != NULL) {
473 ctxt->wellFormed = 0;
474 if (ctxt->recovery == 0)
475 ctxt->disableSAX = 1;
476 }
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000477}
478
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000479/**
480 * xmlFatalErrMsg:
481 * @ctxt: an XML parser context
482 * @error: the error number
483 * @msg: the error message
484 *
485 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
486 */
487static void
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000488xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
489 const char *msg)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000490{
Daniel Veillard157fee02003-10-31 10:36:03 +0000491 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
492 (ctxt->instate == XML_PARSER_EOF))
493 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000494 if (ctxt != NULL)
495 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000496 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
Daniel Veillardbccae2d2009-06-04 11:22:45 +0200497 XML_ERR_FATAL, NULL, 0, NULL, NULL, NULL, 0, 0, "%s", msg);
Daniel Veillard30e76072006-03-09 14:13:55 +0000498 if (ctxt != NULL) {
499 ctxt->wellFormed = 0;
500 if (ctxt->recovery == 0)
501 ctxt->disableSAX = 1;
502 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000503}
504
505/**
Daniel Veillard24eb9782003-10-04 21:08:09 +0000506 * xmlWarningMsg:
507 * @ctxt: an XML parser context
508 * @error: the error number
509 * @msg: the error message
510 * @str1: extra data
511 * @str2: extra data
512 *
513 * Handle a warning.
514 */
515static void
516xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
517 const char *msg, const xmlChar *str1, const xmlChar *str2)
518{
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000519 xmlStructuredErrorFunc schannel = NULL;
Daniel Veillard34e3f642008-07-29 09:02:27 +0000520
Daniel Veillard157fee02003-10-31 10:36:03 +0000521 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
522 (ctxt->instate == XML_PARSER_EOF))
523 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000524 if ((ctxt != NULL) && (ctxt->sax != NULL) &&
525 (ctxt->sax->initialized == XML_SAX2_MAGIC))
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000526 schannel = ctxt->sax->serror;
527 __xmlRaiseError(schannel,
Daniel Veillard659e71e2003-10-10 14:10:40 +0000528 (ctxt->sax) ? ctxt->sax->warning : NULL,
529 ctxt->userData,
Daniel Veillard24eb9782003-10-04 21:08:09 +0000530 ctxt, NULL, XML_FROM_PARSER, error,
531 XML_ERR_WARNING, NULL, 0,
532 (const char *) str1, (const char *) str2, NULL, 0, 0,
533 msg, (const char *) str1, (const char *) str2);
534}
535
536/**
537 * xmlValidityError:
538 * @ctxt: an XML parser context
539 * @error: the error number
540 * @msg: the error message
541 * @str1: extra data
542 *
Daniel Veillardf88d8cf2003-12-08 10:25:02 +0000543 * Handle a validity error.
Daniel Veillard24eb9782003-10-04 21:08:09 +0000544 */
545static void
546xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error,
Daniel Veillardae0765b2008-07-31 19:54:59 +0000547 const char *msg, const xmlChar *str1, const xmlChar *str2)
Daniel Veillard24eb9782003-10-04 21:08:09 +0000548{
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000549 xmlStructuredErrorFunc schannel = NULL;
Daniel Veillard157fee02003-10-31 10:36:03 +0000550
551 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
552 (ctxt->instate == XML_PARSER_EOF))
553 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000554 if (ctxt != NULL) {
555 ctxt->errNo = error;
556 if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC))
557 schannel = ctxt->sax->serror;
558 }
Daniel Veillard76d36452009-09-07 11:19:33 +0200559 if (ctxt != NULL) {
560 __xmlRaiseError(schannel,
Daniel Veillard659e71e2003-10-10 14:10:40 +0000561 ctxt->vctxt.error, ctxt->vctxt.userData,
Daniel Veillard24eb9782003-10-04 21:08:09 +0000562 ctxt, NULL, XML_FROM_DTD, error,
563 XML_ERR_ERROR, NULL, 0, (const char *) str1,
Daniel Veillardae0765b2008-07-31 19:54:59 +0000564 (const char *) str2, NULL, 0, 0,
565 msg, (const char *) str1, (const char *) str2);
Daniel Veillard30e76072006-03-09 14:13:55 +0000566 ctxt->valid = 0;
Daniel Veillard76d36452009-09-07 11:19:33 +0200567 } else {
568 __xmlRaiseError(schannel, NULL, NULL,
569 ctxt, NULL, XML_FROM_DTD, error,
570 XML_ERR_ERROR, NULL, 0, (const char *) str1,
571 (const char *) str2, NULL, 0, 0,
572 msg, (const char *) str1, (const char *) str2);
Daniel Veillard30e76072006-03-09 14:13:55 +0000573 }
Daniel Veillard24eb9782003-10-04 21:08:09 +0000574}
575
576/**
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000577 * xmlFatalErrMsgInt:
578 * @ctxt: an XML parser context
579 * @error: the error number
580 * @msg: the error message
581 * @val: an integer value
582 *
583 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
584 */
585static void
586xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000587 const char *msg, int val)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000588{
Daniel Veillard157fee02003-10-31 10:36:03 +0000589 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
590 (ctxt->instate == XML_PARSER_EOF))
591 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000592 if (ctxt != NULL)
593 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000594 __xmlRaiseError(NULL, NULL, NULL,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000595 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
596 NULL, 0, NULL, NULL, NULL, val, 0, msg, val);
Daniel Veillard30e76072006-03-09 14:13:55 +0000597 if (ctxt != NULL) {
598 ctxt->wellFormed = 0;
599 if (ctxt->recovery == 0)
600 ctxt->disableSAX = 1;
601 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000602}
603
604/**
Daniel Veillardf403d292003-10-05 13:51:35 +0000605 * xmlFatalErrMsgStrIntStr:
606 * @ctxt: an XML parser context
607 * @error: the error number
608 * @msg: the error message
609 * @str1: an string info
610 * @val: an integer value
611 * @str2: an string info
612 *
613 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
614 */
615static void
616xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
617 const char *msg, const xmlChar *str1, int val,
618 const xmlChar *str2)
619{
Daniel Veillard157fee02003-10-31 10:36:03 +0000620 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
621 (ctxt->instate == XML_PARSER_EOF))
622 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000623 if (ctxt != NULL)
624 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000625 __xmlRaiseError(NULL, NULL, NULL,
Daniel Veillardf403d292003-10-05 13:51:35 +0000626 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
627 NULL, 0, (const char *) str1, (const char *) str2,
628 NULL, val, 0, msg, str1, val, str2);
Daniel Veillard30e76072006-03-09 14:13:55 +0000629 if (ctxt != NULL) {
630 ctxt->wellFormed = 0;
631 if (ctxt->recovery == 0)
632 ctxt->disableSAX = 1;
633 }
Daniel Veillardf403d292003-10-05 13:51:35 +0000634}
635
636/**
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000637 * xmlFatalErrMsgStr:
638 * @ctxt: an XML parser context
639 * @error: the error number
640 * @msg: the error message
641 * @val: a string value
642 *
643 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
644 */
645static void
646xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000647 const char *msg, const xmlChar * val)
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000648{
Daniel Veillard157fee02003-10-31 10:36:03 +0000649 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
650 (ctxt->instate == XML_PARSER_EOF))
651 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000652 if (ctxt != NULL)
653 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000654 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000655 XML_FROM_PARSER, error, XML_ERR_FATAL,
656 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
657 val);
Daniel Veillard30e76072006-03-09 14:13:55 +0000658 if (ctxt != NULL) {
659 ctxt->wellFormed = 0;
660 if (ctxt->recovery == 0)
661 ctxt->disableSAX = 1;
662 }
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000663}
664
665/**
Daniel Veillardf403d292003-10-05 13:51:35 +0000666 * xmlErrMsgStr:
667 * @ctxt: an XML parser context
668 * @error: the error number
669 * @msg: the error message
670 * @val: a string value
671 *
672 * Handle a non fatal parser error
673 */
674static void
675xmlErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
676 const char *msg, const xmlChar * val)
677{
Daniel Veillard157fee02003-10-31 10:36:03 +0000678 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
679 (ctxt->instate == XML_PARSER_EOF))
680 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000681 if (ctxt != NULL)
682 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000683 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
Daniel Veillardf403d292003-10-05 13:51:35 +0000684 XML_FROM_PARSER, error, XML_ERR_ERROR,
685 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
686 val);
687}
688
689/**
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000690 * xmlNsErr:
691 * @ctxt: an XML parser context
692 * @error: the error number
693 * @msg: the message
694 * @info1: extra information string
695 * @info2: extra information string
696 *
697 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
698 */
699static void
700xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
701 const char *msg,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000702 const xmlChar * info1, const xmlChar * info2,
703 const xmlChar * info3)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000704{
Daniel Veillard157fee02003-10-31 10:36:03 +0000705 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
706 (ctxt->instate == XML_PARSER_EOF))
707 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000708 if (ctxt != NULL)
709 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000710 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000711 XML_ERR_ERROR, NULL, 0, (const char *) info1,
712 (const char *) info2, (const char *) info3, 0, 0, msg,
713 info1, info2, info3);
Daniel Veillard30e76072006-03-09 14:13:55 +0000714 if (ctxt != NULL)
715 ctxt->nsWellFormed = 0;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000716}
717
Daniel Veillard37334572008-07-31 08:20:02 +0000718/**
719 * xmlNsWarn
720 * @ctxt: an XML parser context
721 * @error: the error number
722 * @msg: the message
723 * @info1: extra information string
724 * @info2: extra information string
725 *
726 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
727 */
728static void
729xmlNsWarn(xmlParserCtxtPtr ctxt, xmlParserErrors error,
730 const char *msg,
731 const xmlChar * info1, const xmlChar * info2,
732 const xmlChar * info3)
733{
734 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
735 (ctxt->instate == XML_PARSER_EOF))
736 return;
Daniel Veillard37334572008-07-31 08:20:02 +0000737 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
738 XML_ERR_WARNING, NULL, 0, (const char *) info1,
739 (const char *) info2, (const char *) info3, 0, 0, msg,
740 info1, info2, info3);
741}
742
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000743/************************************************************************
744 * *
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000745 * Library wide options *
746 * *
747 ************************************************************************/
748
749/**
750 * xmlHasFeature:
751 * @feature: the feature to be examined
752 *
753 * Examines if the library has been compiled with a given feature.
754 *
755 * Returns a non-zero value if the feature exist, otherwise zero.
756 * Returns zero (0) if the feature does not exist or an unknown
757 * unknown feature is requested, non-zero otherwise.
758 */
759int
760xmlHasFeature(xmlFeature feature)
761{
762 switch (feature) {
Daniel Veillard602434d2005-09-12 09:20:31 +0000763 case XML_WITH_THREAD:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000764#ifdef LIBXML_THREAD_ENABLED
765 return(1);
766#else
767 return(0);
768#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000769 case XML_WITH_TREE:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000770#ifdef LIBXML_TREE_ENABLED
771 return(1);
772#else
773 return(0);
774#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000775 case XML_WITH_OUTPUT:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000776#ifdef LIBXML_OUTPUT_ENABLED
777 return(1);
778#else
779 return(0);
780#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000781 case XML_WITH_PUSH:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000782#ifdef LIBXML_PUSH_ENABLED
783 return(1);
784#else
785 return(0);
786#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000787 case XML_WITH_READER:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000788#ifdef LIBXML_READER_ENABLED
789 return(1);
790#else
791 return(0);
792#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000793 case XML_WITH_PATTERN:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000794#ifdef LIBXML_PATTERN_ENABLED
795 return(1);
796#else
797 return(0);
798#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000799 case XML_WITH_WRITER:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000800#ifdef LIBXML_WRITER_ENABLED
801 return(1);
802#else
803 return(0);
804#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000805 case XML_WITH_SAX1:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000806#ifdef LIBXML_SAX1_ENABLED
807 return(1);
808#else
809 return(0);
810#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000811 case XML_WITH_FTP:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000812#ifdef LIBXML_FTP_ENABLED
813 return(1);
814#else
815 return(0);
816#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000817 case XML_WITH_HTTP:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000818#ifdef LIBXML_HTTP_ENABLED
819 return(1);
820#else
821 return(0);
822#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000823 case XML_WITH_VALID:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000824#ifdef LIBXML_VALID_ENABLED
825 return(1);
826#else
827 return(0);
828#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000829 case XML_WITH_HTML:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000830#ifdef LIBXML_HTML_ENABLED
831 return(1);
832#else
833 return(0);
834#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000835 case XML_WITH_LEGACY:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000836#ifdef LIBXML_LEGACY_ENABLED
837 return(1);
838#else
839 return(0);
840#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000841 case XML_WITH_C14N:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000842#ifdef LIBXML_C14N_ENABLED
843 return(1);
844#else
845 return(0);
846#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000847 case XML_WITH_CATALOG:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000848#ifdef LIBXML_CATALOG_ENABLED
849 return(1);
850#else
851 return(0);
852#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000853 case XML_WITH_XPATH:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000854#ifdef LIBXML_XPATH_ENABLED
855 return(1);
856#else
857 return(0);
858#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000859 case XML_WITH_XPTR:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000860#ifdef LIBXML_XPTR_ENABLED
861 return(1);
862#else
863 return(0);
864#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000865 case XML_WITH_XINCLUDE:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000866#ifdef LIBXML_XINCLUDE_ENABLED
867 return(1);
868#else
869 return(0);
870#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000871 case XML_WITH_ICONV:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000872#ifdef LIBXML_ICONV_ENABLED
873 return(1);
874#else
875 return(0);
876#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000877 case XML_WITH_ISO8859X:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000878#ifdef LIBXML_ISO8859X_ENABLED
879 return(1);
880#else
881 return(0);
882#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000883 case XML_WITH_UNICODE:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000884#ifdef LIBXML_UNICODE_ENABLED
885 return(1);
886#else
887 return(0);
888#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000889 case XML_WITH_REGEXP:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000890#ifdef LIBXML_REGEXP_ENABLED
891 return(1);
892#else
893 return(0);
894#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000895 case XML_WITH_AUTOMATA:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000896#ifdef LIBXML_AUTOMATA_ENABLED
897 return(1);
898#else
899 return(0);
900#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000901 case XML_WITH_EXPR:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000902#ifdef LIBXML_EXPR_ENABLED
903 return(1);
904#else
905 return(0);
906#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000907 case XML_WITH_SCHEMAS:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000908#ifdef LIBXML_SCHEMAS_ENABLED
909 return(1);
910#else
911 return(0);
912#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000913 case XML_WITH_SCHEMATRON:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000914#ifdef LIBXML_SCHEMATRON_ENABLED
915 return(1);
916#else
917 return(0);
918#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000919 case XML_WITH_MODULES:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000920#ifdef LIBXML_MODULES_ENABLED
921 return(1);
922#else
923 return(0);
924#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000925 case XML_WITH_DEBUG:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000926#ifdef LIBXML_DEBUG_ENABLED
927 return(1);
928#else
929 return(0);
930#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000931 case XML_WITH_DEBUG_MEM:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000932#ifdef DEBUG_MEMORY_LOCATION
933 return(1);
934#else
935 return(0);
936#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000937 case XML_WITH_DEBUG_RUN:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000938#ifdef LIBXML_DEBUG_RUNTIME
939 return(1);
940#else
941 return(0);
Daniel Veillard34e3f642008-07-29 09:02:27 +0000942#endif
Daniel Veillard75acfee2006-07-13 06:29:56 +0000943 case XML_WITH_ZLIB:
944#ifdef LIBXML_ZLIB_ENABLED
945 return(1);
946#else
947 return(0);
948#endif
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000949 default:
950 break;
951 }
952 return(0);
953}
954
955/************************************************************************
956 * *
Daniel Veillarde57ec792003-09-10 10:50:59 +0000957 * SAX2 defaulted attributes handling *
958 * *
959 ************************************************************************/
960
961/**
962 * xmlDetectSAX2:
963 * @ctxt: an XML parser context
964 *
965 * Do the SAX2 detection and specific intialization
966 */
967static void
968xmlDetectSAX2(xmlParserCtxtPtr ctxt) {
969 if (ctxt == NULL) return;
Daniel Veillard81273902003-09-30 00:43:48 +0000970#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +0000971 if ((ctxt->sax) && (ctxt->sax->initialized == XML_SAX2_MAGIC) &&
972 ((ctxt->sax->startElementNs != NULL) ||
973 (ctxt->sax->endElementNs != NULL))) ctxt->sax2 = 1;
Daniel Veillard81273902003-09-30 00:43:48 +0000974#else
975 ctxt->sax2 = 1;
976#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +0000977
978 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
979 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
980 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
William M. Brack9f797ab2004-07-28 07:40:12 +0000981 if ((ctxt->str_xml==NULL) || (ctxt->str_xmlns==NULL) ||
982 (ctxt->str_xml_ns == NULL)) {
983 xmlErrMemory(ctxt, NULL);
984 }
Daniel Veillarde57ec792003-09-10 10:50:59 +0000985}
986
Daniel Veillarde57ec792003-09-10 10:50:59 +0000987typedef struct _xmlDefAttrs xmlDefAttrs;
988typedef xmlDefAttrs *xmlDefAttrsPtr;
989struct _xmlDefAttrs {
990 int nbAttrs; /* number of defaulted attributes on that element */
991 int maxAttrs; /* the size of the array */
Daniel Veillardae0765b2008-07-31 19:54:59 +0000992 const xmlChar *values[5]; /* array of localname/prefix/values/external */
Daniel Veillarde57ec792003-09-10 10:50:59 +0000993};
Daniel Veillarde57ec792003-09-10 10:50:59 +0000994
995/**
Daniel Veillard97c9ce22008-03-25 16:52:41 +0000996 * xmlAttrNormalizeSpace:
997 * @src: the source string
998 * @dst: the target string
999 *
1000 * Normalize the space in non CDATA attribute values:
1001 * If the attribute type is not CDATA, then the XML processor MUST further
1002 * process the normalized attribute value by discarding any leading and
1003 * trailing space (#x20) characters, and by replacing sequences of space
1004 * (#x20) characters by a single space (#x20) character.
1005 * Note that the size of dst need to be at least src, and if one doesn't need
1006 * to preserve dst (and it doesn't come from a dictionary or read-only) then
1007 * passing src as dst is just fine.
1008 *
1009 * Returns a pointer to the normalized value (dst) or NULL if no conversion
1010 * is needed.
1011 */
1012static xmlChar *
1013xmlAttrNormalizeSpace(const xmlChar *src, xmlChar *dst)
1014{
1015 if ((src == NULL) || (dst == NULL))
1016 return(NULL);
1017
1018 while (*src == 0x20) src++;
1019 while (*src != 0) {
1020 if (*src == 0x20) {
1021 while (*src == 0x20) src++;
1022 if (*src != 0)
1023 *dst++ = 0x20;
1024 } else {
1025 *dst++ = *src++;
1026 }
1027 }
1028 *dst = 0;
1029 if (dst == src)
1030 return(NULL);
1031 return(dst);
1032}
1033
1034/**
1035 * xmlAttrNormalizeSpace2:
1036 * @src: the source string
1037 *
1038 * Normalize the space in non CDATA attribute values, a slightly more complex
1039 * front end to avoid allocation problems when running on attribute values
1040 * coming from the input.
1041 *
1042 * Returns a pointer to the normalized value (dst) or NULL if no conversion
1043 * is needed.
1044 */
1045static const xmlChar *
Daniel Veillardae0765b2008-07-31 19:54:59 +00001046xmlAttrNormalizeSpace2(xmlParserCtxtPtr ctxt, xmlChar *src, int *len)
Daniel Veillard97c9ce22008-03-25 16:52:41 +00001047{
1048 int i;
1049 int remove_head = 0;
1050 int need_realloc = 0;
1051 const xmlChar *cur;
1052
1053 if ((ctxt == NULL) || (src == NULL) || (len == NULL))
1054 return(NULL);
1055 i = *len;
1056 if (i <= 0)
1057 return(NULL);
1058
1059 cur = src;
1060 while (*cur == 0x20) {
1061 cur++;
1062 remove_head++;
1063 }
1064 while (*cur != 0) {
1065 if (*cur == 0x20) {
1066 cur++;
1067 if ((*cur == 0x20) || (*cur == 0)) {
1068 need_realloc = 1;
1069 break;
1070 }
1071 } else
1072 cur++;
1073 }
1074 if (need_realloc) {
1075 xmlChar *ret;
1076
1077 ret = xmlStrndup(src + remove_head, i - remove_head + 1);
1078 if (ret == NULL) {
1079 xmlErrMemory(ctxt, NULL);
1080 return(NULL);
1081 }
1082 xmlAttrNormalizeSpace(ret, ret);
1083 *len = (int) strlen((const char *)ret);
1084 return(ret);
1085 } else if (remove_head) {
1086 *len -= remove_head;
Daniel Veillardae0765b2008-07-31 19:54:59 +00001087 memmove(src, src + remove_head, 1 + *len);
1088 return(src);
Daniel Veillard97c9ce22008-03-25 16:52:41 +00001089 }
1090 return(NULL);
1091}
1092
1093/**
Daniel Veillarde57ec792003-09-10 10:50:59 +00001094 * xmlAddDefAttrs:
1095 * @ctxt: an XML parser context
1096 * @fullname: the element fullname
1097 * @fullattr: the attribute fullname
1098 * @value: the attribute value
1099 *
1100 * Add a defaulted attribute for an element
1101 */
1102static void
1103xmlAddDefAttrs(xmlParserCtxtPtr ctxt,
1104 const xmlChar *fullname,
1105 const xmlChar *fullattr,
1106 const xmlChar *value) {
1107 xmlDefAttrsPtr defaults;
1108 int len;
1109 const xmlChar *name;
1110 const xmlChar *prefix;
1111
Daniel Veillard6a31b832008-03-26 14:06:44 +00001112 /*
1113 * Allows to detect attribute redefinitions
1114 */
1115 if (ctxt->attsSpecial != NULL) {
1116 if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1117 return;
1118 }
1119
Daniel Veillarde57ec792003-09-10 10:50:59 +00001120 if (ctxt->attsDefault == NULL) {
Daniel Veillard316a5c32005-01-23 22:56:39 +00001121 ctxt->attsDefault = xmlHashCreateDict(10, ctxt->dict);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001122 if (ctxt->attsDefault == NULL)
1123 goto mem_error;
1124 }
1125
1126 /*
Daniel Veillard079f6a72004-09-23 13:15:03 +00001127 * split the element name into prefix:localname , the string found
1128 * are within the DTD and then not associated to namespace names.
Daniel Veillarde57ec792003-09-10 10:50:59 +00001129 */
1130 name = xmlSplitQName3(fullname, &len);
1131 if (name == NULL) {
1132 name = xmlDictLookup(ctxt->dict, fullname, -1);
1133 prefix = NULL;
1134 } else {
1135 name = xmlDictLookup(ctxt->dict, name, -1);
1136 prefix = xmlDictLookup(ctxt->dict, fullname, len);
1137 }
1138
1139 /*
1140 * make sure there is some storage
1141 */
1142 defaults = xmlHashLookup2(ctxt->attsDefault, name, prefix);
1143 if (defaults == NULL) {
1144 defaults = (xmlDefAttrsPtr) xmlMalloc(sizeof(xmlDefAttrs) +
Daniel Veillardae0765b2008-07-31 19:54:59 +00001145 (4 * 5) * sizeof(const xmlChar *));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001146 if (defaults == NULL)
1147 goto mem_error;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001148 defaults->nbAttrs = 0;
Daniel Veillard079f6a72004-09-23 13:15:03 +00001149 defaults->maxAttrs = 4;
Daniel Veillard68b6e022008-03-31 09:26:00 +00001150 if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1151 defaults, NULL) < 0) {
1152 xmlFree(defaults);
1153 goto mem_error;
1154 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00001155 } else if (defaults->nbAttrs >= defaults->maxAttrs) {
Daniel Veillard079f6a72004-09-23 13:15:03 +00001156 xmlDefAttrsPtr temp;
1157
1158 temp = (xmlDefAttrsPtr) xmlRealloc(defaults, sizeof(xmlDefAttrs) +
Daniel Veillardae0765b2008-07-31 19:54:59 +00001159 (2 * defaults->maxAttrs * 5) * sizeof(const xmlChar *));
Daniel Veillard079f6a72004-09-23 13:15:03 +00001160 if (temp == NULL)
Daniel Veillarde57ec792003-09-10 10:50:59 +00001161 goto mem_error;
Daniel Veillard079f6a72004-09-23 13:15:03 +00001162 defaults = temp;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001163 defaults->maxAttrs *= 2;
Daniel Veillard68b6e022008-03-31 09:26:00 +00001164 if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1165 defaults, NULL) < 0) {
1166 xmlFree(defaults);
1167 goto mem_error;
1168 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00001169 }
1170
1171 /*
Daniel Veillard8874b942005-08-25 13:19:21 +00001172 * Split the element name into prefix:localname , the string found
Daniel Veillarde57ec792003-09-10 10:50:59 +00001173 * are within the DTD and hen not associated to namespace names.
1174 */
1175 name = xmlSplitQName3(fullattr, &len);
1176 if (name == NULL) {
1177 name = xmlDictLookup(ctxt->dict, fullattr, -1);
1178 prefix = NULL;
1179 } else {
1180 name = xmlDictLookup(ctxt->dict, name, -1);
1181 prefix = xmlDictLookup(ctxt->dict, fullattr, len);
1182 }
1183
Daniel Veillardae0765b2008-07-31 19:54:59 +00001184 defaults->values[5 * defaults->nbAttrs] = name;
1185 defaults->values[5 * defaults->nbAttrs + 1] = prefix;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001186 /* intern the string and precompute the end */
1187 len = xmlStrlen(value);
1188 value = xmlDictLookup(ctxt->dict, value, len);
Daniel Veillardae0765b2008-07-31 19:54:59 +00001189 defaults->values[5 * defaults->nbAttrs + 2] = value;
1190 defaults->values[5 * defaults->nbAttrs + 3] = value + len;
1191 if (ctxt->external)
1192 defaults->values[5 * defaults->nbAttrs + 4] = BAD_CAST "external";
1193 else
1194 defaults->values[5 * defaults->nbAttrs + 4] = NULL;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001195 defaults->nbAttrs++;
1196
1197 return;
1198
1199mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001200 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001201 return;
1202}
1203
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001204/**
1205 * xmlAddSpecialAttr:
1206 * @ctxt: an XML parser context
1207 * @fullname: the element fullname
1208 * @fullattr: the attribute fullname
1209 * @type: the attribute type
1210 *
Daniel Veillardac4118d2008-01-11 05:27:32 +00001211 * Register this attribute type
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001212 */
1213static void
1214xmlAddSpecialAttr(xmlParserCtxtPtr ctxt,
1215 const xmlChar *fullname,
1216 const xmlChar *fullattr,
1217 int type)
1218{
1219 if (ctxt->attsSpecial == NULL) {
Daniel Veillard316a5c32005-01-23 22:56:39 +00001220 ctxt->attsSpecial = xmlHashCreateDict(10, ctxt->dict);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001221 if (ctxt->attsSpecial == NULL)
1222 goto mem_error;
1223 }
1224
Daniel Veillardac4118d2008-01-11 05:27:32 +00001225 if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1226 return;
1227
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +00001228 xmlHashAddEntry2(ctxt->attsSpecial, fullname, fullattr,
1229 (void *) (long) type);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001230 return;
1231
1232mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001233 xmlErrMemory(ctxt, NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001234 return;
1235}
1236
Daniel Veillard4432df22003-09-28 18:58:27 +00001237/**
Daniel Veillardac4118d2008-01-11 05:27:32 +00001238 * xmlCleanSpecialAttrCallback:
1239 *
1240 * Removes CDATA attributes from the special attribute table
1241 */
1242static void
1243xmlCleanSpecialAttrCallback(void *payload, void *data,
1244 const xmlChar *fullname, const xmlChar *fullattr,
1245 const xmlChar *unused ATTRIBUTE_UNUSED) {
1246 xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) data;
1247
Daniel Veillardb3edafd2008-01-11 08:00:57 +00001248 if (((long) payload) == XML_ATTRIBUTE_CDATA) {
Daniel Veillardac4118d2008-01-11 05:27:32 +00001249 xmlHashRemoveEntry2(ctxt->attsSpecial, fullname, fullattr, NULL);
1250 }
1251}
1252
1253/**
1254 * xmlCleanSpecialAttr:
1255 * @ctxt: an XML parser context
1256 *
1257 * Trim the list of attributes defined to remove all those of type
1258 * CDATA as they are not special. This call should be done when finishing
1259 * to parse the DTD and before starting to parse the document root.
1260 */
1261static void
1262xmlCleanSpecialAttr(xmlParserCtxtPtr ctxt)
1263{
1264 if (ctxt->attsSpecial == NULL)
1265 return;
1266
1267 xmlHashScanFull(ctxt->attsSpecial, xmlCleanSpecialAttrCallback, ctxt);
1268
1269 if (xmlHashSize(ctxt->attsSpecial) == 0) {
1270 xmlHashFree(ctxt->attsSpecial, NULL);
1271 ctxt->attsSpecial = NULL;
1272 }
1273 return;
1274}
1275
1276/**
Daniel Veillard4432df22003-09-28 18:58:27 +00001277 * xmlCheckLanguageID:
1278 * @lang: pointer to the string value
1279 *
1280 * Checks that the value conforms to the LanguageID production:
1281 *
1282 * NOTE: this is somewhat deprecated, those productions were removed from
1283 * the XML Second edition.
1284 *
1285 * [33] LanguageID ::= Langcode ('-' Subcode)*
1286 * [34] Langcode ::= ISO639Code | IanaCode | UserCode
1287 * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
1288 * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
1289 * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
1290 * [38] Subcode ::= ([a-z] | [A-Z])+
1291 *
1292 * Returns 1 if correct 0 otherwise
1293 **/
1294int
1295xmlCheckLanguageID(const xmlChar * lang)
1296{
1297 const xmlChar *cur = lang;
1298
1299 if (cur == NULL)
1300 return (0);
1301 if (((cur[0] == 'i') && (cur[1] == '-')) ||
1302 ((cur[0] == 'I') && (cur[1] == '-'))) {
1303 /*
1304 * IANA code
1305 */
1306 cur += 2;
1307 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
1308 ((cur[0] >= 'a') && (cur[0] <= 'z')))
1309 cur++;
1310 } else if (((cur[0] == 'x') && (cur[1] == '-')) ||
1311 ((cur[0] == 'X') && (cur[1] == '-'))) {
1312 /*
1313 * User code
1314 */
1315 cur += 2;
1316 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
1317 ((cur[0] >= 'a') && (cur[0] <= 'z')))
1318 cur++;
1319 } else if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
1320 ((cur[0] >= 'a') && (cur[0] <= 'z'))) {
1321 /*
1322 * ISO639
1323 */
1324 cur++;
1325 if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
1326 ((cur[0] >= 'a') && (cur[0] <= 'z')))
1327 cur++;
1328 else
1329 return (0);
1330 } else
1331 return (0);
1332 while (cur[0] != 0) { /* non input consuming */
1333 if (cur[0] != '-')
1334 return (0);
1335 cur++;
1336 if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
1337 ((cur[0] >= 'a') && (cur[0] <= 'z')))
1338 cur++;
1339 else
1340 return (0);
1341 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
1342 ((cur[0] >= 'a') && (cur[0] <= 'z')))
1343 cur++;
1344 }
1345 return (1);
1346}
1347
Owen Taylor3473f882001-02-23 17:55:21 +00001348/************************************************************************
1349 * *
Daniel Veillard0161e632008-08-28 15:36:32 +00001350 * Parser stacks related functions and macros *
Owen Taylor3473f882001-02-23 17:55:21 +00001351 * *
1352 ************************************************************************/
1353
Daniel Veillard8ed10722009-08-20 19:17:36 +02001354static xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
1355 const xmlChar ** str);
Owen Taylor3473f882001-02-23 17:55:21 +00001356
Daniel Veillard0fb18932003-09-07 09:14:37 +00001357#ifdef SAX2
1358/**
1359 * nsPush:
1360 * @ctxt: an XML parser context
1361 * @prefix: the namespace prefix or NULL
1362 * @URL: the namespace name
1363 *
1364 * Pushes a new parser namespace on top of the ns stack
1365 *
William M. Brack7b9154b2003-09-27 19:23:50 +00001366 * Returns -1 in case of error, -2 if the namespace should be discarded
1367 * and the index in the stack otherwise.
Daniel Veillard0fb18932003-09-07 09:14:37 +00001368 */
1369static int
1370nsPush(xmlParserCtxtPtr ctxt, const xmlChar *prefix, const xmlChar *URL)
1371{
Daniel Veillarddca8cc72003-09-26 13:53:14 +00001372 if (ctxt->options & XML_PARSE_NSCLEAN) {
1373 int i;
1374 for (i = 0;i < ctxt->nsNr;i += 2) {
1375 if (ctxt->nsTab[i] == prefix) {
1376 /* in scope */
1377 if (ctxt->nsTab[i + 1] == URL)
1378 return(-2);
1379 /* out of scope keep it */
1380 break;
1381 }
1382 }
1383 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00001384 if ((ctxt->nsMax == 0) || (ctxt->nsTab == NULL)) {
1385 ctxt->nsMax = 10;
1386 ctxt->nsNr = 0;
1387 ctxt->nsTab = (const xmlChar **)
1388 xmlMalloc(ctxt->nsMax * sizeof(xmlChar *));
1389 if (ctxt->nsTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001390 xmlErrMemory(ctxt, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00001391 ctxt->nsMax = 0;
1392 return (-1);
1393 }
1394 } else if (ctxt->nsNr >= ctxt->nsMax) {
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00001395 const xmlChar ** tmp;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001396 ctxt->nsMax *= 2;
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00001397 tmp = (const xmlChar **) xmlRealloc((char *) ctxt->nsTab,
1398 ctxt->nsMax * sizeof(ctxt->nsTab[0]));
1399 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001400 xmlErrMemory(ctxt, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00001401 ctxt->nsMax /= 2;
1402 return (-1);
1403 }
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00001404 ctxt->nsTab = tmp;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001405 }
1406 ctxt->nsTab[ctxt->nsNr++] = prefix;
1407 ctxt->nsTab[ctxt->nsNr++] = URL;
1408 return (ctxt->nsNr);
1409}
1410/**
1411 * nsPop:
1412 * @ctxt: an XML parser context
1413 * @nr: the number to pop
1414 *
1415 * Pops the top @nr parser prefix/namespace from the ns stack
1416 *
1417 * Returns the number of namespaces removed
1418 */
1419static int
1420nsPop(xmlParserCtxtPtr ctxt, int nr)
1421{
1422 int i;
1423
1424 if (ctxt->nsTab == NULL) return(0);
1425 if (ctxt->nsNr < nr) {
1426 xmlGenericError(xmlGenericErrorContext, "Pbm popping %d NS\n", nr);
1427 nr = ctxt->nsNr;
1428 }
1429 if (ctxt->nsNr <= 0)
1430 return (0);
Daniel Veillard34e3f642008-07-29 09:02:27 +00001431
Daniel Veillard0fb18932003-09-07 09:14:37 +00001432 for (i = 0;i < nr;i++) {
1433 ctxt->nsNr--;
1434 ctxt->nsTab[ctxt->nsNr] = NULL;
1435 }
1436 return(nr);
1437}
1438#endif
1439
1440static int
1441xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt, int nr) {
1442 const xmlChar **atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001443 int *attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001444 int maxatts;
1445
1446 if (ctxt->atts == NULL) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00001447 maxatts = 55; /* allow for 10 attrs by default */
Daniel Veillard0fb18932003-09-07 09:14:37 +00001448 atts = (const xmlChar **)
1449 xmlMalloc(maxatts * sizeof(xmlChar *));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001450 if (atts == NULL) goto mem_error;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001451 ctxt->atts = atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001452 attallocs = (int *) xmlMalloc((maxatts / 5) * sizeof(int));
1453 if (attallocs == NULL) goto mem_error;
1454 ctxt->attallocs = attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001455 ctxt->maxatts = maxatts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001456 } else if (nr + 5 > ctxt->maxatts) {
1457 maxatts = (nr + 5) * 2;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001458 atts = (const xmlChar **) xmlRealloc((void *) ctxt->atts,
1459 maxatts * sizeof(const xmlChar *));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001460 if (atts == NULL) goto mem_error;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001461 ctxt->atts = atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001462 attallocs = (int *) xmlRealloc((void *) ctxt->attallocs,
1463 (maxatts / 5) * sizeof(int));
1464 if (attallocs == NULL) goto mem_error;
1465 ctxt->attallocs = attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001466 ctxt->maxatts = maxatts;
1467 }
1468 return(ctxt->maxatts);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001469mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001470 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001471 return(-1);
Daniel Veillard0fb18932003-09-07 09:14:37 +00001472}
1473
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001474/**
1475 * inputPush:
1476 * @ctxt: an XML parser context
Daniel Veillard9d06d302002-01-22 18:15:52 +00001477 * @value: the parser input
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001478 *
1479 * Pushes a new parser input on top of the input stack
Daniel Veillard9d06d302002-01-22 18:15:52 +00001480 *
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001481 * Returns -1 in case of error, the index in the stack otherwise
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001482 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001483int
Daniel Veillard1c732d22002-11-30 11:22:59 +00001484inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value)
1485{
Daniel Veillard36e5cd52004-11-02 14:52:23 +00001486 if ((ctxt == NULL) || (value == NULL))
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001487 return(-1);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001488 if (ctxt->inputNr >= ctxt->inputMax) {
1489 ctxt->inputMax *= 2;
1490 ctxt->inputTab =
1491 (xmlParserInputPtr *) xmlRealloc(ctxt->inputTab,
1492 ctxt->inputMax *
1493 sizeof(ctxt->inputTab[0]));
1494 if (ctxt->inputTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001495 xmlErrMemory(ctxt, NULL);
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001496 xmlFreeInputStream(value);
1497 ctxt->inputMax /= 2;
1498 value = NULL;
1499 return (-1);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001500 }
1501 }
1502 ctxt->inputTab[ctxt->inputNr] = value;
1503 ctxt->input = value;
1504 return (ctxt->inputNr++);
1505}
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001506/**
Daniel Veillard1c732d22002-11-30 11:22:59 +00001507 * inputPop:
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001508 * @ctxt: an XML parser context
1509 *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001510 * Pops the top parser input from the input stack
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001511 *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001512 * Returns the input just removed
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001513 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001514xmlParserInputPtr
Daniel Veillard1c732d22002-11-30 11:22:59 +00001515inputPop(xmlParserCtxtPtr ctxt)
1516{
1517 xmlParserInputPtr ret;
1518
Daniel Veillard36e5cd52004-11-02 14:52:23 +00001519 if (ctxt == NULL)
1520 return(NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001521 if (ctxt->inputNr <= 0)
Daniel Veillard24505b02005-07-28 23:49:35 +00001522 return (NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001523 ctxt->inputNr--;
1524 if (ctxt->inputNr > 0)
1525 ctxt->input = ctxt->inputTab[ctxt->inputNr - 1];
1526 else
1527 ctxt->input = NULL;
1528 ret = ctxt->inputTab[ctxt->inputNr];
Daniel Veillard24505b02005-07-28 23:49:35 +00001529 ctxt->inputTab[ctxt->inputNr] = NULL;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001530 return (ret);
1531}
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001532/**
1533 * nodePush:
1534 * @ctxt: an XML parser context
Daniel Veillard9d06d302002-01-22 18:15:52 +00001535 * @value: the element node
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001536 *
1537 * Pushes a new element node on top of the node stack
Daniel Veillard9d06d302002-01-22 18:15:52 +00001538 *
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001539 * Returns -1 in case of error, the index in the stack otherwise
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001540 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001541int
Daniel Veillard1c732d22002-11-30 11:22:59 +00001542nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value)
1543{
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001544 if (ctxt == NULL) return(0);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001545 if (ctxt->nodeNr >= ctxt->nodeMax) {
Daniel Veillardd0cf7f62004-11-09 16:17:02 +00001546 xmlNodePtr *tmp;
1547
1548 tmp = (xmlNodePtr *) xmlRealloc(ctxt->nodeTab,
1549 ctxt->nodeMax * 2 *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001550 sizeof(ctxt->nodeTab[0]));
Daniel Veillardd0cf7f62004-11-09 16:17:02 +00001551 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001552 xmlErrMemory(ctxt, NULL);
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001553 return (-1);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001554 }
Daniel Veillardd0cf7f62004-11-09 16:17:02 +00001555 ctxt->nodeTab = tmp;
1556 ctxt->nodeMax *= 2;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001557 }
Daniel Veillard8915c152008-08-26 13:05:34 +00001558 if ((((unsigned int) ctxt->nodeNr) > xmlParserMaxDepth) &&
1559 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00001560 xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
Daniel Veillard8915c152008-08-26 13:05:34 +00001561 "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
Daniel Veillard4aede2e2003-10-17 12:43:59 +00001562 xmlParserMaxDepth);
Daniel Veillard3b2e4e12003-02-03 08:52:58 +00001563 ctxt->instate = XML_PARSER_EOF;
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001564 return(-1);
Daniel Veillard3b2e4e12003-02-03 08:52:58 +00001565 }
Daniel Veillard1c732d22002-11-30 11:22:59 +00001566 ctxt->nodeTab[ctxt->nodeNr] = value;
1567 ctxt->node = value;
1568 return (ctxt->nodeNr++);
1569}
Daniel Veillard8915c152008-08-26 13:05:34 +00001570
Daniel Veillard1c732d22002-11-30 11:22:59 +00001571/**
1572 * nodePop:
1573 * @ctxt: an XML parser context
1574 *
1575 * Pops the top element node from the node stack
1576 *
1577 * Returns the node just removed
Owen Taylor3473f882001-02-23 17:55:21 +00001578 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001579xmlNodePtr
Daniel Veillard1c732d22002-11-30 11:22:59 +00001580nodePop(xmlParserCtxtPtr ctxt)
1581{
1582 xmlNodePtr ret;
1583
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001584 if (ctxt == NULL) return(NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001585 if (ctxt->nodeNr <= 0)
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001586 return (NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001587 ctxt->nodeNr--;
1588 if (ctxt->nodeNr > 0)
1589 ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1];
1590 else
1591 ctxt->node = NULL;
1592 ret = ctxt->nodeTab[ctxt->nodeNr];
Daniel Veillard24505b02005-07-28 23:49:35 +00001593 ctxt->nodeTab[ctxt->nodeNr] = NULL;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001594 return (ret);
1595}
Daniel Veillarda2351322004-06-27 12:08:10 +00001596
1597#ifdef LIBXML_PUSH_ENABLED
Daniel Veillard1c732d22002-11-30 11:22:59 +00001598/**
Daniel Veillarde57ec792003-09-10 10:50:59 +00001599 * nameNsPush:
1600 * @ctxt: an XML parser context
1601 * @value: the element name
1602 * @prefix: the element prefix
1603 * @URI: the element namespace name
1604 *
1605 * Pushes a new element name/prefix/URL on top of the name stack
1606 *
1607 * Returns -1 in case of error, the index in the stack otherwise
1608 */
1609static int
1610nameNsPush(xmlParserCtxtPtr ctxt, const xmlChar * value,
1611 const xmlChar *prefix, const xmlChar *URI, int nsNr)
1612{
1613 if (ctxt->nameNr >= ctxt->nameMax) {
1614 const xmlChar * *tmp;
1615 void **tmp2;
1616 ctxt->nameMax *= 2;
1617 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1618 ctxt->nameMax *
1619 sizeof(ctxt->nameTab[0]));
1620 if (tmp == NULL) {
1621 ctxt->nameMax /= 2;
1622 goto mem_error;
1623 }
1624 ctxt->nameTab = tmp;
1625 tmp2 = (void **) xmlRealloc((void * *)ctxt->pushTab,
1626 ctxt->nameMax * 3 *
1627 sizeof(ctxt->pushTab[0]));
1628 if (tmp2 == NULL) {
1629 ctxt->nameMax /= 2;
1630 goto mem_error;
1631 }
1632 ctxt->pushTab = tmp2;
1633 }
1634 ctxt->nameTab[ctxt->nameNr] = value;
1635 ctxt->name = value;
1636 ctxt->pushTab[ctxt->nameNr * 3] = (void *) prefix;
1637 ctxt->pushTab[ctxt->nameNr * 3 + 1] = (void *) URI;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +00001638 ctxt->pushTab[ctxt->nameNr * 3 + 2] = (void *) (long) nsNr;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001639 return (ctxt->nameNr++);
1640mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001641 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001642 return (-1);
1643}
1644/**
1645 * nameNsPop:
1646 * @ctxt: an XML parser context
1647 *
1648 * Pops the top element/prefix/URI name from the name stack
1649 *
1650 * Returns the name just removed
1651 */
1652static const xmlChar *
1653nameNsPop(xmlParserCtxtPtr ctxt)
1654{
1655 const xmlChar *ret;
1656
1657 if (ctxt->nameNr <= 0)
Daniel Veillard24505b02005-07-28 23:49:35 +00001658 return (NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001659 ctxt->nameNr--;
1660 if (ctxt->nameNr > 0)
1661 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1662 else
1663 ctxt->name = NULL;
1664 ret = ctxt->nameTab[ctxt->nameNr];
1665 ctxt->nameTab[ctxt->nameNr] = NULL;
1666 return (ret);
1667}
Daniel Veillarda2351322004-06-27 12:08:10 +00001668#endif /* LIBXML_PUSH_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +00001669
1670/**
Daniel Veillard1c732d22002-11-30 11:22:59 +00001671 * namePush:
1672 * @ctxt: an XML parser context
1673 * @value: the element name
1674 *
1675 * Pushes a new element name on top of the name stack
1676 *
Daniel Veillarde57ec792003-09-10 10:50:59 +00001677 * Returns -1 in case of error, the index in the stack otherwise
Daniel Veillard1c732d22002-11-30 11:22:59 +00001678 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001679int
Daniel Veillard2fdbd322003-08-18 12:15:38 +00001680namePush(xmlParserCtxtPtr ctxt, const xmlChar * value)
Daniel Veillard1c732d22002-11-30 11:22:59 +00001681{
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001682 if (ctxt == NULL) return (-1);
1683
Daniel Veillard1c732d22002-11-30 11:22:59 +00001684 if (ctxt->nameNr >= ctxt->nameMax) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00001685 const xmlChar * *tmp;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001686 ctxt->nameMax *= 2;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001687 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
Daniel Veillard1c732d22002-11-30 11:22:59 +00001688 ctxt->nameMax *
1689 sizeof(ctxt->nameTab[0]));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001690 if (tmp == NULL) {
1691 ctxt->nameMax /= 2;
1692 goto mem_error;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001693 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00001694 ctxt->nameTab = tmp;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001695 }
1696 ctxt->nameTab[ctxt->nameNr] = value;
1697 ctxt->name = value;
1698 return (ctxt->nameNr++);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001699mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001700 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001701 return (-1);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001702}
1703/**
1704 * namePop:
1705 * @ctxt: an XML parser context
1706 *
1707 * Pops the top element name from the name stack
1708 *
1709 * Returns the name just removed
1710 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001711const xmlChar *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001712namePop(xmlParserCtxtPtr ctxt)
1713{
Daniel Veillard2fdbd322003-08-18 12:15:38 +00001714 const xmlChar *ret;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001715
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001716 if ((ctxt == NULL) || (ctxt->nameNr <= 0))
1717 return (NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001718 ctxt->nameNr--;
1719 if (ctxt->nameNr > 0)
1720 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1721 else
1722 ctxt->name = NULL;
1723 ret = ctxt->nameTab[ctxt->nameNr];
Daniel Veillard24505b02005-07-28 23:49:35 +00001724 ctxt->nameTab[ctxt->nameNr] = NULL;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001725 return (ret);
1726}
Owen Taylor3473f882001-02-23 17:55:21 +00001727
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001728static int spacePush(xmlParserCtxtPtr ctxt, int val) {
Owen Taylor3473f882001-02-23 17:55:21 +00001729 if (ctxt->spaceNr >= ctxt->spaceMax) {
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00001730 int *tmp;
1731
Owen Taylor3473f882001-02-23 17:55:21 +00001732 ctxt->spaceMax *= 2;
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00001733 tmp = (int *) xmlRealloc(ctxt->spaceTab,
1734 ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
1735 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001736 xmlErrMemory(ctxt, NULL);
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001737 ctxt->spaceMax /=2;
1738 return(-1);
Owen Taylor3473f882001-02-23 17:55:21 +00001739 }
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00001740 ctxt->spaceTab = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00001741 }
1742 ctxt->spaceTab[ctxt->spaceNr] = val;
1743 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr];
1744 return(ctxt->spaceNr++);
1745}
1746
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001747static int spacePop(xmlParserCtxtPtr ctxt) {
Owen Taylor3473f882001-02-23 17:55:21 +00001748 int ret;
1749 if (ctxt->spaceNr <= 0) return(0);
1750 ctxt->spaceNr--;
1751 if (ctxt->spaceNr > 0)
1752 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1];
1753 else
Daniel Veillarddbcbbd22006-06-18 19:55:20 +00001754 ctxt->space = &ctxt->spaceTab[0];
Owen Taylor3473f882001-02-23 17:55:21 +00001755 ret = ctxt->spaceTab[ctxt->spaceNr];
1756 ctxt->spaceTab[ctxt->spaceNr] = -1;
1757 return(ret);
1758}
1759
1760/*
1761 * Macros for accessing the content. Those should be used only by the parser,
1762 * and not exported.
1763 *
 * Dirty macros, i.e. one often needs to make assumptions about the context
 *                    to use them
1766 *
1767 * CUR_PTR return the current pointer to the xmlChar to be parsed.
1768 * To be used with extreme caution since operations consuming
1769 * characters may move the input buffer to a different location !
 *   CUR     returns the current xmlChar value, i.e. an 8 bit value.
 *           This should be used internally by the parser
 *           only to compare to ASCII values otherwise it would break when
 *           running with UTF-8 encoding.
1774 * RAW same as CUR but in the input buffer, bypass any token
1775 * extraction that may have been done
 *   NXT(n)  returns the n'th next xmlChar. Same as CUR, it should be used only
 *           to compare against ASCII based substrings.
1778 * SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
Daniel Veillard77a90a72003-03-22 00:04:05 +00001779 * strings without newlines within the parser.
 *   NEXT1   Skip 1 xmlChar, and must also be used only to skip 1 non-newline
 *           ASCII defined char within the parser.
Owen Taylor3473f882001-02-23 17:55:21 +00001782 * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
1783 *
1784 * NEXT Skip to the next character, this does the proper decoding
1785 * in UTF-8 mode. It also pop-up unfinished entities on the fly.
Daniel Veillard77a90a72003-03-22 00:04:05 +00001786 * NEXTL(l) Skip the current unicode character of l xmlChars long.
Owen Taylor3473f882001-02-23 17:55:21 +00001787 * CUR_CHAR(l) returns the current unicode character (int), set l
1788 * to the number of xmlChars used for the encoding [0-5].
1789 * CUR_SCHAR same but operate on a string instead of the context
1790 * COPY_BUF copy the current unicode char to the target buffer, increment
1791 * the index
1792 * GROW, SHRINK handling of input buffers
1793 */
1794
/*
 * Raw accessors on the current input of the parser context; they all
 * expect a variable named ctxt to be in scope at the point of use.
 */
#define RAW (*ctxt->input->cur)
#define CUR (*ctxt->input->cur)
#define NXT(val) ctxt->input->cur[(val)]
#define CUR_PTR ctxt->input->cur

/*
 * CMPn(s, c1, ..., cn): true when the first n bytes at s are c1 ... cn.
 * The comparison is done on unsigned bytes and stops at the first
 * mismatch. The argument s is parenthesized before the cast so that the
 * cast applies to the whole expression passed by the caller.
 */
#define CMP4( s, c1, c2, c3, c4 ) \
  ( ((unsigned char *) (s))[ 0 ] == c1 && \
    ((unsigned char *) (s))[ 1 ] == c2 && \
    ((unsigned char *) (s))[ 2 ] == c3 && \
    ((unsigned char *) (s))[ 3 ] == c4 )
#define CMP5( s, c1, c2, c3, c4, c5 ) \
  ( CMP4( s, c1, c2, c3, c4 ) && ((unsigned char *) (s))[ 4 ] == c5 )
#define CMP6( s, c1, c2, c3, c4, c5, c6 ) \
  ( CMP5( s, c1, c2, c3, c4, c5 ) && ((unsigned char *) (s))[ 5 ] == c6 )
#define CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) \
  ( CMP6( s, c1, c2, c3, c4, c5, c6 ) && ((unsigned char *) (s))[ 6 ] == c7 )
#define CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) \
  ( CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) && ((unsigned char *) (s))[ 7 ] == c8 )
#define CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) \
  ( CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) && \
    ((unsigned char *) (s))[ 8 ] == c9 )
#define CMP10( s, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 ) \
  ( CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) && \
    ((unsigned char *) (s))[ 9 ] == c10 )
1817
/*
 * SKIP(val): advance the current input by val xmlChars, updating nbChars
 * and the column only (no newline handling). A '%' at the new position
 * triggers parameter entity handling, and an exhausted input that cannot
 * be grown is popped.
 */
#define SKIP(val) do {							\
    ctxt->nbChars += (val),ctxt->input->cur += (val),ctxt->input->col+=(val);			\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
    if ((*ctxt->input->cur == 0) &&					\
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))		\
	    xmlPopInput(ctxt);						\
  } while (0)

/*
 * SKIPL(val): same as SKIP but walks the val xmlChars one by one so that
 * newlines update the line and column counters. The bound is
 * parenthesized so that a compound expression passed as val is compared
 * as a whole.
 */
#define SKIPL(val) do {							\
    int skipl;								\
    for(skipl=0; skipl<(val); skipl++) {				\
	if (*(ctxt->input->cur) == '\n') {				\
	ctxt->input->line++; ctxt->input->col = 1;			\
	} else ctxt->input->col++;					\
	ctxt->nbChars++;						\
	ctxt->input->cur++;						\
    }									\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
    if ((*ctxt->input->cur == 0) &&					\
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))		\
	    xmlPopInput(ctxt);						\
  } while (0)
1840
Daniel Veillarda880b122003-04-21 21:36:41 +00001841#define SHRINK if ((ctxt->progressive == 0) && \
Daniel Veillard6155d8a2003-08-19 15:01:28 +00001842 (ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \
1843 (ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \
Daniel Veillard46de64e2002-05-29 08:21:33 +00001844 xmlSHRINK (ctxt);
1845
1846static void xmlSHRINK (xmlParserCtxtPtr ctxt) {
1847 xmlParserInputShrink(ctxt->input);
1848 if ((*ctxt->input->cur == 0) &&
1849 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1850 xmlPopInput(ctxt);
Daniel Veillard48b2f892001-02-25 16:11:03 +00001851 }
Owen Taylor3473f882001-02-23 17:55:21 +00001852
Daniel Veillarda880b122003-04-21 21:36:41 +00001853#define GROW if ((ctxt->progressive == 0) && \
1854 (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK)) \
Daniel Veillard46de64e2002-05-29 08:21:33 +00001855 xmlGROW (ctxt);
1856
1857static void xmlGROW (xmlParserCtxtPtr ctxt) {
1858 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1859 if ((*ctxt->input->cur == 0) &&
1860 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1861 xmlPopInput(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +00001862}
Owen Taylor3473f882001-02-23 17:55:21 +00001863
#define SKIP_BLANKS xmlSkipBlankChars(ctxt)

#define NEXT xmlNextChar(ctxt)

/*
 * NEXT1: step over one xmlChar, updating the column and nbChars, and
 * refill the input when a 0 byte is reached. No newline handling and no
 * input popping is done here.
 */
#define NEXT1 {								\
	ctxt->input->col++;						\
	ctxt->input->cur++;						\
	ctxt->nbChars++;						\
	if (*ctxt->input->cur == 0)					\
	    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);		\
    }

/*
 * NEXTL(l): step over the current character, which is l xmlChars long,
 * keeping the line and column counters up to date. The length is
 * parenthesized so a compound expression is added as a whole.
 */
#define NEXTL(l) do {							\
    if (*(ctxt->input->cur) == '\n') {					\
	ctxt->input->line++; ctxt->input->col = 1;			\
    } else ctxt->input->col++;						\
    ctxt->input->cur += (l);				\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
  } while (0)

#define CUR_CHAR(l) xmlCurrentChar(ctxt, &(l))
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, (s), &(l))

/*
 * COPY_BUF(l,b,i,v): append character v to buffer b at index i and advance
 * i; a one xmlChar long character is stored directly, anything else goes
 * through xmlCopyCharMultiByte(). Arguments are parenthesized so that the
 * cast and the indexing apply to the whole expressions passed in.
 * NOTE: this expands to an unbraced if/else, the caller provides the
 * final semicolon.
 */
#define COPY_BUF(l,b,i,v)						\
    if ((l) == 1) (b)[(i)++] = (xmlChar) (v);				\
    else (i) += xmlCopyCharMultiByte(&(b)[(i)],(v))
Owen Taylor3473f882001-02-23 17:55:21 +00001890
1891/**
1892 * xmlSkipBlankChars:
1893 * @ctxt: the XML parser context
1894 *
1895 * skip all blanks character found at that point in the input streams.
1896 * It pops up finished entities in the process if allowable at that point.
1897 *
1898 * Returns the number of space chars skipped
1899 */
1900
1901int
1902xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
Daniel Veillard02141ea2001-04-30 11:46:40 +00001903 int res = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001904
1905 /*
1906 * It's Okay to use CUR/NEXT here since all the blanks are on
1907 * the ASCII range.
1908 */
Daniel Veillard02141ea2001-04-30 11:46:40 +00001909 if ((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) {
1910 const xmlChar *cur;
Owen Taylor3473f882001-02-23 17:55:21 +00001911 /*
Daniel Veillard02141ea2001-04-30 11:46:40 +00001912 * if we are in the document content, go really fast
Owen Taylor3473f882001-02-23 17:55:21 +00001913 */
Daniel Veillard02141ea2001-04-30 11:46:40 +00001914 cur = ctxt->input->cur;
William M. Brack76e95df2003-10-18 16:20:14 +00001915 while (IS_BLANK_CH(*cur)) {
Daniel Veillard02141ea2001-04-30 11:46:40 +00001916 if (*cur == '\n') {
1917 ctxt->input->line++; ctxt->input->col = 1;
1918 }
1919 cur++;
1920 res++;
1921 if (*cur == 0) {
1922 ctxt->input->cur = cur;
1923 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1924 cur = ctxt->input->cur;
1925 }
1926 }
1927 ctxt->input->cur = cur;
1928 } else {
1929 int cur;
1930 do {
1931 cur = CUR;
Daniel Veillard7da92702005-01-23 20:15:53 +00001932 while (IS_BLANK_CH(cur)) { /* CHECKED tstblanks.xml */
Daniel Veillard02141ea2001-04-30 11:46:40 +00001933 NEXT;
1934 cur = CUR;
1935 res++;
1936 }
1937 while ((cur == 0) && (ctxt->inputNr > 1) &&
1938 (ctxt->instate != XML_PARSER_COMMENT)) {
1939 xmlPopInput(ctxt);
1940 cur = CUR;
1941 }
1942 /*
1943 * Need to handle support of entities branching here
1944 */
1945 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);
1946 } while (IS_BLANK(cur)); /* CHECKED tstblanks.xml */
1947 }
Owen Taylor3473f882001-02-23 17:55:21 +00001948 return(res);
1949}
1950
1951/************************************************************************
1952 * *
1953 * Commodity functions to handle entities *
1954 * *
1955 ************************************************************************/
1956
1957/**
1958 * xmlPopInput:
1959 * @ctxt: an XML parser context
1960 *
1961 * xmlPopInput: the current input pointed by ctxt->input came to an end
1962 * pop it and return the next char.
1963 *
1964 * Returns the current xmlChar in the parser context
1965 */
1966xmlChar
1967xmlPopInput(xmlParserCtxtPtr ctxt) {
Daniel Veillarda82b1822004-11-08 16:24:57 +00001968 if ((ctxt == NULL) || (ctxt->inputNr <= 1)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00001969 if (xmlParserDebugEntities)
1970 xmlGenericError(xmlGenericErrorContext,
1971 "Popping input %d\n", ctxt->inputNr);
1972 xmlFreeInputStream(inputPop(ctxt));
Daniel Veillard561b7f82002-03-20 21:55:57 +00001973 if ((*ctxt->input->cur == 0) &&
Owen Taylor3473f882001-02-23 17:55:21 +00001974 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1975 return(xmlPopInput(ctxt));
1976 return(CUR);
1977}
1978
1979/**
1980 * xmlPushInput:
1981 * @ctxt: an XML parser context
1982 * @input: an XML parser input fragment (entity, XML fragment ...).
1983 *
1984 * xmlPushInput: switch to a new input stream which is stacked on top
1985 * of the previous one(s).
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001986 * Returns -1 in case of error or the index in the input stack
Owen Taylor3473f882001-02-23 17:55:21 +00001987 */
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001988int
Owen Taylor3473f882001-02-23 17:55:21 +00001989xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001990 int ret;
1991 if (input == NULL) return(-1);
Owen Taylor3473f882001-02-23 17:55:21 +00001992
1993 if (xmlParserDebugEntities) {
1994 if ((ctxt->input != NULL) && (ctxt->input->filename))
1995 xmlGenericError(xmlGenericErrorContext,
1996 "%s(%d): ", ctxt->input->filename,
1997 ctxt->input->line);
1998 xmlGenericError(xmlGenericErrorContext,
1999 "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
2000 }
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00002001 ret = inputPush(ctxt, input);
Owen Taylor3473f882001-02-23 17:55:21 +00002002 GROW;
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00002003 return(ret);
Owen Taylor3473f882001-02-23 17:55:21 +00002004}
2005
2006/**
2007 * xmlParseCharRef:
2008 * @ctxt: an XML parser context
2009 *
2010 * parse Reference declarations
2011 *
2012 * [66] CharRef ::= '&#' [0-9]+ ';' |
2013 * '&#x' [0-9a-fA-F]+ ';'
2014 *
2015 * [ WFC: Legal Character ]
2016 * Characters referred to using character references must match the
2017 * production for Char.
2018 *
2019 * Returns the value parsed (as an int), 0 in case of error
2020 */
2021int
2022xmlParseCharRef(xmlParserCtxtPtr ctxt) {
Daniel Veillard50582112001-03-26 22:52:16 +00002023 unsigned int val = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002024 int count = 0;
Daniel Veillard37fd3072004-06-03 11:22:31 +00002025 unsigned int outofrange = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002026
Owen Taylor3473f882001-02-23 17:55:21 +00002027 /*
2028 * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
2029 */
Daniel Veillard561b7f82002-03-20 21:55:57 +00002030 if ((RAW == '&') && (NXT(1) == '#') &&
Owen Taylor3473f882001-02-23 17:55:21 +00002031 (NXT(2) == 'x')) {
2032 SKIP(3);
2033 GROW;
2034 while (RAW != ';') { /* loop blocked by count */
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00002035 if (count++ > 20) {
2036 count = 0;
2037 GROW;
2038 }
2039 if ((RAW >= '0') && (RAW <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +00002040 val = val * 16 + (CUR - '0');
2041 else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
2042 val = val * 16 + (CUR - 'a') + 10;
2043 else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
2044 val = val * 16 + (CUR - 'A') + 10;
2045 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002046 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002047 val = 0;
2048 break;
2049 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00002050 if (val > 0x10FFFF)
2051 outofrange = val;
2052
Owen Taylor3473f882001-02-23 17:55:21 +00002053 NEXT;
2054 count++;
2055 }
2056 if (RAW == ';') {
2057 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
Daniel Veillard77a90a72003-03-22 00:04:05 +00002058 ctxt->input->col++;
Owen Taylor3473f882001-02-23 17:55:21 +00002059 ctxt->nbChars ++;
2060 ctxt->input->cur++;
2061 }
Daniel Veillard561b7f82002-03-20 21:55:57 +00002062 } else if ((RAW == '&') && (NXT(1) == '#')) {
Owen Taylor3473f882001-02-23 17:55:21 +00002063 SKIP(2);
2064 GROW;
2065 while (RAW != ';') { /* loop blocked by count */
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00002066 if (count++ > 20) {
2067 count = 0;
2068 GROW;
2069 }
2070 if ((RAW >= '0') && (RAW <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +00002071 val = val * 10 + (CUR - '0');
2072 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002073 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002074 val = 0;
2075 break;
2076 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00002077 if (val > 0x10FFFF)
2078 outofrange = val;
2079
Owen Taylor3473f882001-02-23 17:55:21 +00002080 NEXT;
2081 count++;
2082 }
2083 if (RAW == ';') {
2084 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
Daniel Veillard77a90a72003-03-22 00:04:05 +00002085 ctxt->input->col++;
Owen Taylor3473f882001-02-23 17:55:21 +00002086 ctxt->nbChars ++;
2087 ctxt->input->cur++;
2088 }
2089 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002090 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002091 }
2092
2093 /*
2094 * [ WFC: Legal Character ]
2095 * Characters referred to using character references must match the
2096 * production for Char.
2097 */
Daniel Veillard37fd3072004-06-03 11:22:31 +00002098 if ((IS_CHAR(val) && (outofrange == 0))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002099 return(val);
2100 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00002101 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2102 "xmlParseCharRef: invalid xmlChar value %d\n",
2103 val);
Owen Taylor3473f882001-02-23 17:55:21 +00002104 }
2105 return(0);
2106}
2107
2108/**
2109 * xmlParseStringCharRef:
2110 * @ctxt: an XML parser context
2111 * @str: a pointer to an index in the string
2112 *
2113 * parse Reference declarations, variant parsing from a string rather
2114 * than an an input flow.
2115 *
2116 * [66] CharRef ::= '&#' [0-9]+ ';' |
2117 * '&#x' [0-9a-fA-F]+ ';'
2118 *
2119 * [ WFC: Legal Character ]
2120 * Characters referred to using character references must match the
2121 * production for Char.
2122 *
2123 * Returns the value parsed (as an int), 0 in case of error, str will be
2124 * updated to the current value of the index
2125 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00002126static int
Owen Taylor3473f882001-02-23 17:55:21 +00002127xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
2128 const xmlChar *ptr;
2129 xmlChar cur;
Daniel Veillard37fd3072004-06-03 11:22:31 +00002130 unsigned int val = 0;
2131 unsigned int outofrange = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002132
2133 if ((str == NULL) || (*str == NULL)) return(0);
2134 ptr = *str;
2135 cur = *ptr;
2136 if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
2137 ptr += 3;
2138 cur = *ptr;
2139 while (cur != ';') { /* Non input consuming loop */
2140 if ((cur >= '0') && (cur <= '9'))
2141 val = val * 16 + (cur - '0');
2142 else if ((cur >= 'a') && (cur <= 'f'))
2143 val = val * 16 + (cur - 'a') + 10;
2144 else if ((cur >= 'A') && (cur <= 'F'))
2145 val = val * 16 + (cur - 'A') + 10;
2146 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002147 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002148 val = 0;
2149 break;
2150 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00002151 if (val > 0x10FFFF)
2152 outofrange = val;
2153
Owen Taylor3473f882001-02-23 17:55:21 +00002154 ptr++;
2155 cur = *ptr;
2156 }
2157 if (cur == ';')
2158 ptr++;
2159 } else if ((cur == '&') && (ptr[1] == '#')){
2160 ptr += 2;
2161 cur = *ptr;
2162 while (cur != ';') { /* Non input consuming loops */
2163 if ((cur >= '0') && (cur <= '9'))
2164 val = val * 10 + (cur - '0');
2165 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002166 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002167 val = 0;
2168 break;
2169 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00002170 if (val > 0x10FFFF)
2171 outofrange = val;
2172
Owen Taylor3473f882001-02-23 17:55:21 +00002173 ptr++;
2174 cur = *ptr;
2175 }
2176 if (cur == ';')
2177 ptr++;
2178 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002179 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002180 return(0);
2181 }
2182 *str = ptr;
2183
2184 /*
2185 * [ WFC: Legal Character ]
2186 * Characters referred to using character references must match the
2187 * production for Char.
2188 */
Daniel Veillard37fd3072004-06-03 11:22:31 +00002189 if ((IS_CHAR(val) && (outofrange == 0))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002190 return(val);
2191 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00002192 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2193 "xmlParseStringCharRef: invalid xmlChar value %d\n",
2194 val);
Owen Taylor3473f882001-02-23 17:55:21 +00002195 }
2196 return(0);
2197}
2198
2199/**
Daniel Veillardf5582f12002-06-11 10:08:16 +00002200 * xmlNewBlanksWrapperInputStream:
2201 * @ctxt: an XML parser context
2202 * @entity: an Entity pointer
2203 *
2204 * Create a new input stream for wrapping
2205 * blanks around a PEReference
2206 *
2207 * Returns the new input stream or NULL
2208 */
2209
2210static void deallocblankswrapper (xmlChar *str) {xmlFree(str);}
2211
Daniel Veillardf4862f02002-09-10 11:13:43 +00002212static xmlParserInputPtr
Daniel Veillardf5582f12002-06-11 10:08:16 +00002213xmlNewBlanksWrapperInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
2214 xmlParserInputPtr input;
2215 xmlChar *buffer;
2216 size_t length;
2217 if (entity == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002218 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
2219 "xmlNewBlanksWrapperInputStream entity\n");
Daniel Veillardf5582f12002-06-11 10:08:16 +00002220 return(NULL);
2221 }
2222 if (xmlParserDebugEntities)
2223 xmlGenericError(xmlGenericErrorContext,
2224 "new blanks wrapper for entity: %s\n", entity->name);
2225 input = xmlNewInputStream(ctxt);
2226 if (input == NULL) {
2227 return(NULL);
2228 }
2229 length = xmlStrlen(entity->name) + 5;
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002230 buffer = xmlMallocAtomic(length);
Daniel Veillardf5582f12002-06-11 10:08:16 +00002231 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002232 xmlErrMemory(ctxt, NULL);
Daniel Veillard68b6e022008-03-31 09:26:00 +00002233 xmlFree(input);
Daniel Veillardf5582f12002-06-11 10:08:16 +00002234 return(NULL);
2235 }
2236 buffer [0] = ' ';
2237 buffer [1] = '%';
2238 buffer [length-3] = ';';
2239 buffer [length-2] = ' ';
2240 buffer [length-1] = 0;
2241 memcpy(buffer + 2, entity->name, length - 5);
2242 input->free = deallocblankswrapper;
2243 input->base = buffer;
2244 input->cur = buffer;
2245 input->length = length;
2246 input->end = &buffer[length];
2247 return(input);
2248}
2249
2250/**
Owen Taylor3473f882001-02-23 17:55:21 +00002251 * xmlParserHandlePEReference:
2252 * @ctxt: the parser context
2253 *
2254 * [69] PEReference ::= '%' Name ';'
2255 *
2256 * [ WFC: No Recursion ]
2257 * A parsed entity must not contain a recursive
2258 * reference to itself, either directly or indirectly.
2259 *
2260 * [ WFC: Entity Declared ]
2261 * In a document without any DTD, a document with only an internal DTD
2262 * subset which contains no parameter entity references, or a document
2263 * with "standalone='yes'", ... ... The declaration of a parameter
2264 * entity must precede any reference to it...
2265 *
2266 * [ VC: Entity Declared ]
2267 * In a document with an external subset or external parameter entities
2268 * with "standalone='no'", ... ... The declaration of a parameter entity
2269 * must precede any reference to it...
2270 *
2271 * [ WFC: In DTD ]
2272 * Parameter-entity references may only appear in the DTD.
2273 * NOTE: misleading but this is handled.
2274 *
2275 * A PEReference may have been detected in the current input stream
2276 * the handling is done accordingly to
2277 * http://www.w3.org/TR/REC-xml#entproc
2278 * i.e.
2279 * - Included in literal in entity values
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002280 * - Included as Parameter Entity reference within DTDs
Owen Taylor3473f882001-02-23 17:55:21 +00002281 */
2282void
2283xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002284 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00002285 xmlEntityPtr entity = NULL;
2286 xmlParserInputPtr input;
2287
Owen Taylor3473f882001-02-23 17:55:21 +00002288 if (RAW != '%') return;
2289 switch(ctxt->instate) {
2290 case XML_PARSER_CDATA_SECTION:
2291 return;
2292 case XML_PARSER_COMMENT:
2293 return;
2294 case XML_PARSER_START_TAG:
2295 return;
2296 case XML_PARSER_END_TAG:
2297 return;
2298 case XML_PARSER_EOF:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002299 xmlFatalErr(ctxt, XML_ERR_PEREF_AT_EOF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002300 return;
2301 case XML_PARSER_PROLOG:
2302 case XML_PARSER_START:
2303 case XML_PARSER_MISC:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002304 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_PROLOG, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002305 return;
2306 case XML_PARSER_ENTITY_DECL:
2307 case XML_PARSER_CONTENT:
2308 case XML_PARSER_ATTRIBUTE_VALUE:
2309 case XML_PARSER_PI:
2310 case XML_PARSER_SYSTEM_LITERAL:
Daniel Veillard4a7ae502002-02-18 19:18:17 +00002311 case XML_PARSER_PUBLIC_LITERAL:
Owen Taylor3473f882001-02-23 17:55:21 +00002312 /* we just ignore it there */
2313 return;
2314 case XML_PARSER_EPILOG:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002315 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_EPILOG, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002316 return;
2317 case XML_PARSER_ENTITY_VALUE:
2318 /*
2319 * NOTE: in the case of entity values, we don't do the
2320 * substitution here since we need the literal
2321 * entity value to be able to save the internal
2322 * subset of the document.
2323 * This will be handled by xmlStringDecodeEntities
2324 */
2325 return;
2326 case XML_PARSER_DTD:
2327 /*
2328 * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
2329 * In the internal DTD subset, parameter-entity references
2330 * can occur only where markup declarations can occur, not
2331 * within markup declarations.
2332 * In that case this is handled in xmlParseMarkupDecl
2333 */
2334 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
2335 return;
William M. Brack76e95df2003-10-18 16:20:14 +00002336 if (IS_BLANK_CH(NXT(1)) || NXT(1) == 0)
Daniel Veillardf5582f12002-06-11 10:08:16 +00002337 return;
Owen Taylor3473f882001-02-23 17:55:21 +00002338 break;
2339 case XML_PARSER_IGNORE:
2340 return;
2341 }
2342
2343 NEXT;
2344 name = xmlParseName(ctxt);
2345 if (xmlParserDebugEntities)
2346 xmlGenericError(xmlGenericErrorContext,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002347 "PEReference: %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00002348 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002349 xmlFatalErr(ctxt, XML_ERR_PEREF_NO_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002350 } else {
2351 if (RAW == ';') {
2352 NEXT;
2353 if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL))
2354 entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
2355 if (entity == NULL) {
2356
2357 /*
2358 * [ WFC: Entity Declared ]
2359 * In a document without any DTD, a document with only an
2360 * internal DTD subset which contains no parameter entity
2361 * references, or a document with "standalone='yes'", ...
2362 * ... The declaration of a parameter entity must precede
2363 * any reference to it...
2364 */
2365 if ((ctxt->standalone == 1) ||
2366 ((ctxt->hasExternalSubset == 0) &&
2367 (ctxt->hasPErefs == 0))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00002368 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00002369 "PEReference: %%%s; not found\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00002370 } else {
2371 /*
2372 * [ VC: Entity Declared ]
2373 * In a document with an external subset or external
2374 * parameter entities with "standalone='no'", ...
2375 * ... The declaration of a parameter entity must precede
2376 * any reference to it...
2377 */
Daniel Veillard24eb9782003-10-04 21:08:09 +00002378 if ((ctxt->validate) && (ctxt->vctxt.error != NULL)) {
2379 xmlValidityError(ctxt, XML_WAR_UNDECLARED_ENTITY,
2380 "PEReference: %%%s; not found\n",
Daniel Veillardae0765b2008-07-31 19:54:59 +00002381 name, NULL);
Daniel Veillard24eb9782003-10-04 21:08:09 +00002382 } else
2383 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
2384 "PEReference: %%%s; not found\n",
2385 name, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002386 ctxt->valid = 0;
2387 }
Daniel Veillardf5582f12002-06-11 10:08:16 +00002388 } else if (ctxt->input->free != deallocblankswrapper) {
2389 input = xmlNewBlanksWrapperInputStream(ctxt, entity);
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00002390 if (xmlPushInput(ctxt, input) < 0)
2391 return;
Owen Taylor3473f882001-02-23 17:55:21 +00002392 } else {
2393 if ((entity->etype == XML_INTERNAL_PARAMETER_ENTITY) ||
2394 (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)) {
Daniel Veillard87a764e2001-06-20 17:41:10 +00002395 xmlChar start[4];
2396 xmlCharEncoding enc;
2397
Owen Taylor3473f882001-02-23 17:55:21 +00002398 /*
2399 * handle the extra spaces added before and after
2400 * c.f. http://www.w3.org/TR/REC-xml#as-PE
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002401 * this is done independently.
Owen Taylor3473f882001-02-23 17:55:21 +00002402 */
2403 input = xmlNewEntityInputStream(ctxt, entity);
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00002404 if (xmlPushInput(ctxt, input) < 0)
2405 return;
Daniel Veillard87a764e2001-06-20 17:41:10 +00002406
2407 /*
2408 * Get the 4 first bytes and decode the charset
2409 * if enc != XML_CHAR_ENCODING_NONE
2410 * plug some encoding conversion routines.
William M. Bracka0c48ad2004-04-16 15:58:29 +00002411 * Note that, since we may have some non-UTF8
2412 * encoding (like UTF16, bug 135229), the 'length'
2413 * is not known, but we can calculate based upon
2414 * the amount of data in the buffer.
Daniel Veillard87a764e2001-06-20 17:41:10 +00002415 */
2416 GROW
William M. Bracka0c48ad2004-04-16 15:58:29 +00002417 if ((ctxt->input->end - ctxt->input->cur)>=4) {
Daniel Veillarde059b892002-06-13 15:32:10 +00002418 start[0] = RAW;
2419 start[1] = NXT(1);
2420 start[2] = NXT(2);
2421 start[3] = NXT(3);
2422 enc = xmlDetectCharEncoding(start, 4);
2423 if (enc != XML_CHAR_ENCODING_NONE) {
2424 xmlSwitchEncoding(ctxt, enc);
2425 }
Daniel Veillard87a764e2001-06-20 17:41:10 +00002426 }
2427
Owen Taylor3473f882001-02-23 17:55:21 +00002428 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
Daniel Veillarda07050d2003-10-19 14:46:32 +00002429 (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l' )) &&
2430 (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002431 xmlParseTextDecl(ctxt);
2432 }
Owen Taylor3473f882001-02-23 17:55:21 +00002433 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00002434 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
2435 "PEReference: %s is not a parameter entity\n",
2436 name);
Owen Taylor3473f882001-02-23 17:55:21 +00002437 }
2438 }
2439 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002440 xmlFatalErr(ctxt, XML_ERR_PEREF_SEMICOL_MISSING, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002441 }
Owen Taylor3473f882001-02-23 17:55:21 +00002442 }
2443}
2444
/*
 * Macro used to grow the current buffer.
 *
 * The capacity is kept in the caller's variable named <buffer>_size
 * (built by token pasting), so that variable must be in scope next to
 * @buffer.  The new capacity is twice the old one plus @n.
 * If xmlRealloc() fails the macro jumps to a `mem_error` label that the
 * enclosing function has to provide; @buffer still points to the old
 * block in that case, but <buffer>_size has already been increased.
 */
#define growBuffer(buffer, n) {						\
    xmlChar *tmp;							\
    buffer##_size *= 2;							\
    buffer##_size += n;							\
    tmp = (xmlChar *)							\
		xmlRealloc(buffer, buffer##_size * sizeof(xmlChar));	\
    if (tmp == NULL) goto mem_error;					\
    buffer = tmp;							\
}
2457
2458/**
Daniel Veillard7a02cfe2003-09-25 12:18:34 +00002459 * xmlStringLenDecodeEntities:
Owen Taylor3473f882001-02-23 17:55:21 +00002460 * @ctxt: the parser context
2461 * @str: the input string
Daniel Veillarde57ec792003-09-10 10:50:59 +00002462 * @len: the string length
Owen Taylor3473f882001-02-23 17:55:21 +00002463 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2464 * @end: an end marker xmlChar, 0 if none
2465 * @end2: an end marker xmlChar, 0 if none
2466 * @end3: an end marker xmlChar, 0 if none
2467 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002468 * Takes a entity string content and process to do the adequate substitutions.
Owen Taylor3473f882001-02-23 17:55:21 +00002469 *
2470 * [67] Reference ::= EntityRef | CharRef
2471 *
2472 * [69] PEReference ::= '%' Name ';'
2473 *
2474 * Returns A newly allocated string with the substitution done. The caller
2475 * must deallocate it !
2476 */
2477xmlChar *
Daniel Veillarde57ec792003-09-10 10:50:59 +00002478xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2479 int what, xmlChar end, xmlChar end2, xmlChar end3) {
Owen Taylor3473f882001-02-23 17:55:21 +00002480 xmlChar *buffer = NULL;
2481 int buffer_size = 0;
2482
2483 xmlChar *current = NULL;
Daniel Veillard68b6e022008-03-31 09:26:00 +00002484 xmlChar *rep = NULL;
Daniel Veillarde57ec792003-09-10 10:50:59 +00002485 const xmlChar *last;
Owen Taylor3473f882001-02-23 17:55:21 +00002486 xmlEntityPtr ent;
2487 int c,l;
2488 int nbchars = 0;
2489
Daniel Veillarda82b1822004-11-08 16:24:57 +00002490 if ((ctxt == NULL) || (str == NULL) || (len < 0))
Owen Taylor3473f882001-02-23 17:55:21 +00002491 return(NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00002492 last = str + len;
Owen Taylor3473f882001-02-23 17:55:21 +00002493
Daniel Veillard0161e632008-08-28 15:36:32 +00002494 if (((ctxt->depth > 40) &&
2495 ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
2496 (ctxt->depth > 1024)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002497 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002498 return(NULL);
2499 }
2500
2501 /*
2502 * allocate a translation buffer.
2503 */
2504 buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002505 buffer = (xmlChar *) xmlMallocAtomic(buffer_size * sizeof(xmlChar));
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002506 if (buffer == NULL) goto mem_error;
Owen Taylor3473f882001-02-23 17:55:21 +00002507
2508 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002509 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +00002510 * we are operating on already parsed values.
2511 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00002512 if (str < last)
2513 c = CUR_SCHAR(str, l);
2514 else
2515 c = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002516 while ((c != 0) && (c != end) && /* non input consuming loop */
2517 (c != end2) && (c != end3)) {
2518
2519 if (c == 0) break;
2520 if ((c == '&') && (str[1] == '#')) {
2521 int val = xmlParseStringCharRef(ctxt, &str);
2522 if (val != 0) {
2523 COPY_BUF(0,buffer,nbchars,val);
2524 }
Daniel Veillardbedc9772005-09-28 21:42:15 +00002525 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
Daniel Veillard0161e632008-08-28 15:36:32 +00002526 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
Daniel Veillardbedc9772005-09-28 21:42:15 +00002527 }
Owen Taylor3473f882001-02-23 17:55:21 +00002528 } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
2529 if (xmlParserDebugEntities)
2530 xmlGenericError(xmlGenericErrorContext,
2531 "String decoding Entity Reference: %.30s\n",
2532 str);
2533 ent = xmlParseStringEntityRef(ctxt, &str);
Daniel Veillard8915c152008-08-26 13:05:34 +00002534 if ((ctxt->lastError.code == XML_ERR_ENTITY_LOOP) ||
2535 (ctxt->lastError.code == XML_ERR_INTERNAL_ERROR))
Daniel Veillard4bf899b2008-08-20 17:04:30 +00002536 goto int_error;
Daniel Veillard4bf899b2008-08-20 17:04:30 +00002537 if (ent != NULL)
Daniel Veillardf4f4e482008-08-25 08:57:48 +00002538 ctxt->nbentities += ent->checked;
Owen Taylor3473f882001-02-23 17:55:21 +00002539 if ((ent != NULL) &&
2540 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
2541 if (ent->content != NULL) {
2542 COPY_BUF(0,buffer,nbchars,ent->content[0]);
Daniel Veillardbedc9772005-09-28 21:42:15 +00002543 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
Daniel Veillard0161e632008-08-28 15:36:32 +00002544 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
Daniel Veillardbedc9772005-09-28 21:42:15 +00002545 }
Owen Taylor3473f882001-02-23 17:55:21 +00002546 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00002547 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
2548 "predefined entity has no content\n");
Owen Taylor3473f882001-02-23 17:55:21 +00002549 }
2550 } else if ((ent != NULL) && (ent->content != NULL)) {
Owen Taylor3473f882001-02-23 17:55:21 +00002551 ctxt->depth++;
2552 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2553 0, 0, 0);
2554 ctxt->depth--;
Daniel Veillard0161e632008-08-28 15:36:32 +00002555
Owen Taylor3473f882001-02-23 17:55:21 +00002556 if (rep != NULL) {
2557 current = rep;
2558 while (*current != 0) { /* non input consuming loop */
2559 buffer[nbchars++] = *current++;
2560 if (nbchars >
2561 buffer_size - XML_PARSER_BUFFER_SIZE) {
Daniel Veillard0161e632008-08-28 15:36:32 +00002562 if (xmlParserEntityCheck(ctxt, nbchars, ent))
2563 goto int_error;
2564 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
Owen Taylor3473f882001-02-23 17:55:21 +00002565 }
2566 }
2567 xmlFree(rep);
Daniel Veillard68b6e022008-03-31 09:26:00 +00002568 rep = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00002569 }
2570 } else if (ent != NULL) {
2571 int i = xmlStrlen(ent->name);
2572 const xmlChar *cur = ent->name;
2573
2574 buffer[nbchars++] = '&';
2575 if (nbchars > buffer_size - i - XML_PARSER_BUFFER_SIZE) {
Daniel Veillard0161e632008-08-28 15:36:32 +00002576 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
Owen Taylor3473f882001-02-23 17:55:21 +00002577 }
2578 for (;i > 0;i--)
2579 buffer[nbchars++] = *cur++;
2580 buffer[nbchars++] = ';';
2581 }
2582 } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
2583 if (xmlParserDebugEntities)
2584 xmlGenericError(xmlGenericErrorContext,
2585 "String decoding PE Reference: %.30s\n", str);
2586 ent = xmlParseStringPEReference(ctxt, &str);
Daniel Veillard4bf899b2008-08-20 17:04:30 +00002587 if (ctxt->lastError.code == XML_ERR_ENTITY_LOOP)
2588 goto int_error;
Daniel Veillard4bf899b2008-08-20 17:04:30 +00002589 if (ent != NULL)
Daniel Veillardf4f4e482008-08-25 08:57:48 +00002590 ctxt->nbentities += ent->checked;
Owen Taylor3473f882001-02-23 17:55:21 +00002591 if (ent != NULL) {
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00002592 if (ent->content == NULL) {
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00002593 xmlLoadEntityContent(ctxt, ent);
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00002594 }
Owen Taylor3473f882001-02-23 17:55:21 +00002595 ctxt->depth++;
2596 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2597 0, 0, 0);
2598 ctxt->depth--;
2599 if (rep != NULL) {
2600 current = rep;
2601 while (*current != 0) { /* non input consuming loop */
2602 buffer[nbchars++] = *current++;
2603 if (nbchars >
2604 buffer_size - XML_PARSER_BUFFER_SIZE) {
Daniel Veillard0161e632008-08-28 15:36:32 +00002605 if (xmlParserEntityCheck(ctxt, nbchars, ent))
2606 goto int_error;
2607 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
Owen Taylor3473f882001-02-23 17:55:21 +00002608 }
2609 }
2610 xmlFree(rep);
Daniel Veillard68b6e022008-03-31 09:26:00 +00002611 rep = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00002612 }
2613 }
2614 } else {
2615 COPY_BUF(l,buffer,nbchars,c);
2616 str += l;
2617 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
Daniel Veillard0161e632008-08-28 15:36:32 +00002618 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
Owen Taylor3473f882001-02-23 17:55:21 +00002619 }
2620 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00002621 if (str < last)
2622 c = CUR_SCHAR(str, l);
2623 else
2624 c = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002625 }
Daniel Veillard13cee4e2009-09-05 14:52:55 +02002626 buffer[nbchars] = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002627 return(buffer);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002628
2629mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002630 xmlErrMemory(ctxt, NULL);
Daniel Veillard4bf899b2008-08-20 17:04:30 +00002631int_error:
Daniel Veillard68b6e022008-03-31 09:26:00 +00002632 if (rep != NULL)
2633 xmlFree(rep);
2634 if (buffer != NULL)
2635 xmlFree(buffer);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002636 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002637}
2638
Daniel Veillarde57ec792003-09-10 10:50:59 +00002639/**
2640 * xmlStringDecodeEntities:
2641 * @ctxt: the parser context
2642 * @str: the input string
2643 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2644 * @end: an end marker xmlChar, 0 if none
2645 * @end2: an end marker xmlChar, 0 if none
2646 * @end3: an end marker xmlChar, 0 if none
2647 *
2648 * Takes a entity string content and process to do the adequate substitutions.
2649 *
2650 * [67] Reference ::= EntityRef | CharRef
2651 *
2652 * [69] PEReference ::= '%' Name ';'
2653 *
2654 * Returns A newly allocated string with the substitution done. The caller
2655 * must deallocate it !
2656 */
2657xmlChar *
2658xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
2659 xmlChar end, xmlChar end2, xmlChar end3) {
Daniel Veillarda82b1822004-11-08 16:24:57 +00002660 if ((ctxt == NULL) || (str == NULL)) return(NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00002661 return(xmlStringLenDecodeEntities(ctxt, str, xmlStrlen(str), what,
2662 end, end2, end3));
2663}
Owen Taylor3473f882001-02-23 17:55:21 +00002664
2665/************************************************************************
2666 * *
Owen Taylor3473f882001-02-23 17:55:21 +00002667 * Commodity functions, cleanup needed ? *
2668 * *
2669 ************************************************************************/
2670
2671/**
2672 * areBlanks:
2673 * @ctxt: an XML parser context
2674 * @str: a xmlChar *
2675 * @len: the size of @str
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002676 * @blank_chars: we know the chars are blanks
Owen Taylor3473f882001-02-23 17:55:21 +00002677 *
2678 * Is this a sequence of blank chars that one can ignore ?
2679 *
2680 * Returns 1 if ignorable 0 otherwise.
2681 */
2682
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002683static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2684 int blank_chars) {
Owen Taylor3473f882001-02-23 17:55:21 +00002685 int i, ret;
2686 xmlNodePtr lastChild;
2687
Daniel Veillard05c13a22001-09-09 08:38:09 +00002688 /*
2689 * Don't spend time trying to differentiate them, the same callback is
2690 * used !
2691 */
2692 if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters)
Daniel Veillard2f362242001-03-02 17:36:21 +00002693 return(0);
2694
Owen Taylor3473f882001-02-23 17:55:21 +00002695 /*
2696 * Check for xml:space value.
2697 */
Daniel Veillard1114d002006-10-12 16:24:35 +00002698 if ((ctxt->space == NULL) || (*(ctxt->space) == 1) ||
2699 (*(ctxt->space) == -2))
Owen Taylor3473f882001-02-23 17:55:21 +00002700 return(0);
2701
2702 /*
2703 * Check that the string is made of blanks
2704 */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002705 if (blank_chars == 0) {
2706 for (i = 0;i < len;i++)
2707 if (!(IS_BLANK_CH(str[i]))) return(0);
2708 }
Owen Taylor3473f882001-02-23 17:55:21 +00002709
2710 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002711 * Look if the element is mixed content in the DTD if available
Owen Taylor3473f882001-02-23 17:55:21 +00002712 */
Daniel Veillard6dd398f2001-07-25 22:41:03 +00002713 if (ctxt->node == NULL) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002714 if (ctxt->myDoc != NULL) {
2715 ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
2716 if (ret == 0) return(1);
2717 if (ret == 1) return(0);
2718 }
2719
2720 /*
2721 * Otherwise, heuristic :-\
2722 */
Daniel Veillardabac41e2005-07-06 15:17:38 +00002723 if ((RAW != '<') && (RAW != 0xD)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002724 if ((ctxt->node->children == NULL) &&
2725 (RAW == '<') && (NXT(1) == '/')) return(0);
2726
2727 lastChild = xmlGetLastChild(ctxt->node);
2728 if (lastChild == NULL) {
Daniel Veillard7db37732001-07-12 01:20:08 +00002729 if ((ctxt->node->type != XML_ELEMENT_NODE) &&
2730 (ctxt->node->content != NULL)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002731 } else if (xmlNodeIsText(lastChild))
2732 return(0);
2733 else if ((ctxt->node->children != NULL) &&
2734 (xmlNodeIsText(ctxt->node->children)))
2735 return(0);
2736 return(1);
2737}
2738
Owen Taylor3473f882001-02-23 17:55:21 +00002739/************************************************************************
2740 * *
2741 * Extra stuff for namespace support *
2742 * Relates to http://www.w3.org/TR/WD-xml-names *
2743 * *
2744 ************************************************************************/
2745
2746/**
2747 * xmlSplitQName:
2748 * @ctxt: an XML parser context
2749 * @name: an XML parser context
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00002750 * @prefix: a xmlChar **
Owen Taylor3473f882001-02-23 17:55:21 +00002751 *
2752 * parse an UTF8 encoded XML qualified name string
2753 *
2754 * [NS 5] QName ::= (Prefix ':')? LocalPart
2755 *
2756 * [NS 6] Prefix ::= NCName
2757 *
2758 * [NS 7] LocalPart ::= NCName
2759 *
2760 * Returns the local part, and prefix is updated
2761 * to get the Prefix if any.
2762 */
2763
2764xmlChar *
2765xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
2766 xmlChar buf[XML_MAX_NAMELEN + 5];
2767 xmlChar *buffer = NULL;
2768 int len = 0;
2769 int max = XML_MAX_NAMELEN;
2770 xmlChar *ret = NULL;
2771 const xmlChar *cur = name;
2772 int c;
2773
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00002774 if (prefix == NULL) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002775 *prefix = NULL;
2776
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00002777 if (cur == NULL) return(NULL);
2778
Owen Taylor3473f882001-02-23 17:55:21 +00002779#ifndef XML_XML_NAMESPACE
2780 /* xml: prefix is not really a namespace */
2781 if ((cur[0] == 'x') && (cur[1] == 'm') &&
2782 (cur[2] == 'l') && (cur[3] == ':'))
2783 return(xmlStrdup(name));
2784#endif
2785
Daniel Veillard597bc482003-07-24 16:08:28 +00002786 /* nasty but well=formed */
Owen Taylor3473f882001-02-23 17:55:21 +00002787 if (cur[0] == ':')
2788 return(xmlStrdup(name));
2789
2790 c = *cur++;
2791 while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
2792 buf[len++] = c;
2793 c = *cur++;
2794 }
2795 if (len >= max) {
2796 /*
2797 * Okay someone managed to make a huge name, so he's ready to pay
2798 * for the processing speed.
2799 */
2800 max = len * 2;
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00002801
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002802 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002803 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002804 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002805 return(NULL);
2806 }
2807 memcpy(buffer, buf, len);
2808 while ((c != 0) && (c != ':')) { /* tested bigname.xml */
2809 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00002810 xmlChar *tmp;
2811
Owen Taylor3473f882001-02-23 17:55:21 +00002812 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00002813 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00002814 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00002815 if (tmp == NULL) {
Daniel Veillard68b6e022008-03-31 09:26:00 +00002816 xmlFree(buffer);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002817 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002818 return(NULL);
2819 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00002820 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00002821 }
2822 buffer[len++] = c;
2823 c = *cur++;
2824 }
2825 buffer[len] = 0;
2826 }
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00002827
Daniel Veillard597bc482003-07-24 16:08:28 +00002828 if ((c == ':') && (*cur == 0)) {
Daniel Veillard02a49632006-10-13 12:42:31 +00002829 if (buffer != NULL)
2830 xmlFree(buffer);
2831 *prefix = NULL;
Daniel Veillard597bc482003-07-24 16:08:28 +00002832 return(xmlStrdup(name));
Daniel Veillard02a49632006-10-13 12:42:31 +00002833 }
Daniel Veillard597bc482003-07-24 16:08:28 +00002834
Owen Taylor3473f882001-02-23 17:55:21 +00002835 if (buffer == NULL)
2836 ret = xmlStrndup(buf, len);
2837 else {
2838 ret = buffer;
2839 buffer = NULL;
2840 max = XML_MAX_NAMELEN;
2841 }
2842
2843
2844 if (c == ':') {
Daniel Veillardbb284f42002-10-16 18:02:47 +00002845 c = *cur;
Owen Taylor3473f882001-02-23 17:55:21 +00002846 *prefix = ret;
Daniel Veillard597bc482003-07-24 16:08:28 +00002847 if (c == 0) {
Daniel Veillard8d73bcb2003-08-04 01:06:15 +00002848 return(xmlStrndup(BAD_CAST "", 0));
Daniel Veillard597bc482003-07-24 16:08:28 +00002849 }
Owen Taylor3473f882001-02-23 17:55:21 +00002850 len = 0;
2851
Daniel Veillardbb284f42002-10-16 18:02:47 +00002852 /*
2853 * Check that the first character is proper to start
2854 * a new name
2855 */
2856 if (!(((c >= 0x61) && (c <= 0x7A)) ||
2857 ((c >= 0x41) && (c <= 0x5A)) ||
2858 (c == '_') || (c == ':'))) {
2859 int l;
2860 int first = CUR_SCHAR(cur, l);
2861
2862 if (!IS_LETTER(first) && (first != '_')) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00002863 xmlFatalErrMsgStr(ctxt, XML_NS_ERR_QNAME,
Daniel Veillardbb284f42002-10-16 18:02:47 +00002864 "Name %s is not XML Namespace compliant\n",
Daniel Veillardbc92eca2003-09-15 09:48:06 +00002865 name);
Daniel Veillardbb284f42002-10-16 18:02:47 +00002866 }
2867 }
2868 cur++;
2869
Owen Taylor3473f882001-02-23 17:55:21 +00002870 while ((c != 0) && (len < max)) { /* tested bigname2.xml */
2871 buf[len++] = c;
2872 c = *cur++;
2873 }
2874 if (len >= max) {
2875 /*
2876 * Okay someone managed to make a huge name, so he's ready to pay
2877 * for the processing speed.
2878 */
2879 max = len * 2;
Daniel Veillard68b6e022008-03-31 09:26:00 +00002880
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002881 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002882 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002883 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002884 return(NULL);
2885 }
2886 memcpy(buffer, buf, len);
2887 while (c != 0) { /* tested bigname2.xml */
2888 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00002889 xmlChar *tmp;
2890
Owen Taylor3473f882001-02-23 17:55:21 +00002891 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00002892 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00002893 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00002894 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002895 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00002896 xmlFree(buffer);
Owen Taylor3473f882001-02-23 17:55:21 +00002897 return(NULL);
2898 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00002899 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00002900 }
2901 buffer[len++] = c;
2902 c = *cur++;
2903 }
2904 buffer[len] = 0;
2905 }
Daniel Veillard68b6e022008-03-31 09:26:00 +00002906
Owen Taylor3473f882001-02-23 17:55:21 +00002907 if (buffer == NULL)
2908 ret = xmlStrndup(buf, len);
2909 else {
2910 ret = buffer;
2911 }
2912 }
2913
2914 return(ret);
2915}
2916
2917/************************************************************************
2918 * *
2919 * The parser itself *
2920 * Relates to http://www.w3.org/TR/REC-xml *
2921 * *
2922 ************************************************************************/
2923
Daniel Veillard34e3f642008-07-29 09:02:27 +00002924/************************************************************************
2925 * *
2926 * Routines to parse Name, NCName and NmToken *
2927 * *
2928 ************************************************************************/
#ifdef DEBUG
/*
 * Call counters, only present in DEBUG builds.
 * NOTE(review): each one is presumably incremented by the parsing routine
 * it is named after - confirm against the routines themselves.
 */
static unsigned long nbParseName = 0;
static unsigned long nbParseNmToken = 0;
static unsigned long nbParseNCName = 0;
static unsigned long nbParseNCNameComplex = 0;
static unsigned long nbParseNameComplex = 0;
static unsigned long nbParseStringName = 0;
#endif
2937
Daniel Veillard34e3f642008-07-29 09:02:27 +00002938/*
2939 * The two following functions are related to the change of accepted
2940 * characters for Name and NmToken in the Revision 5 of XML-1.0
2941 * They correspond to the modified production [4] and the new production [4a]
2942 * changes in that revision. Also note that the macros used for the
2943 * productions Letter, Digit, CombiningChar and Extender are not needed
2944 * anymore.
2945 * We still keep compatibility to pre-revision5 parsing semantic if the
2946 * new XML_PARSE_OLD10 option is given to the parser.
2947 */
2948static int
2949xmlIsNameStartChar(xmlParserCtxtPtr ctxt, int c) {
2950 if ((ctxt->options & XML_PARSE_OLD10) == 0) {
2951 /*
2952 * Use the new checks of production [4] [4a] amd [5] of the
2953 * Update 5 of XML-1.0
2954 */
2955 if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
2956 (((c >= 'a') && (c <= 'z')) ||
2957 ((c >= 'A') && (c <= 'Z')) ||
2958 (c == '_') || (c == ':') ||
2959 ((c >= 0xC0) && (c <= 0xD6)) ||
2960 ((c >= 0xD8) && (c <= 0xF6)) ||
2961 ((c >= 0xF8) && (c <= 0x2FF)) ||
2962 ((c >= 0x370) && (c <= 0x37D)) ||
2963 ((c >= 0x37F) && (c <= 0x1FFF)) ||
2964 ((c >= 0x200C) && (c <= 0x200D)) ||
2965 ((c >= 0x2070) && (c <= 0x218F)) ||
2966 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
2967 ((c >= 0x3001) && (c <= 0xD7FF)) ||
2968 ((c >= 0xF900) && (c <= 0xFDCF)) ||
2969 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
2970 ((c >= 0x10000) && (c <= 0xEFFFF))))
2971 return(1);
2972 } else {
2973 if (IS_LETTER(c) || (c == '_') || (c == ':'))
2974 return(1);
2975 }
2976 return(0);
2977}
2978
2979static int
2980xmlIsNameChar(xmlParserCtxtPtr ctxt, int c) {
2981 if ((ctxt->options & XML_PARSE_OLD10) == 0) {
2982 /*
2983 * Use the new checks of production [4] [4a] amd [5] of the
2984 * Update 5 of XML-1.0
2985 */
2986 if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
2987 (((c >= 'a') && (c <= 'z')) ||
2988 ((c >= 'A') && (c <= 'Z')) ||
2989 ((c >= '0') && (c <= '9')) || /* !start */
2990 (c == '_') || (c == ':') ||
2991 (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
2992 ((c >= 0xC0) && (c <= 0xD6)) ||
2993 ((c >= 0xD8) && (c <= 0xF6)) ||
2994 ((c >= 0xF8) && (c <= 0x2FF)) ||
2995 ((c >= 0x300) && (c <= 0x36F)) || /* !start */
2996 ((c >= 0x370) && (c <= 0x37D)) ||
2997 ((c >= 0x37F) && (c <= 0x1FFF)) ||
2998 ((c >= 0x200C) && (c <= 0x200D)) ||
2999 ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3000 ((c >= 0x2070) && (c <= 0x218F)) ||
3001 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3002 ((c >= 0x3001) && (c <= 0xD7FF)) ||
3003 ((c >= 0xF900) && (c <= 0xFDCF)) ||
3004 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3005 ((c >= 0x10000) && (c <= 0xEFFFF))))
3006 return(1);
3007 } else {
3008 if ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3009 (c == '.') || (c == '-') ||
3010 (c == '_') || (c == ':') ||
3011 (IS_COMBINING(c)) ||
3012 (IS_EXTENDER(c)))
3013 return(1);
3014 }
3015 return(0);
3016}
3017
/* Forward declaration only (prototype, no body here). */
static xmlChar * xmlParseAttValueInternal(xmlParserCtxtPtr ctxt,
                                          int *len, int *alloc, int normalize);
Daniel Veillard0fb18932003-09-07 09:14:37 +00003020
Daniel Veillard34e3f642008-07-29 09:02:27 +00003021static const xmlChar *
3022xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
3023 int len = 0, l;
3024 int c;
3025 int count = 0;
3026
Daniel Veillardc6561462009-03-25 10:22:31 +00003027#ifdef DEBUG
Daniel Veillard34e3f642008-07-29 09:02:27 +00003028 nbParseNameComplex++;
Daniel Veillardc6561462009-03-25 10:22:31 +00003029#endif
Daniel Veillard34e3f642008-07-29 09:02:27 +00003030
3031 /*
3032 * Handler for more complex cases
3033 */
3034 GROW;
3035 c = CUR_CHAR(l);
3036 if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3037 /*
3038 * Use the new checks of production [4] [4a] amd [5] of the
3039 * Update 5 of XML-1.0
3040 */
3041 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3042 (!(((c >= 'a') && (c <= 'z')) ||
3043 ((c >= 'A') && (c <= 'Z')) ||
3044 (c == '_') || (c == ':') ||
3045 ((c >= 0xC0) && (c <= 0xD6)) ||
3046 ((c >= 0xD8) && (c <= 0xF6)) ||
3047 ((c >= 0xF8) && (c <= 0x2FF)) ||
3048 ((c >= 0x370) && (c <= 0x37D)) ||
3049 ((c >= 0x37F) && (c <= 0x1FFF)) ||
3050 ((c >= 0x200C) && (c <= 0x200D)) ||
3051 ((c >= 0x2070) && (c <= 0x218F)) ||
3052 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3053 ((c >= 0x3001) && (c <= 0xD7FF)) ||
3054 ((c >= 0xF900) && (c <= 0xFDCF)) ||
3055 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3056 ((c >= 0x10000) && (c <= 0xEFFFF))))) {
3057 return(NULL);
3058 }
3059 len += l;
3060 NEXTL(l);
3061 c = CUR_CHAR(l);
3062 while ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3063 (((c >= 'a') && (c <= 'z')) ||
3064 ((c >= 'A') && (c <= 'Z')) ||
3065 ((c >= '0') && (c <= '9')) || /* !start */
3066 (c == '_') || (c == ':') ||
3067 (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3068 ((c >= 0xC0) && (c <= 0xD6)) ||
3069 ((c >= 0xD8) && (c <= 0xF6)) ||
3070 ((c >= 0xF8) && (c <= 0x2FF)) ||
3071 ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3072 ((c >= 0x370) && (c <= 0x37D)) ||
3073 ((c >= 0x37F) && (c <= 0x1FFF)) ||
3074 ((c >= 0x200C) && (c <= 0x200D)) ||
3075 ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3076 ((c >= 0x2070) && (c <= 0x218F)) ||
3077 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3078 ((c >= 0x3001) && (c <= 0xD7FF)) ||
3079 ((c >= 0xF900) && (c <= 0xFDCF)) ||
3080 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3081 ((c >= 0x10000) && (c <= 0xEFFFF))
3082 )) {
3083 if (count++ > 100) {
3084 count = 0;
3085 GROW;
3086 }
3087 len += l;
3088 NEXTL(l);
3089 c = CUR_CHAR(l);
3090 }
3091 } else {
3092 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3093 (!IS_LETTER(c) && (c != '_') &&
3094 (c != ':'))) {
3095 return(NULL);
3096 }
3097 len += l;
3098 NEXTL(l);
3099 c = CUR_CHAR(l);
3100
3101 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3102 ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3103 (c == '.') || (c == '-') ||
3104 (c == '_') || (c == ':') ||
3105 (IS_COMBINING(c)) ||
3106 (IS_EXTENDER(c)))) {
3107 if (count++ > 100) {
3108 count = 0;
3109 GROW;
3110 }
3111 len += l;
3112 NEXTL(l);
3113 c = CUR_CHAR(l);
3114 }
3115 }
3116 if ((*ctxt->input->cur == '\n') && (ctxt->input->cur[-1] == '\r'))
3117 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - (len + 1), len));
3118 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
3119}
3120
Owen Taylor3473f882001-02-23 17:55:21 +00003121/**
3122 * xmlParseName:
3123 * @ctxt: an XML parser context
3124 *
3125 * parse an XML name.
3126 *
3127 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3128 * CombiningChar | Extender
3129 *
3130 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3131 *
Daniel Veillard807b4de2004-09-26 14:42:56 +00003132 * [6] Names ::= Name (#x20 Name)*
Owen Taylor3473f882001-02-23 17:55:21 +00003133 *
3134 * Returns the Name parsed or NULL
3135 */
3136
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003137const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00003138xmlParseName(xmlParserCtxtPtr ctxt) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00003139 const xmlChar *in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003140 const xmlChar *ret;
Owen Taylor3473f882001-02-23 17:55:21 +00003141 int count = 0;
3142
3143 GROW;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003144
Daniel Veillardc6561462009-03-25 10:22:31 +00003145#ifdef DEBUG
Daniel Veillard34e3f642008-07-29 09:02:27 +00003146 nbParseName++;
Daniel Veillardc6561462009-03-25 10:22:31 +00003147#endif
Daniel Veillard34e3f642008-07-29 09:02:27 +00003148
Daniel Veillard48b2f892001-02-25 16:11:03 +00003149 /*
3150 * Accelerator for simple ASCII names
3151 */
3152 in = ctxt->input->cur;
3153 if (((*in >= 0x61) && (*in <= 0x7A)) ||
3154 ((*in >= 0x41) && (*in <= 0x5A)) ||
3155 (*in == '_') || (*in == ':')) {
3156 in++;
3157 while (((*in >= 0x61) && (*in <= 0x7A)) ||
3158 ((*in >= 0x41) && (*in <= 0x5A)) ||
3159 ((*in >= 0x30) && (*in <= 0x39)) ||
Daniel Veillard76d66f42001-05-16 21:05:17 +00003160 (*in == '_') || (*in == '-') ||
3161 (*in == ':') || (*in == '.'))
Daniel Veillard48b2f892001-02-25 16:11:03 +00003162 in++;
Daniel Veillard76d66f42001-05-16 21:05:17 +00003163 if ((*in > 0) && (*in < 0x80)) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00003164 count = in - ctxt->input->cur;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003165 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
Daniel Veillard48b2f892001-02-25 16:11:03 +00003166 ctxt->input->cur = in;
Daniel Veillard77a90a72003-03-22 00:04:05 +00003167 ctxt->nbChars += count;
3168 ctxt->input->col += count;
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003169 if (ret == NULL)
3170 xmlErrMemory(ctxt, NULL);
Daniel Veillard48b2f892001-02-25 16:11:03 +00003171 return(ret);
3172 }
3173 }
Daniel Veillard34e3f642008-07-29 09:02:27 +00003174 /* accelerator for special cases */
Daniel Veillard2f362242001-03-02 17:36:21 +00003175 return(xmlParseNameComplex(ctxt));
Daniel Veillard21a0f912001-02-25 19:54:14 +00003176}
Daniel Veillard48b2f892001-02-25 16:11:03 +00003177
Daniel Veillard34e3f642008-07-29 09:02:27 +00003178static const xmlChar *
3179xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) {
3180 int len = 0, l;
3181 int c;
3182 int count = 0;
3183
Daniel Veillardc6561462009-03-25 10:22:31 +00003184#ifdef DEBUG
Daniel Veillard34e3f642008-07-29 09:02:27 +00003185 nbParseNCNameComplex++;
Daniel Veillardc6561462009-03-25 10:22:31 +00003186#endif
Daniel Veillard34e3f642008-07-29 09:02:27 +00003187
3188 /*
3189 * Handler for more complex cases
3190 */
3191 GROW;
3192 c = CUR_CHAR(l);
3193 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3194 (!xmlIsNameStartChar(ctxt, c) || (c == ':'))) {
3195 return(NULL);
3196 }
3197
3198 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3199 (xmlIsNameChar(ctxt, c) && (c != ':'))) {
3200 if (count++ > 100) {
3201 count = 0;
3202 GROW;
3203 }
3204 len += l;
3205 NEXTL(l);
3206 c = CUR_CHAR(l);
3207 }
3208 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
3209}
3210
3211/**
3212 * xmlParseNCName:
3213 * @ctxt: an XML parser context
3214 * @len: lenght of the string parsed
3215 *
3216 * parse an XML name.
3217 *
3218 * [4NS] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
3219 * CombiningChar | Extender
3220 *
3221 * [5NS] NCName ::= (Letter | '_') (NCNameChar)*
3222 *
3223 * Returns the Name parsed or NULL
3224 */
3225
3226static const xmlChar *
3227xmlParseNCName(xmlParserCtxtPtr ctxt) {
3228 const xmlChar *in;
3229 const xmlChar *ret;
3230 int count = 0;
3231
Daniel Veillardc6561462009-03-25 10:22:31 +00003232#ifdef DEBUG
Daniel Veillard34e3f642008-07-29 09:02:27 +00003233 nbParseNCName++;
Daniel Veillardc6561462009-03-25 10:22:31 +00003234#endif
Daniel Veillard34e3f642008-07-29 09:02:27 +00003235
3236 /*
3237 * Accelerator for simple ASCII names
3238 */
3239 in = ctxt->input->cur;
3240 if (((*in >= 0x61) && (*in <= 0x7A)) ||
3241 ((*in >= 0x41) && (*in <= 0x5A)) ||
3242 (*in == '_')) {
3243 in++;
3244 while (((*in >= 0x61) && (*in <= 0x7A)) ||
3245 ((*in >= 0x41) && (*in <= 0x5A)) ||
3246 ((*in >= 0x30) && (*in <= 0x39)) ||
3247 (*in == '_') || (*in == '-') ||
3248 (*in == '.'))
3249 in++;
3250 if ((*in > 0) && (*in < 0x80)) {
3251 count = in - ctxt->input->cur;
3252 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
3253 ctxt->input->cur = in;
3254 ctxt->nbChars += count;
3255 ctxt->input->col += count;
3256 if (ret == NULL) {
3257 xmlErrMemory(ctxt, NULL);
3258 }
3259 return(ret);
3260 }
3261 }
3262 return(xmlParseNCNameComplex(ctxt));
3263}
3264
Daniel Veillard46de64e2002-05-29 08:21:33 +00003265/**
3266 * xmlParseNameAndCompare:
3267 * @ctxt: an XML parser context
3268 *
3269 * parse an XML name and compares for match
3270 * (specialized for endtag parsing)
3271 *
Daniel Veillard46de64e2002-05-29 08:21:33 +00003272 * Returns NULL for an illegal name, (xmlChar*) 1 for success
3273 * and the name for mismatch
3274 */
3275
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003276static const xmlChar *
Daniel Veillard46de64e2002-05-29 08:21:33 +00003277xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003278 register const xmlChar *cmp = other;
3279 register const xmlChar *in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003280 const xmlChar *ret;
Daniel Veillard46de64e2002-05-29 08:21:33 +00003281
3282 GROW;
Daniel Veillard34e3f642008-07-29 09:02:27 +00003283
Daniel Veillard46de64e2002-05-29 08:21:33 +00003284 in = ctxt->input->cur;
3285 while (*in != 0 && *in == *cmp) {
Daniel Veillard34e3f642008-07-29 09:02:27 +00003286 ++in;
Daniel Veillard46de64e2002-05-29 08:21:33 +00003287 ++cmp;
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00003288 ctxt->input->col++;
Daniel Veillard46de64e2002-05-29 08:21:33 +00003289 }
William M. Brack76e95df2003-10-18 16:20:14 +00003290 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
Daniel Veillard34e3f642008-07-29 09:02:27 +00003291 /* success */
Daniel Veillard46de64e2002-05-29 08:21:33 +00003292 ctxt->input->cur = in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003293 return (const xmlChar*) 1;
Daniel Veillard46de64e2002-05-29 08:21:33 +00003294 }
3295 /* failure (or end of input buffer), check with full function */
3296 ret = xmlParseName (ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00003297 /* strings coming from the dictionnary direct compare possible */
3298 if (ret == other) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003299 return (const xmlChar*) 1;
Daniel Veillard46de64e2002-05-29 08:21:33 +00003300 }
3301 return ret;
3302}
3303
Owen Taylor3473f882001-02-23 17:55:21 +00003304/**
3305 * xmlParseStringName:
3306 * @ctxt: an XML parser context
3307 * @str: a pointer to the string pointer (IN/OUT)
3308 *
3309 * parse an XML name.
3310 *
3311 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3312 * CombiningChar | Extender
3313 *
3314 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3315 *
Daniel Veillard807b4de2004-09-26 14:42:56 +00003316 * [6] Names ::= Name (#x20 Name)*
Owen Taylor3473f882001-02-23 17:55:21 +00003317 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003318 * Returns the Name parsed or NULL. The @str pointer
Owen Taylor3473f882001-02-23 17:55:21 +00003319 * is updated to the current location in the string.
3320 */
3321
Daniel Veillard56a4cb82001-03-24 17:00:36 +00003322static xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00003323xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
3324 xmlChar buf[XML_MAX_NAMELEN + 5];
3325 const xmlChar *cur = *str;
3326 int len = 0, l;
3327 int c;
3328
Daniel Veillardc6561462009-03-25 10:22:31 +00003329#ifdef DEBUG
Daniel Veillard34e3f642008-07-29 09:02:27 +00003330 nbParseStringName++;
Daniel Veillardc6561462009-03-25 10:22:31 +00003331#endif
Daniel Veillard34e3f642008-07-29 09:02:27 +00003332
Owen Taylor3473f882001-02-23 17:55:21 +00003333 c = CUR_SCHAR(cur, l);
Daniel Veillard34e3f642008-07-29 09:02:27 +00003334 if (!xmlIsNameStartChar(ctxt, c)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003335 return(NULL);
3336 }
3337
Daniel Veillard34e3f642008-07-29 09:02:27 +00003338 COPY_BUF(l,buf,len,c);
3339 cur += l;
3340 c = CUR_SCHAR(cur, l);
3341 while (xmlIsNameChar(ctxt, c)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003342 COPY_BUF(l,buf,len,c);
3343 cur += l;
3344 c = CUR_SCHAR(cur, l);
3345 if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
3346 /*
3347 * Okay someone managed to make a huge name, so he's ready to pay
3348 * for the processing speed.
3349 */
3350 xmlChar *buffer;
3351 int max = len * 2;
Daniel Veillard34e3f642008-07-29 09:02:27 +00003352
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003353 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003354 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003355 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003356 return(NULL);
3357 }
3358 memcpy(buffer, buf, len);
Daniel Veillard34e3f642008-07-29 09:02:27 +00003359 while (xmlIsNameChar(ctxt, c)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003360 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003361 xmlChar *tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003362 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003363 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00003364 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00003365 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003366 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00003367 xmlFree(buffer);
Owen Taylor3473f882001-02-23 17:55:21 +00003368 return(NULL);
3369 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003370 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003371 }
3372 COPY_BUF(l,buffer,len,c);
3373 cur += l;
3374 c = CUR_SCHAR(cur, l);
3375 }
3376 buffer[len] = 0;
3377 *str = cur;
3378 return(buffer);
3379 }
3380 }
3381 *str = cur;
3382 return(xmlStrndup(buf, len));
3383}
3384
3385/**
3386 * xmlParseNmtoken:
3387 * @ctxt: an XML parser context
Daniel Veillard34e3f642008-07-29 09:02:27 +00003388 *
Owen Taylor3473f882001-02-23 17:55:21 +00003389 * parse an XML Nmtoken.
3390 *
3391 * [7] Nmtoken ::= (NameChar)+
3392 *
Daniel Veillard807b4de2004-09-26 14:42:56 +00003393 * [8] Nmtokens ::= Nmtoken (#x20 Nmtoken)*
Owen Taylor3473f882001-02-23 17:55:21 +00003394 *
3395 * Returns the Nmtoken parsed or NULL
3396 */
3397
3398xmlChar *
3399xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
3400 xmlChar buf[XML_MAX_NAMELEN + 5];
3401 int len = 0, l;
3402 int c;
3403 int count = 0;
3404
Daniel Veillardc6561462009-03-25 10:22:31 +00003405#ifdef DEBUG
Daniel Veillard34e3f642008-07-29 09:02:27 +00003406 nbParseNmToken++;
Daniel Veillardc6561462009-03-25 10:22:31 +00003407#endif
Daniel Veillard34e3f642008-07-29 09:02:27 +00003408
Owen Taylor3473f882001-02-23 17:55:21 +00003409 GROW;
3410 c = CUR_CHAR(l);
3411
Daniel Veillard34e3f642008-07-29 09:02:27 +00003412 while (xmlIsNameChar(ctxt, c)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003413 if (count++ > 100) {
3414 count = 0;
3415 GROW;
3416 }
3417 COPY_BUF(l,buf,len,c);
3418 NEXTL(l);
3419 c = CUR_CHAR(l);
3420 if (len >= XML_MAX_NAMELEN) {
3421 /*
3422 * Okay someone managed to make a huge token, so he's ready to pay
3423 * for the processing speed.
3424 */
3425 xmlChar *buffer;
3426 int max = len * 2;
Daniel Veillard34e3f642008-07-29 09:02:27 +00003427
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003428 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003429 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003430 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003431 return(NULL);
3432 }
3433 memcpy(buffer, buf, len);
Daniel Veillard34e3f642008-07-29 09:02:27 +00003434 while (xmlIsNameChar(ctxt, c)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003435 if (count++ > 100) {
3436 count = 0;
3437 GROW;
3438 }
3439 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003440 xmlChar *tmp;
3441
Owen Taylor3473f882001-02-23 17:55:21 +00003442 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003443 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00003444 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00003445 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003446 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00003447 xmlFree(buffer);
Owen Taylor3473f882001-02-23 17:55:21 +00003448 return(NULL);
3449 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003450 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003451 }
3452 COPY_BUF(l,buffer,len,c);
3453 NEXTL(l);
3454 c = CUR_CHAR(l);
3455 }
3456 buffer[len] = 0;
3457 return(buffer);
3458 }
3459 }
3460 if (len == 0)
3461 return(NULL);
3462 return(xmlStrndup(buf, len));
3463}
3464
3465/**
3466 * xmlParseEntityValue:
3467 * @ctxt: an XML parser context
3468 * @orig: if non-NULL store a copy of the original entity value
3469 *
3470 * parse a value for ENTITY declarations
3471 *
3472 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
3473 * "'" ([^%&'] | PEReference | Reference)* "'"
3474 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003475 * Returns the EntityValue parsed with reference substituted or NULL
Owen Taylor3473f882001-02-23 17:55:21 +00003476 */
3477
3478xmlChar *
3479xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
3480 xmlChar *buf = NULL;
3481 int len = 0;
3482 int size = XML_PARSER_BUFFER_SIZE;
3483 int c, l;
3484 xmlChar stop;
3485 xmlChar *ret = NULL;
3486 const xmlChar *cur = NULL;
3487 xmlParserInputPtr input;
3488
3489 if (RAW == '"') stop = '"';
3490 else if (RAW == '\'') stop = '\'';
3491 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003492 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003493 return(NULL);
3494 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003495 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003496 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003497 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003498 return(NULL);
3499 }
3500
3501 /*
3502 * The content of the entity definition is copied in a buffer.
3503 */
3504
3505 ctxt->instate = XML_PARSER_ENTITY_VALUE;
3506 input = ctxt->input;
3507 GROW;
3508 NEXT;
3509 c = CUR_CHAR(l);
3510 /*
3511 * NOTE: 4.4.5 Included in Literal
3512 * When a parameter entity reference appears in a literal entity
3513 * value, ... a single or double quote character in the replacement
3514 * text is always treated as a normal data character and will not
3515 * terminate the literal.
3516 * In practice it means we stop the loop only when back at parsing
3517 * the initial entity and the quote is found
3518 */
William M. Brack871611b2003-10-18 04:53:14 +00003519 while ((IS_CHAR(c)) && ((c != stop) || /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00003520 (ctxt->input != input))) {
3521 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003522 xmlChar *tmp;
3523
Owen Taylor3473f882001-02-23 17:55:21 +00003524 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003525 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3526 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003527 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00003528 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00003529 return(NULL);
3530 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003531 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003532 }
3533 COPY_BUF(l,buf,len,c);
3534 NEXTL(l);
3535 /*
3536 * Pop-up of finished entities.
3537 */
3538 while ((RAW == 0) && (ctxt->inputNr > 1)) /* non input consuming */
3539 xmlPopInput(ctxt);
3540
3541 GROW;
3542 c = CUR_CHAR(l);
3543 if (c == 0) {
3544 GROW;
3545 c = CUR_CHAR(l);
3546 }
3547 }
3548 buf[len] = 0;
3549
3550 /*
3551 * Raise problem w.r.t. '&' and '%' being used in non-entities
3552 * reference constructs. Note Charref will be handled in
3553 * xmlStringDecodeEntities()
3554 */
3555 cur = buf;
3556 while (*cur != 0) { /* non input consuming */
3557 if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
3558 xmlChar *name;
3559 xmlChar tmp = *cur;
3560
3561 cur++;
3562 name = xmlParseStringName(ctxt, &cur);
3563 if ((name == NULL) || (*cur != ';')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003564 xmlFatalErrMsgInt(ctxt, XML_ERR_ENTITY_CHAR_ERROR,
Owen Taylor3473f882001-02-23 17:55:21 +00003565 "EntityValue: '%c' forbidden except for entities references\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003566 tmp);
Owen Taylor3473f882001-02-23 17:55:21 +00003567 }
Daniel Veillard5151c062001-10-23 13:10:19 +00003568 if ((tmp == '%') && (ctxt->inSubset == 1) &&
3569 (ctxt->inputNr == 1)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003570 xmlFatalErr(ctxt, XML_ERR_ENTITY_PE_INTERNAL, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003571 }
3572 if (name != NULL)
3573 xmlFree(name);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00003574 if (*cur == 0)
3575 break;
Owen Taylor3473f882001-02-23 17:55:21 +00003576 }
3577 cur++;
3578 }
3579
3580 /*
3581 * Then PEReference entities are substituted.
3582 */
3583 if (c != stop) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003584 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003585 xmlFree(buf);
3586 } else {
3587 NEXT;
3588 /*
3589 * NOTE: 4.4.7 Bypassed
3590 * When a general entity reference appears in the EntityValue in
3591 * an entity declaration, it is bypassed and left as is.
3592 * so XML_SUBSTITUTE_REF is not set here.
3593 */
3594 ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF,
3595 0, 0, 0);
3596 if (orig != NULL)
3597 *orig = buf;
3598 else
3599 xmlFree(buf);
3600 }
3601
3602 return(ret);
3603}
3604
3605/**
Daniel Veillard01c13b52002-12-10 15:19:08 +00003606 * xmlParseAttValueComplex:
3607 * @ctxt: an XML parser context
Daniel Veillard0fb18932003-09-07 09:14:37 +00003608 * @len: the resulting attribute len
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003609 * @normalize: wether to apply the inner normalization
Daniel Veillard01c13b52002-12-10 15:19:08 +00003610 *
3611 * parse a value for an attribute, this is the fallback function
3612 * of xmlParseAttValue() when the attribute parsing requires handling
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003613 * of non-ASCII characters, or normalization compaction.
Daniel Veillard01c13b52002-12-10 15:19:08 +00003614 *
3615 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
3616 */
Daniel Veillard0fb18932003-09-07 09:14:37 +00003617static xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003618xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) {
Daniel Veillarde72c7562002-05-31 09:47:30 +00003619 xmlChar limit = 0;
3620 xmlChar *buf = NULL;
Daniel Veillard68b6e022008-03-31 09:26:00 +00003621 xmlChar *rep = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00003622 int len = 0;
3623 int buf_size = 0;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003624 int c, l, in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003625 xmlChar *current = NULL;
3626 xmlEntityPtr ent;
3627
Owen Taylor3473f882001-02-23 17:55:21 +00003628 if (NXT(0) == '"') {
3629 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3630 limit = '"';
3631 NEXT;
3632 } else if (NXT(0) == '\'') {
3633 limit = '\'';
3634 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3635 NEXT;
3636 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003637 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003638 return(NULL);
3639 }
Daniel Veillard68b6e022008-03-31 09:26:00 +00003640
Owen Taylor3473f882001-02-23 17:55:21 +00003641 /*
3642 * allocate a translation buffer.
3643 */
3644 buf_size = XML_PARSER_BUFFER_SIZE;
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003645 buf = (xmlChar *) xmlMallocAtomic(buf_size * sizeof(xmlChar));
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003646 if (buf == NULL) goto mem_error;
Owen Taylor3473f882001-02-23 17:55:21 +00003647
3648 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003649 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +00003650 */
3651 c = CUR_CHAR(l);
Daniel Veillardfdc91562002-07-01 21:52:03 +00003652 while ((NXT(0) != limit) && /* checked */
Daniel Veillardb9e5acc2007-06-12 13:43:00 +00003653 (IS_CHAR(c)) && (c != '<')) {
Owen Taylor3473f882001-02-23 17:55:21 +00003654 if (c == 0) break;
Daniel Veillardfdc91562002-07-01 21:52:03 +00003655 if (c == '&') {
Daniel Veillard62998c02003-09-15 12:56:36 +00003656 in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003657 if (NXT(1) == '#') {
3658 int val = xmlParseCharRef(ctxt);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003659
Owen Taylor3473f882001-02-23 17:55:21 +00003660 if (val == '&') {
Daniel Veillard319a7422001-09-11 09:27:09 +00003661 if (ctxt->replaceEntities) {
3662 if (len > buf_size - 10) {
Daniel Veillard0161e632008-08-28 15:36:32 +00003663 growBuffer(buf, 10);
Daniel Veillard319a7422001-09-11 09:27:09 +00003664 }
3665 buf[len++] = '&';
3666 } else {
3667 /*
3668 * The reparsing will be done in xmlStringGetNodeList()
3669 * called by the attribute() function in SAX.c
3670 */
Daniel Veillard319a7422001-09-11 09:27:09 +00003671 if (len > buf_size - 10) {
Daniel Veillard0161e632008-08-28 15:36:32 +00003672 growBuffer(buf, 10);
Daniel Veillard319a7422001-09-11 09:27:09 +00003673 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003674 buf[len++] = '&';
3675 buf[len++] = '#';
3676 buf[len++] = '3';
3677 buf[len++] = '8';
3678 buf[len++] = ';';
Owen Taylor3473f882001-02-23 17:55:21 +00003679 }
Daniel Veillarddc171602008-03-26 17:41:38 +00003680 } else if (val != 0) {
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00003681 if (len > buf_size - 10) {
Daniel Veillard0161e632008-08-28 15:36:32 +00003682 growBuffer(buf, 10);
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00003683 }
Owen Taylor3473f882001-02-23 17:55:21 +00003684 len += xmlCopyChar(0, &buf[len], val);
3685 }
3686 } else {
3687 ent = xmlParseEntityRef(ctxt);
Daniel Veillardcba68392008-08-29 12:43:40 +00003688 ctxt->nbentities++;
3689 if (ent != NULL)
3690 ctxt->nbentities += ent->owner;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003691 if ((ent != NULL) &&
3692 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
3693 if (len > buf_size - 10) {
Daniel Veillard0161e632008-08-28 15:36:32 +00003694 growBuffer(buf, 10);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003695 }
3696 if ((ctxt->replaceEntities == 0) &&
3697 (ent->content[0] == '&')) {
3698 buf[len++] = '&';
3699 buf[len++] = '#';
3700 buf[len++] = '3';
3701 buf[len++] = '8';
3702 buf[len++] = ';';
3703 } else {
3704 buf[len++] = ent->content[0];
3705 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003706 } else if ((ent != NULL) &&
3707 (ctxt->replaceEntities != 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003708 if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
3709 rep = xmlStringDecodeEntities(ctxt, ent->content,
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003710 XML_SUBSTITUTE_REF,
3711 0, 0, 0);
Owen Taylor3473f882001-02-23 17:55:21 +00003712 if (rep != NULL) {
3713 current = rep;
3714 while (*current != 0) { /* non input consuming */
Daniel Veillard283d5022009-08-25 17:18:39 +02003715 if ((*current == 0xD) || (*current == 0xA) ||
3716 (*current == 0x9)) {
3717 buf[len++] = 0x20;
3718 current++;
3719 } else
3720 buf[len++] = *current++;
Owen Taylor3473f882001-02-23 17:55:21 +00003721 if (len > buf_size - 10) {
Daniel Veillard0161e632008-08-28 15:36:32 +00003722 growBuffer(buf, 10);
Owen Taylor3473f882001-02-23 17:55:21 +00003723 }
3724 }
3725 xmlFree(rep);
Daniel Veillard68b6e022008-03-31 09:26:00 +00003726 rep = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00003727 }
3728 } else {
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00003729 if (len > buf_size - 10) {
Daniel Veillard0161e632008-08-28 15:36:32 +00003730 growBuffer(buf, 10);
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00003731 }
Owen Taylor3473f882001-02-23 17:55:21 +00003732 if (ent->content != NULL)
3733 buf[len++] = ent->content[0];
3734 }
3735 } else if (ent != NULL) {
3736 int i = xmlStrlen(ent->name);
3737 const xmlChar *cur = ent->name;
3738
3739 /*
3740 * This may look absurd but is needed to detect
3741 * entities problems
3742 */
3743 if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
3744 (ent->content != NULL)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003745 rep = xmlStringDecodeEntities(ctxt, ent->content,
Daniel Veillard68b6e022008-03-31 09:26:00 +00003746 XML_SUBSTITUTE_REF, 0, 0, 0);
3747 if (rep != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +00003748 xmlFree(rep);
Daniel Veillard68b6e022008-03-31 09:26:00 +00003749 rep = NULL;
3750 }
Owen Taylor3473f882001-02-23 17:55:21 +00003751 }
3752
3753 /*
3754 * Just output the reference
3755 */
3756 buf[len++] = '&';
Daniel Veillard0161e632008-08-28 15:36:32 +00003757 while (len > buf_size - i - 10) {
3758 growBuffer(buf, i + 10);
Owen Taylor3473f882001-02-23 17:55:21 +00003759 }
3760 for (;i > 0;i--)
3761 buf[len++] = *cur++;
3762 buf[len++] = ';';
3763 }
3764 }
3765 } else {
3766 if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003767 if ((len != 0) || (!normalize)) {
3768 if ((!normalize) || (!in_space)) {
3769 COPY_BUF(l,buf,len,0x20);
Daniel Veillard0161e632008-08-28 15:36:32 +00003770 while (len > buf_size - 10) {
3771 growBuffer(buf, 10);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003772 }
3773 }
3774 in_space = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003775 }
3776 } else {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003777 in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003778 COPY_BUF(l,buf,len,c);
3779 if (len > buf_size - 10) {
Daniel Veillard0161e632008-08-28 15:36:32 +00003780 growBuffer(buf, 10);
Owen Taylor3473f882001-02-23 17:55:21 +00003781 }
3782 }
3783 NEXTL(l);
3784 }
3785 GROW;
3786 c = CUR_CHAR(l);
3787 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003788 if ((in_space) && (normalize)) {
3789 while (buf[len - 1] == 0x20) len--;
3790 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00003791 buf[len] = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003792 if (RAW == '<') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003793 xmlFatalErr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003794 } else if (RAW != limit) {
Daniel Veillardb9e5acc2007-06-12 13:43:00 +00003795 if ((c != 0) && (!IS_CHAR(c))) {
3796 xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
3797 "invalid character in attribute value\n");
3798 } else {
3799 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
3800 "AttValue: ' expected\n");
3801 }
Owen Taylor3473f882001-02-23 17:55:21 +00003802 } else
3803 NEXT;
Daniel Veillard0fb18932003-09-07 09:14:37 +00003804 if (attlen != NULL) *attlen = len;
Owen Taylor3473f882001-02-23 17:55:21 +00003805 return(buf);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003806
3807mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003808 xmlErrMemory(ctxt, NULL);
Daniel Veillard68b6e022008-03-31 09:26:00 +00003809 if (buf != NULL)
3810 xmlFree(buf);
3811 if (rep != NULL)
3812 xmlFree(rep);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003813 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003814}
3815
3816/**
Daniel Veillard0fb18932003-09-07 09:14:37 +00003817 * xmlParseAttValue:
3818 * @ctxt: an XML parser context
3819 *
3820 * parse a value for an attribute
3821 * Note: the parser won't do substitution of entities here, this
3822 * will be handled later in xmlStringGetNodeList
3823 *
3824 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
3825 * "'" ([^<&'] | Reference)* "'"
3826 *
3827 * 3.3.3 Attribute-Value Normalization:
3828 * Before the value of an attribute is passed to the application or
3829 * checked for validity, the XML processor must normalize it as follows:
3830 * - a character reference is processed by appending the referenced
3831 * character to the attribute value
3832 * - an entity reference is processed by recursively processing the
3833 * replacement text of the entity
3834 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
3835 * appending #x20 to the normalized value, except that only a single
3836 * #x20 is appended for a "#xD#xA" sequence that is part of an external
3837 * parsed entity or the literal entity value of an internal parsed entity
3838 * - other characters are processed by appending them to the normalized value
3839 * If the declared value is not CDATA, then the XML processor must further
3840 * process the normalized attribute value by discarding any leading and
3841 * trailing space (#x20) characters, and by replacing sequences of space
3842 * (#x20) characters by a single space (#x20) character.
3843 * All attributes for which no declaration has been read should be treated
3844 * by a non-validating parser as if declared CDATA.
3845 *
3846 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
3847 */
3848
3849
3850xmlChar *
3851xmlParseAttValue(xmlParserCtxtPtr ctxt) {
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00003852 if ((ctxt == NULL) || (ctxt->input == NULL)) return(NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003853 return(xmlParseAttValueInternal(ctxt, NULL, NULL, 0));
Daniel Veillard0fb18932003-09-07 09:14:37 +00003854}
3855
3856/**
Owen Taylor3473f882001-02-23 17:55:21 +00003857 * xmlParseSystemLiteral:
3858 * @ctxt: an XML parser context
3859 *
3860 * parse an XML Literal
3861 *
3862 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
3863 *
3864 * Returns the SystemLiteral parsed or NULL
3865 */
3866
3867xmlChar *
3868xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
3869 xmlChar *buf = NULL;
3870 int len = 0;
3871 int size = XML_PARSER_BUFFER_SIZE;
3872 int cur, l;
3873 xmlChar stop;
3874 int state = ctxt->instate;
3875 int count = 0;
3876
3877 SHRINK;
3878 if (RAW == '"') {
3879 NEXT;
3880 stop = '"';
3881 } else if (RAW == '\'') {
3882 NEXT;
3883 stop = '\'';
3884 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003885 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003886 return(NULL);
3887 }
3888
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003889 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003890 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003891 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003892 return(NULL);
3893 }
3894 ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
3895 cur = CUR_CHAR(l);
William M. Brack871611b2003-10-18 04:53:14 +00003896 while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00003897 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003898 xmlChar *tmp;
3899
Owen Taylor3473f882001-02-23 17:55:21 +00003900 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003901 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3902 if (tmp == NULL) {
3903 xmlFree(buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003904 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003905 ctxt->instate = (xmlParserInputState) state;
3906 return(NULL);
3907 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003908 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003909 }
3910 count++;
3911 if (count > 50) {
3912 GROW;
3913 count = 0;
3914 }
3915 COPY_BUF(l,buf,len,cur);
3916 NEXTL(l);
3917 cur = CUR_CHAR(l);
3918 if (cur == 0) {
3919 GROW;
3920 SHRINK;
3921 cur = CUR_CHAR(l);
3922 }
3923 }
3924 buf[len] = 0;
3925 ctxt->instate = (xmlParserInputState) state;
William M. Brack871611b2003-10-18 04:53:14 +00003926 if (!IS_CHAR(cur)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003927 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003928 } else {
3929 NEXT;
3930 }
3931 return(buf);
3932}
3933
3934/**
3935 * xmlParsePubidLiteral:
3936 * @ctxt: an XML parser context
3937 *
3938 * parse an XML public literal
3939 *
3940 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
3941 *
3942 * Returns the PubidLiteral parsed or NULL.
3943 */
3944
3945xmlChar *
3946xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
3947 xmlChar *buf = NULL;
3948 int len = 0;
3949 int size = XML_PARSER_BUFFER_SIZE;
3950 xmlChar cur;
3951 xmlChar stop;
3952 int count = 0;
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003953 xmlParserInputState oldstate = ctxt->instate;
Owen Taylor3473f882001-02-23 17:55:21 +00003954
3955 SHRINK;
3956 if (RAW == '"') {
3957 NEXT;
3958 stop = '"';
3959 } else if (RAW == '\'') {
3960 NEXT;
3961 stop = '\'';
3962 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003963 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003964 return(NULL);
3965 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003966 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003967 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003968 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003969 return(NULL);
3970 }
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003971 ctxt->instate = XML_PARSER_PUBLIC_LITERAL;
Owen Taylor3473f882001-02-23 17:55:21 +00003972 cur = CUR;
William M. Brack76e95df2003-10-18 16:20:14 +00003973 while ((IS_PUBIDCHAR_CH(cur)) && (cur != stop)) { /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00003974 if (len + 1 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003975 xmlChar *tmp;
3976
Owen Taylor3473f882001-02-23 17:55:21 +00003977 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003978 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3979 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003980 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00003981 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00003982 return(NULL);
3983 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003984 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003985 }
3986 buf[len++] = cur;
3987 count++;
3988 if (count > 50) {
3989 GROW;
3990 count = 0;
3991 }
3992 NEXT;
3993 cur = CUR;
3994 if (cur == 0) {
3995 GROW;
3996 SHRINK;
3997 cur = CUR;
3998 }
3999 }
4000 buf[len] = 0;
4001 if (cur != stop) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004002 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004003 } else {
4004 NEXT;
4005 }
Daniel Veillard4a7ae502002-02-18 19:18:17 +00004006 ctxt->instate = oldstate;
Owen Taylor3473f882001-02-23 17:55:21 +00004007 return(buf);
4008}
4009
Daniel Veillard8ed10722009-08-20 19:17:36 +02004010static void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata);
Daniel Veillard0a119eb2005-07-20 13:46:00 +00004011
/*
 * Lookup table used by the inner loop of the character data fast path.
 * An entry is non-zero (equal to its own index) when the byte can be
 * skipped over as plain text, and zero when the scanning loop has to stop:
 *   - control bytes below 0x20 other than TAB (LF and CR included),
 *   - '&', '<' and ']',
 *   - every byte from 0x80 upwards (non-ASCII).
 * Only the first 128 entries are spelled out; the remaining ones are
 * implicitly zero-initialized.
 */
static const unsigned char test_char_data[256] = {
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,  /* 0x00 - 0x07 */
    0x00, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,  /* 0x08 - 0x0F: TAB only */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,  /* 0x10 - 0x17 */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,  /* 0x18 - 0x1F */
    0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x00, 0x27,  /* 0x20 - 0x27: no '&' */
    0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,  /* 0x28 - 0x2F */
    0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,  /* 0x30 - 0x37 */
    0x38, 0x39, 0x3A, 0x3B, 0x00, 0x3D, 0x3E, 0x3F,  /* 0x38 - 0x3F: no '<' */
    0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,  /* 0x40 - 0x47 */
    0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F,  /* 0x48 - 0x4F */
    0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,  /* 0x50 - 0x57 */
    0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x00, 0x5E, 0x5F,  /* 0x58 - 0x5F: no ']' */
    0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,  /* 0x60 - 0x67 */
    0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,  /* 0x68 - 0x6F */
    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,  /* 0x70 - 0x77 */
    0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F   /* 0x78 - 0x7F */
    /* 0x80 - 0xFF: non-ASCII, left to the implicit zero fill */
};
4049
Owen Taylor3473f882001-02-23 17:55:21 +00004050/**
4051 * xmlParseCharData:
4052 * @ctxt: an XML parser context
4053 * @cdata: int indicating whether we are within a CDATA section
4054 *
4055 * parse a CharData section.
4056 * if we are within a CDATA section ']]>' marks an end of section.
4057 *
4058 * The right angle bracket (>) may be represented using the string "&gt;",
4059 * and must, for compatibility, be escaped using "&gt;" or a character
4060 * reference when it appears in the string "]]>" in content, when that
4061 * string is not marking the end of a CDATA section.
4062 *
4063 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
4064 */
4065
4066void
4067xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00004068 const xmlChar *in;
Daniel Veillard48b2f892001-02-25 16:11:03 +00004069 int nbchar = 0;
Daniel Veillard50582112001-03-26 22:52:16 +00004070 int line = ctxt->input->line;
4071 int col = ctxt->input->col;
Daniel Veillard0714c5b2005-01-23 00:01:01 +00004072 int ccol;
Daniel Veillard48b2f892001-02-25 16:11:03 +00004073
4074 SHRINK;
4075 GROW;
4076 /*
4077 * Accelerated common case where input don't need to be
4078 * modified before passing it to the handler.
4079 */
Daniel Veillardfdc91562002-07-01 21:52:03 +00004080 if (!cdata) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00004081 in = ctxt->input->cur;
4082 do {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004083get_more_space:
Daniel Veillard9e264ad2008-01-11 06:10:16 +00004084 while (*in == 0x20) { in++; ctxt->input->col++; }
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004085 if (*in == 0xA) {
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004086 do {
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00004087 ctxt->input->line++; ctxt->input->col = 1;
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004088 in++;
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004089 } while (*in == 0xA);
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004090 goto get_more_space;
4091 }
4092 if (*in == '<') {
4093 nbchar = in - ctxt->input->cur;
4094 if (nbchar > 0) {
4095 const xmlChar *tmp = ctxt->input->cur;
4096 ctxt->input->cur = in;
4097
Daniel Veillard34099b42004-11-04 17:34:35 +00004098 if ((ctxt->sax != NULL) &&
4099 (ctxt->sax->ignorableWhitespace !=
4100 ctxt->sax->characters)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004101 if (areBlanks(ctxt, tmp, nbchar, 1)) {
Daniel Veillard32acf0c2005-03-31 14:12:37 +00004102 if (ctxt->sax->ignorableWhitespace != NULL)
4103 ctxt->sax->ignorableWhitespace(ctxt->userData,
4104 tmp, nbchar);
Daniel Veillard1114d002006-10-12 16:24:35 +00004105 } else {
4106 if (ctxt->sax->characters != NULL)
4107 ctxt->sax->characters(ctxt->userData,
4108 tmp, nbchar);
4109 if (*ctxt->space == -1)
4110 *ctxt->space = -2;
4111 }
Daniel Veillard34099b42004-11-04 17:34:35 +00004112 } else if ((ctxt->sax != NULL) &&
4113 (ctxt->sax->characters != NULL)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004114 ctxt->sax->characters(ctxt->userData,
4115 tmp, nbchar);
4116 }
4117 }
4118 return;
4119 }
Daniel Veillard0714c5b2005-01-23 00:01:01 +00004120
Daniel Veillard3ed155f2001-04-29 19:56:59 +00004121get_more:
Daniel Veillard0714c5b2005-01-23 00:01:01 +00004122 ccol = ctxt->input->col;
Daniel Veillard0a119eb2005-07-20 13:46:00 +00004123 while (test_char_data[*in]) {
4124 in++;
4125 ccol++;
4126 }
Daniel Veillard0714c5b2005-01-23 00:01:01 +00004127 ctxt->input->col = ccol;
Daniel Veillard561b7f82002-03-20 21:55:57 +00004128 if (*in == 0xA) {
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004129 do {
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00004130 ctxt->input->line++; ctxt->input->col = 1;
Daniel Veillard3ed155f2001-04-29 19:56:59 +00004131 in++;
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004132 } while (*in == 0xA);
Daniel Veillard3ed155f2001-04-29 19:56:59 +00004133 goto get_more;
Daniel Veillard561b7f82002-03-20 21:55:57 +00004134 }
4135 if (*in == ']') {
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00004136 if ((in[1] == ']') && (in[2] == '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004137 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00004138 ctxt->input->cur = in;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00004139 return;
4140 }
4141 in++;
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00004142 ctxt->input->col++;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00004143 goto get_more;
4144 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00004145 nbchar = in - ctxt->input->cur;
Daniel Veillard80f32572001-03-07 19:45:40 +00004146 if (nbchar > 0) {
Daniel Veillard34099b42004-11-04 17:34:35 +00004147 if ((ctxt->sax != NULL) &&
4148 (ctxt->sax->ignorableWhitespace !=
Daniel Veillard40412cd2003-09-03 13:28:32 +00004149 ctxt->sax->characters) &&
William M. Brack76e95df2003-10-18 16:20:14 +00004150 (IS_BLANK_CH(*ctxt->input->cur))) {
Daniel Veillarda7374592001-05-10 14:17:55 +00004151 const xmlChar *tmp = ctxt->input->cur;
4152 ctxt->input->cur = in;
Daniel Veillard40412cd2003-09-03 13:28:32 +00004153
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004154 if (areBlanks(ctxt, tmp, nbchar, 0)) {
Daniel Veillard32acf0c2005-03-31 14:12:37 +00004155 if (ctxt->sax->ignorableWhitespace != NULL)
4156 ctxt->sax->ignorableWhitespace(ctxt->userData,
4157 tmp, nbchar);
Daniel Veillard1114d002006-10-12 16:24:35 +00004158 } else {
4159 if (ctxt->sax->characters != NULL)
4160 ctxt->sax->characters(ctxt->userData,
4161 tmp, nbchar);
4162 if (*ctxt->space == -1)
4163 *ctxt->space = -2;
4164 }
Daniel Veillard3ed27bd2001-06-17 17:58:17 +00004165 line = ctxt->input->line;
4166 col = ctxt->input->col;
Daniel Veillard34099b42004-11-04 17:34:35 +00004167 } else if (ctxt->sax != NULL) {
Daniel Veillard80f32572001-03-07 19:45:40 +00004168 if (ctxt->sax->characters != NULL)
4169 ctxt->sax->characters(ctxt->userData,
4170 ctxt->input->cur, nbchar);
Daniel Veillard3ed27bd2001-06-17 17:58:17 +00004171 line = ctxt->input->line;
4172 col = ctxt->input->col;
Daniel Veillard80f32572001-03-07 19:45:40 +00004173 }
Daniel Veillard1dc9feb2008-11-17 15:59:21 +00004174 /* something really bad happened in the SAX callback */
4175 if (ctxt->instate != XML_PARSER_CONTENT)
4176 return;
Daniel Veillard48b2f892001-02-25 16:11:03 +00004177 }
4178 ctxt->input->cur = in;
Daniel Veillard561b7f82002-03-20 21:55:57 +00004179 if (*in == 0xD) {
4180 in++;
4181 if (*in == 0xA) {
4182 ctxt->input->cur = in;
Daniel Veillard48b2f892001-02-25 16:11:03 +00004183 in++;
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00004184 ctxt->input->line++; ctxt->input->col = 1;
Daniel Veillard561b7f82002-03-20 21:55:57 +00004185 continue; /* while */
Daniel Veillard48b2f892001-02-25 16:11:03 +00004186 }
Daniel Veillard561b7f82002-03-20 21:55:57 +00004187 in--;
4188 }
4189 if (*in == '<') {
4190 return;
4191 }
4192 if (*in == '&') {
4193 return;
Daniel Veillard48b2f892001-02-25 16:11:03 +00004194 }
4195 SHRINK;
4196 GROW;
4197 in = ctxt->input->cur;
William M. Brackc07329e2003-09-08 01:57:30 +00004198 } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
Daniel Veillard48b2f892001-02-25 16:11:03 +00004199 nbchar = 0;
4200 }
Daniel Veillard50582112001-03-26 22:52:16 +00004201 ctxt->input->line = line;
4202 ctxt->input->col = col;
Daniel Veillard48b2f892001-02-25 16:11:03 +00004203 xmlParseCharDataComplex(ctxt, cdata);
4204}
4205
Daniel Veillard01c13b52002-12-10 15:19:08 +00004206/**
4207 * xmlParseCharDataComplex:
4208 * @ctxt: an XML parser context
4209 * @cdata: int indicating whether we are within a CDATA section
4210 *
4211 * parse a CharData section.this is the fallback function
4212 * of xmlParseCharData() when the parsing requires handling
4213 * of non-ASCII characters.
4214 */
Daniel Veillard8ed10722009-08-20 19:17:36 +02004215static void
Daniel Veillard48b2f892001-02-25 16:11:03 +00004216xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) {
Owen Taylor3473f882001-02-23 17:55:21 +00004217 xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
4218 int nbchar = 0;
4219 int cur, l;
4220 int count = 0;
4221
4222 SHRINK;
4223 GROW;
4224 cur = CUR_CHAR(l);
Daniel Veillardfdc91562002-07-01 21:52:03 +00004225 while ((cur != '<') && /* checked */
4226 (cur != '&') &&
William M. Brack871611b2003-10-18 04:53:14 +00004227 (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
Owen Taylor3473f882001-02-23 17:55:21 +00004228 if ((cur == ']') && (NXT(1) == ']') &&
4229 (NXT(2) == '>')) {
4230 if (cdata) break;
4231 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004232 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004233 }
4234 }
4235 COPY_BUF(l,buf,nbchar,cur);
4236 if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
Daniel Veillard092643b2003-09-25 14:29:29 +00004237 buf[nbchar] = 0;
4238
Owen Taylor3473f882001-02-23 17:55:21 +00004239 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00004240 * OK the segment is to be consumed as chars.
Owen Taylor3473f882001-02-23 17:55:21 +00004241 */
4242 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004243 if (areBlanks(ctxt, buf, nbchar, 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00004244 if (ctxt->sax->ignorableWhitespace != NULL)
4245 ctxt->sax->ignorableWhitespace(ctxt->userData,
4246 buf, nbchar);
4247 } else {
4248 if (ctxt->sax->characters != NULL)
4249 ctxt->sax->characters(ctxt->userData, buf, nbchar);
Daniel Veillard1114d002006-10-12 16:24:35 +00004250 if ((ctxt->sax->characters !=
4251 ctxt->sax->ignorableWhitespace) &&
4252 (*ctxt->space == -1))
4253 *ctxt->space = -2;
Owen Taylor3473f882001-02-23 17:55:21 +00004254 }
4255 }
4256 nbchar = 0;
Daniel Veillard1dc9feb2008-11-17 15:59:21 +00004257 /* something really bad happened in the SAX callback */
4258 if (ctxt->instate != XML_PARSER_CONTENT)
4259 return;
Owen Taylor3473f882001-02-23 17:55:21 +00004260 }
4261 count++;
4262 if (count > 50) {
4263 GROW;
4264 count = 0;
4265 }
4266 NEXTL(l);
4267 cur = CUR_CHAR(l);
4268 }
4269 if (nbchar != 0) {
Daniel Veillard092643b2003-09-25 14:29:29 +00004270 buf[nbchar] = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00004271 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00004272 * OK the segment is to be consumed as chars.
Owen Taylor3473f882001-02-23 17:55:21 +00004273 */
4274 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004275 if (areBlanks(ctxt, buf, nbchar, 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00004276 if (ctxt->sax->ignorableWhitespace != NULL)
4277 ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
4278 } else {
4279 if (ctxt->sax->characters != NULL)
4280 ctxt->sax->characters(ctxt->userData, buf, nbchar);
Daniel Veillard1114d002006-10-12 16:24:35 +00004281 if ((ctxt->sax->characters != ctxt->sax->ignorableWhitespace) &&
4282 (*ctxt->space == -1))
4283 *ctxt->space = -2;
Owen Taylor3473f882001-02-23 17:55:21 +00004284 }
4285 }
4286 }
Daniel Veillard3b1478b2006-04-24 08:50:10 +00004287 if ((cur != 0) && (!IS_CHAR(cur))) {
4288 /* Generate the error and skip the offending character */
4289 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4290 "PCDATA invalid Char value %d\n",
4291 cur);
4292 NEXTL(l);
4293 }
Owen Taylor3473f882001-02-23 17:55:21 +00004294}
4295
4296/**
4297 * xmlParseExternalID:
4298 * @ctxt: an XML parser context
4299 * @publicID: a xmlChar** receiving PubidLiteral
4300 * @strict: indicate whether we should restrict parsing to only
4301 * production [75], see NOTE below
4302 *
4303 * Parse an External ID or a Public ID
4304 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00004305 * NOTE: Productions [75] and [83] interact badly since [75] can generate
Owen Taylor3473f882001-02-23 17:55:21 +00004306 * 'PUBLIC' S PubidLiteral S SystemLiteral
4307 *
4308 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
4309 * | 'PUBLIC' S PubidLiteral S SystemLiteral
4310 *
4311 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
4312 *
4313 * Returns the function returns SystemLiteral and in the second
4314 * case publicID receives PubidLiteral, is strict is off
4315 * it is possible to return NULL and have publicID set.
4316 */
4317
4318xmlChar *
4319xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
4320 xmlChar *URI = NULL;
4321
4322 SHRINK;
Daniel Veillard146c9122001-03-22 15:22:27 +00004323
4324 *publicID = NULL;
Daniel Veillarda07050d2003-10-19 14:46:32 +00004325 if (CMP6(CUR_PTR, 'S', 'Y', 'S', 'T', 'E', 'M')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004326 SKIP(6);
William M. Brack76e95df2003-10-18 16:20:14 +00004327 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004328 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4329 "Space required after 'SYSTEM'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004330 }
4331 SKIP_BLANKS;
4332 URI = xmlParseSystemLiteral(ctxt);
4333 if (URI == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004334 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004335 }
Daniel Veillarda07050d2003-10-19 14:46:32 +00004336 } else if (CMP6(CUR_PTR, 'P', 'U', 'B', 'L', 'I', 'C')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004337 SKIP(6);
William M. Brack76e95df2003-10-18 16:20:14 +00004338 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004339 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004340 "Space required after 'PUBLIC'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004341 }
4342 SKIP_BLANKS;
4343 *publicID = xmlParsePubidLiteral(ctxt);
4344 if (*publicID == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004345 xmlFatalErr(ctxt, XML_ERR_PUBID_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004346 }
4347 if (strict) {
4348 /*
4349 * We don't handle [83] so "S SystemLiteral" is required.
4350 */
William M. Brack76e95df2003-10-18 16:20:14 +00004351 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004352 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004353 "Space required after the Public Identifier\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004354 }
4355 } else {
4356 /*
4357 * We handle [83] so we return immediately, if
4358 * "S SystemLiteral" is not detected. From a purely parsing
4359 * point of view that's a nice mess.
4360 */
4361 const xmlChar *ptr;
4362 GROW;
4363
4364 ptr = CUR_PTR;
William M. Brack76e95df2003-10-18 16:20:14 +00004365 if (!IS_BLANK_CH(*ptr)) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004366
William M. Brack76e95df2003-10-18 16:20:14 +00004367 while (IS_BLANK_CH(*ptr)) ptr++; /* TODO: dangerous, fix ! */
Owen Taylor3473f882001-02-23 17:55:21 +00004368 if ((*ptr != '\'') && (*ptr != '"')) return(NULL);
4369 }
4370 SKIP_BLANKS;
4371 URI = xmlParseSystemLiteral(ctxt);
4372 if (URI == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004373 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004374 }
4375 }
4376 return(URI);
4377}
4378
4379/**
Daniel Veillard4c778d82005-01-23 17:37:44 +00004380 * xmlParseCommentComplex:
Owen Taylor3473f882001-02-23 17:55:21 +00004381 * @ctxt: an XML parser context
Daniel Veillard4c778d82005-01-23 17:37:44 +00004382 * @buf: the already parsed part of the buffer
4383 * @len: number of bytes filles in the buffer
4384 * @size: allocated size of the buffer
Owen Taylor3473f882001-02-23 17:55:21 +00004385 *
4386 * Skip an XML (SGML) comment <!-- .... -->
4387 * The spec says that "For compatibility, the string "--" (double-hyphen)
4388 * must not occur within comments. "
Daniel Veillard4c778d82005-01-23 17:37:44 +00004389 * This is the slow routine in case the accelerator for ascii didn't work
Owen Taylor3473f882001-02-23 17:55:21 +00004390 *
4391 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4392 */
Daniel Veillard4c778d82005-01-23 17:37:44 +00004393static void
4394xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar *buf, int len, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +00004395 int q, ql;
4396 int r, rl;
4397 int cur, l;
Owen Taylor3473f882001-02-23 17:55:21 +00004398 int count = 0;
Daniel Veillard051d52c2008-07-29 16:44:59 +00004399 int inputid;
4400
4401 inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00004402
Owen Taylor3473f882001-02-23 17:55:21 +00004403 if (buf == NULL) {
Daniel Veillard4c778d82005-01-23 17:37:44 +00004404 len = 0;
4405 size = XML_PARSER_BUFFER_SIZE;
4406 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4407 if (buf == NULL) {
4408 xmlErrMemory(ctxt, NULL);
4409 return;
4410 }
Owen Taylor3473f882001-02-23 17:55:21 +00004411 }
William M. Brackbf9a73d2007-02-09 00:07:07 +00004412 GROW; /* Assure there's enough input data */
Owen Taylor3473f882001-02-23 17:55:21 +00004413 q = CUR_CHAR(ql);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00004414 if (q == 0)
4415 goto not_terminated;
Daniel Veillardda629342007-08-01 07:49:06 +00004416 if (!IS_CHAR(q)) {
4417 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4418 "xmlParseComment: invalid xmlChar value %d\n",
4419 q);
4420 xmlFree (buf);
4421 return;
4422 }
Owen Taylor3473f882001-02-23 17:55:21 +00004423 NEXTL(ql);
4424 r = CUR_CHAR(rl);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00004425 if (r == 0)
4426 goto not_terminated;
Daniel Veillardda629342007-08-01 07:49:06 +00004427 if (!IS_CHAR(r)) {
4428 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4429 "xmlParseComment: invalid xmlChar value %d\n",
4430 q);
4431 xmlFree (buf);
4432 return;
4433 }
Owen Taylor3473f882001-02-23 17:55:21 +00004434 NEXTL(rl);
4435 cur = CUR_CHAR(l);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00004436 if (cur == 0)
4437 goto not_terminated;
William M. Brack871611b2003-10-18 04:53:14 +00004438 while (IS_CHAR(cur) && /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00004439 ((cur != '>') ||
4440 (r != '-') || (q != '-'))) {
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00004441 if ((r == '-') && (q == '-')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004442 xmlFatalErr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004443 }
4444 if (len + 5 >= size) {
William M. Bracka3215c72004-07-31 16:24:01 +00004445 xmlChar *new_buf;
Owen Taylor3473f882001-02-23 17:55:21 +00004446 size *= 2;
William M. Bracka3215c72004-07-31 16:24:01 +00004447 new_buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4448 if (new_buf == NULL) {
4449 xmlFree (buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004450 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004451 return;
4452 }
William M. Bracka3215c72004-07-31 16:24:01 +00004453 buf = new_buf;
Owen Taylor3473f882001-02-23 17:55:21 +00004454 }
4455 COPY_BUF(ql,buf,len,q);
4456 q = r;
4457 ql = rl;
4458 r = cur;
4459 rl = l;
4460
4461 count++;
4462 if (count > 50) {
4463 GROW;
4464 count = 0;
4465 }
4466 NEXTL(l);
4467 cur = CUR_CHAR(l);
4468 if (cur == 0) {
4469 SHRINK;
4470 GROW;
4471 cur = CUR_CHAR(l);
4472 }
4473 }
4474 buf[len] = 0;
Daniel Veillardda629342007-08-01 07:49:06 +00004475 if (cur == 0) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00004476 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00004477 "Comment not terminated \n<!--%.50s\n", buf);
Daniel Veillardda629342007-08-01 07:49:06 +00004478 } else if (!IS_CHAR(cur)) {
4479 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4480 "xmlParseComment: invalid xmlChar value %d\n",
4481 cur);
Owen Taylor3473f882001-02-23 17:55:21 +00004482 } else {
Daniel Veillard051d52c2008-07-29 16:44:59 +00004483 if (inputid != ctxt->input->id) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004484 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4485 "Comment doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004486 }
4487 NEXT;
4488 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
4489 (!ctxt->disableSAX))
4490 ctxt->sax->comment(ctxt->userData, buf);
Owen Taylor3473f882001-02-23 17:55:21 +00004491 }
Daniel Veillardda629342007-08-01 07:49:06 +00004492 xmlFree(buf);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00004493 return;
4494not_terminated:
4495 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4496 "Comment not terminated\n", NULL);
4497 xmlFree(buf);
Daniel Veillardda629342007-08-01 07:49:06 +00004498 return;
Owen Taylor3473f882001-02-23 17:55:21 +00004499}
Daniel Veillardda629342007-08-01 07:49:06 +00004500
Daniel Veillard4c778d82005-01-23 17:37:44 +00004501/**
4502 * xmlParseComment:
4503 * @ctxt: an XML parser context
4504 *
4505 * Skip an XML (SGML) comment <!-- .... -->
4506 * The spec says that "For compatibility, the string "--" (double-hyphen)
4507 * must not occur within comments. "
4508 *
4509 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4510 */
4511void
4512xmlParseComment(xmlParserCtxtPtr ctxt) {
4513 xmlChar *buf = NULL;
4514 int size = XML_PARSER_BUFFER_SIZE;
Daniel Veillard316a5c32005-01-23 22:56:39 +00004515 int len = 0;
Daniel Veillard4c778d82005-01-23 17:37:44 +00004516 xmlParserInputState state;
4517 const xmlChar *in;
4518 int nbchar = 0, ccol;
Daniel Veillard051d52c2008-07-29 16:44:59 +00004519 int inputid;
Daniel Veillard4c778d82005-01-23 17:37:44 +00004520
4521 /*
4522 * Check that there is a comment right here.
4523 */
4524 if ((RAW != '<') || (NXT(1) != '!') ||
4525 (NXT(2) != '-') || (NXT(3) != '-')) return;
Daniel Veillard4c778d82005-01-23 17:37:44 +00004526 state = ctxt->instate;
4527 ctxt->instate = XML_PARSER_COMMENT;
Daniel Veillard051d52c2008-07-29 16:44:59 +00004528 inputid = ctxt->input->id;
Daniel Veillard4c778d82005-01-23 17:37:44 +00004529 SKIP(4);
4530 SHRINK;
4531 GROW;
4532
4533 /*
4534 * Accelerated common case where input don't need to be
4535 * modified before passing it to the handler.
4536 */
4537 in = ctxt->input->cur;
4538 do {
4539 if (*in == 0xA) {
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004540 do {
Daniel Veillard4c778d82005-01-23 17:37:44 +00004541 ctxt->input->line++; ctxt->input->col = 1;
4542 in++;
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004543 } while (*in == 0xA);
Daniel Veillard4c778d82005-01-23 17:37:44 +00004544 }
4545get_more:
4546 ccol = ctxt->input->col;
4547 while (((*in > '-') && (*in <= 0x7F)) ||
4548 ((*in >= 0x20) && (*in < '-')) ||
4549 (*in == 0x09)) {
4550 in++;
4551 ccol++;
4552 }
4553 ctxt->input->col = ccol;
4554 if (*in == 0xA) {
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004555 do {
Daniel Veillard4c778d82005-01-23 17:37:44 +00004556 ctxt->input->line++; ctxt->input->col = 1;
4557 in++;
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004558 } while (*in == 0xA);
Daniel Veillard4c778d82005-01-23 17:37:44 +00004559 goto get_more;
4560 }
4561 nbchar = in - ctxt->input->cur;
4562 /*
4563 * save current set of data
4564 */
4565 if (nbchar > 0) {
4566 if ((ctxt->sax != NULL) &&
4567 (ctxt->sax->comment != NULL)) {
4568 if (buf == NULL) {
4569 if ((*in == '-') && (in[1] == '-'))
4570 size = nbchar + 1;
4571 else
4572 size = XML_PARSER_BUFFER_SIZE + nbchar;
4573 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4574 if (buf == NULL) {
4575 xmlErrMemory(ctxt, NULL);
4576 ctxt->instate = state;
4577 return;
4578 }
4579 len = 0;
4580 } else if (len + nbchar + 1 >= size) {
4581 xmlChar *new_buf;
4582 size += len + nbchar + XML_PARSER_BUFFER_SIZE;
4583 new_buf = (xmlChar *) xmlRealloc(buf,
4584 size * sizeof(xmlChar));
4585 if (new_buf == NULL) {
4586 xmlFree (buf);
4587 xmlErrMemory(ctxt, NULL);
4588 ctxt->instate = state;
4589 return;
4590 }
4591 buf = new_buf;
4592 }
4593 memcpy(&buf[len], ctxt->input->cur, nbchar);
4594 len += nbchar;
4595 buf[len] = 0;
4596 }
4597 }
4598 ctxt->input->cur = in;
Daniel Veillard9b528c72006-02-05 03:06:15 +00004599 if (*in == 0xA) {
4600 in++;
4601 ctxt->input->line++; ctxt->input->col = 1;
4602 }
Daniel Veillard4c778d82005-01-23 17:37:44 +00004603 if (*in == 0xD) {
4604 in++;
4605 if (*in == 0xA) {
4606 ctxt->input->cur = in;
4607 in++;
4608 ctxt->input->line++; ctxt->input->col = 1;
4609 continue; /* while */
4610 }
4611 in--;
4612 }
4613 SHRINK;
4614 GROW;
4615 in = ctxt->input->cur;
4616 if (*in == '-') {
4617 if (in[1] == '-') {
4618 if (in[2] == '>') {
Daniel Veillard051d52c2008-07-29 16:44:59 +00004619 if (ctxt->input->id != inputid) {
4620 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4621 "comment doesn't start and stop in the same entity\n");
4622 }
Daniel Veillard4c778d82005-01-23 17:37:44 +00004623 SKIP(3);
4624 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
4625 (!ctxt->disableSAX)) {
4626 if (buf != NULL)
4627 ctxt->sax->comment(ctxt->userData, buf);
4628 else
4629 ctxt->sax->comment(ctxt->userData, BAD_CAST "");
4630 }
4631 if (buf != NULL)
4632 xmlFree(buf);
4633 ctxt->instate = state;
4634 return;
4635 }
4636 if (buf != NULL)
4637 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4638 "Comment not terminated \n<!--%.50s\n",
4639 buf);
4640 else
4641 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4642 "Comment not terminated \n", NULL);
4643 in++;
4644 ctxt->input->col++;
4645 }
4646 in++;
4647 ctxt->input->col++;
4648 goto get_more;
4649 }
4650 } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
4651 xmlParseCommentComplex(ctxt, buf, len, size);
4652 ctxt->instate = state;
4653 return;
4654}
4655
Owen Taylor3473f882001-02-23 17:55:21 +00004656
4657/**
4658 * xmlParsePITarget:
4659 * @ctxt: an XML parser context
4660 *
4661 * parse the name of a PI
4662 *
4663 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
4664 *
4665 * Returns the PITarget name or NULL
4666 */
4667
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004668const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00004669xmlParsePITarget(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004670 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00004671
4672 name = xmlParseName(ctxt);
4673 if ((name != NULL) &&
4674 ((name[0] == 'x') || (name[0] == 'X')) &&
4675 ((name[1] == 'm') || (name[1] == 'M')) &&
4676 ((name[2] == 'l') || (name[2] == 'L'))) {
4677 int i;
4678 if ((name[0] == 'x') && (name[1] == 'm') &&
4679 (name[2] == 'l') && (name[3] == 0)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004680 xmlFatalErrMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
Owen Taylor3473f882001-02-23 17:55:21 +00004681 "XML declaration allowed only at the start of the document\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004682 return(name);
4683 } else if (name[3] == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004684 xmlFatalErr(ctxt, XML_ERR_RESERVED_XML_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004685 return(name);
4686 }
4687 for (i = 0;;i++) {
4688 if (xmlW3CPIs[i] == NULL) break;
4689 if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
4690 return(name);
4691 }
Daniel Veillard24eb9782003-10-04 21:08:09 +00004692 xmlWarningMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
4693 "xmlParsePITarget: invalid name prefix 'xml'\n",
4694 NULL, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004695 }
Daniel Veillard37334572008-07-31 08:20:02 +00004696 if ((name != NULL) && (xmlStrchr(name, ':') != NULL)) {
4697 xmlNsErr(ctxt, XML_NS_ERR_COLON,
4698 "colon are forbidden from PI names '%s'\n", name, NULL, NULL);
4699 }
Owen Taylor3473f882001-02-23 17:55:21 +00004700 return(name);
4701}
4702
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00004703#ifdef LIBXML_CATALOG_ENABLED
4704/**
4705 * xmlParseCatalogPI:
4706 * @ctxt: an XML parser context
4707 * @catalog: the PI value string
4708 *
4709 * parse an XML Catalog Processing Instruction.
4710 *
4711 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
4712 *
4713 * Occurs only if allowed by the user and if happening in the Misc
4714 * part of the document before any doctype informations
4715 * This will add the given catalog to the parsing context in order
4716 * to be used if there is a resolution need further down in the document
4717 */
4718
4719static void
4720xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
4721 xmlChar *URL = NULL;
4722 const xmlChar *tmp, *base;
4723 xmlChar marker;
4724
4725 tmp = catalog;
William M. Brack76e95df2003-10-18 16:20:14 +00004726 while (IS_BLANK_CH(*tmp)) tmp++;
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00004727 if (xmlStrncmp(tmp, BAD_CAST"catalog", 7))
4728 goto error;
4729 tmp += 7;
William M. Brack76e95df2003-10-18 16:20:14 +00004730 while (IS_BLANK_CH(*tmp)) tmp++;
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00004731 if (*tmp != '=') {
4732 return;
4733 }
4734 tmp++;
William M. Brack76e95df2003-10-18 16:20:14 +00004735 while (IS_BLANK_CH(*tmp)) tmp++;
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00004736 marker = *tmp;
4737 if ((marker != '\'') && (marker != '"'))
4738 goto error;
4739 tmp++;
4740 base = tmp;
4741 while ((*tmp != 0) && (*tmp != marker)) tmp++;
4742 if (*tmp == 0)
4743 goto error;
4744 URL = xmlStrndup(base, tmp - base);
4745 tmp++;
William M. Brack76e95df2003-10-18 16:20:14 +00004746 while (IS_BLANK_CH(*tmp)) tmp++;
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00004747 if (*tmp != 0)
4748 goto error;
4749
4750 if (URL != NULL) {
4751 ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, URL);
4752 xmlFree(URL);
4753 }
4754 return;
4755
4756error:
Daniel Veillard24eb9782003-10-04 21:08:09 +00004757 xmlWarningMsg(ctxt, XML_WAR_CATALOG_PI,
4758 "Catalog PI syntax error: %s\n",
4759 catalog, NULL);
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00004760 if (URL != NULL)
4761 xmlFree(URL);
4762}
4763#endif
4764
Owen Taylor3473f882001-02-23 17:55:21 +00004765/**
4766 * xmlParsePI:
4767 * @ctxt: an XML parser context
4768 *
4769 * parse an XML Processing Instruction.
4770 *
4771 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
4772 *
4773 * The processing is transfered to SAX once parsed.
4774 */
4775
4776void
4777xmlParsePI(xmlParserCtxtPtr ctxt) {
4778 xmlChar *buf = NULL;
4779 int len = 0;
4780 int size = XML_PARSER_BUFFER_SIZE;
4781 int cur, l;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004782 const xmlChar *target;
Owen Taylor3473f882001-02-23 17:55:21 +00004783 xmlParserInputState state;
4784 int count = 0;
4785
4786 if ((RAW == '<') && (NXT(1) == '?')) {
4787 xmlParserInputPtr input = ctxt->input;
4788 state = ctxt->instate;
4789 ctxt->instate = XML_PARSER_PI;
4790 /*
4791 * this is a Processing Instruction.
4792 */
4793 SKIP(2);
4794 SHRINK;
4795
4796 /*
4797 * Parse the target name and check for special support like
4798 * namespace.
4799 */
4800 target = xmlParsePITarget(ctxt);
4801 if (target != NULL) {
4802 if ((RAW == '?') && (NXT(1) == '>')) {
4803 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004804 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4805 "PI declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004806 }
4807 SKIP(2);
4808
4809 /*
4810 * SAX: PI detected.
4811 */
4812 if ((ctxt->sax) && (!ctxt->disableSAX) &&
4813 (ctxt->sax->processingInstruction != NULL))
4814 ctxt->sax->processingInstruction(ctxt->userData,
4815 target, NULL);
4816 ctxt->instate = state;
Owen Taylor3473f882001-02-23 17:55:21 +00004817 return;
4818 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00004819 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00004820 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004821 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004822 ctxt->instate = state;
4823 return;
4824 }
4825 cur = CUR;
4826 if (!IS_BLANK(cur)) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00004827 xmlFatalErrMsgStr(ctxt, XML_ERR_SPACE_REQUIRED,
4828 "ParsePI: PI %s space expected\n", target);
Owen Taylor3473f882001-02-23 17:55:21 +00004829 }
4830 SKIP_BLANKS;
4831 cur = CUR_CHAR(l);
William M. Brack871611b2003-10-18 04:53:14 +00004832 while (IS_CHAR(cur) && /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00004833 ((cur != '?') || (NXT(1) != '>'))) {
4834 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00004835 xmlChar *tmp;
4836
Owen Taylor3473f882001-02-23 17:55:21 +00004837 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00004838 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4839 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004840 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00004841 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00004842 ctxt->instate = state;
4843 return;
4844 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00004845 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00004846 }
4847 count++;
4848 if (count > 50) {
4849 GROW;
4850 count = 0;
4851 }
4852 COPY_BUF(l,buf,len,cur);
4853 NEXTL(l);
4854 cur = CUR_CHAR(l);
4855 if (cur == 0) {
4856 SHRINK;
4857 GROW;
4858 cur = CUR_CHAR(l);
4859 }
4860 }
4861 buf[len] = 0;
4862 if (cur != '?') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00004863 xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
4864 "ParsePI: PI %s never end ...\n", target);
Owen Taylor3473f882001-02-23 17:55:21 +00004865 } else {
4866 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004867 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4868 "PI declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004869 }
4870 SKIP(2);
4871
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00004872#ifdef LIBXML_CATALOG_ENABLED
4873 if (((state == XML_PARSER_MISC) ||
4874 (state == XML_PARSER_START)) &&
4875 (xmlStrEqual(target, XML_CATALOG_PI))) {
4876 xmlCatalogAllow allow = xmlCatalogGetDefaults();
4877 if ((allow == XML_CATA_ALLOW_DOCUMENT) ||
4878 (allow == XML_CATA_ALLOW_ALL))
4879 xmlParseCatalogPI(ctxt, buf);
4880 }
4881#endif
4882
4883
Owen Taylor3473f882001-02-23 17:55:21 +00004884 /*
4885 * SAX: PI detected.
4886 */
4887 if ((ctxt->sax) && (!ctxt->disableSAX) &&
4888 (ctxt->sax->processingInstruction != NULL))
4889 ctxt->sax->processingInstruction(ctxt->userData,
4890 target, buf);
4891 }
4892 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00004893 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004894 xmlFatalErr(ctxt, XML_ERR_PI_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004895 }
4896 ctxt->instate = state;
4897 }
4898}
4899
4900/**
4901 * xmlParseNotationDecl:
4902 * @ctxt: an XML parser context
4903 *
4904 * parse a notation declaration
4905 *
4906 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
4907 *
4908 * Hence there is actually 3 choices:
4909 * 'PUBLIC' S PubidLiteral
4910 * 'PUBLIC' S PubidLiteral S SystemLiteral
4911 * and 'SYSTEM' S SystemLiteral
4912 *
4913 * See the NOTE on xmlParseExternalID().
4914 */
4915
4916void
4917xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004918 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00004919 xmlChar *Pubid;
4920 xmlChar *Systemid;
4921
Daniel Veillarda07050d2003-10-19 14:46:32 +00004922 if (CMP10(CUR_PTR, '<', '!', 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004923 xmlParserInputPtr input = ctxt->input;
4924 SHRINK;
4925 SKIP(10);
William M. Brack76e95df2003-10-18 16:20:14 +00004926 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004927 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4928 "Space required after '<!NOTATION'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004929 return;
4930 }
4931 SKIP_BLANKS;
4932
Daniel Veillard76d66f42001-05-16 21:05:17 +00004933 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004934 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004935 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004936 return;
4937 }
William M. Brack76e95df2003-10-18 16:20:14 +00004938 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004939 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004940 "Space required after the NOTATION name'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004941 return;
4942 }
Daniel Veillard37334572008-07-31 08:20:02 +00004943 if (xmlStrchr(name, ':') != NULL) {
4944 xmlNsErr(ctxt, XML_NS_ERR_COLON,
4945 "colon are forbidden from notation names '%s'\n",
4946 name, NULL, NULL);
4947 }
Owen Taylor3473f882001-02-23 17:55:21 +00004948 SKIP_BLANKS;
4949
4950 /*
4951 * Parse the IDs.
4952 */
4953 Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
4954 SKIP_BLANKS;
4955
4956 if (RAW == '>') {
4957 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004958 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4959 "Notation declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004960 }
4961 NEXT;
4962 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4963 (ctxt->sax->notationDecl != NULL))
4964 ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
4965 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004966 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004967 }
Owen Taylor3473f882001-02-23 17:55:21 +00004968 if (Systemid != NULL) xmlFree(Systemid);
4969 if (Pubid != NULL) xmlFree(Pubid);
4970 }
4971}
4972
4973/**
4974 * xmlParseEntityDecl:
4975 * @ctxt: an XML parser context
4976 *
4977 * parse <!ENTITY declarations
4978 *
4979 * [70] EntityDecl ::= GEDecl | PEDecl
4980 *
4981 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
4982 *
4983 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
4984 *
4985 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
4986 *
4987 * [74] PEDef ::= EntityValue | ExternalID
4988 *
4989 * [76] NDataDecl ::= S 'NDATA' S Name
4990 *
4991 * [ VC: Notation Declared ]
4992 * The Name must match the declared name of a notation.
4993 */
4994
4995void
4996xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004997 const xmlChar *name = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00004998 xmlChar *value = NULL;
4999 xmlChar *URI = NULL, *literal = NULL;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005000 const xmlChar *ndata = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005001 int isParameter = 0;
5002 xmlChar *orig = NULL;
Daniel Veillardf5582f12002-06-11 10:08:16 +00005003 int skipped;
Owen Taylor3473f882001-02-23 17:55:21 +00005004
Daniel Veillard4c778d82005-01-23 17:37:44 +00005005 /* GROW; done in the caller */
Daniel Veillarda07050d2003-10-19 14:46:32 +00005006 if (CMP8(CUR_PTR, '<', '!', 'E', 'N', 'T', 'I', 'T', 'Y')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005007 xmlParserInputPtr input = ctxt->input;
Owen Taylor3473f882001-02-23 17:55:21 +00005008 SHRINK;
5009 SKIP(8);
Daniel Veillardf5582f12002-06-11 10:08:16 +00005010 skipped = SKIP_BLANKS;
5011 if (skipped == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005012 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5013 "Space required after '<!ENTITY'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005014 }
Owen Taylor3473f882001-02-23 17:55:21 +00005015
5016 if (RAW == '%') {
5017 NEXT;
Daniel Veillardf5582f12002-06-11 10:08:16 +00005018 skipped = SKIP_BLANKS;
5019 if (skipped == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005020 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5021 "Space required after '%'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005022 }
Owen Taylor3473f882001-02-23 17:55:21 +00005023 isParameter = 1;
5024 }
5025
Daniel Veillard76d66f42001-05-16 21:05:17 +00005026 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005027 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005028 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5029 "xmlParseEntityDecl: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005030 return;
5031 }
Daniel Veillard37334572008-07-31 08:20:02 +00005032 if (xmlStrchr(name, ':') != NULL) {
5033 xmlNsErr(ctxt, XML_NS_ERR_COLON,
5034 "colon are forbidden from entities names '%s'\n",
5035 name, NULL, NULL);
5036 }
Daniel Veillardf5582f12002-06-11 10:08:16 +00005037 skipped = SKIP_BLANKS;
5038 if (skipped == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005039 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5040 "Space required after the entity name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005041 }
Owen Taylor3473f882001-02-23 17:55:21 +00005042
Daniel Veillardf5582f12002-06-11 10:08:16 +00005043 ctxt->instate = XML_PARSER_ENTITY_DECL;
Owen Taylor3473f882001-02-23 17:55:21 +00005044 /*
5045 * handle the various case of definitions...
5046 */
5047 if (isParameter) {
5048 if ((RAW == '"') || (RAW == '\'')) {
5049 value = xmlParseEntityValue(ctxt, &orig);
5050 if (value) {
5051 if ((ctxt->sax != NULL) &&
5052 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5053 ctxt->sax->entityDecl(ctxt->userData, name,
5054 XML_INTERNAL_PARAMETER_ENTITY,
5055 NULL, NULL, value);
5056 }
5057 } else {
5058 URI = xmlParseExternalID(ctxt, &literal, 1);
5059 if ((URI == NULL) && (literal == NULL)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005060 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005061 }
5062 if (URI) {
5063 xmlURIPtr uri;
5064
5065 uri = xmlParseURI((const char *) URI);
5066 if (uri == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00005067 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
5068 "Invalid URI: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00005069 /*
5070 * This really ought to be a well formedness error
5071 * but the XML Core WG decided otherwise c.f. issue
5072 * E26 of the XML erratas.
5073 */
Owen Taylor3473f882001-02-23 17:55:21 +00005074 } else {
5075 if (uri->fragment != NULL) {
Daniel Veillard4a7ae502002-02-18 19:18:17 +00005076 /*
5077 * Okay this is foolish to block those but not
5078 * invalid URIs.
5079 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005080 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005081 } else {
5082 if ((ctxt->sax != NULL) &&
5083 (!ctxt->disableSAX) &&
5084 (ctxt->sax->entityDecl != NULL))
5085 ctxt->sax->entityDecl(ctxt->userData, name,
5086 XML_EXTERNAL_PARAMETER_ENTITY,
5087 literal, URI, NULL);
5088 }
5089 xmlFreeURI(uri);
5090 }
5091 }
5092 }
5093 } else {
5094 if ((RAW == '"') || (RAW == '\'')) {
5095 value = xmlParseEntityValue(ctxt, &orig);
5096 if ((ctxt->sax != NULL) &&
5097 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5098 ctxt->sax->entityDecl(ctxt->userData, name,
5099 XML_INTERNAL_GENERAL_ENTITY,
5100 NULL, NULL, value);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005101 /*
5102 * For expat compatibility in SAX mode.
5103 */
5104 if ((ctxt->myDoc == NULL) ||
5105 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
5106 if (ctxt->myDoc == NULL) {
5107 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
Daniel Veillard68b6e022008-03-31 09:26:00 +00005108 if (ctxt->myDoc == NULL) {
5109 xmlErrMemory(ctxt, "New Doc failed");
5110 return;
5111 }
Daniel Veillardae0765b2008-07-31 19:54:59 +00005112 ctxt->myDoc->properties = XML_DOC_INTERNAL;
Daniel Veillard5997aca2002-03-18 18:36:20 +00005113 }
5114 if (ctxt->myDoc->intSubset == NULL)
5115 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5116 BAD_CAST "fake", NULL, NULL);
5117
Daniel Veillard1af9a412003-08-20 22:54:39 +00005118 xmlSAX2EntityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY,
5119 NULL, NULL, value);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005120 }
Owen Taylor3473f882001-02-23 17:55:21 +00005121 } else {
5122 URI = xmlParseExternalID(ctxt, &literal, 1);
5123 if ((URI == NULL) && (literal == NULL)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005124 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005125 }
5126 if (URI) {
5127 xmlURIPtr uri;
5128
5129 uri = xmlParseURI((const char *)URI);
5130 if (uri == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00005131 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
5132 "Invalid URI: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00005133 /*
5134 * This really ought to be a well formedness error
5135 * but the XML Core WG decided otherwise c.f. issue
5136 * E26 of the XML erratas.
5137 */
Owen Taylor3473f882001-02-23 17:55:21 +00005138 } else {
5139 if (uri->fragment != NULL) {
Daniel Veillard4a7ae502002-02-18 19:18:17 +00005140 /*
5141 * Okay this is foolish to block those but not
5142 * invalid URIs.
5143 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005144 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005145 }
5146 xmlFreeURI(uri);
5147 }
5148 }
William M. Brack76e95df2003-10-18 16:20:14 +00005149 if ((RAW != '>') && (!IS_BLANK_CH(CUR))) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005150 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5151 "Space required before 'NDATA'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005152 }
5153 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005154 if (CMP5(CUR_PTR, 'N', 'D', 'A', 'T', 'A')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005155 SKIP(5);
William M. Brack76e95df2003-10-18 16:20:14 +00005156 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005157 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5158 "Space required after 'NDATA'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005159 }
5160 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00005161 ndata = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005162 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5163 (ctxt->sax->unparsedEntityDecl != NULL))
5164 ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
5165 literal, URI, ndata);
5166 } else {
5167 if ((ctxt->sax != NULL) &&
5168 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5169 ctxt->sax->entityDecl(ctxt->userData, name,
5170 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5171 literal, URI, NULL);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005172 /*
5173 * For expat compatibility in SAX mode.
5174 * assuming the entity repalcement was asked for
5175 */
5176 if ((ctxt->replaceEntities != 0) &&
5177 ((ctxt->myDoc == NULL) ||
5178 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) {
5179 if (ctxt->myDoc == NULL) {
5180 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
Daniel Veillard68b6e022008-03-31 09:26:00 +00005181 if (ctxt->myDoc == NULL) {
5182 xmlErrMemory(ctxt, "New Doc failed");
5183 return;
5184 }
Daniel Veillardae0765b2008-07-31 19:54:59 +00005185 ctxt->myDoc->properties = XML_DOC_INTERNAL;
Daniel Veillard5997aca2002-03-18 18:36:20 +00005186 }
5187
5188 if (ctxt->myDoc->intSubset == NULL)
5189 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5190 BAD_CAST "fake", NULL, NULL);
Daniel Veillard1af9a412003-08-20 22:54:39 +00005191 xmlSAX2EntityDecl(ctxt, name,
5192 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5193 literal, URI, NULL);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005194 }
Owen Taylor3473f882001-02-23 17:55:21 +00005195 }
5196 }
5197 }
5198 SKIP_BLANKS;
5199 if (RAW != '>') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00005200 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00005201 "xmlParseEntityDecl: entity %s not terminated\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00005202 } else {
5203 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005204 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5205 "Entity declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005206 }
5207 NEXT;
5208 }
5209 if (orig != NULL) {
5210 /*
5211 * Ugly mechanism to save the raw entity value.
5212 */
5213 xmlEntityPtr cur = NULL;
5214
5215 if (isParameter) {
5216 if ((ctxt->sax != NULL) &&
5217 (ctxt->sax->getParameterEntity != NULL))
5218 cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
5219 } else {
5220 if ((ctxt->sax != NULL) &&
5221 (ctxt->sax->getEntity != NULL))
5222 cur = ctxt->sax->getEntity(ctxt->userData, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005223 if ((cur == NULL) && (ctxt->userData==ctxt)) {
Daniel Veillard1af9a412003-08-20 22:54:39 +00005224 cur = xmlSAX2GetEntity(ctxt, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005225 }
Owen Taylor3473f882001-02-23 17:55:21 +00005226 }
5227 if (cur != NULL) {
5228 if (cur->orig != NULL)
5229 xmlFree(orig);
5230 else
5231 cur->orig = orig;
5232 } else
5233 xmlFree(orig);
5234 }
Owen Taylor3473f882001-02-23 17:55:21 +00005235 if (value != NULL) xmlFree(value);
5236 if (URI != NULL) xmlFree(URI);
5237 if (literal != NULL) xmlFree(literal);
Owen Taylor3473f882001-02-23 17:55:21 +00005238 }
5239}
5240
5241/**
5242 * xmlParseDefaultDecl:
5243 * @ctxt: an XML parser context
5244 * @value: Receive a possible fixed default value for the attribute
5245 *
5246 * Parse an attribute default declaration
5247 *
5248 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
5249 *
5250 * [ VC: Required Attribute ]
5251 * if the default declaration is the keyword #REQUIRED, then the
5252 * attribute must be specified for all elements of the type in the
5253 * attribute-list declaration.
5254 *
5255 * [ VC: Attribute Default Legal ]
5256 * The declared default value must meet the lexical constraints of
5257 * the declared attribute type c.f. xmlValidateAttributeDecl()
5258 *
5259 * [ VC: Fixed Attribute Default ]
5260 * if an attribute has a default value declared with the #FIXED
5261 * keyword, instances of that attribute must match the default value.
5262 *
5263 * [ WFC: No < in Attribute Values ]
5264 * handled in xmlParseAttValue()
5265 *
5266 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
5267 * or XML_ATTRIBUTE_FIXED.
5268 */
5269
5270int
5271xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
5272 int val;
5273 xmlChar *ret;
5274
5275 *value = NULL;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005276 if (CMP9(CUR_PTR, '#', 'R', 'E', 'Q', 'U', 'I', 'R', 'E', 'D')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005277 SKIP(9);
5278 return(XML_ATTRIBUTE_REQUIRED);
5279 }
Daniel Veillarda07050d2003-10-19 14:46:32 +00005280 if (CMP8(CUR_PTR, '#', 'I', 'M', 'P', 'L', 'I', 'E', 'D')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005281 SKIP(8);
5282 return(XML_ATTRIBUTE_IMPLIED);
5283 }
5284 val = XML_ATTRIBUTE_NONE;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005285 if (CMP6(CUR_PTR, '#', 'F', 'I', 'X', 'E', 'D')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005286 SKIP(6);
5287 val = XML_ATTRIBUTE_FIXED;
William M. Brack76e95df2003-10-18 16:20:14 +00005288 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005289 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5290 "Space required after '#FIXED'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005291 }
5292 SKIP_BLANKS;
5293 }
5294 ret = xmlParseAttValue(ctxt);
5295 ctxt->instate = XML_PARSER_DTD;
5296 if (ret == NULL) {
William M. Brack7b9154b2003-09-27 19:23:50 +00005297 xmlFatalErrMsg(ctxt, (xmlParserErrors)ctxt->errNo,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005298 "Attribute default value declaration error\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005299 } else
5300 *value = ret;
5301 return(val);
5302}
5303
5304/**
5305 * xmlParseNotationType:
5306 * @ctxt: an XML parser context
5307 *
5308 * parse an Notation attribute type.
5309 *
5310 * Note: the leading 'NOTATION' S part has already being parsed...
5311 *
5312 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5313 *
5314 * [ VC: Notation Attributes ]
5315 * Values of this type must match one of the notation names included
5316 * in the declaration; all notation names in the declaration must be declared.
5317 *
5318 * Returns: the notation attribute tree built while parsing
5319 */
5320
5321xmlEnumerationPtr
5322xmlParseNotationType(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005323 const xmlChar *name;
Daniel Veillard49d44052008-08-27 19:57:06 +00005324 xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00005325
5326 if (RAW != '(') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005327 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005328 return(NULL);
5329 }
5330 SHRINK;
5331 do {
5332 NEXT;
5333 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00005334 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005335 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005336 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5337 "Name expected in NOTATION declaration\n");
Daniel Veillard489f9672009-08-10 16:49:30 +02005338 xmlFreeEnumeration(ret);
5339 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005340 }
Daniel Veillard49d44052008-08-27 19:57:06 +00005341 tmp = ret;
5342 while (tmp != NULL) {
5343 if (xmlStrEqual(name, tmp->name)) {
5344 xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5345 "standalone: attribute notation value token %s duplicated\n",
5346 name, NULL);
5347 if (!xmlDictOwns(ctxt->dict, name))
5348 xmlFree((xmlChar *) name);
5349 break;
5350 }
5351 tmp = tmp->next;
5352 }
5353 if (tmp == NULL) {
5354 cur = xmlCreateEnumeration(name);
Daniel Veillard489f9672009-08-10 16:49:30 +02005355 if (cur == NULL) {
5356 xmlFreeEnumeration(ret);
5357 return(NULL);
5358 }
Daniel Veillard49d44052008-08-27 19:57:06 +00005359 if (last == NULL) ret = last = cur;
5360 else {
5361 last->next = cur;
5362 last = cur;
5363 }
Owen Taylor3473f882001-02-23 17:55:21 +00005364 }
5365 SKIP_BLANKS;
5366 } while (RAW == '|');
5367 if (RAW != ')') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005368 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
Daniel Veillard489f9672009-08-10 16:49:30 +02005369 xmlFreeEnumeration(ret);
5370 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005371 }
5372 NEXT;
5373 return(ret);
5374}
5375
5376/**
5377 * xmlParseEnumerationType:
5378 * @ctxt: an XML parser context
5379 *
5380 * parse an Enumeration attribute type.
5381 *
5382 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
5383 *
5384 * [ VC: Enumeration ]
5385 * Values of this type must match one of the Nmtoken tokens in
5386 * the declaration
5387 *
5388 * Returns: the enumeration attribute tree built while parsing
5389 */
5390
5391xmlEnumerationPtr
5392xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
5393 xmlChar *name;
Daniel Veillard49d44052008-08-27 19:57:06 +00005394 xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00005395
5396 if (RAW != '(') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005397 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005398 return(NULL);
5399 }
5400 SHRINK;
5401 do {
5402 NEXT;
5403 SKIP_BLANKS;
5404 name = xmlParseNmtoken(ctxt);
5405 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005406 xmlFatalErr(ctxt, XML_ERR_NMTOKEN_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005407 return(ret);
5408 }
Daniel Veillard49d44052008-08-27 19:57:06 +00005409 tmp = ret;
5410 while (tmp != NULL) {
5411 if (xmlStrEqual(name, tmp->name)) {
5412 xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5413 "standalone: attribute enumeration value token %s duplicated\n",
5414 name, NULL);
5415 if (!xmlDictOwns(ctxt->dict, name))
5416 xmlFree(name);
5417 break;
5418 }
5419 tmp = tmp->next;
5420 }
5421 if (tmp == NULL) {
5422 cur = xmlCreateEnumeration(name);
5423 if (!xmlDictOwns(ctxt->dict, name))
5424 xmlFree(name);
Daniel Veillard489f9672009-08-10 16:49:30 +02005425 if (cur == NULL) {
5426 xmlFreeEnumeration(ret);
5427 return(NULL);
5428 }
Daniel Veillard49d44052008-08-27 19:57:06 +00005429 if (last == NULL) ret = last = cur;
5430 else {
5431 last->next = cur;
5432 last = cur;
5433 }
Owen Taylor3473f882001-02-23 17:55:21 +00005434 }
5435 SKIP_BLANKS;
5436 } while (RAW == '|');
5437 if (RAW != ')') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005438 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005439 return(ret);
5440 }
5441 NEXT;
5442 return(ret);
5443}
5444
5445/**
5446 * xmlParseEnumeratedType:
5447 * @ctxt: an XML parser context
5448 * @tree: the enumeration tree built while parsing
5449 *
5450 * parse an Enumerated attribute type.
5451 *
5452 * [57] EnumeratedType ::= NotationType | Enumeration
5453 *
5454 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5455 *
5456 *
5457 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
5458 */
5459
5460int
5461xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
Daniel Veillarda07050d2003-10-19 14:46:32 +00005462 if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005463 SKIP(8);
William M. Brack76e95df2003-10-18 16:20:14 +00005464 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005465 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5466 "Space required after 'NOTATION'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005467 return(0);
5468 }
5469 SKIP_BLANKS;
5470 *tree = xmlParseNotationType(ctxt);
5471 if (*tree == NULL) return(0);
5472 return(XML_ATTRIBUTE_NOTATION);
5473 }
5474 *tree = xmlParseEnumerationType(ctxt);
5475 if (*tree == NULL) return(0);
5476 return(XML_ATTRIBUTE_ENUMERATION);
5477}
5478
5479/**
5480 * xmlParseAttributeType:
5481 * @ctxt: an XML parser context
5482 * @tree: the enumeration tree built while parsing
5483 *
5484 * parse the Attribute list def for an element
5485 *
5486 * [54] AttType ::= StringType | TokenizedType | EnumeratedType
5487 *
5488 * [55] StringType ::= 'CDATA'
5489 *
5490 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
5491 * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
5492 *
5493 * Validity constraints for attribute values syntax are checked in
5494 * xmlValidateAttributeValue()
5495 *
5496 * [ VC: ID ]
5497 * Values of type ID must match the Name production. A name must not
5498 * appear more than once in an XML document as a value of this type;
5499 * i.e., ID values must uniquely identify the elements which bear them.
5500 *
5501 * [ VC: One ID per Element Type ]
5502 * No element type may have more than one ID attribute specified.
5503 *
5504 * [ VC: ID Attribute Default ]
5505 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
5506 *
5507 * [ VC: IDREF ]
5508 * Values of type IDREF must match the Name production, and values
5509 * of type IDREFS must match Names; each IDREF Name must match the value
5510 * of an ID attribute on some element in the XML document; i.e. IDREF
5511 * values must match the value of some ID attribute.
5512 *
5513 * [ VC: Entity Name ]
5514 * Values of type ENTITY must match the Name production, values
5515 * of type ENTITIES must match Names; each Entity Name must match the
5516 * name of an unparsed entity declared in the DTD.
5517 *
5518 * [ VC: Name Token ]
5519 * Values of type NMTOKEN must match the Nmtoken production; values
5520 * of type NMTOKENS must match Nmtokens.
5521 *
5522 * Returns the attribute type
5523 */
5524int
5525xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
5526 SHRINK;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005527 if (CMP5(CUR_PTR, 'C', 'D', 'A', 'T', 'A')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005528 SKIP(5);
5529 return(XML_ATTRIBUTE_CDATA);
Daniel Veillarda07050d2003-10-19 14:46:32 +00005530 } else if (CMP6(CUR_PTR, 'I', 'D', 'R', 'E', 'F', 'S')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005531 SKIP(6);
5532 return(XML_ATTRIBUTE_IDREFS);
Daniel Veillarda07050d2003-10-19 14:46:32 +00005533 } else if (CMP5(CUR_PTR, 'I', 'D', 'R', 'E', 'F')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005534 SKIP(5);
5535 return(XML_ATTRIBUTE_IDREF);
5536 } else if ((RAW == 'I') && (NXT(1) == 'D')) {
5537 SKIP(2);
5538 return(XML_ATTRIBUTE_ID);
Daniel Veillarda07050d2003-10-19 14:46:32 +00005539 } else if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005540 SKIP(6);
5541 return(XML_ATTRIBUTE_ENTITY);
Daniel Veillarda07050d2003-10-19 14:46:32 +00005542 } else if (CMP8(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'I', 'E', 'S')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005543 SKIP(8);
5544 return(XML_ATTRIBUTE_ENTITIES);
Daniel Veillarda07050d2003-10-19 14:46:32 +00005545 } else if (CMP8(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N', 'S')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005546 SKIP(8);
5547 return(XML_ATTRIBUTE_NMTOKENS);
Daniel Veillarda07050d2003-10-19 14:46:32 +00005548 } else if (CMP7(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005549 SKIP(7);
5550 return(XML_ATTRIBUTE_NMTOKEN);
5551 }
5552 return(xmlParseEnumeratedType(ctxt, tree));
5553}
5554
5555/**
5556 * xmlParseAttributeListDecl:
5557 * @ctxt: an XML parser context
5558 *
5559 * : parse the Attribute list def for an element
5560 *
5561 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
5562 *
5563 * [53] AttDef ::= S Name S AttType S DefaultDecl
5564 *
5565 */
5566void
5567xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005568 const xmlChar *elemName;
5569 const xmlChar *attrName;
Owen Taylor3473f882001-02-23 17:55:21 +00005570 xmlEnumerationPtr tree;
5571
Daniel Veillarda07050d2003-10-19 14:46:32 +00005572 if (CMP9(CUR_PTR, '<', '!', 'A', 'T', 'T', 'L', 'I', 'S', 'T')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005573 xmlParserInputPtr input = ctxt->input;
5574
5575 SKIP(9);
William M. Brack76e95df2003-10-18 16:20:14 +00005576 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005577 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005578 "Space required after '<!ATTLIST'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005579 }
5580 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00005581 elemName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005582 if (elemName == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005583 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5584 "ATTLIST: no name for Element\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005585 return;
5586 }
5587 SKIP_BLANKS;
5588 GROW;
5589 while (RAW != '>') {
5590 const xmlChar *check = CUR_PTR;
5591 int type;
5592 int def;
5593 xmlChar *defaultValue = NULL;
5594
5595 GROW;
5596 tree = NULL;
Daniel Veillard76d66f42001-05-16 21:05:17 +00005597 attrName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005598 if (attrName == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005599 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5600 "ATTLIST: no name for Attribute\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005601 break;
5602 }
5603 GROW;
William M. Brack76e95df2003-10-18 16:20:14 +00005604 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005605 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005606 "Space required after the attribute name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005607 break;
5608 }
5609 SKIP_BLANKS;
5610
5611 type = xmlParseAttributeType(ctxt, &tree);
5612 if (type <= 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00005613 break;
5614 }
5615
5616 GROW;
William M. Brack76e95df2003-10-18 16:20:14 +00005617 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005618 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5619 "Space required after the attribute type\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005620 if (tree != NULL)
5621 xmlFreeEnumeration(tree);
5622 break;
5623 }
5624 SKIP_BLANKS;
5625
5626 def = xmlParseDefaultDecl(ctxt, &defaultValue);
5627 if (def <= 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00005628 if (defaultValue != NULL)
5629 xmlFree(defaultValue);
5630 if (tree != NULL)
5631 xmlFreeEnumeration(tree);
5632 break;
5633 }
Daniel Veillard97c9ce22008-03-25 16:52:41 +00005634 if ((type != XML_ATTRIBUTE_CDATA) && (defaultValue != NULL))
5635 xmlAttrNormalizeSpace(defaultValue, defaultValue);
Owen Taylor3473f882001-02-23 17:55:21 +00005636
5637 GROW;
5638 if (RAW != '>') {
William M. Brack76e95df2003-10-18 16:20:14 +00005639 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005640 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005641 "Space required after the attribute default value\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005642 if (defaultValue != NULL)
5643 xmlFree(defaultValue);
5644 if (tree != NULL)
5645 xmlFreeEnumeration(tree);
5646 break;
5647 }
5648 SKIP_BLANKS;
5649 }
5650 if (check == CUR_PTR) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005651 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
5652 "in xmlParseAttributeListDecl\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005653 if (defaultValue != NULL)
5654 xmlFree(defaultValue);
5655 if (tree != NULL)
5656 xmlFreeEnumeration(tree);
5657 break;
5658 }
5659 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5660 (ctxt->sax->attributeDecl != NULL))
5661 ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
5662 type, def, defaultValue, tree);
Daniel Veillarde57ec792003-09-10 10:50:59 +00005663 else if (tree != NULL)
5664 xmlFreeEnumeration(tree);
5665
5666 if ((ctxt->sax2) && (defaultValue != NULL) &&
5667 (def != XML_ATTRIBUTE_IMPLIED) &&
5668 (def != XML_ATTRIBUTE_REQUIRED)) {
5669 xmlAddDefAttrs(ctxt, elemName, attrName, defaultValue);
5670 }
Daniel Veillardac4118d2008-01-11 05:27:32 +00005671 if (ctxt->sax2) {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00005672 xmlAddSpecialAttr(ctxt, elemName, attrName, type);
5673 }
Owen Taylor3473f882001-02-23 17:55:21 +00005674 if (defaultValue != NULL)
5675 xmlFree(defaultValue);
5676 GROW;
5677 }
5678 if (RAW == '>') {
5679 if (input != ctxt->input) {
Daniel Veillard49d44052008-08-27 19:57:06 +00005680 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
5681 "Attribute list declaration doesn't start and stop in the same entity\n",
5682 NULL, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005683 }
5684 NEXT;
5685 }
Owen Taylor3473f882001-02-23 17:55:21 +00005686 }
5687}
5688
5689/**
5690 * xmlParseElementMixedContentDecl:
5691 * @ctxt: an XML parser context
Daniel Veillarda9b66d02002-12-11 14:23:49 +00005692 * @inputchk: the input used for the current entity, needed for boundary checks
Owen Taylor3473f882001-02-23 17:55:21 +00005693 *
5694 * parse the declaration for a Mixed Element content
5695 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
5696 *
5697 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
5698 * '(' S? '#PCDATA' S? ')'
5699 *
5700 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
5701 *
5702 * [ VC: No Duplicate Types ]
5703 * The same name must not appear more than once in a single
5704 * mixed-content declaration.
5705 *
5706 * returns: the list of the xmlElementContentPtr describing the element choices
5707 */
5708xmlElementContentPtr
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005709xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
Owen Taylor3473f882001-02-23 17:55:21 +00005710 xmlElementContentPtr ret = NULL, cur = NULL, n;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005711 const xmlChar *elem = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005712
5713 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005714 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005715 SKIP(7);
5716 SKIP_BLANKS;
5717 SHRINK;
5718 if (RAW == ')') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005719 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00005720 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
5721"Element content declaration doesn't start and stop in the same entity\n",
Daniel Veillardae0765b2008-07-31 19:54:59 +00005722 NULL, NULL);
Daniel Veillard8dc16a62002-02-19 21:08:48 +00005723 }
Owen Taylor3473f882001-02-23 17:55:21 +00005724 NEXT;
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005725 ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
Daniel Veillard68b6e022008-03-31 09:26:00 +00005726 if (ret == NULL)
5727 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005728 if (RAW == '*') {
5729 ret->ocur = XML_ELEMENT_CONTENT_MULT;
5730 NEXT;
5731 }
5732 return(ret);
5733 }
5734 if ((RAW == '(') || (RAW == '|')) {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005735 ret = cur = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
Owen Taylor3473f882001-02-23 17:55:21 +00005736 if (ret == NULL) return(NULL);
5737 }
5738 while (RAW == '|') {
5739 NEXT;
5740 if (elem == NULL) {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005741 ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
Owen Taylor3473f882001-02-23 17:55:21 +00005742 if (ret == NULL) return(NULL);
5743 ret->c1 = cur;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005744 if (cur != NULL)
5745 cur->parent = ret;
Owen Taylor3473f882001-02-23 17:55:21 +00005746 cur = ret;
5747 } else {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005748 n = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
Owen Taylor3473f882001-02-23 17:55:21 +00005749 if (n == NULL) return(NULL);
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005750 n->c1 = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005751 if (n->c1 != NULL)
5752 n->c1->parent = n;
Owen Taylor3473f882001-02-23 17:55:21 +00005753 cur->c2 = n;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005754 if (n != NULL)
5755 n->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00005756 cur = n;
Owen Taylor3473f882001-02-23 17:55:21 +00005757 }
5758 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00005759 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005760 if (elem == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005761 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005762 "xmlParseElementMixedContentDecl : Name expected\n");
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005763 xmlFreeDocElementContent(ctxt->myDoc, cur);
Owen Taylor3473f882001-02-23 17:55:21 +00005764 return(NULL);
5765 }
5766 SKIP_BLANKS;
5767 GROW;
5768 }
5769 if ((RAW == ')') && (NXT(1) == '*')) {
5770 if (elem != NULL) {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005771 cur->c2 = xmlNewDocElementContent(ctxt->myDoc, elem,
Owen Taylor3473f882001-02-23 17:55:21 +00005772 XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005773 if (cur->c2 != NULL)
5774 cur->c2->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00005775 }
5776 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005777 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00005778 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
5779"Element content declaration doesn't start and stop in the same entity\n",
Daniel Veillardae0765b2008-07-31 19:54:59 +00005780 NULL, NULL);
Daniel Veillard8dc16a62002-02-19 21:08:48 +00005781 }
Owen Taylor3473f882001-02-23 17:55:21 +00005782 SKIP(2);
5783 } else {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005784 xmlFreeDocElementContent(ctxt->myDoc, ret);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005785 xmlFatalErr(ctxt, XML_ERR_MIXED_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005786 return(NULL);
5787 }
5788
5789 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005790 xmlFatalErr(ctxt, XML_ERR_PCDATA_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005791 }
5792 return(ret);
5793}
5794
5795/**
Daniel Veillard489f9672009-08-10 16:49:30 +02005796 * xmlParseElementChildrenContentDeclPriv:
Owen Taylor3473f882001-02-23 17:55:21 +00005797 * @ctxt: an XML parser context
Daniel Veillarda9b66d02002-12-11 14:23:49 +00005798 * @inputchk: the input used for the current entity, needed for boundary checks
Daniel Veillard489f9672009-08-10 16:49:30 +02005799 * @depth: the level of recursion
Owen Taylor3473f882001-02-23 17:55:21 +00005800 *
5801 * parse the declaration for a Mixed Element content
5802 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
5803 *
5804 *
5805 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
5806 *
5807 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
5808 *
5809 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
5810 *
5811 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
5812 *
5813 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
5814 * TODO Parameter-entity replacement text must be properly nested
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005815 * with parenthesized groups. That is to say, if either of the
Owen Taylor3473f882001-02-23 17:55:21 +00005816 * opening or closing parentheses in a choice, seq, or Mixed
5817 * construct is contained in the replacement text for a parameter
5818 * entity, both must be contained in the same replacement text. For
5819 * interoperability, if a parameter-entity reference appears in a
5820 * choice, seq, or Mixed construct, its replacement text should not
5821 * be empty, and neither the first nor last non-blank character of
5822 * the replacement text should be a connector (| or ,).
5823 *
Daniel Veillard5e2dace2001-07-18 19:30:27 +00005824 * Returns the tree of xmlElementContentPtr describing the element
Owen Taylor3473f882001-02-23 17:55:21 +00005825 * hierarchy.
5826 */
Daniel Veillard489f9672009-08-10 16:49:30 +02005827static xmlElementContentPtr
5828xmlParseElementChildrenContentDeclPriv(xmlParserCtxtPtr ctxt, int inputchk,
5829 int depth) {
Owen Taylor3473f882001-02-23 17:55:21 +00005830 xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005831 const xmlChar *elem;
Owen Taylor3473f882001-02-23 17:55:21 +00005832 xmlChar type = 0;
5833
Daniel Veillard489f9672009-08-10 16:49:30 +02005834 if (((depth > 128) && ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
5835 (depth > 2048)) {
5836 xmlFatalErrMsgInt(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED,
5837"xmlParseElementChildrenContentDecl : depth %d too deep, use XML_PARSE_HUGE\n",
5838 depth);
5839 return(NULL);
5840 }
Owen Taylor3473f882001-02-23 17:55:21 +00005841 SKIP_BLANKS;
5842 GROW;
5843 if (RAW == '(') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005844 int inputid = ctxt->input->id;
Daniel Veillard8dc16a62002-02-19 21:08:48 +00005845
Owen Taylor3473f882001-02-23 17:55:21 +00005846 /* Recurse on first child */
5847 NEXT;
5848 SKIP_BLANKS;
Daniel Veillard489f9672009-08-10 16:49:30 +02005849 cur = ret = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
5850 depth + 1);
Owen Taylor3473f882001-02-23 17:55:21 +00005851 SKIP_BLANKS;
5852 GROW;
5853 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00005854 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005855 if (elem == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005856 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005857 return(NULL);
5858 }
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005859 cur = ret = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00005860 if (cur == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005861 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00005862 return(NULL);
5863 }
Owen Taylor3473f882001-02-23 17:55:21 +00005864 GROW;
5865 if (RAW == '?') {
5866 cur->ocur = XML_ELEMENT_CONTENT_OPT;
5867 NEXT;
5868 } else if (RAW == '*') {
5869 cur->ocur = XML_ELEMENT_CONTENT_MULT;
5870 NEXT;
5871 } else if (RAW == '+') {
5872 cur->ocur = XML_ELEMENT_CONTENT_PLUS;
5873 NEXT;
5874 } else {
5875 cur->ocur = XML_ELEMENT_CONTENT_ONCE;
5876 }
Owen Taylor3473f882001-02-23 17:55:21 +00005877 GROW;
5878 }
5879 SKIP_BLANKS;
5880 SHRINK;
5881 while (RAW != ')') {
5882 /*
5883 * Each loop we parse one separator and one element.
5884 */
5885 if (RAW == ',') {
5886 if (type == 0) type = CUR;
5887
5888 /*
5889 * Detect "Name | Name , Name" error
5890 */
5891 else if (type != CUR) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005892 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005893 "xmlParseElementChildrenContentDecl : '%c' expected\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005894 type);
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00005895 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005896 xmlFreeDocElementContent(ctxt->myDoc, last);
Owen Taylor3473f882001-02-23 17:55:21 +00005897 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005898 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00005899 return(NULL);
5900 }
5901 NEXT;
5902
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005903 op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_SEQ);
Owen Taylor3473f882001-02-23 17:55:21 +00005904 if (op == NULL) {
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00005905 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005906 xmlFreeDocElementContent(ctxt->myDoc, last);
5907 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00005908 return(NULL);
5909 }
5910 if (last == NULL) {
5911 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005912 if (ret != NULL)
5913 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00005914 ret = cur = op;
5915 } else {
5916 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005917 if (op != NULL)
5918 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00005919 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005920 if (last != NULL)
5921 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00005922 cur =op;
5923 last = NULL;
5924 }
5925 } else if (RAW == '|') {
5926 if (type == 0) type = CUR;
5927
5928 /*
5929 * Detect "Name , Name | Name" error
5930 */
5931 else if (type != CUR) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005932 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005933 "xmlParseElementChildrenContentDecl : '%c' expected\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005934 type);
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00005935 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005936 xmlFreeDocElementContent(ctxt->myDoc, last);
Owen Taylor3473f882001-02-23 17:55:21 +00005937 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005938 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00005939 return(NULL);
5940 }
5941 NEXT;
5942
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005943 op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
Owen Taylor3473f882001-02-23 17:55:21 +00005944 if (op == NULL) {
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00005945 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005946 xmlFreeDocElementContent(ctxt->myDoc, last);
Owen Taylor3473f882001-02-23 17:55:21 +00005947 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005948 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00005949 return(NULL);
5950 }
5951 if (last == NULL) {
5952 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005953 if (ret != NULL)
5954 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00005955 ret = cur = op;
5956 } else {
5957 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005958 if (op != NULL)
5959 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00005960 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005961 if (last != NULL)
5962 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00005963 cur =op;
5964 last = NULL;
5965 }
5966 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005967 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED, NULL);
Daniel Veillardc707d0b2008-01-24 14:48:54 +00005968 if ((last != NULL) && (last != ret))
5969 xmlFreeDocElementContent(ctxt->myDoc, last);
Owen Taylor3473f882001-02-23 17:55:21 +00005970 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005971 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00005972 return(NULL);
5973 }
5974 GROW;
5975 SKIP_BLANKS;
5976 GROW;
5977 if (RAW == '(') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005978 int inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00005979 /* Recurse on second child */
5980 NEXT;
5981 SKIP_BLANKS;
Daniel Veillard489f9672009-08-10 16:49:30 +02005982 last = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
5983 depth + 1);
Owen Taylor3473f882001-02-23 17:55:21 +00005984 SKIP_BLANKS;
5985 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00005986 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005987 if (elem == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005988 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005989 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005990 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00005991 return(NULL);
5992 }
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005993 last = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillard68b6e022008-03-31 09:26:00 +00005994 if (last == NULL) {
5995 if (ret != NULL)
5996 xmlFreeDocElementContent(ctxt->myDoc, ret);
5997 return(NULL);
5998 }
Owen Taylor3473f882001-02-23 17:55:21 +00005999 if (RAW == '?') {
6000 last->ocur = XML_ELEMENT_CONTENT_OPT;
6001 NEXT;
6002 } else if (RAW == '*') {
6003 last->ocur = XML_ELEMENT_CONTENT_MULT;
6004 NEXT;
6005 } else if (RAW == '+') {
6006 last->ocur = XML_ELEMENT_CONTENT_PLUS;
6007 NEXT;
6008 } else {
6009 last->ocur = XML_ELEMENT_CONTENT_ONCE;
6010 }
6011 }
6012 SKIP_BLANKS;
6013 GROW;
6014 }
6015 if ((cur != NULL) && (last != NULL)) {
6016 cur->c2 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006017 if (last != NULL)
6018 last->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00006019 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006020 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00006021 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6022"Element content declaration doesn't start and stop in the same entity\n",
Daniel Veillardae0765b2008-07-31 19:54:59 +00006023 NULL, NULL);
Daniel Veillard8dc16a62002-02-19 21:08:48 +00006024 }
Owen Taylor3473f882001-02-23 17:55:21 +00006025 NEXT;
6026 if (RAW == '?') {
William M. Brackf8f2e8f2004-05-14 04:37:41 +00006027 if (ret != NULL) {
6028 if ((ret->ocur == XML_ELEMENT_CONTENT_PLUS) ||
6029 (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6030 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6031 else
6032 ret->ocur = XML_ELEMENT_CONTENT_OPT;
6033 }
Owen Taylor3473f882001-02-23 17:55:21 +00006034 NEXT;
6035 } else if (RAW == '*') {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00006036 if (ret != NULL) {
Daniel Veillarde470df72001-04-18 21:41:07 +00006037 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Daniel Veillardce2c2f02001-10-18 14:57:24 +00006038 cur = ret;
6039 /*
6040 * Some normalization:
6041 * (a | b* | c?)* == (a | b | c)*
6042 */
Daniel Veillard30e76072006-03-09 14:13:55 +00006043 while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00006044 if ((cur->c1 != NULL) &&
6045 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6046 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT)))
6047 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6048 if ((cur->c2 != NULL) &&
6049 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6050 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT)))
6051 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6052 cur = cur->c2;
6053 }
6054 }
Owen Taylor3473f882001-02-23 17:55:21 +00006055 NEXT;
6056 } else if (RAW == '+') {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00006057 if (ret != NULL) {
6058 int found = 0;
6059
William M. Brackf8f2e8f2004-05-14 04:37:41 +00006060 if ((ret->ocur == XML_ELEMENT_CONTENT_OPT) ||
6061 (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6062 ret->ocur = XML_ELEMENT_CONTENT_MULT;
William M. Brackeb8509c2004-05-14 03:48:02 +00006063 else
6064 ret->ocur = XML_ELEMENT_CONTENT_PLUS;
Daniel Veillardce2c2f02001-10-18 14:57:24 +00006065 /*
6066 * Some normalization:
6067 * (a | b*)+ == (a | b)*
6068 * (a | b?)+ == (a | b)*
6069 */
Daniel Veillard30e76072006-03-09 14:13:55 +00006070 while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00006071 if ((cur->c1 != NULL) &&
6072 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6073 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) {
6074 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6075 found = 1;
6076 }
6077 if ((cur->c2 != NULL) &&
6078 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6079 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) {
6080 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6081 found = 1;
6082 }
6083 cur = cur->c2;
6084 }
6085 if (found)
6086 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6087 }
Owen Taylor3473f882001-02-23 17:55:21 +00006088 NEXT;
6089 }
6090 return(ret);
6091}
6092
6093/**
Daniel Veillard489f9672009-08-10 16:49:30 +02006094 * xmlParseElementChildrenContentDecl:
6095 * @ctxt: an XML parser context
6096 * @inputchk: the input used for the current entity, needed for boundary checks
Daniel Veillard489f9672009-08-10 16:49:30 +02006097 *
6098 * parse the declaration for a Mixed Element content
6099 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6100 *
6101 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
6102 *
6103 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6104 *
6105 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6106 *
6107 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6108 *
6109 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6110 * TODO Parameter-entity replacement text must be properly nested
6111 * with parenthesized groups. That is to say, if either of the
6112 * opening or closing parentheses in a choice, seq, or Mixed
6113 * construct is contained in the replacement text for a parameter
6114 * entity, both must be contained in the same replacement text. For
6115 * interoperability, if a parameter-entity reference appears in a
6116 * choice, seq, or Mixed construct, its replacement text should not
6117 * be empty, and neither the first nor last non-blank character of
6118 * the replacement text should be a connector (| or ,).
6119 *
6120 * Returns the tree of xmlElementContentPtr describing the element
6121 * hierarchy.
6122 */
6123xmlElementContentPtr
6124xmlParseElementChildrenContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
6125 /* stub left for API/ABI compat */
6126 return(xmlParseElementChildrenContentDeclPriv(ctxt, inputchk, 1));
6127}
6128
6129/**
Owen Taylor3473f882001-02-23 17:55:21 +00006130 * xmlParseElementContentDecl:
6131 * @ctxt: an XML parser context
6132 * @name: the name of the element being defined.
6133 * @result: the Element Content pointer will be stored here if any
6134 *
6135 * parse the declaration for an Element content either Mixed or Children,
6136 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
6137 *
6138 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
6139 *
6140 * returns: the type of element content XML_ELEMENT_TYPE_xxx
6141 */
6142
6143int
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006144xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, const xmlChar *name,
Owen Taylor3473f882001-02-23 17:55:21 +00006145 xmlElementContentPtr *result) {
6146
6147 xmlElementContentPtr tree = NULL;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006148 int inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00006149 int res;
6150
6151 *result = NULL;
6152
6153 if (RAW != '(') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006154 xmlFatalErrMsgStr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
Daniel Veillard56a4cb82001-03-24 17:00:36 +00006155 "xmlParseElementContentDecl : %s '(' expected\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006156 return(-1);
6157 }
6158 NEXT;
6159 GROW;
6160 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00006161 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006162 tree = xmlParseElementMixedContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00006163 res = XML_ELEMENT_TYPE_MIXED;
6164 } else {
Daniel Veillard489f9672009-08-10 16:49:30 +02006165 tree = xmlParseElementChildrenContentDeclPriv(ctxt, inputid, 1);
Owen Taylor3473f882001-02-23 17:55:21 +00006166 res = XML_ELEMENT_TYPE_ELEMENT;
6167 }
Owen Taylor3473f882001-02-23 17:55:21 +00006168 SKIP_BLANKS;
6169 *result = tree;
6170 return(res);
6171}
6172
6173/**
6174 * xmlParseElementDecl:
6175 * @ctxt: an XML parser context
6176 *
6177 * parse an Element declaration.
6178 *
6179 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
6180 *
6181 * [ VC: Unique Element Type Declaration ]
6182 * No element type may be declared more than once
6183 *
6184 * Returns the type of the element, or -1 in case of error
6185 */
6186int
6187xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006188 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00006189 int ret = -1;
6190 xmlElementContentPtr content = NULL;
6191
Daniel Veillard4c778d82005-01-23 17:37:44 +00006192 /* GROW; done in the caller */
Daniel Veillarda07050d2003-10-19 14:46:32 +00006193 if (CMP9(CUR_PTR, '<', '!', 'E', 'L', 'E', 'M', 'E', 'N', 'T')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006194 xmlParserInputPtr input = ctxt->input;
6195
6196 SKIP(9);
William M. Brack76e95df2003-10-18 16:20:14 +00006197 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006198 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6199 "Space required after 'ELEMENT'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006200 }
6201 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00006202 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006203 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006204 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6205 "xmlParseElementDecl: no name for Element\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006206 return(-1);
6207 }
6208 while ((RAW == 0) && (ctxt->inputNr > 1))
6209 xmlPopInput(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00006210 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006211 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6212 "Space required after the element name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006213 }
6214 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00006215 if (CMP5(CUR_PTR, 'E', 'M', 'P', 'T', 'Y')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006216 SKIP(5);
6217 /*
6218 * Element must always be empty.
6219 */
6220 ret = XML_ELEMENT_TYPE_EMPTY;
6221 } else if ((RAW == 'A') && (NXT(1) == 'N') &&
6222 (NXT(2) == 'Y')) {
6223 SKIP(3);
6224 /*
6225 * Element is a generic container.
6226 */
6227 ret = XML_ELEMENT_TYPE_ANY;
6228 } else if (RAW == '(') {
6229 ret = xmlParseElementContentDecl(ctxt, name, &content);
6230 } else {
6231 /*
6232 * [ WFC: PEs in Internal Subset ] error handling.
6233 */
6234 if ((RAW == '%') && (ctxt->external == 0) &&
6235 (ctxt->inputNr == 1)) {
Daniel Veillardf403d292003-10-05 13:51:35 +00006236 xmlFatalErrMsg(ctxt, XML_ERR_PEREF_IN_INT_SUBSET,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006237 "PEReference: forbidden within markup decl in internal subset\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006238 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00006239 xmlFatalErrMsg(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
Owen Taylor3473f882001-02-23 17:55:21 +00006240 "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
6241 }
Owen Taylor3473f882001-02-23 17:55:21 +00006242 return(-1);
6243 }
6244
6245 SKIP_BLANKS;
6246 /*
6247 * Pop-up of finished entities.
6248 */
6249 while ((RAW == 0) && (ctxt->inputNr > 1))
6250 xmlPopInput(ctxt);
6251 SKIP_BLANKS;
6252
6253 if (RAW != '>') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006254 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006255 if (content != NULL) {
6256 xmlFreeDocElementContent(ctxt->myDoc, content);
6257 }
Owen Taylor3473f882001-02-23 17:55:21 +00006258 } else {
6259 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006260 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6261 "Element declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006262 }
6263
6264 NEXT;
6265 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006266 (ctxt->sax->elementDecl != NULL)) {
6267 if (content != NULL)
6268 content->parent = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00006269 ctxt->sax->elementDecl(ctxt->userData, name, ret,
6270 content);
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006271 if ((content != NULL) && (content->parent == NULL)) {
6272 /*
6273 * this is a trick: if xmlAddElementDecl is called,
6274 * instead of copying the full tree it is plugged directly
6275 * if called from the parser. Avoid duplicating the
6276 * interfaces or change the API/ABI
6277 */
6278 xmlFreeDocElementContent(ctxt->myDoc, content);
6279 }
6280 } else if (content != NULL) {
6281 xmlFreeDocElementContent(ctxt->myDoc, content);
6282 }
Owen Taylor3473f882001-02-23 17:55:21 +00006283 }
Owen Taylor3473f882001-02-23 17:55:21 +00006284 }
6285 return(ret);
6286}
6287
6288/**
Owen Taylor3473f882001-02-23 17:55:21 +00006289 * xmlParseConditionalSections
6290 * @ctxt: an XML parser context
6291 *
6292 * [61] conditionalSect ::= includeSect | ignoreSect
6293 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
6294 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
6295 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
6296 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
6297 */
6298
Daniel Veillard56a4cb82001-03-24 17:00:36 +00006299static void
Owen Taylor3473f882001-02-23 17:55:21 +00006300xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
Daniel Veillard49d44052008-08-27 19:57:06 +00006301 int id = ctxt->input->id;
6302
Owen Taylor3473f882001-02-23 17:55:21 +00006303 SKIP(3);
6304 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00006305 if (CMP7(CUR_PTR, 'I', 'N', 'C', 'L', 'U', 'D', 'E')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006306 SKIP(7);
6307 SKIP_BLANKS;
6308 if (RAW != '[') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006309 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006310 } else {
Daniel Veillard49d44052008-08-27 19:57:06 +00006311 if (ctxt->input->id != id) {
6312 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6313 "All markup of the conditional section is not in the same entity\n",
6314 NULL, NULL);
6315 }
Owen Taylor3473f882001-02-23 17:55:21 +00006316 NEXT;
6317 }
6318 if (xmlParserDebugEntities) {
6319 if ((ctxt->input != NULL) && (ctxt->input->filename))
6320 xmlGenericError(xmlGenericErrorContext,
6321 "%s(%d): ", ctxt->input->filename,
6322 ctxt->input->line);
6323 xmlGenericError(xmlGenericErrorContext,
6324 "Entering INCLUDE Conditional Section\n");
6325 }
6326
6327 while ((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') ||
6328 (NXT(2) != '>'))) {
6329 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00006330 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00006331
6332 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6333 xmlParseConditionalSections(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00006334 } else if (IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00006335 NEXT;
6336 } else if (RAW == '%') {
6337 xmlParsePEReference(ctxt);
6338 } else
6339 xmlParseMarkupDecl(ctxt);
6340
6341 /*
6342 * Pop-up of finished entities.
6343 */
6344 while ((RAW == 0) && (ctxt->inputNr > 1))
6345 xmlPopInput(ctxt);
6346
Daniel Veillardfdc91562002-07-01 21:52:03 +00006347 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006348 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006349 break;
6350 }
6351 }
6352 if (xmlParserDebugEntities) {
6353 if ((ctxt->input != NULL) && (ctxt->input->filename))
6354 xmlGenericError(xmlGenericErrorContext,
6355 "%s(%d): ", ctxt->input->filename,
6356 ctxt->input->line);
6357 xmlGenericError(xmlGenericErrorContext,
6358 "Leaving INCLUDE Conditional Section\n");
6359 }
6360
Daniel Veillarda07050d2003-10-19 14:46:32 +00006361 } else if (CMP6(CUR_PTR, 'I', 'G', 'N', 'O', 'R', 'E')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006362 int state;
William M. Brack78637da2003-07-31 14:47:38 +00006363 xmlParserInputState instate;
Owen Taylor3473f882001-02-23 17:55:21 +00006364 int depth = 0;
6365
6366 SKIP(6);
6367 SKIP_BLANKS;
6368 if (RAW != '[') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006369 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006370 } else {
Daniel Veillard49d44052008-08-27 19:57:06 +00006371 if (ctxt->input->id != id) {
6372 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6373 "All markup of the conditional section is not in the same entity\n",
6374 NULL, NULL);
6375 }
Owen Taylor3473f882001-02-23 17:55:21 +00006376 NEXT;
6377 }
6378 if (xmlParserDebugEntities) {
6379 if ((ctxt->input != NULL) && (ctxt->input->filename))
6380 xmlGenericError(xmlGenericErrorContext,
6381 "%s(%d): ", ctxt->input->filename,
6382 ctxt->input->line);
6383 xmlGenericError(xmlGenericErrorContext,
6384 "Entering IGNORE Conditional Section\n");
6385 }
6386
6387 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006388 * Parse up to the end of the conditional section
Owen Taylor3473f882001-02-23 17:55:21 +00006389 * But disable SAX event generating DTD building in the meantime
6390 */
6391 state = ctxt->disableSAX;
6392 instate = ctxt->instate;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006393 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006394 ctxt->instate = XML_PARSER_IGNORE;
6395
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00006396 while ((depth >= 0) && (RAW != 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00006397 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6398 depth++;
6399 SKIP(3);
6400 continue;
6401 }
6402 if ((RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
6403 if (--depth >= 0) SKIP(3);
6404 continue;
6405 }
6406 NEXT;
6407 continue;
6408 }
6409
6410 ctxt->disableSAX = state;
6411 ctxt->instate = instate;
6412
6413 if (xmlParserDebugEntities) {
6414 if ((ctxt->input != NULL) && (ctxt->input->filename))
6415 xmlGenericError(xmlGenericErrorContext,
6416 "%s(%d): ", ctxt->input->filename,
6417 ctxt->input->line);
6418 xmlGenericError(xmlGenericErrorContext,
6419 "Leaving IGNORE Conditional Section\n");
6420 }
6421
6422 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006423 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID_KEYWORD, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006424 }
6425
6426 if (RAW == 0)
6427 SHRINK;
6428
6429 if (RAW == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006430 xmlFatalErr(ctxt, XML_ERR_CONDSEC_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006431 } else {
Daniel Veillard49d44052008-08-27 19:57:06 +00006432 if (ctxt->input->id != id) {
6433 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6434 "All markup of the conditional section is not in the same entity\n",
6435 NULL, NULL);
6436 }
Owen Taylor3473f882001-02-23 17:55:21 +00006437 SKIP(3);
6438 }
6439}
6440
6441/**
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006442 * xmlParseMarkupDecl:
6443 * @ctxt: an XML parser context
6444 *
6445 * parse Markup declarations
6446 *
6447 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
6448 * NotationDecl | PI | Comment
6449 *
6450 * [ VC: Proper Declaration/PE Nesting ]
6451 * Parameter-entity replacement text must be properly nested with
6452 * markup declarations. That is to say, if either the first character
6453 * or the last character of a markup declaration (markupdecl above) is
6454 * contained in the replacement text for a parameter-entity reference,
6455 * both must be contained in the same replacement text.
6456 *
6457 * [ WFC: PEs in Internal Subset ]
6458 * In the internal DTD subset, parameter-entity references can occur
6459 * only where markup declarations can occur, not within markup declarations.
6460 * (This does not apply to references that occur in external parameter
6461 * entities or to the external subset.)
6462 */
6463void
6464xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
6465 GROW;
Daniel Veillard4c778d82005-01-23 17:37:44 +00006466 if (CUR == '<') {
6467 if (NXT(1) == '!') {
6468 switch (NXT(2)) {
6469 case 'E':
6470 if (NXT(3) == 'L')
6471 xmlParseElementDecl(ctxt);
6472 else if (NXT(3) == 'N')
6473 xmlParseEntityDecl(ctxt);
6474 break;
6475 case 'A':
6476 xmlParseAttributeListDecl(ctxt);
6477 break;
6478 case 'N':
6479 xmlParseNotationDecl(ctxt);
6480 break;
6481 case '-':
6482 xmlParseComment(ctxt);
6483 break;
6484 default:
6485 /* there is an error but it will be detected later */
6486 break;
6487 }
6488 } else if (NXT(1) == '?') {
6489 xmlParsePI(ctxt);
6490 }
6491 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006492 /*
6493 * This is only for internal subset. On external entities,
6494 * the replacement is done before parsing stage
6495 */
6496 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
6497 xmlParsePEReference(ctxt);
6498
6499 /*
6500 * Conditional sections are allowed from entities included
6501 * by PE References in the internal subset.
6502 */
6503 if ((ctxt->external == 0) && (ctxt->inputNr > 1)) {
6504 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6505 xmlParseConditionalSections(ctxt);
6506 }
6507 }
6508
6509 ctxt->instate = XML_PARSER_DTD;
6510}
6511
6512/**
6513 * xmlParseTextDecl:
6514 * @ctxt: an XML parser context
Daniel Veillard40ec29a2008-07-30 12:35:40 +00006515 *
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006516 * parse an XML declaration header for external entities
6517 *
6518 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006519 */
6520
6521void
6522xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
6523 xmlChar *version;
Daniel Veillardf5cb3cd2003-10-28 13:58:13 +00006524 const xmlChar *encoding;
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006525
6526 /*
6527 * We know that '<?xml' is here.
6528 */
Daniel Veillarda07050d2003-10-19 14:46:32 +00006529 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006530 SKIP(5);
6531 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006532 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_STARTED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006533 return;
6534 }
6535
William M. Brack76e95df2003-10-18 16:20:14 +00006536 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006537 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6538 "Space needed after '<?xml'\n");
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006539 }
6540 SKIP_BLANKS;
6541
6542 /*
6543 * We may have the VersionInfo here.
6544 */
6545 version = xmlParseVersionInfo(ctxt);
6546 if (version == NULL)
6547 version = xmlCharStrdup(XML_DEFAULT_VERSION);
Daniel Veillard401c2112002-01-07 16:54:10 +00006548 else {
William M. Brack76e95df2003-10-18 16:20:14 +00006549 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006550 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6551 "Space needed here\n");
Daniel Veillard401c2112002-01-07 16:54:10 +00006552 }
6553 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006554 ctxt->input->version = version;
6555
6556 /*
6557 * We must have the encoding declaration
6558 */
Daniel Veillardf5cb3cd2003-10-28 13:58:13 +00006559 encoding = xmlParseEncodingDecl(ctxt);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006560 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
6561 /*
6562 * The XML REC instructs us to stop parsing right here
6563 */
6564 return;
6565 }
Daniel Veillardf5cb3cd2003-10-28 13:58:13 +00006566 if ((encoding == NULL) && (ctxt->errNo == XML_ERR_OK)) {
6567 xmlFatalErrMsg(ctxt, XML_ERR_MISSING_ENCODING,
6568 "Missing encoding in text declaration\n");
6569 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006570
6571 SKIP_BLANKS;
6572 if ((RAW == '?') && (NXT(1) == '>')) {
6573 SKIP(2);
6574 } else if (RAW == '>') {
6575 /* Deprecated old WD ... */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006576 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006577 NEXT;
6578 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006579 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006580 MOVETO_ENDTAG(CUR_PTR);
6581 NEXT;
6582 }
6583}
6584
6585/**
Owen Taylor3473f882001-02-23 17:55:21 +00006586 * xmlParseExternalSubset:
6587 * @ctxt: an XML parser context
6588 * @ExternalID: the external identifier
6589 * @SystemID: the system identifier (or URL)
6590 *
6591 * parse Markup declarations from an external subset
6592 *
6593 * [30] extSubset ::= textDecl? extSubsetDecl
6594 *
6595 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
6596 */
6597void
6598xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
6599 const xmlChar *SystemID) {
Daniel Veillard309f81d2003-09-23 09:02:53 +00006600 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006601 GROW;
Daniel Veillard6ccc56d2008-04-03 12:59:06 +00006602
6603 if ((ctxt->encoding == (const xmlChar *)XML_CHAR_ENCODING_NONE) &&
6604 (ctxt->input->end - ctxt->input->cur >= 4)) {
6605 xmlChar start[4];
6606 xmlCharEncoding enc;
6607
6608 start[0] = RAW;
6609 start[1] = NXT(1);
6610 start[2] = NXT(2);
6611 start[3] = NXT(3);
6612 enc = xmlDetectCharEncoding(start, 4);
6613 if (enc != XML_CHAR_ENCODING_NONE)
6614 xmlSwitchEncoding(ctxt, enc);
6615 }
6616
Daniel Veillarda07050d2003-10-19 14:46:32 +00006617 if (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006618 xmlParseTextDecl(ctxt);
6619 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
6620 /*
6621 * The XML REC instructs us to stop parsing right here
6622 */
6623 ctxt->instate = XML_PARSER_EOF;
6624 return;
6625 }
6626 }
6627 if (ctxt->myDoc == NULL) {
6628 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
Daniel Veillardae0765b2008-07-31 19:54:59 +00006629 if (ctxt->myDoc == NULL) {
6630 xmlErrMemory(ctxt, "New Doc failed");
6631 return;
6632 }
6633 ctxt->myDoc->properties = XML_DOC_INTERNAL;
Owen Taylor3473f882001-02-23 17:55:21 +00006634 }
6635 if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
6636 xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
6637
6638 ctxt->instate = XML_PARSER_DTD;
6639 ctxt->external = 1;
6640 while (((RAW == '<') && (NXT(1) == '?')) ||
6641 ((RAW == '<') && (NXT(1) == '!')) ||
William M. Brack76e95df2003-10-18 16:20:14 +00006642 (RAW == '%') || IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00006643 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00006644 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00006645
6646 GROW;
6647 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6648 xmlParseConditionalSections(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00006649 } else if (IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00006650 NEXT;
6651 } else if (RAW == '%') {
6652 xmlParsePEReference(ctxt);
6653 } else
6654 xmlParseMarkupDecl(ctxt);
6655
6656 /*
6657 * Pop-up of finished entities.
6658 */
6659 while ((RAW == 0) && (ctxt->inputNr > 1))
6660 xmlPopInput(ctxt);
6661
Daniel Veillardfdc91562002-07-01 21:52:03 +00006662 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006663 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006664 break;
6665 }
6666 }
6667
6668 if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006669 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006670 }
6671
6672}
6673
6674/**
6675 * xmlParseReference:
6676 * @ctxt: an XML parser context
Daniel Veillard0161e632008-08-28 15:36:32 +00006677 *
Owen Taylor3473f882001-02-23 17:55:21 +00006678 * parse and handle entity references in content, depending on the SAX
6679 * interface, this may end-up in a call to character() if this is a
6680 * CharRef, a predefined entity, if there is no reference() callback.
6681 * or if the parser was asked to switch to that mode.
6682 *
6683 * [67] Reference ::= EntityRef | CharRef
6684 */
6685void
6686xmlParseReference(xmlParserCtxtPtr ctxt) {
6687 xmlEntityPtr ent;
6688 xmlChar *val;
Daniel Veillard0161e632008-08-28 15:36:32 +00006689 int was_checked;
6690 xmlNodePtr list = NULL;
6691 xmlParserErrors ret = XML_ERR_OK;
Owen Taylor3473f882001-02-23 17:55:21 +00006692
Daniel Veillard0161e632008-08-28 15:36:32 +00006693
6694 if (RAW != '&')
6695 return;
6696
6697 /*
6698 * Simple case of a CharRef
6699 */
Owen Taylor3473f882001-02-23 17:55:21 +00006700 if (NXT(1) == '#') {
6701 int i = 0;
6702 xmlChar out[10];
6703 int hex = NXT(2);
Daniel Veillard56a4cb82001-03-24 17:00:36 +00006704 int value = xmlParseCharRef(ctxt);
Daniel Veillard0161e632008-08-28 15:36:32 +00006705
Daniel Veillarddc171602008-03-26 17:41:38 +00006706 if (value == 0)
6707 return;
Owen Taylor3473f882001-02-23 17:55:21 +00006708 if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
6709 /*
6710 * So we are using non-UTF-8 buffers
6711 * Check that the char fit on 8bits, if not
6712 * generate a CharRef.
6713 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00006714 if (value <= 0xFF) {
6715 out[0] = value;
Owen Taylor3473f882001-02-23 17:55:21 +00006716 out[1] = 0;
6717 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
6718 (!ctxt->disableSAX))
6719 ctxt->sax->characters(ctxt->userData, out, 1);
6720 } else {
6721 if ((hex == 'x') || (hex == 'X'))
Aleksey Sanin49cc9752002-06-14 17:07:10 +00006722 snprintf((char *)out, sizeof(out), "#x%X", value);
Owen Taylor3473f882001-02-23 17:55:21 +00006723 else
Aleksey Sanin49cc9752002-06-14 17:07:10 +00006724 snprintf((char *)out, sizeof(out), "#%d", value);
Owen Taylor3473f882001-02-23 17:55:21 +00006725 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
6726 (!ctxt->disableSAX))
6727 ctxt->sax->reference(ctxt->userData, out);
6728 }
6729 } else {
6730 /*
6731 * Just encode the value in UTF-8
6732 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00006733 COPY_BUF(0 ,out, i, value);
Owen Taylor3473f882001-02-23 17:55:21 +00006734 out[i] = 0;
6735 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
6736 (!ctxt->disableSAX))
6737 ctxt->sax->characters(ctxt->userData, out, i);
6738 }
Daniel Veillard0161e632008-08-28 15:36:32 +00006739 return;
6740 }
Daniel Veillarda37a6ad2006-10-10 20:05:45 +00006741
Daniel Veillard0161e632008-08-28 15:36:32 +00006742 /*
6743 * We are seeing an entity reference
6744 */
6745 ent = xmlParseEntityRef(ctxt);
6746 if (ent == NULL) return;
6747 if (!ctxt->wellFormed)
6748 return;
6749 was_checked = ent->checked;
6750
6751 /* special case of predefined entities */
6752 if ((ent->name == NULL) ||
6753 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
6754 val = ent->content;
6755 if (val == NULL) return;
6756 /*
6757 * inline the entity.
6758 */
6759 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
6760 (!ctxt->disableSAX))
6761 ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
6762 return;
6763 }
6764
6765 /*
6766 * The first reference to the entity trigger a parsing phase
6767 * where the ent->children is filled with the result from
6768 * the parsing.
6769 */
6770 if (ent->checked == 0) {
6771 unsigned long oldnbent = ctxt->nbentities;
6772
6773 /*
6774 * This is a bit hackish but this seems the best
6775 * way to make sure both SAX and DOM entity support
6776 * behaves okay.
6777 */
6778 void *user_data;
6779 if (ctxt->userData == ctxt)
6780 user_data = NULL;
6781 else
6782 user_data = ctxt->userData;
6783
6784 /*
6785 * Check that this entity is well formed
6786 * 4.3.2: An internal general parsed entity is well-formed
6787 * if its replacement text matches the production labeled
6788 * content.
6789 */
6790 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
6791 ctxt->depth++;
6792 ret = xmlParseBalancedChunkMemoryInternal(ctxt, ent->content,
6793 user_data, &list);
6794 ctxt->depth--;
6795
6796 } else if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
6797 ctxt->depth++;
6798 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt, ctxt->sax,
6799 user_data, ctxt->depth, ent->URI,
6800 ent->ExternalID, &list);
6801 ctxt->depth--;
6802 } else {
6803 ret = XML_ERR_ENTITY_PE_INTERNAL;
6804 xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
6805 "invalid entity type found\n", NULL);
6806 }
6807
6808 /*
6809 * Store the number of entities needing parsing for this entity
6810 * content and do checkings
6811 */
6812 ent->checked = ctxt->nbentities - oldnbent;
6813 if (ret == XML_ERR_ENTITY_LOOP) {
Daniel Veillard4bf899b2008-08-20 17:04:30 +00006814 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
Daniel Veillard0161e632008-08-28 15:36:32 +00006815 xmlFreeNodeList(list);
Daniel Veillard4bf899b2008-08-20 17:04:30 +00006816 return;
6817 }
Daniel Veillard0161e632008-08-28 15:36:32 +00006818 if (xmlParserEntityCheck(ctxt, 0, ent)) {
6819 xmlFreeNodeList(list);
6820 return;
6821 }
Owen Taylor3473f882001-02-23 17:55:21 +00006822
Daniel Veillard0161e632008-08-28 15:36:32 +00006823 if ((ret == XML_ERR_OK) && (list != NULL)) {
6824 if (((ent->etype == XML_INTERNAL_GENERAL_ENTITY) ||
6825 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY))&&
6826 (ent->children == NULL)) {
6827 ent->children = list;
6828 if (ctxt->replaceEntities) {
Owen Taylor3473f882001-02-23 17:55:21 +00006829 /*
Daniel Veillard0161e632008-08-28 15:36:32 +00006830 * Prune it directly in the generated document
6831 * except for single text nodes.
Owen Taylor3473f882001-02-23 17:55:21 +00006832 */
Daniel Veillard0161e632008-08-28 15:36:32 +00006833 if (((list->type == XML_TEXT_NODE) &&
6834 (list->next == NULL)) ||
6835 (ctxt->parseMode == XML_PARSE_READER)) {
6836 list->parent = (xmlNodePtr) ent;
6837 list = NULL;
6838 ent->owner = 1;
6839 } else {
6840 ent->owner = 0;
6841 while (list != NULL) {
6842 list->parent = (xmlNodePtr) ctxt->node;
6843 list->doc = ctxt->myDoc;
6844 if (list->next == NULL)
6845 ent->last = list;
6846 list = list->next;
Owen Taylor3473f882001-02-23 17:55:21 +00006847 }
Daniel Veillard0161e632008-08-28 15:36:32 +00006848 list = ent->children;
6849#ifdef LIBXML_LEGACY_ENABLED
6850 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
6851 xmlAddEntityReference(ent, list, NULL);
6852#endif /* LIBXML_LEGACY_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00006853 }
6854 } else {
Daniel Veillard0161e632008-08-28 15:36:32 +00006855 ent->owner = 1;
6856 while (list != NULL) {
6857 list->parent = (xmlNodePtr) ent;
6858 if (list->next == NULL)
6859 ent->last = list;
6860 list = list->next;
Owen Taylor3473f882001-02-23 17:55:21 +00006861 }
6862 }
Daniel Veillard0161e632008-08-28 15:36:32 +00006863 } else {
6864 xmlFreeNodeList(list);
6865 list = NULL;
Daniel Veillarda37a6ad2006-10-10 20:05:45 +00006866 }
Daniel Veillard0161e632008-08-28 15:36:32 +00006867 } else if ((ret != XML_ERR_OK) &&
6868 (ret != XML_WAR_UNDECLARED_ENTITY)) {
6869 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
6870 "Entity '%s' failed to parse\n", ent->name);
6871 } else if (list != NULL) {
6872 xmlFreeNodeList(list);
6873 list = NULL;
6874 }
6875 if (ent->checked == 0)
6876 ent->checked = 1;
6877 } else if (ent->checked != 1) {
6878 ctxt->nbentities += ent->checked;
6879 }
Daniel Veillarda37a6ad2006-10-10 20:05:45 +00006880
Daniel Veillard0161e632008-08-28 15:36:32 +00006881 /*
6882 * Now that the entity content has been gathered
6883 * provide it to the application, this can take different forms based
6884 * on the parsing modes.
6885 */
6886 if (ent->children == NULL) {
6887 /*
6888 * Probably running in SAX mode and the callbacks don't
6889 * build the entity content. So unless we already went
6890 * though parsing for first checking go though the entity
6891 * content to generate callbacks associated to the entity
6892 */
6893 if (was_checked != 0) {
6894 void *user_data;
Owen Taylor3473f882001-02-23 17:55:21 +00006895 /*
Daniel Veillard0161e632008-08-28 15:36:32 +00006896 * This is a bit hackish but this seems the best
6897 * way to make sure both SAX and DOM entity support
6898 * behaves okay.
Owen Taylor3473f882001-02-23 17:55:21 +00006899 */
Daniel Veillard0161e632008-08-28 15:36:32 +00006900 if (ctxt->userData == ctxt)
6901 user_data = NULL;
6902 else
6903 user_data = ctxt->userData;
6904
6905 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
6906 ctxt->depth++;
6907 ret = xmlParseBalancedChunkMemoryInternal(ctxt,
6908 ent->content, user_data, NULL);
6909 ctxt->depth--;
6910 } else if (ent->etype ==
6911 XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
6912 ctxt->depth++;
6913 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
6914 ctxt->sax, user_data, ctxt->depth,
6915 ent->URI, ent->ExternalID, NULL);
6916 ctxt->depth--;
6917 } else {
6918 ret = XML_ERR_ENTITY_PE_INTERNAL;
6919 xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
6920 "invalid entity type found\n", NULL);
6921 }
6922 if (ret == XML_ERR_ENTITY_LOOP) {
6923 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
6924 return;
6925 }
6926 }
6927 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
6928 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
6929 /*
6930 * Entity reference callback comes second, it's somewhat
6931 * superfluous but a compatibility to historical behaviour
6932 */
6933 ctxt->sax->reference(ctxt->userData, ent->name);
6934 }
6935 return;
6936 }
6937
6938 /*
6939 * If we didn't get any children for the entity being built
6940 */
6941 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
6942 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
6943 /*
6944 * Create a node.
6945 */
6946 ctxt->sax->reference(ctxt->userData, ent->name);
6947 return;
6948 }
6949
6950 if ((ctxt->replaceEntities) || (ent->children == NULL)) {
6951 /*
6952 * There is a problem on the handling of _private for entities
6953 * (bug 155816): Should we copy the content of the field from
6954 * the entity (possibly overwriting some value set by the user
6955 * when a copy is created), should we leave it alone, or should
6956 * we try to take care of different situations? The problem
6957 * is exacerbated by the usage of this field by the xmlReader.
6958 * To fix this bug, we look at _private on the created node
6959 * and, if it's NULL, we copy in whatever was in the entity.
6960 * If it's not NULL we leave it alone. This is somewhat of a
6961 * hack - maybe we should have further tests to determine
6962 * what to do.
6963 */
6964 if ((ctxt->node != NULL) && (ent->children != NULL)) {
6965 /*
6966 * Seems we are generating the DOM content, do
6967 * a simple tree copy for all references except the first
6968 * In the first occurrence list contains the replacement.
6969 * progressive == 2 means we are operating on the Reader
6970 * and since nodes are discarded we must copy all the time.
6971 */
6972 if (((list == NULL) && (ent->owner == 0)) ||
6973 (ctxt->parseMode == XML_PARSE_READER)) {
6974 xmlNodePtr nw = NULL, cur, firstChild = NULL;
6975
6976 /*
6977 * when operating on a reader, the entities definitions
6978 * are always owning the entities subtree.
6979 if (ctxt->parseMode == XML_PARSE_READER)
6980 ent->owner = 1;
6981 */
6982
6983 cur = ent->children;
6984 while (cur != NULL) {
6985 nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
6986 if (nw != NULL) {
6987 if (nw->_private == NULL)
6988 nw->_private = cur->_private;
6989 if (firstChild == NULL){
6990 firstChild = nw;
6991 }
6992 nw = xmlAddChild(ctxt->node, nw);
6993 }
6994 if (cur == ent->last) {
6995 /*
6996 * needed to detect some strange empty
6997 * node cases in the reader tests
6998 */
6999 if ((ctxt->parseMode == XML_PARSE_READER) &&
7000 (nw != NULL) &&
7001 (nw->type == XML_ELEMENT_NODE) &&
7002 (nw->children == NULL))
7003 nw->extra = 1;
7004
7005 break;
7006 }
7007 cur = cur->next;
7008 }
7009#ifdef LIBXML_LEGACY_ENABLED
7010 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7011 xmlAddEntityReference(ent, firstChild, nw);
7012#endif /* LIBXML_LEGACY_ENABLED */
7013 } else if (list == NULL) {
7014 xmlNodePtr nw = NULL, cur, next, last,
7015 firstChild = NULL;
7016 /*
7017 * Copy the entity child list and make it the new
7018 * entity child list. The goal is to make sure any
7019 * ID or REF referenced will be the one from the
7020 * document content and not the entity copy.
7021 */
7022 cur = ent->children;
7023 ent->children = NULL;
7024 last = ent->last;
7025 ent->last = NULL;
7026 while (cur != NULL) {
7027 next = cur->next;
7028 cur->next = NULL;
7029 cur->parent = NULL;
7030 nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
7031 if (nw != NULL) {
7032 if (nw->_private == NULL)
7033 nw->_private = cur->_private;
7034 if (firstChild == NULL){
7035 firstChild = cur;
7036 }
7037 xmlAddChild((xmlNodePtr) ent, nw);
7038 xmlAddChild(ctxt->node, cur);
7039 }
7040 if (cur == last)
7041 break;
7042 cur = next;
7043 }
Daniel Veillardcba68392008-08-29 12:43:40 +00007044 if (ent->owner == 0)
7045 ent->owner = 1;
Daniel Veillard0161e632008-08-28 15:36:32 +00007046#ifdef LIBXML_LEGACY_ENABLED
7047 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7048 xmlAddEntityReference(ent, firstChild, nw);
7049#endif /* LIBXML_LEGACY_ENABLED */
7050 } else {
7051 const xmlChar *nbktext;
7052
7053 /*
7054 * the name change is to avoid coalescing of the
7055 * node with a possible previous text one which
7056 * would make ent->children a dangling pointer
7057 */
7058 nbktext = xmlDictLookup(ctxt->dict, BAD_CAST "nbktext",
7059 -1);
7060 if (ent->children->type == XML_TEXT_NODE)
7061 ent->children->name = nbktext;
7062 if ((ent->last != ent->children) &&
7063 (ent->last->type == XML_TEXT_NODE))
7064 ent->last->name = nbktext;
7065 xmlAddChildList(ctxt->node, ent->children);
7066 }
7067
7068 /*
7069 * This is to avoid a nasty side effect, see
7070 * characters() in SAX.c
7071 */
7072 ctxt->nodemem = 0;
7073 ctxt->nodelen = 0;
7074 return;
Owen Taylor3473f882001-02-23 17:55:21 +00007075 }
7076 }
7077}
7078
7079/**
7080 * xmlParseEntityRef:
7081 * @ctxt: an XML parser context
7082 *
7083 * parse ENTITY references declarations
7084 *
7085 * [68] EntityRef ::= '&' Name ';'
7086 *
7087 * [ WFC: Entity Declared ]
7088 * In a document without any DTD, a document with only an internal DTD
7089 * subset which contains no parameter entity references, or a document
7090 * with "standalone='yes'", the Name given in the entity reference
7091 * must match that in an entity declaration, except that well-formed
7092 * documents need not declare any of the following entities: amp, lt,
7093 * gt, apos, quot. The declaration of a parameter entity must precede
7094 * any reference to it. Similarly, the declaration of a general entity
7095 * must precede any reference to it which appears in a default value in an
7096 * attribute-list declaration. Note that if entities are declared in the
7097 * external subset or in external parameter entities, a non-validating
7098 * processor is not obligated to read and process their declarations;
7099 * for such documents, the rule that an entity must be declared is a
7100 * well-formedness constraint only if standalone='yes'.
7101 *
7102 * [ WFC: Parsed Entity ]
7103 * An entity reference must not contain the name of an unparsed entity
7104 *
7105 * Returns the xmlEntityPtr if found, or NULL otherwise.
7106 */
7107xmlEntityPtr
7108xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007109 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00007110 xmlEntityPtr ent = NULL;
7111
7112 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00007113
Daniel Veillard0161e632008-08-28 15:36:32 +00007114 if (RAW != '&')
7115 return(NULL);
7116 NEXT;
7117 name = xmlParseName(ctxt);
7118 if (name == NULL) {
7119 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7120 "xmlParseEntityRef: no name\n");
7121 return(NULL);
7122 }
7123 if (RAW != ';') {
7124 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7125 return(NULL);
7126 }
7127 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00007128
Daniel Veillard0161e632008-08-28 15:36:32 +00007129 /*
7130 * Predefined entites override any extra definition
7131 */
Rob Richardsb9ed0172009-01-05 17:28:50 +00007132 if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7133 ent = xmlGetPredefinedEntity(name);
7134 if (ent != NULL)
7135 return(ent);
7136 }
Owen Taylor3473f882001-02-23 17:55:21 +00007137
Daniel Veillard0161e632008-08-28 15:36:32 +00007138 /*
7139 * Increate the number of entity references parsed
7140 */
7141 ctxt->nbentities++;
Owen Taylor3473f882001-02-23 17:55:21 +00007142
Daniel Veillard0161e632008-08-28 15:36:32 +00007143 /*
7144 * Ask first SAX for entity resolution, otherwise try the
7145 * entities which may have stored in the parser context.
7146 */
7147 if (ctxt->sax != NULL) {
7148 if (ctxt->sax->getEntity != NULL)
7149 ent = ctxt->sax->getEntity(ctxt->userData, name);
Rob Richardsb9ed0172009-01-05 17:28:50 +00007150 if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7151 (ctxt->options & XML_PARSE_OLDSAX))
7152 ent = xmlGetPredefinedEntity(name);
Daniel Veillard0161e632008-08-28 15:36:32 +00007153 if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7154 (ctxt->userData==ctxt)) {
7155 ent = xmlSAX2GetEntity(ctxt, name);
Owen Taylor3473f882001-02-23 17:55:21 +00007156 }
7157 }
Daniel Veillard0161e632008-08-28 15:36:32 +00007158 /*
7159 * [ WFC: Entity Declared ]
7160 * In a document without any DTD, a document with only an
7161 * internal DTD subset which contains no parameter entity
7162 * references, or a document with "standalone='yes'", the
7163 * Name given in the entity reference must match that in an
7164 * entity declaration, except that well-formed documents
7165 * need not declare any of the following entities: amp, lt,
7166 * gt, apos, quot.
7167 * The declaration of a parameter entity must precede any
7168 * reference to it.
7169 * Similarly, the declaration of a general entity must
7170 * precede any reference to it which appears in a default
7171 * value in an attribute-list declaration. Note that if
7172 * entities are declared in the external subset or in
7173 * external parameter entities, a non-validating processor
7174 * is not obligated to read and process their declarations;
7175 * for such documents, the rule that an entity must be
7176 * declared is a well-formedness constraint only if
7177 * standalone='yes'.
7178 */
7179 if (ent == NULL) {
7180 if ((ctxt->standalone == 1) ||
7181 ((ctxt->hasExternalSubset == 0) &&
7182 (ctxt->hasPErefs == 0))) {
7183 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7184 "Entity '%s' not defined\n", name);
7185 } else {
7186 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7187 "Entity '%s' not defined\n", name);
7188 if ((ctxt->inSubset == 0) &&
7189 (ctxt->sax != NULL) &&
7190 (ctxt->sax->reference != NULL)) {
7191 ctxt->sax->reference(ctxt->userData, name);
7192 }
7193 }
7194 ctxt->valid = 0;
7195 }
7196
7197 /*
7198 * [ WFC: Parsed Entity ]
7199 * An entity reference must not contain the name of an
7200 * unparsed entity
7201 */
7202 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7203 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7204 "Entity reference to unparsed entity %s\n", name);
7205 }
7206
7207 /*
7208 * [ WFC: No External Entity References ]
7209 * Attribute values cannot contain direct or indirect
7210 * entity references to external entities.
7211 */
7212 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7213 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
7214 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7215 "Attribute references external entity '%s'\n", name);
7216 }
7217 /*
7218 * [ WFC: No < in Attribute Values ]
7219 * The replacement text of any entity referred to directly or
7220 * indirectly in an attribute value (other than "&lt;") must
7221 * not contain a <.
7222 */
7223 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7224 (ent != NULL) && (ent->content != NULL) &&
Rob Richardsb9ed0172009-01-05 17:28:50 +00007225 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
Daniel Veillard0161e632008-08-28 15:36:32 +00007226 (xmlStrchr(ent->content, '<'))) {
7227 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
7228 "'<' in entity '%s' is not allowed in attributes values\n", name);
7229 }
7230
7231 /*
7232 * Internal check, no parameter entities here ...
7233 */
7234 else {
7235 switch (ent->etype) {
7236 case XML_INTERNAL_PARAMETER_ENTITY:
7237 case XML_EXTERNAL_PARAMETER_ENTITY:
7238 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
7239 "Attempt to reference the parameter entity '%s'\n",
7240 name);
7241 break;
7242 default:
7243 break;
7244 }
7245 }
7246
7247 /*
7248 * [ WFC: No Recursion ]
7249 * A parsed entity must not contain a recursive reference
7250 * to itself, either directly or indirectly.
7251 * Done somewhere else
7252 */
Owen Taylor3473f882001-02-23 17:55:21 +00007253 return(ent);
7254}
7255
7256/**
7257 * xmlParseStringEntityRef:
7258 * @ctxt: an XML parser context
7259 * @str: a pointer to an index in the string
7260 *
7261 * parse ENTITY references declarations, but this version parses it from
7262 * a string value.
7263 *
7264 * [68] EntityRef ::= '&' Name ';'
7265 *
7266 * [ WFC: Entity Declared ]
7267 * In a document without any DTD, a document with only an internal DTD
7268 * subset which contains no parameter entity references, or a document
7269 * with "standalone='yes'", the Name given in the entity reference
7270 * must match that in an entity declaration, except that well-formed
7271 * documents need not declare any of the following entities: amp, lt,
7272 * gt, apos, quot. The declaration of a parameter entity must precede
7273 * any reference to it. Similarly, the declaration of a general entity
7274 * must precede any reference to it which appears in a default value in an
7275 * attribute-list declaration. Note that if entities are declared in the
7276 * external subset or in external parameter entities, a non-validating
7277 * processor is not obligated to read and process their declarations;
7278 * for such documents, the rule that an entity must be declared is a
7279 * well-formedness constraint only if standalone='yes'.
7280 *
7281 * [ WFC: Parsed Entity ]
7282 * An entity reference must not contain the name of an unparsed entity
7283 *
7284 * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
7285 * is updated to the current location in the string.
7286 */
Daniel Veillard8ed10722009-08-20 19:17:36 +02007287static xmlEntityPtr
Owen Taylor3473f882001-02-23 17:55:21 +00007288xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
7289 xmlChar *name;
7290 const xmlChar *ptr;
7291 xmlChar cur;
7292 xmlEntityPtr ent = NULL;
7293
7294 if ((str == NULL) || (*str == NULL))
7295 return(NULL);
7296 ptr = *str;
7297 cur = *ptr;
Daniel Veillard0161e632008-08-28 15:36:32 +00007298 if (cur != '&')
7299 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007300
Daniel Veillard0161e632008-08-28 15:36:32 +00007301 ptr++;
7302 cur = *ptr;
7303 name = xmlParseStringName(ctxt, &ptr);
7304 if (name == NULL) {
7305 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7306 "xmlParseStringEntityRef: no name\n");
7307 *str = ptr;
7308 return(NULL);
7309 }
7310 if (*ptr != ';') {
7311 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
Daniel Veillard7f4547c2008-10-03 07:58:23 +00007312 xmlFree(name);
Daniel Veillard0161e632008-08-28 15:36:32 +00007313 *str = ptr;
7314 return(NULL);
7315 }
7316 ptr++;
Owen Taylor3473f882001-02-23 17:55:21 +00007317
Owen Taylor3473f882001-02-23 17:55:21 +00007318
Daniel Veillard0161e632008-08-28 15:36:32 +00007319 /*
7320 * Predefined entites override any extra definition
7321 */
Rob Richardsb9ed0172009-01-05 17:28:50 +00007322 if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7323 ent = xmlGetPredefinedEntity(name);
7324 if (ent != NULL) {
7325 xmlFree(name);
7326 *str = ptr;
7327 return(ent);
7328 }
Daniel Veillard34a7fc32008-10-02 20:55:10 +00007329 }
Owen Taylor3473f882001-02-23 17:55:21 +00007330
Daniel Veillard0161e632008-08-28 15:36:32 +00007331 /*
7332 * Increate the number of entity references parsed
7333 */
7334 ctxt->nbentities++;
Owen Taylor3473f882001-02-23 17:55:21 +00007335
Daniel Veillard0161e632008-08-28 15:36:32 +00007336 /*
7337 * Ask first SAX for entity resolution, otherwise try the
7338 * entities which may have stored in the parser context.
7339 */
7340 if (ctxt->sax != NULL) {
7341 if (ctxt->sax->getEntity != NULL)
7342 ent = ctxt->sax->getEntity(ctxt->userData, name);
Rob Richardsb9ed0172009-01-05 17:28:50 +00007343 if ((ent == NULL) && (ctxt->options & XML_PARSE_OLDSAX))
7344 ent = xmlGetPredefinedEntity(name);
Daniel Veillard0161e632008-08-28 15:36:32 +00007345 if ((ent == NULL) && (ctxt->userData==ctxt)) {
7346 ent = xmlSAX2GetEntity(ctxt, name);
Owen Taylor3473f882001-02-23 17:55:21 +00007347 }
7348 }
Daniel Veillard0161e632008-08-28 15:36:32 +00007349
7350 /*
7351 * [ WFC: Entity Declared ]
7352 * In a document without any DTD, a document with only an
7353 * internal DTD subset which contains no parameter entity
7354 * references, or a document with "standalone='yes'", the
7355 * Name given in the entity reference must match that in an
7356 * entity declaration, except that well-formed documents
7357 * need not declare any of the following entities: amp, lt,
7358 * gt, apos, quot.
7359 * The declaration of a parameter entity must precede any
7360 * reference to it.
7361 * Similarly, the declaration of a general entity must
7362 * precede any reference to it which appears in a default
7363 * value in an attribute-list declaration. Note that if
7364 * entities are declared in the external subset or in
7365 * external parameter entities, a non-validating processor
7366 * is not obligated to read and process their declarations;
7367 * for such documents, the rule that an entity must be
7368 * declared is a well-formedness constraint only if
7369 * standalone='yes'.
7370 */
7371 if (ent == NULL) {
7372 if ((ctxt->standalone == 1) ||
7373 ((ctxt->hasExternalSubset == 0) &&
7374 (ctxt->hasPErefs == 0))) {
7375 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7376 "Entity '%s' not defined\n", name);
7377 } else {
7378 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7379 "Entity '%s' not defined\n",
7380 name);
7381 }
7382 /* TODO ? check regressions ctxt->valid = 0; */
7383 }
7384
7385 /*
7386 * [ WFC: Parsed Entity ]
7387 * An entity reference must not contain the name of an
7388 * unparsed entity
7389 */
7390 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7391 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7392 "Entity reference to unparsed entity %s\n", name);
7393 }
7394
7395 /*
7396 * [ WFC: No External Entity References ]
7397 * Attribute values cannot contain direct or indirect
7398 * entity references to external entities.
7399 */
7400 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7401 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
7402 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7403 "Attribute references external entity '%s'\n", name);
7404 }
7405 /*
7406 * [ WFC: No < in Attribute Values ]
7407 * The replacement text of any entity referred to directly or
7408 * indirectly in an attribute value (other than "&lt;") must
7409 * not contain a <.
7410 */
7411 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7412 (ent != NULL) && (ent->content != NULL) &&
Rob Richardsb9ed0172009-01-05 17:28:50 +00007413 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
Daniel Veillard0161e632008-08-28 15:36:32 +00007414 (xmlStrchr(ent->content, '<'))) {
7415 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
7416 "'<' in entity '%s' is not allowed in attributes values\n",
7417 name);
7418 }
7419
7420 /*
7421 * Internal check, no parameter entities here ...
7422 */
7423 else {
7424 switch (ent->etype) {
7425 case XML_INTERNAL_PARAMETER_ENTITY:
7426 case XML_EXTERNAL_PARAMETER_ENTITY:
7427 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
7428 "Attempt to reference the parameter entity '%s'\n",
7429 name);
7430 break;
7431 default:
7432 break;
7433 }
7434 }
7435
7436 /*
7437 * [ WFC: No Recursion ]
7438 * A parsed entity must not contain a recursive reference
7439 * to itself, either directly or indirectly.
7440 * Done somewhere else
7441 */
7442
7443 xmlFree(name);
Owen Taylor3473f882001-02-23 17:55:21 +00007444 *str = ptr;
7445 return(ent);
7446}
7447
7448/**
7449 * xmlParsePEReference:
7450 * @ctxt: an XML parser context
7451 *
7452 * parse PEReference declarations
7453 * The entity content is handled directly by pushing it's content as
7454 * a new input stream.
7455 *
7456 * [69] PEReference ::= '%' Name ';'
7457 *
7458 * [ WFC: No Recursion ]
7459 * A parsed entity must not contain a recursive
7460 * reference to itself, either directly or indirectly.
7461 *
7462 * [ WFC: Entity Declared ]
7463 * In a document without any DTD, a document with only an internal DTD
7464 * subset which contains no parameter entity references, or a document
7465 * with "standalone='yes'", ... ... The declaration of a parameter
7466 * entity must precede any reference to it...
7467 *
7468 * [ VC: Entity Declared ]
7469 * In a document with an external subset or external parameter entities
7470 * with "standalone='no'", ... ... The declaration of a parameter entity
7471 * must precede any reference to it...
7472 *
7473 * [ WFC: In DTD ]
7474 * Parameter-entity references may only appear in the DTD.
7475 * NOTE: misleading but this is handled.
7476 */
7477void
Daniel Veillard8f597c32003-10-06 08:19:27 +00007478xmlParsePEReference(xmlParserCtxtPtr ctxt)
7479{
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007480 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00007481 xmlEntityPtr entity = NULL;
7482 xmlParserInputPtr input;
7483
Daniel Veillard0161e632008-08-28 15:36:32 +00007484 if (RAW != '%')
7485 return;
7486 NEXT;
7487 name = xmlParseName(ctxt);
7488 if (name == NULL) {
7489 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7490 "xmlParsePEReference: no name\n");
7491 return;
Owen Taylor3473f882001-02-23 17:55:21 +00007492 }
Daniel Veillard0161e632008-08-28 15:36:32 +00007493 if (RAW != ';') {
7494 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7495 return;
7496 }
7497
7498 NEXT;
7499
7500 /*
7501 * Increate the number of entity references parsed
7502 */
7503 ctxt->nbentities++;
7504
7505 /*
7506 * Request the entity from SAX
7507 */
7508 if ((ctxt->sax != NULL) &&
7509 (ctxt->sax->getParameterEntity != NULL))
7510 entity = ctxt->sax->getParameterEntity(ctxt->userData,
7511 name);
7512 if (entity == NULL) {
7513 /*
7514 * [ WFC: Entity Declared ]
7515 * In a document without any DTD, a document with only an
7516 * internal DTD subset which contains no parameter entity
7517 * references, or a document with "standalone='yes'", ...
7518 * ... The declaration of a parameter entity must precede
7519 * any reference to it...
7520 */
7521 if ((ctxt->standalone == 1) ||
7522 ((ctxt->hasExternalSubset == 0) &&
7523 (ctxt->hasPErefs == 0))) {
7524 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7525 "PEReference: %%%s; not found\n",
7526 name);
7527 } else {
7528 /*
7529 * [ VC: Entity Declared ]
7530 * In a document with an external subset or external
7531 * parameter entities with "standalone='no'", ...
7532 * ... The declaration of a parameter entity must
7533 * precede any reference to it...
7534 */
7535 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7536 "PEReference: %%%s; not found\n",
7537 name, NULL);
7538 ctxt->valid = 0;
7539 }
7540 } else {
7541 /*
7542 * Internal checking in case the entity quest barfed
7543 */
7544 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
7545 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
7546 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7547 "Internal: %%%s; is not a parameter entity\n",
7548 name, NULL);
7549 } else if (ctxt->input->free != deallocblankswrapper) {
7550 input = xmlNewBlanksWrapperInputStream(ctxt, entity);
7551 if (xmlPushInput(ctxt, input) < 0)
7552 return;
7553 } else {
7554 /*
7555 * TODO !!!
7556 * handle the extra spaces added before and after
7557 * c.f. http://www.w3.org/TR/REC-xml#as-PE
7558 */
7559 input = xmlNewEntityInputStream(ctxt, entity);
7560 if (xmlPushInput(ctxt, input) < 0)
7561 return;
7562 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
7563 (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
7564 (IS_BLANK_CH(NXT(5)))) {
7565 xmlParseTextDecl(ctxt);
7566 if (ctxt->errNo ==
7567 XML_ERR_UNSUPPORTED_ENCODING) {
7568 /*
7569 * The XML REC instructs us to stop parsing
7570 * right here
7571 */
7572 ctxt->instate = XML_PARSER_EOF;
7573 return;
7574 }
7575 }
7576 }
7577 }
7578 ctxt->hasPErefs = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007579}
7580
7581/**
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00007582 * xmlLoadEntityContent:
7583 * @ctxt: an XML parser context
7584 * @entity: an unloaded system entity
7585 *
7586 * Load the original content of the given system entity from the
7587 * ExternalID/SystemID given. This is to be used for Included in Literal
7588 * http://www.w3.org/TR/REC-xml/#inliteral processing of entities references
7589 *
7590 * Returns 0 in case of success and -1 in case of failure
7591 */
7592static int
7593xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
7594 xmlParserInputPtr input;
7595 xmlBufferPtr buf;
7596 int l, c;
7597 int count = 0;
7598
7599 if ((ctxt == NULL) || (entity == NULL) ||
7600 ((entity->etype != XML_EXTERNAL_PARAMETER_ENTITY) &&
7601 (entity->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY)) ||
7602 (entity->content != NULL)) {
7603 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
7604 "xmlLoadEntityContent parameter error");
7605 return(-1);
7606 }
7607
7608 if (xmlParserDebugEntities)
7609 xmlGenericError(xmlGenericErrorContext,
7610 "Reading %s entity content input\n", entity->name);
7611
7612 buf = xmlBufferCreate();
7613 if (buf == NULL) {
7614 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
7615 "xmlLoadEntityContent parameter error");
7616 return(-1);
7617 }
7618
7619 input = xmlNewEntityInputStream(ctxt, entity);
7620 if (input == NULL) {
7621 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
7622 "xmlLoadEntityContent input error");
7623 xmlBufferFree(buf);
7624 return(-1);
7625 }
7626
7627 /*
7628 * Push the entity as the current input, read char by char
7629 * saving to the buffer until the end of the entity or an error
7630 */
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00007631 if (xmlPushInput(ctxt, input) < 0) {
7632 xmlBufferFree(buf);
7633 return(-1);
7634 }
7635
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00007636 GROW;
7637 c = CUR_CHAR(l);
7638 while ((ctxt->input == input) && (ctxt->input->cur < ctxt->input->end) &&
7639 (IS_CHAR(c))) {
7640 xmlBufferAdd(buf, ctxt->input->cur, l);
7641 if (count++ > 100) {
7642 count = 0;
7643 GROW;
7644 }
7645 NEXTL(l);
7646 c = CUR_CHAR(l);
7647 }
7648
7649 if ((ctxt->input == input) && (ctxt->input->cur >= ctxt->input->end)) {
7650 xmlPopInput(ctxt);
7651 } else if (!IS_CHAR(c)) {
7652 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
7653 "xmlLoadEntityContent: invalid char value %d\n",
7654 c);
7655 xmlBufferFree(buf);
7656 return(-1);
7657 }
7658 entity->content = buf->content;
7659 buf->content = NULL;
7660 xmlBufferFree(buf);
7661
7662 return(0);
7663}
7664
7665/**
Owen Taylor3473f882001-02-23 17:55:21 +00007666 * xmlParseStringPEReference:
7667 * @ctxt: an XML parser context
7668 * @str: a pointer to an index in the string
7669 *
7670 * parse PEReference declarations
7671 *
7672 * [69] PEReference ::= '%' Name ';'
7673 *
7674 * [ WFC: No Recursion ]
7675 * A parsed entity must not contain a recursive
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00007676 * reference to itself, either directly or indirectly.
Owen Taylor3473f882001-02-23 17:55:21 +00007677 *
7678 * [ WFC: Entity Declared ]
7679 * In a document without any DTD, a document with only an internal DTD
7680 * subset which contains no parameter entity references, or a document
7681 * with "standalone='yes'", ... ... The declaration of a parameter
7682 * entity must precede any reference to it...
7683 *
7684 * [ VC: Entity Declared ]
7685 * In a document with an external subset or external parameter entities
7686 * with "standalone='no'", ... ... The declaration of a parameter entity
7687 * must precede any reference to it...
7688 *
7689 * [ WFC: In DTD ]
7690 * Parameter-entity references may only appear in the DTD.
7691 * NOTE: misleading but this is handled.
7692 *
7693 * Returns the string of the entity content.
7694 * str is updated to the current value of the index
7695 */
Daniel Veillard8ed10722009-08-20 19:17:36 +02007696static xmlEntityPtr
Owen Taylor3473f882001-02-23 17:55:21 +00007697xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
7698 const xmlChar *ptr;
7699 xmlChar cur;
7700 xmlChar *name;
7701 xmlEntityPtr entity = NULL;
7702
7703 if ((str == NULL) || (*str == NULL)) return(NULL);
7704 ptr = *str;
7705 cur = *ptr;
Daniel Veillard0161e632008-08-28 15:36:32 +00007706 if (cur != '%')
7707 return(NULL);
7708 ptr++;
7709 cur = *ptr;
7710 name = xmlParseStringName(ctxt, &ptr);
7711 if (name == NULL) {
7712 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7713 "xmlParseStringPEReference: no name\n");
7714 *str = ptr;
7715 return(NULL);
7716 }
7717 cur = *ptr;
7718 if (cur != ';') {
7719 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7720 xmlFree(name);
7721 *str = ptr;
7722 return(NULL);
7723 }
7724 ptr++;
7725
7726 /*
7727 * Increate the number of entity references parsed
7728 */
7729 ctxt->nbentities++;
7730
7731 /*
7732 * Request the entity from SAX
7733 */
7734 if ((ctxt->sax != NULL) &&
7735 (ctxt->sax->getParameterEntity != NULL))
7736 entity = ctxt->sax->getParameterEntity(ctxt->userData,
7737 name);
7738 if (entity == NULL) {
7739 /*
7740 * [ WFC: Entity Declared ]
7741 * In a document without any DTD, a document with only an
7742 * internal DTD subset which contains no parameter entity
7743 * references, or a document with "standalone='yes'", ...
7744 * ... The declaration of a parameter entity must precede
7745 * any reference to it...
7746 */
7747 if ((ctxt->standalone == 1) ||
7748 ((ctxt->hasExternalSubset == 0) && (ctxt->hasPErefs == 0))) {
7749 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7750 "PEReference: %%%s; not found\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00007751 } else {
Daniel Veillard0161e632008-08-28 15:36:32 +00007752 /*
7753 * [ VC: Entity Declared ]
7754 * In a document with an external subset or external
7755 * parameter entities with "standalone='no'", ...
7756 * ... The declaration of a parameter entity must
7757 * precede any reference to it...
7758 */
7759 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7760 "PEReference: %%%s; not found\n",
7761 name, NULL);
7762 ctxt->valid = 0;
7763 }
7764 } else {
7765 /*
7766 * Internal checking in case the entity quest barfed
7767 */
7768 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
7769 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
7770 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7771 "%%%s; is not a parameter entity\n",
7772 name, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007773 }
7774 }
Daniel Veillard0161e632008-08-28 15:36:32 +00007775 ctxt->hasPErefs = 1;
7776 xmlFree(name);
Owen Taylor3473f882001-02-23 17:55:21 +00007777 *str = ptr;
7778 return(entity);
7779}
7780
7781/**
7782 * xmlParseDocTypeDecl:
7783 * @ctxt: an XML parser context
7784 *
7785 * parse a DOCTYPE declaration
7786 *
7787 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
7788 * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
7789 *
7790 * [ VC: Root Element Type ]
7791 * The Name in the document type declaration must match the element
7792 * type of the root element.
7793 */
7794
7795void
7796xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007797 const xmlChar *name = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00007798 xmlChar *ExternalID = NULL;
7799 xmlChar *URI = NULL;
7800
7801 /*
7802 * We know that '<!DOCTYPE' has been detected.
7803 */
7804 SKIP(9);
7805
7806 SKIP_BLANKS;
7807
7808 /*
7809 * Parse the DOCTYPE name.
7810 */
7811 name = xmlParseName(ctxt);
7812 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007813 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7814 "xmlParseDocTypeDecl : no DOCTYPE name !\n");
Owen Taylor3473f882001-02-23 17:55:21 +00007815 }
7816 ctxt->intSubName = name;
7817
7818 SKIP_BLANKS;
7819
7820 /*
7821 * Check for SystemID and ExternalID
7822 */
7823 URI = xmlParseExternalID(ctxt, &ExternalID, 1);
7824
7825 if ((URI != NULL) || (ExternalID != NULL)) {
7826 ctxt->hasExternalSubset = 1;
7827 }
7828 ctxt->extSubURI = URI;
7829 ctxt->extSubSystem = ExternalID;
7830
7831 SKIP_BLANKS;
7832
7833 /*
7834 * Create and update the internal subset.
7835 */
7836 if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
7837 (!ctxt->disableSAX))
7838 ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
7839
7840 /*
7841 * Is there any internal subset declarations ?
7842 * they are handled separately in xmlParseInternalSubset()
7843 */
7844 if (RAW == '[')
7845 return;
7846
7847 /*
7848 * We should be at the end of the DOCTYPE declaration.
7849 */
7850 if (RAW != '>') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007851 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007852 }
7853 NEXT;
7854}
7855
7856/**
Daniel Veillardcbaf3992001-12-31 16:16:02 +00007857 * xmlParseInternalSubset:
Owen Taylor3473f882001-02-23 17:55:21 +00007858 * @ctxt: an XML parser context
7859 *
7860 * parse the internal subset declaration
7861 *
7862 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
7863 */
7864
Daniel Veillard56a4cb82001-03-24 17:00:36 +00007865static void
Owen Taylor3473f882001-02-23 17:55:21 +00007866xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
7867 /*
7868 * Is there any DTD definition ?
7869 */
7870 if (RAW == '[') {
7871 ctxt->instate = XML_PARSER_DTD;
7872 NEXT;
7873 /*
7874 * Parse the succession of Markup declarations and
7875 * PEReferences.
7876 * Subsequence (markupdecl | PEReference | S)*
7877 */
7878 while (RAW != ']') {
7879 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00007880 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00007881
7882 SKIP_BLANKS;
7883 xmlParseMarkupDecl(ctxt);
7884 xmlParsePEReference(ctxt);
7885
7886 /*
7887 * Pop-up of finished entities.
7888 */
7889 while ((RAW == 0) && (ctxt->inputNr > 1))
7890 xmlPopInput(ctxt);
7891
7892 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007893 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
Owen Taylor3473f882001-02-23 17:55:21 +00007894 "xmlParseInternalSubset: error detected in Markup declaration\n");
Owen Taylor3473f882001-02-23 17:55:21 +00007895 break;
7896 }
7897 }
7898 if (RAW == ']') {
7899 NEXT;
7900 SKIP_BLANKS;
7901 }
7902 }
7903
7904 /*
7905 * We should be at the end of the DOCTYPE declaration.
7906 */
7907 if (RAW != '>') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007908 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007909 }
7910 NEXT;
7911}
7912
Daniel Veillard81273902003-09-30 00:43:48 +00007913#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +00007914/**
7915 * xmlParseAttribute:
7916 * @ctxt: an XML parser context
7917 * @value: a xmlChar ** used to store the value of the attribute
7918 *
7919 * parse an attribute
7920 *
7921 * [41] Attribute ::= Name Eq AttValue
7922 *
7923 * [ WFC: No External Entity References ]
7924 * Attribute values cannot contain direct or indirect entity references
7925 * to external entities.
7926 *
7927 * [ WFC: No < in Attribute Values ]
7928 * The replacement text of any entity referred to directly or indirectly in
7929 * an attribute value (other than "&lt;") must not contain a <.
7930 *
7931 * [ VC: Attribute Value Type ]
7932 * The attribute must have been declared; the value must be of the type
7933 * declared for it.
7934 *
7935 * [25] Eq ::= S? '=' S?
7936 *
7937 * With namespace:
7938 *
7939 * [NS 11] Attribute ::= QName Eq AttValue
7940 *
7941 * Also the case QName == xmlns:??? is handled independently as a namespace
7942 * definition.
7943 *
7944 * Returns the attribute name, and the value in *value.
7945 */
7946
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007947const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00007948xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007949 const xmlChar *name;
7950 xmlChar *val;
Owen Taylor3473f882001-02-23 17:55:21 +00007951
7952 *value = NULL;
Daniel Veillard878eab02002-02-19 13:46:09 +00007953 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00007954 name = xmlParseName(ctxt);
7955 if (name == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00007956 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007957 "error parsing attribute name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00007958 return(NULL);
7959 }
7960
7961 /*
7962 * read the value
7963 */
7964 SKIP_BLANKS;
7965 if (RAW == '=') {
7966 NEXT;
7967 SKIP_BLANKS;
7968 val = xmlParseAttValue(ctxt);
7969 ctxt->instate = XML_PARSER_CONTENT;
7970 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00007971 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
Owen Taylor3473f882001-02-23 17:55:21 +00007972 "Specification mandate value for attribute %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00007973 return(NULL);
7974 }
7975
7976 /*
7977 * Check that xml:lang conforms to the specification
7978 * No more registered as an error, just generate a warning now
7979 * since this was deprecated in XML second edition
7980 */
7981 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
7982 if (!xmlCheckLanguageID(val)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00007983 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
7984 "Malformed value for xml:lang : %s\n",
7985 val, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007986 }
7987 }
7988
7989 /*
7990 * Check that xml:space conforms to the specification
7991 */
7992 if (xmlStrEqual(name, BAD_CAST "xml:space")) {
7993 if (xmlStrEqual(val, BAD_CAST "default"))
7994 *(ctxt->space) = 0;
7995 else if (xmlStrEqual(val, BAD_CAST "preserve"))
7996 *(ctxt->space) = 1;
7997 else {
Daniel Veillardd8925572005-06-08 22:34:55 +00007998 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
Daniel Veillard642104e2003-03-26 16:32:05 +00007999"Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
Daniel Veillardd8925572005-06-08 22:34:55 +00008000 val, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008001 }
8002 }
8003
8004 *value = val;
8005 return(name);
8006}
8007
8008/**
8009 * xmlParseStartTag:
8010 * @ctxt: an XML parser context
8011 *
8012 * parse a start of tag either for rule element or
8013 * EmptyElement. In both case we don't parse the tag closing chars.
8014 *
8015 * [40] STag ::= '<' Name (S Attribute)* S? '>'
8016 *
8017 * [ WFC: Unique Att Spec ]
8018 * No attribute name may appear more than once in the same start-tag or
8019 * empty-element tag.
8020 *
8021 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
8022 *
8023 * [ WFC: Unique Att Spec ]
8024 * No attribute name may appear more than once in the same start-tag or
8025 * empty-element tag.
8026 *
8027 * With namespace:
8028 *
8029 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
8030 *
8031 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
8032 *
8033 * Returns the element name parsed
8034 */
8035
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008036const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00008037xmlParseStartTag(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008038 const xmlChar *name;
8039 const xmlChar *attname;
Owen Taylor3473f882001-02-23 17:55:21 +00008040 xmlChar *attvalue;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008041 const xmlChar **atts = ctxt->atts;
Owen Taylor3473f882001-02-23 17:55:21 +00008042 int nbatts = 0;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008043 int maxatts = ctxt->maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00008044 int i;
8045
8046 if (RAW != '<') return(NULL);
Daniel Veillard21a0f912001-02-25 19:54:14 +00008047 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00008048
8049 name = xmlParseName(ctxt);
8050 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008051 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00008052 "xmlParseStartTag: invalid element name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008053 return(NULL);
8054 }
8055
8056 /*
8057 * Now parse the attributes, it ends up with the ending
8058 *
8059 * (S Attribute)* S?
8060 */
8061 SKIP_BLANKS;
8062 GROW;
8063
Daniel Veillard21a0f912001-02-25 19:54:14 +00008064 while ((RAW != '>') &&
8065 ((RAW != '/') || (NXT(1) != '>')) &&
Daniel Veillard73b013f2003-09-30 12:36:01 +00008066 (IS_BYTE_CHAR(RAW))) {
Owen Taylor3473f882001-02-23 17:55:21 +00008067 const xmlChar *q = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00008068 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00008069
8070 attname = xmlParseAttribute(ctxt, &attvalue);
8071 if ((attname != NULL) && (attvalue != NULL)) {
8072 /*
8073 * [ WFC: Unique Att Spec ]
8074 * No attribute name may appear more than once in the same
8075 * start-tag or empty-element tag.
8076 */
8077 for (i = 0; i < nbatts;i += 2) {
8078 if (xmlStrEqual(atts[i], attname)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008079 xmlErrAttributeDup(ctxt, NULL, attname);
Owen Taylor3473f882001-02-23 17:55:21 +00008080 xmlFree(attvalue);
8081 goto failed;
8082 }
8083 }
Owen Taylor3473f882001-02-23 17:55:21 +00008084 /*
8085 * Add the pair to atts
8086 */
8087 if (atts == NULL) {
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008088 maxatts = 22; /* allow for 10 attrs by default */
8089 atts = (const xmlChar **)
8090 xmlMalloc(maxatts * sizeof(xmlChar *));
Owen Taylor3473f882001-02-23 17:55:21 +00008091 if (atts == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008092 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008093 if (attvalue != NULL)
8094 xmlFree(attvalue);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008095 goto failed;
Owen Taylor3473f882001-02-23 17:55:21 +00008096 }
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008097 ctxt->atts = atts;
8098 ctxt->maxatts = maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00008099 } else if (nbatts + 4 > maxatts) {
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008100 const xmlChar **n;
8101
Owen Taylor3473f882001-02-23 17:55:21 +00008102 maxatts *= 2;
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008103 n = (const xmlChar **) xmlRealloc((void *) atts,
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008104 maxatts * sizeof(const xmlChar *));
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008105 if (n == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008106 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008107 if (attvalue != NULL)
8108 xmlFree(attvalue);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008109 goto failed;
Owen Taylor3473f882001-02-23 17:55:21 +00008110 }
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008111 atts = n;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008112 ctxt->atts = atts;
8113 ctxt->maxatts = maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00008114 }
8115 atts[nbatts++] = attname;
8116 atts[nbatts++] = attvalue;
8117 atts[nbatts] = NULL;
8118 atts[nbatts + 1] = NULL;
8119 } else {
Owen Taylor3473f882001-02-23 17:55:21 +00008120 if (attvalue != NULL)
8121 xmlFree(attvalue);
8122 }
8123
8124failed:
8125
Daniel Veillard3772de32002-12-17 10:31:45 +00008126 GROW
Owen Taylor3473f882001-02-23 17:55:21 +00008127 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
8128 break;
William M. Brack76e95df2003-10-18 16:20:14 +00008129 if (!IS_BLANK_CH(RAW)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008130 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
8131 "attributes construct error\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008132 }
8133 SKIP_BLANKS;
Daniel Veillard02111c12003-02-24 19:14:52 +00008134 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
8135 (attname == NULL) && (attvalue == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008136 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
8137 "xmlParseStartTag: problem parsing attributes\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008138 break;
8139 }
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008140 SHRINK;
Owen Taylor3473f882001-02-23 17:55:21 +00008141 GROW;
8142 }
8143
8144 /*
8145 * SAX: Start of Element !
8146 */
8147 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008148 (!ctxt->disableSAX)) {
8149 if (nbatts > 0)
8150 ctxt->sax->startElement(ctxt->userData, name, atts);
8151 else
8152 ctxt->sax->startElement(ctxt->userData, name, NULL);
8153 }
Owen Taylor3473f882001-02-23 17:55:21 +00008154
8155 if (atts != NULL) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008156 /* Free only the content strings */
8157 for (i = 1;i < nbatts;i+=2)
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008158 if (atts[i] != NULL)
8159 xmlFree((xmlChar *) atts[i]);
Owen Taylor3473f882001-02-23 17:55:21 +00008160 }
8161 return(name);
8162}
8163
8164/**
Daniel Veillard0fb18932003-09-07 09:14:37 +00008165 * xmlParseEndTag1:
Owen Taylor3473f882001-02-23 17:55:21 +00008166 * @ctxt: an XML parser context
Daniel Veillard0fb18932003-09-07 09:14:37 +00008167 * @line: line of the start tag
8168 * @nsNr: number of namespaces on the start tag
Owen Taylor3473f882001-02-23 17:55:21 +00008169 *
8170 * parse an end of tag
8171 *
8172 * [42] ETag ::= '</' Name S? '>'
8173 *
8174 * With namespace
8175 *
8176 * [NS 9] ETag ::= '</' QName S? '>'
8177 */
8178
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00008179static void
Daniel Veillard0fb18932003-09-07 09:14:37 +00008180xmlParseEndTag1(xmlParserCtxtPtr ctxt, int line) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008181 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00008182
8183 GROW;
8184 if ((RAW != '<') || (NXT(1) != '/')) {
Daniel Veillardf403d292003-10-05 13:51:35 +00008185 xmlFatalErrMsg(ctxt, XML_ERR_LTSLASH_REQUIRED,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008186 "xmlParseEndTag: '</' not found\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008187 return;
8188 }
8189 SKIP(2);
8190
Daniel Veillard46de64e2002-05-29 08:21:33 +00008191 name = xmlParseNameAndCompare(ctxt,ctxt->name);
Owen Taylor3473f882001-02-23 17:55:21 +00008192
8193 /*
8194 * We should definitely be at the ending "S? '>'" part
8195 */
8196 GROW;
8197 SKIP_BLANKS;
Daniel Veillard73b013f2003-09-30 12:36:01 +00008198 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008199 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008200 } else
Daniel Veillard21a0f912001-02-25 19:54:14 +00008201 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00008202
8203 /*
8204 * [ WFC: Element Type Match ]
8205 * The Name in an element's end-tag must match the element type in the
8206 * start-tag.
8207 *
8208 */
Daniel Veillard46de64e2002-05-29 08:21:33 +00008209 if (name != (xmlChar*)1) {
Daniel Veillardf403d292003-10-05 13:51:35 +00008210 if (name == NULL) name = BAD_CAST "unparseable";
8211 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00008212 "Opening and ending tag mismatch: %s line %d and %s\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00008213 ctxt->name, line, name);
Owen Taylor3473f882001-02-23 17:55:21 +00008214 }
8215
8216 /*
8217 * SAX: End of Tag
8218 */
8219 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
8220 (!ctxt->disableSAX))
Daniel Veillard46de64e2002-05-29 08:21:33 +00008221 ctxt->sax->endElement(ctxt->userData, ctxt->name);
Owen Taylor3473f882001-02-23 17:55:21 +00008222
Daniel Veillarde57ec792003-09-10 10:50:59 +00008223 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00008224 spacePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00008225 return;
8226}
8227
8228/**
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00008229 * xmlParseEndTag:
8230 * @ctxt: an XML parser context
8231 *
8232 * parse an end of tag
8233 *
8234 * [42] ETag ::= '</' Name S? '>'
8235 *
8236 * With namespace
8237 *
8238 * [NS 9] ETag ::= '</' QName S? '>'
8239 */
8240
8241void
8242xmlParseEndTag(xmlParserCtxtPtr ctxt) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00008243 xmlParseEndTag1(ctxt, 0);
8244}
Daniel Veillard81273902003-09-30 00:43:48 +00008245#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillard0fb18932003-09-07 09:14:37 +00008246
8247/************************************************************************
8248 * *
8249 * SAX 2 specific operations *
8250 * *
8251 ************************************************************************/
8252
Daniel Veillard0fb18932003-09-07 09:14:37 +00008253/*
8254 * xmlGetNamespace:
8255 * @ctxt: an XML parser context
8256 * @prefix: the prefix to lookup
8257 *
8258 * Lookup the namespace name for the @prefix (which ca be NULL)
8259 * The prefix must come from the @ctxt->dict dictionnary
8260 *
8261 * Returns the namespace name or NULL if not bound
8262 */
8263static const xmlChar *
8264xmlGetNamespace(xmlParserCtxtPtr ctxt, const xmlChar *prefix) {
8265 int i;
8266
Daniel Veillarde57ec792003-09-10 10:50:59 +00008267 if (prefix == ctxt->str_xml) return(ctxt->str_xml_ns);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008268 for (i = ctxt->nsNr - 2;i >= 0;i-=2)
Daniel Veillarde57ec792003-09-10 10:50:59 +00008269 if (ctxt->nsTab[i] == prefix) {
8270 if ((prefix == NULL) && (*ctxt->nsTab[i + 1] == 0))
8271 return(NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008272 return(ctxt->nsTab[i + 1]);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008273 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008274 return(NULL);
8275}
8276
8277/**
Daniel Veillard0fb18932003-09-07 09:14:37 +00008278 * xmlParseQName:
8279 * @ctxt: an XML parser context
8280 * @prefix: pointer to store the prefix part
8281 *
8282 * parse an XML Namespace QName
8283 *
8284 * [6] QName ::= (Prefix ':')? LocalPart
8285 * [7] Prefix ::= NCName
8286 * [8] LocalPart ::= NCName
8287 *
8288 * Returns the Name parsed or NULL
8289 */
8290
8291static const xmlChar *
8292xmlParseQName(xmlParserCtxtPtr ctxt, const xmlChar **prefix) {
8293 const xmlChar *l, *p;
8294
8295 GROW;
8296
8297 l = xmlParseNCName(ctxt);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008298 if (l == NULL) {
8299 if (CUR == ':') {
8300 l = xmlParseName(ctxt);
8301 if (l != NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008302 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8303 "Failed to parse QName '%s'\n", l, NULL, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008304 *prefix = NULL;
8305 return(l);
8306 }
8307 }
8308 return(NULL);
8309 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008310 if (CUR == ':') {
8311 NEXT;
8312 p = l;
8313 l = xmlParseNCName(ctxt);
8314 if (l == NULL) {
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008315 xmlChar *tmp;
8316
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008317 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8318 "Failed to parse QName '%s:'\n", p, NULL, NULL);
Daniel Veillardae0765b2008-07-31 19:54:59 +00008319 l = xmlParseNmtoken(ctxt);
8320 if (l == NULL)
8321 tmp = xmlBuildQName(BAD_CAST "", p, NULL, 0);
8322 else {
8323 tmp = xmlBuildQName(l, p, NULL, 0);
8324 xmlFree((char *)l);
8325 }
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008326 p = xmlDictLookup(ctxt->dict, tmp, -1);
8327 if (tmp != NULL) xmlFree(tmp);
8328 *prefix = NULL;
8329 return(p);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008330 }
8331 if (CUR == ':') {
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008332 xmlChar *tmp;
8333
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008334 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8335 "Failed to parse QName '%s:%s:'\n", p, l, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008336 NEXT;
8337 tmp = (xmlChar *) xmlParseName(ctxt);
8338 if (tmp != NULL) {
8339 tmp = xmlBuildQName(tmp, l, NULL, 0);
8340 l = xmlDictLookup(ctxt->dict, tmp, -1);
8341 if (tmp != NULL) xmlFree(tmp);
8342 *prefix = p;
8343 return(l);
8344 }
8345 tmp = xmlBuildQName(BAD_CAST "", l, NULL, 0);
8346 l = xmlDictLookup(ctxt->dict, tmp, -1);
8347 if (tmp != NULL) xmlFree(tmp);
8348 *prefix = p;
8349 return(l);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008350 }
8351 *prefix = p;
8352 } else
8353 *prefix = NULL;
8354 return(l);
8355}
8356
8357/**
8358 * xmlParseQNameAndCompare:
8359 * @ctxt: an XML parser context
8360 * @name: the localname
8361 * @prefix: the prefix, if any.
8362 *
8363 * parse an XML name and compares for match
8364 * (specialized for endtag parsing)
8365 *
8366 * Returns NULL for an illegal name, (xmlChar*) 1 for success
8367 * and the name for mismatch
8368 */
8369
8370static const xmlChar *
8371xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *name,
8372 xmlChar const *prefix) {
8373 const xmlChar *cmp = name;
8374 const xmlChar *in;
8375 const xmlChar *ret;
8376 const xmlChar *prefix2;
8377
8378 if (prefix == NULL) return(xmlParseNameAndCompare(ctxt, name));
8379
8380 GROW;
8381 in = ctxt->input->cur;
8382
8383 cmp = prefix;
8384 while (*in != 0 && *in == *cmp) {
8385 ++in;
8386 ++cmp;
8387 }
8388 if ((*cmp == 0) && (*in == ':')) {
8389 in++;
8390 cmp = name;
8391 while (*in != 0 && *in == *cmp) {
8392 ++in;
8393 ++cmp;
8394 }
William M. Brack76e95df2003-10-18 16:20:14 +00008395 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00008396 /* success */
8397 ctxt->input->cur = in;
8398 return((const xmlChar*) 1);
8399 }
8400 }
8401 /*
8402 * all strings coms from the dictionary, equality can be done directly
8403 */
8404 ret = xmlParseQName (ctxt, &prefix2);
8405 if ((ret == name) && (prefix == prefix2))
8406 return((const xmlChar*) 1);
8407 return ret;
8408}
8409
8410/**
8411 * xmlParseAttValueInternal:
8412 * @ctxt: an XML parser context
8413 * @len: attribute len result
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008414 * @alloc: whether the attribute was reallocated as a new string
8415 * @normalize: if 1 then further non-CDATA normalization must be done
Daniel Veillard0fb18932003-09-07 09:14:37 +00008416 *
8417 * parse a value for an attribute.
8418 * NOTE: if no normalization is needed, the routine will return pointers
8419 * directly from the data buffer.
8420 *
8421 * 3.3.3 Attribute-Value Normalization:
8422 * Before the value of an attribute is passed to the application or
8423 * checked for validity, the XML processor must normalize it as follows:
8424 * - a character reference is processed by appending the referenced
8425 * character to the attribute value
8426 * - an entity reference is processed by recursively processing the
8427 * replacement text of the entity
8428 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
8429 * appending #x20 to the normalized value, except that only a single
8430 * #x20 is appended for a "#xD#xA" sequence that is part of an external
8431 * parsed entity or the literal entity value of an internal parsed entity
8432 * - other characters are processed by appending them to the normalized value
8433 * If the declared value is not CDATA, then the XML processor must further
8434 * process the normalized attribute value by discarding any leading and
8435 * trailing space (#x20) characters, and by replacing sequences of space
8436 * (#x20) characters by a single space (#x20) character.
8437 * All attributes for which no declaration has been read should be treated
8438 * by a non-validating parser as if declared CDATA.
8439 *
8440 * Returns the AttValue parsed or NULL. The value has to be freed by the
8441 * caller if it was copied, this can be detected by val[*len] == 0.
8442 */
8443
8444static xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008445xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *len, int *alloc,
8446 int normalize)
Daniel Veillarde57ec792003-09-10 10:50:59 +00008447{
Daniel Veillard0fb18932003-09-07 09:14:37 +00008448 xmlChar limit = 0;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008449 const xmlChar *in = NULL, *start, *end, *last;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008450 xmlChar *ret = NULL;
8451
8452 GROW;
8453 in = (xmlChar *) CUR_PTR;
8454 if (*in != '"' && *in != '\'') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008455 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008456 return (NULL);
8457 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008458 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008459
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008460 /*
8461 * try to handle in this routine the most common case where no
8462 * allocation of a new string is required and where content is
8463 * pure ASCII.
8464 */
8465 limit = *in++;
8466 end = ctxt->input->end;
8467 start = in;
8468 if (in >= end) {
8469 const xmlChar *oldbase = ctxt->input->base;
8470 GROW;
8471 if (oldbase != ctxt->input->base) {
8472 long delta = ctxt->input->base - oldbase;
8473 start = start + delta;
8474 in = in + delta;
8475 }
8476 end = ctxt->input->end;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008477 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008478 if (normalize) {
8479 /*
8480 * Skip any leading spaces
8481 */
8482 while ((in < end) && (*in != limit) &&
8483 ((*in == 0x20) || (*in == 0x9) ||
8484 (*in == 0xA) || (*in == 0xD))) {
8485 in++;
8486 start = in;
8487 if (in >= end) {
8488 const xmlChar *oldbase = ctxt->input->base;
8489 GROW;
8490 if (oldbase != ctxt->input->base) {
8491 long delta = ctxt->input->base - oldbase;
8492 start = start + delta;
8493 in = in + delta;
8494 }
8495 end = ctxt->input->end;
8496 }
8497 }
8498 while ((in < end) && (*in != limit) && (*in >= 0x20) &&
8499 (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
8500 if ((*in++ == 0x20) && (*in == 0x20)) break;
8501 if (in >= end) {
8502 const xmlChar *oldbase = ctxt->input->base;
8503 GROW;
8504 if (oldbase != ctxt->input->base) {
8505 long delta = ctxt->input->base - oldbase;
8506 start = start + delta;
8507 in = in + delta;
8508 }
8509 end = ctxt->input->end;
8510 }
8511 }
8512 last = in;
8513 /*
8514 * skip the trailing blanks
8515 */
Daniel Veillardc6e20e42003-09-11 16:30:26 +00008516 while ((last[-1] == 0x20) && (last > start)) last--;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008517 while ((in < end) && (*in != limit) &&
8518 ((*in == 0x20) || (*in == 0x9) ||
8519 (*in == 0xA) || (*in == 0xD))) {
8520 in++;
8521 if (in >= end) {
8522 const xmlChar *oldbase = ctxt->input->base;
8523 GROW;
8524 if (oldbase != ctxt->input->base) {
8525 long delta = ctxt->input->base - oldbase;
8526 start = start + delta;
8527 in = in + delta;
8528 last = last + delta;
8529 }
8530 end = ctxt->input->end;
8531 }
8532 }
8533 if (*in != limit) goto need_complex;
8534 } else {
8535 while ((in < end) && (*in != limit) && (*in >= 0x20) &&
8536 (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
8537 in++;
8538 if (in >= end) {
8539 const xmlChar *oldbase = ctxt->input->base;
8540 GROW;
8541 if (oldbase != ctxt->input->base) {
8542 long delta = ctxt->input->base - oldbase;
8543 start = start + delta;
8544 in = in + delta;
8545 }
8546 end = ctxt->input->end;
8547 }
8548 }
8549 last = in;
8550 if (*in != limit) goto need_complex;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008551 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008552 in++;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008553 if (len != NULL) {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008554 *len = last - start;
8555 ret = (xmlChar *) start;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008556 } else {
Daniel Veillarde57ec792003-09-10 10:50:59 +00008557 if (alloc) *alloc = 1;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008558 ret = xmlStrndup(start, last - start);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008559 }
8560 CUR_PTR = in;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008561 if (alloc) *alloc = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008562 return ret;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008563need_complex:
8564 if (alloc) *alloc = 1;
8565 return xmlParseAttValueComplex(ctxt, len, normalize);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008566}
8567
8568/**
8569 * xmlParseAttribute2:
8570 * @ctxt: an XML parser context
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008571 * @pref: the element prefix
8572 * @elem: the element name
8573 * @prefix: a xmlChar ** used to store the value of the attribute prefix
Daniel Veillard0fb18932003-09-07 09:14:37 +00008574 * @value: a xmlChar ** used to store the value of the attribute
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008575 * @len: an int * to save the length of the attribute
8576 * @alloc: an int * to indicate if the attribute was allocated
Daniel Veillard0fb18932003-09-07 09:14:37 +00008577 *
8578 * parse an attribute in the new SAX2 framework.
8579 *
8580 * Returns the attribute name, and the value in *value, .
8581 */
8582
8583static const xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008584xmlParseAttribute2(xmlParserCtxtPtr ctxt,
Daniel Veillard97c9ce22008-03-25 16:52:41 +00008585 const xmlChar * pref, const xmlChar * elem,
8586 const xmlChar ** prefix, xmlChar ** value,
8587 int *len, int *alloc)
8588{
Daniel Veillard0fb18932003-09-07 09:14:37 +00008589 const xmlChar *name;
Daniel Veillardd8925572005-06-08 22:34:55 +00008590 xmlChar *val, *internal_val = NULL;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008591 int normalize = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008592
8593 *value = NULL;
8594 GROW;
8595 name = xmlParseQName(ctxt, prefix);
8596 if (name == NULL) {
Daniel Veillard97c9ce22008-03-25 16:52:41 +00008597 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8598 "error parsing attribute name\n");
8599 return (NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008600 }
8601
8602 /*
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008603 * get the type if needed
8604 */
8605 if (ctxt->attsSpecial != NULL) {
8606 int type;
8607
8608 type = (int) (long) xmlHashQLookup2(ctxt->attsSpecial,
Daniel Veillard97c9ce22008-03-25 16:52:41 +00008609 pref, elem, *prefix, name);
8610 if (type != 0)
8611 normalize = 1;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008612 }
8613
8614 /*
Daniel Veillard0fb18932003-09-07 09:14:37 +00008615 * read the value
8616 */
8617 SKIP_BLANKS;
8618 if (RAW == '=') {
8619 NEXT;
Daniel Veillard97c9ce22008-03-25 16:52:41 +00008620 SKIP_BLANKS;
8621 val = xmlParseAttValueInternal(ctxt, len, alloc, normalize);
8622 if (normalize) {
8623 /*
8624 * Sometimes a second normalisation pass for spaces is needed
8625 * but that only happens if charrefs or entities refernces
8626 * have been used in the attribute value, i.e. the attribute
8627 * value have been extracted in an allocated string already.
8628 */
8629 if (*alloc) {
8630 const xmlChar *val2;
8631
8632 val2 = xmlAttrNormalizeSpace2(ctxt, val, len);
Daniel Veillardae0765b2008-07-31 19:54:59 +00008633 if ((val2 != NULL) && (val2 != val)) {
Daniel Veillard97c9ce22008-03-25 16:52:41 +00008634 xmlFree(val);
Daniel Veillard6a31b832008-03-26 14:06:44 +00008635 val = (xmlChar *) val2;
Daniel Veillard97c9ce22008-03-25 16:52:41 +00008636 }
8637 }
8638 }
8639 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008640 } else {
Daniel Veillard97c9ce22008-03-25 16:52:41 +00008641 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
8642 "Specification mandate value for attribute %s\n",
8643 name);
8644 return (NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008645 }
8646
Daniel Veillard97c9ce22008-03-25 16:52:41 +00008647 if (*prefix == ctxt->str_xml) {
8648 /*
8649 * Check that xml:lang conforms to the specification
8650 * No more registered as an error, just generate a warning now
8651 * since this was deprecated in XML second edition
8652 */
8653 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "lang"))) {
8654 internal_val = xmlStrndup(val, *len);
8655 if (!xmlCheckLanguageID(internal_val)) {
8656 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
8657 "Malformed value for xml:lang : %s\n",
8658 internal_val, NULL);
8659 }
8660 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008661
Daniel Veillard97c9ce22008-03-25 16:52:41 +00008662 /*
8663 * Check that xml:space conforms to the specification
8664 */
8665 if (xmlStrEqual(name, BAD_CAST "space")) {
8666 internal_val = xmlStrndup(val, *len);
8667 if (xmlStrEqual(internal_val, BAD_CAST "default"))
8668 *(ctxt->space) = 0;
8669 else if (xmlStrEqual(internal_val, BAD_CAST "preserve"))
8670 *(ctxt->space) = 1;
8671 else {
8672 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
8673 "Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
8674 internal_val, NULL);
8675 }
8676 }
8677 if (internal_val) {
8678 xmlFree(internal_val);
8679 }
8680 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008681
8682 *value = val;
Daniel Veillard97c9ce22008-03-25 16:52:41 +00008683 return (name);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008684}
Daniel Veillard0fb18932003-09-07 09:14:37 +00008685/**
8686 * xmlParseStartTag2:
8687 * @ctxt: an XML parser context
8688 *
8689 * parse a start of tag either for rule element or
8690 * EmptyElement. In both case we don't parse the tag closing chars.
8691 * This routine is called when running SAX2 parsing
8692 *
8693 * [40] STag ::= '<' Name (S Attribute)* S? '>'
8694 *
8695 * [ WFC: Unique Att Spec ]
8696 * No attribute name may appear more than once in the same start-tag or
8697 * empty-element tag.
8698 *
8699 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
8700 *
8701 * [ WFC: Unique Att Spec ]
8702 * No attribute name may appear more than once in the same start-tag or
8703 * empty-element tag.
8704 *
8705 * With namespace:
8706 *
8707 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
8708 *
8709 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
8710 *
8711 * Returns the element name parsed
8712 */
8713
8714static const xmlChar *
8715xmlParseStartTag2(xmlParserCtxtPtr ctxt, const xmlChar **pref,
Daniel Veillardc82c57e2004-01-12 16:24:34 +00008716 const xmlChar **URI, int *tlen) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00008717 const xmlChar *localname;
8718 const xmlChar *prefix;
8719 const xmlChar *attname;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008720 const xmlChar *aprefix;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008721 const xmlChar *nsname;
8722 xmlChar *attvalue;
8723 const xmlChar **atts = ctxt->atts;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008724 int maxatts = ctxt->maxatts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008725 int nratts, nbatts, nbdef;
Daniel Veillarddcec6722006-10-15 20:32:53 +00008726 int i, j, nbNs, attval, oldline, oldcol;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008727 const xmlChar *base;
8728 unsigned long cur;
Daniel Veillard365cf672005-06-09 08:18:24 +00008729 int nsNr = ctxt->nsNr;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008730
8731 if (RAW != '<') return(NULL);
8732 NEXT1;
8733
8734 /*
8735 * NOTE: it is crucial with the SAX2 API to never call SHRINK beyond that
8736 * point since the attribute values may be stored as pointers to
8737 * the buffer and calling SHRINK would destroy them !
8738 * The Shrinking is only possible once the full set of attribute
8739 * callbacks have been done.
8740 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00008741reparse:
Daniel Veillard0fb18932003-09-07 09:14:37 +00008742 SHRINK;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008743 base = ctxt->input->base;
8744 cur = ctxt->input->cur - ctxt->input->base;
Daniel Veillarddcec6722006-10-15 20:32:53 +00008745 oldline = ctxt->input->line;
8746 oldcol = ctxt->input->col;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008747 nbatts = 0;
8748 nratts = 0;
8749 nbdef = 0;
8750 nbNs = 0;
8751 attval = 0;
Daniel Veillard365cf672005-06-09 08:18:24 +00008752 /* Forget any namespaces added during an earlier parse of this element. */
8753 ctxt->nsNr = nsNr;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008754
8755 localname = xmlParseQName(ctxt, &prefix);
8756 if (localname == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008757 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8758 "StartTag: invalid element name\n");
Daniel Veillard0fb18932003-09-07 09:14:37 +00008759 return(NULL);
8760 }
Daniel Veillardc82c57e2004-01-12 16:24:34 +00008761 *tlen = ctxt->input->cur - ctxt->input->base - cur;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008762
8763 /*
8764 * Now parse the attributes, it ends up with the ending
8765 *
8766 * (S Attribute)* S?
8767 */
8768 SKIP_BLANKS;
8769 GROW;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008770 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008771
8772 while ((RAW != '>') &&
8773 ((RAW != '/') || (NXT(1) != '>')) &&
Daniel Veillard73b013f2003-09-30 12:36:01 +00008774 (IS_BYTE_CHAR(RAW))) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00008775 const xmlChar *q = CUR_PTR;
8776 unsigned int cons = ctxt->input->consumed;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008777 int len = -1, alloc = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008778
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008779 attname = xmlParseAttribute2(ctxt, prefix, localname,
8780 &aprefix, &attvalue, &len, &alloc);
Daniel Veillarddcec6722006-10-15 20:32:53 +00008781 if (ctxt->input->base != base) {
8782 if ((attvalue != NULL) && (alloc != 0))
8783 xmlFree(attvalue);
8784 attvalue = NULL;
8785 goto base_changed;
8786 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008787 if ((attname != NULL) && (attvalue != NULL)) {
8788 if (len < 0) len = xmlStrlen(attvalue);
8789 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00008790 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
8791 xmlURIPtr uri;
8792
8793 if (*URL != 0) {
8794 uri = xmlParseURI((const char *) URL);
8795 if (uri == NULL) {
Daniel Veillard37334572008-07-31 08:20:02 +00008796 xmlNsErr(ctxt, XML_WAR_NS_URI,
8797 "xmlns: '%s' is not a valid URI\n",
8798 URL, NULL, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008799 } else {
Daniel Veillardda3fee42008-09-01 13:08:57 +00008800 if (uri->scheme == NULL) {
Daniel Veillard37334572008-07-31 08:20:02 +00008801 xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
8802 "xmlns: URI %s is not absolute\n",
8803 URL, NULL, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008804 }
8805 xmlFreeURI(uri);
8806 }
Daniel Veillard37334572008-07-31 08:20:02 +00008807 if (URL == ctxt->str_xml_ns) {
8808 if (attname != ctxt->str_xml) {
8809 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8810 "xml namespace URI cannot be the default namespace\n",
8811 NULL, NULL, NULL);
8812 }
8813 goto skip_default_ns;
8814 }
8815 if ((len == 29) &&
8816 (xmlStrEqual(URL,
8817 BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
8818 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8819 "reuse of the xmlns namespace name is forbidden\n",
8820 NULL, NULL, NULL);
8821 goto skip_default_ns;
8822 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00008823 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008824 /*
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008825 * check that it's not a defined namespace
Daniel Veillard0fb18932003-09-07 09:14:37 +00008826 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008827 for (j = 1;j <= nbNs;j++)
8828 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
8829 break;
8830 if (j <= nbNs)
8831 xmlErrAttributeDup(ctxt, NULL, attname);
8832 else
8833 if (nsPush(ctxt, NULL, URL) > 0) nbNs++;
Daniel Veillard37334572008-07-31 08:20:02 +00008834skip_default_ns:
Daniel Veillarde57ec792003-09-10 10:50:59 +00008835 if (alloc != 0) xmlFree(attvalue);
8836 SKIP_BLANKS;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008837 continue;
8838 }
8839 if (aprefix == ctxt->str_xmlns) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00008840 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
8841 xmlURIPtr uri;
8842
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008843 if (attname == ctxt->str_xml) {
8844 if (URL != ctxt->str_xml_ns) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008845 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8846 "xml namespace prefix mapped to wrong URI\n",
8847 NULL, NULL, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008848 }
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008849 /*
8850 * Do not keep a namespace definition node
8851 */
Daniel Veillard37334572008-07-31 08:20:02 +00008852 goto skip_ns;
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008853 }
Daniel Veillard37334572008-07-31 08:20:02 +00008854 if (URL == ctxt->str_xml_ns) {
8855 if (attname != ctxt->str_xml) {
8856 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8857 "xml namespace URI mapped to wrong prefix\n",
8858 NULL, NULL, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008859 }
Daniel Veillard37334572008-07-31 08:20:02 +00008860 goto skip_ns;
8861 }
8862 if (attname == ctxt->str_xmlns) {
8863 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8864 "redefinition of the xmlns prefix is forbidden\n",
8865 NULL, NULL, NULL);
8866 goto skip_ns;
8867 }
8868 if ((len == 29) &&
8869 (xmlStrEqual(URL,
8870 BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
8871 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8872 "reuse of the xmlns namespace name is forbidden\n",
8873 NULL, NULL, NULL);
8874 goto skip_ns;
8875 }
8876 if ((URL == NULL) || (URL[0] == 0)) {
8877 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8878 "xmlns:%s: Empty XML namespace is not allowed\n",
8879 attname, NULL, NULL);
8880 goto skip_ns;
8881 } else {
8882 uri = xmlParseURI((const char *) URL);
8883 if (uri == NULL) {
8884 xmlNsErr(ctxt, XML_WAR_NS_URI,
8885 "xmlns:%s: '%s' is not a valid URI\n",
8886 attname, URL, NULL);
8887 } else {
8888 if ((ctxt->pedantic) && (uri->scheme == NULL)) {
8889 xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
8890 "xmlns:%s: URI %s is not absolute\n",
8891 attname, URL, NULL);
8892 }
8893 xmlFreeURI(uri);
8894 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00008895 }
8896
Daniel Veillard0fb18932003-09-07 09:14:37 +00008897 /*
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008898 * check that it's not a defined namespace
Daniel Veillard0fb18932003-09-07 09:14:37 +00008899 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008900 for (j = 1;j <= nbNs;j++)
8901 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
8902 break;
8903 if (j <= nbNs)
8904 xmlErrAttributeDup(ctxt, aprefix, attname);
8905 else
8906 if (nsPush(ctxt, attname, URL) > 0) nbNs++;
Daniel Veillard37334572008-07-31 08:20:02 +00008907skip_ns:
Daniel Veillarde57ec792003-09-10 10:50:59 +00008908 if (alloc != 0) xmlFree(attvalue);
8909 SKIP_BLANKS;
Daniel Veillardd3999c72004-03-10 16:27:03 +00008910 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008911 continue;
8912 }
8913
8914 /*
8915 * Add the pair to atts
8916 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00008917 if ((atts == NULL) || (nbatts + 5 > maxatts)) {
8918 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00008919 if (attvalue[len] == 0)
8920 xmlFree(attvalue);
8921 goto failed;
8922 }
8923 maxatts = ctxt->maxatts;
8924 atts = ctxt->atts;
8925 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00008926 ctxt->attallocs[nratts++] = alloc;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008927 atts[nbatts++] = attname;
8928 atts[nbatts++] = aprefix;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008929 atts[nbatts++] = NULL; /* the URI will be fetched later */
Daniel Veillard0fb18932003-09-07 09:14:37 +00008930 atts[nbatts++] = attvalue;
8931 attvalue += len;
8932 atts[nbatts++] = attvalue;
8933 /*
8934 * tag if some deallocation is needed
8935 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00008936 if (alloc != 0) attval = 1;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008937 } else {
8938 if ((attvalue != NULL) && (attvalue[len] == 0))
8939 xmlFree(attvalue);
8940 }
8941
Daniel Veillard37334572008-07-31 08:20:02 +00008942failed:
Daniel Veillard0fb18932003-09-07 09:14:37 +00008943
8944 GROW
Daniel Veillarde57ec792003-09-10 10:50:59 +00008945 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008946 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
8947 break;
William M. Brack76e95df2003-10-18 16:20:14 +00008948 if (!IS_BLANK_CH(RAW)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008949 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
8950 "attributes construct error\n");
William M. Brack13dfa872004-09-18 04:52:08 +00008951 break;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008952 }
8953 SKIP_BLANKS;
8954 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
8955 (attname == NULL) && (attvalue == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008956 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
Daniel Veillard0fb18932003-09-07 09:14:37 +00008957 "xmlParseStartTag: problem parsing attributes\n");
Daniel Veillard0fb18932003-09-07 09:14:37 +00008958 break;
8959 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008960 GROW;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008961 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008962 }
8963
Daniel Veillard0fb18932003-09-07 09:14:37 +00008964 /*
Daniel Veillarde57ec792003-09-10 10:50:59 +00008965 * The attributes defaulting
8966 */
8967 if (ctxt->attsDefault != NULL) {
8968 xmlDefAttrsPtr defaults;
8969
8970 defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix);
8971 if (defaults != NULL) {
8972 for (i = 0;i < defaults->nbAttrs;i++) {
Daniel Veillardae0765b2008-07-31 19:54:59 +00008973 attname = defaults->values[5 * i];
8974 aprefix = defaults->values[5 * i + 1];
Daniel Veillarde57ec792003-09-10 10:50:59 +00008975
8976 /*
8977 * special work for namespaces defaulted defs
8978 */
8979 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
8980 /*
8981 * check that it's not a defined namespace
8982 */
8983 for (j = 1;j <= nbNs;j++)
8984 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
8985 break;
8986 if (j <= nbNs) continue;
8987
8988 nsname = xmlGetNamespace(ctxt, NULL);
Daniel Veillardae0765b2008-07-31 19:54:59 +00008989 if (nsname != defaults->values[5 * i + 2]) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00008990 if (nsPush(ctxt, NULL,
Daniel Veillardae0765b2008-07-31 19:54:59 +00008991 defaults->values[5 * i + 2]) > 0)
Daniel Veillarde57ec792003-09-10 10:50:59 +00008992 nbNs++;
8993 }
8994 } else if (aprefix == ctxt->str_xmlns) {
8995 /*
8996 * check that it's not a defined namespace
8997 */
8998 for (j = 1;j <= nbNs;j++)
8999 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
9000 break;
9001 if (j <= nbNs) continue;
9002
9003 nsname = xmlGetNamespace(ctxt, attname);
9004 if (nsname != defaults->values[2]) {
9005 if (nsPush(ctxt, attname,
Daniel Veillardae0765b2008-07-31 19:54:59 +00009006 defaults->values[5 * i + 2]) > 0)
Daniel Veillarde57ec792003-09-10 10:50:59 +00009007 nbNs++;
9008 }
9009 } else {
9010 /*
9011 * check that it's not a defined attribute
9012 */
9013 for (j = 0;j < nbatts;j+=5) {
9014 if ((attname == atts[j]) && (aprefix == atts[j+1]))
9015 break;
9016 }
9017 if (j < nbatts) continue;
9018
9019 if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9020 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
Daniel Veillard9ee35f32003-09-28 00:19:54 +00009021 return(NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00009022 }
9023 maxatts = ctxt->maxatts;
9024 atts = ctxt->atts;
9025 }
9026 atts[nbatts++] = attname;
9027 atts[nbatts++] = aprefix;
9028 if (aprefix == NULL)
9029 atts[nbatts++] = NULL;
9030 else
9031 atts[nbatts++] = xmlGetNamespace(ctxt, aprefix);
Daniel Veillardae0765b2008-07-31 19:54:59 +00009032 atts[nbatts++] = defaults->values[5 * i + 2];
9033 atts[nbatts++] = defaults->values[5 * i + 3];
9034 if ((ctxt->standalone == 1) &&
9035 (defaults->values[5 * i + 4] != NULL)) {
9036 xmlValidityError(ctxt, XML_DTD_STANDALONE_DEFAULTED,
9037 "standalone: attribute %s on %s defaulted from external subset\n",
9038 attname, localname);
9039 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00009040 nbdef++;
9041 }
9042 }
9043 }
9044 }
9045
Daniel Veillarde70c8772003-11-25 07:21:18 +00009046 /*
9047 * The attributes checkings
9048 */
9049 for (i = 0; i < nbatts;i += 5) {
Kasimier T. Buchcik455472f2005-04-29 10:04:43 +00009050 /*
9051 * The default namespace does not apply to attribute names.
9052 */
9053 if (atts[i + 1] != NULL) {
9054 nsname = xmlGetNamespace(ctxt, atts[i + 1]);
9055 if (nsname == NULL) {
9056 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9057 "Namespace prefix %s for %s on %s is not defined\n",
9058 atts[i + 1], atts[i], localname);
9059 }
9060 atts[i + 2] = nsname;
9061 } else
9062 nsname = NULL;
Daniel Veillarde70c8772003-11-25 07:21:18 +00009063 /*
9064 * [ WFC: Unique Att Spec ]
9065 * No attribute name may appear more than once in the same
9066 * start-tag or empty-element tag.
9067 * As extended by the Namespace in XML REC.
9068 */
9069 for (j = 0; j < i;j += 5) {
9070 if (atts[i] == atts[j]) {
9071 if (atts[i+1] == atts[j+1]) {
9072 xmlErrAttributeDup(ctxt, atts[i+1], atts[i]);
9073 break;
9074 }
9075 if ((nsname != NULL) && (atts[j + 2] == nsname)) {
9076 xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED,
9077 "Namespaced Attribute %s in '%s' redefined\n",
9078 atts[i], nsname, NULL);
9079 break;
9080 }
9081 }
9082 }
9083 }
9084
Daniel Veillarde57ec792003-09-10 10:50:59 +00009085 nsname = xmlGetNamespace(ctxt, prefix);
9086 if ((prefix != NULL) && (nsname == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009087 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9088 "Namespace prefix %s on %s is not defined\n",
9089 prefix, localname, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00009090 }
9091 *pref = prefix;
9092 *URI = nsname;
9093
9094 /*
9095 * SAX: Start of Element !
9096 */
9097 if ((ctxt->sax != NULL) && (ctxt->sax->startElementNs != NULL) &&
9098 (!ctxt->disableSAX)) {
9099 if (nbNs > 0)
9100 ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
9101 nsname, nbNs, &ctxt->nsTab[ctxt->nsNr - 2 * nbNs],
9102 nbatts / 5, nbdef, atts);
9103 else
9104 ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
9105 nsname, 0, NULL, nbatts / 5, nbdef, atts);
9106 }
9107
9108 /*
9109 * Free up attribute allocated strings if needed
9110 */
9111 if (attval != 0) {
9112 for (i = 3,j = 0; j < nratts;i += 5,j++)
9113 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
9114 xmlFree((xmlChar *) atts[i]);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009115 }
9116
9117 return(localname);
Daniel Veillarde57ec792003-09-10 10:50:59 +00009118
9119base_changed:
9120 /*
9121 * the attribute strings are valid iif the base didn't changed
9122 */
9123 if (attval != 0) {
9124 for (i = 3,j = 0; j < nratts;i += 5,j++)
9125 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
9126 xmlFree((xmlChar *) atts[i]);
9127 }
9128 ctxt->input->cur = ctxt->input->base + cur;
Daniel Veillarddcec6722006-10-15 20:32:53 +00009129 ctxt->input->line = oldline;
9130 ctxt->input->col = oldcol;
Daniel Veillarde57ec792003-09-10 10:50:59 +00009131 if (ctxt->wellFormed == 1) {
9132 goto reparse;
9133 }
9134 return(NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009135}
9136
9137/**
9138 * xmlParseEndTag2:
9139 * @ctxt: an XML parser context
9140 * @line: line of the start tag
9141 * @nsNr: number of namespaces on the start tag
9142 *
9143 * parse an end of tag
9144 *
9145 * [42] ETag ::= '</' Name S? '>'
9146 *
9147 * With namespace
9148 *
9149 * [NS 9] ETag ::= '</' QName S? '>'
9150 */
9151
9152static void
9153xmlParseEndTag2(xmlParserCtxtPtr ctxt, const xmlChar *prefix,
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009154 const xmlChar *URI, int line, int nsNr, int tlen) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00009155 const xmlChar *name;
9156
9157 GROW;
9158 if ((RAW != '<') || (NXT(1) != '/')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009159 xmlFatalErr(ctxt, XML_ERR_LTSLASH_REQUIRED, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009160 return;
9161 }
9162 SKIP(2);
9163
William M. Brack13dfa872004-09-18 04:52:08 +00009164 if ((tlen > 0) && (xmlStrncmp(ctxt->input->cur, ctxt->name, tlen) == 0)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009165 if (ctxt->input->cur[tlen] == '>') {
9166 ctxt->input->cur += tlen + 1;
9167 goto done;
9168 }
9169 ctxt->input->cur += tlen;
9170 name = (xmlChar*)1;
9171 } else {
9172 if (prefix == NULL)
9173 name = xmlParseNameAndCompare(ctxt, ctxt->name);
9174 else
9175 name = xmlParseQNameAndCompare(ctxt, ctxt->name, prefix);
9176 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00009177
9178 /*
9179 * We should definitely be at the ending "S? '>'" part
9180 */
9181 GROW;
9182 SKIP_BLANKS;
Daniel Veillard73b013f2003-09-30 12:36:01 +00009183 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009184 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009185 } else
9186 NEXT1;
9187
9188 /*
9189 * [ WFC: Element Type Match ]
9190 * The Name in an element's end-tag must match the element type in the
9191 * start-tag.
9192 *
9193 */
9194 if (name != (xmlChar*)1) {
Daniel Veillardf403d292003-10-05 13:51:35 +00009195 if (name == NULL) name = BAD_CAST "unparseable";
Daniel Veillard852505b2009-08-23 15:44:48 +02009196 if ((line == 0) && (ctxt->node != NULL))
9197 line = ctxt->node->line;
Daniel Veillardf403d292003-10-05 13:51:35 +00009198 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
Daniel Veillard0fb18932003-09-07 09:14:37 +00009199 "Opening and ending tag mismatch: %s line %d and %s\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00009200 ctxt->name, line, name);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009201 }
9202
9203 /*
9204 * SAX: End of Tag
9205 */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009206done:
Daniel Veillard0fb18932003-09-07 09:14:37 +00009207 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
9208 (!ctxt->disableSAX))
9209 ctxt->sax->endElementNs(ctxt->userData, ctxt->name, prefix, URI);
9210
Daniel Veillard0fb18932003-09-07 09:14:37 +00009211 spacePop(ctxt);
9212 if (nsNr != 0)
9213 nsPop(ctxt, nsNr);
9214 return;
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00009215}
9216
9217/**
Owen Taylor3473f882001-02-23 17:55:21 +00009218 * xmlParseCDSect:
9219 * @ctxt: an XML parser context
9220 *
9221 * Parse escaped pure raw content.
9222 *
9223 * [18] CDSect ::= CDStart CData CDEnd
9224 *
9225 * [19] CDStart ::= '<![CDATA['
9226 *
9227 * [20] Data ::= (Char* - (Char* ']]>' Char*))
9228 *
9229 * [21] CDEnd ::= ']]>'
9230 */
9231void
9232xmlParseCDSect(xmlParserCtxtPtr ctxt) {
9233 xmlChar *buf = NULL;
9234 int len = 0;
9235 int size = XML_PARSER_BUFFER_SIZE;
9236 int r, rl;
9237 int s, sl;
9238 int cur, l;
9239 int count = 0;
9240
Daniel Veillard8f597c32003-10-06 08:19:27 +00009241 /* Check 2.6.0 was NXT(0) not RAW */
Daniel Veillarda07050d2003-10-19 14:46:32 +00009242 if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009243 SKIP(9);
9244 } else
9245 return;
9246
9247 ctxt->instate = XML_PARSER_CDATA_SECTION;
9248 r = CUR_CHAR(rl);
William M. Brack871611b2003-10-18 04:53:14 +00009249 if (!IS_CHAR(r)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009250 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009251 ctxt->instate = XML_PARSER_CONTENT;
9252 return;
9253 }
9254 NEXTL(rl);
9255 s = CUR_CHAR(sl);
William M. Brack871611b2003-10-18 04:53:14 +00009256 if (!IS_CHAR(s)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009257 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009258 ctxt->instate = XML_PARSER_CONTENT;
9259 return;
9260 }
9261 NEXTL(sl);
9262 cur = CUR_CHAR(l);
Daniel Veillard3c908dc2003-04-19 00:07:51 +00009263 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00009264 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009265 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009266 return;
9267 }
William M. Brack871611b2003-10-18 04:53:14 +00009268 while (IS_CHAR(cur) &&
Owen Taylor3473f882001-02-23 17:55:21 +00009269 ((r != ']') || (s != ']') || (cur != '>'))) {
9270 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00009271 xmlChar *tmp;
9272
Owen Taylor3473f882001-02-23 17:55:21 +00009273 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00009274 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
9275 if (tmp == NULL) {
9276 xmlFree(buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009277 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009278 return;
9279 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00009280 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00009281 }
9282 COPY_BUF(rl,buf,len,r);
9283 r = s;
9284 rl = sl;
9285 s = cur;
9286 sl = l;
9287 count++;
9288 if (count > 50) {
9289 GROW;
9290 count = 0;
9291 }
9292 NEXTL(l);
9293 cur = CUR_CHAR(l);
9294 }
9295 buf[len] = 0;
9296 ctxt->instate = XML_PARSER_CONTENT;
9297 if (cur != '>') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00009298 xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00009299 "CData section not finished\n%.50s\n", buf);
Owen Taylor3473f882001-02-23 17:55:21 +00009300 xmlFree(buf);
9301 return;
9302 }
9303 NEXTL(l);
9304
9305 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00009306 * OK the buffer is to be consumed as cdata.
Owen Taylor3473f882001-02-23 17:55:21 +00009307 */
9308 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
9309 if (ctxt->sax->cdataBlock != NULL)
9310 ctxt->sax->cdataBlock(ctxt->userData, buf, len);
Daniel Veillard7583a592001-07-08 13:15:55 +00009311 else if (ctxt->sax->characters != NULL)
9312 ctxt->sax->characters(ctxt->userData, buf, len);
Owen Taylor3473f882001-02-23 17:55:21 +00009313 }
9314 xmlFree(buf);
9315}
9316
9317/**
9318 * xmlParseContent:
9319 * @ctxt: an XML parser context
9320 *
9321 * Parse a content:
9322 *
9323 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
9324 */
9325
9326void
9327xmlParseContent(xmlParserCtxtPtr ctxt) {
9328 GROW;
Daniel Veillardfdc91562002-07-01 21:52:03 +00009329 while ((RAW != 0) &&
Daniel Veillard4a9fe382006-09-19 12:44:35 +00009330 ((RAW != '<') || (NXT(1) != '/')) &&
9331 (ctxt->instate != XML_PARSER_EOF)) {
Owen Taylor3473f882001-02-23 17:55:21 +00009332 const xmlChar *test = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00009333 unsigned int cons = ctxt->input->consumed;
Daniel Veillard21a0f912001-02-25 19:54:14 +00009334 const xmlChar *cur = ctxt->input->cur;
Owen Taylor3473f882001-02-23 17:55:21 +00009335
9336 /*
Owen Taylor3473f882001-02-23 17:55:21 +00009337 * First case : a Processing Instruction.
9338 */
Daniel Veillardfdc91562002-07-01 21:52:03 +00009339 if ((*cur == '<') && (cur[1] == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009340 xmlParsePI(ctxt);
9341 }
9342
9343 /*
9344 * Second case : a CDSection
9345 */
Daniel Veillard8f597c32003-10-06 08:19:27 +00009346 /* 2.6.0 test was *cur not RAW */
Daniel Veillarda07050d2003-10-19 14:46:32 +00009347 else if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009348 xmlParseCDSect(ctxt);
9349 }
9350
9351 /*
9352 * Third case : a comment
9353 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00009354 else if ((*cur == '<') && (NXT(1) == '!') &&
Owen Taylor3473f882001-02-23 17:55:21 +00009355 (NXT(2) == '-') && (NXT(3) == '-')) {
9356 xmlParseComment(ctxt);
9357 ctxt->instate = XML_PARSER_CONTENT;
9358 }
9359
9360 /*
9361 * Fourth case : a sub-element.
9362 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00009363 else if (*cur == '<') {
Owen Taylor3473f882001-02-23 17:55:21 +00009364 xmlParseElement(ctxt);
9365 }
9366
9367 /*
9368 * Fifth case : a reference. If if has not been resolved,
9369 * parsing returns it's Name, create the node
9370 */
9371
Daniel Veillard21a0f912001-02-25 19:54:14 +00009372 else if (*cur == '&') {
Owen Taylor3473f882001-02-23 17:55:21 +00009373 xmlParseReference(ctxt);
9374 }
9375
9376 /*
9377 * Last case, text. Note that References are handled directly.
9378 */
9379 else {
9380 xmlParseCharData(ctxt, 0);
9381 }
9382
9383 GROW;
9384 /*
9385 * Pop-up of finished entities.
9386 */
Daniel Veillard561b7f82002-03-20 21:55:57 +00009387 while ((RAW == 0) && (ctxt->inputNr > 1))
Owen Taylor3473f882001-02-23 17:55:21 +00009388 xmlPopInput(ctxt);
9389 SHRINK;
9390
Daniel Veillardfdc91562002-07-01 21:52:03 +00009391 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009392 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
9393 "detected an error in element content\n");
Owen Taylor3473f882001-02-23 17:55:21 +00009394 ctxt->instate = XML_PARSER_EOF;
9395 break;
9396 }
9397 }
9398}
9399
9400/**
9401 * xmlParseElement:
9402 * @ctxt: an XML parser context
9403 *
9404 * parse an XML element, this is highly recursive
9405 *
9406 * [39] element ::= EmptyElemTag | STag content ETag
9407 *
9408 * [ WFC: Element Type Match ]
9409 * The Name in an element's end-tag must match the element type in the
9410 * start-tag.
9411 *
Owen Taylor3473f882001-02-23 17:55:21 +00009412 */
9413
9414void
9415xmlParseElement(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00009416 const xmlChar *name;
Daniel Veillard15495612009-09-05 15:04:41 +02009417 const xmlChar *prefix = NULL;
9418 const xmlChar *URI = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00009419 xmlParserNodeInfo node_info;
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009420 int line, tlen;
Owen Taylor3473f882001-02-23 17:55:21 +00009421 xmlNodePtr ret;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009422 int nsNr = ctxt->nsNr;
Owen Taylor3473f882001-02-23 17:55:21 +00009423
Daniel Veillard8915c152008-08-26 13:05:34 +00009424 if (((unsigned int) ctxt->nameNr > xmlParserMaxDepth) &&
9425 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9426 xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
9427 "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
9428 xmlParserMaxDepth);
Daniel Veillard4a9fe382006-09-19 12:44:35 +00009429 ctxt->instate = XML_PARSER_EOF;
9430 return;
9431 }
9432
Owen Taylor3473f882001-02-23 17:55:21 +00009433 /* Capture start position */
9434 if (ctxt->record_info) {
9435 node_info.begin_pos = ctxt->input->consumed +
9436 (CUR_PTR - ctxt->input->base);
9437 node_info.begin_line = ctxt->input->line;
9438 }
9439
9440 if (ctxt->spaceNr == 0)
9441 spacePush(ctxt, -1);
Daniel Veillard1114d002006-10-12 16:24:35 +00009442 else if (*ctxt->space == -2)
9443 spacePush(ctxt, -1);
Owen Taylor3473f882001-02-23 17:55:21 +00009444 else
9445 spacePush(ctxt, *ctxt->space);
9446
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00009447 line = ctxt->input->line;
Daniel Veillard81273902003-09-30 00:43:48 +00009448#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +00009449 if (ctxt->sax2)
Daniel Veillard81273902003-09-30 00:43:48 +00009450#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009451 name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
Daniel Veillard81273902003-09-30 00:43:48 +00009452#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +00009453 else
9454 name = xmlParseStartTag(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +00009455#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00009456 if (name == NULL) {
9457 spacePop(ctxt);
9458 return;
9459 }
9460 namePush(ctxt, name);
9461 ret = ctxt->node;
9462
Daniel Veillard4432df22003-09-28 18:58:27 +00009463#ifdef LIBXML_VALID_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +00009464 /*
9465 * [ VC: Root Element Type ]
9466 * The Name in the document type declaration must match the element
9467 * type of the root element.
9468 */
9469 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
9470 ctxt->node && (ctxt->node == ctxt->myDoc->children))
9471 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
Daniel Veillard4432df22003-09-28 18:58:27 +00009472#endif /* LIBXML_VALID_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00009473
9474 /*
9475 * Check for an Empty Element.
9476 */
9477 if ((RAW == '/') && (NXT(1) == '>')) {
9478 SKIP(2);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009479 if (ctxt->sax2) {
9480 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
9481 (!ctxt->disableSAX))
9482 ctxt->sax->endElementNs(ctxt->userData, name, prefix, URI);
Daniel Veillard81273902003-09-30 00:43:48 +00009483#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +00009484 } else {
9485 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
9486 (!ctxt->disableSAX))
9487 ctxt->sax->endElement(ctxt->userData, name);
Daniel Veillard81273902003-09-30 00:43:48 +00009488#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00009489 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00009490 namePop(ctxt);
9491 spacePop(ctxt);
9492 if (nsNr != ctxt->nsNr)
9493 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +00009494 if ( ret != NULL && ctxt->record_info ) {
9495 node_info.end_pos = ctxt->input->consumed +
9496 (CUR_PTR - ctxt->input->base);
9497 node_info.end_line = ctxt->input->line;
9498 node_info.node = ret;
9499 xmlParserAddNodeInfo(ctxt, &node_info);
9500 }
9501 return;
9502 }
9503 if (RAW == '>') {
Daniel Veillard21a0f912001-02-25 19:54:14 +00009504 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00009505 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00009506 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_GT_REQUIRED,
9507 "Couldn't find end of Start Tag %s line %d\n",
9508 name, line, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009509
9510 /*
9511 * end of parsing of this node.
9512 */
9513 nodePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009514 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00009515 spacePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009516 if (nsNr != ctxt->nsNr)
9517 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +00009518
9519 /*
9520 * Capture end position and add node
9521 */
9522 if ( ret != NULL && ctxt->record_info ) {
9523 node_info.end_pos = ctxt->input->consumed +
9524 (CUR_PTR - ctxt->input->base);
9525 node_info.end_line = ctxt->input->line;
9526 node_info.node = ret;
9527 xmlParserAddNodeInfo(ctxt, &node_info);
9528 }
9529 return;
9530 }
9531
9532 /*
9533 * Parse the content of the element:
9534 */
9535 xmlParseContent(ctxt);
Daniel Veillard73b013f2003-09-30 12:36:01 +00009536 if (!IS_BYTE_CHAR(RAW)) {
Daniel Veillardf403d292003-10-05 13:51:35 +00009537 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
Daniel Veillard2b0f8792003-10-10 19:36:36 +00009538 "Premature end of data in tag %s line %d\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00009539 name, line, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009540
9541 /*
9542 * end of parsing of this node.
9543 */
9544 nodePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009545 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00009546 spacePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009547 if (nsNr != ctxt->nsNr)
9548 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +00009549 return;
9550 }
9551
9552 /*
9553 * parse the end of tag: '</' should be here.
9554 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00009555 if (ctxt->sax2) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009556 xmlParseEndTag2(ctxt, prefix, URI, line, ctxt->nsNr - nsNr, tlen);
Daniel Veillarde57ec792003-09-10 10:50:59 +00009557 namePop(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +00009558 }
9559#ifdef LIBXML_SAX1_ENABLED
9560 else
Daniel Veillard0fb18932003-09-07 09:14:37 +00009561 xmlParseEndTag1(ctxt, line);
Daniel Veillard81273902003-09-30 00:43:48 +00009562#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00009563
9564 /*
9565 * Capture end position and add node
9566 */
9567 if ( ret != NULL && ctxt->record_info ) {
9568 node_info.end_pos = ctxt->input->consumed +
9569 (CUR_PTR - ctxt->input->base);
9570 node_info.end_line = ctxt->input->line;
9571 node_info.node = ret;
9572 xmlParserAddNodeInfo(ctxt, &node_info);
9573 }
9574}
9575
9576/**
9577 * xmlParseVersionNum:
9578 * @ctxt: an XML parser context
9579 *
9580 * parse the XML version value.
9581 *
Daniel Veillard34e3f642008-07-29 09:02:27 +00009582 * [26] VersionNum ::= '1.' [0-9]+
9583 *
9584 * In practice allow [0-9].[0-9]+ at that level
Owen Taylor3473f882001-02-23 17:55:21 +00009585 *
9586 * Returns the string giving the XML version number, or NULL
9587 */
9588xmlChar *
9589xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
9590 xmlChar *buf = NULL;
9591 int len = 0;
9592 int size = 10;
9593 xmlChar cur;
9594
Daniel Veillard3c908dc2003-04-19 00:07:51 +00009595 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00009596 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009597 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009598 return(NULL);
9599 }
9600 cur = CUR;
Daniel Veillard34e3f642008-07-29 09:02:27 +00009601 if (!((cur >= '0') && (cur <= '9'))) {
Daniel Veillard7e5c3f42008-07-29 16:12:31 +00009602 xmlFree(buf);
Daniel Veillard34e3f642008-07-29 09:02:27 +00009603 return(NULL);
9604 }
9605 buf[len++] = cur;
9606 NEXT;
9607 cur=CUR;
9608 if (cur != '.') {
Daniel Veillard7e5c3f42008-07-29 16:12:31 +00009609 xmlFree(buf);
Daniel Veillard34e3f642008-07-29 09:02:27 +00009610 return(NULL);
9611 }
9612 buf[len++] = cur;
9613 NEXT;
9614 cur=CUR;
9615 while ((cur >= '0') && (cur <= '9')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009616 if (len + 1 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00009617 xmlChar *tmp;
9618
Owen Taylor3473f882001-02-23 17:55:21 +00009619 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00009620 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
9621 if (tmp == NULL) {
Daniel Veillard68b6e022008-03-31 09:26:00 +00009622 xmlFree(buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009623 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009624 return(NULL);
9625 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00009626 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00009627 }
9628 buf[len++] = cur;
9629 NEXT;
9630 cur=CUR;
9631 }
9632 buf[len] = 0;
9633 return(buf);
9634}
9635
9636/**
9637 * xmlParseVersionInfo:
9638 * @ctxt: an XML parser context
Daniel Veillard68b6e022008-03-31 09:26:00 +00009639 *
Owen Taylor3473f882001-02-23 17:55:21 +00009640 * parse the XML version.
9641 *
9642 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
Daniel Veillard68b6e022008-03-31 09:26:00 +00009643 *
Owen Taylor3473f882001-02-23 17:55:21 +00009644 * [25] Eq ::= S? '=' S?
9645 *
9646 * Returns the version string, e.g. "1.0"
9647 */
9648
9649xmlChar *
9650xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
9651 xmlChar *version = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00009652
Daniel Veillarda07050d2003-10-19 14:46:32 +00009653 if (CMP7(CUR_PTR, 'v', 'e', 'r', 's', 'i', 'o', 'n')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009654 SKIP(7);
9655 SKIP_BLANKS;
9656 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009657 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009658 return(NULL);
9659 }
9660 NEXT;
9661 SKIP_BLANKS;
9662 if (RAW == '"') {
9663 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00009664 version = xmlParseVersionNum(ctxt);
9665 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009666 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009667 } else
9668 NEXT;
9669 } else if (RAW == '\''){
9670 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00009671 version = xmlParseVersionNum(ctxt);
9672 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009673 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009674 } else
9675 NEXT;
9676 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009677 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009678 }
9679 }
9680 return(version);
9681}
9682
9683/**
9684 * xmlParseEncName:
9685 * @ctxt: an XML parser context
9686 *
9687 * parse the XML encoding name
9688 *
9689 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
9690 *
9691 * Returns the encoding name value or NULL
9692 */
9693xmlChar *
9694xmlParseEncName(xmlParserCtxtPtr ctxt) {
9695 xmlChar *buf = NULL;
9696 int len = 0;
9697 int size = 10;
9698 xmlChar cur;
9699
9700 cur = CUR;
9701 if (((cur >= 'a') && (cur <= 'z')) ||
9702 ((cur >= 'A') && (cur <= 'Z'))) {
Daniel Veillard3c908dc2003-04-19 00:07:51 +00009703 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00009704 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009705 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009706 return(NULL);
9707 }
Daniel Veillard34e3f642008-07-29 09:02:27 +00009708
Owen Taylor3473f882001-02-23 17:55:21 +00009709 buf[len++] = cur;
9710 NEXT;
9711 cur = CUR;
9712 while (((cur >= 'a') && (cur <= 'z')) ||
9713 ((cur >= 'A') && (cur <= 'Z')) ||
9714 ((cur >= '0') && (cur <= '9')) ||
9715 (cur == '.') || (cur == '_') ||
9716 (cur == '-')) {
9717 if (len + 1 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00009718 xmlChar *tmp;
9719
Owen Taylor3473f882001-02-23 17:55:21 +00009720 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00009721 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
9722 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009723 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00009724 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00009725 return(NULL);
9726 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00009727 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00009728 }
9729 buf[len++] = cur;
9730 NEXT;
9731 cur = CUR;
9732 if (cur == 0) {
9733 SHRINK;
9734 GROW;
9735 cur = CUR;
9736 }
9737 }
9738 buf[len] = 0;
9739 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009740 xmlFatalErr(ctxt, XML_ERR_ENCODING_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009741 }
9742 return(buf);
9743}
9744
9745/**
9746 * xmlParseEncodingDecl:
9747 * @ctxt: an XML parser context
9748 *
9749 * parse the XML encoding declaration
9750 *
9751 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
9752 *
9753 * this setups the conversion filters.
9754 *
9755 * Returns the encoding value or NULL
9756 */
9757
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00009758const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00009759xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
9760 xmlChar *encoding = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00009761
9762 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00009763 if (CMP8(CUR_PTR, 'e', 'n', 'c', 'o', 'd', 'i', 'n', 'g')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009764 SKIP(8);
9765 SKIP_BLANKS;
9766 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009767 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009768 return(NULL);
9769 }
9770 NEXT;
9771 SKIP_BLANKS;
9772 if (RAW == '"') {
9773 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00009774 encoding = xmlParseEncName(ctxt);
9775 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009776 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009777 } else
9778 NEXT;
9779 } else if (RAW == '\''){
9780 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00009781 encoding = xmlParseEncName(ctxt);
9782 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009783 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009784 } else
9785 NEXT;
Daniel Veillard82ac6b02002-02-17 23:18:55 +00009786 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009787 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009788 }
Daniel Veillard6b621b82003-08-11 15:03:34 +00009789 /*
9790 * UTF-16 encoding stwich has already taken place at this stage,
9791 * more over the little-endian/big-endian selection is already done
9792 */
9793 if ((encoding != NULL) &&
9794 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-16")) ||
9795 (!xmlStrcasecmp(encoding, BAD_CAST "UTF16")))) {
Daniel Veillard37334572008-07-31 08:20:02 +00009796 /*
9797 * If no encoding was passed to the parser, that we are
9798 * using UTF-16 and no decoder is present i.e. the
9799 * document is apparently UTF-8 compatible, then raise an
9800 * encoding mismatch fatal error
9801 */
9802 if ((ctxt->encoding == NULL) &&
9803 (ctxt->input->buf != NULL) &&
9804 (ctxt->input->buf->encoder == NULL)) {
9805 xmlFatalErrMsg(ctxt, XML_ERR_INVALID_ENCODING,
9806 "Document labelled UTF-16 but has UTF-8 content\n");
9807 }
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00009808 if (ctxt->encoding != NULL)
9809 xmlFree((xmlChar *) ctxt->encoding);
9810 ctxt->encoding = encoding;
Daniel Veillardb19ba832003-08-14 00:33:46 +00009811 }
9812 /*
9813 * UTF-8 encoding is handled natively
9814 */
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00009815 else if ((encoding != NULL) &&
Daniel Veillardb19ba832003-08-14 00:33:46 +00009816 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-8")) ||
9817 (!xmlStrcasecmp(encoding, BAD_CAST "UTF8")))) {
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00009818 if (ctxt->encoding != NULL)
9819 xmlFree((xmlChar *) ctxt->encoding);
9820 ctxt->encoding = encoding;
Daniel Veillard6b621b82003-08-11 15:03:34 +00009821 }
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00009822 else if (encoding != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +00009823 xmlCharEncodingHandlerPtr handler;
9824
9825 if (ctxt->input->encoding != NULL)
9826 xmlFree((xmlChar *) ctxt->input->encoding);
9827 ctxt->input->encoding = encoding;
9828
Daniel Veillarda6874ca2003-07-29 16:47:24 +00009829 handler = xmlFindCharEncodingHandler((const char *) encoding);
9830 if (handler != NULL) {
9831 xmlSwitchToEncoding(ctxt, handler);
Owen Taylor3473f882001-02-23 17:55:21 +00009832 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00009833 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
Daniel Veillarda6874ca2003-07-29 16:47:24 +00009834 "Unsupported encoding %s\n", encoding);
9835 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009836 }
9837 }
9838 }
9839 return(encoding);
9840}
9841
9842/**
9843 * xmlParseSDDecl:
9844 * @ctxt: an XML parser context
9845 *
9846 * parse the XML standalone declaration
9847 *
9848 * [32] SDDecl ::= S 'standalone' Eq
9849 * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
9850 *
9851 * [ VC: Standalone Document Declaration ]
9852 * TODO The standalone document declaration must have the value "no"
9853 * if any external markup declarations contain declarations of:
9854 * - attributes with default values, if elements to which these
9855 * attributes apply appear in the document without specifications
9856 * of values for these attributes, or
9857 * - entities (other than amp, lt, gt, apos, quot), if references
9858 * to those entities appear in the document, or
9859 * - attributes with values subject to normalization, where the
9860 * attribute appears in the document with a value which will change
9861 * as a result of normalization, or
9862 * - element types with element content, if white space occurs directly
9863 * within any instance of those types.
9864 *
Daniel Veillard602f2bd2006-12-04 09:26:04 +00009865 * Returns:
9866 * 1 if standalone="yes"
9867 * 0 if standalone="no"
9868 * -2 if standalone attribute is missing or invalid
9869 * (A standalone value of -2 means that the XML declaration was found,
9870 * but no value was specified for the standalone attribute).
Owen Taylor3473f882001-02-23 17:55:21 +00009871 */
9872
9873int
9874xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard602f2bd2006-12-04 09:26:04 +00009875 int standalone = -2;
Owen Taylor3473f882001-02-23 17:55:21 +00009876
9877 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00009878 if (CMP10(CUR_PTR, 's', 't', 'a', 'n', 'd', 'a', 'l', 'o', 'n', 'e')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009879 SKIP(10);
9880 SKIP_BLANKS;
9881 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009882 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009883 return(standalone);
9884 }
9885 NEXT;
9886 SKIP_BLANKS;
9887 if (RAW == '\''){
9888 NEXT;
9889 if ((RAW == 'n') && (NXT(1) == 'o')) {
9890 standalone = 0;
9891 SKIP(2);
9892 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
9893 (NXT(2) == 's')) {
9894 standalone = 1;
9895 SKIP(3);
9896 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009897 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009898 }
9899 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009900 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009901 } else
9902 NEXT;
9903 } else if (RAW == '"'){
9904 NEXT;
9905 if ((RAW == 'n') && (NXT(1) == 'o')) {
9906 standalone = 0;
9907 SKIP(2);
9908 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
9909 (NXT(2) == 's')) {
9910 standalone = 1;
9911 SKIP(3);
9912 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009913 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009914 }
9915 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009916 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009917 } else
9918 NEXT;
9919 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009920 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009921 }
9922 }
9923 return(standalone);
9924}
9925
9926/**
9927 * xmlParseXMLDecl:
9928 * @ctxt: an XML parser context
9929 *
9930 * parse an XML declaration header
9931 *
9932 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
9933 */
9934
9935void
9936xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
9937 xmlChar *version;
9938
9939 /*
Daniel Veillardae487ba2005-11-17 07:25:52 +00009940 * This value for standalone indicates that the document has an
9941 * XML declaration but it does not have a standalone attribute.
9942 * It will be overwritten later if a standalone attribute is found.
9943 */
9944 ctxt->input->standalone = -2;
9945
9946 /*
Owen Taylor3473f882001-02-23 17:55:21 +00009947 * We know that '<?xml' is here.
9948 */
9949 SKIP(5);
9950
William M. Brack76e95df2003-10-18 16:20:14 +00009951 if (!IS_BLANK_CH(RAW)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009952 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
9953 "Blank needed after '<?xml'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00009954 }
9955 SKIP_BLANKS;
9956
9957 /*
Daniel Veillard19840942001-11-29 16:11:38 +00009958 * We must have the VersionInfo here.
Owen Taylor3473f882001-02-23 17:55:21 +00009959 */
9960 version = xmlParseVersionInfo(ctxt);
Daniel Veillard19840942001-11-29 16:11:38 +00009961 if (version == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009962 xmlFatalErr(ctxt, XML_ERR_VERSION_MISSING, NULL);
Daniel Veillard19840942001-11-29 16:11:38 +00009963 } else {
9964 if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) {
9965 /*
Daniel Veillard34e3f642008-07-29 09:02:27 +00009966 * Changed here for XML-1.0 5th edition
Daniel Veillard19840942001-11-29 16:11:38 +00009967 */
Daniel Veillard34e3f642008-07-29 09:02:27 +00009968 if (ctxt->options & XML_PARSE_OLD10) {
9969 xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
9970 "Unsupported version '%s'\n",
9971 version);
9972 } else {
9973 if ((version[0] == '1') && ((version[1] == '.'))) {
9974 xmlWarningMsg(ctxt, XML_WAR_UNKNOWN_VERSION,
9975 "Unsupported version '%s'\n",
9976 version, NULL);
9977 } else {
9978 xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
9979 "Unsupported version '%s'\n",
9980 version);
9981 }
9982 }
Daniel Veillard19840942001-11-29 16:11:38 +00009983 }
9984 if (ctxt->version != NULL)
Daniel Veillardd3b08822001-12-05 12:03:33 +00009985 xmlFree((void *) ctxt->version);
Daniel Veillard19840942001-11-29 16:11:38 +00009986 ctxt->version = version;
Daniel Veillarda050d232001-09-05 15:51:05 +00009987 }
Owen Taylor3473f882001-02-23 17:55:21 +00009988
9989 /*
9990 * We may have the encoding declaration
9991 */
William M. Brack76e95df2003-10-18 16:20:14 +00009992 if (!IS_BLANK_CH(RAW)) {
Owen Taylor3473f882001-02-23 17:55:21 +00009993 if ((RAW == '?') && (NXT(1) == '>')) {
9994 SKIP(2);
9995 return;
9996 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009997 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
Owen Taylor3473f882001-02-23 17:55:21 +00009998 }
9999 xmlParseEncodingDecl(ctxt);
10000 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
10001 /*
10002 * The XML REC instructs us to stop parsing right here
10003 */
10004 return;
10005 }
10006
10007 /*
10008 * We may have the standalone status.
10009 */
William M. Brack76e95df2003-10-18 16:20:14 +000010010 if ((ctxt->input->encoding != NULL) && (!IS_BLANK_CH(RAW))) {
Owen Taylor3473f882001-02-23 17:55:21 +000010011 if ((RAW == '?') && (NXT(1) == '>')) {
10012 SKIP(2);
10013 return;
10014 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +000010015 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
Owen Taylor3473f882001-02-23 17:55:21 +000010016 }
Daniel Veillarda6c76a22009-08-26 14:37:00 +020010017
10018 /*
10019 * We can grow the input buffer freely at that point
10020 */
10021 GROW;
10022
Owen Taylor3473f882001-02-23 17:55:21 +000010023 SKIP_BLANKS;
10024 ctxt->input->standalone = xmlParseSDDecl(ctxt);
10025
10026 SKIP_BLANKS;
10027 if ((RAW == '?') && (NXT(1) == '>')) {
10028 SKIP(2);
10029 } else if (RAW == '>') {
10030 /* Deprecated old WD ... */
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010031 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010032 NEXT;
10033 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010034 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010035 MOVETO_ENDTAG(CUR_PTR);
10036 NEXT;
10037 }
10038}
10039
10040/**
10041 * xmlParseMisc:
10042 * @ctxt: an XML parser context
10043 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +000010044 * parse an XML Misc* optional field.
Owen Taylor3473f882001-02-23 17:55:21 +000010045 *
10046 * [27] Misc ::= Comment | PI | S
10047 */
10048
10049void
10050xmlParseMisc(xmlParserCtxtPtr ctxt) {
Daniel Veillard561b7f82002-03-20 21:55:57 +000010051 while (((RAW == '<') && (NXT(1) == '?')) ||
Daniel Veillarda07050d2003-10-19 14:46:32 +000010052 (CMP4(CUR_PTR, '<', '!', '-', '-')) ||
William M. Brack76e95df2003-10-18 16:20:14 +000010053 IS_BLANK_CH(CUR)) {
Daniel Veillard561b7f82002-03-20 21:55:57 +000010054 if ((RAW == '<') && (NXT(1) == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +000010055 xmlParsePI(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +000010056 } else if (IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +000010057 NEXT;
10058 } else
10059 xmlParseComment(ctxt);
10060 }
10061}
10062
10063/**
10064 * xmlParseDocument:
10065 * @ctxt: an XML parser context
10066 *
10067 * parse an XML document (and build a tree if using the standard SAX
10068 * interface).
10069 *
10070 * [1] document ::= prolog element Misc*
10071 *
10072 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
10073 *
10074 * Returns 0, -1 in case of error. the parser context is augmented
10075 * as a result of the parsing.
10076 */
10077
10078int
10079xmlParseDocument(xmlParserCtxtPtr ctxt) {
10080 xmlChar start[4];
10081 xmlCharEncoding enc;
10082
10083 xmlInitParser();
10084
Daniel Veillard36e5cd52004-11-02 14:52:23 +000010085 if ((ctxt == NULL) || (ctxt->input == NULL))
10086 return(-1);
10087
Owen Taylor3473f882001-02-23 17:55:21 +000010088 GROW;
10089
10090 /*
Daniel Veillard0fb18932003-09-07 09:14:37 +000010091 * SAX: detecting the level.
10092 */
Daniel Veillarde57ec792003-09-10 10:50:59 +000010093 xmlDetectSAX2(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +000010094
10095 /*
Owen Taylor3473f882001-02-23 17:55:21 +000010096 * SAX: beginning of the document processing.
10097 */
10098 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10099 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
10100
Daniel Veillard4aede2e2003-10-17 12:43:59 +000010101 if ((ctxt->encoding == (const xmlChar *)XML_CHAR_ENCODING_NONE) &&
10102 ((ctxt->input->end - ctxt->input->cur) >= 4)) {
Daniel Veillard4aafa792001-07-28 17:21:12 +000010103 /*
10104 * Get the 4 first bytes and decode the charset
10105 * if enc != XML_CHAR_ENCODING_NONE
10106 * plug some encoding conversion routines.
10107 */
10108 start[0] = RAW;
10109 start[1] = NXT(1);
10110 start[2] = NXT(2);
10111 start[3] = NXT(3);
Daniel Veillard4aede2e2003-10-17 12:43:59 +000010112 enc = xmlDetectCharEncoding(&start[0], 4);
Daniel Veillard4aafa792001-07-28 17:21:12 +000010113 if (enc != XML_CHAR_ENCODING_NONE) {
10114 xmlSwitchEncoding(ctxt, enc);
10115 }
Owen Taylor3473f882001-02-23 17:55:21 +000010116 }
10117
10118
10119 if (CUR == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010120 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010121 }
10122
10123 /*
10124 * Check for the XMLDecl in the Prolog.
Daniel Veillard7e385bd2009-08-26 11:38:49 +020010125 * do not GROW here to avoid the detected encoder to decode more
10126 * than just the first line
Owen Taylor3473f882001-02-23 17:55:21 +000010127 */
Daniel Veillarda07050d2003-10-19 14:46:32 +000010128 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +000010129
10130 /*
10131 * Note that we will switch encoding on the fly.
10132 */
10133 xmlParseXMLDecl(ctxt);
10134 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
10135 /*
10136 * The XML REC instructs us to stop parsing right here
10137 */
10138 return(-1);
10139 }
10140 ctxt->standalone = ctxt->input->standalone;
10141 SKIP_BLANKS;
10142 } else {
10143 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10144 }
10145 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
10146 ctxt->sax->startDocument(ctxt->userData);
10147
10148 /*
10149 * The Misc part of the Prolog
10150 */
10151 GROW;
10152 xmlParseMisc(ctxt);
10153
10154 /*
10155 * Then possibly doc type declaration(s) and more Misc
10156 * (doctypedecl Misc*)?
10157 */
10158 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +000010159 if (CMP9(CUR_PTR, '<', '!', 'D', 'O', 'C', 'T', 'Y', 'P', 'E')) {
Owen Taylor3473f882001-02-23 17:55:21 +000010160
10161 ctxt->inSubset = 1;
10162 xmlParseDocTypeDecl(ctxt);
10163 if (RAW == '[') {
10164 ctxt->instate = XML_PARSER_DTD;
10165 xmlParseInternalSubset(ctxt);
10166 }
10167
10168 /*
10169 * Create and update the external subset.
10170 */
10171 ctxt->inSubset = 2;
10172 if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
10173 (!ctxt->disableSAX))
10174 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
10175 ctxt->extSubSystem, ctxt->extSubURI);
10176 ctxt->inSubset = 0;
10177
Daniel Veillardac4118d2008-01-11 05:27:32 +000010178 xmlCleanSpecialAttr(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010179
10180 ctxt->instate = XML_PARSER_PROLOG;
10181 xmlParseMisc(ctxt);
10182 }
10183
10184 /*
10185 * Time to start parsing the tree itself
10186 */
10187 GROW;
10188 if (RAW != '<') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +000010189 xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
10190 "Start tag expected, '<' not found\n");
Owen Taylor3473f882001-02-23 17:55:21 +000010191 } else {
10192 ctxt->instate = XML_PARSER_CONTENT;
10193 xmlParseElement(ctxt);
10194 ctxt->instate = XML_PARSER_EPILOG;
10195
10196
10197 /*
10198 * The Misc part at the end
10199 */
10200 xmlParseMisc(ctxt);
10201
Daniel Veillard561b7f82002-03-20 21:55:57 +000010202 if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010203 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010204 }
10205 ctxt->instate = XML_PARSER_EOF;
10206 }
10207
10208 /*
10209 * SAX: end of the document processing.
10210 */
Daniel Veillard8d24cc12002-03-05 15:41:29 +000010211 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +000010212 ctxt->sax->endDocument(ctxt->userData);
10213
Daniel Veillard5997aca2002-03-18 18:36:20 +000010214 /*
10215 * Remove locally kept entity definitions if the tree was not built
10216 */
10217 if ((ctxt->myDoc != NULL) &&
10218 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
10219 xmlFreeDoc(ctxt->myDoc);
10220 ctxt->myDoc = NULL;
10221 }
10222
Daniel Veillardae0765b2008-07-31 19:54:59 +000010223 if ((ctxt->wellFormed) && (ctxt->myDoc != NULL)) {
10224 ctxt->myDoc->properties |= XML_DOC_WELLFORMED;
10225 if (ctxt->valid)
10226 ctxt->myDoc->properties |= XML_DOC_DTDVALID;
10227 if (ctxt->nsWellFormed)
10228 ctxt->myDoc->properties |= XML_DOC_NSVALID;
10229 if (ctxt->options & XML_PARSE_OLD10)
10230 ctxt->myDoc->properties |= XML_DOC_OLD10;
10231 }
Daniel Veillardc7612992002-02-17 22:47:37 +000010232 if (! ctxt->wellFormed) {
10233 ctxt->valid = 0;
10234 return(-1);
10235 }
Owen Taylor3473f882001-02-23 17:55:21 +000010236 return(0);
10237}
10238
10239/**
10240 * xmlParseExtParsedEnt:
10241 * @ctxt: an XML parser context
10242 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +000010243 * parse a general parsed entity
Owen Taylor3473f882001-02-23 17:55:21 +000010244 * An external general parsed entity is well-formed if it matches the
10245 * production labeled extParsedEnt.
10246 *
10247 * [78] extParsedEnt ::= TextDecl? content
10248 *
10249 * Returns 0, -1 in case of error. the parser context is augmented
10250 * as a result of the parsing.
10251 */
10252
10253int
10254xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
10255 xmlChar start[4];
10256 xmlCharEncoding enc;
10257
Daniel Veillard36e5cd52004-11-02 14:52:23 +000010258 if ((ctxt == NULL) || (ctxt->input == NULL))
10259 return(-1);
10260
Owen Taylor3473f882001-02-23 17:55:21 +000010261 xmlDefaultSAXHandlerInit();
10262
Daniel Veillard309f81d2003-09-23 09:02:53 +000010263 xmlDetectSAX2(ctxt);
10264
Owen Taylor3473f882001-02-23 17:55:21 +000010265 GROW;
10266
10267 /*
10268 * SAX: beginning of the document processing.
10269 */
10270 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10271 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
10272
10273 /*
10274 * Get the 4 first bytes and decode the charset
10275 * if enc != XML_CHAR_ENCODING_NONE
10276 * plug some encoding conversion routines.
10277 */
Daniel Veillard4aede2e2003-10-17 12:43:59 +000010278 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
10279 start[0] = RAW;
10280 start[1] = NXT(1);
10281 start[2] = NXT(2);
10282 start[3] = NXT(3);
10283 enc = xmlDetectCharEncoding(start, 4);
10284 if (enc != XML_CHAR_ENCODING_NONE) {
10285 xmlSwitchEncoding(ctxt, enc);
10286 }
Owen Taylor3473f882001-02-23 17:55:21 +000010287 }
10288
10289
10290 if (CUR == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010291 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010292 }
10293
10294 /*
10295 * Check for the XMLDecl in the Prolog.
10296 */
10297 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +000010298 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +000010299
10300 /*
10301 * Note that we will switch encoding on the fly.
10302 */
10303 xmlParseXMLDecl(ctxt);
10304 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
10305 /*
10306 * The XML REC instructs us to stop parsing right here
10307 */
10308 return(-1);
10309 }
10310 SKIP_BLANKS;
10311 } else {
10312 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10313 }
10314 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
10315 ctxt->sax->startDocument(ctxt->userData);
10316
10317 /*
10318 * Doing validity checking on chunk doesn't make sense
10319 */
10320 ctxt->instate = XML_PARSER_CONTENT;
10321 ctxt->validate = 0;
10322 ctxt->loadsubset = 0;
10323 ctxt->depth = 0;
10324
10325 xmlParseContent(ctxt);
10326
10327 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010328 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010329 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010330 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010331 }
10332
10333 /*
10334 * SAX: end of the document processing.
10335 */
Daniel Veillard8d24cc12002-03-05 15:41:29 +000010336 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +000010337 ctxt->sax->endDocument(ctxt->userData);
10338
10339 if (! ctxt->wellFormed) return(-1);
10340 return(0);
10341}
10342
Daniel Veillard73b013f2003-09-30 12:36:01 +000010343#ifdef LIBXML_PUSH_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000010344/************************************************************************
10345 * *
10346 * Progressive parsing interfaces *
10347 * *
10348 ************************************************************************/
10349
10350/**
10351 * xmlParseLookupSequence:
10352 * @ctxt: an XML parser context
10353 * @first: the first char to lookup
10354 * @next: the next char to lookup or zero
10355 * @third: the next char to lookup or zero
10356 *
10357 * Try to find if a sequence (first, next, third) or just (first next) or
10358 * (first) is available in the input stream.
10359 * This function has a side effect of (possibly) incrementing ctxt->checkIndex
10360 * to avoid rescanning sequences of bytes, it DOES change the state of the
10361 * parser, do not use liberally.
10362 *
10363 * Returns the index to the current parsing point if the full sequence
10364 * is available, -1 otherwise.
10365 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +000010366static int
Owen Taylor3473f882001-02-23 17:55:21 +000010367xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
10368 xmlChar next, xmlChar third) {
10369 int base, len;
10370 xmlParserInputPtr in;
10371 const xmlChar *buf;
10372
10373 in = ctxt->input;
10374 if (in == NULL) return(-1);
10375 base = in->cur - in->base;
10376 if (base < 0) return(-1);
10377 if (ctxt->checkIndex > base)
10378 base = ctxt->checkIndex;
10379 if (in->buf == NULL) {
10380 buf = in->base;
10381 len = in->length;
10382 } else {
10383 buf = in->buf->buffer->content;
10384 len = in->buf->buffer->use;
10385 }
10386 /* take into account the sequence length */
10387 if (third) len -= 2;
10388 else if (next) len --;
10389 for (;base < len;base++) {
10390 if (buf[base] == first) {
10391 if (third != 0) {
10392 if ((buf[base + 1] != next) ||
10393 (buf[base + 2] != third)) continue;
10394 } else if (next != 0) {
10395 if (buf[base + 1] != next) continue;
10396 }
10397 ctxt->checkIndex = 0;
10398#ifdef DEBUG_PUSH
10399 if (next == 0)
10400 xmlGenericError(xmlGenericErrorContext,
10401 "PP: lookup '%c' found at %d\n",
10402 first, base);
10403 else if (third == 0)
10404 xmlGenericError(xmlGenericErrorContext,
10405 "PP: lookup '%c%c' found at %d\n",
10406 first, next, base);
10407 else
10408 xmlGenericError(xmlGenericErrorContext,
10409 "PP: lookup '%c%c%c' found at %d\n",
10410 first, next, third, base);
10411#endif
10412 return(base - (in->cur - in->base));
10413 }
10414 }
10415 ctxt->checkIndex = base;
10416#ifdef DEBUG_PUSH
10417 if (next == 0)
10418 xmlGenericError(xmlGenericErrorContext,
10419 "PP: lookup '%c' failed\n", first);
10420 else if (third == 0)
10421 xmlGenericError(xmlGenericErrorContext,
10422 "PP: lookup '%c%c' failed\n", first, next);
10423 else
10424 xmlGenericError(xmlGenericErrorContext,
10425 "PP: lookup '%c%c%c' failed\n", first, next, third);
10426#endif
10427 return(-1);
10428}
10429
10430/**
Daniel Veillarda880b122003-04-21 21:36:41 +000010431 * xmlParseGetLasts:
10432 * @ctxt: an XML parser context
10433 * @lastlt: pointer to store the last '<' from the input
10434 * @lastgt: pointer to store the last '>' from the input
10435 *
10436 * Lookup the last < and > in the current chunk
10437 */
10438static void
10439xmlParseGetLasts(xmlParserCtxtPtr ctxt, const xmlChar **lastlt,
10440 const xmlChar **lastgt) {
10441 const xmlChar *tmp;
10442
10443 if ((ctxt == NULL) || (lastlt == NULL) || (lastgt == NULL)) {
10444 xmlGenericError(xmlGenericErrorContext,
10445 "Internal error: xmlParseGetLasts\n");
10446 return;
10447 }
Daniel Veillard0df3bc32004-06-08 12:03:41 +000010448 if ((ctxt->progressive != 0) && (ctxt->inputNr == 1)) {
Daniel Veillarda880b122003-04-21 21:36:41 +000010449 tmp = ctxt->input->end;
10450 tmp--;
Daniel Veillardeb70f932004-07-05 16:46:09 +000010451 while ((tmp >= ctxt->input->base) && (*tmp != '<')) tmp--;
Daniel Veillarda880b122003-04-21 21:36:41 +000010452 if (tmp < ctxt->input->base) {
10453 *lastlt = NULL;
10454 *lastgt = NULL;
Daniel Veillarda880b122003-04-21 21:36:41 +000010455 } else {
Daniel Veillardeb70f932004-07-05 16:46:09 +000010456 *lastlt = tmp;
10457 tmp++;
10458 while ((tmp < ctxt->input->end) && (*tmp != '>')) {
10459 if (*tmp == '\'') {
10460 tmp++;
10461 while ((tmp < ctxt->input->end) && (*tmp != '\'')) tmp++;
10462 if (tmp < ctxt->input->end) tmp++;
10463 } else if (*tmp == '"') {
10464 tmp++;
10465 while ((tmp < ctxt->input->end) && (*tmp != '"')) tmp++;
10466 if (tmp < ctxt->input->end) tmp++;
10467 } else
10468 tmp++;
10469 }
10470 if (tmp < ctxt->input->end)
10471 *lastgt = tmp;
10472 else {
10473 tmp = *lastlt;
10474 tmp--;
10475 while ((tmp >= ctxt->input->base) && (*tmp != '>')) tmp--;
10476 if (tmp >= ctxt->input->base)
10477 *lastgt = tmp;
10478 else
10479 *lastgt = NULL;
10480 }
Daniel Veillarda880b122003-04-21 21:36:41 +000010481 }
Daniel Veillarda880b122003-04-21 21:36:41 +000010482 } else {
10483 *lastlt = NULL;
10484 *lastgt = NULL;
10485 }
10486}
10487/**
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000010488 * xmlCheckCdataPush:
10489 * @cur: pointer to the bock of characters
10490 * @len: length of the block in bytes
10491 *
10492 * Check that the block of characters is okay as SCdata content [20]
10493 *
10494 * Returns the number of bytes to pass if okay, a negative index where an
10495 * UTF-8 error occured otherwise
10496 */
10497static int
10498xmlCheckCdataPush(const xmlChar *utf, int len) {
10499 int ix;
10500 unsigned char c;
10501 int codepoint;
10502
10503 if ((utf == NULL) || (len <= 0))
10504 return(0);
10505
10506 for (ix = 0; ix < len;) { /* string is 0-terminated */
10507 c = utf[ix];
10508 if ((c & 0x80) == 0x00) { /* 1-byte code, starts with 10 */
10509 if (c >= 0x20)
10510 ix++;
10511 else if ((c == 0xA) || (c == 0xD) || (c == 0x9))
10512 ix++;
10513 else
10514 return(-ix);
10515 } else if ((c & 0xe0) == 0xc0) {/* 2-byte code, starts with 110 */
10516 if (ix + 2 > len) return(ix);
10517 if ((utf[ix+1] & 0xc0 ) != 0x80)
10518 return(-ix);
Daniel Veillardeca59a22005-09-09 10:56:28 +000010519 codepoint = (utf[ix] & 0x1f) << 6;
10520 codepoint |= utf[ix+1] & 0x3f;
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000010521 if (!xmlIsCharQ(codepoint))
10522 return(-ix);
10523 ix += 2;
10524 } else if ((c & 0xf0) == 0xe0) {/* 3-byte code, starts with 1110 */
10525 if (ix + 3 > len) return(ix);
10526 if (((utf[ix+1] & 0xc0) != 0x80) ||
10527 ((utf[ix+2] & 0xc0) != 0x80))
10528 return(-ix);
Daniel Veillardeca59a22005-09-09 10:56:28 +000010529 codepoint = (utf[ix] & 0xf) << 12;
10530 codepoint |= (utf[ix+1] & 0x3f) << 6;
10531 codepoint |= utf[ix+2] & 0x3f;
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000010532 if (!xmlIsCharQ(codepoint))
10533 return(-ix);
10534 ix += 3;
10535 } else if ((c & 0xf8) == 0xf0) {/* 4-byte code, starts with 11110 */
10536 if (ix + 4 > len) return(ix);
10537 if (((utf[ix+1] & 0xc0) != 0x80) ||
10538 ((utf[ix+2] & 0xc0) != 0x80) ||
10539 ((utf[ix+3] & 0xc0) != 0x80))
10540 return(-ix);
Daniel Veillardeca59a22005-09-09 10:56:28 +000010541 codepoint = (utf[ix] & 0x7) << 18;
10542 codepoint |= (utf[ix+1] & 0x3f) << 12;
10543 codepoint |= (utf[ix+2] & 0x3f) << 6;
10544 codepoint |= utf[ix+3] & 0x3f;
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000010545 if (!xmlIsCharQ(codepoint))
10546 return(-ix);
10547 ix += 4;
10548 } else /* unknown encoding */
10549 return(-ix);
10550 }
10551 return(ix);
10552}
10553
10554/**
Owen Taylor3473f882001-02-23 17:55:21 +000010555 * xmlParseTryOrFinish:
10556 * @ctxt: an XML parser context
10557 * @terminate: last chunk indicator
10558 *
10559 * Try to progress on parsing
10560 *
10561 * Returns zero if no parsing was possible
10562 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +000010563static int
Owen Taylor3473f882001-02-23 17:55:21 +000010564xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
10565 int ret = 0;
Daniel Veillardc82c57e2004-01-12 16:24:34 +000010566 int avail, tlen;
Owen Taylor3473f882001-02-23 17:55:21 +000010567 xmlChar cur, next;
Daniel Veillarda880b122003-04-21 21:36:41 +000010568 const xmlChar *lastlt, *lastgt;
Owen Taylor3473f882001-02-23 17:55:21 +000010569
Daniel Veillard36e5cd52004-11-02 14:52:23 +000010570 if (ctxt->input == NULL)
10571 return(0);
10572
Owen Taylor3473f882001-02-23 17:55:21 +000010573#ifdef DEBUG_PUSH
10574 switch (ctxt->instate) {
10575 case XML_PARSER_EOF:
10576 xmlGenericError(xmlGenericErrorContext,
10577 "PP: try EOF\n"); break;
10578 case XML_PARSER_START:
10579 xmlGenericError(xmlGenericErrorContext,
10580 "PP: try START\n"); break;
10581 case XML_PARSER_MISC:
10582 xmlGenericError(xmlGenericErrorContext,
10583 "PP: try MISC\n");break;
10584 case XML_PARSER_COMMENT:
10585 xmlGenericError(xmlGenericErrorContext,
10586 "PP: try COMMENT\n");break;
10587 case XML_PARSER_PROLOG:
10588 xmlGenericError(xmlGenericErrorContext,
10589 "PP: try PROLOG\n");break;
10590 case XML_PARSER_START_TAG:
10591 xmlGenericError(xmlGenericErrorContext,
10592 "PP: try START_TAG\n");break;
10593 case XML_PARSER_CONTENT:
10594 xmlGenericError(xmlGenericErrorContext,
10595 "PP: try CONTENT\n");break;
10596 case XML_PARSER_CDATA_SECTION:
10597 xmlGenericError(xmlGenericErrorContext,
10598 "PP: try CDATA_SECTION\n");break;
10599 case XML_PARSER_END_TAG:
10600 xmlGenericError(xmlGenericErrorContext,
10601 "PP: try END_TAG\n");break;
10602 case XML_PARSER_ENTITY_DECL:
10603 xmlGenericError(xmlGenericErrorContext,
10604 "PP: try ENTITY_DECL\n");break;
10605 case XML_PARSER_ENTITY_VALUE:
10606 xmlGenericError(xmlGenericErrorContext,
10607 "PP: try ENTITY_VALUE\n");break;
10608 case XML_PARSER_ATTRIBUTE_VALUE:
10609 xmlGenericError(xmlGenericErrorContext,
10610 "PP: try ATTRIBUTE_VALUE\n");break;
10611 case XML_PARSER_DTD:
10612 xmlGenericError(xmlGenericErrorContext,
10613 "PP: try DTD\n");break;
10614 case XML_PARSER_EPILOG:
10615 xmlGenericError(xmlGenericErrorContext,
10616 "PP: try EPILOG\n");break;
10617 case XML_PARSER_PI:
10618 xmlGenericError(xmlGenericErrorContext,
10619 "PP: try PI\n");break;
10620 case XML_PARSER_IGNORE:
10621 xmlGenericError(xmlGenericErrorContext,
10622 "PP: try IGNORE\n");break;
10623 }
10624#endif
10625
Daniel Veillard198c1bf2003-10-20 17:07:41 +000010626 if ((ctxt->input != NULL) &&
10627 (ctxt->input->cur - ctxt->input->base > 4096)) {
Daniel Veillarda880b122003-04-21 21:36:41 +000010628 xmlSHRINK(ctxt);
10629 ctxt->checkIndex = 0;
10630 }
10631 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Aleksey Sanine48a3182002-05-09 18:20:01 +000010632
Daniel Veillarda880b122003-04-21 21:36:41 +000010633 while (1) {
Daniel Veillard14412512005-01-21 23:53:26 +000010634 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000010635 return(0);
10636
10637
Owen Taylor3473f882001-02-23 17:55:21 +000010638 /*
10639 * Pop-up of finished entities.
10640 */
10641 while ((RAW == 0) && (ctxt->inputNr > 1))
10642 xmlPopInput(ctxt);
10643
Daniel Veillard198c1bf2003-10-20 17:07:41 +000010644 if (ctxt->input == NULL) break;
Owen Taylor3473f882001-02-23 17:55:21 +000010645 if (ctxt->input->buf == NULL)
Daniel Veillarda880b122003-04-21 21:36:41 +000010646 avail = ctxt->input->length -
10647 (ctxt->input->cur - ctxt->input->base);
Daniel Veillard158a4d22002-02-20 22:17:58 +000010648 else {
10649 /*
10650 * If we are operating on converted input, try to flush
10651 * remainng chars to avoid them stalling in the non-converted
10652 * buffer.
10653 */
10654 if ((ctxt->input->buf->raw != NULL) &&
10655 (ctxt->input->buf->raw->use > 0)) {
10656 int base = ctxt->input->base -
10657 ctxt->input->buf->buffer->content;
10658 int current = ctxt->input->cur - ctxt->input->base;
10659
10660 xmlParserInputBufferPush(ctxt->input->buf, 0, "");
10661 ctxt->input->base = ctxt->input->buf->buffer->content + base;
10662 ctxt->input->cur = ctxt->input->base + current;
10663 ctxt->input->end =
10664 &ctxt->input->buf->buffer->content[
10665 ctxt->input->buf->buffer->use];
10666 }
10667 avail = ctxt->input->buf->buffer->use -
10668 (ctxt->input->cur - ctxt->input->base);
10669 }
Owen Taylor3473f882001-02-23 17:55:21 +000010670 if (avail < 1)
10671 goto done;
10672 switch (ctxt->instate) {
10673 case XML_PARSER_EOF:
10674 /*
10675 * Document parsing is done !
10676 */
10677 goto done;
10678 case XML_PARSER_START:
Daniel Veillard0e4cd172001-06-28 12:13:56 +000010679 if (ctxt->charset == XML_CHAR_ENCODING_NONE) {
10680 xmlChar start[4];
10681 xmlCharEncoding enc;
10682
10683 /*
10684 * Very first chars read from the document flow.
10685 */
10686 if (avail < 4)
10687 goto done;
10688
10689 /*
10690 * Get the 4 first bytes and decode the charset
10691 * if enc != XML_CHAR_ENCODING_NONE
William M. Brack3a1cd212005-02-11 14:35:54 +000010692 * plug some encoding conversion routines,
10693 * else xmlSwitchEncoding will set to (default)
10694 * UTF8.
Daniel Veillard0e4cd172001-06-28 12:13:56 +000010695 */
10696 start[0] = RAW;
10697 start[1] = NXT(1);
10698 start[2] = NXT(2);
10699 start[3] = NXT(3);
10700 enc = xmlDetectCharEncoding(start, 4);
William M. Brack3a1cd212005-02-11 14:35:54 +000010701 xmlSwitchEncoding(ctxt, enc);
Daniel Veillard0e4cd172001-06-28 12:13:56 +000010702 break;
10703 }
Owen Taylor3473f882001-02-23 17:55:21 +000010704
Daniel Veillard2b8c4a12003-10-02 22:28:19 +000010705 if (avail < 2)
10706 goto done;
Owen Taylor3473f882001-02-23 17:55:21 +000010707 cur = ctxt->input->cur[0];
10708 next = ctxt->input->cur[1];
10709 if (cur == 0) {
10710 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10711 ctxt->sax->setDocumentLocator(ctxt->userData,
10712 &xmlDefaultSAXLocator);
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010713 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010714 ctxt->instate = XML_PARSER_EOF;
10715#ifdef DEBUG_PUSH
10716 xmlGenericError(xmlGenericErrorContext,
10717 "PP: entering EOF\n");
10718#endif
10719 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10720 ctxt->sax->endDocument(ctxt->userData);
10721 goto done;
10722 }
10723 if ((cur == '<') && (next == '?')) {
10724 /* PI or XML decl */
10725 if (avail < 5) return(ret);
10726 if ((!terminate) &&
10727 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
10728 return(ret);
10729 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10730 ctxt->sax->setDocumentLocator(ctxt->userData,
10731 &xmlDefaultSAXLocator);
10732 if ((ctxt->input->cur[2] == 'x') &&
10733 (ctxt->input->cur[3] == 'm') &&
10734 (ctxt->input->cur[4] == 'l') &&
William M. Brack76e95df2003-10-18 16:20:14 +000010735 (IS_BLANK_CH(ctxt->input->cur[5]))) {
Owen Taylor3473f882001-02-23 17:55:21 +000010736 ret += 5;
10737#ifdef DEBUG_PUSH
10738 xmlGenericError(xmlGenericErrorContext,
10739 "PP: Parsing XML Decl\n");
10740#endif
10741 xmlParseXMLDecl(ctxt);
10742 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
10743 /*
10744 * The XML REC instructs us to stop parsing right
10745 * here
10746 */
10747 ctxt->instate = XML_PARSER_EOF;
10748 return(0);
10749 }
10750 ctxt->standalone = ctxt->input->standalone;
10751 if ((ctxt->encoding == NULL) &&
10752 (ctxt->input->encoding != NULL))
10753 ctxt->encoding = xmlStrdup(ctxt->input->encoding);
10754 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
10755 (!ctxt->disableSAX))
10756 ctxt->sax->startDocument(ctxt->userData);
10757 ctxt->instate = XML_PARSER_MISC;
10758#ifdef DEBUG_PUSH
10759 xmlGenericError(xmlGenericErrorContext,
10760 "PP: entering MISC\n");
10761#endif
10762 } else {
10763 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10764 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
10765 (!ctxt->disableSAX))
10766 ctxt->sax->startDocument(ctxt->userData);
10767 ctxt->instate = XML_PARSER_MISC;
10768#ifdef DEBUG_PUSH
10769 xmlGenericError(xmlGenericErrorContext,
10770 "PP: entering MISC\n");
10771#endif
10772 }
10773 } else {
10774 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10775 ctxt->sax->setDocumentLocator(ctxt->userData,
10776 &xmlDefaultSAXLocator);
10777 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
William M. Bracka3215c72004-07-31 16:24:01 +000010778 if (ctxt->version == NULL) {
10779 xmlErrMemory(ctxt, NULL);
10780 break;
10781 }
Owen Taylor3473f882001-02-23 17:55:21 +000010782 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
10783 (!ctxt->disableSAX))
10784 ctxt->sax->startDocument(ctxt->userData);
10785 ctxt->instate = XML_PARSER_MISC;
10786#ifdef DEBUG_PUSH
10787 xmlGenericError(xmlGenericErrorContext,
10788 "PP: entering MISC\n");
10789#endif
10790 }
10791 break;
Daniel Veillarda880b122003-04-21 21:36:41 +000010792 case XML_PARSER_START_TAG: {
Daniel Veillarde57ec792003-09-10 10:50:59 +000010793 const xmlChar *name;
Daniel Veillard15495612009-09-05 15:04:41 +020010794 const xmlChar *prefix = NULL;
10795 const xmlChar *URI = NULL;
Daniel Veillarde57ec792003-09-10 10:50:59 +000010796 int nsNr = ctxt->nsNr;
Daniel Veillarda880b122003-04-21 21:36:41 +000010797
10798 if ((avail < 2) && (ctxt->inputNr == 1))
10799 goto done;
10800 cur = ctxt->input->cur[0];
10801 if (cur != '<') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010802 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Daniel Veillarda880b122003-04-21 21:36:41 +000010803 ctxt->instate = XML_PARSER_EOF;
Daniel Veillarda880b122003-04-21 21:36:41 +000010804 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10805 ctxt->sax->endDocument(ctxt->userData);
10806 goto done;
10807 }
10808 if (!terminate) {
10809 if (ctxt->progressive) {
Daniel Veillardb3744002004-02-18 14:28:22 +000010810 /* > can be found unescaped in attribute values */
Daniel Veillardeb70f932004-07-05 16:46:09 +000010811 if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
Daniel Veillarda880b122003-04-21 21:36:41 +000010812 goto done;
10813 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
10814 goto done;
10815 }
10816 }
10817 if (ctxt->spaceNr == 0)
10818 spacePush(ctxt, -1);
Daniel Veillard1114d002006-10-12 16:24:35 +000010819 else if (*ctxt->space == -2)
10820 spacePush(ctxt, -1);
Daniel Veillarda880b122003-04-21 21:36:41 +000010821 else
10822 spacePush(ctxt, *ctxt->space);
Daniel Veillard81273902003-09-30 00:43:48 +000010823#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +000010824 if (ctxt->sax2)
Daniel Veillard81273902003-09-30 00:43:48 +000010825#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillardc82c57e2004-01-12 16:24:34 +000010826 name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
Daniel Veillard81273902003-09-30 00:43:48 +000010827#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +000010828 else
10829 name = xmlParseStartTag(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +000010830#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarda880b122003-04-21 21:36:41 +000010831 if (name == NULL) {
10832 spacePop(ctxt);
10833 ctxt->instate = XML_PARSER_EOF;
Daniel Veillarda880b122003-04-21 21:36:41 +000010834 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10835 ctxt->sax->endDocument(ctxt->userData);
10836 goto done;
10837 }
Daniel Veillard4432df22003-09-28 18:58:27 +000010838#ifdef LIBXML_VALID_ENABLED
Daniel Veillarda880b122003-04-21 21:36:41 +000010839 /*
10840 * [ VC: Root Element Type ]
10841 * The Name in the document type declaration must match
10842 * the element type of the root element.
10843 */
10844 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
10845 ctxt->node && (ctxt->node == ctxt->myDoc->children))
10846 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
Daniel Veillard4432df22003-09-28 18:58:27 +000010847#endif /* LIBXML_VALID_ENABLED */
Daniel Veillarda880b122003-04-21 21:36:41 +000010848
10849 /*
10850 * Check for an Empty Element.
10851 */
10852 if ((RAW == '/') && (NXT(1) == '>')) {
10853 SKIP(2);
Daniel Veillarde57ec792003-09-10 10:50:59 +000010854
10855 if (ctxt->sax2) {
10856 if ((ctxt->sax != NULL) &&
10857 (ctxt->sax->endElementNs != NULL) &&
10858 (!ctxt->disableSAX))
10859 ctxt->sax->endElementNs(ctxt->userData, name,
10860 prefix, URI);
Daniel Veillard48df9612005-01-04 21:50:05 +000010861 if (ctxt->nsNr - nsNr > 0)
10862 nsPop(ctxt, ctxt->nsNr - nsNr);
Daniel Veillard81273902003-09-30 00:43:48 +000010863#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +000010864 } else {
10865 if ((ctxt->sax != NULL) &&
10866 (ctxt->sax->endElement != NULL) &&
10867 (!ctxt->disableSAX))
10868 ctxt->sax->endElement(ctxt->userData, name);
Daniel Veillard81273902003-09-30 00:43:48 +000010869#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarda880b122003-04-21 21:36:41 +000010870 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000010871 spacePop(ctxt);
10872 if (ctxt->nameNr == 0) {
Daniel Veillarda880b122003-04-21 21:36:41 +000010873 ctxt->instate = XML_PARSER_EPILOG;
Daniel Veillarda880b122003-04-21 21:36:41 +000010874 } else {
10875 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillarda880b122003-04-21 21:36:41 +000010876 }
10877 break;
10878 }
10879 if (RAW == '>') {
10880 NEXT;
10881 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +000010882 xmlFatalErrMsgStr(ctxt, XML_ERR_GT_REQUIRED,
Daniel Veillarda880b122003-04-21 21:36:41 +000010883 "Couldn't find end of Start Tag %s\n",
10884 name);
Daniel Veillarda880b122003-04-21 21:36:41 +000010885 nodePop(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +000010886 spacePop(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +000010887 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000010888 if (ctxt->sax2)
10889 nameNsPush(ctxt, name, prefix, URI, ctxt->nsNr - nsNr);
Daniel Veillard81273902003-09-30 00:43:48 +000010890#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +000010891 else
10892 namePush(ctxt, name);
Daniel Veillard81273902003-09-30 00:43:48 +000010893#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +000010894
Daniel Veillarda880b122003-04-21 21:36:41 +000010895 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillarda880b122003-04-21 21:36:41 +000010896 break;
10897 }
10898 case XML_PARSER_CONTENT: {
10899 const xmlChar *test;
10900 unsigned int cons;
10901 if ((avail < 2) && (ctxt->inputNr == 1))
10902 goto done;
10903 cur = ctxt->input->cur[0];
10904 next = ctxt->input->cur[1];
10905
10906 test = CUR_PTR;
10907 cons = ctxt->input->consumed;
10908 if ((cur == '<') && (next == '/')) {
10909 ctxt->instate = XML_PARSER_END_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +000010910 break;
10911 } else if ((cur == '<') && (next == '?')) {
10912 if ((!terminate) &&
10913 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
10914 goto done;
Daniel Veillarda880b122003-04-21 21:36:41 +000010915 xmlParsePI(ctxt);
10916 } else if ((cur == '<') && (next != '!')) {
10917 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +000010918 break;
10919 } else if ((cur == '<') && (next == '!') &&
10920 (ctxt->input->cur[2] == '-') &&
10921 (ctxt->input->cur[3] == '-')) {
Daniel Veillard6974feb2006-02-05 02:43:36 +000010922 int term;
10923
10924 if (avail < 4)
10925 goto done;
10926 ctxt->input->cur += 4;
10927 term = xmlParseLookupSequence(ctxt, '-', '-', '>');
10928 ctxt->input->cur -= 4;
10929 if ((!terminate) && (term < 0))
Daniel Veillarda880b122003-04-21 21:36:41 +000010930 goto done;
Daniel Veillarda880b122003-04-21 21:36:41 +000010931 xmlParseComment(ctxt);
10932 ctxt->instate = XML_PARSER_CONTENT;
10933 } else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
10934 (ctxt->input->cur[2] == '[') &&
10935 (ctxt->input->cur[3] == 'C') &&
10936 (ctxt->input->cur[4] == 'D') &&
10937 (ctxt->input->cur[5] == 'A') &&
10938 (ctxt->input->cur[6] == 'T') &&
10939 (ctxt->input->cur[7] == 'A') &&
10940 (ctxt->input->cur[8] == '[')) {
10941 SKIP(9);
10942 ctxt->instate = XML_PARSER_CDATA_SECTION;
Daniel Veillarda880b122003-04-21 21:36:41 +000010943 break;
10944 } else if ((cur == '<') && (next == '!') &&
10945 (avail < 9)) {
10946 goto done;
10947 } else if (cur == '&') {
10948 if ((!terminate) &&
10949 (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0))
10950 goto done;
Daniel Veillarda880b122003-04-21 21:36:41 +000010951 xmlParseReference(ctxt);
10952 } else {
10953 /* TODO Avoid the extra copy, handle directly !!! */
10954 /*
10955 * Goal of the following test is:
10956 * - minimize calls to the SAX 'character' callback
10957 * when they are mergeable
10958 * - handle an problem for isBlank when we only parse
10959 * a sequence of blank chars and the next one is
10960 * not available to check against '<' presence.
10961 * - tries to homogenize the differences in SAX
10962 * callbacks between the push and pull versions
10963 * of the parser.
10964 */
10965 if ((ctxt->inputNr == 1) &&
10966 (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
10967 if (!terminate) {
10968 if (ctxt->progressive) {
10969 if ((lastlt == NULL) ||
10970 (ctxt->input->cur > lastlt))
10971 goto done;
10972 } else if (xmlParseLookupSequence(ctxt,
10973 '<', 0, 0) < 0) {
10974 goto done;
10975 }
10976 }
10977 }
10978 ctxt->checkIndex = 0;
Daniel Veillarda880b122003-04-21 21:36:41 +000010979 xmlParseCharData(ctxt, 0);
10980 }
10981 /*
10982 * Pop-up of finished entities.
10983 */
10984 while ((RAW == 0) && (ctxt->inputNr > 1))
10985 xmlPopInput(ctxt);
10986 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010987 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
10988 "detected an error in element content\n");
Daniel Veillarda880b122003-04-21 21:36:41 +000010989 ctxt->instate = XML_PARSER_EOF;
10990 break;
10991 }
10992 break;
10993 }
10994 case XML_PARSER_END_TAG:
10995 if (avail < 2)
10996 goto done;
10997 if (!terminate) {
10998 if (ctxt->progressive) {
Daniel Veillardeb70f932004-07-05 16:46:09 +000010999 /* > can be found unescaped in attribute values */
11000 if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
Daniel Veillarda880b122003-04-21 21:36:41 +000011001 goto done;
11002 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
11003 goto done;
11004 }
11005 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000011006 if (ctxt->sax2) {
11007 xmlParseEndTag2(ctxt,
11008 (void *) ctxt->pushTab[ctxt->nameNr * 3 - 3],
11009 (void *) ctxt->pushTab[ctxt->nameNr * 3 - 2], 0,
Daniel Veillardc82c57e2004-01-12 16:24:34 +000011010 (int) (long) ctxt->pushTab[ctxt->nameNr * 3 - 1], 0);
Daniel Veillarde57ec792003-09-10 10:50:59 +000011011 nameNsPop(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +000011012 }
11013#ifdef LIBXML_SAX1_ENABLED
11014 else
Daniel Veillarde57ec792003-09-10 10:50:59 +000011015 xmlParseEndTag1(ctxt, 0);
Daniel Veillard81273902003-09-30 00:43:48 +000011016#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +000011017 if (ctxt->nameNr == 0) {
Daniel Veillarda880b122003-04-21 21:36:41 +000011018 ctxt->instate = XML_PARSER_EPILOG;
Daniel Veillarda880b122003-04-21 21:36:41 +000011019 } else {
11020 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillarda880b122003-04-21 21:36:41 +000011021 }
11022 break;
11023 case XML_PARSER_CDATA_SECTION: {
11024 /*
11025 * The Push mode need to have the SAX callback for
11026 * cdataBlock merge back contiguous callbacks.
11027 */
11028 int base;
11029
11030 base = xmlParseLookupSequence(ctxt, ']', ']', '>');
11031 if (base < 0) {
11032 if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) {
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011033 int tmp;
11034
11035 tmp = xmlCheckCdataPush(ctxt->input->cur,
11036 XML_PARSER_BIG_BUFFER_SIZE);
11037 if (tmp < 0) {
11038 tmp = -tmp;
11039 ctxt->input->cur += tmp;
11040 goto encoding_error;
11041 }
Daniel Veillarda880b122003-04-21 21:36:41 +000011042 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
11043 if (ctxt->sax->cdataBlock != NULL)
Daniel Veillardd9d32ae2003-07-05 20:32:43 +000011044 ctxt->sax->cdataBlock(ctxt->userData,
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011045 ctxt->input->cur, tmp);
Daniel Veillardd9d32ae2003-07-05 20:32:43 +000011046 else if (ctxt->sax->characters != NULL)
11047 ctxt->sax->characters(ctxt->userData,
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011048 ctxt->input->cur, tmp);
Daniel Veillarda880b122003-04-21 21:36:41 +000011049 }
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011050 SKIPL(tmp);
Daniel Veillarda880b122003-04-21 21:36:41 +000011051 ctxt->checkIndex = 0;
11052 }
11053 goto done;
11054 } else {
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011055 int tmp;
11056
11057 tmp = xmlCheckCdataPush(ctxt->input->cur, base);
11058 if ((tmp < 0) || (tmp != base)) {
11059 tmp = -tmp;
11060 ctxt->input->cur += tmp;
11061 goto encoding_error;
11062 }
Daniel Veillardd0d2f092008-03-07 16:50:21 +000011063 if ((ctxt->sax != NULL) && (base == 0) &&
11064 (ctxt->sax->cdataBlock != NULL) &&
11065 (!ctxt->disableSAX)) {
11066 /*
11067 * Special case to provide identical behaviour
11068 * between pull and push parsers on enpty CDATA
11069 * sections
11070 */
11071 if ((ctxt->input->cur - ctxt->input->base >= 9) &&
11072 (!strncmp((const char *)&ctxt->input->cur[-9],
11073 "<![CDATA[", 9)))
11074 ctxt->sax->cdataBlock(ctxt->userData,
11075 BAD_CAST "", 0);
11076 } else if ((ctxt->sax != NULL) && (base > 0) &&
Daniel Veillarda880b122003-04-21 21:36:41 +000011077 (!ctxt->disableSAX)) {
11078 if (ctxt->sax->cdataBlock != NULL)
11079 ctxt->sax->cdataBlock(ctxt->userData,
11080 ctxt->input->cur, base);
Daniel Veillardd9d32ae2003-07-05 20:32:43 +000011081 else if (ctxt->sax->characters != NULL)
11082 ctxt->sax->characters(ctxt->userData,
11083 ctxt->input->cur, base);
Daniel Veillarda880b122003-04-21 21:36:41 +000011084 }
Daniel Veillard0b787f32004-03-26 17:29:53 +000011085 SKIPL(base + 3);
Daniel Veillarda880b122003-04-21 21:36:41 +000011086 ctxt->checkIndex = 0;
11087 ctxt->instate = XML_PARSER_CONTENT;
11088#ifdef DEBUG_PUSH
11089 xmlGenericError(xmlGenericErrorContext,
11090 "PP: entering CONTENT\n");
11091#endif
11092 }
11093 break;
11094 }
Owen Taylor3473f882001-02-23 17:55:21 +000011095 case XML_PARSER_MISC:
11096 SKIP_BLANKS;
11097 if (ctxt->input->buf == NULL)
Daniel Veillarda880b122003-04-21 21:36:41 +000011098 avail = ctxt->input->length -
11099 (ctxt->input->cur - ctxt->input->base);
Owen Taylor3473f882001-02-23 17:55:21 +000011100 else
Daniel Veillarda880b122003-04-21 21:36:41 +000011101 avail = ctxt->input->buf->buffer->use -
11102 (ctxt->input->cur - ctxt->input->base);
Owen Taylor3473f882001-02-23 17:55:21 +000011103 if (avail < 2)
11104 goto done;
11105 cur = ctxt->input->cur[0];
11106 next = ctxt->input->cur[1];
11107 if ((cur == '<') && (next == '?')) {
11108 if ((!terminate) &&
11109 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
11110 goto done;
11111#ifdef DEBUG_PUSH
11112 xmlGenericError(xmlGenericErrorContext,
11113 "PP: Parsing PI\n");
11114#endif
11115 xmlParsePI(ctxt);
Daniel Veillard40e4b212007-06-12 14:46:40 +000011116 ctxt->checkIndex = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000011117 } else if ((cur == '<') && (next == '!') &&
Daniel Veillarda880b122003-04-21 21:36:41 +000011118 (ctxt->input->cur[2] == '-') &&
11119 (ctxt->input->cur[3] == '-')) {
Owen Taylor3473f882001-02-23 17:55:21 +000011120 if ((!terminate) &&
11121 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
11122 goto done;
11123#ifdef DEBUG_PUSH
11124 xmlGenericError(xmlGenericErrorContext,
11125 "PP: Parsing Comment\n");
11126#endif
11127 xmlParseComment(ctxt);
11128 ctxt->instate = XML_PARSER_MISC;
Daniel Veillarddfac9462007-06-12 14:44:32 +000011129 ctxt->checkIndex = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000011130 } else if ((cur == '<') && (next == '!') &&
Daniel Veillarda880b122003-04-21 21:36:41 +000011131 (ctxt->input->cur[2] == 'D') &&
11132 (ctxt->input->cur[3] == 'O') &&
11133 (ctxt->input->cur[4] == 'C') &&
11134 (ctxt->input->cur[5] == 'T') &&
11135 (ctxt->input->cur[6] == 'Y') &&
11136 (ctxt->input->cur[7] == 'P') &&
Owen Taylor3473f882001-02-23 17:55:21 +000011137 (ctxt->input->cur[8] == 'E')) {
11138 if ((!terminate) &&
11139 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
11140 goto done;
11141#ifdef DEBUG_PUSH
11142 xmlGenericError(xmlGenericErrorContext,
11143 "PP: Parsing internal subset\n");
11144#endif
11145 ctxt->inSubset = 1;
11146 xmlParseDocTypeDecl(ctxt);
11147 if (RAW == '[') {
11148 ctxt->instate = XML_PARSER_DTD;
11149#ifdef DEBUG_PUSH
11150 xmlGenericError(xmlGenericErrorContext,
11151 "PP: entering DTD\n");
11152#endif
11153 } else {
11154 /*
11155 * Create and update the external subset.
11156 */
11157 ctxt->inSubset = 2;
11158 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
11159 (ctxt->sax->externalSubset != NULL))
11160 ctxt->sax->externalSubset(ctxt->userData,
11161 ctxt->intSubName, ctxt->extSubSystem,
11162 ctxt->extSubURI);
11163 ctxt->inSubset = 0;
Daniel Veillardac4118d2008-01-11 05:27:32 +000011164 xmlCleanSpecialAttr(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011165 ctxt->instate = XML_PARSER_PROLOG;
11166#ifdef DEBUG_PUSH
11167 xmlGenericError(xmlGenericErrorContext,
11168 "PP: entering PROLOG\n");
11169#endif
11170 }
11171 } else if ((cur == '<') && (next == '!') &&
11172 (avail < 9)) {
11173 goto done;
11174 } else {
11175 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +000011176 ctxt->progressive = 1;
11177 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Owen Taylor3473f882001-02-23 17:55:21 +000011178#ifdef DEBUG_PUSH
11179 xmlGenericError(xmlGenericErrorContext,
11180 "PP: entering START_TAG\n");
11181#endif
11182 }
11183 break;
Owen Taylor3473f882001-02-23 17:55:21 +000011184 case XML_PARSER_PROLOG:
11185 SKIP_BLANKS;
11186 if (ctxt->input->buf == NULL)
11187 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
11188 else
11189 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
11190 if (avail < 2)
11191 goto done;
11192 cur = ctxt->input->cur[0];
11193 next = ctxt->input->cur[1];
11194 if ((cur == '<') && (next == '?')) {
11195 if ((!terminate) &&
11196 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
11197 goto done;
11198#ifdef DEBUG_PUSH
11199 xmlGenericError(xmlGenericErrorContext,
11200 "PP: Parsing PI\n");
11201#endif
11202 xmlParsePI(ctxt);
11203 } else if ((cur == '<') && (next == '!') &&
11204 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
11205 if ((!terminate) &&
11206 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
11207 goto done;
11208#ifdef DEBUG_PUSH
11209 xmlGenericError(xmlGenericErrorContext,
11210 "PP: Parsing Comment\n");
11211#endif
11212 xmlParseComment(ctxt);
11213 ctxt->instate = XML_PARSER_PROLOG;
11214 } else if ((cur == '<') && (next == '!') &&
11215 (avail < 4)) {
11216 goto done;
11217 } else {
11218 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillard0df3bc32004-06-08 12:03:41 +000011219 if (ctxt->progressive == 0)
11220 ctxt->progressive = 1;
Daniel Veillarda880b122003-04-21 21:36:41 +000011221 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Owen Taylor3473f882001-02-23 17:55:21 +000011222#ifdef DEBUG_PUSH
11223 xmlGenericError(xmlGenericErrorContext,
11224 "PP: entering START_TAG\n");
11225#endif
11226 }
11227 break;
11228 case XML_PARSER_EPILOG:
11229 SKIP_BLANKS;
11230 if (ctxt->input->buf == NULL)
11231 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
11232 else
11233 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
11234 if (avail < 2)
11235 goto done;
11236 cur = ctxt->input->cur[0];
11237 next = ctxt->input->cur[1];
11238 if ((cur == '<') && (next == '?')) {
11239 if ((!terminate) &&
11240 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
11241 goto done;
11242#ifdef DEBUG_PUSH
11243 xmlGenericError(xmlGenericErrorContext,
11244 "PP: Parsing PI\n");
11245#endif
11246 xmlParsePI(ctxt);
11247 ctxt->instate = XML_PARSER_EPILOG;
11248 } else if ((cur == '<') && (next == '!') &&
11249 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
11250 if ((!terminate) &&
11251 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
11252 goto done;
11253#ifdef DEBUG_PUSH
11254 xmlGenericError(xmlGenericErrorContext,
11255 "PP: Parsing Comment\n");
11256#endif
11257 xmlParseComment(ctxt);
11258 ctxt->instate = XML_PARSER_EPILOG;
11259 } else if ((cur == '<') && (next == '!') &&
11260 (avail < 4)) {
11261 goto done;
11262 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011263 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011264 ctxt->instate = XML_PARSER_EOF;
11265#ifdef DEBUG_PUSH
11266 xmlGenericError(xmlGenericErrorContext,
11267 "PP: entering EOF\n");
11268#endif
Daniel Veillard8d24cc12002-03-05 15:41:29 +000011269 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +000011270 ctxt->sax->endDocument(ctxt->userData);
11271 goto done;
11272 }
11273 break;
Owen Taylor3473f882001-02-23 17:55:21 +000011274 case XML_PARSER_DTD: {
11275 /*
11276 * Sorry but progressive parsing of the internal subset
11277 * is not expected to be supported. We first check that
11278 * the full content of the internal subset is available and
11279 * the parsing is launched only at that point.
11280 * Internal subset ends up with "']' S? '>'" in an unescaped
11281 * section and not in a ']]>' sequence which are conditional
11282 * sections (whoever argued to keep that crap in XML deserve
11283 * a place in hell !).
11284 */
11285 int base, i;
11286 xmlChar *buf;
11287 xmlChar quote = 0;
11288
11289 base = ctxt->input->cur - ctxt->input->base;
11290 if (base < 0) return(0);
11291 if (ctxt->checkIndex > base)
11292 base = ctxt->checkIndex;
11293 buf = ctxt->input->buf->buffer->content;
11294 for (;(unsigned int) base < ctxt->input->buf->buffer->use;
11295 base++) {
11296 if (quote != 0) {
11297 if (buf[base] == quote)
11298 quote = 0;
11299 continue;
11300 }
Daniel Veillard036143b2004-02-12 11:57:52 +000011301 if ((quote == 0) && (buf[base] == '<')) {
11302 int found = 0;
11303 /* special handling of comments */
11304 if (((unsigned int) base + 4 <
11305 ctxt->input->buf->buffer->use) &&
11306 (buf[base + 1] == '!') &&
11307 (buf[base + 2] == '-') &&
11308 (buf[base + 3] == '-')) {
11309 for (;(unsigned int) base + 3 <
11310 ctxt->input->buf->buffer->use; base++) {
11311 if ((buf[base] == '-') &&
11312 (buf[base + 1] == '-') &&
11313 (buf[base + 2] == '>')) {
11314 found = 1;
11315 base += 2;
11316 break;
11317 }
11318 }
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000011319 if (!found) {
11320#if 0
11321 fprintf(stderr, "unfinished comment\n");
11322#endif
11323 break; /* for */
11324 }
Daniel Veillard036143b2004-02-12 11:57:52 +000011325 continue;
11326 }
11327 }
Owen Taylor3473f882001-02-23 17:55:21 +000011328 if (buf[base] == '"') {
11329 quote = '"';
11330 continue;
11331 }
11332 if (buf[base] == '\'') {
11333 quote = '\'';
11334 continue;
11335 }
11336 if (buf[base] == ']') {
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000011337#if 0
11338 fprintf(stderr, "%c%c%c%c: ", buf[base],
11339 buf[base + 1], buf[base + 2], buf[base + 3]);
11340#endif
Owen Taylor3473f882001-02-23 17:55:21 +000011341 if ((unsigned int) base +1 >=
11342 ctxt->input->buf->buffer->use)
11343 break;
11344 if (buf[base + 1] == ']') {
11345 /* conditional crap, skip both ']' ! */
11346 base++;
11347 continue;
11348 }
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000011349 for (i = 1;
Owen Taylor3473f882001-02-23 17:55:21 +000011350 (unsigned int) base + i < ctxt->input->buf->buffer->use;
11351 i++) {
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000011352 if (buf[base + i] == '>') {
11353#if 0
11354 fprintf(stderr, "found\n");
11355#endif
Owen Taylor3473f882001-02-23 17:55:21 +000011356 goto found_end_int_subset;
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000011357 }
11358 if (!IS_BLANK_CH(buf[base + i])) {
11359#if 0
11360 fprintf(stderr, "not found\n");
11361#endif
11362 goto not_end_of_int_subset;
11363 }
Owen Taylor3473f882001-02-23 17:55:21 +000011364 }
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000011365#if 0
11366 fprintf(stderr, "end of stream\n");
11367#endif
Owen Taylor3473f882001-02-23 17:55:21 +000011368 break;
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000011369
Owen Taylor3473f882001-02-23 17:55:21 +000011370 }
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000011371not_end_of_int_subset:
11372 continue; /* for */
Owen Taylor3473f882001-02-23 17:55:21 +000011373 }
11374 /*
11375 * We didn't found the end of the Internal subset
11376 */
Owen Taylor3473f882001-02-23 17:55:21 +000011377#ifdef DEBUG_PUSH
11378 if (next == 0)
11379 xmlGenericError(xmlGenericErrorContext,
11380 "PP: lookup of int subset end filed\n");
11381#endif
11382 goto done;
11383
11384found_end_int_subset:
11385 xmlParseInternalSubset(ctxt);
11386 ctxt->inSubset = 2;
11387 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
11388 (ctxt->sax->externalSubset != NULL))
11389 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
11390 ctxt->extSubSystem, ctxt->extSubURI);
11391 ctxt->inSubset = 0;
Daniel Veillardac4118d2008-01-11 05:27:32 +000011392 xmlCleanSpecialAttr(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011393 ctxt->instate = XML_PARSER_PROLOG;
11394 ctxt->checkIndex = 0;
11395#ifdef DEBUG_PUSH
11396 xmlGenericError(xmlGenericErrorContext,
11397 "PP: entering PROLOG\n");
11398#endif
11399 break;
11400 }
11401 case XML_PARSER_COMMENT:
11402 xmlGenericError(xmlGenericErrorContext,
11403 "PP: internal error, state == COMMENT\n");
11404 ctxt->instate = XML_PARSER_CONTENT;
11405#ifdef DEBUG_PUSH
11406 xmlGenericError(xmlGenericErrorContext,
11407 "PP: entering CONTENT\n");
11408#endif
11409 break;
Daniel Veillarda880b122003-04-21 21:36:41 +000011410 case XML_PARSER_IGNORE:
11411 xmlGenericError(xmlGenericErrorContext,
11412 "PP: internal error, state == IGNORE");
11413 ctxt->instate = XML_PARSER_DTD;
11414#ifdef DEBUG_PUSH
11415 xmlGenericError(xmlGenericErrorContext,
11416 "PP: entering DTD\n");
11417#endif
11418 break;
Owen Taylor3473f882001-02-23 17:55:21 +000011419 case XML_PARSER_PI:
11420 xmlGenericError(xmlGenericErrorContext,
11421 "PP: internal error, state == PI\n");
11422 ctxt->instate = XML_PARSER_CONTENT;
11423#ifdef DEBUG_PUSH
11424 xmlGenericError(xmlGenericErrorContext,
11425 "PP: entering CONTENT\n");
11426#endif
11427 break;
11428 case XML_PARSER_ENTITY_DECL:
11429 xmlGenericError(xmlGenericErrorContext,
11430 "PP: internal error, state == ENTITY_DECL\n");
11431 ctxt->instate = XML_PARSER_DTD;
11432#ifdef DEBUG_PUSH
11433 xmlGenericError(xmlGenericErrorContext,
11434 "PP: entering DTD\n");
11435#endif
11436 break;
11437 case XML_PARSER_ENTITY_VALUE:
11438 xmlGenericError(xmlGenericErrorContext,
11439 "PP: internal error, state == ENTITY_VALUE\n");
11440 ctxt->instate = XML_PARSER_CONTENT;
11441#ifdef DEBUG_PUSH
11442 xmlGenericError(xmlGenericErrorContext,
11443 "PP: entering DTD\n");
11444#endif
11445 break;
11446 case XML_PARSER_ATTRIBUTE_VALUE:
11447 xmlGenericError(xmlGenericErrorContext,
11448 "PP: internal error, state == ATTRIBUTE_VALUE\n");
11449 ctxt->instate = XML_PARSER_START_TAG;
11450#ifdef DEBUG_PUSH
11451 xmlGenericError(xmlGenericErrorContext,
11452 "PP: entering START_TAG\n");
11453#endif
11454 break;
11455 case XML_PARSER_SYSTEM_LITERAL:
11456 xmlGenericError(xmlGenericErrorContext,
11457 "PP: internal error, state == SYSTEM_LITERAL\n");
11458 ctxt->instate = XML_PARSER_START_TAG;
11459#ifdef DEBUG_PUSH
11460 xmlGenericError(xmlGenericErrorContext,
11461 "PP: entering START_TAG\n");
11462#endif
11463 break;
Daniel Veillard4a7ae502002-02-18 19:18:17 +000011464 case XML_PARSER_PUBLIC_LITERAL:
11465 xmlGenericError(xmlGenericErrorContext,
11466 "PP: internal error, state == PUBLIC_LITERAL\n");
11467 ctxt->instate = XML_PARSER_START_TAG;
11468#ifdef DEBUG_PUSH
11469 xmlGenericError(xmlGenericErrorContext,
11470 "PP: entering START_TAG\n");
11471#endif
11472 break;
Owen Taylor3473f882001-02-23 17:55:21 +000011473 }
11474 }
11475done:
11476#ifdef DEBUG_PUSH
11477 xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret);
11478#endif
11479 return(ret);
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011480encoding_error:
11481 {
11482 char buffer[150];
11483
11484 snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
11485 ctxt->input->cur[0], ctxt->input->cur[1],
11486 ctxt->input->cur[2], ctxt->input->cur[3]);
11487 __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
11488 "Input is not proper UTF-8, indicate encoding !\n%s",
11489 BAD_CAST buffer, NULL);
11490 }
11491 return(0);
Owen Taylor3473f882001-02-23 17:55:21 +000011492}
11493
11494/**
Owen Taylor3473f882001-02-23 17:55:21 +000011495 * xmlParseChunk:
11496 * @ctxt: an XML parser context
11497 * @chunk: an char array
11498 * @size: the size in byte of the chunk
11499 * @terminate: last chunk indicator
11500 *
11501 * Parse a Chunk of memory
11502 *
11503 * Returns zero if no error, the xmlParserErrors otherwise.
11504 */
11505int
11506xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
11507 int terminate) {
Daniel Veillarda617e242006-01-09 14:38:44 +000011508 int end_in_lf = 0;
Daniel Veillarda6c76a22009-08-26 14:37:00 +020011509 int remain = 0;
Daniel Veillarda617e242006-01-09 14:38:44 +000011510
Daniel Veillard36e5cd52004-11-02 14:52:23 +000011511 if (ctxt == NULL)
11512 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard14412512005-01-21 23:53:26 +000011513 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000011514 return(ctxt->errNo);
Daniel Veillard309f81d2003-09-23 09:02:53 +000011515 if (ctxt->instate == XML_PARSER_START)
11516 xmlDetectSAX2(ctxt);
Daniel Veillarda617e242006-01-09 14:38:44 +000011517 if ((size > 0) && (chunk != NULL) && (!terminate) &&
11518 (chunk[size - 1] == '\r')) {
11519 end_in_lf = 1;
11520 size--;
11521 }
Daniel Veillarda6c76a22009-08-26 14:37:00 +020011522
11523xmldecl_done:
11524
Owen Taylor3473f882001-02-23 17:55:21 +000011525 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
11526 (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) {
11527 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
11528 int cur = ctxt->input->cur - ctxt->input->base;
William M. Bracka3215c72004-07-31 16:24:01 +000011529 int res;
Daniel Veillarda6c76a22009-08-26 14:37:00 +020011530
11531 /*
11532 * Specific handling if we autodetected an encoding, we should not
11533 * push more than the first line ... which depend on the encoding
11534 * And only push the rest once the final encoding was detected
11535 */
11536 if ((ctxt->instate == XML_PARSER_START) && (ctxt->input != NULL) &&
11537 (ctxt->input->buf != NULL) && (ctxt->input->buf->encoder != NULL)) {
11538 int len = 45;
11539
11540 if ((xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
11541 BAD_CAST "UTF-16")) ||
11542 (xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
11543 BAD_CAST "UTF16")))
11544 len = 90;
11545 else if ((xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
11546 BAD_CAST "UCS-4")) ||
11547 (xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
11548 BAD_CAST "UCS4")))
11549 len = 180;
11550
11551 if (ctxt->input->buf->rawconsumed < len)
11552 len -= ctxt->input->buf->rawconsumed;
11553
11554 remain = size - len;
11555 size = len;
11556 }
William M. Bracka3215c72004-07-31 16:24:01 +000011557 res =xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
11558 if (res < 0) {
11559 ctxt->errNo = XML_PARSER_EOF;
11560 ctxt->disableSAX = 1;
11561 return (XML_PARSER_EOF);
11562 }
Owen Taylor3473f882001-02-23 17:55:21 +000011563 ctxt->input->base = ctxt->input->buf->buffer->content + base;
11564 ctxt->input->cur = ctxt->input->base + cur;
Daniel Veillard48b2f892001-02-25 16:11:03 +000011565 ctxt->input->end =
11566 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000011567#ifdef DEBUG_PUSH
11568 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
11569#endif
11570
Owen Taylor3473f882001-02-23 17:55:21 +000011571 } else if (ctxt->instate != XML_PARSER_EOF) {
11572 if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
11573 xmlParserInputBufferPtr in = ctxt->input->buf;
11574 if ((in->encoder != NULL) && (in->buffer != NULL) &&
11575 (in->raw != NULL)) {
11576 int nbchars;
Daniel Veillarda6c76a22009-08-26 14:37:00 +020011577
Owen Taylor3473f882001-02-23 17:55:21 +000011578 nbchars = xmlCharEncInFunc(in->encoder, in->buffer, in->raw);
11579 if (nbchars < 0) {
Daniel Veillard24eb9782003-10-04 21:08:09 +000011580 /* TODO 2.6.0 */
Owen Taylor3473f882001-02-23 17:55:21 +000011581 xmlGenericError(xmlGenericErrorContext,
11582 "xmlParseChunk: encoder error\n");
11583 return(XML_ERR_INVALID_ENCODING);
11584 }
11585 }
11586 }
11587 }
Daniel Veillarda6c76a22009-08-26 14:37:00 +020011588 if (remain != 0)
11589 xmlParseTryOrFinish(ctxt, 0);
11590 else
11591 xmlParseTryOrFinish(ctxt, terminate);
11592 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
11593 return(ctxt->errNo);
11594
11595 if (remain != 0) {
11596 chunk += size;
11597 size = remain;
11598 remain = 0;
11599 goto xmldecl_done;
11600 }
Daniel Veillarda617e242006-01-09 14:38:44 +000011601 if ((end_in_lf == 1) && (ctxt->input != NULL) &&
11602 (ctxt->input->buf != NULL)) {
11603 xmlParserInputBufferPush(ctxt->input->buf, 1, "\r");
11604 }
Owen Taylor3473f882001-02-23 17:55:21 +000011605 if (terminate) {
11606 /*
11607 * Check for termination
11608 */
Daniel Veillard36e5cd52004-11-02 14:52:23 +000011609 int avail = 0;
11610
11611 if (ctxt->input != NULL) {
Daniel Veillard819d5cb2002-10-14 11:15:18 +000011612 if (ctxt->input->buf == NULL)
Daniel Veillard36e5cd52004-11-02 14:52:23 +000011613 avail = ctxt->input->length -
11614 (ctxt->input->cur - ctxt->input->base);
11615 else
11616 avail = ctxt->input->buf->buffer->use -
11617 (ctxt->input->cur - ctxt->input->base);
11618 }
Daniel Veillard819d5cb2002-10-14 11:15:18 +000011619
Owen Taylor3473f882001-02-23 17:55:21 +000011620 if ((ctxt->instate != XML_PARSER_EOF) &&
11621 (ctxt->instate != XML_PARSER_EPILOG)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011622 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011623 }
Daniel Veillard819d5cb2002-10-14 11:15:18 +000011624 if ((ctxt->instate == XML_PARSER_EPILOG) && (avail > 0)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011625 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Daniel Veillard819d5cb2002-10-14 11:15:18 +000011626 }
Owen Taylor3473f882001-02-23 17:55:21 +000011627 if (ctxt->instate != XML_PARSER_EOF) {
Daniel Veillard8d24cc12002-03-05 15:41:29 +000011628 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +000011629 ctxt->sax->endDocument(ctxt->userData);
11630 }
11631 ctxt->instate = XML_PARSER_EOF;
11632 }
11633 return((xmlParserErrors) ctxt->errNo);
11634}
11635
11636/************************************************************************
11637 * *
11638 * I/O front end functions to the parser *
11639 * *
11640 ************************************************************************/
11641
11642/**
Owen Taylor3473f882001-02-23 17:55:21 +000011643 * xmlCreatePushParserCtxt:
11644 * @sax: a SAX handler
11645 * @user_data: The user data returned on SAX callbacks
11646 * @chunk: a pointer to an array of chars
11647 * @size: number of chars in the array
11648 * @filename: an optional file name or URI
11649 *
Daniel Veillard176d99f2002-07-06 19:22:28 +000011650 * Create a parser context for using the XML parser in push mode.
11651 * If @buffer and @size are non-NULL, the data is used to detect
11652 * the encoding. The remaining characters will be parsed so they
11653 * don't need to be fed in again through xmlParseChunk.
Owen Taylor3473f882001-02-23 17:55:21 +000011654 * To allow content encoding detection, @size should be >= 4
11655 * The value of @filename is used for fetching external entities
11656 * and error/warning reports.
11657 *
11658 * Returns the new parser context or NULL
11659 */
Daniel Veillard176d99f2002-07-06 19:22:28 +000011660
Owen Taylor3473f882001-02-23 17:55:21 +000011661xmlParserCtxtPtr
11662xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
11663 const char *chunk, int size, const char *filename) {
11664 xmlParserCtxtPtr ctxt;
11665 xmlParserInputPtr inputStream;
11666 xmlParserInputBufferPtr buf;
11667 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
11668
11669 /*
11670 * plug some encoding conversion routines
11671 */
11672 if ((chunk != NULL) && (size >= 4))
11673 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
11674
11675 buf = xmlAllocParserInputBuffer(enc);
11676 if (buf == NULL) return(NULL);
11677
11678 ctxt = xmlNewParserCtxt();
11679 if (ctxt == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +000011680 xmlErrMemory(NULL, "creating parser: out of memory\n");
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000011681 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000011682 return(NULL);
11683 }
Daniel Veillard03a53c32004-10-26 16:06:51 +000011684 ctxt->dictNames = 1;
Daniel Veillarde57ec792003-09-10 10:50:59 +000011685 ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 * sizeof(xmlChar *));
11686 if (ctxt->pushTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011687 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +000011688 xmlFreeParserInputBuffer(buf);
11689 xmlFreeParserCtxt(ctxt);
11690 return(NULL);
11691 }
Owen Taylor3473f882001-02-23 17:55:21 +000011692 if (sax != NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +000011693#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard092643b2003-09-25 14:29:29 +000011694 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Daniel Veillard81273902003-09-30 00:43:48 +000011695#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000011696 xmlFree(ctxt->sax);
11697 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
11698 if (ctxt->sax == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011699 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000011700 xmlFreeParserInputBuffer(buf);
11701 xmlFreeParserCtxt(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011702 return(NULL);
11703 }
Daniel Veillard5ea30d72004-11-08 11:54:28 +000011704 memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
11705 if (sax->initialized == XML_SAX2_MAGIC)
11706 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
11707 else
11708 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
Owen Taylor3473f882001-02-23 17:55:21 +000011709 if (user_data != NULL)
11710 ctxt->userData = user_data;
11711 }
11712 if (filename == NULL) {
11713 ctxt->directory = NULL;
11714 } else {
11715 ctxt->directory = xmlParserGetDirectory(filename);
11716 }
11717
11718 inputStream = xmlNewInputStream(ctxt);
11719 if (inputStream == NULL) {
11720 xmlFreeParserCtxt(ctxt);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000011721 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000011722 return(NULL);
11723 }
11724
11725 if (filename == NULL)
11726 inputStream->filename = NULL;
William M. Bracka3215c72004-07-31 16:24:01 +000011727 else {
Daniel Veillardf4862f02002-09-10 11:13:43 +000011728 inputStream->filename = (char *)
Igor Zlatkovic5f9fada2003-02-19 14:51:00 +000011729 xmlCanonicPath((const xmlChar *) filename);
William M. Bracka3215c72004-07-31 16:24:01 +000011730 if (inputStream->filename == NULL) {
11731 xmlFreeParserCtxt(ctxt);
11732 xmlFreeParserInputBuffer(buf);
11733 return(NULL);
11734 }
11735 }
Owen Taylor3473f882001-02-23 17:55:21 +000011736 inputStream->buf = buf;
11737 inputStream->base = inputStream->buf->buffer->content;
11738 inputStream->cur = inputStream->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +000011739 inputStream->end =
11740 &inputStream->buf->buffer->content[inputStream->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000011741
11742 inputPush(ctxt, inputStream);
11743
William M. Brack3a1cd212005-02-11 14:35:54 +000011744 /*
11745 * If the caller didn't provide an initial 'chunk' for determining
11746 * the encoding, we set the context to XML_CHAR_ENCODING_NONE so
11747 * that it can be automatically determined later
11748 */
11749 if ((size == 0) || (chunk == NULL)) {
11750 ctxt->charset = XML_CHAR_ENCODING_NONE;
11751 } else if ((ctxt->input != NULL) && (ctxt->input->buf != NULL)) {
Daniel Veillardaa39a0f2002-01-06 12:47:22 +000011752 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
11753 int cur = ctxt->input->cur - ctxt->input->base;
11754
Owen Taylor3473f882001-02-23 17:55:21 +000011755 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
Daniel Veillardaa39a0f2002-01-06 12:47:22 +000011756
11757 ctxt->input->base = ctxt->input->buf->buffer->content + base;
11758 ctxt->input->cur = ctxt->input->base + cur;
11759 ctxt->input->end =
11760 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000011761#ifdef DEBUG_PUSH
11762 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
11763#endif
11764 }
11765
Daniel Veillard0e4cd172001-06-28 12:13:56 +000011766 if (enc != XML_CHAR_ENCODING_NONE) {
11767 xmlSwitchEncoding(ctxt, enc);
11768 }
11769
Owen Taylor3473f882001-02-23 17:55:21 +000011770 return(ctxt);
11771}
Daniel Veillard73b013f2003-09-30 12:36:01 +000011772#endif /* LIBXML_PUSH_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000011773
11774/**
Daniel Veillard39e5c892005-07-03 22:48:50 +000011775 * xmlStopParser:
11776 * @ctxt: an XML parser context
11777 *
11778 * Blocks further parser processing
11779 */
11780void
11781xmlStopParser(xmlParserCtxtPtr ctxt) {
11782 if (ctxt == NULL)
11783 return;
11784 ctxt->instate = XML_PARSER_EOF;
11785 ctxt->disableSAX = 1;
11786 if (ctxt->input != NULL) {
11787 ctxt->input->cur = BAD_CAST"";
11788 ctxt->input->base = ctxt->input->cur;
11789 }
11790}
11791
11792/**
Owen Taylor3473f882001-02-23 17:55:21 +000011793 * xmlCreateIOParserCtxt:
11794 * @sax: a SAX handler
11795 * @user_data: The user data returned on SAX callbacks
11796 * @ioread: an I/O read function
11797 * @ioclose: an I/O close function
11798 * @ioctx: an I/O handler
11799 * @enc: the charset encoding if known
11800 *
11801 * Create a parser context for using the XML parser with an existing
11802 * I/O stream
11803 *
11804 * Returns the new parser context or NULL
11805 */
11806xmlParserCtxtPtr
11807xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
11808 xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
11809 void *ioctx, xmlCharEncoding enc) {
11810 xmlParserCtxtPtr ctxt;
11811 xmlParserInputPtr inputStream;
11812 xmlParserInputBufferPtr buf;
Daniel Veillard42595322004-11-08 10:52:06 +000011813
11814 if (ioread == NULL) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011815
11816 buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
11817 if (buf == NULL) return(NULL);
11818
11819 ctxt = xmlNewParserCtxt();
11820 if (ctxt == NULL) {
Daniel Veillardf0af8ec2005-07-08 17:27:33 +000011821 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000011822 return(NULL);
11823 }
11824 if (sax != NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +000011825#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard092643b2003-09-25 14:29:29 +000011826 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Daniel Veillard81273902003-09-30 00:43:48 +000011827#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000011828 xmlFree(ctxt->sax);
11829 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
11830 if (ctxt->sax == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011831 xmlErrMemory(ctxt, NULL);
Daniel Veillardf0af8ec2005-07-08 17:27:33 +000011832 xmlFreeParserCtxt(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011833 return(NULL);
11834 }
Daniel Veillard5ea30d72004-11-08 11:54:28 +000011835 memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
11836 if (sax->initialized == XML_SAX2_MAGIC)
11837 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
11838 else
11839 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
Owen Taylor3473f882001-02-23 17:55:21 +000011840 if (user_data != NULL)
11841 ctxt->userData = user_data;
11842 }
11843
11844 inputStream = xmlNewIOInputStream(ctxt, buf, enc);
11845 if (inputStream == NULL) {
11846 xmlFreeParserCtxt(ctxt);
11847 return(NULL);
11848 }
11849 inputPush(ctxt, inputStream);
11850
11851 return(ctxt);
11852}
11853
Daniel Veillard4432df22003-09-28 18:58:27 +000011854#ifdef LIBXML_VALID_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000011855/************************************************************************
11856 * *
Daniel Veillardcbaf3992001-12-31 16:16:02 +000011857 * Front ends when parsing a DTD *
Owen Taylor3473f882001-02-23 17:55:21 +000011858 * *
11859 ************************************************************************/
11860
11861/**
11862 * xmlIOParseDTD:
11863 * @sax: the SAX handler block or NULL
11864 * @input: an Input Buffer
11865 * @enc: the charset encoding if known
11866 *
11867 * Load and parse a DTD
11868 *
11869 * Returns the resulting xmlDtdPtr or NULL in case of error.
Daniel Veillard402b3442006-10-13 10:28:21 +000011870 * @input will be freed by the function in any case.
Owen Taylor3473f882001-02-23 17:55:21 +000011871 */
11872
11873xmlDtdPtr
11874xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
11875 xmlCharEncoding enc) {
11876 xmlDtdPtr ret = NULL;
11877 xmlParserCtxtPtr ctxt;
11878 xmlParserInputPtr pinput = NULL;
Daniel Veillard87a764e2001-06-20 17:41:10 +000011879 xmlChar start[4];
Owen Taylor3473f882001-02-23 17:55:21 +000011880
11881 if (input == NULL)
11882 return(NULL);
11883
11884 ctxt = xmlNewParserCtxt();
11885 if (ctxt == NULL) {
Daniel Veillard402b3442006-10-13 10:28:21 +000011886 xmlFreeParserInputBuffer(input);
Owen Taylor3473f882001-02-23 17:55:21 +000011887 return(NULL);
11888 }
11889
11890 /*
11891 * Set-up the SAX context
11892 */
11893 if (sax != NULL) {
11894 if (ctxt->sax != NULL)
11895 xmlFree(ctxt->sax);
11896 ctxt->sax = sax;
Daniel Veillard500a1de2004-03-22 15:22:58 +000011897 ctxt->userData = ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000011898 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000011899 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011900
11901 /*
11902 * generate a parser input from the I/O handler
11903 */
11904
Daniel Veillard43caefb2003-12-07 19:32:22 +000011905 pinput = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
Owen Taylor3473f882001-02-23 17:55:21 +000011906 if (pinput == NULL) {
11907 if (sax != NULL) ctxt->sax = NULL;
Daniel Veillard402b3442006-10-13 10:28:21 +000011908 xmlFreeParserInputBuffer(input);
Owen Taylor3473f882001-02-23 17:55:21 +000011909 xmlFreeParserCtxt(ctxt);
11910 return(NULL);
11911 }
11912
11913 /*
11914 * plug some encoding conversion routines here.
11915 */
Daniel Veillarda8f09ce2008-08-27 13:02:01 +000011916 if (xmlPushInput(ctxt, pinput) < 0) {
11917 if (sax != NULL) ctxt->sax = NULL;
11918 xmlFreeParserCtxt(ctxt);
11919 return(NULL);
11920 }
Daniel Veillard43caefb2003-12-07 19:32:22 +000011921 if (enc != XML_CHAR_ENCODING_NONE) {
11922 xmlSwitchEncoding(ctxt, enc);
11923 }
Owen Taylor3473f882001-02-23 17:55:21 +000011924
11925 pinput->filename = NULL;
11926 pinput->line = 1;
11927 pinput->col = 1;
11928 pinput->base = ctxt->input->cur;
11929 pinput->cur = ctxt->input->cur;
11930 pinput->free = NULL;
11931
11932 /*
11933 * let's parse that entity knowing it's an external subset.
11934 */
11935 ctxt->inSubset = 2;
11936 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
Daniel Veillardae0765b2008-07-31 19:54:59 +000011937 if (ctxt->myDoc == NULL) {
11938 xmlErrMemory(ctxt, "New Doc failed");
11939 return(NULL);
11940 }
11941 ctxt->myDoc->properties = XML_DOC_INTERNAL;
Owen Taylor3473f882001-02-23 17:55:21 +000011942 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
11943 BAD_CAST "none", BAD_CAST "none");
Daniel Veillard87a764e2001-06-20 17:41:10 +000011944
Daniel Veillard4aede2e2003-10-17 12:43:59 +000011945 if ((enc == XML_CHAR_ENCODING_NONE) &&
11946 ((ctxt->input->end - ctxt->input->cur) >= 4)) {
Daniel Veillard87a764e2001-06-20 17:41:10 +000011947 /*
11948 * Get the 4 first bytes and decode the charset
11949 * if enc != XML_CHAR_ENCODING_NONE
11950 * plug some encoding conversion routines.
11951 */
11952 start[0] = RAW;
11953 start[1] = NXT(1);
11954 start[2] = NXT(2);
11955 start[3] = NXT(3);
11956 enc = xmlDetectCharEncoding(start, 4);
11957 if (enc != XML_CHAR_ENCODING_NONE) {
11958 xmlSwitchEncoding(ctxt, enc);
11959 }
11960 }
11961
Owen Taylor3473f882001-02-23 17:55:21 +000011962 xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
11963
11964 if (ctxt->myDoc != NULL) {
11965 if (ctxt->wellFormed) {
11966 ret = ctxt->myDoc->extSubset;
11967 ctxt->myDoc->extSubset = NULL;
Daniel Veillard329456a2003-04-26 21:21:00 +000011968 if (ret != NULL) {
11969 xmlNodePtr tmp;
11970
11971 ret->doc = NULL;
11972 tmp = ret->children;
11973 while (tmp != NULL) {
11974 tmp->doc = NULL;
11975 tmp = tmp->next;
11976 }
11977 }
Owen Taylor3473f882001-02-23 17:55:21 +000011978 } else {
11979 ret = NULL;
11980 }
11981 xmlFreeDoc(ctxt->myDoc);
11982 ctxt->myDoc = NULL;
11983 }
11984 if (sax != NULL) ctxt->sax = NULL;
11985 xmlFreeParserCtxt(ctxt);
11986
11987 return(ret);
11988}
11989
11990/**
11991 * xmlSAXParseDTD:
11992 * @sax: the SAX handler block
11993 * @ExternalID: a NAME* containing the External ID of the DTD
11994 * @SystemID: a NAME* containing the URL to the DTD
11995 *
11996 * Load and parse an external subset.
11997 *
11998 * Returns the resulting xmlDtdPtr or NULL in case of error.
11999 */
12000
12001xmlDtdPtr
12002xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
12003 const xmlChar *SystemID) {
12004 xmlDtdPtr ret = NULL;
12005 xmlParserCtxtPtr ctxt;
12006 xmlParserInputPtr input = NULL;
12007 xmlCharEncoding enc;
Igor Zlatkovic07d59762004-08-24 19:12:51 +000012008 xmlChar* systemIdCanonic;
Owen Taylor3473f882001-02-23 17:55:21 +000012009
12010 if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
12011
12012 ctxt = xmlNewParserCtxt();
12013 if (ctxt == NULL) {
12014 return(NULL);
12015 }
12016
12017 /*
12018 * Set-up the SAX context
12019 */
12020 if (sax != NULL) {
12021 if (ctxt->sax != NULL)
12022 xmlFree(ctxt->sax);
12023 ctxt->sax = sax;
Daniel Veillardbf1e3d82003-08-14 23:57:26 +000012024 ctxt->userData = ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000012025 }
Igor Zlatkovic07d59762004-08-24 19:12:51 +000012026
12027 /*
12028 * Canonicalise the system ID
12029 */
12030 systemIdCanonic = xmlCanonicPath(SystemID);
Daniel Veillardc93a19f2004-10-04 11:53:20 +000012031 if ((SystemID != NULL) && (systemIdCanonic == NULL)) {
Igor Zlatkovic07d59762004-08-24 19:12:51 +000012032 xmlFreeParserCtxt(ctxt);
12033 return(NULL);
12034 }
Owen Taylor3473f882001-02-23 17:55:21 +000012035
12036 /*
12037 * Ask the Entity resolver to load the damn thing
12038 */
12039
12040 if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
Daniel Veillarda9557952006-10-12 12:53:15 +000012041 input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID,
12042 systemIdCanonic);
Owen Taylor3473f882001-02-23 17:55:21 +000012043 if (input == NULL) {
12044 if (sax != NULL) ctxt->sax = NULL;
12045 xmlFreeParserCtxt(ctxt);
Daniel Veillard34099b42004-11-04 17:34:35 +000012046 if (systemIdCanonic != NULL)
12047 xmlFree(systemIdCanonic);
Owen Taylor3473f882001-02-23 17:55:21 +000012048 return(NULL);
12049 }
12050
12051 /*
12052 * plug some encoding conversion routines here.
12053 */
Daniel Veillarda8f09ce2008-08-27 13:02:01 +000012054 if (xmlPushInput(ctxt, input) < 0) {
12055 if (sax != NULL) ctxt->sax = NULL;
12056 xmlFreeParserCtxt(ctxt);
12057 if (systemIdCanonic != NULL)
12058 xmlFree(systemIdCanonic);
12059 return(NULL);
12060 }
Daniel Veillard4aede2e2003-10-17 12:43:59 +000012061 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
12062 enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
12063 xmlSwitchEncoding(ctxt, enc);
12064 }
Owen Taylor3473f882001-02-23 17:55:21 +000012065
12066 if (input->filename == NULL)
Igor Zlatkovic07d59762004-08-24 19:12:51 +000012067 input->filename = (char *) systemIdCanonic;
12068 else
12069 xmlFree(systemIdCanonic);
Owen Taylor3473f882001-02-23 17:55:21 +000012070 input->line = 1;
12071 input->col = 1;
12072 input->base = ctxt->input->cur;
12073 input->cur = ctxt->input->cur;
12074 input->free = NULL;
12075
12076 /*
12077 * let's parse that entity knowing it's an external subset.
12078 */
12079 ctxt->inSubset = 2;
12080 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
Daniel Veillardae0765b2008-07-31 19:54:59 +000012081 if (ctxt->myDoc == NULL) {
12082 xmlErrMemory(ctxt, "New Doc failed");
12083 if (sax != NULL) ctxt->sax = NULL;
12084 xmlFreeParserCtxt(ctxt);
12085 return(NULL);
12086 }
12087 ctxt->myDoc->properties = XML_DOC_INTERNAL;
Owen Taylor3473f882001-02-23 17:55:21 +000012088 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
12089 ExternalID, SystemID);
12090 xmlParseExternalSubset(ctxt, ExternalID, SystemID);
12091
12092 if (ctxt->myDoc != NULL) {
12093 if (ctxt->wellFormed) {
12094 ret = ctxt->myDoc->extSubset;
12095 ctxt->myDoc->extSubset = NULL;
Daniel Veillardc5573462003-04-25 16:43:49 +000012096 if (ret != NULL) {
12097 xmlNodePtr tmp;
12098
12099 ret->doc = NULL;
12100 tmp = ret->children;
12101 while (tmp != NULL) {
12102 tmp->doc = NULL;
12103 tmp = tmp->next;
12104 }
12105 }
Owen Taylor3473f882001-02-23 17:55:21 +000012106 } else {
12107 ret = NULL;
12108 }
12109 xmlFreeDoc(ctxt->myDoc);
12110 ctxt->myDoc = NULL;
12111 }
12112 if (sax != NULL) ctxt->sax = NULL;
12113 xmlFreeParserCtxt(ctxt);
Daniel Veillard0161e632008-08-28 15:36:32 +000012114
Owen Taylor3473f882001-02-23 17:55:21 +000012115 return(ret);
12116}
12117
Daniel Veillard4432df22003-09-28 18:58:27 +000012118
Owen Taylor3473f882001-02-23 17:55:21 +000012119/**
12120 * xmlParseDTD:
12121 * @ExternalID: a NAME* containing the External ID of the DTD
12122 * @SystemID: a NAME* containing the URL to the DTD
12123 *
12124 * Load and parse an external subset.
Daniel Veillard0161e632008-08-28 15:36:32 +000012125 *
Owen Taylor3473f882001-02-23 17:55:21 +000012126 * Returns the resulting xmlDtdPtr or NULL in case of error.
12127 */
12128
12129xmlDtdPtr
12130xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
12131 return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
12132}
Daniel Veillard4432df22003-09-28 18:58:27 +000012133#endif /* LIBXML_VALID_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012134
12135/************************************************************************
12136 * *
12137 * Front ends when parsing an Entity *
12138 * *
12139 ************************************************************************/
12140
12141/**
Owen Taylor3473f882001-02-23 17:55:21 +000012142 * xmlParseCtxtExternalEntity:
12143 * @ctx: the existing parsing context
12144 * @URL: the URL for the entity to load
12145 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +000012146 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +000012147 *
12148 * Parse an external general entity within an existing parsing context
12149 * An external general parsed entity is well-formed if it matches the
12150 * production labeled extParsedEnt.
12151 *
12152 * [78] extParsedEnt ::= TextDecl? content
12153 *
12154 * Returns 0 if the entity is well formed, -1 in case of args problem and
12155 * the parser error code otherwise
12156 */
12157
12158int
12159xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
Daniel Veillardcda96922001-08-21 10:56:31 +000012160 const xmlChar *ID, xmlNodePtr *lst) {
Owen Taylor3473f882001-02-23 17:55:21 +000012161 xmlParserCtxtPtr ctxt;
12162 xmlDocPtr newDoc;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012163 xmlNodePtr newRoot;
Owen Taylor3473f882001-02-23 17:55:21 +000012164 xmlSAXHandlerPtr oldsax = NULL;
12165 int ret = 0;
Daniel Veillard87a764e2001-06-20 17:41:10 +000012166 xmlChar start[4];
12167 xmlCharEncoding enc;
Owen Taylor3473f882001-02-23 17:55:21 +000012168
Daniel Veillardce682bc2004-11-05 17:22:25 +000012169 if (ctx == NULL) return(-1);
12170
Daniel Veillard0161e632008-08-28 15:36:32 +000012171 if (((ctx->depth > 40) && ((ctx->options & XML_PARSE_HUGE) == 0)) ||
12172 (ctx->depth > 1024)) {
Owen Taylor3473f882001-02-23 17:55:21 +000012173 return(XML_ERR_ENTITY_LOOP);
12174 }
12175
Daniel Veillardcda96922001-08-21 10:56:31 +000012176 if (lst != NULL)
12177 *lst = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000012178 if ((URL == NULL) && (ID == NULL))
12179 return(-1);
12180 if (ctx->myDoc == NULL) /* @@ relax but check for dereferences */
12181 return(-1);
12182
Rob Richards798743a2009-06-19 13:54:25 -040012183 ctxt = xmlCreateEntityParserCtxtInternal(URL, ID, NULL, ctx);
Daniel Veillard2937b3a2006-10-10 08:52:34 +000012184 if (ctxt == NULL) {
12185 return(-1);
12186 }
Daniel Veillard40ec29a2008-07-30 12:35:40 +000012187
Owen Taylor3473f882001-02-23 17:55:21 +000012188 oldsax = ctxt->sax;
12189 ctxt->sax = ctx->sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000012190 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000012191 newDoc = xmlNewDoc(BAD_CAST "1.0");
12192 if (newDoc == NULL) {
12193 xmlFreeParserCtxt(ctxt);
12194 return(-1);
12195 }
Daniel Veillardae0765b2008-07-31 19:54:59 +000012196 newDoc->properties = XML_DOC_INTERNAL;
Daniel Veillard03a53c32004-10-26 16:06:51 +000012197 if (ctx->myDoc->dict) {
12198 newDoc->dict = ctx->myDoc->dict;
12199 xmlDictReference(newDoc->dict);
12200 }
Owen Taylor3473f882001-02-23 17:55:21 +000012201 if (ctx->myDoc != NULL) {
12202 newDoc->intSubset = ctx->myDoc->intSubset;
12203 newDoc->extSubset = ctx->myDoc->extSubset;
12204 }
12205 if (ctx->myDoc->URL != NULL) {
12206 newDoc->URL = xmlStrdup(ctx->myDoc->URL);
12207 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012208 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
12209 if (newRoot == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000012210 ctxt->sax = oldsax;
12211 xmlFreeParserCtxt(ctxt);
12212 newDoc->intSubset = NULL;
12213 newDoc->extSubset = NULL;
12214 xmlFreeDoc(newDoc);
12215 return(-1);
12216 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012217 xmlAddChild((xmlNodePtr) newDoc, newRoot);
Owen Taylor3473f882001-02-23 17:55:21 +000012218 nodePush(ctxt, newDoc->children);
12219 if (ctx->myDoc == NULL) {
12220 ctxt->myDoc = newDoc;
12221 } else {
12222 ctxt->myDoc = ctx->myDoc;
12223 newDoc->children->doc = ctx->myDoc;
12224 }
12225
Daniel Veillard40ec29a2008-07-30 12:35:40 +000012226 /*
Daniel Veillard87a764e2001-06-20 17:41:10 +000012227 * Get the 4 first bytes and decode the charset
12228 * if enc != XML_CHAR_ENCODING_NONE
12229 * plug some encoding conversion routines.
12230 */
12231 GROW
Daniel Veillard4aede2e2003-10-17 12:43:59 +000012232 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
12233 start[0] = RAW;
12234 start[1] = NXT(1);
12235 start[2] = NXT(2);
12236 start[3] = NXT(3);
12237 enc = xmlDetectCharEncoding(start, 4);
12238 if (enc != XML_CHAR_ENCODING_NONE) {
12239 xmlSwitchEncoding(ctxt, enc);
12240 }
Daniel Veillard87a764e2001-06-20 17:41:10 +000012241 }
12242
Owen Taylor3473f882001-02-23 17:55:21 +000012243 /*
12244 * Parse a possible text declaration first
12245 */
Daniel Veillarda07050d2003-10-19 14:46:32 +000012246 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +000012247 xmlParseTextDecl(ctxt);
Daniel Veillard40ec29a2008-07-30 12:35:40 +000012248 /*
12249 * An XML-1.0 document can't reference an entity not XML-1.0
12250 */
12251 if ((xmlStrEqual(ctx->version, BAD_CAST "1.0")) &&
12252 (!xmlStrEqual(ctxt->input->version, BAD_CAST "1.0"))) {
12253 xmlFatalErrMsg(ctxt, XML_ERR_VERSION_MISMATCH,
12254 "Version mismatch between document and entity\n");
12255 }
Owen Taylor3473f882001-02-23 17:55:21 +000012256 }
12257
12258 /*
12259 * Doing validity checking on chunk doesn't make sense
12260 */
12261 ctxt->instate = XML_PARSER_CONTENT;
12262 ctxt->validate = ctx->validate;
Daniel Veillard5f8d1a32003-03-23 21:02:00 +000012263 ctxt->valid = ctx->valid;
Owen Taylor3473f882001-02-23 17:55:21 +000012264 ctxt->loadsubset = ctx->loadsubset;
12265 ctxt->depth = ctx->depth + 1;
12266 ctxt->replaceEntities = ctx->replaceEntities;
12267 if (ctxt->validate) {
12268 ctxt->vctxt.error = ctx->vctxt.error;
12269 ctxt->vctxt.warning = ctx->vctxt.warning;
Owen Taylor3473f882001-02-23 17:55:21 +000012270 } else {
12271 ctxt->vctxt.error = NULL;
12272 ctxt->vctxt.warning = NULL;
12273 }
Daniel Veillarda9142e72001-06-19 11:07:54 +000012274 ctxt->vctxt.nodeTab = NULL;
12275 ctxt->vctxt.nodeNr = 0;
12276 ctxt->vctxt.nodeMax = 0;
12277 ctxt->vctxt.node = NULL;
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000012278 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
12279 ctxt->dict = ctx->dict;
Daniel Veillardfd343dc2003-10-31 10:55:22 +000012280 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
12281 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
12282 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000012283 ctxt->dictNames = ctx->dictNames;
12284 ctxt->attsDefault = ctx->attsDefault;
12285 ctxt->attsSpecial = ctx->attsSpecial;
William M. Brack503b6102004-08-19 02:17:27 +000012286 ctxt->linenumbers = ctx->linenumbers;
Owen Taylor3473f882001-02-23 17:55:21 +000012287
12288 xmlParseContent(ctxt);
Daniel Veillard40ec29a2008-07-30 12:35:40 +000012289
Daniel Veillard5f8d1a32003-03-23 21:02:00 +000012290 ctx->validate = ctxt->validate;
12291 ctx->valid = ctxt->valid;
Owen Taylor3473f882001-02-23 17:55:21 +000012292 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012293 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000012294 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012295 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000012296 }
12297 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012298 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000012299 }
12300
12301 if (!ctxt->wellFormed) {
12302 if (ctxt->errNo == 0)
12303 ret = 1;
12304 else
12305 ret = ctxt->errNo;
12306 } else {
Daniel Veillardcda96922001-08-21 10:56:31 +000012307 if (lst != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000012308 xmlNodePtr cur;
12309
12310 /*
12311 * Return the newly created nodeset after unlinking it from
12312 * they pseudo parent.
12313 */
12314 cur = newDoc->children->children;
Daniel Veillardcda96922001-08-21 10:56:31 +000012315 *lst = cur;
Owen Taylor3473f882001-02-23 17:55:21 +000012316 while (cur != NULL) {
12317 cur->parent = NULL;
12318 cur = cur->next;
12319 }
12320 newDoc->children->children = NULL;
12321 }
12322 ret = 0;
12323 }
12324 ctxt->sax = oldsax;
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000012325 ctxt->dict = NULL;
12326 ctxt->attsDefault = NULL;
12327 ctxt->attsSpecial = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000012328 xmlFreeParserCtxt(ctxt);
12329 newDoc->intSubset = NULL;
12330 newDoc->extSubset = NULL;
12331 xmlFreeDoc(newDoc);
Daniel Veillard40ec29a2008-07-30 12:35:40 +000012332
Owen Taylor3473f882001-02-23 17:55:21 +000012333 return(ret);
12334}
12335
12336/**
Daniel Veillard257d9102001-05-08 10:41:44 +000012337 * xmlParseExternalEntityPrivate:
Owen Taylor3473f882001-02-23 17:55:21 +000012338 * @doc: the document the chunk pertains to
Daniel Veillarda97a19b2001-05-20 13:19:52 +000012339 * @oldctxt: the previous parser context if available
Owen Taylor3473f882001-02-23 17:55:21 +000012340 * @sax: the SAX handler bloc (possibly NULL)
12341 * @user_data: The user data returned on SAX callbacks (possibly NULL)
12342 * @depth: Used for loop detection, use 0
12343 * @URL: the URL for the entity to load
12344 * @ID: the System ID for the entity to load
12345 * @list: the return value for the set of parsed nodes
12346 *
Daniel Veillard257d9102001-05-08 10:41:44 +000012347 * Private version of xmlParseExternalEntity()
Owen Taylor3473f882001-02-23 17:55:21 +000012348 *
12349 * Returns 0 if the entity is well formed, -1 in case of args problem and
12350 * the parser error code otherwise
12351 */
12352
Daniel Veillard7d515752003-09-26 19:12:37 +000012353static xmlParserErrors
Daniel Veillarda97a19b2001-05-20 13:19:52 +000012354xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
12355 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +000012356 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +000012357 const xmlChar *ID, xmlNodePtr *list) {
Owen Taylor3473f882001-02-23 17:55:21 +000012358 xmlParserCtxtPtr ctxt;
12359 xmlDocPtr newDoc;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012360 xmlNodePtr newRoot;
Owen Taylor3473f882001-02-23 17:55:21 +000012361 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillard7d515752003-09-26 19:12:37 +000012362 xmlParserErrors ret = XML_ERR_OK;
Daniel Veillard87a764e2001-06-20 17:41:10 +000012363 xmlChar start[4];
12364 xmlCharEncoding enc;
Owen Taylor3473f882001-02-23 17:55:21 +000012365
Daniel Veillard0161e632008-08-28 15:36:32 +000012366 if (((depth > 40) &&
12367 ((oldctxt == NULL) || (oldctxt->options & XML_PARSE_HUGE) == 0)) ||
12368 (depth > 1024)) {
Owen Taylor3473f882001-02-23 17:55:21 +000012369 return(XML_ERR_ENTITY_LOOP);
12370 }
12371
Owen Taylor3473f882001-02-23 17:55:21 +000012372 if (list != NULL)
12373 *list = NULL;
12374 if ((URL == NULL) && (ID == NULL))
Daniel Veillard7d515752003-09-26 19:12:37 +000012375 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard30e76072006-03-09 14:13:55 +000012376 if (doc == NULL)
Daniel Veillard7d515752003-09-26 19:12:37 +000012377 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000012378
12379
Rob Richards9c0aa472009-03-26 18:10:19 +000012380 ctxt = xmlCreateEntityParserCtxtInternal(URL, ID, NULL, oldctxt);
William M. Brackb670e2e2003-09-27 01:05:55 +000012381 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
Owen Taylor3473f882001-02-23 17:55:21 +000012382 ctxt->userData = ctxt;
Daniel Veillarda97a19b2001-05-20 13:19:52 +000012383 if (oldctxt != NULL) {
12384 ctxt->_private = oldctxt->_private;
12385 ctxt->loadsubset = oldctxt->loadsubset;
12386 ctxt->validate = oldctxt->validate;
12387 ctxt->external = oldctxt->external;
Daniel Veillard44e1dd02003-02-21 23:23:28 +000012388 ctxt->record_info = oldctxt->record_info;
12389 ctxt->node_seq.maximum = oldctxt->node_seq.maximum;
12390 ctxt->node_seq.length = oldctxt->node_seq.length;
12391 ctxt->node_seq.buffer = oldctxt->node_seq.buffer;
Daniel Veillarda97a19b2001-05-20 13:19:52 +000012392 } else {
12393 /*
12394 * Doing validity checking on chunk without context
12395 * doesn't make sense
12396 */
12397 ctxt->_private = NULL;
12398 ctxt->validate = 0;
12399 ctxt->external = 2;
12400 ctxt->loadsubset = 0;
12401 }
Owen Taylor3473f882001-02-23 17:55:21 +000012402 if (sax != NULL) {
12403 oldsax = ctxt->sax;
12404 ctxt->sax = sax;
12405 if (user_data != NULL)
12406 ctxt->userData = user_data;
12407 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000012408 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000012409 newDoc = xmlNewDoc(BAD_CAST "1.0");
12410 if (newDoc == NULL) {
Daniel Veillard44e1dd02003-02-21 23:23:28 +000012411 ctxt->node_seq.maximum = 0;
12412 ctxt->node_seq.length = 0;
12413 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000012414 xmlFreeParserCtxt(ctxt);
Daniel Veillard7d515752003-09-26 19:12:37 +000012415 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000012416 }
Daniel Veillardae0765b2008-07-31 19:54:59 +000012417 newDoc->properties = XML_DOC_INTERNAL;
Daniel Veillard30e76072006-03-09 14:13:55 +000012418 newDoc->intSubset = doc->intSubset;
12419 newDoc->extSubset = doc->extSubset;
12420 newDoc->dict = doc->dict;
Daniel Veillard03a53c32004-10-26 16:06:51 +000012421 xmlDictReference(newDoc->dict);
12422
Owen Taylor3473f882001-02-23 17:55:21 +000012423 if (doc->URL != NULL) {
12424 newDoc->URL = xmlStrdup(doc->URL);
12425 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012426 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
12427 if (newRoot == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000012428 if (sax != NULL)
12429 ctxt->sax = oldsax;
Daniel Veillard44e1dd02003-02-21 23:23:28 +000012430 ctxt->node_seq.maximum = 0;
12431 ctxt->node_seq.length = 0;
12432 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000012433 xmlFreeParserCtxt(ctxt);
12434 newDoc->intSubset = NULL;
12435 newDoc->extSubset = NULL;
12436 xmlFreeDoc(newDoc);
Daniel Veillard7d515752003-09-26 19:12:37 +000012437 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000012438 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012439 xmlAddChild((xmlNodePtr) newDoc, newRoot);
Owen Taylor3473f882001-02-23 17:55:21 +000012440 nodePush(ctxt, newDoc->children);
Daniel Veillard30e76072006-03-09 14:13:55 +000012441 ctxt->myDoc = doc;
12442 newRoot->doc = doc;
Owen Taylor3473f882001-02-23 17:55:21 +000012443
Daniel Veillard0161e632008-08-28 15:36:32 +000012444 /*
Daniel Veillard87a764e2001-06-20 17:41:10 +000012445 * Get the 4 first bytes and decode the charset
12446 * if enc != XML_CHAR_ENCODING_NONE
12447 * plug some encoding conversion routines.
12448 */
12449 GROW;
Daniel Veillard4aede2e2003-10-17 12:43:59 +000012450 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
12451 start[0] = RAW;
12452 start[1] = NXT(1);
12453 start[2] = NXT(2);
12454 start[3] = NXT(3);
12455 enc = xmlDetectCharEncoding(start, 4);
12456 if (enc != XML_CHAR_ENCODING_NONE) {
12457 xmlSwitchEncoding(ctxt, enc);
12458 }
Daniel Veillard87a764e2001-06-20 17:41:10 +000012459 }
12460
Owen Taylor3473f882001-02-23 17:55:21 +000012461 /*
12462 * Parse a possible text declaration first
12463 */
Daniel Veillarda07050d2003-10-19 14:46:32 +000012464 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +000012465 xmlParseTextDecl(ctxt);
12466 }
12467
Owen Taylor3473f882001-02-23 17:55:21 +000012468 ctxt->instate = XML_PARSER_CONTENT;
Owen Taylor3473f882001-02-23 17:55:21 +000012469 ctxt->depth = depth;
12470
12471 xmlParseContent(ctxt);
Daniel Veillard0161e632008-08-28 15:36:32 +000012472
Daniel Veillard561b7f82002-03-20 21:55:57 +000012473 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012474 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard561b7f82002-03-20 21:55:57 +000012475 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012476 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000012477 }
12478 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012479 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000012480 }
12481
12482 if (!ctxt->wellFormed) {
12483 if (ctxt->errNo == 0)
Daniel Veillard7d515752003-09-26 19:12:37 +000012484 ret = XML_ERR_INTERNAL_ERROR;
Owen Taylor3473f882001-02-23 17:55:21 +000012485 else
William M. Brack7b9154b2003-09-27 19:23:50 +000012486 ret = (xmlParserErrors)ctxt->errNo;
Owen Taylor3473f882001-02-23 17:55:21 +000012487 } else {
12488 if (list != NULL) {
12489 xmlNodePtr cur;
12490
12491 /*
12492 * Return the newly created nodeset after unlinking it from
12493 * they pseudo parent.
12494 */
12495 cur = newDoc->children->children;
12496 *list = cur;
12497 while (cur != NULL) {
12498 cur->parent = NULL;
12499 cur = cur->next;
12500 }
12501 newDoc->children->children = NULL;
12502 }
Daniel Veillard7d515752003-09-26 19:12:37 +000012503 ret = XML_ERR_OK;
Owen Taylor3473f882001-02-23 17:55:21 +000012504 }
Daniel Veillard0161e632008-08-28 15:36:32 +000012505
12506 /*
12507 * Record in the parent context the number of entities replacement
12508 * done when parsing that reference.
12509 */
Daniel Veillard76d36452009-09-07 11:19:33 +020012510 if (oldctxt != NULL)
12511 oldctxt->nbentities += ctxt->nbentities;
12512
Daniel Veillard0161e632008-08-28 15:36:32 +000012513 /*
12514 * Also record the size of the entity parsed
12515 */
12516 if (ctxt->input != NULL) {
12517 oldctxt->sizeentities += ctxt->input->consumed;
12518 oldctxt->sizeentities += (ctxt->input->cur - ctxt->input->base);
12519 }
12520 /*
12521 * And record the last error if any
12522 */
12523 if (ctxt->lastError.code != XML_ERR_OK)
12524 xmlCopyError(&ctxt->lastError, &oldctxt->lastError);
12525
Owen Taylor3473f882001-02-23 17:55:21 +000012526 if (sax != NULL)
12527 ctxt->sax = oldsax;
Daniel Veillard0046c0f2003-02-23 13:52:30 +000012528 oldctxt->node_seq.maximum = ctxt->node_seq.maximum;
12529 oldctxt->node_seq.length = ctxt->node_seq.length;
12530 oldctxt->node_seq.buffer = ctxt->node_seq.buffer;
Daniel Veillard44e1dd02003-02-21 23:23:28 +000012531 ctxt->node_seq.maximum = 0;
12532 ctxt->node_seq.length = 0;
12533 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000012534 xmlFreeParserCtxt(ctxt);
12535 newDoc->intSubset = NULL;
12536 newDoc->extSubset = NULL;
12537 xmlFreeDoc(newDoc);
Daniel Veillard0161e632008-08-28 15:36:32 +000012538
Owen Taylor3473f882001-02-23 17:55:21 +000012539 return(ret);
12540}
12541
Daniel Veillard81273902003-09-30 00:43:48 +000012542#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000012543/**
Daniel Veillard257d9102001-05-08 10:41:44 +000012544 * xmlParseExternalEntity:
12545 * @doc: the document the chunk pertains to
12546 * @sax: the SAX handler bloc (possibly NULL)
12547 * @user_data: The user data returned on SAX callbacks (possibly NULL)
12548 * @depth: Used for loop detection, use 0
12549 * @URL: the URL for the entity to load
12550 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +000012551 * @lst: the return value for the set of parsed nodes
Daniel Veillard257d9102001-05-08 10:41:44 +000012552 *
12553 * Parse an external general entity
12554 * An external general parsed entity is well-formed if it matches the
12555 * production labeled extParsedEnt.
12556 *
12557 * [78] extParsedEnt ::= TextDecl? content
12558 *
12559 * Returns 0 if the entity is well formed, -1 in case of args problem and
12560 * the parser error code otherwise
12561 */
12562
12563int
12564xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardcda96922001-08-21 10:56:31 +000012565 int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *lst) {
Daniel Veillarda97a19b2001-05-20 13:19:52 +000012566 return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL,
Daniel Veillardcda96922001-08-21 10:56:31 +000012567 ID, lst));
Daniel Veillard257d9102001-05-08 10:41:44 +000012568}
12569
12570/**
Daniel Veillarde020c3a2001-03-21 18:06:15 +000012571 * xmlParseBalancedChunkMemory:
Owen Taylor3473f882001-02-23 17:55:21 +000012572 * @doc: the document the chunk pertains to
12573 * @sax: the SAX handler bloc (possibly NULL)
12574 * @user_data: The user data returned on SAX callbacks (possibly NULL)
12575 * @depth: Used for loop detection, use 0
12576 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
Daniel Veillardcda96922001-08-21 10:56:31 +000012577 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +000012578 *
12579 * Parse a well-balanced chunk of an XML document
12580 * called by the parser
12581 * The allowed sequence for the Well Balanced Chunk is the one defined by
12582 * the content production in the XML grammar:
12583 *
12584 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
12585 *
12586 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
12587 * the parser error code otherwise
12588 */
12589
12590int
12591xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
Daniel Veillardcda96922001-08-21 10:56:31 +000012592 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) {
Daniel Veillard58e44c92002-08-02 22:19:49 +000012593 return xmlParseBalancedChunkMemoryRecover( doc, sax, user_data,
12594 depth, string, lst, 0 );
12595}
Daniel Veillard81273902003-09-30 00:43:48 +000012596#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillard58e44c92002-08-02 22:19:49 +000012597
12598/**
Daniel Veillard328f48c2002-11-15 15:24:34 +000012599 * xmlParseBalancedChunkMemoryInternal:
12600 * @oldctxt: the existing parsing context
12601 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
12602 * @user_data: the user data field for the parser context
12603 * @lst: the return value for the set of parsed nodes
12604 *
12605 *
12606 * Parse a well-balanced chunk of an XML document
12607 * called by the parser
12608 * The allowed sequence for the Well Balanced Chunk is the one defined by
12609 * the content production in the XML grammar:
12610 *
12611 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
12612 *
Daniel Veillard7d515752003-09-26 19:12:37 +000012613 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
12614 * error code otherwise
Daniel Veillard0161e632008-08-28 15:36:32 +000012615 *
Daniel Veillard328f48c2002-11-15 15:24:34 +000012616 * In case recover is set to 1, the nodelist will not be empty even if
Daniel Veillard0161e632008-08-28 15:36:32 +000012617 * the parsed chunk is not well balanced.
Daniel Veillard328f48c2002-11-15 15:24:34 +000012618 */
Daniel Veillard7d515752003-09-26 19:12:37 +000012619static xmlParserErrors
Daniel Veillard328f48c2002-11-15 15:24:34 +000012620xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
12621 const xmlChar *string, void *user_data, xmlNodePtr *lst) {
12622 xmlParserCtxtPtr ctxt;
Daniel Veillard68e9e742002-11-16 15:35:11 +000012623 xmlDocPtr newDoc = NULL;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012624 xmlNodePtr newRoot;
Daniel Veillard328f48c2002-11-15 15:24:34 +000012625 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000012626 xmlNodePtr content = NULL;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012627 xmlNodePtr last = NULL;
Daniel Veillard328f48c2002-11-15 15:24:34 +000012628 int size;
Daniel Veillard7d515752003-09-26 19:12:37 +000012629 xmlParserErrors ret = XML_ERR_OK;
Daniel Veillard74eaec12009-08-26 15:57:20 +020012630#ifdef SAX2
12631 int i;
12632#endif
Daniel Veillard328f48c2002-11-15 15:24:34 +000012633
Daniel Veillard0161e632008-08-28 15:36:32 +000012634 if (((oldctxt->depth > 40) && ((oldctxt->options & XML_PARSE_HUGE) == 0)) ||
12635 (oldctxt->depth > 1024)) {
Daniel Veillard328f48c2002-11-15 15:24:34 +000012636 return(XML_ERR_ENTITY_LOOP);
12637 }
12638
12639
12640 if (lst != NULL)
12641 *lst = NULL;
12642 if (string == NULL)
William M. Brack7b9154b2003-09-27 19:23:50 +000012643 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard328f48c2002-11-15 15:24:34 +000012644
12645 size = xmlStrlen(string);
12646
12647 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
William M. Brack7b9154b2003-09-27 19:23:50 +000012648 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
Daniel Veillard328f48c2002-11-15 15:24:34 +000012649 if (user_data != NULL)
12650 ctxt->userData = user_data;
12651 else
12652 ctxt->userData = ctxt;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000012653 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
12654 ctxt->dict = oldctxt->dict;
Daniel Veillardfd343dc2003-10-31 10:55:22 +000012655 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
12656 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
12657 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
Daniel Veillard328f48c2002-11-15 15:24:34 +000012658
Daniel Veillard74eaec12009-08-26 15:57:20 +020012659#ifdef SAX2
12660 /* propagate namespaces down the entity */
12661 for (i = 0;i < oldctxt->nsNr;i += 2) {
12662 nsPush(ctxt, oldctxt->nsTab[i], oldctxt->nsTab[i+1]);
12663 }
12664#endif
12665
Daniel Veillard328f48c2002-11-15 15:24:34 +000012666 oldsax = ctxt->sax;
12667 ctxt->sax = oldctxt->sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000012668 xmlDetectSAX2(ctxt);
Daniel Veillard87ab1c12003-12-21 13:01:56 +000012669 ctxt->replaceEntities = oldctxt->replaceEntities;
12670 ctxt->options = oldctxt->options;
Daniel Veillard0161e632008-08-28 15:36:32 +000012671
Daniel Veillarde1ca5032002-12-09 14:13:43 +000012672 ctxt->_private = oldctxt->_private;
Daniel Veillard328f48c2002-11-15 15:24:34 +000012673 if (oldctxt->myDoc == NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000012674 newDoc = xmlNewDoc(BAD_CAST "1.0");
12675 if (newDoc == NULL) {
12676 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000012677 ctxt->dict = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000012678 xmlFreeParserCtxt(ctxt);
William M. Brack7b9154b2003-09-27 19:23:50 +000012679 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard68e9e742002-11-16 15:35:11 +000012680 }
Daniel Veillardae0765b2008-07-31 19:54:59 +000012681 newDoc->properties = XML_DOC_INTERNAL;
Daniel Veillard03a53c32004-10-26 16:06:51 +000012682 newDoc->dict = ctxt->dict;
12683 xmlDictReference(newDoc->dict);
Daniel Veillard328f48c2002-11-15 15:24:34 +000012684 ctxt->myDoc = newDoc;
Daniel Veillard68e9e742002-11-16 15:35:11 +000012685 } else {
12686 ctxt->myDoc = oldctxt->myDoc;
12687 content = ctxt->myDoc->children;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012688 last = ctxt->myDoc->last;
Daniel Veillard328f48c2002-11-15 15:24:34 +000012689 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012690 newRoot = xmlNewDocNode(ctxt->myDoc, NULL, BAD_CAST "pseudoroot", NULL);
12691 if (newRoot == NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000012692 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000012693 ctxt->dict = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000012694 xmlFreeParserCtxt(ctxt);
Daniel Veillard03a53c32004-10-26 16:06:51 +000012695 if (newDoc != NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000012696 xmlFreeDoc(newDoc);
Daniel Veillard03a53c32004-10-26 16:06:51 +000012697 }
William M. Brack7b9154b2003-09-27 19:23:50 +000012698 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard68e9e742002-11-16 15:35:11 +000012699 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012700 ctxt->myDoc->children = NULL;
12701 ctxt->myDoc->last = NULL;
12702 xmlAddChild((xmlNodePtr) ctxt->myDoc, newRoot);
Daniel Veillard68e9e742002-11-16 15:35:11 +000012703 nodePush(ctxt, ctxt->myDoc->children);
Daniel Veillard328f48c2002-11-15 15:24:34 +000012704 ctxt->instate = XML_PARSER_CONTENT;
12705 ctxt->depth = oldctxt->depth + 1;
12706
Daniel Veillard328f48c2002-11-15 15:24:34 +000012707 ctxt->validate = 0;
12708 ctxt->loadsubset = oldctxt->loadsubset;
Daniel Veillardef8dd7b2003-03-23 12:02:56 +000012709 if ((oldctxt->validate) || (oldctxt->replaceEntities != 0)) {
12710 /*
12711 * ID/IDREF registration will be done in xmlValidateElement below
12712 */
12713 ctxt->loadsubset |= XML_SKIP_IDS;
12714 }
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000012715 ctxt->dictNames = oldctxt->dictNames;
Daniel Veillard95d2d5b2003-10-27 14:54:49 +000012716 ctxt->attsDefault = oldctxt->attsDefault;
12717 ctxt->attsSpecial = oldctxt->attsSpecial;
Daniel Veillard328f48c2002-11-15 15:24:34 +000012718
Daniel Veillard68e9e742002-11-16 15:35:11 +000012719 xmlParseContent(ctxt);
Daniel Veillard328f48c2002-11-15 15:24:34 +000012720 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012721 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000012722 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012723 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000012724 }
Daniel Veillard68e9e742002-11-16 15:35:11 +000012725 if (ctxt->node != ctxt->myDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012726 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000012727 }
12728
12729 if (!ctxt->wellFormed) {
12730 if (ctxt->errNo == 0)
Daniel Veillard7d515752003-09-26 19:12:37 +000012731 ret = XML_ERR_INTERNAL_ERROR;
Daniel Veillard328f48c2002-11-15 15:24:34 +000012732 else
William M. Brack7b9154b2003-09-27 19:23:50 +000012733 ret = (xmlParserErrors)ctxt->errNo;
Daniel Veillard328f48c2002-11-15 15:24:34 +000012734 } else {
William M. Brack7b9154b2003-09-27 19:23:50 +000012735 ret = XML_ERR_OK;
Daniel Veillard328f48c2002-11-15 15:24:34 +000012736 }
Daniel Veillard0161e632008-08-28 15:36:32 +000012737
William M. Brack7b9154b2003-09-27 19:23:50 +000012738 if ((lst != NULL) && (ret == XML_ERR_OK)) {
Daniel Veillard328f48c2002-11-15 15:24:34 +000012739 xmlNodePtr cur;
12740
12741 /*
12742 * Return the newly created nodeset after unlinking it from
12743 * they pseudo parent.
12744 */
Daniel Veillard68e9e742002-11-16 15:35:11 +000012745 cur = ctxt->myDoc->children->children;
Daniel Veillard328f48c2002-11-15 15:24:34 +000012746 *lst = cur;
12747 while (cur != NULL) {
Daniel Veillard4432df22003-09-28 18:58:27 +000012748#ifdef LIBXML_VALID_ENABLED
Daniel Veillard8874b942005-08-25 13:19:21 +000012749 if ((oldctxt->validate) && (oldctxt->wellFormed) &&
12750 (oldctxt->myDoc) && (oldctxt->myDoc->intSubset) &&
12751 (cur->type == XML_ELEMENT_NODE)) {
Daniel Veillard8d589042003-02-04 15:07:21 +000012752 oldctxt->valid &= xmlValidateElement(&oldctxt->vctxt,
12753 oldctxt->myDoc, cur);
12754 }
Daniel Veillard4432df22003-09-28 18:58:27 +000012755#endif /* LIBXML_VALID_ENABLED */
Daniel Veillard328f48c2002-11-15 15:24:34 +000012756 cur->parent = NULL;
12757 cur = cur->next;
12758 }
Daniel Veillard68e9e742002-11-16 15:35:11 +000012759 ctxt->myDoc->children->children = NULL;
12760 }
12761 if (ctxt->myDoc != NULL) {
12762 xmlFreeNode(ctxt->myDoc->children);
12763 ctxt->myDoc->children = content;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012764 ctxt->myDoc->last = last;
Daniel Veillard328f48c2002-11-15 15:24:34 +000012765 }
Daniel Veillard0161e632008-08-28 15:36:32 +000012766
12767 /*
12768 * Record in the parent context the number of entities replacement
12769 * done when parsing that reference.
12770 */
Daniel Veillard4bf899b2008-08-20 17:04:30 +000012771 oldctxt->nbentities += ctxt->nbentities;
Daniel Veillard0161e632008-08-28 15:36:32 +000012772 /*
12773 * Also record the last error if any
12774 */
12775 if (ctxt->lastError.code != XML_ERR_OK)
12776 xmlCopyError(&ctxt->lastError, &oldctxt->lastError);
12777
Daniel Veillard328f48c2002-11-15 15:24:34 +000012778 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000012779 ctxt->dict = NULL;
Daniel Veillard95d2d5b2003-10-27 14:54:49 +000012780 ctxt->attsDefault = NULL;
12781 ctxt->attsSpecial = NULL;
Daniel Veillard328f48c2002-11-15 15:24:34 +000012782 xmlFreeParserCtxt(ctxt);
Daniel Veillard03a53c32004-10-26 16:06:51 +000012783 if (newDoc != NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000012784 xmlFreeDoc(newDoc);
Daniel Veillard03a53c32004-10-26 16:06:51 +000012785 }
Daniel Veillard0161e632008-08-28 15:36:32 +000012786
Daniel Veillard328f48c2002-11-15 15:24:34 +000012787 return(ret);
12788}
12789
Daniel Veillard29b17482004-08-16 00:39:03 +000012790/**
12791 * xmlParseInNodeContext:
12792 * @node: the context node
12793 * @data: the input string
12794 * @datalen: the input string length in bytes
12795 * @options: a combination of xmlParserOption
12796 * @lst: the return value for the set of parsed nodes
12797 *
12798 * Parse a well-balanced chunk of an XML document
12799 * within the context (DTD, namespaces, etc ...) of the given node.
12800 *
12801 * The allowed sequence for the data is a Well Balanced Chunk defined by
12802 * the content production in the XML grammar:
12803 *
12804 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
12805 *
12806 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
12807 * error code otherwise
12808 */
12809xmlParserErrors
12810xmlParseInNodeContext(xmlNodePtr node, const char *data, int datalen,
12811 int options, xmlNodePtr *lst) {
12812#ifdef SAX2
12813 xmlParserCtxtPtr ctxt;
12814 xmlDocPtr doc = NULL;
12815 xmlNodePtr fake, cur;
12816 int nsnr = 0;
12817
12818 xmlParserErrors ret = XML_ERR_OK;
12819
12820 /*
12821 * check all input parameters, grab the document
12822 */
12823 if ((lst == NULL) || (node == NULL) || (data == NULL) || (datalen < 0))
12824 return(XML_ERR_INTERNAL_ERROR);
12825 switch (node->type) {
12826 case XML_ELEMENT_NODE:
12827 case XML_ATTRIBUTE_NODE:
12828 case XML_TEXT_NODE:
12829 case XML_CDATA_SECTION_NODE:
12830 case XML_ENTITY_REF_NODE:
12831 case XML_PI_NODE:
12832 case XML_COMMENT_NODE:
12833 case XML_DOCUMENT_NODE:
12834 case XML_HTML_DOCUMENT_NODE:
12835 break;
12836 default:
12837 return(XML_ERR_INTERNAL_ERROR);
12838
12839 }
12840 while ((node != NULL) && (node->type != XML_ELEMENT_NODE) &&
12841 (node->type != XML_DOCUMENT_NODE) &&
12842 (node->type != XML_HTML_DOCUMENT_NODE))
12843 node = node->parent;
12844 if (node == NULL)
12845 return(XML_ERR_INTERNAL_ERROR);
12846 if (node->type == XML_ELEMENT_NODE)
12847 doc = node->doc;
12848 else
12849 doc = (xmlDocPtr) node;
12850 if (doc == NULL)
12851 return(XML_ERR_INTERNAL_ERROR);
12852
12853 /*
12854 * allocate a context and set-up everything not related to the
12855 * node position in the tree
12856 */
12857 if (doc->type == XML_DOCUMENT_NODE)
12858 ctxt = xmlCreateMemoryParserCtxt((char *) data, datalen);
12859#ifdef LIBXML_HTML_ENABLED
12860 else if (doc->type == XML_HTML_DOCUMENT_NODE)
12861 ctxt = htmlCreateMemoryParserCtxt((char *) data, datalen);
12862#endif
12863 else
12864 return(XML_ERR_INTERNAL_ERROR);
12865
12866 if (ctxt == NULL)
12867 return(XML_ERR_NO_MEMORY);
12868 fake = xmlNewComment(NULL);
12869 if (fake == NULL) {
12870 xmlFreeParserCtxt(ctxt);
12871 return(XML_ERR_NO_MEMORY);
12872 }
12873 xmlAddChild(node, fake);
William M. Brackc3f81342004-10-03 01:22:44 +000012874
12875 /*
12876 * Use input doc's dict if present, else assure XML_PARSE_NODICT is set.
12877 * We need a dictionary for xmlDetectSAX2, so if there's no doc dict
12878 * we must wait until the last moment to free the original one.
12879 */
Daniel Veillard29b17482004-08-16 00:39:03 +000012880 if (doc->dict != NULL) {
William M. Brackc3f81342004-10-03 01:22:44 +000012881 if (ctxt->dict != NULL)
Daniel Veillard29b17482004-08-16 00:39:03 +000012882 xmlDictFree(ctxt->dict);
12883 ctxt->dict = doc->dict;
William M. Brackc3f81342004-10-03 01:22:44 +000012884 } else
12885 options |= XML_PARSE_NODICT;
12886
Daniel Veillard37334572008-07-31 08:20:02 +000012887 xmlCtxtUseOptionsInternal(ctxt, options, NULL);
Daniel Veillard29b17482004-08-16 00:39:03 +000012888 xmlDetectSAX2(ctxt);
12889 ctxt->myDoc = doc;
12890
12891 if (node->type == XML_ELEMENT_NODE) {
12892 nodePush(ctxt, node);
12893 /*
12894 * initialize the SAX2 namespaces stack
12895 */
12896 cur = node;
12897 while ((cur != NULL) && (cur->type == XML_ELEMENT_NODE)) {
12898 xmlNsPtr ns = cur->nsDef;
12899 const xmlChar *iprefix, *ihref;
12900
12901 while (ns != NULL) {
12902 if (ctxt->dict) {
12903 iprefix = xmlDictLookup(ctxt->dict, ns->prefix, -1);
12904 ihref = xmlDictLookup(ctxt->dict, ns->href, -1);
12905 } else {
12906 iprefix = ns->prefix;
12907 ihref = ns->href;
12908 }
12909
12910 if (xmlGetNamespace(ctxt, iprefix) == NULL) {
12911 nsPush(ctxt, iprefix, ihref);
12912 nsnr++;
12913 }
12914 ns = ns->next;
12915 }
12916 cur = cur->parent;
12917 }
12918 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillard0161e632008-08-28 15:36:32 +000012919 }
Daniel Veillard29b17482004-08-16 00:39:03 +000012920
12921 if ((ctxt->validate) || (ctxt->replaceEntities != 0)) {
12922 /*
12923 * ID/IDREF registration will be done in xmlValidateElement below
12924 */
12925 ctxt->loadsubset |= XML_SKIP_IDS;
12926 }
12927
Daniel Veillard499cc922006-01-18 17:22:35 +000012928#ifdef LIBXML_HTML_ENABLED
12929 if (doc->type == XML_HTML_DOCUMENT_NODE)
12930 __htmlParseContent(ctxt);
12931 else
12932#endif
12933 xmlParseContent(ctxt);
12934
Daniel Veillard29b17482004-08-16 00:39:03 +000012935 nsPop(ctxt, nsnr);
12936 if ((RAW == '<') && (NXT(1) == '/')) {
12937 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
12938 } else if (RAW != 0) {
12939 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
12940 }
12941 if ((ctxt->node != NULL) && (ctxt->node != node)) {
12942 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
12943 ctxt->wellFormed = 0;
12944 }
12945
12946 if (!ctxt->wellFormed) {
12947 if (ctxt->errNo == 0)
12948 ret = XML_ERR_INTERNAL_ERROR;
12949 else
12950 ret = (xmlParserErrors)ctxt->errNo;
12951 } else {
12952 ret = XML_ERR_OK;
12953 }
Daniel Veillard0161e632008-08-28 15:36:32 +000012954
Daniel Veillard29b17482004-08-16 00:39:03 +000012955 /*
12956 * Return the newly created nodeset after unlinking it from
12957 * the pseudo sibling.
12958 */
Daniel Veillard0161e632008-08-28 15:36:32 +000012959
Daniel Veillard29b17482004-08-16 00:39:03 +000012960 cur = fake->next;
12961 fake->next = NULL;
12962 node->last = fake;
12963
12964 if (cur != NULL) {
12965 cur->prev = NULL;
12966 }
12967
12968 *lst = cur;
12969
12970 while (cur != NULL) {
12971 cur->parent = NULL;
12972 cur = cur->next;
12973 }
12974
12975 xmlUnlinkNode(fake);
12976 xmlFreeNode(fake);
12977
12978
12979 if (ret != XML_ERR_OK) {
12980 xmlFreeNodeList(*lst);
12981 *lst = NULL;
12982 }
William M. Brackc3f81342004-10-03 01:22:44 +000012983
William M. Brackb7b54de2004-10-06 16:38:01 +000012984 if (doc->dict != NULL)
12985 ctxt->dict = NULL;
Daniel Veillard29b17482004-08-16 00:39:03 +000012986 xmlFreeParserCtxt(ctxt);
Daniel Veillard0161e632008-08-28 15:36:32 +000012987
Daniel Veillard29b17482004-08-16 00:39:03 +000012988 return(ret);
12989#else /* !SAX2 */
12990 return(XML_ERR_INTERNAL_ERROR);
12991#endif
12992}
12993
Daniel Veillard81273902003-09-30 00:43:48 +000012994#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard328f48c2002-11-15 15:24:34 +000012995/**
Daniel Veillard58e44c92002-08-02 22:19:49 +000012996 * xmlParseBalancedChunkMemoryRecover:
12997 * @doc: the document the chunk pertains to
12998 * @sax: the SAX handler bloc (possibly NULL)
12999 * @user_data: The user data returned on SAX callbacks (possibly NULL)
13000 * @depth: Used for loop detection, use 0
13001 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
13002 * @lst: the return value for the set of parsed nodes
13003 * @recover: return nodes even if the data is broken (use 0)
13004 *
13005 *
13006 * Parse a well-balanced chunk of an XML document
13007 * called by the parser
13008 * The allowed sequence for the Well Balanced Chunk is the one defined by
13009 * the content production in the XML grammar:
13010 *
13011 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13012 *
13013 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
13014 * the parser error code otherwise
Daniel Veillard2135fc22008-04-04 16:10:51 +000013015 *
Daniel Veillard58e44c92002-08-02 22:19:49 +000013016 * In case recover is set to 1, the nodelist will not be empty even if
Daniel Veillard2135fc22008-04-04 16:10:51 +000013017 * the parsed chunk is not well balanced, assuming the parsing succeeded to
13018 * some extent.
Daniel Veillard58e44c92002-08-02 22:19:49 +000013019 */
13020int
13021xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax,
Daniel Veillard2135fc22008-04-04 16:10:51 +000013022 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst,
Daniel Veillard58e44c92002-08-02 22:19:49 +000013023 int recover) {
Owen Taylor3473f882001-02-23 17:55:21 +000013024 xmlParserCtxtPtr ctxt;
13025 xmlDocPtr newDoc;
13026 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000013027 xmlNodePtr content, newRoot;
Owen Taylor3473f882001-02-23 17:55:21 +000013028 int size;
13029 int ret = 0;
13030
Daniel Veillard0161e632008-08-28 15:36:32 +000013031 if (depth > 40) {
Owen Taylor3473f882001-02-23 17:55:21 +000013032 return(XML_ERR_ENTITY_LOOP);
13033 }
13034
13035
Daniel Veillardcda96922001-08-21 10:56:31 +000013036 if (lst != NULL)
13037 *lst = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000013038 if (string == NULL)
13039 return(-1);
13040
13041 size = xmlStrlen(string);
13042
13043 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
13044 if (ctxt == NULL) return(-1);
13045 ctxt->userData = ctxt;
13046 if (sax != NULL) {
13047 oldsax = ctxt->sax;
13048 ctxt->sax = sax;
13049 if (user_data != NULL)
13050 ctxt->userData = user_data;
13051 }
13052 newDoc = xmlNewDoc(BAD_CAST "1.0");
13053 if (newDoc == NULL) {
13054 xmlFreeParserCtxt(ctxt);
13055 return(-1);
13056 }
Daniel Veillardae0765b2008-07-31 19:54:59 +000013057 newDoc->properties = XML_DOC_INTERNAL;
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000013058 if ((doc != NULL) && (doc->dict != NULL)) {
13059 xmlDictFree(ctxt->dict);
13060 ctxt->dict = doc->dict;
13061 xmlDictReference(ctxt->dict);
13062 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
13063 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
13064 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
13065 ctxt->dictNames = 1;
13066 } else {
Daniel Veillard37334572008-07-31 08:20:02 +000013067 xmlCtxtUseOptionsInternal(ctxt, XML_PARSE_NODICT, NULL);
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000013068 }
Owen Taylor3473f882001-02-23 17:55:21 +000013069 if (doc != NULL) {
13070 newDoc->intSubset = doc->intSubset;
13071 newDoc->extSubset = doc->extSubset;
13072 }
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000013073 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
13074 if (newRoot == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000013075 if (sax != NULL)
13076 ctxt->sax = oldsax;
13077 xmlFreeParserCtxt(ctxt);
13078 newDoc->intSubset = NULL;
13079 newDoc->extSubset = NULL;
13080 xmlFreeDoc(newDoc);
13081 return(-1);
13082 }
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000013083 xmlAddChild((xmlNodePtr) newDoc, newRoot);
13084 nodePush(ctxt, newRoot);
Owen Taylor3473f882001-02-23 17:55:21 +000013085 if (doc == NULL) {
13086 ctxt->myDoc = newDoc;
13087 } else {
Daniel Veillard42766c02002-08-22 20:52:17 +000013088 ctxt->myDoc = newDoc;
Owen Taylor3473f882001-02-23 17:55:21 +000013089 newDoc->children->doc = doc;
Rob Richardsa02f1992006-09-16 14:04:26 +000013090 /* Ensure that doc has XML spec namespace */
13091 xmlSearchNsByHref(doc, (xmlNodePtr)doc, XML_XML_NAMESPACE);
13092 newDoc->oldNs = doc->oldNs;
Owen Taylor3473f882001-02-23 17:55:21 +000013093 }
13094 ctxt->instate = XML_PARSER_CONTENT;
13095 ctxt->depth = depth;
13096
13097 /*
13098 * Doing validity checking on chunk doesn't make sense
13099 */
13100 ctxt->validate = 0;
13101 ctxt->loadsubset = 0;
Daniel Veillarde57ec792003-09-10 10:50:59 +000013102 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000013103
Daniel Veillardb39bc392002-10-26 19:29:51 +000013104 if ( doc != NULL ){
13105 content = doc->children;
13106 doc->children = NULL;
13107 xmlParseContent(ctxt);
13108 doc->children = content;
13109 }
13110 else {
13111 xmlParseContent(ctxt);
13112 }
Owen Taylor3473f882001-02-23 17:55:21 +000013113 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000013114 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000013115 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000013116 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000013117 }
13118 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000013119 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000013120 }
13121
13122 if (!ctxt->wellFormed) {
13123 if (ctxt->errNo == 0)
13124 ret = 1;
13125 else
13126 ret = ctxt->errNo;
13127 } else {
Daniel Veillard58e44c92002-08-02 22:19:49 +000013128 ret = 0;
13129 }
Daniel Veillard0161e632008-08-28 15:36:32 +000013130
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000013131 if ((lst != NULL) && ((ret == 0) || (recover == 1))) {
13132 xmlNodePtr cur;
Owen Taylor3473f882001-02-23 17:55:21 +000013133
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000013134 /*
13135 * Return the newly created nodeset after unlinking it from
13136 * they pseudo parent.
13137 */
13138 cur = newDoc->children->children;
13139 *lst = cur;
13140 while (cur != NULL) {
13141 xmlSetTreeDoc(cur, doc);
13142 cur->parent = NULL;
13143 cur = cur->next;
Owen Taylor3473f882001-02-23 17:55:21 +000013144 }
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000013145 newDoc->children->children = NULL;
13146 }
Daniel Veillard0161e632008-08-28 15:36:32 +000013147
13148 if (sax != NULL)
Owen Taylor3473f882001-02-23 17:55:21 +000013149 ctxt->sax = oldsax;
13150 xmlFreeParserCtxt(ctxt);
13151 newDoc->intSubset = NULL;
13152 newDoc->extSubset = NULL;
Rob Richardsa02f1992006-09-16 14:04:26 +000013153 newDoc->oldNs = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000013154 xmlFreeDoc(newDoc);
Daniel Veillard0161e632008-08-28 15:36:32 +000013155
Owen Taylor3473f882001-02-23 17:55:21 +000013156 return(ret);
13157}
13158
13159/**
13160 * xmlSAXParseEntity:
13161 * @sax: the SAX handler block
13162 * @filename: the filename
13163 *
13164 * parse an XML external entity out of context and build a tree.
13165 * It use the given SAX function block to handle the parsing callback.
13166 * If sax is NULL, fallback to the default DOM tree building routines.
13167 *
13168 * [78] extParsedEnt ::= TextDecl? content
13169 *
13170 * This correspond to a "Well Balanced" chunk
13171 *
13172 * Returns the resulting document tree
13173 */
13174
13175xmlDocPtr
13176xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
13177 xmlDocPtr ret;
13178 xmlParserCtxtPtr ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000013179
13180 ctxt = xmlCreateFileParserCtxt(filename);
13181 if (ctxt == NULL) {
13182 return(NULL);
13183 }
13184 if (sax != NULL) {
13185 if (ctxt->sax != NULL)
13186 xmlFree(ctxt->sax);
13187 ctxt->sax = sax;
13188 ctxt->userData = NULL;
13189 }
13190
Owen Taylor3473f882001-02-23 17:55:21 +000013191 xmlParseExtParsedEnt(ctxt);
13192
13193 if (ctxt->wellFormed)
13194 ret = ctxt->myDoc;
13195 else {
13196 ret = NULL;
13197 xmlFreeDoc(ctxt->myDoc);
13198 ctxt->myDoc = NULL;
13199 }
13200 if (sax != NULL)
13201 ctxt->sax = NULL;
13202 xmlFreeParserCtxt(ctxt);
Daniel Veillard0161e632008-08-28 15:36:32 +000013203
Owen Taylor3473f882001-02-23 17:55:21 +000013204 return(ret);
13205}
13206
13207/**
13208 * xmlParseEntity:
13209 * @filename: the filename
13210 *
13211 * parse an XML external entity out of context and build a tree.
13212 *
13213 * [78] extParsedEnt ::= TextDecl? content
13214 *
13215 * This correspond to a "Well Balanced" chunk
13216 *
13217 * Returns the resulting document tree
13218 */
13219
13220xmlDocPtr
13221xmlParseEntity(const char *filename) {
13222 return(xmlSAXParseEntity(NULL, filename));
13223}
Daniel Veillard81273902003-09-30 00:43:48 +000013224#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000013225
13226/**
Rob Richards9c0aa472009-03-26 18:10:19 +000013227 * xmlCreateEntityParserCtxtInternal:
Owen Taylor3473f882001-02-23 17:55:21 +000013228 * @URL: the entity URL
13229 * @ID: the entity PUBLIC ID
Daniel Veillardcbaf3992001-12-31 16:16:02 +000013230 * @base: a possible base for the target URI
Rob Richards9c0aa472009-03-26 18:10:19 +000013231 * @pctx: parser context used to set options on new context
Owen Taylor3473f882001-02-23 17:55:21 +000013232 *
13233 * Create a parser context for an external entity
13234 * Automatic support for ZLIB/Compress compressed document is provided
13235 * by default if found at compile-time.
13236 *
13237 * Returns the new parser context or NULL
13238 */
Rob Richards9c0aa472009-03-26 18:10:19 +000013239static xmlParserCtxtPtr
13240xmlCreateEntityParserCtxtInternal(const xmlChar *URL, const xmlChar *ID,
13241 const xmlChar *base, xmlParserCtxtPtr pctx) {
Owen Taylor3473f882001-02-23 17:55:21 +000013242 xmlParserCtxtPtr ctxt;
13243 xmlParserInputPtr inputStream;
13244 char *directory = NULL;
13245 xmlChar *uri;
Daniel Veillard0161e632008-08-28 15:36:32 +000013246
Owen Taylor3473f882001-02-23 17:55:21 +000013247 ctxt = xmlNewParserCtxt();
13248 if (ctxt == NULL) {
13249 return(NULL);
13250 }
13251
Daniel Veillard48247b42009-07-10 16:12:46 +020013252 if (pctx != NULL) {
13253 ctxt->options = pctx->options;
13254 ctxt->_private = pctx->_private;
Rob Richards9c0aa472009-03-26 18:10:19 +000013255 }
13256
Owen Taylor3473f882001-02-23 17:55:21 +000013257 uri = xmlBuildURI(URL, base);
13258
13259 if (uri == NULL) {
13260 inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
13261 if (inputStream == NULL) {
13262 xmlFreeParserCtxt(ctxt);
13263 return(NULL);
13264 }
13265
13266 inputPush(ctxt, inputStream);
13267
13268 if ((ctxt->directory == NULL) && (directory == NULL))
13269 directory = xmlParserGetDirectory((char *)URL);
13270 if ((ctxt->directory == NULL) && (directory != NULL))
13271 ctxt->directory = directory;
13272 } else {
13273 inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt);
13274 if (inputStream == NULL) {
13275 xmlFree(uri);
13276 xmlFreeParserCtxt(ctxt);
13277 return(NULL);
13278 }
13279
13280 inputPush(ctxt, inputStream);
13281
13282 if ((ctxt->directory == NULL) && (directory == NULL))
13283 directory = xmlParserGetDirectory((char *)uri);
13284 if ((ctxt->directory == NULL) && (directory != NULL))
13285 ctxt->directory = directory;
13286 xmlFree(uri);
13287 }
Owen Taylor3473f882001-02-23 17:55:21 +000013288 return(ctxt);
13289}
13290
Rob Richards9c0aa472009-03-26 18:10:19 +000013291/**
13292 * xmlCreateEntityParserCtxt:
13293 * @URL: the entity URL
13294 * @ID: the entity PUBLIC ID
13295 * @base: a possible base for the target URI
13296 *
13297 * Create a parser context for an external entity
13298 * Automatic support for ZLIB/Compress compressed document is provided
13299 * by default if found at compile-time.
13300 *
13301 * Returns the new parser context or NULL
13302 */
13303xmlParserCtxtPtr
13304xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
13305 const xmlChar *base) {
13306 return xmlCreateEntityParserCtxtInternal(URL, ID, base, NULL);
13307
13308}
13309
Owen Taylor3473f882001-02-23 17:55:21 +000013310/************************************************************************
13311 * *
Daniel Veillard0161e632008-08-28 15:36:32 +000013312 * Front ends when parsing from a file *
Owen Taylor3473f882001-02-23 17:55:21 +000013313 * *
13314 ************************************************************************/
13315
13316/**
Daniel Veillard61b93382003-11-03 14:28:31 +000013317 * xmlCreateURLParserCtxt:
13318 * @filename: the filename or URL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013319 * @options: a combination of xmlParserOption
Owen Taylor3473f882001-02-23 17:55:21 +000013320 *
Daniel Veillard61b93382003-11-03 14:28:31 +000013321 * Create a parser context for a file or URL content.
Owen Taylor3473f882001-02-23 17:55:21 +000013322 * Automatic support for ZLIB/Compress compressed document is provided
Daniel Veillard61b93382003-11-03 14:28:31 +000013323 * by default if found at compile-time and for file accesses
Owen Taylor3473f882001-02-23 17:55:21 +000013324 *
13325 * Returns the new parser context or NULL
13326 */
13327xmlParserCtxtPtr
Daniel Veillard61b93382003-11-03 14:28:31 +000013328xmlCreateURLParserCtxt(const char *filename, int options)
Owen Taylor3473f882001-02-23 17:55:21 +000013329{
13330 xmlParserCtxtPtr ctxt;
13331 xmlParserInputPtr inputStream;
Owen Taylor3473f882001-02-23 17:55:21 +000013332 char *directory = NULL;
13333
Owen Taylor3473f882001-02-23 17:55:21 +000013334 ctxt = xmlNewParserCtxt();
13335 if (ctxt == NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +000013336 xmlErrMemory(NULL, "cannot allocate parser context");
Owen Taylor3473f882001-02-23 17:55:21 +000013337 return(NULL);
13338 }
13339
Daniel Veillarddf292f72005-01-16 19:00:15 +000013340 if (options)
Daniel Veillard37334572008-07-31 08:20:02 +000013341 xmlCtxtUseOptionsInternal(ctxt, options, NULL);
Daniel Veillarddf292f72005-01-16 19:00:15 +000013342 ctxt->linenumbers = 1;
Daniel Veillard37334572008-07-31 08:20:02 +000013343
Daniel Veillard4e9b1bc2003-06-09 10:30:33 +000013344 inputStream = xmlLoadExternalEntity(filename, NULL, ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000013345 if (inputStream == NULL) {
13346 xmlFreeParserCtxt(ctxt);
13347 return(NULL);
13348 }
13349
Owen Taylor3473f882001-02-23 17:55:21 +000013350 inputPush(ctxt, inputStream);
13351 if ((ctxt->directory == NULL) && (directory == NULL))
Igor Zlatkovic5f9fada2003-02-19 14:51:00 +000013352 directory = xmlParserGetDirectory(filename);
Owen Taylor3473f882001-02-23 17:55:21 +000013353 if ((ctxt->directory == NULL) && (directory != NULL))
13354 ctxt->directory = directory;
13355
13356 return(ctxt);
13357}
13358
Daniel Veillard61b93382003-11-03 14:28:31 +000013359/**
13360 * xmlCreateFileParserCtxt:
13361 * @filename: the filename
13362 *
13363 * Create a parser context for a file content.
13364 * Automatic support for ZLIB/Compress compressed document is provided
13365 * by default if found at compile-time.
13366 *
13367 * Returns the new parser context or NULL
13368 */
13369xmlParserCtxtPtr
13370xmlCreateFileParserCtxt(const char *filename)
13371{
13372 return(xmlCreateURLParserCtxt(filename, 0));
13373}
13374
Daniel Veillard81273902003-09-30 00:43:48 +000013375#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000013376/**
Daniel Veillarda293c322001-10-02 13:54:14 +000013377 * xmlSAXParseFileWithData:
Owen Taylor3473f882001-02-23 17:55:21 +000013378 * @sax: the SAX handler block
13379 * @filename: the filename
13380 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
13381 * documents
Daniel Veillarda293c322001-10-02 13:54:14 +000013382 * @data: the userdata
Owen Taylor3473f882001-02-23 17:55:21 +000013383 *
13384 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
13385 * compressed document is provided by default if found at compile-time.
13386 * It use the given SAX function block to handle the parsing callback.
13387 * If sax is NULL, fallback to the default DOM tree building routines.
13388 *
Daniel Veillarde19fc232002-04-22 16:01:24 +000013389 * User data (void *) is stored within the parser context in the
13390 * context's _private member, so it is available nearly everywhere in libxml
Daniel Veillarda293c322001-10-02 13:54:14 +000013391 *
Owen Taylor3473f882001-02-23 17:55:21 +000013392 * Returns the resulting document tree
13393 */
13394
13395xmlDocPtr
Daniel Veillarda293c322001-10-02 13:54:14 +000013396xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename,
13397 int recovery, void *data) {
Owen Taylor3473f882001-02-23 17:55:21 +000013398 xmlDocPtr ret;
13399 xmlParserCtxtPtr ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000013400
Daniel Veillard635ef722001-10-29 11:48:19 +000013401 xmlInitParser();
13402
Owen Taylor3473f882001-02-23 17:55:21 +000013403 ctxt = xmlCreateFileParserCtxt(filename);
13404 if (ctxt == NULL) {
13405 return(NULL);
13406 }
13407 if (sax != NULL) {
13408 if (ctxt->sax != NULL)
13409 xmlFree(ctxt->sax);
13410 ctxt->sax = sax;
Owen Taylor3473f882001-02-23 17:55:21 +000013411 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000013412 xmlDetectSAX2(ctxt);
Daniel Veillarda293c322001-10-02 13:54:14 +000013413 if (data!=NULL) {
Daniel Veillardc790bf42003-10-11 10:50:10 +000013414 ctxt->_private = data;
Daniel Veillarda293c322001-10-02 13:54:14 +000013415 }
Owen Taylor3473f882001-02-23 17:55:21 +000013416
Daniel Veillard37d2d162008-03-14 10:54:00 +000013417 if (ctxt->directory == NULL)
13418 ctxt->directory = xmlParserGetDirectory(filename);
Owen Taylor3473f882001-02-23 17:55:21 +000013419
Daniel Veillarddad3f682002-11-17 16:47:27 +000013420 ctxt->recovery = recovery;
13421
Owen Taylor3473f882001-02-23 17:55:21 +000013422 xmlParseDocument(ctxt);
13423
William M. Brackc07329e2003-09-08 01:57:30 +000013424 if ((ctxt->wellFormed) || recovery) {
13425 ret = ctxt->myDoc;
Daniel Veillardb65e12e2003-10-08 21:33:28 +000013426 if (ret != NULL) {
13427 if (ctxt->input->buf->compressed > 0)
13428 ret->compression = 9;
13429 else
13430 ret->compression = ctxt->input->buf->compressed;
13431 }
William M. Brackc07329e2003-09-08 01:57:30 +000013432 }
Owen Taylor3473f882001-02-23 17:55:21 +000013433 else {
13434 ret = NULL;
13435 xmlFreeDoc(ctxt->myDoc);
13436 ctxt->myDoc = NULL;
13437 }
13438 if (sax != NULL)
13439 ctxt->sax = NULL;
13440 xmlFreeParserCtxt(ctxt);
13441
13442 return(ret);
13443}
13444
13445/**
Daniel Veillarda293c322001-10-02 13:54:14 +000013446 * xmlSAXParseFile:
13447 * @sax: the SAX handler block
13448 * @filename: the filename
13449 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
13450 * documents
13451 *
13452 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
13453 * compressed document is provided by default if found at compile-time.
13454 * It use the given SAX function block to handle the parsing callback.
13455 * If sax is NULL, fallback to the default DOM tree building routines.
13456 *
13457 * Returns the resulting document tree
13458 */
13459
13460xmlDocPtr
13461xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
13462 int recovery) {
13463 return(xmlSAXParseFileWithData(sax,filename,recovery,NULL));
13464}
13465
13466/**
Owen Taylor3473f882001-02-23 17:55:21 +000013467 * xmlRecoverDoc:
13468 * @cur: a pointer to an array of xmlChar
13469 *
13470 * parse an XML in-memory document and build a tree.
Daniel Veillard2135fc22008-04-04 16:10:51 +000013471 * In the case the document is not Well Formed, a attempt to build a
13472 * tree is tried anyway
13473 *
13474 * Returns the resulting document tree or NULL in case of failure
Owen Taylor3473f882001-02-23 17:55:21 +000013475 */
13476
13477xmlDocPtr
Daniel Veillardf39eafa2009-08-20 19:15:08 +020013478xmlRecoverDoc(const xmlChar *cur) {
Owen Taylor3473f882001-02-23 17:55:21 +000013479 return(xmlSAXParseDoc(NULL, cur, 1));
13480}
13481
13482/**
13483 * xmlParseFile:
13484 * @filename: the filename
13485 *
13486 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
13487 * compressed document is provided by default if found at compile-time.
13488 *
Daniel Veillard5d96fff2001-08-31 14:55:30 +000013489 * Returns the resulting document tree if the file was wellformed,
13490 * NULL otherwise.
Owen Taylor3473f882001-02-23 17:55:21 +000013491 */
13492
13493xmlDocPtr
13494xmlParseFile(const char *filename) {
13495 return(xmlSAXParseFile(NULL, filename, 0));
13496}
13497
13498/**
13499 * xmlRecoverFile:
13500 * @filename: the filename
13501 *
13502 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
13503 * compressed document is provided by default if found at compile-time.
Daniel Veillard2135fc22008-04-04 16:10:51 +000013504 * In the case the document is not Well Formed, it attempts to build
13505 * a tree anyway
Owen Taylor3473f882001-02-23 17:55:21 +000013506 *
Daniel Veillard2135fc22008-04-04 16:10:51 +000013507 * Returns the resulting document tree or NULL in case of failure
Owen Taylor3473f882001-02-23 17:55:21 +000013508 */
13509
13510xmlDocPtr
13511xmlRecoverFile(const char *filename) {
13512 return(xmlSAXParseFile(NULL, filename, 1));
13513}
13514
13515
13516/**
13517 * xmlSetupParserForBuffer:
13518 * @ctxt: an XML parser context
13519 * @buffer: a xmlChar * buffer
13520 * @filename: a file name
13521 *
13522 * Setup the parser context to parse a new buffer; Clears any prior
13523 * contents from the parser context. The buffer parameter must not be
13524 * NULL, but the filename parameter can be
13525 */
13526void
13527xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
13528 const char* filename)
13529{
13530 xmlParserInputPtr input;
13531
Daniel Veillard36e5cd52004-11-02 14:52:23 +000013532 if ((ctxt == NULL) || (buffer == NULL))
13533 return;
13534
Owen Taylor3473f882001-02-23 17:55:21 +000013535 input = xmlNewInputStream(ctxt);
13536 if (input == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +000013537 xmlErrMemory(NULL, "parsing new buffer: out of memory\n");
Daniel Veillard36e5cd52004-11-02 14:52:23 +000013538 xmlClearParserCtxt(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000013539 return;
13540 }
13541
13542 xmlClearParserCtxt(ctxt);
13543 if (filename != NULL)
Daniel Veillardc3ca5ba2003-05-09 22:26:28 +000013544 input->filename = (char *) xmlCanonicPath((const xmlChar *)filename);
Owen Taylor3473f882001-02-23 17:55:21 +000013545 input->base = buffer;
13546 input->cur = buffer;
Daniel Veillard48b2f892001-02-25 16:11:03 +000013547 input->end = &buffer[xmlStrlen(buffer)];
Owen Taylor3473f882001-02-23 17:55:21 +000013548 inputPush(ctxt, input);
13549}
13550
13551/**
13552 * xmlSAXUserParseFile:
13553 * @sax: a SAX handler
13554 * @user_data: The user data returned on SAX callbacks
13555 * @filename: a file name
13556 *
13557 * parse an XML file and call the given SAX handler routines.
13558 * Automatic support for ZLIB/Compress compressed document is provided
13559 *
13560 * Returns 0 in case of success or a error number otherwise
13561 */
13562int
13563xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
13564 const char *filename) {
13565 int ret = 0;
13566 xmlParserCtxtPtr ctxt;
13567
13568 ctxt = xmlCreateFileParserCtxt(filename);
13569 if (ctxt == NULL) return -1;
Daniel Veillard092643b2003-09-25 14:29:29 +000013570 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Owen Taylor3473f882001-02-23 17:55:21 +000013571 xmlFree(ctxt->sax);
13572 ctxt->sax = sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000013573 xmlDetectSAX2(ctxt);
13574
Owen Taylor3473f882001-02-23 17:55:21 +000013575 if (user_data != NULL)
13576 ctxt->userData = user_data;
13577
13578 xmlParseDocument(ctxt);
13579
13580 if (ctxt->wellFormed)
13581 ret = 0;
13582 else {
13583 if (ctxt->errNo != 0)
13584 ret = ctxt->errNo;
13585 else
13586 ret = -1;
13587 }
13588 if (sax != NULL)
13589 ctxt->sax = NULL;
Daniel Veillard34099b42004-11-04 17:34:35 +000013590 if (ctxt->myDoc != NULL) {
13591 xmlFreeDoc(ctxt->myDoc);
13592 ctxt->myDoc = NULL;
13593 }
Owen Taylor3473f882001-02-23 17:55:21 +000013594 xmlFreeParserCtxt(ctxt);
13595
13596 return ret;
13597}
Daniel Veillard81273902003-09-30 00:43:48 +000013598#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000013599
13600/************************************************************************
13601 * *
13602 * Front ends when parsing from memory *
13603 * *
13604 ************************************************************************/
13605
13606/**
13607 * xmlCreateMemoryParserCtxt:
13608 * @buffer: a pointer to a char array
13609 * @size: the size of the array
13610 *
13611 * Create a parser context for an XML in-memory document.
13612 *
13613 * Returns the new parser context or NULL
13614 */
13615xmlParserCtxtPtr
Daniel Veillardfd7ddca2001-05-16 10:57:35 +000013616xmlCreateMemoryParserCtxt(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000013617 xmlParserCtxtPtr ctxt;
13618 xmlParserInputPtr input;
13619 xmlParserInputBufferPtr buf;
13620
13621 if (buffer == NULL)
13622 return(NULL);
13623 if (size <= 0)
13624 return(NULL);
13625
13626 ctxt = xmlNewParserCtxt();
13627 if (ctxt == NULL)
13628 return(NULL);
13629
Daniel Veillard53350552003-09-18 13:35:51 +000013630 /* TODO: xmlParserInputBufferCreateStatic, requires some serious changes */
Owen Taylor3473f882001-02-23 17:55:21 +000013631 buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
Daniel Veillarda7e05b42002-11-19 08:11:14 +000013632 if (buf == NULL) {
13633 xmlFreeParserCtxt(ctxt);
13634 return(NULL);
13635 }
Owen Taylor3473f882001-02-23 17:55:21 +000013636
13637 input = xmlNewInputStream(ctxt);
13638 if (input == NULL) {
Daniel Veillarda7e05b42002-11-19 08:11:14 +000013639 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000013640 xmlFreeParserCtxt(ctxt);
13641 return(NULL);
13642 }
13643
13644 input->filename = NULL;
13645 input->buf = buf;
13646 input->base = input->buf->buffer->content;
13647 input->cur = input->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +000013648 input->end = &input->buf->buffer->content[input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000013649
13650 inputPush(ctxt, input);
13651 return(ctxt);
13652}
13653
Daniel Veillard81273902003-09-30 00:43:48 +000013654#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000013655/**
Daniel Veillard8606bbb2002-11-12 12:36:52 +000013656 * xmlSAXParseMemoryWithData:
13657 * @sax: the SAX handler block
13658 * @buffer: an pointer to a char array
13659 * @size: the size of the array
13660 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
13661 * documents
13662 * @data: the userdata
13663 *
13664 * parse an XML in-memory block and use the given SAX function block
13665 * to handle the parsing callback. If sax is NULL, fallback to the default
13666 * DOM tree building routines.
13667 *
13668 * User data (void *) is stored within the parser context in the
13669 * context's _private member, so it is available nearly everywhere in libxml
13670 *
13671 * Returns the resulting document tree
13672 */
13673
13674xmlDocPtr
13675xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer,
13676 int size, int recovery, void *data) {
13677 xmlDocPtr ret;
13678 xmlParserCtxtPtr ctxt;
13679
Daniel Veillardab2a7632009-07-09 08:45:03 +020013680 xmlInitParser();
13681
Daniel Veillard8606bbb2002-11-12 12:36:52 +000013682 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
13683 if (ctxt == NULL) return(NULL);
13684 if (sax != NULL) {
13685 if (ctxt->sax != NULL)
13686 xmlFree(ctxt->sax);
13687 ctxt->sax = sax;
13688 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000013689 xmlDetectSAX2(ctxt);
Daniel Veillard8606bbb2002-11-12 12:36:52 +000013690 if (data!=NULL) {
13691 ctxt->_private=data;
13692 }
13693
Daniel Veillardadba5f12003-04-04 16:09:01 +000013694 ctxt->recovery = recovery;
13695
Daniel Veillard8606bbb2002-11-12 12:36:52 +000013696 xmlParseDocument(ctxt);
13697
13698 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
13699 else {
13700 ret = NULL;
13701 xmlFreeDoc(ctxt->myDoc);
13702 ctxt->myDoc = NULL;
13703 }
13704 if (sax != NULL)
13705 ctxt->sax = NULL;
13706 xmlFreeParserCtxt(ctxt);
Daniel Veillardab2a7632009-07-09 08:45:03 +020013707
Daniel Veillard8606bbb2002-11-12 12:36:52 +000013708 return(ret);
13709}
13710
13711/**
Owen Taylor3473f882001-02-23 17:55:21 +000013712 * xmlSAXParseMemory:
13713 * @sax: the SAX handler block
13714 * @buffer: an pointer to a char array
13715 * @size: the size of the array
13716 * @recovery: work in recovery mode, i.e. tries to read not Well Formed
13717 * documents
13718 *
13719 * parse an XML in-memory block and use the given SAX function block
13720 * to handle the parsing callback. If sax is NULL, fallback to the default
13721 * DOM tree building routines.
13722 *
13723 * Returns the resulting document tree
13724 */
13725xmlDocPtr
Daniel Veillard50822cb2001-07-26 20:05:51 +000013726xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer,
13727 int size, int recovery) {
Daniel Veillard8606bbb2002-11-12 12:36:52 +000013728 return xmlSAXParseMemoryWithData(sax, buffer, size, recovery, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000013729}
13730
13731/**
13732 * xmlParseMemory:
13733 * @buffer: an pointer to a char array
13734 * @size: the size of the array
13735 *
13736 * parse an XML in-memory block and build a tree.
13737 *
13738 * Returns the resulting document tree
13739 */
13740
Daniel Veillard50822cb2001-07-26 20:05:51 +000013741xmlDocPtr xmlParseMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000013742 return(xmlSAXParseMemory(NULL, buffer, size, 0));
13743}
13744
13745/**
13746 * xmlRecoverMemory:
13747 * @buffer: an pointer to a char array
13748 * @size: the size of the array
13749 *
13750 * parse an XML in-memory block and build a tree.
Daniel Veillard2135fc22008-04-04 16:10:51 +000013751 * In the case the document is not Well Formed, an attempt to
13752 * build a tree is tried anyway
13753 *
13754 * Returns the resulting document tree or NULL in case of error
Owen Taylor3473f882001-02-23 17:55:21 +000013755 */
13756
Daniel Veillard50822cb2001-07-26 20:05:51 +000013757xmlDocPtr xmlRecoverMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000013758 return(xmlSAXParseMemory(NULL, buffer, size, 1));
13759}
13760
13761/**
13762 * xmlSAXUserParseMemory:
13763 * @sax: a SAX handler
13764 * @user_data: The user data returned on SAX callbacks
13765 * @buffer: an in-memory XML document input
13766 * @size: the length of the XML document in bytes
13767 *
13768 * A better SAX parsing routine.
13769 * parse an XML in-memory buffer and call the given SAX handler routines.
Daniel Veillardab2a7632009-07-09 08:45:03 +020013770 *
Owen Taylor3473f882001-02-23 17:55:21 +000013771 * Returns 0 in case of success or a error number otherwise
13772 */
13773int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardfd7ddca2001-05-16 10:57:35 +000013774 const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000013775 int ret = 0;
13776 xmlParserCtxtPtr ctxt;
Daniel Veillardab2a7632009-07-09 08:45:03 +020013777
13778 xmlInitParser();
13779
Owen Taylor3473f882001-02-23 17:55:21 +000013780 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
13781 if (ctxt == NULL) return -1;
Daniel Veillard3dcd3192007-08-14 13:46:54 +000013782 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
13783 xmlFree(ctxt->sax);
Daniel Veillard9e923512002-08-14 08:48:52 +000013784 ctxt->sax = sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000013785 xmlDetectSAX2(ctxt);
Daniel Veillard3dcd3192007-08-14 13:46:54 +000013786
Daniel Veillard30211a02001-04-26 09:33:18 +000013787 if (user_data != NULL)
13788 ctxt->userData = user_data;
Daniel Veillardab2a7632009-07-09 08:45:03 +020013789
Owen Taylor3473f882001-02-23 17:55:21 +000013790 xmlParseDocument(ctxt);
13791
13792 if (ctxt->wellFormed)
13793 ret = 0;
13794 else {
13795 if (ctxt->errNo != 0)
13796 ret = ctxt->errNo;
13797 else
13798 ret = -1;
13799 }
Daniel Veillard3dcd3192007-08-14 13:46:54 +000013800 if (sax != NULL)
13801 ctxt->sax = NULL;
Daniel Veillard34099b42004-11-04 17:34:35 +000013802 if (ctxt->myDoc != NULL) {
13803 xmlFreeDoc(ctxt->myDoc);
13804 ctxt->myDoc = NULL;
13805 }
Owen Taylor3473f882001-02-23 17:55:21 +000013806 xmlFreeParserCtxt(ctxt);
13807
13808 return ret;
13809}
Daniel Veillard81273902003-09-30 00:43:48 +000013810#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000013811
13812/**
13813 * xmlCreateDocParserCtxt:
13814 * @cur: a pointer to an array of xmlChar
13815 *
13816 * Creates a parser context for an XML in-memory document.
13817 *
13818 * Returns the new parser context or NULL
13819 */
13820xmlParserCtxtPtr
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013821xmlCreateDocParserCtxt(const xmlChar *cur) {
Owen Taylor3473f882001-02-23 17:55:21 +000013822 int len;
13823
13824 if (cur == NULL)
13825 return(NULL);
13826 len = xmlStrlen(cur);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013827 return(xmlCreateMemoryParserCtxt((const char *)cur, len));
Owen Taylor3473f882001-02-23 17:55:21 +000013828}
13829
Daniel Veillard81273902003-09-30 00:43:48 +000013830#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000013831/**
13832 * xmlSAXParseDoc:
13833 * @sax: the SAX handler block
13834 * @cur: a pointer to an array of xmlChar
13835 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
13836 * documents
13837 *
13838 * parse an XML in-memory document and build a tree.
13839 * It use the given SAX function block to handle the parsing callback.
13840 * If sax is NULL, fallback to the default DOM tree building routines.
13841 *
13842 * Returns the resulting document tree
13843 */
13844
13845xmlDocPtr
Daniel Veillard7331e5c2005-03-31 14:59:00 +000013846xmlSAXParseDoc(xmlSAXHandlerPtr sax, const xmlChar *cur, int recovery) {
Owen Taylor3473f882001-02-23 17:55:21 +000013847 xmlDocPtr ret;
13848 xmlParserCtxtPtr ctxt;
Daniel Veillard38936062004-11-04 17:45:11 +000013849 xmlSAXHandlerPtr oldsax = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000013850
Daniel Veillard38936062004-11-04 17:45:11 +000013851 if (cur == NULL) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000013852
13853
13854 ctxt = xmlCreateDocParserCtxt(cur);
13855 if (ctxt == NULL) return(NULL);
13856 if (sax != NULL) {
Daniel Veillard38936062004-11-04 17:45:11 +000013857 oldsax = ctxt->sax;
Owen Taylor3473f882001-02-23 17:55:21 +000013858 ctxt->sax = sax;
13859 ctxt->userData = NULL;
13860 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000013861 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000013862
13863 xmlParseDocument(ctxt);
13864 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
13865 else {
13866 ret = NULL;
13867 xmlFreeDoc(ctxt->myDoc);
13868 ctxt->myDoc = NULL;
13869 }
Daniel Veillard34099b42004-11-04 17:34:35 +000013870 if (sax != NULL)
Daniel Veillard38936062004-11-04 17:45:11 +000013871 ctxt->sax = oldsax;
Owen Taylor3473f882001-02-23 17:55:21 +000013872 xmlFreeParserCtxt(ctxt);
13873
13874 return(ret);
13875}
13876
13877/**
13878 * xmlParseDoc:
13879 * @cur: a pointer to an array of xmlChar
13880 *
13881 * parse an XML in-memory document and build a tree.
13882 *
13883 * Returns the resulting document tree
13884 */
13885
13886xmlDocPtr
Daniel Veillard7331e5c2005-03-31 14:59:00 +000013887xmlParseDoc(const xmlChar *cur) {
Owen Taylor3473f882001-02-23 17:55:21 +000013888 return(xmlSAXParseDoc(NULL, cur, 0));
13889}
Daniel Veillard81273902003-09-30 00:43:48 +000013890#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000013891
Daniel Veillard81273902003-09-30 00:43:48 +000013892#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +000013893/************************************************************************
13894 * *
13895 * Specific function to keep track of entities references *
13896 * and used by the XSLT debugger *
13897 * *
13898 ************************************************************************/
13899
13900static xmlEntityReferenceFunc xmlEntityRefFunc = NULL;
13901
13902/**
13903 * xmlAddEntityReference:
13904 * @ent : A valid entity
13905 * @firstNode : A valid first node for children of entity
13906 * @lastNode : A valid last node of children entity
13907 *
13908 * Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY
13909 */
13910static void
13911xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
13912 xmlNodePtr lastNode)
13913{
13914 if (xmlEntityRefFunc != NULL) {
13915 (*xmlEntityRefFunc) (ent, firstNode, lastNode);
13916 }
13917}
13918
13919
13920/**
13921 * xmlSetEntityReferenceFunc:
Daniel Veillard01c13b52002-12-10 15:19:08 +000013922 * @func: A valid function
Daniel Veillard8107a222002-01-13 14:10:10 +000013923 *
13924 * Set the function to call call back when a xml reference has been made
13925 */
13926void
13927xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func)
13928{
13929 xmlEntityRefFunc = func;
13930}
Daniel Veillard81273902003-09-30 00:43:48 +000013931#endif /* LIBXML_LEGACY_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000013932
13933/************************************************************************
13934 * *
13935 * Miscellaneous *
13936 * *
13937 ************************************************************************/
13938
13939#ifdef LIBXML_XPATH_ENABLED
13940#include <libxml/xpath.h>
13941#endif
13942
Daniel Veillardffa3c742005-07-21 13:24:09 +000013943extern void XMLCDECL xmlGenericErrorDefaultFunc(void *ctx, const char *msg, ...);
Owen Taylor3473f882001-02-23 17:55:21 +000013944static int xmlParserInitialized = 0;
13945
13946/**
13947 * xmlInitParser:
13948 *
13949 * Initialization function for the XML parser.
13950 * This is not reentrant. Call once before processing in case of
13951 * use in multithreaded programs.
13952 */
13953
13954void
13955xmlInitParser(void) {
Daniel Veillard3c01b1d2001-10-17 15:58:35 +000013956 if (xmlParserInitialized != 0)
13957 return;
Owen Taylor3473f882001-02-23 17:55:21 +000013958
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000013959#ifdef LIBXML_THREAD_ENABLED
13960 __xmlGlobalInitMutexLock();
13961 if (xmlParserInitialized == 0) {
13962#endif
Daniel Veillard7dd70802009-06-04 11:08:39 +020013963 xmlInitGlobals();
13964 xmlInitThreads();
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000013965 if ((xmlGenericError == xmlGenericErrorDefaultFunc) ||
13966 (xmlGenericError == NULL))
13967 initGenericErrorDefaultFunc(NULL);
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000013968 xmlInitMemory();
13969 xmlInitCharEncodingHandlers();
13970 xmlDefaultSAXHandlerInit();
13971 xmlRegisterDefaultInputCallbacks();
Daniel Veillarda9cce9c2003-09-29 13:20:24 +000013972#ifdef LIBXML_OUTPUT_ENABLED
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000013973 xmlRegisterDefaultOutputCallbacks();
Daniel Veillarda9cce9c2003-09-29 13:20:24 +000013974#endif /* LIBXML_OUTPUT_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000013975#ifdef LIBXML_HTML_ENABLED
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000013976 htmlInitAutoClose();
13977 htmlDefaultSAXHandlerInit();
Owen Taylor3473f882001-02-23 17:55:21 +000013978#endif
13979#ifdef LIBXML_XPATH_ENABLED
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000013980 xmlXPathInit();
Owen Taylor3473f882001-02-23 17:55:21 +000013981#endif
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000013982 xmlParserInitialized = 1;
13983#ifdef LIBXML_THREAD_ENABLED
13984 }
13985 __xmlGlobalInitMutexUnlock();
13986#endif
Owen Taylor3473f882001-02-23 17:55:21 +000013987}
13988
13989/**
13990 * xmlCleanupParser:
13991 *
Daniel Veillard05b37c62008-03-31 08:27:07 +000013992 * This function name is somewhat misleading. It does not clean up
13993 * parser state, it cleans up memory allocated by the library itself.
13994 * It is a cleanup function for the XML library. It tries to reclaim all
13995 * related global memory allocated for the library processing.
13996 * It doesn't deallocate any document related memory. One should
13997 * call xmlCleanupParser() only when the process has finished using
13998 * the library and all XML/HTML documents built with it.
13999 * See also xmlInitParser() which has the opposite function of preparing
14000 * the library for operations.
Daniel Veillard01101202009-02-21 09:22:04 +000014001 *
14002 * WARNING: if your application is multithreaded or has plugin support
14003 * calling this may crash the application if another thread or
14004 * a plugin is still using libxml2. It's sometimes very hard to
14005 * guess if libxml2 is in use in the application, some libraries
14006 * or plugins may use it without notice. In case of doubt abstain
14007 * from calling this function or do it just before calling exit()
14008 * to avoid leak reports from valgrind !
Owen Taylor3473f882001-02-23 17:55:21 +000014009 */
14010
14011void
14012xmlCleanupParser(void) {
Daniel Veillard7fb801f2003-08-17 21:07:26 +000014013 if (!xmlParserInitialized)
14014 return;
14015
Owen Taylor3473f882001-02-23 17:55:21 +000014016 xmlCleanupCharEncodingHandlers();
Daniel Veillarde2940dd2001-08-22 00:06:49 +000014017#ifdef LIBXML_CATALOG_ENABLED
14018 xmlCatalogCleanup();
14019#endif
Daniel Veillard14412512005-01-21 23:53:26 +000014020 xmlDictCleanup();
Daniel Veillard04054be2003-10-15 10:48:54 +000014021 xmlCleanupInputCallbacks();
14022#ifdef LIBXML_OUTPUT_ENABLED
14023 xmlCleanupOutputCallbacks();
14024#endif
Daniel Veillard36e5cd52004-11-02 14:52:23 +000014025#ifdef LIBXML_SCHEMAS_ENABLED
14026 xmlSchemaCleanupTypes();
Daniel Veillarddd6d3002004-11-03 14:20:29 +000014027 xmlRelaxNGCleanupTypes();
Daniel Veillard36e5cd52004-11-02 14:52:23 +000014028#endif
Daniel Veillard781ac8b2003-05-15 22:11:36 +000014029 xmlCleanupGlobals();
Daniel Veillard2b8c4a12003-10-02 22:28:19 +000014030 xmlResetLastError();
Daniel Veillard74c0e592003-11-25 07:01:38 +000014031 xmlCleanupThreads(); /* must be last if called not from the main thread */
William M. Brack72ee48d2003-12-30 08:30:19 +000014032 xmlCleanupMemory();
Daniel Veillardd0463562001-10-13 09:15:48 +000014033 xmlParserInitialized = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000014034}
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014035
14036/************************************************************************
14037 * *
14038 * New set (2.6.0) of simpler and more flexible APIs *
14039 * *
14040 ************************************************************************/
14041
/**
 * DICT_FREE:
 * @str:  a string
 *
 * Free a string if it is not owned by the "dict" dictionary in the
 * current scope. A variable named "dict" must be visible where the macro
 * is expanded; a NULL dict means the string is always freed.
 *
 * The body is wrapped in do { } while (0) so the macro behaves as a single
 * statement: it is safe inside an unbraced if/else and must be followed by
 * a semicolon at the call site.
 */
#define DICT_FREE(str)                                          \
    do {                                                        \
        if ((str) && ((!dict) ||                                \
            (xmlDictOwns(dict, (const xmlChar *)(str)) == 0)))  \
            xmlFree((char *)(str));                             \
    } while (0)
14053
14054/**
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014055 * xmlCtxtReset:
14056 * @ctxt: an XML parser context
14057 *
14058 * Reset a parser context
14059 */
14060void
14061xmlCtxtReset(xmlParserCtxtPtr ctxt)
14062{
14063 xmlParserInputPtr input;
Daniel Veillard36e5cd52004-11-02 14:52:23 +000014064 xmlDictPtr dict;
14065
14066 if (ctxt == NULL)
14067 return;
14068
14069 dict = ctxt->dict;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014070
14071 while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
14072 xmlFreeInputStream(input);
14073 }
14074 ctxt->inputNr = 0;
14075 ctxt->input = NULL;
14076
14077 ctxt->spaceNr = 0;
Daniel Veillard2e620862007-06-12 08:18:21 +000014078 if (ctxt->spaceTab != NULL) {
14079 ctxt->spaceTab[0] = -1;
14080 ctxt->space = &ctxt->spaceTab[0];
14081 } else {
14082 ctxt->space = NULL;
14083 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014084
14085
14086 ctxt->nodeNr = 0;
14087 ctxt->node = NULL;
14088
14089 ctxt->nameNr = 0;
14090 ctxt->name = NULL;
14091
Daniel Veillarde96a2a42003-09-24 21:23:56 +000014092 DICT_FREE(ctxt->version);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014093 ctxt->version = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000014094 DICT_FREE(ctxt->encoding);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014095 ctxt->encoding = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000014096 DICT_FREE(ctxt->directory);
14097 ctxt->directory = NULL;
14098 DICT_FREE(ctxt->extSubURI);
14099 ctxt->extSubURI = NULL;
14100 DICT_FREE(ctxt->extSubSystem);
14101 ctxt->extSubSystem = NULL;
14102 if (ctxt->myDoc != NULL)
14103 xmlFreeDoc(ctxt->myDoc);
14104 ctxt->myDoc = NULL;
14105
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014106 ctxt->standalone = -1;
14107 ctxt->hasExternalSubset = 0;
14108 ctxt->hasPErefs = 0;
14109 ctxt->html = 0;
14110 ctxt->external = 0;
14111 ctxt->instate = XML_PARSER_START;
14112 ctxt->token = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014113
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014114 ctxt->wellFormed = 1;
14115 ctxt->nsWellFormed = 1;
Daniel Veillardae289182004-01-21 16:00:43 +000014116 ctxt->disableSAX = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014117 ctxt->valid = 1;
Daniel Veillard766c4f92004-03-26 10:48:29 +000014118#if 0
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014119 ctxt->vctxt.userData = ctxt;
14120 ctxt->vctxt.error = xmlParserValidityError;
14121 ctxt->vctxt.warning = xmlParserValidityWarning;
Daniel Veillard766c4f92004-03-26 10:48:29 +000014122#endif
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014123 ctxt->record_info = 0;
14124 ctxt->nbChars = 0;
14125 ctxt->checkIndex = 0;
14126 ctxt->inSubset = 0;
14127 ctxt->errNo = XML_ERR_OK;
14128 ctxt->depth = 0;
14129 ctxt->charset = XML_CHAR_ENCODING_UTF8;
14130 ctxt->catalogs = NULL;
Daniel Veillard4bf899b2008-08-20 17:04:30 +000014131 ctxt->nbentities = 0;
Daniel Veillard0161e632008-08-28 15:36:32 +000014132 ctxt->sizeentities = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014133 xmlInitNodeInfoSeq(&ctxt->node_seq);
14134
14135 if (ctxt->attsDefault != NULL) {
14136 xmlHashFree(ctxt->attsDefault, (xmlHashDeallocator) xmlFree);
14137 ctxt->attsDefault = NULL;
14138 }
14139 if (ctxt->attsSpecial != NULL) {
14140 xmlHashFree(ctxt->attsSpecial, NULL);
14141 ctxt->attsSpecial = NULL;
14142 }
14143
Daniel Veillard4432df22003-09-28 18:58:27 +000014144#ifdef LIBXML_CATALOG_ENABLED
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014145 if (ctxt->catalogs != NULL)
14146 xmlCatalogFreeLocal(ctxt->catalogs);
Daniel Veillard4432df22003-09-28 18:58:27 +000014147#endif
Daniel Veillardcc199e02003-10-24 21:11:48 +000014148 if (ctxt->lastError.code != XML_ERR_OK)
14149 xmlResetError(&ctxt->lastError);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014150}
14151
14152/**
Daniel Veillard9ba8e382003-10-28 21:31:45 +000014153 * xmlCtxtResetPush:
14154 * @ctxt: an XML parser context
14155 * @chunk: a pointer to an array of chars
14156 * @size: number of chars in the array
14157 * @filename: an optional file name or URI
14158 * @encoding: the document encoding, or NULL
14159 *
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000014160 * Reset a push parser context
14161 *
14162 * Returns 0 in case of success and 1 in case of error
Daniel Veillard9ba8e382003-10-28 21:31:45 +000014163 */
14164int
14165xmlCtxtResetPush(xmlParserCtxtPtr ctxt, const char *chunk,
14166 int size, const char *filename, const char *encoding)
14167{
14168 xmlParserInputPtr inputStream;
14169 xmlParserInputBufferPtr buf;
14170 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
14171
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000014172 if (ctxt == NULL)
14173 return(1);
14174
Daniel Veillard9ba8e382003-10-28 21:31:45 +000014175 if ((encoding == NULL) && (chunk != NULL) && (size >= 4))
14176 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
14177
14178 buf = xmlAllocParserInputBuffer(enc);
14179 if (buf == NULL)
14180 return(1);
14181
14182 if (ctxt == NULL) {
14183 xmlFreeParserInputBuffer(buf);
14184 return(1);
14185 }
14186
14187 xmlCtxtReset(ctxt);
14188
14189 if (ctxt->pushTab == NULL) {
14190 ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 *
14191 sizeof(xmlChar *));
14192 if (ctxt->pushTab == NULL) {
14193 xmlErrMemory(ctxt, NULL);
14194 xmlFreeParserInputBuffer(buf);
14195 return(1);
14196 }
14197 }
14198
14199 if (filename == NULL) {
14200 ctxt->directory = NULL;
14201 } else {
14202 ctxt->directory = xmlParserGetDirectory(filename);
14203 }
14204
14205 inputStream = xmlNewInputStream(ctxt);
14206 if (inputStream == NULL) {
14207 xmlFreeParserInputBuffer(buf);
14208 return(1);
14209 }
14210
14211 if (filename == NULL)
14212 inputStream->filename = NULL;
14213 else
14214 inputStream->filename = (char *)
14215 xmlCanonicPath((const xmlChar *) filename);
14216 inputStream->buf = buf;
14217 inputStream->base = inputStream->buf->buffer->content;
14218 inputStream->cur = inputStream->buf->buffer->content;
14219 inputStream->end =
14220 &inputStream->buf->buffer->content[inputStream->buf->buffer->use];
14221
14222 inputPush(ctxt, inputStream);
14223
14224 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
14225 (ctxt->input->buf != NULL)) {
14226 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
14227 int cur = ctxt->input->cur - ctxt->input->base;
14228
14229 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
14230
14231 ctxt->input->base = ctxt->input->buf->buffer->content + base;
14232 ctxt->input->cur = ctxt->input->base + cur;
14233 ctxt->input->end =
14234 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->
14235 use];
14236#ifdef DEBUG_PUSH
14237 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
14238#endif
14239 }
14240
14241 if (encoding != NULL) {
14242 xmlCharEncodingHandlerPtr hdlr;
14243
Daniel Veillard37334572008-07-31 08:20:02 +000014244 if (ctxt->encoding != NULL)
14245 xmlFree((xmlChar *) ctxt->encoding);
14246 ctxt->encoding = xmlStrdup((const xmlChar *) encoding);
14247
Daniel Veillard9ba8e382003-10-28 21:31:45 +000014248 hdlr = xmlFindCharEncodingHandler(encoding);
14249 if (hdlr != NULL) {
14250 xmlSwitchToEncoding(ctxt, hdlr);
14251 } else {
14252 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
14253 "Unsupported encoding %s\n", BAD_CAST encoding);
14254 }
14255 } else if (enc != XML_CHAR_ENCODING_NONE) {
14256 xmlSwitchEncoding(ctxt, enc);
14257 }
14258
14259 return(0);
14260}
14261
Daniel Veillard37334572008-07-31 08:20:02 +000014262
Daniel Veillard9ba8e382003-10-28 21:31:45 +000014263/**
Daniel Veillard37334572008-07-31 08:20:02 +000014264 * xmlCtxtUseOptionsInternal:
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014265 * @ctxt: an XML parser context
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014266 * @options: a combination of xmlParserOption
Daniel Veillard37334572008-07-31 08:20:02 +000014267 * @encoding: the user provided encoding to use
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014268 *
14269 * Applies the options to the parser context
14270 *
14271 * Returns 0 in case of success, the set of unknown or unimplemented options
14272 * in case of error.
14273 */
Daniel Veillard37334572008-07-31 08:20:02 +000014274static int
14275xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options, const char *encoding)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014276{
Daniel Veillard36e5cd52004-11-02 14:52:23 +000014277 if (ctxt == NULL)
14278 return(-1);
Daniel Veillard37334572008-07-31 08:20:02 +000014279 if (encoding != NULL) {
14280 if (ctxt->encoding != NULL)
14281 xmlFree((xmlChar *) ctxt->encoding);
14282 ctxt->encoding = xmlStrdup((const xmlChar *) encoding);
14283 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014284 if (options & XML_PARSE_RECOVER) {
14285 ctxt->recovery = 1;
14286 options -= XML_PARSE_RECOVER;
Daniel Veillardae0765b2008-07-31 19:54:59 +000014287 ctxt->options |= XML_PARSE_RECOVER;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014288 } else
14289 ctxt->recovery = 0;
14290 if (options & XML_PARSE_DTDLOAD) {
14291 ctxt->loadsubset = XML_DETECT_IDS;
14292 options -= XML_PARSE_DTDLOAD;
Daniel Veillardae0765b2008-07-31 19:54:59 +000014293 ctxt->options |= XML_PARSE_DTDLOAD;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014294 } else
14295 ctxt->loadsubset = 0;
14296 if (options & XML_PARSE_DTDATTR) {
14297 ctxt->loadsubset |= XML_COMPLETE_ATTRS;
14298 options -= XML_PARSE_DTDATTR;
Daniel Veillardae0765b2008-07-31 19:54:59 +000014299 ctxt->options |= XML_PARSE_DTDATTR;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014300 }
14301 if (options & XML_PARSE_NOENT) {
14302 ctxt->replaceEntities = 1;
14303 /* ctxt->loadsubset |= XML_DETECT_IDS; */
14304 options -= XML_PARSE_NOENT;
Daniel Veillardae0765b2008-07-31 19:54:59 +000014305 ctxt->options |= XML_PARSE_NOENT;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014306 } else
14307 ctxt->replaceEntities = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014308 if (options & XML_PARSE_PEDANTIC) {
14309 ctxt->pedantic = 1;
14310 options -= XML_PARSE_PEDANTIC;
Daniel Veillardae0765b2008-07-31 19:54:59 +000014311 ctxt->options |= XML_PARSE_PEDANTIC;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014312 } else
14313 ctxt->pedantic = 0;
14314 if (options & XML_PARSE_NOBLANKS) {
14315 ctxt->keepBlanks = 0;
14316 ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace;
14317 options -= XML_PARSE_NOBLANKS;
Daniel Veillardae0765b2008-07-31 19:54:59 +000014318 ctxt->options |= XML_PARSE_NOBLANKS;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014319 } else
14320 ctxt->keepBlanks = 1;
14321 if (options & XML_PARSE_DTDVALID) {
14322 ctxt->validate = 1;
14323 if (options & XML_PARSE_NOWARNING)
14324 ctxt->vctxt.warning = NULL;
14325 if (options & XML_PARSE_NOERROR)
14326 ctxt->vctxt.error = NULL;
14327 options -= XML_PARSE_DTDVALID;
Daniel Veillardae0765b2008-07-31 19:54:59 +000014328 ctxt->options |= XML_PARSE_DTDVALID;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014329 } else
14330 ctxt->validate = 0;
Daniel Veillard971771e2005-07-09 17:32:57 +000014331 if (options & XML_PARSE_NOWARNING) {
14332 ctxt->sax->warning = NULL;
14333 options -= XML_PARSE_NOWARNING;
14334 }
14335 if (options & XML_PARSE_NOERROR) {
14336 ctxt->sax->error = NULL;
14337 ctxt->sax->fatalError = NULL;
14338 options -= XML_PARSE_NOERROR;
14339 }
Daniel Veillard81273902003-09-30 00:43:48 +000014340#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014341 if (options & XML_PARSE_SAX1) {
14342 ctxt->sax->startElement = xmlSAX2StartElement;
14343 ctxt->sax->endElement = xmlSAX2EndElement;
14344 ctxt->sax->startElementNs = NULL;
14345 ctxt->sax->endElementNs = NULL;
14346 ctxt->sax->initialized = 1;
14347 options -= XML_PARSE_SAX1;
Daniel Veillardae0765b2008-07-31 19:54:59 +000014348 ctxt->options |= XML_PARSE_SAX1;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014349 }
Daniel Veillard81273902003-09-30 00:43:48 +000014350#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde96a2a42003-09-24 21:23:56 +000014351 if (options & XML_PARSE_NODICT) {
14352 ctxt->dictNames = 0;
14353 options -= XML_PARSE_NODICT;
Daniel Veillardae0765b2008-07-31 19:54:59 +000014354 ctxt->options |= XML_PARSE_NODICT;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000014355 } else {
14356 ctxt->dictNames = 1;
14357 }
Daniel Veillarddca8cc72003-09-26 13:53:14 +000014358 if (options & XML_PARSE_NOCDATA) {
14359 ctxt->sax->cdataBlock = NULL;
14360 options -= XML_PARSE_NOCDATA;
Daniel Veillardae0765b2008-07-31 19:54:59 +000014361 ctxt->options |= XML_PARSE_NOCDATA;
Daniel Veillarddca8cc72003-09-26 13:53:14 +000014362 }
14363 if (options & XML_PARSE_NSCLEAN) {
14364 ctxt->options |= XML_PARSE_NSCLEAN;
14365 options -= XML_PARSE_NSCLEAN;
14366 }
Daniel Veillard61b93382003-11-03 14:28:31 +000014367 if (options & XML_PARSE_NONET) {
14368 ctxt->options |= XML_PARSE_NONET;
14369 options -= XML_PARSE_NONET;
14370 }
Daniel Veillard8874b942005-08-25 13:19:21 +000014371 if (options & XML_PARSE_COMPACT) {
14372 ctxt->options |= XML_PARSE_COMPACT;
14373 options -= XML_PARSE_COMPACT;
14374 }
Daniel Veillard7e5c3f42008-07-29 16:12:31 +000014375 if (options & XML_PARSE_OLD10) {
14376 ctxt->options |= XML_PARSE_OLD10;
14377 options -= XML_PARSE_OLD10;
14378 }
Daniel Veillard8915c152008-08-26 13:05:34 +000014379 if (options & XML_PARSE_NOBASEFIX) {
14380 ctxt->options |= XML_PARSE_NOBASEFIX;
14381 options -= XML_PARSE_NOBASEFIX;
14382 }
14383 if (options & XML_PARSE_HUGE) {
14384 ctxt->options |= XML_PARSE_HUGE;
14385 options -= XML_PARSE_HUGE;
14386 }
Rob Richardsb9ed0172009-01-05 17:28:50 +000014387 if (options & XML_PARSE_OLDSAX) {
14388 ctxt->options |= XML_PARSE_OLDSAX;
14389 options -= XML_PARSE_OLDSAX;
14390 }
Daniel Veillard7ec29972003-10-31 14:36:36 +000014391 ctxt->linenumbers = 1;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014392 return (options);
14393}
14394
14395/**
Daniel Veillard37334572008-07-31 08:20:02 +000014396 * xmlCtxtUseOptions:
14397 * @ctxt: an XML parser context
14398 * @options: a combination of xmlParserOption
14399 *
14400 * Applies the options to the parser context
14401 *
14402 * Returns 0 in case of success, the set of unknown or unimplemented options
14403 * in case of error.
14404 */
14405int
14406xmlCtxtUseOptions(xmlParserCtxtPtr ctxt, int options)
14407{
14408 return(xmlCtxtUseOptionsInternal(ctxt, options, NULL));
14409}
14410
14411/**
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014412 * xmlDoRead:
14413 * @ctxt: an XML parser context
Daniel Veillard60942de2003-09-25 21:05:58 +000014414 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014415 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014416 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014417 * @reuse: keep the context for reuse
14418 *
14419 * Common front-end for the xmlRead functions
Daniel Veillard37334572008-07-31 08:20:02 +000014420 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014421 * Returns the resulting document tree or NULL
14422 */
14423static xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000014424xmlDoRead(xmlParserCtxtPtr ctxt, const char *URL, const char *encoding,
14425 int options, int reuse)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014426{
14427 xmlDocPtr ret;
Daniel Veillard37334572008-07-31 08:20:02 +000014428
14429 xmlCtxtUseOptionsInternal(ctxt, options, encoding);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014430 if (encoding != NULL) {
14431 xmlCharEncodingHandlerPtr hdlr;
14432
14433 hdlr = xmlFindCharEncodingHandler(encoding);
14434 if (hdlr != NULL)
14435 xmlSwitchToEncoding(ctxt, hdlr);
14436 }
Daniel Veillard60942de2003-09-25 21:05:58 +000014437 if ((URL != NULL) && (ctxt->input != NULL) &&
14438 (ctxt->input->filename == NULL))
14439 ctxt->input->filename = (char *) xmlStrdup((const xmlChar *) URL);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014440 xmlParseDocument(ctxt);
14441 if ((ctxt->wellFormed) || ctxt->recovery)
14442 ret = ctxt->myDoc;
14443 else {
14444 ret = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000014445 if (ctxt->myDoc != NULL) {
Daniel Veillarde96a2a42003-09-24 21:23:56 +000014446 xmlFreeDoc(ctxt->myDoc);
14447 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014448 }
Daniel Veillarde96a2a42003-09-24 21:23:56 +000014449 ctxt->myDoc = NULL;
14450 if (!reuse) {
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014451 xmlFreeParserCtxt(ctxt);
Daniel Veillarde96a2a42003-09-24 21:23:56 +000014452 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014453
14454 return (ret);
14455}
14456
14457/**
14458 * xmlReadDoc:
14459 * @cur: a pointer to a zero terminated string
Daniel Veillard60942de2003-09-25 21:05:58 +000014460 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014461 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014462 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014463 *
14464 * parse an XML in-memory document and build a tree.
14465 *
14466 * Returns the resulting document tree
14467 */
14468xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000014469xmlReadDoc(const xmlChar * cur, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014470{
14471 xmlParserCtxtPtr ctxt;
14472
14473 if (cur == NULL)
14474 return (NULL);
14475
14476 ctxt = xmlCreateDocParserCtxt(cur);
14477 if (ctxt == NULL)
14478 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000014479 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014480}
14481
14482/**
14483 * xmlReadFile:
14484 * @filename: a file or URL
14485 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014486 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014487 *
14488 * parse an XML file from the filesystem or the network.
14489 *
14490 * Returns the resulting document tree
14491 */
14492xmlDocPtr
14493xmlReadFile(const char *filename, const char *encoding, int options)
14494{
14495 xmlParserCtxtPtr ctxt;
14496
Daniel Veillard61b93382003-11-03 14:28:31 +000014497 ctxt = xmlCreateURLParserCtxt(filename, options);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014498 if (ctxt == NULL)
14499 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000014500 return (xmlDoRead(ctxt, NULL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014501}
14502
14503/**
14504 * xmlReadMemory:
14505 * @buffer: a pointer to a char array
14506 * @size: the size of the array
Daniel Veillard60942de2003-09-25 21:05:58 +000014507 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014508 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014509 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014510 *
14511 * parse an XML in-memory document and build a tree.
14512 *
14513 * Returns the resulting document tree
14514 */
14515xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000014516xmlReadMemory(const char *buffer, int size, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014517{
14518 xmlParserCtxtPtr ctxt;
14519
14520 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
14521 if (ctxt == NULL)
14522 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000014523 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014524}
14525
14526/**
14527 * xmlReadFd:
14528 * @fd: an open file descriptor
Daniel Veillard60942de2003-09-25 21:05:58 +000014529 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014530 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014531 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014532 *
14533 * parse an XML from a file descriptor and build a tree.
Daniel Veillard4bc5f432003-12-22 18:13:12 +000014534 * NOTE that the file descriptor will not be closed when the
14535 * reader is closed or reset.
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014536 *
14537 * Returns the resulting document tree
14538 */
14539xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000014540xmlReadFd(int fd, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014541{
14542 xmlParserCtxtPtr ctxt;
14543 xmlParserInputBufferPtr input;
14544 xmlParserInputPtr stream;
14545
14546 if (fd < 0)
14547 return (NULL);
14548
14549 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
14550 if (input == NULL)
14551 return (NULL);
Daniel Veillard4bc5f432003-12-22 18:13:12 +000014552 input->closecallback = NULL;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014553 ctxt = xmlNewParserCtxt();
14554 if (ctxt == NULL) {
14555 xmlFreeParserInputBuffer(input);
14556 return (NULL);
14557 }
14558 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
14559 if (stream == NULL) {
14560 xmlFreeParserInputBuffer(input);
14561 xmlFreeParserCtxt(ctxt);
14562 return (NULL);
14563 }
14564 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000014565 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014566}
14567
14568/**
14569 * xmlReadIO:
14570 * @ioread: an I/O read function
14571 * @ioclose: an I/O close function
14572 * @ioctx: an I/O handler
Daniel Veillard60942de2003-09-25 21:05:58 +000014573 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014574 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014575 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014576 *
14577 * parse an XML document from I/O functions and source and build a tree.
14578 *
14579 * Returns the resulting document tree
14580 */
14581xmlDocPtr
14582xmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
Daniel Veillard60942de2003-09-25 21:05:58 +000014583 void *ioctx, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014584{
14585 xmlParserCtxtPtr ctxt;
14586 xmlParserInputBufferPtr input;
14587 xmlParserInputPtr stream;
14588
14589 if (ioread == NULL)
14590 return (NULL);
14591
14592 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
14593 XML_CHAR_ENCODING_NONE);
14594 if (input == NULL)
14595 return (NULL);
14596 ctxt = xmlNewParserCtxt();
14597 if (ctxt == NULL) {
14598 xmlFreeParserInputBuffer(input);
14599 return (NULL);
14600 }
14601 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
14602 if (stream == NULL) {
14603 xmlFreeParserInputBuffer(input);
14604 xmlFreeParserCtxt(ctxt);
14605 return (NULL);
14606 }
14607 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000014608 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014609}
14610
14611/**
14612 * xmlCtxtReadDoc:
14613 * @ctxt: an XML parser context
14614 * @cur: a pointer to a zero terminated string
Daniel Veillard60942de2003-09-25 21:05:58 +000014615 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014616 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014617 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014618 *
14619 * parse an XML in-memory document and build a tree.
14620 * This reuses the existing @ctxt parser context
14621 *
14622 * Returns the resulting document tree
14623 */
14624xmlDocPtr
14625xmlCtxtReadDoc(xmlParserCtxtPtr ctxt, const xmlChar * cur,
Daniel Veillard60942de2003-09-25 21:05:58 +000014626 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014627{
14628 xmlParserInputPtr stream;
14629
14630 if (cur == NULL)
14631 return (NULL);
14632 if (ctxt == NULL)
14633 return (NULL);
14634
14635 xmlCtxtReset(ctxt);
14636
14637 stream = xmlNewStringInputStream(ctxt, cur);
14638 if (stream == NULL) {
14639 return (NULL);
14640 }
14641 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000014642 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014643}
14644
14645/**
14646 * xmlCtxtReadFile:
14647 * @ctxt: an XML parser context
14648 * @filename: a file or URL
14649 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014650 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014651 *
14652 * parse an XML file from the filesystem or the network.
14653 * This reuses the existing @ctxt parser context
14654 *
14655 * Returns the resulting document tree
14656 */
14657xmlDocPtr
14658xmlCtxtReadFile(xmlParserCtxtPtr ctxt, const char *filename,
14659 const char *encoding, int options)
14660{
14661 xmlParserInputPtr stream;
14662
14663 if (filename == NULL)
14664 return (NULL);
14665 if (ctxt == NULL)
14666 return (NULL);
14667
14668 xmlCtxtReset(ctxt);
14669
Daniel Veillard29614c72004-11-26 10:47:26 +000014670 stream = xmlLoadExternalEntity(filename, NULL, ctxt);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014671 if (stream == NULL) {
14672 return (NULL);
14673 }
14674 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000014675 return (xmlDoRead(ctxt, NULL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014676}
14677
14678/**
14679 * xmlCtxtReadMemory:
14680 * @ctxt: an XML parser context
14681 * @buffer: a pointer to a char array
14682 * @size: the size of the array
Daniel Veillard60942de2003-09-25 21:05:58 +000014683 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014684 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014685 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014686 *
14687 * parse an XML in-memory document and build a tree.
14688 * This reuses the existing @ctxt parser context
14689 *
14690 * Returns the resulting document tree
14691 */
14692xmlDocPtr
14693xmlCtxtReadMemory(xmlParserCtxtPtr ctxt, const char *buffer, int size,
Daniel Veillard60942de2003-09-25 21:05:58 +000014694 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014695{
14696 xmlParserInputBufferPtr input;
14697 xmlParserInputPtr stream;
14698
14699 if (ctxt == NULL)
14700 return (NULL);
14701 if (buffer == NULL)
14702 return (NULL);
14703
14704 xmlCtxtReset(ctxt);
14705
14706 input = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
14707 if (input == NULL) {
14708 return(NULL);
14709 }
14710
14711 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
14712 if (stream == NULL) {
14713 xmlFreeParserInputBuffer(input);
14714 return(NULL);
14715 }
14716
14717 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000014718 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014719}
14720
14721/**
14722 * xmlCtxtReadFd:
14723 * @ctxt: an XML parser context
14724 * @fd: an open file descriptor
Daniel Veillard60942de2003-09-25 21:05:58 +000014725 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014726 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014727 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014728 *
14729 * parse an XML from a file descriptor and build a tree.
14730 * This reuses the existing @ctxt parser context
Daniel Veillard4bc5f432003-12-22 18:13:12 +000014731 * NOTE that the file descriptor will not be closed when the
14732 * reader is closed or reset.
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014733 *
14734 * Returns the resulting document tree
14735 */
14736xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000014737xmlCtxtReadFd(xmlParserCtxtPtr ctxt, int fd,
14738 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014739{
14740 xmlParserInputBufferPtr input;
14741 xmlParserInputPtr stream;
14742
14743 if (fd < 0)
14744 return (NULL);
14745 if (ctxt == NULL)
14746 return (NULL);
14747
14748 xmlCtxtReset(ctxt);
14749
14750
14751 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
14752 if (input == NULL)
14753 return (NULL);
Daniel Veillard4bc5f432003-12-22 18:13:12 +000014754 input->closecallback = NULL;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014755 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
14756 if (stream == NULL) {
14757 xmlFreeParserInputBuffer(input);
14758 return (NULL);
14759 }
14760 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000014761 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014762}
14763
14764/**
14765 * xmlCtxtReadIO:
14766 * @ctxt: an XML parser context
14767 * @ioread: an I/O read function
14768 * @ioclose: an I/O close function
14769 * @ioctx: an I/O handler
Daniel Veillard60942de2003-09-25 21:05:58 +000014770 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014771 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014772 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014773 *
14774 * parse an XML document from I/O functions and source and build a tree.
14775 * This reuses the existing @ctxt parser context
14776 *
14777 * Returns the resulting document tree
14778 */
14779xmlDocPtr
14780xmlCtxtReadIO(xmlParserCtxtPtr ctxt, xmlInputReadCallback ioread,
14781 xmlInputCloseCallback ioclose, void *ioctx,
Daniel Veillard60942de2003-09-25 21:05:58 +000014782 const char *URL,
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014783 const char *encoding, int options)
14784{
14785 xmlParserInputBufferPtr input;
14786 xmlParserInputPtr stream;
14787
14788 if (ioread == NULL)
14789 return (NULL);
14790 if (ctxt == NULL)
14791 return (NULL);
14792
14793 xmlCtxtReset(ctxt);
14794
14795 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
14796 XML_CHAR_ENCODING_NONE);
14797 if (input == NULL)
14798 return (NULL);
14799 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
14800 if (stream == NULL) {
14801 xmlFreeParserInputBuffer(input);
14802 return (NULL);
14803 }
14804 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000014805 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014806}
Daniel Veillard5d4644e2005-04-01 13:11:58 +000014807
14808#define bottom_parser
14809#include "elfgcchack.h"