blob: d1c7888a9fcc619e8a2819cba140298e1c93851f [file] [log] [blame]
Owen Taylor3473f882001-02-23 17:55:21 +00001/*
2 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
3 * implemented on top of the SAX interfaces
4 *
5 * References:
6 * The XML specification:
7 * http://www.w3.org/TR/REC-xml
8 * Original 1.0 version:
9 * http://www.w3.org/TR/1998/REC-xml-19980210
10 * XML second edition working draft
11 * http://www.w3.org/TR/2000/WD-xml-2e-20000814
12 *
13 * Okay this is a big file, the parser core is around 7000 lines, then it
14 * is followed by the progressive parser top routines, then the various
Daniel Veillardcbaf3992001-12-31 16:16:02 +000015 * high level APIs to call the parser and a few miscellaneous functions.
Owen Taylor3473f882001-02-23 17:55:21 +000016 * A number of helper functions and deprecated ones have been moved to
17 * parserInternals.c to reduce this file size.
 * As much as possible the functions are associated with their related
 * production in the XML specification. A few productions defining the
 * different ranges of characters are actually implemented either in
 * parserInternals.h or parserInternals.c
22 * The DOM tree build is realized from the default SAX callbacks in
23 * the module SAX.c.
24 * The routines doing the validation checks are in valid.c and called either
Daniel Veillardcbaf3992001-12-31 16:16:02 +000025 * from the SAX callbacks or as standalone functions using a preparsed
Owen Taylor3473f882001-02-23 17:55:21 +000026 * document.
27 *
28 * See Copyright for the status of this software.
29 *
Daniel Veillardc5d64342001-06-24 12:13:24 +000030 * daniel@veillard.com
Owen Taylor3473f882001-02-23 17:55:21 +000031 */
32
Daniel Veillard34ce8be2002-03-18 19:37:11 +000033#define IN_LIBXML
Bjorn Reese70a9da52001-04-21 16:57:29 +000034#include "libxml.h"
35
Daniel Veillard3c5ed912002-01-08 10:36:16 +000036#if defined(WIN32) && !defined (__CYGWIN__)
Owen Taylor3473f882001-02-23 17:55:21 +000037#define XML_DIR_SEP '\\'
38#else
Owen Taylor3473f882001-02-23 17:55:21 +000039#define XML_DIR_SEP '/'
40#endif
41
Owen Taylor3473f882001-02-23 17:55:21 +000042#include <stdlib.h>
43#include <string.h>
Aleksey Sanine7acf432003-10-02 20:05:27 +000044#include <stdarg.h>
Owen Taylor3473f882001-02-23 17:55:21 +000045#include <libxml/xmlmemory.h>
Daniel Veillardd0463562001-10-13 09:15:48 +000046#include <libxml/threads.h>
47#include <libxml/globals.h>
Owen Taylor3473f882001-02-23 17:55:21 +000048#include <libxml/tree.h>
49#include <libxml/parser.h>
50#include <libxml/parserInternals.h>
51#include <libxml/valid.h>
52#include <libxml/entities.h>
53#include <libxml/xmlerror.h>
54#include <libxml/encoding.h>
55#include <libxml/xmlIO.h>
56#include <libxml/uri.h>
Daniel Veillard5d90b6c2001-08-22 14:29:45 +000057#ifdef LIBXML_CATALOG_ENABLED
58#include <libxml/catalog.h>
59#endif
William M. Brack1d8c9b22004-12-25 10:14:57 +000060#ifdef LIBXML_SCHEMAS_ENABLED
61#include <libxml/xmlschemastypes.h>
62#include <libxml/relaxng.h>
63#endif
Owen Taylor3473f882001-02-23 17:55:21 +000064#ifdef HAVE_CTYPE_H
65#include <ctype.h>
66#endif
67#ifdef HAVE_STDLIB_H
68#include <stdlib.h>
69#endif
70#ifdef HAVE_SYS_STAT_H
71#include <sys/stat.h>
72#endif
73#ifdef HAVE_FCNTL_H
74#include <fcntl.h>
75#endif
76#ifdef HAVE_UNISTD_H
77#include <unistd.h>
78#endif
79#ifdef HAVE_ZLIB_H
80#include <zlib.h>
81#endif
82
Daniel Veillard0161e632008-08-28 15:36:32 +000083static void
84xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info);
85
Rob Richards9c0aa472009-03-26 18:10:19 +000086static xmlParserCtxtPtr
87xmlCreateEntityParserCtxtInternal(const xmlChar *URL, const xmlChar *ID,
88 const xmlChar *base, xmlParserCtxtPtr pctx);
89
Daniel Veillard0161e632008-08-28 15:36:32 +000090/************************************************************************
91 * *
92 * Arbitrary limits set in the parser. See XML_PARSE_HUGE *
93 * *
94 ************************************************************************/
95
96#define XML_PARSER_BIG_ENTITY 1000
97#define XML_PARSER_LOT_ENTITY 5000
98
/*
 * XML_PARSER_NON_LINEAR is the threshold where the ratio of parsed entity
 * replacement over the size in bytes of the input indicates that you have
 * an exponential behaviour. A value of 10 corresponds to at least 3 entity
 * replacements per byte of input.
 */
105#define XML_PARSER_NON_LINEAR 10
106
107/*
108 * xmlParserEntityCheck
109 *
110 * Function to check non-linear entity expansion behaviour
111 * This is here to detect and stop exponential linear entity expansion
112 * This is not a limitation of the parser but a safety
113 * boundary feature. It can be disabled with the XML_PARSE_HUGE
114 * parser option.
115 */
116static int
117xmlParserEntityCheck(xmlParserCtxtPtr ctxt, unsigned long size,
118 xmlEntityPtr ent)
119{
Daniel Veillardcba68392008-08-29 12:43:40 +0000120 unsigned long consumed = 0;
Daniel Veillard0161e632008-08-28 15:36:32 +0000121
122 if ((ctxt == NULL) || (ctxt->options & XML_PARSE_HUGE))
123 return (0);
124 if (ctxt->lastError.code == XML_ERR_ENTITY_LOOP)
125 return (1);
126 if (size != 0) {
127 /*
128 * Do the check based on the replacement size of the entity
129 */
130 if (size < XML_PARSER_BIG_ENTITY)
131 return(0);
132
133 /*
134 * A limit on the amount of text data reasonably used
135 */
136 if (ctxt->input != NULL) {
137 consumed = ctxt->input->consumed +
138 (ctxt->input->cur - ctxt->input->base);
139 }
140 consumed += ctxt->sizeentities;
141
142 if ((size < XML_PARSER_NON_LINEAR * consumed) &&
143 (ctxt->nbentities * 3 < XML_PARSER_NON_LINEAR * consumed))
144 return (0);
145 } else if (ent != NULL) {
146 /*
147 * use the number of parsed entities in the replacement
148 */
149 size = ent->checked;
150
151 /*
152 * The amount of data parsed counting entities size only once
153 */
154 if (ctxt->input != NULL) {
155 consumed = ctxt->input->consumed +
156 (ctxt->input->cur - ctxt->input->base);
157 }
158 consumed += ctxt->sizeentities;
159
160 /*
161 * Check the density of entities for the amount of data
162 * knowing an entity reference will take at least 3 bytes
163 */
164 if (size * 3 < consumed * XML_PARSER_NON_LINEAR)
165 return (0);
166 } else {
167 /*
168 * strange we got no data for checking just return
169 */
170 return (0);
171 }
172
173 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
174 return (1);
175}
176
Daniel Veillard3b2e4e12003-02-03 08:52:58 +0000177/**
Daniel Veillard4aede2e2003-10-17 12:43:59 +0000178 * xmlParserMaxDepth:
Daniel Veillard3b2e4e12003-02-03 08:52:58 +0000179 *
Daniel Veillard8915c152008-08-26 13:05:34 +0000180 * arbitrary depth limit for the XML documents that we allow to
181 * process. This is not a limitation of the parser but a safety
182 * boundary feature. It can be disabled with the XML_PARSE_HUGE
183 * parser option.
Daniel Veillard3b2e4e12003-02-03 08:52:58 +0000184 */
Daniel Veillard8915c152008-08-26 13:05:34 +0000185unsigned int xmlParserMaxDepth = 256;
Owen Taylor3473f882001-02-23 17:55:21 +0000186
Daniel Veillard0fb18932003-09-07 09:14:37 +0000187
Daniel Veillard0161e632008-08-28 15:36:32 +0000188
189#define SAX2 1
Daniel Veillard21a0f912001-02-25 19:54:14 +0000190#define XML_PARSER_BIG_BUFFER_SIZE 300
Owen Taylor3473f882001-02-23 17:55:21 +0000191#define XML_PARSER_BUFFER_SIZE 100
Daniel Veillard5997aca2002-03-18 18:36:20 +0000192#define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"
193
/*
 * List of XML prefixed PI allowed by W3C specs.
 * The table is NULL terminated and read-only: both the strings and the
 * array slots are const so that it cannot be modified by accident.
 */

static const char * const xmlW3CPIs[] = {
    "xml-stylesheet",
    NULL
};
202
Daniel Veillarda07050d2003-10-19 14:46:32 +0000203
Owen Taylor3473f882001-02-23 17:55:21 +0000204/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
Daniel Veillard8ed10722009-08-20 19:17:36 +0200205static xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
206 const xmlChar **str);
Owen Taylor3473f882001-02-23 17:55:21 +0000207
Daniel Veillard7d515752003-09-26 19:12:37 +0000208static xmlParserErrors
Daniel Veillarda97a19b2001-05-20 13:19:52 +0000209xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
210 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +0000211 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +0000212 const xmlChar *ID, xmlNodePtr *list);
Owen Taylor3473f882001-02-23 17:55:21 +0000213
Daniel Veillard37334572008-07-31 08:20:02 +0000214static int
215xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options,
216 const char *encoding);
Daniel Veillard81273902003-09-30 00:43:48 +0000217#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +0000218static void
219xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
220 xmlNodePtr lastNode);
Daniel Veillard81273902003-09-30 00:43:48 +0000221#endif /* LIBXML_LEGACY_ENABLED */
Daniel Veillard8107a222002-01-13 14:10:10 +0000222
Daniel Veillard7d515752003-09-26 19:12:37 +0000223static xmlParserErrors
Daniel Veillard328f48c2002-11-15 15:24:34 +0000224xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
225 const xmlChar *string, void *user_data, xmlNodePtr *lst);
Daniel Veillarde57ec792003-09-10 10:50:59 +0000226
Daniel Veillard8bf64ae2008-03-24 20:45:21 +0000227static int
228xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity);
229
Daniel Veillarde57ec792003-09-10 10:50:59 +0000230/************************************************************************
231 * *
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000232 * Some factorized error routines *
233 * *
234 ************************************************************************/
235
236/**
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000237 * xmlErrAttributeDup:
238 * @ctxt: an XML parser context
239 * @prefix: the attribute prefix
240 * @localname: the attribute localname
241 *
242 * Handle a redefinition of attribute error
243 */
244static void
245xmlErrAttributeDup(xmlParserCtxtPtr ctxt, const xmlChar * prefix,
246 const xmlChar * localname)
247{
Daniel Veillard157fee02003-10-31 10:36:03 +0000248 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
249 (ctxt->instate == XML_PARSER_EOF))
250 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000251 if (ctxt != NULL)
252 ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
Daniel Veillard76d36452009-09-07 11:19:33 +0200253
Daniel Veillard2b8c4a12003-10-02 22:28:19 +0000254 if (prefix == NULL)
Daniel Veillard659e71e2003-10-10 14:10:40 +0000255 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
Daniel Veillard76d36452009-09-07 11:19:33 +0200256 XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000257 (const char *) localname, NULL, NULL, 0, 0,
258 "Attribute %s redefined\n", localname);
Daniel Veillard2b8c4a12003-10-02 22:28:19 +0000259 else
Daniel Veillard659e71e2003-10-10 14:10:40 +0000260 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
Daniel Veillard76d36452009-09-07 11:19:33 +0200261 XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000262 (const char *) prefix, (const char *) localname,
263 NULL, 0, 0, "Attribute %s:%s redefined\n", prefix,
264 localname);
Daniel Veillard30e76072006-03-09 14:13:55 +0000265 if (ctxt != NULL) {
266 ctxt->wellFormed = 0;
267 if (ctxt->recovery == 0)
268 ctxt->disableSAX = 1;
269 }
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000270}
271
272/**
273 * xmlFatalErr:
274 * @ctxt: an XML parser context
275 * @error: the error number
276 * @extra: extra information string
277 *
278 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
279 */
280static void
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000281xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info)
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000282{
283 const char *errmsg;
284
Daniel Veillard157fee02003-10-31 10:36:03 +0000285 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
286 (ctxt->instate == XML_PARSER_EOF))
287 return;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000288 switch (error) {
289 case XML_ERR_INVALID_HEX_CHARREF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000290 errmsg = "CharRef: invalid hexadecimal value\n";
291 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000292 case XML_ERR_INVALID_DEC_CHARREF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000293 errmsg = "CharRef: invalid decimal value\n";
294 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000295 case XML_ERR_INVALID_CHARREF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000296 errmsg = "CharRef: invalid value\n";
297 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000298 case XML_ERR_INTERNAL_ERROR:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000299 errmsg = "internal error";
300 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000301 case XML_ERR_PEREF_AT_EOF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000302 errmsg = "PEReference at end of document\n";
303 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000304 case XML_ERR_PEREF_IN_PROLOG:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000305 errmsg = "PEReference in prolog\n";
306 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000307 case XML_ERR_PEREF_IN_EPILOG:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000308 errmsg = "PEReference in epilog\n";
309 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000310 case XML_ERR_PEREF_NO_NAME:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000311 errmsg = "PEReference: no name\n";
312 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000313 case XML_ERR_PEREF_SEMICOL_MISSING:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000314 errmsg = "PEReference: expecting ';'\n";
315 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000316 case XML_ERR_ENTITY_LOOP:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000317 errmsg = "Detected an entity reference loop\n";
318 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000319 case XML_ERR_ENTITY_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000320 errmsg = "EntityValue: \" or ' expected\n";
321 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000322 case XML_ERR_ENTITY_PE_INTERNAL:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000323 errmsg = "PEReferences forbidden in internal subset\n";
324 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000325 case XML_ERR_ENTITY_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000326 errmsg = "EntityValue: \" or ' expected\n";
327 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000328 case XML_ERR_ATTRIBUTE_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000329 errmsg = "AttValue: \" or ' expected\n";
330 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000331 case XML_ERR_LT_IN_ATTRIBUTE:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000332 errmsg = "Unescaped '<' not allowed in attributes values\n";
333 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000334 case XML_ERR_LITERAL_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000335 errmsg = "SystemLiteral \" or ' expected\n";
336 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000337 case XML_ERR_LITERAL_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000338 errmsg = "Unfinished System or Public ID \" or ' expected\n";
339 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000340 case XML_ERR_MISPLACED_CDATA_END:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000341 errmsg = "Sequence ']]>' not allowed in content\n";
342 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000343 case XML_ERR_URI_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000344 errmsg = "SYSTEM or PUBLIC, the URI is missing\n";
345 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000346 case XML_ERR_PUBID_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000347 errmsg = "PUBLIC, the Public Identifier is missing\n";
348 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000349 case XML_ERR_HYPHEN_IN_COMMENT:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000350 errmsg = "Comment must not contain '--' (double-hyphen)\n";
351 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000352 case XML_ERR_PI_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000353 errmsg = "xmlParsePI : no target name\n";
354 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000355 case XML_ERR_RESERVED_XML_NAME:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000356 errmsg = "Invalid PI name\n";
357 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000358 case XML_ERR_NOTATION_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000359 errmsg = "NOTATION: Name expected here\n";
360 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000361 case XML_ERR_NOTATION_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000362 errmsg = "'>' required to close NOTATION declaration\n";
363 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000364 case XML_ERR_VALUE_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000365 errmsg = "Entity value required\n";
366 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000367 case XML_ERR_URI_FRAGMENT:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000368 errmsg = "Fragment not allowed";
369 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000370 case XML_ERR_ATTLIST_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000371 errmsg = "'(' required to start ATTLIST enumeration\n";
372 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000373 case XML_ERR_NMTOKEN_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000374 errmsg = "NmToken expected in ATTLIST enumeration\n";
375 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000376 case XML_ERR_ATTLIST_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000377 errmsg = "')' required to finish ATTLIST enumeration\n";
378 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000379 case XML_ERR_MIXED_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000380 errmsg = "MixedContentDecl : '|' or ')*' expected\n";
381 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000382 case XML_ERR_PCDATA_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000383 errmsg = "MixedContentDecl : '#PCDATA' expected\n";
384 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000385 case XML_ERR_ELEMCONTENT_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000386 errmsg = "ContentDecl : Name or '(' expected\n";
387 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000388 case XML_ERR_ELEMCONTENT_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000389 errmsg = "ContentDecl : ',' '|' or ')' expected\n";
390 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000391 case XML_ERR_PEREF_IN_INT_SUBSET:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000392 errmsg =
393 "PEReference: forbidden within markup decl in internal subset\n";
394 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000395 case XML_ERR_GT_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000396 errmsg = "expected '>'\n";
397 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000398 case XML_ERR_CONDSEC_INVALID:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000399 errmsg = "XML conditional section '[' expected\n";
400 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000401 case XML_ERR_EXT_SUBSET_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000402 errmsg = "Content error in the external subset\n";
403 break;
404 case XML_ERR_CONDSEC_INVALID_KEYWORD:
405 errmsg =
406 "conditional section INCLUDE or IGNORE keyword expected\n";
407 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000408 case XML_ERR_CONDSEC_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000409 errmsg = "XML conditional section not closed\n";
410 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000411 case XML_ERR_XMLDECL_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000412 errmsg = "Text declaration '<?xml' required\n";
413 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000414 case XML_ERR_XMLDECL_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000415 errmsg = "parsing XML declaration: '?>' expected\n";
416 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000417 case XML_ERR_EXT_ENTITY_STANDALONE:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000418 errmsg = "external parsed entities cannot be standalone\n";
419 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000420 case XML_ERR_ENTITYREF_SEMICOL_MISSING:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000421 errmsg = "EntityRef: expecting ';'\n";
422 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000423 case XML_ERR_DOCTYPE_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000424 errmsg = "DOCTYPE improperly terminated\n";
425 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000426 case XML_ERR_LTSLASH_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000427 errmsg = "EndTag: '</' not found\n";
428 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000429 case XML_ERR_EQUAL_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000430 errmsg = "expected '='\n";
431 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000432 case XML_ERR_STRING_NOT_CLOSED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000433 errmsg = "String not closed expecting \" or '\n";
434 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000435 case XML_ERR_STRING_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000436 errmsg = "String not started expecting ' or \"\n";
437 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000438 case XML_ERR_ENCODING_NAME:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000439 errmsg = "Invalid XML encoding name\n";
440 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000441 case XML_ERR_STANDALONE_VALUE:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000442 errmsg = "standalone accepts only 'yes' or 'no'\n";
443 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000444 case XML_ERR_DOCUMENT_EMPTY:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000445 errmsg = "Document is empty\n";
446 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000447 case XML_ERR_DOCUMENT_END:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000448 errmsg = "Extra content at the end of the document\n";
449 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000450 case XML_ERR_NOT_WELL_BALANCED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000451 errmsg = "chunk is not well balanced\n";
452 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000453 case XML_ERR_EXTRA_CONTENT:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000454 errmsg = "extra content at the end of well balanced chunk\n";
455 break;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000456 case XML_ERR_VERSION_MISSING:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000457 errmsg = "Malformed declaration expecting version\n";
458 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000459#if 0
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000460 case:
461 errmsg = "\n";
462 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000463#endif
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000464 default:
465 errmsg = "Unregistered error message\n";
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000466 }
Daniel Veillard30e76072006-03-09 14:13:55 +0000467 if (ctxt != NULL)
468 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000469 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000470 XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, errmsg,
471 info);
Daniel Veillard30e76072006-03-09 14:13:55 +0000472 if (ctxt != NULL) {
473 ctxt->wellFormed = 0;
474 if (ctxt->recovery == 0)
475 ctxt->disableSAX = 1;
476 }
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000477}
478
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000479/**
480 * xmlFatalErrMsg:
481 * @ctxt: an XML parser context
482 * @error: the error number
483 * @msg: the error message
484 *
485 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
486 */
487static void
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000488xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
489 const char *msg)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000490{
Daniel Veillard157fee02003-10-31 10:36:03 +0000491 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
492 (ctxt->instate == XML_PARSER_EOF))
493 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000494 if (ctxt != NULL)
495 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000496 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
Daniel Veillardbccae2d2009-06-04 11:22:45 +0200497 XML_ERR_FATAL, NULL, 0, NULL, NULL, NULL, 0, 0, "%s", msg);
Daniel Veillard30e76072006-03-09 14:13:55 +0000498 if (ctxt != NULL) {
499 ctxt->wellFormed = 0;
500 if (ctxt->recovery == 0)
501 ctxt->disableSAX = 1;
502 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000503}
504
505/**
Daniel Veillard24eb9782003-10-04 21:08:09 +0000506 * xmlWarningMsg:
507 * @ctxt: an XML parser context
508 * @error: the error number
509 * @msg: the error message
510 * @str1: extra data
511 * @str2: extra data
512 *
513 * Handle a warning.
514 */
515static void
516xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
517 const char *msg, const xmlChar *str1, const xmlChar *str2)
518{
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000519 xmlStructuredErrorFunc schannel = NULL;
Daniel Veillard34e3f642008-07-29 09:02:27 +0000520
Daniel Veillard157fee02003-10-31 10:36:03 +0000521 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
522 (ctxt->instate == XML_PARSER_EOF))
523 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000524 if ((ctxt != NULL) && (ctxt->sax != NULL) &&
525 (ctxt->sax->initialized == XML_SAX2_MAGIC))
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000526 schannel = ctxt->sax->serror;
Daniel Veillardd44b9362009-09-07 12:15:08 +0200527 if (ctxt != NULL) {
528 __xmlRaiseError(schannel,
Daniel Veillard659e71e2003-10-10 14:10:40 +0000529 (ctxt->sax) ? ctxt->sax->warning : NULL,
530 ctxt->userData,
Daniel Veillard24eb9782003-10-04 21:08:09 +0000531 ctxt, NULL, XML_FROM_PARSER, error,
532 XML_ERR_WARNING, NULL, 0,
533 (const char *) str1, (const char *) str2, NULL, 0, 0,
534 msg, (const char *) str1, (const char *) str2);
Daniel Veillardd44b9362009-09-07 12:15:08 +0200535 } else {
536 __xmlRaiseError(schannel, NULL, NULL,
537 ctxt, NULL, XML_FROM_PARSER, error,
538 XML_ERR_WARNING, NULL, 0,
539 (const char *) str1, (const char *) str2, NULL, 0, 0,
540 msg, (const char *) str1, (const char *) str2);
541 }
Daniel Veillard24eb9782003-10-04 21:08:09 +0000542}
543
544/**
545 * xmlValidityError:
546 * @ctxt: an XML parser context
547 * @error: the error number
548 * @msg: the error message
549 * @str1: extra data
550 *
Daniel Veillardf88d8cf2003-12-08 10:25:02 +0000551 * Handle a validity error.
Daniel Veillard24eb9782003-10-04 21:08:09 +0000552 */
553static void
554xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error,
Daniel Veillardae0765b2008-07-31 19:54:59 +0000555 const char *msg, const xmlChar *str1, const xmlChar *str2)
Daniel Veillard24eb9782003-10-04 21:08:09 +0000556{
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000557 xmlStructuredErrorFunc schannel = NULL;
Daniel Veillard157fee02003-10-31 10:36:03 +0000558
559 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
560 (ctxt->instate == XML_PARSER_EOF))
561 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000562 if (ctxt != NULL) {
563 ctxt->errNo = error;
564 if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC))
565 schannel = ctxt->sax->serror;
566 }
Daniel Veillard76d36452009-09-07 11:19:33 +0200567 if (ctxt != NULL) {
568 __xmlRaiseError(schannel,
Daniel Veillard659e71e2003-10-10 14:10:40 +0000569 ctxt->vctxt.error, ctxt->vctxt.userData,
Daniel Veillard24eb9782003-10-04 21:08:09 +0000570 ctxt, NULL, XML_FROM_DTD, error,
571 XML_ERR_ERROR, NULL, 0, (const char *) str1,
Daniel Veillardae0765b2008-07-31 19:54:59 +0000572 (const char *) str2, NULL, 0, 0,
573 msg, (const char *) str1, (const char *) str2);
Daniel Veillard30e76072006-03-09 14:13:55 +0000574 ctxt->valid = 0;
Daniel Veillard76d36452009-09-07 11:19:33 +0200575 } else {
576 __xmlRaiseError(schannel, NULL, NULL,
577 ctxt, NULL, XML_FROM_DTD, error,
578 XML_ERR_ERROR, NULL, 0, (const char *) str1,
579 (const char *) str2, NULL, 0, 0,
580 msg, (const char *) str1, (const char *) str2);
Daniel Veillard30e76072006-03-09 14:13:55 +0000581 }
Daniel Veillard24eb9782003-10-04 21:08:09 +0000582}
583
584/**
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000585 * xmlFatalErrMsgInt:
586 * @ctxt: an XML parser context
587 * @error: the error number
588 * @msg: the error message
589 * @val: an integer value
590 *
591 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
592 */
593static void
594xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000595 const char *msg, int val)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000596{
Daniel Veillard157fee02003-10-31 10:36:03 +0000597 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
598 (ctxt->instate == XML_PARSER_EOF))
599 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000600 if (ctxt != NULL)
601 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000602 __xmlRaiseError(NULL, NULL, NULL,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000603 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
604 NULL, 0, NULL, NULL, NULL, val, 0, msg, val);
Daniel Veillard30e76072006-03-09 14:13:55 +0000605 if (ctxt != NULL) {
606 ctxt->wellFormed = 0;
607 if (ctxt->recovery == 0)
608 ctxt->disableSAX = 1;
609 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000610}
611
612/**
Daniel Veillardf403d292003-10-05 13:51:35 +0000613 * xmlFatalErrMsgStrIntStr:
614 * @ctxt: an XML parser context
615 * @error: the error number
616 * @msg: the error message
617 * @str1: an string info
618 * @val: an integer value
619 * @str2: an string info
620 *
621 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
622 */
623static void
624xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
625 const char *msg, const xmlChar *str1, int val,
626 const xmlChar *str2)
627{
Daniel Veillard157fee02003-10-31 10:36:03 +0000628 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
629 (ctxt->instate == XML_PARSER_EOF))
630 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000631 if (ctxt != NULL)
632 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000633 __xmlRaiseError(NULL, NULL, NULL,
Daniel Veillardf403d292003-10-05 13:51:35 +0000634 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
635 NULL, 0, (const char *) str1, (const char *) str2,
636 NULL, val, 0, msg, str1, val, str2);
Daniel Veillard30e76072006-03-09 14:13:55 +0000637 if (ctxt != NULL) {
638 ctxt->wellFormed = 0;
639 if (ctxt->recovery == 0)
640 ctxt->disableSAX = 1;
641 }
Daniel Veillardf403d292003-10-05 13:51:35 +0000642}
643
644/**
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000645 * xmlFatalErrMsgStr:
646 * @ctxt: an XML parser context
647 * @error: the error number
648 * @msg: the error message
649 * @val: a string value
650 *
651 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
652 */
653static void
654xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000655 const char *msg, const xmlChar * val)
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000656{
Daniel Veillard157fee02003-10-31 10:36:03 +0000657 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
658 (ctxt->instate == XML_PARSER_EOF))
659 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000660 if (ctxt != NULL)
661 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000662 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000663 XML_FROM_PARSER, error, XML_ERR_FATAL,
664 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
665 val);
Daniel Veillard30e76072006-03-09 14:13:55 +0000666 if (ctxt != NULL) {
667 ctxt->wellFormed = 0;
668 if (ctxt->recovery == 0)
669 ctxt->disableSAX = 1;
670 }
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000671}
672
673/**
Daniel Veillardf403d292003-10-05 13:51:35 +0000674 * xmlErrMsgStr:
675 * @ctxt: an XML parser context
676 * @error: the error number
677 * @msg: the error message
678 * @val: a string value
679 *
680 * Handle a non fatal parser error
681 */
682static void
683xmlErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
684 const char *msg, const xmlChar * val)
685{
Daniel Veillard157fee02003-10-31 10:36:03 +0000686 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
687 (ctxt->instate == XML_PARSER_EOF))
688 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000689 if (ctxt != NULL)
690 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000691 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
Daniel Veillardf403d292003-10-05 13:51:35 +0000692 XML_FROM_PARSER, error, XML_ERR_ERROR,
693 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
694 val);
695}
696
697/**
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000698 * xmlNsErr:
699 * @ctxt: an XML parser context
700 * @error: the error number
701 * @msg: the message
702 * @info1: extra information string
703 * @info2: extra information string
704 *
705 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
706 */
707static void
708xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
709 const char *msg,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000710 const xmlChar * info1, const xmlChar * info2,
711 const xmlChar * info3)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000712{
Daniel Veillard157fee02003-10-31 10:36:03 +0000713 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
714 (ctxt->instate == XML_PARSER_EOF))
715 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000716 if (ctxt != NULL)
717 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000718 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000719 XML_ERR_ERROR, NULL, 0, (const char *) info1,
720 (const char *) info2, (const char *) info3, 0, 0, msg,
721 info1, info2, info3);
Daniel Veillard30e76072006-03-09 14:13:55 +0000722 if (ctxt != NULL)
723 ctxt->nsWellFormed = 0;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000724}
725
Daniel Veillard37334572008-07-31 08:20:02 +0000726/**
727 * xmlNsWarn
728 * @ctxt: an XML parser context
729 * @error: the error number
730 * @msg: the message
731 * @info1: extra information string
732 * @info2: extra information string
733 *
734 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
735 */
736static void
737xmlNsWarn(xmlParserCtxtPtr ctxt, xmlParserErrors error,
738 const char *msg,
739 const xmlChar * info1, const xmlChar * info2,
740 const xmlChar * info3)
741{
742 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
743 (ctxt->instate == XML_PARSER_EOF))
744 return;
Daniel Veillard37334572008-07-31 08:20:02 +0000745 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
746 XML_ERR_WARNING, NULL, 0, (const char *) info1,
747 (const char *) info2, (const char *) info3, 0, 0, msg,
748 info1, info2, info3);
749}
750
/************************************************************************
 *                                                                      *
 *                      Library wide options                            *
 *                                                                      *
 ************************************************************************/
756
757/**
758 * xmlHasFeature:
759 * @feature: the feature to be examined
760 *
761 * Examines if the library has been compiled with a given feature.
762 *
763 * Returns a non-zero value if the feature exist, otherwise zero.
764 * Returns zero (0) if the feature does not exist or an unknown
765 * unknown feature is requested, non-zero otherwise.
766 */
767int
768xmlHasFeature(xmlFeature feature)
769{
770 switch (feature) {
Daniel Veillard602434d2005-09-12 09:20:31 +0000771 case XML_WITH_THREAD:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000772#ifdef LIBXML_THREAD_ENABLED
773 return(1);
774#else
775 return(0);
776#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000777 case XML_WITH_TREE:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000778#ifdef LIBXML_TREE_ENABLED
779 return(1);
780#else
781 return(0);
782#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000783 case XML_WITH_OUTPUT:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000784#ifdef LIBXML_OUTPUT_ENABLED
785 return(1);
786#else
787 return(0);
788#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000789 case XML_WITH_PUSH:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000790#ifdef LIBXML_PUSH_ENABLED
791 return(1);
792#else
793 return(0);
794#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000795 case XML_WITH_READER:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000796#ifdef LIBXML_READER_ENABLED
797 return(1);
798#else
799 return(0);
800#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000801 case XML_WITH_PATTERN:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000802#ifdef LIBXML_PATTERN_ENABLED
803 return(1);
804#else
805 return(0);
806#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000807 case XML_WITH_WRITER:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000808#ifdef LIBXML_WRITER_ENABLED
809 return(1);
810#else
811 return(0);
812#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000813 case XML_WITH_SAX1:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000814#ifdef LIBXML_SAX1_ENABLED
815 return(1);
816#else
817 return(0);
818#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000819 case XML_WITH_FTP:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000820#ifdef LIBXML_FTP_ENABLED
821 return(1);
822#else
823 return(0);
824#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000825 case XML_WITH_HTTP:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000826#ifdef LIBXML_HTTP_ENABLED
827 return(1);
828#else
829 return(0);
830#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000831 case XML_WITH_VALID:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000832#ifdef LIBXML_VALID_ENABLED
833 return(1);
834#else
835 return(0);
836#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000837 case XML_WITH_HTML:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000838#ifdef LIBXML_HTML_ENABLED
839 return(1);
840#else
841 return(0);
842#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000843 case XML_WITH_LEGACY:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000844#ifdef LIBXML_LEGACY_ENABLED
845 return(1);
846#else
847 return(0);
848#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000849 case XML_WITH_C14N:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000850#ifdef LIBXML_C14N_ENABLED
851 return(1);
852#else
853 return(0);
854#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000855 case XML_WITH_CATALOG:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000856#ifdef LIBXML_CATALOG_ENABLED
857 return(1);
858#else
859 return(0);
860#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000861 case XML_WITH_XPATH:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000862#ifdef LIBXML_XPATH_ENABLED
863 return(1);
864#else
865 return(0);
866#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000867 case XML_WITH_XPTR:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000868#ifdef LIBXML_XPTR_ENABLED
869 return(1);
870#else
871 return(0);
872#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000873 case XML_WITH_XINCLUDE:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000874#ifdef LIBXML_XINCLUDE_ENABLED
875 return(1);
876#else
877 return(0);
878#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000879 case XML_WITH_ICONV:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000880#ifdef LIBXML_ICONV_ENABLED
881 return(1);
882#else
883 return(0);
884#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000885 case XML_WITH_ISO8859X:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000886#ifdef LIBXML_ISO8859X_ENABLED
887 return(1);
888#else
889 return(0);
890#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000891 case XML_WITH_UNICODE:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000892#ifdef LIBXML_UNICODE_ENABLED
893 return(1);
894#else
895 return(0);
896#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000897 case XML_WITH_REGEXP:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000898#ifdef LIBXML_REGEXP_ENABLED
899 return(1);
900#else
901 return(0);
902#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000903 case XML_WITH_AUTOMATA:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000904#ifdef LIBXML_AUTOMATA_ENABLED
905 return(1);
906#else
907 return(0);
908#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000909 case XML_WITH_EXPR:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000910#ifdef LIBXML_EXPR_ENABLED
911 return(1);
912#else
913 return(0);
914#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000915 case XML_WITH_SCHEMAS:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000916#ifdef LIBXML_SCHEMAS_ENABLED
917 return(1);
918#else
919 return(0);
920#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000921 case XML_WITH_SCHEMATRON:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000922#ifdef LIBXML_SCHEMATRON_ENABLED
923 return(1);
924#else
925 return(0);
926#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000927 case XML_WITH_MODULES:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000928#ifdef LIBXML_MODULES_ENABLED
929 return(1);
930#else
931 return(0);
932#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000933 case XML_WITH_DEBUG:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000934#ifdef LIBXML_DEBUG_ENABLED
935 return(1);
936#else
937 return(0);
938#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000939 case XML_WITH_DEBUG_MEM:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000940#ifdef DEBUG_MEMORY_LOCATION
941 return(1);
942#else
943 return(0);
944#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000945 case XML_WITH_DEBUG_RUN:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000946#ifdef LIBXML_DEBUG_RUNTIME
947 return(1);
948#else
949 return(0);
Daniel Veillard34e3f642008-07-29 09:02:27 +0000950#endif
Daniel Veillard75acfee2006-07-13 06:29:56 +0000951 case XML_WITH_ZLIB:
952#ifdef LIBXML_ZLIB_ENABLED
953 return(1);
954#else
955 return(0);
956#endif
Giuseppe Iuculano48f7dcb2010-11-04 17:42:42 +0100957 case XML_WITH_ICU:
958#ifdef LIBXML_ICU_ENABLED
959 return(1);
960#else
961 return(0);
962#endif
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000963 default:
964 break;
965 }
966 return(0);
967}
968
/************************************************************************
 *                                                                      *
 *              SAX2 defaulted attributes handling                      *
 *                                                                      *
 ************************************************************************/
974
975/**
976 * xmlDetectSAX2:
977 * @ctxt: an XML parser context
978 *
979 * Do the SAX2 detection and specific intialization
980 */
981static void
982xmlDetectSAX2(xmlParserCtxtPtr ctxt) {
983 if (ctxt == NULL) return;
Daniel Veillard81273902003-09-30 00:43:48 +0000984#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +0000985 if ((ctxt->sax) && (ctxt->sax->initialized == XML_SAX2_MAGIC) &&
986 ((ctxt->sax->startElementNs != NULL) ||
987 (ctxt->sax->endElementNs != NULL))) ctxt->sax2 = 1;
Daniel Veillard81273902003-09-30 00:43:48 +0000988#else
989 ctxt->sax2 = 1;
990#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +0000991
992 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
993 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
994 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
William M. Brack9f797ab2004-07-28 07:40:12 +0000995 if ((ctxt->str_xml==NULL) || (ctxt->str_xmlns==NULL) ||
996 (ctxt->str_xml_ns == NULL)) {
997 xmlErrMemory(ctxt, NULL);
998 }
Daniel Veillarde57ec792003-09-10 10:50:59 +0000999}
1000
Daniel Veillarde57ec792003-09-10 10:50:59 +00001001typedef struct _xmlDefAttrs xmlDefAttrs;
1002typedef xmlDefAttrs *xmlDefAttrsPtr;
1003struct _xmlDefAttrs {
1004 int nbAttrs; /* number of defaulted attributes on that element */
1005 int maxAttrs; /* the size of the array */
Daniel Veillardae0765b2008-07-31 19:54:59 +00001006 const xmlChar *values[5]; /* array of localname/prefix/values/external */
Daniel Veillarde57ec792003-09-10 10:50:59 +00001007};
Daniel Veillarde57ec792003-09-10 10:50:59 +00001008
1009/**
Daniel Veillard97c9ce22008-03-25 16:52:41 +00001010 * xmlAttrNormalizeSpace:
1011 * @src: the source string
1012 * @dst: the target string
1013 *
1014 * Normalize the space in non CDATA attribute values:
1015 * If the attribute type is not CDATA, then the XML processor MUST further
1016 * process the normalized attribute value by discarding any leading and
1017 * trailing space (#x20) characters, and by replacing sequences of space
1018 * (#x20) characters by a single space (#x20) character.
1019 * Note that the size of dst need to be at least src, and if one doesn't need
1020 * to preserve dst (and it doesn't come from a dictionary or read-only) then
1021 * passing src as dst is just fine.
1022 *
1023 * Returns a pointer to the normalized value (dst) or NULL if no conversion
1024 * is needed.
1025 */
1026static xmlChar *
1027xmlAttrNormalizeSpace(const xmlChar *src, xmlChar *dst)
1028{
1029 if ((src == NULL) || (dst == NULL))
1030 return(NULL);
1031
1032 while (*src == 0x20) src++;
1033 while (*src != 0) {
1034 if (*src == 0x20) {
1035 while (*src == 0x20) src++;
1036 if (*src != 0)
1037 *dst++ = 0x20;
1038 } else {
1039 *dst++ = *src++;
1040 }
1041 }
1042 *dst = 0;
1043 if (dst == src)
1044 return(NULL);
1045 return(dst);
1046}
1047
1048/**
1049 * xmlAttrNormalizeSpace2:
1050 * @src: the source string
1051 *
1052 * Normalize the space in non CDATA attribute values, a slightly more complex
1053 * front end to avoid allocation problems when running on attribute values
1054 * coming from the input.
1055 *
1056 * Returns a pointer to the normalized value (dst) or NULL if no conversion
1057 * is needed.
1058 */
1059static const xmlChar *
Daniel Veillardae0765b2008-07-31 19:54:59 +00001060xmlAttrNormalizeSpace2(xmlParserCtxtPtr ctxt, xmlChar *src, int *len)
Daniel Veillard97c9ce22008-03-25 16:52:41 +00001061{
1062 int i;
1063 int remove_head = 0;
1064 int need_realloc = 0;
1065 const xmlChar *cur;
1066
1067 if ((ctxt == NULL) || (src == NULL) || (len == NULL))
1068 return(NULL);
1069 i = *len;
1070 if (i <= 0)
1071 return(NULL);
1072
1073 cur = src;
1074 while (*cur == 0x20) {
1075 cur++;
1076 remove_head++;
1077 }
1078 while (*cur != 0) {
1079 if (*cur == 0x20) {
1080 cur++;
1081 if ((*cur == 0x20) || (*cur == 0)) {
1082 need_realloc = 1;
1083 break;
1084 }
1085 } else
1086 cur++;
1087 }
1088 if (need_realloc) {
1089 xmlChar *ret;
1090
1091 ret = xmlStrndup(src + remove_head, i - remove_head + 1);
1092 if (ret == NULL) {
1093 xmlErrMemory(ctxt, NULL);
1094 return(NULL);
1095 }
1096 xmlAttrNormalizeSpace(ret, ret);
1097 *len = (int) strlen((const char *)ret);
1098 return(ret);
1099 } else if (remove_head) {
1100 *len -= remove_head;
Daniel Veillardae0765b2008-07-31 19:54:59 +00001101 memmove(src, src + remove_head, 1 + *len);
1102 return(src);
Daniel Veillard97c9ce22008-03-25 16:52:41 +00001103 }
1104 return(NULL);
1105}
1106
1107/**
Daniel Veillarde57ec792003-09-10 10:50:59 +00001108 * xmlAddDefAttrs:
1109 * @ctxt: an XML parser context
1110 * @fullname: the element fullname
1111 * @fullattr: the attribute fullname
1112 * @value: the attribute value
1113 *
1114 * Add a defaulted attribute for an element
1115 */
1116static void
1117xmlAddDefAttrs(xmlParserCtxtPtr ctxt,
1118 const xmlChar *fullname,
1119 const xmlChar *fullattr,
1120 const xmlChar *value) {
1121 xmlDefAttrsPtr defaults;
1122 int len;
1123 const xmlChar *name;
1124 const xmlChar *prefix;
1125
Daniel Veillard6a31b832008-03-26 14:06:44 +00001126 /*
1127 * Allows to detect attribute redefinitions
1128 */
1129 if (ctxt->attsSpecial != NULL) {
1130 if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1131 return;
1132 }
1133
Daniel Veillarde57ec792003-09-10 10:50:59 +00001134 if (ctxt->attsDefault == NULL) {
Daniel Veillard316a5c32005-01-23 22:56:39 +00001135 ctxt->attsDefault = xmlHashCreateDict(10, ctxt->dict);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001136 if (ctxt->attsDefault == NULL)
1137 goto mem_error;
1138 }
1139
1140 /*
Daniel Veillard079f6a72004-09-23 13:15:03 +00001141 * split the element name into prefix:localname , the string found
1142 * are within the DTD and then not associated to namespace names.
Daniel Veillarde57ec792003-09-10 10:50:59 +00001143 */
1144 name = xmlSplitQName3(fullname, &len);
1145 if (name == NULL) {
1146 name = xmlDictLookup(ctxt->dict, fullname, -1);
1147 prefix = NULL;
1148 } else {
1149 name = xmlDictLookup(ctxt->dict, name, -1);
1150 prefix = xmlDictLookup(ctxt->dict, fullname, len);
1151 }
1152
1153 /*
1154 * make sure there is some storage
1155 */
1156 defaults = xmlHashLookup2(ctxt->attsDefault, name, prefix);
1157 if (defaults == NULL) {
1158 defaults = (xmlDefAttrsPtr) xmlMalloc(sizeof(xmlDefAttrs) +
Daniel Veillardae0765b2008-07-31 19:54:59 +00001159 (4 * 5) * sizeof(const xmlChar *));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001160 if (defaults == NULL)
1161 goto mem_error;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001162 defaults->nbAttrs = 0;
Daniel Veillard079f6a72004-09-23 13:15:03 +00001163 defaults->maxAttrs = 4;
Daniel Veillard68b6e022008-03-31 09:26:00 +00001164 if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1165 defaults, NULL) < 0) {
1166 xmlFree(defaults);
1167 goto mem_error;
1168 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00001169 } else if (defaults->nbAttrs >= defaults->maxAttrs) {
Daniel Veillard079f6a72004-09-23 13:15:03 +00001170 xmlDefAttrsPtr temp;
1171
1172 temp = (xmlDefAttrsPtr) xmlRealloc(defaults, sizeof(xmlDefAttrs) +
Daniel Veillardae0765b2008-07-31 19:54:59 +00001173 (2 * defaults->maxAttrs * 5) * sizeof(const xmlChar *));
Daniel Veillard079f6a72004-09-23 13:15:03 +00001174 if (temp == NULL)
Daniel Veillarde57ec792003-09-10 10:50:59 +00001175 goto mem_error;
Daniel Veillard079f6a72004-09-23 13:15:03 +00001176 defaults = temp;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001177 defaults->maxAttrs *= 2;
Daniel Veillard68b6e022008-03-31 09:26:00 +00001178 if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1179 defaults, NULL) < 0) {
1180 xmlFree(defaults);
1181 goto mem_error;
1182 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00001183 }
1184
1185 /*
Daniel Veillard8874b942005-08-25 13:19:21 +00001186 * Split the element name into prefix:localname , the string found
Daniel Veillarde57ec792003-09-10 10:50:59 +00001187 * are within the DTD and hen not associated to namespace names.
1188 */
1189 name = xmlSplitQName3(fullattr, &len);
1190 if (name == NULL) {
1191 name = xmlDictLookup(ctxt->dict, fullattr, -1);
1192 prefix = NULL;
1193 } else {
1194 name = xmlDictLookup(ctxt->dict, name, -1);
1195 prefix = xmlDictLookup(ctxt->dict, fullattr, len);
1196 }
1197
Daniel Veillardae0765b2008-07-31 19:54:59 +00001198 defaults->values[5 * defaults->nbAttrs] = name;
1199 defaults->values[5 * defaults->nbAttrs + 1] = prefix;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001200 /* intern the string and precompute the end */
1201 len = xmlStrlen(value);
1202 value = xmlDictLookup(ctxt->dict, value, len);
Daniel Veillardae0765b2008-07-31 19:54:59 +00001203 defaults->values[5 * defaults->nbAttrs + 2] = value;
1204 defaults->values[5 * defaults->nbAttrs + 3] = value + len;
1205 if (ctxt->external)
1206 defaults->values[5 * defaults->nbAttrs + 4] = BAD_CAST "external";
1207 else
1208 defaults->values[5 * defaults->nbAttrs + 4] = NULL;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001209 defaults->nbAttrs++;
1210
1211 return;
1212
1213mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001214 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001215 return;
1216}
1217
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001218/**
1219 * xmlAddSpecialAttr:
1220 * @ctxt: an XML parser context
1221 * @fullname: the element fullname
1222 * @fullattr: the attribute fullname
1223 * @type: the attribute type
1224 *
Daniel Veillardac4118d2008-01-11 05:27:32 +00001225 * Register this attribute type
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001226 */
1227static void
1228xmlAddSpecialAttr(xmlParserCtxtPtr ctxt,
1229 const xmlChar *fullname,
1230 const xmlChar *fullattr,
1231 int type)
1232{
1233 if (ctxt->attsSpecial == NULL) {
Daniel Veillard316a5c32005-01-23 22:56:39 +00001234 ctxt->attsSpecial = xmlHashCreateDict(10, ctxt->dict);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001235 if (ctxt->attsSpecial == NULL)
1236 goto mem_error;
1237 }
1238
Daniel Veillardac4118d2008-01-11 05:27:32 +00001239 if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1240 return;
1241
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +00001242 xmlHashAddEntry2(ctxt->attsSpecial, fullname, fullattr,
1243 (void *) (long) type);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001244 return;
1245
1246mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001247 xmlErrMemory(ctxt, NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001248 return;
1249}
1250
Daniel Veillard4432df22003-09-28 18:58:27 +00001251/**
Daniel Veillardac4118d2008-01-11 05:27:32 +00001252 * xmlCleanSpecialAttrCallback:
1253 *
1254 * Removes CDATA attributes from the special attribute table
1255 */
1256static void
1257xmlCleanSpecialAttrCallback(void *payload, void *data,
1258 const xmlChar *fullname, const xmlChar *fullattr,
1259 const xmlChar *unused ATTRIBUTE_UNUSED) {
1260 xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) data;
1261
Daniel Veillardb3edafd2008-01-11 08:00:57 +00001262 if (((long) payload) == XML_ATTRIBUTE_CDATA) {
Daniel Veillardac4118d2008-01-11 05:27:32 +00001263 xmlHashRemoveEntry2(ctxt->attsSpecial, fullname, fullattr, NULL);
1264 }
1265}
1266
1267/**
1268 * xmlCleanSpecialAttr:
1269 * @ctxt: an XML parser context
1270 *
1271 * Trim the list of attributes defined to remove all those of type
1272 * CDATA as they are not special. This call should be done when finishing
1273 * to parse the DTD and before starting to parse the document root.
1274 */
1275static void
1276xmlCleanSpecialAttr(xmlParserCtxtPtr ctxt)
1277{
1278 if (ctxt->attsSpecial == NULL)
1279 return;
1280
1281 xmlHashScanFull(ctxt->attsSpecial, xmlCleanSpecialAttrCallback, ctxt);
1282
1283 if (xmlHashSize(ctxt->attsSpecial) == 0) {
1284 xmlHashFree(ctxt->attsSpecial, NULL);
1285 ctxt->attsSpecial = NULL;
1286 }
1287 return;
1288}
1289
1290/**
Daniel Veillard4432df22003-09-28 18:58:27 +00001291 * xmlCheckLanguageID:
1292 * @lang: pointer to the string value
1293 *
1294 * Checks that the value conforms to the LanguageID production:
1295 *
1296 * NOTE: this is somewhat deprecated, those productions were removed from
1297 * the XML Second edition.
1298 *
1299 * [33] LanguageID ::= Langcode ('-' Subcode)*
1300 * [34] Langcode ::= ISO639Code | IanaCode | UserCode
1301 * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
1302 * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
1303 * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
1304 * [38] Subcode ::= ([a-z] | [A-Z])+
1305 *
Daniel Veillard60587d62010-11-04 15:16:27 +01001306 * The current REC reference the sucessors of RFC 1766, currently 5646
1307 *
1308 * http://www.rfc-editor.org/rfc/rfc5646.txt
1309 * langtag = language
1310 * ["-" script]
1311 * ["-" region]
1312 * *("-" variant)
1313 * *("-" extension)
1314 * ["-" privateuse]
1315 * language = 2*3ALPHA ; shortest ISO 639 code
1316 * ["-" extlang] ; sometimes followed by
1317 * ; extended language subtags
1318 * / 4ALPHA ; or reserved for future use
1319 * / 5*8ALPHA ; or registered language subtag
1320 *
1321 * extlang = 3ALPHA ; selected ISO 639 codes
1322 * *2("-" 3ALPHA) ; permanently reserved
1323 *
1324 * script = 4ALPHA ; ISO 15924 code
1325 *
1326 * region = 2ALPHA ; ISO 3166-1 code
1327 * / 3DIGIT ; UN M.49 code
1328 *
1329 * variant = 5*8alphanum ; registered variants
1330 * / (DIGIT 3alphanum)
1331 *
1332 * extension = singleton 1*("-" (2*8alphanum))
1333 *
1334 * ; Single alphanumerics
1335 * ; "x" reserved for private use
1336 * singleton = DIGIT ; 0 - 9
1337 * / %x41-57 ; A - W
1338 * / %x59-5A ; Y - Z
1339 * / %x61-77 ; a - w
1340 * / %x79-7A ; y - z
1341 *
1342 * it sounds right to still allow Irregular i-xxx IANA and user codes too
1343 * The parser below doesn't try to cope with extension or privateuse
1344 * that could be added but that's not interoperable anyway
1345 *
Daniel Veillard4432df22003-09-28 18:58:27 +00001346 * Returns 1 if correct 0 otherwise
1347 **/
1348int
1349xmlCheckLanguageID(const xmlChar * lang)
1350{
Daniel Veillard60587d62010-11-04 15:16:27 +01001351 const xmlChar *cur = lang, *nxt;
Daniel Veillard4432df22003-09-28 18:58:27 +00001352
1353 if (cur == NULL)
1354 return (0);
1355 if (((cur[0] == 'i') && (cur[1] == '-')) ||
Daniel Veillard60587d62010-11-04 15:16:27 +01001356 ((cur[0] == 'I') && (cur[1] == '-')) ||
1357 ((cur[0] == 'x') && (cur[1] == '-')) ||
1358 ((cur[0] == 'X') && (cur[1] == '-'))) {
Daniel Veillard4432df22003-09-28 18:58:27 +00001359 /*
Daniel Veillard60587d62010-11-04 15:16:27 +01001360 * Still allow IANA code and user code which were coming
1361 * from the previous version of the XML-1.0 specification
1362 * it's deprecated but we should not fail
Daniel Veillard4432df22003-09-28 18:58:27 +00001363 */
1364 cur += 2;
Daniel Veillard60587d62010-11-04 15:16:27 +01001365 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
Daniel Veillard4432df22003-09-28 18:58:27 +00001366 ((cur[0] >= 'a') && (cur[0] <= 'z')))
1367 cur++;
Daniel Veillard60587d62010-11-04 15:16:27 +01001368 return(cur[0] == 0);
Daniel Veillard4432df22003-09-28 18:58:27 +00001369 }
Daniel Veillard60587d62010-11-04 15:16:27 +01001370 nxt = cur;
1371 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1372 ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1373 nxt++;
1374 if (nxt - cur >= 4) {
1375 /*
1376 * Reserved
1377 */
1378 if ((nxt - cur > 8) || (nxt[0] != 0))
1379 return(0);
1380 return(1);
1381 }
1382 if (nxt - cur < 2)
1383 return(0);
1384 /* we got an ISO 639 code */
1385 if (nxt[0] == 0)
1386 return(1);
1387 if (nxt[0] != '-')
1388 return(0);
1389
1390 nxt++;
1391 cur = nxt;
1392 /* now we can have extlang or script or region or variant */
1393 if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1394 goto region_m49;
1395
1396 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1397 ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1398 nxt++;
1399 if (nxt - cur == 4)
1400 goto script;
1401 if (nxt - cur == 2)
1402 goto region;
1403 if ((nxt - cur >= 5) && (nxt - cur <= 8))
1404 goto variant;
1405 if (nxt - cur != 3)
1406 return(0);
1407 /* we parsed an extlang */
1408 if (nxt[0] == 0)
1409 return(1);
1410 if (nxt[0] != '-')
1411 return(0);
1412
1413 nxt++;
1414 cur = nxt;
1415 /* now we can have script or region or variant */
1416 if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1417 goto region_m49;
1418
1419 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1420 ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1421 nxt++;
1422 if (nxt - cur == 2)
1423 goto region;
1424 if ((nxt - cur >= 5) && (nxt - cur <= 8))
1425 goto variant;
1426 if (nxt - cur != 4)
1427 return(0);
1428 /* we parsed a script */
1429script:
1430 if (nxt[0] == 0)
1431 return(1);
1432 if (nxt[0] != '-')
1433 return(0);
1434
1435 nxt++;
1436 cur = nxt;
1437 /* now we can have region or variant */
1438 if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1439 goto region_m49;
1440
1441 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1442 ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1443 nxt++;
1444
1445 if ((nxt - cur >= 5) && (nxt - cur <= 8))
1446 goto variant;
1447 if (nxt - cur != 2)
1448 return(0);
1449 /* we parsed a region */
1450region:
1451 if (nxt[0] == 0)
1452 return(1);
1453 if (nxt[0] != '-')
1454 return(0);
1455
1456 nxt++;
1457 cur = nxt;
1458 /* now we can just have a variant */
1459 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1460 ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1461 nxt++;
1462
1463 if ((nxt - cur < 5) || (nxt - cur > 8))
1464 return(0);
1465
1466 /* we parsed a variant */
1467variant:
1468 if (nxt[0] == 0)
1469 return(1);
1470 if (nxt[0] != '-')
1471 return(0);
1472 /* extensions and private use subtags not checked */
Daniel Veillard4432df22003-09-28 18:58:27 +00001473 return (1);
Daniel Veillard60587d62010-11-04 15:16:27 +01001474
1475region_m49:
1476 if (((nxt[1] >= '0') && (nxt[1] <= '9')) &&
1477 ((nxt[2] >= '0') && (nxt[2] <= '9'))) {
1478 nxt += 3;
1479 goto region;
1480 }
1481 return(0);
Daniel Veillard4432df22003-09-28 18:58:27 +00001482}
1483
/************************************************************************
 *                                                                      *
 *              Parser stacks related functions and macros              *
 *                                                                      *
 ************************************************************************/
1489
Daniel Veillard8ed10722009-08-20 19:17:36 +02001490static xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
1491 const xmlChar ** str);
Owen Taylor3473f882001-02-23 17:55:21 +00001492
Daniel Veillard0fb18932003-09-07 09:14:37 +00001493#ifdef SAX2
1494/**
1495 * nsPush:
1496 * @ctxt: an XML parser context
1497 * @prefix: the namespace prefix or NULL
1498 * @URL: the namespace name
1499 *
1500 * Pushes a new parser namespace on top of the ns stack
1501 *
William M. Brack7b9154b2003-09-27 19:23:50 +00001502 * Returns -1 in case of error, -2 if the namespace should be discarded
1503 * and the index in the stack otherwise.
Daniel Veillard0fb18932003-09-07 09:14:37 +00001504 */
1505static int
1506nsPush(xmlParserCtxtPtr ctxt, const xmlChar *prefix, const xmlChar *URL)
1507{
Daniel Veillarddca8cc72003-09-26 13:53:14 +00001508 if (ctxt->options & XML_PARSE_NSCLEAN) {
1509 int i;
1510 for (i = 0;i < ctxt->nsNr;i += 2) {
1511 if (ctxt->nsTab[i] == prefix) {
1512 /* in scope */
1513 if (ctxt->nsTab[i + 1] == URL)
1514 return(-2);
1515 /* out of scope keep it */
1516 break;
1517 }
1518 }
1519 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00001520 if ((ctxt->nsMax == 0) || (ctxt->nsTab == NULL)) {
1521 ctxt->nsMax = 10;
1522 ctxt->nsNr = 0;
1523 ctxt->nsTab = (const xmlChar **)
1524 xmlMalloc(ctxt->nsMax * sizeof(xmlChar *));
1525 if (ctxt->nsTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001526 xmlErrMemory(ctxt, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00001527 ctxt->nsMax = 0;
1528 return (-1);
1529 }
1530 } else if (ctxt->nsNr >= ctxt->nsMax) {
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00001531 const xmlChar ** tmp;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001532 ctxt->nsMax *= 2;
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00001533 tmp = (const xmlChar **) xmlRealloc((char *) ctxt->nsTab,
1534 ctxt->nsMax * sizeof(ctxt->nsTab[0]));
1535 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001536 xmlErrMemory(ctxt, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00001537 ctxt->nsMax /= 2;
1538 return (-1);
1539 }
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00001540 ctxt->nsTab = tmp;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001541 }
1542 ctxt->nsTab[ctxt->nsNr++] = prefix;
1543 ctxt->nsTab[ctxt->nsNr++] = URL;
1544 return (ctxt->nsNr);
1545}
1546/**
1547 * nsPop:
1548 * @ctxt: an XML parser context
1549 * @nr: the number to pop
1550 *
1551 * Pops the top @nr parser prefix/namespace from the ns stack
1552 *
1553 * Returns the number of namespaces removed
1554 */
1555static int
1556nsPop(xmlParserCtxtPtr ctxt, int nr)
1557{
1558 int i;
1559
1560 if (ctxt->nsTab == NULL) return(0);
1561 if (ctxt->nsNr < nr) {
1562 xmlGenericError(xmlGenericErrorContext, "Pbm popping %d NS\n", nr);
1563 nr = ctxt->nsNr;
1564 }
1565 if (ctxt->nsNr <= 0)
1566 return (0);
Daniel Veillard34e3f642008-07-29 09:02:27 +00001567
Daniel Veillard0fb18932003-09-07 09:14:37 +00001568 for (i = 0;i < nr;i++) {
1569 ctxt->nsNr--;
1570 ctxt->nsTab[ctxt->nsNr] = NULL;
1571 }
1572 return(nr);
1573}
1574#endif
1575
1576static int
1577xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt, int nr) {
1578 const xmlChar **atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001579 int *attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001580 int maxatts;
1581
1582 if (ctxt->atts == NULL) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00001583 maxatts = 55; /* allow for 10 attrs by default */
Daniel Veillard0fb18932003-09-07 09:14:37 +00001584 atts = (const xmlChar **)
1585 xmlMalloc(maxatts * sizeof(xmlChar *));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001586 if (atts == NULL) goto mem_error;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001587 ctxt->atts = atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001588 attallocs = (int *) xmlMalloc((maxatts / 5) * sizeof(int));
1589 if (attallocs == NULL) goto mem_error;
1590 ctxt->attallocs = attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001591 ctxt->maxatts = maxatts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001592 } else if (nr + 5 > ctxt->maxatts) {
1593 maxatts = (nr + 5) * 2;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001594 atts = (const xmlChar **) xmlRealloc((void *) ctxt->atts,
1595 maxatts * sizeof(const xmlChar *));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001596 if (atts == NULL) goto mem_error;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001597 ctxt->atts = atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001598 attallocs = (int *) xmlRealloc((void *) ctxt->attallocs,
1599 (maxatts / 5) * sizeof(int));
1600 if (attallocs == NULL) goto mem_error;
1601 ctxt->attallocs = attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001602 ctxt->maxatts = maxatts;
1603 }
1604 return(ctxt->maxatts);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001605mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001606 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001607 return(-1);
Daniel Veillard0fb18932003-09-07 09:14:37 +00001608}
1609
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001610/**
1611 * inputPush:
1612 * @ctxt: an XML parser context
Daniel Veillard9d06d302002-01-22 18:15:52 +00001613 * @value: the parser input
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001614 *
1615 * Pushes a new parser input on top of the input stack
Daniel Veillard9d06d302002-01-22 18:15:52 +00001616 *
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001617 * Returns -1 in case of error, the index in the stack otherwise
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001618 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001619int
Daniel Veillard1c732d22002-11-30 11:22:59 +00001620inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value)
1621{
Daniel Veillard36e5cd52004-11-02 14:52:23 +00001622 if ((ctxt == NULL) || (value == NULL))
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001623 return(-1);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001624 if (ctxt->inputNr >= ctxt->inputMax) {
1625 ctxt->inputMax *= 2;
1626 ctxt->inputTab =
1627 (xmlParserInputPtr *) xmlRealloc(ctxt->inputTab,
1628 ctxt->inputMax *
1629 sizeof(ctxt->inputTab[0]));
1630 if (ctxt->inputTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001631 xmlErrMemory(ctxt, NULL);
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001632 xmlFreeInputStream(value);
1633 ctxt->inputMax /= 2;
1634 value = NULL;
1635 return (-1);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001636 }
1637 }
1638 ctxt->inputTab[ctxt->inputNr] = value;
1639 ctxt->input = value;
1640 return (ctxt->inputNr++);
1641}
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001642/**
Daniel Veillard1c732d22002-11-30 11:22:59 +00001643 * inputPop:
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001644 * @ctxt: an XML parser context
1645 *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001646 * Pops the top parser input from the input stack
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001647 *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001648 * Returns the input just removed
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001649 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001650xmlParserInputPtr
Daniel Veillard1c732d22002-11-30 11:22:59 +00001651inputPop(xmlParserCtxtPtr ctxt)
1652{
1653 xmlParserInputPtr ret;
1654
Daniel Veillard36e5cd52004-11-02 14:52:23 +00001655 if (ctxt == NULL)
1656 return(NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001657 if (ctxt->inputNr <= 0)
Daniel Veillard24505b02005-07-28 23:49:35 +00001658 return (NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001659 ctxt->inputNr--;
1660 if (ctxt->inputNr > 0)
1661 ctxt->input = ctxt->inputTab[ctxt->inputNr - 1];
1662 else
1663 ctxt->input = NULL;
1664 ret = ctxt->inputTab[ctxt->inputNr];
Daniel Veillard24505b02005-07-28 23:49:35 +00001665 ctxt->inputTab[ctxt->inputNr] = NULL;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001666 return (ret);
1667}
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001668/**
1669 * nodePush:
1670 * @ctxt: an XML parser context
Daniel Veillard9d06d302002-01-22 18:15:52 +00001671 * @value: the element node
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001672 *
1673 * Pushes a new element node on top of the node stack
Daniel Veillard9d06d302002-01-22 18:15:52 +00001674 *
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001675 * Returns -1 in case of error, the index in the stack otherwise
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001676 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001677int
Daniel Veillard1c732d22002-11-30 11:22:59 +00001678nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value)
1679{
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001680 if (ctxt == NULL) return(0);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001681 if (ctxt->nodeNr >= ctxt->nodeMax) {
Daniel Veillardd0cf7f62004-11-09 16:17:02 +00001682 xmlNodePtr *tmp;
1683
1684 tmp = (xmlNodePtr *) xmlRealloc(ctxt->nodeTab,
1685 ctxt->nodeMax * 2 *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001686 sizeof(ctxt->nodeTab[0]));
Daniel Veillardd0cf7f62004-11-09 16:17:02 +00001687 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001688 xmlErrMemory(ctxt, NULL);
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001689 return (-1);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001690 }
Daniel Veillardd0cf7f62004-11-09 16:17:02 +00001691 ctxt->nodeTab = tmp;
1692 ctxt->nodeMax *= 2;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001693 }
Daniel Veillard8915c152008-08-26 13:05:34 +00001694 if ((((unsigned int) ctxt->nodeNr) > xmlParserMaxDepth) &&
1695 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00001696 xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
Daniel Veillard8915c152008-08-26 13:05:34 +00001697 "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
Daniel Veillard4aede2e2003-10-17 12:43:59 +00001698 xmlParserMaxDepth);
Daniel Veillard3b2e4e12003-02-03 08:52:58 +00001699 ctxt->instate = XML_PARSER_EOF;
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001700 return(-1);
Daniel Veillard3b2e4e12003-02-03 08:52:58 +00001701 }
Daniel Veillard1c732d22002-11-30 11:22:59 +00001702 ctxt->nodeTab[ctxt->nodeNr] = value;
1703 ctxt->node = value;
1704 return (ctxt->nodeNr++);
1705}
Daniel Veillard8915c152008-08-26 13:05:34 +00001706
Daniel Veillard1c732d22002-11-30 11:22:59 +00001707/**
1708 * nodePop:
1709 * @ctxt: an XML parser context
1710 *
1711 * Pops the top element node from the node stack
1712 *
1713 * Returns the node just removed
Owen Taylor3473f882001-02-23 17:55:21 +00001714 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001715xmlNodePtr
Daniel Veillard1c732d22002-11-30 11:22:59 +00001716nodePop(xmlParserCtxtPtr ctxt)
1717{
1718 xmlNodePtr ret;
1719
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001720 if (ctxt == NULL) return(NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001721 if (ctxt->nodeNr <= 0)
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001722 return (NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001723 ctxt->nodeNr--;
1724 if (ctxt->nodeNr > 0)
1725 ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1];
1726 else
1727 ctxt->node = NULL;
1728 ret = ctxt->nodeTab[ctxt->nodeNr];
Daniel Veillard24505b02005-07-28 23:49:35 +00001729 ctxt->nodeTab[ctxt->nodeNr] = NULL;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001730 return (ret);
1731}
Daniel Veillarda2351322004-06-27 12:08:10 +00001732
1733#ifdef LIBXML_PUSH_ENABLED
Daniel Veillard1c732d22002-11-30 11:22:59 +00001734/**
Daniel Veillarde57ec792003-09-10 10:50:59 +00001735 * nameNsPush:
1736 * @ctxt: an XML parser context
1737 * @value: the element name
1738 * @prefix: the element prefix
1739 * @URI: the element namespace name
1740 *
1741 * Pushes a new element name/prefix/URL on top of the name stack
1742 *
1743 * Returns -1 in case of error, the index in the stack otherwise
1744 */
1745static int
1746nameNsPush(xmlParserCtxtPtr ctxt, const xmlChar * value,
1747 const xmlChar *prefix, const xmlChar *URI, int nsNr)
1748{
1749 if (ctxt->nameNr >= ctxt->nameMax) {
1750 const xmlChar * *tmp;
1751 void **tmp2;
1752 ctxt->nameMax *= 2;
1753 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1754 ctxt->nameMax *
1755 sizeof(ctxt->nameTab[0]));
1756 if (tmp == NULL) {
1757 ctxt->nameMax /= 2;
1758 goto mem_error;
1759 }
1760 ctxt->nameTab = tmp;
1761 tmp2 = (void **) xmlRealloc((void * *)ctxt->pushTab,
1762 ctxt->nameMax * 3 *
1763 sizeof(ctxt->pushTab[0]));
1764 if (tmp2 == NULL) {
1765 ctxt->nameMax /= 2;
1766 goto mem_error;
1767 }
1768 ctxt->pushTab = tmp2;
1769 }
1770 ctxt->nameTab[ctxt->nameNr] = value;
1771 ctxt->name = value;
1772 ctxt->pushTab[ctxt->nameNr * 3] = (void *) prefix;
1773 ctxt->pushTab[ctxt->nameNr * 3 + 1] = (void *) URI;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +00001774 ctxt->pushTab[ctxt->nameNr * 3 + 2] = (void *) (long) nsNr;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001775 return (ctxt->nameNr++);
1776mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001777 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001778 return (-1);
1779}
1780/**
1781 * nameNsPop:
1782 * @ctxt: an XML parser context
1783 *
1784 * Pops the top element/prefix/URI name from the name stack
1785 *
1786 * Returns the name just removed
1787 */
1788static const xmlChar *
1789nameNsPop(xmlParserCtxtPtr ctxt)
1790{
1791 const xmlChar *ret;
1792
1793 if (ctxt->nameNr <= 0)
Daniel Veillard24505b02005-07-28 23:49:35 +00001794 return (NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001795 ctxt->nameNr--;
1796 if (ctxt->nameNr > 0)
1797 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1798 else
1799 ctxt->name = NULL;
1800 ret = ctxt->nameTab[ctxt->nameNr];
1801 ctxt->nameTab[ctxt->nameNr] = NULL;
1802 return (ret);
1803}
Daniel Veillarda2351322004-06-27 12:08:10 +00001804#endif /* LIBXML_PUSH_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +00001805
1806/**
Daniel Veillard1c732d22002-11-30 11:22:59 +00001807 * namePush:
1808 * @ctxt: an XML parser context
1809 * @value: the element name
1810 *
1811 * Pushes a new element name on top of the name stack
1812 *
Daniel Veillarde57ec792003-09-10 10:50:59 +00001813 * Returns -1 in case of error, the index in the stack otherwise
Daniel Veillard1c732d22002-11-30 11:22:59 +00001814 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001815int
Daniel Veillard2fdbd322003-08-18 12:15:38 +00001816namePush(xmlParserCtxtPtr ctxt, const xmlChar * value)
Daniel Veillard1c732d22002-11-30 11:22:59 +00001817{
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001818 if (ctxt == NULL) return (-1);
1819
Daniel Veillard1c732d22002-11-30 11:22:59 +00001820 if (ctxt->nameNr >= ctxt->nameMax) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00001821 const xmlChar * *tmp;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001822 ctxt->nameMax *= 2;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001823 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
Daniel Veillard1c732d22002-11-30 11:22:59 +00001824 ctxt->nameMax *
1825 sizeof(ctxt->nameTab[0]));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001826 if (tmp == NULL) {
1827 ctxt->nameMax /= 2;
1828 goto mem_error;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001829 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00001830 ctxt->nameTab = tmp;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001831 }
1832 ctxt->nameTab[ctxt->nameNr] = value;
1833 ctxt->name = value;
1834 return (ctxt->nameNr++);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001835mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001836 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001837 return (-1);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001838}
1839/**
1840 * namePop:
1841 * @ctxt: an XML parser context
1842 *
1843 * Pops the top element name from the name stack
1844 *
1845 * Returns the name just removed
1846 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001847const xmlChar *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001848namePop(xmlParserCtxtPtr ctxt)
1849{
Daniel Veillard2fdbd322003-08-18 12:15:38 +00001850 const xmlChar *ret;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001851
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001852 if ((ctxt == NULL) || (ctxt->nameNr <= 0))
1853 return (NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001854 ctxt->nameNr--;
1855 if (ctxt->nameNr > 0)
1856 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1857 else
1858 ctxt->name = NULL;
1859 ret = ctxt->nameTab[ctxt->nameNr];
Daniel Veillard24505b02005-07-28 23:49:35 +00001860 ctxt->nameTab[ctxt->nameNr] = NULL;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001861 return (ret);
1862}
Owen Taylor3473f882001-02-23 17:55:21 +00001863
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001864static int spacePush(xmlParserCtxtPtr ctxt, int val) {
Owen Taylor3473f882001-02-23 17:55:21 +00001865 if (ctxt->spaceNr >= ctxt->spaceMax) {
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00001866 int *tmp;
1867
Owen Taylor3473f882001-02-23 17:55:21 +00001868 ctxt->spaceMax *= 2;
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00001869 tmp = (int *) xmlRealloc(ctxt->spaceTab,
1870 ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
1871 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001872 xmlErrMemory(ctxt, NULL);
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001873 ctxt->spaceMax /=2;
1874 return(-1);
Owen Taylor3473f882001-02-23 17:55:21 +00001875 }
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00001876 ctxt->spaceTab = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00001877 }
1878 ctxt->spaceTab[ctxt->spaceNr] = val;
1879 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr];
1880 return(ctxt->spaceNr++);
1881}
1882
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001883static int spacePop(xmlParserCtxtPtr ctxt) {
Owen Taylor3473f882001-02-23 17:55:21 +00001884 int ret;
1885 if (ctxt->spaceNr <= 0) return(0);
1886 ctxt->spaceNr--;
1887 if (ctxt->spaceNr > 0)
1888 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1];
1889 else
Daniel Veillarddbcbbd22006-06-18 19:55:20 +00001890 ctxt->space = &ctxt->spaceTab[0];
Owen Taylor3473f882001-02-23 17:55:21 +00001891 ret = ctxt->spaceTab[ctxt->spaceNr];
1892 ctxt->spaceTab[ctxt->spaceNr] = -1;
1893 return(ret);
1894}
1895
/*
 * Macros for accessing the content. Those should be used only by the parser,
 * and not exported.
 *
 * Dirty macros, i.e. one often needs to make assumptions about the context
 * to use them
 *
 *   CUR_PTR return the current pointer to the xmlChar to be parsed.
 *           To be used with extreme caution since operations consuming
 *           characters may move the input buffer to a different location !
 *   CUR     returns the current xmlChar value, i.e. an 8 bit value.
 *           This should be used internally by the parser
 *           only to compare to ASCII values otherwise it would break when
 *           running with UTF-8 encoding.
 *   RAW     same as CUR but in the input buffer, bypass any token
 *           extraction that may have been done
 *   NXT(n)  returns the n'th next xmlChar. Same as CUR it should be used only
 *           to compare on ASCII based substring.
 *   SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
 *           strings without newlines within the parser.
 *   NEXT1   Skip 1 xmlChar, and must also be used only to skip 1 non-newline
 *           ASCII defined char within the parser.
 * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
 *
 *   NEXT    Skip to the next character, this does the proper decoding
 *           in UTF-8 mode. It also pop-up unfinished entities on the fly.
 *   NEXTL(l) Skip the current unicode character of l xmlChars long.
 *   CUR_CHAR(l) returns the current unicode character (int), set l
 *           to the number of xmlChars used for the encoding [0-5].
 *   CUR_SCHAR  same but operate on a string instead of the context
 *   COPY_BUF  copy the current unicode char to the target buffer, increment
 *            the index
 *   GROW, SHRINK  handling of input buffers
 */
1930
/*
 * Raw accessors on the current input. All of them expect a variable
 * named ctxt to be in scope and simply index ctxt->input->cur.
 *
 * RAW and CUR both read the xmlChar at the current position, NXT(val)
 * reads the one val positions further, CUR_PTR is the cursor itself
 * (usable as an lvalue).
 */
#define RAW (ctxt->input->cur[0])
#define CUR (ctxt->input->cur[0])
#define NXT(val) (ctxt->input->cur[(val)])
#define CUR_PTR (ctxt->input->cur)
1935
/*
 * CMPn(s, c1, ..., cn): true when the first n bytes found at s are
 * exactly c1 ... cn. The bytes are read as unsigned char and compared
 * left to right, stopping at the first mismatch.
 *
 * Every argument is parenthesized in the expansion so that expressions
 * (pointer arithmetic, conditionals, ...) can be passed safely. s is
 * still evaluated once per compared byte: do not pass an expression
 * with side effects.
 */
#define CMP4( s, c1, c2, c3, c4 ) \
  ( ((unsigned char *) (s))[ 0 ] == (c1) && \
    ((unsigned char *) (s))[ 1 ] == (c2) && \
    ((unsigned char *) (s))[ 2 ] == (c3) && \
    ((unsigned char *) (s))[ 3 ] == (c4) )
#define CMP5( s, c1, c2, c3, c4, c5 ) \
  ( CMP4( s, c1, c2, c3, c4 ) && ((unsigned char *) (s))[ 4 ] == (c5) )
#define CMP6( s, c1, c2, c3, c4, c5, c6 ) \
  ( CMP5( s, c1, c2, c3, c4, c5 ) && ((unsigned char *) (s))[ 5 ] == (c6) )
#define CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) \
  ( CMP6( s, c1, c2, c3, c4, c5, c6 ) && ((unsigned char *) (s))[ 6 ] == (c7) )
#define CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) \
  ( CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) && \
    ((unsigned char *) (s))[ 7 ] == (c8) )
#define CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) \
  ( CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) && \
    ((unsigned char *) (s))[ 8 ] == (c9) )
#define CMP10( s, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 ) \
  ( CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) && \
    ((unsigned char *) (s))[ 9 ] == (c10) )
1953
/*
 * SKIP(val): advance the current input by val xmlChars, adding val to
 * ctxt->nbChars and to the column counter (the line counter is never
 * touched). Afterwards a '%' at the new position triggers
 * xmlParserHandlePEReference(), and a 0 byte that xmlParserInputGrow()
 * cannot get past triggers xmlPopInput().
 * val is evaluated three times: pass a side-effect free expression.
 */
#define SKIP(val) do {                                                  \
    ctxt->nbChars += (val);                                             \
    ctxt->input->cur += (val);                                          \
    ctxt->input->col += (val);                                          \
    if (*ctxt->input->cur == '%')                                       \
        xmlParserHandlePEReference(ctxt);                               \
    if ((*ctxt->input->cur == 0) &&                                     \
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))            \
        xmlPopInput(ctxt);                                              \
  } while (0)
1961
/*
 * SKIPL(val): advance the current input by val xmlChars one at a time,
 * keeping the line and column counters accurate when a '\n' is crossed
 * and counting every xmlChar in ctxt->nbChars. Afterwards a '%' at the
 * new position triggers xmlParserHandlePEReference(), and a 0 byte that
 * xmlParserInputGrow() cannot get past triggers xmlPopInput().
 * val is parenthesized in the loop bound so that any expression can be
 * passed; it is re-evaluated on every iteration.
 */
#define SKIPL(val) do {                                                 \
    int skipl;                                                          \
    for (skipl = 0; skipl < (val); skipl++) {                           \
        if (*(ctxt->input->cur) == '\n') {                              \
            ctxt->input->line++; ctxt->input->col = 1;                  \
        } else ctxt->input->col++;                                      \
        ctxt->nbChars++;                                                \
        ctxt->input->cur++;                                             \
    }                                                                   \
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);     \
    if ((*ctxt->input->cur == 0) &&                                     \
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))            \
        xmlPopInput(ctxt);                                              \
  } while (0)
1976
Daniel Veillarda880b122003-04-21 21:36:41 +00001977#define SHRINK if ((ctxt->progressive == 0) && \
Daniel Veillard6155d8a2003-08-19 15:01:28 +00001978 (ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \
1979 (ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \
Daniel Veillard46de64e2002-05-29 08:21:33 +00001980 xmlSHRINK (ctxt);
1981
1982static void xmlSHRINK (xmlParserCtxtPtr ctxt) {
1983 xmlParserInputShrink(ctxt->input);
1984 if ((*ctxt->input->cur == 0) &&
1985 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1986 xmlPopInput(ctxt);
Daniel Veillard48b2f892001-02-25 16:11:03 +00001987 }
Owen Taylor3473f882001-02-23 17:55:21 +00001988
Daniel Veillarda880b122003-04-21 21:36:41 +00001989#define GROW if ((ctxt->progressive == 0) && \
1990 (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK)) \
Daniel Veillard46de64e2002-05-29 08:21:33 +00001991 xmlGROW (ctxt);
1992
1993static void xmlGROW (xmlParserCtxtPtr ctxt) {
1994 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
Daniel Veillard59df7832010-02-02 10:24:01 +01001995 if ((ctxt->input->cur != NULL) && (*ctxt->input->cur == 0) &&
Daniel Veillard46de64e2002-05-29 08:21:33 +00001996 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1997 xmlPopInput(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +00001998}
Owen Taylor3473f882001-02-23 17:55:21 +00001999
/* SKIP_BLANKS: shorthand for xmlSkipBlankChars() on the current context. */
#define SKIP_BLANKS xmlSkipBlankChars(ctxt)

/* NEXT: shorthand for xmlNextChar() on the current context. */
#define NEXT xmlNextChar(ctxt)

/*
 * NEXT1: step over exactly one xmlChar, bumping the column and
 * ctxt->nbChars, then ask xmlParserInputGrow() for more data when the
 * new position holds a 0 byte. The line counter is not maintained.
 * Expands to a brace block, not to a do { } while (0) statement.
 */
#define NEXT1 {                                                         \
        ctxt->input->col++;                                             \
        ctxt->input->cur++;                                             \
        ctxt->nbChars++;                                                \
        if (*ctxt->input->cur == 0)                                     \
            xmlParserInputGrow(ctxt->input, INPUT_CHUNK);               \
    }

/*
 * NEXTL(l): step over the current character, which is l xmlChars long,
 * updating the line/column counters (a '\n' starts a new line), then
 * run xmlParserHandlePEReference() if the new position holds a '%'.
 */
#define NEXTL(l) do {                                                   \
    if (*(ctxt->input->cur) == '\n') {                                  \
        ctxt->input->line++; ctxt->input->col = 1;                      \
    } else ctxt->input->col++;                                          \
    ctxt->input->cur += (l);                                            \
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);     \
  } while (0)

/* CUR_CHAR(l): xmlCurrentChar() on the context, l receives the length. */
#define CUR_CHAR(l) xmlCurrentChar(ctxt, &(l))
/* CUR_SCHAR(s, l): same on the string s through xmlStringCurrentChar(). */
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, (s), &(l))

/*
 * COPY_BUF(l,b,i,v): append character v to buffer b at index i and
 * advance i. A character of length l == 1 is stored directly as one
 * xmlChar, anything else goes through xmlCopyCharMultiByte().
 * Arguments are parenthesized so that expressions can be passed, but
 * the expansion is a bare if/else: do not use it as the unbraced body
 * of another if/else.
 */
#define COPY_BUF(l,b,i,v)                                               \
    if ((l) == 1) (b)[(i)++] = (xmlChar) (v);                           \
    else (i) += xmlCopyCharMultiByte(&(b)[(i)],(v))
Owen Taylor3473f882001-02-23 17:55:21 +00002026
2027/**
2028 * xmlSkipBlankChars:
2029 * @ctxt: the XML parser context
2030 *
2031 * skip all blanks character found at that point in the input streams.
2032 * It pops up finished entities in the process if allowable at that point.
2033 *
2034 * Returns the number of space chars skipped
2035 */
2036
2037int
2038xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
Daniel Veillard02141ea2001-04-30 11:46:40 +00002039 int res = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002040
2041 /*
2042 * It's Okay to use CUR/NEXT here since all the blanks are on
2043 * the ASCII range.
2044 */
Daniel Veillard02141ea2001-04-30 11:46:40 +00002045 if ((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) {
2046 const xmlChar *cur;
Owen Taylor3473f882001-02-23 17:55:21 +00002047 /*
Daniel Veillard02141ea2001-04-30 11:46:40 +00002048 * if we are in the document content, go really fast
Owen Taylor3473f882001-02-23 17:55:21 +00002049 */
Daniel Veillard02141ea2001-04-30 11:46:40 +00002050 cur = ctxt->input->cur;
William M. Brack76e95df2003-10-18 16:20:14 +00002051 while (IS_BLANK_CH(*cur)) {
Daniel Veillard02141ea2001-04-30 11:46:40 +00002052 if (*cur == '\n') {
2053 ctxt->input->line++; ctxt->input->col = 1;
2054 }
2055 cur++;
2056 res++;
2057 if (*cur == 0) {
2058 ctxt->input->cur = cur;
2059 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2060 cur = ctxt->input->cur;
2061 }
2062 }
2063 ctxt->input->cur = cur;
2064 } else {
2065 int cur;
2066 do {
2067 cur = CUR;
Daniel Veillard7da92702005-01-23 20:15:53 +00002068 while (IS_BLANK_CH(cur)) { /* CHECKED tstblanks.xml */
Daniel Veillard02141ea2001-04-30 11:46:40 +00002069 NEXT;
2070 cur = CUR;
2071 res++;
2072 }
2073 while ((cur == 0) && (ctxt->inputNr > 1) &&
2074 (ctxt->instate != XML_PARSER_COMMENT)) {
2075 xmlPopInput(ctxt);
2076 cur = CUR;
2077 }
2078 /*
2079 * Need to handle support of entities branching here
2080 */
2081 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);
2082 } while (IS_BLANK(cur)); /* CHECKED tstblanks.xml */
2083 }
Owen Taylor3473f882001-02-23 17:55:21 +00002084 return(res);
2085}
2086
2087/************************************************************************
2088 * *
2089 * Commodity functions to handle entities *
2090 * *
2091 ************************************************************************/
2092
2093/**
2094 * xmlPopInput:
2095 * @ctxt: an XML parser context
2096 *
2097 * xmlPopInput: the current input pointed by ctxt->input came to an end
2098 * pop it and return the next char.
2099 *
2100 * Returns the current xmlChar in the parser context
2101 */
2102xmlChar
2103xmlPopInput(xmlParserCtxtPtr ctxt) {
Daniel Veillarda82b1822004-11-08 16:24:57 +00002104 if ((ctxt == NULL) || (ctxt->inputNr <= 1)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002105 if (xmlParserDebugEntities)
2106 xmlGenericError(xmlGenericErrorContext,
2107 "Popping input %d\n", ctxt->inputNr);
2108 xmlFreeInputStream(inputPop(ctxt));
Daniel Veillard561b7f82002-03-20 21:55:57 +00002109 if ((*ctxt->input->cur == 0) &&
Owen Taylor3473f882001-02-23 17:55:21 +00002110 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
2111 return(xmlPopInput(ctxt));
2112 return(CUR);
2113}
2114
2115/**
2116 * xmlPushInput:
2117 * @ctxt: an XML parser context
2118 * @input: an XML parser input fragment (entity, XML fragment ...).
2119 *
2120 * xmlPushInput: switch to a new input stream which is stacked on top
2121 * of the previous one(s).
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00002122 * Returns -1 in case of error or the index in the input stack
Owen Taylor3473f882001-02-23 17:55:21 +00002123 */
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00002124int
Owen Taylor3473f882001-02-23 17:55:21 +00002125xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00002126 int ret;
2127 if (input == NULL) return(-1);
Owen Taylor3473f882001-02-23 17:55:21 +00002128
2129 if (xmlParserDebugEntities) {
2130 if ((ctxt->input != NULL) && (ctxt->input->filename))
2131 xmlGenericError(xmlGenericErrorContext,
2132 "%s(%d): ", ctxt->input->filename,
2133 ctxt->input->line);
2134 xmlGenericError(xmlGenericErrorContext,
2135 "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
2136 }
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00002137 ret = inputPush(ctxt, input);
Owen Taylor3473f882001-02-23 17:55:21 +00002138 GROW;
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00002139 return(ret);
Owen Taylor3473f882001-02-23 17:55:21 +00002140}
2141
2142/**
2143 * xmlParseCharRef:
2144 * @ctxt: an XML parser context
2145 *
2146 * parse Reference declarations
2147 *
2148 * [66] CharRef ::= '&#' [0-9]+ ';' |
2149 * '&#x' [0-9a-fA-F]+ ';'
2150 *
2151 * [ WFC: Legal Character ]
2152 * Characters referred to using character references must match the
2153 * production for Char.
2154 *
2155 * Returns the value parsed (as an int), 0 in case of error
2156 */
2157int
2158xmlParseCharRef(xmlParserCtxtPtr ctxt) {
Daniel Veillard50582112001-03-26 22:52:16 +00002159 unsigned int val = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002160 int count = 0;
Daniel Veillard37fd3072004-06-03 11:22:31 +00002161 unsigned int outofrange = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002162
Owen Taylor3473f882001-02-23 17:55:21 +00002163 /*
2164 * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
2165 */
Daniel Veillard561b7f82002-03-20 21:55:57 +00002166 if ((RAW == '&') && (NXT(1) == '#') &&
Owen Taylor3473f882001-02-23 17:55:21 +00002167 (NXT(2) == 'x')) {
2168 SKIP(3);
2169 GROW;
2170 while (RAW != ';') { /* loop blocked by count */
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00002171 if (count++ > 20) {
2172 count = 0;
2173 GROW;
2174 }
2175 if ((RAW >= '0') && (RAW <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +00002176 val = val * 16 + (CUR - '0');
2177 else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
2178 val = val * 16 + (CUR - 'a') + 10;
2179 else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
2180 val = val * 16 + (CUR - 'A') + 10;
2181 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002182 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002183 val = 0;
2184 break;
2185 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00002186 if (val > 0x10FFFF)
2187 outofrange = val;
2188
Owen Taylor3473f882001-02-23 17:55:21 +00002189 NEXT;
2190 count++;
2191 }
2192 if (RAW == ';') {
2193 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
Daniel Veillard77a90a72003-03-22 00:04:05 +00002194 ctxt->input->col++;
Owen Taylor3473f882001-02-23 17:55:21 +00002195 ctxt->nbChars ++;
2196 ctxt->input->cur++;
2197 }
Daniel Veillard561b7f82002-03-20 21:55:57 +00002198 } else if ((RAW == '&') && (NXT(1) == '#')) {
Owen Taylor3473f882001-02-23 17:55:21 +00002199 SKIP(2);
2200 GROW;
2201 while (RAW != ';') { /* loop blocked by count */
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00002202 if (count++ > 20) {
2203 count = 0;
2204 GROW;
2205 }
2206 if ((RAW >= '0') && (RAW <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +00002207 val = val * 10 + (CUR - '0');
2208 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002209 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002210 val = 0;
2211 break;
2212 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00002213 if (val > 0x10FFFF)
2214 outofrange = val;
2215
Owen Taylor3473f882001-02-23 17:55:21 +00002216 NEXT;
2217 count++;
2218 }
2219 if (RAW == ';') {
2220 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
Daniel Veillard77a90a72003-03-22 00:04:05 +00002221 ctxt->input->col++;
Owen Taylor3473f882001-02-23 17:55:21 +00002222 ctxt->nbChars ++;
2223 ctxt->input->cur++;
2224 }
2225 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002226 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002227 }
2228
2229 /*
2230 * [ WFC: Legal Character ]
2231 * Characters referred to using character references must match the
2232 * production for Char.
2233 */
Daniel Veillard37fd3072004-06-03 11:22:31 +00002234 if ((IS_CHAR(val) && (outofrange == 0))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002235 return(val);
2236 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00002237 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2238 "xmlParseCharRef: invalid xmlChar value %d\n",
2239 val);
Owen Taylor3473f882001-02-23 17:55:21 +00002240 }
2241 return(0);
2242}
2243
2244/**
2245 * xmlParseStringCharRef:
2246 * @ctxt: an XML parser context
2247 * @str: a pointer to an index in the string
2248 *
2249 * parse Reference declarations, variant parsing from a string rather
2250 * than an an input flow.
2251 *
2252 * [66] CharRef ::= '&#' [0-9]+ ';' |
2253 * '&#x' [0-9a-fA-F]+ ';'
2254 *
2255 * [ WFC: Legal Character ]
2256 * Characters referred to using character references must match the
2257 * production for Char.
2258 *
2259 * Returns the value parsed (as an int), 0 in case of error, str will be
2260 * updated to the current value of the index
2261 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00002262static int
Owen Taylor3473f882001-02-23 17:55:21 +00002263xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
2264 const xmlChar *ptr;
2265 xmlChar cur;
Daniel Veillard37fd3072004-06-03 11:22:31 +00002266 unsigned int val = 0;
2267 unsigned int outofrange = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002268
2269 if ((str == NULL) || (*str == NULL)) return(0);
2270 ptr = *str;
2271 cur = *ptr;
2272 if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
2273 ptr += 3;
2274 cur = *ptr;
2275 while (cur != ';') { /* Non input consuming loop */
2276 if ((cur >= '0') && (cur <= '9'))
2277 val = val * 16 + (cur - '0');
2278 else if ((cur >= 'a') && (cur <= 'f'))
2279 val = val * 16 + (cur - 'a') + 10;
2280 else if ((cur >= 'A') && (cur <= 'F'))
2281 val = val * 16 + (cur - 'A') + 10;
2282 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002283 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002284 val = 0;
2285 break;
2286 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00002287 if (val > 0x10FFFF)
2288 outofrange = val;
2289
Owen Taylor3473f882001-02-23 17:55:21 +00002290 ptr++;
2291 cur = *ptr;
2292 }
2293 if (cur == ';')
2294 ptr++;
2295 } else if ((cur == '&') && (ptr[1] == '#')){
2296 ptr += 2;
2297 cur = *ptr;
2298 while (cur != ';') { /* Non input consuming loops */
2299 if ((cur >= '0') && (cur <= '9'))
2300 val = val * 10 + (cur - '0');
2301 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002302 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002303 val = 0;
2304 break;
2305 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00002306 if (val > 0x10FFFF)
2307 outofrange = val;
2308
Owen Taylor3473f882001-02-23 17:55:21 +00002309 ptr++;
2310 cur = *ptr;
2311 }
2312 if (cur == ';')
2313 ptr++;
2314 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002315 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002316 return(0);
2317 }
2318 *str = ptr;
2319
2320 /*
2321 * [ WFC: Legal Character ]
2322 * Characters referred to using character references must match the
2323 * production for Char.
2324 */
Daniel Veillard37fd3072004-06-03 11:22:31 +00002325 if ((IS_CHAR(val) && (outofrange == 0))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002326 return(val);
2327 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00002328 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2329 "xmlParseStringCharRef: invalid xmlChar value %d\n",
2330 val);
Owen Taylor3473f882001-02-23 17:55:21 +00002331 }
2332 return(0);
2333}
2334
2335/**
Daniel Veillardf5582f12002-06-11 10:08:16 +00002336 * xmlNewBlanksWrapperInputStream:
2337 * @ctxt: an XML parser context
2338 * @entity: an Entity pointer
2339 *
2340 * Create a new input stream for wrapping
2341 * blanks around a PEReference
2342 *
2343 * Returns the new input stream or NULL
2344 */
2345
2346static void deallocblankswrapper (xmlChar *str) {xmlFree(str);}
2347
Daniel Veillardf4862f02002-09-10 11:13:43 +00002348static xmlParserInputPtr
Daniel Veillardf5582f12002-06-11 10:08:16 +00002349xmlNewBlanksWrapperInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
2350 xmlParserInputPtr input;
2351 xmlChar *buffer;
2352 size_t length;
2353 if (entity == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002354 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
2355 "xmlNewBlanksWrapperInputStream entity\n");
Daniel Veillardf5582f12002-06-11 10:08:16 +00002356 return(NULL);
2357 }
2358 if (xmlParserDebugEntities)
2359 xmlGenericError(xmlGenericErrorContext,
2360 "new blanks wrapper for entity: %s\n", entity->name);
2361 input = xmlNewInputStream(ctxt);
2362 if (input == NULL) {
2363 return(NULL);
2364 }
2365 length = xmlStrlen(entity->name) + 5;
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002366 buffer = xmlMallocAtomic(length);
Daniel Veillardf5582f12002-06-11 10:08:16 +00002367 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002368 xmlErrMemory(ctxt, NULL);
Daniel Veillard68b6e022008-03-31 09:26:00 +00002369 xmlFree(input);
Daniel Veillardf5582f12002-06-11 10:08:16 +00002370 return(NULL);
2371 }
2372 buffer [0] = ' ';
2373 buffer [1] = '%';
2374 buffer [length-3] = ';';
2375 buffer [length-2] = ' ';
2376 buffer [length-1] = 0;
2377 memcpy(buffer + 2, entity->name, length - 5);
2378 input->free = deallocblankswrapper;
2379 input->base = buffer;
2380 input->cur = buffer;
2381 input->length = length;
2382 input->end = &buffer[length];
2383 return(input);
2384}
2385
2386/**
Owen Taylor3473f882001-02-23 17:55:21 +00002387 * xmlParserHandlePEReference:
2388 * @ctxt: the parser context
2389 *
2390 * [69] PEReference ::= '%' Name ';'
2391 *
2392 * [ WFC: No Recursion ]
2393 * A parsed entity must not contain a recursive
2394 * reference to itself, either directly or indirectly.
2395 *
2396 * [ WFC: Entity Declared ]
2397 * In a document without any DTD, a document with only an internal DTD
2398 * subset which contains no parameter entity references, or a document
2399 * with "standalone='yes'", ... ... The declaration of a parameter
2400 * entity must precede any reference to it...
2401 *
2402 * [ VC: Entity Declared ]
2403 * In a document with an external subset or external parameter entities
2404 * with "standalone='no'", ... ... The declaration of a parameter entity
2405 * must precede any reference to it...
2406 *
2407 * [ WFC: In DTD ]
2408 * Parameter-entity references may only appear in the DTD.
2409 * NOTE: misleading but this is handled.
2410 *
2411 * A PEReference may have been detected in the current input stream
2412 * the handling is done accordingly to
2413 * http://www.w3.org/TR/REC-xml#entproc
2414 * i.e.
2415 * - Included in literal in entity values
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002416 * - Included as Parameter Entity reference within DTDs
Owen Taylor3473f882001-02-23 17:55:21 +00002417 */
2418void
2419xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002420 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00002421 xmlEntityPtr entity = NULL;
2422 xmlParserInputPtr input;
2423
Owen Taylor3473f882001-02-23 17:55:21 +00002424 if (RAW != '%') return;
2425 switch(ctxt->instate) {
2426 case XML_PARSER_CDATA_SECTION:
2427 return;
2428 case XML_PARSER_COMMENT:
2429 return;
2430 case XML_PARSER_START_TAG:
2431 return;
2432 case XML_PARSER_END_TAG:
2433 return;
2434 case XML_PARSER_EOF:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002435 xmlFatalErr(ctxt, XML_ERR_PEREF_AT_EOF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002436 return;
2437 case XML_PARSER_PROLOG:
2438 case XML_PARSER_START:
2439 case XML_PARSER_MISC:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002440 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_PROLOG, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002441 return;
2442 case XML_PARSER_ENTITY_DECL:
2443 case XML_PARSER_CONTENT:
2444 case XML_PARSER_ATTRIBUTE_VALUE:
2445 case XML_PARSER_PI:
2446 case XML_PARSER_SYSTEM_LITERAL:
Daniel Veillard4a7ae502002-02-18 19:18:17 +00002447 case XML_PARSER_PUBLIC_LITERAL:
Owen Taylor3473f882001-02-23 17:55:21 +00002448 /* we just ignore it there */
2449 return;
2450 case XML_PARSER_EPILOG:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002451 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_EPILOG, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002452 return;
2453 case XML_PARSER_ENTITY_VALUE:
2454 /*
2455 * NOTE: in the case of entity values, we don't do the
2456 * substitution here since we need the literal
2457 * entity value to be able to save the internal
2458 * subset of the document.
2459 * This will be handled by xmlStringDecodeEntities
2460 */
2461 return;
2462 case XML_PARSER_DTD:
2463 /*
2464 * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
2465 * In the internal DTD subset, parameter-entity references
2466 * can occur only where markup declarations can occur, not
2467 * within markup declarations.
2468 * In that case this is handled in xmlParseMarkupDecl
2469 */
2470 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
2471 return;
William M. Brack76e95df2003-10-18 16:20:14 +00002472 if (IS_BLANK_CH(NXT(1)) || NXT(1) == 0)
Daniel Veillardf5582f12002-06-11 10:08:16 +00002473 return;
Owen Taylor3473f882001-02-23 17:55:21 +00002474 break;
2475 case XML_PARSER_IGNORE:
2476 return;
2477 }
2478
2479 NEXT;
2480 name = xmlParseName(ctxt);
2481 if (xmlParserDebugEntities)
2482 xmlGenericError(xmlGenericErrorContext,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002483 "PEReference: %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00002484 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002485 xmlFatalErr(ctxt, XML_ERR_PEREF_NO_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002486 } else {
2487 if (RAW == ';') {
2488 NEXT;
2489 if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL))
2490 entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
2491 if (entity == NULL) {
2492
2493 /*
2494 * [ WFC: Entity Declared ]
2495 * In a document without any DTD, a document with only an
2496 * internal DTD subset which contains no parameter entity
2497 * references, or a document with "standalone='yes'", ...
2498 * ... The declaration of a parameter entity must precede
2499 * any reference to it...
2500 */
2501 if ((ctxt->standalone == 1) ||
2502 ((ctxt->hasExternalSubset == 0) &&
2503 (ctxt->hasPErefs == 0))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00002504 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00002505 "PEReference: %%%s; not found\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00002506 } else {
2507 /*
2508 * [ VC: Entity Declared ]
2509 * In a document with an external subset or external
2510 * parameter entities with "standalone='no'", ...
2511 * ... The declaration of a parameter entity must precede
2512 * any reference to it...
2513 */
Daniel Veillard24eb9782003-10-04 21:08:09 +00002514 if ((ctxt->validate) && (ctxt->vctxt.error != NULL)) {
2515 xmlValidityError(ctxt, XML_WAR_UNDECLARED_ENTITY,
2516 "PEReference: %%%s; not found\n",
Daniel Veillardae0765b2008-07-31 19:54:59 +00002517 name, NULL);
Daniel Veillard24eb9782003-10-04 21:08:09 +00002518 } else
2519 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
2520 "PEReference: %%%s; not found\n",
2521 name, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002522 ctxt->valid = 0;
2523 }
Daniel Veillardf5582f12002-06-11 10:08:16 +00002524 } else if (ctxt->input->free != deallocblankswrapper) {
2525 input = xmlNewBlanksWrapperInputStream(ctxt, entity);
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00002526 if (xmlPushInput(ctxt, input) < 0)
2527 return;
Owen Taylor3473f882001-02-23 17:55:21 +00002528 } else {
2529 if ((entity->etype == XML_INTERNAL_PARAMETER_ENTITY) ||
2530 (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)) {
Daniel Veillard87a764e2001-06-20 17:41:10 +00002531 xmlChar start[4];
2532 xmlCharEncoding enc;
2533
Owen Taylor3473f882001-02-23 17:55:21 +00002534 /*
2535 * handle the extra spaces added before and after
2536 * c.f. http://www.w3.org/TR/REC-xml#as-PE
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002537 * this is done independently.
Owen Taylor3473f882001-02-23 17:55:21 +00002538 */
2539 input = xmlNewEntityInputStream(ctxt, entity);
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00002540 if (xmlPushInput(ctxt, input) < 0)
2541 return;
Daniel Veillard87a764e2001-06-20 17:41:10 +00002542
2543 /*
2544 * Get the 4 first bytes and decode the charset
2545 * if enc != XML_CHAR_ENCODING_NONE
2546 * plug some encoding conversion routines.
William M. Bracka0c48ad2004-04-16 15:58:29 +00002547 * Note that, since we may have some non-UTF8
2548 * encoding (like UTF16, bug 135229), the 'length'
2549 * is not known, but we can calculate based upon
2550 * the amount of data in the buffer.
Daniel Veillard87a764e2001-06-20 17:41:10 +00002551 */
2552 GROW
William M. Bracka0c48ad2004-04-16 15:58:29 +00002553 if ((ctxt->input->end - ctxt->input->cur)>=4) {
Daniel Veillarde059b892002-06-13 15:32:10 +00002554 start[0] = RAW;
2555 start[1] = NXT(1);
2556 start[2] = NXT(2);
2557 start[3] = NXT(3);
2558 enc = xmlDetectCharEncoding(start, 4);
2559 if (enc != XML_CHAR_ENCODING_NONE) {
2560 xmlSwitchEncoding(ctxt, enc);
2561 }
Daniel Veillard87a764e2001-06-20 17:41:10 +00002562 }
2563
Owen Taylor3473f882001-02-23 17:55:21 +00002564 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
Daniel Veillarda07050d2003-10-19 14:46:32 +00002565 (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l' )) &&
2566 (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002567 xmlParseTextDecl(ctxt);
2568 }
Owen Taylor3473f882001-02-23 17:55:21 +00002569 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00002570 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
2571 "PEReference: %s is not a parameter entity\n",
2572 name);
Owen Taylor3473f882001-02-23 17:55:21 +00002573 }
2574 }
2575 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002576 xmlFatalErr(ctxt, XML_ERR_PEREF_SEMICOL_MISSING, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002577 }
Owen Taylor3473f882001-02-23 17:55:21 +00002578 }
2579}
2580
/*
 * growBuffer:
 * @buffer:  name of the xmlChar * variable to enlarge; a companion
 *           variable named <buffer>_size must hold its current size
 * @n:  extra room added on top of the doubled size
 *
 * Macro used to grow the current buffer: <buffer>_size becomes twice its
 * previous value plus @n and the buffer is reallocated to that size.
 * On allocation failure control jumps to a "mem_error" label which the
 * enclosing function must provide; the buffer pointer is left untouched
 * in that case.
 */
#define growBuffer(buffer, n) {						\
    xmlChar *tmp;							\
    buffer##_size *= 2;							\
    buffer##_size += (n);						\
    tmp = (xmlChar *)							\
		xmlRealloc(buffer, buffer##_size * sizeof(xmlChar));	\
    if (tmp == NULL) goto mem_error;					\
    buffer = tmp;							\
}
2593
2594/**
Daniel Veillard7a02cfe2003-09-25 12:18:34 +00002595 * xmlStringLenDecodeEntities:
Owen Taylor3473f882001-02-23 17:55:21 +00002596 * @ctxt: the parser context
2597 * @str: the input string
Daniel Veillarde57ec792003-09-10 10:50:59 +00002598 * @len: the string length
Owen Taylor3473f882001-02-23 17:55:21 +00002599 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2600 * @end: an end marker xmlChar, 0 if none
2601 * @end2: an end marker xmlChar, 0 if none
2602 * @end3: an end marker xmlChar, 0 if none
2603 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002604 * Takes a entity string content and process to do the adequate substitutions.
Owen Taylor3473f882001-02-23 17:55:21 +00002605 *
2606 * [67] Reference ::= EntityRef | CharRef
2607 *
2608 * [69] PEReference ::= '%' Name ';'
2609 *
2610 * Returns A newly allocated string with the substitution done. The caller
2611 * must deallocate it !
2612 */
2613xmlChar *
Daniel Veillarde57ec792003-09-10 10:50:59 +00002614xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2615 int what, xmlChar end, xmlChar end2, xmlChar end3) {
Owen Taylor3473f882001-02-23 17:55:21 +00002616 xmlChar *buffer = NULL;
2617 int buffer_size = 0;
2618
2619 xmlChar *current = NULL;
Daniel Veillard68b6e022008-03-31 09:26:00 +00002620 xmlChar *rep = NULL;
Daniel Veillarde57ec792003-09-10 10:50:59 +00002621 const xmlChar *last;
Owen Taylor3473f882001-02-23 17:55:21 +00002622 xmlEntityPtr ent;
2623 int c,l;
2624 int nbchars = 0;
2625
Daniel Veillarda82b1822004-11-08 16:24:57 +00002626 if ((ctxt == NULL) || (str == NULL) || (len < 0))
Owen Taylor3473f882001-02-23 17:55:21 +00002627 return(NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00002628 last = str + len;
Owen Taylor3473f882001-02-23 17:55:21 +00002629
Daniel Veillard0161e632008-08-28 15:36:32 +00002630 if (((ctxt->depth > 40) &&
2631 ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
2632 (ctxt->depth > 1024)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002633 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002634 return(NULL);
2635 }
2636
2637 /*
2638 * allocate a translation buffer.
2639 */
2640 buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002641 buffer = (xmlChar *) xmlMallocAtomic(buffer_size * sizeof(xmlChar));
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002642 if (buffer == NULL) goto mem_error;
Owen Taylor3473f882001-02-23 17:55:21 +00002643
2644 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002645 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +00002646 * we are operating on already parsed values.
2647 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00002648 if (str < last)
2649 c = CUR_SCHAR(str, l);
2650 else
2651 c = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002652 while ((c != 0) && (c != end) && /* non input consuming loop */
2653 (c != end2) && (c != end3)) {
2654
2655 if (c == 0) break;
2656 if ((c == '&') && (str[1] == '#')) {
2657 int val = xmlParseStringCharRef(ctxt, &str);
2658 if (val != 0) {
2659 COPY_BUF(0,buffer,nbchars,val);
2660 }
Daniel Veillardbedc9772005-09-28 21:42:15 +00002661 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
Daniel Veillard0161e632008-08-28 15:36:32 +00002662 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
Daniel Veillardbedc9772005-09-28 21:42:15 +00002663 }
Owen Taylor3473f882001-02-23 17:55:21 +00002664 } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
2665 if (xmlParserDebugEntities)
2666 xmlGenericError(xmlGenericErrorContext,
2667 "String decoding Entity Reference: %.30s\n",
2668 str);
2669 ent = xmlParseStringEntityRef(ctxt, &str);
Daniel Veillard8915c152008-08-26 13:05:34 +00002670 if ((ctxt->lastError.code == XML_ERR_ENTITY_LOOP) ||
2671 (ctxt->lastError.code == XML_ERR_INTERNAL_ERROR))
Daniel Veillard4bf899b2008-08-20 17:04:30 +00002672 goto int_error;
Daniel Veillard4bf899b2008-08-20 17:04:30 +00002673 if (ent != NULL)
Daniel Veillardf4f4e482008-08-25 08:57:48 +00002674 ctxt->nbentities += ent->checked;
Owen Taylor3473f882001-02-23 17:55:21 +00002675 if ((ent != NULL) &&
2676 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
2677 if (ent->content != NULL) {
2678 COPY_BUF(0,buffer,nbchars,ent->content[0]);
Daniel Veillardbedc9772005-09-28 21:42:15 +00002679 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
Daniel Veillard0161e632008-08-28 15:36:32 +00002680 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
Daniel Veillardbedc9772005-09-28 21:42:15 +00002681 }
Owen Taylor3473f882001-02-23 17:55:21 +00002682 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00002683 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
2684 "predefined entity has no content\n");
Owen Taylor3473f882001-02-23 17:55:21 +00002685 }
2686 } else if ((ent != NULL) && (ent->content != NULL)) {
Owen Taylor3473f882001-02-23 17:55:21 +00002687 ctxt->depth++;
2688 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2689 0, 0, 0);
2690 ctxt->depth--;
Daniel Veillard0161e632008-08-28 15:36:32 +00002691
Owen Taylor3473f882001-02-23 17:55:21 +00002692 if (rep != NULL) {
2693 current = rep;
2694 while (*current != 0) { /* non input consuming loop */
2695 buffer[nbchars++] = *current++;
2696 if (nbchars >
2697 buffer_size - XML_PARSER_BUFFER_SIZE) {
Daniel Veillard0161e632008-08-28 15:36:32 +00002698 if (xmlParserEntityCheck(ctxt, nbchars, ent))
2699 goto int_error;
2700 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
Owen Taylor3473f882001-02-23 17:55:21 +00002701 }
2702 }
2703 xmlFree(rep);
Daniel Veillard68b6e022008-03-31 09:26:00 +00002704 rep = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00002705 }
2706 } else if (ent != NULL) {
2707 int i = xmlStrlen(ent->name);
2708 const xmlChar *cur = ent->name;
2709
2710 buffer[nbchars++] = '&';
2711 if (nbchars > buffer_size - i - XML_PARSER_BUFFER_SIZE) {
Daniel Veillard0161e632008-08-28 15:36:32 +00002712 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
Owen Taylor3473f882001-02-23 17:55:21 +00002713 }
2714 for (;i > 0;i--)
2715 buffer[nbchars++] = *cur++;
2716 buffer[nbchars++] = ';';
2717 }
2718 } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
2719 if (xmlParserDebugEntities)
2720 xmlGenericError(xmlGenericErrorContext,
2721 "String decoding PE Reference: %.30s\n", str);
2722 ent = xmlParseStringPEReference(ctxt, &str);
Daniel Veillard4bf899b2008-08-20 17:04:30 +00002723 if (ctxt->lastError.code == XML_ERR_ENTITY_LOOP)
2724 goto int_error;
Daniel Veillard4bf899b2008-08-20 17:04:30 +00002725 if (ent != NULL)
Daniel Veillardf4f4e482008-08-25 08:57:48 +00002726 ctxt->nbentities += ent->checked;
Owen Taylor3473f882001-02-23 17:55:21 +00002727 if (ent != NULL) {
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00002728 if (ent->content == NULL) {
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00002729 xmlLoadEntityContent(ctxt, ent);
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00002730 }
Owen Taylor3473f882001-02-23 17:55:21 +00002731 ctxt->depth++;
2732 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2733 0, 0, 0);
2734 ctxt->depth--;
2735 if (rep != NULL) {
2736 current = rep;
2737 while (*current != 0) { /* non input consuming loop */
2738 buffer[nbchars++] = *current++;
2739 if (nbchars >
2740 buffer_size - XML_PARSER_BUFFER_SIZE) {
Daniel Veillard0161e632008-08-28 15:36:32 +00002741 if (xmlParserEntityCheck(ctxt, nbchars, ent))
2742 goto int_error;
2743 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
Owen Taylor3473f882001-02-23 17:55:21 +00002744 }
2745 }
2746 xmlFree(rep);
Daniel Veillard68b6e022008-03-31 09:26:00 +00002747 rep = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00002748 }
2749 }
2750 } else {
2751 COPY_BUF(l,buffer,nbchars,c);
2752 str += l;
2753 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
Daniel Veillard0161e632008-08-28 15:36:32 +00002754 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
Owen Taylor3473f882001-02-23 17:55:21 +00002755 }
2756 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00002757 if (str < last)
2758 c = CUR_SCHAR(str, l);
2759 else
2760 c = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002761 }
Daniel Veillard13cee4e2009-09-05 14:52:55 +02002762 buffer[nbchars] = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002763 return(buffer);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002764
2765mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002766 xmlErrMemory(ctxt, NULL);
Daniel Veillard4bf899b2008-08-20 17:04:30 +00002767int_error:
Daniel Veillard68b6e022008-03-31 09:26:00 +00002768 if (rep != NULL)
2769 xmlFree(rep);
2770 if (buffer != NULL)
2771 xmlFree(buffer);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002772 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002773}
2774
Daniel Veillarde57ec792003-09-10 10:50:59 +00002775/**
2776 * xmlStringDecodeEntities:
2777 * @ctxt: the parser context
2778 * @str: the input string
2779 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2780 * @end: an end marker xmlChar, 0 if none
2781 * @end2: an end marker xmlChar, 0 if none
2782 * @end3: an end marker xmlChar, 0 if none
2783 *
2784 * Takes a entity string content and process to do the adequate substitutions.
2785 *
2786 * [67] Reference ::= EntityRef | CharRef
2787 *
2788 * [69] PEReference ::= '%' Name ';'
2789 *
2790 * Returns A newly allocated string with the substitution done. The caller
2791 * must deallocate it !
2792 */
2793xmlChar *
2794xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
2795 xmlChar end, xmlChar end2, xmlChar end3) {
Daniel Veillarda82b1822004-11-08 16:24:57 +00002796 if ((ctxt == NULL) || (str == NULL)) return(NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00002797 return(xmlStringLenDecodeEntities(ctxt, str, xmlStrlen(str), what,
2798 end, end2, end3));
2799}
Owen Taylor3473f882001-02-23 17:55:21 +00002800
/************************************************************************
 *									*
 * 		Commodity functions, cleanup needed ?			*
 *									*
 ************************************************************************/
2806
2807/**
2808 * areBlanks:
2809 * @ctxt: an XML parser context
2810 * @str: a xmlChar *
2811 * @len: the size of @str
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002812 * @blank_chars: we know the chars are blanks
Owen Taylor3473f882001-02-23 17:55:21 +00002813 *
2814 * Is this a sequence of blank chars that one can ignore ?
2815 *
2816 * Returns 1 if ignorable 0 otherwise.
2817 */
2818
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002819static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2820 int blank_chars) {
Owen Taylor3473f882001-02-23 17:55:21 +00002821 int i, ret;
2822 xmlNodePtr lastChild;
2823
Daniel Veillard05c13a22001-09-09 08:38:09 +00002824 /*
2825 * Don't spend time trying to differentiate them, the same callback is
2826 * used !
2827 */
2828 if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters)
Daniel Veillard2f362242001-03-02 17:36:21 +00002829 return(0);
2830
Owen Taylor3473f882001-02-23 17:55:21 +00002831 /*
2832 * Check for xml:space value.
2833 */
Daniel Veillard1114d002006-10-12 16:24:35 +00002834 if ((ctxt->space == NULL) || (*(ctxt->space) == 1) ||
2835 (*(ctxt->space) == -2))
Owen Taylor3473f882001-02-23 17:55:21 +00002836 return(0);
2837
2838 /*
2839 * Check that the string is made of blanks
2840 */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002841 if (blank_chars == 0) {
2842 for (i = 0;i < len;i++)
2843 if (!(IS_BLANK_CH(str[i]))) return(0);
2844 }
Owen Taylor3473f882001-02-23 17:55:21 +00002845
2846 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002847 * Look if the element is mixed content in the DTD if available
Owen Taylor3473f882001-02-23 17:55:21 +00002848 */
Daniel Veillard6dd398f2001-07-25 22:41:03 +00002849 if (ctxt->node == NULL) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002850 if (ctxt->myDoc != NULL) {
2851 ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
2852 if (ret == 0) return(1);
2853 if (ret == 1) return(0);
2854 }
2855
2856 /*
2857 * Otherwise, heuristic :-\
2858 */
Daniel Veillardabac41e2005-07-06 15:17:38 +00002859 if ((RAW != '<') && (RAW != 0xD)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002860 if ((ctxt->node->children == NULL) &&
2861 (RAW == '<') && (NXT(1) == '/')) return(0);
2862
2863 lastChild = xmlGetLastChild(ctxt->node);
2864 if (lastChild == NULL) {
Daniel Veillard7db37732001-07-12 01:20:08 +00002865 if ((ctxt->node->type != XML_ELEMENT_NODE) &&
2866 (ctxt->node->content != NULL)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002867 } else if (xmlNodeIsText(lastChild))
2868 return(0);
2869 else if ((ctxt->node->children != NULL) &&
2870 (xmlNodeIsText(ctxt->node->children)))
2871 return(0);
2872 return(1);
2873}
2874
/************************************************************************
 *									*
 *		Extra stuff for namespace support			*
 *	Relates to http://www.w3.org/TR/WD-xml-names			*
 *									*
 ************************************************************************/
2881
2882/**
2883 * xmlSplitQName:
2884 * @ctxt: an XML parser context
2885 * @name: an XML parser context
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00002886 * @prefix: a xmlChar **
Owen Taylor3473f882001-02-23 17:55:21 +00002887 *
2888 * parse an UTF8 encoded XML qualified name string
2889 *
2890 * [NS 5] QName ::= (Prefix ':')? LocalPart
2891 *
2892 * [NS 6] Prefix ::= NCName
2893 *
2894 * [NS 7] LocalPart ::= NCName
2895 *
2896 * Returns the local part, and prefix is updated
2897 * to get the Prefix if any.
2898 */
2899
2900xmlChar *
2901xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
2902 xmlChar buf[XML_MAX_NAMELEN + 5];
2903 xmlChar *buffer = NULL;
2904 int len = 0;
2905 int max = XML_MAX_NAMELEN;
2906 xmlChar *ret = NULL;
2907 const xmlChar *cur = name;
2908 int c;
2909
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00002910 if (prefix == NULL) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002911 *prefix = NULL;
2912
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00002913 if (cur == NULL) return(NULL);
2914
Owen Taylor3473f882001-02-23 17:55:21 +00002915#ifndef XML_XML_NAMESPACE
2916 /* xml: prefix is not really a namespace */
2917 if ((cur[0] == 'x') && (cur[1] == 'm') &&
2918 (cur[2] == 'l') && (cur[3] == ':'))
2919 return(xmlStrdup(name));
2920#endif
2921
Daniel Veillard597bc482003-07-24 16:08:28 +00002922 /* nasty but well=formed */
Owen Taylor3473f882001-02-23 17:55:21 +00002923 if (cur[0] == ':')
2924 return(xmlStrdup(name));
2925
2926 c = *cur++;
2927 while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
2928 buf[len++] = c;
2929 c = *cur++;
2930 }
2931 if (len >= max) {
2932 /*
2933 * Okay someone managed to make a huge name, so he's ready to pay
2934 * for the processing speed.
2935 */
2936 max = len * 2;
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00002937
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002938 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002939 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002940 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002941 return(NULL);
2942 }
2943 memcpy(buffer, buf, len);
2944 while ((c != 0) && (c != ':')) { /* tested bigname.xml */
2945 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00002946 xmlChar *tmp;
2947
Owen Taylor3473f882001-02-23 17:55:21 +00002948 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00002949 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00002950 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00002951 if (tmp == NULL) {
Daniel Veillard68b6e022008-03-31 09:26:00 +00002952 xmlFree(buffer);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002953 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002954 return(NULL);
2955 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00002956 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00002957 }
2958 buffer[len++] = c;
2959 c = *cur++;
2960 }
2961 buffer[len] = 0;
2962 }
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00002963
Daniel Veillard597bc482003-07-24 16:08:28 +00002964 if ((c == ':') && (*cur == 0)) {
Daniel Veillard02a49632006-10-13 12:42:31 +00002965 if (buffer != NULL)
2966 xmlFree(buffer);
2967 *prefix = NULL;
Daniel Veillard597bc482003-07-24 16:08:28 +00002968 return(xmlStrdup(name));
Daniel Veillard02a49632006-10-13 12:42:31 +00002969 }
Daniel Veillard597bc482003-07-24 16:08:28 +00002970
Owen Taylor3473f882001-02-23 17:55:21 +00002971 if (buffer == NULL)
2972 ret = xmlStrndup(buf, len);
2973 else {
2974 ret = buffer;
2975 buffer = NULL;
2976 max = XML_MAX_NAMELEN;
2977 }
2978
2979
2980 if (c == ':') {
Daniel Veillardbb284f42002-10-16 18:02:47 +00002981 c = *cur;
Owen Taylor3473f882001-02-23 17:55:21 +00002982 *prefix = ret;
Daniel Veillard597bc482003-07-24 16:08:28 +00002983 if (c == 0) {
Daniel Veillard8d73bcb2003-08-04 01:06:15 +00002984 return(xmlStrndup(BAD_CAST "", 0));
Daniel Veillard597bc482003-07-24 16:08:28 +00002985 }
Owen Taylor3473f882001-02-23 17:55:21 +00002986 len = 0;
2987
Daniel Veillardbb284f42002-10-16 18:02:47 +00002988 /*
2989 * Check that the first character is proper to start
2990 * a new name
2991 */
2992 if (!(((c >= 0x61) && (c <= 0x7A)) ||
2993 ((c >= 0x41) && (c <= 0x5A)) ||
2994 (c == '_') || (c == ':'))) {
2995 int l;
2996 int first = CUR_SCHAR(cur, l);
2997
2998 if (!IS_LETTER(first) && (first != '_')) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00002999 xmlFatalErrMsgStr(ctxt, XML_NS_ERR_QNAME,
Daniel Veillardbb284f42002-10-16 18:02:47 +00003000 "Name %s is not XML Namespace compliant\n",
Daniel Veillardbc92eca2003-09-15 09:48:06 +00003001 name);
Daniel Veillardbb284f42002-10-16 18:02:47 +00003002 }
3003 }
3004 cur++;
3005
Owen Taylor3473f882001-02-23 17:55:21 +00003006 while ((c != 0) && (len < max)) { /* tested bigname2.xml */
3007 buf[len++] = c;
3008 c = *cur++;
3009 }
3010 if (len >= max) {
3011 /*
3012 * Okay someone managed to make a huge name, so he's ready to pay
3013 * for the processing speed.
3014 */
3015 max = len * 2;
Daniel Veillard68b6e022008-03-31 09:26:00 +00003016
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003017 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003018 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003019 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003020 return(NULL);
3021 }
3022 memcpy(buffer, buf, len);
3023 while (c != 0) { /* tested bigname2.xml */
3024 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003025 xmlChar *tmp;
3026
Owen Taylor3473f882001-02-23 17:55:21 +00003027 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003028 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00003029 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00003030 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003031 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00003032 xmlFree(buffer);
Owen Taylor3473f882001-02-23 17:55:21 +00003033 return(NULL);
3034 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003035 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003036 }
3037 buffer[len++] = c;
3038 c = *cur++;
3039 }
3040 buffer[len] = 0;
3041 }
Daniel Veillard68b6e022008-03-31 09:26:00 +00003042
Owen Taylor3473f882001-02-23 17:55:21 +00003043 if (buffer == NULL)
3044 ret = xmlStrndup(buf, len);
3045 else {
3046 ret = buffer;
3047 }
3048 }
3049
3050 return(ret);
3051}
3052
3053/************************************************************************
3054 * *
3055 * The parser itself *
3056 * Relates to http://www.w3.org/TR/REC-xml *
3057 * *
3058 ************************************************************************/
3059
Daniel Veillard34e3f642008-07-29 09:02:27 +00003060/************************************************************************
3061 * *
3062 * Routines to parse Name, NCName and NmToken *
3063 * *
3064 ************************************************************************/
#ifdef DEBUG
/*
 * Call counters, only compiled in DEBUG builds: each one is bumped on
 * entry of the name parsing routine it is named after.
 */
static unsigned long nbParseName = 0;
static unsigned long nbParseNameComplex = 0;
static unsigned long nbParseNCName = 0;
static unsigned long nbParseNCNameComplex = 0;
static unsigned long nbParseNmToken = 0;
static unsigned long nbParseStringName = 0;
#endif
3073
/*
 * The two following functions are related to the change of accepted
 * characters for Name and NmToken in Revision 5 of XML-1.0.
 * They correspond to the modified production [4] and the new production [4a]
 * introduced in that revision. Also note that the macros used for the
 * productions Letter, Digit, CombiningChar and Extender are not needed
 * anymore.
 * We still keep compatibility with the pre-revision 5 parsing semantics if
 * the new XML_PARSE_OLD10 option is given to the parser.
 */
3084static int
3085xmlIsNameStartChar(xmlParserCtxtPtr ctxt, int c) {
3086 if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3087 /*
3088 * Use the new checks of production [4] [4a] amd [5] of the
3089 * Update 5 of XML-1.0
3090 */
3091 if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3092 (((c >= 'a') && (c <= 'z')) ||
3093 ((c >= 'A') && (c <= 'Z')) ||
3094 (c == '_') || (c == ':') ||
3095 ((c >= 0xC0) && (c <= 0xD6)) ||
3096 ((c >= 0xD8) && (c <= 0xF6)) ||
3097 ((c >= 0xF8) && (c <= 0x2FF)) ||
3098 ((c >= 0x370) && (c <= 0x37D)) ||
3099 ((c >= 0x37F) && (c <= 0x1FFF)) ||
3100 ((c >= 0x200C) && (c <= 0x200D)) ||
3101 ((c >= 0x2070) && (c <= 0x218F)) ||
3102 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3103 ((c >= 0x3001) && (c <= 0xD7FF)) ||
3104 ((c >= 0xF900) && (c <= 0xFDCF)) ||
3105 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3106 ((c >= 0x10000) && (c <= 0xEFFFF))))
3107 return(1);
3108 } else {
3109 if (IS_LETTER(c) || (c == '_') || (c == ':'))
3110 return(1);
3111 }
3112 return(0);
3113}
3114
3115static int
3116xmlIsNameChar(xmlParserCtxtPtr ctxt, int c) {
3117 if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3118 /*
3119 * Use the new checks of production [4] [4a] amd [5] of the
3120 * Update 5 of XML-1.0
3121 */
3122 if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3123 (((c >= 'a') && (c <= 'z')) ||
3124 ((c >= 'A') && (c <= 'Z')) ||
3125 ((c >= '0') && (c <= '9')) || /* !start */
3126 (c == '_') || (c == ':') ||
3127 (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3128 ((c >= 0xC0) && (c <= 0xD6)) ||
3129 ((c >= 0xD8) && (c <= 0xF6)) ||
3130 ((c >= 0xF8) && (c <= 0x2FF)) ||
3131 ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3132 ((c >= 0x370) && (c <= 0x37D)) ||
3133 ((c >= 0x37F) && (c <= 0x1FFF)) ||
3134 ((c >= 0x200C) && (c <= 0x200D)) ||
3135 ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3136 ((c >= 0x2070) && (c <= 0x218F)) ||
3137 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3138 ((c >= 0x3001) && (c <= 0xD7FF)) ||
3139 ((c >= 0xF900) && (c <= 0xFDCF)) ||
3140 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3141 ((c >= 0x10000) && (c <= 0xEFFFF))))
3142 return(1);
3143 } else {
3144 if ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3145 (c == '.') || (c == '-') ||
3146 (c == '_') || (c == ':') ||
3147 (IS_COMBINING(c)) ||
3148 (IS_EXTENDER(c)))
3149 return(1);
3150 }
3151 return(0);
3152}
3153
Daniel Veillarde57ec792003-09-10 10:50:59 +00003154static xmlChar * xmlParseAttValueInternal(xmlParserCtxtPtr ctxt,
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003155 int *len, int *alloc, int normalize);
Daniel Veillard0fb18932003-09-07 09:14:37 +00003156
Daniel Veillard34e3f642008-07-29 09:02:27 +00003157static const xmlChar *
3158xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
3159 int len = 0, l;
3160 int c;
3161 int count = 0;
3162
Daniel Veillardc6561462009-03-25 10:22:31 +00003163#ifdef DEBUG
Daniel Veillard34e3f642008-07-29 09:02:27 +00003164 nbParseNameComplex++;
Daniel Veillardc6561462009-03-25 10:22:31 +00003165#endif
Daniel Veillard34e3f642008-07-29 09:02:27 +00003166
3167 /*
3168 * Handler for more complex cases
3169 */
3170 GROW;
3171 c = CUR_CHAR(l);
3172 if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3173 /*
3174 * Use the new checks of production [4] [4a] amd [5] of the
3175 * Update 5 of XML-1.0
3176 */
3177 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3178 (!(((c >= 'a') && (c <= 'z')) ||
3179 ((c >= 'A') && (c <= 'Z')) ||
3180 (c == '_') || (c == ':') ||
3181 ((c >= 0xC0) && (c <= 0xD6)) ||
3182 ((c >= 0xD8) && (c <= 0xF6)) ||
3183 ((c >= 0xF8) && (c <= 0x2FF)) ||
3184 ((c >= 0x370) && (c <= 0x37D)) ||
3185 ((c >= 0x37F) && (c <= 0x1FFF)) ||
3186 ((c >= 0x200C) && (c <= 0x200D)) ||
3187 ((c >= 0x2070) && (c <= 0x218F)) ||
3188 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3189 ((c >= 0x3001) && (c <= 0xD7FF)) ||
3190 ((c >= 0xF900) && (c <= 0xFDCF)) ||
3191 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3192 ((c >= 0x10000) && (c <= 0xEFFFF))))) {
3193 return(NULL);
3194 }
3195 len += l;
3196 NEXTL(l);
3197 c = CUR_CHAR(l);
3198 while ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3199 (((c >= 'a') && (c <= 'z')) ||
3200 ((c >= 'A') && (c <= 'Z')) ||
3201 ((c >= '0') && (c <= '9')) || /* !start */
3202 (c == '_') || (c == ':') ||
3203 (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3204 ((c >= 0xC0) && (c <= 0xD6)) ||
3205 ((c >= 0xD8) && (c <= 0xF6)) ||
3206 ((c >= 0xF8) && (c <= 0x2FF)) ||
3207 ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3208 ((c >= 0x370) && (c <= 0x37D)) ||
3209 ((c >= 0x37F) && (c <= 0x1FFF)) ||
3210 ((c >= 0x200C) && (c <= 0x200D)) ||
3211 ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3212 ((c >= 0x2070) && (c <= 0x218F)) ||
3213 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3214 ((c >= 0x3001) && (c <= 0xD7FF)) ||
3215 ((c >= 0xF900) && (c <= 0xFDCF)) ||
3216 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3217 ((c >= 0x10000) && (c <= 0xEFFFF))
3218 )) {
3219 if (count++ > 100) {
3220 count = 0;
3221 GROW;
3222 }
3223 len += l;
3224 NEXTL(l);
3225 c = CUR_CHAR(l);
3226 }
3227 } else {
3228 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3229 (!IS_LETTER(c) && (c != '_') &&
3230 (c != ':'))) {
3231 return(NULL);
3232 }
3233 len += l;
3234 NEXTL(l);
3235 c = CUR_CHAR(l);
3236
3237 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3238 ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3239 (c == '.') || (c == '-') ||
3240 (c == '_') || (c == ':') ||
3241 (IS_COMBINING(c)) ||
3242 (IS_EXTENDER(c)))) {
3243 if (count++ > 100) {
3244 count = 0;
3245 GROW;
3246 }
3247 len += l;
3248 NEXTL(l);
3249 c = CUR_CHAR(l);
3250 }
3251 }
3252 if ((*ctxt->input->cur == '\n') && (ctxt->input->cur[-1] == '\r'))
3253 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - (len + 1), len));
3254 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
3255}
3256
Owen Taylor3473f882001-02-23 17:55:21 +00003257/**
3258 * xmlParseName:
3259 * @ctxt: an XML parser context
3260 *
3261 * parse an XML name.
3262 *
3263 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3264 * CombiningChar | Extender
3265 *
3266 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3267 *
Daniel Veillard807b4de2004-09-26 14:42:56 +00003268 * [6] Names ::= Name (#x20 Name)*
Owen Taylor3473f882001-02-23 17:55:21 +00003269 *
3270 * Returns the Name parsed or NULL
3271 */
3272
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003273const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00003274xmlParseName(xmlParserCtxtPtr ctxt) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00003275 const xmlChar *in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003276 const xmlChar *ret;
Owen Taylor3473f882001-02-23 17:55:21 +00003277 int count = 0;
3278
3279 GROW;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003280
Daniel Veillardc6561462009-03-25 10:22:31 +00003281#ifdef DEBUG
Daniel Veillard34e3f642008-07-29 09:02:27 +00003282 nbParseName++;
Daniel Veillardc6561462009-03-25 10:22:31 +00003283#endif
Daniel Veillard34e3f642008-07-29 09:02:27 +00003284
Daniel Veillard48b2f892001-02-25 16:11:03 +00003285 /*
3286 * Accelerator for simple ASCII names
3287 */
3288 in = ctxt->input->cur;
3289 if (((*in >= 0x61) && (*in <= 0x7A)) ||
3290 ((*in >= 0x41) && (*in <= 0x5A)) ||
3291 (*in == '_') || (*in == ':')) {
3292 in++;
3293 while (((*in >= 0x61) && (*in <= 0x7A)) ||
3294 ((*in >= 0x41) && (*in <= 0x5A)) ||
3295 ((*in >= 0x30) && (*in <= 0x39)) ||
Daniel Veillard76d66f42001-05-16 21:05:17 +00003296 (*in == '_') || (*in == '-') ||
3297 (*in == ':') || (*in == '.'))
Daniel Veillard48b2f892001-02-25 16:11:03 +00003298 in++;
Daniel Veillard76d66f42001-05-16 21:05:17 +00003299 if ((*in > 0) && (*in < 0x80)) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00003300 count = in - ctxt->input->cur;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003301 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
Daniel Veillard48b2f892001-02-25 16:11:03 +00003302 ctxt->input->cur = in;
Daniel Veillard77a90a72003-03-22 00:04:05 +00003303 ctxt->nbChars += count;
3304 ctxt->input->col += count;
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003305 if (ret == NULL)
3306 xmlErrMemory(ctxt, NULL);
Daniel Veillard48b2f892001-02-25 16:11:03 +00003307 return(ret);
3308 }
3309 }
Daniel Veillard34e3f642008-07-29 09:02:27 +00003310 /* accelerator for special cases */
Daniel Veillard2f362242001-03-02 17:36:21 +00003311 return(xmlParseNameComplex(ctxt));
Daniel Veillard21a0f912001-02-25 19:54:14 +00003312}
Daniel Veillard48b2f892001-02-25 16:11:03 +00003313
Daniel Veillard34e3f642008-07-29 09:02:27 +00003314static const xmlChar *
3315xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) {
3316 int len = 0, l;
3317 int c;
3318 int count = 0;
3319
Daniel Veillardc6561462009-03-25 10:22:31 +00003320#ifdef DEBUG
Daniel Veillard34e3f642008-07-29 09:02:27 +00003321 nbParseNCNameComplex++;
Daniel Veillardc6561462009-03-25 10:22:31 +00003322#endif
Daniel Veillard34e3f642008-07-29 09:02:27 +00003323
3324 /*
3325 * Handler for more complex cases
3326 */
3327 GROW;
3328 c = CUR_CHAR(l);
3329 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3330 (!xmlIsNameStartChar(ctxt, c) || (c == ':'))) {
3331 return(NULL);
3332 }
3333
3334 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3335 (xmlIsNameChar(ctxt, c) && (c != ':'))) {
3336 if (count++ > 100) {
3337 count = 0;
3338 GROW;
3339 }
3340 len += l;
3341 NEXTL(l);
3342 c = CUR_CHAR(l);
3343 }
3344 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
3345}
3346
3347/**
3348 * xmlParseNCName:
3349 * @ctxt: an XML parser context
3350 * @len: lenght of the string parsed
3351 *
3352 * parse an XML name.
3353 *
3354 * [4NS] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
3355 * CombiningChar | Extender
3356 *
3357 * [5NS] NCName ::= (Letter | '_') (NCNameChar)*
3358 *
3359 * Returns the Name parsed or NULL
3360 */
3361
3362static const xmlChar *
3363xmlParseNCName(xmlParserCtxtPtr ctxt) {
3364 const xmlChar *in;
3365 const xmlChar *ret;
3366 int count = 0;
3367
Daniel Veillardc6561462009-03-25 10:22:31 +00003368#ifdef DEBUG
Daniel Veillard34e3f642008-07-29 09:02:27 +00003369 nbParseNCName++;
Daniel Veillardc6561462009-03-25 10:22:31 +00003370#endif
Daniel Veillard34e3f642008-07-29 09:02:27 +00003371
3372 /*
3373 * Accelerator for simple ASCII names
3374 */
3375 in = ctxt->input->cur;
3376 if (((*in >= 0x61) && (*in <= 0x7A)) ||
3377 ((*in >= 0x41) && (*in <= 0x5A)) ||
3378 (*in == '_')) {
3379 in++;
3380 while (((*in >= 0x61) && (*in <= 0x7A)) ||
3381 ((*in >= 0x41) && (*in <= 0x5A)) ||
3382 ((*in >= 0x30) && (*in <= 0x39)) ||
3383 (*in == '_') || (*in == '-') ||
3384 (*in == '.'))
3385 in++;
3386 if ((*in > 0) && (*in < 0x80)) {
3387 count = in - ctxt->input->cur;
3388 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
3389 ctxt->input->cur = in;
3390 ctxt->nbChars += count;
3391 ctxt->input->col += count;
3392 if (ret == NULL) {
3393 xmlErrMemory(ctxt, NULL);
3394 }
3395 return(ret);
3396 }
3397 }
3398 return(xmlParseNCNameComplex(ctxt));
3399}
3400
Daniel Veillard46de64e2002-05-29 08:21:33 +00003401/**
3402 * xmlParseNameAndCompare:
3403 * @ctxt: an XML parser context
3404 *
3405 * parse an XML name and compares for match
3406 * (specialized for endtag parsing)
3407 *
Daniel Veillard46de64e2002-05-29 08:21:33 +00003408 * Returns NULL for an illegal name, (xmlChar*) 1 for success
3409 * and the name for mismatch
3410 */
3411
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003412static const xmlChar *
Daniel Veillard46de64e2002-05-29 08:21:33 +00003413xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003414 register const xmlChar *cmp = other;
3415 register const xmlChar *in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003416 const xmlChar *ret;
Daniel Veillard46de64e2002-05-29 08:21:33 +00003417
3418 GROW;
Daniel Veillard34e3f642008-07-29 09:02:27 +00003419
Daniel Veillard46de64e2002-05-29 08:21:33 +00003420 in = ctxt->input->cur;
3421 while (*in != 0 && *in == *cmp) {
Daniel Veillard34e3f642008-07-29 09:02:27 +00003422 ++in;
Daniel Veillard46de64e2002-05-29 08:21:33 +00003423 ++cmp;
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00003424 ctxt->input->col++;
Daniel Veillard46de64e2002-05-29 08:21:33 +00003425 }
William M. Brack76e95df2003-10-18 16:20:14 +00003426 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
Daniel Veillard34e3f642008-07-29 09:02:27 +00003427 /* success */
Daniel Veillard46de64e2002-05-29 08:21:33 +00003428 ctxt->input->cur = in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003429 return (const xmlChar*) 1;
Daniel Veillard46de64e2002-05-29 08:21:33 +00003430 }
3431 /* failure (or end of input buffer), check with full function */
3432 ret = xmlParseName (ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00003433 /* strings coming from the dictionnary direct compare possible */
3434 if (ret == other) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003435 return (const xmlChar*) 1;
Daniel Veillard46de64e2002-05-29 08:21:33 +00003436 }
3437 return ret;
3438}
3439
Owen Taylor3473f882001-02-23 17:55:21 +00003440/**
3441 * xmlParseStringName:
3442 * @ctxt: an XML parser context
3443 * @str: a pointer to the string pointer (IN/OUT)
3444 *
3445 * parse an XML name.
3446 *
3447 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3448 * CombiningChar | Extender
3449 *
3450 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3451 *
Daniel Veillard807b4de2004-09-26 14:42:56 +00003452 * [6] Names ::= Name (#x20 Name)*
Owen Taylor3473f882001-02-23 17:55:21 +00003453 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003454 * Returns the Name parsed or NULL. The @str pointer
Owen Taylor3473f882001-02-23 17:55:21 +00003455 * is updated to the current location in the string.
3456 */
3457
Daniel Veillard56a4cb82001-03-24 17:00:36 +00003458static xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00003459xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
3460 xmlChar buf[XML_MAX_NAMELEN + 5];
3461 const xmlChar *cur = *str;
3462 int len = 0, l;
3463 int c;
3464
Daniel Veillardc6561462009-03-25 10:22:31 +00003465#ifdef DEBUG
Daniel Veillard34e3f642008-07-29 09:02:27 +00003466 nbParseStringName++;
Daniel Veillardc6561462009-03-25 10:22:31 +00003467#endif
Daniel Veillard34e3f642008-07-29 09:02:27 +00003468
Owen Taylor3473f882001-02-23 17:55:21 +00003469 c = CUR_SCHAR(cur, l);
Daniel Veillard34e3f642008-07-29 09:02:27 +00003470 if (!xmlIsNameStartChar(ctxt, c)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003471 return(NULL);
3472 }
3473
Daniel Veillard34e3f642008-07-29 09:02:27 +00003474 COPY_BUF(l,buf,len,c);
3475 cur += l;
3476 c = CUR_SCHAR(cur, l);
3477 while (xmlIsNameChar(ctxt, c)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003478 COPY_BUF(l,buf,len,c);
3479 cur += l;
3480 c = CUR_SCHAR(cur, l);
3481 if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
3482 /*
3483 * Okay someone managed to make a huge name, so he's ready to pay
3484 * for the processing speed.
3485 */
3486 xmlChar *buffer;
3487 int max = len * 2;
Daniel Veillard34e3f642008-07-29 09:02:27 +00003488
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003489 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003490 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003491 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003492 return(NULL);
3493 }
3494 memcpy(buffer, buf, len);
Daniel Veillard34e3f642008-07-29 09:02:27 +00003495 while (xmlIsNameChar(ctxt, c)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003496 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003497 xmlChar *tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003498 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003499 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00003500 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00003501 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003502 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00003503 xmlFree(buffer);
Owen Taylor3473f882001-02-23 17:55:21 +00003504 return(NULL);
3505 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003506 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003507 }
3508 COPY_BUF(l,buffer,len,c);
3509 cur += l;
3510 c = CUR_SCHAR(cur, l);
3511 }
3512 buffer[len] = 0;
3513 *str = cur;
3514 return(buffer);
3515 }
3516 }
3517 *str = cur;
3518 return(xmlStrndup(buf, len));
3519}
3520
3521/**
3522 * xmlParseNmtoken:
3523 * @ctxt: an XML parser context
Daniel Veillard34e3f642008-07-29 09:02:27 +00003524 *
Owen Taylor3473f882001-02-23 17:55:21 +00003525 * parse an XML Nmtoken.
3526 *
3527 * [7] Nmtoken ::= (NameChar)+
3528 *
Daniel Veillard807b4de2004-09-26 14:42:56 +00003529 * [8] Nmtokens ::= Nmtoken (#x20 Nmtoken)*
Owen Taylor3473f882001-02-23 17:55:21 +00003530 *
3531 * Returns the Nmtoken parsed or NULL
3532 */
3533
3534xmlChar *
3535xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
3536 xmlChar buf[XML_MAX_NAMELEN + 5];
3537 int len = 0, l;
3538 int c;
3539 int count = 0;
3540
Daniel Veillardc6561462009-03-25 10:22:31 +00003541#ifdef DEBUG
Daniel Veillard34e3f642008-07-29 09:02:27 +00003542 nbParseNmToken++;
Daniel Veillardc6561462009-03-25 10:22:31 +00003543#endif
Daniel Veillard34e3f642008-07-29 09:02:27 +00003544
Owen Taylor3473f882001-02-23 17:55:21 +00003545 GROW;
3546 c = CUR_CHAR(l);
3547
Daniel Veillard34e3f642008-07-29 09:02:27 +00003548 while (xmlIsNameChar(ctxt, c)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003549 if (count++ > 100) {
3550 count = 0;
3551 GROW;
3552 }
3553 COPY_BUF(l,buf,len,c);
3554 NEXTL(l);
3555 c = CUR_CHAR(l);
3556 if (len >= XML_MAX_NAMELEN) {
3557 /*
3558 * Okay someone managed to make a huge token, so he's ready to pay
3559 * for the processing speed.
3560 */
3561 xmlChar *buffer;
3562 int max = len * 2;
Daniel Veillard34e3f642008-07-29 09:02:27 +00003563
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003564 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003565 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003566 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003567 return(NULL);
3568 }
3569 memcpy(buffer, buf, len);
Daniel Veillard34e3f642008-07-29 09:02:27 +00003570 while (xmlIsNameChar(ctxt, c)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003571 if (count++ > 100) {
3572 count = 0;
3573 GROW;
3574 }
3575 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003576 xmlChar *tmp;
3577
Owen Taylor3473f882001-02-23 17:55:21 +00003578 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003579 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00003580 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00003581 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003582 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00003583 xmlFree(buffer);
Owen Taylor3473f882001-02-23 17:55:21 +00003584 return(NULL);
3585 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003586 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003587 }
3588 COPY_BUF(l,buffer,len,c);
3589 NEXTL(l);
3590 c = CUR_CHAR(l);
3591 }
3592 buffer[len] = 0;
3593 return(buffer);
3594 }
3595 }
3596 if (len == 0)
3597 return(NULL);
3598 return(xmlStrndup(buf, len));
3599}
3600
3601/**
3602 * xmlParseEntityValue:
3603 * @ctxt: an XML parser context
3604 * @orig: if non-NULL store a copy of the original entity value
3605 *
3606 * parse a value for ENTITY declarations
3607 *
3608 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
3609 * "'" ([^%&'] | PEReference | Reference)* "'"
3610 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003611 * Returns the EntityValue parsed with reference substituted or NULL
Owen Taylor3473f882001-02-23 17:55:21 +00003612 */
3613
3614xmlChar *
3615xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
3616 xmlChar *buf = NULL;
3617 int len = 0;
3618 int size = XML_PARSER_BUFFER_SIZE;
3619 int c, l;
3620 xmlChar stop;
3621 xmlChar *ret = NULL;
3622 const xmlChar *cur = NULL;
3623 xmlParserInputPtr input;
3624
3625 if (RAW == '"') stop = '"';
3626 else if (RAW == '\'') stop = '\'';
3627 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003628 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003629 return(NULL);
3630 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003631 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003632 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003633 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003634 return(NULL);
3635 }
3636
3637 /*
3638 * The content of the entity definition is copied in a buffer.
3639 */
3640
3641 ctxt->instate = XML_PARSER_ENTITY_VALUE;
3642 input = ctxt->input;
3643 GROW;
3644 NEXT;
3645 c = CUR_CHAR(l);
3646 /*
3647 * NOTE: 4.4.5 Included in Literal
3648 * When a parameter entity reference appears in a literal entity
3649 * value, ... a single or double quote character in the replacement
3650 * text is always treated as a normal data character and will not
3651 * terminate the literal.
3652 * In practice it means we stop the loop only when back at parsing
3653 * the initial entity and the quote is found
3654 */
William M. Brack871611b2003-10-18 04:53:14 +00003655 while ((IS_CHAR(c)) && ((c != stop) || /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00003656 (ctxt->input != input))) {
3657 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003658 xmlChar *tmp;
3659
Owen Taylor3473f882001-02-23 17:55:21 +00003660 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003661 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3662 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003663 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00003664 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00003665 return(NULL);
3666 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003667 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003668 }
3669 COPY_BUF(l,buf,len,c);
3670 NEXTL(l);
3671 /*
3672 * Pop-up of finished entities.
3673 */
3674 while ((RAW == 0) && (ctxt->inputNr > 1)) /* non input consuming */
3675 xmlPopInput(ctxt);
3676
3677 GROW;
3678 c = CUR_CHAR(l);
3679 if (c == 0) {
3680 GROW;
3681 c = CUR_CHAR(l);
3682 }
3683 }
3684 buf[len] = 0;
3685
3686 /*
3687 * Raise problem w.r.t. '&' and '%' being used in non-entities
3688 * reference constructs. Note Charref will be handled in
3689 * xmlStringDecodeEntities()
3690 */
3691 cur = buf;
3692 while (*cur != 0) { /* non input consuming */
3693 if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
3694 xmlChar *name;
3695 xmlChar tmp = *cur;
3696
3697 cur++;
3698 name = xmlParseStringName(ctxt, &cur);
3699 if ((name == NULL) || (*cur != ';')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003700 xmlFatalErrMsgInt(ctxt, XML_ERR_ENTITY_CHAR_ERROR,
Owen Taylor3473f882001-02-23 17:55:21 +00003701 "EntityValue: '%c' forbidden except for entities references\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003702 tmp);
Owen Taylor3473f882001-02-23 17:55:21 +00003703 }
Daniel Veillard5151c062001-10-23 13:10:19 +00003704 if ((tmp == '%') && (ctxt->inSubset == 1) &&
3705 (ctxt->inputNr == 1)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003706 xmlFatalErr(ctxt, XML_ERR_ENTITY_PE_INTERNAL, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003707 }
3708 if (name != NULL)
3709 xmlFree(name);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00003710 if (*cur == 0)
3711 break;
Owen Taylor3473f882001-02-23 17:55:21 +00003712 }
3713 cur++;
3714 }
3715
3716 /*
3717 * Then PEReference entities are substituted.
3718 */
3719 if (c != stop) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003720 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003721 xmlFree(buf);
3722 } else {
3723 NEXT;
3724 /*
3725 * NOTE: 4.4.7 Bypassed
3726 * When a general entity reference appears in the EntityValue in
3727 * an entity declaration, it is bypassed and left as is.
3728 * so XML_SUBSTITUTE_REF is not set here.
3729 */
3730 ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF,
3731 0, 0, 0);
3732 if (orig != NULL)
3733 *orig = buf;
3734 else
3735 xmlFree(buf);
3736 }
3737
3738 return(ret);
3739}
3740
3741/**
Daniel Veillard01c13b52002-12-10 15:19:08 +00003742 * xmlParseAttValueComplex:
3743 * @ctxt: an XML parser context
Daniel Veillard0fb18932003-09-07 09:14:37 +00003744 * @len: the resulting attribute len
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003745 * @normalize: wether to apply the inner normalization
Daniel Veillard01c13b52002-12-10 15:19:08 +00003746 *
3747 * parse a value for an attribute, this is the fallback function
3748 * of xmlParseAttValue() when the attribute parsing requires handling
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003749 * of non-ASCII characters, or normalization compaction.
Daniel Veillard01c13b52002-12-10 15:19:08 +00003750 *
3751 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
3752 */
Daniel Veillard0fb18932003-09-07 09:14:37 +00003753static xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003754xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) {
Daniel Veillarde72c7562002-05-31 09:47:30 +00003755 xmlChar limit = 0;
3756 xmlChar *buf = NULL;
Daniel Veillard68b6e022008-03-31 09:26:00 +00003757 xmlChar *rep = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00003758 int len = 0;
3759 int buf_size = 0;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003760 int c, l, in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003761 xmlChar *current = NULL;
3762 xmlEntityPtr ent;
3763
Owen Taylor3473f882001-02-23 17:55:21 +00003764 if (NXT(0) == '"') {
3765 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3766 limit = '"';
3767 NEXT;
3768 } else if (NXT(0) == '\'') {
3769 limit = '\'';
3770 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3771 NEXT;
3772 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003773 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003774 return(NULL);
3775 }
Daniel Veillard68b6e022008-03-31 09:26:00 +00003776
Owen Taylor3473f882001-02-23 17:55:21 +00003777 /*
3778 * allocate a translation buffer.
3779 */
3780 buf_size = XML_PARSER_BUFFER_SIZE;
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003781 buf = (xmlChar *) xmlMallocAtomic(buf_size * sizeof(xmlChar));
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003782 if (buf == NULL) goto mem_error;
Owen Taylor3473f882001-02-23 17:55:21 +00003783
3784 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003785 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +00003786 */
3787 c = CUR_CHAR(l);
Daniel Veillardfdc91562002-07-01 21:52:03 +00003788 while ((NXT(0) != limit) && /* checked */
Daniel Veillardb9e5acc2007-06-12 13:43:00 +00003789 (IS_CHAR(c)) && (c != '<')) {
Owen Taylor3473f882001-02-23 17:55:21 +00003790 if (c == 0) break;
Daniel Veillardfdc91562002-07-01 21:52:03 +00003791 if (c == '&') {
Daniel Veillard62998c02003-09-15 12:56:36 +00003792 in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003793 if (NXT(1) == '#') {
3794 int val = xmlParseCharRef(ctxt);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003795
Owen Taylor3473f882001-02-23 17:55:21 +00003796 if (val == '&') {
Daniel Veillard319a7422001-09-11 09:27:09 +00003797 if (ctxt->replaceEntities) {
3798 if (len > buf_size - 10) {
Daniel Veillard0161e632008-08-28 15:36:32 +00003799 growBuffer(buf, 10);
Daniel Veillard319a7422001-09-11 09:27:09 +00003800 }
3801 buf[len++] = '&';
3802 } else {
3803 /*
3804 * The reparsing will be done in xmlStringGetNodeList()
3805 * called by the attribute() function in SAX.c
3806 */
Daniel Veillard319a7422001-09-11 09:27:09 +00003807 if (len > buf_size - 10) {
Daniel Veillard0161e632008-08-28 15:36:32 +00003808 growBuffer(buf, 10);
Daniel Veillard319a7422001-09-11 09:27:09 +00003809 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003810 buf[len++] = '&';
3811 buf[len++] = '#';
3812 buf[len++] = '3';
3813 buf[len++] = '8';
3814 buf[len++] = ';';
Owen Taylor3473f882001-02-23 17:55:21 +00003815 }
Daniel Veillarddc171602008-03-26 17:41:38 +00003816 } else if (val != 0) {
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00003817 if (len > buf_size - 10) {
Daniel Veillard0161e632008-08-28 15:36:32 +00003818 growBuffer(buf, 10);
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00003819 }
Owen Taylor3473f882001-02-23 17:55:21 +00003820 len += xmlCopyChar(0, &buf[len], val);
3821 }
3822 } else {
3823 ent = xmlParseEntityRef(ctxt);
Daniel Veillardcba68392008-08-29 12:43:40 +00003824 ctxt->nbentities++;
3825 if (ent != NULL)
3826 ctxt->nbentities += ent->owner;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003827 if ((ent != NULL) &&
3828 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
3829 if (len > buf_size - 10) {
Daniel Veillard0161e632008-08-28 15:36:32 +00003830 growBuffer(buf, 10);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003831 }
3832 if ((ctxt->replaceEntities == 0) &&
3833 (ent->content[0] == '&')) {
3834 buf[len++] = '&';
3835 buf[len++] = '#';
3836 buf[len++] = '3';
3837 buf[len++] = '8';
3838 buf[len++] = ';';
3839 } else {
3840 buf[len++] = ent->content[0];
3841 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003842 } else if ((ent != NULL) &&
3843 (ctxt->replaceEntities != 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003844 if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
3845 rep = xmlStringDecodeEntities(ctxt, ent->content,
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003846 XML_SUBSTITUTE_REF,
3847 0, 0, 0);
Owen Taylor3473f882001-02-23 17:55:21 +00003848 if (rep != NULL) {
3849 current = rep;
3850 while (*current != 0) { /* non input consuming */
Daniel Veillard283d5022009-08-25 17:18:39 +02003851 if ((*current == 0xD) || (*current == 0xA) ||
3852 (*current == 0x9)) {
3853 buf[len++] = 0x20;
3854 current++;
3855 } else
3856 buf[len++] = *current++;
Owen Taylor3473f882001-02-23 17:55:21 +00003857 if (len > buf_size - 10) {
Daniel Veillard0161e632008-08-28 15:36:32 +00003858 growBuffer(buf, 10);
Owen Taylor3473f882001-02-23 17:55:21 +00003859 }
3860 }
3861 xmlFree(rep);
Daniel Veillard68b6e022008-03-31 09:26:00 +00003862 rep = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00003863 }
3864 } else {
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00003865 if (len > buf_size - 10) {
Daniel Veillard0161e632008-08-28 15:36:32 +00003866 growBuffer(buf, 10);
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00003867 }
Owen Taylor3473f882001-02-23 17:55:21 +00003868 if (ent->content != NULL)
3869 buf[len++] = ent->content[0];
3870 }
3871 } else if (ent != NULL) {
3872 int i = xmlStrlen(ent->name);
3873 const xmlChar *cur = ent->name;
3874
3875 /*
3876 * This may look absurd but is needed to detect
3877 * entities problems
3878 */
3879 if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
3880 (ent->content != NULL)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003881 rep = xmlStringDecodeEntities(ctxt, ent->content,
Daniel Veillard68b6e022008-03-31 09:26:00 +00003882 XML_SUBSTITUTE_REF, 0, 0, 0);
3883 if (rep != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +00003884 xmlFree(rep);
Daniel Veillard68b6e022008-03-31 09:26:00 +00003885 rep = NULL;
3886 }
Owen Taylor3473f882001-02-23 17:55:21 +00003887 }
3888
3889 /*
3890 * Just output the reference
3891 */
3892 buf[len++] = '&';
Daniel Veillard0161e632008-08-28 15:36:32 +00003893 while (len > buf_size - i - 10) {
3894 growBuffer(buf, i + 10);
Owen Taylor3473f882001-02-23 17:55:21 +00003895 }
3896 for (;i > 0;i--)
3897 buf[len++] = *cur++;
3898 buf[len++] = ';';
3899 }
3900 }
3901 } else {
3902 if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003903 if ((len != 0) || (!normalize)) {
3904 if ((!normalize) || (!in_space)) {
3905 COPY_BUF(l,buf,len,0x20);
Daniel Veillard0161e632008-08-28 15:36:32 +00003906 while (len > buf_size - 10) {
3907 growBuffer(buf, 10);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003908 }
3909 }
3910 in_space = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003911 }
3912 } else {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003913 in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003914 COPY_BUF(l,buf,len,c);
3915 if (len > buf_size - 10) {
Daniel Veillard0161e632008-08-28 15:36:32 +00003916 growBuffer(buf, 10);
Owen Taylor3473f882001-02-23 17:55:21 +00003917 }
3918 }
3919 NEXTL(l);
3920 }
3921 GROW;
3922 c = CUR_CHAR(l);
3923 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003924 if ((in_space) && (normalize)) {
3925 while (buf[len - 1] == 0x20) len--;
3926 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00003927 buf[len] = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003928 if (RAW == '<') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003929 xmlFatalErr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003930 } else if (RAW != limit) {
Daniel Veillardb9e5acc2007-06-12 13:43:00 +00003931 if ((c != 0) && (!IS_CHAR(c))) {
3932 xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
3933 "invalid character in attribute value\n");
3934 } else {
3935 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
3936 "AttValue: ' expected\n");
3937 }
Owen Taylor3473f882001-02-23 17:55:21 +00003938 } else
3939 NEXT;
Daniel Veillard0fb18932003-09-07 09:14:37 +00003940 if (attlen != NULL) *attlen = len;
Owen Taylor3473f882001-02-23 17:55:21 +00003941 return(buf);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003942
3943mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003944 xmlErrMemory(ctxt, NULL);
Daniel Veillard68b6e022008-03-31 09:26:00 +00003945 if (buf != NULL)
3946 xmlFree(buf);
3947 if (rep != NULL)
3948 xmlFree(rep);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003949 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003950}
3951
3952/**
Daniel Veillard0fb18932003-09-07 09:14:37 +00003953 * xmlParseAttValue:
3954 * @ctxt: an XML parser context
3955 *
3956 * parse a value for an attribute
3957 * Note: the parser won't do substitution of entities here, this
3958 * will be handled later in xmlStringGetNodeList
3959 *
3960 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
3961 * "'" ([^<&'] | Reference)* "'"
3962 *
3963 * 3.3.3 Attribute-Value Normalization:
3964 * Before the value of an attribute is passed to the application or
3965 * checked for validity, the XML processor must normalize it as follows:
3966 * - a character reference is processed by appending the referenced
3967 * character to the attribute value
3968 * - an entity reference is processed by recursively processing the
3969 * replacement text of the entity
3970 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
3971 * appending #x20 to the normalized value, except that only a single
3972 * #x20 is appended for a "#xD#xA" sequence that is part of an external
3973 * parsed entity or the literal entity value of an internal parsed entity
3974 * - other characters are processed by appending them to the normalized value
3975 * If the declared value is not CDATA, then the XML processor must further
3976 * process the normalized attribute value by discarding any leading and
3977 * trailing space (#x20) characters, and by replacing sequences of space
3978 * (#x20) characters by a single space (#x20) character.
3979 * All attributes for which no declaration has been read should be treated
3980 * by a non-validating parser as if declared CDATA.
3981 *
3982 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
3983 */
3984
3985
3986xmlChar *
3987xmlParseAttValue(xmlParserCtxtPtr ctxt) {
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00003988 if ((ctxt == NULL) || (ctxt->input == NULL)) return(NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003989 return(xmlParseAttValueInternal(ctxt, NULL, NULL, 0));
Daniel Veillard0fb18932003-09-07 09:14:37 +00003990}
3991
3992/**
Owen Taylor3473f882001-02-23 17:55:21 +00003993 * xmlParseSystemLiteral:
3994 * @ctxt: an XML parser context
3995 *
3996 * parse an XML Literal
3997 *
3998 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
3999 *
4000 * Returns the SystemLiteral parsed or NULL
4001 */
4002
4003xmlChar *
4004xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
4005 xmlChar *buf = NULL;
4006 int len = 0;
4007 int size = XML_PARSER_BUFFER_SIZE;
4008 int cur, l;
4009 xmlChar stop;
4010 int state = ctxt->instate;
4011 int count = 0;
4012
4013 SHRINK;
4014 if (RAW == '"') {
4015 NEXT;
4016 stop = '"';
4017 } else if (RAW == '\'') {
4018 NEXT;
4019 stop = '\'';
4020 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004021 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004022 return(NULL);
4023 }
4024
Daniel Veillard3c908dc2003-04-19 00:07:51 +00004025 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00004026 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004027 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004028 return(NULL);
4029 }
4030 ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
4031 cur = CUR_CHAR(l);
William M. Brack871611b2003-10-18 04:53:14 +00004032 while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00004033 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00004034 xmlChar *tmp;
4035
Owen Taylor3473f882001-02-23 17:55:21 +00004036 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00004037 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4038 if (tmp == NULL) {
4039 xmlFree(buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004040 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004041 ctxt->instate = (xmlParserInputState) state;
4042 return(NULL);
4043 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00004044 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00004045 }
4046 count++;
4047 if (count > 50) {
4048 GROW;
4049 count = 0;
4050 }
4051 COPY_BUF(l,buf,len,cur);
4052 NEXTL(l);
4053 cur = CUR_CHAR(l);
4054 if (cur == 0) {
4055 GROW;
4056 SHRINK;
4057 cur = CUR_CHAR(l);
4058 }
4059 }
4060 buf[len] = 0;
4061 ctxt->instate = (xmlParserInputState) state;
William M. Brack871611b2003-10-18 04:53:14 +00004062 if (!IS_CHAR(cur)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004063 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004064 } else {
4065 NEXT;
4066 }
4067 return(buf);
4068}
4069
4070/**
4071 * xmlParsePubidLiteral:
4072 * @ctxt: an XML parser context
4073 *
4074 * parse an XML public literal
4075 *
4076 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
4077 *
4078 * Returns the PubidLiteral parsed or NULL.
4079 */
4080
4081xmlChar *
4082xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
4083 xmlChar *buf = NULL;
4084 int len = 0;
4085 int size = XML_PARSER_BUFFER_SIZE;
4086 xmlChar cur;
4087 xmlChar stop;
4088 int count = 0;
Daniel Veillard4a7ae502002-02-18 19:18:17 +00004089 xmlParserInputState oldstate = ctxt->instate;
Owen Taylor3473f882001-02-23 17:55:21 +00004090
4091 SHRINK;
4092 if (RAW == '"') {
4093 NEXT;
4094 stop = '"';
4095 } else if (RAW == '\'') {
4096 NEXT;
4097 stop = '\'';
4098 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004099 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004100 return(NULL);
4101 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00004102 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00004103 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004104 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004105 return(NULL);
4106 }
Daniel Veillard4a7ae502002-02-18 19:18:17 +00004107 ctxt->instate = XML_PARSER_PUBLIC_LITERAL;
Owen Taylor3473f882001-02-23 17:55:21 +00004108 cur = CUR;
William M. Brack76e95df2003-10-18 16:20:14 +00004109 while ((IS_PUBIDCHAR_CH(cur)) && (cur != stop)) { /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00004110 if (len + 1 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00004111 xmlChar *tmp;
4112
Owen Taylor3473f882001-02-23 17:55:21 +00004113 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00004114 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4115 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004116 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00004117 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00004118 return(NULL);
4119 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00004120 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00004121 }
4122 buf[len++] = cur;
4123 count++;
4124 if (count > 50) {
4125 GROW;
4126 count = 0;
4127 }
4128 NEXT;
4129 cur = CUR;
4130 if (cur == 0) {
4131 GROW;
4132 SHRINK;
4133 cur = CUR;
4134 }
4135 }
4136 buf[len] = 0;
4137 if (cur != stop) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004138 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004139 } else {
4140 NEXT;
4141 }
Daniel Veillard4a7ae502002-02-18 19:18:17 +00004142 ctxt->instate = oldstate;
Owen Taylor3473f882001-02-23 17:55:21 +00004143 return(buf);
4144}
4145
Daniel Veillard8ed10722009-08-20 19:17:36 +02004146static void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata);
Daniel Veillard0a119eb2005-07-20 13:46:00 +00004147
/*
 * Lookup table used by the inner loop of the character data fast path.
 * An entry is non-zero (and equal to its own index) for the bytes the
 * fast path may skip over without further inspection: TAB (0x09) and
 * 0x20..0x7F, with the exception of '&' (0x26), '<' (0x3C) and
 * ']' (0x5D). Every other byte, including LF, CR and all bytes >= 0x80,
 * maps to zero and stops the scan.
 */
static const unsigned char test_char_data[256] = {
    /* 0x00 - 0x0F: only TAB is accepted, CR/LF are handled separately */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x10 - 0x1F */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x20 - 0x2F: '&' (0x26) stops the scan */
    0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x00, 0x27,
    0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
    /* 0x30 - 0x3F: '<' (0x3C) stops the scan */
    0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
    0x38, 0x39, 0x3A, 0x3B, 0x00, 0x3D, 0x3E, 0x3F,
    /* 0x40 - 0x4F */
    0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
    0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F,
    /* 0x50 - 0x5F: ']' (0x5D) stops the scan */
    0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
    0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x00, 0x5E, 0x5F,
    /* 0x60 - 0x6F */
    0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
    0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
    /* 0x70 - 0x7F */
    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
    0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F,
    /* 0x80 - 0xFF: non-ascii, never accepted by the fast path */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
};
4185
Owen Taylor3473f882001-02-23 17:55:21 +00004186/**
4187 * xmlParseCharData:
4188 * @ctxt: an XML parser context
4189 * @cdata: int indicating whether we are within a CDATA section
4190 *
4191 * parse a CharData section.
4192 * if we are within a CDATA section ']]>' marks an end of section.
4193 *
4194 * The right angle bracket (>) may be represented using the string "&gt;",
4195 * and must, for compatibility, be escaped using "&gt;" or a character
4196 * reference when it appears in the string "]]>" in content, when that
4197 * string is not marking the end of a CDATA section.
4198 *
4199 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
4200 */
4201
4202void
4203xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00004204 const xmlChar *in;
Daniel Veillard48b2f892001-02-25 16:11:03 +00004205 int nbchar = 0;
Daniel Veillard50582112001-03-26 22:52:16 +00004206 int line = ctxt->input->line;
4207 int col = ctxt->input->col;
Daniel Veillard0714c5b2005-01-23 00:01:01 +00004208 int ccol;
Daniel Veillard48b2f892001-02-25 16:11:03 +00004209
4210 SHRINK;
4211 GROW;
4212 /*
4213 * Accelerated common case where input don't need to be
4214 * modified before passing it to the handler.
4215 */
Daniel Veillardfdc91562002-07-01 21:52:03 +00004216 if (!cdata) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00004217 in = ctxt->input->cur;
4218 do {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004219get_more_space:
Daniel Veillard9e264ad2008-01-11 06:10:16 +00004220 while (*in == 0x20) { in++; ctxt->input->col++; }
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004221 if (*in == 0xA) {
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004222 do {
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00004223 ctxt->input->line++; ctxt->input->col = 1;
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004224 in++;
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004225 } while (*in == 0xA);
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004226 goto get_more_space;
4227 }
4228 if (*in == '<') {
4229 nbchar = in - ctxt->input->cur;
4230 if (nbchar > 0) {
4231 const xmlChar *tmp = ctxt->input->cur;
4232 ctxt->input->cur = in;
4233
Daniel Veillard34099b42004-11-04 17:34:35 +00004234 if ((ctxt->sax != NULL) &&
4235 (ctxt->sax->ignorableWhitespace !=
4236 ctxt->sax->characters)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004237 if (areBlanks(ctxt, tmp, nbchar, 1)) {
Daniel Veillard32acf0c2005-03-31 14:12:37 +00004238 if (ctxt->sax->ignorableWhitespace != NULL)
4239 ctxt->sax->ignorableWhitespace(ctxt->userData,
4240 tmp, nbchar);
Daniel Veillard1114d002006-10-12 16:24:35 +00004241 } else {
4242 if (ctxt->sax->characters != NULL)
4243 ctxt->sax->characters(ctxt->userData,
4244 tmp, nbchar);
4245 if (*ctxt->space == -1)
4246 *ctxt->space = -2;
4247 }
Daniel Veillard34099b42004-11-04 17:34:35 +00004248 } else if ((ctxt->sax != NULL) &&
4249 (ctxt->sax->characters != NULL)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004250 ctxt->sax->characters(ctxt->userData,
4251 tmp, nbchar);
4252 }
4253 }
4254 return;
4255 }
Daniel Veillard0714c5b2005-01-23 00:01:01 +00004256
Daniel Veillard3ed155f2001-04-29 19:56:59 +00004257get_more:
Daniel Veillard0714c5b2005-01-23 00:01:01 +00004258 ccol = ctxt->input->col;
Daniel Veillard0a119eb2005-07-20 13:46:00 +00004259 while (test_char_data[*in]) {
4260 in++;
4261 ccol++;
4262 }
Daniel Veillard0714c5b2005-01-23 00:01:01 +00004263 ctxt->input->col = ccol;
Daniel Veillard561b7f82002-03-20 21:55:57 +00004264 if (*in == 0xA) {
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004265 do {
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00004266 ctxt->input->line++; ctxt->input->col = 1;
Daniel Veillard3ed155f2001-04-29 19:56:59 +00004267 in++;
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004268 } while (*in == 0xA);
Daniel Veillard3ed155f2001-04-29 19:56:59 +00004269 goto get_more;
Daniel Veillard561b7f82002-03-20 21:55:57 +00004270 }
4271 if (*in == ']') {
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00004272 if ((in[1] == ']') && (in[2] == '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004273 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00004274 ctxt->input->cur = in;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00004275 return;
4276 }
4277 in++;
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00004278 ctxt->input->col++;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00004279 goto get_more;
4280 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00004281 nbchar = in - ctxt->input->cur;
Daniel Veillard80f32572001-03-07 19:45:40 +00004282 if (nbchar > 0) {
Daniel Veillard34099b42004-11-04 17:34:35 +00004283 if ((ctxt->sax != NULL) &&
4284 (ctxt->sax->ignorableWhitespace !=
Daniel Veillard40412cd2003-09-03 13:28:32 +00004285 ctxt->sax->characters) &&
William M. Brack76e95df2003-10-18 16:20:14 +00004286 (IS_BLANK_CH(*ctxt->input->cur))) {
Daniel Veillarda7374592001-05-10 14:17:55 +00004287 const xmlChar *tmp = ctxt->input->cur;
4288 ctxt->input->cur = in;
Daniel Veillard40412cd2003-09-03 13:28:32 +00004289
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004290 if (areBlanks(ctxt, tmp, nbchar, 0)) {
Daniel Veillard32acf0c2005-03-31 14:12:37 +00004291 if (ctxt->sax->ignorableWhitespace != NULL)
4292 ctxt->sax->ignorableWhitespace(ctxt->userData,
4293 tmp, nbchar);
Daniel Veillard1114d002006-10-12 16:24:35 +00004294 } else {
4295 if (ctxt->sax->characters != NULL)
4296 ctxt->sax->characters(ctxt->userData,
4297 tmp, nbchar);
4298 if (*ctxt->space == -1)
4299 *ctxt->space = -2;
4300 }
Daniel Veillard3ed27bd2001-06-17 17:58:17 +00004301 line = ctxt->input->line;
4302 col = ctxt->input->col;
Daniel Veillard34099b42004-11-04 17:34:35 +00004303 } else if (ctxt->sax != NULL) {
Daniel Veillard80f32572001-03-07 19:45:40 +00004304 if (ctxt->sax->characters != NULL)
4305 ctxt->sax->characters(ctxt->userData,
4306 ctxt->input->cur, nbchar);
Daniel Veillard3ed27bd2001-06-17 17:58:17 +00004307 line = ctxt->input->line;
4308 col = ctxt->input->col;
Daniel Veillard80f32572001-03-07 19:45:40 +00004309 }
Daniel Veillard1dc9feb2008-11-17 15:59:21 +00004310 /* something really bad happened in the SAX callback */
4311 if (ctxt->instate != XML_PARSER_CONTENT)
4312 return;
Daniel Veillard48b2f892001-02-25 16:11:03 +00004313 }
4314 ctxt->input->cur = in;
Daniel Veillard561b7f82002-03-20 21:55:57 +00004315 if (*in == 0xD) {
4316 in++;
4317 if (*in == 0xA) {
4318 ctxt->input->cur = in;
Daniel Veillard48b2f892001-02-25 16:11:03 +00004319 in++;
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00004320 ctxt->input->line++; ctxt->input->col = 1;
Daniel Veillard561b7f82002-03-20 21:55:57 +00004321 continue; /* while */
Daniel Veillard48b2f892001-02-25 16:11:03 +00004322 }
Daniel Veillard561b7f82002-03-20 21:55:57 +00004323 in--;
4324 }
4325 if (*in == '<') {
4326 return;
4327 }
4328 if (*in == '&') {
4329 return;
Daniel Veillard48b2f892001-02-25 16:11:03 +00004330 }
4331 SHRINK;
4332 GROW;
4333 in = ctxt->input->cur;
William M. Brackc07329e2003-09-08 01:57:30 +00004334 } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
Daniel Veillard48b2f892001-02-25 16:11:03 +00004335 nbchar = 0;
4336 }
Daniel Veillard50582112001-03-26 22:52:16 +00004337 ctxt->input->line = line;
4338 ctxt->input->col = col;
Daniel Veillard48b2f892001-02-25 16:11:03 +00004339 xmlParseCharDataComplex(ctxt, cdata);
4340}
4341
Daniel Veillard01c13b52002-12-10 15:19:08 +00004342/**
4343 * xmlParseCharDataComplex:
4344 * @ctxt: an XML parser context
4345 * @cdata: int indicating whether we are within a CDATA section
4346 *
4347 * parse a CharData section.this is the fallback function
4348 * of xmlParseCharData() when the parsing requires handling
4349 * of non-ASCII characters.
4350 */
Daniel Veillard8ed10722009-08-20 19:17:36 +02004351static void
Daniel Veillard48b2f892001-02-25 16:11:03 +00004352xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) {
Owen Taylor3473f882001-02-23 17:55:21 +00004353 xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
4354 int nbchar = 0;
4355 int cur, l;
4356 int count = 0;
4357
4358 SHRINK;
4359 GROW;
4360 cur = CUR_CHAR(l);
Daniel Veillardfdc91562002-07-01 21:52:03 +00004361 while ((cur != '<') && /* checked */
4362 (cur != '&') &&
William M. Brack871611b2003-10-18 04:53:14 +00004363 (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
Owen Taylor3473f882001-02-23 17:55:21 +00004364 if ((cur == ']') && (NXT(1) == ']') &&
4365 (NXT(2) == '>')) {
4366 if (cdata) break;
4367 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004368 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004369 }
4370 }
4371 COPY_BUF(l,buf,nbchar,cur);
4372 if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
Daniel Veillard092643b2003-09-25 14:29:29 +00004373 buf[nbchar] = 0;
4374
Owen Taylor3473f882001-02-23 17:55:21 +00004375 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00004376 * OK the segment is to be consumed as chars.
Owen Taylor3473f882001-02-23 17:55:21 +00004377 */
4378 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004379 if (areBlanks(ctxt, buf, nbchar, 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00004380 if (ctxt->sax->ignorableWhitespace != NULL)
4381 ctxt->sax->ignorableWhitespace(ctxt->userData,
4382 buf, nbchar);
4383 } else {
4384 if (ctxt->sax->characters != NULL)
4385 ctxt->sax->characters(ctxt->userData, buf, nbchar);
Daniel Veillard1114d002006-10-12 16:24:35 +00004386 if ((ctxt->sax->characters !=
4387 ctxt->sax->ignorableWhitespace) &&
4388 (*ctxt->space == -1))
4389 *ctxt->space = -2;
Owen Taylor3473f882001-02-23 17:55:21 +00004390 }
4391 }
4392 nbchar = 0;
Daniel Veillard1dc9feb2008-11-17 15:59:21 +00004393 /* something really bad happened in the SAX callback */
4394 if (ctxt->instate != XML_PARSER_CONTENT)
4395 return;
Owen Taylor3473f882001-02-23 17:55:21 +00004396 }
4397 count++;
4398 if (count > 50) {
4399 GROW;
4400 count = 0;
4401 }
4402 NEXTL(l);
4403 cur = CUR_CHAR(l);
4404 }
4405 if (nbchar != 0) {
Daniel Veillard092643b2003-09-25 14:29:29 +00004406 buf[nbchar] = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00004407 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00004408 * OK the segment is to be consumed as chars.
Owen Taylor3473f882001-02-23 17:55:21 +00004409 */
4410 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004411 if (areBlanks(ctxt, buf, nbchar, 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00004412 if (ctxt->sax->ignorableWhitespace != NULL)
4413 ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
4414 } else {
4415 if (ctxt->sax->characters != NULL)
4416 ctxt->sax->characters(ctxt->userData, buf, nbchar);
Daniel Veillard1114d002006-10-12 16:24:35 +00004417 if ((ctxt->sax->characters != ctxt->sax->ignorableWhitespace) &&
4418 (*ctxt->space == -1))
4419 *ctxt->space = -2;
Owen Taylor3473f882001-02-23 17:55:21 +00004420 }
4421 }
4422 }
Daniel Veillard3b1478b2006-04-24 08:50:10 +00004423 if ((cur != 0) && (!IS_CHAR(cur))) {
4424 /* Generate the error and skip the offending character */
4425 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4426 "PCDATA invalid Char value %d\n",
4427 cur);
4428 NEXTL(l);
4429 }
Owen Taylor3473f882001-02-23 17:55:21 +00004430}
4431
4432/**
4433 * xmlParseExternalID:
4434 * @ctxt: an XML parser context
4435 * @publicID: a xmlChar** receiving PubidLiteral
4436 * @strict: indicate whether we should restrict parsing to only
4437 * production [75], see NOTE below
4438 *
4439 * Parse an External ID or a Public ID
4440 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00004441 * NOTE: Productions [75] and [83] interact badly since [75] can generate
Owen Taylor3473f882001-02-23 17:55:21 +00004442 * 'PUBLIC' S PubidLiteral S SystemLiteral
4443 *
4444 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
4445 * | 'PUBLIC' S PubidLiteral S SystemLiteral
4446 *
4447 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
4448 *
4449 * Returns the function returns SystemLiteral and in the second
4450 * case publicID receives PubidLiteral, is strict is off
4451 * it is possible to return NULL and have publicID set.
4452 */
4453
4454xmlChar *
4455xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
4456 xmlChar *URI = NULL;
4457
4458 SHRINK;
Daniel Veillard146c9122001-03-22 15:22:27 +00004459
4460 *publicID = NULL;
Daniel Veillarda07050d2003-10-19 14:46:32 +00004461 if (CMP6(CUR_PTR, 'S', 'Y', 'S', 'T', 'E', 'M')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004462 SKIP(6);
William M. Brack76e95df2003-10-18 16:20:14 +00004463 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004464 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4465 "Space required after 'SYSTEM'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004466 }
4467 SKIP_BLANKS;
4468 URI = xmlParseSystemLiteral(ctxt);
4469 if (URI == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004470 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004471 }
Daniel Veillarda07050d2003-10-19 14:46:32 +00004472 } else if (CMP6(CUR_PTR, 'P', 'U', 'B', 'L', 'I', 'C')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004473 SKIP(6);
William M. Brack76e95df2003-10-18 16:20:14 +00004474 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004475 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004476 "Space required after 'PUBLIC'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004477 }
4478 SKIP_BLANKS;
4479 *publicID = xmlParsePubidLiteral(ctxt);
4480 if (*publicID == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004481 xmlFatalErr(ctxt, XML_ERR_PUBID_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004482 }
4483 if (strict) {
4484 /*
4485 * We don't handle [83] so "S SystemLiteral" is required.
4486 */
William M. Brack76e95df2003-10-18 16:20:14 +00004487 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004488 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004489 "Space required after the Public Identifier\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004490 }
4491 } else {
4492 /*
4493 * We handle [83] so we return immediately, if
4494 * "S SystemLiteral" is not detected. From a purely parsing
4495 * point of view that's a nice mess.
4496 */
4497 const xmlChar *ptr;
4498 GROW;
4499
4500 ptr = CUR_PTR;
William M. Brack76e95df2003-10-18 16:20:14 +00004501 if (!IS_BLANK_CH(*ptr)) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004502
William M. Brack76e95df2003-10-18 16:20:14 +00004503 while (IS_BLANK_CH(*ptr)) ptr++; /* TODO: dangerous, fix ! */
Owen Taylor3473f882001-02-23 17:55:21 +00004504 if ((*ptr != '\'') && (*ptr != '"')) return(NULL);
4505 }
4506 SKIP_BLANKS;
4507 URI = xmlParseSystemLiteral(ctxt);
4508 if (URI == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004509 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004510 }
4511 }
4512 return(URI);
4513}
4514
4515/**
Daniel Veillard4c778d82005-01-23 17:37:44 +00004516 * xmlParseCommentComplex:
Owen Taylor3473f882001-02-23 17:55:21 +00004517 * @ctxt: an XML parser context
Daniel Veillard4c778d82005-01-23 17:37:44 +00004518 * @buf: the already parsed part of the buffer
 * @len:  number of bytes filled in the buffer
4520 * @size: allocated size of the buffer
Owen Taylor3473f882001-02-23 17:55:21 +00004521 *
4522 * Skip an XML (SGML) comment <!-- .... -->
4523 * The spec says that "For compatibility, the string "--" (double-hyphen)
4524 * must not occur within comments. "
Daniel Veillard4c778d82005-01-23 17:37:44 +00004525 * This is the slow routine in case the accelerator for ascii didn't work
Owen Taylor3473f882001-02-23 17:55:21 +00004526 *
4527 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4528 */
Daniel Veillard4c778d82005-01-23 17:37:44 +00004529static void
4530xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar *buf, int len, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +00004531 int q, ql;
4532 int r, rl;
4533 int cur, l;
Owen Taylor3473f882001-02-23 17:55:21 +00004534 int count = 0;
Daniel Veillard051d52c2008-07-29 16:44:59 +00004535 int inputid;
4536
4537 inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00004538
Owen Taylor3473f882001-02-23 17:55:21 +00004539 if (buf == NULL) {
Daniel Veillard4c778d82005-01-23 17:37:44 +00004540 len = 0;
4541 size = XML_PARSER_BUFFER_SIZE;
4542 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4543 if (buf == NULL) {
4544 xmlErrMemory(ctxt, NULL);
4545 return;
4546 }
Owen Taylor3473f882001-02-23 17:55:21 +00004547 }
William M. Brackbf9a73d2007-02-09 00:07:07 +00004548 GROW; /* Assure there's enough input data */
Owen Taylor3473f882001-02-23 17:55:21 +00004549 q = CUR_CHAR(ql);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00004550 if (q == 0)
4551 goto not_terminated;
Daniel Veillardda629342007-08-01 07:49:06 +00004552 if (!IS_CHAR(q)) {
4553 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4554 "xmlParseComment: invalid xmlChar value %d\n",
4555 q);
4556 xmlFree (buf);
4557 return;
4558 }
Owen Taylor3473f882001-02-23 17:55:21 +00004559 NEXTL(ql);
4560 r = CUR_CHAR(rl);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00004561 if (r == 0)
4562 goto not_terminated;
Daniel Veillardda629342007-08-01 07:49:06 +00004563 if (!IS_CHAR(r)) {
4564 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4565 "xmlParseComment: invalid xmlChar value %d\n",
4566 q);
4567 xmlFree (buf);
4568 return;
4569 }
Owen Taylor3473f882001-02-23 17:55:21 +00004570 NEXTL(rl);
4571 cur = CUR_CHAR(l);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00004572 if (cur == 0)
4573 goto not_terminated;
William M. Brack871611b2003-10-18 04:53:14 +00004574 while (IS_CHAR(cur) && /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00004575 ((cur != '>') ||
4576 (r != '-') || (q != '-'))) {
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00004577 if ((r == '-') && (q == '-')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004578 xmlFatalErr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004579 }
4580 if (len + 5 >= size) {
William M. Bracka3215c72004-07-31 16:24:01 +00004581 xmlChar *new_buf;
Owen Taylor3473f882001-02-23 17:55:21 +00004582 size *= 2;
William M. Bracka3215c72004-07-31 16:24:01 +00004583 new_buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4584 if (new_buf == NULL) {
4585 xmlFree (buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004586 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004587 return;
4588 }
William M. Bracka3215c72004-07-31 16:24:01 +00004589 buf = new_buf;
Owen Taylor3473f882001-02-23 17:55:21 +00004590 }
4591 COPY_BUF(ql,buf,len,q);
4592 q = r;
4593 ql = rl;
4594 r = cur;
4595 rl = l;
4596
4597 count++;
4598 if (count > 50) {
4599 GROW;
4600 count = 0;
4601 }
4602 NEXTL(l);
4603 cur = CUR_CHAR(l);
4604 if (cur == 0) {
4605 SHRINK;
4606 GROW;
4607 cur = CUR_CHAR(l);
4608 }
4609 }
4610 buf[len] = 0;
Daniel Veillardda629342007-08-01 07:49:06 +00004611 if (cur == 0) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00004612 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00004613 "Comment not terminated \n<!--%.50s\n", buf);
Daniel Veillardda629342007-08-01 07:49:06 +00004614 } else if (!IS_CHAR(cur)) {
4615 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4616 "xmlParseComment: invalid xmlChar value %d\n",
4617 cur);
Owen Taylor3473f882001-02-23 17:55:21 +00004618 } else {
Daniel Veillard051d52c2008-07-29 16:44:59 +00004619 if (inputid != ctxt->input->id) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004620 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4621 "Comment doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004622 }
4623 NEXT;
4624 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
4625 (!ctxt->disableSAX))
4626 ctxt->sax->comment(ctxt->userData, buf);
Owen Taylor3473f882001-02-23 17:55:21 +00004627 }
Daniel Veillardda629342007-08-01 07:49:06 +00004628 xmlFree(buf);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00004629 return;
4630not_terminated:
4631 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4632 "Comment not terminated\n", NULL);
4633 xmlFree(buf);
Daniel Veillardda629342007-08-01 07:49:06 +00004634 return;
Owen Taylor3473f882001-02-23 17:55:21 +00004635}
Daniel Veillardda629342007-08-01 07:49:06 +00004636
Daniel Veillard4c778d82005-01-23 17:37:44 +00004637/**
4638 * xmlParseComment:
4639 * @ctxt: an XML parser context
4640 *
4641 * Skip an XML (SGML) comment <!-- .... -->
4642 * The spec says that "For compatibility, the string "--" (double-hyphen)
4643 * must not occur within comments. "
4644 *
4645 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4646 */
4647void
4648xmlParseComment(xmlParserCtxtPtr ctxt) {
4649 xmlChar *buf = NULL;
4650 int size = XML_PARSER_BUFFER_SIZE;
Daniel Veillard316a5c32005-01-23 22:56:39 +00004651 int len = 0;
Daniel Veillard4c778d82005-01-23 17:37:44 +00004652 xmlParserInputState state;
4653 const xmlChar *in;
4654 int nbchar = 0, ccol;
Daniel Veillard051d52c2008-07-29 16:44:59 +00004655 int inputid;
Daniel Veillard4c778d82005-01-23 17:37:44 +00004656
4657 /*
4658 * Check that there is a comment right here.
4659 */
4660 if ((RAW != '<') || (NXT(1) != '!') ||
4661 (NXT(2) != '-') || (NXT(3) != '-')) return;
Daniel Veillard4c778d82005-01-23 17:37:44 +00004662 state = ctxt->instate;
4663 ctxt->instate = XML_PARSER_COMMENT;
Daniel Veillard051d52c2008-07-29 16:44:59 +00004664 inputid = ctxt->input->id;
Daniel Veillard4c778d82005-01-23 17:37:44 +00004665 SKIP(4);
4666 SHRINK;
4667 GROW;
4668
4669 /*
4670 * Accelerated common case where input don't need to be
4671 * modified before passing it to the handler.
4672 */
4673 in = ctxt->input->cur;
4674 do {
4675 if (*in == 0xA) {
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004676 do {
Daniel Veillard4c778d82005-01-23 17:37:44 +00004677 ctxt->input->line++; ctxt->input->col = 1;
4678 in++;
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004679 } while (*in == 0xA);
Daniel Veillard4c778d82005-01-23 17:37:44 +00004680 }
4681get_more:
4682 ccol = ctxt->input->col;
4683 while (((*in > '-') && (*in <= 0x7F)) ||
4684 ((*in >= 0x20) && (*in < '-')) ||
4685 (*in == 0x09)) {
4686 in++;
4687 ccol++;
4688 }
4689 ctxt->input->col = ccol;
4690 if (*in == 0xA) {
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004691 do {
Daniel Veillard4c778d82005-01-23 17:37:44 +00004692 ctxt->input->line++; ctxt->input->col = 1;
4693 in++;
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004694 } while (*in == 0xA);
Daniel Veillard4c778d82005-01-23 17:37:44 +00004695 goto get_more;
4696 }
4697 nbchar = in - ctxt->input->cur;
4698 /*
4699 * save current set of data
4700 */
4701 if (nbchar > 0) {
4702 if ((ctxt->sax != NULL) &&
4703 (ctxt->sax->comment != NULL)) {
4704 if (buf == NULL) {
4705 if ((*in == '-') && (in[1] == '-'))
4706 size = nbchar + 1;
4707 else
4708 size = XML_PARSER_BUFFER_SIZE + nbchar;
4709 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4710 if (buf == NULL) {
4711 xmlErrMemory(ctxt, NULL);
4712 ctxt->instate = state;
4713 return;
4714 }
4715 len = 0;
4716 } else if (len + nbchar + 1 >= size) {
4717 xmlChar *new_buf;
4718 size += len + nbchar + XML_PARSER_BUFFER_SIZE;
4719 new_buf = (xmlChar *) xmlRealloc(buf,
4720 size * sizeof(xmlChar));
4721 if (new_buf == NULL) {
4722 xmlFree (buf);
4723 xmlErrMemory(ctxt, NULL);
4724 ctxt->instate = state;
4725 return;
4726 }
4727 buf = new_buf;
4728 }
4729 memcpy(&buf[len], ctxt->input->cur, nbchar);
4730 len += nbchar;
4731 buf[len] = 0;
4732 }
4733 }
4734 ctxt->input->cur = in;
Daniel Veillard9b528c72006-02-05 03:06:15 +00004735 if (*in == 0xA) {
4736 in++;
4737 ctxt->input->line++; ctxt->input->col = 1;
4738 }
Daniel Veillard4c778d82005-01-23 17:37:44 +00004739 if (*in == 0xD) {
4740 in++;
4741 if (*in == 0xA) {
4742 ctxt->input->cur = in;
4743 in++;
4744 ctxt->input->line++; ctxt->input->col = 1;
4745 continue; /* while */
4746 }
4747 in--;
4748 }
4749 SHRINK;
4750 GROW;
4751 in = ctxt->input->cur;
4752 if (*in == '-') {
4753 if (in[1] == '-') {
4754 if (in[2] == '>') {
Daniel Veillard051d52c2008-07-29 16:44:59 +00004755 if (ctxt->input->id != inputid) {
4756 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4757 "comment doesn't start and stop in the same entity\n");
4758 }
Daniel Veillard4c778d82005-01-23 17:37:44 +00004759 SKIP(3);
4760 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
4761 (!ctxt->disableSAX)) {
4762 if (buf != NULL)
4763 ctxt->sax->comment(ctxt->userData, buf);
4764 else
4765 ctxt->sax->comment(ctxt->userData, BAD_CAST "");
4766 }
4767 if (buf != NULL)
4768 xmlFree(buf);
4769 ctxt->instate = state;
4770 return;
4771 }
4772 if (buf != NULL)
4773 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4774 "Comment not terminated \n<!--%.50s\n",
4775 buf);
4776 else
4777 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4778 "Comment not terminated \n", NULL);
4779 in++;
4780 ctxt->input->col++;
4781 }
4782 in++;
4783 ctxt->input->col++;
4784 goto get_more;
4785 }
4786 } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
4787 xmlParseCommentComplex(ctxt, buf, len, size);
4788 ctxt->instate = state;
4789 return;
4790}
4791
Owen Taylor3473f882001-02-23 17:55:21 +00004792
4793/**
4794 * xmlParsePITarget:
4795 * @ctxt: an XML parser context
4796 *
4797 * parse the name of a PI
4798 *
4799 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
4800 *
4801 * Returns the PITarget name or NULL
4802 */
4803
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004804const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00004805xmlParsePITarget(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004806 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00004807
4808 name = xmlParseName(ctxt);
4809 if ((name != NULL) &&
4810 ((name[0] == 'x') || (name[0] == 'X')) &&
4811 ((name[1] == 'm') || (name[1] == 'M')) &&
4812 ((name[2] == 'l') || (name[2] == 'L'))) {
4813 int i;
4814 if ((name[0] == 'x') && (name[1] == 'm') &&
4815 (name[2] == 'l') && (name[3] == 0)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004816 xmlFatalErrMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
Owen Taylor3473f882001-02-23 17:55:21 +00004817 "XML declaration allowed only at the start of the document\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004818 return(name);
4819 } else if (name[3] == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004820 xmlFatalErr(ctxt, XML_ERR_RESERVED_XML_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004821 return(name);
4822 }
4823 for (i = 0;;i++) {
4824 if (xmlW3CPIs[i] == NULL) break;
4825 if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
4826 return(name);
4827 }
Daniel Veillard24eb9782003-10-04 21:08:09 +00004828 xmlWarningMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
4829 "xmlParsePITarget: invalid name prefix 'xml'\n",
4830 NULL, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004831 }
Daniel Veillard37334572008-07-31 08:20:02 +00004832 if ((name != NULL) && (xmlStrchr(name, ':') != NULL)) {
4833 xmlNsErr(ctxt, XML_NS_ERR_COLON,
4834 "colon are forbidden from PI names '%s'\n", name, NULL, NULL);
4835 }
Owen Taylor3473f882001-02-23 17:55:21 +00004836 return(name);
4837}
4838
#ifdef LIBXML_CATALOG_ENABLED
/**
 * xmlParseCatalogPI:
 * @ctxt:  an XML parser context
 * @catalog:  the PI value string
 *
 * Parse the content of an XML Catalog Processing Instruction.
 *
 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
 *
 * Occurs only if allowed by the user and if happening in the Misc
 * part of the document before any doctype information.
 * The quoted URL is added to the catalogs of the parsing context in order
 * to be used if there is a resolution need further down in the document.
 * A malformed value produces an XML_WAR_CATALOG_PI warning, except for a
 * missing '=' after the "catalog" keyword, which is ignored silently.
 */

static void
xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
    const xmlChar *p = catalog;
    const xmlChar *start;
    xmlChar *URL = NULL;
    xmlChar quote;

    /* optional blanks, then the literal keyword "catalog" */
    while (IS_BLANK_CH(*p))
        p++;
    if (xmlStrncmp(p, BAD_CAST"catalog", 7) != 0)
        goto error;
    p += 7;

    while (IS_BLANK_CH(*p))
        p++;
    if (*p != '=')
        return;         /* no warning on this path */
    p++;

    /* the value must be enclosed in matching single or double quotes */
    while (IS_BLANK_CH(*p))
        p++;
    quote = *p;
    if ((quote != '\'') && (quote != '"'))
        goto error;
    p++;
    start = p;
    while ((*p != 0) && (*p != quote))
        p++;
    if (*p == 0)
        goto error;
    URL = xmlStrndup(start, p - start);
    p++;

    /* only blanks may follow the closing quote */
    while (IS_BLANK_CH(*p))
        p++;
    if (*p != 0)
        goto error;

    if (URL != NULL) {
        ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, URL);
        xmlFree(URL);
    }
    return;

error:
    xmlWarningMsg(ctxt, XML_WAR_CATALOG_PI,
                  "Catalog PI syntax error: %s\n",
                  catalog, NULL);
    if (URL != NULL)
        xmlFree(URL);
}
#endif
4900
Owen Taylor3473f882001-02-23 17:55:21 +00004901/**
4902 * xmlParsePI:
4903 * @ctxt: an XML parser context
4904 *
4905 * parse an XML Processing Instruction.
4906 *
4907 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
4908 *
4909 * The processing is transfered to SAX once parsed.
4910 */
4911
4912void
4913xmlParsePI(xmlParserCtxtPtr ctxt) {
4914 xmlChar *buf = NULL;
4915 int len = 0;
4916 int size = XML_PARSER_BUFFER_SIZE;
4917 int cur, l;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004918 const xmlChar *target;
Owen Taylor3473f882001-02-23 17:55:21 +00004919 xmlParserInputState state;
4920 int count = 0;
4921
4922 if ((RAW == '<') && (NXT(1) == '?')) {
4923 xmlParserInputPtr input = ctxt->input;
4924 state = ctxt->instate;
4925 ctxt->instate = XML_PARSER_PI;
4926 /*
4927 * this is a Processing Instruction.
4928 */
4929 SKIP(2);
4930 SHRINK;
4931
4932 /*
4933 * Parse the target name and check for special support like
4934 * namespace.
4935 */
4936 target = xmlParsePITarget(ctxt);
4937 if (target != NULL) {
4938 if ((RAW == '?') && (NXT(1) == '>')) {
4939 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004940 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4941 "PI declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004942 }
4943 SKIP(2);
4944
4945 /*
4946 * SAX: PI detected.
4947 */
4948 if ((ctxt->sax) && (!ctxt->disableSAX) &&
4949 (ctxt->sax->processingInstruction != NULL))
4950 ctxt->sax->processingInstruction(ctxt->userData,
4951 target, NULL);
4952 ctxt->instate = state;
Owen Taylor3473f882001-02-23 17:55:21 +00004953 return;
4954 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00004955 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00004956 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004957 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004958 ctxt->instate = state;
4959 return;
4960 }
4961 cur = CUR;
4962 if (!IS_BLANK(cur)) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00004963 xmlFatalErrMsgStr(ctxt, XML_ERR_SPACE_REQUIRED,
4964 "ParsePI: PI %s space expected\n", target);
Owen Taylor3473f882001-02-23 17:55:21 +00004965 }
4966 SKIP_BLANKS;
4967 cur = CUR_CHAR(l);
William M. Brack871611b2003-10-18 04:53:14 +00004968 while (IS_CHAR(cur) && /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00004969 ((cur != '?') || (NXT(1) != '>'))) {
4970 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00004971 xmlChar *tmp;
4972
Owen Taylor3473f882001-02-23 17:55:21 +00004973 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00004974 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4975 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004976 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00004977 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00004978 ctxt->instate = state;
4979 return;
4980 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00004981 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00004982 }
4983 count++;
4984 if (count > 50) {
4985 GROW;
4986 count = 0;
4987 }
4988 COPY_BUF(l,buf,len,cur);
4989 NEXTL(l);
4990 cur = CUR_CHAR(l);
4991 if (cur == 0) {
4992 SHRINK;
4993 GROW;
4994 cur = CUR_CHAR(l);
4995 }
4996 }
4997 buf[len] = 0;
4998 if (cur != '?') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00004999 xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5000 "ParsePI: PI %s never end ...\n", target);
Owen Taylor3473f882001-02-23 17:55:21 +00005001 } else {
5002 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005003 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5004 "PI declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005005 }
5006 SKIP(2);
5007
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00005008#ifdef LIBXML_CATALOG_ENABLED
5009 if (((state == XML_PARSER_MISC) ||
5010 (state == XML_PARSER_START)) &&
5011 (xmlStrEqual(target, XML_CATALOG_PI))) {
5012 xmlCatalogAllow allow = xmlCatalogGetDefaults();
5013 if ((allow == XML_CATA_ALLOW_DOCUMENT) ||
5014 (allow == XML_CATA_ALLOW_ALL))
5015 xmlParseCatalogPI(ctxt, buf);
5016 }
5017#endif
5018
5019
Owen Taylor3473f882001-02-23 17:55:21 +00005020 /*
5021 * SAX: PI detected.
5022 */
5023 if ((ctxt->sax) && (!ctxt->disableSAX) &&
5024 (ctxt->sax->processingInstruction != NULL))
5025 ctxt->sax->processingInstruction(ctxt->userData,
5026 target, buf);
5027 }
5028 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00005029 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005030 xmlFatalErr(ctxt, XML_ERR_PI_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005031 }
5032 ctxt->instate = state;
5033 }
5034}
5035
5036/**
5037 * xmlParseNotationDecl:
5038 * @ctxt: an XML parser context
5039 *
5040 * parse a notation declaration
5041 *
5042 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
5043 *
5044 * Hence there is actually 3 choices:
5045 * 'PUBLIC' S PubidLiteral
5046 * 'PUBLIC' S PubidLiteral S SystemLiteral
5047 * and 'SYSTEM' S SystemLiteral
5048 *
5049 * See the NOTE on xmlParseExternalID().
5050 */
5051
5052void
5053xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005054 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00005055 xmlChar *Pubid;
5056 xmlChar *Systemid;
5057
Daniel Veillarda07050d2003-10-19 14:46:32 +00005058 if (CMP10(CUR_PTR, '<', '!', 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005059 xmlParserInputPtr input = ctxt->input;
5060 SHRINK;
5061 SKIP(10);
William M. Brack76e95df2003-10-18 16:20:14 +00005062 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005063 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5064 "Space required after '<!NOTATION'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005065 return;
5066 }
5067 SKIP_BLANKS;
5068
Daniel Veillard76d66f42001-05-16 21:05:17 +00005069 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005070 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005071 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005072 return;
5073 }
William M. Brack76e95df2003-10-18 16:20:14 +00005074 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005075 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005076 "Space required after the NOTATION name'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005077 return;
5078 }
Daniel Veillard37334572008-07-31 08:20:02 +00005079 if (xmlStrchr(name, ':') != NULL) {
5080 xmlNsErr(ctxt, XML_NS_ERR_COLON,
5081 "colon are forbidden from notation names '%s'\n",
5082 name, NULL, NULL);
5083 }
Owen Taylor3473f882001-02-23 17:55:21 +00005084 SKIP_BLANKS;
5085
5086 /*
5087 * Parse the IDs.
5088 */
5089 Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
5090 SKIP_BLANKS;
5091
5092 if (RAW == '>') {
5093 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005094 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5095 "Notation declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005096 }
5097 NEXT;
5098 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5099 (ctxt->sax->notationDecl != NULL))
5100 ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
5101 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005102 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005103 }
Owen Taylor3473f882001-02-23 17:55:21 +00005104 if (Systemid != NULL) xmlFree(Systemid);
5105 if (Pubid != NULL) xmlFree(Pubid);
5106 }
5107}
5108
5109/**
5110 * xmlParseEntityDecl:
5111 * @ctxt: an XML parser context
5112 *
5113 * parse <!ENTITY declarations
5114 *
5115 * [70] EntityDecl ::= GEDecl | PEDecl
5116 *
5117 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
5118 *
5119 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
5120 *
5121 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
5122 *
5123 * [74] PEDef ::= EntityValue | ExternalID
5124 *
5125 * [76] NDataDecl ::= S 'NDATA' S Name
5126 *
5127 * [ VC: Notation Declared ]
5128 * The Name must match the declared name of a notation.
5129 */
5130
5131void
5132xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005133 const xmlChar *name = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005134 xmlChar *value = NULL;
5135 xmlChar *URI = NULL, *literal = NULL;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005136 const xmlChar *ndata = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005137 int isParameter = 0;
5138 xmlChar *orig = NULL;
Daniel Veillardf5582f12002-06-11 10:08:16 +00005139 int skipped;
Owen Taylor3473f882001-02-23 17:55:21 +00005140
Daniel Veillard4c778d82005-01-23 17:37:44 +00005141 /* GROW; done in the caller */
Daniel Veillarda07050d2003-10-19 14:46:32 +00005142 if (CMP8(CUR_PTR, '<', '!', 'E', 'N', 'T', 'I', 'T', 'Y')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005143 xmlParserInputPtr input = ctxt->input;
Owen Taylor3473f882001-02-23 17:55:21 +00005144 SHRINK;
5145 SKIP(8);
Daniel Veillardf5582f12002-06-11 10:08:16 +00005146 skipped = SKIP_BLANKS;
5147 if (skipped == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005148 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5149 "Space required after '<!ENTITY'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005150 }
Owen Taylor3473f882001-02-23 17:55:21 +00005151
5152 if (RAW == '%') {
5153 NEXT;
Daniel Veillardf5582f12002-06-11 10:08:16 +00005154 skipped = SKIP_BLANKS;
5155 if (skipped == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005156 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5157 "Space required after '%'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005158 }
Owen Taylor3473f882001-02-23 17:55:21 +00005159 isParameter = 1;
5160 }
5161
Daniel Veillard76d66f42001-05-16 21:05:17 +00005162 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005163 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005164 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5165 "xmlParseEntityDecl: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005166 return;
5167 }
Daniel Veillard37334572008-07-31 08:20:02 +00005168 if (xmlStrchr(name, ':') != NULL) {
5169 xmlNsErr(ctxt, XML_NS_ERR_COLON,
5170 "colon are forbidden from entities names '%s'\n",
5171 name, NULL, NULL);
5172 }
Daniel Veillardf5582f12002-06-11 10:08:16 +00005173 skipped = SKIP_BLANKS;
5174 if (skipped == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005175 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5176 "Space required after the entity name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005177 }
Owen Taylor3473f882001-02-23 17:55:21 +00005178
Daniel Veillardf5582f12002-06-11 10:08:16 +00005179 ctxt->instate = XML_PARSER_ENTITY_DECL;
Owen Taylor3473f882001-02-23 17:55:21 +00005180 /*
5181 * handle the various case of definitions...
5182 */
5183 if (isParameter) {
5184 if ((RAW == '"') || (RAW == '\'')) {
5185 value = xmlParseEntityValue(ctxt, &orig);
5186 if (value) {
5187 if ((ctxt->sax != NULL) &&
5188 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5189 ctxt->sax->entityDecl(ctxt->userData, name,
5190 XML_INTERNAL_PARAMETER_ENTITY,
5191 NULL, NULL, value);
5192 }
5193 } else {
5194 URI = xmlParseExternalID(ctxt, &literal, 1);
5195 if ((URI == NULL) && (literal == NULL)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005196 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005197 }
5198 if (URI) {
5199 xmlURIPtr uri;
5200
5201 uri = xmlParseURI((const char *) URI);
5202 if (uri == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00005203 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
5204 "Invalid URI: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00005205 /*
5206 * This really ought to be a well formedness error
5207 * but the XML Core WG decided otherwise c.f. issue
5208 * E26 of the XML erratas.
5209 */
Owen Taylor3473f882001-02-23 17:55:21 +00005210 } else {
5211 if (uri->fragment != NULL) {
Daniel Veillard4a7ae502002-02-18 19:18:17 +00005212 /*
5213 * Okay this is foolish to block those but not
5214 * invalid URIs.
5215 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005216 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005217 } else {
5218 if ((ctxt->sax != NULL) &&
5219 (!ctxt->disableSAX) &&
5220 (ctxt->sax->entityDecl != NULL))
5221 ctxt->sax->entityDecl(ctxt->userData, name,
5222 XML_EXTERNAL_PARAMETER_ENTITY,
5223 literal, URI, NULL);
5224 }
5225 xmlFreeURI(uri);
5226 }
5227 }
5228 }
5229 } else {
5230 if ((RAW == '"') || (RAW == '\'')) {
5231 value = xmlParseEntityValue(ctxt, &orig);
5232 if ((ctxt->sax != NULL) &&
5233 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5234 ctxt->sax->entityDecl(ctxt->userData, name,
5235 XML_INTERNAL_GENERAL_ENTITY,
5236 NULL, NULL, value);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005237 /*
5238 * For expat compatibility in SAX mode.
5239 */
5240 if ((ctxt->myDoc == NULL) ||
5241 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
5242 if (ctxt->myDoc == NULL) {
5243 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
Daniel Veillard68b6e022008-03-31 09:26:00 +00005244 if (ctxt->myDoc == NULL) {
5245 xmlErrMemory(ctxt, "New Doc failed");
5246 return;
5247 }
Daniel Veillardae0765b2008-07-31 19:54:59 +00005248 ctxt->myDoc->properties = XML_DOC_INTERNAL;
Daniel Veillard5997aca2002-03-18 18:36:20 +00005249 }
5250 if (ctxt->myDoc->intSubset == NULL)
5251 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5252 BAD_CAST "fake", NULL, NULL);
5253
Daniel Veillard1af9a412003-08-20 22:54:39 +00005254 xmlSAX2EntityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY,
5255 NULL, NULL, value);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005256 }
Owen Taylor3473f882001-02-23 17:55:21 +00005257 } else {
5258 URI = xmlParseExternalID(ctxt, &literal, 1);
5259 if ((URI == NULL) && (literal == NULL)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005260 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005261 }
5262 if (URI) {
5263 xmlURIPtr uri;
5264
5265 uri = xmlParseURI((const char *)URI);
5266 if (uri == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00005267 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
5268 "Invalid URI: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00005269 /*
5270 * This really ought to be a well formedness error
5271 * but the XML Core WG decided otherwise c.f. issue
5272 * E26 of the XML erratas.
5273 */
Owen Taylor3473f882001-02-23 17:55:21 +00005274 } else {
5275 if (uri->fragment != NULL) {
Daniel Veillard4a7ae502002-02-18 19:18:17 +00005276 /*
5277 * Okay this is foolish to block those but not
5278 * invalid URIs.
5279 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005280 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005281 }
5282 xmlFreeURI(uri);
5283 }
5284 }
William M. Brack76e95df2003-10-18 16:20:14 +00005285 if ((RAW != '>') && (!IS_BLANK_CH(CUR))) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005286 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5287 "Space required before 'NDATA'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005288 }
5289 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005290 if (CMP5(CUR_PTR, 'N', 'D', 'A', 'T', 'A')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005291 SKIP(5);
William M. Brack76e95df2003-10-18 16:20:14 +00005292 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005293 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5294 "Space required after 'NDATA'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005295 }
5296 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00005297 ndata = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005298 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5299 (ctxt->sax->unparsedEntityDecl != NULL))
5300 ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
5301 literal, URI, ndata);
5302 } else {
5303 if ((ctxt->sax != NULL) &&
5304 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5305 ctxt->sax->entityDecl(ctxt->userData, name,
5306 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5307 literal, URI, NULL);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005308 /*
5309 * For expat compatibility in SAX mode.
5310 * assuming the entity repalcement was asked for
5311 */
5312 if ((ctxt->replaceEntities != 0) &&
5313 ((ctxt->myDoc == NULL) ||
5314 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) {
5315 if (ctxt->myDoc == NULL) {
5316 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
Daniel Veillard68b6e022008-03-31 09:26:00 +00005317 if (ctxt->myDoc == NULL) {
5318 xmlErrMemory(ctxt, "New Doc failed");
5319 return;
5320 }
Daniel Veillardae0765b2008-07-31 19:54:59 +00005321 ctxt->myDoc->properties = XML_DOC_INTERNAL;
Daniel Veillard5997aca2002-03-18 18:36:20 +00005322 }
5323
5324 if (ctxt->myDoc->intSubset == NULL)
5325 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5326 BAD_CAST "fake", NULL, NULL);
Daniel Veillard1af9a412003-08-20 22:54:39 +00005327 xmlSAX2EntityDecl(ctxt, name,
5328 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5329 literal, URI, NULL);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005330 }
Owen Taylor3473f882001-02-23 17:55:21 +00005331 }
5332 }
5333 }
5334 SKIP_BLANKS;
5335 if (RAW != '>') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00005336 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00005337 "xmlParseEntityDecl: entity %s not terminated\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00005338 } else {
5339 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005340 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5341 "Entity declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005342 }
5343 NEXT;
5344 }
5345 if (orig != NULL) {
5346 /*
5347 * Ugly mechanism to save the raw entity value.
5348 */
5349 xmlEntityPtr cur = NULL;
5350
5351 if (isParameter) {
5352 if ((ctxt->sax != NULL) &&
5353 (ctxt->sax->getParameterEntity != NULL))
5354 cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
5355 } else {
5356 if ((ctxt->sax != NULL) &&
5357 (ctxt->sax->getEntity != NULL))
5358 cur = ctxt->sax->getEntity(ctxt->userData, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005359 if ((cur == NULL) && (ctxt->userData==ctxt)) {
Daniel Veillard1af9a412003-08-20 22:54:39 +00005360 cur = xmlSAX2GetEntity(ctxt, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005361 }
Owen Taylor3473f882001-02-23 17:55:21 +00005362 }
5363 if (cur != NULL) {
5364 if (cur->orig != NULL)
5365 xmlFree(orig);
5366 else
5367 cur->orig = orig;
5368 } else
5369 xmlFree(orig);
5370 }
Owen Taylor3473f882001-02-23 17:55:21 +00005371 if (value != NULL) xmlFree(value);
5372 if (URI != NULL) xmlFree(URI);
5373 if (literal != NULL) xmlFree(literal);
Owen Taylor3473f882001-02-23 17:55:21 +00005374 }
5375}
5376
5377/**
5378 * xmlParseDefaultDecl:
5379 * @ctxt: an XML parser context
5380 * @value: Receive a possible fixed default value for the attribute
5381 *
5382 * Parse an attribute default declaration
5383 *
5384 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
5385 *
5386 * [ VC: Required Attribute ]
5387 * if the default declaration is the keyword #REQUIRED, then the
5388 * attribute must be specified for all elements of the type in the
5389 * attribute-list declaration.
5390 *
5391 * [ VC: Attribute Default Legal ]
5392 * The declared default value must meet the lexical constraints of
5393 * the declared attribute type c.f. xmlValidateAttributeDecl()
5394 *
5395 * [ VC: Fixed Attribute Default ]
5396 * if an attribute has a default value declared with the #FIXED
5397 * keyword, instances of that attribute must match the default value.
5398 *
5399 * [ WFC: No < in Attribute Values ]
5400 * handled in xmlParseAttValue()
5401 *
5402 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
5403 * or XML_ATTRIBUTE_FIXED.
5404 */
5405
5406int
5407xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
5408 int val;
5409 xmlChar *ret;
5410
5411 *value = NULL;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005412 if (CMP9(CUR_PTR, '#', 'R', 'E', 'Q', 'U', 'I', 'R', 'E', 'D')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005413 SKIP(9);
5414 return(XML_ATTRIBUTE_REQUIRED);
5415 }
Daniel Veillarda07050d2003-10-19 14:46:32 +00005416 if (CMP8(CUR_PTR, '#', 'I', 'M', 'P', 'L', 'I', 'E', 'D')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005417 SKIP(8);
5418 return(XML_ATTRIBUTE_IMPLIED);
5419 }
5420 val = XML_ATTRIBUTE_NONE;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005421 if (CMP6(CUR_PTR, '#', 'F', 'I', 'X', 'E', 'D')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005422 SKIP(6);
5423 val = XML_ATTRIBUTE_FIXED;
William M. Brack76e95df2003-10-18 16:20:14 +00005424 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005425 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5426 "Space required after '#FIXED'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005427 }
5428 SKIP_BLANKS;
5429 }
5430 ret = xmlParseAttValue(ctxt);
5431 ctxt->instate = XML_PARSER_DTD;
5432 if (ret == NULL) {
William M. Brack7b9154b2003-09-27 19:23:50 +00005433 xmlFatalErrMsg(ctxt, (xmlParserErrors)ctxt->errNo,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005434 "Attribute default value declaration error\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005435 } else
5436 *value = ret;
5437 return(val);
5438}
5439
5440/**
5441 * xmlParseNotationType:
5442 * @ctxt: an XML parser context
5443 *
5444 * parse an Notation attribute type.
5445 *
5446 * Note: the leading 'NOTATION' S part has already being parsed...
5447 *
5448 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5449 *
5450 * [ VC: Notation Attributes ]
5451 * Values of this type must match one of the notation names included
5452 * in the declaration; all notation names in the declaration must be declared.
5453 *
5454 * Returns: the notation attribute tree built while parsing
5455 */
5456
5457xmlEnumerationPtr
5458xmlParseNotationType(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005459 const xmlChar *name;
Daniel Veillard49d44052008-08-27 19:57:06 +00005460 xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00005461
5462 if (RAW != '(') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005463 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005464 return(NULL);
5465 }
5466 SHRINK;
5467 do {
5468 NEXT;
5469 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00005470 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005471 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005472 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5473 "Name expected in NOTATION declaration\n");
Daniel Veillard489f9672009-08-10 16:49:30 +02005474 xmlFreeEnumeration(ret);
5475 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005476 }
Daniel Veillard49d44052008-08-27 19:57:06 +00005477 tmp = ret;
5478 while (tmp != NULL) {
5479 if (xmlStrEqual(name, tmp->name)) {
5480 xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5481 "standalone: attribute notation value token %s duplicated\n",
5482 name, NULL);
5483 if (!xmlDictOwns(ctxt->dict, name))
5484 xmlFree((xmlChar *) name);
5485 break;
5486 }
5487 tmp = tmp->next;
5488 }
5489 if (tmp == NULL) {
5490 cur = xmlCreateEnumeration(name);
Daniel Veillard489f9672009-08-10 16:49:30 +02005491 if (cur == NULL) {
5492 xmlFreeEnumeration(ret);
5493 return(NULL);
5494 }
Daniel Veillard49d44052008-08-27 19:57:06 +00005495 if (last == NULL) ret = last = cur;
5496 else {
5497 last->next = cur;
5498 last = cur;
5499 }
Owen Taylor3473f882001-02-23 17:55:21 +00005500 }
5501 SKIP_BLANKS;
5502 } while (RAW == '|');
5503 if (RAW != ')') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005504 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
Daniel Veillard489f9672009-08-10 16:49:30 +02005505 xmlFreeEnumeration(ret);
5506 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005507 }
5508 NEXT;
5509 return(ret);
5510}
5511
5512/**
5513 * xmlParseEnumerationType:
5514 * @ctxt: an XML parser context
5515 *
5516 * parse an Enumeration attribute type.
5517 *
5518 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
5519 *
5520 * [ VC: Enumeration ]
5521 * Values of this type must match one of the Nmtoken tokens in
5522 * the declaration
5523 *
5524 * Returns: the enumeration attribute tree built while parsing
5525 */
5526
5527xmlEnumerationPtr
5528xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
5529 xmlChar *name;
Daniel Veillard49d44052008-08-27 19:57:06 +00005530 xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00005531
5532 if (RAW != '(') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005533 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005534 return(NULL);
5535 }
5536 SHRINK;
5537 do {
5538 NEXT;
5539 SKIP_BLANKS;
5540 name = xmlParseNmtoken(ctxt);
5541 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005542 xmlFatalErr(ctxt, XML_ERR_NMTOKEN_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005543 return(ret);
5544 }
Daniel Veillard49d44052008-08-27 19:57:06 +00005545 tmp = ret;
5546 while (tmp != NULL) {
5547 if (xmlStrEqual(name, tmp->name)) {
5548 xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5549 "standalone: attribute enumeration value token %s duplicated\n",
5550 name, NULL);
5551 if (!xmlDictOwns(ctxt->dict, name))
5552 xmlFree(name);
5553 break;
5554 }
5555 tmp = tmp->next;
5556 }
5557 if (tmp == NULL) {
5558 cur = xmlCreateEnumeration(name);
5559 if (!xmlDictOwns(ctxt->dict, name))
5560 xmlFree(name);
Daniel Veillard489f9672009-08-10 16:49:30 +02005561 if (cur == NULL) {
5562 xmlFreeEnumeration(ret);
5563 return(NULL);
5564 }
Daniel Veillard49d44052008-08-27 19:57:06 +00005565 if (last == NULL) ret = last = cur;
5566 else {
5567 last->next = cur;
5568 last = cur;
5569 }
Owen Taylor3473f882001-02-23 17:55:21 +00005570 }
5571 SKIP_BLANKS;
5572 } while (RAW == '|');
5573 if (RAW != ')') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005574 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005575 return(ret);
5576 }
5577 NEXT;
5578 return(ret);
5579}
5580
5581/**
5582 * xmlParseEnumeratedType:
5583 * @ctxt: an XML parser context
5584 * @tree: the enumeration tree built while parsing
5585 *
5586 * parse an Enumerated attribute type.
5587 *
5588 * [57] EnumeratedType ::= NotationType | Enumeration
5589 *
5590 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5591 *
5592 *
5593 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
5594 */
5595
5596int
5597xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
Daniel Veillarda07050d2003-10-19 14:46:32 +00005598 if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005599 SKIP(8);
William M. Brack76e95df2003-10-18 16:20:14 +00005600 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005601 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5602 "Space required after 'NOTATION'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005603 return(0);
5604 }
5605 SKIP_BLANKS;
5606 *tree = xmlParseNotationType(ctxt);
5607 if (*tree == NULL) return(0);
5608 return(XML_ATTRIBUTE_NOTATION);
5609 }
5610 *tree = xmlParseEnumerationType(ctxt);
5611 if (*tree == NULL) return(0);
5612 return(XML_ATTRIBUTE_ENUMERATION);
5613}
5614
5615/**
5616 * xmlParseAttributeType:
5617 * @ctxt: an XML parser context
5618 * @tree: the enumeration tree built while parsing
5619 *
5620 * parse the Attribute list def for an element
5621 *
5622 * [54] AttType ::= StringType | TokenizedType | EnumeratedType
5623 *
5624 * [55] StringType ::= 'CDATA'
5625 *
5626 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
5627 * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
5628 *
5629 * Validity constraints for attribute values syntax are checked in
5630 * xmlValidateAttributeValue()
5631 *
5632 * [ VC: ID ]
5633 * Values of type ID must match the Name production. A name must not
5634 * appear more than once in an XML document as a value of this type;
5635 * i.e., ID values must uniquely identify the elements which bear them.
5636 *
5637 * [ VC: One ID per Element Type ]
5638 * No element type may have more than one ID attribute specified.
5639 *
5640 * [ VC: ID Attribute Default ]
5641 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
5642 *
5643 * [ VC: IDREF ]
5644 * Values of type IDREF must match the Name production, and values
5645 * of type IDREFS must match Names; each IDREF Name must match the value
5646 * of an ID attribute on some element in the XML document; i.e. IDREF
5647 * values must match the value of some ID attribute.
5648 *
5649 * [ VC: Entity Name ]
5650 * Values of type ENTITY must match the Name production, values
5651 * of type ENTITIES must match Names; each Entity Name must match the
5652 * name of an unparsed entity declared in the DTD.
5653 *
5654 * [ VC: Name Token ]
5655 * Values of type NMTOKEN must match the Nmtoken production; values
5656 * of type NMTOKENS must match Nmtokens.
5657 *
5658 * Returns the attribute type
5659 */
5660int
5661xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
5662 SHRINK;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005663 if (CMP5(CUR_PTR, 'C', 'D', 'A', 'T', 'A')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005664 SKIP(5);
5665 return(XML_ATTRIBUTE_CDATA);
Daniel Veillarda07050d2003-10-19 14:46:32 +00005666 } else if (CMP6(CUR_PTR, 'I', 'D', 'R', 'E', 'F', 'S')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005667 SKIP(6);
5668 return(XML_ATTRIBUTE_IDREFS);
Daniel Veillarda07050d2003-10-19 14:46:32 +00005669 } else if (CMP5(CUR_PTR, 'I', 'D', 'R', 'E', 'F')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005670 SKIP(5);
5671 return(XML_ATTRIBUTE_IDREF);
5672 } else if ((RAW == 'I') && (NXT(1) == 'D')) {
5673 SKIP(2);
5674 return(XML_ATTRIBUTE_ID);
Daniel Veillarda07050d2003-10-19 14:46:32 +00005675 } else if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005676 SKIP(6);
5677 return(XML_ATTRIBUTE_ENTITY);
Daniel Veillarda07050d2003-10-19 14:46:32 +00005678 } else if (CMP8(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'I', 'E', 'S')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005679 SKIP(8);
5680 return(XML_ATTRIBUTE_ENTITIES);
Daniel Veillarda07050d2003-10-19 14:46:32 +00005681 } else if (CMP8(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N', 'S')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005682 SKIP(8);
5683 return(XML_ATTRIBUTE_NMTOKENS);
Daniel Veillarda07050d2003-10-19 14:46:32 +00005684 } else if (CMP7(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005685 SKIP(7);
5686 return(XML_ATTRIBUTE_NMTOKEN);
5687 }
5688 return(xmlParseEnumeratedType(ctxt, tree));
5689}
5690
5691/**
5692 * xmlParseAttributeListDecl:
5693 * @ctxt: an XML parser context
5694 *
5695 * : parse the Attribute list def for an element
5696 *
5697 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
5698 *
5699 * [53] AttDef ::= S Name S AttType S DefaultDecl
5700 *
5701 */
5702void
5703xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005704 const xmlChar *elemName;
5705 const xmlChar *attrName;
Owen Taylor3473f882001-02-23 17:55:21 +00005706 xmlEnumerationPtr tree;
5707
Daniel Veillarda07050d2003-10-19 14:46:32 +00005708 if (CMP9(CUR_PTR, '<', '!', 'A', 'T', 'T', 'L', 'I', 'S', 'T')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005709 xmlParserInputPtr input = ctxt->input;
5710
5711 SKIP(9);
William M. Brack76e95df2003-10-18 16:20:14 +00005712 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005713 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005714 "Space required after '<!ATTLIST'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005715 }
5716 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00005717 elemName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005718 if (elemName == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005719 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5720 "ATTLIST: no name for Element\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005721 return;
5722 }
5723 SKIP_BLANKS;
5724 GROW;
5725 while (RAW != '>') {
5726 const xmlChar *check = CUR_PTR;
5727 int type;
5728 int def;
5729 xmlChar *defaultValue = NULL;
5730
5731 GROW;
5732 tree = NULL;
Daniel Veillard76d66f42001-05-16 21:05:17 +00005733 attrName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005734 if (attrName == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005735 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5736 "ATTLIST: no name for Attribute\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005737 break;
5738 }
5739 GROW;
William M. Brack76e95df2003-10-18 16:20:14 +00005740 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005741 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005742 "Space required after the attribute name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005743 break;
5744 }
5745 SKIP_BLANKS;
5746
5747 type = xmlParseAttributeType(ctxt, &tree);
5748 if (type <= 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00005749 break;
5750 }
5751
5752 GROW;
William M. Brack76e95df2003-10-18 16:20:14 +00005753 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005754 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5755 "Space required after the attribute type\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005756 if (tree != NULL)
5757 xmlFreeEnumeration(tree);
5758 break;
5759 }
5760 SKIP_BLANKS;
5761
5762 def = xmlParseDefaultDecl(ctxt, &defaultValue);
5763 if (def <= 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00005764 if (defaultValue != NULL)
5765 xmlFree(defaultValue);
5766 if (tree != NULL)
5767 xmlFreeEnumeration(tree);
5768 break;
5769 }
Daniel Veillard97c9ce22008-03-25 16:52:41 +00005770 if ((type != XML_ATTRIBUTE_CDATA) && (defaultValue != NULL))
5771 xmlAttrNormalizeSpace(defaultValue, defaultValue);
Owen Taylor3473f882001-02-23 17:55:21 +00005772
5773 GROW;
5774 if (RAW != '>') {
William M. Brack76e95df2003-10-18 16:20:14 +00005775 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005776 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005777 "Space required after the attribute default value\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005778 if (defaultValue != NULL)
5779 xmlFree(defaultValue);
5780 if (tree != NULL)
5781 xmlFreeEnumeration(tree);
5782 break;
5783 }
5784 SKIP_BLANKS;
5785 }
5786 if (check == CUR_PTR) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005787 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
5788 "in xmlParseAttributeListDecl\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005789 if (defaultValue != NULL)
5790 xmlFree(defaultValue);
5791 if (tree != NULL)
5792 xmlFreeEnumeration(tree);
5793 break;
5794 }
5795 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5796 (ctxt->sax->attributeDecl != NULL))
5797 ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
5798 type, def, defaultValue, tree);
Daniel Veillarde57ec792003-09-10 10:50:59 +00005799 else if (tree != NULL)
5800 xmlFreeEnumeration(tree);
5801
5802 if ((ctxt->sax2) && (defaultValue != NULL) &&
5803 (def != XML_ATTRIBUTE_IMPLIED) &&
5804 (def != XML_ATTRIBUTE_REQUIRED)) {
5805 xmlAddDefAttrs(ctxt, elemName, attrName, defaultValue);
5806 }
Daniel Veillardac4118d2008-01-11 05:27:32 +00005807 if (ctxt->sax2) {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00005808 xmlAddSpecialAttr(ctxt, elemName, attrName, type);
5809 }
Owen Taylor3473f882001-02-23 17:55:21 +00005810 if (defaultValue != NULL)
5811 xmlFree(defaultValue);
5812 GROW;
5813 }
5814 if (RAW == '>') {
5815 if (input != ctxt->input) {
Daniel Veillard49d44052008-08-27 19:57:06 +00005816 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
5817 "Attribute list declaration doesn't start and stop in the same entity\n",
5818 NULL, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005819 }
5820 NEXT;
5821 }
Owen Taylor3473f882001-02-23 17:55:21 +00005822 }
5823}
5824
5825/**
5826 * xmlParseElementMixedContentDecl:
5827 * @ctxt: an XML parser context
Daniel Veillarda9b66d02002-12-11 14:23:49 +00005828 * @inputchk: the input used for the current entity, needed for boundary checks
Owen Taylor3473f882001-02-23 17:55:21 +00005829 *
5830 * parse the declaration for a Mixed Element content
5831 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
5832 *
5833 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
5834 * '(' S? '#PCDATA' S? ')'
5835 *
5836 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
5837 *
5838 * [ VC: No Duplicate Types ]
5839 * The same name must not appear more than once in a single
5840 * mixed-content declaration.
5841 *
5842 * returns: the list of the xmlElementContentPtr describing the element choices
5843 */
5844xmlElementContentPtr
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005845xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
Owen Taylor3473f882001-02-23 17:55:21 +00005846 xmlElementContentPtr ret = NULL, cur = NULL, n;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005847 const xmlChar *elem = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005848
5849 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005850 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005851 SKIP(7);
5852 SKIP_BLANKS;
5853 SHRINK;
5854 if (RAW == ')') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005855 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00005856 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
5857"Element content declaration doesn't start and stop in the same entity\n",
Daniel Veillardae0765b2008-07-31 19:54:59 +00005858 NULL, NULL);
Daniel Veillard8dc16a62002-02-19 21:08:48 +00005859 }
Owen Taylor3473f882001-02-23 17:55:21 +00005860 NEXT;
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005861 ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
Daniel Veillard68b6e022008-03-31 09:26:00 +00005862 if (ret == NULL)
5863 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005864 if (RAW == '*') {
5865 ret->ocur = XML_ELEMENT_CONTENT_MULT;
5866 NEXT;
5867 }
5868 return(ret);
5869 }
5870 if ((RAW == '(') || (RAW == '|')) {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005871 ret = cur = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
Owen Taylor3473f882001-02-23 17:55:21 +00005872 if (ret == NULL) return(NULL);
5873 }
5874 while (RAW == '|') {
5875 NEXT;
5876 if (elem == NULL) {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005877 ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
Owen Taylor3473f882001-02-23 17:55:21 +00005878 if (ret == NULL) return(NULL);
5879 ret->c1 = cur;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005880 if (cur != NULL)
5881 cur->parent = ret;
Owen Taylor3473f882001-02-23 17:55:21 +00005882 cur = ret;
5883 } else {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005884 n = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
Owen Taylor3473f882001-02-23 17:55:21 +00005885 if (n == NULL) return(NULL);
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005886 n->c1 = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005887 if (n->c1 != NULL)
5888 n->c1->parent = n;
Owen Taylor3473f882001-02-23 17:55:21 +00005889 cur->c2 = n;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005890 if (n != NULL)
5891 n->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00005892 cur = n;
Owen Taylor3473f882001-02-23 17:55:21 +00005893 }
5894 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00005895 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005896 if (elem == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005897 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005898 "xmlParseElementMixedContentDecl : Name expected\n");
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005899 xmlFreeDocElementContent(ctxt->myDoc, cur);
Owen Taylor3473f882001-02-23 17:55:21 +00005900 return(NULL);
5901 }
5902 SKIP_BLANKS;
5903 GROW;
5904 }
5905 if ((RAW == ')') && (NXT(1) == '*')) {
5906 if (elem != NULL) {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005907 cur->c2 = xmlNewDocElementContent(ctxt->myDoc, elem,
Owen Taylor3473f882001-02-23 17:55:21 +00005908 XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005909 if (cur->c2 != NULL)
5910 cur->c2->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00005911 }
Daniel Veillardd44b9362009-09-07 12:15:08 +02005912 if (ret != NULL)
5913 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005914 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00005915 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
5916"Element content declaration doesn't start and stop in the same entity\n",
Daniel Veillardae0765b2008-07-31 19:54:59 +00005917 NULL, NULL);
Daniel Veillard8dc16a62002-02-19 21:08:48 +00005918 }
Owen Taylor3473f882001-02-23 17:55:21 +00005919 SKIP(2);
5920 } else {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005921 xmlFreeDocElementContent(ctxt->myDoc, ret);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005922 xmlFatalErr(ctxt, XML_ERR_MIXED_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005923 return(NULL);
5924 }
5925
5926 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005927 xmlFatalErr(ctxt, XML_ERR_PCDATA_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005928 }
5929 return(ret);
5930}
5931
5932/**
Daniel Veillard489f9672009-08-10 16:49:30 +02005933 * xmlParseElementChildrenContentDeclPriv:
Owen Taylor3473f882001-02-23 17:55:21 +00005934 * @ctxt: an XML parser context
Daniel Veillarda9b66d02002-12-11 14:23:49 +00005935 * @inputchk: the input used for the current entity, needed for boundary checks
Daniel Veillard489f9672009-08-10 16:49:30 +02005936 * @depth: the level of recursion
Owen Taylor3473f882001-02-23 17:55:21 +00005937 *
 * parse the children content model (choice or sequence) of an element declaration
5939 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
5940 *
5941 *
5942 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
5943 *
5944 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
5945 *
5946 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
5947 *
5948 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
5949 *
5950 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
5951 * TODO Parameter-entity replacement text must be properly nested
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005952 * with parenthesized groups. That is to say, if either of the
Owen Taylor3473f882001-02-23 17:55:21 +00005953 * opening or closing parentheses in a choice, seq, or Mixed
5954 * construct is contained in the replacement text for a parameter
5955 * entity, both must be contained in the same replacement text. For
5956 * interoperability, if a parameter-entity reference appears in a
5957 * choice, seq, or Mixed construct, its replacement text should not
5958 * be empty, and neither the first nor last non-blank character of
5959 * the replacement text should be a connector (| or ,).
5960 *
Daniel Veillard5e2dace2001-07-18 19:30:27 +00005961 * Returns the tree of xmlElementContentPtr describing the element
Owen Taylor3473f882001-02-23 17:55:21 +00005962 * hierarchy.
5963 */
Daniel Veillard489f9672009-08-10 16:49:30 +02005964static xmlElementContentPtr
5965xmlParseElementChildrenContentDeclPriv(xmlParserCtxtPtr ctxt, int inputchk,
5966 int depth) {
Owen Taylor3473f882001-02-23 17:55:21 +00005967 xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005968 const xmlChar *elem;
Owen Taylor3473f882001-02-23 17:55:21 +00005969 xmlChar type = 0;
5970
Daniel Veillard489f9672009-08-10 16:49:30 +02005971 if (((depth > 128) && ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
5972 (depth > 2048)) {
5973 xmlFatalErrMsgInt(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED,
5974"xmlParseElementChildrenContentDecl : depth %d too deep, use XML_PARSE_HUGE\n",
5975 depth);
5976 return(NULL);
5977 }
Owen Taylor3473f882001-02-23 17:55:21 +00005978 SKIP_BLANKS;
5979 GROW;
5980 if (RAW == '(') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005981 int inputid = ctxt->input->id;
Daniel Veillard8dc16a62002-02-19 21:08:48 +00005982
Owen Taylor3473f882001-02-23 17:55:21 +00005983 /* Recurse on first child */
5984 NEXT;
5985 SKIP_BLANKS;
Daniel Veillard489f9672009-08-10 16:49:30 +02005986 cur = ret = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
5987 depth + 1);
Owen Taylor3473f882001-02-23 17:55:21 +00005988 SKIP_BLANKS;
5989 GROW;
5990 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00005991 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005992 if (elem == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005993 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005994 return(NULL);
5995 }
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005996 cur = ret = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00005997 if (cur == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005998 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00005999 return(NULL);
6000 }
Owen Taylor3473f882001-02-23 17:55:21 +00006001 GROW;
6002 if (RAW == '?') {
6003 cur->ocur = XML_ELEMENT_CONTENT_OPT;
6004 NEXT;
6005 } else if (RAW == '*') {
6006 cur->ocur = XML_ELEMENT_CONTENT_MULT;
6007 NEXT;
6008 } else if (RAW == '+') {
6009 cur->ocur = XML_ELEMENT_CONTENT_PLUS;
6010 NEXT;
6011 } else {
6012 cur->ocur = XML_ELEMENT_CONTENT_ONCE;
6013 }
Owen Taylor3473f882001-02-23 17:55:21 +00006014 GROW;
6015 }
6016 SKIP_BLANKS;
6017 SHRINK;
6018 while (RAW != ')') {
6019 /*
6020 * Each loop we parse one separator and one element.
6021 */
6022 if (RAW == ',') {
6023 if (type == 0) type = CUR;
6024
6025 /*
6026 * Detect "Name | Name , Name" error
6027 */
6028 else if (type != CUR) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006029 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00006030 "xmlParseElementChildrenContentDecl : '%c' expected\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006031 type);
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00006032 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006033 xmlFreeDocElementContent(ctxt->myDoc, last);
Owen Taylor3473f882001-02-23 17:55:21 +00006034 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006035 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00006036 return(NULL);
6037 }
6038 NEXT;
6039
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006040 op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_SEQ);
Owen Taylor3473f882001-02-23 17:55:21 +00006041 if (op == NULL) {
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00006042 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006043 xmlFreeDocElementContent(ctxt->myDoc, last);
6044 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00006045 return(NULL);
6046 }
6047 if (last == NULL) {
6048 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006049 if (ret != NULL)
6050 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00006051 ret = cur = op;
6052 } else {
6053 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006054 if (op != NULL)
6055 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00006056 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006057 if (last != NULL)
6058 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00006059 cur =op;
6060 last = NULL;
6061 }
6062 } else if (RAW == '|') {
6063 if (type == 0) type = CUR;
6064
6065 /*
6066 * Detect "Name , Name | Name" error
6067 */
6068 else if (type != CUR) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006069 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00006070 "xmlParseElementChildrenContentDecl : '%c' expected\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006071 type);
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00006072 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006073 xmlFreeDocElementContent(ctxt->myDoc, last);
Owen Taylor3473f882001-02-23 17:55:21 +00006074 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006075 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00006076 return(NULL);
6077 }
6078 NEXT;
6079
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006080 op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
Owen Taylor3473f882001-02-23 17:55:21 +00006081 if (op == NULL) {
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00006082 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006083 xmlFreeDocElementContent(ctxt->myDoc, last);
Owen Taylor3473f882001-02-23 17:55:21 +00006084 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006085 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00006086 return(NULL);
6087 }
6088 if (last == NULL) {
6089 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006090 if (ret != NULL)
6091 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00006092 ret = cur = op;
6093 } else {
6094 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006095 if (op != NULL)
6096 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00006097 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006098 if (last != NULL)
6099 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00006100 cur =op;
6101 last = NULL;
6102 }
6103 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006104 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED, NULL);
Daniel Veillardc707d0b2008-01-24 14:48:54 +00006105 if ((last != NULL) && (last != ret))
6106 xmlFreeDocElementContent(ctxt->myDoc, last);
Owen Taylor3473f882001-02-23 17:55:21 +00006107 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006108 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00006109 return(NULL);
6110 }
6111 GROW;
6112 SKIP_BLANKS;
6113 GROW;
6114 if (RAW == '(') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006115 int inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00006116 /* Recurse on second child */
6117 NEXT;
6118 SKIP_BLANKS;
Daniel Veillard489f9672009-08-10 16:49:30 +02006119 last = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
6120 depth + 1);
Owen Taylor3473f882001-02-23 17:55:21 +00006121 SKIP_BLANKS;
6122 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00006123 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006124 if (elem == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006125 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006126 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006127 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00006128 return(NULL);
6129 }
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006130 last = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillard68b6e022008-03-31 09:26:00 +00006131 if (last == NULL) {
6132 if (ret != NULL)
6133 xmlFreeDocElementContent(ctxt->myDoc, ret);
6134 return(NULL);
6135 }
Owen Taylor3473f882001-02-23 17:55:21 +00006136 if (RAW == '?') {
6137 last->ocur = XML_ELEMENT_CONTENT_OPT;
6138 NEXT;
6139 } else if (RAW == '*') {
6140 last->ocur = XML_ELEMENT_CONTENT_MULT;
6141 NEXT;
6142 } else if (RAW == '+') {
6143 last->ocur = XML_ELEMENT_CONTENT_PLUS;
6144 NEXT;
6145 } else {
6146 last->ocur = XML_ELEMENT_CONTENT_ONCE;
6147 }
6148 }
6149 SKIP_BLANKS;
6150 GROW;
6151 }
6152 if ((cur != NULL) && (last != NULL)) {
6153 cur->c2 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006154 if (last != NULL)
6155 last->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00006156 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006157 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00006158 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6159"Element content declaration doesn't start and stop in the same entity\n",
Daniel Veillardae0765b2008-07-31 19:54:59 +00006160 NULL, NULL);
Daniel Veillard8dc16a62002-02-19 21:08:48 +00006161 }
Owen Taylor3473f882001-02-23 17:55:21 +00006162 NEXT;
6163 if (RAW == '?') {
William M. Brackf8f2e8f2004-05-14 04:37:41 +00006164 if (ret != NULL) {
6165 if ((ret->ocur == XML_ELEMENT_CONTENT_PLUS) ||
6166 (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6167 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6168 else
6169 ret->ocur = XML_ELEMENT_CONTENT_OPT;
6170 }
Owen Taylor3473f882001-02-23 17:55:21 +00006171 NEXT;
6172 } else if (RAW == '*') {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00006173 if (ret != NULL) {
Daniel Veillarde470df72001-04-18 21:41:07 +00006174 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Daniel Veillardce2c2f02001-10-18 14:57:24 +00006175 cur = ret;
6176 /*
6177 * Some normalization:
6178 * (a | b* | c?)* == (a | b | c)*
6179 */
Daniel Veillard30e76072006-03-09 14:13:55 +00006180 while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00006181 if ((cur->c1 != NULL) &&
6182 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6183 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT)))
6184 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6185 if ((cur->c2 != NULL) &&
6186 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6187 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT)))
6188 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6189 cur = cur->c2;
6190 }
6191 }
Owen Taylor3473f882001-02-23 17:55:21 +00006192 NEXT;
6193 } else if (RAW == '+') {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00006194 if (ret != NULL) {
6195 int found = 0;
6196
William M. Brackf8f2e8f2004-05-14 04:37:41 +00006197 if ((ret->ocur == XML_ELEMENT_CONTENT_OPT) ||
6198 (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6199 ret->ocur = XML_ELEMENT_CONTENT_MULT;
William M. Brackeb8509c2004-05-14 03:48:02 +00006200 else
6201 ret->ocur = XML_ELEMENT_CONTENT_PLUS;
Daniel Veillardce2c2f02001-10-18 14:57:24 +00006202 /*
6203 * Some normalization:
6204 * (a | b*)+ == (a | b)*
6205 * (a | b?)+ == (a | b)*
6206 */
Daniel Veillard30e76072006-03-09 14:13:55 +00006207 while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00006208 if ((cur->c1 != NULL) &&
6209 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6210 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) {
6211 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6212 found = 1;
6213 }
6214 if ((cur->c2 != NULL) &&
6215 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6216 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) {
6217 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6218 found = 1;
6219 }
6220 cur = cur->c2;
6221 }
6222 if (found)
6223 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6224 }
Owen Taylor3473f882001-02-23 17:55:21 +00006225 NEXT;
6226 }
6227 return(ret);
6228}
6229
6230/**
Daniel Veillard489f9672009-08-10 16:49:30 +02006231 * xmlParseElementChildrenContentDecl:
6232 * @ctxt: an XML parser context
6233 * @inputchk: the input used for the current entity, needed for boundary checks
Daniel Veillard489f9672009-08-10 16:49:30 +02006234 *
6235 * parse the declaration for a Mixed Element content
6236 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6237 *
6238 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
6239 *
6240 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6241 *
6242 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6243 *
6244 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6245 *
6246 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6247 * TODO Parameter-entity replacement text must be properly nested
6248 * with parenthesized groups. That is to say, if either of the
6249 * opening or closing parentheses in a choice, seq, or Mixed
6250 * construct is contained in the replacement text for a parameter
6251 * entity, both must be contained in the same replacement text. For
6252 * interoperability, if a parameter-entity reference appears in a
6253 * choice, seq, or Mixed construct, its replacement text should not
6254 * be empty, and neither the first nor last non-blank character of
6255 * the replacement text should be a connector (| or ,).
6256 *
6257 * Returns the tree of xmlElementContentPtr describing the element
6258 * hierarchy.
6259 */
6260xmlElementContentPtr
6261xmlParseElementChildrenContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
6262 /* stub left for API/ABI compat */
6263 return(xmlParseElementChildrenContentDeclPriv(ctxt, inputchk, 1));
6264}
6265
6266/**
Owen Taylor3473f882001-02-23 17:55:21 +00006267 * xmlParseElementContentDecl:
6268 * @ctxt: an XML parser context
6269 * @name: the name of the element being defined.
6270 * @result: the Element Content pointer will be stored here if any
6271 *
6272 * parse the declaration for an Element content either Mixed or Children,
6273 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
6274 *
6275 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
6276 *
6277 * returns: the type of element content XML_ELEMENT_TYPE_xxx
6278 */
6279
6280int
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006281xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, const xmlChar *name,
Owen Taylor3473f882001-02-23 17:55:21 +00006282 xmlElementContentPtr *result) {
6283
6284 xmlElementContentPtr tree = NULL;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006285 int inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00006286 int res;
6287
6288 *result = NULL;
6289
6290 if (RAW != '(') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006291 xmlFatalErrMsgStr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
Daniel Veillard56a4cb82001-03-24 17:00:36 +00006292 "xmlParseElementContentDecl : %s '(' expected\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006293 return(-1);
6294 }
6295 NEXT;
6296 GROW;
6297 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00006298 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006299 tree = xmlParseElementMixedContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00006300 res = XML_ELEMENT_TYPE_MIXED;
6301 } else {
Daniel Veillard489f9672009-08-10 16:49:30 +02006302 tree = xmlParseElementChildrenContentDeclPriv(ctxt, inputid, 1);
Owen Taylor3473f882001-02-23 17:55:21 +00006303 res = XML_ELEMENT_TYPE_ELEMENT;
6304 }
Owen Taylor3473f882001-02-23 17:55:21 +00006305 SKIP_BLANKS;
6306 *result = tree;
6307 return(res);
6308}
6309
6310/**
6311 * xmlParseElementDecl:
6312 * @ctxt: an XML parser context
6313 *
6314 * parse an Element declaration.
6315 *
6316 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
6317 *
6318 * [ VC: Unique Element Type Declaration ]
6319 * No element type may be declared more than once
6320 *
6321 * Returns the type of the element, or -1 in case of error
6322 */
6323int
6324xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006325 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00006326 int ret = -1;
6327 xmlElementContentPtr content = NULL;
6328
Daniel Veillard4c778d82005-01-23 17:37:44 +00006329 /* GROW; done in the caller */
Daniel Veillarda07050d2003-10-19 14:46:32 +00006330 if (CMP9(CUR_PTR, '<', '!', 'E', 'L', 'E', 'M', 'E', 'N', 'T')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006331 xmlParserInputPtr input = ctxt->input;
6332
6333 SKIP(9);
William M. Brack76e95df2003-10-18 16:20:14 +00006334 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006335 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6336 "Space required after 'ELEMENT'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006337 }
6338 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00006339 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006340 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006341 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6342 "xmlParseElementDecl: no name for Element\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006343 return(-1);
6344 }
6345 while ((RAW == 0) && (ctxt->inputNr > 1))
6346 xmlPopInput(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00006347 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006348 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6349 "Space required after the element name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006350 }
6351 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00006352 if (CMP5(CUR_PTR, 'E', 'M', 'P', 'T', 'Y')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006353 SKIP(5);
6354 /*
6355 * Element must always be empty.
6356 */
6357 ret = XML_ELEMENT_TYPE_EMPTY;
6358 } else if ((RAW == 'A') && (NXT(1) == 'N') &&
6359 (NXT(2) == 'Y')) {
6360 SKIP(3);
6361 /*
6362 * Element is a generic container.
6363 */
6364 ret = XML_ELEMENT_TYPE_ANY;
6365 } else if (RAW == '(') {
6366 ret = xmlParseElementContentDecl(ctxt, name, &content);
6367 } else {
6368 /*
6369 * [ WFC: PEs in Internal Subset ] error handling.
6370 */
6371 if ((RAW == '%') && (ctxt->external == 0) &&
6372 (ctxt->inputNr == 1)) {
Daniel Veillardf403d292003-10-05 13:51:35 +00006373 xmlFatalErrMsg(ctxt, XML_ERR_PEREF_IN_INT_SUBSET,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006374 "PEReference: forbidden within markup decl in internal subset\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006375 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00006376 xmlFatalErrMsg(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
Owen Taylor3473f882001-02-23 17:55:21 +00006377 "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
6378 }
Owen Taylor3473f882001-02-23 17:55:21 +00006379 return(-1);
6380 }
6381
6382 SKIP_BLANKS;
6383 /*
6384 * Pop-up of finished entities.
6385 */
6386 while ((RAW == 0) && (ctxt->inputNr > 1))
6387 xmlPopInput(ctxt);
6388 SKIP_BLANKS;
6389
6390 if (RAW != '>') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006391 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006392 if (content != NULL) {
6393 xmlFreeDocElementContent(ctxt->myDoc, content);
6394 }
Owen Taylor3473f882001-02-23 17:55:21 +00006395 } else {
6396 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006397 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6398 "Element declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006399 }
6400
6401 NEXT;
6402 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006403 (ctxt->sax->elementDecl != NULL)) {
6404 if (content != NULL)
6405 content->parent = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00006406 ctxt->sax->elementDecl(ctxt->userData, name, ret,
6407 content);
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006408 if ((content != NULL) && (content->parent == NULL)) {
6409 /*
6410 * this is a trick: if xmlAddElementDecl is called,
6411 * instead of copying the full tree it is plugged directly
6412 * if called from the parser. Avoid duplicating the
6413 * interfaces or change the API/ABI
6414 */
6415 xmlFreeDocElementContent(ctxt->myDoc, content);
6416 }
6417 } else if (content != NULL) {
6418 xmlFreeDocElementContent(ctxt->myDoc, content);
6419 }
Owen Taylor3473f882001-02-23 17:55:21 +00006420 }
Owen Taylor3473f882001-02-23 17:55:21 +00006421 }
6422 return(ret);
6423}
6424
6425/**
Owen Taylor3473f882001-02-23 17:55:21 +00006426 * xmlParseConditionalSections
6427 * @ctxt: an XML parser context
6428 *
6429 * [61] conditionalSect ::= includeSect | ignoreSect
6430 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
6431 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
6432 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
6433 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
6434 */
6435
Daniel Veillard56a4cb82001-03-24 17:00:36 +00006436static void
Owen Taylor3473f882001-02-23 17:55:21 +00006437xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
Daniel Veillard49d44052008-08-27 19:57:06 +00006438 int id = ctxt->input->id;
6439
Owen Taylor3473f882001-02-23 17:55:21 +00006440 SKIP(3);
6441 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00006442 if (CMP7(CUR_PTR, 'I', 'N', 'C', 'L', 'U', 'D', 'E')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006443 SKIP(7);
6444 SKIP_BLANKS;
6445 if (RAW != '[') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006446 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006447 } else {
Daniel Veillard49d44052008-08-27 19:57:06 +00006448 if (ctxt->input->id != id) {
6449 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6450 "All markup of the conditional section is not in the same entity\n",
6451 NULL, NULL);
6452 }
Owen Taylor3473f882001-02-23 17:55:21 +00006453 NEXT;
6454 }
6455 if (xmlParserDebugEntities) {
6456 if ((ctxt->input != NULL) && (ctxt->input->filename))
6457 xmlGenericError(xmlGenericErrorContext,
6458 "%s(%d): ", ctxt->input->filename,
6459 ctxt->input->line);
6460 xmlGenericError(xmlGenericErrorContext,
6461 "Entering INCLUDE Conditional Section\n");
6462 }
6463
6464 while ((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') ||
6465 (NXT(2) != '>'))) {
6466 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00006467 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00006468
6469 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6470 xmlParseConditionalSections(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00006471 } else if (IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00006472 NEXT;
6473 } else if (RAW == '%') {
6474 xmlParsePEReference(ctxt);
6475 } else
6476 xmlParseMarkupDecl(ctxt);
6477
6478 /*
6479 * Pop-up of finished entities.
6480 */
6481 while ((RAW == 0) && (ctxt->inputNr > 1))
6482 xmlPopInput(ctxt);
6483
Daniel Veillardfdc91562002-07-01 21:52:03 +00006484 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006485 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006486 break;
6487 }
6488 }
6489 if (xmlParserDebugEntities) {
6490 if ((ctxt->input != NULL) && (ctxt->input->filename))
6491 xmlGenericError(xmlGenericErrorContext,
6492 "%s(%d): ", ctxt->input->filename,
6493 ctxt->input->line);
6494 xmlGenericError(xmlGenericErrorContext,
6495 "Leaving INCLUDE Conditional Section\n");
6496 }
6497
Daniel Veillarda07050d2003-10-19 14:46:32 +00006498 } else if (CMP6(CUR_PTR, 'I', 'G', 'N', 'O', 'R', 'E')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006499 int state;
William M. Brack78637da2003-07-31 14:47:38 +00006500 xmlParserInputState instate;
Owen Taylor3473f882001-02-23 17:55:21 +00006501 int depth = 0;
6502
6503 SKIP(6);
6504 SKIP_BLANKS;
6505 if (RAW != '[') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006506 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006507 } else {
Daniel Veillard49d44052008-08-27 19:57:06 +00006508 if (ctxt->input->id != id) {
6509 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6510 "All markup of the conditional section is not in the same entity\n",
6511 NULL, NULL);
6512 }
Owen Taylor3473f882001-02-23 17:55:21 +00006513 NEXT;
6514 }
6515 if (xmlParserDebugEntities) {
6516 if ((ctxt->input != NULL) && (ctxt->input->filename))
6517 xmlGenericError(xmlGenericErrorContext,
6518 "%s(%d): ", ctxt->input->filename,
6519 ctxt->input->line);
6520 xmlGenericError(xmlGenericErrorContext,
6521 "Entering IGNORE Conditional Section\n");
6522 }
6523
6524 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006525 * Parse up to the end of the conditional section
Owen Taylor3473f882001-02-23 17:55:21 +00006526 * But disable SAX event generating DTD building in the meantime
6527 */
6528 state = ctxt->disableSAX;
6529 instate = ctxt->instate;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006530 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006531 ctxt->instate = XML_PARSER_IGNORE;
6532
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00006533 while ((depth >= 0) && (RAW != 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00006534 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6535 depth++;
6536 SKIP(3);
6537 continue;
6538 }
6539 if ((RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
6540 if (--depth >= 0) SKIP(3);
6541 continue;
6542 }
6543 NEXT;
6544 continue;
6545 }
6546
6547 ctxt->disableSAX = state;
6548 ctxt->instate = instate;
6549
6550 if (xmlParserDebugEntities) {
6551 if ((ctxt->input != NULL) && (ctxt->input->filename))
6552 xmlGenericError(xmlGenericErrorContext,
6553 "%s(%d): ", ctxt->input->filename,
6554 ctxt->input->line);
6555 xmlGenericError(xmlGenericErrorContext,
6556 "Leaving IGNORE Conditional Section\n");
6557 }
6558
6559 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006560 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID_KEYWORD, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006561 }
6562
6563 if (RAW == 0)
6564 SHRINK;
6565
6566 if (RAW == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006567 xmlFatalErr(ctxt, XML_ERR_CONDSEC_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006568 } else {
Daniel Veillard49d44052008-08-27 19:57:06 +00006569 if (ctxt->input->id != id) {
6570 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6571 "All markup of the conditional section is not in the same entity\n",
6572 NULL, NULL);
6573 }
Owen Taylor3473f882001-02-23 17:55:21 +00006574 SKIP(3);
6575 }
6576}
6577
6578/**
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006579 * xmlParseMarkupDecl:
6580 * @ctxt: an XML parser context
6581 *
6582 * parse Markup declarations
6583 *
6584 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
6585 * NotationDecl | PI | Comment
6586 *
6587 * [ VC: Proper Declaration/PE Nesting ]
6588 * Parameter-entity replacement text must be properly nested with
6589 * markup declarations. That is to say, if either the first character
6590 * or the last character of a markup declaration (markupdecl above) is
6591 * contained in the replacement text for a parameter-entity reference,
6592 * both must be contained in the same replacement text.
6593 *
6594 * [ WFC: PEs in Internal Subset ]
6595 * In the internal DTD subset, parameter-entity references can occur
6596 * only where markup declarations can occur, not within markup declarations.
6597 * (This does not apply to references that occur in external parameter
6598 * entities or to the external subset.)
6599 */
6600void
6601xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
6602 GROW;
Daniel Veillard4c778d82005-01-23 17:37:44 +00006603 if (CUR == '<') {
6604 if (NXT(1) == '!') {
6605 switch (NXT(2)) {
6606 case 'E':
6607 if (NXT(3) == 'L')
6608 xmlParseElementDecl(ctxt);
6609 else if (NXT(3) == 'N')
6610 xmlParseEntityDecl(ctxt);
6611 break;
6612 case 'A':
6613 xmlParseAttributeListDecl(ctxt);
6614 break;
6615 case 'N':
6616 xmlParseNotationDecl(ctxt);
6617 break;
6618 case '-':
6619 xmlParseComment(ctxt);
6620 break;
6621 default:
6622 /* there is an error but it will be detected later */
6623 break;
6624 }
6625 } else if (NXT(1) == '?') {
6626 xmlParsePI(ctxt);
6627 }
6628 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006629 /*
6630 * This is only for internal subset. On external entities,
6631 * the replacement is done before parsing stage
6632 */
6633 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
6634 xmlParsePEReference(ctxt);
6635
6636 /*
6637 * Conditional sections are allowed from entities included
6638 * by PE References in the internal subset.
6639 */
6640 if ((ctxt->external == 0) && (ctxt->inputNr > 1)) {
6641 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6642 xmlParseConditionalSections(ctxt);
6643 }
6644 }
6645
6646 ctxt->instate = XML_PARSER_DTD;
6647}
6648
6649/**
6650 * xmlParseTextDecl:
6651 * @ctxt: an XML parser context
Daniel Veillard40ec29a2008-07-30 12:35:40 +00006652 *
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006653 * parse an XML declaration header for external entities
6654 *
6655 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006656 */
6657
6658void
6659xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
6660 xmlChar *version;
Daniel Veillardf5cb3cd2003-10-28 13:58:13 +00006661 const xmlChar *encoding;
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006662
6663 /*
6664 * We know that '<?xml' is here.
6665 */
Daniel Veillarda07050d2003-10-19 14:46:32 +00006666 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006667 SKIP(5);
6668 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006669 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_STARTED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006670 return;
6671 }
6672
William M. Brack76e95df2003-10-18 16:20:14 +00006673 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006674 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6675 "Space needed after '<?xml'\n");
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006676 }
6677 SKIP_BLANKS;
6678
6679 /*
6680 * We may have the VersionInfo here.
6681 */
6682 version = xmlParseVersionInfo(ctxt);
6683 if (version == NULL)
6684 version = xmlCharStrdup(XML_DEFAULT_VERSION);
Daniel Veillard401c2112002-01-07 16:54:10 +00006685 else {
William M. Brack76e95df2003-10-18 16:20:14 +00006686 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006687 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6688 "Space needed here\n");
Daniel Veillard401c2112002-01-07 16:54:10 +00006689 }
6690 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006691 ctxt->input->version = version;
6692
6693 /*
6694 * We must have the encoding declaration
6695 */
Daniel Veillardf5cb3cd2003-10-28 13:58:13 +00006696 encoding = xmlParseEncodingDecl(ctxt);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006697 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
6698 /*
6699 * The XML REC instructs us to stop parsing right here
6700 */
6701 return;
6702 }
Daniel Veillardf5cb3cd2003-10-28 13:58:13 +00006703 if ((encoding == NULL) && (ctxt->errNo == XML_ERR_OK)) {
6704 xmlFatalErrMsg(ctxt, XML_ERR_MISSING_ENCODING,
6705 "Missing encoding in text declaration\n");
6706 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006707
6708 SKIP_BLANKS;
6709 if ((RAW == '?') && (NXT(1) == '>')) {
6710 SKIP(2);
6711 } else if (RAW == '>') {
6712 /* Deprecated old WD ... */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006713 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006714 NEXT;
6715 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006716 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006717 MOVETO_ENDTAG(CUR_PTR);
6718 NEXT;
6719 }
6720}
6721
6722/**
Owen Taylor3473f882001-02-23 17:55:21 +00006723 * xmlParseExternalSubset:
6724 * @ctxt: an XML parser context
6725 * @ExternalID: the external identifier
6726 * @SystemID: the system identifier (or URL)
6727 *
6728 * parse Markup declarations from an external subset
6729 *
6730 * [30] extSubset ::= textDecl? extSubsetDecl
6731 *
6732 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
6733 */
6734void
6735xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
6736 const xmlChar *SystemID) {
Daniel Veillard309f81d2003-09-23 09:02:53 +00006737 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006738 GROW;
Daniel Veillard6ccc56d2008-04-03 12:59:06 +00006739
Nikolay Sivove6ad10a2010-11-01 11:35:14 +01006740 if ((ctxt->encoding == NULL) &&
Daniel Veillard6ccc56d2008-04-03 12:59:06 +00006741 (ctxt->input->end - ctxt->input->cur >= 4)) {
6742 xmlChar start[4];
6743 xmlCharEncoding enc;
6744
6745 start[0] = RAW;
6746 start[1] = NXT(1);
6747 start[2] = NXT(2);
6748 start[3] = NXT(3);
6749 enc = xmlDetectCharEncoding(start, 4);
6750 if (enc != XML_CHAR_ENCODING_NONE)
6751 xmlSwitchEncoding(ctxt, enc);
6752 }
6753
Daniel Veillarda07050d2003-10-19 14:46:32 +00006754 if (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006755 xmlParseTextDecl(ctxt);
6756 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
6757 /*
6758 * The XML REC instructs us to stop parsing right here
6759 */
6760 ctxt->instate = XML_PARSER_EOF;
6761 return;
6762 }
6763 }
6764 if (ctxt->myDoc == NULL) {
6765 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
Daniel Veillardae0765b2008-07-31 19:54:59 +00006766 if (ctxt->myDoc == NULL) {
6767 xmlErrMemory(ctxt, "New Doc failed");
6768 return;
6769 }
6770 ctxt->myDoc->properties = XML_DOC_INTERNAL;
Owen Taylor3473f882001-02-23 17:55:21 +00006771 }
6772 if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
6773 xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
6774
6775 ctxt->instate = XML_PARSER_DTD;
6776 ctxt->external = 1;
6777 while (((RAW == '<') && (NXT(1) == '?')) ||
6778 ((RAW == '<') && (NXT(1) == '!')) ||
William M. Brack76e95df2003-10-18 16:20:14 +00006779 (RAW == '%') || IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00006780 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00006781 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00006782
6783 GROW;
6784 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6785 xmlParseConditionalSections(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00006786 } else if (IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00006787 NEXT;
6788 } else if (RAW == '%') {
6789 xmlParsePEReference(ctxt);
6790 } else
6791 xmlParseMarkupDecl(ctxt);
6792
6793 /*
6794 * Pop-up of finished entities.
6795 */
6796 while ((RAW == 0) && (ctxt->inputNr > 1))
6797 xmlPopInput(ctxt);
6798
Daniel Veillardfdc91562002-07-01 21:52:03 +00006799 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006800 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006801 break;
6802 }
6803 }
6804
6805 if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006806 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006807 }
6808
6809}
6810
6811/**
6812 * xmlParseReference:
6813 * @ctxt: an XML parser context
Daniel Veillard0161e632008-08-28 15:36:32 +00006814 *
Owen Taylor3473f882001-02-23 17:55:21 +00006815 * parse and handle entity references in content, depending on the SAX
6816 * interface, this may end-up in a call to character() if this is a
6817 * CharRef, a predefined entity, if there is no reference() callback.
6818 * or if the parser was asked to switch to that mode.
6819 *
6820 * [67] Reference ::= EntityRef | CharRef
6821 */
6822void
6823xmlParseReference(xmlParserCtxtPtr ctxt) {
6824 xmlEntityPtr ent;
6825 xmlChar *val;
Daniel Veillard0161e632008-08-28 15:36:32 +00006826 int was_checked;
6827 xmlNodePtr list = NULL;
6828 xmlParserErrors ret = XML_ERR_OK;
Owen Taylor3473f882001-02-23 17:55:21 +00006829
Daniel Veillard0161e632008-08-28 15:36:32 +00006830
6831 if (RAW != '&')
6832 return;
6833
6834 /*
6835 * Simple case of a CharRef
6836 */
Owen Taylor3473f882001-02-23 17:55:21 +00006837 if (NXT(1) == '#') {
6838 int i = 0;
6839 xmlChar out[10];
6840 int hex = NXT(2);
Daniel Veillard56a4cb82001-03-24 17:00:36 +00006841 int value = xmlParseCharRef(ctxt);
Daniel Veillard0161e632008-08-28 15:36:32 +00006842
Daniel Veillarddc171602008-03-26 17:41:38 +00006843 if (value == 0)
6844 return;
Owen Taylor3473f882001-02-23 17:55:21 +00006845 if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
6846 /*
6847 * So we are using non-UTF-8 buffers
6848 * Check that the char fit on 8bits, if not
6849 * generate a CharRef.
6850 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00006851 if (value <= 0xFF) {
6852 out[0] = value;
Owen Taylor3473f882001-02-23 17:55:21 +00006853 out[1] = 0;
6854 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
6855 (!ctxt->disableSAX))
6856 ctxt->sax->characters(ctxt->userData, out, 1);
6857 } else {
6858 if ((hex == 'x') || (hex == 'X'))
Aleksey Sanin49cc9752002-06-14 17:07:10 +00006859 snprintf((char *)out, sizeof(out), "#x%X", value);
Owen Taylor3473f882001-02-23 17:55:21 +00006860 else
Aleksey Sanin49cc9752002-06-14 17:07:10 +00006861 snprintf((char *)out, sizeof(out), "#%d", value);
Owen Taylor3473f882001-02-23 17:55:21 +00006862 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
6863 (!ctxt->disableSAX))
6864 ctxt->sax->reference(ctxt->userData, out);
6865 }
6866 } else {
6867 /*
6868 * Just encode the value in UTF-8
6869 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00006870 COPY_BUF(0 ,out, i, value);
Owen Taylor3473f882001-02-23 17:55:21 +00006871 out[i] = 0;
6872 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
6873 (!ctxt->disableSAX))
6874 ctxt->sax->characters(ctxt->userData, out, i);
6875 }
Daniel Veillard0161e632008-08-28 15:36:32 +00006876 return;
6877 }
Daniel Veillarda37a6ad2006-10-10 20:05:45 +00006878
Daniel Veillard0161e632008-08-28 15:36:32 +00006879 /*
6880 * We are seeing an entity reference
6881 */
6882 ent = xmlParseEntityRef(ctxt);
6883 if (ent == NULL) return;
6884 if (!ctxt->wellFormed)
6885 return;
6886 was_checked = ent->checked;
6887
6888 /* special case of predefined entities */
6889 if ((ent->name == NULL) ||
6890 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
6891 val = ent->content;
6892 if (val == NULL) return;
6893 /*
6894 * inline the entity.
6895 */
6896 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
6897 (!ctxt->disableSAX))
6898 ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
6899 return;
6900 }
6901
6902 /*
6903 * The first reference to the entity trigger a parsing phase
6904 * where the ent->children is filled with the result from
6905 * the parsing.
6906 */
6907 if (ent->checked == 0) {
6908 unsigned long oldnbent = ctxt->nbentities;
6909
6910 /*
6911 * This is a bit hackish but this seems the best
6912 * way to make sure both SAX and DOM entity support
6913 * behaves okay.
6914 */
6915 void *user_data;
6916 if (ctxt->userData == ctxt)
6917 user_data = NULL;
6918 else
6919 user_data = ctxt->userData;
6920
6921 /*
6922 * Check that this entity is well formed
6923 * 4.3.2: An internal general parsed entity is well-formed
6924 * if its replacement text matches the production labeled
6925 * content.
6926 */
6927 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
6928 ctxt->depth++;
6929 ret = xmlParseBalancedChunkMemoryInternal(ctxt, ent->content,
6930 user_data, &list);
6931 ctxt->depth--;
6932
6933 } else if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
6934 ctxt->depth++;
6935 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt, ctxt->sax,
6936 user_data, ctxt->depth, ent->URI,
6937 ent->ExternalID, &list);
6938 ctxt->depth--;
6939 } else {
6940 ret = XML_ERR_ENTITY_PE_INTERNAL;
6941 xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
6942 "invalid entity type found\n", NULL);
6943 }
6944
6945 /*
6946 * Store the number of entities needing parsing for this entity
6947 * content and do checkings
6948 */
6949 ent->checked = ctxt->nbentities - oldnbent;
6950 if (ret == XML_ERR_ENTITY_LOOP) {
Daniel Veillard4bf899b2008-08-20 17:04:30 +00006951 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
Daniel Veillard0161e632008-08-28 15:36:32 +00006952 xmlFreeNodeList(list);
Daniel Veillard4bf899b2008-08-20 17:04:30 +00006953 return;
6954 }
Daniel Veillard0161e632008-08-28 15:36:32 +00006955 if (xmlParserEntityCheck(ctxt, 0, ent)) {
6956 xmlFreeNodeList(list);
6957 return;
6958 }
Owen Taylor3473f882001-02-23 17:55:21 +00006959
Daniel Veillard0161e632008-08-28 15:36:32 +00006960 if ((ret == XML_ERR_OK) && (list != NULL)) {
6961 if (((ent->etype == XML_INTERNAL_GENERAL_ENTITY) ||
6962 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY))&&
6963 (ent->children == NULL)) {
6964 ent->children = list;
6965 if (ctxt->replaceEntities) {
Owen Taylor3473f882001-02-23 17:55:21 +00006966 /*
Daniel Veillard0161e632008-08-28 15:36:32 +00006967 * Prune it directly in the generated document
6968 * except for single text nodes.
Owen Taylor3473f882001-02-23 17:55:21 +00006969 */
Daniel Veillard0161e632008-08-28 15:36:32 +00006970 if (((list->type == XML_TEXT_NODE) &&
6971 (list->next == NULL)) ||
6972 (ctxt->parseMode == XML_PARSE_READER)) {
6973 list->parent = (xmlNodePtr) ent;
6974 list = NULL;
6975 ent->owner = 1;
6976 } else {
6977 ent->owner = 0;
6978 while (list != NULL) {
6979 list->parent = (xmlNodePtr) ctxt->node;
6980 list->doc = ctxt->myDoc;
6981 if (list->next == NULL)
6982 ent->last = list;
6983 list = list->next;
Owen Taylor3473f882001-02-23 17:55:21 +00006984 }
Daniel Veillard0161e632008-08-28 15:36:32 +00006985 list = ent->children;
6986#ifdef LIBXML_LEGACY_ENABLED
6987 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
6988 xmlAddEntityReference(ent, list, NULL);
6989#endif /* LIBXML_LEGACY_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00006990 }
6991 } else {
Daniel Veillard0161e632008-08-28 15:36:32 +00006992 ent->owner = 1;
6993 while (list != NULL) {
6994 list->parent = (xmlNodePtr) ent;
6995 if (list->next == NULL)
6996 ent->last = list;
6997 list = list->next;
Owen Taylor3473f882001-02-23 17:55:21 +00006998 }
6999 }
Daniel Veillard0161e632008-08-28 15:36:32 +00007000 } else {
7001 xmlFreeNodeList(list);
7002 list = NULL;
Daniel Veillarda37a6ad2006-10-10 20:05:45 +00007003 }
Daniel Veillard0161e632008-08-28 15:36:32 +00007004 } else if ((ret != XML_ERR_OK) &&
7005 (ret != XML_WAR_UNDECLARED_ENTITY)) {
7006 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7007 "Entity '%s' failed to parse\n", ent->name);
7008 } else if (list != NULL) {
7009 xmlFreeNodeList(list);
7010 list = NULL;
7011 }
7012 if (ent->checked == 0)
7013 ent->checked = 1;
7014 } else if (ent->checked != 1) {
7015 ctxt->nbentities += ent->checked;
7016 }
Daniel Veillarda37a6ad2006-10-10 20:05:45 +00007017
Daniel Veillard0161e632008-08-28 15:36:32 +00007018 /*
7019 * Now that the entity content has been gathered
7020 * provide it to the application, this can take different forms based
7021 * on the parsing modes.
7022 */
7023 if (ent->children == NULL) {
7024 /*
7025 * Probably running in SAX mode and the callbacks don't
7026 * build the entity content. So unless we already went
7027 * though parsing for first checking go though the entity
7028 * content to generate callbacks associated to the entity
7029 */
7030 if (was_checked != 0) {
7031 void *user_data;
Owen Taylor3473f882001-02-23 17:55:21 +00007032 /*
Daniel Veillard0161e632008-08-28 15:36:32 +00007033 * This is a bit hackish but this seems the best
7034 * way to make sure both SAX and DOM entity support
7035 * behaves okay.
Owen Taylor3473f882001-02-23 17:55:21 +00007036 */
Daniel Veillard0161e632008-08-28 15:36:32 +00007037 if (ctxt->userData == ctxt)
7038 user_data = NULL;
7039 else
7040 user_data = ctxt->userData;
7041
7042 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
7043 ctxt->depth++;
7044 ret = xmlParseBalancedChunkMemoryInternal(ctxt,
7045 ent->content, user_data, NULL);
7046 ctxt->depth--;
7047 } else if (ent->etype ==
7048 XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
7049 ctxt->depth++;
7050 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
7051 ctxt->sax, user_data, ctxt->depth,
7052 ent->URI, ent->ExternalID, NULL);
7053 ctxt->depth--;
7054 } else {
7055 ret = XML_ERR_ENTITY_PE_INTERNAL;
7056 xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
7057 "invalid entity type found\n", NULL);
7058 }
7059 if (ret == XML_ERR_ENTITY_LOOP) {
7060 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
7061 return;
7062 }
7063 }
7064 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7065 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
7066 /*
7067 * Entity reference callback comes second, it's somewhat
7068 * superfluous but a compatibility to historical behaviour
7069 */
7070 ctxt->sax->reference(ctxt->userData, ent->name);
7071 }
7072 return;
7073 }
7074
    /*
     * The entity has children at this point. When entities are not
     * being substituted, just report the reference to the application.
     */
7078 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7079 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
7080 /*
7081 * Create a node.
7082 */
7083 ctxt->sax->reference(ctxt->userData, ent->name);
7084 return;
7085 }
7086
7087 if ((ctxt->replaceEntities) || (ent->children == NULL)) {
7088 /*
7089 * There is a problem on the handling of _private for entities
7090 * (bug 155816): Should we copy the content of the field from
7091 * the entity (possibly overwriting some value set by the user
7092 * when a copy is created), should we leave it alone, or should
7093 * we try to take care of different situations? The problem
7094 * is exacerbated by the usage of this field by the xmlReader.
7095 * To fix this bug, we look at _private on the created node
7096 * and, if it's NULL, we copy in whatever was in the entity.
7097 * If it's not NULL we leave it alone. This is somewhat of a
7098 * hack - maybe we should have further tests to determine
7099 * what to do.
7100 */
7101 if ((ctxt->node != NULL) && (ent->children != NULL)) {
7102 /*
7103 * Seems we are generating the DOM content, do
7104 * a simple tree copy for all references except the first
7105 * In the first occurrence list contains the replacement.
             * parseMode == XML_PARSE_READER means we are operating on the
             * Reader and since nodes are discarded we must copy all the time.
7108 */
7109 if (((list == NULL) && (ent->owner == 0)) ||
7110 (ctxt->parseMode == XML_PARSE_READER)) {
7111 xmlNodePtr nw = NULL, cur, firstChild = NULL;
7112
7113 /*
7114 * when operating on a reader, the entities definitions
7115 * are always owning the entities subtree.
7116 if (ctxt->parseMode == XML_PARSE_READER)
7117 ent->owner = 1;
7118 */
7119
7120 cur = ent->children;
7121 while (cur != NULL) {
7122 nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
7123 if (nw != NULL) {
7124 if (nw->_private == NULL)
7125 nw->_private = cur->_private;
7126 if (firstChild == NULL){
7127 firstChild = nw;
7128 }
7129 nw = xmlAddChild(ctxt->node, nw);
7130 }
7131 if (cur == ent->last) {
7132 /*
7133 * needed to detect some strange empty
7134 * node cases in the reader tests
7135 */
7136 if ((ctxt->parseMode == XML_PARSE_READER) &&
7137 (nw != NULL) &&
7138 (nw->type == XML_ELEMENT_NODE) &&
7139 (nw->children == NULL))
7140 nw->extra = 1;
7141
7142 break;
7143 }
7144 cur = cur->next;
7145 }
7146#ifdef LIBXML_LEGACY_ENABLED
7147 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7148 xmlAddEntityReference(ent, firstChild, nw);
7149#endif /* LIBXML_LEGACY_ENABLED */
7150 } else if (list == NULL) {
7151 xmlNodePtr nw = NULL, cur, next, last,
7152 firstChild = NULL;
7153 /*
7154 * Copy the entity child list and make it the new
7155 * entity child list. The goal is to make sure any
7156 * ID or REF referenced will be the one from the
7157 * document content and not the entity copy.
7158 */
7159 cur = ent->children;
7160 ent->children = NULL;
7161 last = ent->last;
7162 ent->last = NULL;
7163 while (cur != NULL) {
7164 next = cur->next;
7165 cur->next = NULL;
7166 cur->parent = NULL;
7167 nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
7168 if (nw != NULL) {
7169 if (nw->_private == NULL)
7170 nw->_private = cur->_private;
7171 if (firstChild == NULL){
7172 firstChild = cur;
7173 }
7174 xmlAddChild((xmlNodePtr) ent, nw);
7175 xmlAddChild(ctxt->node, cur);
7176 }
7177 if (cur == last)
7178 break;
7179 cur = next;
7180 }
Daniel Veillardcba68392008-08-29 12:43:40 +00007181 if (ent->owner == 0)
7182 ent->owner = 1;
Daniel Veillard0161e632008-08-28 15:36:32 +00007183#ifdef LIBXML_LEGACY_ENABLED
7184 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7185 xmlAddEntityReference(ent, firstChild, nw);
7186#endif /* LIBXML_LEGACY_ENABLED */
7187 } else {
7188 const xmlChar *nbktext;
7189
7190 /*
7191 * the name change is to avoid coalescing of the
7192 * node with a possible previous text one which
7193 * would make ent->children a dangling pointer
7194 */
7195 nbktext = xmlDictLookup(ctxt->dict, BAD_CAST "nbktext",
7196 -1);
7197 if (ent->children->type == XML_TEXT_NODE)
7198 ent->children->name = nbktext;
7199 if ((ent->last != ent->children) &&
7200 (ent->last->type == XML_TEXT_NODE))
7201 ent->last->name = nbktext;
7202 xmlAddChildList(ctxt->node, ent->children);
7203 }
7204
7205 /*
7206 * This is to avoid a nasty side effect, see
7207 * characters() in SAX.c
7208 */
7209 ctxt->nodemem = 0;
7210 ctxt->nodelen = 0;
7211 return;
Owen Taylor3473f882001-02-23 17:55:21 +00007212 }
7213 }
7214}
7215
7216/**
7217 * xmlParseEntityRef:
7218 * @ctxt: an XML parser context
7219 *
7220 * parse ENTITY references declarations
7221 *
7222 * [68] EntityRef ::= '&' Name ';'
7223 *
7224 * [ WFC: Entity Declared ]
7225 * In a document without any DTD, a document with only an internal DTD
7226 * subset which contains no parameter entity references, or a document
7227 * with "standalone='yes'", the Name given in the entity reference
7228 * must match that in an entity declaration, except that well-formed
7229 * documents need not declare any of the following entities: amp, lt,
7230 * gt, apos, quot. The declaration of a parameter entity must precede
7231 * any reference to it. Similarly, the declaration of a general entity
7232 * must precede any reference to it which appears in a default value in an
7233 * attribute-list declaration. Note that if entities are declared in the
7234 * external subset or in external parameter entities, a non-validating
7235 * processor is not obligated to read and process their declarations;
7236 * for such documents, the rule that an entity must be declared is a
7237 * well-formedness constraint only if standalone='yes'.
7238 *
7239 * [ WFC: Parsed Entity ]
7240 * An entity reference must not contain the name of an unparsed entity
7241 *
7242 * Returns the xmlEntityPtr if found, or NULL otherwise.
7243 */
7244xmlEntityPtr
7245xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007246 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00007247 xmlEntityPtr ent = NULL;
7248
7249 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00007250
Daniel Veillard0161e632008-08-28 15:36:32 +00007251 if (RAW != '&')
7252 return(NULL);
7253 NEXT;
7254 name = xmlParseName(ctxt);
7255 if (name == NULL) {
7256 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7257 "xmlParseEntityRef: no name\n");
7258 return(NULL);
7259 }
7260 if (RAW != ';') {
7261 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7262 return(NULL);
7263 }
7264 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00007265
Daniel Veillard0161e632008-08-28 15:36:32 +00007266 /*
7267 * Predefined entites override any extra definition
7268 */
Rob Richardsb9ed0172009-01-05 17:28:50 +00007269 if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7270 ent = xmlGetPredefinedEntity(name);
7271 if (ent != NULL)
7272 return(ent);
7273 }
Owen Taylor3473f882001-02-23 17:55:21 +00007274
Daniel Veillard0161e632008-08-28 15:36:32 +00007275 /*
7276 * Increate the number of entity references parsed
7277 */
7278 ctxt->nbentities++;
Owen Taylor3473f882001-02-23 17:55:21 +00007279
Daniel Veillard0161e632008-08-28 15:36:32 +00007280 /*
7281 * Ask first SAX for entity resolution, otherwise try the
7282 * entities which may have stored in the parser context.
7283 */
7284 if (ctxt->sax != NULL) {
7285 if (ctxt->sax->getEntity != NULL)
7286 ent = ctxt->sax->getEntity(ctxt->userData, name);
Rob Richardsb9ed0172009-01-05 17:28:50 +00007287 if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7288 (ctxt->options & XML_PARSE_OLDSAX))
7289 ent = xmlGetPredefinedEntity(name);
Daniel Veillard0161e632008-08-28 15:36:32 +00007290 if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7291 (ctxt->userData==ctxt)) {
7292 ent = xmlSAX2GetEntity(ctxt, name);
Owen Taylor3473f882001-02-23 17:55:21 +00007293 }
7294 }
Daniel Veillard0161e632008-08-28 15:36:32 +00007295 /*
7296 * [ WFC: Entity Declared ]
7297 * In a document without any DTD, a document with only an
7298 * internal DTD subset which contains no parameter entity
7299 * references, or a document with "standalone='yes'", the
7300 * Name given in the entity reference must match that in an
7301 * entity declaration, except that well-formed documents
7302 * need not declare any of the following entities: amp, lt,
7303 * gt, apos, quot.
7304 * The declaration of a parameter entity must precede any
7305 * reference to it.
7306 * Similarly, the declaration of a general entity must
7307 * precede any reference to it which appears in a default
7308 * value in an attribute-list declaration. Note that if
7309 * entities are declared in the external subset or in
7310 * external parameter entities, a non-validating processor
7311 * is not obligated to read and process their declarations;
7312 * for such documents, the rule that an entity must be
7313 * declared is a well-formedness constraint only if
7314 * standalone='yes'.
7315 */
7316 if (ent == NULL) {
7317 if ((ctxt->standalone == 1) ||
7318 ((ctxt->hasExternalSubset == 0) &&
7319 (ctxt->hasPErefs == 0))) {
7320 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7321 "Entity '%s' not defined\n", name);
7322 } else {
7323 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7324 "Entity '%s' not defined\n", name);
7325 if ((ctxt->inSubset == 0) &&
7326 (ctxt->sax != NULL) &&
7327 (ctxt->sax->reference != NULL)) {
7328 ctxt->sax->reference(ctxt->userData, name);
7329 }
7330 }
7331 ctxt->valid = 0;
7332 }
7333
7334 /*
7335 * [ WFC: Parsed Entity ]
7336 * An entity reference must not contain the name of an
7337 * unparsed entity
7338 */
7339 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7340 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7341 "Entity reference to unparsed entity %s\n", name);
7342 }
7343
7344 /*
7345 * [ WFC: No External Entity References ]
7346 * Attribute values cannot contain direct or indirect
7347 * entity references to external entities.
7348 */
7349 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7350 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
7351 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7352 "Attribute references external entity '%s'\n", name);
7353 }
7354 /*
7355 * [ WFC: No < in Attribute Values ]
7356 * The replacement text of any entity referred to directly or
7357 * indirectly in an attribute value (other than "&lt;") must
7358 * not contain a <.
7359 */
7360 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7361 (ent != NULL) && (ent->content != NULL) &&
Rob Richardsb9ed0172009-01-05 17:28:50 +00007362 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
Daniel Veillard0161e632008-08-28 15:36:32 +00007363 (xmlStrchr(ent->content, '<'))) {
7364 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
7365 "'<' in entity '%s' is not allowed in attributes values\n", name);
7366 }
7367
7368 /*
7369 * Internal check, no parameter entities here ...
7370 */
7371 else {
7372 switch (ent->etype) {
7373 case XML_INTERNAL_PARAMETER_ENTITY:
7374 case XML_EXTERNAL_PARAMETER_ENTITY:
7375 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
7376 "Attempt to reference the parameter entity '%s'\n",
7377 name);
7378 break;
7379 default:
7380 break;
7381 }
7382 }
7383
7384 /*
7385 * [ WFC: No Recursion ]
7386 * A parsed entity must not contain a recursive reference
7387 * to itself, either directly or indirectly.
7388 * Done somewhere else
7389 */
Owen Taylor3473f882001-02-23 17:55:21 +00007390 return(ent);
7391}
7392
7393/**
7394 * xmlParseStringEntityRef:
7395 * @ctxt: an XML parser context
7396 * @str: a pointer to an index in the string
7397 *
7398 * parse ENTITY references declarations, but this version parses it from
7399 * a string value.
7400 *
7401 * [68] EntityRef ::= '&' Name ';'
7402 *
7403 * [ WFC: Entity Declared ]
7404 * In a document without any DTD, a document with only an internal DTD
7405 * subset which contains no parameter entity references, or a document
7406 * with "standalone='yes'", the Name given in the entity reference
7407 * must match that in an entity declaration, except that well-formed
7408 * documents need not declare any of the following entities: amp, lt,
7409 * gt, apos, quot. The declaration of a parameter entity must precede
7410 * any reference to it. Similarly, the declaration of a general entity
7411 * must precede any reference to it which appears in a default value in an
7412 * attribute-list declaration. Note that if entities are declared in the
7413 * external subset or in external parameter entities, a non-validating
7414 * processor is not obligated to read and process their declarations;
7415 * for such documents, the rule that an entity must be declared is a
7416 * well-formedness constraint only if standalone='yes'.
7417 *
7418 * [ WFC: Parsed Entity ]
7419 * An entity reference must not contain the name of an unparsed entity
7420 *
7421 * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
7422 * is updated to the current location in the string.
7423 */
Daniel Veillard8ed10722009-08-20 19:17:36 +02007424static xmlEntityPtr
Owen Taylor3473f882001-02-23 17:55:21 +00007425xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
7426 xmlChar *name;
7427 const xmlChar *ptr;
7428 xmlChar cur;
7429 xmlEntityPtr ent = NULL;
7430
7431 if ((str == NULL) || (*str == NULL))
7432 return(NULL);
7433 ptr = *str;
7434 cur = *ptr;
Daniel Veillard0161e632008-08-28 15:36:32 +00007435 if (cur != '&')
7436 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007437
Daniel Veillard0161e632008-08-28 15:36:32 +00007438 ptr++;
Daniel Veillard0161e632008-08-28 15:36:32 +00007439 name = xmlParseStringName(ctxt, &ptr);
7440 if (name == NULL) {
7441 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7442 "xmlParseStringEntityRef: no name\n");
7443 *str = ptr;
7444 return(NULL);
7445 }
7446 if (*ptr != ';') {
7447 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
Daniel Veillard7f4547c2008-10-03 07:58:23 +00007448 xmlFree(name);
Daniel Veillard0161e632008-08-28 15:36:32 +00007449 *str = ptr;
7450 return(NULL);
7451 }
7452 ptr++;
Owen Taylor3473f882001-02-23 17:55:21 +00007453
Owen Taylor3473f882001-02-23 17:55:21 +00007454
Daniel Veillard0161e632008-08-28 15:36:32 +00007455 /*
7456 * Predefined entites override any extra definition
7457 */
Rob Richardsb9ed0172009-01-05 17:28:50 +00007458 if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7459 ent = xmlGetPredefinedEntity(name);
7460 if (ent != NULL) {
7461 xmlFree(name);
7462 *str = ptr;
7463 return(ent);
7464 }
Daniel Veillard34a7fc32008-10-02 20:55:10 +00007465 }
Owen Taylor3473f882001-02-23 17:55:21 +00007466
Daniel Veillard0161e632008-08-28 15:36:32 +00007467 /*
7468 * Increate the number of entity references parsed
7469 */
7470 ctxt->nbentities++;
Owen Taylor3473f882001-02-23 17:55:21 +00007471
Daniel Veillard0161e632008-08-28 15:36:32 +00007472 /*
7473 * Ask first SAX for entity resolution, otherwise try the
7474 * entities which may have stored in the parser context.
7475 */
7476 if (ctxt->sax != NULL) {
7477 if (ctxt->sax->getEntity != NULL)
7478 ent = ctxt->sax->getEntity(ctxt->userData, name);
Rob Richardsb9ed0172009-01-05 17:28:50 +00007479 if ((ent == NULL) && (ctxt->options & XML_PARSE_OLDSAX))
7480 ent = xmlGetPredefinedEntity(name);
Daniel Veillard0161e632008-08-28 15:36:32 +00007481 if ((ent == NULL) && (ctxt->userData==ctxt)) {
7482 ent = xmlSAX2GetEntity(ctxt, name);
Owen Taylor3473f882001-02-23 17:55:21 +00007483 }
7484 }
Daniel Veillard0161e632008-08-28 15:36:32 +00007485
7486 /*
7487 * [ WFC: Entity Declared ]
7488 * In a document without any DTD, a document with only an
7489 * internal DTD subset which contains no parameter entity
7490 * references, or a document with "standalone='yes'", the
7491 * Name given in the entity reference must match that in an
7492 * entity declaration, except that well-formed documents
7493 * need not declare any of the following entities: amp, lt,
7494 * gt, apos, quot.
7495 * The declaration of a parameter entity must precede any
7496 * reference to it.
7497 * Similarly, the declaration of a general entity must
7498 * precede any reference to it which appears in a default
7499 * value in an attribute-list declaration. Note that if
7500 * entities are declared in the external subset or in
7501 * external parameter entities, a non-validating processor
7502 * is not obligated to read and process their declarations;
7503 * for such documents, the rule that an entity must be
7504 * declared is a well-formedness constraint only if
7505 * standalone='yes'.
7506 */
7507 if (ent == NULL) {
7508 if ((ctxt->standalone == 1) ||
7509 ((ctxt->hasExternalSubset == 0) &&
7510 (ctxt->hasPErefs == 0))) {
7511 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7512 "Entity '%s' not defined\n", name);
7513 } else {
7514 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7515 "Entity '%s' not defined\n",
7516 name);
7517 }
7518 /* TODO ? check regressions ctxt->valid = 0; */
7519 }
7520
7521 /*
7522 * [ WFC: Parsed Entity ]
7523 * An entity reference must not contain the name of an
7524 * unparsed entity
7525 */
7526 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7527 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7528 "Entity reference to unparsed entity %s\n", name);
7529 }
7530
7531 /*
7532 * [ WFC: No External Entity References ]
7533 * Attribute values cannot contain direct or indirect
7534 * entity references to external entities.
7535 */
7536 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7537 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
7538 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7539 "Attribute references external entity '%s'\n", name);
7540 }
7541 /*
7542 * [ WFC: No < in Attribute Values ]
7543 * The replacement text of any entity referred to directly or
7544 * indirectly in an attribute value (other than "&lt;") must
7545 * not contain a <.
7546 */
7547 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7548 (ent != NULL) && (ent->content != NULL) &&
Rob Richardsb9ed0172009-01-05 17:28:50 +00007549 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
Daniel Veillard0161e632008-08-28 15:36:32 +00007550 (xmlStrchr(ent->content, '<'))) {
7551 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
7552 "'<' in entity '%s' is not allowed in attributes values\n",
7553 name);
7554 }
7555
7556 /*
7557 * Internal check, no parameter entities here ...
7558 */
7559 else {
7560 switch (ent->etype) {
7561 case XML_INTERNAL_PARAMETER_ENTITY:
7562 case XML_EXTERNAL_PARAMETER_ENTITY:
7563 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
7564 "Attempt to reference the parameter entity '%s'\n",
7565 name);
7566 break;
7567 default:
7568 break;
7569 }
7570 }
7571
7572 /*
7573 * [ WFC: No Recursion ]
7574 * A parsed entity must not contain a recursive reference
7575 * to itself, either directly or indirectly.
7576 * Done somewhere else
7577 */
7578
7579 xmlFree(name);
Owen Taylor3473f882001-02-23 17:55:21 +00007580 *str = ptr;
7581 return(ent);
7582}
7583
7584/**
7585 * xmlParsePEReference:
7586 * @ctxt: an XML parser context
7587 *
7588 * parse PEReference declarations
7589 * The entity content is handled directly by pushing it's content as
7590 * a new input stream.
7591 *
7592 * [69] PEReference ::= '%' Name ';'
7593 *
7594 * [ WFC: No Recursion ]
7595 * A parsed entity must not contain a recursive
7596 * reference to itself, either directly or indirectly.
7597 *
7598 * [ WFC: Entity Declared ]
7599 * In a document without any DTD, a document with only an internal DTD
7600 * subset which contains no parameter entity references, or a document
7601 * with "standalone='yes'", ... ... The declaration of a parameter
7602 * entity must precede any reference to it...
7603 *
7604 * [ VC: Entity Declared ]
7605 * In a document with an external subset or external parameter entities
7606 * with "standalone='no'", ... ... The declaration of a parameter entity
7607 * must precede any reference to it...
7608 *
7609 * [ WFC: In DTD ]
7610 * Parameter-entity references may only appear in the DTD.
7611 * NOTE: misleading but this is handled.
7612 */
7613void
Daniel Veillard8f597c32003-10-06 08:19:27 +00007614xmlParsePEReference(xmlParserCtxtPtr ctxt)
7615{
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007616 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00007617 xmlEntityPtr entity = NULL;
7618 xmlParserInputPtr input;
7619
Daniel Veillard0161e632008-08-28 15:36:32 +00007620 if (RAW != '%')
7621 return;
7622 NEXT;
7623 name = xmlParseName(ctxt);
7624 if (name == NULL) {
7625 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7626 "xmlParsePEReference: no name\n");
7627 return;
Owen Taylor3473f882001-02-23 17:55:21 +00007628 }
Daniel Veillard0161e632008-08-28 15:36:32 +00007629 if (RAW != ';') {
7630 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7631 return;
7632 }
7633
7634 NEXT;
7635
7636 /*
7637 * Increate the number of entity references parsed
7638 */
7639 ctxt->nbentities++;
7640
7641 /*
7642 * Request the entity from SAX
7643 */
7644 if ((ctxt->sax != NULL) &&
7645 (ctxt->sax->getParameterEntity != NULL))
7646 entity = ctxt->sax->getParameterEntity(ctxt->userData,
7647 name);
7648 if (entity == NULL) {
7649 /*
7650 * [ WFC: Entity Declared ]
7651 * In a document without any DTD, a document with only an
7652 * internal DTD subset which contains no parameter entity
7653 * references, or a document with "standalone='yes'", ...
7654 * ... The declaration of a parameter entity must precede
7655 * any reference to it...
7656 */
7657 if ((ctxt->standalone == 1) ||
7658 ((ctxt->hasExternalSubset == 0) &&
7659 (ctxt->hasPErefs == 0))) {
7660 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7661 "PEReference: %%%s; not found\n",
7662 name);
7663 } else {
7664 /*
7665 * [ VC: Entity Declared ]
7666 * In a document with an external subset or external
7667 * parameter entities with "standalone='no'", ...
7668 * ... The declaration of a parameter entity must
7669 * precede any reference to it...
7670 */
7671 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7672 "PEReference: %%%s; not found\n",
7673 name, NULL);
7674 ctxt->valid = 0;
7675 }
7676 } else {
7677 /*
7678 * Internal checking in case the entity quest barfed
7679 */
7680 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
7681 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
7682 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7683 "Internal: %%%s; is not a parameter entity\n",
7684 name, NULL);
7685 } else if (ctxt->input->free != deallocblankswrapper) {
7686 input = xmlNewBlanksWrapperInputStream(ctxt, entity);
7687 if (xmlPushInput(ctxt, input) < 0)
7688 return;
7689 } else {
7690 /*
7691 * TODO !!!
7692 * handle the extra spaces added before and after
7693 * c.f. http://www.w3.org/TR/REC-xml#as-PE
7694 */
7695 input = xmlNewEntityInputStream(ctxt, entity);
7696 if (xmlPushInput(ctxt, input) < 0)
7697 return;
7698 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
7699 (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
7700 (IS_BLANK_CH(NXT(5)))) {
7701 xmlParseTextDecl(ctxt);
7702 if (ctxt->errNo ==
7703 XML_ERR_UNSUPPORTED_ENCODING) {
7704 /*
7705 * The XML REC instructs us to stop parsing
7706 * right here
7707 */
7708 ctxt->instate = XML_PARSER_EOF;
7709 return;
7710 }
7711 }
7712 }
7713 }
7714 ctxt->hasPErefs = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007715}
7716
7717/**
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00007718 * xmlLoadEntityContent:
7719 * @ctxt: an XML parser context
7720 * @entity: an unloaded system entity
7721 *
7722 * Load the original content of the given system entity from the
7723 * ExternalID/SystemID given. This is to be used for Included in Literal
7724 * http://www.w3.org/TR/REC-xml/#inliteral processing of entities references
7725 *
7726 * Returns 0 in case of success and -1 in case of failure
7727 */
7728static int
7729xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
7730 xmlParserInputPtr input;
7731 xmlBufferPtr buf;
7732 int l, c;
7733 int count = 0;
7734
7735 if ((ctxt == NULL) || (entity == NULL) ||
7736 ((entity->etype != XML_EXTERNAL_PARAMETER_ENTITY) &&
7737 (entity->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY)) ||
7738 (entity->content != NULL)) {
7739 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
7740 "xmlLoadEntityContent parameter error");
7741 return(-1);
7742 }
7743
7744 if (xmlParserDebugEntities)
7745 xmlGenericError(xmlGenericErrorContext,
7746 "Reading %s entity content input\n", entity->name);
7747
7748 buf = xmlBufferCreate();
7749 if (buf == NULL) {
7750 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
7751 "xmlLoadEntityContent parameter error");
7752 return(-1);
7753 }
7754
7755 input = xmlNewEntityInputStream(ctxt, entity);
7756 if (input == NULL) {
7757 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
7758 "xmlLoadEntityContent input error");
7759 xmlBufferFree(buf);
7760 return(-1);
7761 }
7762
7763 /*
7764 * Push the entity as the current input, read char by char
7765 * saving to the buffer until the end of the entity or an error
7766 */
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00007767 if (xmlPushInput(ctxt, input) < 0) {
7768 xmlBufferFree(buf);
7769 return(-1);
7770 }
7771
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00007772 GROW;
7773 c = CUR_CHAR(l);
7774 while ((ctxt->input == input) && (ctxt->input->cur < ctxt->input->end) &&
7775 (IS_CHAR(c))) {
7776 xmlBufferAdd(buf, ctxt->input->cur, l);
7777 if (count++ > 100) {
7778 count = 0;
7779 GROW;
7780 }
7781 NEXTL(l);
7782 c = CUR_CHAR(l);
7783 }
7784
7785 if ((ctxt->input == input) && (ctxt->input->cur >= ctxt->input->end)) {
7786 xmlPopInput(ctxt);
7787 } else if (!IS_CHAR(c)) {
7788 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
7789 "xmlLoadEntityContent: invalid char value %d\n",
7790 c);
7791 xmlBufferFree(buf);
7792 return(-1);
7793 }
7794 entity->content = buf->content;
7795 buf->content = NULL;
7796 xmlBufferFree(buf);
7797
7798 return(0);
7799}
7800
7801/**
Owen Taylor3473f882001-02-23 17:55:21 +00007802 * xmlParseStringPEReference:
7803 * @ctxt: an XML parser context
7804 * @str: a pointer to an index in the string
7805 *
7806 * parse PEReference declarations
7807 *
7808 * [69] PEReference ::= '%' Name ';'
7809 *
7810 * [ WFC: No Recursion ]
7811 * A parsed entity must not contain a recursive
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00007812 * reference to itself, either directly or indirectly.
Owen Taylor3473f882001-02-23 17:55:21 +00007813 *
7814 * [ WFC: Entity Declared ]
7815 * In a document without any DTD, a document with only an internal DTD
7816 * subset which contains no parameter entity references, or a document
7817 * with "standalone='yes'", ... ... The declaration of a parameter
7818 * entity must precede any reference to it...
7819 *
7820 * [ VC: Entity Declared ]
7821 * In a document with an external subset or external parameter entities
7822 * with "standalone='no'", ... ... The declaration of a parameter entity
7823 * must precede any reference to it...
7824 *
7825 * [ WFC: In DTD ]
7826 * Parameter-entity references may only appear in the DTD.
7827 * NOTE: misleading but this is handled.
7828 *
 * Returns the xmlEntityPtr if found, or NULL otherwise.
 * str is updated to the current value of the index
7831 */
Daniel Veillard8ed10722009-08-20 19:17:36 +02007832static xmlEntityPtr
Owen Taylor3473f882001-02-23 17:55:21 +00007833xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
7834 const xmlChar *ptr;
7835 xmlChar cur;
7836 xmlChar *name;
7837 xmlEntityPtr entity = NULL;
7838
7839 if ((str == NULL) || (*str == NULL)) return(NULL);
7840 ptr = *str;
7841 cur = *ptr;
Daniel Veillard0161e632008-08-28 15:36:32 +00007842 if (cur != '%')
7843 return(NULL);
7844 ptr++;
Daniel Veillard0161e632008-08-28 15:36:32 +00007845 name = xmlParseStringName(ctxt, &ptr);
7846 if (name == NULL) {
7847 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7848 "xmlParseStringPEReference: no name\n");
7849 *str = ptr;
7850 return(NULL);
7851 }
7852 cur = *ptr;
7853 if (cur != ';') {
7854 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7855 xmlFree(name);
7856 *str = ptr;
7857 return(NULL);
7858 }
7859 ptr++;
7860
7861 /*
7862 * Increate the number of entity references parsed
7863 */
7864 ctxt->nbentities++;
7865
7866 /*
7867 * Request the entity from SAX
7868 */
7869 if ((ctxt->sax != NULL) &&
7870 (ctxt->sax->getParameterEntity != NULL))
7871 entity = ctxt->sax->getParameterEntity(ctxt->userData,
7872 name);
7873 if (entity == NULL) {
7874 /*
7875 * [ WFC: Entity Declared ]
7876 * In a document without any DTD, a document with only an
7877 * internal DTD subset which contains no parameter entity
7878 * references, or a document with "standalone='yes'", ...
7879 * ... The declaration of a parameter entity must precede
7880 * any reference to it...
7881 */
7882 if ((ctxt->standalone == 1) ||
7883 ((ctxt->hasExternalSubset == 0) && (ctxt->hasPErefs == 0))) {
7884 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7885 "PEReference: %%%s; not found\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00007886 } else {
Daniel Veillard0161e632008-08-28 15:36:32 +00007887 /*
7888 * [ VC: Entity Declared ]
7889 * In a document with an external subset or external
7890 * parameter entities with "standalone='no'", ...
7891 * ... The declaration of a parameter entity must
7892 * precede any reference to it...
7893 */
7894 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7895 "PEReference: %%%s; not found\n",
7896 name, NULL);
7897 ctxt->valid = 0;
7898 }
7899 } else {
7900 /*
7901 * Internal checking in case the entity quest barfed
7902 */
7903 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
7904 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
7905 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7906 "%%%s; is not a parameter entity\n",
7907 name, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007908 }
7909 }
Daniel Veillard0161e632008-08-28 15:36:32 +00007910 ctxt->hasPErefs = 1;
7911 xmlFree(name);
Owen Taylor3473f882001-02-23 17:55:21 +00007912 *str = ptr;
7913 return(entity);
7914}
7915
7916/**
7917 * xmlParseDocTypeDecl:
7918 * @ctxt: an XML parser context
7919 *
7920 * parse a DOCTYPE declaration
7921 *
7922 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
7923 * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
7924 *
7925 * [ VC: Root Element Type ]
7926 * The Name in the document type declaration must match the element
7927 * type of the root element.
7928 */
7929
7930void
7931xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007932 const xmlChar *name = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00007933 xmlChar *ExternalID = NULL;
7934 xmlChar *URI = NULL;
7935
7936 /*
7937 * We know that '<!DOCTYPE' has been detected.
7938 */
7939 SKIP(9);
7940
7941 SKIP_BLANKS;
7942
7943 /*
7944 * Parse the DOCTYPE name.
7945 */
7946 name = xmlParseName(ctxt);
7947 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007948 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7949 "xmlParseDocTypeDecl : no DOCTYPE name !\n");
Owen Taylor3473f882001-02-23 17:55:21 +00007950 }
7951 ctxt->intSubName = name;
7952
7953 SKIP_BLANKS;
7954
7955 /*
7956 * Check for SystemID and ExternalID
7957 */
7958 URI = xmlParseExternalID(ctxt, &ExternalID, 1);
7959
7960 if ((URI != NULL) || (ExternalID != NULL)) {
7961 ctxt->hasExternalSubset = 1;
7962 }
7963 ctxt->extSubURI = URI;
7964 ctxt->extSubSystem = ExternalID;
7965
7966 SKIP_BLANKS;
7967
7968 /*
7969 * Create and update the internal subset.
7970 */
7971 if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
7972 (!ctxt->disableSAX))
7973 ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
7974
7975 /*
7976 * Is there any internal subset declarations ?
7977 * they are handled separately in xmlParseInternalSubset()
7978 */
7979 if (RAW == '[')
7980 return;
7981
7982 /*
7983 * We should be at the end of the DOCTYPE declaration.
7984 */
7985 if (RAW != '>') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007986 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007987 }
7988 NEXT;
7989}
7990
7991/**
Daniel Veillardcbaf3992001-12-31 16:16:02 +00007992 * xmlParseInternalSubset:
Owen Taylor3473f882001-02-23 17:55:21 +00007993 * @ctxt: an XML parser context
7994 *
7995 * parse the internal subset declaration
7996 *
7997 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
7998 */
7999
Daniel Veillard56a4cb82001-03-24 17:00:36 +00008000static void
Owen Taylor3473f882001-02-23 17:55:21 +00008001xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
8002 /*
8003 * Is there any DTD definition ?
8004 */
8005 if (RAW == '[') {
8006 ctxt->instate = XML_PARSER_DTD;
8007 NEXT;
8008 /*
8009 * Parse the succession of Markup declarations and
8010 * PEReferences.
8011 * Subsequence (markupdecl | PEReference | S)*
8012 */
8013 while (RAW != ']') {
8014 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00008015 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00008016
8017 SKIP_BLANKS;
8018 xmlParseMarkupDecl(ctxt);
8019 xmlParsePEReference(ctxt);
8020
8021 /*
8022 * Pop-up of finished entities.
8023 */
8024 while ((RAW == 0) && (ctxt->inputNr > 1))
8025 xmlPopInput(ctxt);
8026
8027 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008028 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
Owen Taylor3473f882001-02-23 17:55:21 +00008029 "xmlParseInternalSubset: error detected in Markup declaration\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008030 break;
8031 }
8032 }
8033 if (RAW == ']') {
8034 NEXT;
8035 SKIP_BLANKS;
8036 }
8037 }
8038
8039 /*
8040 * We should be at the end of the DOCTYPE declaration.
8041 */
8042 if (RAW != '>') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008043 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008044 }
8045 NEXT;
8046}
8047
Daniel Veillard81273902003-09-30 00:43:48 +00008048#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +00008049/**
8050 * xmlParseAttribute:
8051 * @ctxt: an XML parser context
8052 * @value: a xmlChar ** used to store the value of the attribute
8053 *
8054 * parse an attribute
8055 *
8056 * [41] Attribute ::= Name Eq AttValue
8057 *
8058 * [ WFC: No External Entity References ]
8059 * Attribute values cannot contain direct or indirect entity references
8060 * to external entities.
8061 *
8062 * [ WFC: No < in Attribute Values ]
8063 * The replacement text of any entity referred to directly or indirectly in
8064 * an attribute value (other than "&lt;") must not contain a <.
8065 *
8066 * [ VC: Attribute Value Type ]
8067 * The attribute must have been declared; the value must be of the type
8068 * declared for it.
8069 *
8070 * [25] Eq ::= S? '=' S?
8071 *
8072 * With namespace:
8073 *
8074 * [NS 11] Attribute ::= QName Eq AttValue
8075 *
8076 * Also the case QName == xmlns:??? is handled independently as a namespace
8077 * definition.
8078 *
8079 * Returns the attribute name, and the value in *value.
8080 */
8081
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008082const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00008083xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008084 const xmlChar *name;
8085 xmlChar *val;
Owen Taylor3473f882001-02-23 17:55:21 +00008086
8087 *value = NULL;
Daniel Veillard878eab02002-02-19 13:46:09 +00008088 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00008089 name = xmlParseName(ctxt);
8090 if (name == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00008091 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008092 "error parsing attribute name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008093 return(NULL);
8094 }
8095
8096 /*
8097 * read the value
8098 */
8099 SKIP_BLANKS;
8100 if (RAW == '=') {
8101 NEXT;
8102 SKIP_BLANKS;
8103 val = xmlParseAttValue(ctxt);
8104 ctxt->instate = XML_PARSER_CONTENT;
8105 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00008106 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
Owen Taylor3473f882001-02-23 17:55:21 +00008107 "Specification mandate value for attribute %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00008108 return(NULL);
8109 }
8110
8111 /*
8112 * Check that xml:lang conforms to the specification
8113 * No more registered as an error, just generate a warning now
8114 * since this was deprecated in XML second edition
8115 */
8116 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
8117 if (!xmlCheckLanguageID(val)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00008118 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
8119 "Malformed value for xml:lang : %s\n",
8120 val, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008121 }
8122 }
8123
8124 /*
8125 * Check that xml:space conforms to the specification
8126 */
8127 if (xmlStrEqual(name, BAD_CAST "xml:space")) {
8128 if (xmlStrEqual(val, BAD_CAST "default"))
8129 *(ctxt->space) = 0;
8130 else if (xmlStrEqual(val, BAD_CAST "preserve"))
8131 *(ctxt->space) = 1;
8132 else {
Daniel Veillardd8925572005-06-08 22:34:55 +00008133 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
Daniel Veillard642104e2003-03-26 16:32:05 +00008134"Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
Daniel Veillardd8925572005-06-08 22:34:55 +00008135 val, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008136 }
8137 }
8138
8139 *value = val;
8140 return(name);
8141}
8142
8143/**
8144 * xmlParseStartTag:
8145 * @ctxt: an XML parser context
8146 *
8147 * parse a start of tag either for rule element or
8148 * EmptyElement. In both case we don't parse the tag closing chars.
8149 *
8150 * [40] STag ::= '<' Name (S Attribute)* S? '>'
8151 *
8152 * [ WFC: Unique Att Spec ]
8153 * No attribute name may appear more than once in the same start-tag or
8154 * empty-element tag.
8155 *
8156 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
8157 *
8158 * [ WFC: Unique Att Spec ]
8159 * No attribute name may appear more than once in the same start-tag or
8160 * empty-element tag.
8161 *
8162 * With namespace:
8163 *
8164 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
8165 *
8166 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
8167 *
8168 * Returns the element name parsed
8169 */
8170
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008171const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00008172xmlParseStartTag(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008173 const xmlChar *name;
8174 const xmlChar *attname;
Owen Taylor3473f882001-02-23 17:55:21 +00008175 xmlChar *attvalue;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008176 const xmlChar **atts = ctxt->atts;
Owen Taylor3473f882001-02-23 17:55:21 +00008177 int nbatts = 0;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008178 int maxatts = ctxt->maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00008179 int i;
8180
8181 if (RAW != '<') return(NULL);
Daniel Veillard21a0f912001-02-25 19:54:14 +00008182 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00008183
8184 name = xmlParseName(ctxt);
8185 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008186 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00008187 "xmlParseStartTag: invalid element name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008188 return(NULL);
8189 }
8190
8191 /*
8192 * Now parse the attributes, it ends up with the ending
8193 *
8194 * (S Attribute)* S?
8195 */
8196 SKIP_BLANKS;
8197 GROW;
8198
Daniel Veillard21a0f912001-02-25 19:54:14 +00008199 while ((RAW != '>') &&
8200 ((RAW != '/') || (NXT(1) != '>')) &&
Daniel Veillard73b013f2003-09-30 12:36:01 +00008201 (IS_BYTE_CHAR(RAW))) {
Owen Taylor3473f882001-02-23 17:55:21 +00008202 const xmlChar *q = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00008203 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00008204
8205 attname = xmlParseAttribute(ctxt, &attvalue);
8206 if ((attname != NULL) && (attvalue != NULL)) {
8207 /*
8208 * [ WFC: Unique Att Spec ]
8209 * No attribute name may appear more than once in the same
8210 * start-tag or empty-element tag.
8211 */
8212 for (i = 0; i < nbatts;i += 2) {
8213 if (xmlStrEqual(atts[i], attname)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008214 xmlErrAttributeDup(ctxt, NULL, attname);
Owen Taylor3473f882001-02-23 17:55:21 +00008215 xmlFree(attvalue);
8216 goto failed;
8217 }
8218 }
Owen Taylor3473f882001-02-23 17:55:21 +00008219 /*
8220 * Add the pair to atts
8221 */
8222 if (atts == NULL) {
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008223 maxatts = 22; /* allow for 10 attrs by default */
8224 atts = (const xmlChar **)
8225 xmlMalloc(maxatts * sizeof(xmlChar *));
Owen Taylor3473f882001-02-23 17:55:21 +00008226 if (atts == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008227 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008228 if (attvalue != NULL)
8229 xmlFree(attvalue);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008230 goto failed;
Owen Taylor3473f882001-02-23 17:55:21 +00008231 }
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008232 ctxt->atts = atts;
8233 ctxt->maxatts = maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00008234 } else if (nbatts + 4 > maxatts) {
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008235 const xmlChar **n;
8236
Owen Taylor3473f882001-02-23 17:55:21 +00008237 maxatts *= 2;
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008238 n = (const xmlChar **) xmlRealloc((void *) atts,
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008239 maxatts * sizeof(const xmlChar *));
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008240 if (n == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008241 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008242 if (attvalue != NULL)
8243 xmlFree(attvalue);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008244 goto failed;
Owen Taylor3473f882001-02-23 17:55:21 +00008245 }
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008246 atts = n;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008247 ctxt->atts = atts;
8248 ctxt->maxatts = maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00008249 }
8250 atts[nbatts++] = attname;
8251 atts[nbatts++] = attvalue;
8252 atts[nbatts] = NULL;
8253 atts[nbatts + 1] = NULL;
8254 } else {
Owen Taylor3473f882001-02-23 17:55:21 +00008255 if (attvalue != NULL)
8256 xmlFree(attvalue);
8257 }
8258
8259failed:
8260
Daniel Veillard3772de32002-12-17 10:31:45 +00008261 GROW
Owen Taylor3473f882001-02-23 17:55:21 +00008262 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
8263 break;
William M. Brack76e95df2003-10-18 16:20:14 +00008264 if (!IS_BLANK_CH(RAW)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008265 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
8266 "attributes construct error\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008267 }
8268 SKIP_BLANKS;
Daniel Veillard02111c12003-02-24 19:14:52 +00008269 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
8270 (attname == NULL) && (attvalue == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008271 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
8272 "xmlParseStartTag: problem parsing attributes\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008273 break;
8274 }
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008275 SHRINK;
Owen Taylor3473f882001-02-23 17:55:21 +00008276 GROW;
8277 }
8278
8279 /*
8280 * SAX: Start of Element !
8281 */
8282 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008283 (!ctxt->disableSAX)) {
8284 if (nbatts > 0)
8285 ctxt->sax->startElement(ctxt->userData, name, atts);
8286 else
8287 ctxt->sax->startElement(ctxt->userData, name, NULL);
8288 }
Owen Taylor3473f882001-02-23 17:55:21 +00008289
8290 if (atts != NULL) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008291 /* Free only the content strings */
8292 for (i = 1;i < nbatts;i+=2)
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008293 if (atts[i] != NULL)
8294 xmlFree((xmlChar *) atts[i]);
Owen Taylor3473f882001-02-23 17:55:21 +00008295 }
8296 return(name);
8297}
8298
8299/**
Daniel Veillard0fb18932003-09-07 09:14:37 +00008300 * xmlParseEndTag1:
Owen Taylor3473f882001-02-23 17:55:21 +00008301 * @ctxt: an XML parser context
Daniel Veillard0fb18932003-09-07 09:14:37 +00008302 * @line: line of the start tag
8303 * @nsNr: number of namespaces on the start tag
Owen Taylor3473f882001-02-23 17:55:21 +00008304 *
8305 * parse an end of tag
8306 *
8307 * [42] ETag ::= '</' Name S? '>'
8308 *
8309 * With namespace
8310 *
8311 * [NS 9] ETag ::= '</' QName S? '>'
8312 */
8313
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00008314static void
Daniel Veillard0fb18932003-09-07 09:14:37 +00008315xmlParseEndTag1(xmlParserCtxtPtr ctxt, int line) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008316 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00008317
8318 GROW;
8319 if ((RAW != '<') || (NXT(1) != '/')) {
Daniel Veillardf403d292003-10-05 13:51:35 +00008320 xmlFatalErrMsg(ctxt, XML_ERR_LTSLASH_REQUIRED,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008321 "xmlParseEndTag: '</' not found\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008322 return;
8323 }
8324 SKIP(2);
8325
Daniel Veillard46de64e2002-05-29 08:21:33 +00008326 name = xmlParseNameAndCompare(ctxt,ctxt->name);
Owen Taylor3473f882001-02-23 17:55:21 +00008327
8328 /*
8329 * We should definitely be at the ending "S? '>'" part
8330 */
8331 GROW;
8332 SKIP_BLANKS;
Daniel Veillard73b013f2003-09-30 12:36:01 +00008333 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008334 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008335 } else
Daniel Veillard21a0f912001-02-25 19:54:14 +00008336 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00008337
8338 /*
8339 * [ WFC: Element Type Match ]
8340 * The Name in an element's end-tag must match the element type in the
8341 * start-tag.
8342 *
8343 */
Daniel Veillard46de64e2002-05-29 08:21:33 +00008344 if (name != (xmlChar*)1) {
Daniel Veillardf403d292003-10-05 13:51:35 +00008345 if (name == NULL) name = BAD_CAST "unparseable";
8346 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00008347 "Opening and ending tag mismatch: %s line %d and %s\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00008348 ctxt->name, line, name);
Owen Taylor3473f882001-02-23 17:55:21 +00008349 }
8350
8351 /*
8352 * SAX: End of Tag
8353 */
8354 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
8355 (!ctxt->disableSAX))
Daniel Veillard46de64e2002-05-29 08:21:33 +00008356 ctxt->sax->endElement(ctxt->userData, ctxt->name);
Owen Taylor3473f882001-02-23 17:55:21 +00008357
Daniel Veillarde57ec792003-09-10 10:50:59 +00008358 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00008359 spacePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00008360 return;
8361}
8362
8363/**
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00008364 * xmlParseEndTag:
8365 * @ctxt: an XML parser context
8366 *
8367 * parse an end of tag
8368 *
8369 * [42] ETag ::= '</' Name S? '>'
8370 *
8371 * With namespace
8372 *
8373 * [NS 9] ETag ::= '</' QName S? '>'
8374 */
8375
8376void
8377xmlParseEndTag(xmlParserCtxtPtr ctxt) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00008378 xmlParseEndTag1(ctxt, 0);
8379}
Daniel Veillard81273902003-09-30 00:43:48 +00008380#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillard0fb18932003-09-07 09:14:37 +00008381
8382/************************************************************************
8383 * *
8384 * SAX 2 specific operations *
8385 * *
8386 ************************************************************************/
8387
Daniel Veillard0fb18932003-09-07 09:14:37 +00008388/*
8389 * xmlGetNamespace:
8390 * @ctxt: an XML parser context
8391 * @prefix: the prefix to lookup
8392 *
8393 * Lookup the namespace name for the @prefix (which ca be NULL)
8394 * The prefix must come from the @ctxt->dict dictionnary
8395 *
8396 * Returns the namespace name or NULL if not bound
8397 */
8398static const xmlChar *
8399xmlGetNamespace(xmlParserCtxtPtr ctxt, const xmlChar *prefix) {
8400 int i;
8401
Daniel Veillarde57ec792003-09-10 10:50:59 +00008402 if (prefix == ctxt->str_xml) return(ctxt->str_xml_ns);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008403 for (i = ctxt->nsNr - 2;i >= 0;i-=2)
Daniel Veillarde57ec792003-09-10 10:50:59 +00008404 if (ctxt->nsTab[i] == prefix) {
8405 if ((prefix == NULL) && (*ctxt->nsTab[i + 1] == 0))
8406 return(NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008407 return(ctxt->nsTab[i + 1]);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008408 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008409 return(NULL);
8410}
8411
8412/**
Daniel Veillard0fb18932003-09-07 09:14:37 +00008413 * xmlParseQName:
8414 * @ctxt: an XML parser context
8415 * @prefix: pointer to store the prefix part
8416 *
8417 * parse an XML Namespace QName
8418 *
8419 * [6] QName ::= (Prefix ':')? LocalPart
8420 * [7] Prefix ::= NCName
8421 * [8] LocalPart ::= NCName
8422 *
8423 * Returns the Name parsed or NULL
8424 */
8425
8426static const xmlChar *
8427xmlParseQName(xmlParserCtxtPtr ctxt, const xmlChar **prefix) {
8428 const xmlChar *l, *p;
8429
8430 GROW;
8431
8432 l = xmlParseNCName(ctxt);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008433 if (l == NULL) {
8434 if (CUR == ':') {
8435 l = xmlParseName(ctxt);
8436 if (l != NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008437 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8438 "Failed to parse QName '%s'\n", l, NULL, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008439 *prefix = NULL;
8440 return(l);
8441 }
8442 }
8443 return(NULL);
8444 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008445 if (CUR == ':') {
8446 NEXT;
8447 p = l;
8448 l = xmlParseNCName(ctxt);
8449 if (l == NULL) {
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008450 xmlChar *tmp;
8451
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008452 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8453 "Failed to parse QName '%s:'\n", p, NULL, NULL);
Daniel Veillardae0765b2008-07-31 19:54:59 +00008454 l = xmlParseNmtoken(ctxt);
8455 if (l == NULL)
8456 tmp = xmlBuildQName(BAD_CAST "", p, NULL, 0);
8457 else {
8458 tmp = xmlBuildQName(l, p, NULL, 0);
8459 xmlFree((char *)l);
8460 }
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008461 p = xmlDictLookup(ctxt->dict, tmp, -1);
8462 if (tmp != NULL) xmlFree(tmp);
8463 *prefix = NULL;
8464 return(p);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008465 }
8466 if (CUR == ':') {
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008467 xmlChar *tmp;
8468
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008469 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8470 "Failed to parse QName '%s:%s:'\n", p, l, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008471 NEXT;
8472 tmp = (xmlChar *) xmlParseName(ctxt);
8473 if (tmp != NULL) {
8474 tmp = xmlBuildQName(tmp, l, NULL, 0);
8475 l = xmlDictLookup(ctxt->dict, tmp, -1);
8476 if (tmp != NULL) xmlFree(tmp);
8477 *prefix = p;
8478 return(l);
8479 }
8480 tmp = xmlBuildQName(BAD_CAST "", l, NULL, 0);
8481 l = xmlDictLookup(ctxt->dict, tmp, -1);
8482 if (tmp != NULL) xmlFree(tmp);
8483 *prefix = p;
8484 return(l);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008485 }
8486 *prefix = p;
8487 } else
8488 *prefix = NULL;
8489 return(l);
8490}
8491
8492/**
8493 * xmlParseQNameAndCompare:
8494 * @ctxt: an XML parser context
8495 * @name: the localname
8496 * @prefix: the prefix, if any.
8497 *
8498 * parse an XML name and compares for match
8499 * (specialized for endtag parsing)
8500 *
8501 * Returns NULL for an illegal name, (xmlChar*) 1 for success
8502 * and the name for mismatch
8503 */
8504
8505static const xmlChar *
8506xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *name,
8507 xmlChar const *prefix) {
Daniel Veillardd44b9362009-09-07 12:15:08 +02008508 const xmlChar *cmp;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008509 const xmlChar *in;
8510 const xmlChar *ret;
8511 const xmlChar *prefix2;
8512
8513 if (prefix == NULL) return(xmlParseNameAndCompare(ctxt, name));
8514
8515 GROW;
8516 in = ctxt->input->cur;
Daniel Veillardd44b9362009-09-07 12:15:08 +02008517
Daniel Veillard0fb18932003-09-07 09:14:37 +00008518 cmp = prefix;
8519 while (*in != 0 && *in == *cmp) {
8520 ++in;
8521 ++cmp;
8522 }
8523 if ((*cmp == 0) && (*in == ':')) {
8524 in++;
8525 cmp = name;
8526 while (*in != 0 && *in == *cmp) {
8527 ++in;
8528 ++cmp;
8529 }
William M. Brack76e95df2003-10-18 16:20:14 +00008530 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00008531 /* success */
8532 ctxt->input->cur = in;
8533 return((const xmlChar*) 1);
8534 }
8535 }
8536 /*
8537 * all strings coms from the dictionary, equality can be done directly
8538 */
8539 ret = xmlParseQName (ctxt, &prefix2);
8540 if ((ret == name) && (prefix == prefix2))
8541 return((const xmlChar*) 1);
8542 return ret;
8543}
8544
8545/**
8546 * xmlParseAttValueInternal:
8547 * @ctxt: an XML parser context
8548 * @len: attribute len result
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008549 * @alloc: whether the attribute was reallocated as a new string
8550 * @normalize: if 1 then further non-CDATA normalization must be done
Daniel Veillard0fb18932003-09-07 09:14:37 +00008551 *
8552 * parse a value for an attribute.
8553 * NOTE: if no normalization is needed, the routine will return pointers
8554 * directly from the data buffer.
8555 *
8556 * 3.3.3 Attribute-Value Normalization:
8557 * Before the value of an attribute is passed to the application or
8558 * checked for validity, the XML processor must normalize it as follows:
8559 * - a character reference is processed by appending the referenced
8560 * character to the attribute value
8561 * - an entity reference is processed by recursively processing the
8562 * replacement text of the entity
8563 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
8564 * appending #x20 to the normalized value, except that only a single
8565 * #x20 is appended for a "#xD#xA" sequence that is part of an external
8566 * parsed entity or the literal entity value of an internal parsed entity
8567 * - other characters are processed by appending them to the normalized value
8568 * If the declared value is not CDATA, then the XML processor must further
8569 * process the normalized attribute value by discarding any leading and
8570 * trailing space (#x20) characters, and by replacing sequences of space
8571 * (#x20) characters by a single space (#x20) character.
8572 * All attributes for which no declaration has been read should be treated
8573 * by a non-validating parser as if declared CDATA.
8574 *
8575 * Returns the AttValue parsed or NULL. The value has to be freed by the
8576 * caller if it was copied, this can be detected by val[*len] == 0.
8577 */
8578
8579static xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008580xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *len, int *alloc,
8581 int normalize)
Daniel Veillarde57ec792003-09-10 10:50:59 +00008582{
Daniel Veillard0fb18932003-09-07 09:14:37 +00008583 xmlChar limit = 0;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008584 const xmlChar *in = NULL, *start, *end, *last;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008585 xmlChar *ret = NULL;
8586
8587 GROW;
8588 in = (xmlChar *) CUR_PTR;
8589 if (*in != '"' && *in != '\'') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008590 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008591 return (NULL);
8592 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008593 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008594
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008595 /*
8596 * try to handle in this routine the most common case where no
8597 * allocation of a new string is required and where content is
8598 * pure ASCII.
8599 */
8600 limit = *in++;
8601 end = ctxt->input->end;
8602 start = in;
8603 if (in >= end) {
8604 const xmlChar *oldbase = ctxt->input->base;
8605 GROW;
8606 if (oldbase != ctxt->input->base) {
8607 long delta = ctxt->input->base - oldbase;
8608 start = start + delta;
8609 in = in + delta;
8610 }
8611 end = ctxt->input->end;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008612 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008613 if (normalize) {
8614 /*
8615 * Skip any leading spaces
8616 */
8617 while ((in < end) && (*in != limit) &&
8618 ((*in == 0x20) || (*in == 0x9) ||
8619 (*in == 0xA) || (*in == 0xD))) {
8620 in++;
8621 start = in;
8622 if (in >= end) {
8623 const xmlChar *oldbase = ctxt->input->base;
8624 GROW;
8625 if (oldbase != ctxt->input->base) {
8626 long delta = ctxt->input->base - oldbase;
8627 start = start + delta;
8628 in = in + delta;
8629 }
8630 end = ctxt->input->end;
8631 }
8632 }
8633 while ((in < end) && (*in != limit) && (*in >= 0x20) &&
8634 (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
8635 if ((*in++ == 0x20) && (*in == 0x20)) break;
8636 if (in >= end) {
8637 const xmlChar *oldbase = ctxt->input->base;
8638 GROW;
8639 if (oldbase != ctxt->input->base) {
8640 long delta = ctxt->input->base - oldbase;
8641 start = start + delta;
8642 in = in + delta;
8643 }
8644 end = ctxt->input->end;
8645 }
8646 }
8647 last = in;
8648 /*
8649 * skip the trailing blanks
8650 */
Daniel Veillardc6e20e42003-09-11 16:30:26 +00008651 while ((last[-1] == 0x20) && (last > start)) last--;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008652 while ((in < end) && (*in != limit) &&
8653 ((*in == 0x20) || (*in == 0x9) ||
8654 (*in == 0xA) || (*in == 0xD))) {
8655 in++;
8656 if (in >= end) {
8657 const xmlChar *oldbase = ctxt->input->base;
8658 GROW;
8659 if (oldbase != ctxt->input->base) {
8660 long delta = ctxt->input->base - oldbase;
8661 start = start + delta;
8662 in = in + delta;
8663 last = last + delta;
8664 }
8665 end = ctxt->input->end;
8666 }
8667 }
8668 if (*in != limit) goto need_complex;
8669 } else {
8670 while ((in < end) && (*in != limit) && (*in >= 0x20) &&
8671 (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
8672 in++;
8673 if (in >= end) {
8674 const xmlChar *oldbase = ctxt->input->base;
8675 GROW;
8676 if (oldbase != ctxt->input->base) {
8677 long delta = ctxt->input->base - oldbase;
8678 start = start + delta;
8679 in = in + delta;
8680 }
8681 end = ctxt->input->end;
8682 }
8683 }
8684 last = in;
8685 if (*in != limit) goto need_complex;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008686 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008687 in++;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008688 if (len != NULL) {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008689 *len = last - start;
8690 ret = (xmlChar *) start;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008691 } else {
Daniel Veillarde57ec792003-09-10 10:50:59 +00008692 if (alloc) *alloc = 1;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008693 ret = xmlStrndup(start, last - start);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008694 }
8695 CUR_PTR = in;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008696 if (alloc) *alloc = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008697 return ret;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008698need_complex:
8699 if (alloc) *alloc = 1;
8700 return xmlParseAttValueComplex(ctxt, len, normalize);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008701}
8702
8703/**
8704 * xmlParseAttribute2:
8705 * @ctxt: an XML parser context
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008706 * @pref: the element prefix
8707 * @elem: the element name
8708 * @prefix: a xmlChar ** used to store the value of the attribute prefix
Daniel Veillard0fb18932003-09-07 09:14:37 +00008709 * @value: a xmlChar ** used to store the value of the attribute
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008710 * @len: an int * to save the length of the attribute
8711 * @alloc: an int * to indicate if the attribute was allocated
Daniel Veillard0fb18932003-09-07 09:14:37 +00008712 *
8713 * parse an attribute in the new SAX2 framework.
8714 *
8715 * Returns the attribute name, and the value in *value, .
8716 */
8717
8718static const xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008719xmlParseAttribute2(xmlParserCtxtPtr ctxt,
Daniel Veillard97c9ce22008-03-25 16:52:41 +00008720 const xmlChar * pref, const xmlChar * elem,
8721 const xmlChar ** prefix, xmlChar ** value,
8722 int *len, int *alloc)
8723{
Daniel Veillard0fb18932003-09-07 09:14:37 +00008724 const xmlChar *name;
Daniel Veillardd8925572005-06-08 22:34:55 +00008725 xmlChar *val, *internal_val = NULL;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008726 int normalize = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008727
8728 *value = NULL;
8729 GROW;
8730 name = xmlParseQName(ctxt, prefix);
8731 if (name == NULL) {
Daniel Veillard97c9ce22008-03-25 16:52:41 +00008732 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8733 "error parsing attribute name\n");
8734 return (NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008735 }
8736
8737 /*
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008738 * get the type if needed
8739 */
8740 if (ctxt->attsSpecial != NULL) {
8741 int type;
8742
8743 type = (int) (long) xmlHashQLookup2(ctxt->attsSpecial,
Daniel Veillard97c9ce22008-03-25 16:52:41 +00008744 pref, elem, *prefix, name);
8745 if (type != 0)
8746 normalize = 1;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008747 }
8748
8749 /*
Daniel Veillard0fb18932003-09-07 09:14:37 +00008750 * read the value
8751 */
8752 SKIP_BLANKS;
8753 if (RAW == '=') {
8754 NEXT;
Daniel Veillard97c9ce22008-03-25 16:52:41 +00008755 SKIP_BLANKS;
8756 val = xmlParseAttValueInternal(ctxt, len, alloc, normalize);
8757 if (normalize) {
8758 /*
8759 * Sometimes a second normalisation pass for spaces is needed
8760 * but that only happens if charrefs or entities refernces
8761 * have been used in the attribute value, i.e. the attribute
8762 * value have been extracted in an allocated string already.
8763 */
8764 if (*alloc) {
8765 const xmlChar *val2;
8766
8767 val2 = xmlAttrNormalizeSpace2(ctxt, val, len);
Daniel Veillardae0765b2008-07-31 19:54:59 +00008768 if ((val2 != NULL) && (val2 != val)) {
Daniel Veillard97c9ce22008-03-25 16:52:41 +00008769 xmlFree(val);
Daniel Veillard6a31b832008-03-26 14:06:44 +00008770 val = (xmlChar *) val2;
Daniel Veillard97c9ce22008-03-25 16:52:41 +00008771 }
8772 }
8773 }
8774 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008775 } else {
Daniel Veillard97c9ce22008-03-25 16:52:41 +00008776 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
8777 "Specification mandate value for attribute %s\n",
8778 name);
8779 return (NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008780 }
8781
Daniel Veillard97c9ce22008-03-25 16:52:41 +00008782 if (*prefix == ctxt->str_xml) {
8783 /*
8784 * Check that xml:lang conforms to the specification
8785 * No more registered as an error, just generate a warning now
8786 * since this was deprecated in XML second edition
8787 */
8788 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "lang"))) {
8789 internal_val = xmlStrndup(val, *len);
8790 if (!xmlCheckLanguageID(internal_val)) {
8791 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
8792 "Malformed value for xml:lang : %s\n",
8793 internal_val, NULL);
8794 }
8795 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008796
Daniel Veillard97c9ce22008-03-25 16:52:41 +00008797 /*
8798 * Check that xml:space conforms to the specification
8799 */
8800 if (xmlStrEqual(name, BAD_CAST "space")) {
8801 internal_val = xmlStrndup(val, *len);
8802 if (xmlStrEqual(internal_val, BAD_CAST "default"))
8803 *(ctxt->space) = 0;
8804 else if (xmlStrEqual(internal_val, BAD_CAST "preserve"))
8805 *(ctxt->space) = 1;
8806 else {
8807 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
8808 "Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
8809 internal_val, NULL);
8810 }
8811 }
8812 if (internal_val) {
8813 xmlFree(internal_val);
8814 }
8815 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008816
8817 *value = val;
Daniel Veillard97c9ce22008-03-25 16:52:41 +00008818 return (name);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008819}
Daniel Veillard0fb18932003-09-07 09:14:37 +00008820/**
8821 * xmlParseStartTag2:
8822 * @ctxt: an XML parser context
8823 *
8824 * parse a start of tag either for rule element or
8825 * EmptyElement. In both case we don't parse the tag closing chars.
8826 * This routine is called when running SAX2 parsing
8827 *
8828 * [40] STag ::= '<' Name (S Attribute)* S? '>'
8829 *
8830 * [ WFC: Unique Att Spec ]
8831 * No attribute name may appear more than once in the same start-tag or
8832 * empty-element tag.
8833 *
8834 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
8835 *
8836 * [ WFC: Unique Att Spec ]
8837 * No attribute name may appear more than once in the same start-tag or
8838 * empty-element tag.
8839 *
8840 * With namespace:
8841 *
8842 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
8843 *
8844 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
8845 *
8846 * Returns the element name parsed
8847 */
8848
8849static const xmlChar *
8850xmlParseStartTag2(xmlParserCtxtPtr ctxt, const xmlChar **pref,
Daniel Veillardc82c57e2004-01-12 16:24:34 +00008851 const xmlChar **URI, int *tlen) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00008852 const xmlChar *localname;
8853 const xmlChar *prefix;
8854 const xmlChar *attname;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008855 const xmlChar *aprefix;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008856 const xmlChar *nsname;
8857 xmlChar *attvalue;
8858 const xmlChar **atts = ctxt->atts;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008859 int maxatts = ctxt->maxatts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008860 int nratts, nbatts, nbdef;
Daniel Veillarddcec6722006-10-15 20:32:53 +00008861 int i, j, nbNs, attval, oldline, oldcol;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008862 const xmlChar *base;
8863 unsigned long cur;
Daniel Veillard365cf672005-06-09 08:18:24 +00008864 int nsNr = ctxt->nsNr;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008865
8866 if (RAW != '<') return(NULL);
8867 NEXT1;
8868
8869 /*
8870 * NOTE: it is crucial with the SAX2 API to never call SHRINK beyond that
8871 * point since the attribute values may be stored as pointers to
8872 * the buffer and calling SHRINK would destroy them !
8873 * The Shrinking is only possible once the full set of attribute
8874 * callbacks have been done.
8875 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00008876reparse:
Daniel Veillard0fb18932003-09-07 09:14:37 +00008877 SHRINK;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008878 base = ctxt->input->base;
8879 cur = ctxt->input->cur - ctxt->input->base;
Daniel Veillarddcec6722006-10-15 20:32:53 +00008880 oldline = ctxt->input->line;
8881 oldcol = ctxt->input->col;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008882 nbatts = 0;
8883 nratts = 0;
8884 nbdef = 0;
8885 nbNs = 0;
8886 attval = 0;
Daniel Veillard365cf672005-06-09 08:18:24 +00008887 /* Forget any namespaces added during an earlier parse of this element. */
8888 ctxt->nsNr = nsNr;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008889
8890 localname = xmlParseQName(ctxt, &prefix);
8891 if (localname == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008892 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8893 "StartTag: invalid element name\n");
Daniel Veillard0fb18932003-09-07 09:14:37 +00008894 return(NULL);
8895 }
Daniel Veillardc82c57e2004-01-12 16:24:34 +00008896 *tlen = ctxt->input->cur - ctxt->input->base - cur;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008897
8898 /*
8899 * Now parse the attributes, it ends up with the ending
8900 *
8901 * (S Attribute)* S?
8902 */
8903 SKIP_BLANKS;
8904 GROW;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008905 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008906
8907 while ((RAW != '>') &&
8908 ((RAW != '/') || (NXT(1) != '>')) &&
Daniel Veillard73b013f2003-09-30 12:36:01 +00008909 (IS_BYTE_CHAR(RAW))) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00008910 const xmlChar *q = CUR_PTR;
8911 unsigned int cons = ctxt->input->consumed;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008912 int len = -1, alloc = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008913
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008914 attname = xmlParseAttribute2(ctxt, prefix, localname,
8915 &aprefix, &attvalue, &len, &alloc);
Daniel Veillarddcec6722006-10-15 20:32:53 +00008916 if (ctxt->input->base != base) {
8917 if ((attvalue != NULL) && (alloc != 0))
8918 xmlFree(attvalue);
8919 attvalue = NULL;
8920 goto base_changed;
8921 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008922 if ((attname != NULL) && (attvalue != NULL)) {
8923 if (len < 0) len = xmlStrlen(attvalue);
8924 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00008925 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
8926 xmlURIPtr uri;
8927
8928 if (*URL != 0) {
8929 uri = xmlParseURI((const char *) URL);
8930 if (uri == NULL) {
Daniel Veillard37334572008-07-31 08:20:02 +00008931 xmlNsErr(ctxt, XML_WAR_NS_URI,
8932 "xmlns: '%s' is not a valid URI\n",
8933 URL, NULL, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008934 } else {
Daniel Veillardda3fee42008-09-01 13:08:57 +00008935 if (uri->scheme == NULL) {
Daniel Veillard37334572008-07-31 08:20:02 +00008936 xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
8937 "xmlns: URI %s is not absolute\n",
8938 URL, NULL, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008939 }
8940 xmlFreeURI(uri);
8941 }
Daniel Veillard37334572008-07-31 08:20:02 +00008942 if (URL == ctxt->str_xml_ns) {
8943 if (attname != ctxt->str_xml) {
8944 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8945 "xml namespace URI cannot be the default namespace\n",
8946 NULL, NULL, NULL);
8947 }
8948 goto skip_default_ns;
8949 }
8950 if ((len == 29) &&
8951 (xmlStrEqual(URL,
8952 BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
8953 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8954 "reuse of the xmlns namespace name is forbidden\n",
8955 NULL, NULL, NULL);
8956 goto skip_default_ns;
8957 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00008958 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008959 /*
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008960 * check that it's not a defined namespace
Daniel Veillard0fb18932003-09-07 09:14:37 +00008961 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008962 for (j = 1;j <= nbNs;j++)
8963 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
8964 break;
8965 if (j <= nbNs)
8966 xmlErrAttributeDup(ctxt, NULL, attname);
8967 else
8968 if (nsPush(ctxt, NULL, URL) > 0) nbNs++;
Daniel Veillard37334572008-07-31 08:20:02 +00008969skip_default_ns:
Daniel Veillarde57ec792003-09-10 10:50:59 +00008970 if (alloc != 0) xmlFree(attvalue);
8971 SKIP_BLANKS;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008972 continue;
8973 }
8974 if (aprefix == ctxt->str_xmlns) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00008975 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
8976 xmlURIPtr uri;
8977
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008978 if (attname == ctxt->str_xml) {
8979 if (URL != ctxt->str_xml_ns) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008980 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8981 "xml namespace prefix mapped to wrong URI\n",
8982 NULL, NULL, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008983 }
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008984 /*
8985 * Do not keep a namespace definition node
8986 */
Daniel Veillard37334572008-07-31 08:20:02 +00008987 goto skip_ns;
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008988 }
Daniel Veillard37334572008-07-31 08:20:02 +00008989 if (URL == ctxt->str_xml_ns) {
8990 if (attname != ctxt->str_xml) {
8991 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8992 "xml namespace URI mapped to wrong prefix\n",
8993 NULL, NULL, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008994 }
Daniel Veillard37334572008-07-31 08:20:02 +00008995 goto skip_ns;
8996 }
8997 if (attname == ctxt->str_xmlns) {
8998 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8999 "redefinition of the xmlns prefix is forbidden\n",
9000 NULL, NULL, NULL);
9001 goto skip_ns;
9002 }
9003 if ((len == 29) &&
9004 (xmlStrEqual(URL,
9005 BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
9006 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9007 "reuse of the xmlns namespace name is forbidden\n",
9008 NULL, NULL, NULL);
9009 goto skip_ns;
9010 }
9011 if ((URL == NULL) || (URL[0] == 0)) {
9012 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9013 "xmlns:%s: Empty XML namespace is not allowed\n",
9014 attname, NULL, NULL);
9015 goto skip_ns;
9016 } else {
9017 uri = xmlParseURI((const char *) URL);
9018 if (uri == NULL) {
9019 xmlNsErr(ctxt, XML_WAR_NS_URI,
9020 "xmlns:%s: '%s' is not a valid URI\n",
9021 attname, URL, NULL);
9022 } else {
9023 if ((ctxt->pedantic) && (uri->scheme == NULL)) {
9024 xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
9025 "xmlns:%s: URI %s is not absolute\n",
9026 attname, URL, NULL);
9027 }
9028 xmlFreeURI(uri);
9029 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00009030 }
9031
Daniel Veillard0fb18932003-09-07 09:14:37 +00009032 /*
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009033 * check that it's not a defined namespace
Daniel Veillard0fb18932003-09-07 09:14:37 +00009034 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009035 for (j = 1;j <= nbNs;j++)
9036 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
9037 break;
9038 if (j <= nbNs)
9039 xmlErrAttributeDup(ctxt, aprefix, attname);
9040 else
9041 if (nsPush(ctxt, attname, URL) > 0) nbNs++;
Daniel Veillard37334572008-07-31 08:20:02 +00009042skip_ns:
Daniel Veillarde57ec792003-09-10 10:50:59 +00009043 if (alloc != 0) xmlFree(attvalue);
9044 SKIP_BLANKS;
Daniel Veillardd3999c72004-03-10 16:27:03 +00009045 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009046 continue;
9047 }
9048
9049 /*
9050 * Add the pair to atts
9051 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00009052 if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9053 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00009054 if (attvalue[len] == 0)
9055 xmlFree(attvalue);
9056 goto failed;
9057 }
9058 maxatts = ctxt->maxatts;
9059 atts = ctxt->atts;
9060 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00009061 ctxt->attallocs[nratts++] = alloc;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009062 atts[nbatts++] = attname;
9063 atts[nbatts++] = aprefix;
Daniel Veillarde57ec792003-09-10 10:50:59 +00009064 atts[nbatts++] = NULL; /* the URI will be fetched later */
Daniel Veillard0fb18932003-09-07 09:14:37 +00009065 atts[nbatts++] = attvalue;
9066 attvalue += len;
9067 atts[nbatts++] = attvalue;
9068 /*
9069 * tag if some deallocation is needed
9070 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00009071 if (alloc != 0) attval = 1;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009072 } else {
9073 if ((attvalue != NULL) && (attvalue[len] == 0))
9074 xmlFree(attvalue);
9075 }
9076
Daniel Veillard37334572008-07-31 08:20:02 +00009077failed:
Daniel Veillard0fb18932003-09-07 09:14:37 +00009078
9079 GROW
Daniel Veillarde57ec792003-09-10 10:50:59 +00009080 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009081 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
9082 break;
William M. Brack76e95df2003-10-18 16:20:14 +00009083 if (!IS_BLANK_CH(RAW)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009084 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
9085 "attributes construct error\n");
William M. Brack13dfa872004-09-18 04:52:08 +00009086 break;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009087 }
9088 SKIP_BLANKS;
9089 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
9090 (attname == NULL) && (attvalue == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009091 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
Daniel Veillard0fb18932003-09-07 09:14:37 +00009092 "xmlParseStartTag: problem parsing attributes\n");
Daniel Veillard0fb18932003-09-07 09:14:37 +00009093 break;
9094 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00009095 GROW;
Daniel Veillarde57ec792003-09-10 10:50:59 +00009096 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009097 }
9098
Daniel Veillard0fb18932003-09-07 09:14:37 +00009099 /*
Daniel Veillarde57ec792003-09-10 10:50:59 +00009100 * The attributes defaulting
9101 */
9102 if (ctxt->attsDefault != NULL) {
9103 xmlDefAttrsPtr defaults;
9104
9105 defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix);
9106 if (defaults != NULL) {
9107 for (i = 0;i < defaults->nbAttrs;i++) {
Daniel Veillardae0765b2008-07-31 19:54:59 +00009108 attname = defaults->values[5 * i];
9109 aprefix = defaults->values[5 * i + 1];
Daniel Veillarde57ec792003-09-10 10:50:59 +00009110
9111 /*
9112 * special work for namespaces defaulted defs
9113 */
9114 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
9115 /*
9116 * check that it's not a defined namespace
9117 */
9118 for (j = 1;j <= nbNs;j++)
9119 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
9120 break;
9121 if (j <= nbNs) continue;
9122
9123 nsname = xmlGetNamespace(ctxt, NULL);
Daniel Veillardae0765b2008-07-31 19:54:59 +00009124 if (nsname != defaults->values[5 * i + 2]) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00009125 if (nsPush(ctxt, NULL,
Daniel Veillardae0765b2008-07-31 19:54:59 +00009126 defaults->values[5 * i + 2]) > 0)
Daniel Veillarde57ec792003-09-10 10:50:59 +00009127 nbNs++;
9128 }
9129 } else if (aprefix == ctxt->str_xmlns) {
9130 /*
9131 * check that it's not a defined namespace
9132 */
9133 for (j = 1;j <= nbNs;j++)
9134 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
9135 break;
9136 if (j <= nbNs) continue;
9137
9138 nsname = xmlGetNamespace(ctxt, attname);
9139 if (nsname != defaults->values[2]) {
9140 if (nsPush(ctxt, attname,
Daniel Veillardae0765b2008-07-31 19:54:59 +00009141 defaults->values[5 * i + 2]) > 0)
Daniel Veillarde57ec792003-09-10 10:50:59 +00009142 nbNs++;
9143 }
9144 } else {
9145 /*
9146 * check that it's not a defined attribute
9147 */
9148 for (j = 0;j < nbatts;j+=5) {
9149 if ((attname == atts[j]) && (aprefix == atts[j+1]))
9150 break;
9151 }
9152 if (j < nbatts) continue;
9153
9154 if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9155 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
Daniel Veillard9ee35f32003-09-28 00:19:54 +00009156 return(NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00009157 }
9158 maxatts = ctxt->maxatts;
9159 atts = ctxt->atts;
9160 }
9161 atts[nbatts++] = attname;
9162 atts[nbatts++] = aprefix;
9163 if (aprefix == NULL)
9164 atts[nbatts++] = NULL;
9165 else
9166 atts[nbatts++] = xmlGetNamespace(ctxt, aprefix);
Daniel Veillardae0765b2008-07-31 19:54:59 +00009167 atts[nbatts++] = defaults->values[5 * i + 2];
9168 atts[nbatts++] = defaults->values[5 * i + 3];
9169 if ((ctxt->standalone == 1) &&
9170 (defaults->values[5 * i + 4] != NULL)) {
9171 xmlValidityError(ctxt, XML_DTD_STANDALONE_DEFAULTED,
9172 "standalone: attribute %s on %s defaulted from external subset\n",
9173 attname, localname);
9174 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00009175 nbdef++;
9176 }
9177 }
9178 }
9179 }
9180
Daniel Veillarde70c8772003-11-25 07:21:18 +00009181 /*
9182 * The attributes checkings
9183 */
9184 for (i = 0; i < nbatts;i += 5) {
Kasimier T. Buchcik455472f2005-04-29 10:04:43 +00009185 /*
9186 * The default namespace does not apply to attribute names.
9187 */
9188 if (atts[i + 1] != NULL) {
9189 nsname = xmlGetNamespace(ctxt, atts[i + 1]);
9190 if (nsname == NULL) {
9191 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9192 "Namespace prefix %s for %s on %s is not defined\n",
9193 atts[i + 1], atts[i], localname);
9194 }
9195 atts[i + 2] = nsname;
9196 } else
9197 nsname = NULL;
Daniel Veillarde70c8772003-11-25 07:21:18 +00009198 /*
9199 * [ WFC: Unique Att Spec ]
9200 * No attribute name may appear more than once in the same
9201 * start-tag or empty-element tag.
9202 * As extended by the Namespace in XML REC.
9203 */
9204 for (j = 0; j < i;j += 5) {
9205 if (atts[i] == atts[j]) {
9206 if (atts[i+1] == atts[j+1]) {
9207 xmlErrAttributeDup(ctxt, atts[i+1], atts[i]);
9208 break;
9209 }
9210 if ((nsname != NULL) && (atts[j + 2] == nsname)) {
9211 xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED,
9212 "Namespaced Attribute %s in '%s' redefined\n",
9213 atts[i], nsname, NULL);
9214 break;
9215 }
9216 }
9217 }
9218 }
9219
Daniel Veillarde57ec792003-09-10 10:50:59 +00009220 nsname = xmlGetNamespace(ctxt, prefix);
9221 if ((prefix != NULL) && (nsname == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009222 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9223 "Namespace prefix %s on %s is not defined\n",
9224 prefix, localname, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00009225 }
9226 *pref = prefix;
9227 *URI = nsname;
9228
9229 /*
9230 * SAX: Start of Element !
9231 */
9232 if ((ctxt->sax != NULL) && (ctxt->sax->startElementNs != NULL) &&
9233 (!ctxt->disableSAX)) {
9234 if (nbNs > 0)
9235 ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
9236 nsname, nbNs, &ctxt->nsTab[ctxt->nsNr - 2 * nbNs],
9237 nbatts / 5, nbdef, atts);
9238 else
9239 ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
9240 nsname, 0, NULL, nbatts / 5, nbdef, atts);
9241 }
9242
9243 /*
9244 * Free up attribute allocated strings if needed
9245 */
9246 if (attval != 0) {
9247 for (i = 3,j = 0; j < nratts;i += 5,j++)
9248 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
9249 xmlFree((xmlChar *) atts[i]);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009250 }
9251
9252 return(localname);
Daniel Veillarde57ec792003-09-10 10:50:59 +00009253
9254base_changed:
9255 /*
9256 * the attribute strings are valid iif the base didn't changed
9257 */
9258 if (attval != 0) {
9259 for (i = 3,j = 0; j < nratts;i += 5,j++)
9260 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
9261 xmlFree((xmlChar *) atts[i]);
9262 }
9263 ctxt->input->cur = ctxt->input->base + cur;
Daniel Veillarddcec6722006-10-15 20:32:53 +00009264 ctxt->input->line = oldline;
9265 ctxt->input->col = oldcol;
Daniel Veillarde57ec792003-09-10 10:50:59 +00009266 if (ctxt->wellFormed == 1) {
9267 goto reparse;
9268 }
9269 return(NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009270}
9271
9272/**
9273 * xmlParseEndTag2:
9274 * @ctxt: an XML parser context
9275 * @line: line of the start tag
9276 * @nsNr: number of namespaces on the start tag
9277 *
9278 * parse an end of tag
9279 *
9280 * [42] ETag ::= '</' Name S? '>'
9281 *
9282 * With namespace
9283 *
9284 * [NS 9] ETag ::= '</' QName S? '>'
9285 */
9286
9287static void
9288xmlParseEndTag2(xmlParserCtxtPtr ctxt, const xmlChar *prefix,
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009289 const xmlChar *URI, int line, int nsNr, int tlen) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00009290 const xmlChar *name;
9291
9292 GROW;
9293 if ((RAW != '<') || (NXT(1) != '/')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009294 xmlFatalErr(ctxt, XML_ERR_LTSLASH_REQUIRED, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009295 return;
9296 }
9297 SKIP(2);
9298
William M. Brack13dfa872004-09-18 04:52:08 +00009299 if ((tlen > 0) && (xmlStrncmp(ctxt->input->cur, ctxt->name, tlen) == 0)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009300 if (ctxt->input->cur[tlen] == '>') {
9301 ctxt->input->cur += tlen + 1;
9302 goto done;
9303 }
9304 ctxt->input->cur += tlen;
9305 name = (xmlChar*)1;
9306 } else {
9307 if (prefix == NULL)
9308 name = xmlParseNameAndCompare(ctxt, ctxt->name);
9309 else
9310 name = xmlParseQNameAndCompare(ctxt, ctxt->name, prefix);
9311 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00009312
9313 /*
9314 * We should definitely be at the ending "S? '>'" part
9315 */
9316 GROW;
9317 SKIP_BLANKS;
Daniel Veillard73b013f2003-09-30 12:36:01 +00009318 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009319 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009320 } else
9321 NEXT1;
9322
9323 /*
9324 * [ WFC: Element Type Match ]
9325 * The Name in an element's end-tag must match the element type in the
9326 * start-tag.
9327 *
9328 */
9329 if (name != (xmlChar*)1) {
Daniel Veillardf403d292003-10-05 13:51:35 +00009330 if (name == NULL) name = BAD_CAST "unparseable";
Daniel Veillard852505b2009-08-23 15:44:48 +02009331 if ((line == 0) && (ctxt->node != NULL))
9332 line = ctxt->node->line;
Daniel Veillardf403d292003-10-05 13:51:35 +00009333 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
Daniel Veillard0fb18932003-09-07 09:14:37 +00009334 "Opening and ending tag mismatch: %s line %d and %s\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00009335 ctxt->name, line, name);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009336 }
9337
9338 /*
9339 * SAX: End of Tag
9340 */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009341done:
Daniel Veillard0fb18932003-09-07 09:14:37 +00009342 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
9343 (!ctxt->disableSAX))
9344 ctxt->sax->endElementNs(ctxt->userData, ctxt->name, prefix, URI);
9345
Daniel Veillard0fb18932003-09-07 09:14:37 +00009346 spacePop(ctxt);
9347 if (nsNr != 0)
9348 nsPop(ctxt, nsNr);
9349 return;
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00009350}
9351
9352/**
Owen Taylor3473f882001-02-23 17:55:21 +00009353 * xmlParseCDSect:
9354 * @ctxt: an XML parser context
9355 *
9356 * Parse escaped pure raw content.
9357 *
9358 * [18] CDSect ::= CDStart CData CDEnd
9359 *
9360 * [19] CDStart ::= '<![CDATA['
9361 *
9362 * [20] Data ::= (Char* - (Char* ']]>' Char*))
9363 *
9364 * [21] CDEnd ::= ']]>'
9365 */
9366void
9367xmlParseCDSect(xmlParserCtxtPtr ctxt) {
9368 xmlChar *buf = NULL;
9369 int len = 0;
9370 int size = XML_PARSER_BUFFER_SIZE;
9371 int r, rl;
9372 int s, sl;
9373 int cur, l;
9374 int count = 0;
9375
Daniel Veillard8f597c32003-10-06 08:19:27 +00009376 /* Check 2.6.0 was NXT(0) not RAW */
Daniel Veillarda07050d2003-10-19 14:46:32 +00009377 if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009378 SKIP(9);
9379 } else
9380 return;
9381
9382 ctxt->instate = XML_PARSER_CDATA_SECTION;
9383 r = CUR_CHAR(rl);
William M. Brack871611b2003-10-18 04:53:14 +00009384 if (!IS_CHAR(r)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009385 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009386 ctxt->instate = XML_PARSER_CONTENT;
9387 return;
9388 }
9389 NEXTL(rl);
9390 s = CUR_CHAR(sl);
William M. Brack871611b2003-10-18 04:53:14 +00009391 if (!IS_CHAR(s)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009392 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009393 ctxt->instate = XML_PARSER_CONTENT;
9394 return;
9395 }
9396 NEXTL(sl);
9397 cur = CUR_CHAR(l);
Daniel Veillard3c908dc2003-04-19 00:07:51 +00009398 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00009399 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009400 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009401 return;
9402 }
William M. Brack871611b2003-10-18 04:53:14 +00009403 while (IS_CHAR(cur) &&
Owen Taylor3473f882001-02-23 17:55:21 +00009404 ((r != ']') || (s != ']') || (cur != '>'))) {
9405 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00009406 xmlChar *tmp;
9407
Owen Taylor3473f882001-02-23 17:55:21 +00009408 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00009409 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
9410 if (tmp == NULL) {
9411 xmlFree(buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009412 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009413 return;
9414 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00009415 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00009416 }
9417 COPY_BUF(rl,buf,len,r);
9418 r = s;
9419 rl = sl;
9420 s = cur;
9421 sl = l;
9422 count++;
9423 if (count > 50) {
9424 GROW;
9425 count = 0;
9426 }
9427 NEXTL(l);
9428 cur = CUR_CHAR(l);
9429 }
9430 buf[len] = 0;
9431 ctxt->instate = XML_PARSER_CONTENT;
9432 if (cur != '>') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00009433 xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00009434 "CData section not finished\n%.50s\n", buf);
Owen Taylor3473f882001-02-23 17:55:21 +00009435 xmlFree(buf);
9436 return;
9437 }
9438 NEXTL(l);
9439
9440 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00009441 * OK the buffer is to be consumed as cdata.
Owen Taylor3473f882001-02-23 17:55:21 +00009442 */
9443 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
9444 if (ctxt->sax->cdataBlock != NULL)
9445 ctxt->sax->cdataBlock(ctxt->userData, buf, len);
Daniel Veillard7583a592001-07-08 13:15:55 +00009446 else if (ctxt->sax->characters != NULL)
9447 ctxt->sax->characters(ctxt->userData, buf, len);
Owen Taylor3473f882001-02-23 17:55:21 +00009448 }
9449 xmlFree(buf);
9450}
9451
9452/**
9453 * xmlParseContent:
9454 * @ctxt: an XML parser context
9455 *
9456 * Parse a content:
9457 *
9458 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
9459 */
9460
9461void
9462xmlParseContent(xmlParserCtxtPtr ctxt) {
9463 GROW;
Daniel Veillardfdc91562002-07-01 21:52:03 +00009464 while ((RAW != 0) &&
Daniel Veillard4a9fe382006-09-19 12:44:35 +00009465 ((RAW != '<') || (NXT(1) != '/')) &&
9466 (ctxt->instate != XML_PARSER_EOF)) {
Owen Taylor3473f882001-02-23 17:55:21 +00009467 const xmlChar *test = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00009468 unsigned int cons = ctxt->input->consumed;
Daniel Veillard21a0f912001-02-25 19:54:14 +00009469 const xmlChar *cur = ctxt->input->cur;
Owen Taylor3473f882001-02-23 17:55:21 +00009470
9471 /*
Owen Taylor3473f882001-02-23 17:55:21 +00009472 * First case : a Processing Instruction.
9473 */
Daniel Veillardfdc91562002-07-01 21:52:03 +00009474 if ((*cur == '<') && (cur[1] == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009475 xmlParsePI(ctxt);
9476 }
9477
9478 /*
9479 * Second case : a CDSection
9480 */
Daniel Veillard8f597c32003-10-06 08:19:27 +00009481 /* 2.6.0 test was *cur not RAW */
Daniel Veillarda07050d2003-10-19 14:46:32 +00009482 else if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009483 xmlParseCDSect(ctxt);
9484 }
9485
9486 /*
9487 * Third case : a comment
9488 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00009489 else if ((*cur == '<') && (NXT(1) == '!') &&
Owen Taylor3473f882001-02-23 17:55:21 +00009490 (NXT(2) == '-') && (NXT(3) == '-')) {
9491 xmlParseComment(ctxt);
9492 ctxt->instate = XML_PARSER_CONTENT;
9493 }
9494
9495 /*
9496 * Fourth case : a sub-element.
9497 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00009498 else if (*cur == '<') {
Owen Taylor3473f882001-02-23 17:55:21 +00009499 xmlParseElement(ctxt);
9500 }
9501
9502 /*
9503 * Fifth case : a reference. If if has not been resolved,
9504 * parsing returns it's Name, create the node
9505 */
9506
Daniel Veillard21a0f912001-02-25 19:54:14 +00009507 else if (*cur == '&') {
Owen Taylor3473f882001-02-23 17:55:21 +00009508 xmlParseReference(ctxt);
9509 }
9510
9511 /*
9512 * Last case, text. Note that References are handled directly.
9513 */
9514 else {
9515 xmlParseCharData(ctxt, 0);
9516 }
9517
9518 GROW;
9519 /*
9520 * Pop-up of finished entities.
9521 */
Daniel Veillard561b7f82002-03-20 21:55:57 +00009522 while ((RAW == 0) && (ctxt->inputNr > 1))
Owen Taylor3473f882001-02-23 17:55:21 +00009523 xmlPopInput(ctxt);
9524 SHRINK;
9525
Daniel Veillardfdc91562002-07-01 21:52:03 +00009526 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009527 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
9528 "detected an error in element content\n");
Owen Taylor3473f882001-02-23 17:55:21 +00009529 ctxt->instate = XML_PARSER_EOF;
9530 break;
9531 }
9532 }
9533}
9534
9535/**
9536 * xmlParseElement:
9537 * @ctxt: an XML parser context
9538 *
9539 * parse an XML element, this is highly recursive
9540 *
9541 * [39] element ::= EmptyElemTag | STag content ETag
9542 *
9543 * [ WFC: Element Type Match ]
9544 * The Name in an element's end-tag must match the element type in the
9545 * start-tag.
9546 *
Owen Taylor3473f882001-02-23 17:55:21 +00009547 */
9548
9549void
9550xmlParseElement(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00009551 const xmlChar *name;
Daniel Veillard15495612009-09-05 15:04:41 +02009552 const xmlChar *prefix = NULL;
9553 const xmlChar *URI = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00009554 xmlParserNodeInfo node_info;
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009555 int line, tlen;
Owen Taylor3473f882001-02-23 17:55:21 +00009556 xmlNodePtr ret;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009557 int nsNr = ctxt->nsNr;
Owen Taylor3473f882001-02-23 17:55:21 +00009558
Daniel Veillard8915c152008-08-26 13:05:34 +00009559 if (((unsigned int) ctxt->nameNr > xmlParserMaxDepth) &&
9560 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9561 xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
9562 "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
9563 xmlParserMaxDepth);
Daniel Veillard4a9fe382006-09-19 12:44:35 +00009564 ctxt->instate = XML_PARSER_EOF;
9565 return;
9566 }
9567
Owen Taylor3473f882001-02-23 17:55:21 +00009568 /* Capture start position */
9569 if (ctxt->record_info) {
9570 node_info.begin_pos = ctxt->input->consumed +
9571 (CUR_PTR - ctxt->input->base);
9572 node_info.begin_line = ctxt->input->line;
9573 }
9574
9575 if (ctxt->spaceNr == 0)
9576 spacePush(ctxt, -1);
Daniel Veillard1114d002006-10-12 16:24:35 +00009577 else if (*ctxt->space == -2)
9578 spacePush(ctxt, -1);
Owen Taylor3473f882001-02-23 17:55:21 +00009579 else
9580 spacePush(ctxt, *ctxt->space);
9581
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00009582 line = ctxt->input->line;
Daniel Veillard81273902003-09-30 00:43:48 +00009583#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +00009584 if (ctxt->sax2)
Daniel Veillard81273902003-09-30 00:43:48 +00009585#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009586 name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
Daniel Veillard81273902003-09-30 00:43:48 +00009587#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +00009588 else
9589 name = xmlParseStartTag(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +00009590#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00009591 if (name == NULL) {
9592 spacePop(ctxt);
9593 return;
9594 }
9595 namePush(ctxt, name);
9596 ret = ctxt->node;
9597
Daniel Veillard4432df22003-09-28 18:58:27 +00009598#ifdef LIBXML_VALID_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +00009599 /*
9600 * [ VC: Root Element Type ]
9601 * The Name in the document type declaration must match the element
9602 * type of the root element.
9603 */
9604 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
9605 ctxt->node && (ctxt->node == ctxt->myDoc->children))
9606 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
Daniel Veillard4432df22003-09-28 18:58:27 +00009607#endif /* LIBXML_VALID_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00009608
9609 /*
9610 * Check for an Empty Element.
9611 */
9612 if ((RAW == '/') && (NXT(1) == '>')) {
9613 SKIP(2);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009614 if (ctxt->sax2) {
9615 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
9616 (!ctxt->disableSAX))
9617 ctxt->sax->endElementNs(ctxt->userData, name, prefix, URI);
Daniel Veillard81273902003-09-30 00:43:48 +00009618#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +00009619 } else {
9620 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
9621 (!ctxt->disableSAX))
9622 ctxt->sax->endElement(ctxt->userData, name);
Daniel Veillard81273902003-09-30 00:43:48 +00009623#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00009624 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00009625 namePop(ctxt);
9626 spacePop(ctxt);
9627 if (nsNr != ctxt->nsNr)
9628 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +00009629 if ( ret != NULL && ctxt->record_info ) {
9630 node_info.end_pos = ctxt->input->consumed +
9631 (CUR_PTR - ctxt->input->base);
9632 node_info.end_line = ctxt->input->line;
9633 node_info.node = ret;
9634 xmlParserAddNodeInfo(ctxt, &node_info);
9635 }
9636 return;
9637 }
9638 if (RAW == '>') {
Daniel Veillard21a0f912001-02-25 19:54:14 +00009639 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00009640 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00009641 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_GT_REQUIRED,
9642 "Couldn't find end of Start Tag %s line %d\n",
9643 name, line, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009644
9645 /*
9646 * end of parsing of this node.
9647 */
9648 nodePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009649 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00009650 spacePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009651 if (nsNr != ctxt->nsNr)
9652 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +00009653
9654 /*
9655 * Capture end position and add node
9656 */
9657 if ( ret != NULL && ctxt->record_info ) {
9658 node_info.end_pos = ctxt->input->consumed +
9659 (CUR_PTR - ctxt->input->base);
9660 node_info.end_line = ctxt->input->line;
9661 node_info.node = ret;
9662 xmlParserAddNodeInfo(ctxt, &node_info);
9663 }
9664 return;
9665 }
9666
9667 /*
9668 * Parse the content of the element:
9669 */
9670 xmlParseContent(ctxt);
Daniel Veillard73b013f2003-09-30 12:36:01 +00009671 if (!IS_BYTE_CHAR(RAW)) {
Daniel Veillardf403d292003-10-05 13:51:35 +00009672 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
Daniel Veillard2b0f8792003-10-10 19:36:36 +00009673 "Premature end of data in tag %s line %d\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00009674 name, line, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009675
9676 /*
9677 * end of parsing of this node.
9678 */
9679 nodePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009680 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00009681 spacePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009682 if (nsNr != ctxt->nsNr)
9683 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +00009684 return;
9685 }
9686
9687 /*
9688 * parse the end of tag: '</' should be here.
9689 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00009690 if (ctxt->sax2) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009691 xmlParseEndTag2(ctxt, prefix, URI, line, ctxt->nsNr - nsNr, tlen);
Daniel Veillarde57ec792003-09-10 10:50:59 +00009692 namePop(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +00009693 }
9694#ifdef LIBXML_SAX1_ENABLED
9695 else
Daniel Veillard0fb18932003-09-07 09:14:37 +00009696 xmlParseEndTag1(ctxt, line);
Daniel Veillard81273902003-09-30 00:43:48 +00009697#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00009698
9699 /*
9700 * Capture end position and add node
9701 */
9702 if ( ret != NULL && ctxt->record_info ) {
9703 node_info.end_pos = ctxt->input->consumed +
9704 (CUR_PTR - ctxt->input->base);
9705 node_info.end_line = ctxt->input->line;
9706 node_info.node = ret;
9707 xmlParserAddNodeInfo(ctxt, &node_info);
9708 }
9709}
9710
9711/**
9712 * xmlParseVersionNum:
9713 * @ctxt: an XML parser context
9714 *
9715 * parse the XML version value.
9716 *
Daniel Veillard34e3f642008-07-29 09:02:27 +00009717 * [26] VersionNum ::= '1.' [0-9]+
9718 *
9719 * In practice allow [0-9].[0-9]+ at that level
Owen Taylor3473f882001-02-23 17:55:21 +00009720 *
9721 * Returns the string giving the XML version number, or NULL
9722 */
9723xmlChar *
9724xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
9725 xmlChar *buf = NULL;
9726 int len = 0;
9727 int size = 10;
9728 xmlChar cur;
9729
Daniel Veillard3c908dc2003-04-19 00:07:51 +00009730 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00009731 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009732 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009733 return(NULL);
9734 }
9735 cur = CUR;
Daniel Veillard34e3f642008-07-29 09:02:27 +00009736 if (!((cur >= '0') && (cur <= '9'))) {
Daniel Veillard7e5c3f42008-07-29 16:12:31 +00009737 xmlFree(buf);
Daniel Veillard34e3f642008-07-29 09:02:27 +00009738 return(NULL);
9739 }
9740 buf[len++] = cur;
9741 NEXT;
9742 cur=CUR;
9743 if (cur != '.') {
Daniel Veillard7e5c3f42008-07-29 16:12:31 +00009744 xmlFree(buf);
Daniel Veillard34e3f642008-07-29 09:02:27 +00009745 return(NULL);
9746 }
9747 buf[len++] = cur;
9748 NEXT;
9749 cur=CUR;
9750 while ((cur >= '0') && (cur <= '9')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009751 if (len + 1 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00009752 xmlChar *tmp;
9753
Owen Taylor3473f882001-02-23 17:55:21 +00009754 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00009755 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
9756 if (tmp == NULL) {
Daniel Veillard68b6e022008-03-31 09:26:00 +00009757 xmlFree(buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009758 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009759 return(NULL);
9760 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00009761 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00009762 }
9763 buf[len++] = cur;
9764 NEXT;
9765 cur=CUR;
9766 }
9767 buf[len] = 0;
9768 return(buf);
9769}
9770
9771/**
9772 * xmlParseVersionInfo:
9773 * @ctxt: an XML parser context
Daniel Veillard68b6e022008-03-31 09:26:00 +00009774 *
Owen Taylor3473f882001-02-23 17:55:21 +00009775 * parse the XML version.
9776 *
9777 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
Daniel Veillard68b6e022008-03-31 09:26:00 +00009778 *
Owen Taylor3473f882001-02-23 17:55:21 +00009779 * [25] Eq ::= S? '=' S?
9780 *
9781 * Returns the version string, e.g. "1.0"
9782 */
9783
9784xmlChar *
9785xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
9786 xmlChar *version = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00009787
Daniel Veillarda07050d2003-10-19 14:46:32 +00009788 if (CMP7(CUR_PTR, 'v', 'e', 'r', 's', 'i', 'o', 'n')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009789 SKIP(7);
9790 SKIP_BLANKS;
9791 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009792 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009793 return(NULL);
9794 }
9795 NEXT;
9796 SKIP_BLANKS;
9797 if (RAW == '"') {
9798 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00009799 version = xmlParseVersionNum(ctxt);
9800 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009801 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009802 } else
9803 NEXT;
9804 } else if (RAW == '\''){
9805 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00009806 version = xmlParseVersionNum(ctxt);
9807 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009808 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009809 } else
9810 NEXT;
9811 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009812 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009813 }
9814 }
9815 return(version);
9816}
9817
9818/**
9819 * xmlParseEncName:
9820 * @ctxt: an XML parser context
9821 *
9822 * parse the XML encoding name
9823 *
9824 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
9825 *
9826 * Returns the encoding name value or NULL
9827 */
9828xmlChar *
9829xmlParseEncName(xmlParserCtxtPtr ctxt) {
9830 xmlChar *buf = NULL;
9831 int len = 0;
9832 int size = 10;
9833 xmlChar cur;
9834
9835 cur = CUR;
9836 if (((cur >= 'a') && (cur <= 'z')) ||
9837 ((cur >= 'A') && (cur <= 'Z'))) {
Daniel Veillard3c908dc2003-04-19 00:07:51 +00009838 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00009839 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009840 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009841 return(NULL);
9842 }
Daniel Veillard34e3f642008-07-29 09:02:27 +00009843
Owen Taylor3473f882001-02-23 17:55:21 +00009844 buf[len++] = cur;
9845 NEXT;
9846 cur = CUR;
9847 while (((cur >= 'a') && (cur <= 'z')) ||
9848 ((cur >= 'A') && (cur <= 'Z')) ||
9849 ((cur >= '0') && (cur <= '9')) ||
9850 (cur == '.') || (cur == '_') ||
9851 (cur == '-')) {
9852 if (len + 1 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00009853 xmlChar *tmp;
9854
Owen Taylor3473f882001-02-23 17:55:21 +00009855 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00009856 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
9857 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009858 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00009859 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00009860 return(NULL);
9861 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00009862 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00009863 }
9864 buf[len++] = cur;
9865 NEXT;
9866 cur = CUR;
9867 if (cur == 0) {
9868 SHRINK;
9869 GROW;
9870 cur = CUR;
9871 }
9872 }
9873 buf[len] = 0;
9874 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009875 xmlFatalErr(ctxt, XML_ERR_ENCODING_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009876 }
9877 return(buf);
9878}
9879
9880/**
9881 * xmlParseEncodingDecl:
9882 * @ctxt: an XML parser context
9883 *
9884 * parse the XML encoding declaration
9885 *
9886 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
9887 *
9888 * this setups the conversion filters.
9889 *
9890 * Returns the encoding value or NULL
9891 */
9892
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00009893const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00009894xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
9895 xmlChar *encoding = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00009896
9897 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00009898 if (CMP8(CUR_PTR, 'e', 'n', 'c', 'o', 'd', 'i', 'n', 'g')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009899 SKIP(8);
9900 SKIP_BLANKS;
9901 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009902 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009903 return(NULL);
9904 }
9905 NEXT;
9906 SKIP_BLANKS;
9907 if (RAW == '"') {
9908 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00009909 encoding = xmlParseEncName(ctxt);
9910 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009911 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009912 } else
9913 NEXT;
9914 } else if (RAW == '\''){
9915 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00009916 encoding = xmlParseEncName(ctxt);
9917 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009918 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009919 } else
9920 NEXT;
Daniel Veillard82ac6b02002-02-17 23:18:55 +00009921 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009922 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009923 }
Daniel Veillard6b621b82003-08-11 15:03:34 +00009924 /*
9925 * UTF-16 encoding stwich has already taken place at this stage,
9926 * more over the little-endian/big-endian selection is already done
9927 */
9928 if ((encoding != NULL) &&
9929 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-16")) ||
9930 (!xmlStrcasecmp(encoding, BAD_CAST "UTF16")))) {
Daniel Veillard37334572008-07-31 08:20:02 +00009931 /*
9932 * If no encoding was passed to the parser, that we are
9933 * using UTF-16 and no decoder is present i.e. the
9934 * document is apparently UTF-8 compatible, then raise an
9935 * encoding mismatch fatal error
9936 */
9937 if ((ctxt->encoding == NULL) &&
9938 (ctxt->input->buf != NULL) &&
9939 (ctxt->input->buf->encoder == NULL)) {
9940 xmlFatalErrMsg(ctxt, XML_ERR_INVALID_ENCODING,
9941 "Document labelled UTF-16 but has UTF-8 content\n");
9942 }
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00009943 if (ctxt->encoding != NULL)
9944 xmlFree((xmlChar *) ctxt->encoding);
9945 ctxt->encoding = encoding;
Daniel Veillardb19ba832003-08-14 00:33:46 +00009946 }
9947 /*
9948 * UTF-8 encoding is handled natively
9949 */
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00009950 else if ((encoding != NULL) &&
Daniel Veillardb19ba832003-08-14 00:33:46 +00009951 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-8")) ||
9952 (!xmlStrcasecmp(encoding, BAD_CAST "UTF8")))) {
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00009953 if (ctxt->encoding != NULL)
9954 xmlFree((xmlChar *) ctxt->encoding);
9955 ctxt->encoding = encoding;
Daniel Veillard6b621b82003-08-11 15:03:34 +00009956 }
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00009957 else if (encoding != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +00009958 xmlCharEncodingHandlerPtr handler;
9959
9960 if (ctxt->input->encoding != NULL)
9961 xmlFree((xmlChar *) ctxt->input->encoding);
9962 ctxt->input->encoding = encoding;
9963
Daniel Veillarda6874ca2003-07-29 16:47:24 +00009964 handler = xmlFindCharEncodingHandler((const char *) encoding);
9965 if (handler != NULL) {
9966 xmlSwitchToEncoding(ctxt, handler);
Owen Taylor3473f882001-02-23 17:55:21 +00009967 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00009968 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
Daniel Veillarda6874ca2003-07-29 16:47:24 +00009969 "Unsupported encoding %s\n", encoding);
9970 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009971 }
9972 }
9973 }
9974 return(encoding);
9975}
9976
9977/**
9978 * xmlParseSDDecl:
9979 * @ctxt: an XML parser context
9980 *
9981 * parse the XML standalone declaration
9982 *
9983 * [32] SDDecl ::= S 'standalone' Eq
9984 * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
9985 *
9986 * [ VC: Standalone Document Declaration ]
9987 * TODO The standalone document declaration must have the value "no"
9988 * if any external markup declarations contain declarations of:
9989 * - attributes with default values, if elements to which these
9990 * attributes apply appear in the document without specifications
9991 * of values for these attributes, or
9992 * - entities (other than amp, lt, gt, apos, quot), if references
9993 * to those entities appear in the document, or
9994 * - attributes with values subject to normalization, where the
9995 * attribute appears in the document with a value which will change
9996 * as a result of normalization, or
9997 * - element types with element content, if white space occurs directly
9998 * within any instance of those types.
9999 *
Daniel Veillard602f2bd2006-12-04 09:26:04 +000010000 * Returns:
10001 * 1 if standalone="yes"
10002 * 0 if standalone="no"
10003 * -2 if standalone attribute is missing or invalid
10004 * (A standalone value of -2 means that the XML declaration was found,
10005 * but no value was specified for the standalone attribute).
Owen Taylor3473f882001-02-23 17:55:21 +000010006 */
10007
10008int
10009xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard602f2bd2006-12-04 09:26:04 +000010010 int standalone = -2;
Owen Taylor3473f882001-02-23 17:55:21 +000010011
10012 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +000010013 if (CMP10(CUR_PTR, 's', 't', 'a', 'n', 'd', 'a', 'l', 'o', 'n', 'e')) {
Owen Taylor3473f882001-02-23 17:55:21 +000010014 SKIP(10);
10015 SKIP_BLANKS;
10016 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010017 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010018 return(standalone);
10019 }
10020 NEXT;
10021 SKIP_BLANKS;
10022 if (RAW == '\''){
10023 NEXT;
10024 if ((RAW == 'n') && (NXT(1) == 'o')) {
10025 standalone = 0;
10026 SKIP(2);
10027 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
10028 (NXT(2) == 's')) {
10029 standalone = 1;
10030 SKIP(3);
10031 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010032 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010033 }
10034 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010035 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010036 } else
10037 NEXT;
10038 } else if (RAW == '"'){
10039 NEXT;
10040 if ((RAW == 'n') && (NXT(1) == 'o')) {
10041 standalone = 0;
10042 SKIP(2);
10043 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
10044 (NXT(2) == 's')) {
10045 standalone = 1;
10046 SKIP(3);
10047 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010048 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010049 }
10050 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010051 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010052 } else
10053 NEXT;
10054 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010055 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010056 }
10057 }
10058 return(standalone);
10059}
10060
10061/**
10062 * xmlParseXMLDecl:
10063 * @ctxt: an XML parser context
10064 *
10065 * parse an XML declaration header
10066 *
10067 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
10068 */
10069
10070void
10071xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
10072 xmlChar *version;
10073
10074 /*
Daniel Veillardae487ba2005-11-17 07:25:52 +000010075 * This value for standalone indicates that the document has an
10076 * XML declaration but it does not have a standalone attribute.
10077 * It will be overwritten later if a standalone attribute is found.
10078 */
10079 ctxt->input->standalone = -2;
10080
10081 /*
Owen Taylor3473f882001-02-23 17:55:21 +000010082 * We know that '<?xml' is here.
10083 */
10084 SKIP(5);
10085
William M. Brack76e95df2003-10-18 16:20:14 +000010086 if (!IS_BLANK_CH(RAW)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +000010087 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
10088 "Blank needed after '<?xml'\n");
Owen Taylor3473f882001-02-23 17:55:21 +000010089 }
10090 SKIP_BLANKS;
10091
10092 /*
Daniel Veillard19840942001-11-29 16:11:38 +000010093 * We must have the VersionInfo here.
Owen Taylor3473f882001-02-23 17:55:21 +000010094 */
10095 version = xmlParseVersionInfo(ctxt);
Daniel Veillard19840942001-11-29 16:11:38 +000010096 if (version == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +000010097 xmlFatalErr(ctxt, XML_ERR_VERSION_MISSING, NULL);
Daniel Veillard19840942001-11-29 16:11:38 +000010098 } else {
10099 if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) {
10100 /*
Daniel Veillard34e3f642008-07-29 09:02:27 +000010101 * Changed here for XML-1.0 5th edition
Daniel Veillard19840942001-11-29 16:11:38 +000010102 */
Daniel Veillard34e3f642008-07-29 09:02:27 +000010103 if (ctxt->options & XML_PARSE_OLD10) {
10104 xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10105 "Unsupported version '%s'\n",
10106 version);
10107 } else {
10108 if ((version[0] == '1') && ((version[1] == '.'))) {
10109 xmlWarningMsg(ctxt, XML_WAR_UNKNOWN_VERSION,
10110 "Unsupported version '%s'\n",
10111 version, NULL);
10112 } else {
10113 xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10114 "Unsupported version '%s'\n",
10115 version);
10116 }
10117 }
Daniel Veillard19840942001-11-29 16:11:38 +000010118 }
10119 if (ctxt->version != NULL)
Daniel Veillardd3b08822001-12-05 12:03:33 +000010120 xmlFree((void *) ctxt->version);
Daniel Veillard19840942001-11-29 16:11:38 +000010121 ctxt->version = version;
Daniel Veillarda050d232001-09-05 15:51:05 +000010122 }
Owen Taylor3473f882001-02-23 17:55:21 +000010123
10124 /*
10125 * We may have the encoding declaration
10126 */
William M. Brack76e95df2003-10-18 16:20:14 +000010127 if (!IS_BLANK_CH(RAW)) {
Owen Taylor3473f882001-02-23 17:55:21 +000010128 if ((RAW == '?') && (NXT(1) == '>')) {
10129 SKIP(2);
10130 return;
10131 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +000010132 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
Owen Taylor3473f882001-02-23 17:55:21 +000010133 }
10134 xmlParseEncodingDecl(ctxt);
10135 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
10136 /*
10137 * The XML REC instructs us to stop parsing right here
10138 */
10139 return;
10140 }
10141
10142 /*
10143 * We may have the standalone status.
10144 */
William M. Brack76e95df2003-10-18 16:20:14 +000010145 if ((ctxt->input->encoding != NULL) && (!IS_BLANK_CH(RAW))) {
Owen Taylor3473f882001-02-23 17:55:21 +000010146 if ((RAW == '?') && (NXT(1) == '>')) {
10147 SKIP(2);
10148 return;
10149 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +000010150 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
Owen Taylor3473f882001-02-23 17:55:21 +000010151 }
Daniel Veillarda6c76a22009-08-26 14:37:00 +020010152
10153 /*
10154 * We can grow the input buffer freely at that point
10155 */
10156 GROW;
10157
Owen Taylor3473f882001-02-23 17:55:21 +000010158 SKIP_BLANKS;
10159 ctxt->input->standalone = xmlParseSDDecl(ctxt);
10160
10161 SKIP_BLANKS;
10162 if ((RAW == '?') && (NXT(1) == '>')) {
10163 SKIP(2);
10164 } else if (RAW == '>') {
10165 /* Deprecated old WD ... */
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010166 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010167 NEXT;
10168 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010169 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010170 MOVETO_ENDTAG(CUR_PTR);
10171 NEXT;
10172 }
10173}
10174
10175/**
10176 * xmlParseMisc:
10177 * @ctxt: an XML parser context
10178 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +000010179 * parse an XML Misc* optional field.
Owen Taylor3473f882001-02-23 17:55:21 +000010180 *
10181 * [27] Misc ::= Comment | PI | S
10182 */
10183
10184void
10185xmlParseMisc(xmlParserCtxtPtr ctxt) {
Daniel Veillard561b7f82002-03-20 21:55:57 +000010186 while (((RAW == '<') && (NXT(1) == '?')) ||
Daniel Veillarda07050d2003-10-19 14:46:32 +000010187 (CMP4(CUR_PTR, '<', '!', '-', '-')) ||
William M. Brack76e95df2003-10-18 16:20:14 +000010188 IS_BLANK_CH(CUR)) {
Daniel Veillard561b7f82002-03-20 21:55:57 +000010189 if ((RAW == '<') && (NXT(1) == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +000010190 xmlParsePI(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +000010191 } else if (IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +000010192 NEXT;
10193 } else
10194 xmlParseComment(ctxt);
10195 }
10196}
10197
10198/**
10199 * xmlParseDocument:
10200 * @ctxt: an XML parser context
10201 *
10202 * parse an XML document (and build a tree if using the standard SAX
10203 * interface).
10204 *
10205 * [1] document ::= prolog element Misc*
10206 *
10207 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
10208 *
10209 * Returns 0, -1 in case of error. the parser context is augmented
10210 * as a result of the parsing.
10211 */
10212
10213int
10214xmlParseDocument(xmlParserCtxtPtr ctxt) {
10215 xmlChar start[4];
10216 xmlCharEncoding enc;
10217
10218 xmlInitParser();
10219
Daniel Veillard36e5cd52004-11-02 14:52:23 +000010220 if ((ctxt == NULL) || (ctxt->input == NULL))
10221 return(-1);
10222
Owen Taylor3473f882001-02-23 17:55:21 +000010223 GROW;
10224
10225 /*
Daniel Veillard0fb18932003-09-07 09:14:37 +000010226 * SAX: detecting the level.
10227 */
Daniel Veillarde57ec792003-09-10 10:50:59 +000010228 xmlDetectSAX2(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +000010229
10230 /*
Owen Taylor3473f882001-02-23 17:55:21 +000010231 * SAX: beginning of the document processing.
10232 */
10233 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10234 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
10235
Nikolay Sivove6ad10a2010-11-01 11:35:14 +010010236 if ((ctxt->encoding == NULL) &&
Daniel Veillard4aede2e2003-10-17 12:43:59 +000010237 ((ctxt->input->end - ctxt->input->cur) >= 4)) {
Daniel Veillard4aafa792001-07-28 17:21:12 +000010238 /*
10239 * Get the 4 first bytes and decode the charset
10240 * if enc != XML_CHAR_ENCODING_NONE
10241 * plug some encoding conversion routines.
10242 */
10243 start[0] = RAW;
10244 start[1] = NXT(1);
10245 start[2] = NXT(2);
10246 start[3] = NXT(3);
Daniel Veillard4aede2e2003-10-17 12:43:59 +000010247 enc = xmlDetectCharEncoding(&start[0], 4);
Daniel Veillard4aafa792001-07-28 17:21:12 +000010248 if (enc != XML_CHAR_ENCODING_NONE) {
10249 xmlSwitchEncoding(ctxt, enc);
10250 }
Owen Taylor3473f882001-02-23 17:55:21 +000010251 }
10252
10253
10254 if (CUR == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010255 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010256 }
10257
10258 /*
10259 * Check for the XMLDecl in the Prolog.
Daniel Veillard7e385bd2009-08-26 11:38:49 +020010260 * do not GROW here to avoid the detected encoder to decode more
Daniel Veillard9d3d1412009-09-15 18:41:30 +020010261 * than just the first line, unless the amount of data is really
10262 * too small to hold "<?xml version="1.0" encoding="foo"
Owen Taylor3473f882001-02-23 17:55:21 +000010263 */
Daniel Veillard9d3d1412009-09-15 18:41:30 +020010264 if ((ctxt->input->end - ctxt->input->cur) < 35) {
10265 GROW;
10266 }
Daniel Veillarda07050d2003-10-19 14:46:32 +000010267 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +000010268
10269 /*
10270 * Note that we will switch encoding on the fly.
10271 */
10272 xmlParseXMLDecl(ctxt);
10273 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
10274 /*
10275 * The XML REC instructs us to stop parsing right here
10276 */
10277 return(-1);
10278 }
10279 ctxt->standalone = ctxt->input->standalone;
10280 SKIP_BLANKS;
10281 } else {
10282 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10283 }
10284 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
10285 ctxt->sax->startDocument(ctxt->userData);
10286
10287 /*
10288 * The Misc part of the Prolog
10289 */
10290 GROW;
10291 xmlParseMisc(ctxt);
10292
10293 /*
10294 * Then possibly doc type declaration(s) and more Misc
10295 * (doctypedecl Misc*)?
10296 */
10297 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +000010298 if (CMP9(CUR_PTR, '<', '!', 'D', 'O', 'C', 'T', 'Y', 'P', 'E')) {
Owen Taylor3473f882001-02-23 17:55:21 +000010299
10300 ctxt->inSubset = 1;
10301 xmlParseDocTypeDecl(ctxt);
10302 if (RAW == '[') {
10303 ctxt->instate = XML_PARSER_DTD;
10304 xmlParseInternalSubset(ctxt);
10305 }
10306
10307 /*
10308 * Create and update the external subset.
10309 */
10310 ctxt->inSubset = 2;
10311 if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
10312 (!ctxt->disableSAX))
10313 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
10314 ctxt->extSubSystem, ctxt->extSubURI);
10315 ctxt->inSubset = 0;
10316
Daniel Veillardac4118d2008-01-11 05:27:32 +000010317 xmlCleanSpecialAttr(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010318
10319 ctxt->instate = XML_PARSER_PROLOG;
10320 xmlParseMisc(ctxt);
10321 }
10322
10323 /*
10324 * Time to start parsing the tree itself
10325 */
10326 GROW;
10327 if (RAW != '<') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +000010328 xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
10329 "Start tag expected, '<' not found\n");
Owen Taylor3473f882001-02-23 17:55:21 +000010330 } else {
10331 ctxt->instate = XML_PARSER_CONTENT;
10332 xmlParseElement(ctxt);
10333 ctxt->instate = XML_PARSER_EPILOG;
10334
10335
10336 /*
10337 * The Misc part at the end
10338 */
10339 xmlParseMisc(ctxt);
10340
Daniel Veillard561b7f82002-03-20 21:55:57 +000010341 if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010342 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010343 }
10344 ctxt->instate = XML_PARSER_EOF;
10345 }
10346
10347 /*
10348 * SAX: end of the document processing.
10349 */
Daniel Veillard8d24cc12002-03-05 15:41:29 +000010350 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +000010351 ctxt->sax->endDocument(ctxt->userData);
10352
Daniel Veillard5997aca2002-03-18 18:36:20 +000010353 /*
10354 * Remove locally kept entity definitions if the tree was not built
10355 */
10356 if ((ctxt->myDoc != NULL) &&
10357 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
10358 xmlFreeDoc(ctxt->myDoc);
10359 ctxt->myDoc = NULL;
10360 }
10361
Daniel Veillardae0765b2008-07-31 19:54:59 +000010362 if ((ctxt->wellFormed) && (ctxt->myDoc != NULL)) {
10363 ctxt->myDoc->properties |= XML_DOC_WELLFORMED;
10364 if (ctxt->valid)
10365 ctxt->myDoc->properties |= XML_DOC_DTDVALID;
10366 if (ctxt->nsWellFormed)
10367 ctxt->myDoc->properties |= XML_DOC_NSVALID;
10368 if (ctxt->options & XML_PARSE_OLD10)
10369 ctxt->myDoc->properties |= XML_DOC_OLD10;
10370 }
Daniel Veillardc7612992002-02-17 22:47:37 +000010371 if (! ctxt->wellFormed) {
10372 ctxt->valid = 0;
10373 return(-1);
10374 }
Owen Taylor3473f882001-02-23 17:55:21 +000010375 return(0);
10376}
10377
10378/**
10379 * xmlParseExtParsedEnt:
10380 * @ctxt: an XML parser context
10381 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +000010382 * parse a general parsed entity
Owen Taylor3473f882001-02-23 17:55:21 +000010383 * An external general parsed entity is well-formed if it matches the
10384 * production labeled extParsedEnt.
10385 *
10386 * [78] extParsedEnt ::= TextDecl? content
10387 *
10388 * Returns 0, -1 in case of error. the parser context is augmented
10389 * as a result of the parsing.
10390 */
10391
10392int
10393xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
10394 xmlChar start[4];
10395 xmlCharEncoding enc;
10396
Daniel Veillard36e5cd52004-11-02 14:52:23 +000010397 if ((ctxt == NULL) || (ctxt->input == NULL))
10398 return(-1);
10399
Owen Taylor3473f882001-02-23 17:55:21 +000010400 xmlDefaultSAXHandlerInit();
10401
Daniel Veillard309f81d2003-09-23 09:02:53 +000010402 xmlDetectSAX2(ctxt);
10403
Owen Taylor3473f882001-02-23 17:55:21 +000010404 GROW;
10405
10406 /*
10407 * SAX: beginning of the document processing.
10408 */
10409 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10410 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
10411
10412 /*
10413 * Get the 4 first bytes and decode the charset
10414 * if enc != XML_CHAR_ENCODING_NONE
10415 * plug some encoding conversion routines.
10416 */
Daniel Veillard4aede2e2003-10-17 12:43:59 +000010417 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
10418 start[0] = RAW;
10419 start[1] = NXT(1);
10420 start[2] = NXT(2);
10421 start[3] = NXT(3);
10422 enc = xmlDetectCharEncoding(start, 4);
10423 if (enc != XML_CHAR_ENCODING_NONE) {
10424 xmlSwitchEncoding(ctxt, enc);
10425 }
Owen Taylor3473f882001-02-23 17:55:21 +000010426 }
10427
10428
10429 if (CUR == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010430 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010431 }
10432
10433 /*
10434 * Check for the XMLDecl in the Prolog.
10435 */
10436 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +000010437 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +000010438
10439 /*
10440 * Note that we will switch encoding on the fly.
10441 */
10442 xmlParseXMLDecl(ctxt);
10443 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
10444 /*
10445 * The XML REC instructs us to stop parsing right here
10446 */
10447 return(-1);
10448 }
10449 SKIP_BLANKS;
10450 } else {
10451 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10452 }
10453 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
10454 ctxt->sax->startDocument(ctxt->userData);
10455
10456 /*
10457 * Doing validity checking on chunk doesn't make sense
10458 */
10459 ctxt->instate = XML_PARSER_CONTENT;
10460 ctxt->validate = 0;
10461 ctxt->loadsubset = 0;
10462 ctxt->depth = 0;
10463
10464 xmlParseContent(ctxt);
10465
10466 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010467 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010468 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010469 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010470 }
10471
10472 /*
10473 * SAX: end of the document processing.
10474 */
Daniel Veillard8d24cc12002-03-05 15:41:29 +000010475 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +000010476 ctxt->sax->endDocument(ctxt->userData);
10477
10478 if (! ctxt->wellFormed) return(-1);
10479 return(0);
10480}
10481
Daniel Veillard73b013f2003-09-30 12:36:01 +000010482#ifdef LIBXML_PUSH_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000010483/************************************************************************
10484 * *
10485 * Progressive parsing interfaces *
10486 * *
10487 ************************************************************************/
10488
10489/**
10490 * xmlParseLookupSequence:
10491 * @ctxt: an XML parser context
10492 * @first: the first char to lookup
10493 * @next: the next char to lookup or zero
10494 * @third: the next char to lookup or zero
10495 *
10496 * Try to find if a sequence (first, next, third) or just (first next) or
10497 * (first) is available in the input stream.
10498 * This function has a side effect of (possibly) incrementing ctxt->checkIndex
10499 * to avoid rescanning sequences of bytes, it DOES change the state of the
10500 * parser, do not use liberally.
10501 *
10502 * Returns the index to the current parsing point if the full sequence
10503 * is available, -1 otherwise.
10504 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +000010505static int
Owen Taylor3473f882001-02-23 17:55:21 +000010506xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
10507 xmlChar next, xmlChar third) {
10508 int base, len;
10509 xmlParserInputPtr in;
10510 const xmlChar *buf;
10511
10512 in = ctxt->input;
10513 if (in == NULL) return(-1);
10514 base = in->cur - in->base;
10515 if (base < 0) return(-1);
10516 if (ctxt->checkIndex > base)
10517 base = ctxt->checkIndex;
10518 if (in->buf == NULL) {
10519 buf = in->base;
10520 len = in->length;
10521 } else {
10522 buf = in->buf->buffer->content;
10523 len = in->buf->buffer->use;
10524 }
10525 /* take into account the sequence length */
10526 if (third) len -= 2;
10527 else if (next) len --;
10528 for (;base < len;base++) {
10529 if (buf[base] == first) {
10530 if (third != 0) {
10531 if ((buf[base + 1] != next) ||
10532 (buf[base + 2] != third)) continue;
10533 } else if (next != 0) {
10534 if (buf[base + 1] != next) continue;
10535 }
10536 ctxt->checkIndex = 0;
10537#ifdef DEBUG_PUSH
10538 if (next == 0)
10539 xmlGenericError(xmlGenericErrorContext,
10540 "PP: lookup '%c' found at %d\n",
10541 first, base);
10542 else if (third == 0)
10543 xmlGenericError(xmlGenericErrorContext,
10544 "PP: lookup '%c%c' found at %d\n",
10545 first, next, base);
10546 else
10547 xmlGenericError(xmlGenericErrorContext,
10548 "PP: lookup '%c%c%c' found at %d\n",
10549 first, next, third, base);
10550#endif
10551 return(base - (in->cur - in->base));
10552 }
10553 }
10554 ctxt->checkIndex = base;
10555#ifdef DEBUG_PUSH
10556 if (next == 0)
10557 xmlGenericError(xmlGenericErrorContext,
10558 "PP: lookup '%c' failed\n", first);
10559 else if (third == 0)
10560 xmlGenericError(xmlGenericErrorContext,
10561 "PP: lookup '%c%c' failed\n", first, next);
10562 else
10563 xmlGenericError(xmlGenericErrorContext,
10564 "PP: lookup '%c%c%c' failed\n", first, next, third);
10565#endif
10566 return(-1);
10567}
10568
10569/**
Daniel Veillarda880b122003-04-21 21:36:41 +000010570 * xmlParseGetLasts:
10571 * @ctxt: an XML parser context
10572 * @lastlt: pointer to store the last '<' from the input
10573 * @lastgt: pointer to store the last '>' from the input
10574 *
10575 * Lookup the last < and > in the current chunk
10576 */
10577static void
10578xmlParseGetLasts(xmlParserCtxtPtr ctxt, const xmlChar **lastlt,
10579 const xmlChar **lastgt) {
10580 const xmlChar *tmp;
10581
10582 if ((ctxt == NULL) || (lastlt == NULL) || (lastgt == NULL)) {
10583 xmlGenericError(xmlGenericErrorContext,
10584 "Internal error: xmlParseGetLasts\n");
10585 return;
10586 }
Daniel Veillard0df3bc32004-06-08 12:03:41 +000010587 if ((ctxt->progressive != 0) && (ctxt->inputNr == 1)) {
Daniel Veillarda880b122003-04-21 21:36:41 +000010588 tmp = ctxt->input->end;
10589 tmp--;
Daniel Veillardeb70f932004-07-05 16:46:09 +000010590 while ((tmp >= ctxt->input->base) && (*tmp != '<')) tmp--;
Daniel Veillarda880b122003-04-21 21:36:41 +000010591 if (tmp < ctxt->input->base) {
10592 *lastlt = NULL;
10593 *lastgt = NULL;
Daniel Veillarda880b122003-04-21 21:36:41 +000010594 } else {
Daniel Veillardeb70f932004-07-05 16:46:09 +000010595 *lastlt = tmp;
10596 tmp++;
10597 while ((tmp < ctxt->input->end) && (*tmp != '>')) {
10598 if (*tmp == '\'') {
10599 tmp++;
10600 while ((tmp < ctxt->input->end) && (*tmp != '\'')) tmp++;
10601 if (tmp < ctxt->input->end) tmp++;
10602 } else if (*tmp == '"') {
10603 tmp++;
10604 while ((tmp < ctxt->input->end) && (*tmp != '"')) tmp++;
10605 if (tmp < ctxt->input->end) tmp++;
10606 } else
10607 tmp++;
10608 }
10609 if (tmp < ctxt->input->end)
10610 *lastgt = tmp;
10611 else {
10612 tmp = *lastlt;
10613 tmp--;
10614 while ((tmp >= ctxt->input->base) && (*tmp != '>')) tmp--;
10615 if (tmp >= ctxt->input->base)
10616 *lastgt = tmp;
10617 else
10618 *lastgt = NULL;
10619 }
Daniel Veillarda880b122003-04-21 21:36:41 +000010620 }
Daniel Veillarda880b122003-04-21 21:36:41 +000010621 } else {
10622 *lastlt = NULL;
10623 *lastgt = NULL;
10624 }
10625}
10626/**
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000010627 * xmlCheckCdataPush:
10628 * @cur: pointer to the bock of characters
10629 * @len: length of the block in bytes
10630 *
10631 * Check that the block of characters is okay as SCdata content [20]
10632 *
10633 * Returns the number of bytes to pass if okay, a negative index where an
10634 * UTF-8 error occured otherwise
10635 */
10636static int
10637xmlCheckCdataPush(const xmlChar *utf, int len) {
10638 int ix;
10639 unsigned char c;
10640 int codepoint;
10641
10642 if ((utf == NULL) || (len <= 0))
10643 return(0);
10644
10645 for (ix = 0; ix < len;) { /* string is 0-terminated */
10646 c = utf[ix];
10647 if ((c & 0x80) == 0x00) { /* 1-byte code, starts with 10 */
10648 if (c >= 0x20)
10649 ix++;
10650 else if ((c == 0xA) || (c == 0xD) || (c == 0x9))
10651 ix++;
10652 else
10653 return(-ix);
10654 } else if ((c & 0xe0) == 0xc0) {/* 2-byte code, starts with 110 */
10655 if (ix + 2 > len) return(ix);
10656 if ((utf[ix+1] & 0xc0 ) != 0x80)
10657 return(-ix);
Daniel Veillardeca59a22005-09-09 10:56:28 +000010658 codepoint = (utf[ix] & 0x1f) << 6;
10659 codepoint |= utf[ix+1] & 0x3f;
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000010660 if (!xmlIsCharQ(codepoint))
10661 return(-ix);
10662 ix += 2;
10663 } else if ((c & 0xf0) == 0xe0) {/* 3-byte code, starts with 1110 */
10664 if (ix + 3 > len) return(ix);
10665 if (((utf[ix+1] & 0xc0) != 0x80) ||
10666 ((utf[ix+2] & 0xc0) != 0x80))
10667 return(-ix);
Daniel Veillardeca59a22005-09-09 10:56:28 +000010668 codepoint = (utf[ix] & 0xf) << 12;
10669 codepoint |= (utf[ix+1] & 0x3f) << 6;
10670 codepoint |= utf[ix+2] & 0x3f;
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000010671 if (!xmlIsCharQ(codepoint))
10672 return(-ix);
10673 ix += 3;
10674 } else if ((c & 0xf8) == 0xf0) {/* 4-byte code, starts with 11110 */
10675 if (ix + 4 > len) return(ix);
10676 if (((utf[ix+1] & 0xc0) != 0x80) ||
10677 ((utf[ix+2] & 0xc0) != 0x80) ||
10678 ((utf[ix+3] & 0xc0) != 0x80))
10679 return(-ix);
Daniel Veillardeca59a22005-09-09 10:56:28 +000010680 codepoint = (utf[ix] & 0x7) << 18;
10681 codepoint |= (utf[ix+1] & 0x3f) << 12;
10682 codepoint |= (utf[ix+2] & 0x3f) << 6;
10683 codepoint |= utf[ix+3] & 0x3f;
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000010684 if (!xmlIsCharQ(codepoint))
10685 return(-ix);
10686 ix += 4;
10687 } else /* unknown encoding */
10688 return(-ix);
10689 }
10690 return(ix);
10691}
10692
10693/**
Owen Taylor3473f882001-02-23 17:55:21 +000010694 * xmlParseTryOrFinish:
10695 * @ctxt: an XML parser context
10696 * @terminate: last chunk indicator
10697 *
10698 * Try to progress on parsing
10699 *
10700 * Returns zero if no parsing was possible
10701 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +000010702static int
Owen Taylor3473f882001-02-23 17:55:21 +000010703xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
10704 int ret = 0;
Daniel Veillardc82c57e2004-01-12 16:24:34 +000010705 int avail, tlen;
Owen Taylor3473f882001-02-23 17:55:21 +000010706 xmlChar cur, next;
Daniel Veillarda880b122003-04-21 21:36:41 +000010707 const xmlChar *lastlt, *lastgt;
Owen Taylor3473f882001-02-23 17:55:21 +000010708
Daniel Veillard36e5cd52004-11-02 14:52:23 +000010709 if (ctxt->input == NULL)
10710 return(0);
10711
Owen Taylor3473f882001-02-23 17:55:21 +000010712#ifdef DEBUG_PUSH
10713 switch (ctxt->instate) {
10714 case XML_PARSER_EOF:
10715 xmlGenericError(xmlGenericErrorContext,
10716 "PP: try EOF\n"); break;
10717 case XML_PARSER_START:
10718 xmlGenericError(xmlGenericErrorContext,
10719 "PP: try START\n"); break;
10720 case XML_PARSER_MISC:
10721 xmlGenericError(xmlGenericErrorContext,
10722 "PP: try MISC\n");break;
10723 case XML_PARSER_COMMENT:
10724 xmlGenericError(xmlGenericErrorContext,
10725 "PP: try COMMENT\n");break;
10726 case XML_PARSER_PROLOG:
10727 xmlGenericError(xmlGenericErrorContext,
10728 "PP: try PROLOG\n");break;
10729 case XML_PARSER_START_TAG:
10730 xmlGenericError(xmlGenericErrorContext,
10731 "PP: try START_TAG\n");break;
10732 case XML_PARSER_CONTENT:
10733 xmlGenericError(xmlGenericErrorContext,
10734 "PP: try CONTENT\n");break;
10735 case XML_PARSER_CDATA_SECTION:
10736 xmlGenericError(xmlGenericErrorContext,
10737 "PP: try CDATA_SECTION\n");break;
10738 case XML_PARSER_END_TAG:
10739 xmlGenericError(xmlGenericErrorContext,
10740 "PP: try END_TAG\n");break;
10741 case XML_PARSER_ENTITY_DECL:
10742 xmlGenericError(xmlGenericErrorContext,
10743 "PP: try ENTITY_DECL\n");break;
10744 case XML_PARSER_ENTITY_VALUE:
10745 xmlGenericError(xmlGenericErrorContext,
10746 "PP: try ENTITY_VALUE\n");break;
10747 case XML_PARSER_ATTRIBUTE_VALUE:
10748 xmlGenericError(xmlGenericErrorContext,
10749 "PP: try ATTRIBUTE_VALUE\n");break;
10750 case XML_PARSER_DTD:
10751 xmlGenericError(xmlGenericErrorContext,
10752 "PP: try DTD\n");break;
10753 case XML_PARSER_EPILOG:
10754 xmlGenericError(xmlGenericErrorContext,
10755 "PP: try EPILOG\n");break;
10756 case XML_PARSER_PI:
10757 xmlGenericError(xmlGenericErrorContext,
10758 "PP: try PI\n");break;
10759 case XML_PARSER_IGNORE:
10760 xmlGenericError(xmlGenericErrorContext,
10761 "PP: try IGNORE\n");break;
10762 }
10763#endif
10764
Daniel Veillard198c1bf2003-10-20 17:07:41 +000010765 if ((ctxt->input != NULL) &&
10766 (ctxt->input->cur - ctxt->input->base > 4096)) {
Daniel Veillarda880b122003-04-21 21:36:41 +000010767 xmlSHRINK(ctxt);
10768 ctxt->checkIndex = 0;
10769 }
10770 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Aleksey Sanine48a3182002-05-09 18:20:01 +000010771
Daniel Veillarda880b122003-04-21 21:36:41 +000010772 while (1) {
Daniel Veillard14412512005-01-21 23:53:26 +000010773 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000010774 return(0);
10775
10776
Owen Taylor3473f882001-02-23 17:55:21 +000010777 /*
10778 * Pop-up of finished entities.
10779 */
10780 while ((RAW == 0) && (ctxt->inputNr > 1))
10781 xmlPopInput(ctxt);
10782
Daniel Veillard198c1bf2003-10-20 17:07:41 +000010783 if (ctxt->input == NULL) break;
Owen Taylor3473f882001-02-23 17:55:21 +000010784 if (ctxt->input->buf == NULL)
Daniel Veillarda880b122003-04-21 21:36:41 +000010785 avail = ctxt->input->length -
10786 (ctxt->input->cur - ctxt->input->base);
Daniel Veillard158a4d22002-02-20 22:17:58 +000010787 else {
10788 /*
10789 * If we are operating on converted input, try to flush
10790 * remainng chars to avoid them stalling in the non-converted
10791 * buffer.
10792 */
10793 if ((ctxt->input->buf->raw != NULL) &&
10794 (ctxt->input->buf->raw->use > 0)) {
10795 int base = ctxt->input->base -
10796 ctxt->input->buf->buffer->content;
10797 int current = ctxt->input->cur - ctxt->input->base;
10798
10799 xmlParserInputBufferPush(ctxt->input->buf, 0, "");
10800 ctxt->input->base = ctxt->input->buf->buffer->content + base;
10801 ctxt->input->cur = ctxt->input->base + current;
10802 ctxt->input->end =
10803 &ctxt->input->buf->buffer->content[
10804 ctxt->input->buf->buffer->use];
10805 }
10806 avail = ctxt->input->buf->buffer->use -
10807 (ctxt->input->cur - ctxt->input->base);
10808 }
Owen Taylor3473f882001-02-23 17:55:21 +000010809 if (avail < 1)
10810 goto done;
10811 switch (ctxt->instate) {
10812 case XML_PARSER_EOF:
10813 /*
10814 * Document parsing is done !
10815 */
10816 goto done;
10817 case XML_PARSER_START:
Daniel Veillard0e4cd172001-06-28 12:13:56 +000010818 if (ctxt->charset == XML_CHAR_ENCODING_NONE) {
10819 xmlChar start[4];
10820 xmlCharEncoding enc;
10821
10822 /*
10823 * Very first chars read from the document flow.
10824 */
10825 if (avail < 4)
10826 goto done;
10827
10828 /*
10829 * Get the 4 first bytes and decode the charset
10830 * if enc != XML_CHAR_ENCODING_NONE
William M. Brack3a1cd212005-02-11 14:35:54 +000010831 * plug some encoding conversion routines,
10832 * else xmlSwitchEncoding will set to (default)
10833 * UTF8.
Daniel Veillard0e4cd172001-06-28 12:13:56 +000010834 */
10835 start[0] = RAW;
10836 start[1] = NXT(1);
10837 start[2] = NXT(2);
10838 start[3] = NXT(3);
10839 enc = xmlDetectCharEncoding(start, 4);
William M. Brack3a1cd212005-02-11 14:35:54 +000010840 xmlSwitchEncoding(ctxt, enc);
Daniel Veillard0e4cd172001-06-28 12:13:56 +000010841 break;
10842 }
Owen Taylor3473f882001-02-23 17:55:21 +000010843
Daniel Veillard2b8c4a12003-10-02 22:28:19 +000010844 if (avail < 2)
10845 goto done;
Owen Taylor3473f882001-02-23 17:55:21 +000010846 cur = ctxt->input->cur[0];
10847 next = ctxt->input->cur[1];
10848 if (cur == 0) {
10849 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10850 ctxt->sax->setDocumentLocator(ctxt->userData,
10851 &xmlDefaultSAXLocator);
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010852 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010853 ctxt->instate = XML_PARSER_EOF;
10854#ifdef DEBUG_PUSH
10855 xmlGenericError(xmlGenericErrorContext,
10856 "PP: entering EOF\n");
10857#endif
10858 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10859 ctxt->sax->endDocument(ctxt->userData);
10860 goto done;
10861 }
10862 if ((cur == '<') && (next == '?')) {
10863 /* PI or XML decl */
10864 if (avail < 5) return(ret);
10865 if ((!terminate) &&
10866 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
10867 return(ret);
10868 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10869 ctxt->sax->setDocumentLocator(ctxt->userData,
10870 &xmlDefaultSAXLocator);
10871 if ((ctxt->input->cur[2] == 'x') &&
10872 (ctxt->input->cur[3] == 'm') &&
10873 (ctxt->input->cur[4] == 'l') &&
William M. Brack76e95df2003-10-18 16:20:14 +000010874 (IS_BLANK_CH(ctxt->input->cur[5]))) {
Owen Taylor3473f882001-02-23 17:55:21 +000010875 ret += 5;
10876#ifdef DEBUG_PUSH
10877 xmlGenericError(xmlGenericErrorContext,
10878 "PP: Parsing XML Decl\n");
10879#endif
10880 xmlParseXMLDecl(ctxt);
10881 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
10882 /*
10883 * The XML REC instructs us to stop parsing right
10884 * here
10885 */
10886 ctxt->instate = XML_PARSER_EOF;
10887 return(0);
10888 }
10889 ctxt->standalone = ctxt->input->standalone;
10890 if ((ctxt->encoding == NULL) &&
10891 (ctxt->input->encoding != NULL))
10892 ctxt->encoding = xmlStrdup(ctxt->input->encoding);
10893 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
10894 (!ctxt->disableSAX))
10895 ctxt->sax->startDocument(ctxt->userData);
10896 ctxt->instate = XML_PARSER_MISC;
10897#ifdef DEBUG_PUSH
10898 xmlGenericError(xmlGenericErrorContext,
10899 "PP: entering MISC\n");
10900#endif
10901 } else {
10902 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10903 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
10904 (!ctxt->disableSAX))
10905 ctxt->sax->startDocument(ctxt->userData);
10906 ctxt->instate = XML_PARSER_MISC;
10907#ifdef DEBUG_PUSH
10908 xmlGenericError(xmlGenericErrorContext,
10909 "PP: entering MISC\n");
10910#endif
10911 }
10912 } else {
10913 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10914 ctxt->sax->setDocumentLocator(ctxt->userData,
10915 &xmlDefaultSAXLocator);
10916 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
William M. Bracka3215c72004-07-31 16:24:01 +000010917 if (ctxt->version == NULL) {
10918 xmlErrMemory(ctxt, NULL);
10919 break;
10920 }
Owen Taylor3473f882001-02-23 17:55:21 +000010921 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
10922 (!ctxt->disableSAX))
10923 ctxt->sax->startDocument(ctxt->userData);
10924 ctxt->instate = XML_PARSER_MISC;
10925#ifdef DEBUG_PUSH
10926 xmlGenericError(xmlGenericErrorContext,
10927 "PP: entering MISC\n");
10928#endif
10929 }
10930 break;
Daniel Veillarda880b122003-04-21 21:36:41 +000010931 case XML_PARSER_START_TAG: {
Daniel Veillarde57ec792003-09-10 10:50:59 +000010932 const xmlChar *name;
Daniel Veillard15495612009-09-05 15:04:41 +020010933 const xmlChar *prefix = NULL;
10934 const xmlChar *URI = NULL;
Daniel Veillarde57ec792003-09-10 10:50:59 +000010935 int nsNr = ctxt->nsNr;
Daniel Veillarda880b122003-04-21 21:36:41 +000010936
10937 if ((avail < 2) && (ctxt->inputNr == 1))
10938 goto done;
10939 cur = ctxt->input->cur[0];
10940 if (cur != '<') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010941 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Daniel Veillarda880b122003-04-21 21:36:41 +000010942 ctxt->instate = XML_PARSER_EOF;
Daniel Veillarda880b122003-04-21 21:36:41 +000010943 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10944 ctxt->sax->endDocument(ctxt->userData);
10945 goto done;
10946 }
10947 if (!terminate) {
10948 if (ctxt->progressive) {
Daniel Veillardb3744002004-02-18 14:28:22 +000010949 /* > can be found unescaped in attribute values */
Daniel Veillardeb70f932004-07-05 16:46:09 +000010950 if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
Daniel Veillarda880b122003-04-21 21:36:41 +000010951 goto done;
10952 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
10953 goto done;
10954 }
10955 }
10956 if (ctxt->spaceNr == 0)
10957 spacePush(ctxt, -1);
Daniel Veillard1114d002006-10-12 16:24:35 +000010958 else if (*ctxt->space == -2)
10959 spacePush(ctxt, -1);
Daniel Veillarda880b122003-04-21 21:36:41 +000010960 else
10961 spacePush(ctxt, *ctxt->space);
Daniel Veillard81273902003-09-30 00:43:48 +000010962#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +000010963 if (ctxt->sax2)
Daniel Veillard81273902003-09-30 00:43:48 +000010964#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillardc82c57e2004-01-12 16:24:34 +000010965 name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
Daniel Veillard81273902003-09-30 00:43:48 +000010966#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +000010967 else
10968 name = xmlParseStartTag(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +000010969#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarda880b122003-04-21 21:36:41 +000010970 if (name == NULL) {
10971 spacePop(ctxt);
10972 ctxt->instate = XML_PARSER_EOF;
Daniel Veillarda880b122003-04-21 21:36:41 +000010973 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10974 ctxt->sax->endDocument(ctxt->userData);
10975 goto done;
10976 }
Daniel Veillard4432df22003-09-28 18:58:27 +000010977#ifdef LIBXML_VALID_ENABLED
Daniel Veillarda880b122003-04-21 21:36:41 +000010978 /*
10979 * [ VC: Root Element Type ]
10980 * The Name in the document type declaration must match
10981 * the element type of the root element.
10982 */
10983 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
10984 ctxt->node && (ctxt->node == ctxt->myDoc->children))
10985 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
Daniel Veillard4432df22003-09-28 18:58:27 +000010986#endif /* LIBXML_VALID_ENABLED */
Daniel Veillarda880b122003-04-21 21:36:41 +000010987
10988 /*
10989 * Check for an Empty Element.
10990 */
10991 if ((RAW == '/') && (NXT(1) == '>')) {
10992 SKIP(2);
Daniel Veillarde57ec792003-09-10 10:50:59 +000010993
10994 if (ctxt->sax2) {
10995 if ((ctxt->sax != NULL) &&
10996 (ctxt->sax->endElementNs != NULL) &&
10997 (!ctxt->disableSAX))
10998 ctxt->sax->endElementNs(ctxt->userData, name,
10999 prefix, URI);
Daniel Veillard48df9612005-01-04 21:50:05 +000011000 if (ctxt->nsNr - nsNr > 0)
11001 nsPop(ctxt, ctxt->nsNr - nsNr);
Daniel Veillard81273902003-09-30 00:43:48 +000011002#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +000011003 } else {
11004 if ((ctxt->sax != NULL) &&
11005 (ctxt->sax->endElement != NULL) &&
11006 (!ctxt->disableSAX))
11007 ctxt->sax->endElement(ctxt->userData, name);
Daniel Veillard81273902003-09-30 00:43:48 +000011008#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarda880b122003-04-21 21:36:41 +000011009 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000011010 spacePop(ctxt);
11011 if (ctxt->nameNr == 0) {
Daniel Veillarda880b122003-04-21 21:36:41 +000011012 ctxt->instate = XML_PARSER_EPILOG;
Daniel Veillarda880b122003-04-21 21:36:41 +000011013 } else {
11014 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillarda880b122003-04-21 21:36:41 +000011015 }
11016 break;
11017 }
11018 if (RAW == '>') {
11019 NEXT;
11020 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +000011021 xmlFatalErrMsgStr(ctxt, XML_ERR_GT_REQUIRED,
Daniel Veillarda880b122003-04-21 21:36:41 +000011022 "Couldn't find end of Start Tag %s\n",
11023 name);
Daniel Veillarda880b122003-04-21 21:36:41 +000011024 nodePop(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +000011025 spacePop(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +000011026 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000011027 if (ctxt->sax2)
11028 nameNsPush(ctxt, name, prefix, URI, ctxt->nsNr - nsNr);
Daniel Veillard81273902003-09-30 00:43:48 +000011029#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +000011030 else
11031 namePush(ctxt, name);
Daniel Veillard81273902003-09-30 00:43:48 +000011032#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +000011033
Daniel Veillarda880b122003-04-21 21:36:41 +000011034 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillarda880b122003-04-21 21:36:41 +000011035 break;
11036 }
11037 case XML_PARSER_CONTENT: {
11038 const xmlChar *test;
11039 unsigned int cons;
11040 if ((avail < 2) && (ctxt->inputNr == 1))
11041 goto done;
11042 cur = ctxt->input->cur[0];
11043 next = ctxt->input->cur[1];
11044
11045 test = CUR_PTR;
11046 cons = ctxt->input->consumed;
11047 if ((cur == '<') && (next == '/')) {
11048 ctxt->instate = XML_PARSER_END_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +000011049 break;
11050 } else if ((cur == '<') && (next == '?')) {
11051 if ((!terminate) &&
11052 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
11053 goto done;
Daniel Veillarda880b122003-04-21 21:36:41 +000011054 xmlParsePI(ctxt);
11055 } else if ((cur == '<') && (next != '!')) {
11056 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +000011057 break;
11058 } else if ((cur == '<') && (next == '!') &&
11059 (ctxt->input->cur[2] == '-') &&
11060 (ctxt->input->cur[3] == '-')) {
Daniel Veillard6974feb2006-02-05 02:43:36 +000011061 int term;
11062
11063 if (avail < 4)
11064 goto done;
11065 ctxt->input->cur += 4;
11066 term = xmlParseLookupSequence(ctxt, '-', '-', '>');
11067 ctxt->input->cur -= 4;
11068 if ((!terminate) && (term < 0))
Daniel Veillarda880b122003-04-21 21:36:41 +000011069 goto done;
Daniel Veillarda880b122003-04-21 21:36:41 +000011070 xmlParseComment(ctxt);
11071 ctxt->instate = XML_PARSER_CONTENT;
11072 } else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
11073 (ctxt->input->cur[2] == '[') &&
11074 (ctxt->input->cur[3] == 'C') &&
11075 (ctxt->input->cur[4] == 'D') &&
11076 (ctxt->input->cur[5] == 'A') &&
11077 (ctxt->input->cur[6] == 'T') &&
11078 (ctxt->input->cur[7] == 'A') &&
11079 (ctxt->input->cur[8] == '[')) {
11080 SKIP(9);
11081 ctxt->instate = XML_PARSER_CDATA_SECTION;
Daniel Veillarda880b122003-04-21 21:36:41 +000011082 break;
11083 } else if ((cur == '<') && (next == '!') &&
11084 (avail < 9)) {
11085 goto done;
11086 } else if (cur == '&') {
11087 if ((!terminate) &&
11088 (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0))
11089 goto done;
Daniel Veillarda880b122003-04-21 21:36:41 +000011090 xmlParseReference(ctxt);
11091 } else {
11092 /* TODO Avoid the extra copy, handle directly !!! */
11093 /*
11094 * Goal of the following test is:
11095 * - minimize calls to the SAX 'character' callback
11096 * when they are mergeable
11097 * - handle an problem for isBlank when we only parse
11098 * a sequence of blank chars and the next one is
11099 * not available to check against '<' presence.
11100 * - tries to homogenize the differences in SAX
11101 * callbacks between the push and pull versions
11102 * of the parser.
11103 */
11104 if ((ctxt->inputNr == 1) &&
11105 (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
11106 if (!terminate) {
11107 if (ctxt->progressive) {
11108 if ((lastlt == NULL) ||
11109 (ctxt->input->cur > lastlt))
11110 goto done;
11111 } else if (xmlParseLookupSequence(ctxt,
11112 '<', 0, 0) < 0) {
11113 goto done;
11114 }
11115 }
11116 }
11117 ctxt->checkIndex = 0;
Daniel Veillarda880b122003-04-21 21:36:41 +000011118 xmlParseCharData(ctxt, 0);
11119 }
11120 /*
11121 * Pop-up of finished entities.
11122 */
11123 while ((RAW == 0) && (ctxt->inputNr > 1))
11124 xmlPopInput(ctxt);
11125 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011126 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
11127 "detected an error in element content\n");
Daniel Veillarda880b122003-04-21 21:36:41 +000011128 ctxt->instate = XML_PARSER_EOF;
11129 break;
11130 }
11131 break;
11132 }
11133 case XML_PARSER_END_TAG:
11134 if (avail < 2)
11135 goto done;
11136 if (!terminate) {
11137 if (ctxt->progressive) {
Daniel Veillardeb70f932004-07-05 16:46:09 +000011138 /* > can be found unescaped in attribute values */
11139 if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
Daniel Veillarda880b122003-04-21 21:36:41 +000011140 goto done;
11141 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
11142 goto done;
11143 }
11144 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000011145 if (ctxt->sax2) {
11146 xmlParseEndTag2(ctxt,
11147 (void *) ctxt->pushTab[ctxt->nameNr * 3 - 3],
11148 (void *) ctxt->pushTab[ctxt->nameNr * 3 - 2], 0,
Daniel Veillardc82c57e2004-01-12 16:24:34 +000011149 (int) (long) ctxt->pushTab[ctxt->nameNr * 3 - 1], 0);
Daniel Veillarde57ec792003-09-10 10:50:59 +000011150 nameNsPop(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +000011151 }
11152#ifdef LIBXML_SAX1_ENABLED
11153 else
Daniel Veillarde57ec792003-09-10 10:50:59 +000011154 xmlParseEndTag1(ctxt, 0);
Daniel Veillard81273902003-09-30 00:43:48 +000011155#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +000011156 if (ctxt->nameNr == 0) {
Daniel Veillarda880b122003-04-21 21:36:41 +000011157 ctxt->instate = XML_PARSER_EPILOG;
Daniel Veillarda880b122003-04-21 21:36:41 +000011158 } else {
11159 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillarda880b122003-04-21 21:36:41 +000011160 }
11161 break;
11162 case XML_PARSER_CDATA_SECTION: {
11163 /*
11164 * The Push mode need to have the SAX callback for
11165 * cdataBlock merge back contiguous callbacks.
11166 */
11167 int base;
11168
11169 base = xmlParseLookupSequence(ctxt, ']', ']', '>');
11170 if (base < 0) {
11171 if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) {
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011172 int tmp;
11173
11174 tmp = xmlCheckCdataPush(ctxt->input->cur,
11175 XML_PARSER_BIG_BUFFER_SIZE);
11176 if (tmp < 0) {
11177 tmp = -tmp;
11178 ctxt->input->cur += tmp;
11179 goto encoding_error;
11180 }
Daniel Veillarda880b122003-04-21 21:36:41 +000011181 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
11182 if (ctxt->sax->cdataBlock != NULL)
Daniel Veillardd9d32ae2003-07-05 20:32:43 +000011183 ctxt->sax->cdataBlock(ctxt->userData,
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011184 ctxt->input->cur, tmp);
Daniel Veillardd9d32ae2003-07-05 20:32:43 +000011185 else if (ctxt->sax->characters != NULL)
11186 ctxt->sax->characters(ctxt->userData,
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011187 ctxt->input->cur, tmp);
Daniel Veillarda880b122003-04-21 21:36:41 +000011188 }
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011189 SKIPL(tmp);
Daniel Veillarda880b122003-04-21 21:36:41 +000011190 ctxt->checkIndex = 0;
11191 }
11192 goto done;
11193 } else {
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011194 int tmp;
11195
11196 tmp = xmlCheckCdataPush(ctxt->input->cur, base);
11197 if ((tmp < 0) || (tmp != base)) {
11198 tmp = -tmp;
11199 ctxt->input->cur += tmp;
11200 goto encoding_error;
11201 }
Daniel Veillardd0d2f092008-03-07 16:50:21 +000011202 if ((ctxt->sax != NULL) && (base == 0) &&
11203 (ctxt->sax->cdataBlock != NULL) &&
11204 (!ctxt->disableSAX)) {
11205 /*
11206 * Special case to provide identical behaviour
11207 * between pull and push parsers on empty CDATA
11208 * sections
11209 */
11210 if ((ctxt->input->cur - ctxt->input->base >= 9) &&
11211 (!strncmp((const char *)&ctxt->input->cur[-9],
11212 "<![CDATA[", 9)))
11213 ctxt->sax->cdataBlock(ctxt->userData,
11214 BAD_CAST "", 0);
11215 } else if ((ctxt->sax != NULL) && (base > 0) &&
Daniel Veillarda880b122003-04-21 21:36:41 +000011216 (!ctxt->disableSAX)) {
11217 if (ctxt->sax->cdataBlock != NULL)
11218 ctxt->sax->cdataBlock(ctxt->userData,
11219 ctxt->input->cur, base);
Daniel Veillardd9d32ae2003-07-05 20:32:43 +000011220 else if (ctxt->sax->characters != NULL)
11221 ctxt->sax->characters(ctxt->userData,
11222 ctxt->input->cur, base);
Daniel Veillarda880b122003-04-21 21:36:41 +000011223 }
Daniel Veillard0b787f32004-03-26 17:29:53 +000011224 SKIPL(base + 3);
Daniel Veillarda880b122003-04-21 21:36:41 +000011225 ctxt->checkIndex = 0;
11226 ctxt->instate = XML_PARSER_CONTENT;
11227#ifdef DEBUG_PUSH
11228 xmlGenericError(xmlGenericErrorContext,
11229 "PP: entering CONTENT\n");
11230#endif
11231 }
11232 break;
11233 }
Owen Taylor3473f882001-02-23 17:55:21 +000011234 case XML_PARSER_MISC:
11235 SKIP_BLANKS;
11236 if (ctxt->input->buf == NULL)
Daniel Veillarda880b122003-04-21 21:36:41 +000011237 avail = ctxt->input->length -
11238 (ctxt->input->cur - ctxt->input->base);
Owen Taylor3473f882001-02-23 17:55:21 +000011239 else
Daniel Veillarda880b122003-04-21 21:36:41 +000011240 avail = ctxt->input->buf->buffer->use -
11241 (ctxt->input->cur - ctxt->input->base);
Owen Taylor3473f882001-02-23 17:55:21 +000011242 if (avail < 2)
11243 goto done;
11244 cur = ctxt->input->cur[0];
11245 next = ctxt->input->cur[1];
11246 if ((cur == '<') && (next == '?')) {
11247 if ((!terminate) &&
11248 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
11249 goto done;
11250#ifdef DEBUG_PUSH
11251 xmlGenericError(xmlGenericErrorContext,
11252 "PP: Parsing PI\n");
11253#endif
11254 xmlParsePI(ctxt);
Daniel Veillard40e4b212007-06-12 14:46:40 +000011255 ctxt->checkIndex = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000011256 } else if ((cur == '<') && (next == '!') &&
Daniel Veillarda880b122003-04-21 21:36:41 +000011257 (ctxt->input->cur[2] == '-') &&
11258 (ctxt->input->cur[3] == '-')) {
Owen Taylor3473f882001-02-23 17:55:21 +000011259 if ((!terminate) &&
11260 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
11261 goto done;
11262#ifdef DEBUG_PUSH
11263 xmlGenericError(xmlGenericErrorContext,
11264 "PP: Parsing Comment\n");
11265#endif
11266 xmlParseComment(ctxt);
11267 ctxt->instate = XML_PARSER_MISC;
Daniel Veillarddfac9462007-06-12 14:44:32 +000011268 ctxt->checkIndex = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000011269 } else if ((cur == '<') && (next == '!') &&
Daniel Veillarda880b122003-04-21 21:36:41 +000011270 (ctxt->input->cur[2] == 'D') &&
11271 (ctxt->input->cur[3] == 'O') &&
11272 (ctxt->input->cur[4] == 'C') &&
11273 (ctxt->input->cur[5] == 'T') &&
11274 (ctxt->input->cur[6] == 'Y') &&
11275 (ctxt->input->cur[7] == 'P') &&
Owen Taylor3473f882001-02-23 17:55:21 +000011276 (ctxt->input->cur[8] == 'E')) {
11277 if ((!terminate) &&
11278 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
11279 goto done;
11280#ifdef DEBUG_PUSH
11281 xmlGenericError(xmlGenericErrorContext,
11282 "PP: Parsing internal subset\n");
11283#endif
11284 ctxt->inSubset = 1;
11285 xmlParseDocTypeDecl(ctxt);
11286 if (RAW == '[') {
11287 ctxt->instate = XML_PARSER_DTD;
11288#ifdef DEBUG_PUSH
11289 xmlGenericError(xmlGenericErrorContext,
11290 "PP: entering DTD\n");
11291#endif
11292 } else {
11293 /*
11294 * Create and update the external subset.
11295 */
11296 ctxt->inSubset = 2;
11297 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
11298 (ctxt->sax->externalSubset != NULL))
11299 ctxt->sax->externalSubset(ctxt->userData,
11300 ctxt->intSubName, ctxt->extSubSystem,
11301 ctxt->extSubURI);
11302 ctxt->inSubset = 0;
Daniel Veillardac4118d2008-01-11 05:27:32 +000011303 xmlCleanSpecialAttr(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011304 ctxt->instate = XML_PARSER_PROLOG;
11305#ifdef DEBUG_PUSH
11306 xmlGenericError(xmlGenericErrorContext,
11307 "PP: entering PROLOG\n");
11308#endif
11309 }
11310 } else if ((cur == '<') && (next == '!') &&
11311 (avail < 9)) {
11312 goto done;
11313 } else {
11314 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +000011315 ctxt->progressive = 1;
11316 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Owen Taylor3473f882001-02-23 17:55:21 +000011317#ifdef DEBUG_PUSH
11318 xmlGenericError(xmlGenericErrorContext,
11319 "PP: entering START_TAG\n");
11320#endif
11321 }
11322 break;
Owen Taylor3473f882001-02-23 17:55:21 +000011323 case XML_PARSER_PROLOG:
11324 SKIP_BLANKS;
11325 if (ctxt->input->buf == NULL)
11326 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
11327 else
11328 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
11329 if (avail < 2)
11330 goto done;
11331 cur = ctxt->input->cur[0];
11332 next = ctxt->input->cur[1];
11333 if ((cur == '<') && (next == '?')) {
11334 if ((!terminate) &&
11335 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
11336 goto done;
11337#ifdef DEBUG_PUSH
11338 xmlGenericError(xmlGenericErrorContext,
11339 "PP: Parsing PI\n");
11340#endif
11341 xmlParsePI(ctxt);
11342 } else if ((cur == '<') && (next == '!') &&
11343 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
11344 if ((!terminate) &&
11345 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
11346 goto done;
11347#ifdef DEBUG_PUSH
11348 xmlGenericError(xmlGenericErrorContext,
11349 "PP: Parsing Comment\n");
11350#endif
11351 xmlParseComment(ctxt);
11352 ctxt->instate = XML_PARSER_PROLOG;
11353 } else if ((cur == '<') && (next == '!') &&
11354 (avail < 4)) {
11355 goto done;
11356 } else {
11357 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillard0df3bc32004-06-08 12:03:41 +000011358 if (ctxt->progressive == 0)
11359 ctxt->progressive = 1;
Daniel Veillarda880b122003-04-21 21:36:41 +000011360 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Owen Taylor3473f882001-02-23 17:55:21 +000011361#ifdef DEBUG_PUSH
11362 xmlGenericError(xmlGenericErrorContext,
11363 "PP: entering START_TAG\n");
11364#endif
11365 }
11366 break;
11367 case XML_PARSER_EPILOG:
11368 SKIP_BLANKS;
11369 if (ctxt->input->buf == NULL)
11370 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
11371 else
11372 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
11373 if (avail < 2)
11374 goto done;
11375 cur = ctxt->input->cur[0];
11376 next = ctxt->input->cur[1];
11377 if ((cur == '<') && (next == '?')) {
11378 if ((!terminate) &&
11379 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
11380 goto done;
11381#ifdef DEBUG_PUSH
11382 xmlGenericError(xmlGenericErrorContext,
11383 "PP: Parsing PI\n");
11384#endif
11385 xmlParsePI(ctxt);
11386 ctxt->instate = XML_PARSER_EPILOG;
11387 } else if ((cur == '<') && (next == '!') &&
11388 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
11389 if ((!terminate) &&
11390 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
11391 goto done;
11392#ifdef DEBUG_PUSH
11393 xmlGenericError(xmlGenericErrorContext,
11394 "PP: Parsing Comment\n");
11395#endif
11396 xmlParseComment(ctxt);
11397 ctxt->instate = XML_PARSER_EPILOG;
11398 } else if ((cur == '<') && (next == '!') &&
11399 (avail < 4)) {
11400 goto done;
11401 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011402 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011403 ctxt->instate = XML_PARSER_EOF;
11404#ifdef DEBUG_PUSH
11405 xmlGenericError(xmlGenericErrorContext,
11406 "PP: entering EOF\n");
11407#endif
Daniel Veillard8d24cc12002-03-05 15:41:29 +000011408 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +000011409 ctxt->sax->endDocument(ctxt->userData);
11410 goto done;
11411 }
11412 break;
Owen Taylor3473f882001-02-23 17:55:21 +000011413 case XML_PARSER_DTD: {
11414 /*
11415 * Sorry but progressive parsing of the internal subset
11416 * is not expected to be supported. We first check that
11417 * the full content of the internal subset is available and
11418 * the parsing is launched only at that point.
11419 * Internal subset ends up with "']' S? '>'" in an unescaped
11420 * section and not in a ']]>' sequence which are conditional
11421 * sections (whoever argued to keep that crap in XML deserve
11422 * a place in hell !).
11423 */
11424 int base, i;
11425 xmlChar *buf;
11426 xmlChar quote = 0;
11427
11428 base = ctxt->input->cur - ctxt->input->base;
11429 if (base < 0) return(0);
11430 if (ctxt->checkIndex > base)
11431 base = ctxt->checkIndex;
11432 buf = ctxt->input->buf->buffer->content;
11433 for (;(unsigned int) base < ctxt->input->buf->buffer->use;
11434 base++) {
11435 if (quote != 0) {
11436 if (buf[base] == quote)
11437 quote = 0;
11438 continue;
11439 }
Daniel Veillard036143b2004-02-12 11:57:52 +000011440 if ((quote == 0) && (buf[base] == '<')) {
11441 int found = 0;
11442 /* special handling of comments */
11443 if (((unsigned int) base + 4 <
11444 ctxt->input->buf->buffer->use) &&
11445 (buf[base + 1] == '!') &&
11446 (buf[base + 2] == '-') &&
11447 (buf[base + 3] == '-')) {
11448 for (;(unsigned int) base + 3 <
11449 ctxt->input->buf->buffer->use; base++) {
11450 if ((buf[base] == '-') &&
11451 (buf[base + 1] == '-') &&
11452 (buf[base + 2] == '>')) {
11453 found = 1;
11454 base += 2;
11455 break;
11456 }
11457 }
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000011458 if (!found) {
11459#if 0
11460 fprintf(stderr, "unfinished comment\n");
11461#endif
11462 break; /* for */
11463 }
Daniel Veillard036143b2004-02-12 11:57:52 +000011464 continue;
11465 }
11466 }
Owen Taylor3473f882001-02-23 17:55:21 +000011467 if (buf[base] == '"') {
11468 quote = '"';
11469 continue;
11470 }
11471 if (buf[base] == '\'') {
11472 quote = '\'';
11473 continue;
11474 }
11475 if (buf[base] == ']') {
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000011476#if 0
11477 fprintf(stderr, "%c%c%c%c: ", buf[base],
11478 buf[base + 1], buf[base + 2], buf[base + 3]);
11479#endif
Owen Taylor3473f882001-02-23 17:55:21 +000011480 if ((unsigned int) base +1 >=
11481 ctxt->input->buf->buffer->use)
11482 break;
11483 if (buf[base + 1] == ']') {
11484 /* conditional crap, skip both ']' ! */
11485 base++;
11486 continue;
11487 }
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000011488 for (i = 1;
Owen Taylor3473f882001-02-23 17:55:21 +000011489 (unsigned int) base + i < ctxt->input->buf->buffer->use;
11490 i++) {
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000011491 if (buf[base + i] == '>') {
11492#if 0
11493 fprintf(stderr, "found\n");
11494#endif
Owen Taylor3473f882001-02-23 17:55:21 +000011495 goto found_end_int_subset;
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000011496 }
11497 if (!IS_BLANK_CH(buf[base + i])) {
11498#if 0
11499 fprintf(stderr, "not found\n");
11500#endif
11501 goto not_end_of_int_subset;
11502 }
Owen Taylor3473f882001-02-23 17:55:21 +000011503 }
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000011504#if 0
11505 fprintf(stderr, "end of stream\n");
11506#endif
Owen Taylor3473f882001-02-23 17:55:21 +000011507 break;
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000011508
Owen Taylor3473f882001-02-23 17:55:21 +000011509 }
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000011510not_end_of_int_subset:
11511 continue; /* for */
Owen Taylor3473f882001-02-23 17:55:21 +000011512 }
11513 /*
11514 * We didn't find the end of the Internal subset
11515 */
Owen Taylor3473f882001-02-23 17:55:21 +000011516#ifdef DEBUG_PUSH
11517 if (next == 0)
11518 xmlGenericError(xmlGenericErrorContext,
11519 "PP: lookup of int subset end filed\n");
11520#endif
11521 goto done;
11522
11523found_end_int_subset:
11524 xmlParseInternalSubset(ctxt);
11525 ctxt->inSubset = 2;
11526 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
11527 (ctxt->sax->externalSubset != NULL))
11528 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
11529 ctxt->extSubSystem, ctxt->extSubURI);
11530 ctxt->inSubset = 0;
Daniel Veillardac4118d2008-01-11 05:27:32 +000011531 xmlCleanSpecialAttr(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011532 ctxt->instate = XML_PARSER_PROLOG;
11533 ctxt->checkIndex = 0;
11534#ifdef DEBUG_PUSH
11535 xmlGenericError(xmlGenericErrorContext,
11536 "PP: entering PROLOG\n");
11537#endif
11538 break;
11539 }
11540 case XML_PARSER_COMMENT:
11541 xmlGenericError(xmlGenericErrorContext,
11542 "PP: internal error, state == COMMENT\n");
11543 ctxt->instate = XML_PARSER_CONTENT;
11544#ifdef DEBUG_PUSH
11545 xmlGenericError(xmlGenericErrorContext,
11546 "PP: entering CONTENT\n");
11547#endif
11548 break;
Daniel Veillarda880b122003-04-21 21:36:41 +000011549 case XML_PARSER_IGNORE:
11550 xmlGenericError(xmlGenericErrorContext,
11551 "PP: internal error, state == IGNORE");
11552 ctxt->instate = XML_PARSER_DTD;
11553#ifdef DEBUG_PUSH
11554 xmlGenericError(xmlGenericErrorContext,
11555 "PP: entering DTD\n");
11556#endif
11557 break;
Owen Taylor3473f882001-02-23 17:55:21 +000011558 case XML_PARSER_PI:
11559 xmlGenericError(xmlGenericErrorContext,
11560 "PP: internal error, state == PI\n");
11561 ctxt->instate = XML_PARSER_CONTENT;
11562#ifdef DEBUG_PUSH
11563 xmlGenericError(xmlGenericErrorContext,
11564 "PP: entering CONTENT\n");
11565#endif
11566 break;
11567 case XML_PARSER_ENTITY_DECL:
11568 xmlGenericError(xmlGenericErrorContext,
11569 "PP: internal error, state == ENTITY_DECL\n");
11570 ctxt->instate = XML_PARSER_DTD;
11571#ifdef DEBUG_PUSH
11572 xmlGenericError(xmlGenericErrorContext,
11573 "PP: entering DTD\n");
11574#endif
11575 break;
11576 case XML_PARSER_ENTITY_VALUE:
11577 xmlGenericError(xmlGenericErrorContext,
11578 "PP: internal error, state == ENTITY_VALUE\n");
11579 ctxt->instate = XML_PARSER_CONTENT;
11580#ifdef DEBUG_PUSH
11581 xmlGenericError(xmlGenericErrorContext,
11582 "PP: entering DTD\n");
11583#endif
11584 break;
11585 case XML_PARSER_ATTRIBUTE_VALUE:
11586 xmlGenericError(xmlGenericErrorContext,
11587 "PP: internal error, state == ATTRIBUTE_VALUE\n");
11588 ctxt->instate = XML_PARSER_START_TAG;
11589#ifdef DEBUG_PUSH
11590 xmlGenericError(xmlGenericErrorContext,
11591 "PP: entering START_TAG\n");
11592#endif
11593 break;
11594 case XML_PARSER_SYSTEM_LITERAL:
11595 xmlGenericError(xmlGenericErrorContext,
11596 "PP: internal error, state == SYSTEM_LITERAL\n");
11597 ctxt->instate = XML_PARSER_START_TAG;
11598#ifdef DEBUG_PUSH
11599 xmlGenericError(xmlGenericErrorContext,
11600 "PP: entering START_TAG\n");
11601#endif
11602 break;
Daniel Veillard4a7ae502002-02-18 19:18:17 +000011603 case XML_PARSER_PUBLIC_LITERAL:
11604 xmlGenericError(xmlGenericErrorContext,
11605 "PP: internal error, state == PUBLIC_LITERAL\n");
11606 ctxt->instate = XML_PARSER_START_TAG;
11607#ifdef DEBUG_PUSH
11608 xmlGenericError(xmlGenericErrorContext,
11609 "PP: entering START_TAG\n");
11610#endif
11611 break;
Owen Taylor3473f882001-02-23 17:55:21 +000011612 }
11613 }
11614done:
11615#ifdef DEBUG_PUSH
11616 xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret);
11617#endif
11618 return(ret);
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011619encoding_error:
11620 {
11621 char buffer[150];
11622
11623 snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
11624 ctxt->input->cur[0], ctxt->input->cur[1],
11625 ctxt->input->cur[2], ctxt->input->cur[3]);
11626 __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
11627 "Input is not proper UTF-8, indicate encoding !\n%s",
11628 BAD_CAST buffer, NULL);
11629 }
11630 return(0);
Owen Taylor3473f882001-02-23 17:55:21 +000011631}
11632
11633/**
Owen Taylor3473f882001-02-23 17:55:21 +000011634 * xmlParseChunk:
11635 * @ctxt: an XML parser context
11636 * @chunk: an char array
11637 * @size: the size in byte of the chunk
11638 * @terminate: last chunk indicator
11639 *
11640 * Parse a Chunk of memory
11641 *
11642 * Returns zero if no error, the xmlParserErrors otherwise.
11643 */
11644int
11645xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
11646 int terminate) {
Daniel Veillarda617e242006-01-09 14:38:44 +000011647 int end_in_lf = 0;
Daniel Veillarda6c76a22009-08-26 14:37:00 +020011648 int remain = 0;
Daniel Veillarda617e242006-01-09 14:38:44 +000011649
Daniel Veillard36e5cd52004-11-02 14:52:23 +000011650 if (ctxt == NULL)
11651 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard14412512005-01-21 23:53:26 +000011652 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000011653 return(ctxt->errNo);
Daniel Veillard309f81d2003-09-23 09:02:53 +000011654 if (ctxt->instate == XML_PARSER_START)
11655 xmlDetectSAX2(ctxt);
Daniel Veillarda617e242006-01-09 14:38:44 +000011656 if ((size > 0) && (chunk != NULL) && (!terminate) &&
11657 (chunk[size - 1] == '\r')) {
11658 end_in_lf = 1;
11659 size--;
11660 }
Daniel Veillarda6c76a22009-08-26 14:37:00 +020011661
11662xmldecl_done:
11663
Owen Taylor3473f882001-02-23 17:55:21 +000011664 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
11665 (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) {
11666 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
11667 int cur = ctxt->input->cur - ctxt->input->base;
William M. Bracka3215c72004-07-31 16:24:01 +000011668 int res;
Daniel Veillarda6c76a22009-08-26 14:37:00 +020011669
11670 /*
11671 * Specific handling if we autodetected an encoding, we should not
11672 * push more than the first line ... which depend on the encoding
11673 * And only push the rest once the final encoding was detected
11674 */
11675 if ((ctxt->instate == XML_PARSER_START) && (ctxt->input != NULL) &&
11676 (ctxt->input->buf != NULL) && (ctxt->input->buf->encoder != NULL)) {
Nikolay Sivov73046832010-01-19 15:38:05 +010011677 unsigned int len = 45;
Daniel Veillarda6c76a22009-08-26 14:37:00 +020011678
11679 if ((xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
11680 BAD_CAST "UTF-16")) ||
11681 (xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
11682 BAD_CAST "UTF16")))
11683 len = 90;
11684 else if ((xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
11685 BAD_CAST "UCS-4")) ||
11686 (xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
11687 BAD_CAST "UCS4")))
11688 len = 180;
11689
11690 if (ctxt->input->buf->rawconsumed < len)
11691 len -= ctxt->input->buf->rawconsumed;
11692
Raul Hudeaba9716a2010-03-15 10:13:29 +010011693 /*
11694 * Change size for reading the initial declaration only
11695 * if size is greater than len. Otherwise, memmove in xmlBufferAdd
11696 * will blindly copy extra bytes from memory.
11697 */
Daniel Veillard60587d62010-11-04 15:16:27 +010011698 if ((unsigned int) size > len) {
Raul Hudeaba9716a2010-03-15 10:13:29 +010011699 remain = size - len;
11700 size = len;
11701 } else {
11702 remain = 0;
11703 }
Daniel Veillarda6c76a22009-08-26 14:37:00 +020011704 }
William M. Bracka3215c72004-07-31 16:24:01 +000011705 res =xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
11706 if (res < 0) {
11707 ctxt->errNo = XML_PARSER_EOF;
11708 ctxt->disableSAX = 1;
11709 return (XML_PARSER_EOF);
11710 }
Owen Taylor3473f882001-02-23 17:55:21 +000011711 ctxt->input->base = ctxt->input->buf->buffer->content + base;
11712 ctxt->input->cur = ctxt->input->base + cur;
Daniel Veillard48b2f892001-02-25 16:11:03 +000011713 ctxt->input->end =
11714 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000011715#ifdef DEBUG_PUSH
11716 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
11717#endif
11718
Owen Taylor3473f882001-02-23 17:55:21 +000011719 } else if (ctxt->instate != XML_PARSER_EOF) {
11720 if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
11721 xmlParserInputBufferPtr in = ctxt->input->buf;
11722 if ((in->encoder != NULL) && (in->buffer != NULL) &&
11723 (in->raw != NULL)) {
11724 int nbchars;
Daniel Veillarda6c76a22009-08-26 14:37:00 +020011725
Owen Taylor3473f882001-02-23 17:55:21 +000011726 nbchars = xmlCharEncInFunc(in->encoder, in->buffer, in->raw);
11727 if (nbchars < 0) {
Daniel Veillard24eb9782003-10-04 21:08:09 +000011728 /* TODO 2.6.0 */
Owen Taylor3473f882001-02-23 17:55:21 +000011729 xmlGenericError(xmlGenericErrorContext,
11730 "xmlParseChunk: encoder error\n");
11731 return(XML_ERR_INVALID_ENCODING);
11732 }
11733 }
11734 }
11735 }
Daniel Veillarda6c76a22009-08-26 14:37:00 +020011736 if (remain != 0)
11737 xmlParseTryOrFinish(ctxt, 0);
11738 else
11739 xmlParseTryOrFinish(ctxt, terminate);
11740 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
11741 return(ctxt->errNo);
11742
11743 if (remain != 0) {
11744 chunk += size;
11745 size = remain;
11746 remain = 0;
11747 goto xmldecl_done;
11748 }
Daniel Veillarda617e242006-01-09 14:38:44 +000011749 if ((end_in_lf == 1) && (ctxt->input != NULL) &&
11750 (ctxt->input->buf != NULL)) {
11751 xmlParserInputBufferPush(ctxt->input->buf, 1, "\r");
11752 }
Owen Taylor3473f882001-02-23 17:55:21 +000011753 if (terminate) {
11754 /*
11755 * Check for termination
11756 */
Daniel Veillard36e5cd52004-11-02 14:52:23 +000011757 int avail = 0;
11758
11759 if (ctxt->input != NULL) {
Daniel Veillard819d5cb2002-10-14 11:15:18 +000011760 if (ctxt->input->buf == NULL)
Daniel Veillard36e5cd52004-11-02 14:52:23 +000011761 avail = ctxt->input->length -
11762 (ctxt->input->cur - ctxt->input->base);
11763 else
11764 avail = ctxt->input->buf->buffer->use -
11765 (ctxt->input->cur - ctxt->input->base);
11766 }
Daniel Veillard819d5cb2002-10-14 11:15:18 +000011767
Owen Taylor3473f882001-02-23 17:55:21 +000011768 if ((ctxt->instate != XML_PARSER_EOF) &&
11769 (ctxt->instate != XML_PARSER_EPILOG)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011770 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011771 }
Daniel Veillard819d5cb2002-10-14 11:15:18 +000011772 if ((ctxt->instate == XML_PARSER_EPILOG) && (avail > 0)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011773 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Daniel Veillard819d5cb2002-10-14 11:15:18 +000011774 }
Owen Taylor3473f882001-02-23 17:55:21 +000011775 if (ctxt->instate != XML_PARSER_EOF) {
Daniel Veillard8d24cc12002-03-05 15:41:29 +000011776 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +000011777 ctxt->sax->endDocument(ctxt->userData);
11778 }
11779 ctxt->instate = XML_PARSER_EOF;
11780 }
11781 return((xmlParserErrors) ctxt->errNo);
11782}
11783
11784/************************************************************************
11785 * *
11786 * I/O front end functions to the parser *
11787 * *
11788 ************************************************************************/
11789
11790/**
Owen Taylor3473f882001-02-23 17:55:21 +000011791 * xmlCreatePushParserCtxt:
11792 * @sax: a SAX handler
11793 * @user_data: The user data returned on SAX callbacks
11794 * @chunk: a pointer to an array of chars
11795 * @size: number of chars in the array
11796 * @filename: an optional file name or URI
11797 *
Daniel Veillard176d99f2002-07-06 19:22:28 +000011798 * Create a parser context for using the XML parser in push mode.
11799 * If @buffer and @size are non-NULL, the data is used to detect
11800 * the encoding. The remaining characters will be parsed so they
11801 * don't need to be fed in again through xmlParseChunk.
Owen Taylor3473f882001-02-23 17:55:21 +000011802 * To allow content encoding detection, @size should be >= 4
11803 * The value of @filename is used for fetching external entities
11804 * and error/warning reports.
11805 *
11806 * Returns the new parser context or NULL
11807 */
Daniel Veillard176d99f2002-07-06 19:22:28 +000011808
Owen Taylor3473f882001-02-23 17:55:21 +000011809xmlParserCtxtPtr
11810xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
11811 const char *chunk, int size, const char *filename) {
11812 xmlParserCtxtPtr ctxt;
11813 xmlParserInputPtr inputStream;
11814 xmlParserInputBufferPtr buf;
11815 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
11816
11817 /*
11818 * plug some encoding conversion routines
11819 */
11820 if ((chunk != NULL) && (size >= 4))
11821 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
11822
11823 buf = xmlAllocParserInputBuffer(enc);
11824 if (buf == NULL) return(NULL);
11825
11826 ctxt = xmlNewParserCtxt();
11827 if (ctxt == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +000011828 xmlErrMemory(NULL, "creating parser: out of memory\n");
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000011829 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000011830 return(NULL);
11831 }
Daniel Veillard03a53c32004-10-26 16:06:51 +000011832 ctxt->dictNames = 1;
Daniel Veillarde57ec792003-09-10 10:50:59 +000011833 ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 * sizeof(xmlChar *));
11834 if (ctxt->pushTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011835 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +000011836 xmlFreeParserInputBuffer(buf);
11837 xmlFreeParserCtxt(ctxt);
11838 return(NULL);
11839 }
Owen Taylor3473f882001-02-23 17:55:21 +000011840 if (sax != NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +000011841#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard092643b2003-09-25 14:29:29 +000011842 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Daniel Veillard81273902003-09-30 00:43:48 +000011843#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000011844 xmlFree(ctxt->sax);
11845 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
11846 if (ctxt->sax == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011847 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000011848 xmlFreeParserInputBuffer(buf);
11849 xmlFreeParserCtxt(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011850 return(NULL);
11851 }
Daniel Veillard5ea30d72004-11-08 11:54:28 +000011852 memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
11853 if (sax->initialized == XML_SAX2_MAGIC)
11854 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
11855 else
11856 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
Owen Taylor3473f882001-02-23 17:55:21 +000011857 if (user_data != NULL)
11858 ctxt->userData = user_data;
11859 }
11860 if (filename == NULL) {
11861 ctxt->directory = NULL;
11862 } else {
11863 ctxt->directory = xmlParserGetDirectory(filename);
11864 }
11865
11866 inputStream = xmlNewInputStream(ctxt);
11867 if (inputStream == NULL) {
11868 xmlFreeParserCtxt(ctxt);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000011869 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000011870 return(NULL);
11871 }
11872
11873 if (filename == NULL)
11874 inputStream->filename = NULL;
William M. Bracka3215c72004-07-31 16:24:01 +000011875 else {
Daniel Veillardf4862f02002-09-10 11:13:43 +000011876 inputStream->filename = (char *)
Igor Zlatkovic5f9fada2003-02-19 14:51:00 +000011877 xmlCanonicPath((const xmlChar *) filename);
William M. Bracka3215c72004-07-31 16:24:01 +000011878 if (inputStream->filename == NULL) {
11879 xmlFreeParserCtxt(ctxt);
11880 xmlFreeParserInputBuffer(buf);
11881 return(NULL);
11882 }
11883 }
Owen Taylor3473f882001-02-23 17:55:21 +000011884 inputStream->buf = buf;
11885 inputStream->base = inputStream->buf->buffer->content;
11886 inputStream->cur = inputStream->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +000011887 inputStream->end =
11888 &inputStream->buf->buffer->content[inputStream->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000011889
11890 inputPush(ctxt, inputStream);
11891
William M. Brack3a1cd212005-02-11 14:35:54 +000011892 /*
11893 * If the caller didn't provide an initial 'chunk' for determining
11894 * the encoding, we set the context to XML_CHAR_ENCODING_NONE so
11895 * that it can be automatically determined later
11896 */
11897 if ((size == 0) || (chunk == NULL)) {
11898 ctxt->charset = XML_CHAR_ENCODING_NONE;
11899 } else if ((ctxt->input != NULL) && (ctxt->input->buf != NULL)) {
Daniel Veillardaa39a0f2002-01-06 12:47:22 +000011900 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
11901 int cur = ctxt->input->cur - ctxt->input->base;
11902
Owen Taylor3473f882001-02-23 17:55:21 +000011903 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
Daniel Veillardaa39a0f2002-01-06 12:47:22 +000011904
11905 ctxt->input->base = ctxt->input->buf->buffer->content + base;
11906 ctxt->input->cur = ctxt->input->base + cur;
11907 ctxt->input->end =
11908 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000011909#ifdef DEBUG_PUSH
11910 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
11911#endif
11912 }
11913
Daniel Veillard0e4cd172001-06-28 12:13:56 +000011914 if (enc != XML_CHAR_ENCODING_NONE) {
11915 xmlSwitchEncoding(ctxt, enc);
11916 }
11917
Owen Taylor3473f882001-02-23 17:55:21 +000011918 return(ctxt);
11919}
Daniel Veillard73b013f2003-09-30 12:36:01 +000011920#endif /* LIBXML_PUSH_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000011921
11922/**
Daniel Veillard39e5c892005-07-03 22:48:50 +000011923 * xmlStopParser:
11924 * @ctxt: an XML parser context
11925 *
11926 * Blocks further parser processing
11927 */
11928void
11929xmlStopParser(xmlParserCtxtPtr ctxt) {
11930 if (ctxt == NULL)
11931 return;
11932 ctxt->instate = XML_PARSER_EOF;
11933 ctxt->disableSAX = 1;
11934 if (ctxt->input != NULL) {
11935 ctxt->input->cur = BAD_CAST"";
11936 ctxt->input->base = ctxt->input->cur;
11937 }
11938}
11939
11940/**
Owen Taylor3473f882001-02-23 17:55:21 +000011941 * xmlCreateIOParserCtxt:
11942 * @sax: a SAX handler
11943 * @user_data: The user data returned on SAX callbacks
11944 * @ioread: an I/O read function
11945 * @ioclose: an I/O close function
11946 * @ioctx: an I/O handler
11947 * @enc: the charset encoding if known
11948 *
11949 * Create a parser context for using the XML parser with an existing
11950 * I/O stream
11951 *
11952 * Returns the new parser context or NULL
11953 */
11954xmlParserCtxtPtr
11955xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
11956 xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
11957 void *ioctx, xmlCharEncoding enc) {
11958 xmlParserCtxtPtr ctxt;
11959 xmlParserInputPtr inputStream;
11960 xmlParserInputBufferPtr buf;
Daniel Veillard42595322004-11-08 10:52:06 +000011961
11962 if (ioread == NULL) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011963
11964 buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
11965 if (buf == NULL) return(NULL);
11966
11967 ctxt = xmlNewParserCtxt();
11968 if (ctxt == NULL) {
Daniel Veillardf0af8ec2005-07-08 17:27:33 +000011969 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000011970 return(NULL);
11971 }
11972 if (sax != NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +000011973#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard092643b2003-09-25 14:29:29 +000011974 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Daniel Veillard81273902003-09-30 00:43:48 +000011975#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000011976 xmlFree(ctxt->sax);
11977 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
11978 if (ctxt->sax == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011979 xmlErrMemory(ctxt, NULL);
Daniel Veillardf0af8ec2005-07-08 17:27:33 +000011980 xmlFreeParserCtxt(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011981 return(NULL);
11982 }
Daniel Veillard5ea30d72004-11-08 11:54:28 +000011983 memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
11984 if (sax->initialized == XML_SAX2_MAGIC)
11985 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
11986 else
11987 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
Owen Taylor3473f882001-02-23 17:55:21 +000011988 if (user_data != NULL)
11989 ctxt->userData = user_data;
11990 }
11991
11992 inputStream = xmlNewIOInputStream(ctxt, buf, enc);
11993 if (inputStream == NULL) {
11994 xmlFreeParserCtxt(ctxt);
11995 return(NULL);
11996 }
11997 inputPush(ctxt, inputStream);
11998
11999 return(ctxt);
12000}
12001
Daniel Veillard4432df22003-09-28 18:58:27 +000012002#ifdef LIBXML_VALID_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000012003/************************************************************************
12004 * *
Daniel Veillardcbaf3992001-12-31 16:16:02 +000012005 * Front ends when parsing a DTD *
Owen Taylor3473f882001-02-23 17:55:21 +000012006 * *
12007 ************************************************************************/
12008
12009/**
12010 * xmlIOParseDTD:
12011 * @sax: the SAX handler block or NULL
12012 * @input: an Input Buffer
12013 * @enc: the charset encoding if known
12014 *
12015 * Load and parse a DTD
12016 *
12017 * Returns the resulting xmlDtdPtr or NULL in case of error.
Daniel Veillard402b3442006-10-13 10:28:21 +000012018 * @input will be freed by the function in any case.
Owen Taylor3473f882001-02-23 17:55:21 +000012019 */
12020
12021xmlDtdPtr
12022xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
12023 xmlCharEncoding enc) {
12024 xmlDtdPtr ret = NULL;
12025 xmlParserCtxtPtr ctxt;
12026 xmlParserInputPtr pinput = NULL;
Daniel Veillard87a764e2001-06-20 17:41:10 +000012027 xmlChar start[4];
Owen Taylor3473f882001-02-23 17:55:21 +000012028
12029 if (input == NULL)
12030 return(NULL);
12031
12032 ctxt = xmlNewParserCtxt();
12033 if (ctxt == NULL) {
Daniel Veillard402b3442006-10-13 10:28:21 +000012034 xmlFreeParserInputBuffer(input);
Owen Taylor3473f882001-02-23 17:55:21 +000012035 return(NULL);
12036 }
12037
12038 /*
12039 * Set-up the SAX context
12040 */
12041 if (sax != NULL) {
12042 if (ctxt->sax != NULL)
12043 xmlFree(ctxt->sax);
12044 ctxt->sax = sax;
Daniel Veillard500a1de2004-03-22 15:22:58 +000012045 ctxt->userData = ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000012046 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000012047 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000012048
12049 /*
12050 * generate a parser input from the I/O handler
12051 */
12052
Daniel Veillard43caefb2003-12-07 19:32:22 +000012053 pinput = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
Owen Taylor3473f882001-02-23 17:55:21 +000012054 if (pinput == NULL) {
12055 if (sax != NULL) ctxt->sax = NULL;
Daniel Veillard402b3442006-10-13 10:28:21 +000012056 xmlFreeParserInputBuffer(input);
Owen Taylor3473f882001-02-23 17:55:21 +000012057 xmlFreeParserCtxt(ctxt);
12058 return(NULL);
12059 }
12060
12061 /*
12062 * plug some encoding conversion routines here.
12063 */
Daniel Veillarda8f09ce2008-08-27 13:02:01 +000012064 if (xmlPushInput(ctxt, pinput) < 0) {
12065 if (sax != NULL) ctxt->sax = NULL;
12066 xmlFreeParserCtxt(ctxt);
12067 return(NULL);
12068 }
Daniel Veillard43caefb2003-12-07 19:32:22 +000012069 if (enc != XML_CHAR_ENCODING_NONE) {
12070 xmlSwitchEncoding(ctxt, enc);
12071 }
Owen Taylor3473f882001-02-23 17:55:21 +000012072
12073 pinput->filename = NULL;
12074 pinput->line = 1;
12075 pinput->col = 1;
12076 pinput->base = ctxt->input->cur;
12077 pinput->cur = ctxt->input->cur;
12078 pinput->free = NULL;
12079
12080 /*
12081 * let's parse that entity knowing it's an external subset.
12082 */
12083 ctxt->inSubset = 2;
12084 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
Daniel Veillardae0765b2008-07-31 19:54:59 +000012085 if (ctxt->myDoc == NULL) {
12086 xmlErrMemory(ctxt, "New Doc failed");
12087 return(NULL);
12088 }
12089 ctxt->myDoc->properties = XML_DOC_INTERNAL;
Owen Taylor3473f882001-02-23 17:55:21 +000012090 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
12091 BAD_CAST "none", BAD_CAST "none");
Daniel Veillard87a764e2001-06-20 17:41:10 +000012092
Daniel Veillard4aede2e2003-10-17 12:43:59 +000012093 if ((enc == XML_CHAR_ENCODING_NONE) &&
12094 ((ctxt->input->end - ctxt->input->cur) >= 4)) {
Daniel Veillard87a764e2001-06-20 17:41:10 +000012095 /*
12096 * Get the 4 first bytes and decode the charset
12097 * if enc != XML_CHAR_ENCODING_NONE
12098 * plug some encoding conversion routines.
12099 */
12100 start[0] = RAW;
12101 start[1] = NXT(1);
12102 start[2] = NXT(2);
12103 start[3] = NXT(3);
12104 enc = xmlDetectCharEncoding(start, 4);
12105 if (enc != XML_CHAR_ENCODING_NONE) {
12106 xmlSwitchEncoding(ctxt, enc);
12107 }
12108 }
12109
Owen Taylor3473f882001-02-23 17:55:21 +000012110 xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
12111
12112 if (ctxt->myDoc != NULL) {
12113 if (ctxt->wellFormed) {
12114 ret = ctxt->myDoc->extSubset;
12115 ctxt->myDoc->extSubset = NULL;
Daniel Veillard329456a2003-04-26 21:21:00 +000012116 if (ret != NULL) {
12117 xmlNodePtr tmp;
12118
12119 ret->doc = NULL;
12120 tmp = ret->children;
12121 while (tmp != NULL) {
12122 tmp->doc = NULL;
12123 tmp = tmp->next;
12124 }
12125 }
Owen Taylor3473f882001-02-23 17:55:21 +000012126 } else {
12127 ret = NULL;
12128 }
12129 xmlFreeDoc(ctxt->myDoc);
12130 ctxt->myDoc = NULL;
12131 }
12132 if (sax != NULL) ctxt->sax = NULL;
12133 xmlFreeParserCtxt(ctxt);
12134
12135 return(ret);
12136}
12137
12138/**
12139 * xmlSAXParseDTD:
12140 * @sax: the SAX handler block
12141 * @ExternalID: a NAME* containing the External ID of the DTD
12142 * @SystemID: a NAME* containing the URL to the DTD
12143 *
12144 * Load and parse an external subset.
12145 *
12146 * Returns the resulting xmlDtdPtr or NULL in case of error.
12147 */
12148
12149xmlDtdPtr
12150xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
12151 const xmlChar *SystemID) {
12152 xmlDtdPtr ret = NULL;
12153 xmlParserCtxtPtr ctxt;
12154 xmlParserInputPtr input = NULL;
12155 xmlCharEncoding enc;
Igor Zlatkovic07d59762004-08-24 19:12:51 +000012156 xmlChar* systemIdCanonic;
Owen Taylor3473f882001-02-23 17:55:21 +000012157
12158 if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
12159
12160 ctxt = xmlNewParserCtxt();
12161 if (ctxt == NULL) {
12162 return(NULL);
12163 }
12164
12165 /*
12166 * Set-up the SAX context
12167 */
12168 if (sax != NULL) {
12169 if (ctxt->sax != NULL)
12170 xmlFree(ctxt->sax);
12171 ctxt->sax = sax;
Daniel Veillardbf1e3d82003-08-14 23:57:26 +000012172 ctxt->userData = ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000012173 }
Igor Zlatkovic07d59762004-08-24 19:12:51 +000012174
12175 /*
12176 * Canonicalise the system ID
12177 */
12178 systemIdCanonic = xmlCanonicPath(SystemID);
Daniel Veillardc93a19f2004-10-04 11:53:20 +000012179 if ((SystemID != NULL) && (systemIdCanonic == NULL)) {
Igor Zlatkovic07d59762004-08-24 19:12:51 +000012180 xmlFreeParserCtxt(ctxt);
12181 return(NULL);
12182 }
Owen Taylor3473f882001-02-23 17:55:21 +000012183
12184 /*
12185 * Ask the Entity resolver to load the damn thing
12186 */
12187
12188 if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
Daniel Veillarda9557952006-10-12 12:53:15 +000012189 input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID,
12190 systemIdCanonic);
Owen Taylor3473f882001-02-23 17:55:21 +000012191 if (input == NULL) {
12192 if (sax != NULL) ctxt->sax = NULL;
12193 xmlFreeParserCtxt(ctxt);
Daniel Veillard34099b42004-11-04 17:34:35 +000012194 if (systemIdCanonic != NULL)
12195 xmlFree(systemIdCanonic);
Owen Taylor3473f882001-02-23 17:55:21 +000012196 return(NULL);
12197 }
12198
12199 /*
12200 * plug some encoding conversion routines here.
12201 */
Daniel Veillarda8f09ce2008-08-27 13:02:01 +000012202 if (xmlPushInput(ctxt, input) < 0) {
12203 if (sax != NULL) ctxt->sax = NULL;
12204 xmlFreeParserCtxt(ctxt);
12205 if (systemIdCanonic != NULL)
12206 xmlFree(systemIdCanonic);
12207 return(NULL);
12208 }
Daniel Veillard4aede2e2003-10-17 12:43:59 +000012209 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
12210 enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
12211 xmlSwitchEncoding(ctxt, enc);
12212 }
Owen Taylor3473f882001-02-23 17:55:21 +000012213
12214 if (input->filename == NULL)
Igor Zlatkovic07d59762004-08-24 19:12:51 +000012215 input->filename = (char *) systemIdCanonic;
12216 else
12217 xmlFree(systemIdCanonic);
Owen Taylor3473f882001-02-23 17:55:21 +000012218 input->line = 1;
12219 input->col = 1;
12220 input->base = ctxt->input->cur;
12221 input->cur = ctxt->input->cur;
12222 input->free = NULL;
12223
12224 /*
12225 * let's parse that entity knowing it's an external subset.
12226 */
12227 ctxt->inSubset = 2;
12228 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
Daniel Veillardae0765b2008-07-31 19:54:59 +000012229 if (ctxt->myDoc == NULL) {
12230 xmlErrMemory(ctxt, "New Doc failed");
12231 if (sax != NULL) ctxt->sax = NULL;
12232 xmlFreeParserCtxt(ctxt);
12233 return(NULL);
12234 }
12235 ctxt->myDoc->properties = XML_DOC_INTERNAL;
Owen Taylor3473f882001-02-23 17:55:21 +000012236 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
12237 ExternalID, SystemID);
12238 xmlParseExternalSubset(ctxt, ExternalID, SystemID);
12239
12240 if (ctxt->myDoc != NULL) {
12241 if (ctxt->wellFormed) {
12242 ret = ctxt->myDoc->extSubset;
12243 ctxt->myDoc->extSubset = NULL;
Daniel Veillardc5573462003-04-25 16:43:49 +000012244 if (ret != NULL) {
12245 xmlNodePtr tmp;
12246
12247 ret->doc = NULL;
12248 tmp = ret->children;
12249 while (tmp != NULL) {
12250 tmp->doc = NULL;
12251 tmp = tmp->next;
12252 }
12253 }
Owen Taylor3473f882001-02-23 17:55:21 +000012254 } else {
12255 ret = NULL;
12256 }
12257 xmlFreeDoc(ctxt->myDoc);
12258 ctxt->myDoc = NULL;
12259 }
12260 if (sax != NULL) ctxt->sax = NULL;
12261 xmlFreeParserCtxt(ctxt);
Daniel Veillard0161e632008-08-28 15:36:32 +000012262
Owen Taylor3473f882001-02-23 17:55:21 +000012263 return(ret);
12264}
12265
Daniel Veillard4432df22003-09-28 18:58:27 +000012266
Owen Taylor3473f882001-02-23 17:55:21 +000012267/**
12268 * xmlParseDTD:
12269 * @ExternalID: a NAME* containing the External ID of the DTD
12270 * @SystemID: a NAME* containing the URL to the DTD
12271 *
12272 * Load and parse an external subset.
Daniel Veillard0161e632008-08-28 15:36:32 +000012273 *
Owen Taylor3473f882001-02-23 17:55:21 +000012274 * Returns the resulting xmlDtdPtr or NULL in case of error.
12275 */
12276
12277xmlDtdPtr
12278xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
12279 return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
12280}
Daniel Veillard4432df22003-09-28 18:58:27 +000012281#endif /* LIBXML_VALID_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012282
12283/************************************************************************
12284 * *
12285 * Front ends when parsing an Entity *
12286 * *
12287 ************************************************************************/
12288
12289/**
Owen Taylor3473f882001-02-23 17:55:21 +000012290 * xmlParseCtxtExternalEntity:
12291 * @ctx: the existing parsing context
12292 * @URL: the URL for the entity to load
12293 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +000012294 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +000012295 *
12296 * Parse an external general entity within an existing parsing context
12297 * An external general parsed entity is well-formed if it matches the
12298 * production labeled extParsedEnt.
12299 *
12300 * [78] extParsedEnt ::= TextDecl? content
12301 *
12302 * Returns 0 if the entity is well formed, -1 in case of args problem and
12303 * the parser error code otherwise
12304 */
12305
12306int
12307xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
Daniel Veillardcda96922001-08-21 10:56:31 +000012308 const xmlChar *ID, xmlNodePtr *lst) {
Owen Taylor3473f882001-02-23 17:55:21 +000012309 xmlParserCtxtPtr ctxt;
12310 xmlDocPtr newDoc;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012311 xmlNodePtr newRoot;
Owen Taylor3473f882001-02-23 17:55:21 +000012312 xmlSAXHandlerPtr oldsax = NULL;
12313 int ret = 0;
Daniel Veillard87a764e2001-06-20 17:41:10 +000012314 xmlChar start[4];
12315 xmlCharEncoding enc;
Owen Taylor3473f882001-02-23 17:55:21 +000012316
Daniel Veillardce682bc2004-11-05 17:22:25 +000012317 if (ctx == NULL) return(-1);
12318
Daniel Veillard0161e632008-08-28 15:36:32 +000012319 if (((ctx->depth > 40) && ((ctx->options & XML_PARSE_HUGE) == 0)) ||
12320 (ctx->depth > 1024)) {
Owen Taylor3473f882001-02-23 17:55:21 +000012321 return(XML_ERR_ENTITY_LOOP);
12322 }
12323
Daniel Veillardcda96922001-08-21 10:56:31 +000012324 if (lst != NULL)
12325 *lst = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000012326 if ((URL == NULL) && (ID == NULL))
12327 return(-1);
12328 if (ctx->myDoc == NULL) /* @@ relax but check for dereferences */
12329 return(-1);
12330
Rob Richards798743a2009-06-19 13:54:25 -040012331 ctxt = xmlCreateEntityParserCtxtInternal(URL, ID, NULL, ctx);
Daniel Veillard2937b3a2006-10-10 08:52:34 +000012332 if (ctxt == NULL) {
12333 return(-1);
12334 }
Daniel Veillard40ec29a2008-07-30 12:35:40 +000012335
Owen Taylor3473f882001-02-23 17:55:21 +000012336 oldsax = ctxt->sax;
12337 ctxt->sax = ctx->sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000012338 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000012339 newDoc = xmlNewDoc(BAD_CAST "1.0");
12340 if (newDoc == NULL) {
12341 xmlFreeParserCtxt(ctxt);
12342 return(-1);
12343 }
Daniel Veillardae0765b2008-07-31 19:54:59 +000012344 newDoc->properties = XML_DOC_INTERNAL;
Daniel Veillard03a53c32004-10-26 16:06:51 +000012345 if (ctx->myDoc->dict) {
12346 newDoc->dict = ctx->myDoc->dict;
12347 xmlDictReference(newDoc->dict);
12348 }
Owen Taylor3473f882001-02-23 17:55:21 +000012349 if (ctx->myDoc != NULL) {
12350 newDoc->intSubset = ctx->myDoc->intSubset;
12351 newDoc->extSubset = ctx->myDoc->extSubset;
12352 }
12353 if (ctx->myDoc->URL != NULL) {
12354 newDoc->URL = xmlStrdup(ctx->myDoc->URL);
12355 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012356 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
12357 if (newRoot == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000012358 ctxt->sax = oldsax;
12359 xmlFreeParserCtxt(ctxt);
12360 newDoc->intSubset = NULL;
12361 newDoc->extSubset = NULL;
12362 xmlFreeDoc(newDoc);
12363 return(-1);
12364 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012365 xmlAddChild((xmlNodePtr) newDoc, newRoot);
Owen Taylor3473f882001-02-23 17:55:21 +000012366 nodePush(ctxt, newDoc->children);
12367 if (ctx->myDoc == NULL) {
12368 ctxt->myDoc = newDoc;
12369 } else {
12370 ctxt->myDoc = ctx->myDoc;
12371 newDoc->children->doc = ctx->myDoc;
12372 }
12373
Daniel Veillard40ec29a2008-07-30 12:35:40 +000012374 /*
Daniel Veillard87a764e2001-06-20 17:41:10 +000012375 * Get the 4 first bytes and decode the charset
12376 * if enc != XML_CHAR_ENCODING_NONE
12377 * plug some encoding conversion routines.
12378 */
12379 GROW
Daniel Veillard4aede2e2003-10-17 12:43:59 +000012380 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
12381 start[0] = RAW;
12382 start[1] = NXT(1);
12383 start[2] = NXT(2);
12384 start[3] = NXT(3);
12385 enc = xmlDetectCharEncoding(start, 4);
12386 if (enc != XML_CHAR_ENCODING_NONE) {
12387 xmlSwitchEncoding(ctxt, enc);
12388 }
Daniel Veillard87a764e2001-06-20 17:41:10 +000012389 }
12390
Owen Taylor3473f882001-02-23 17:55:21 +000012391 /*
12392 * Parse a possible text declaration first
12393 */
Daniel Veillarda07050d2003-10-19 14:46:32 +000012394 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +000012395 xmlParseTextDecl(ctxt);
Daniel Veillard40ec29a2008-07-30 12:35:40 +000012396 /*
12397 * An XML-1.0 document can't reference an entity not XML-1.0
12398 */
12399 if ((xmlStrEqual(ctx->version, BAD_CAST "1.0")) &&
12400 (!xmlStrEqual(ctxt->input->version, BAD_CAST "1.0"))) {
12401 xmlFatalErrMsg(ctxt, XML_ERR_VERSION_MISMATCH,
12402 "Version mismatch between document and entity\n");
12403 }
Owen Taylor3473f882001-02-23 17:55:21 +000012404 }
12405
12406 /*
12407 * Doing validity checking on chunk doesn't make sense
12408 */
12409 ctxt->instate = XML_PARSER_CONTENT;
12410 ctxt->validate = ctx->validate;
Daniel Veillard5f8d1a32003-03-23 21:02:00 +000012411 ctxt->valid = ctx->valid;
Owen Taylor3473f882001-02-23 17:55:21 +000012412 ctxt->loadsubset = ctx->loadsubset;
12413 ctxt->depth = ctx->depth + 1;
12414 ctxt->replaceEntities = ctx->replaceEntities;
12415 if (ctxt->validate) {
12416 ctxt->vctxt.error = ctx->vctxt.error;
12417 ctxt->vctxt.warning = ctx->vctxt.warning;
Owen Taylor3473f882001-02-23 17:55:21 +000012418 } else {
12419 ctxt->vctxt.error = NULL;
12420 ctxt->vctxt.warning = NULL;
12421 }
Daniel Veillarda9142e72001-06-19 11:07:54 +000012422 ctxt->vctxt.nodeTab = NULL;
12423 ctxt->vctxt.nodeNr = 0;
12424 ctxt->vctxt.nodeMax = 0;
12425 ctxt->vctxt.node = NULL;
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000012426 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
12427 ctxt->dict = ctx->dict;
Daniel Veillardfd343dc2003-10-31 10:55:22 +000012428 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
12429 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
12430 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000012431 ctxt->dictNames = ctx->dictNames;
12432 ctxt->attsDefault = ctx->attsDefault;
12433 ctxt->attsSpecial = ctx->attsSpecial;
William M. Brack503b6102004-08-19 02:17:27 +000012434 ctxt->linenumbers = ctx->linenumbers;
Owen Taylor3473f882001-02-23 17:55:21 +000012435
12436 xmlParseContent(ctxt);
Daniel Veillard40ec29a2008-07-30 12:35:40 +000012437
Daniel Veillard5f8d1a32003-03-23 21:02:00 +000012438 ctx->validate = ctxt->validate;
12439 ctx->valid = ctxt->valid;
Owen Taylor3473f882001-02-23 17:55:21 +000012440 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012441 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000012442 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012443 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000012444 }
12445 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012446 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000012447 }
12448
12449 if (!ctxt->wellFormed) {
12450 if (ctxt->errNo == 0)
12451 ret = 1;
12452 else
12453 ret = ctxt->errNo;
12454 } else {
Daniel Veillardcda96922001-08-21 10:56:31 +000012455 if (lst != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000012456 xmlNodePtr cur;
12457
12458 /*
12459 * Return the newly created nodeset after unlinking it from
12460 * they pseudo parent.
12461 */
12462 cur = newDoc->children->children;
Daniel Veillardcda96922001-08-21 10:56:31 +000012463 *lst = cur;
Owen Taylor3473f882001-02-23 17:55:21 +000012464 while (cur != NULL) {
12465 cur->parent = NULL;
12466 cur = cur->next;
12467 }
12468 newDoc->children->children = NULL;
12469 }
12470 ret = 0;
12471 }
12472 ctxt->sax = oldsax;
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000012473 ctxt->dict = NULL;
12474 ctxt->attsDefault = NULL;
12475 ctxt->attsSpecial = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000012476 xmlFreeParserCtxt(ctxt);
12477 newDoc->intSubset = NULL;
12478 newDoc->extSubset = NULL;
12479 xmlFreeDoc(newDoc);
Daniel Veillard40ec29a2008-07-30 12:35:40 +000012480
Owen Taylor3473f882001-02-23 17:55:21 +000012481 return(ret);
12482}
12483
12484/**
Daniel Veillard257d9102001-05-08 10:41:44 +000012485 * xmlParseExternalEntityPrivate:
Owen Taylor3473f882001-02-23 17:55:21 +000012486 * @doc: the document the chunk pertains to
Daniel Veillarda97a19b2001-05-20 13:19:52 +000012487 * @oldctxt: the previous parser context if available
Owen Taylor3473f882001-02-23 17:55:21 +000012488 * @sax: the SAX handler bloc (possibly NULL)
12489 * @user_data: The user data returned on SAX callbacks (possibly NULL)
12490 * @depth: Used for loop detection, use 0
12491 * @URL: the URL for the entity to load
12492 * @ID: the System ID for the entity to load
12493 * @list: the return value for the set of parsed nodes
12494 *
Daniel Veillard257d9102001-05-08 10:41:44 +000012495 * Private version of xmlParseExternalEntity()
Owen Taylor3473f882001-02-23 17:55:21 +000012496 *
12497 * Returns 0 if the entity is well formed, -1 in case of args problem and
12498 * the parser error code otherwise
12499 */
12500
Daniel Veillard7d515752003-09-26 19:12:37 +000012501static xmlParserErrors
Daniel Veillarda97a19b2001-05-20 13:19:52 +000012502xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
12503 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +000012504 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +000012505 const xmlChar *ID, xmlNodePtr *list) {
Owen Taylor3473f882001-02-23 17:55:21 +000012506 xmlParserCtxtPtr ctxt;
12507 xmlDocPtr newDoc;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012508 xmlNodePtr newRoot;
Owen Taylor3473f882001-02-23 17:55:21 +000012509 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillard7d515752003-09-26 19:12:37 +000012510 xmlParserErrors ret = XML_ERR_OK;
Daniel Veillard87a764e2001-06-20 17:41:10 +000012511 xmlChar start[4];
12512 xmlCharEncoding enc;
Owen Taylor3473f882001-02-23 17:55:21 +000012513
Daniel Veillard0161e632008-08-28 15:36:32 +000012514 if (((depth > 40) &&
12515 ((oldctxt == NULL) || (oldctxt->options & XML_PARSE_HUGE) == 0)) ||
12516 (depth > 1024)) {
Owen Taylor3473f882001-02-23 17:55:21 +000012517 return(XML_ERR_ENTITY_LOOP);
12518 }
12519
Owen Taylor3473f882001-02-23 17:55:21 +000012520 if (list != NULL)
12521 *list = NULL;
12522 if ((URL == NULL) && (ID == NULL))
Daniel Veillard7d515752003-09-26 19:12:37 +000012523 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard30e76072006-03-09 14:13:55 +000012524 if (doc == NULL)
Daniel Veillard7d515752003-09-26 19:12:37 +000012525 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000012526
12527
Rob Richards9c0aa472009-03-26 18:10:19 +000012528 ctxt = xmlCreateEntityParserCtxtInternal(URL, ID, NULL, oldctxt);
William M. Brackb670e2e2003-09-27 01:05:55 +000012529 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
Owen Taylor3473f882001-02-23 17:55:21 +000012530 ctxt->userData = ctxt;
Daniel Veillarda97a19b2001-05-20 13:19:52 +000012531 if (oldctxt != NULL) {
12532 ctxt->_private = oldctxt->_private;
12533 ctxt->loadsubset = oldctxt->loadsubset;
12534 ctxt->validate = oldctxt->validate;
12535 ctxt->external = oldctxt->external;
Daniel Veillard44e1dd02003-02-21 23:23:28 +000012536 ctxt->record_info = oldctxt->record_info;
12537 ctxt->node_seq.maximum = oldctxt->node_seq.maximum;
12538 ctxt->node_seq.length = oldctxt->node_seq.length;
12539 ctxt->node_seq.buffer = oldctxt->node_seq.buffer;
Daniel Veillarda97a19b2001-05-20 13:19:52 +000012540 } else {
12541 /*
12542 * Doing validity checking on chunk without context
12543 * doesn't make sense
12544 */
12545 ctxt->_private = NULL;
12546 ctxt->validate = 0;
12547 ctxt->external = 2;
12548 ctxt->loadsubset = 0;
12549 }
Owen Taylor3473f882001-02-23 17:55:21 +000012550 if (sax != NULL) {
12551 oldsax = ctxt->sax;
12552 ctxt->sax = sax;
12553 if (user_data != NULL)
12554 ctxt->userData = user_data;
12555 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000012556 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000012557 newDoc = xmlNewDoc(BAD_CAST "1.0");
12558 if (newDoc == NULL) {
Daniel Veillard44e1dd02003-02-21 23:23:28 +000012559 ctxt->node_seq.maximum = 0;
12560 ctxt->node_seq.length = 0;
12561 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000012562 xmlFreeParserCtxt(ctxt);
Daniel Veillard7d515752003-09-26 19:12:37 +000012563 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000012564 }
Daniel Veillardae0765b2008-07-31 19:54:59 +000012565 newDoc->properties = XML_DOC_INTERNAL;
Daniel Veillard30e76072006-03-09 14:13:55 +000012566 newDoc->intSubset = doc->intSubset;
12567 newDoc->extSubset = doc->extSubset;
12568 newDoc->dict = doc->dict;
Daniel Veillard03a53c32004-10-26 16:06:51 +000012569 xmlDictReference(newDoc->dict);
12570
Owen Taylor3473f882001-02-23 17:55:21 +000012571 if (doc->URL != NULL) {
12572 newDoc->URL = xmlStrdup(doc->URL);
12573 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012574 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
12575 if (newRoot == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000012576 if (sax != NULL)
12577 ctxt->sax = oldsax;
Daniel Veillard44e1dd02003-02-21 23:23:28 +000012578 ctxt->node_seq.maximum = 0;
12579 ctxt->node_seq.length = 0;
12580 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000012581 xmlFreeParserCtxt(ctxt);
12582 newDoc->intSubset = NULL;
12583 newDoc->extSubset = NULL;
12584 xmlFreeDoc(newDoc);
Daniel Veillard7d515752003-09-26 19:12:37 +000012585 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000012586 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012587 xmlAddChild((xmlNodePtr) newDoc, newRoot);
Owen Taylor3473f882001-02-23 17:55:21 +000012588 nodePush(ctxt, newDoc->children);
Daniel Veillard30e76072006-03-09 14:13:55 +000012589 ctxt->myDoc = doc;
12590 newRoot->doc = doc;
Owen Taylor3473f882001-02-23 17:55:21 +000012591
Daniel Veillard0161e632008-08-28 15:36:32 +000012592 /*
Daniel Veillard87a764e2001-06-20 17:41:10 +000012593 * Get the 4 first bytes and decode the charset
12594 * if enc != XML_CHAR_ENCODING_NONE
12595 * plug some encoding conversion routines.
12596 */
12597 GROW;
Daniel Veillard4aede2e2003-10-17 12:43:59 +000012598 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
12599 start[0] = RAW;
12600 start[1] = NXT(1);
12601 start[2] = NXT(2);
12602 start[3] = NXT(3);
12603 enc = xmlDetectCharEncoding(start, 4);
12604 if (enc != XML_CHAR_ENCODING_NONE) {
12605 xmlSwitchEncoding(ctxt, enc);
12606 }
Daniel Veillard87a764e2001-06-20 17:41:10 +000012607 }
12608
Owen Taylor3473f882001-02-23 17:55:21 +000012609 /*
12610 * Parse a possible text declaration first
12611 */
Daniel Veillarda07050d2003-10-19 14:46:32 +000012612 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +000012613 xmlParseTextDecl(ctxt);
12614 }
12615
Owen Taylor3473f882001-02-23 17:55:21 +000012616 ctxt->instate = XML_PARSER_CONTENT;
Owen Taylor3473f882001-02-23 17:55:21 +000012617 ctxt->depth = depth;
12618
12619 xmlParseContent(ctxt);
Daniel Veillard0161e632008-08-28 15:36:32 +000012620
Daniel Veillard561b7f82002-03-20 21:55:57 +000012621 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012622 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard561b7f82002-03-20 21:55:57 +000012623 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012624 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000012625 }
12626 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012627 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000012628 }
12629
12630 if (!ctxt->wellFormed) {
12631 if (ctxt->errNo == 0)
Daniel Veillard7d515752003-09-26 19:12:37 +000012632 ret = XML_ERR_INTERNAL_ERROR;
Owen Taylor3473f882001-02-23 17:55:21 +000012633 else
William M. Brack7b9154b2003-09-27 19:23:50 +000012634 ret = (xmlParserErrors)ctxt->errNo;
Owen Taylor3473f882001-02-23 17:55:21 +000012635 } else {
12636 if (list != NULL) {
12637 xmlNodePtr cur;
12638
12639 /*
12640 * Return the newly created nodeset after unlinking it from
12641 * they pseudo parent.
12642 */
12643 cur = newDoc->children->children;
12644 *list = cur;
12645 while (cur != NULL) {
12646 cur->parent = NULL;
12647 cur = cur->next;
12648 }
12649 newDoc->children->children = NULL;
12650 }
Daniel Veillard7d515752003-09-26 19:12:37 +000012651 ret = XML_ERR_OK;
Owen Taylor3473f882001-02-23 17:55:21 +000012652 }
Daniel Veillard0161e632008-08-28 15:36:32 +000012653
12654 /*
12655 * Record in the parent context the number of entities replacement
12656 * done when parsing that reference.
12657 */
Daniel Veillard76d36452009-09-07 11:19:33 +020012658 if (oldctxt != NULL)
12659 oldctxt->nbentities += ctxt->nbentities;
12660
Daniel Veillard0161e632008-08-28 15:36:32 +000012661 /*
12662 * Also record the size of the entity parsed
12663 */
12664 if (ctxt->input != NULL) {
12665 oldctxt->sizeentities += ctxt->input->consumed;
12666 oldctxt->sizeentities += (ctxt->input->cur - ctxt->input->base);
12667 }
12668 /*
12669 * And record the last error if any
12670 */
12671 if (ctxt->lastError.code != XML_ERR_OK)
12672 xmlCopyError(&ctxt->lastError, &oldctxt->lastError);
12673
Owen Taylor3473f882001-02-23 17:55:21 +000012674 if (sax != NULL)
12675 ctxt->sax = oldsax;
Daniel Veillard0046c0f2003-02-23 13:52:30 +000012676 oldctxt->node_seq.maximum = ctxt->node_seq.maximum;
12677 oldctxt->node_seq.length = ctxt->node_seq.length;
12678 oldctxt->node_seq.buffer = ctxt->node_seq.buffer;
Daniel Veillard44e1dd02003-02-21 23:23:28 +000012679 ctxt->node_seq.maximum = 0;
12680 ctxt->node_seq.length = 0;
12681 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000012682 xmlFreeParserCtxt(ctxt);
12683 newDoc->intSubset = NULL;
12684 newDoc->extSubset = NULL;
12685 xmlFreeDoc(newDoc);
Daniel Veillard0161e632008-08-28 15:36:32 +000012686
Owen Taylor3473f882001-02-23 17:55:21 +000012687 return(ret);
12688}
12689
Daniel Veillard81273902003-09-30 00:43:48 +000012690#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000012691/**
Daniel Veillard257d9102001-05-08 10:41:44 +000012692 * xmlParseExternalEntity:
12693 * @doc: the document the chunk pertains to
12694 * @sax: the SAX handler bloc (possibly NULL)
12695 * @user_data: The user data returned on SAX callbacks (possibly NULL)
12696 * @depth: Used for loop detection, use 0
12697 * @URL: the URL for the entity to load
12698 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +000012699 * @lst: the return value for the set of parsed nodes
Daniel Veillard257d9102001-05-08 10:41:44 +000012700 *
12701 * Parse an external general entity
12702 * An external general parsed entity is well-formed if it matches the
12703 * production labeled extParsedEnt.
12704 *
12705 * [78] extParsedEnt ::= TextDecl? content
12706 *
12707 * Returns 0 if the entity is well formed, -1 in case of args problem and
12708 * the parser error code otherwise
12709 */
12710
12711int
12712xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardcda96922001-08-21 10:56:31 +000012713 int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *lst) {
Daniel Veillarda97a19b2001-05-20 13:19:52 +000012714 return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL,
Daniel Veillardcda96922001-08-21 10:56:31 +000012715 ID, lst));
Daniel Veillard257d9102001-05-08 10:41:44 +000012716}
12717
12718/**
Daniel Veillarde020c3a2001-03-21 18:06:15 +000012719 * xmlParseBalancedChunkMemory:
Owen Taylor3473f882001-02-23 17:55:21 +000012720 * @doc: the document the chunk pertains to
12721 * @sax: the SAX handler bloc (possibly NULL)
12722 * @user_data: The user data returned on SAX callbacks (possibly NULL)
12723 * @depth: Used for loop detection, use 0
12724 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
Daniel Veillardcda96922001-08-21 10:56:31 +000012725 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +000012726 *
12727 * Parse a well-balanced chunk of an XML document
12728 * called by the parser
12729 * The allowed sequence for the Well Balanced Chunk is the one defined by
12730 * the content production in the XML grammar:
12731 *
12732 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
12733 *
12734 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
12735 * the parser error code otherwise
12736 */
12737
12738int
12739xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
Daniel Veillardcda96922001-08-21 10:56:31 +000012740 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) {
Daniel Veillard58e44c92002-08-02 22:19:49 +000012741 return xmlParseBalancedChunkMemoryRecover( doc, sax, user_data,
12742 depth, string, lst, 0 );
12743}
Daniel Veillard81273902003-09-30 00:43:48 +000012744#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillard58e44c92002-08-02 22:19:49 +000012745
12746/**
Daniel Veillard328f48c2002-11-15 15:24:34 +000012747 * xmlParseBalancedChunkMemoryInternal:
12748 * @oldctxt: the existing parsing context
12749 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
12750 * @user_data: the user data field for the parser context
12751 * @lst: the return value for the set of parsed nodes
12752 *
12753 *
12754 * Parse a well-balanced chunk of an XML document
12755 * called by the parser
12756 * The allowed sequence for the Well Balanced Chunk is the one defined by
12757 * the content production in the XML grammar:
12758 *
12759 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
12760 *
Daniel Veillard7d515752003-09-26 19:12:37 +000012761 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
12762 * error code otherwise
Daniel Veillard0161e632008-08-28 15:36:32 +000012763 *
Daniel Veillard328f48c2002-11-15 15:24:34 +000012764 * In case recover is set to 1, the nodelist will not be empty even if
Daniel Veillard0161e632008-08-28 15:36:32 +000012765 * the parsed chunk is not well balanced.
Daniel Veillard328f48c2002-11-15 15:24:34 +000012766 */
Daniel Veillard7d515752003-09-26 19:12:37 +000012767static xmlParserErrors
Daniel Veillard328f48c2002-11-15 15:24:34 +000012768xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
12769 const xmlChar *string, void *user_data, xmlNodePtr *lst) {
12770 xmlParserCtxtPtr ctxt;
Daniel Veillard68e9e742002-11-16 15:35:11 +000012771 xmlDocPtr newDoc = NULL;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012772 xmlNodePtr newRoot;
Daniel Veillard328f48c2002-11-15 15:24:34 +000012773 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000012774 xmlNodePtr content = NULL;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012775 xmlNodePtr last = NULL;
Daniel Veillard328f48c2002-11-15 15:24:34 +000012776 int size;
Daniel Veillard7d515752003-09-26 19:12:37 +000012777 xmlParserErrors ret = XML_ERR_OK;
Daniel Veillard74eaec12009-08-26 15:57:20 +020012778#ifdef SAX2
12779 int i;
12780#endif
Daniel Veillard328f48c2002-11-15 15:24:34 +000012781
Daniel Veillard0161e632008-08-28 15:36:32 +000012782 if (((oldctxt->depth > 40) && ((oldctxt->options & XML_PARSE_HUGE) == 0)) ||
12783 (oldctxt->depth > 1024)) {
Daniel Veillard328f48c2002-11-15 15:24:34 +000012784 return(XML_ERR_ENTITY_LOOP);
12785 }
12786
12787
12788 if (lst != NULL)
12789 *lst = NULL;
12790 if (string == NULL)
William M. Brack7b9154b2003-09-27 19:23:50 +000012791 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard328f48c2002-11-15 15:24:34 +000012792
12793 size = xmlStrlen(string);
12794
12795 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
William M. Brack7b9154b2003-09-27 19:23:50 +000012796 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
Daniel Veillard328f48c2002-11-15 15:24:34 +000012797 if (user_data != NULL)
12798 ctxt->userData = user_data;
12799 else
12800 ctxt->userData = ctxt;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000012801 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
12802 ctxt->dict = oldctxt->dict;
Daniel Veillardfd343dc2003-10-31 10:55:22 +000012803 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
12804 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
12805 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
Daniel Veillard328f48c2002-11-15 15:24:34 +000012806
Daniel Veillard74eaec12009-08-26 15:57:20 +020012807#ifdef SAX2
12808 /* propagate namespaces down the entity */
12809 for (i = 0;i < oldctxt->nsNr;i += 2) {
12810 nsPush(ctxt, oldctxt->nsTab[i], oldctxt->nsTab[i+1]);
12811 }
12812#endif
12813
Daniel Veillard328f48c2002-11-15 15:24:34 +000012814 oldsax = ctxt->sax;
12815 ctxt->sax = oldctxt->sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000012816 xmlDetectSAX2(ctxt);
Daniel Veillard87ab1c12003-12-21 13:01:56 +000012817 ctxt->replaceEntities = oldctxt->replaceEntities;
12818 ctxt->options = oldctxt->options;
Daniel Veillard0161e632008-08-28 15:36:32 +000012819
Daniel Veillarde1ca5032002-12-09 14:13:43 +000012820 ctxt->_private = oldctxt->_private;
Daniel Veillard328f48c2002-11-15 15:24:34 +000012821 if (oldctxt->myDoc == NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000012822 newDoc = xmlNewDoc(BAD_CAST "1.0");
12823 if (newDoc == NULL) {
12824 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000012825 ctxt->dict = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000012826 xmlFreeParserCtxt(ctxt);
William M. Brack7b9154b2003-09-27 19:23:50 +000012827 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard68e9e742002-11-16 15:35:11 +000012828 }
Daniel Veillardae0765b2008-07-31 19:54:59 +000012829 newDoc->properties = XML_DOC_INTERNAL;
Daniel Veillard03a53c32004-10-26 16:06:51 +000012830 newDoc->dict = ctxt->dict;
12831 xmlDictReference(newDoc->dict);
Daniel Veillard328f48c2002-11-15 15:24:34 +000012832 ctxt->myDoc = newDoc;
Daniel Veillard68e9e742002-11-16 15:35:11 +000012833 } else {
12834 ctxt->myDoc = oldctxt->myDoc;
12835 content = ctxt->myDoc->children;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012836 last = ctxt->myDoc->last;
Daniel Veillard328f48c2002-11-15 15:24:34 +000012837 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012838 newRoot = xmlNewDocNode(ctxt->myDoc, NULL, BAD_CAST "pseudoroot", NULL);
12839 if (newRoot == NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000012840 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000012841 ctxt->dict = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000012842 xmlFreeParserCtxt(ctxt);
Daniel Veillard03a53c32004-10-26 16:06:51 +000012843 if (newDoc != NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000012844 xmlFreeDoc(newDoc);
Daniel Veillard03a53c32004-10-26 16:06:51 +000012845 }
William M. Brack7b9154b2003-09-27 19:23:50 +000012846 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard68e9e742002-11-16 15:35:11 +000012847 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012848 ctxt->myDoc->children = NULL;
12849 ctxt->myDoc->last = NULL;
12850 xmlAddChild((xmlNodePtr) ctxt->myDoc, newRoot);
Daniel Veillard68e9e742002-11-16 15:35:11 +000012851 nodePush(ctxt, ctxt->myDoc->children);
Daniel Veillard328f48c2002-11-15 15:24:34 +000012852 ctxt->instate = XML_PARSER_CONTENT;
12853 ctxt->depth = oldctxt->depth + 1;
12854
Daniel Veillard328f48c2002-11-15 15:24:34 +000012855 ctxt->validate = 0;
12856 ctxt->loadsubset = oldctxt->loadsubset;
Daniel Veillardef8dd7b2003-03-23 12:02:56 +000012857 if ((oldctxt->validate) || (oldctxt->replaceEntities != 0)) {
12858 /*
12859 * ID/IDREF registration will be done in xmlValidateElement below
12860 */
12861 ctxt->loadsubset |= XML_SKIP_IDS;
12862 }
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000012863 ctxt->dictNames = oldctxt->dictNames;
Daniel Veillard95d2d5b2003-10-27 14:54:49 +000012864 ctxt->attsDefault = oldctxt->attsDefault;
12865 ctxt->attsSpecial = oldctxt->attsSpecial;
Daniel Veillard328f48c2002-11-15 15:24:34 +000012866
Daniel Veillard68e9e742002-11-16 15:35:11 +000012867 xmlParseContent(ctxt);
Daniel Veillard328f48c2002-11-15 15:24:34 +000012868 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012869 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000012870 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012871 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000012872 }
Daniel Veillard68e9e742002-11-16 15:35:11 +000012873 if (ctxt->node != ctxt->myDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012874 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000012875 }
12876
12877 if (!ctxt->wellFormed) {
12878 if (ctxt->errNo == 0)
Daniel Veillard7d515752003-09-26 19:12:37 +000012879 ret = XML_ERR_INTERNAL_ERROR;
Daniel Veillard328f48c2002-11-15 15:24:34 +000012880 else
William M. Brack7b9154b2003-09-27 19:23:50 +000012881 ret = (xmlParserErrors)ctxt->errNo;
Daniel Veillard328f48c2002-11-15 15:24:34 +000012882 } else {
William M. Brack7b9154b2003-09-27 19:23:50 +000012883 ret = XML_ERR_OK;
Daniel Veillard328f48c2002-11-15 15:24:34 +000012884 }
Daniel Veillard0161e632008-08-28 15:36:32 +000012885
William M. Brack7b9154b2003-09-27 19:23:50 +000012886 if ((lst != NULL) && (ret == XML_ERR_OK)) {
Daniel Veillard328f48c2002-11-15 15:24:34 +000012887 xmlNodePtr cur;
12888
12889 /*
12890 * Return the newly created nodeset after unlinking it from
12891 * they pseudo parent.
12892 */
Daniel Veillard68e9e742002-11-16 15:35:11 +000012893 cur = ctxt->myDoc->children->children;
Daniel Veillard328f48c2002-11-15 15:24:34 +000012894 *lst = cur;
12895 while (cur != NULL) {
Daniel Veillard4432df22003-09-28 18:58:27 +000012896#ifdef LIBXML_VALID_ENABLED
Daniel Veillard8874b942005-08-25 13:19:21 +000012897 if ((oldctxt->validate) && (oldctxt->wellFormed) &&
12898 (oldctxt->myDoc) && (oldctxt->myDoc->intSubset) &&
12899 (cur->type == XML_ELEMENT_NODE)) {
Daniel Veillard8d589042003-02-04 15:07:21 +000012900 oldctxt->valid &= xmlValidateElement(&oldctxt->vctxt,
12901 oldctxt->myDoc, cur);
12902 }
Daniel Veillard4432df22003-09-28 18:58:27 +000012903#endif /* LIBXML_VALID_ENABLED */
Daniel Veillard328f48c2002-11-15 15:24:34 +000012904 cur->parent = NULL;
12905 cur = cur->next;
12906 }
Daniel Veillard68e9e742002-11-16 15:35:11 +000012907 ctxt->myDoc->children->children = NULL;
12908 }
12909 if (ctxt->myDoc != NULL) {
12910 xmlFreeNode(ctxt->myDoc->children);
12911 ctxt->myDoc->children = content;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012912 ctxt->myDoc->last = last;
Daniel Veillard328f48c2002-11-15 15:24:34 +000012913 }
Daniel Veillard0161e632008-08-28 15:36:32 +000012914
12915 /*
12916 * Record in the parent context the number of entities replacement
12917 * done when parsing that reference.
12918 */
Daniel Veillardd44b9362009-09-07 12:15:08 +020012919 if (oldctxt != NULL)
12920 oldctxt->nbentities += ctxt->nbentities;
12921
Daniel Veillard0161e632008-08-28 15:36:32 +000012922 /*
12923 * Also record the last error if any
12924 */
12925 if (ctxt->lastError.code != XML_ERR_OK)
12926 xmlCopyError(&ctxt->lastError, &oldctxt->lastError);
12927
Daniel Veillard328f48c2002-11-15 15:24:34 +000012928 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000012929 ctxt->dict = NULL;
Daniel Veillard95d2d5b2003-10-27 14:54:49 +000012930 ctxt->attsDefault = NULL;
12931 ctxt->attsSpecial = NULL;
Daniel Veillard328f48c2002-11-15 15:24:34 +000012932 xmlFreeParserCtxt(ctxt);
Daniel Veillard03a53c32004-10-26 16:06:51 +000012933 if (newDoc != NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000012934 xmlFreeDoc(newDoc);
Daniel Veillard03a53c32004-10-26 16:06:51 +000012935 }
Daniel Veillard0161e632008-08-28 15:36:32 +000012936
Daniel Veillard328f48c2002-11-15 15:24:34 +000012937 return(ret);
12938}
12939
Daniel Veillard29b17482004-08-16 00:39:03 +000012940/**
12941 * xmlParseInNodeContext:
12942 * @node: the context node
12943 * @data: the input string
12944 * @datalen: the input string length in bytes
12945 * @options: a combination of xmlParserOption
12946 * @lst: the return value for the set of parsed nodes
12947 *
12948 * Parse a well-balanced chunk of an XML document
12949 * within the context (DTD, namespaces, etc ...) of the given node.
12950 *
12951 * The allowed sequence for the data is a Well Balanced Chunk defined by
12952 * the content production in the XML grammar:
12953 *
12954 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
12955 *
12956 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
12957 * error code otherwise
12958 */
12959xmlParserErrors
12960xmlParseInNodeContext(xmlNodePtr node, const char *data, int datalen,
12961 int options, xmlNodePtr *lst) {
12962#ifdef SAX2
12963 xmlParserCtxtPtr ctxt;
12964 xmlDocPtr doc = NULL;
12965 xmlNodePtr fake, cur;
12966 int nsnr = 0;
12967
12968 xmlParserErrors ret = XML_ERR_OK;
12969
12970 /*
12971 * check all input parameters, grab the document
12972 */
12973 if ((lst == NULL) || (node == NULL) || (data == NULL) || (datalen < 0))
12974 return(XML_ERR_INTERNAL_ERROR);
12975 switch (node->type) {
12976 case XML_ELEMENT_NODE:
12977 case XML_ATTRIBUTE_NODE:
12978 case XML_TEXT_NODE:
12979 case XML_CDATA_SECTION_NODE:
12980 case XML_ENTITY_REF_NODE:
12981 case XML_PI_NODE:
12982 case XML_COMMENT_NODE:
12983 case XML_DOCUMENT_NODE:
12984 case XML_HTML_DOCUMENT_NODE:
12985 break;
12986 default:
12987 return(XML_ERR_INTERNAL_ERROR);
12988
12989 }
12990 while ((node != NULL) && (node->type != XML_ELEMENT_NODE) &&
12991 (node->type != XML_DOCUMENT_NODE) &&
12992 (node->type != XML_HTML_DOCUMENT_NODE))
12993 node = node->parent;
12994 if (node == NULL)
12995 return(XML_ERR_INTERNAL_ERROR);
12996 if (node->type == XML_ELEMENT_NODE)
12997 doc = node->doc;
12998 else
12999 doc = (xmlDocPtr) node;
13000 if (doc == NULL)
13001 return(XML_ERR_INTERNAL_ERROR);
13002
13003 /*
13004 * allocate a context and set-up everything not related to the
13005 * node position in the tree
13006 */
13007 if (doc->type == XML_DOCUMENT_NODE)
13008 ctxt = xmlCreateMemoryParserCtxt((char *) data, datalen);
13009#ifdef LIBXML_HTML_ENABLED
Daniel Veillarde20fb5a2010-01-29 20:47:08 +010013010 else if (doc->type == XML_HTML_DOCUMENT_NODE) {
Daniel Veillard29b17482004-08-16 00:39:03 +000013011 ctxt = htmlCreateMemoryParserCtxt((char *) data, datalen);
Daniel Veillarde20fb5a2010-01-29 20:47:08 +010013012 /*
13013 * When parsing in context, it makes no sense to add implied
13014 * elements like html/body/etc...
13015 */
13016 options |= HTML_PARSE_NOIMPLIED;
13017 }
Daniel Veillard29b17482004-08-16 00:39:03 +000013018#endif
13019 else
13020 return(XML_ERR_INTERNAL_ERROR);
13021
13022 if (ctxt == NULL)
13023 return(XML_ERR_NO_MEMORY);
William M. Brackc3f81342004-10-03 01:22:44 +000013024
Daniel Veillard47cd14e2010-02-04 18:49:01 +010013025 /*
William M. Brackc3f81342004-10-03 01:22:44 +000013026 * Use input doc's dict if present, else assure XML_PARSE_NODICT is set.
13027 * We need a dictionary for xmlDetectSAX2, so if there's no doc dict
13028 * we must wait until the last moment to free the original one.
13029 */
Daniel Veillard29b17482004-08-16 00:39:03 +000013030 if (doc->dict != NULL) {
William M. Brackc3f81342004-10-03 01:22:44 +000013031 if (ctxt->dict != NULL)
Daniel Veillard29b17482004-08-16 00:39:03 +000013032 xmlDictFree(ctxt->dict);
13033 ctxt->dict = doc->dict;
William M. Brackc3f81342004-10-03 01:22:44 +000013034 } else
13035 options |= XML_PARSE_NODICT;
13036
Daniel Veillard47cd14e2010-02-04 18:49:01 +010013037 if (doc->encoding != NULL) {
13038 xmlCharEncodingHandlerPtr hdlr;
13039
13040 if (ctxt->encoding != NULL)
13041 xmlFree((xmlChar *) ctxt->encoding);
13042 ctxt->encoding = xmlStrdup((const xmlChar *) doc->encoding);
13043
13044 hdlr = xmlFindCharEncodingHandler(doc->encoding);
13045 if (hdlr != NULL) {
13046 xmlSwitchToEncoding(ctxt, hdlr);
13047 } else {
13048 return(XML_ERR_UNSUPPORTED_ENCODING);
13049 }
13050 }
13051
Daniel Veillard37334572008-07-31 08:20:02 +000013052 xmlCtxtUseOptionsInternal(ctxt, options, NULL);
Daniel Veillard29b17482004-08-16 00:39:03 +000013053 xmlDetectSAX2(ctxt);
13054 ctxt->myDoc = doc;
13055
Daniel Veillard47cd14e2010-02-04 18:49:01 +010013056 fake = xmlNewComment(NULL);
13057 if (fake == NULL) {
13058 xmlFreeParserCtxt(ctxt);
13059 return(XML_ERR_NO_MEMORY);
13060 }
13061 xmlAddChild(node, fake);
13062
Daniel Veillard29b17482004-08-16 00:39:03 +000013063 if (node->type == XML_ELEMENT_NODE) {
13064 nodePush(ctxt, node);
13065 /*
13066 * initialize the SAX2 namespaces stack
13067 */
13068 cur = node;
13069 while ((cur != NULL) && (cur->type == XML_ELEMENT_NODE)) {
13070 xmlNsPtr ns = cur->nsDef;
13071 const xmlChar *iprefix, *ihref;
13072
13073 while (ns != NULL) {
13074 if (ctxt->dict) {
13075 iprefix = xmlDictLookup(ctxt->dict, ns->prefix, -1);
13076 ihref = xmlDictLookup(ctxt->dict, ns->href, -1);
13077 } else {
13078 iprefix = ns->prefix;
13079 ihref = ns->href;
13080 }
13081
13082 if (xmlGetNamespace(ctxt, iprefix) == NULL) {
13083 nsPush(ctxt, iprefix, ihref);
13084 nsnr++;
13085 }
13086 ns = ns->next;
13087 }
13088 cur = cur->parent;
13089 }
13090 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillard0161e632008-08-28 15:36:32 +000013091 }
Daniel Veillard29b17482004-08-16 00:39:03 +000013092
13093 if ((ctxt->validate) || (ctxt->replaceEntities != 0)) {
13094 /*
13095 * ID/IDREF registration will be done in xmlValidateElement below
13096 */
13097 ctxt->loadsubset |= XML_SKIP_IDS;
13098 }
13099
Daniel Veillard499cc922006-01-18 17:22:35 +000013100#ifdef LIBXML_HTML_ENABLED
13101 if (doc->type == XML_HTML_DOCUMENT_NODE)
13102 __htmlParseContent(ctxt);
13103 else
13104#endif
13105 xmlParseContent(ctxt);
13106
Daniel Veillard29b17482004-08-16 00:39:03 +000013107 nsPop(ctxt, nsnr);
13108 if ((RAW == '<') && (NXT(1) == '/')) {
13109 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13110 } else if (RAW != 0) {
13111 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13112 }
13113 if ((ctxt->node != NULL) && (ctxt->node != node)) {
13114 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13115 ctxt->wellFormed = 0;
13116 }
13117
13118 if (!ctxt->wellFormed) {
13119 if (ctxt->errNo == 0)
13120 ret = XML_ERR_INTERNAL_ERROR;
13121 else
13122 ret = (xmlParserErrors)ctxt->errNo;
13123 } else {
13124 ret = XML_ERR_OK;
13125 }
Daniel Veillard0161e632008-08-28 15:36:32 +000013126
Daniel Veillard29b17482004-08-16 00:39:03 +000013127 /*
13128 * Return the newly created nodeset after unlinking it from
13129 * the pseudo sibling.
13130 */
Daniel Veillard0161e632008-08-28 15:36:32 +000013131
Daniel Veillard29b17482004-08-16 00:39:03 +000013132 cur = fake->next;
13133 fake->next = NULL;
13134 node->last = fake;
13135
13136 if (cur != NULL) {
13137 cur->prev = NULL;
13138 }
13139
13140 *lst = cur;
13141
13142 while (cur != NULL) {
13143 cur->parent = NULL;
13144 cur = cur->next;
13145 }
13146
13147 xmlUnlinkNode(fake);
13148 xmlFreeNode(fake);
13149
13150
13151 if (ret != XML_ERR_OK) {
13152 xmlFreeNodeList(*lst);
13153 *lst = NULL;
13154 }
William M. Brackc3f81342004-10-03 01:22:44 +000013155
William M. Brackb7b54de2004-10-06 16:38:01 +000013156 if (doc->dict != NULL)
13157 ctxt->dict = NULL;
Daniel Veillard29b17482004-08-16 00:39:03 +000013158 xmlFreeParserCtxt(ctxt);
Daniel Veillard0161e632008-08-28 15:36:32 +000013159
Daniel Veillard29b17482004-08-16 00:39:03 +000013160 return(ret);
13161#else /* !SAX2 */
13162 return(XML_ERR_INTERNAL_ERROR);
13163#endif
13164}
13165
Daniel Veillard81273902003-09-30 00:43:48 +000013166#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard328f48c2002-11-15 15:24:34 +000013167/**
Daniel Veillard58e44c92002-08-02 22:19:49 +000013168 * xmlParseBalancedChunkMemoryRecover:
13169 * @doc: the document the chunk pertains to
13170 * @sax: the SAX handler bloc (possibly NULL)
13171 * @user_data: The user data returned on SAX callbacks (possibly NULL)
13172 * @depth: Used for loop detection, use 0
13173 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
13174 * @lst: the return value for the set of parsed nodes
13175 * @recover: return nodes even if the data is broken (use 0)
13176 *
13177 *
13178 * Parse a well-balanced chunk of an XML document
13179 * called by the parser
13180 * The allowed sequence for the Well Balanced Chunk is the one defined by
13181 * the content production in the XML grammar:
13182 *
13183 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13184 *
13185 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
13186 * the parser error code otherwise
Daniel Veillard2135fc22008-04-04 16:10:51 +000013187 *
Daniel Veillard58e44c92002-08-02 22:19:49 +000013188 * In case recover is set to 1, the nodelist will not be empty even if
Daniel Veillard2135fc22008-04-04 16:10:51 +000013189 * the parsed chunk is not well balanced, assuming the parsing succeeded to
13190 * some extent.
Daniel Veillard58e44c92002-08-02 22:19:49 +000013191 */
13192int
13193xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax,
Daniel Veillard2135fc22008-04-04 16:10:51 +000013194 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst,
Daniel Veillard58e44c92002-08-02 22:19:49 +000013195 int recover) {
Owen Taylor3473f882001-02-23 17:55:21 +000013196 xmlParserCtxtPtr ctxt;
13197 xmlDocPtr newDoc;
13198 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000013199 xmlNodePtr content, newRoot;
Owen Taylor3473f882001-02-23 17:55:21 +000013200 int size;
13201 int ret = 0;
13202
Daniel Veillard0161e632008-08-28 15:36:32 +000013203 if (depth > 40) {
Owen Taylor3473f882001-02-23 17:55:21 +000013204 return(XML_ERR_ENTITY_LOOP);
13205 }
13206
13207
Daniel Veillardcda96922001-08-21 10:56:31 +000013208 if (lst != NULL)
13209 *lst = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000013210 if (string == NULL)
13211 return(-1);
13212
13213 size = xmlStrlen(string);
13214
13215 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
13216 if (ctxt == NULL) return(-1);
13217 ctxt->userData = ctxt;
13218 if (sax != NULL) {
13219 oldsax = ctxt->sax;
13220 ctxt->sax = sax;
13221 if (user_data != NULL)
13222 ctxt->userData = user_data;
13223 }
13224 newDoc = xmlNewDoc(BAD_CAST "1.0");
13225 if (newDoc == NULL) {
13226 xmlFreeParserCtxt(ctxt);
13227 return(-1);
13228 }
Daniel Veillardae0765b2008-07-31 19:54:59 +000013229 newDoc->properties = XML_DOC_INTERNAL;
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000013230 if ((doc != NULL) && (doc->dict != NULL)) {
13231 xmlDictFree(ctxt->dict);
13232 ctxt->dict = doc->dict;
13233 xmlDictReference(ctxt->dict);
13234 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
13235 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
13236 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
13237 ctxt->dictNames = 1;
13238 } else {
Daniel Veillard37334572008-07-31 08:20:02 +000013239 xmlCtxtUseOptionsInternal(ctxt, XML_PARSE_NODICT, NULL);
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000013240 }
Owen Taylor3473f882001-02-23 17:55:21 +000013241 if (doc != NULL) {
13242 newDoc->intSubset = doc->intSubset;
13243 newDoc->extSubset = doc->extSubset;
13244 }
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000013245 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
13246 if (newRoot == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000013247 if (sax != NULL)
13248 ctxt->sax = oldsax;
13249 xmlFreeParserCtxt(ctxt);
13250 newDoc->intSubset = NULL;
13251 newDoc->extSubset = NULL;
13252 xmlFreeDoc(newDoc);
13253 return(-1);
13254 }
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000013255 xmlAddChild((xmlNodePtr) newDoc, newRoot);
13256 nodePush(ctxt, newRoot);
Owen Taylor3473f882001-02-23 17:55:21 +000013257 if (doc == NULL) {
13258 ctxt->myDoc = newDoc;
13259 } else {
Daniel Veillard42766c02002-08-22 20:52:17 +000013260 ctxt->myDoc = newDoc;
Owen Taylor3473f882001-02-23 17:55:21 +000013261 newDoc->children->doc = doc;
Rob Richardsa02f1992006-09-16 14:04:26 +000013262 /* Ensure that doc has XML spec namespace */
13263 xmlSearchNsByHref(doc, (xmlNodePtr)doc, XML_XML_NAMESPACE);
13264 newDoc->oldNs = doc->oldNs;
Owen Taylor3473f882001-02-23 17:55:21 +000013265 }
13266 ctxt->instate = XML_PARSER_CONTENT;
13267 ctxt->depth = depth;
13268
13269 /*
13270 * Doing validity checking on chunk doesn't make sense
13271 */
13272 ctxt->validate = 0;
13273 ctxt->loadsubset = 0;
Daniel Veillarde57ec792003-09-10 10:50:59 +000013274 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000013275
Daniel Veillardb39bc392002-10-26 19:29:51 +000013276 if ( doc != NULL ){
13277 content = doc->children;
13278 doc->children = NULL;
13279 xmlParseContent(ctxt);
13280 doc->children = content;
13281 }
13282 else {
13283 xmlParseContent(ctxt);
13284 }
Owen Taylor3473f882001-02-23 17:55:21 +000013285 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000013286 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000013287 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000013288 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000013289 }
13290 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000013291 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000013292 }
13293
13294 if (!ctxt->wellFormed) {
13295 if (ctxt->errNo == 0)
13296 ret = 1;
13297 else
13298 ret = ctxt->errNo;
13299 } else {
Daniel Veillard58e44c92002-08-02 22:19:49 +000013300 ret = 0;
13301 }
Daniel Veillard0161e632008-08-28 15:36:32 +000013302
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000013303 if ((lst != NULL) && ((ret == 0) || (recover == 1))) {
13304 xmlNodePtr cur;
Owen Taylor3473f882001-02-23 17:55:21 +000013305
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000013306 /*
13307 * Return the newly created nodeset after unlinking it from
13308 * they pseudo parent.
13309 */
13310 cur = newDoc->children->children;
13311 *lst = cur;
13312 while (cur != NULL) {
13313 xmlSetTreeDoc(cur, doc);
13314 cur->parent = NULL;
13315 cur = cur->next;
Owen Taylor3473f882001-02-23 17:55:21 +000013316 }
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000013317 newDoc->children->children = NULL;
13318 }
Daniel Veillard0161e632008-08-28 15:36:32 +000013319
13320 if (sax != NULL)
Owen Taylor3473f882001-02-23 17:55:21 +000013321 ctxt->sax = oldsax;
13322 xmlFreeParserCtxt(ctxt);
13323 newDoc->intSubset = NULL;
13324 newDoc->extSubset = NULL;
Rob Richardsa02f1992006-09-16 14:04:26 +000013325 newDoc->oldNs = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000013326 xmlFreeDoc(newDoc);
Daniel Veillard0161e632008-08-28 15:36:32 +000013327
Owen Taylor3473f882001-02-23 17:55:21 +000013328 return(ret);
13329}
13330
13331/**
13332 * xmlSAXParseEntity:
13333 * @sax: the SAX handler block
13334 * @filename: the filename
13335 *
13336 * parse an XML external entity out of context and build a tree.
13337 * It use the given SAX function block to handle the parsing callback.
13338 * If sax is NULL, fallback to the default DOM tree building routines.
13339 *
13340 * [78] extParsedEnt ::= TextDecl? content
13341 *
13342 * This correspond to a "Well Balanced" chunk
13343 *
13344 * Returns the resulting document tree
13345 */
13346
13347xmlDocPtr
13348xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
13349 xmlDocPtr ret;
13350 xmlParserCtxtPtr ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000013351
13352 ctxt = xmlCreateFileParserCtxt(filename);
13353 if (ctxt == NULL) {
13354 return(NULL);
13355 }
13356 if (sax != NULL) {
13357 if (ctxt->sax != NULL)
13358 xmlFree(ctxt->sax);
13359 ctxt->sax = sax;
13360 ctxt->userData = NULL;
13361 }
13362
Owen Taylor3473f882001-02-23 17:55:21 +000013363 xmlParseExtParsedEnt(ctxt);
13364
13365 if (ctxt->wellFormed)
13366 ret = ctxt->myDoc;
13367 else {
13368 ret = NULL;
13369 xmlFreeDoc(ctxt->myDoc);
13370 ctxt->myDoc = NULL;
13371 }
13372 if (sax != NULL)
13373 ctxt->sax = NULL;
13374 xmlFreeParserCtxt(ctxt);
Daniel Veillard0161e632008-08-28 15:36:32 +000013375
Owen Taylor3473f882001-02-23 17:55:21 +000013376 return(ret);
13377}
13378
13379/**
13380 * xmlParseEntity:
13381 * @filename: the filename
13382 *
13383 * parse an XML external entity out of context and build a tree.
13384 *
13385 * [78] extParsedEnt ::= TextDecl? content
13386 *
13387 * This correspond to a "Well Balanced" chunk
13388 *
13389 * Returns the resulting document tree
13390 */
13391
13392xmlDocPtr
13393xmlParseEntity(const char *filename) {
13394 return(xmlSAXParseEntity(NULL, filename));
13395}
Daniel Veillard81273902003-09-30 00:43:48 +000013396#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000013397
13398/**
Rob Richards9c0aa472009-03-26 18:10:19 +000013399 * xmlCreateEntityParserCtxtInternal:
Owen Taylor3473f882001-02-23 17:55:21 +000013400 * @URL: the entity URL
13401 * @ID: the entity PUBLIC ID
Daniel Veillardcbaf3992001-12-31 16:16:02 +000013402 * @base: a possible base for the target URI
Rob Richards9c0aa472009-03-26 18:10:19 +000013403 * @pctx: parser context used to set options on new context
Owen Taylor3473f882001-02-23 17:55:21 +000013404 *
13405 * Create a parser context for an external entity
13406 * Automatic support for ZLIB/Compress compressed document is provided
13407 * by default if found at compile-time.
13408 *
13409 * Returns the new parser context or NULL
13410 */
Rob Richards9c0aa472009-03-26 18:10:19 +000013411static xmlParserCtxtPtr
13412xmlCreateEntityParserCtxtInternal(const xmlChar *URL, const xmlChar *ID,
13413 const xmlChar *base, xmlParserCtxtPtr pctx) {
Owen Taylor3473f882001-02-23 17:55:21 +000013414 xmlParserCtxtPtr ctxt;
13415 xmlParserInputPtr inputStream;
13416 char *directory = NULL;
13417 xmlChar *uri;
Daniel Veillard0161e632008-08-28 15:36:32 +000013418
Owen Taylor3473f882001-02-23 17:55:21 +000013419 ctxt = xmlNewParserCtxt();
13420 if (ctxt == NULL) {
13421 return(NULL);
13422 }
13423
Daniel Veillard48247b42009-07-10 16:12:46 +020013424 if (pctx != NULL) {
13425 ctxt->options = pctx->options;
13426 ctxt->_private = pctx->_private;
Rob Richards9c0aa472009-03-26 18:10:19 +000013427 }
13428
Owen Taylor3473f882001-02-23 17:55:21 +000013429 uri = xmlBuildURI(URL, base);
13430
13431 if (uri == NULL) {
13432 inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
13433 if (inputStream == NULL) {
13434 xmlFreeParserCtxt(ctxt);
13435 return(NULL);
13436 }
13437
13438 inputPush(ctxt, inputStream);
13439
13440 if ((ctxt->directory == NULL) && (directory == NULL))
13441 directory = xmlParserGetDirectory((char *)URL);
13442 if ((ctxt->directory == NULL) && (directory != NULL))
13443 ctxt->directory = directory;
13444 } else {
13445 inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt);
13446 if (inputStream == NULL) {
13447 xmlFree(uri);
13448 xmlFreeParserCtxt(ctxt);
13449 return(NULL);
13450 }
13451
13452 inputPush(ctxt, inputStream);
13453
13454 if ((ctxt->directory == NULL) && (directory == NULL))
13455 directory = xmlParserGetDirectory((char *)uri);
13456 if ((ctxt->directory == NULL) && (directory != NULL))
13457 ctxt->directory = directory;
13458 xmlFree(uri);
13459 }
Owen Taylor3473f882001-02-23 17:55:21 +000013460 return(ctxt);
13461}
13462
Rob Richards9c0aa472009-03-26 18:10:19 +000013463/**
13464 * xmlCreateEntityParserCtxt:
13465 * @URL: the entity URL
13466 * @ID: the entity PUBLIC ID
13467 * @base: a possible base for the target URI
13468 *
13469 * Create a parser context for an external entity
13470 * Automatic support for ZLIB/Compress compressed document is provided
13471 * by default if found at compile-time.
13472 *
13473 * Returns the new parser context or NULL
13474 */
13475xmlParserCtxtPtr
13476xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
13477 const xmlChar *base) {
13478 return xmlCreateEntityParserCtxtInternal(URL, ID, base, NULL);
13479
13480}
13481
/************************************************************************
 *									*
 *		Front ends when parsing from a file			*
 *									*
 ************************************************************************/
13487
13488/**
Daniel Veillard61b93382003-11-03 14:28:31 +000013489 * xmlCreateURLParserCtxt:
13490 * @filename: the filename or URL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013491 * @options: a combination of xmlParserOption
Owen Taylor3473f882001-02-23 17:55:21 +000013492 *
Daniel Veillard61b93382003-11-03 14:28:31 +000013493 * Create a parser context for a file or URL content.
Owen Taylor3473f882001-02-23 17:55:21 +000013494 * Automatic support for ZLIB/Compress compressed document is provided
Daniel Veillard61b93382003-11-03 14:28:31 +000013495 * by default if found at compile-time and for file accesses
Owen Taylor3473f882001-02-23 17:55:21 +000013496 *
13497 * Returns the new parser context or NULL
13498 */
13499xmlParserCtxtPtr
Daniel Veillard61b93382003-11-03 14:28:31 +000013500xmlCreateURLParserCtxt(const char *filename, int options)
Owen Taylor3473f882001-02-23 17:55:21 +000013501{
13502 xmlParserCtxtPtr ctxt;
13503 xmlParserInputPtr inputStream;
Owen Taylor3473f882001-02-23 17:55:21 +000013504 char *directory = NULL;
13505
Owen Taylor3473f882001-02-23 17:55:21 +000013506 ctxt = xmlNewParserCtxt();
13507 if (ctxt == NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +000013508 xmlErrMemory(NULL, "cannot allocate parser context");
Owen Taylor3473f882001-02-23 17:55:21 +000013509 return(NULL);
13510 }
13511
Daniel Veillarddf292f72005-01-16 19:00:15 +000013512 if (options)
Daniel Veillard37334572008-07-31 08:20:02 +000013513 xmlCtxtUseOptionsInternal(ctxt, options, NULL);
Daniel Veillarddf292f72005-01-16 19:00:15 +000013514 ctxt->linenumbers = 1;
Daniel Veillard37334572008-07-31 08:20:02 +000013515
Daniel Veillard4e9b1bc2003-06-09 10:30:33 +000013516 inputStream = xmlLoadExternalEntity(filename, NULL, ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000013517 if (inputStream == NULL) {
13518 xmlFreeParserCtxt(ctxt);
13519 return(NULL);
13520 }
13521
Owen Taylor3473f882001-02-23 17:55:21 +000013522 inputPush(ctxt, inputStream);
13523 if ((ctxt->directory == NULL) && (directory == NULL))
Igor Zlatkovic5f9fada2003-02-19 14:51:00 +000013524 directory = xmlParserGetDirectory(filename);
Owen Taylor3473f882001-02-23 17:55:21 +000013525 if ((ctxt->directory == NULL) && (directory != NULL))
13526 ctxt->directory = directory;
13527
13528 return(ctxt);
13529}
13530
Daniel Veillard61b93382003-11-03 14:28:31 +000013531/**
13532 * xmlCreateFileParserCtxt:
13533 * @filename: the filename
13534 *
13535 * Create a parser context for a file content.
13536 * Automatic support for ZLIB/Compress compressed document is provided
13537 * by default if found at compile-time.
13538 *
13539 * Returns the new parser context or NULL
13540 */
13541xmlParserCtxtPtr
13542xmlCreateFileParserCtxt(const char *filename)
13543{
13544 return(xmlCreateURLParserCtxt(filename, 0));
13545}
13546
Daniel Veillard81273902003-09-30 00:43:48 +000013547#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000013548/**
Daniel Veillarda293c322001-10-02 13:54:14 +000013549 * xmlSAXParseFileWithData:
Owen Taylor3473f882001-02-23 17:55:21 +000013550 * @sax: the SAX handler block
13551 * @filename: the filename
13552 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
13553 * documents
Daniel Veillarda293c322001-10-02 13:54:14 +000013554 * @data: the userdata
Owen Taylor3473f882001-02-23 17:55:21 +000013555 *
13556 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
13557 * compressed document is provided by default if found at compile-time.
13558 * It use the given SAX function block to handle the parsing callback.
13559 * If sax is NULL, fallback to the default DOM tree building routines.
13560 *
Daniel Veillarde19fc232002-04-22 16:01:24 +000013561 * User data (void *) is stored within the parser context in the
13562 * context's _private member, so it is available nearly everywhere in libxml
Daniel Veillarda293c322001-10-02 13:54:14 +000013563 *
Owen Taylor3473f882001-02-23 17:55:21 +000013564 * Returns the resulting document tree
13565 */
13566
13567xmlDocPtr
Daniel Veillarda293c322001-10-02 13:54:14 +000013568xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename,
13569 int recovery, void *data) {
Owen Taylor3473f882001-02-23 17:55:21 +000013570 xmlDocPtr ret;
13571 xmlParserCtxtPtr ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000013572
Daniel Veillard635ef722001-10-29 11:48:19 +000013573 xmlInitParser();
13574
Owen Taylor3473f882001-02-23 17:55:21 +000013575 ctxt = xmlCreateFileParserCtxt(filename);
13576 if (ctxt == NULL) {
13577 return(NULL);
13578 }
13579 if (sax != NULL) {
13580 if (ctxt->sax != NULL)
13581 xmlFree(ctxt->sax);
13582 ctxt->sax = sax;
Owen Taylor3473f882001-02-23 17:55:21 +000013583 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000013584 xmlDetectSAX2(ctxt);
Daniel Veillarda293c322001-10-02 13:54:14 +000013585 if (data!=NULL) {
Daniel Veillardc790bf42003-10-11 10:50:10 +000013586 ctxt->_private = data;
Daniel Veillarda293c322001-10-02 13:54:14 +000013587 }
Owen Taylor3473f882001-02-23 17:55:21 +000013588
Daniel Veillard37d2d162008-03-14 10:54:00 +000013589 if (ctxt->directory == NULL)
13590 ctxt->directory = xmlParserGetDirectory(filename);
Owen Taylor3473f882001-02-23 17:55:21 +000013591
Daniel Veillarddad3f682002-11-17 16:47:27 +000013592 ctxt->recovery = recovery;
13593
Owen Taylor3473f882001-02-23 17:55:21 +000013594 xmlParseDocument(ctxt);
13595
William M. Brackc07329e2003-09-08 01:57:30 +000013596 if ((ctxt->wellFormed) || recovery) {
13597 ret = ctxt->myDoc;
Daniel Veillardb65e12e2003-10-08 21:33:28 +000013598 if (ret != NULL) {
13599 if (ctxt->input->buf->compressed > 0)
13600 ret->compression = 9;
13601 else
13602 ret->compression = ctxt->input->buf->compressed;
13603 }
William M. Brackc07329e2003-09-08 01:57:30 +000013604 }
Owen Taylor3473f882001-02-23 17:55:21 +000013605 else {
13606 ret = NULL;
13607 xmlFreeDoc(ctxt->myDoc);
13608 ctxt->myDoc = NULL;
13609 }
13610 if (sax != NULL)
13611 ctxt->sax = NULL;
13612 xmlFreeParserCtxt(ctxt);
13613
13614 return(ret);
13615}
13616
13617/**
Daniel Veillarda293c322001-10-02 13:54:14 +000013618 * xmlSAXParseFile:
13619 * @sax: the SAX handler block
13620 * @filename: the filename
13621 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
13622 * documents
13623 *
13624 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
13625 * compressed document is provided by default if found at compile-time.
13626 * It use the given SAX function block to handle the parsing callback.
13627 * If sax is NULL, fallback to the default DOM tree building routines.
13628 *
13629 * Returns the resulting document tree
13630 */
13631
13632xmlDocPtr
13633xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
13634 int recovery) {
13635 return(xmlSAXParseFileWithData(sax,filename,recovery,NULL));
13636}
13637
13638/**
Owen Taylor3473f882001-02-23 17:55:21 +000013639 * xmlRecoverDoc:
13640 * @cur: a pointer to an array of xmlChar
13641 *
13642 * parse an XML in-memory document and build a tree.
Daniel Veillard2135fc22008-04-04 16:10:51 +000013643 * In the case the document is not Well Formed, a attempt to build a
13644 * tree is tried anyway
13645 *
13646 * Returns the resulting document tree or NULL in case of failure
Owen Taylor3473f882001-02-23 17:55:21 +000013647 */
13648
13649xmlDocPtr
Daniel Veillardf39eafa2009-08-20 19:15:08 +020013650xmlRecoverDoc(const xmlChar *cur) {
Owen Taylor3473f882001-02-23 17:55:21 +000013651 return(xmlSAXParseDoc(NULL, cur, 1));
13652}
13653
13654/**
13655 * xmlParseFile:
13656 * @filename: the filename
13657 *
13658 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
13659 * compressed document is provided by default if found at compile-time.
13660 *
Daniel Veillard5d96fff2001-08-31 14:55:30 +000013661 * Returns the resulting document tree if the file was wellformed,
13662 * NULL otherwise.
Owen Taylor3473f882001-02-23 17:55:21 +000013663 */
13664
13665xmlDocPtr
13666xmlParseFile(const char *filename) {
13667 return(xmlSAXParseFile(NULL, filename, 0));
13668}
13669
13670/**
13671 * xmlRecoverFile:
13672 * @filename: the filename
13673 *
13674 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
13675 * compressed document is provided by default if found at compile-time.
Daniel Veillard2135fc22008-04-04 16:10:51 +000013676 * In the case the document is not Well Formed, it attempts to build
13677 * a tree anyway
Owen Taylor3473f882001-02-23 17:55:21 +000013678 *
Daniel Veillard2135fc22008-04-04 16:10:51 +000013679 * Returns the resulting document tree or NULL in case of failure
Owen Taylor3473f882001-02-23 17:55:21 +000013680 */
13681
13682xmlDocPtr
13683xmlRecoverFile(const char *filename) {
13684 return(xmlSAXParseFile(NULL, filename, 1));
13685}
13686
13687
13688/**
13689 * xmlSetupParserForBuffer:
13690 * @ctxt: an XML parser context
13691 * @buffer: a xmlChar * buffer
13692 * @filename: a file name
13693 *
13694 * Setup the parser context to parse a new buffer; Clears any prior
13695 * contents from the parser context. The buffer parameter must not be
13696 * NULL, but the filename parameter can be
13697 */
13698void
13699xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
13700 const char* filename)
13701{
13702 xmlParserInputPtr input;
13703
Daniel Veillard36e5cd52004-11-02 14:52:23 +000013704 if ((ctxt == NULL) || (buffer == NULL))
13705 return;
13706
Owen Taylor3473f882001-02-23 17:55:21 +000013707 input = xmlNewInputStream(ctxt);
13708 if (input == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +000013709 xmlErrMemory(NULL, "parsing new buffer: out of memory\n");
Daniel Veillard36e5cd52004-11-02 14:52:23 +000013710 xmlClearParserCtxt(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000013711 return;
13712 }
13713
13714 xmlClearParserCtxt(ctxt);
13715 if (filename != NULL)
Daniel Veillardc3ca5ba2003-05-09 22:26:28 +000013716 input->filename = (char *) xmlCanonicPath((const xmlChar *)filename);
Owen Taylor3473f882001-02-23 17:55:21 +000013717 input->base = buffer;
13718 input->cur = buffer;
Daniel Veillard48b2f892001-02-25 16:11:03 +000013719 input->end = &buffer[xmlStrlen(buffer)];
Owen Taylor3473f882001-02-23 17:55:21 +000013720 inputPush(ctxt, input);
13721}
13722
13723/**
13724 * xmlSAXUserParseFile:
13725 * @sax: a SAX handler
13726 * @user_data: The user data returned on SAX callbacks
13727 * @filename: a file name
13728 *
13729 * parse an XML file and call the given SAX handler routines.
13730 * Automatic support for ZLIB/Compress compressed document is provided
13731 *
13732 * Returns 0 in case of success or a error number otherwise
13733 */
13734int
13735xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
13736 const char *filename) {
13737 int ret = 0;
13738 xmlParserCtxtPtr ctxt;
13739
13740 ctxt = xmlCreateFileParserCtxt(filename);
13741 if (ctxt == NULL) return -1;
Daniel Veillard092643b2003-09-25 14:29:29 +000013742 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Owen Taylor3473f882001-02-23 17:55:21 +000013743 xmlFree(ctxt->sax);
13744 ctxt->sax = sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000013745 xmlDetectSAX2(ctxt);
13746
Owen Taylor3473f882001-02-23 17:55:21 +000013747 if (user_data != NULL)
13748 ctxt->userData = user_data;
13749
13750 xmlParseDocument(ctxt);
13751
13752 if (ctxt->wellFormed)
13753 ret = 0;
13754 else {
13755 if (ctxt->errNo != 0)
13756 ret = ctxt->errNo;
13757 else
13758 ret = -1;
13759 }
13760 if (sax != NULL)
13761 ctxt->sax = NULL;
Daniel Veillard34099b42004-11-04 17:34:35 +000013762 if (ctxt->myDoc != NULL) {
13763 xmlFreeDoc(ctxt->myDoc);
13764 ctxt->myDoc = NULL;
13765 }
Owen Taylor3473f882001-02-23 17:55:21 +000013766 xmlFreeParserCtxt(ctxt);
13767
13768 return ret;
13769}
Daniel Veillard81273902003-09-30 00:43:48 +000013770#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000013771
/************************************************************************
 *									*
 *		Front ends when parsing from memory			*
 *									*
 ************************************************************************/
13777
13778/**
13779 * xmlCreateMemoryParserCtxt:
13780 * @buffer: a pointer to a char array
13781 * @size: the size of the array
13782 *
13783 * Create a parser context for an XML in-memory document.
13784 *
13785 * Returns the new parser context or NULL
13786 */
13787xmlParserCtxtPtr
Daniel Veillardfd7ddca2001-05-16 10:57:35 +000013788xmlCreateMemoryParserCtxt(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000013789 xmlParserCtxtPtr ctxt;
13790 xmlParserInputPtr input;
13791 xmlParserInputBufferPtr buf;
13792
13793 if (buffer == NULL)
13794 return(NULL);
13795 if (size <= 0)
13796 return(NULL);
13797
13798 ctxt = xmlNewParserCtxt();
13799 if (ctxt == NULL)
13800 return(NULL);
13801
Daniel Veillard53350552003-09-18 13:35:51 +000013802 /* TODO: xmlParserInputBufferCreateStatic, requires some serious changes */
Owen Taylor3473f882001-02-23 17:55:21 +000013803 buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
Daniel Veillarda7e05b42002-11-19 08:11:14 +000013804 if (buf == NULL) {
13805 xmlFreeParserCtxt(ctxt);
13806 return(NULL);
13807 }
Owen Taylor3473f882001-02-23 17:55:21 +000013808
13809 input = xmlNewInputStream(ctxt);
13810 if (input == NULL) {
Daniel Veillarda7e05b42002-11-19 08:11:14 +000013811 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000013812 xmlFreeParserCtxt(ctxt);
13813 return(NULL);
13814 }
13815
13816 input->filename = NULL;
13817 input->buf = buf;
13818 input->base = input->buf->buffer->content;
13819 input->cur = input->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +000013820 input->end = &input->buf->buffer->content[input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000013821
13822 inputPush(ctxt, input);
13823 return(ctxt);
13824}
13825
Daniel Veillard81273902003-09-30 00:43:48 +000013826#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000013827/**
Daniel Veillard8606bbb2002-11-12 12:36:52 +000013828 * xmlSAXParseMemoryWithData:
13829 * @sax: the SAX handler block
13830 * @buffer: an pointer to a char array
13831 * @size: the size of the array
13832 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
13833 * documents
13834 * @data: the userdata
13835 *
13836 * parse an XML in-memory block and use the given SAX function block
13837 * to handle the parsing callback. If sax is NULL, fallback to the default
13838 * DOM tree building routines.
13839 *
13840 * User data (void *) is stored within the parser context in the
13841 * context's _private member, so it is available nearly everywhere in libxml
13842 *
13843 * Returns the resulting document tree
13844 */
13845
13846xmlDocPtr
13847xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer,
13848 int size, int recovery, void *data) {
13849 xmlDocPtr ret;
13850 xmlParserCtxtPtr ctxt;
13851
Daniel Veillardab2a7632009-07-09 08:45:03 +020013852 xmlInitParser();
13853
Daniel Veillard8606bbb2002-11-12 12:36:52 +000013854 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
13855 if (ctxt == NULL) return(NULL);
13856 if (sax != NULL) {
13857 if (ctxt->sax != NULL)
13858 xmlFree(ctxt->sax);
13859 ctxt->sax = sax;
13860 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000013861 xmlDetectSAX2(ctxt);
Daniel Veillard8606bbb2002-11-12 12:36:52 +000013862 if (data!=NULL) {
13863 ctxt->_private=data;
13864 }
13865
Daniel Veillardadba5f12003-04-04 16:09:01 +000013866 ctxt->recovery = recovery;
13867
Daniel Veillard8606bbb2002-11-12 12:36:52 +000013868 xmlParseDocument(ctxt);
13869
13870 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
13871 else {
13872 ret = NULL;
13873 xmlFreeDoc(ctxt->myDoc);
13874 ctxt->myDoc = NULL;
13875 }
13876 if (sax != NULL)
13877 ctxt->sax = NULL;
13878 xmlFreeParserCtxt(ctxt);
Daniel Veillardab2a7632009-07-09 08:45:03 +020013879
Daniel Veillard8606bbb2002-11-12 12:36:52 +000013880 return(ret);
13881}
13882
13883/**
Owen Taylor3473f882001-02-23 17:55:21 +000013884 * xmlSAXParseMemory:
13885 * @sax: the SAX handler block
13886 * @buffer: an pointer to a char array
13887 * @size: the size of the array
13888 * @recovery: work in recovery mode, i.e. tries to read not Well Formed
13889 * documents
13890 *
13891 * parse an XML in-memory block and use the given SAX function block
13892 * to handle the parsing callback. If sax is NULL, fallback to the default
13893 * DOM tree building routines.
13894 *
13895 * Returns the resulting document tree
13896 */
13897xmlDocPtr
Daniel Veillard50822cb2001-07-26 20:05:51 +000013898xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer,
13899 int size, int recovery) {
Daniel Veillard8606bbb2002-11-12 12:36:52 +000013900 return xmlSAXParseMemoryWithData(sax, buffer, size, recovery, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000013901}
13902
13903/**
13904 * xmlParseMemory:
13905 * @buffer: an pointer to a char array
13906 * @size: the size of the array
13907 *
13908 * parse an XML in-memory block and build a tree.
13909 *
13910 * Returns the resulting document tree
13911 */
13912
Daniel Veillard50822cb2001-07-26 20:05:51 +000013913xmlDocPtr xmlParseMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000013914 return(xmlSAXParseMemory(NULL, buffer, size, 0));
13915}
13916
13917/**
13918 * xmlRecoverMemory:
13919 * @buffer: an pointer to a char array
13920 * @size: the size of the array
13921 *
13922 * parse an XML in-memory block and build a tree.
Daniel Veillard2135fc22008-04-04 16:10:51 +000013923 * In the case the document is not Well Formed, an attempt to
13924 * build a tree is tried anyway
13925 *
13926 * Returns the resulting document tree or NULL in case of error
Owen Taylor3473f882001-02-23 17:55:21 +000013927 */
13928
Daniel Veillard50822cb2001-07-26 20:05:51 +000013929xmlDocPtr xmlRecoverMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000013930 return(xmlSAXParseMemory(NULL, buffer, size, 1));
13931}
13932
13933/**
13934 * xmlSAXUserParseMemory:
13935 * @sax: a SAX handler
13936 * @user_data: The user data returned on SAX callbacks
13937 * @buffer: an in-memory XML document input
13938 * @size: the length of the XML document in bytes
13939 *
13940 * A better SAX parsing routine.
13941 * parse an XML in-memory buffer and call the given SAX handler routines.
Daniel Veillardab2a7632009-07-09 08:45:03 +020013942 *
Owen Taylor3473f882001-02-23 17:55:21 +000013943 * Returns 0 in case of success or a error number otherwise
13944 */
13945int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardfd7ddca2001-05-16 10:57:35 +000013946 const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000013947 int ret = 0;
13948 xmlParserCtxtPtr ctxt;
Daniel Veillardab2a7632009-07-09 08:45:03 +020013949
13950 xmlInitParser();
13951
Owen Taylor3473f882001-02-23 17:55:21 +000013952 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
13953 if (ctxt == NULL) return -1;
Daniel Veillard3dcd3192007-08-14 13:46:54 +000013954 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
13955 xmlFree(ctxt->sax);
Daniel Veillard9e923512002-08-14 08:48:52 +000013956 ctxt->sax = sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000013957 xmlDetectSAX2(ctxt);
Daniel Veillard3dcd3192007-08-14 13:46:54 +000013958
Daniel Veillard30211a02001-04-26 09:33:18 +000013959 if (user_data != NULL)
13960 ctxt->userData = user_data;
Daniel Veillardab2a7632009-07-09 08:45:03 +020013961
Owen Taylor3473f882001-02-23 17:55:21 +000013962 xmlParseDocument(ctxt);
13963
13964 if (ctxt->wellFormed)
13965 ret = 0;
13966 else {
13967 if (ctxt->errNo != 0)
13968 ret = ctxt->errNo;
13969 else
13970 ret = -1;
13971 }
Daniel Veillard3dcd3192007-08-14 13:46:54 +000013972 if (sax != NULL)
13973 ctxt->sax = NULL;
Daniel Veillard34099b42004-11-04 17:34:35 +000013974 if (ctxt->myDoc != NULL) {
13975 xmlFreeDoc(ctxt->myDoc);
13976 ctxt->myDoc = NULL;
13977 }
Owen Taylor3473f882001-02-23 17:55:21 +000013978 xmlFreeParserCtxt(ctxt);
13979
13980 return ret;
13981}
Daniel Veillard81273902003-09-30 00:43:48 +000013982#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000013983
13984/**
13985 * xmlCreateDocParserCtxt:
13986 * @cur: a pointer to an array of xmlChar
13987 *
13988 * Creates a parser context for an XML in-memory document.
13989 *
13990 * Returns the new parser context or NULL
13991 */
13992xmlParserCtxtPtr
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013993xmlCreateDocParserCtxt(const xmlChar *cur) {
Owen Taylor3473f882001-02-23 17:55:21 +000013994 int len;
13995
13996 if (cur == NULL)
13997 return(NULL);
13998 len = xmlStrlen(cur);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013999 return(xmlCreateMemoryParserCtxt((const char *)cur, len));
Owen Taylor3473f882001-02-23 17:55:21 +000014000}
14001
Daniel Veillard81273902003-09-30 00:43:48 +000014002#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000014003/**
14004 * xmlSAXParseDoc:
14005 * @sax: the SAX handler block
14006 * @cur: a pointer to an array of xmlChar
14007 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
14008 * documents
14009 *
14010 * parse an XML in-memory document and build a tree.
14011 * It use the given SAX function block to handle the parsing callback.
14012 * If sax is NULL, fallback to the default DOM tree building routines.
14013 *
14014 * Returns the resulting document tree
14015 */
14016
14017xmlDocPtr
Daniel Veillard7331e5c2005-03-31 14:59:00 +000014018xmlSAXParseDoc(xmlSAXHandlerPtr sax, const xmlChar *cur, int recovery) {
Owen Taylor3473f882001-02-23 17:55:21 +000014019 xmlDocPtr ret;
14020 xmlParserCtxtPtr ctxt;
Daniel Veillard38936062004-11-04 17:45:11 +000014021 xmlSAXHandlerPtr oldsax = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000014022
Daniel Veillard38936062004-11-04 17:45:11 +000014023 if (cur == NULL) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000014024
14025
14026 ctxt = xmlCreateDocParserCtxt(cur);
14027 if (ctxt == NULL) return(NULL);
14028 if (sax != NULL) {
Daniel Veillard38936062004-11-04 17:45:11 +000014029 oldsax = ctxt->sax;
Owen Taylor3473f882001-02-23 17:55:21 +000014030 ctxt->sax = sax;
14031 ctxt->userData = NULL;
14032 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000014033 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000014034
14035 xmlParseDocument(ctxt);
14036 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
14037 else {
14038 ret = NULL;
14039 xmlFreeDoc(ctxt->myDoc);
14040 ctxt->myDoc = NULL;
14041 }
Daniel Veillard34099b42004-11-04 17:34:35 +000014042 if (sax != NULL)
Daniel Veillard38936062004-11-04 17:45:11 +000014043 ctxt->sax = oldsax;
Owen Taylor3473f882001-02-23 17:55:21 +000014044 xmlFreeParserCtxt(ctxt);
14045
14046 return(ret);
14047}
14048
14049/**
14050 * xmlParseDoc:
14051 * @cur: a pointer to an array of xmlChar
14052 *
14053 * parse an XML in-memory document and build a tree.
14054 *
14055 * Returns the resulting document tree
14056 */
14057
14058xmlDocPtr
Daniel Veillard7331e5c2005-03-31 14:59:00 +000014059xmlParseDoc(const xmlChar *cur) {
Owen Taylor3473f882001-02-23 17:55:21 +000014060 return(xmlSAXParseDoc(NULL, cur, 0));
14061}
Daniel Veillard81273902003-09-30 00:43:48 +000014062#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000014063
Daniel Veillard81273902003-09-30 00:43:48 +000014064#ifdef LIBXML_LEGACY_ENABLED
/************************************************************************
 *									*
 *	Specific function to keep track of entities references		*
 *	and used by the XSLT debugger					*
 *									*
 ************************************************************************/
14071
14072static xmlEntityReferenceFunc xmlEntityRefFunc = NULL;
14073
14074/**
14075 * xmlAddEntityReference:
14076 * @ent : A valid entity
14077 * @firstNode : A valid first node for children of entity
14078 * @lastNode : A valid last node of children entity
14079 *
14080 * Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY
14081 */
14082static void
14083xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
14084 xmlNodePtr lastNode)
14085{
14086 if (xmlEntityRefFunc != NULL) {
14087 (*xmlEntityRefFunc) (ent, firstNode, lastNode);
14088 }
14089}
14090
14091
14092/**
14093 * xmlSetEntityReferenceFunc:
Daniel Veillard01c13b52002-12-10 15:19:08 +000014094 * @func: A valid function
Daniel Veillard8107a222002-01-13 14:10:10 +000014095 *
14096 * Set the function to call call back when a xml reference has been made
14097 */
14098void
14099xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func)
14100{
14101 xmlEntityRefFunc = func;
14102}
Daniel Veillard81273902003-09-30 00:43:48 +000014103#endif /* LIBXML_LEGACY_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000014104
/************************************************************************
 *									*
 *				Miscellaneous				*
 *									*
 ************************************************************************/
14110
14111#ifdef LIBXML_XPATH_ENABLED
14112#include <libxml/xpath.h>
14113#endif
14114
/* Default generic error handler, defined outside this file. */
extern void XMLCDECL xmlGenericErrorDefaultFunc(void *ctx, const char *msg, ...);
/* Non-zero once xmlInitParser() has completed, zero again after cleanup. */
static int xmlParserInitialized = 0;
14117
/**
 * xmlInitParser:
 *
 * Initialization function for the XML parser.
 * This is not reentrant. Call once before processing in case of
 * use in multithreaded programs.
 *
 * Calls after the first successful one return immediately. When thread
 * support is compiled in, the initialization runs with the global init
 * mutex held and the "initialized" flag is tested a second time once
 * the mutex has been taken.
 */

void
xmlInitParser(void) {
    /* Already initialized: nothing to do. */
    if (xmlParserInitialized != 0)
        return;

#ifdef LIBXML_THREAD_ENABLED
    __xmlGlobalInitMutexLock();
    /* Re-test under the lock; the flag may have changed while waiting. */
    if (xmlParserInitialized == 0) {
#endif
        xmlInitThreads();
        xmlInitGlobals();
        /* Install the default error handler unless a custom one is set. */
        if ((xmlGenericError == xmlGenericErrorDefaultFunc) ||
            (xmlGenericError == NULL))
            initGenericErrorDefaultFunc(NULL);
        xmlInitMemory();
        xmlInitCharEncodingHandlers();
        xmlDefaultSAXHandlerInit();
        xmlRegisterDefaultInputCallbacks();
#ifdef LIBXML_OUTPUT_ENABLED
        xmlRegisterDefaultOutputCallbacks();
#endif /* LIBXML_OUTPUT_ENABLED */
#ifdef LIBXML_HTML_ENABLED
        htmlInitAutoClose();
        htmlDefaultSAXHandlerInit();
#endif
#ifdef LIBXML_XPATH_ENABLED
        xmlXPathInit();
#endif
        /* Set last, after every subsystem above has been initialized. */
        xmlParserInitialized = 1;
#ifdef LIBXML_THREAD_ENABLED
    }
    __xmlGlobalInitMutexUnlock();
#endif
}
14160
/**
 * xmlCleanupParser:
 *
 * This function name is somewhat misleading. It does not clean up
 * parser state, it cleans up memory allocated by the library itself.
 * It is a cleanup function for the XML library. It tries to reclaim all
 * related global memory allocated for the library processing.
 * It doesn't deallocate any document related memory. One should
 * call xmlCleanupParser() only when the process has finished using
 * the library and all XML/HTML documents built with it.
 * See also xmlInitParser() which has the opposite function of preparing
 * the library for operations.
 *
 * The call does nothing if the library is not currently initialized.
 *
 * WARNING: if your application is multithreaded or has plugin support
 *          calling this may crash the application if another thread or
 *          a plugin is still using libxml2. It's sometimes very hard to
 *          guess if libxml2 is in use in the application, some libraries
 *          or plugins may use it without notice. In case of doubt abstain
 *          from calling this function or do it just before calling exit()
 *          to avoid leak reports from valgrind !
 */

void
xmlCleanupParser(void) {
    /* Nothing to release if xmlInitParser() has not run. */
    if (!xmlParserInitialized)
        return;

    xmlCleanupCharEncodingHandlers();
#ifdef LIBXML_CATALOG_ENABLED
    xmlCatalogCleanup();
#endif
    xmlDictCleanup();
    xmlCleanupInputCallbacks();
#ifdef LIBXML_OUTPUT_ENABLED
    xmlCleanupOutputCallbacks();
#endif
#ifdef LIBXML_SCHEMAS_ENABLED
    xmlSchemaCleanupTypes();
    xmlRelaxNGCleanupTypes();
#endif
    xmlCleanupGlobals();
    xmlResetLastError();
    xmlCleanupThreads(); /* must be last if called not from the main thread */
    xmlCleanupMemory();
    /* Clear the flag so that a later xmlInitParser() runs in full again. */
    xmlParserInitialized = 0;
}
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014207
14208/************************************************************************
14209 * *
14210 * New set (2.6.0) of simpler and more flexible APIs *
14211 * *
14212 ************************************************************************/
14213
/**
 * DICT_FREE:
 * @str:  a string
 *
 * Free a string if it is not owned by the "dict" dictionary in the
 * current scope. A NULL @str is ignored; when "dict" is NULL the string
 * is always freed.
 *
 * The body is wrapped in do { } while (0) so the macro behaves as a
 * single statement and can be used safely inside if/else constructs.
 * Callers must terminate the invocation with a semicolon.
 */
#define DICT_FREE(str)                                          \
    do {                                                        \
        if ((str) && ((!dict) ||                                \
            (xmlDictOwns(dict, (const xmlChar *)(str)) == 0)))  \
            xmlFree((char *)(str));                             \
    } while (0)
14225
14226/**
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014227 * xmlCtxtReset:
14228 * @ctxt: an XML parser context
14229 *
14230 * Reset a parser context
14231 */
14232void
14233xmlCtxtReset(xmlParserCtxtPtr ctxt)
14234{
14235 xmlParserInputPtr input;
Daniel Veillard36e5cd52004-11-02 14:52:23 +000014236 xmlDictPtr dict;
14237
14238 if (ctxt == NULL)
14239 return;
14240
14241 dict = ctxt->dict;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014242
14243 while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
14244 xmlFreeInputStream(input);
14245 }
14246 ctxt->inputNr = 0;
14247 ctxt->input = NULL;
14248
14249 ctxt->spaceNr = 0;
Daniel Veillard2e620862007-06-12 08:18:21 +000014250 if (ctxt->spaceTab != NULL) {
14251 ctxt->spaceTab[0] = -1;
14252 ctxt->space = &ctxt->spaceTab[0];
14253 } else {
14254 ctxt->space = NULL;
14255 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014256
14257
14258 ctxt->nodeNr = 0;
14259 ctxt->node = NULL;
14260
14261 ctxt->nameNr = 0;
14262 ctxt->name = NULL;
14263
Daniel Veillarde96a2a42003-09-24 21:23:56 +000014264 DICT_FREE(ctxt->version);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014265 ctxt->version = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000014266 DICT_FREE(ctxt->encoding);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014267 ctxt->encoding = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000014268 DICT_FREE(ctxt->directory);
14269 ctxt->directory = NULL;
14270 DICT_FREE(ctxt->extSubURI);
14271 ctxt->extSubURI = NULL;
14272 DICT_FREE(ctxt->extSubSystem);
14273 ctxt->extSubSystem = NULL;
14274 if (ctxt->myDoc != NULL)
14275 xmlFreeDoc(ctxt->myDoc);
14276 ctxt->myDoc = NULL;
14277
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014278 ctxt->standalone = -1;
14279 ctxt->hasExternalSubset = 0;
14280 ctxt->hasPErefs = 0;
14281 ctxt->html = 0;
14282 ctxt->external = 0;
14283 ctxt->instate = XML_PARSER_START;
14284 ctxt->token = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014285
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014286 ctxt->wellFormed = 1;
14287 ctxt->nsWellFormed = 1;
Daniel Veillardae289182004-01-21 16:00:43 +000014288 ctxt->disableSAX = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014289 ctxt->valid = 1;
Daniel Veillard766c4f92004-03-26 10:48:29 +000014290#if 0
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014291 ctxt->vctxt.userData = ctxt;
14292 ctxt->vctxt.error = xmlParserValidityError;
14293 ctxt->vctxt.warning = xmlParserValidityWarning;
Daniel Veillard766c4f92004-03-26 10:48:29 +000014294#endif
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014295 ctxt->record_info = 0;
14296 ctxt->nbChars = 0;
14297 ctxt->checkIndex = 0;
14298 ctxt->inSubset = 0;
14299 ctxt->errNo = XML_ERR_OK;
14300 ctxt->depth = 0;
14301 ctxt->charset = XML_CHAR_ENCODING_UTF8;
14302 ctxt->catalogs = NULL;
Daniel Veillard4bf899b2008-08-20 17:04:30 +000014303 ctxt->nbentities = 0;
Daniel Veillard0161e632008-08-28 15:36:32 +000014304 ctxt->sizeentities = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014305 xmlInitNodeInfoSeq(&ctxt->node_seq);
14306
14307 if (ctxt->attsDefault != NULL) {
14308 xmlHashFree(ctxt->attsDefault, (xmlHashDeallocator) xmlFree);
14309 ctxt->attsDefault = NULL;
14310 }
14311 if (ctxt->attsSpecial != NULL) {
14312 xmlHashFree(ctxt->attsSpecial, NULL);
14313 ctxt->attsSpecial = NULL;
14314 }
14315
Daniel Veillard4432df22003-09-28 18:58:27 +000014316#ifdef LIBXML_CATALOG_ENABLED
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014317 if (ctxt->catalogs != NULL)
14318 xmlCatalogFreeLocal(ctxt->catalogs);
Daniel Veillard4432df22003-09-28 18:58:27 +000014319#endif
Daniel Veillardcc199e02003-10-24 21:11:48 +000014320 if (ctxt->lastError.code != XML_ERR_OK)
14321 xmlResetError(&ctxt->lastError);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014322}
14323
14324/**
Daniel Veillard9ba8e382003-10-28 21:31:45 +000014325 * xmlCtxtResetPush:
14326 * @ctxt: an XML parser context
14327 * @chunk: a pointer to an array of chars
14328 * @size: number of chars in the array
14329 * @filename: an optional file name or URI
14330 * @encoding: the document encoding, or NULL
14331 *
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000014332 * Reset a push parser context
14333 *
14334 * Returns 0 in case of success and 1 in case of error
Daniel Veillard9ba8e382003-10-28 21:31:45 +000014335 */
14336int
14337xmlCtxtResetPush(xmlParserCtxtPtr ctxt, const char *chunk,
14338 int size, const char *filename, const char *encoding)
14339{
14340 xmlParserInputPtr inputStream;
14341 xmlParserInputBufferPtr buf;
14342 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
14343
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000014344 if (ctxt == NULL)
14345 return(1);
14346
Daniel Veillard9ba8e382003-10-28 21:31:45 +000014347 if ((encoding == NULL) && (chunk != NULL) && (size >= 4))
14348 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
14349
14350 buf = xmlAllocParserInputBuffer(enc);
14351 if (buf == NULL)
14352 return(1);
14353
14354 if (ctxt == NULL) {
14355 xmlFreeParserInputBuffer(buf);
14356 return(1);
14357 }
14358
14359 xmlCtxtReset(ctxt);
14360
14361 if (ctxt->pushTab == NULL) {
14362 ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 *
14363 sizeof(xmlChar *));
14364 if (ctxt->pushTab == NULL) {
14365 xmlErrMemory(ctxt, NULL);
14366 xmlFreeParserInputBuffer(buf);
14367 return(1);
14368 }
14369 }
14370
14371 if (filename == NULL) {
14372 ctxt->directory = NULL;
14373 } else {
14374 ctxt->directory = xmlParserGetDirectory(filename);
14375 }
14376
14377 inputStream = xmlNewInputStream(ctxt);
14378 if (inputStream == NULL) {
14379 xmlFreeParserInputBuffer(buf);
14380 return(1);
14381 }
14382
14383 if (filename == NULL)
14384 inputStream->filename = NULL;
14385 else
14386 inputStream->filename = (char *)
14387 xmlCanonicPath((const xmlChar *) filename);
14388 inputStream->buf = buf;
14389 inputStream->base = inputStream->buf->buffer->content;
14390 inputStream->cur = inputStream->buf->buffer->content;
14391 inputStream->end =
14392 &inputStream->buf->buffer->content[inputStream->buf->buffer->use];
14393
14394 inputPush(ctxt, inputStream);
14395
14396 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
14397 (ctxt->input->buf != NULL)) {
14398 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
14399 int cur = ctxt->input->cur - ctxt->input->base;
14400
14401 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
14402
14403 ctxt->input->base = ctxt->input->buf->buffer->content + base;
14404 ctxt->input->cur = ctxt->input->base + cur;
14405 ctxt->input->end =
14406 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->
14407 use];
14408#ifdef DEBUG_PUSH
14409 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
14410#endif
14411 }
14412
14413 if (encoding != NULL) {
14414 xmlCharEncodingHandlerPtr hdlr;
14415
Daniel Veillard37334572008-07-31 08:20:02 +000014416 if (ctxt->encoding != NULL)
14417 xmlFree((xmlChar *) ctxt->encoding);
14418 ctxt->encoding = xmlStrdup((const xmlChar *) encoding);
14419
Daniel Veillard9ba8e382003-10-28 21:31:45 +000014420 hdlr = xmlFindCharEncodingHandler(encoding);
14421 if (hdlr != NULL) {
14422 xmlSwitchToEncoding(ctxt, hdlr);
14423 } else {
14424 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
14425 "Unsupported encoding %s\n", BAD_CAST encoding);
14426 }
14427 } else if (enc != XML_CHAR_ENCODING_NONE) {
14428 xmlSwitchEncoding(ctxt, enc);
14429 }
14430
14431 return(0);
14432}
14433
Daniel Veillard37334572008-07-31 08:20:02 +000014434
Daniel Veillard9ba8e382003-10-28 21:31:45 +000014435/**
Daniel Veillard37334572008-07-31 08:20:02 +000014436 * xmlCtxtUseOptionsInternal:
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014437 * @ctxt: an XML parser context
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014438 * @options: a combination of xmlParserOption
Daniel Veillard37334572008-07-31 08:20:02 +000014439 * @encoding: the user provided encoding to use
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014440 *
14441 * Applies the options to the parser context
14442 *
14443 * Returns 0 in case of success, the set of unknown or unimplemented options
14444 * in case of error.
14445 */
Daniel Veillard37334572008-07-31 08:20:02 +000014446static int
14447xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options, const char *encoding)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014448{
Daniel Veillard36e5cd52004-11-02 14:52:23 +000014449 if (ctxt == NULL)
14450 return(-1);
Daniel Veillard37334572008-07-31 08:20:02 +000014451 if (encoding != NULL) {
14452 if (ctxt->encoding != NULL)
14453 xmlFree((xmlChar *) ctxt->encoding);
14454 ctxt->encoding = xmlStrdup((const xmlChar *) encoding);
14455 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014456 if (options & XML_PARSE_RECOVER) {
14457 ctxt->recovery = 1;
14458 options -= XML_PARSE_RECOVER;
Daniel Veillardae0765b2008-07-31 19:54:59 +000014459 ctxt->options |= XML_PARSE_RECOVER;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014460 } else
14461 ctxt->recovery = 0;
14462 if (options & XML_PARSE_DTDLOAD) {
14463 ctxt->loadsubset = XML_DETECT_IDS;
14464 options -= XML_PARSE_DTDLOAD;
Daniel Veillardae0765b2008-07-31 19:54:59 +000014465 ctxt->options |= XML_PARSE_DTDLOAD;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014466 } else
14467 ctxt->loadsubset = 0;
14468 if (options & XML_PARSE_DTDATTR) {
14469 ctxt->loadsubset |= XML_COMPLETE_ATTRS;
14470 options -= XML_PARSE_DTDATTR;
Daniel Veillardae0765b2008-07-31 19:54:59 +000014471 ctxt->options |= XML_PARSE_DTDATTR;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014472 }
14473 if (options & XML_PARSE_NOENT) {
14474 ctxt->replaceEntities = 1;
14475 /* ctxt->loadsubset |= XML_DETECT_IDS; */
14476 options -= XML_PARSE_NOENT;
Daniel Veillardae0765b2008-07-31 19:54:59 +000014477 ctxt->options |= XML_PARSE_NOENT;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014478 } else
14479 ctxt->replaceEntities = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014480 if (options & XML_PARSE_PEDANTIC) {
14481 ctxt->pedantic = 1;
14482 options -= XML_PARSE_PEDANTIC;
Daniel Veillardae0765b2008-07-31 19:54:59 +000014483 ctxt->options |= XML_PARSE_PEDANTIC;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014484 } else
14485 ctxt->pedantic = 0;
14486 if (options & XML_PARSE_NOBLANKS) {
14487 ctxt->keepBlanks = 0;
14488 ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace;
14489 options -= XML_PARSE_NOBLANKS;
Daniel Veillardae0765b2008-07-31 19:54:59 +000014490 ctxt->options |= XML_PARSE_NOBLANKS;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014491 } else
14492 ctxt->keepBlanks = 1;
14493 if (options & XML_PARSE_DTDVALID) {
14494 ctxt->validate = 1;
14495 if (options & XML_PARSE_NOWARNING)
14496 ctxt->vctxt.warning = NULL;
14497 if (options & XML_PARSE_NOERROR)
14498 ctxt->vctxt.error = NULL;
14499 options -= XML_PARSE_DTDVALID;
Daniel Veillardae0765b2008-07-31 19:54:59 +000014500 ctxt->options |= XML_PARSE_DTDVALID;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014501 } else
14502 ctxt->validate = 0;
Daniel Veillard971771e2005-07-09 17:32:57 +000014503 if (options & XML_PARSE_NOWARNING) {
14504 ctxt->sax->warning = NULL;
14505 options -= XML_PARSE_NOWARNING;
14506 }
14507 if (options & XML_PARSE_NOERROR) {
14508 ctxt->sax->error = NULL;
14509 ctxt->sax->fatalError = NULL;
14510 options -= XML_PARSE_NOERROR;
14511 }
Daniel Veillard81273902003-09-30 00:43:48 +000014512#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014513 if (options & XML_PARSE_SAX1) {
14514 ctxt->sax->startElement = xmlSAX2StartElement;
14515 ctxt->sax->endElement = xmlSAX2EndElement;
14516 ctxt->sax->startElementNs = NULL;
14517 ctxt->sax->endElementNs = NULL;
14518 ctxt->sax->initialized = 1;
14519 options -= XML_PARSE_SAX1;
Daniel Veillardae0765b2008-07-31 19:54:59 +000014520 ctxt->options |= XML_PARSE_SAX1;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014521 }
Daniel Veillard81273902003-09-30 00:43:48 +000014522#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde96a2a42003-09-24 21:23:56 +000014523 if (options & XML_PARSE_NODICT) {
14524 ctxt->dictNames = 0;
14525 options -= XML_PARSE_NODICT;
Daniel Veillardae0765b2008-07-31 19:54:59 +000014526 ctxt->options |= XML_PARSE_NODICT;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000014527 } else {
14528 ctxt->dictNames = 1;
14529 }
Daniel Veillarddca8cc72003-09-26 13:53:14 +000014530 if (options & XML_PARSE_NOCDATA) {
14531 ctxt->sax->cdataBlock = NULL;
14532 options -= XML_PARSE_NOCDATA;
Daniel Veillardae0765b2008-07-31 19:54:59 +000014533 ctxt->options |= XML_PARSE_NOCDATA;
Daniel Veillarddca8cc72003-09-26 13:53:14 +000014534 }
14535 if (options & XML_PARSE_NSCLEAN) {
14536 ctxt->options |= XML_PARSE_NSCLEAN;
14537 options -= XML_PARSE_NSCLEAN;
14538 }
Daniel Veillard61b93382003-11-03 14:28:31 +000014539 if (options & XML_PARSE_NONET) {
14540 ctxt->options |= XML_PARSE_NONET;
14541 options -= XML_PARSE_NONET;
14542 }
Daniel Veillard8874b942005-08-25 13:19:21 +000014543 if (options & XML_PARSE_COMPACT) {
14544 ctxt->options |= XML_PARSE_COMPACT;
14545 options -= XML_PARSE_COMPACT;
14546 }
Daniel Veillard7e5c3f42008-07-29 16:12:31 +000014547 if (options & XML_PARSE_OLD10) {
14548 ctxt->options |= XML_PARSE_OLD10;
14549 options -= XML_PARSE_OLD10;
14550 }
Daniel Veillard8915c152008-08-26 13:05:34 +000014551 if (options & XML_PARSE_NOBASEFIX) {
14552 ctxt->options |= XML_PARSE_NOBASEFIX;
14553 options -= XML_PARSE_NOBASEFIX;
14554 }
14555 if (options & XML_PARSE_HUGE) {
14556 ctxt->options |= XML_PARSE_HUGE;
14557 options -= XML_PARSE_HUGE;
14558 }
Rob Richardsb9ed0172009-01-05 17:28:50 +000014559 if (options & XML_PARSE_OLDSAX) {
14560 ctxt->options |= XML_PARSE_OLDSAX;
14561 options -= XML_PARSE_OLDSAX;
14562 }
Daniel Veillard7ec29972003-10-31 14:36:36 +000014563 ctxt->linenumbers = 1;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014564 return (options);
14565}
14566
14567/**
Daniel Veillard37334572008-07-31 08:20:02 +000014568 * xmlCtxtUseOptions:
14569 * @ctxt: an XML parser context
14570 * @options: a combination of xmlParserOption
14571 *
14572 * Applies the options to the parser context
14573 *
14574 * Returns 0 in case of success, the set of unknown or unimplemented options
14575 * in case of error.
14576 */
14577int
14578xmlCtxtUseOptions(xmlParserCtxtPtr ctxt, int options)
14579{
14580 return(xmlCtxtUseOptionsInternal(ctxt, options, NULL));
14581}
14582
14583/**
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014584 * xmlDoRead:
14585 * @ctxt: an XML parser context
Daniel Veillard60942de2003-09-25 21:05:58 +000014586 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014587 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014588 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014589 * @reuse: keep the context for reuse
14590 *
14591 * Common front-end for the xmlRead functions
Daniel Veillard37334572008-07-31 08:20:02 +000014592 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014593 * Returns the resulting document tree or NULL
14594 */
14595static xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000014596xmlDoRead(xmlParserCtxtPtr ctxt, const char *URL, const char *encoding,
14597 int options, int reuse)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014598{
14599 xmlDocPtr ret;
Daniel Veillard37334572008-07-31 08:20:02 +000014600
14601 xmlCtxtUseOptionsInternal(ctxt, options, encoding);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014602 if (encoding != NULL) {
14603 xmlCharEncodingHandlerPtr hdlr;
14604
14605 hdlr = xmlFindCharEncodingHandler(encoding);
14606 if (hdlr != NULL)
14607 xmlSwitchToEncoding(ctxt, hdlr);
14608 }
Daniel Veillard60942de2003-09-25 21:05:58 +000014609 if ((URL != NULL) && (ctxt->input != NULL) &&
14610 (ctxt->input->filename == NULL))
14611 ctxt->input->filename = (char *) xmlStrdup((const xmlChar *) URL);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014612 xmlParseDocument(ctxt);
14613 if ((ctxt->wellFormed) || ctxt->recovery)
14614 ret = ctxt->myDoc;
14615 else {
14616 ret = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000014617 if (ctxt->myDoc != NULL) {
Daniel Veillarde96a2a42003-09-24 21:23:56 +000014618 xmlFreeDoc(ctxt->myDoc);
14619 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014620 }
Daniel Veillarde96a2a42003-09-24 21:23:56 +000014621 ctxt->myDoc = NULL;
14622 if (!reuse) {
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014623 xmlFreeParserCtxt(ctxt);
Daniel Veillarde96a2a42003-09-24 21:23:56 +000014624 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014625
14626 return (ret);
14627}
14628
14629/**
14630 * xmlReadDoc:
14631 * @cur: a pointer to a zero terminated string
Daniel Veillard60942de2003-09-25 21:05:58 +000014632 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014633 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014634 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014635 *
14636 * parse an XML in-memory document and build a tree.
14637 *
14638 * Returns the resulting document tree
14639 */
14640xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000014641xmlReadDoc(const xmlChar * cur, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014642{
14643 xmlParserCtxtPtr ctxt;
14644
14645 if (cur == NULL)
14646 return (NULL);
14647
14648 ctxt = xmlCreateDocParserCtxt(cur);
14649 if (ctxt == NULL)
14650 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000014651 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014652}
14653
14654/**
14655 * xmlReadFile:
14656 * @filename: a file or URL
14657 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014658 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014659 *
14660 * parse an XML file from the filesystem or the network.
14661 *
14662 * Returns the resulting document tree
14663 */
14664xmlDocPtr
14665xmlReadFile(const char *filename, const char *encoding, int options)
14666{
14667 xmlParserCtxtPtr ctxt;
14668
Daniel Veillard61b93382003-11-03 14:28:31 +000014669 ctxt = xmlCreateURLParserCtxt(filename, options);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014670 if (ctxt == NULL)
14671 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000014672 return (xmlDoRead(ctxt, NULL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014673}
14674
14675/**
14676 * xmlReadMemory:
14677 * @buffer: a pointer to a char array
14678 * @size: the size of the array
Daniel Veillard60942de2003-09-25 21:05:58 +000014679 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014680 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014681 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014682 *
14683 * parse an XML in-memory document and build a tree.
14684 *
14685 * Returns the resulting document tree
14686 */
14687xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000014688xmlReadMemory(const char *buffer, int size, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014689{
14690 xmlParserCtxtPtr ctxt;
14691
14692 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
14693 if (ctxt == NULL)
14694 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000014695 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014696}
14697
14698/**
14699 * xmlReadFd:
14700 * @fd: an open file descriptor
Daniel Veillard60942de2003-09-25 21:05:58 +000014701 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014702 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014703 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014704 *
14705 * parse an XML from a file descriptor and build a tree.
Daniel Veillard4bc5f432003-12-22 18:13:12 +000014706 * NOTE that the file descriptor will not be closed when the
14707 * reader is closed or reset.
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014708 *
14709 * Returns the resulting document tree
14710 */
14711xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000014712xmlReadFd(int fd, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014713{
14714 xmlParserCtxtPtr ctxt;
14715 xmlParserInputBufferPtr input;
14716 xmlParserInputPtr stream;
14717
14718 if (fd < 0)
14719 return (NULL);
14720
14721 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
14722 if (input == NULL)
14723 return (NULL);
Daniel Veillard4bc5f432003-12-22 18:13:12 +000014724 input->closecallback = NULL;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014725 ctxt = xmlNewParserCtxt();
14726 if (ctxt == NULL) {
14727 xmlFreeParserInputBuffer(input);
14728 return (NULL);
14729 }
14730 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
14731 if (stream == NULL) {
14732 xmlFreeParserInputBuffer(input);
14733 xmlFreeParserCtxt(ctxt);
14734 return (NULL);
14735 }
14736 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000014737 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014738}
14739
14740/**
14741 * xmlReadIO:
14742 * @ioread: an I/O read function
14743 * @ioclose: an I/O close function
14744 * @ioctx: an I/O handler
Daniel Veillard60942de2003-09-25 21:05:58 +000014745 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014746 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014747 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014748 *
14749 * parse an XML document from I/O functions and source and build a tree.
14750 *
14751 * Returns the resulting document tree
14752 */
14753xmlDocPtr
14754xmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
Daniel Veillard60942de2003-09-25 21:05:58 +000014755 void *ioctx, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014756{
14757 xmlParserCtxtPtr ctxt;
14758 xmlParserInputBufferPtr input;
14759 xmlParserInputPtr stream;
14760
14761 if (ioread == NULL)
14762 return (NULL);
14763
14764 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
14765 XML_CHAR_ENCODING_NONE);
14766 if (input == NULL)
14767 return (NULL);
14768 ctxt = xmlNewParserCtxt();
14769 if (ctxt == NULL) {
14770 xmlFreeParserInputBuffer(input);
14771 return (NULL);
14772 }
14773 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
14774 if (stream == NULL) {
14775 xmlFreeParserInputBuffer(input);
14776 xmlFreeParserCtxt(ctxt);
14777 return (NULL);
14778 }
14779 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000014780 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014781}
14782
14783/**
14784 * xmlCtxtReadDoc:
14785 * @ctxt: an XML parser context
14786 * @cur: a pointer to a zero terminated string
Daniel Veillard60942de2003-09-25 21:05:58 +000014787 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014788 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014789 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014790 *
14791 * parse an XML in-memory document and build a tree.
14792 * This reuses the existing @ctxt parser context
14793 *
14794 * Returns the resulting document tree
14795 */
14796xmlDocPtr
14797xmlCtxtReadDoc(xmlParserCtxtPtr ctxt, const xmlChar * cur,
Daniel Veillard60942de2003-09-25 21:05:58 +000014798 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014799{
14800 xmlParserInputPtr stream;
14801
14802 if (cur == NULL)
14803 return (NULL);
14804 if (ctxt == NULL)
14805 return (NULL);
14806
14807 xmlCtxtReset(ctxt);
14808
14809 stream = xmlNewStringInputStream(ctxt, cur);
14810 if (stream == NULL) {
14811 return (NULL);
14812 }
14813 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000014814 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014815}
14816
14817/**
14818 * xmlCtxtReadFile:
14819 * @ctxt: an XML parser context
14820 * @filename: a file or URL
14821 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014822 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014823 *
14824 * parse an XML file from the filesystem or the network.
14825 * This reuses the existing @ctxt parser context
14826 *
14827 * Returns the resulting document tree
14828 */
14829xmlDocPtr
14830xmlCtxtReadFile(xmlParserCtxtPtr ctxt, const char *filename,
14831 const char *encoding, int options)
14832{
14833 xmlParserInputPtr stream;
14834
14835 if (filename == NULL)
14836 return (NULL);
14837 if (ctxt == NULL)
14838 return (NULL);
14839
14840 xmlCtxtReset(ctxt);
14841
Daniel Veillard29614c72004-11-26 10:47:26 +000014842 stream = xmlLoadExternalEntity(filename, NULL, ctxt);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014843 if (stream == NULL) {
14844 return (NULL);
14845 }
14846 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000014847 return (xmlDoRead(ctxt, NULL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014848}
14849
14850/**
14851 * xmlCtxtReadMemory:
14852 * @ctxt: an XML parser context
14853 * @buffer: a pointer to a char array
14854 * @size: the size of the array
Daniel Veillard60942de2003-09-25 21:05:58 +000014855 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014856 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014857 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014858 *
14859 * parse an XML in-memory document and build a tree.
14860 * This reuses the existing @ctxt parser context
14861 *
14862 * Returns the resulting document tree
14863 */
14864xmlDocPtr
14865xmlCtxtReadMemory(xmlParserCtxtPtr ctxt, const char *buffer, int size,
Daniel Veillard60942de2003-09-25 21:05:58 +000014866 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014867{
14868 xmlParserInputBufferPtr input;
14869 xmlParserInputPtr stream;
14870
14871 if (ctxt == NULL)
14872 return (NULL);
14873 if (buffer == NULL)
14874 return (NULL);
14875
14876 xmlCtxtReset(ctxt);
14877
14878 input = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
14879 if (input == NULL) {
14880 return(NULL);
14881 }
14882
14883 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
14884 if (stream == NULL) {
14885 xmlFreeParserInputBuffer(input);
14886 return(NULL);
14887 }
14888
14889 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000014890 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014891}
14892
14893/**
14894 * xmlCtxtReadFd:
14895 * @ctxt: an XML parser context
14896 * @fd: an open file descriptor
Daniel Veillard60942de2003-09-25 21:05:58 +000014897 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014898 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014899 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014900 *
14901 * parse an XML from a file descriptor and build a tree.
14902 * This reuses the existing @ctxt parser context
Daniel Veillard4bc5f432003-12-22 18:13:12 +000014903 * NOTE that the file descriptor will not be closed when the
14904 * reader is closed or reset.
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014905 *
14906 * Returns the resulting document tree
14907 */
14908xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000014909xmlCtxtReadFd(xmlParserCtxtPtr ctxt, int fd,
14910 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014911{
14912 xmlParserInputBufferPtr input;
14913 xmlParserInputPtr stream;
14914
14915 if (fd < 0)
14916 return (NULL);
14917 if (ctxt == NULL)
14918 return (NULL);
14919
14920 xmlCtxtReset(ctxt);
14921
14922
14923 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
14924 if (input == NULL)
14925 return (NULL);
Daniel Veillard4bc5f432003-12-22 18:13:12 +000014926 input->closecallback = NULL;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014927 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
14928 if (stream == NULL) {
14929 xmlFreeParserInputBuffer(input);
14930 return (NULL);
14931 }
14932 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000014933 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014934}
14935
14936/**
14937 * xmlCtxtReadIO:
14938 * @ctxt: an XML parser context
14939 * @ioread: an I/O read function
14940 * @ioclose: an I/O close function
14941 * @ioctx: an I/O handler
Daniel Veillard60942de2003-09-25 21:05:58 +000014942 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014943 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014944 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014945 *
14946 * parse an XML document from I/O functions and source and build a tree.
14947 * This reuses the existing @ctxt parser context
14948 *
14949 * Returns the resulting document tree
14950 */
14951xmlDocPtr
14952xmlCtxtReadIO(xmlParserCtxtPtr ctxt, xmlInputReadCallback ioread,
14953 xmlInputCloseCallback ioclose, void *ioctx,
Daniel Veillard60942de2003-09-25 21:05:58 +000014954 const char *URL,
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014955 const char *encoding, int options)
14956{
14957 xmlParserInputBufferPtr input;
14958 xmlParserInputPtr stream;
14959
14960 if (ioread == NULL)
14961 return (NULL);
14962 if (ctxt == NULL)
14963 return (NULL);
14964
14965 xmlCtxtReset(ctxt);
14966
14967 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
14968 XML_CHAR_ENCODING_NONE);
14969 if (input == NULL)
14970 return (NULL);
14971 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
14972 if (stream == NULL) {
14973 xmlFreeParserInputBuffer(input);
14974 return (NULL);
14975 }
14976 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000014977 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014978}
Daniel Veillard5d4644e2005-04-01 13:11:58 +000014979
14980#define bottom_parser
14981#include "elfgcchack.h"