blob: 5f8411e1d43b06cd14c9ce7f8368bad098b5f048 [file] [log] [blame]
Owen Taylor3473f882001-02-23 17:55:21 +00001/*
2 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
3 * implemented on top of the SAX interfaces
4 *
5 * References:
6 * The XML specification:
7 * http://www.w3.org/TR/REC-xml
8 * Original 1.0 version:
9 * http://www.w3.org/TR/1998/REC-xml-19980210
10 * XML second edition working draft
11 * http://www.w3.org/TR/2000/WD-xml-2e-20000814
12 *
13 * Okay this is a big file, the parser core is around 7000 lines, then it
14 * is followed by the progressive parser top routines, then the various
Daniel Veillardcbaf3992001-12-31 16:16:02 +000015 * high level APIs to call the parser and a few miscellaneous functions.
Owen Taylor3473f882001-02-23 17:55:21 +000016 * A number of helper functions and deprecated ones have been moved to
17 * parserInternals.c to reduce this file size.
18 * As much as possible the functions are associated with their relative
19 * production in the XML specification. A few productions defining the
20 * different ranges of character are actually implanted either in
21 * parserInternals.h or parserInternals.c
22 * The DOM tree build is realized from the default SAX callbacks in
23 * the module SAX.c.
24 * The routines doing the validation checks are in valid.c and called either
Daniel Veillardcbaf3992001-12-31 16:16:02 +000025 * from the SAX callbacks or as standalone functions using a preparsed
Owen Taylor3473f882001-02-23 17:55:21 +000026 * document.
27 *
28 * See Copyright for the status of this software.
29 *
Daniel Veillardc5d64342001-06-24 12:13:24 +000030 * daniel@veillard.com
Owen Taylor3473f882001-02-23 17:55:21 +000031 */
32
Daniel Veillard34ce8be2002-03-18 19:37:11 +000033#define IN_LIBXML
Bjorn Reese70a9da52001-04-21 16:57:29 +000034#include "libxml.h"
35
Daniel Veillard3c5ed912002-01-08 10:36:16 +000036#if defined(WIN32) && !defined (__CYGWIN__)
Owen Taylor3473f882001-02-23 17:55:21 +000037#define XML_DIR_SEP '\\'
38#else
Owen Taylor3473f882001-02-23 17:55:21 +000039#define XML_DIR_SEP '/'
40#endif
41
Owen Taylor3473f882001-02-23 17:55:21 +000042#include <stdlib.h>
43#include <string.h>
Aleksey Sanine7acf432003-10-02 20:05:27 +000044#include <stdarg.h>
Owen Taylor3473f882001-02-23 17:55:21 +000045#include <libxml/xmlmemory.h>
Daniel Veillardd0463562001-10-13 09:15:48 +000046#include <libxml/threads.h>
47#include <libxml/globals.h>
Owen Taylor3473f882001-02-23 17:55:21 +000048#include <libxml/tree.h>
49#include <libxml/parser.h>
50#include <libxml/parserInternals.h>
51#include <libxml/valid.h>
52#include <libxml/entities.h>
53#include <libxml/xmlerror.h>
54#include <libxml/encoding.h>
55#include <libxml/xmlIO.h>
56#include <libxml/uri.h>
Daniel Veillard5d90b6c2001-08-22 14:29:45 +000057#ifdef LIBXML_CATALOG_ENABLED
58#include <libxml/catalog.h>
59#endif
William M. Brack1d8c9b22004-12-25 10:14:57 +000060#ifdef LIBXML_SCHEMAS_ENABLED
61#include <libxml/xmlschemastypes.h>
62#include <libxml/relaxng.h>
63#endif
Owen Taylor3473f882001-02-23 17:55:21 +000064#ifdef HAVE_CTYPE_H
65#include <ctype.h>
66#endif
67#ifdef HAVE_STDLIB_H
68#include <stdlib.h>
69#endif
70#ifdef HAVE_SYS_STAT_H
71#include <sys/stat.h>
72#endif
73#ifdef HAVE_FCNTL_H
74#include <fcntl.h>
75#endif
76#ifdef HAVE_UNISTD_H
77#include <unistd.h>
78#endif
79#ifdef HAVE_ZLIB_H
80#include <zlib.h>
81#endif
82
Daniel Veillard0161e632008-08-28 15:36:32 +000083static void
84xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info);
85
Rob Richards9c0aa472009-03-26 18:10:19 +000086static xmlParserCtxtPtr
87xmlCreateEntityParserCtxtInternal(const xmlChar *URL, const xmlChar *ID,
88 const xmlChar *base, xmlParserCtxtPtr pctx);
89
Daniel Veillard0161e632008-08-28 15:36:32 +000090/************************************************************************
91 * *
92 * Arbitrary limits set in the parser. See XML_PARSE_HUGE *
93 * *
94 ************************************************************************/
95
/* Replacement sizes below this value are never subject to the entity check. */
#define XML_PARSER_BIG_ENTITY 1000
/* NOTE(review): no use of this limit is visible in this part of the file. */
#define XML_PARSER_LOT_ENTITY 5000

/*
 * XML_PARSER_NON_LINEAR is the threshold where the ratio of parsed entity
 * replacement over the size in bytes of the input indicates that you have
 * an exponential behaviour. A value of 10 corresponds to at least 3 entity
 * replacements per byte of input.
 */
#define XML_PARSER_NON_LINEAR 10
106
107/*
108 * xmlParserEntityCheck
109 *
110 * Function to check non-linear entity expansion behaviour
111 * This is here to detect and stop exponential linear entity expansion
112 * This is not a limitation of the parser but a safety
113 * boundary feature. It can be disabled with the XML_PARSE_HUGE
114 * parser option.
115 */
116static int
117xmlParserEntityCheck(xmlParserCtxtPtr ctxt, unsigned long size,
118 xmlEntityPtr ent)
119{
Daniel Veillardcba68392008-08-29 12:43:40 +0000120 unsigned long consumed = 0;
Daniel Veillard0161e632008-08-28 15:36:32 +0000121
122 if ((ctxt == NULL) || (ctxt->options & XML_PARSE_HUGE))
123 return (0);
124 if (ctxt->lastError.code == XML_ERR_ENTITY_LOOP)
125 return (1);
126 if (size != 0) {
127 /*
128 * Do the check based on the replacement size of the entity
129 */
130 if (size < XML_PARSER_BIG_ENTITY)
131 return(0);
132
133 /*
134 * A limit on the amount of text data reasonably used
135 */
136 if (ctxt->input != NULL) {
137 consumed = ctxt->input->consumed +
138 (ctxt->input->cur - ctxt->input->base);
139 }
140 consumed += ctxt->sizeentities;
141
142 if ((size < XML_PARSER_NON_LINEAR * consumed) &&
143 (ctxt->nbentities * 3 < XML_PARSER_NON_LINEAR * consumed))
144 return (0);
145 } else if (ent != NULL) {
146 /*
147 * use the number of parsed entities in the replacement
148 */
149 size = ent->checked;
150
151 /*
152 * The amount of data parsed counting entities size only once
153 */
154 if (ctxt->input != NULL) {
155 consumed = ctxt->input->consumed +
156 (ctxt->input->cur - ctxt->input->base);
157 }
158 consumed += ctxt->sizeentities;
159
160 /*
161 * Check the density of entities for the amount of data
162 * knowing an entity reference will take at least 3 bytes
163 */
164 if (size * 3 < consumed * XML_PARSER_NON_LINEAR)
165 return (0);
166 } else {
167 /*
168 * strange we got no data for checking just return
169 */
170 return (0);
171 }
172
173 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
174 return (1);
175}
176
/**
 * xmlParserMaxDepth:
 *
 * Arbitrary limit on the depth of the XML documents we accept to
 * process. It is a safety boundary rather than a limitation of the
 * parser, and the XML_PARSE_HUGE parser option disables it.
 */
unsigned int xmlParserMaxDepth = 256;
Owen Taylor3473f882001-02-23 17:55:21 +0000186
Daniel Veillard0fb18932003-09-07 09:14:37 +0000187
Daniel Veillard0161e632008-08-28 15:36:32 +0000188
/* NOTE(review): uses of these constants are outside the visible code. */
#define SAX2 1
/* Presumably sizes of scratch buffers used while parsing - confirm. */
#define XML_PARSER_BIG_BUFFER_SIZE 300
#define XML_PARSER_BUFFER_SIZE 100
/* Marker string for documents built in SAX compatibility mode. */
#define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"
193
/*
 * NULL-terminated list of the XML-prefixed processing instruction
 * targets allowed by the W3C specifications.
 */
static const char *xmlW3CPIs[] = {
    "xml-stylesheet",
    NULL
};
202
Daniel Veillarda07050d2003-10-19 14:46:32 +0000203
Owen Taylor3473f882001-02-23 17:55:21 +0000204/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
Daniel Veillard8ed10722009-08-20 19:17:36 +0200205static xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
206 const xmlChar **str);
Owen Taylor3473f882001-02-23 17:55:21 +0000207
Daniel Veillard7d515752003-09-26 19:12:37 +0000208static xmlParserErrors
Daniel Veillarda97a19b2001-05-20 13:19:52 +0000209xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
210 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +0000211 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +0000212 const xmlChar *ID, xmlNodePtr *list);
Owen Taylor3473f882001-02-23 17:55:21 +0000213
Daniel Veillard37334572008-07-31 08:20:02 +0000214static int
215xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options,
216 const char *encoding);
Daniel Veillard81273902003-09-30 00:43:48 +0000217#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +0000218static void
219xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
220 xmlNodePtr lastNode);
Daniel Veillard81273902003-09-30 00:43:48 +0000221#endif /* LIBXML_LEGACY_ENABLED */
Daniel Veillard8107a222002-01-13 14:10:10 +0000222
Daniel Veillard7d515752003-09-26 19:12:37 +0000223static xmlParserErrors
Daniel Veillard328f48c2002-11-15 15:24:34 +0000224xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
225 const xmlChar *string, void *user_data, xmlNodePtr *lst);
Daniel Veillarde57ec792003-09-10 10:50:59 +0000226
Daniel Veillard8bf64ae2008-03-24 20:45:21 +0000227static int
228xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity);
229
Daniel Veillarde57ec792003-09-10 10:50:59 +0000230/************************************************************************
231 * *
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000232 * Some factorized error routines *
233 * *
234 ************************************************************************/
235
236/**
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000237 * xmlErrAttributeDup:
238 * @ctxt: an XML parser context
239 * @prefix: the attribute prefix
240 * @localname: the attribute localname
241 *
242 * Handle a redefinition of attribute error
243 */
244static void
245xmlErrAttributeDup(xmlParserCtxtPtr ctxt, const xmlChar * prefix,
246 const xmlChar * localname)
247{
Daniel Veillard157fee02003-10-31 10:36:03 +0000248 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
249 (ctxt->instate == XML_PARSER_EOF))
250 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000251 if (ctxt != NULL)
252 ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
Daniel Veillard76d36452009-09-07 11:19:33 +0200253
Daniel Veillard2b8c4a12003-10-02 22:28:19 +0000254 if (prefix == NULL)
Daniel Veillard659e71e2003-10-10 14:10:40 +0000255 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
Daniel Veillard76d36452009-09-07 11:19:33 +0200256 XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000257 (const char *) localname, NULL, NULL, 0, 0,
258 "Attribute %s redefined\n", localname);
Daniel Veillard2b8c4a12003-10-02 22:28:19 +0000259 else
Daniel Veillard659e71e2003-10-10 14:10:40 +0000260 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
Daniel Veillard76d36452009-09-07 11:19:33 +0200261 XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000262 (const char *) prefix, (const char *) localname,
263 NULL, 0, 0, "Attribute %s:%s redefined\n", prefix,
264 localname);
Daniel Veillard30e76072006-03-09 14:13:55 +0000265 if (ctxt != NULL) {
266 ctxt->wellFormed = 0;
267 if (ctxt->recovery == 0)
268 ctxt->disableSAX = 1;
269 }
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000270}
271
272/**
273 * xmlFatalErr:
274 * @ctxt: an XML parser context
275 * @error: the error number
276 * @extra: extra information string
277 *
278 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
279 */
280static void
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000281xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info)
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000282{
283 const char *errmsg;
284
Daniel Veillard157fee02003-10-31 10:36:03 +0000285 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
286 (ctxt->instate == XML_PARSER_EOF))
287 return;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000288 switch (error) {
289 case XML_ERR_INVALID_HEX_CHARREF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000290 errmsg = "CharRef: invalid hexadecimal value\n";
291 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000292 case XML_ERR_INVALID_DEC_CHARREF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000293 errmsg = "CharRef: invalid decimal value\n";
294 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000295 case XML_ERR_INVALID_CHARREF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000296 errmsg = "CharRef: invalid value\n";
297 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000298 case XML_ERR_INTERNAL_ERROR:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000299 errmsg = "internal error";
300 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000301 case XML_ERR_PEREF_AT_EOF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000302 errmsg = "PEReference at end of document\n";
303 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000304 case XML_ERR_PEREF_IN_PROLOG:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000305 errmsg = "PEReference in prolog\n";
306 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000307 case XML_ERR_PEREF_IN_EPILOG:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000308 errmsg = "PEReference in epilog\n";
309 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000310 case XML_ERR_PEREF_NO_NAME:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000311 errmsg = "PEReference: no name\n";
312 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000313 case XML_ERR_PEREF_SEMICOL_MISSING:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000314 errmsg = "PEReference: expecting ';'\n";
315 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000316 case XML_ERR_ENTITY_LOOP:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000317 errmsg = "Detected an entity reference loop\n";
318 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000319 case XML_ERR_ENTITY_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000320 errmsg = "EntityValue: \" or ' expected\n";
321 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000322 case XML_ERR_ENTITY_PE_INTERNAL:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000323 errmsg = "PEReferences forbidden in internal subset\n";
324 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000325 case XML_ERR_ENTITY_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000326 errmsg = "EntityValue: \" or ' expected\n";
327 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000328 case XML_ERR_ATTRIBUTE_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000329 errmsg = "AttValue: \" or ' expected\n";
330 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000331 case XML_ERR_LT_IN_ATTRIBUTE:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000332 errmsg = "Unescaped '<' not allowed in attributes values\n";
333 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000334 case XML_ERR_LITERAL_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000335 errmsg = "SystemLiteral \" or ' expected\n";
336 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000337 case XML_ERR_LITERAL_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000338 errmsg = "Unfinished System or Public ID \" or ' expected\n";
339 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000340 case XML_ERR_MISPLACED_CDATA_END:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000341 errmsg = "Sequence ']]>' not allowed in content\n";
342 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000343 case XML_ERR_URI_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000344 errmsg = "SYSTEM or PUBLIC, the URI is missing\n";
345 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000346 case XML_ERR_PUBID_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000347 errmsg = "PUBLIC, the Public Identifier is missing\n";
348 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000349 case XML_ERR_HYPHEN_IN_COMMENT:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000350 errmsg = "Comment must not contain '--' (double-hyphen)\n";
351 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000352 case XML_ERR_PI_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000353 errmsg = "xmlParsePI : no target name\n";
354 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000355 case XML_ERR_RESERVED_XML_NAME:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000356 errmsg = "Invalid PI name\n";
357 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000358 case XML_ERR_NOTATION_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000359 errmsg = "NOTATION: Name expected here\n";
360 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000361 case XML_ERR_NOTATION_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000362 errmsg = "'>' required to close NOTATION declaration\n";
363 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000364 case XML_ERR_VALUE_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000365 errmsg = "Entity value required\n";
366 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000367 case XML_ERR_URI_FRAGMENT:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000368 errmsg = "Fragment not allowed";
369 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000370 case XML_ERR_ATTLIST_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000371 errmsg = "'(' required to start ATTLIST enumeration\n";
372 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000373 case XML_ERR_NMTOKEN_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000374 errmsg = "NmToken expected in ATTLIST enumeration\n";
375 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000376 case XML_ERR_ATTLIST_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000377 errmsg = "')' required to finish ATTLIST enumeration\n";
378 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000379 case XML_ERR_MIXED_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000380 errmsg = "MixedContentDecl : '|' or ')*' expected\n";
381 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000382 case XML_ERR_PCDATA_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000383 errmsg = "MixedContentDecl : '#PCDATA' expected\n";
384 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000385 case XML_ERR_ELEMCONTENT_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000386 errmsg = "ContentDecl : Name or '(' expected\n";
387 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000388 case XML_ERR_ELEMCONTENT_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000389 errmsg = "ContentDecl : ',' '|' or ')' expected\n";
390 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000391 case XML_ERR_PEREF_IN_INT_SUBSET:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000392 errmsg =
393 "PEReference: forbidden within markup decl in internal subset\n";
394 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000395 case XML_ERR_GT_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000396 errmsg = "expected '>'\n";
397 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000398 case XML_ERR_CONDSEC_INVALID:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000399 errmsg = "XML conditional section '[' expected\n";
400 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000401 case XML_ERR_EXT_SUBSET_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000402 errmsg = "Content error in the external subset\n";
403 break;
404 case XML_ERR_CONDSEC_INVALID_KEYWORD:
405 errmsg =
406 "conditional section INCLUDE or IGNORE keyword expected\n";
407 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000408 case XML_ERR_CONDSEC_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000409 errmsg = "XML conditional section not closed\n";
410 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000411 case XML_ERR_XMLDECL_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000412 errmsg = "Text declaration '<?xml' required\n";
413 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000414 case XML_ERR_XMLDECL_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000415 errmsg = "parsing XML declaration: '?>' expected\n";
416 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000417 case XML_ERR_EXT_ENTITY_STANDALONE:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000418 errmsg = "external parsed entities cannot be standalone\n";
419 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000420 case XML_ERR_ENTITYREF_SEMICOL_MISSING:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000421 errmsg = "EntityRef: expecting ';'\n";
422 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000423 case XML_ERR_DOCTYPE_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000424 errmsg = "DOCTYPE improperly terminated\n";
425 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000426 case XML_ERR_LTSLASH_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000427 errmsg = "EndTag: '</' not found\n";
428 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000429 case XML_ERR_EQUAL_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000430 errmsg = "expected '='\n";
431 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000432 case XML_ERR_STRING_NOT_CLOSED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000433 errmsg = "String not closed expecting \" or '\n";
434 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000435 case XML_ERR_STRING_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000436 errmsg = "String not started expecting ' or \"\n";
437 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000438 case XML_ERR_ENCODING_NAME:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000439 errmsg = "Invalid XML encoding name\n";
440 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000441 case XML_ERR_STANDALONE_VALUE:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000442 errmsg = "standalone accepts only 'yes' or 'no'\n";
443 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000444 case XML_ERR_DOCUMENT_EMPTY:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000445 errmsg = "Document is empty\n";
446 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000447 case XML_ERR_DOCUMENT_END:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000448 errmsg = "Extra content at the end of the document\n";
449 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000450 case XML_ERR_NOT_WELL_BALANCED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000451 errmsg = "chunk is not well balanced\n";
452 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000453 case XML_ERR_EXTRA_CONTENT:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000454 errmsg = "extra content at the end of well balanced chunk\n";
455 break;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000456 case XML_ERR_VERSION_MISSING:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000457 errmsg = "Malformed declaration expecting version\n";
458 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000459#if 0
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000460 case:
461 errmsg = "\n";
462 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000463#endif
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000464 default:
465 errmsg = "Unregistered error message\n";
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000466 }
Daniel Veillard30e76072006-03-09 14:13:55 +0000467 if (ctxt != NULL)
468 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000469 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000470 XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, errmsg,
471 info);
Daniel Veillard30e76072006-03-09 14:13:55 +0000472 if (ctxt != NULL) {
473 ctxt->wellFormed = 0;
474 if (ctxt->recovery == 0)
475 ctxt->disableSAX = 1;
476 }
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000477}
478
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000479/**
480 * xmlFatalErrMsg:
481 * @ctxt: an XML parser context
482 * @error: the error number
483 * @msg: the error message
484 *
485 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
486 */
487static void
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000488xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
489 const char *msg)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000490{
Daniel Veillard157fee02003-10-31 10:36:03 +0000491 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
492 (ctxt->instate == XML_PARSER_EOF))
493 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000494 if (ctxt != NULL)
495 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000496 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
Daniel Veillardbccae2d2009-06-04 11:22:45 +0200497 XML_ERR_FATAL, NULL, 0, NULL, NULL, NULL, 0, 0, "%s", msg);
Daniel Veillard30e76072006-03-09 14:13:55 +0000498 if (ctxt != NULL) {
499 ctxt->wellFormed = 0;
500 if (ctxt->recovery == 0)
501 ctxt->disableSAX = 1;
502 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000503}
504
505/**
Daniel Veillard24eb9782003-10-04 21:08:09 +0000506 * xmlWarningMsg:
507 * @ctxt: an XML parser context
508 * @error: the error number
509 * @msg: the error message
510 * @str1: extra data
511 * @str2: extra data
512 *
513 * Handle a warning.
514 */
515static void
516xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
517 const char *msg, const xmlChar *str1, const xmlChar *str2)
518{
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000519 xmlStructuredErrorFunc schannel = NULL;
Daniel Veillard34e3f642008-07-29 09:02:27 +0000520
Daniel Veillard157fee02003-10-31 10:36:03 +0000521 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
522 (ctxt->instate == XML_PARSER_EOF))
523 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000524 if ((ctxt != NULL) && (ctxt->sax != NULL) &&
525 (ctxt->sax->initialized == XML_SAX2_MAGIC))
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000526 schannel = ctxt->sax->serror;
Daniel Veillardd44b9362009-09-07 12:15:08 +0200527 if (ctxt != NULL) {
528 __xmlRaiseError(schannel,
Daniel Veillard659e71e2003-10-10 14:10:40 +0000529 (ctxt->sax) ? ctxt->sax->warning : NULL,
530 ctxt->userData,
Daniel Veillard24eb9782003-10-04 21:08:09 +0000531 ctxt, NULL, XML_FROM_PARSER, error,
532 XML_ERR_WARNING, NULL, 0,
533 (const char *) str1, (const char *) str2, NULL, 0, 0,
534 msg, (const char *) str1, (const char *) str2);
Daniel Veillardd44b9362009-09-07 12:15:08 +0200535 } else {
536 __xmlRaiseError(schannel, NULL, NULL,
537 ctxt, NULL, XML_FROM_PARSER, error,
538 XML_ERR_WARNING, NULL, 0,
539 (const char *) str1, (const char *) str2, NULL, 0, 0,
540 msg, (const char *) str1, (const char *) str2);
541 }
Daniel Veillard24eb9782003-10-04 21:08:09 +0000542}
543
544/**
545 * xmlValidityError:
546 * @ctxt: an XML parser context
547 * @error: the error number
548 * @msg: the error message
549 * @str1: extra data
550 *
Daniel Veillardf88d8cf2003-12-08 10:25:02 +0000551 * Handle a validity error.
Daniel Veillard24eb9782003-10-04 21:08:09 +0000552 */
553static void
554xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error,
Daniel Veillardae0765b2008-07-31 19:54:59 +0000555 const char *msg, const xmlChar *str1, const xmlChar *str2)
Daniel Veillard24eb9782003-10-04 21:08:09 +0000556{
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000557 xmlStructuredErrorFunc schannel = NULL;
Daniel Veillard157fee02003-10-31 10:36:03 +0000558
559 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
560 (ctxt->instate == XML_PARSER_EOF))
561 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000562 if (ctxt != NULL) {
563 ctxt->errNo = error;
564 if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC))
565 schannel = ctxt->sax->serror;
566 }
Daniel Veillard76d36452009-09-07 11:19:33 +0200567 if (ctxt != NULL) {
568 __xmlRaiseError(schannel,
Daniel Veillard659e71e2003-10-10 14:10:40 +0000569 ctxt->vctxt.error, ctxt->vctxt.userData,
Daniel Veillard24eb9782003-10-04 21:08:09 +0000570 ctxt, NULL, XML_FROM_DTD, error,
571 XML_ERR_ERROR, NULL, 0, (const char *) str1,
Daniel Veillardae0765b2008-07-31 19:54:59 +0000572 (const char *) str2, NULL, 0, 0,
573 msg, (const char *) str1, (const char *) str2);
Daniel Veillard30e76072006-03-09 14:13:55 +0000574 ctxt->valid = 0;
Daniel Veillard76d36452009-09-07 11:19:33 +0200575 } else {
576 __xmlRaiseError(schannel, NULL, NULL,
577 ctxt, NULL, XML_FROM_DTD, error,
578 XML_ERR_ERROR, NULL, 0, (const char *) str1,
579 (const char *) str2, NULL, 0, 0,
580 msg, (const char *) str1, (const char *) str2);
Daniel Veillard30e76072006-03-09 14:13:55 +0000581 }
Daniel Veillard24eb9782003-10-04 21:08:09 +0000582}
583
584/**
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000585 * xmlFatalErrMsgInt:
586 * @ctxt: an XML parser context
587 * @error: the error number
588 * @msg: the error message
589 * @val: an integer value
590 *
591 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
592 */
593static void
594xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000595 const char *msg, int val)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000596{
Daniel Veillard157fee02003-10-31 10:36:03 +0000597 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
598 (ctxt->instate == XML_PARSER_EOF))
599 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000600 if (ctxt != NULL)
601 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000602 __xmlRaiseError(NULL, NULL, NULL,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000603 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
604 NULL, 0, NULL, NULL, NULL, val, 0, msg, val);
Daniel Veillard30e76072006-03-09 14:13:55 +0000605 if (ctxt != NULL) {
606 ctxt->wellFormed = 0;
607 if (ctxt->recovery == 0)
608 ctxt->disableSAX = 1;
609 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000610}
611
612/**
Daniel Veillardf403d292003-10-05 13:51:35 +0000613 * xmlFatalErrMsgStrIntStr:
614 * @ctxt: an XML parser context
615 * @error: the error number
616 * @msg: the error message
617 * @str1: an string info
618 * @val: an integer value
619 * @str2: an string info
620 *
621 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
622 */
623static void
624xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
625 const char *msg, const xmlChar *str1, int val,
626 const xmlChar *str2)
627{
Daniel Veillard157fee02003-10-31 10:36:03 +0000628 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
629 (ctxt->instate == XML_PARSER_EOF))
630 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000631 if (ctxt != NULL)
632 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000633 __xmlRaiseError(NULL, NULL, NULL,
Daniel Veillardf403d292003-10-05 13:51:35 +0000634 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
635 NULL, 0, (const char *) str1, (const char *) str2,
636 NULL, val, 0, msg, str1, val, str2);
Daniel Veillard30e76072006-03-09 14:13:55 +0000637 if (ctxt != NULL) {
638 ctxt->wellFormed = 0;
639 if (ctxt->recovery == 0)
640 ctxt->disableSAX = 1;
641 }
Daniel Veillardf403d292003-10-05 13:51:35 +0000642}
643
644/**
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000645 * xmlFatalErrMsgStr:
646 * @ctxt: an XML parser context
647 * @error: the error number
648 * @msg: the error message
649 * @val: a string value
650 *
651 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
652 */
653static void
654xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000655 const char *msg, const xmlChar * val)
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000656{
Daniel Veillard157fee02003-10-31 10:36:03 +0000657 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
658 (ctxt->instate == XML_PARSER_EOF))
659 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000660 if (ctxt != NULL)
661 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000662 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000663 XML_FROM_PARSER, error, XML_ERR_FATAL,
664 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
665 val);
Daniel Veillard30e76072006-03-09 14:13:55 +0000666 if (ctxt != NULL) {
667 ctxt->wellFormed = 0;
668 if (ctxt->recovery == 0)
669 ctxt->disableSAX = 1;
670 }
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000671}
672
673/**
Daniel Veillardf403d292003-10-05 13:51:35 +0000674 * xmlErrMsgStr:
675 * @ctxt: an XML parser context
676 * @error: the error number
677 * @msg: the error message
678 * @val: a string value
679 *
680 * Handle a non fatal parser error
681 */
682static void
683xmlErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
684 const char *msg, const xmlChar * val)
685{
Daniel Veillard157fee02003-10-31 10:36:03 +0000686 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
687 (ctxt->instate == XML_PARSER_EOF))
688 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000689 if (ctxt != NULL)
690 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000691 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
Daniel Veillardf403d292003-10-05 13:51:35 +0000692 XML_FROM_PARSER, error, XML_ERR_ERROR,
693 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
694 val);
695}
696
697/**
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000698 * xmlNsErr:
699 * @ctxt: an XML parser context
700 * @error: the error number
701 * @msg: the message
702 * @info1: extra information string
703 * @info2: extra information string
704 *
705 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
706 */
707static void
708xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
709 const char *msg,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000710 const xmlChar * info1, const xmlChar * info2,
711 const xmlChar * info3)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000712{
Daniel Veillard157fee02003-10-31 10:36:03 +0000713 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
714 (ctxt->instate == XML_PARSER_EOF))
715 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000716 if (ctxt != NULL)
717 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000718 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000719 XML_ERR_ERROR, NULL, 0, (const char *) info1,
720 (const char *) info2, (const char *) info3, 0, 0, msg,
721 info1, info2, info3);
Daniel Veillard30e76072006-03-09 14:13:55 +0000722 if (ctxt != NULL)
723 ctxt->nsWellFormed = 0;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000724}
725
Daniel Veillard37334572008-07-31 08:20:02 +0000726/**
727 * xmlNsWarn
728 * @ctxt: an XML parser context
729 * @error: the error number
730 * @msg: the message
731 * @info1: extra information string
732 * @info2: extra information string
733 *
734 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
735 */
736static void
737xmlNsWarn(xmlParserCtxtPtr ctxt, xmlParserErrors error,
738 const char *msg,
739 const xmlChar * info1, const xmlChar * info2,
740 const xmlChar * info3)
741{
742 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
743 (ctxt->instate == XML_PARSER_EOF))
744 return;
Daniel Veillard37334572008-07-31 08:20:02 +0000745 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
746 XML_ERR_WARNING, NULL, 0, (const char *) info1,
747 (const char *) info2, (const char *) info3, 0, 0, msg,
748 info1, info2, info3);
749}
750
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000751/************************************************************************
752 * *
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000753 * Library wide options *
754 * *
755 ************************************************************************/
756
757/**
758 * xmlHasFeature:
759 * @feature: the feature to be examined
760 *
761 * Examines if the library has been compiled with a given feature.
762 *
763 * Returns a non-zero value if the feature exist, otherwise zero.
764 * Returns zero (0) if the feature does not exist or an unknown
765 * unknown feature is requested, non-zero otherwise.
766 */
767int
768xmlHasFeature(xmlFeature feature)
769{
770 switch (feature) {
Daniel Veillard602434d2005-09-12 09:20:31 +0000771 case XML_WITH_THREAD:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000772#ifdef LIBXML_THREAD_ENABLED
773 return(1);
774#else
775 return(0);
776#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000777 case XML_WITH_TREE:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000778#ifdef LIBXML_TREE_ENABLED
779 return(1);
780#else
781 return(0);
782#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000783 case XML_WITH_OUTPUT:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000784#ifdef LIBXML_OUTPUT_ENABLED
785 return(1);
786#else
787 return(0);
788#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000789 case XML_WITH_PUSH:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000790#ifdef LIBXML_PUSH_ENABLED
791 return(1);
792#else
793 return(0);
794#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000795 case XML_WITH_READER:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000796#ifdef LIBXML_READER_ENABLED
797 return(1);
798#else
799 return(0);
800#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000801 case XML_WITH_PATTERN:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000802#ifdef LIBXML_PATTERN_ENABLED
803 return(1);
804#else
805 return(0);
806#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000807 case XML_WITH_WRITER:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000808#ifdef LIBXML_WRITER_ENABLED
809 return(1);
810#else
811 return(0);
812#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000813 case XML_WITH_SAX1:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000814#ifdef LIBXML_SAX1_ENABLED
815 return(1);
816#else
817 return(0);
818#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000819 case XML_WITH_FTP:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000820#ifdef LIBXML_FTP_ENABLED
821 return(1);
822#else
823 return(0);
824#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000825 case XML_WITH_HTTP:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000826#ifdef LIBXML_HTTP_ENABLED
827 return(1);
828#else
829 return(0);
830#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000831 case XML_WITH_VALID:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000832#ifdef LIBXML_VALID_ENABLED
833 return(1);
834#else
835 return(0);
836#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000837 case XML_WITH_HTML:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000838#ifdef LIBXML_HTML_ENABLED
839 return(1);
840#else
841 return(0);
842#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000843 case XML_WITH_LEGACY:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000844#ifdef LIBXML_LEGACY_ENABLED
845 return(1);
846#else
847 return(0);
848#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000849 case XML_WITH_C14N:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000850#ifdef LIBXML_C14N_ENABLED
851 return(1);
852#else
853 return(0);
854#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000855 case XML_WITH_CATALOG:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000856#ifdef LIBXML_CATALOG_ENABLED
857 return(1);
858#else
859 return(0);
860#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000861 case XML_WITH_XPATH:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000862#ifdef LIBXML_XPATH_ENABLED
863 return(1);
864#else
865 return(0);
866#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000867 case XML_WITH_XPTR:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000868#ifdef LIBXML_XPTR_ENABLED
869 return(1);
870#else
871 return(0);
872#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000873 case XML_WITH_XINCLUDE:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000874#ifdef LIBXML_XINCLUDE_ENABLED
875 return(1);
876#else
877 return(0);
878#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000879 case XML_WITH_ICONV:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000880#ifdef LIBXML_ICONV_ENABLED
881 return(1);
882#else
883 return(0);
884#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000885 case XML_WITH_ISO8859X:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000886#ifdef LIBXML_ISO8859X_ENABLED
887 return(1);
888#else
889 return(0);
890#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000891 case XML_WITH_UNICODE:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000892#ifdef LIBXML_UNICODE_ENABLED
893 return(1);
894#else
895 return(0);
896#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000897 case XML_WITH_REGEXP:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000898#ifdef LIBXML_REGEXP_ENABLED
899 return(1);
900#else
901 return(0);
902#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000903 case XML_WITH_AUTOMATA:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000904#ifdef LIBXML_AUTOMATA_ENABLED
905 return(1);
906#else
907 return(0);
908#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000909 case XML_WITH_EXPR:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000910#ifdef LIBXML_EXPR_ENABLED
911 return(1);
912#else
913 return(0);
914#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000915 case XML_WITH_SCHEMAS:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000916#ifdef LIBXML_SCHEMAS_ENABLED
917 return(1);
918#else
919 return(0);
920#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000921 case XML_WITH_SCHEMATRON:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000922#ifdef LIBXML_SCHEMATRON_ENABLED
923 return(1);
924#else
925 return(0);
926#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000927 case XML_WITH_MODULES:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000928#ifdef LIBXML_MODULES_ENABLED
929 return(1);
930#else
931 return(0);
932#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000933 case XML_WITH_DEBUG:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000934#ifdef LIBXML_DEBUG_ENABLED
935 return(1);
936#else
937 return(0);
938#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000939 case XML_WITH_DEBUG_MEM:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000940#ifdef DEBUG_MEMORY_LOCATION
941 return(1);
942#else
943 return(0);
944#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000945 case XML_WITH_DEBUG_RUN:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000946#ifdef LIBXML_DEBUG_RUNTIME
947 return(1);
948#else
949 return(0);
Daniel Veillard34e3f642008-07-29 09:02:27 +0000950#endif
Daniel Veillard75acfee2006-07-13 06:29:56 +0000951 case XML_WITH_ZLIB:
952#ifdef LIBXML_ZLIB_ENABLED
953 return(1);
954#else
955 return(0);
956#endif
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000957 default:
958 break;
959 }
960 return(0);
961}
962
963/************************************************************************
964 * *
Daniel Veillarde57ec792003-09-10 10:50:59 +0000965 * SAX2 defaulted attributes handling *
966 * *
967 ************************************************************************/
968
969/**
970 * xmlDetectSAX2:
971 * @ctxt: an XML parser context
972 *
973 * Do the SAX2 detection and specific intialization
974 */
975static void
976xmlDetectSAX2(xmlParserCtxtPtr ctxt) {
977 if (ctxt == NULL) return;
Daniel Veillard81273902003-09-30 00:43:48 +0000978#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +0000979 if ((ctxt->sax) && (ctxt->sax->initialized == XML_SAX2_MAGIC) &&
980 ((ctxt->sax->startElementNs != NULL) ||
981 (ctxt->sax->endElementNs != NULL))) ctxt->sax2 = 1;
Daniel Veillard81273902003-09-30 00:43:48 +0000982#else
983 ctxt->sax2 = 1;
984#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +0000985
986 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
987 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
988 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
William M. Brack9f797ab2004-07-28 07:40:12 +0000989 if ((ctxt->str_xml==NULL) || (ctxt->str_xmlns==NULL) ||
990 (ctxt->str_xml_ns == NULL)) {
991 xmlErrMemory(ctxt, NULL);
992 }
Daniel Veillarde57ec792003-09-10 10:50:59 +0000993}
994
Daniel Veillarde57ec792003-09-10 10:50:59 +0000995typedef struct _xmlDefAttrs xmlDefAttrs;
996typedef xmlDefAttrs *xmlDefAttrsPtr;
997struct _xmlDefAttrs {
998 int nbAttrs; /* number of defaulted attributes on that element */
999 int maxAttrs; /* the size of the array */
Daniel Veillardae0765b2008-07-31 19:54:59 +00001000 const xmlChar *values[5]; /* array of localname/prefix/values/external */
Daniel Veillarde57ec792003-09-10 10:50:59 +00001001};
Daniel Veillarde57ec792003-09-10 10:50:59 +00001002
1003/**
Daniel Veillard97c9ce22008-03-25 16:52:41 +00001004 * xmlAttrNormalizeSpace:
1005 * @src: the source string
1006 * @dst: the target string
1007 *
1008 * Normalize the space in non CDATA attribute values:
1009 * If the attribute type is not CDATA, then the XML processor MUST further
1010 * process the normalized attribute value by discarding any leading and
1011 * trailing space (#x20) characters, and by replacing sequences of space
1012 * (#x20) characters by a single space (#x20) character.
1013 * Note that the size of dst need to be at least src, and if one doesn't need
1014 * to preserve dst (and it doesn't come from a dictionary or read-only) then
1015 * passing src as dst is just fine.
1016 *
1017 * Returns a pointer to the normalized value (dst) or NULL if no conversion
1018 * is needed.
1019 */
1020static xmlChar *
1021xmlAttrNormalizeSpace(const xmlChar *src, xmlChar *dst)
1022{
1023 if ((src == NULL) || (dst == NULL))
1024 return(NULL);
1025
1026 while (*src == 0x20) src++;
1027 while (*src != 0) {
1028 if (*src == 0x20) {
1029 while (*src == 0x20) src++;
1030 if (*src != 0)
1031 *dst++ = 0x20;
1032 } else {
1033 *dst++ = *src++;
1034 }
1035 }
1036 *dst = 0;
1037 if (dst == src)
1038 return(NULL);
1039 return(dst);
1040}
1041
1042/**
1043 * xmlAttrNormalizeSpace2:
1044 * @src: the source string
1045 *
1046 * Normalize the space in non CDATA attribute values, a slightly more complex
1047 * front end to avoid allocation problems when running on attribute values
1048 * coming from the input.
1049 *
1050 * Returns a pointer to the normalized value (dst) or NULL if no conversion
1051 * is needed.
1052 */
1053static const xmlChar *
Daniel Veillardae0765b2008-07-31 19:54:59 +00001054xmlAttrNormalizeSpace2(xmlParserCtxtPtr ctxt, xmlChar *src, int *len)
Daniel Veillard97c9ce22008-03-25 16:52:41 +00001055{
1056 int i;
1057 int remove_head = 0;
1058 int need_realloc = 0;
1059 const xmlChar *cur;
1060
1061 if ((ctxt == NULL) || (src == NULL) || (len == NULL))
1062 return(NULL);
1063 i = *len;
1064 if (i <= 0)
1065 return(NULL);
1066
1067 cur = src;
1068 while (*cur == 0x20) {
1069 cur++;
1070 remove_head++;
1071 }
1072 while (*cur != 0) {
1073 if (*cur == 0x20) {
1074 cur++;
1075 if ((*cur == 0x20) || (*cur == 0)) {
1076 need_realloc = 1;
1077 break;
1078 }
1079 } else
1080 cur++;
1081 }
1082 if (need_realloc) {
1083 xmlChar *ret;
1084
1085 ret = xmlStrndup(src + remove_head, i - remove_head + 1);
1086 if (ret == NULL) {
1087 xmlErrMemory(ctxt, NULL);
1088 return(NULL);
1089 }
1090 xmlAttrNormalizeSpace(ret, ret);
1091 *len = (int) strlen((const char *)ret);
1092 return(ret);
1093 } else if (remove_head) {
1094 *len -= remove_head;
Daniel Veillardae0765b2008-07-31 19:54:59 +00001095 memmove(src, src + remove_head, 1 + *len);
1096 return(src);
Daniel Veillard97c9ce22008-03-25 16:52:41 +00001097 }
1098 return(NULL);
1099}
1100
1101/**
Daniel Veillarde57ec792003-09-10 10:50:59 +00001102 * xmlAddDefAttrs:
1103 * @ctxt: an XML parser context
1104 * @fullname: the element fullname
1105 * @fullattr: the attribute fullname
1106 * @value: the attribute value
1107 *
1108 * Add a defaulted attribute for an element
1109 */
1110static void
1111xmlAddDefAttrs(xmlParserCtxtPtr ctxt,
1112 const xmlChar *fullname,
1113 const xmlChar *fullattr,
1114 const xmlChar *value) {
1115 xmlDefAttrsPtr defaults;
1116 int len;
1117 const xmlChar *name;
1118 const xmlChar *prefix;
1119
Daniel Veillard6a31b832008-03-26 14:06:44 +00001120 /*
1121 * Allows to detect attribute redefinitions
1122 */
1123 if (ctxt->attsSpecial != NULL) {
1124 if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1125 return;
1126 }
1127
Daniel Veillarde57ec792003-09-10 10:50:59 +00001128 if (ctxt->attsDefault == NULL) {
Daniel Veillard316a5c32005-01-23 22:56:39 +00001129 ctxt->attsDefault = xmlHashCreateDict(10, ctxt->dict);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001130 if (ctxt->attsDefault == NULL)
1131 goto mem_error;
1132 }
1133
1134 /*
Daniel Veillard079f6a72004-09-23 13:15:03 +00001135 * split the element name into prefix:localname , the string found
1136 * are within the DTD and then not associated to namespace names.
Daniel Veillarde57ec792003-09-10 10:50:59 +00001137 */
1138 name = xmlSplitQName3(fullname, &len);
1139 if (name == NULL) {
1140 name = xmlDictLookup(ctxt->dict, fullname, -1);
1141 prefix = NULL;
1142 } else {
1143 name = xmlDictLookup(ctxt->dict, name, -1);
1144 prefix = xmlDictLookup(ctxt->dict, fullname, len);
1145 }
1146
1147 /*
1148 * make sure there is some storage
1149 */
1150 defaults = xmlHashLookup2(ctxt->attsDefault, name, prefix);
1151 if (defaults == NULL) {
1152 defaults = (xmlDefAttrsPtr) xmlMalloc(sizeof(xmlDefAttrs) +
Daniel Veillardae0765b2008-07-31 19:54:59 +00001153 (4 * 5) * sizeof(const xmlChar *));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001154 if (defaults == NULL)
1155 goto mem_error;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001156 defaults->nbAttrs = 0;
Daniel Veillard079f6a72004-09-23 13:15:03 +00001157 defaults->maxAttrs = 4;
Daniel Veillard68b6e022008-03-31 09:26:00 +00001158 if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1159 defaults, NULL) < 0) {
1160 xmlFree(defaults);
1161 goto mem_error;
1162 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00001163 } else if (defaults->nbAttrs >= defaults->maxAttrs) {
Daniel Veillard079f6a72004-09-23 13:15:03 +00001164 xmlDefAttrsPtr temp;
1165
1166 temp = (xmlDefAttrsPtr) xmlRealloc(defaults, sizeof(xmlDefAttrs) +
Daniel Veillardae0765b2008-07-31 19:54:59 +00001167 (2 * defaults->maxAttrs * 5) * sizeof(const xmlChar *));
Daniel Veillard079f6a72004-09-23 13:15:03 +00001168 if (temp == NULL)
Daniel Veillarde57ec792003-09-10 10:50:59 +00001169 goto mem_error;
Daniel Veillard079f6a72004-09-23 13:15:03 +00001170 defaults = temp;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001171 defaults->maxAttrs *= 2;
Daniel Veillard68b6e022008-03-31 09:26:00 +00001172 if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1173 defaults, NULL) < 0) {
1174 xmlFree(defaults);
1175 goto mem_error;
1176 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00001177 }
1178
1179 /*
Daniel Veillard8874b942005-08-25 13:19:21 +00001180 * Split the element name into prefix:localname , the string found
Daniel Veillarde57ec792003-09-10 10:50:59 +00001181 * are within the DTD and hen not associated to namespace names.
1182 */
1183 name = xmlSplitQName3(fullattr, &len);
1184 if (name == NULL) {
1185 name = xmlDictLookup(ctxt->dict, fullattr, -1);
1186 prefix = NULL;
1187 } else {
1188 name = xmlDictLookup(ctxt->dict, name, -1);
1189 prefix = xmlDictLookup(ctxt->dict, fullattr, len);
1190 }
1191
Daniel Veillardae0765b2008-07-31 19:54:59 +00001192 defaults->values[5 * defaults->nbAttrs] = name;
1193 defaults->values[5 * defaults->nbAttrs + 1] = prefix;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001194 /* intern the string and precompute the end */
1195 len = xmlStrlen(value);
1196 value = xmlDictLookup(ctxt->dict, value, len);
Daniel Veillardae0765b2008-07-31 19:54:59 +00001197 defaults->values[5 * defaults->nbAttrs + 2] = value;
1198 defaults->values[5 * defaults->nbAttrs + 3] = value + len;
1199 if (ctxt->external)
1200 defaults->values[5 * defaults->nbAttrs + 4] = BAD_CAST "external";
1201 else
1202 defaults->values[5 * defaults->nbAttrs + 4] = NULL;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001203 defaults->nbAttrs++;
1204
1205 return;
1206
1207mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001208 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001209 return;
1210}
1211
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001212/**
1213 * xmlAddSpecialAttr:
1214 * @ctxt: an XML parser context
1215 * @fullname: the element fullname
1216 * @fullattr: the attribute fullname
1217 * @type: the attribute type
1218 *
Daniel Veillardac4118d2008-01-11 05:27:32 +00001219 * Register this attribute type
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001220 */
1221static void
1222xmlAddSpecialAttr(xmlParserCtxtPtr ctxt,
1223 const xmlChar *fullname,
1224 const xmlChar *fullattr,
1225 int type)
1226{
1227 if (ctxt->attsSpecial == NULL) {
Daniel Veillard316a5c32005-01-23 22:56:39 +00001228 ctxt->attsSpecial = xmlHashCreateDict(10, ctxt->dict);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001229 if (ctxt->attsSpecial == NULL)
1230 goto mem_error;
1231 }
1232
Daniel Veillardac4118d2008-01-11 05:27:32 +00001233 if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1234 return;
1235
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +00001236 xmlHashAddEntry2(ctxt->attsSpecial, fullname, fullattr,
1237 (void *) (long) type);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001238 return;
1239
1240mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001241 xmlErrMemory(ctxt, NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001242 return;
1243}
1244
Daniel Veillard4432df22003-09-28 18:58:27 +00001245/**
Daniel Veillardac4118d2008-01-11 05:27:32 +00001246 * xmlCleanSpecialAttrCallback:
1247 *
1248 * Removes CDATA attributes from the special attribute table
1249 */
1250static void
1251xmlCleanSpecialAttrCallback(void *payload, void *data,
1252 const xmlChar *fullname, const xmlChar *fullattr,
1253 const xmlChar *unused ATTRIBUTE_UNUSED) {
1254 xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) data;
1255
Daniel Veillardb3edafd2008-01-11 08:00:57 +00001256 if (((long) payload) == XML_ATTRIBUTE_CDATA) {
Daniel Veillardac4118d2008-01-11 05:27:32 +00001257 xmlHashRemoveEntry2(ctxt->attsSpecial, fullname, fullattr, NULL);
1258 }
1259}
1260
1261/**
1262 * xmlCleanSpecialAttr:
1263 * @ctxt: an XML parser context
1264 *
1265 * Trim the list of attributes defined to remove all those of type
1266 * CDATA as they are not special. This call should be done when finishing
1267 * to parse the DTD and before starting to parse the document root.
1268 */
1269static void
1270xmlCleanSpecialAttr(xmlParserCtxtPtr ctxt)
1271{
1272 if (ctxt->attsSpecial == NULL)
1273 return;
1274
1275 xmlHashScanFull(ctxt->attsSpecial, xmlCleanSpecialAttrCallback, ctxt);
1276
1277 if (xmlHashSize(ctxt->attsSpecial) == 0) {
1278 xmlHashFree(ctxt->attsSpecial, NULL);
1279 ctxt->attsSpecial = NULL;
1280 }
1281 return;
1282}
1283
1284/**
Daniel Veillard4432df22003-09-28 18:58:27 +00001285 * xmlCheckLanguageID:
1286 * @lang: pointer to the string value
1287 *
1288 * Checks that the value conforms to the LanguageID production:
1289 *
1290 * NOTE: this is somewhat deprecated, those productions were removed from
1291 * the XML Second edition.
1292 *
1293 * [33] LanguageID ::= Langcode ('-' Subcode)*
1294 * [34] Langcode ::= ISO639Code | IanaCode | UserCode
1295 * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
1296 * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
1297 * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
1298 * [38] Subcode ::= ([a-z] | [A-Z])+
1299 *
Daniel Veillard60587d62010-11-04 15:16:27 +01001300 * The current REC reference the sucessors of RFC 1766, currently 5646
1301 *
1302 * http://www.rfc-editor.org/rfc/rfc5646.txt
1303 * langtag = language
1304 * ["-" script]
1305 * ["-" region]
1306 * *("-" variant)
1307 * *("-" extension)
1308 * ["-" privateuse]
1309 * language = 2*3ALPHA ; shortest ISO 639 code
1310 * ["-" extlang] ; sometimes followed by
1311 * ; extended language subtags
1312 * / 4ALPHA ; or reserved for future use
1313 * / 5*8ALPHA ; or registered language subtag
1314 *
1315 * extlang = 3ALPHA ; selected ISO 639 codes
1316 * *2("-" 3ALPHA) ; permanently reserved
1317 *
1318 * script = 4ALPHA ; ISO 15924 code
1319 *
1320 * region = 2ALPHA ; ISO 3166-1 code
1321 * / 3DIGIT ; UN M.49 code
1322 *
1323 * variant = 5*8alphanum ; registered variants
1324 * / (DIGIT 3alphanum)
1325 *
1326 * extension = singleton 1*("-" (2*8alphanum))
1327 *
1328 * ; Single alphanumerics
1329 * ; "x" reserved for private use
1330 * singleton = DIGIT ; 0 - 9
1331 * / %x41-57 ; A - W
1332 * / %x59-5A ; Y - Z
1333 * / %x61-77 ; a - w
1334 * / %x79-7A ; y - z
1335 *
1336 * it sounds right to still allow Irregular i-xxx IANA and user codes too
1337 * The parser below doesn't try to cope with extension or privateuse
1338 * that could be added but that's not interoperable anyway
1339 *
Daniel Veillard4432df22003-09-28 18:58:27 +00001340 * Returns 1 if correct 0 otherwise
1341 **/
1342int
1343xmlCheckLanguageID(const xmlChar * lang)
1344{
Daniel Veillard60587d62010-11-04 15:16:27 +01001345 const xmlChar *cur = lang, *nxt;
Daniel Veillard4432df22003-09-28 18:58:27 +00001346
1347 if (cur == NULL)
1348 return (0);
1349 if (((cur[0] == 'i') && (cur[1] == '-')) ||
Daniel Veillard60587d62010-11-04 15:16:27 +01001350 ((cur[0] == 'I') && (cur[1] == '-')) ||
1351 ((cur[0] == 'x') && (cur[1] == '-')) ||
1352 ((cur[0] == 'X') && (cur[1] == '-'))) {
Daniel Veillard4432df22003-09-28 18:58:27 +00001353 /*
Daniel Veillard60587d62010-11-04 15:16:27 +01001354 * Still allow IANA code and user code which were coming
1355 * from the previous version of the XML-1.0 specification
1356 * it's deprecated but we should not fail
Daniel Veillard4432df22003-09-28 18:58:27 +00001357 */
1358 cur += 2;
Daniel Veillard60587d62010-11-04 15:16:27 +01001359 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
Daniel Veillard4432df22003-09-28 18:58:27 +00001360 ((cur[0] >= 'a') && (cur[0] <= 'z')))
1361 cur++;
Daniel Veillard60587d62010-11-04 15:16:27 +01001362 return(cur[0] == 0);
Daniel Veillard4432df22003-09-28 18:58:27 +00001363 }
Daniel Veillard60587d62010-11-04 15:16:27 +01001364 nxt = cur;
1365 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1366 ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1367 nxt++;
1368 if (nxt - cur >= 4) {
1369 /*
1370 * Reserved
1371 */
1372 if ((nxt - cur > 8) || (nxt[0] != 0))
1373 return(0);
1374 return(1);
1375 }
1376 if (nxt - cur < 2)
1377 return(0);
1378 /* we got an ISO 639 code */
1379 if (nxt[0] == 0)
1380 return(1);
1381 if (nxt[0] != '-')
1382 return(0);
1383
1384 nxt++;
1385 cur = nxt;
1386 /* now we can have extlang or script or region or variant */
1387 if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1388 goto region_m49;
1389
1390 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1391 ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1392 nxt++;
1393 if (nxt - cur == 4)
1394 goto script;
1395 if (nxt - cur == 2)
1396 goto region;
1397 if ((nxt - cur >= 5) && (nxt - cur <= 8))
1398 goto variant;
1399 if (nxt - cur != 3)
1400 return(0);
1401 /* we parsed an extlang */
1402 if (nxt[0] == 0)
1403 return(1);
1404 if (nxt[0] != '-')
1405 return(0);
1406
1407 nxt++;
1408 cur = nxt;
1409 /* now we can have script or region or variant */
1410 if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1411 goto region_m49;
1412
1413 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1414 ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1415 nxt++;
1416 if (nxt - cur == 2)
1417 goto region;
1418 if ((nxt - cur >= 5) && (nxt - cur <= 8))
1419 goto variant;
1420 if (nxt - cur != 4)
1421 return(0);
1422 /* we parsed a script */
1423script:
1424 if (nxt[0] == 0)
1425 return(1);
1426 if (nxt[0] != '-')
1427 return(0);
1428
1429 nxt++;
1430 cur = nxt;
1431 /* now we can have region or variant */
1432 if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1433 goto region_m49;
1434
1435 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1436 ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1437 nxt++;
1438
1439 if ((nxt - cur >= 5) && (nxt - cur <= 8))
1440 goto variant;
1441 if (nxt - cur != 2)
1442 return(0);
1443 /* we parsed a region */
1444region:
1445 if (nxt[0] == 0)
1446 return(1);
1447 if (nxt[0] != '-')
1448 return(0);
1449
1450 nxt++;
1451 cur = nxt;
1452 /* now we can just have a variant */
1453 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1454 ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1455 nxt++;
1456
1457 if ((nxt - cur < 5) || (nxt - cur > 8))
1458 return(0);
1459
1460 /* we parsed a variant */
1461variant:
1462 if (nxt[0] == 0)
1463 return(1);
1464 if (nxt[0] != '-')
1465 return(0);
1466 /* extensions and private use subtags not checked */
Daniel Veillard4432df22003-09-28 18:58:27 +00001467 return (1);
Daniel Veillard60587d62010-11-04 15:16:27 +01001468
1469region_m49:
1470 if (((nxt[1] >= '0') && (nxt[1] <= '9')) &&
1471 ((nxt[2] >= '0') && (nxt[2] <= '9'))) {
1472 nxt += 3;
1473 goto region;
1474 }
1475 return(0);
Daniel Veillard4432df22003-09-28 18:58:27 +00001476}
1477
Owen Taylor3473f882001-02-23 17:55:21 +00001478/************************************************************************
1479 * *
Daniel Veillard0161e632008-08-28 15:36:32 +00001480 * Parser stacks related functions and macros *
Owen Taylor3473f882001-02-23 17:55:21 +00001481 * *
1482 ************************************************************************/
1483
/*
 * Forward declaration of a file-local function; its definition is not
 * part of this section of the file.
 */
static xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
                                            const xmlChar ** str);
Owen Taylor3473f882001-02-23 17:55:21 +00001486
Daniel Veillard0fb18932003-09-07 09:14:37 +00001487#ifdef SAX2
1488/**
1489 * nsPush:
1490 * @ctxt: an XML parser context
1491 * @prefix: the namespace prefix or NULL
1492 * @URL: the namespace name
1493 *
1494 * Pushes a new parser namespace on top of the ns stack
1495 *
William M. Brack7b9154b2003-09-27 19:23:50 +00001496 * Returns -1 in case of error, -2 if the namespace should be discarded
1497 * and the index in the stack otherwise.
Daniel Veillard0fb18932003-09-07 09:14:37 +00001498 */
1499static int
1500nsPush(xmlParserCtxtPtr ctxt, const xmlChar *prefix, const xmlChar *URL)
1501{
Daniel Veillarddca8cc72003-09-26 13:53:14 +00001502 if (ctxt->options & XML_PARSE_NSCLEAN) {
1503 int i;
1504 for (i = 0;i < ctxt->nsNr;i += 2) {
1505 if (ctxt->nsTab[i] == prefix) {
1506 /* in scope */
1507 if (ctxt->nsTab[i + 1] == URL)
1508 return(-2);
1509 /* out of scope keep it */
1510 break;
1511 }
1512 }
1513 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00001514 if ((ctxt->nsMax == 0) || (ctxt->nsTab == NULL)) {
1515 ctxt->nsMax = 10;
1516 ctxt->nsNr = 0;
1517 ctxt->nsTab = (const xmlChar **)
1518 xmlMalloc(ctxt->nsMax * sizeof(xmlChar *));
1519 if (ctxt->nsTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001520 xmlErrMemory(ctxt, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00001521 ctxt->nsMax = 0;
1522 return (-1);
1523 }
1524 } else if (ctxt->nsNr >= ctxt->nsMax) {
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00001525 const xmlChar ** tmp;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001526 ctxt->nsMax *= 2;
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00001527 tmp = (const xmlChar **) xmlRealloc((char *) ctxt->nsTab,
1528 ctxt->nsMax * sizeof(ctxt->nsTab[0]));
1529 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001530 xmlErrMemory(ctxt, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00001531 ctxt->nsMax /= 2;
1532 return (-1);
1533 }
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00001534 ctxt->nsTab = tmp;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001535 }
1536 ctxt->nsTab[ctxt->nsNr++] = prefix;
1537 ctxt->nsTab[ctxt->nsNr++] = URL;
1538 return (ctxt->nsNr);
1539}
1540/**
1541 * nsPop:
1542 * @ctxt: an XML parser context
1543 * @nr: the number to pop
1544 *
1545 * Pops the top @nr parser prefix/namespace from the ns stack
1546 *
1547 * Returns the number of namespaces removed
1548 */
1549static int
1550nsPop(xmlParserCtxtPtr ctxt, int nr)
1551{
1552 int i;
1553
1554 if (ctxt->nsTab == NULL) return(0);
1555 if (ctxt->nsNr < nr) {
1556 xmlGenericError(xmlGenericErrorContext, "Pbm popping %d NS\n", nr);
1557 nr = ctxt->nsNr;
1558 }
1559 if (ctxt->nsNr <= 0)
1560 return (0);
Daniel Veillard34e3f642008-07-29 09:02:27 +00001561
Daniel Veillard0fb18932003-09-07 09:14:37 +00001562 for (i = 0;i < nr;i++) {
1563 ctxt->nsNr--;
1564 ctxt->nsTab[ctxt->nsNr] = NULL;
1565 }
1566 return(nr);
1567}
1568#endif
1569
1570static int
1571xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt, int nr) {
1572 const xmlChar **atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001573 int *attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001574 int maxatts;
1575
1576 if (ctxt->atts == NULL) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00001577 maxatts = 55; /* allow for 10 attrs by default */
Daniel Veillard0fb18932003-09-07 09:14:37 +00001578 atts = (const xmlChar **)
1579 xmlMalloc(maxatts * sizeof(xmlChar *));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001580 if (atts == NULL) goto mem_error;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001581 ctxt->atts = atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001582 attallocs = (int *) xmlMalloc((maxatts / 5) * sizeof(int));
1583 if (attallocs == NULL) goto mem_error;
1584 ctxt->attallocs = attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001585 ctxt->maxatts = maxatts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001586 } else if (nr + 5 > ctxt->maxatts) {
1587 maxatts = (nr + 5) * 2;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001588 atts = (const xmlChar **) xmlRealloc((void *) ctxt->atts,
1589 maxatts * sizeof(const xmlChar *));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001590 if (atts == NULL) goto mem_error;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001591 ctxt->atts = atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001592 attallocs = (int *) xmlRealloc((void *) ctxt->attallocs,
1593 (maxatts / 5) * sizeof(int));
1594 if (attallocs == NULL) goto mem_error;
1595 ctxt->attallocs = attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001596 ctxt->maxatts = maxatts;
1597 }
1598 return(ctxt->maxatts);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001599mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001600 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001601 return(-1);
Daniel Veillard0fb18932003-09-07 09:14:37 +00001602}
1603
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001604/**
1605 * inputPush:
1606 * @ctxt: an XML parser context
Daniel Veillard9d06d302002-01-22 18:15:52 +00001607 * @value: the parser input
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001608 *
1609 * Pushes a new parser input on top of the input stack
Daniel Veillard9d06d302002-01-22 18:15:52 +00001610 *
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001611 * Returns -1 in case of error, the index in the stack otherwise
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001612 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001613int
Daniel Veillard1c732d22002-11-30 11:22:59 +00001614inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value)
1615{
Daniel Veillard36e5cd52004-11-02 14:52:23 +00001616 if ((ctxt == NULL) || (value == NULL))
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001617 return(-1);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001618 if (ctxt->inputNr >= ctxt->inputMax) {
1619 ctxt->inputMax *= 2;
1620 ctxt->inputTab =
1621 (xmlParserInputPtr *) xmlRealloc(ctxt->inputTab,
1622 ctxt->inputMax *
1623 sizeof(ctxt->inputTab[0]));
1624 if (ctxt->inputTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001625 xmlErrMemory(ctxt, NULL);
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001626 xmlFreeInputStream(value);
1627 ctxt->inputMax /= 2;
1628 value = NULL;
1629 return (-1);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001630 }
1631 }
1632 ctxt->inputTab[ctxt->inputNr] = value;
1633 ctxt->input = value;
1634 return (ctxt->inputNr++);
1635}
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001636/**
Daniel Veillard1c732d22002-11-30 11:22:59 +00001637 * inputPop:
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001638 * @ctxt: an XML parser context
1639 *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001640 * Pops the top parser input from the input stack
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001641 *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001642 * Returns the input just removed
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001643 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001644xmlParserInputPtr
Daniel Veillard1c732d22002-11-30 11:22:59 +00001645inputPop(xmlParserCtxtPtr ctxt)
1646{
1647 xmlParserInputPtr ret;
1648
Daniel Veillard36e5cd52004-11-02 14:52:23 +00001649 if (ctxt == NULL)
1650 return(NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001651 if (ctxt->inputNr <= 0)
Daniel Veillard24505b02005-07-28 23:49:35 +00001652 return (NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001653 ctxt->inputNr--;
1654 if (ctxt->inputNr > 0)
1655 ctxt->input = ctxt->inputTab[ctxt->inputNr - 1];
1656 else
1657 ctxt->input = NULL;
1658 ret = ctxt->inputTab[ctxt->inputNr];
Daniel Veillard24505b02005-07-28 23:49:35 +00001659 ctxt->inputTab[ctxt->inputNr] = NULL;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001660 return (ret);
1661}
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001662/**
1663 * nodePush:
1664 * @ctxt: an XML parser context
Daniel Veillard9d06d302002-01-22 18:15:52 +00001665 * @value: the element node
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001666 *
1667 * Pushes a new element node on top of the node stack
Daniel Veillard9d06d302002-01-22 18:15:52 +00001668 *
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001669 * Returns -1 in case of error, the index in the stack otherwise
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001670 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001671int
Daniel Veillard1c732d22002-11-30 11:22:59 +00001672nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value)
1673{
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001674 if (ctxt == NULL) return(0);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001675 if (ctxt->nodeNr >= ctxt->nodeMax) {
Daniel Veillardd0cf7f62004-11-09 16:17:02 +00001676 xmlNodePtr *tmp;
1677
1678 tmp = (xmlNodePtr *) xmlRealloc(ctxt->nodeTab,
1679 ctxt->nodeMax * 2 *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001680 sizeof(ctxt->nodeTab[0]));
Daniel Veillardd0cf7f62004-11-09 16:17:02 +00001681 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001682 xmlErrMemory(ctxt, NULL);
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001683 return (-1);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001684 }
Daniel Veillardd0cf7f62004-11-09 16:17:02 +00001685 ctxt->nodeTab = tmp;
1686 ctxt->nodeMax *= 2;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001687 }
Daniel Veillard8915c152008-08-26 13:05:34 +00001688 if ((((unsigned int) ctxt->nodeNr) > xmlParserMaxDepth) &&
1689 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00001690 xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
Daniel Veillard8915c152008-08-26 13:05:34 +00001691 "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
Daniel Veillard4aede2e2003-10-17 12:43:59 +00001692 xmlParserMaxDepth);
Daniel Veillard3b2e4e12003-02-03 08:52:58 +00001693 ctxt->instate = XML_PARSER_EOF;
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001694 return(-1);
Daniel Veillard3b2e4e12003-02-03 08:52:58 +00001695 }
Daniel Veillard1c732d22002-11-30 11:22:59 +00001696 ctxt->nodeTab[ctxt->nodeNr] = value;
1697 ctxt->node = value;
1698 return (ctxt->nodeNr++);
1699}
Daniel Veillard8915c152008-08-26 13:05:34 +00001700
Daniel Veillard1c732d22002-11-30 11:22:59 +00001701/**
1702 * nodePop:
1703 * @ctxt: an XML parser context
1704 *
1705 * Pops the top element node from the node stack
1706 *
1707 * Returns the node just removed
Owen Taylor3473f882001-02-23 17:55:21 +00001708 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001709xmlNodePtr
Daniel Veillard1c732d22002-11-30 11:22:59 +00001710nodePop(xmlParserCtxtPtr ctxt)
1711{
1712 xmlNodePtr ret;
1713
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001714 if (ctxt == NULL) return(NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001715 if (ctxt->nodeNr <= 0)
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001716 return (NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001717 ctxt->nodeNr--;
1718 if (ctxt->nodeNr > 0)
1719 ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1];
1720 else
1721 ctxt->node = NULL;
1722 ret = ctxt->nodeTab[ctxt->nodeNr];
Daniel Veillard24505b02005-07-28 23:49:35 +00001723 ctxt->nodeTab[ctxt->nodeNr] = NULL;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001724 return (ret);
1725}
Daniel Veillarda2351322004-06-27 12:08:10 +00001726
1727#ifdef LIBXML_PUSH_ENABLED
Daniel Veillard1c732d22002-11-30 11:22:59 +00001728/**
Daniel Veillarde57ec792003-09-10 10:50:59 +00001729 * nameNsPush:
1730 * @ctxt: an XML parser context
1731 * @value: the element name
1732 * @prefix: the element prefix
1733 * @URI: the element namespace name
1734 *
1735 * Pushes a new element name/prefix/URL on top of the name stack
1736 *
1737 * Returns -1 in case of error, the index in the stack otherwise
1738 */
1739static int
1740nameNsPush(xmlParserCtxtPtr ctxt, const xmlChar * value,
1741 const xmlChar *prefix, const xmlChar *URI, int nsNr)
1742{
1743 if (ctxt->nameNr >= ctxt->nameMax) {
1744 const xmlChar * *tmp;
1745 void **tmp2;
1746 ctxt->nameMax *= 2;
1747 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1748 ctxt->nameMax *
1749 sizeof(ctxt->nameTab[0]));
1750 if (tmp == NULL) {
1751 ctxt->nameMax /= 2;
1752 goto mem_error;
1753 }
1754 ctxt->nameTab = tmp;
1755 tmp2 = (void **) xmlRealloc((void * *)ctxt->pushTab,
1756 ctxt->nameMax * 3 *
1757 sizeof(ctxt->pushTab[0]));
1758 if (tmp2 == NULL) {
1759 ctxt->nameMax /= 2;
1760 goto mem_error;
1761 }
1762 ctxt->pushTab = tmp2;
1763 }
1764 ctxt->nameTab[ctxt->nameNr] = value;
1765 ctxt->name = value;
1766 ctxt->pushTab[ctxt->nameNr * 3] = (void *) prefix;
1767 ctxt->pushTab[ctxt->nameNr * 3 + 1] = (void *) URI;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +00001768 ctxt->pushTab[ctxt->nameNr * 3 + 2] = (void *) (long) nsNr;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001769 return (ctxt->nameNr++);
1770mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001771 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001772 return (-1);
1773}
1774/**
1775 * nameNsPop:
1776 * @ctxt: an XML parser context
1777 *
1778 * Pops the top element/prefix/URI name from the name stack
1779 *
1780 * Returns the name just removed
1781 */
1782static const xmlChar *
1783nameNsPop(xmlParserCtxtPtr ctxt)
1784{
1785 const xmlChar *ret;
1786
1787 if (ctxt->nameNr <= 0)
Daniel Veillard24505b02005-07-28 23:49:35 +00001788 return (NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001789 ctxt->nameNr--;
1790 if (ctxt->nameNr > 0)
1791 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1792 else
1793 ctxt->name = NULL;
1794 ret = ctxt->nameTab[ctxt->nameNr];
1795 ctxt->nameTab[ctxt->nameNr] = NULL;
1796 return (ret);
1797}
Daniel Veillarda2351322004-06-27 12:08:10 +00001798#endif /* LIBXML_PUSH_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +00001799
1800/**
Daniel Veillard1c732d22002-11-30 11:22:59 +00001801 * namePush:
1802 * @ctxt: an XML parser context
1803 * @value: the element name
1804 *
1805 * Pushes a new element name on top of the name stack
1806 *
Daniel Veillarde57ec792003-09-10 10:50:59 +00001807 * Returns -1 in case of error, the index in the stack otherwise
Daniel Veillard1c732d22002-11-30 11:22:59 +00001808 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001809int
Daniel Veillard2fdbd322003-08-18 12:15:38 +00001810namePush(xmlParserCtxtPtr ctxt, const xmlChar * value)
Daniel Veillard1c732d22002-11-30 11:22:59 +00001811{
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001812 if (ctxt == NULL) return (-1);
1813
Daniel Veillard1c732d22002-11-30 11:22:59 +00001814 if (ctxt->nameNr >= ctxt->nameMax) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00001815 const xmlChar * *tmp;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001816 ctxt->nameMax *= 2;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001817 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
Daniel Veillard1c732d22002-11-30 11:22:59 +00001818 ctxt->nameMax *
1819 sizeof(ctxt->nameTab[0]));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001820 if (tmp == NULL) {
1821 ctxt->nameMax /= 2;
1822 goto mem_error;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001823 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00001824 ctxt->nameTab = tmp;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001825 }
1826 ctxt->nameTab[ctxt->nameNr] = value;
1827 ctxt->name = value;
1828 return (ctxt->nameNr++);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001829mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001830 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001831 return (-1);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001832}
1833/**
1834 * namePop:
1835 * @ctxt: an XML parser context
1836 *
1837 * Pops the top element name from the name stack
1838 *
1839 * Returns the name just removed
1840 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001841const xmlChar *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001842namePop(xmlParserCtxtPtr ctxt)
1843{
Daniel Veillard2fdbd322003-08-18 12:15:38 +00001844 const xmlChar *ret;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001845
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001846 if ((ctxt == NULL) || (ctxt->nameNr <= 0))
1847 return (NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001848 ctxt->nameNr--;
1849 if (ctxt->nameNr > 0)
1850 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1851 else
1852 ctxt->name = NULL;
1853 ret = ctxt->nameTab[ctxt->nameNr];
Daniel Veillard24505b02005-07-28 23:49:35 +00001854 ctxt->nameTab[ctxt->nameNr] = NULL;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001855 return (ret);
1856}
Owen Taylor3473f882001-02-23 17:55:21 +00001857
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001858static int spacePush(xmlParserCtxtPtr ctxt, int val) {
Owen Taylor3473f882001-02-23 17:55:21 +00001859 if (ctxt->spaceNr >= ctxt->spaceMax) {
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00001860 int *tmp;
1861
Owen Taylor3473f882001-02-23 17:55:21 +00001862 ctxt->spaceMax *= 2;
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00001863 tmp = (int *) xmlRealloc(ctxt->spaceTab,
1864 ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
1865 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001866 xmlErrMemory(ctxt, NULL);
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001867 ctxt->spaceMax /=2;
1868 return(-1);
Owen Taylor3473f882001-02-23 17:55:21 +00001869 }
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00001870 ctxt->spaceTab = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00001871 }
1872 ctxt->spaceTab[ctxt->spaceNr] = val;
1873 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr];
1874 return(ctxt->spaceNr++);
1875}
1876
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001877static int spacePop(xmlParserCtxtPtr ctxt) {
Owen Taylor3473f882001-02-23 17:55:21 +00001878 int ret;
1879 if (ctxt->spaceNr <= 0) return(0);
1880 ctxt->spaceNr--;
1881 if (ctxt->spaceNr > 0)
1882 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1];
1883 else
Daniel Veillarddbcbbd22006-06-18 19:55:20 +00001884 ctxt->space = &ctxt->spaceTab[0];
Owen Taylor3473f882001-02-23 17:55:21 +00001885 ret = ctxt->spaceTab[ctxt->spaceNr];
1886 ctxt->spaceTab[ctxt->spaceNr] = -1;
1887 return(ret);
1888}
1889
/*
 * Macros for accessing the content. Those should be used only by the parser,
 * and not exported.
 *
 * Dirty macros, i.e. one often needs to make assumptions about the context
 * to use them
 *
 *   CUR_PTR return the current pointer to the xmlChar to be parsed.
 *           To be used with extreme caution since operations consuming
 *           characters may move the input buffer to a different location !
 *   CUR     returns the current xmlChar value, i.e. an 8 bit value.
 *           This should be used internally by the parser
 *           only to compare to ASCII values otherwise it would break when
 *           running with UTF-8 encoding.
 *   RAW     same as CUR but in the input buffer, bypass any token
 *           extraction that may have been done
 *   NXT(n)  returns the n'th next xmlChar. Same as CUR, it should be used
 *           only to compare on ASCII based substring.
 *   SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
 *           strings without newlines within the parser.
 *   NEXT1   Skip 1 xmlChar, and must also be used only to skip 1 non-newline
 *           ASCII defined char within the parser.
 * Clean macros, not dependent on an ASCII context, expect UTF-8 encoding
 *
 *   NEXT    Skip to the next character, this does the proper decoding
 *           in UTF-8 mode. It also pops up unfinished entities on the fly.
 *   NEXTL(l) Skip the current unicode character of l xmlChars long.
 *   CUR_CHAR(l) returns the current unicode character (int), set l
 *           to the number of xmlChars used for the encoding [0-5].
 *   CUR_SCHAR  same but operate on a string instead of the context
 *   COPY_BUF  copy the current unicode char to the target buffer, increment
 *            the index
 *   GROW, SHRINK  handling of input buffers
 */
1924
/*
 * Raw access to the current input; every macro here expects a variable
 * named ctxt to be in scope. RAW and CUR expand to the same expression.
 */
#define RAW (*ctxt->input->cur)
#define CUR (*ctxt->input->cur)
#define NXT(val) ctxt->input->cur[(val)]
#define CUR_PTR ctxt->input->cur

/*
 * CMPn(s, c1, ..., cn): non-zero when the first n bytes at s, read as
 * unsigned char, equal c1 ... cn in order. Comparison stops at the first
 * mismatch. s is expanded once per compared byte, so it must not have
 * side effects. Every argument is parenthesized so that compound
 * expressions (conditionals, pointer arithmetic) keep their meaning.
 */
#define CMP4( s, c1, c2, c3, c4 ) \
  ( ((unsigned char *) (s))[ 0 ] == (c1) && \
    ((unsigned char *) (s))[ 1 ] == (c2) && \
    ((unsigned char *) (s))[ 2 ] == (c3) && \
    ((unsigned char *) (s))[ 3 ] == (c4) )
#define CMP5( s, c1, c2, c3, c4, c5 ) \
  ( CMP4( s, c1, c2, c3, c4 ) && ((unsigned char *) (s))[ 4 ] == (c5) )
#define CMP6( s, c1, c2, c3, c4, c5, c6 ) \
  ( CMP5( s, c1, c2, c3, c4, c5 ) && ((unsigned char *) (s))[ 5 ] == (c6) )
#define CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) \
  ( CMP6( s, c1, c2, c3, c4, c5, c6 ) && ((unsigned char *) (s))[ 6 ] == (c7) )
#define CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) \
  ( CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) && \
    ((unsigned char *) (s))[ 7 ] == (c8) )
#define CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) \
  ( CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) && \
    ((unsigned char *) (s))[ 8 ] == (c9) )
#define CMP10( s, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 ) \
  ( CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) && \
    ((unsigned char *) (s))[ 9 ] == (c10) )
1947
/*
 * SKIP(val): advance the character count, the input cursor and the column
 * by val, then hand a '%' at the new position to
 * xmlParserHandlePEReference() and pop the input when it is exhausted and
 * cannot be grown. val is expanded three times.
 */
#define SKIP(val) do {                                                  \
    ctxt->nbChars += (val);                                             \
    ctxt->input->cur += (val);                                          \
    ctxt->input->col += (val);                                          \
    if (*ctxt->input->cur == '%')                                       \
        xmlParserHandlePEReference(ctxt);                               \
    if ((*ctxt->input->cur == 0) &&                                     \
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))            \
        xmlPopInput(ctxt);                                              \
  } while (0)
1955
/*
 * SKIPL(val): like SKIP, but walks the val characters one at a time so
 * that line and column stay correct across newlines. val is parenthesized
 * in the loop bound so that compound arguments compare as intended; it is
 * re-evaluated on every iteration.
 */
#define SKIPL(val) do {                                                 \
    int skipl;                                                          \
    for (skipl = 0; skipl < (val); skipl++) {                           \
        if (*(ctxt->input->cur) == '\n') {                              \
            ctxt->input->line++; ctxt->input->col = 1;                  \
        } else ctxt->input->col++;                                      \
        ctxt->nbChars++;                                                \
        ctxt->input->cur++;                                             \
    }                                                                   \
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);     \
    if ((*ctxt->input->cur == 0) &&                                     \
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))            \
        xmlPopInput(ctxt);                                              \
  } while (0)
1970
Daniel Veillarda880b122003-04-21 21:36:41 +00001971#define SHRINK if ((ctxt->progressive == 0) && \
Daniel Veillard6155d8a2003-08-19 15:01:28 +00001972 (ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \
1973 (ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \
Daniel Veillard46de64e2002-05-29 08:21:33 +00001974 xmlSHRINK (ctxt);
1975
1976static void xmlSHRINK (xmlParserCtxtPtr ctxt) {
1977 xmlParserInputShrink(ctxt->input);
1978 if ((*ctxt->input->cur == 0) &&
1979 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1980 xmlPopInput(ctxt);
Daniel Veillard48b2f892001-02-25 16:11:03 +00001981 }
Owen Taylor3473f882001-02-23 17:55:21 +00001982
Daniel Veillarda880b122003-04-21 21:36:41 +00001983#define GROW if ((ctxt->progressive == 0) && \
1984 (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK)) \
Daniel Veillard46de64e2002-05-29 08:21:33 +00001985 xmlGROW (ctxt);
1986
1987static void xmlGROW (xmlParserCtxtPtr ctxt) {
1988 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
Daniel Veillard59df7832010-02-02 10:24:01 +01001989 if ((ctxt->input->cur != NULL) && (*ctxt->input->cur == 0) &&
Daniel Veillard46de64e2002-05-29 08:21:33 +00001990 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1991 xmlPopInput(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +00001992}
Owen Taylor3473f882001-02-23 17:55:21 +00001993
/* Shorthands for the character-level helpers, applied to the local ctxt. */
#define SKIP_BLANKS xmlSkipBlankChars(ctxt)

#define NEXT xmlNextChar(ctxt)

/*
 * NEXT1: step over a single character, counting one column and one
 * character, and try to read more input when the cursor lands on a 0 byte.
 * Expands to a bare brace block.
 */
#define NEXT1 {                                                         \
        ctxt->input->col++;                                             \
        ctxt->input->cur++;                                             \
        ctxt->nbChars++;                                                \
        if (*ctxt->input->cur == 0)                                     \
            xmlParserInputGrow(ctxt->input, INPUT_CHUNK);               \
    }

/*
 * NEXTL(l): step over the current character, which occupies l xmlChars,
 * updating line/column, then hand a '%' at the new position to
 * xmlParserHandlePEReference().
 */
#define NEXTL(l) do {                                                   \
    if (*(ctxt->input->cur) == '\n') {                                  \
        ctxt->input->line++; ctxt->input->col = 1;                      \
    } else ctxt->input->col++;                                          \
    ctxt->input->cur += (l);                                            \
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);     \
  } while (0)

/* l must be an lvalue: its address is passed to the callee. */
#define CUR_CHAR(l) xmlCurrentChar(ctxt, &(l))
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, (s), &(l))

/*
 * COPY_BUF(l,b,i,v): store character v at b[i] and advance i; a character
 * of length 1 is stored directly, anything else goes through
 * xmlCopyCharMultiByte(). Expands to an unbraced if/else statement, so do
 * not use it as the body of another unbraced if.
 */
#define COPY_BUF(l,b,i,v)                                               \
    if ((l) == 1) (b)[(i)++] = (xmlChar) (v);                           \
    else (i) += xmlCopyCharMultiByte(&(b)[(i)], (v))
Owen Taylor3473f882001-02-23 17:55:21 +00002020
2021/**
2022 * xmlSkipBlankChars:
2023 * @ctxt: the XML parser context
2024 *
2025 * skip all blanks character found at that point in the input streams.
2026 * It pops up finished entities in the process if allowable at that point.
2027 *
2028 * Returns the number of space chars skipped
2029 */
2030
2031int
2032xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
Daniel Veillard02141ea2001-04-30 11:46:40 +00002033 int res = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002034
2035 /*
2036 * It's Okay to use CUR/NEXT here since all the blanks are on
2037 * the ASCII range.
2038 */
Daniel Veillard02141ea2001-04-30 11:46:40 +00002039 if ((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) {
2040 const xmlChar *cur;
Owen Taylor3473f882001-02-23 17:55:21 +00002041 /*
Daniel Veillard02141ea2001-04-30 11:46:40 +00002042 * if we are in the document content, go really fast
Owen Taylor3473f882001-02-23 17:55:21 +00002043 */
Daniel Veillard02141ea2001-04-30 11:46:40 +00002044 cur = ctxt->input->cur;
William M. Brack76e95df2003-10-18 16:20:14 +00002045 while (IS_BLANK_CH(*cur)) {
Daniel Veillard02141ea2001-04-30 11:46:40 +00002046 if (*cur == '\n') {
2047 ctxt->input->line++; ctxt->input->col = 1;
2048 }
2049 cur++;
2050 res++;
2051 if (*cur == 0) {
2052 ctxt->input->cur = cur;
2053 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2054 cur = ctxt->input->cur;
2055 }
2056 }
2057 ctxt->input->cur = cur;
2058 } else {
2059 int cur;
2060 do {
2061 cur = CUR;
Daniel Veillard7da92702005-01-23 20:15:53 +00002062 while (IS_BLANK_CH(cur)) { /* CHECKED tstblanks.xml */
Daniel Veillard02141ea2001-04-30 11:46:40 +00002063 NEXT;
2064 cur = CUR;
2065 res++;
2066 }
2067 while ((cur == 0) && (ctxt->inputNr > 1) &&
2068 (ctxt->instate != XML_PARSER_COMMENT)) {
2069 xmlPopInput(ctxt);
2070 cur = CUR;
2071 }
2072 /*
2073 * Need to handle support of entities branching here
2074 */
2075 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);
2076 } while (IS_BLANK(cur)); /* CHECKED tstblanks.xml */
2077 }
Owen Taylor3473f882001-02-23 17:55:21 +00002078 return(res);
2079}
2080
2081/************************************************************************
2082 * *
2083 * Commodity functions to handle entities *
2084 * *
2085 ************************************************************************/
2086
2087/**
2088 * xmlPopInput:
2089 * @ctxt: an XML parser context
2090 *
2091 * xmlPopInput: the current input pointed by ctxt->input came to an end
2092 * pop it and return the next char.
2093 *
2094 * Returns the current xmlChar in the parser context
2095 */
2096xmlChar
2097xmlPopInput(xmlParserCtxtPtr ctxt) {
Daniel Veillarda82b1822004-11-08 16:24:57 +00002098 if ((ctxt == NULL) || (ctxt->inputNr <= 1)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002099 if (xmlParserDebugEntities)
2100 xmlGenericError(xmlGenericErrorContext,
2101 "Popping input %d\n", ctxt->inputNr);
2102 xmlFreeInputStream(inputPop(ctxt));
Daniel Veillard561b7f82002-03-20 21:55:57 +00002103 if ((*ctxt->input->cur == 0) &&
Owen Taylor3473f882001-02-23 17:55:21 +00002104 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
2105 return(xmlPopInput(ctxt));
2106 return(CUR);
2107}
2108
2109/**
2110 * xmlPushInput:
2111 * @ctxt: an XML parser context
2112 * @input: an XML parser input fragment (entity, XML fragment ...).
2113 *
2114 * xmlPushInput: switch to a new input stream which is stacked on top
2115 * of the previous one(s).
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00002116 * Returns -1 in case of error or the index in the input stack
Owen Taylor3473f882001-02-23 17:55:21 +00002117 */
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00002118int
Owen Taylor3473f882001-02-23 17:55:21 +00002119xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00002120 int ret;
2121 if (input == NULL) return(-1);
Owen Taylor3473f882001-02-23 17:55:21 +00002122
2123 if (xmlParserDebugEntities) {
2124 if ((ctxt->input != NULL) && (ctxt->input->filename))
2125 xmlGenericError(xmlGenericErrorContext,
2126 "%s(%d): ", ctxt->input->filename,
2127 ctxt->input->line);
2128 xmlGenericError(xmlGenericErrorContext,
2129 "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
2130 }
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00002131 ret = inputPush(ctxt, input);
Owen Taylor3473f882001-02-23 17:55:21 +00002132 GROW;
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00002133 return(ret);
Owen Taylor3473f882001-02-23 17:55:21 +00002134}
2135
2136/**
2137 * xmlParseCharRef:
2138 * @ctxt: an XML parser context
2139 *
2140 * parse Reference declarations
2141 *
2142 * [66] CharRef ::= '&#' [0-9]+ ';' |
2143 * '&#x' [0-9a-fA-F]+ ';'
2144 *
2145 * [ WFC: Legal Character ]
2146 * Characters referred to using character references must match the
2147 * production for Char.
2148 *
2149 * Returns the value parsed (as an int), 0 in case of error
2150 */
2151int
2152xmlParseCharRef(xmlParserCtxtPtr ctxt) {
Daniel Veillard50582112001-03-26 22:52:16 +00002153 unsigned int val = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002154 int count = 0;
Daniel Veillard37fd3072004-06-03 11:22:31 +00002155 unsigned int outofrange = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002156
Owen Taylor3473f882001-02-23 17:55:21 +00002157 /*
2158 * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
2159 */
Daniel Veillard561b7f82002-03-20 21:55:57 +00002160 if ((RAW == '&') && (NXT(1) == '#') &&
Owen Taylor3473f882001-02-23 17:55:21 +00002161 (NXT(2) == 'x')) {
2162 SKIP(3);
2163 GROW;
2164 while (RAW != ';') { /* loop blocked by count */
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00002165 if (count++ > 20) {
2166 count = 0;
2167 GROW;
2168 }
2169 if ((RAW >= '0') && (RAW <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +00002170 val = val * 16 + (CUR - '0');
2171 else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
2172 val = val * 16 + (CUR - 'a') + 10;
2173 else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
2174 val = val * 16 + (CUR - 'A') + 10;
2175 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002176 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002177 val = 0;
2178 break;
2179 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00002180 if (val > 0x10FFFF)
2181 outofrange = val;
2182
Owen Taylor3473f882001-02-23 17:55:21 +00002183 NEXT;
2184 count++;
2185 }
2186 if (RAW == ';') {
2187 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
Daniel Veillard77a90a72003-03-22 00:04:05 +00002188 ctxt->input->col++;
Owen Taylor3473f882001-02-23 17:55:21 +00002189 ctxt->nbChars ++;
2190 ctxt->input->cur++;
2191 }
Daniel Veillard561b7f82002-03-20 21:55:57 +00002192 } else if ((RAW == '&') && (NXT(1) == '#')) {
Owen Taylor3473f882001-02-23 17:55:21 +00002193 SKIP(2);
2194 GROW;
2195 while (RAW != ';') { /* loop blocked by count */
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00002196 if (count++ > 20) {
2197 count = 0;
2198 GROW;
2199 }
2200 if ((RAW >= '0') && (RAW <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +00002201 val = val * 10 + (CUR - '0');
2202 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002203 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002204 val = 0;
2205 break;
2206 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00002207 if (val > 0x10FFFF)
2208 outofrange = val;
2209
Owen Taylor3473f882001-02-23 17:55:21 +00002210 NEXT;
2211 count++;
2212 }
2213 if (RAW == ';') {
2214 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
Daniel Veillard77a90a72003-03-22 00:04:05 +00002215 ctxt->input->col++;
Owen Taylor3473f882001-02-23 17:55:21 +00002216 ctxt->nbChars ++;
2217 ctxt->input->cur++;
2218 }
2219 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002220 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002221 }
2222
2223 /*
2224 * [ WFC: Legal Character ]
2225 * Characters referred to using character references must match the
2226 * production for Char.
2227 */
Daniel Veillard37fd3072004-06-03 11:22:31 +00002228 if ((IS_CHAR(val) && (outofrange == 0))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002229 return(val);
2230 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00002231 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2232 "xmlParseCharRef: invalid xmlChar value %d\n",
2233 val);
Owen Taylor3473f882001-02-23 17:55:21 +00002234 }
2235 return(0);
2236}
2237
2238/**
2239 * xmlParseStringCharRef:
2240 * @ctxt: an XML parser context
2241 * @str: a pointer to an index in the string
2242 *
2243 * parse Reference declarations, variant parsing from a string rather
2244 * than an an input flow.
2245 *
2246 * [66] CharRef ::= '&#' [0-9]+ ';' |
2247 * '&#x' [0-9a-fA-F]+ ';'
2248 *
2249 * [ WFC: Legal Character ]
2250 * Characters referred to using character references must match the
2251 * production for Char.
2252 *
2253 * Returns the value parsed (as an int), 0 in case of error, str will be
2254 * updated to the current value of the index
2255 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00002256static int
Owen Taylor3473f882001-02-23 17:55:21 +00002257xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
2258 const xmlChar *ptr;
2259 xmlChar cur;
Daniel Veillard37fd3072004-06-03 11:22:31 +00002260 unsigned int val = 0;
2261 unsigned int outofrange = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002262
2263 if ((str == NULL) || (*str == NULL)) return(0);
2264 ptr = *str;
2265 cur = *ptr;
2266 if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
2267 ptr += 3;
2268 cur = *ptr;
2269 while (cur != ';') { /* Non input consuming loop */
2270 if ((cur >= '0') && (cur <= '9'))
2271 val = val * 16 + (cur - '0');
2272 else if ((cur >= 'a') && (cur <= 'f'))
2273 val = val * 16 + (cur - 'a') + 10;
2274 else if ((cur >= 'A') && (cur <= 'F'))
2275 val = val * 16 + (cur - 'A') + 10;
2276 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002277 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002278 val = 0;
2279 break;
2280 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00002281 if (val > 0x10FFFF)
2282 outofrange = val;
2283
Owen Taylor3473f882001-02-23 17:55:21 +00002284 ptr++;
2285 cur = *ptr;
2286 }
2287 if (cur == ';')
2288 ptr++;
2289 } else if ((cur == '&') && (ptr[1] == '#')){
2290 ptr += 2;
2291 cur = *ptr;
2292 while (cur != ';') { /* Non input consuming loops */
2293 if ((cur >= '0') && (cur <= '9'))
2294 val = val * 10 + (cur - '0');
2295 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002296 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002297 val = 0;
2298 break;
2299 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00002300 if (val > 0x10FFFF)
2301 outofrange = val;
2302
Owen Taylor3473f882001-02-23 17:55:21 +00002303 ptr++;
2304 cur = *ptr;
2305 }
2306 if (cur == ';')
2307 ptr++;
2308 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002309 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002310 return(0);
2311 }
2312 *str = ptr;
2313
2314 /*
2315 * [ WFC: Legal Character ]
2316 * Characters referred to using character references must match the
2317 * production for Char.
2318 */
Daniel Veillard37fd3072004-06-03 11:22:31 +00002319 if ((IS_CHAR(val) && (outofrange == 0))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002320 return(val);
2321 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00002322 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2323 "xmlParseStringCharRef: invalid xmlChar value %d\n",
2324 val);
Owen Taylor3473f882001-02-23 17:55:21 +00002325 }
2326 return(0);
2327}
2328
2329/**
Daniel Veillardf5582f12002-06-11 10:08:16 +00002330 * xmlNewBlanksWrapperInputStream:
2331 * @ctxt: an XML parser context
2332 * @entity: an Entity pointer
2333 *
2334 * Create a new input stream for wrapping
2335 * blanks around a PEReference
2336 *
2337 * Returns the new input stream or NULL
2338 */
2339
2340static void deallocblankswrapper (xmlChar *str) {xmlFree(str);}
2341
Daniel Veillardf4862f02002-09-10 11:13:43 +00002342static xmlParserInputPtr
Daniel Veillardf5582f12002-06-11 10:08:16 +00002343xmlNewBlanksWrapperInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
2344 xmlParserInputPtr input;
2345 xmlChar *buffer;
2346 size_t length;
2347 if (entity == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002348 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
2349 "xmlNewBlanksWrapperInputStream entity\n");
Daniel Veillardf5582f12002-06-11 10:08:16 +00002350 return(NULL);
2351 }
2352 if (xmlParserDebugEntities)
2353 xmlGenericError(xmlGenericErrorContext,
2354 "new blanks wrapper for entity: %s\n", entity->name);
2355 input = xmlNewInputStream(ctxt);
2356 if (input == NULL) {
2357 return(NULL);
2358 }
2359 length = xmlStrlen(entity->name) + 5;
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002360 buffer = xmlMallocAtomic(length);
Daniel Veillardf5582f12002-06-11 10:08:16 +00002361 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002362 xmlErrMemory(ctxt, NULL);
Daniel Veillard68b6e022008-03-31 09:26:00 +00002363 xmlFree(input);
Daniel Veillardf5582f12002-06-11 10:08:16 +00002364 return(NULL);
2365 }
2366 buffer [0] = ' ';
2367 buffer [1] = '%';
2368 buffer [length-3] = ';';
2369 buffer [length-2] = ' ';
2370 buffer [length-1] = 0;
2371 memcpy(buffer + 2, entity->name, length - 5);
2372 input->free = deallocblankswrapper;
2373 input->base = buffer;
2374 input->cur = buffer;
2375 input->length = length;
2376 input->end = &buffer[length];
2377 return(input);
2378}
2379
2380/**
Owen Taylor3473f882001-02-23 17:55:21 +00002381 * xmlParserHandlePEReference:
2382 * @ctxt: the parser context
2383 *
2384 * [69] PEReference ::= '%' Name ';'
2385 *
2386 * [ WFC: No Recursion ]
2387 * A parsed entity must not contain a recursive
2388 * reference to itself, either directly or indirectly.
2389 *
2390 * [ WFC: Entity Declared ]
2391 * In a document without any DTD, a document with only an internal DTD
2392 * subset which contains no parameter entity references, or a document
2393 * with "standalone='yes'", ... ... The declaration of a parameter
2394 * entity must precede any reference to it...
2395 *
2396 * [ VC: Entity Declared ]
2397 * In a document with an external subset or external parameter entities
2398 * with "standalone='no'", ... ... The declaration of a parameter entity
2399 * must precede any reference to it...
2400 *
2401 * [ WFC: In DTD ]
2402 * Parameter-entity references may only appear in the DTD.
2403 * NOTE: misleading but this is handled.
2404 *
2405 * A PEReference may have been detected in the current input stream
2406 * the handling is done accordingly to
2407 * http://www.w3.org/TR/REC-xml#entproc
2408 * i.e.
2409 * - Included in literal in entity values
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002410 * - Included as Parameter Entity reference within DTDs
Owen Taylor3473f882001-02-23 17:55:21 +00002411 */
2412void
2413xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002414 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00002415 xmlEntityPtr entity = NULL;
2416 xmlParserInputPtr input;
2417
Owen Taylor3473f882001-02-23 17:55:21 +00002418 if (RAW != '%') return;
2419 switch(ctxt->instate) {
2420 case XML_PARSER_CDATA_SECTION:
2421 return;
2422 case XML_PARSER_COMMENT:
2423 return;
2424 case XML_PARSER_START_TAG:
2425 return;
2426 case XML_PARSER_END_TAG:
2427 return;
2428 case XML_PARSER_EOF:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002429 xmlFatalErr(ctxt, XML_ERR_PEREF_AT_EOF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002430 return;
2431 case XML_PARSER_PROLOG:
2432 case XML_PARSER_START:
2433 case XML_PARSER_MISC:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002434 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_PROLOG, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002435 return;
2436 case XML_PARSER_ENTITY_DECL:
2437 case XML_PARSER_CONTENT:
2438 case XML_PARSER_ATTRIBUTE_VALUE:
2439 case XML_PARSER_PI:
2440 case XML_PARSER_SYSTEM_LITERAL:
Daniel Veillard4a7ae502002-02-18 19:18:17 +00002441 case XML_PARSER_PUBLIC_LITERAL:
Owen Taylor3473f882001-02-23 17:55:21 +00002442 /* we just ignore it there */
2443 return;
2444 case XML_PARSER_EPILOG:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002445 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_EPILOG, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002446 return;
2447 case XML_PARSER_ENTITY_VALUE:
2448 /*
2449 * NOTE: in the case of entity values, we don't do the
2450 * substitution here since we need the literal
2451 * entity value to be able to save the internal
2452 * subset of the document.
2453 * This will be handled by xmlStringDecodeEntities
2454 */
2455 return;
2456 case XML_PARSER_DTD:
2457 /*
2458 * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
2459 * In the internal DTD subset, parameter-entity references
2460 * can occur only where markup declarations can occur, not
2461 * within markup declarations.
2462 * In that case this is handled in xmlParseMarkupDecl
2463 */
2464 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
2465 return;
William M. Brack76e95df2003-10-18 16:20:14 +00002466 if (IS_BLANK_CH(NXT(1)) || NXT(1) == 0)
Daniel Veillardf5582f12002-06-11 10:08:16 +00002467 return;
Owen Taylor3473f882001-02-23 17:55:21 +00002468 break;
2469 case XML_PARSER_IGNORE:
2470 return;
2471 }
2472
2473 NEXT;
2474 name = xmlParseName(ctxt);
2475 if (xmlParserDebugEntities)
2476 xmlGenericError(xmlGenericErrorContext,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002477 "PEReference: %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00002478 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002479 xmlFatalErr(ctxt, XML_ERR_PEREF_NO_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002480 } else {
2481 if (RAW == ';') {
2482 NEXT;
2483 if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL))
2484 entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
2485 if (entity == NULL) {
2486
2487 /*
2488 * [ WFC: Entity Declared ]
2489 * In a document without any DTD, a document with only an
2490 * internal DTD subset which contains no parameter entity
2491 * references, or a document with "standalone='yes'", ...
2492 * ... The declaration of a parameter entity must precede
2493 * any reference to it...
2494 */
2495 if ((ctxt->standalone == 1) ||
2496 ((ctxt->hasExternalSubset == 0) &&
2497 (ctxt->hasPErefs == 0))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00002498 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00002499 "PEReference: %%%s; not found\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00002500 } else {
2501 /*
2502 * [ VC: Entity Declared ]
2503 * In a document with an external subset or external
2504 * parameter entities with "standalone='no'", ...
2505 * ... The declaration of a parameter entity must precede
2506 * any reference to it...
2507 */
Daniel Veillard24eb9782003-10-04 21:08:09 +00002508 if ((ctxt->validate) && (ctxt->vctxt.error != NULL)) {
2509 xmlValidityError(ctxt, XML_WAR_UNDECLARED_ENTITY,
2510 "PEReference: %%%s; not found\n",
Daniel Veillardae0765b2008-07-31 19:54:59 +00002511 name, NULL);
Daniel Veillard24eb9782003-10-04 21:08:09 +00002512 } else
2513 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
2514 "PEReference: %%%s; not found\n",
2515 name, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002516 ctxt->valid = 0;
2517 }
Daniel Veillardf5582f12002-06-11 10:08:16 +00002518 } else if (ctxt->input->free != deallocblankswrapper) {
2519 input = xmlNewBlanksWrapperInputStream(ctxt, entity);
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00002520 if (xmlPushInput(ctxt, input) < 0)
2521 return;
Owen Taylor3473f882001-02-23 17:55:21 +00002522 } else {
2523 if ((entity->etype == XML_INTERNAL_PARAMETER_ENTITY) ||
2524 (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)) {
Daniel Veillard87a764e2001-06-20 17:41:10 +00002525 xmlChar start[4];
2526 xmlCharEncoding enc;
2527
Owen Taylor3473f882001-02-23 17:55:21 +00002528 /*
2529 * handle the extra spaces added before and after
2530 * c.f. http://www.w3.org/TR/REC-xml#as-PE
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002531 * this is done independently.
Owen Taylor3473f882001-02-23 17:55:21 +00002532 */
2533 input = xmlNewEntityInputStream(ctxt, entity);
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00002534 if (xmlPushInput(ctxt, input) < 0)
2535 return;
Daniel Veillard87a764e2001-06-20 17:41:10 +00002536
2537 /*
2538 * Get the 4 first bytes and decode the charset
2539 * if enc != XML_CHAR_ENCODING_NONE
2540 * plug some encoding conversion routines.
William M. Bracka0c48ad2004-04-16 15:58:29 +00002541 * Note that, since we may have some non-UTF8
2542 * encoding (like UTF16, bug 135229), the 'length'
2543 * is not known, but we can calculate based upon
2544 * the amount of data in the buffer.
Daniel Veillard87a764e2001-06-20 17:41:10 +00002545 */
2546 GROW
William M. Bracka0c48ad2004-04-16 15:58:29 +00002547 if ((ctxt->input->end - ctxt->input->cur)>=4) {
Daniel Veillarde059b892002-06-13 15:32:10 +00002548 start[0] = RAW;
2549 start[1] = NXT(1);
2550 start[2] = NXT(2);
2551 start[3] = NXT(3);
2552 enc = xmlDetectCharEncoding(start, 4);
2553 if (enc != XML_CHAR_ENCODING_NONE) {
2554 xmlSwitchEncoding(ctxt, enc);
2555 }
Daniel Veillard87a764e2001-06-20 17:41:10 +00002556 }
2557
Owen Taylor3473f882001-02-23 17:55:21 +00002558 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
Daniel Veillarda07050d2003-10-19 14:46:32 +00002559 (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l' )) &&
2560 (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002561 xmlParseTextDecl(ctxt);
2562 }
Owen Taylor3473f882001-02-23 17:55:21 +00002563 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00002564 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
2565 "PEReference: %s is not a parameter entity\n",
2566 name);
Owen Taylor3473f882001-02-23 17:55:21 +00002567 }
2568 }
2569 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002570 xmlFatalErr(ctxt, XML_ERR_PEREF_SEMICOL_MISSING, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002571 }
Owen Taylor3473f882001-02-23 17:55:21 +00002572 }
2573}
2574
/*
 * growBuffer:
 * @buffer:  name of the xmlChar * variable holding the buffer
 * @n:  number of extra bytes added on top of the doubled size
 *
 * Macro used to grow the current buffer. It relies on the caller scope
 * providing a <buffer>_size variable and a mem_error label, which is
 * jumped to when the reallocation fails (the old buffer is then still
 * referenced by @buffer).
 */
#define growBuffer(buffer, n) {                                         \
    xmlChar *tmp;                                                       \
    buffer##_size = buffer##_size * 2 + (n);                            \
    tmp = (xmlChar *)                                                   \
          xmlRealloc(buffer, buffer##_size * sizeof(xmlChar));          \
    if (tmp == NULL) goto mem_error;                                    \
    buffer = tmp;                                                       \
}
2587
2588/**
Daniel Veillard7a02cfe2003-09-25 12:18:34 +00002589 * xmlStringLenDecodeEntities:
Owen Taylor3473f882001-02-23 17:55:21 +00002590 * @ctxt: the parser context
2591 * @str: the input string
Daniel Veillarde57ec792003-09-10 10:50:59 +00002592 * @len: the string length
Owen Taylor3473f882001-02-23 17:55:21 +00002593 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2594 * @end: an end marker xmlChar, 0 if none
2595 * @end2: an end marker xmlChar, 0 if none
2596 * @end3: an end marker xmlChar, 0 if none
2597 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002598 * Takes a entity string content and process to do the adequate substitutions.
Owen Taylor3473f882001-02-23 17:55:21 +00002599 *
2600 * [67] Reference ::= EntityRef | CharRef
2601 *
2602 * [69] PEReference ::= '%' Name ';'
2603 *
2604 * Returns A newly allocated string with the substitution done. The caller
2605 * must deallocate it !
2606 */
2607xmlChar *
Daniel Veillarde57ec792003-09-10 10:50:59 +00002608xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2609 int what, xmlChar end, xmlChar end2, xmlChar end3) {
Owen Taylor3473f882001-02-23 17:55:21 +00002610 xmlChar *buffer = NULL;
2611 int buffer_size = 0;
2612
2613 xmlChar *current = NULL;
Daniel Veillard68b6e022008-03-31 09:26:00 +00002614 xmlChar *rep = NULL;
Daniel Veillarde57ec792003-09-10 10:50:59 +00002615 const xmlChar *last;
Owen Taylor3473f882001-02-23 17:55:21 +00002616 xmlEntityPtr ent;
2617 int c,l;
2618 int nbchars = 0;
2619
Daniel Veillarda82b1822004-11-08 16:24:57 +00002620 if ((ctxt == NULL) || (str == NULL) || (len < 0))
Owen Taylor3473f882001-02-23 17:55:21 +00002621 return(NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00002622 last = str + len;
Owen Taylor3473f882001-02-23 17:55:21 +00002623
Daniel Veillard0161e632008-08-28 15:36:32 +00002624 if (((ctxt->depth > 40) &&
2625 ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
2626 (ctxt->depth > 1024)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002627 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002628 return(NULL);
2629 }
2630
2631 /*
2632 * allocate a translation buffer.
2633 */
2634 buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002635 buffer = (xmlChar *) xmlMallocAtomic(buffer_size * sizeof(xmlChar));
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002636 if (buffer == NULL) goto mem_error;
Owen Taylor3473f882001-02-23 17:55:21 +00002637
2638 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002639 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +00002640 * we are operating on already parsed values.
2641 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00002642 if (str < last)
2643 c = CUR_SCHAR(str, l);
2644 else
2645 c = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002646 while ((c != 0) && (c != end) && /* non input consuming loop */
2647 (c != end2) && (c != end3)) {
2648
2649 if (c == 0) break;
2650 if ((c == '&') && (str[1] == '#')) {
2651 int val = xmlParseStringCharRef(ctxt, &str);
2652 if (val != 0) {
2653 COPY_BUF(0,buffer,nbchars,val);
2654 }
Daniel Veillardbedc9772005-09-28 21:42:15 +00002655 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
Daniel Veillard0161e632008-08-28 15:36:32 +00002656 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
Daniel Veillardbedc9772005-09-28 21:42:15 +00002657 }
Owen Taylor3473f882001-02-23 17:55:21 +00002658 } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
2659 if (xmlParserDebugEntities)
2660 xmlGenericError(xmlGenericErrorContext,
2661 "String decoding Entity Reference: %.30s\n",
2662 str);
2663 ent = xmlParseStringEntityRef(ctxt, &str);
Daniel Veillard8915c152008-08-26 13:05:34 +00002664 if ((ctxt->lastError.code == XML_ERR_ENTITY_LOOP) ||
2665 (ctxt->lastError.code == XML_ERR_INTERNAL_ERROR))
Daniel Veillard4bf899b2008-08-20 17:04:30 +00002666 goto int_error;
Daniel Veillard4bf899b2008-08-20 17:04:30 +00002667 if (ent != NULL)
Daniel Veillardf4f4e482008-08-25 08:57:48 +00002668 ctxt->nbentities += ent->checked;
Owen Taylor3473f882001-02-23 17:55:21 +00002669 if ((ent != NULL) &&
2670 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
2671 if (ent->content != NULL) {
2672 COPY_BUF(0,buffer,nbchars,ent->content[0]);
Daniel Veillardbedc9772005-09-28 21:42:15 +00002673 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
Daniel Veillard0161e632008-08-28 15:36:32 +00002674 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
Daniel Veillardbedc9772005-09-28 21:42:15 +00002675 }
Owen Taylor3473f882001-02-23 17:55:21 +00002676 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00002677 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
2678 "predefined entity has no content\n");
Owen Taylor3473f882001-02-23 17:55:21 +00002679 }
2680 } else if ((ent != NULL) && (ent->content != NULL)) {
Owen Taylor3473f882001-02-23 17:55:21 +00002681 ctxt->depth++;
2682 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2683 0, 0, 0);
2684 ctxt->depth--;
Daniel Veillard0161e632008-08-28 15:36:32 +00002685
Owen Taylor3473f882001-02-23 17:55:21 +00002686 if (rep != NULL) {
2687 current = rep;
2688 while (*current != 0) { /* non input consuming loop */
2689 buffer[nbchars++] = *current++;
2690 if (nbchars >
2691 buffer_size - XML_PARSER_BUFFER_SIZE) {
Daniel Veillard0161e632008-08-28 15:36:32 +00002692 if (xmlParserEntityCheck(ctxt, nbchars, ent))
2693 goto int_error;
2694 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
Owen Taylor3473f882001-02-23 17:55:21 +00002695 }
2696 }
2697 xmlFree(rep);
Daniel Veillard68b6e022008-03-31 09:26:00 +00002698 rep = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00002699 }
2700 } else if (ent != NULL) {
2701 int i = xmlStrlen(ent->name);
2702 const xmlChar *cur = ent->name;
2703
2704 buffer[nbchars++] = '&';
2705 if (nbchars > buffer_size - i - XML_PARSER_BUFFER_SIZE) {
Daniel Veillard0161e632008-08-28 15:36:32 +00002706 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
Owen Taylor3473f882001-02-23 17:55:21 +00002707 }
2708 for (;i > 0;i--)
2709 buffer[nbchars++] = *cur++;
2710 buffer[nbchars++] = ';';
2711 }
2712 } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
2713 if (xmlParserDebugEntities)
2714 xmlGenericError(xmlGenericErrorContext,
2715 "String decoding PE Reference: %.30s\n", str);
2716 ent = xmlParseStringPEReference(ctxt, &str);
Daniel Veillard4bf899b2008-08-20 17:04:30 +00002717 if (ctxt->lastError.code == XML_ERR_ENTITY_LOOP)
2718 goto int_error;
Daniel Veillard4bf899b2008-08-20 17:04:30 +00002719 if (ent != NULL)
Daniel Veillardf4f4e482008-08-25 08:57:48 +00002720 ctxt->nbentities += ent->checked;
Owen Taylor3473f882001-02-23 17:55:21 +00002721 if (ent != NULL) {
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00002722 if (ent->content == NULL) {
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00002723 xmlLoadEntityContent(ctxt, ent);
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00002724 }
Owen Taylor3473f882001-02-23 17:55:21 +00002725 ctxt->depth++;
2726 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2727 0, 0, 0);
2728 ctxt->depth--;
2729 if (rep != NULL) {
2730 current = rep;
2731 while (*current != 0) { /* non input consuming loop */
2732 buffer[nbchars++] = *current++;
2733 if (nbchars >
2734 buffer_size - XML_PARSER_BUFFER_SIZE) {
Daniel Veillard0161e632008-08-28 15:36:32 +00002735 if (xmlParserEntityCheck(ctxt, nbchars, ent))
2736 goto int_error;
2737 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
Owen Taylor3473f882001-02-23 17:55:21 +00002738 }
2739 }
2740 xmlFree(rep);
Daniel Veillard68b6e022008-03-31 09:26:00 +00002741 rep = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00002742 }
2743 }
2744 } else {
2745 COPY_BUF(l,buffer,nbchars,c);
2746 str += l;
2747 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
Daniel Veillard0161e632008-08-28 15:36:32 +00002748 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
Owen Taylor3473f882001-02-23 17:55:21 +00002749 }
2750 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00002751 if (str < last)
2752 c = CUR_SCHAR(str, l);
2753 else
2754 c = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002755 }
Daniel Veillard13cee4e2009-09-05 14:52:55 +02002756 buffer[nbchars] = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002757 return(buffer);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002758
2759mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002760 xmlErrMemory(ctxt, NULL);
Daniel Veillard4bf899b2008-08-20 17:04:30 +00002761int_error:
Daniel Veillard68b6e022008-03-31 09:26:00 +00002762 if (rep != NULL)
2763 xmlFree(rep);
2764 if (buffer != NULL)
2765 xmlFree(buffer);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002766 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002767}
2768
Daniel Veillarde57ec792003-09-10 10:50:59 +00002769/**
2770 * xmlStringDecodeEntities:
2771 * @ctxt: the parser context
2772 * @str: the input string
2773 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2774 * @end: an end marker xmlChar, 0 if none
2775 * @end2: an end marker xmlChar, 0 if none
2776 * @end3: an end marker xmlChar, 0 if none
2777 *
2778 * Takes a entity string content and process to do the adequate substitutions.
2779 *
2780 * [67] Reference ::= EntityRef | CharRef
2781 *
2782 * [69] PEReference ::= '%' Name ';'
2783 *
2784 * Returns A newly allocated string with the substitution done. The caller
2785 * must deallocate it !
2786 */
2787xmlChar *
2788xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
2789 xmlChar end, xmlChar end2, xmlChar end3) {
Daniel Veillarda82b1822004-11-08 16:24:57 +00002790 if ((ctxt == NULL) || (str == NULL)) return(NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00002791 return(xmlStringLenDecodeEntities(ctxt, str, xmlStrlen(str), what,
2792 end, end2, end3));
2793}
Owen Taylor3473f882001-02-23 17:55:21 +00002794
2795/************************************************************************
2796 * *
Owen Taylor3473f882001-02-23 17:55:21 +00002797 * Commodity functions, cleanup needed ? *
2798 * *
2799 ************************************************************************/
2800
2801/**
2802 * areBlanks:
2803 * @ctxt: an XML parser context
2804 * @str: a xmlChar *
2805 * @len: the size of @str
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002806 * @blank_chars: we know the chars are blanks
Owen Taylor3473f882001-02-23 17:55:21 +00002807 *
2808 * Is this a sequence of blank chars that one can ignore ?
2809 *
2810 * Returns 1 if ignorable 0 otherwise.
2811 */
2812
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002813static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2814 int blank_chars) {
Owen Taylor3473f882001-02-23 17:55:21 +00002815 int i, ret;
2816 xmlNodePtr lastChild;
2817
Daniel Veillard05c13a22001-09-09 08:38:09 +00002818 /*
2819 * Don't spend time trying to differentiate them, the same callback is
2820 * used !
2821 */
2822 if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters)
Daniel Veillard2f362242001-03-02 17:36:21 +00002823 return(0);
2824
Owen Taylor3473f882001-02-23 17:55:21 +00002825 /*
2826 * Check for xml:space value.
2827 */
Daniel Veillard1114d002006-10-12 16:24:35 +00002828 if ((ctxt->space == NULL) || (*(ctxt->space) == 1) ||
2829 (*(ctxt->space) == -2))
Owen Taylor3473f882001-02-23 17:55:21 +00002830 return(0);
2831
2832 /*
2833 * Check that the string is made of blanks
2834 */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002835 if (blank_chars == 0) {
2836 for (i = 0;i < len;i++)
2837 if (!(IS_BLANK_CH(str[i]))) return(0);
2838 }
Owen Taylor3473f882001-02-23 17:55:21 +00002839
2840 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002841 * Look if the element is mixed content in the DTD if available
Owen Taylor3473f882001-02-23 17:55:21 +00002842 */
Daniel Veillard6dd398f2001-07-25 22:41:03 +00002843 if (ctxt->node == NULL) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002844 if (ctxt->myDoc != NULL) {
2845 ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
2846 if (ret == 0) return(1);
2847 if (ret == 1) return(0);
2848 }
2849
2850 /*
2851 * Otherwise, heuristic :-\
2852 */
Daniel Veillardabac41e2005-07-06 15:17:38 +00002853 if ((RAW != '<') && (RAW != 0xD)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002854 if ((ctxt->node->children == NULL) &&
2855 (RAW == '<') && (NXT(1) == '/')) return(0);
2856
2857 lastChild = xmlGetLastChild(ctxt->node);
2858 if (lastChild == NULL) {
Daniel Veillard7db37732001-07-12 01:20:08 +00002859 if ((ctxt->node->type != XML_ELEMENT_NODE) &&
2860 (ctxt->node->content != NULL)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002861 } else if (xmlNodeIsText(lastChild))
2862 return(0);
2863 else if ((ctxt->node->children != NULL) &&
2864 (xmlNodeIsText(ctxt->node->children)))
2865 return(0);
2866 return(1);
2867}
2868
Owen Taylor3473f882001-02-23 17:55:21 +00002869/************************************************************************
2870 * *
2871 * Extra stuff for namespace support *
2872 * Relates to http://www.w3.org/TR/WD-xml-names *
2873 * *
2874 ************************************************************************/
2875
2876/**
2877 * xmlSplitQName:
2878 * @ctxt: an XML parser context
2879 * @name: an XML parser context
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00002880 * @prefix: a xmlChar **
Owen Taylor3473f882001-02-23 17:55:21 +00002881 *
2882 * parse an UTF8 encoded XML qualified name string
2883 *
2884 * [NS 5] QName ::= (Prefix ':')? LocalPart
2885 *
2886 * [NS 6] Prefix ::= NCName
2887 *
2888 * [NS 7] LocalPart ::= NCName
2889 *
2890 * Returns the local part, and prefix is updated
2891 * to get the Prefix if any.
2892 */
2893
2894xmlChar *
2895xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
2896 xmlChar buf[XML_MAX_NAMELEN + 5];
2897 xmlChar *buffer = NULL;
2898 int len = 0;
2899 int max = XML_MAX_NAMELEN;
2900 xmlChar *ret = NULL;
2901 const xmlChar *cur = name;
2902 int c;
2903
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00002904 if (prefix == NULL) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002905 *prefix = NULL;
2906
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00002907 if (cur == NULL) return(NULL);
2908
Owen Taylor3473f882001-02-23 17:55:21 +00002909#ifndef XML_XML_NAMESPACE
2910 /* xml: prefix is not really a namespace */
2911 if ((cur[0] == 'x') && (cur[1] == 'm') &&
2912 (cur[2] == 'l') && (cur[3] == ':'))
2913 return(xmlStrdup(name));
2914#endif
2915
Daniel Veillard597bc482003-07-24 16:08:28 +00002916 /* nasty but well=formed */
Owen Taylor3473f882001-02-23 17:55:21 +00002917 if (cur[0] == ':')
2918 return(xmlStrdup(name));
2919
2920 c = *cur++;
2921 while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
2922 buf[len++] = c;
2923 c = *cur++;
2924 }
2925 if (len >= max) {
2926 /*
2927 * Okay someone managed to make a huge name, so he's ready to pay
2928 * for the processing speed.
2929 */
2930 max = len * 2;
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00002931
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002932 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002933 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002934 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002935 return(NULL);
2936 }
2937 memcpy(buffer, buf, len);
2938 while ((c != 0) && (c != ':')) { /* tested bigname.xml */
2939 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00002940 xmlChar *tmp;
2941
Owen Taylor3473f882001-02-23 17:55:21 +00002942 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00002943 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00002944 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00002945 if (tmp == NULL) {
Daniel Veillard68b6e022008-03-31 09:26:00 +00002946 xmlFree(buffer);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002947 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002948 return(NULL);
2949 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00002950 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00002951 }
2952 buffer[len++] = c;
2953 c = *cur++;
2954 }
2955 buffer[len] = 0;
2956 }
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00002957
Daniel Veillard597bc482003-07-24 16:08:28 +00002958 if ((c == ':') && (*cur == 0)) {
Daniel Veillard02a49632006-10-13 12:42:31 +00002959 if (buffer != NULL)
2960 xmlFree(buffer);
2961 *prefix = NULL;
Daniel Veillard597bc482003-07-24 16:08:28 +00002962 return(xmlStrdup(name));
Daniel Veillard02a49632006-10-13 12:42:31 +00002963 }
Daniel Veillard597bc482003-07-24 16:08:28 +00002964
Owen Taylor3473f882001-02-23 17:55:21 +00002965 if (buffer == NULL)
2966 ret = xmlStrndup(buf, len);
2967 else {
2968 ret = buffer;
2969 buffer = NULL;
2970 max = XML_MAX_NAMELEN;
2971 }
2972
2973
2974 if (c == ':') {
Daniel Veillardbb284f42002-10-16 18:02:47 +00002975 c = *cur;
Owen Taylor3473f882001-02-23 17:55:21 +00002976 *prefix = ret;
Daniel Veillard597bc482003-07-24 16:08:28 +00002977 if (c == 0) {
Daniel Veillard8d73bcb2003-08-04 01:06:15 +00002978 return(xmlStrndup(BAD_CAST "", 0));
Daniel Veillard597bc482003-07-24 16:08:28 +00002979 }
Owen Taylor3473f882001-02-23 17:55:21 +00002980 len = 0;
2981
Daniel Veillardbb284f42002-10-16 18:02:47 +00002982 /*
2983 * Check that the first character is proper to start
2984 * a new name
2985 */
2986 if (!(((c >= 0x61) && (c <= 0x7A)) ||
2987 ((c >= 0x41) && (c <= 0x5A)) ||
2988 (c == '_') || (c == ':'))) {
2989 int l;
2990 int first = CUR_SCHAR(cur, l);
2991
2992 if (!IS_LETTER(first) && (first != '_')) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00002993 xmlFatalErrMsgStr(ctxt, XML_NS_ERR_QNAME,
Daniel Veillardbb284f42002-10-16 18:02:47 +00002994 "Name %s is not XML Namespace compliant\n",
Daniel Veillardbc92eca2003-09-15 09:48:06 +00002995 name);
Daniel Veillardbb284f42002-10-16 18:02:47 +00002996 }
2997 }
2998 cur++;
2999
Owen Taylor3473f882001-02-23 17:55:21 +00003000 while ((c != 0) && (len < max)) { /* tested bigname2.xml */
3001 buf[len++] = c;
3002 c = *cur++;
3003 }
3004 if (len >= max) {
3005 /*
3006 * Okay someone managed to make a huge name, so he's ready to pay
3007 * for the processing speed.
3008 */
3009 max = len * 2;
Daniel Veillard68b6e022008-03-31 09:26:00 +00003010
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003011 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003012 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003013 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003014 return(NULL);
3015 }
3016 memcpy(buffer, buf, len);
3017 while (c != 0) { /* tested bigname2.xml */
3018 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003019 xmlChar *tmp;
3020
Owen Taylor3473f882001-02-23 17:55:21 +00003021 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003022 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00003023 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00003024 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003025 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00003026 xmlFree(buffer);
Owen Taylor3473f882001-02-23 17:55:21 +00003027 return(NULL);
3028 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003029 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003030 }
3031 buffer[len++] = c;
3032 c = *cur++;
3033 }
3034 buffer[len] = 0;
3035 }
Daniel Veillard68b6e022008-03-31 09:26:00 +00003036
Owen Taylor3473f882001-02-23 17:55:21 +00003037 if (buffer == NULL)
3038 ret = xmlStrndup(buf, len);
3039 else {
3040 ret = buffer;
3041 }
3042 }
3043
3044 return(ret);
3045}
3046
3047/************************************************************************
3048 * *
3049 * The parser itself *
3050 * Relates to http://www.w3.org/TR/REC-xml *
3051 * *
3052 ************************************************************************/
3053
Daniel Veillard34e3f642008-07-29 09:02:27 +00003054/************************************************************************
3055 * *
3056 * Routines to parse Name, NCName and NmToken *
3057 * *
3058 ************************************************************************/
/*
 * Call counters for the name parsing routines, only compiled in when DEBUG
 * is defined. Each counter is bumped by the routine whose name it carries
 * (e.g. nbParseName by xmlParseName()) every time that routine is called.
 */
#ifdef DEBUG
static unsigned long nbParseName = 0;
static unsigned long nbParseNmToken = 0;
static unsigned long nbParseNCName = 0;
static unsigned long nbParseNCNameComplex = 0;
static unsigned long nbParseNameComplex = 0;
static unsigned long nbParseStringName = 0;
#endif
3067
Daniel Veillard34e3f642008-07-29 09:02:27 +00003068/*
3069 * The two following functions are related to the change of accepted
3070 * characters for Name and NmToken in the Revision 5 of XML-1.0
3071 * They correspond to the modified production [4] and the new production [4a]
3072 * changes in that revision. Also note that the macros used for the
3073 * productions Letter, Digit, CombiningChar and Extender are not needed
3074 * anymore.
3075 * We still keep compatibility to pre-revision5 parsing semantic if the
3076 * new XML_PARSE_OLD10 option is given to the parser.
3077 */
3078static int
3079xmlIsNameStartChar(xmlParserCtxtPtr ctxt, int c) {
3080 if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3081 /*
3082 * Use the new checks of production [4] [4a] amd [5] of the
3083 * Update 5 of XML-1.0
3084 */
3085 if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3086 (((c >= 'a') && (c <= 'z')) ||
3087 ((c >= 'A') && (c <= 'Z')) ||
3088 (c == '_') || (c == ':') ||
3089 ((c >= 0xC0) && (c <= 0xD6)) ||
3090 ((c >= 0xD8) && (c <= 0xF6)) ||
3091 ((c >= 0xF8) && (c <= 0x2FF)) ||
3092 ((c >= 0x370) && (c <= 0x37D)) ||
3093 ((c >= 0x37F) && (c <= 0x1FFF)) ||
3094 ((c >= 0x200C) && (c <= 0x200D)) ||
3095 ((c >= 0x2070) && (c <= 0x218F)) ||
3096 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3097 ((c >= 0x3001) && (c <= 0xD7FF)) ||
3098 ((c >= 0xF900) && (c <= 0xFDCF)) ||
3099 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3100 ((c >= 0x10000) && (c <= 0xEFFFF))))
3101 return(1);
3102 } else {
3103 if (IS_LETTER(c) || (c == '_') || (c == ':'))
3104 return(1);
3105 }
3106 return(0);
3107}
3108
3109static int
3110xmlIsNameChar(xmlParserCtxtPtr ctxt, int c) {
3111 if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3112 /*
3113 * Use the new checks of production [4] [4a] amd [5] of the
3114 * Update 5 of XML-1.0
3115 */
3116 if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3117 (((c >= 'a') && (c <= 'z')) ||
3118 ((c >= 'A') && (c <= 'Z')) ||
3119 ((c >= '0') && (c <= '9')) || /* !start */
3120 (c == '_') || (c == ':') ||
3121 (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3122 ((c >= 0xC0) && (c <= 0xD6)) ||
3123 ((c >= 0xD8) && (c <= 0xF6)) ||
3124 ((c >= 0xF8) && (c <= 0x2FF)) ||
3125 ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3126 ((c >= 0x370) && (c <= 0x37D)) ||
3127 ((c >= 0x37F) && (c <= 0x1FFF)) ||
3128 ((c >= 0x200C) && (c <= 0x200D)) ||
3129 ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3130 ((c >= 0x2070) && (c <= 0x218F)) ||
3131 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3132 ((c >= 0x3001) && (c <= 0xD7FF)) ||
3133 ((c >= 0xF900) && (c <= 0xFDCF)) ||
3134 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3135 ((c >= 0x10000) && (c <= 0xEFFFF))))
3136 return(1);
3137 } else {
3138 if ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3139 (c == '.') || (c == '-') ||
3140 (c == '_') || (c == ':') ||
3141 (IS_COMBINING(c)) ||
3142 (IS_EXTENDER(c)))
3143 return(1);
3144 }
3145 return(0);
3146}
3147
/*
 * Forward declaration, so that the routine can be referenced before the
 * point where it is defined.
 */
static xmlChar * xmlParseAttValueInternal(xmlParserCtxtPtr ctxt,
                                          int *len, int *alloc, int normalize);
Daniel Veillard0fb18932003-09-07 09:14:37 +00003150
Daniel Veillard34e3f642008-07-29 09:02:27 +00003151static const xmlChar *
3152xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
3153 int len = 0, l;
3154 int c;
3155 int count = 0;
3156
Daniel Veillardc6561462009-03-25 10:22:31 +00003157#ifdef DEBUG
Daniel Veillard34e3f642008-07-29 09:02:27 +00003158 nbParseNameComplex++;
Daniel Veillardc6561462009-03-25 10:22:31 +00003159#endif
Daniel Veillard34e3f642008-07-29 09:02:27 +00003160
3161 /*
3162 * Handler for more complex cases
3163 */
3164 GROW;
3165 c = CUR_CHAR(l);
3166 if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3167 /*
3168 * Use the new checks of production [4] [4a] amd [5] of the
3169 * Update 5 of XML-1.0
3170 */
3171 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3172 (!(((c >= 'a') && (c <= 'z')) ||
3173 ((c >= 'A') && (c <= 'Z')) ||
3174 (c == '_') || (c == ':') ||
3175 ((c >= 0xC0) && (c <= 0xD6)) ||
3176 ((c >= 0xD8) && (c <= 0xF6)) ||
3177 ((c >= 0xF8) && (c <= 0x2FF)) ||
3178 ((c >= 0x370) && (c <= 0x37D)) ||
3179 ((c >= 0x37F) && (c <= 0x1FFF)) ||
3180 ((c >= 0x200C) && (c <= 0x200D)) ||
3181 ((c >= 0x2070) && (c <= 0x218F)) ||
3182 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3183 ((c >= 0x3001) && (c <= 0xD7FF)) ||
3184 ((c >= 0xF900) && (c <= 0xFDCF)) ||
3185 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3186 ((c >= 0x10000) && (c <= 0xEFFFF))))) {
3187 return(NULL);
3188 }
3189 len += l;
3190 NEXTL(l);
3191 c = CUR_CHAR(l);
3192 while ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3193 (((c >= 'a') && (c <= 'z')) ||
3194 ((c >= 'A') && (c <= 'Z')) ||
3195 ((c >= '0') && (c <= '9')) || /* !start */
3196 (c == '_') || (c == ':') ||
3197 (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3198 ((c >= 0xC0) && (c <= 0xD6)) ||
3199 ((c >= 0xD8) && (c <= 0xF6)) ||
3200 ((c >= 0xF8) && (c <= 0x2FF)) ||
3201 ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3202 ((c >= 0x370) && (c <= 0x37D)) ||
3203 ((c >= 0x37F) && (c <= 0x1FFF)) ||
3204 ((c >= 0x200C) && (c <= 0x200D)) ||
3205 ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3206 ((c >= 0x2070) && (c <= 0x218F)) ||
3207 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3208 ((c >= 0x3001) && (c <= 0xD7FF)) ||
3209 ((c >= 0xF900) && (c <= 0xFDCF)) ||
3210 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3211 ((c >= 0x10000) && (c <= 0xEFFFF))
3212 )) {
3213 if (count++ > 100) {
3214 count = 0;
3215 GROW;
3216 }
3217 len += l;
3218 NEXTL(l);
3219 c = CUR_CHAR(l);
3220 }
3221 } else {
3222 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3223 (!IS_LETTER(c) && (c != '_') &&
3224 (c != ':'))) {
3225 return(NULL);
3226 }
3227 len += l;
3228 NEXTL(l);
3229 c = CUR_CHAR(l);
3230
3231 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3232 ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3233 (c == '.') || (c == '-') ||
3234 (c == '_') || (c == ':') ||
3235 (IS_COMBINING(c)) ||
3236 (IS_EXTENDER(c)))) {
3237 if (count++ > 100) {
3238 count = 0;
3239 GROW;
3240 }
3241 len += l;
3242 NEXTL(l);
3243 c = CUR_CHAR(l);
3244 }
3245 }
3246 if ((*ctxt->input->cur == '\n') && (ctxt->input->cur[-1] == '\r'))
3247 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - (len + 1), len));
3248 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
3249}
3250
Owen Taylor3473f882001-02-23 17:55:21 +00003251/**
3252 * xmlParseName:
3253 * @ctxt: an XML parser context
3254 *
3255 * parse an XML name.
3256 *
3257 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3258 * CombiningChar | Extender
3259 *
3260 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3261 *
Daniel Veillard807b4de2004-09-26 14:42:56 +00003262 * [6] Names ::= Name (#x20 Name)*
Owen Taylor3473f882001-02-23 17:55:21 +00003263 *
3264 * Returns the Name parsed or NULL
3265 */
3266
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003267const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00003268xmlParseName(xmlParserCtxtPtr ctxt) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00003269 const xmlChar *in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003270 const xmlChar *ret;
Owen Taylor3473f882001-02-23 17:55:21 +00003271 int count = 0;
3272
3273 GROW;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003274
Daniel Veillardc6561462009-03-25 10:22:31 +00003275#ifdef DEBUG
Daniel Veillard34e3f642008-07-29 09:02:27 +00003276 nbParseName++;
Daniel Veillardc6561462009-03-25 10:22:31 +00003277#endif
Daniel Veillard34e3f642008-07-29 09:02:27 +00003278
Daniel Veillard48b2f892001-02-25 16:11:03 +00003279 /*
3280 * Accelerator for simple ASCII names
3281 */
3282 in = ctxt->input->cur;
3283 if (((*in >= 0x61) && (*in <= 0x7A)) ||
3284 ((*in >= 0x41) && (*in <= 0x5A)) ||
3285 (*in == '_') || (*in == ':')) {
3286 in++;
3287 while (((*in >= 0x61) && (*in <= 0x7A)) ||
3288 ((*in >= 0x41) && (*in <= 0x5A)) ||
3289 ((*in >= 0x30) && (*in <= 0x39)) ||
Daniel Veillard76d66f42001-05-16 21:05:17 +00003290 (*in == '_') || (*in == '-') ||
3291 (*in == ':') || (*in == '.'))
Daniel Veillard48b2f892001-02-25 16:11:03 +00003292 in++;
Daniel Veillard76d66f42001-05-16 21:05:17 +00003293 if ((*in > 0) && (*in < 0x80)) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00003294 count = in - ctxt->input->cur;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003295 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
Daniel Veillard48b2f892001-02-25 16:11:03 +00003296 ctxt->input->cur = in;
Daniel Veillard77a90a72003-03-22 00:04:05 +00003297 ctxt->nbChars += count;
3298 ctxt->input->col += count;
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003299 if (ret == NULL)
3300 xmlErrMemory(ctxt, NULL);
Daniel Veillard48b2f892001-02-25 16:11:03 +00003301 return(ret);
3302 }
3303 }
Daniel Veillard34e3f642008-07-29 09:02:27 +00003304 /* accelerator for special cases */
Daniel Veillard2f362242001-03-02 17:36:21 +00003305 return(xmlParseNameComplex(ctxt));
Daniel Veillard21a0f912001-02-25 19:54:14 +00003306}
Daniel Veillard48b2f892001-02-25 16:11:03 +00003307
Daniel Veillard34e3f642008-07-29 09:02:27 +00003308static const xmlChar *
3309xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) {
3310 int len = 0, l;
3311 int c;
3312 int count = 0;
3313
Daniel Veillardc6561462009-03-25 10:22:31 +00003314#ifdef DEBUG
Daniel Veillard34e3f642008-07-29 09:02:27 +00003315 nbParseNCNameComplex++;
Daniel Veillardc6561462009-03-25 10:22:31 +00003316#endif
Daniel Veillard34e3f642008-07-29 09:02:27 +00003317
3318 /*
3319 * Handler for more complex cases
3320 */
3321 GROW;
3322 c = CUR_CHAR(l);
3323 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3324 (!xmlIsNameStartChar(ctxt, c) || (c == ':'))) {
3325 return(NULL);
3326 }
3327
3328 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3329 (xmlIsNameChar(ctxt, c) && (c != ':'))) {
3330 if (count++ > 100) {
3331 count = 0;
3332 GROW;
3333 }
3334 len += l;
3335 NEXTL(l);
3336 c = CUR_CHAR(l);
3337 }
3338 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
3339}
3340
3341/**
3342 * xmlParseNCName:
3343 * @ctxt: an XML parser context
3344 * @len: lenght of the string parsed
3345 *
3346 * parse an XML name.
3347 *
3348 * [4NS] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
3349 * CombiningChar | Extender
3350 *
3351 * [5NS] NCName ::= (Letter | '_') (NCNameChar)*
3352 *
3353 * Returns the Name parsed or NULL
3354 */
3355
3356static const xmlChar *
3357xmlParseNCName(xmlParserCtxtPtr ctxt) {
3358 const xmlChar *in;
3359 const xmlChar *ret;
3360 int count = 0;
3361
Daniel Veillardc6561462009-03-25 10:22:31 +00003362#ifdef DEBUG
Daniel Veillard34e3f642008-07-29 09:02:27 +00003363 nbParseNCName++;
Daniel Veillardc6561462009-03-25 10:22:31 +00003364#endif
Daniel Veillard34e3f642008-07-29 09:02:27 +00003365
3366 /*
3367 * Accelerator for simple ASCII names
3368 */
3369 in = ctxt->input->cur;
3370 if (((*in >= 0x61) && (*in <= 0x7A)) ||
3371 ((*in >= 0x41) && (*in <= 0x5A)) ||
3372 (*in == '_')) {
3373 in++;
3374 while (((*in >= 0x61) && (*in <= 0x7A)) ||
3375 ((*in >= 0x41) && (*in <= 0x5A)) ||
3376 ((*in >= 0x30) && (*in <= 0x39)) ||
3377 (*in == '_') || (*in == '-') ||
3378 (*in == '.'))
3379 in++;
3380 if ((*in > 0) && (*in < 0x80)) {
3381 count = in - ctxt->input->cur;
3382 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
3383 ctxt->input->cur = in;
3384 ctxt->nbChars += count;
3385 ctxt->input->col += count;
3386 if (ret == NULL) {
3387 xmlErrMemory(ctxt, NULL);
3388 }
3389 return(ret);
3390 }
3391 }
3392 return(xmlParseNCNameComplex(ctxt));
3393}
3394
Daniel Veillard46de64e2002-05-29 08:21:33 +00003395/**
3396 * xmlParseNameAndCompare:
3397 * @ctxt: an XML parser context
3398 *
3399 * parse an XML name and compares for match
3400 * (specialized for endtag parsing)
3401 *
Daniel Veillard46de64e2002-05-29 08:21:33 +00003402 * Returns NULL for an illegal name, (xmlChar*) 1 for success
3403 * and the name for mismatch
3404 */
3405
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003406static const xmlChar *
Daniel Veillard46de64e2002-05-29 08:21:33 +00003407xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003408 register const xmlChar *cmp = other;
3409 register const xmlChar *in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003410 const xmlChar *ret;
Daniel Veillard46de64e2002-05-29 08:21:33 +00003411
3412 GROW;
Daniel Veillard34e3f642008-07-29 09:02:27 +00003413
Daniel Veillard46de64e2002-05-29 08:21:33 +00003414 in = ctxt->input->cur;
3415 while (*in != 0 && *in == *cmp) {
Daniel Veillard34e3f642008-07-29 09:02:27 +00003416 ++in;
Daniel Veillard46de64e2002-05-29 08:21:33 +00003417 ++cmp;
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00003418 ctxt->input->col++;
Daniel Veillard46de64e2002-05-29 08:21:33 +00003419 }
William M. Brack76e95df2003-10-18 16:20:14 +00003420 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
Daniel Veillard34e3f642008-07-29 09:02:27 +00003421 /* success */
Daniel Veillard46de64e2002-05-29 08:21:33 +00003422 ctxt->input->cur = in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003423 return (const xmlChar*) 1;
Daniel Veillard46de64e2002-05-29 08:21:33 +00003424 }
3425 /* failure (or end of input buffer), check with full function */
3426 ret = xmlParseName (ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00003427 /* strings coming from the dictionnary direct compare possible */
3428 if (ret == other) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003429 return (const xmlChar*) 1;
Daniel Veillard46de64e2002-05-29 08:21:33 +00003430 }
3431 return ret;
3432}
3433
Owen Taylor3473f882001-02-23 17:55:21 +00003434/**
3435 * xmlParseStringName:
3436 * @ctxt: an XML parser context
3437 * @str: a pointer to the string pointer (IN/OUT)
3438 *
3439 * parse an XML name.
3440 *
3441 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3442 * CombiningChar | Extender
3443 *
3444 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3445 *
Daniel Veillard807b4de2004-09-26 14:42:56 +00003446 * [6] Names ::= Name (#x20 Name)*
Owen Taylor3473f882001-02-23 17:55:21 +00003447 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003448 * Returns the Name parsed or NULL. The @str pointer
Owen Taylor3473f882001-02-23 17:55:21 +00003449 * is updated to the current location in the string.
3450 */
3451
Daniel Veillard56a4cb82001-03-24 17:00:36 +00003452static xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00003453xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
3454 xmlChar buf[XML_MAX_NAMELEN + 5];
3455 const xmlChar *cur = *str;
3456 int len = 0, l;
3457 int c;
3458
Daniel Veillardc6561462009-03-25 10:22:31 +00003459#ifdef DEBUG
Daniel Veillard34e3f642008-07-29 09:02:27 +00003460 nbParseStringName++;
Daniel Veillardc6561462009-03-25 10:22:31 +00003461#endif
Daniel Veillard34e3f642008-07-29 09:02:27 +00003462
Owen Taylor3473f882001-02-23 17:55:21 +00003463 c = CUR_SCHAR(cur, l);
Daniel Veillard34e3f642008-07-29 09:02:27 +00003464 if (!xmlIsNameStartChar(ctxt, c)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003465 return(NULL);
3466 }
3467
Daniel Veillard34e3f642008-07-29 09:02:27 +00003468 COPY_BUF(l,buf,len,c);
3469 cur += l;
3470 c = CUR_SCHAR(cur, l);
3471 while (xmlIsNameChar(ctxt, c)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003472 COPY_BUF(l,buf,len,c);
3473 cur += l;
3474 c = CUR_SCHAR(cur, l);
3475 if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
3476 /*
3477 * Okay someone managed to make a huge name, so he's ready to pay
3478 * for the processing speed.
3479 */
3480 xmlChar *buffer;
3481 int max = len * 2;
Daniel Veillard34e3f642008-07-29 09:02:27 +00003482
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003483 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003484 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003485 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003486 return(NULL);
3487 }
3488 memcpy(buffer, buf, len);
Daniel Veillard34e3f642008-07-29 09:02:27 +00003489 while (xmlIsNameChar(ctxt, c)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003490 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003491 xmlChar *tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003492 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003493 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00003494 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00003495 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003496 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00003497 xmlFree(buffer);
Owen Taylor3473f882001-02-23 17:55:21 +00003498 return(NULL);
3499 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003500 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003501 }
3502 COPY_BUF(l,buffer,len,c);
3503 cur += l;
3504 c = CUR_SCHAR(cur, l);
3505 }
3506 buffer[len] = 0;
3507 *str = cur;
3508 return(buffer);
3509 }
3510 }
3511 *str = cur;
3512 return(xmlStrndup(buf, len));
3513}
3514
3515/**
3516 * xmlParseNmtoken:
3517 * @ctxt: an XML parser context
Daniel Veillard34e3f642008-07-29 09:02:27 +00003518 *
Owen Taylor3473f882001-02-23 17:55:21 +00003519 * parse an XML Nmtoken.
3520 *
3521 * [7] Nmtoken ::= (NameChar)+
3522 *
Daniel Veillard807b4de2004-09-26 14:42:56 +00003523 * [8] Nmtokens ::= Nmtoken (#x20 Nmtoken)*
Owen Taylor3473f882001-02-23 17:55:21 +00003524 *
3525 * Returns the Nmtoken parsed or NULL
3526 */
3527
3528xmlChar *
3529xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
3530 xmlChar buf[XML_MAX_NAMELEN + 5];
3531 int len = 0, l;
3532 int c;
3533 int count = 0;
3534
Daniel Veillardc6561462009-03-25 10:22:31 +00003535#ifdef DEBUG
Daniel Veillard34e3f642008-07-29 09:02:27 +00003536 nbParseNmToken++;
Daniel Veillardc6561462009-03-25 10:22:31 +00003537#endif
Daniel Veillard34e3f642008-07-29 09:02:27 +00003538
Owen Taylor3473f882001-02-23 17:55:21 +00003539 GROW;
3540 c = CUR_CHAR(l);
3541
Daniel Veillard34e3f642008-07-29 09:02:27 +00003542 while (xmlIsNameChar(ctxt, c)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003543 if (count++ > 100) {
3544 count = 0;
3545 GROW;
3546 }
3547 COPY_BUF(l,buf,len,c);
3548 NEXTL(l);
3549 c = CUR_CHAR(l);
3550 if (len >= XML_MAX_NAMELEN) {
3551 /*
3552 * Okay someone managed to make a huge token, so he's ready to pay
3553 * for the processing speed.
3554 */
3555 xmlChar *buffer;
3556 int max = len * 2;
Daniel Veillard34e3f642008-07-29 09:02:27 +00003557
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003558 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003559 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003560 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003561 return(NULL);
3562 }
3563 memcpy(buffer, buf, len);
Daniel Veillard34e3f642008-07-29 09:02:27 +00003564 while (xmlIsNameChar(ctxt, c)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003565 if (count++ > 100) {
3566 count = 0;
3567 GROW;
3568 }
3569 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003570 xmlChar *tmp;
3571
Owen Taylor3473f882001-02-23 17:55:21 +00003572 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003573 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00003574 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00003575 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003576 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00003577 xmlFree(buffer);
Owen Taylor3473f882001-02-23 17:55:21 +00003578 return(NULL);
3579 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003580 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003581 }
3582 COPY_BUF(l,buffer,len,c);
3583 NEXTL(l);
3584 c = CUR_CHAR(l);
3585 }
3586 buffer[len] = 0;
3587 return(buffer);
3588 }
3589 }
3590 if (len == 0)
3591 return(NULL);
3592 return(xmlStrndup(buf, len));
3593}
3594
3595/**
3596 * xmlParseEntityValue:
3597 * @ctxt: an XML parser context
3598 * @orig: if non-NULL store a copy of the original entity value
3599 *
3600 * parse a value for ENTITY declarations
3601 *
3602 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
3603 * "'" ([^%&'] | PEReference | Reference)* "'"
3604 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003605 * Returns the EntityValue parsed with reference substituted or NULL
Owen Taylor3473f882001-02-23 17:55:21 +00003606 */
3607
3608xmlChar *
3609xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
3610 xmlChar *buf = NULL;
3611 int len = 0;
3612 int size = XML_PARSER_BUFFER_SIZE;
3613 int c, l;
3614 xmlChar stop;
3615 xmlChar *ret = NULL;
3616 const xmlChar *cur = NULL;
3617 xmlParserInputPtr input;
3618
3619 if (RAW == '"') stop = '"';
3620 else if (RAW == '\'') stop = '\'';
3621 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003622 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003623 return(NULL);
3624 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003625 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003626 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003627 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003628 return(NULL);
3629 }
3630
3631 /*
3632 * The content of the entity definition is copied in a buffer.
3633 */
3634
3635 ctxt->instate = XML_PARSER_ENTITY_VALUE;
3636 input = ctxt->input;
3637 GROW;
3638 NEXT;
3639 c = CUR_CHAR(l);
3640 /*
3641 * NOTE: 4.4.5 Included in Literal
3642 * When a parameter entity reference appears in a literal entity
3643 * value, ... a single or double quote character in the replacement
3644 * text is always treated as a normal data character and will not
3645 * terminate the literal.
3646 * In practice it means we stop the loop only when back at parsing
3647 * the initial entity and the quote is found
3648 */
William M. Brack871611b2003-10-18 04:53:14 +00003649 while ((IS_CHAR(c)) && ((c != stop) || /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00003650 (ctxt->input != input))) {
3651 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003652 xmlChar *tmp;
3653
Owen Taylor3473f882001-02-23 17:55:21 +00003654 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003655 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3656 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003657 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00003658 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00003659 return(NULL);
3660 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003661 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003662 }
3663 COPY_BUF(l,buf,len,c);
3664 NEXTL(l);
3665 /*
3666 * Pop-up of finished entities.
3667 */
3668 while ((RAW == 0) && (ctxt->inputNr > 1)) /* non input consuming */
3669 xmlPopInput(ctxt);
3670
3671 GROW;
3672 c = CUR_CHAR(l);
3673 if (c == 0) {
3674 GROW;
3675 c = CUR_CHAR(l);
3676 }
3677 }
3678 buf[len] = 0;
3679
3680 /*
3681 * Raise problem w.r.t. '&' and '%' being used in non-entities
3682 * reference constructs. Note Charref will be handled in
3683 * xmlStringDecodeEntities()
3684 */
3685 cur = buf;
3686 while (*cur != 0) { /* non input consuming */
3687 if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
3688 xmlChar *name;
3689 xmlChar tmp = *cur;
3690
3691 cur++;
3692 name = xmlParseStringName(ctxt, &cur);
3693 if ((name == NULL) || (*cur != ';')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003694 xmlFatalErrMsgInt(ctxt, XML_ERR_ENTITY_CHAR_ERROR,
Owen Taylor3473f882001-02-23 17:55:21 +00003695 "EntityValue: '%c' forbidden except for entities references\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003696 tmp);
Owen Taylor3473f882001-02-23 17:55:21 +00003697 }
Daniel Veillard5151c062001-10-23 13:10:19 +00003698 if ((tmp == '%') && (ctxt->inSubset == 1) &&
3699 (ctxt->inputNr == 1)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003700 xmlFatalErr(ctxt, XML_ERR_ENTITY_PE_INTERNAL, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003701 }
3702 if (name != NULL)
3703 xmlFree(name);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00003704 if (*cur == 0)
3705 break;
Owen Taylor3473f882001-02-23 17:55:21 +00003706 }
3707 cur++;
3708 }
3709
3710 /*
3711 * Then PEReference entities are substituted.
3712 */
3713 if (c != stop) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003714 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003715 xmlFree(buf);
3716 } else {
3717 NEXT;
3718 /*
3719 * NOTE: 4.4.7 Bypassed
3720 * When a general entity reference appears in the EntityValue in
3721 * an entity declaration, it is bypassed and left as is.
3722 * so XML_SUBSTITUTE_REF is not set here.
3723 */
3724 ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF,
3725 0, 0, 0);
3726 if (orig != NULL)
3727 *orig = buf;
3728 else
3729 xmlFree(buf);
3730 }
3731
3732 return(ret);
3733}
3734
3735/**
Daniel Veillard01c13b52002-12-10 15:19:08 +00003736 * xmlParseAttValueComplex:
3737 * @ctxt: an XML parser context
Daniel Veillard0fb18932003-09-07 09:14:37 +00003738 * @len: the resulting attribute len
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003739 * @normalize: wether to apply the inner normalization
Daniel Veillard01c13b52002-12-10 15:19:08 +00003740 *
3741 * parse a value for an attribute, this is the fallback function
3742 * of xmlParseAttValue() when the attribute parsing requires handling
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003743 * of non-ASCII characters, or normalization compaction.
Daniel Veillard01c13b52002-12-10 15:19:08 +00003744 *
3745 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
3746 */
Daniel Veillard0fb18932003-09-07 09:14:37 +00003747static xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003748xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) {
Daniel Veillarde72c7562002-05-31 09:47:30 +00003749 xmlChar limit = 0;
3750 xmlChar *buf = NULL;
Daniel Veillard68b6e022008-03-31 09:26:00 +00003751 xmlChar *rep = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00003752 int len = 0;
3753 int buf_size = 0;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003754 int c, l, in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003755 xmlChar *current = NULL;
3756 xmlEntityPtr ent;
3757
Owen Taylor3473f882001-02-23 17:55:21 +00003758 if (NXT(0) == '"') {
3759 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3760 limit = '"';
3761 NEXT;
3762 } else if (NXT(0) == '\'') {
3763 limit = '\'';
3764 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3765 NEXT;
3766 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003767 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003768 return(NULL);
3769 }
Daniel Veillard68b6e022008-03-31 09:26:00 +00003770
Owen Taylor3473f882001-02-23 17:55:21 +00003771 /*
3772 * allocate a translation buffer.
3773 */
3774 buf_size = XML_PARSER_BUFFER_SIZE;
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003775 buf = (xmlChar *) xmlMallocAtomic(buf_size * sizeof(xmlChar));
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003776 if (buf == NULL) goto mem_error;
Owen Taylor3473f882001-02-23 17:55:21 +00003777
3778 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003779 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +00003780 */
3781 c = CUR_CHAR(l);
Daniel Veillardfdc91562002-07-01 21:52:03 +00003782 while ((NXT(0) != limit) && /* checked */
Daniel Veillardb9e5acc2007-06-12 13:43:00 +00003783 (IS_CHAR(c)) && (c != '<')) {
Owen Taylor3473f882001-02-23 17:55:21 +00003784 if (c == 0) break;
Daniel Veillardfdc91562002-07-01 21:52:03 +00003785 if (c == '&') {
Daniel Veillard62998c02003-09-15 12:56:36 +00003786 in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003787 if (NXT(1) == '#') {
3788 int val = xmlParseCharRef(ctxt);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003789
Owen Taylor3473f882001-02-23 17:55:21 +00003790 if (val == '&') {
Daniel Veillard319a7422001-09-11 09:27:09 +00003791 if (ctxt->replaceEntities) {
3792 if (len > buf_size - 10) {
Daniel Veillard0161e632008-08-28 15:36:32 +00003793 growBuffer(buf, 10);
Daniel Veillard319a7422001-09-11 09:27:09 +00003794 }
3795 buf[len++] = '&';
3796 } else {
3797 /*
3798 * The reparsing will be done in xmlStringGetNodeList()
3799 * called by the attribute() function in SAX.c
3800 */
Daniel Veillard319a7422001-09-11 09:27:09 +00003801 if (len > buf_size - 10) {
Daniel Veillard0161e632008-08-28 15:36:32 +00003802 growBuffer(buf, 10);
Daniel Veillard319a7422001-09-11 09:27:09 +00003803 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003804 buf[len++] = '&';
3805 buf[len++] = '#';
3806 buf[len++] = '3';
3807 buf[len++] = '8';
3808 buf[len++] = ';';
Owen Taylor3473f882001-02-23 17:55:21 +00003809 }
Daniel Veillarddc171602008-03-26 17:41:38 +00003810 } else if (val != 0) {
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00003811 if (len > buf_size - 10) {
Daniel Veillard0161e632008-08-28 15:36:32 +00003812 growBuffer(buf, 10);
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00003813 }
Owen Taylor3473f882001-02-23 17:55:21 +00003814 len += xmlCopyChar(0, &buf[len], val);
3815 }
3816 } else {
3817 ent = xmlParseEntityRef(ctxt);
Daniel Veillardcba68392008-08-29 12:43:40 +00003818 ctxt->nbentities++;
3819 if (ent != NULL)
3820 ctxt->nbentities += ent->owner;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003821 if ((ent != NULL) &&
3822 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
3823 if (len > buf_size - 10) {
Daniel Veillard0161e632008-08-28 15:36:32 +00003824 growBuffer(buf, 10);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003825 }
3826 if ((ctxt->replaceEntities == 0) &&
3827 (ent->content[0] == '&')) {
3828 buf[len++] = '&';
3829 buf[len++] = '#';
3830 buf[len++] = '3';
3831 buf[len++] = '8';
3832 buf[len++] = ';';
3833 } else {
3834 buf[len++] = ent->content[0];
3835 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003836 } else if ((ent != NULL) &&
3837 (ctxt->replaceEntities != 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003838 if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
3839 rep = xmlStringDecodeEntities(ctxt, ent->content,
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003840 XML_SUBSTITUTE_REF,
3841 0, 0, 0);
Owen Taylor3473f882001-02-23 17:55:21 +00003842 if (rep != NULL) {
3843 current = rep;
3844 while (*current != 0) { /* non input consuming */
Daniel Veillard283d5022009-08-25 17:18:39 +02003845 if ((*current == 0xD) || (*current == 0xA) ||
3846 (*current == 0x9)) {
3847 buf[len++] = 0x20;
3848 current++;
3849 } else
3850 buf[len++] = *current++;
Owen Taylor3473f882001-02-23 17:55:21 +00003851 if (len > buf_size - 10) {
Daniel Veillard0161e632008-08-28 15:36:32 +00003852 growBuffer(buf, 10);
Owen Taylor3473f882001-02-23 17:55:21 +00003853 }
3854 }
3855 xmlFree(rep);
Daniel Veillard68b6e022008-03-31 09:26:00 +00003856 rep = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00003857 }
3858 } else {
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00003859 if (len > buf_size - 10) {
Daniel Veillard0161e632008-08-28 15:36:32 +00003860 growBuffer(buf, 10);
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00003861 }
Owen Taylor3473f882001-02-23 17:55:21 +00003862 if (ent->content != NULL)
3863 buf[len++] = ent->content[0];
3864 }
3865 } else if (ent != NULL) {
3866 int i = xmlStrlen(ent->name);
3867 const xmlChar *cur = ent->name;
3868
3869 /*
3870 * This may look absurd but is needed to detect
3871 * entities problems
3872 */
3873 if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
3874 (ent->content != NULL)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003875 rep = xmlStringDecodeEntities(ctxt, ent->content,
Daniel Veillard68b6e022008-03-31 09:26:00 +00003876 XML_SUBSTITUTE_REF, 0, 0, 0);
3877 if (rep != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +00003878 xmlFree(rep);
Daniel Veillard68b6e022008-03-31 09:26:00 +00003879 rep = NULL;
3880 }
Owen Taylor3473f882001-02-23 17:55:21 +00003881 }
3882
3883 /*
3884 * Just output the reference
3885 */
3886 buf[len++] = '&';
Daniel Veillard0161e632008-08-28 15:36:32 +00003887 while (len > buf_size - i - 10) {
3888 growBuffer(buf, i + 10);
Owen Taylor3473f882001-02-23 17:55:21 +00003889 }
3890 for (;i > 0;i--)
3891 buf[len++] = *cur++;
3892 buf[len++] = ';';
3893 }
3894 }
3895 } else {
3896 if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003897 if ((len != 0) || (!normalize)) {
3898 if ((!normalize) || (!in_space)) {
3899 COPY_BUF(l,buf,len,0x20);
Daniel Veillard0161e632008-08-28 15:36:32 +00003900 while (len > buf_size - 10) {
3901 growBuffer(buf, 10);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003902 }
3903 }
3904 in_space = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003905 }
3906 } else {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003907 in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003908 COPY_BUF(l,buf,len,c);
3909 if (len > buf_size - 10) {
Daniel Veillard0161e632008-08-28 15:36:32 +00003910 growBuffer(buf, 10);
Owen Taylor3473f882001-02-23 17:55:21 +00003911 }
3912 }
3913 NEXTL(l);
3914 }
3915 GROW;
3916 c = CUR_CHAR(l);
3917 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003918 if ((in_space) && (normalize)) {
3919 while (buf[len - 1] == 0x20) len--;
3920 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00003921 buf[len] = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003922 if (RAW == '<') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003923 xmlFatalErr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003924 } else if (RAW != limit) {
Daniel Veillardb9e5acc2007-06-12 13:43:00 +00003925 if ((c != 0) && (!IS_CHAR(c))) {
3926 xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
3927 "invalid character in attribute value\n");
3928 } else {
3929 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
3930 "AttValue: ' expected\n");
3931 }
Owen Taylor3473f882001-02-23 17:55:21 +00003932 } else
3933 NEXT;
Daniel Veillard0fb18932003-09-07 09:14:37 +00003934 if (attlen != NULL) *attlen = len;
Owen Taylor3473f882001-02-23 17:55:21 +00003935 return(buf);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003936
3937mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003938 xmlErrMemory(ctxt, NULL);
Daniel Veillard68b6e022008-03-31 09:26:00 +00003939 if (buf != NULL)
3940 xmlFree(buf);
3941 if (rep != NULL)
3942 xmlFree(rep);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003943 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003944}
3945
3946/**
Daniel Veillard0fb18932003-09-07 09:14:37 +00003947 * xmlParseAttValue:
3948 * @ctxt: an XML parser context
3949 *
3950 * parse a value for an attribute
3951 * Note: the parser won't do substitution of entities here, this
3952 * will be handled later in xmlStringGetNodeList
3953 *
3954 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
3955 * "'" ([^<&'] | Reference)* "'"
3956 *
3957 * 3.3.3 Attribute-Value Normalization:
3958 * Before the value of an attribute is passed to the application or
3959 * checked for validity, the XML processor must normalize it as follows:
3960 * - a character reference is processed by appending the referenced
3961 * character to the attribute value
3962 * - an entity reference is processed by recursively processing the
3963 * replacement text of the entity
3964 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
3965 * appending #x20 to the normalized value, except that only a single
3966 * #x20 is appended for a "#xD#xA" sequence that is part of an external
3967 * parsed entity or the literal entity value of an internal parsed entity
3968 * - other characters are processed by appending them to the normalized value
3969 * If the declared value is not CDATA, then the XML processor must further
3970 * process the normalized attribute value by discarding any leading and
3971 * trailing space (#x20) characters, and by replacing sequences of space
3972 * (#x20) characters by a single space (#x20) character.
3973 * All attributes for which no declaration has been read should be treated
3974 * by a non-validating parser as if declared CDATA.
3975 *
3976 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
3977 */
3978
3979
3980xmlChar *
3981xmlParseAttValue(xmlParserCtxtPtr ctxt) {
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00003982 if ((ctxt == NULL) || (ctxt->input == NULL)) return(NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003983 return(xmlParseAttValueInternal(ctxt, NULL, NULL, 0));
Daniel Veillard0fb18932003-09-07 09:14:37 +00003984}
3985
3986/**
Owen Taylor3473f882001-02-23 17:55:21 +00003987 * xmlParseSystemLiteral:
3988 * @ctxt: an XML parser context
3989 *
3990 * parse an XML Literal
3991 *
3992 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
3993 *
3994 * Returns the SystemLiteral parsed or NULL
3995 */
3996
3997xmlChar *
3998xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
3999 xmlChar *buf = NULL;
4000 int len = 0;
4001 int size = XML_PARSER_BUFFER_SIZE;
4002 int cur, l;
4003 xmlChar stop;
4004 int state = ctxt->instate;
4005 int count = 0;
4006
4007 SHRINK;
4008 if (RAW == '"') {
4009 NEXT;
4010 stop = '"';
4011 } else if (RAW == '\'') {
4012 NEXT;
4013 stop = '\'';
4014 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004015 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004016 return(NULL);
4017 }
4018
Daniel Veillard3c908dc2003-04-19 00:07:51 +00004019 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00004020 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004021 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004022 return(NULL);
4023 }
4024 ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
4025 cur = CUR_CHAR(l);
William M. Brack871611b2003-10-18 04:53:14 +00004026 while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00004027 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00004028 xmlChar *tmp;
4029
Owen Taylor3473f882001-02-23 17:55:21 +00004030 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00004031 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4032 if (tmp == NULL) {
4033 xmlFree(buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004034 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004035 ctxt->instate = (xmlParserInputState) state;
4036 return(NULL);
4037 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00004038 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00004039 }
4040 count++;
4041 if (count > 50) {
4042 GROW;
4043 count = 0;
4044 }
4045 COPY_BUF(l,buf,len,cur);
4046 NEXTL(l);
4047 cur = CUR_CHAR(l);
4048 if (cur == 0) {
4049 GROW;
4050 SHRINK;
4051 cur = CUR_CHAR(l);
4052 }
4053 }
4054 buf[len] = 0;
4055 ctxt->instate = (xmlParserInputState) state;
William M. Brack871611b2003-10-18 04:53:14 +00004056 if (!IS_CHAR(cur)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004057 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004058 } else {
4059 NEXT;
4060 }
4061 return(buf);
4062}
4063
4064/**
4065 * xmlParsePubidLiteral:
4066 * @ctxt: an XML parser context
4067 *
4068 * parse an XML public literal
4069 *
4070 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
4071 *
4072 * Returns the PubidLiteral parsed or NULL.
4073 */
4074
4075xmlChar *
4076xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
4077 xmlChar *buf = NULL;
4078 int len = 0;
4079 int size = XML_PARSER_BUFFER_SIZE;
4080 xmlChar cur;
4081 xmlChar stop;
4082 int count = 0;
Daniel Veillard4a7ae502002-02-18 19:18:17 +00004083 xmlParserInputState oldstate = ctxt->instate;
Owen Taylor3473f882001-02-23 17:55:21 +00004084
4085 SHRINK;
4086 if (RAW == '"') {
4087 NEXT;
4088 stop = '"';
4089 } else if (RAW == '\'') {
4090 NEXT;
4091 stop = '\'';
4092 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004093 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004094 return(NULL);
4095 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00004096 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00004097 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004098 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004099 return(NULL);
4100 }
Daniel Veillard4a7ae502002-02-18 19:18:17 +00004101 ctxt->instate = XML_PARSER_PUBLIC_LITERAL;
Owen Taylor3473f882001-02-23 17:55:21 +00004102 cur = CUR;
William M. Brack76e95df2003-10-18 16:20:14 +00004103 while ((IS_PUBIDCHAR_CH(cur)) && (cur != stop)) { /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00004104 if (len + 1 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00004105 xmlChar *tmp;
4106
Owen Taylor3473f882001-02-23 17:55:21 +00004107 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00004108 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4109 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004110 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00004111 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00004112 return(NULL);
4113 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00004114 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00004115 }
4116 buf[len++] = cur;
4117 count++;
4118 if (count > 50) {
4119 GROW;
4120 count = 0;
4121 }
4122 NEXT;
4123 cur = CUR;
4124 if (cur == 0) {
4125 GROW;
4126 SHRINK;
4127 cur = CUR;
4128 }
4129 }
4130 buf[len] = 0;
4131 if (cur != stop) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004132 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004133 } else {
4134 NEXT;
4135 }
Daniel Veillard4a7ae502002-02-18 19:18:17 +00004136 ctxt->instate = oldstate;
Owen Taylor3473f882001-02-23 17:55:21 +00004137 return(buf);
4138}
4139
Daniel Veillard8ed10722009-08-20 19:17:36 +02004140static void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata);
Daniel Veillard0a119eb2005-07-20 13:46:00 +00004141
/*
 * Lookup table used by the inner loop of the char data fast path in
 * xmlParseCharData(): scanning continues while the entry for the
 * current byte is non-zero.
 *
 * An entry holds its own index when the byte can be skipped over
 * directly (0x9 and 0x20-0x7F) and 0 when the scan has to stop:
 * control characters including CR (0xD) and LF (0xA), '&', '<', ']'
 * and every non-ASCII byte (0x80-0xFF).
 */
static const unsigned char test_char_data[256] = {
    /* 0x00 - 0x1F: only 0x9 is accepted, CR/LF are handled separately */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x20 - 0x3F: '&' (0x26) and '<' (0x3C) stop the scan */
    0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x00, 0x27,
    0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
    0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
    0x38, 0x39, 0x3A, 0x3B, 0x00, 0x3D, 0x3E, 0x3F,
    /* 0x40 - 0x5F: ']' (0x5D) stops the scan */
    0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
    0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F,
    0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
    0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x00, 0x5E, 0x5F,
    /* 0x60 - 0x7F: all accepted */
    0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
    0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
    0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F,
    /* 0x80 - 0xFF: non-ascii, never accepted */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
};
4179
Owen Taylor3473f882001-02-23 17:55:21 +00004180/**
4181 * xmlParseCharData:
4182 * @ctxt: an XML parser context
4183 * @cdata: int indicating whether we are within a CDATA section
4184 *
4185 * parse a CharData section.
4186 * if we are within a CDATA section ']]>' marks an end of section.
4187 *
4188 * The right angle bracket (>) may be represented using the string "&gt;",
4189 * and must, for compatibility, be escaped using "&gt;" or a character
4190 * reference when it appears in the string "]]>" in content, when that
4191 * string is not marking the end of a CDATA section.
4192 *
4193 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
4194 */
4195
4196void
4197xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00004198 const xmlChar *in;
Daniel Veillard48b2f892001-02-25 16:11:03 +00004199 int nbchar = 0;
Daniel Veillard50582112001-03-26 22:52:16 +00004200 int line = ctxt->input->line;
4201 int col = ctxt->input->col;
Daniel Veillard0714c5b2005-01-23 00:01:01 +00004202 int ccol;
Daniel Veillard48b2f892001-02-25 16:11:03 +00004203
4204 SHRINK;
4205 GROW;
4206 /*
4207 * Accelerated common case where input don't need to be
4208 * modified before passing it to the handler.
4209 */
Daniel Veillardfdc91562002-07-01 21:52:03 +00004210 if (!cdata) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00004211 in = ctxt->input->cur;
4212 do {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004213get_more_space:
Daniel Veillard9e264ad2008-01-11 06:10:16 +00004214 while (*in == 0x20) { in++; ctxt->input->col++; }
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004215 if (*in == 0xA) {
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004216 do {
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00004217 ctxt->input->line++; ctxt->input->col = 1;
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004218 in++;
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004219 } while (*in == 0xA);
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004220 goto get_more_space;
4221 }
4222 if (*in == '<') {
4223 nbchar = in - ctxt->input->cur;
4224 if (nbchar > 0) {
4225 const xmlChar *tmp = ctxt->input->cur;
4226 ctxt->input->cur = in;
4227
Daniel Veillard34099b42004-11-04 17:34:35 +00004228 if ((ctxt->sax != NULL) &&
4229 (ctxt->sax->ignorableWhitespace !=
4230 ctxt->sax->characters)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004231 if (areBlanks(ctxt, tmp, nbchar, 1)) {
Daniel Veillard32acf0c2005-03-31 14:12:37 +00004232 if (ctxt->sax->ignorableWhitespace != NULL)
4233 ctxt->sax->ignorableWhitespace(ctxt->userData,
4234 tmp, nbchar);
Daniel Veillard1114d002006-10-12 16:24:35 +00004235 } else {
4236 if (ctxt->sax->characters != NULL)
4237 ctxt->sax->characters(ctxt->userData,
4238 tmp, nbchar);
4239 if (*ctxt->space == -1)
4240 *ctxt->space = -2;
4241 }
Daniel Veillard34099b42004-11-04 17:34:35 +00004242 } else if ((ctxt->sax != NULL) &&
4243 (ctxt->sax->characters != NULL)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004244 ctxt->sax->characters(ctxt->userData,
4245 tmp, nbchar);
4246 }
4247 }
4248 return;
4249 }
Daniel Veillard0714c5b2005-01-23 00:01:01 +00004250
Daniel Veillard3ed155f2001-04-29 19:56:59 +00004251get_more:
Daniel Veillard0714c5b2005-01-23 00:01:01 +00004252 ccol = ctxt->input->col;
Daniel Veillard0a119eb2005-07-20 13:46:00 +00004253 while (test_char_data[*in]) {
4254 in++;
4255 ccol++;
4256 }
Daniel Veillard0714c5b2005-01-23 00:01:01 +00004257 ctxt->input->col = ccol;
Daniel Veillard561b7f82002-03-20 21:55:57 +00004258 if (*in == 0xA) {
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004259 do {
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00004260 ctxt->input->line++; ctxt->input->col = 1;
Daniel Veillard3ed155f2001-04-29 19:56:59 +00004261 in++;
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004262 } while (*in == 0xA);
Daniel Veillard3ed155f2001-04-29 19:56:59 +00004263 goto get_more;
Daniel Veillard561b7f82002-03-20 21:55:57 +00004264 }
4265 if (*in == ']') {
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00004266 if ((in[1] == ']') && (in[2] == '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004267 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00004268 ctxt->input->cur = in;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00004269 return;
4270 }
4271 in++;
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00004272 ctxt->input->col++;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00004273 goto get_more;
4274 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00004275 nbchar = in - ctxt->input->cur;
Daniel Veillard80f32572001-03-07 19:45:40 +00004276 if (nbchar > 0) {
Daniel Veillard34099b42004-11-04 17:34:35 +00004277 if ((ctxt->sax != NULL) &&
4278 (ctxt->sax->ignorableWhitespace !=
Daniel Veillard40412cd2003-09-03 13:28:32 +00004279 ctxt->sax->characters) &&
William M. Brack76e95df2003-10-18 16:20:14 +00004280 (IS_BLANK_CH(*ctxt->input->cur))) {
Daniel Veillarda7374592001-05-10 14:17:55 +00004281 const xmlChar *tmp = ctxt->input->cur;
4282 ctxt->input->cur = in;
Daniel Veillard40412cd2003-09-03 13:28:32 +00004283
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004284 if (areBlanks(ctxt, tmp, nbchar, 0)) {
Daniel Veillard32acf0c2005-03-31 14:12:37 +00004285 if (ctxt->sax->ignorableWhitespace != NULL)
4286 ctxt->sax->ignorableWhitespace(ctxt->userData,
4287 tmp, nbchar);
Daniel Veillard1114d002006-10-12 16:24:35 +00004288 } else {
4289 if (ctxt->sax->characters != NULL)
4290 ctxt->sax->characters(ctxt->userData,
4291 tmp, nbchar);
4292 if (*ctxt->space == -1)
4293 *ctxt->space = -2;
4294 }
Daniel Veillard3ed27bd2001-06-17 17:58:17 +00004295 line = ctxt->input->line;
4296 col = ctxt->input->col;
Daniel Veillard34099b42004-11-04 17:34:35 +00004297 } else if (ctxt->sax != NULL) {
Daniel Veillard80f32572001-03-07 19:45:40 +00004298 if (ctxt->sax->characters != NULL)
4299 ctxt->sax->characters(ctxt->userData,
4300 ctxt->input->cur, nbchar);
Daniel Veillard3ed27bd2001-06-17 17:58:17 +00004301 line = ctxt->input->line;
4302 col = ctxt->input->col;
Daniel Veillard80f32572001-03-07 19:45:40 +00004303 }
Daniel Veillard1dc9feb2008-11-17 15:59:21 +00004304 /* something really bad happened in the SAX callback */
4305 if (ctxt->instate != XML_PARSER_CONTENT)
4306 return;
Daniel Veillard48b2f892001-02-25 16:11:03 +00004307 }
4308 ctxt->input->cur = in;
Daniel Veillard561b7f82002-03-20 21:55:57 +00004309 if (*in == 0xD) {
4310 in++;
4311 if (*in == 0xA) {
4312 ctxt->input->cur = in;
Daniel Veillard48b2f892001-02-25 16:11:03 +00004313 in++;
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00004314 ctxt->input->line++; ctxt->input->col = 1;
Daniel Veillard561b7f82002-03-20 21:55:57 +00004315 continue; /* while */
Daniel Veillard48b2f892001-02-25 16:11:03 +00004316 }
Daniel Veillard561b7f82002-03-20 21:55:57 +00004317 in--;
4318 }
4319 if (*in == '<') {
4320 return;
4321 }
4322 if (*in == '&') {
4323 return;
Daniel Veillard48b2f892001-02-25 16:11:03 +00004324 }
4325 SHRINK;
4326 GROW;
4327 in = ctxt->input->cur;
William M. Brackc07329e2003-09-08 01:57:30 +00004328 } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
Daniel Veillard48b2f892001-02-25 16:11:03 +00004329 nbchar = 0;
4330 }
Daniel Veillard50582112001-03-26 22:52:16 +00004331 ctxt->input->line = line;
4332 ctxt->input->col = col;
Daniel Veillard48b2f892001-02-25 16:11:03 +00004333 xmlParseCharDataComplex(ctxt, cdata);
4334}
4335
Daniel Veillard01c13b52002-12-10 15:19:08 +00004336/**
4337 * xmlParseCharDataComplex:
4338 * @ctxt: an XML parser context
4339 * @cdata: int indicating whether we are within a CDATA section
4340 *
4341 * parse a CharData section.this is the fallback function
4342 * of xmlParseCharData() when the parsing requires handling
4343 * of non-ASCII characters.
4344 */
Daniel Veillard8ed10722009-08-20 19:17:36 +02004345static void
Daniel Veillard48b2f892001-02-25 16:11:03 +00004346xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) {
Owen Taylor3473f882001-02-23 17:55:21 +00004347 xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
4348 int nbchar = 0;
4349 int cur, l;
4350 int count = 0;
4351
4352 SHRINK;
4353 GROW;
4354 cur = CUR_CHAR(l);
Daniel Veillardfdc91562002-07-01 21:52:03 +00004355 while ((cur != '<') && /* checked */
4356 (cur != '&') &&
William M. Brack871611b2003-10-18 04:53:14 +00004357 (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
Owen Taylor3473f882001-02-23 17:55:21 +00004358 if ((cur == ']') && (NXT(1) == ']') &&
4359 (NXT(2) == '>')) {
4360 if (cdata) break;
4361 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004362 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004363 }
4364 }
4365 COPY_BUF(l,buf,nbchar,cur);
4366 if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
Daniel Veillard092643b2003-09-25 14:29:29 +00004367 buf[nbchar] = 0;
4368
Owen Taylor3473f882001-02-23 17:55:21 +00004369 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00004370 * OK the segment is to be consumed as chars.
Owen Taylor3473f882001-02-23 17:55:21 +00004371 */
4372 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004373 if (areBlanks(ctxt, buf, nbchar, 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00004374 if (ctxt->sax->ignorableWhitespace != NULL)
4375 ctxt->sax->ignorableWhitespace(ctxt->userData,
4376 buf, nbchar);
4377 } else {
4378 if (ctxt->sax->characters != NULL)
4379 ctxt->sax->characters(ctxt->userData, buf, nbchar);
Daniel Veillard1114d002006-10-12 16:24:35 +00004380 if ((ctxt->sax->characters !=
4381 ctxt->sax->ignorableWhitespace) &&
4382 (*ctxt->space == -1))
4383 *ctxt->space = -2;
Owen Taylor3473f882001-02-23 17:55:21 +00004384 }
4385 }
4386 nbchar = 0;
Daniel Veillard1dc9feb2008-11-17 15:59:21 +00004387 /* something really bad happened in the SAX callback */
4388 if (ctxt->instate != XML_PARSER_CONTENT)
4389 return;
Owen Taylor3473f882001-02-23 17:55:21 +00004390 }
4391 count++;
4392 if (count > 50) {
4393 GROW;
4394 count = 0;
4395 }
4396 NEXTL(l);
4397 cur = CUR_CHAR(l);
4398 }
4399 if (nbchar != 0) {
Daniel Veillard092643b2003-09-25 14:29:29 +00004400 buf[nbchar] = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00004401 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00004402 * OK the segment is to be consumed as chars.
Owen Taylor3473f882001-02-23 17:55:21 +00004403 */
4404 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004405 if (areBlanks(ctxt, buf, nbchar, 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00004406 if (ctxt->sax->ignorableWhitespace != NULL)
4407 ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
4408 } else {
4409 if (ctxt->sax->characters != NULL)
4410 ctxt->sax->characters(ctxt->userData, buf, nbchar);
Daniel Veillard1114d002006-10-12 16:24:35 +00004411 if ((ctxt->sax->characters != ctxt->sax->ignorableWhitespace) &&
4412 (*ctxt->space == -1))
4413 *ctxt->space = -2;
Owen Taylor3473f882001-02-23 17:55:21 +00004414 }
4415 }
4416 }
Daniel Veillard3b1478b2006-04-24 08:50:10 +00004417 if ((cur != 0) && (!IS_CHAR(cur))) {
4418 /* Generate the error and skip the offending character */
4419 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4420 "PCDATA invalid Char value %d\n",
4421 cur);
4422 NEXTL(l);
4423 }
Owen Taylor3473f882001-02-23 17:55:21 +00004424}
4425
4426/**
4427 * xmlParseExternalID:
4428 * @ctxt: an XML parser context
4429 * @publicID: a xmlChar** receiving PubidLiteral
4430 * @strict: indicate whether we should restrict parsing to only
4431 * production [75], see NOTE below
4432 *
4433 * Parse an External ID or a Public ID
4434 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00004435 * NOTE: Productions [75] and [83] interact badly since [75] can generate
Owen Taylor3473f882001-02-23 17:55:21 +00004436 * 'PUBLIC' S PubidLiteral S SystemLiteral
4437 *
4438 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
4439 * | 'PUBLIC' S PubidLiteral S SystemLiteral
4440 *
4441 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
4442 *
4443 * Returns the function returns SystemLiteral and in the second
4444 * case publicID receives PubidLiteral, is strict is off
4445 * it is possible to return NULL and have publicID set.
4446 */
4447
4448xmlChar *
4449xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
4450 xmlChar *URI = NULL;
4451
4452 SHRINK;
Daniel Veillard146c9122001-03-22 15:22:27 +00004453
4454 *publicID = NULL;
Daniel Veillarda07050d2003-10-19 14:46:32 +00004455 if (CMP6(CUR_PTR, 'S', 'Y', 'S', 'T', 'E', 'M')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004456 SKIP(6);
William M. Brack76e95df2003-10-18 16:20:14 +00004457 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004458 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4459 "Space required after 'SYSTEM'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004460 }
4461 SKIP_BLANKS;
4462 URI = xmlParseSystemLiteral(ctxt);
4463 if (URI == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004464 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004465 }
Daniel Veillarda07050d2003-10-19 14:46:32 +00004466 } else if (CMP6(CUR_PTR, 'P', 'U', 'B', 'L', 'I', 'C')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004467 SKIP(6);
William M. Brack76e95df2003-10-18 16:20:14 +00004468 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004469 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004470 "Space required after 'PUBLIC'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004471 }
4472 SKIP_BLANKS;
4473 *publicID = xmlParsePubidLiteral(ctxt);
4474 if (*publicID == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004475 xmlFatalErr(ctxt, XML_ERR_PUBID_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004476 }
4477 if (strict) {
4478 /*
4479 * We don't handle [83] so "S SystemLiteral" is required.
4480 */
William M. Brack76e95df2003-10-18 16:20:14 +00004481 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004482 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004483 "Space required after the Public Identifier\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004484 }
4485 } else {
4486 /*
4487 * We handle [83] so we return immediately, if
4488 * "S SystemLiteral" is not detected. From a purely parsing
4489 * point of view that's a nice mess.
4490 */
4491 const xmlChar *ptr;
4492 GROW;
4493
4494 ptr = CUR_PTR;
William M. Brack76e95df2003-10-18 16:20:14 +00004495 if (!IS_BLANK_CH(*ptr)) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004496
William M. Brack76e95df2003-10-18 16:20:14 +00004497 while (IS_BLANK_CH(*ptr)) ptr++; /* TODO: dangerous, fix ! */
Owen Taylor3473f882001-02-23 17:55:21 +00004498 if ((*ptr != '\'') && (*ptr != '"')) return(NULL);
4499 }
4500 SKIP_BLANKS;
4501 URI = xmlParseSystemLiteral(ctxt);
4502 if (URI == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004503 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004504 }
4505 }
4506 return(URI);
4507}
4508
4509/**
Daniel Veillard4c778d82005-01-23 17:37:44 +00004510 * xmlParseCommentComplex:
Owen Taylor3473f882001-02-23 17:55:21 +00004511 * @ctxt: an XML parser context
Daniel Veillard4c778d82005-01-23 17:37:44 +00004512 * @buf: the already parsed part of the buffer
4513 * @len: number of bytes filles in the buffer
4514 * @size: allocated size of the buffer
Owen Taylor3473f882001-02-23 17:55:21 +00004515 *
4516 * Skip an XML (SGML) comment <!-- .... -->
4517 * The spec says that "For compatibility, the string "--" (double-hyphen)
4518 * must not occur within comments. "
Daniel Veillard4c778d82005-01-23 17:37:44 +00004519 * This is the slow routine in case the accelerator for ascii didn't work
Owen Taylor3473f882001-02-23 17:55:21 +00004520 *
4521 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4522 */
Daniel Veillard4c778d82005-01-23 17:37:44 +00004523static void
4524xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar *buf, int len, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +00004525 int q, ql;
4526 int r, rl;
4527 int cur, l;
Owen Taylor3473f882001-02-23 17:55:21 +00004528 int count = 0;
Daniel Veillard051d52c2008-07-29 16:44:59 +00004529 int inputid;
4530
4531 inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00004532
Owen Taylor3473f882001-02-23 17:55:21 +00004533 if (buf == NULL) {
Daniel Veillard4c778d82005-01-23 17:37:44 +00004534 len = 0;
4535 size = XML_PARSER_BUFFER_SIZE;
4536 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4537 if (buf == NULL) {
4538 xmlErrMemory(ctxt, NULL);
4539 return;
4540 }
Owen Taylor3473f882001-02-23 17:55:21 +00004541 }
William M. Brackbf9a73d2007-02-09 00:07:07 +00004542 GROW; /* Assure there's enough input data */
Owen Taylor3473f882001-02-23 17:55:21 +00004543 q = CUR_CHAR(ql);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00004544 if (q == 0)
4545 goto not_terminated;
Daniel Veillardda629342007-08-01 07:49:06 +00004546 if (!IS_CHAR(q)) {
4547 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4548 "xmlParseComment: invalid xmlChar value %d\n",
4549 q);
4550 xmlFree (buf);
4551 return;
4552 }
Owen Taylor3473f882001-02-23 17:55:21 +00004553 NEXTL(ql);
4554 r = CUR_CHAR(rl);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00004555 if (r == 0)
4556 goto not_terminated;
Daniel Veillardda629342007-08-01 07:49:06 +00004557 if (!IS_CHAR(r)) {
4558 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4559 "xmlParseComment: invalid xmlChar value %d\n",
4560 q);
4561 xmlFree (buf);
4562 return;
4563 }
Owen Taylor3473f882001-02-23 17:55:21 +00004564 NEXTL(rl);
4565 cur = CUR_CHAR(l);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00004566 if (cur == 0)
4567 goto not_terminated;
William M. Brack871611b2003-10-18 04:53:14 +00004568 while (IS_CHAR(cur) && /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00004569 ((cur != '>') ||
4570 (r != '-') || (q != '-'))) {
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00004571 if ((r == '-') && (q == '-')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004572 xmlFatalErr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004573 }
4574 if (len + 5 >= size) {
William M. Bracka3215c72004-07-31 16:24:01 +00004575 xmlChar *new_buf;
Owen Taylor3473f882001-02-23 17:55:21 +00004576 size *= 2;
William M. Bracka3215c72004-07-31 16:24:01 +00004577 new_buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4578 if (new_buf == NULL) {
4579 xmlFree (buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004580 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004581 return;
4582 }
William M. Bracka3215c72004-07-31 16:24:01 +00004583 buf = new_buf;
Owen Taylor3473f882001-02-23 17:55:21 +00004584 }
4585 COPY_BUF(ql,buf,len,q);
4586 q = r;
4587 ql = rl;
4588 r = cur;
4589 rl = l;
4590
4591 count++;
4592 if (count > 50) {
4593 GROW;
4594 count = 0;
4595 }
4596 NEXTL(l);
4597 cur = CUR_CHAR(l);
4598 if (cur == 0) {
4599 SHRINK;
4600 GROW;
4601 cur = CUR_CHAR(l);
4602 }
4603 }
4604 buf[len] = 0;
Daniel Veillardda629342007-08-01 07:49:06 +00004605 if (cur == 0) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00004606 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00004607 "Comment not terminated \n<!--%.50s\n", buf);
Daniel Veillardda629342007-08-01 07:49:06 +00004608 } else if (!IS_CHAR(cur)) {
4609 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4610 "xmlParseComment: invalid xmlChar value %d\n",
4611 cur);
Owen Taylor3473f882001-02-23 17:55:21 +00004612 } else {
Daniel Veillard051d52c2008-07-29 16:44:59 +00004613 if (inputid != ctxt->input->id) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004614 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4615 "Comment doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004616 }
4617 NEXT;
4618 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
4619 (!ctxt->disableSAX))
4620 ctxt->sax->comment(ctxt->userData, buf);
Owen Taylor3473f882001-02-23 17:55:21 +00004621 }
Daniel Veillardda629342007-08-01 07:49:06 +00004622 xmlFree(buf);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00004623 return;
4624not_terminated:
4625 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4626 "Comment not terminated\n", NULL);
4627 xmlFree(buf);
Daniel Veillardda629342007-08-01 07:49:06 +00004628 return;
Owen Taylor3473f882001-02-23 17:55:21 +00004629}
Daniel Veillardda629342007-08-01 07:49:06 +00004630
Daniel Veillard4c778d82005-01-23 17:37:44 +00004631/**
4632 * xmlParseComment:
4633 * @ctxt: an XML parser context
4634 *
4635 * Skip an XML (SGML) comment <!-- .... -->
4636 * The spec says that "For compatibility, the string "--" (double-hyphen)
4637 * must not occur within comments. "
4638 *
4639 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4640 */
4641void
4642xmlParseComment(xmlParserCtxtPtr ctxt) {
4643 xmlChar *buf = NULL;
4644 int size = XML_PARSER_BUFFER_SIZE;
Daniel Veillard316a5c32005-01-23 22:56:39 +00004645 int len = 0;
Daniel Veillard4c778d82005-01-23 17:37:44 +00004646 xmlParserInputState state;
4647 const xmlChar *in;
4648 int nbchar = 0, ccol;
Daniel Veillard051d52c2008-07-29 16:44:59 +00004649 int inputid;
Daniel Veillard4c778d82005-01-23 17:37:44 +00004650
4651 /*
4652 * Check that there is a comment right here.
4653 */
4654 if ((RAW != '<') || (NXT(1) != '!') ||
4655 (NXT(2) != '-') || (NXT(3) != '-')) return;
Daniel Veillard4c778d82005-01-23 17:37:44 +00004656 state = ctxt->instate;
4657 ctxt->instate = XML_PARSER_COMMENT;
Daniel Veillard051d52c2008-07-29 16:44:59 +00004658 inputid = ctxt->input->id;
Daniel Veillard4c778d82005-01-23 17:37:44 +00004659 SKIP(4);
4660 SHRINK;
4661 GROW;
4662
4663 /*
4664 * Accelerated common case where input don't need to be
4665 * modified before passing it to the handler.
4666 */
4667 in = ctxt->input->cur;
4668 do {
4669 if (*in == 0xA) {
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004670 do {
Daniel Veillard4c778d82005-01-23 17:37:44 +00004671 ctxt->input->line++; ctxt->input->col = 1;
4672 in++;
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004673 } while (*in == 0xA);
Daniel Veillard4c778d82005-01-23 17:37:44 +00004674 }
4675get_more:
4676 ccol = ctxt->input->col;
4677 while (((*in > '-') && (*in <= 0x7F)) ||
4678 ((*in >= 0x20) && (*in < '-')) ||
4679 (*in == 0x09)) {
4680 in++;
4681 ccol++;
4682 }
4683 ctxt->input->col = ccol;
4684 if (*in == 0xA) {
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004685 do {
Daniel Veillard4c778d82005-01-23 17:37:44 +00004686 ctxt->input->line++; ctxt->input->col = 1;
4687 in++;
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004688 } while (*in == 0xA);
Daniel Veillard4c778d82005-01-23 17:37:44 +00004689 goto get_more;
4690 }
4691 nbchar = in - ctxt->input->cur;
4692 /*
4693 * save current set of data
4694 */
4695 if (nbchar > 0) {
4696 if ((ctxt->sax != NULL) &&
4697 (ctxt->sax->comment != NULL)) {
4698 if (buf == NULL) {
4699 if ((*in == '-') && (in[1] == '-'))
4700 size = nbchar + 1;
4701 else
4702 size = XML_PARSER_BUFFER_SIZE + nbchar;
4703 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4704 if (buf == NULL) {
4705 xmlErrMemory(ctxt, NULL);
4706 ctxt->instate = state;
4707 return;
4708 }
4709 len = 0;
4710 } else if (len + nbchar + 1 >= size) {
4711 xmlChar *new_buf;
4712 size += len + nbchar + XML_PARSER_BUFFER_SIZE;
4713 new_buf = (xmlChar *) xmlRealloc(buf,
4714 size * sizeof(xmlChar));
4715 if (new_buf == NULL) {
4716 xmlFree (buf);
4717 xmlErrMemory(ctxt, NULL);
4718 ctxt->instate = state;
4719 return;
4720 }
4721 buf = new_buf;
4722 }
4723 memcpy(&buf[len], ctxt->input->cur, nbchar);
4724 len += nbchar;
4725 buf[len] = 0;
4726 }
4727 }
4728 ctxt->input->cur = in;
Daniel Veillard9b528c72006-02-05 03:06:15 +00004729 if (*in == 0xA) {
4730 in++;
4731 ctxt->input->line++; ctxt->input->col = 1;
4732 }
Daniel Veillard4c778d82005-01-23 17:37:44 +00004733 if (*in == 0xD) {
4734 in++;
4735 if (*in == 0xA) {
4736 ctxt->input->cur = in;
4737 in++;
4738 ctxt->input->line++; ctxt->input->col = 1;
4739 continue; /* while */
4740 }
4741 in--;
4742 }
4743 SHRINK;
4744 GROW;
4745 in = ctxt->input->cur;
4746 if (*in == '-') {
4747 if (in[1] == '-') {
4748 if (in[2] == '>') {
Daniel Veillard051d52c2008-07-29 16:44:59 +00004749 if (ctxt->input->id != inputid) {
4750 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4751 "comment doesn't start and stop in the same entity\n");
4752 }
Daniel Veillard4c778d82005-01-23 17:37:44 +00004753 SKIP(3);
4754 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
4755 (!ctxt->disableSAX)) {
4756 if (buf != NULL)
4757 ctxt->sax->comment(ctxt->userData, buf);
4758 else
4759 ctxt->sax->comment(ctxt->userData, BAD_CAST "");
4760 }
4761 if (buf != NULL)
4762 xmlFree(buf);
4763 ctxt->instate = state;
4764 return;
4765 }
4766 if (buf != NULL)
4767 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4768 "Comment not terminated \n<!--%.50s\n",
4769 buf);
4770 else
4771 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4772 "Comment not terminated \n", NULL);
4773 in++;
4774 ctxt->input->col++;
4775 }
4776 in++;
4777 ctxt->input->col++;
4778 goto get_more;
4779 }
4780 } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
4781 xmlParseCommentComplex(ctxt, buf, len, size);
4782 ctxt->instate = state;
4783 return;
4784}
4785
Owen Taylor3473f882001-02-23 17:55:21 +00004786
4787/**
4788 * xmlParsePITarget:
4789 * @ctxt: an XML parser context
4790 *
4791 * parse the name of a PI
4792 *
4793 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
4794 *
4795 * Returns the PITarget name or NULL
4796 */
4797
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004798const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00004799xmlParsePITarget(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004800 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00004801
4802 name = xmlParseName(ctxt);
4803 if ((name != NULL) &&
4804 ((name[0] == 'x') || (name[0] == 'X')) &&
4805 ((name[1] == 'm') || (name[1] == 'M')) &&
4806 ((name[2] == 'l') || (name[2] == 'L'))) {
4807 int i;
4808 if ((name[0] == 'x') && (name[1] == 'm') &&
4809 (name[2] == 'l') && (name[3] == 0)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004810 xmlFatalErrMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
Owen Taylor3473f882001-02-23 17:55:21 +00004811 "XML declaration allowed only at the start of the document\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004812 return(name);
4813 } else if (name[3] == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004814 xmlFatalErr(ctxt, XML_ERR_RESERVED_XML_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004815 return(name);
4816 }
4817 for (i = 0;;i++) {
4818 if (xmlW3CPIs[i] == NULL) break;
4819 if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
4820 return(name);
4821 }
Daniel Veillard24eb9782003-10-04 21:08:09 +00004822 xmlWarningMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
4823 "xmlParsePITarget: invalid name prefix 'xml'\n",
4824 NULL, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004825 }
Daniel Veillard37334572008-07-31 08:20:02 +00004826 if ((name != NULL) && (xmlStrchr(name, ':') != NULL)) {
4827 xmlNsErr(ctxt, XML_NS_ERR_COLON,
4828 "colon are forbidden from PI names '%s'\n", name, NULL, NULL);
4829 }
Owen Taylor3473f882001-02-23 17:55:21 +00004830 return(name);
4831}
4832
#ifdef LIBXML_CATALOG_ENABLED
/**
 * xmlParseCatalogPI:
 * @ctxt:  an XML parser context
 * @catalog:  the PI value string
 *
 * Parse the content of an XML Catalog Processing Instruction.
 *
 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
 *
 * The expected content is: optional blanks, the word "catalog", optional
 * blanks, '=', optional blanks, a single- or double-quoted URL and
 * optional trailing blanks. On success the URL is added to the local
 * catalogs of the parsing context (ctxt->catalogs) so that it can be used
 * for resolutions further down in the document.
 *
 * Any syntax problem produces an XML_WAR_CATALOG_PI warning, except a
 * missing '=' after "catalog", which makes the function return silently.
 */

static void
xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
    const xmlChar *p = catalog;
    const xmlChar *start;
    xmlChar *url = NULL;
    xmlChar quote;

    /* pseudo-attribute name */
    while (IS_BLANK_CH(*p))
        p++;
    if (xmlStrncmp(p, BAD_CAST"catalog", 7) != 0)
        goto error;
    p += 7;

    /* '=' sign; its absence is not reported */
    while (IS_BLANK_CH(*p))
        p++;
    if (*p != '=')
        return;
    p++;

    /* opening quote */
    while (IS_BLANK_CH(*p))
        p++;
    quote = *p;
    if ((quote != '\'') && (quote != '"'))
        goto error;
    p++;

    /* value, up to the matching quote */
    start = p;
    while ((*p != 0) && (*p != quote))
        p++;
    if (*p == 0)
        goto error;
    url = xmlStrndup(start, p - start);
    p++;

    /* only blanks may follow the closing quote */
    while (IS_BLANK_CH(*p))
        p++;
    if (*p != 0)
        goto error;

    if (url != NULL) {
        ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, url);
        xmlFree(url);
    }
    return;

error:
    xmlWarningMsg(ctxt, XML_WAR_CATALOG_PI,
                  "Catalog PI syntax error: %s\n",
                  catalog, NULL);
    if (url != NULL)
        xmlFree(url);
}
#endif
4894
Owen Taylor3473f882001-02-23 17:55:21 +00004895/**
4896 * xmlParsePI:
4897 * @ctxt: an XML parser context
4898 *
4899 * parse an XML Processing Instruction.
4900 *
4901 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
4902 *
4903 * The processing is transfered to SAX once parsed.
4904 */
4905
4906void
4907xmlParsePI(xmlParserCtxtPtr ctxt) {
4908 xmlChar *buf = NULL;
4909 int len = 0;
4910 int size = XML_PARSER_BUFFER_SIZE;
4911 int cur, l;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004912 const xmlChar *target;
Owen Taylor3473f882001-02-23 17:55:21 +00004913 xmlParserInputState state;
4914 int count = 0;
4915
4916 if ((RAW == '<') && (NXT(1) == '?')) {
4917 xmlParserInputPtr input = ctxt->input;
4918 state = ctxt->instate;
4919 ctxt->instate = XML_PARSER_PI;
4920 /*
4921 * this is a Processing Instruction.
4922 */
4923 SKIP(2);
4924 SHRINK;
4925
4926 /*
4927 * Parse the target name and check for special support like
4928 * namespace.
4929 */
4930 target = xmlParsePITarget(ctxt);
4931 if (target != NULL) {
4932 if ((RAW == '?') && (NXT(1) == '>')) {
4933 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004934 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4935 "PI declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004936 }
4937 SKIP(2);
4938
4939 /*
4940 * SAX: PI detected.
4941 */
4942 if ((ctxt->sax) && (!ctxt->disableSAX) &&
4943 (ctxt->sax->processingInstruction != NULL))
4944 ctxt->sax->processingInstruction(ctxt->userData,
4945 target, NULL);
4946 ctxt->instate = state;
Owen Taylor3473f882001-02-23 17:55:21 +00004947 return;
4948 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00004949 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00004950 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004951 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004952 ctxt->instate = state;
4953 return;
4954 }
4955 cur = CUR;
4956 if (!IS_BLANK(cur)) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00004957 xmlFatalErrMsgStr(ctxt, XML_ERR_SPACE_REQUIRED,
4958 "ParsePI: PI %s space expected\n", target);
Owen Taylor3473f882001-02-23 17:55:21 +00004959 }
4960 SKIP_BLANKS;
4961 cur = CUR_CHAR(l);
William M. Brack871611b2003-10-18 04:53:14 +00004962 while (IS_CHAR(cur) && /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00004963 ((cur != '?') || (NXT(1) != '>'))) {
4964 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00004965 xmlChar *tmp;
4966
Owen Taylor3473f882001-02-23 17:55:21 +00004967 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00004968 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4969 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004970 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00004971 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00004972 ctxt->instate = state;
4973 return;
4974 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00004975 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00004976 }
4977 count++;
4978 if (count > 50) {
4979 GROW;
4980 count = 0;
4981 }
4982 COPY_BUF(l,buf,len,cur);
4983 NEXTL(l);
4984 cur = CUR_CHAR(l);
4985 if (cur == 0) {
4986 SHRINK;
4987 GROW;
4988 cur = CUR_CHAR(l);
4989 }
4990 }
4991 buf[len] = 0;
4992 if (cur != '?') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00004993 xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
4994 "ParsePI: PI %s never end ...\n", target);
Owen Taylor3473f882001-02-23 17:55:21 +00004995 } else {
4996 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004997 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4998 "PI declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004999 }
5000 SKIP(2);
5001
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00005002#ifdef LIBXML_CATALOG_ENABLED
5003 if (((state == XML_PARSER_MISC) ||
5004 (state == XML_PARSER_START)) &&
5005 (xmlStrEqual(target, XML_CATALOG_PI))) {
5006 xmlCatalogAllow allow = xmlCatalogGetDefaults();
5007 if ((allow == XML_CATA_ALLOW_DOCUMENT) ||
5008 (allow == XML_CATA_ALLOW_ALL))
5009 xmlParseCatalogPI(ctxt, buf);
5010 }
5011#endif
5012
5013
Owen Taylor3473f882001-02-23 17:55:21 +00005014 /*
5015 * SAX: PI detected.
5016 */
5017 if ((ctxt->sax) && (!ctxt->disableSAX) &&
5018 (ctxt->sax->processingInstruction != NULL))
5019 ctxt->sax->processingInstruction(ctxt->userData,
5020 target, buf);
5021 }
5022 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00005023 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005024 xmlFatalErr(ctxt, XML_ERR_PI_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005025 }
5026 ctxt->instate = state;
5027 }
5028}
5029
5030/**
5031 * xmlParseNotationDecl:
5032 * @ctxt: an XML parser context
5033 *
5034 * parse a notation declaration
5035 *
5036 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
5037 *
5038 * Hence there is actually 3 choices:
5039 * 'PUBLIC' S PubidLiteral
5040 * 'PUBLIC' S PubidLiteral S SystemLiteral
5041 * and 'SYSTEM' S SystemLiteral
5042 *
5043 * See the NOTE on xmlParseExternalID().
5044 */
5045
5046void
5047xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005048 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00005049 xmlChar *Pubid;
5050 xmlChar *Systemid;
5051
Daniel Veillarda07050d2003-10-19 14:46:32 +00005052 if (CMP10(CUR_PTR, '<', '!', 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005053 xmlParserInputPtr input = ctxt->input;
5054 SHRINK;
5055 SKIP(10);
William M. Brack76e95df2003-10-18 16:20:14 +00005056 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005057 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5058 "Space required after '<!NOTATION'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005059 return;
5060 }
5061 SKIP_BLANKS;
5062
Daniel Veillard76d66f42001-05-16 21:05:17 +00005063 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005064 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005065 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005066 return;
5067 }
William M. Brack76e95df2003-10-18 16:20:14 +00005068 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005069 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005070 "Space required after the NOTATION name'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005071 return;
5072 }
Daniel Veillard37334572008-07-31 08:20:02 +00005073 if (xmlStrchr(name, ':') != NULL) {
5074 xmlNsErr(ctxt, XML_NS_ERR_COLON,
5075 "colon are forbidden from notation names '%s'\n",
5076 name, NULL, NULL);
5077 }
Owen Taylor3473f882001-02-23 17:55:21 +00005078 SKIP_BLANKS;
5079
5080 /*
5081 * Parse the IDs.
5082 */
5083 Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
5084 SKIP_BLANKS;
5085
5086 if (RAW == '>') {
5087 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005088 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5089 "Notation declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005090 }
5091 NEXT;
5092 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5093 (ctxt->sax->notationDecl != NULL))
5094 ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
5095 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005096 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005097 }
Owen Taylor3473f882001-02-23 17:55:21 +00005098 if (Systemid != NULL) xmlFree(Systemid);
5099 if (Pubid != NULL) xmlFree(Pubid);
5100 }
5101}
5102
5103/**
5104 * xmlParseEntityDecl:
5105 * @ctxt: an XML parser context
5106 *
5107 * parse <!ENTITY declarations
5108 *
5109 * [70] EntityDecl ::= GEDecl | PEDecl
5110 *
5111 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
5112 *
5113 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
5114 *
5115 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
5116 *
5117 * [74] PEDef ::= EntityValue | ExternalID
5118 *
5119 * [76] NDataDecl ::= S 'NDATA' S Name
5120 *
5121 * [ VC: Notation Declared ]
5122 * The Name must match the declared name of a notation.
5123 */
5124
5125void
5126xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005127 const xmlChar *name = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005128 xmlChar *value = NULL;
5129 xmlChar *URI = NULL, *literal = NULL;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005130 const xmlChar *ndata = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005131 int isParameter = 0;
5132 xmlChar *orig = NULL;
Daniel Veillardf5582f12002-06-11 10:08:16 +00005133 int skipped;
Owen Taylor3473f882001-02-23 17:55:21 +00005134
Daniel Veillard4c778d82005-01-23 17:37:44 +00005135 /* GROW; done in the caller */
Daniel Veillarda07050d2003-10-19 14:46:32 +00005136 if (CMP8(CUR_PTR, '<', '!', 'E', 'N', 'T', 'I', 'T', 'Y')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005137 xmlParserInputPtr input = ctxt->input;
Owen Taylor3473f882001-02-23 17:55:21 +00005138 SHRINK;
5139 SKIP(8);
Daniel Veillardf5582f12002-06-11 10:08:16 +00005140 skipped = SKIP_BLANKS;
5141 if (skipped == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005142 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5143 "Space required after '<!ENTITY'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005144 }
Owen Taylor3473f882001-02-23 17:55:21 +00005145
5146 if (RAW == '%') {
5147 NEXT;
Daniel Veillardf5582f12002-06-11 10:08:16 +00005148 skipped = SKIP_BLANKS;
5149 if (skipped == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005150 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5151 "Space required after '%'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005152 }
Owen Taylor3473f882001-02-23 17:55:21 +00005153 isParameter = 1;
5154 }
5155
Daniel Veillard76d66f42001-05-16 21:05:17 +00005156 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005157 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005158 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5159 "xmlParseEntityDecl: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005160 return;
5161 }
Daniel Veillard37334572008-07-31 08:20:02 +00005162 if (xmlStrchr(name, ':') != NULL) {
5163 xmlNsErr(ctxt, XML_NS_ERR_COLON,
5164 "colon are forbidden from entities names '%s'\n",
5165 name, NULL, NULL);
5166 }
Daniel Veillardf5582f12002-06-11 10:08:16 +00005167 skipped = SKIP_BLANKS;
5168 if (skipped == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005169 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5170 "Space required after the entity name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005171 }
Owen Taylor3473f882001-02-23 17:55:21 +00005172
Daniel Veillardf5582f12002-06-11 10:08:16 +00005173 ctxt->instate = XML_PARSER_ENTITY_DECL;
Owen Taylor3473f882001-02-23 17:55:21 +00005174 /*
5175 * handle the various case of definitions...
5176 */
5177 if (isParameter) {
5178 if ((RAW == '"') || (RAW == '\'')) {
5179 value = xmlParseEntityValue(ctxt, &orig);
5180 if (value) {
5181 if ((ctxt->sax != NULL) &&
5182 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5183 ctxt->sax->entityDecl(ctxt->userData, name,
5184 XML_INTERNAL_PARAMETER_ENTITY,
5185 NULL, NULL, value);
5186 }
5187 } else {
5188 URI = xmlParseExternalID(ctxt, &literal, 1);
5189 if ((URI == NULL) && (literal == NULL)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005190 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005191 }
5192 if (URI) {
5193 xmlURIPtr uri;
5194
5195 uri = xmlParseURI((const char *) URI);
5196 if (uri == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00005197 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
5198 "Invalid URI: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00005199 /*
5200 * This really ought to be a well formedness error
5201 * but the XML Core WG decided otherwise c.f. issue
5202 * E26 of the XML erratas.
5203 */
Owen Taylor3473f882001-02-23 17:55:21 +00005204 } else {
5205 if (uri->fragment != NULL) {
Daniel Veillard4a7ae502002-02-18 19:18:17 +00005206 /*
5207 * Okay this is foolish to block those but not
5208 * invalid URIs.
5209 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005210 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005211 } else {
5212 if ((ctxt->sax != NULL) &&
5213 (!ctxt->disableSAX) &&
5214 (ctxt->sax->entityDecl != NULL))
5215 ctxt->sax->entityDecl(ctxt->userData, name,
5216 XML_EXTERNAL_PARAMETER_ENTITY,
5217 literal, URI, NULL);
5218 }
5219 xmlFreeURI(uri);
5220 }
5221 }
5222 }
5223 } else {
5224 if ((RAW == '"') || (RAW == '\'')) {
5225 value = xmlParseEntityValue(ctxt, &orig);
5226 if ((ctxt->sax != NULL) &&
5227 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5228 ctxt->sax->entityDecl(ctxt->userData, name,
5229 XML_INTERNAL_GENERAL_ENTITY,
5230 NULL, NULL, value);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005231 /*
5232 * For expat compatibility in SAX mode.
5233 */
5234 if ((ctxt->myDoc == NULL) ||
5235 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
5236 if (ctxt->myDoc == NULL) {
5237 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
Daniel Veillard68b6e022008-03-31 09:26:00 +00005238 if (ctxt->myDoc == NULL) {
5239 xmlErrMemory(ctxt, "New Doc failed");
5240 return;
5241 }
Daniel Veillardae0765b2008-07-31 19:54:59 +00005242 ctxt->myDoc->properties = XML_DOC_INTERNAL;
Daniel Veillard5997aca2002-03-18 18:36:20 +00005243 }
5244 if (ctxt->myDoc->intSubset == NULL)
5245 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5246 BAD_CAST "fake", NULL, NULL);
5247
Daniel Veillard1af9a412003-08-20 22:54:39 +00005248 xmlSAX2EntityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY,
5249 NULL, NULL, value);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005250 }
Owen Taylor3473f882001-02-23 17:55:21 +00005251 } else {
5252 URI = xmlParseExternalID(ctxt, &literal, 1);
5253 if ((URI == NULL) && (literal == NULL)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005254 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005255 }
5256 if (URI) {
5257 xmlURIPtr uri;
5258
5259 uri = xmlParseURI((const char *)URI);
5260 if (uri == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00005261 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
5262 "Invalid URI: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00005263 /*
5264 * This really ought to be a well formedness error
5265 * but the XML Core WG decided otherwise c.f. issue
5266 * E26 of the XML erratas.
5267 */
Owen Taylor3473f882001-02-23 17:55:21 +00005268 } else {
5269 if (uri->fragment != NULL) {
Daniel Veillard4a7ae502002-02-18 19:18:17 +00005270 /*
5271 * Okay this is foolish to block those but not
5272 * invalid URIs.
5273 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005274 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005275 }
5276 xmlFreeURI(uri);
5277 }
5278 }
William M. Brack76e95df2003-10-18 16:20:14 +00005279 if ((RAW != '>') && (!IS_BLANK_CH(CUR))) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005280 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5281 "Space required before 'NDATA'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005282 }
5283 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005284 if (CMP5(CUR_PTR, 'N', 'D', 'A', 'T', 'A')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005285 SKIP(5);
William M. Brack76e95df2003-10-18 16:20:14 +00005286 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005287 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5288 "Space required after 'NDATA'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005289 }
5290 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00005291 ndata = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005292 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5293 (ctxt->sax->unparsedEntityDecl != NULL))
5294 ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
5295 literal, URI, ndata);
5296 } else {
5297 if ((ctxt->sax != NULL) &&
5298 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5299 ctxt->sax->entityDecl(ctxt->userData, name,
5300 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5301 literal, URI, NULL);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005302 /*
5303 * For expat compatibility in SAX mode.
5304 * assuming the entity repalcement was asked for
5305 */
5306 if ((ctxt->replaceEntities != 0) &&
5307 ((ctxt->myDoc == NULL) ||
5308 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) {
5309 if (ctxt->myDoc == NULL) {
5310 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
Daniel Veillard68b6e022008-03-31 09:26:00 +00005311 if (ctxt->myDoc == NULL) {
5312 xmlErrMemory(ctxt, "New Doc failed");
5313 return;
5314 }
Daniel Veillardae0765b2008-07-31 19:54:59 +00005315 ctxt->myDoc->properties = XML_DOC_INTERNAL;
Daniel Veillard5997aca2002-03-18 18:36:20 +00005316 }
5317
5318 if (ctxt->myDoc->intSubset == NULL)
5319 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5320 BAD_CAST "fake", NULL, NULL);
Daniel Veillard1af9a412003-08-20 22:54:39 +00005321 xmlSAX2EntityDecl(ctxt, name,
5322 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5323 literal, URI, NULL);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005324 }
Owen Taylor3473f882001-02-23 17:55:21 +00005325 }
5326 }
5327 }
5328 SKIP_BLANKS;
5329 if (RAW != '>') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00005330 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00005331 "xmlParseEntityDecl: entity %s not terminated\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00005332 } else {
5333 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005334 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5335 "Entity declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005336 }
5337 NEXT;
5338 }
5339 if (orig != NULL) {
5340 /*
5341 * Ugly mechanism to save the raw entity value.
5342 */
5343 xmlEntityPtr cur = NULL;
5344
5345 if (isParameter) {
5346 if ((ctxt->sax != NULL) &&
5347 (ctxt->sax->getParameterEntity != NULL))
5348 cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
5349 } else {
5350 if ((ctxt->sax != NULL) &&
5351 (ctxt->sax->getEntity != NULL))
5352 cur = ctxt->sax->getEntity(ctxt->userData, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005353 if ((cur == NULL) && (ctxt->userData==ctxt)) {
Daniel Veillard1af9a412003-08-20 22:54:39 +00005354 cur = xmlSAX2GetEntity(ctxt, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005355 }
Owen Taylor3473f882001-02-23 17:55:21 +00005356 }
5357 if (cur != NULL) {
5358 if (cur->orig != NULL)
5359 xmlFree(orig);
5360 else
5361 cur->orig = orig;
5362 } else
5363 xmlFree(orig);
5364 }
Owen Taylor3473f882001-02-23 17:55:21 +00005365 if (value != NULL) xmlFree(value);
5366 if (URI != NULL) xmlFree(URI);
5367 if (literal != NULL) xmlFree(literal);
Owen Taylor3473f882001-02-23 17:55:21 +00005368 }
5369}
5370
5371/**
5372 * xmlParseDefaultDecl:
5373 * @ctxt: an XML parser context
5374 * @value: Receive a possible fixed default value for the attribute
5375 *
5376 * Parse an attribute default declaration
5377 *
5378 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
5379 *
5380 * [ VC: Required Attribute ]
5381 * if the default declaration is the keyword #REQUIRED, then the
5382 * attribute must be specified for all elements of the type in the
5383 * attribute-list declaration.
5384 *
5385 * [ VC: Attribute Default Legal ]
5386 * The declared default value must meet the lexical constraints of
5387 * the declared attribute type c.f. xmlValidateAttributeDecl()
5388 *
5389 * [ VC: Fixed Attribute Default ]
5390 * if an attribute has a default value declared with the #FIXED
5391 * keyword, instances of that attribute must match the default value.
5392 *
5393 * [ WFC: No < in Attribute Values ]
5394 * handled in xmlParseAttValue()
5395 *
5396 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
5397 * or XML_ATTRIBUTE_FIXED.
5398 */
5399
5400int
5401xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
5402 int val;
5403 xmlChar *ret;
5404
5405 *value = NULL;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005406 if (CMP9(CUR_PTR, '#', 'R', 'E', 'Q', 'U', 'I', 'R', 'E', 'D')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005407 SKIP(9);
5408 return(XML_ATTRIBUTE_REQUIRED);
5409 }
Daniel Veillarda07050d2003-10-19 14:46:32 +00005410 if (CMP8(CUR_PTR, '#', 'I', 'M', 'P', 'L', 'I', 'E', 'D')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005411 SKIP(8);
5412 return(XML_ATTRIBUTE_IMPLIED);
5413 }
5414 val = XML_ATTRIBUTE_NONE;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005415 if (CMP6(CUR_PTR, '#', 'F', 'I', 'X', 'E', 'D')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005416 SKIP(6);
5417 val = XML_ATTRIBUTE_FIXED;
William M. Brack76e95df2003-10-18 16:20:14 +00005418 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005419 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5420 "Space required after '#FIXED'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005421 }
5422 SKIP_BLANKS;
5423 }
5424 ret = xmlParseAttValue(ctxt);
5425 ctxt->instate = XML_PARSER_DTD;
5426 if (ret == NULL) {
William M. Brack7b9154b2003-09-27 19:23:50 +00005427 xmlFatalErrMsg(ctxt, (xmlParserErrors)ctxt->errNo,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005428 "Attribute default value declaration error\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005429 } else
5430 *value = ret;
5431 return(val);
5432}
5433
5434/**
5435 * xmlParseNotationType:
5436 * @ctxt: an XML parser context
5437 *
5438 * parse an Notation attribute type.
5439 *
5440 * Note: the leading 'NOTATION' S part has already being parsed...
5441 *
5442 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5443 *
5444 * [ VC: Notation Attributes ]
5445 * Values of this type must match one of the notation names included
5446 * in the declaration; all notation names in the declaration must be declared.
5447 *
5448 * Returns: the notation attribute tree built while parsing
5449 */
5450
5451xmlEnumerationPtr
5452xmlParseNotationType(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005453 const xmlChar *name;
Daniel Veillard49d44052008-08-27 19:57:06 +00005454 xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00005455
5456 if (RAW != '(') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005457 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005458 return(NULL);
5459 }
5460 SHRINK;
5461 do {
5462 NEXT;
5463 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00005464 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005465 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005466 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5467 "Name expected in NOTATION declaration\n");
Daniel Veillard489f9672009-08-10 16:49:30 +02005468 xmlFreeEnumeration(ret);
5469 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005470 }
Daniel Veillard49d44052008-08-27 19:57:06 +00005471 tmp = ret;
5472 while (tmp != NULL) {
5473 if (xmlStrEqual(name, tmp->name)) {
5474 xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5475 "standalone: attribute notation value token %s duplicated\n",
5476 name, NULL);
5477 if (!xmlDictOwns(ctxt->dict, name))
5478 xmlFree((xmlChar *) name);
5479 break;
5480 }
5481 tmp = tmp->next;
5482 }
5483 if (tmp == NULL) {
5484 cur = xmlCreateEnumeration(name);
Daniel Veillard489f9672009-08-10 16:49:30 +02005485 if (cur == NULL) {
5486 xmlFreeEnumeration(ret);
5487 return(NULL);
5488 }
Daniel Veillard49d44052008-08-27 19:57:06 +00005489 if (last == NULL) ret = last = cur;
5490 else {
5491 last->next = cur;
5492 last = cur;
5493 }
Owen Taylor3473f882001-02-23 17:55:21 +00005494 }
5495 SKIP_BLANKS;
5496 } while (RAW == '|');
5497 if (RAW != ')') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005498 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
Daniel Veillard489f9672009-08-10 16:49:30 +02005499 xmlFreeEnumeration(ret);
5500 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005501 }
5502 NEXT;
5503 return(ret);
5504}
5505
5506/**
5507 * xmlParseEnumerationType:
5508 * @ctxt: an XML parser context
5509 *
5510 * parse an Enumeration attribute type.
5511 *
5512 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
5513 *
5514 * [ VC: Enumeration ]
5515 * Values of this type must match one of the Nmtoken tokens in
5516 * the declaration
5517 *
5518 * Returns: the enumeration attribute tree built while parsing
5519 */
5520
5521xmlEnumerationPtr
5522xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
5523 xmlChar *name;
Daniel Veillard49d44052008-08-27 19:57:06 +00005524 xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00005525
5526 if (RAW != '(') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005527 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005528 return(NULL);
5529 }
5530 SHRINK;
5531 do {
5532 NEXT;
5533 SKIP_BLANKS;
5534 name = xmlParseNmtoken(ctxt);
5535 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005536 xmlFatalErr(ctxt, XML_ERR_NMTOKEN_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005537 return(ret);
5538 }
Daniel Veillard49d44052008-08-27 19:57:06 +00005539 tmp = ret;
5540 while (tmp != NULL) {
5541 if (xmlStrEqual(name, tmp->name)) {
5542 xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5543 "standalone: attribute enumeration value token %s duplicated\n",
5544 name, NULL);
5545 if (!xmlDictOwns(ctxt->dict, name))
5546 xmlFree(name);
5547 break;
5548 }
5549 tmp = tmp->next;
5550 }
5551 if (tmp == NULL) {
5552 cur = xmlCreateEnumeration(name);
5553 if (!xmlDictOwns(ctxt->dict, name))
5554 xmlFree(name);
Daniel Veillard489f9672009-08-10 16:49:30 +02005555 if (cur == NULL) {
5556 xmlFreeEnumeration(ret);
5557 return(NULL);
5558 }
Daniel Veillard49d44052008-08-27 19:57:06 +00005559 if (last == NULL) ret = last = cur;
5560 else {
5561 last->next = cur;
5562 last = cur;
5563 }
Owen Taylor3473f882001-02-23 17:55:21 +00005564 }
5565 SKIP_BLANKS;
5566 } while (RAW == '|');
5567 if (RAW != ')') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005568 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005569 return(ret);
5570 }
5571 NEXT;
5572 return(ret);
5573}
5574
5575/**
5576 * xmlParseEnumeratedType:
5577 * @ctxt: an XML parser context
5578 * @tree: the enumeration tree built while parsing
5579 *
5580 * parse an Enumerated attribute type.
5581 *
5582 * [57] EnumeratedType ::= NotationType | Enumeration
5583 *
5584 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5585 *
5586 *
5587 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
5588 */
5589
5590int
5591xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
Daniel Veillarda07050d2003-10-19 14:46:32 +00005592 if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005593 SKIP(8);
William M. Brack76e95df2003-10-18 16:20:14 +00005594 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005595 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5596 "Space required after 'NOTATION'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005597 return(0);
5598 }
5599 SKIP_BLANKS;
5600 *tree = xmlParseNotationType(ctxt);
5601 if (*tree == NULL) return(0);
5602 return(XML_ATTRIBUTE_NOTATION);
5603 }
5604 *tree = xmlParseEnumerationType(ctxt);
5605 if (*tree == NULL) return(0);
5606 return(XML_ATTRIBUTE_ENUMERATION);
5607}
5608
5609/**
5610 * xmlParseAttributeType:
5611 * @ctxt: an XML parser context
5612 * @tree: the enumeration tree built while parsing
5613 *
5614 * parse the Attribute list def for an element
5615 *
5616 * [54] AttType ::= StringType | TokenizedType | EnumeratedType
5617 *
5618 * [55] StringType ::= 'CDATA'
5619 *
5620 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
5621 * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
5622 *
5623 * Validity constraints for attribute values syntax are checked in
5624 * xmlValidateAttributeValue()
5625 *
5626 * [ VC: ID ]
5627 * Values of type ID must match the Name production. A name must not
5628 * appear more than once in an XML document as a value of this type;
5629 * i.e., ID values must uniquely identify the elements which bear them.
5630 *
5631 * [ VC: One ID per Element Type ]
5632 * No element type may have more than one ID attribute specified.
5633 *
5634 * [ VC: ID Attribute Default ]
5635 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
5636 *
5637 * [ VC: IDREF ]
5638 * Values of type IDREF must match the Name production, and values
5639 * of type IDREFS must match Names; each IDREF Name must match the value
5640 * of an ID attribute on some element in the XML document; i.e. IDREF
5641 * values must match the value of some ID attribute.
5642 *
5643 * [ VC: Entity Name ]
5644 * Values of type ENTITY must match the Name production, values
5645 * of type ENTITIES must match Names; each Entity Name must match the
5646 * name of an unparsed entity declared in the DTD.
5647 *
5648 * [ VC: Name Token ]
5649 * Values of type NMTOKEN must match the Nmtoken production; values
5650 * of type NMTOKENS must match Nmtokens.
5651 *
5652 * Returns the attribute type
5653 */
5654int
5655xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
5656 SHRINK;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005657 if (CMP5(CUR_PTR, 'C', 'D', 'A', 'T', 'A')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005658 SKIP(5);
5659 return(XML_ATTRIBUTE_CDATA);
Daniel Veillarda07050d2003-10-19 14:46:32 +00005660 } else if (CMP6(CUR_PTR, 'I', 'D', 'R', 'E', 'F', 'S')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005661 SKIP(6);
5662 return(XML_ATTRIBUTE_IDREFS);
Daniel Veillarda07050d2003-10-19 14:46:32 +00005663 } else if (CMP5(CUR_PTR, 'I', 'D', 'R', 'E', 'F')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005664 SKIP(5);
5665 return(XML_ATTRIBUTE_IDREF);
5666 } else if ((RAW == 'I') && (NXT(1) == 'D')) {
5667 SKIP(2);
5668 return(XML_ATTRIBUTE_ID);
Daniel Veillarda07050d2003-10-19 14:46:32 +00005669 } else if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005670 SKIP(6);
5671 return(XML_ATTRIBUTE_ENTITY);
Daniel Veillarda07050d2003-10-19 14:46:32 +00005672 } else if (CMP8(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'I', 'E', 'S')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005673 SKIP(8);
5674 return(XML_ATTRIBUTE_ENTITIES);
Daniel Veillarda07050d2003-10-19 14:46:32 +00005675 } else if (CMP8(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N', 'S')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005676 SKIP(8);
5677 return(XML_ATTRIBUTE_NMTOKENS);
Daniel Veillarda07050d2003-10-19 14:46:32 +00005678 } else if (CMP7(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005679 SKIP(7);
5680 return(XML_ATTRIBUTE_NMTOKEN);
5681 }
5682 return(xmlParseEnumeratedType(ctxt, tree));
5683}
5684
5685/**
5686 * xmlParseAttributeListDecl:
5687 * @ctxt: an XML parser context
5688 *
5689 * : parse the Attribute list def for an element
5690 *
5691 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
5692 *
5693 * [53] AttDef ::= S Name S AttType S DefaultDecl
5694 *
5695 */
5696void
5697xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005698 const xmlChar *elemName;
5699 const xmlChar *attrName;
Owen Taylor3473f882001-02-23 17:55:21 +00005700 xmlEnumerationPtr tree;
5701
Daniel Veillarda07050d2003-10-19 14:46:32 +00005702 if (CMP9(CUR_PTR, '<', '!', 'A', 'T', 'T', 'L', 'I', 'S', 'T')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005703 xmlParserInputPtr input = ctxt->input;
5704
5705 SKIP(9);
William M. Brack76e95df2003-10-18 16:20:14 +00005706 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005707 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005708 "Space required after '<!ATTLIST'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005709 }
5710 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00005711 elemName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005712 if (elemName == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005713 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5714 "ATTLIST: no name for Element\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005715 return;
5716 }
5717 SKIP_BLANKS;
5718 GROW;
5719 while (RAW != '>') {
5720 const xmlChar *check = CUR_PTR;
5721 int type;
5722 int def;
5723 xmlChar *defaultValue = NULL;
5724
5725 GROW;
5726 tree = NULL;
Daniel Veillard76d66f42001-05-16 21:05:17 +00005727 attrName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005728 if (attrName == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005729 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5730 "ATTLIST: no name for Attribute\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005731 break;
5732 }
5733 GROW;
William M. Brack76e95df2003-10-18 16:20:14 +00005734 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005735 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005736 "Space required after the attribute name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005737 break;
5738 }
5739 SKIP_BLANKS;
5740
5741 type = xmlParseAttributeType(ctxt, &tree);
5742 if (type <= 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00005743 break;
5744 }
5745
5746 GROW;
William M. Brack76e95df2003-10-18 16:20:14 +00005747 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005748 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5749 "Space required after the attribute type\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005750 if (tree != NULL)
5751 xmlFreeEnumeration(tree);
5752 break;
5753 }
5754 SKIP_BLANKS;
5755
5756 def = xmlParseDefaultDecl(ctxt, &defaultValue);
5757 if (def <= 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00005758 if (defaultValue != NULL)
5759 xmlFree(defaultValue);
5760 if (tree != NULL)
5761 xmlFreeEnumeration(tree);
5762 break;
5763 }
Daniel Veillard97c9ce22008-03-25 16:52:41 +00005764 if ((type != XML_ATTRIBUTE_CDATA) && (defaultValue != NULL))
5765 xmlAttrNormalizeSpace(defaultValue, defaultValue);
Owen Taylor3473f882001-02-23 17:55:21 +00005766
5767 GROW;
5768 if (RAW != '>') {
William M. Brack76e95df2003-10-18 16:20:14 +00005769 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005770 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005771 "Space required after the attribute default value\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005772 if (defaultValue != NULL)
5773 xmlFree(defaultValue);
5774 if (tree != NULL)
5775 xmlFreeEnumeration(tree);
5776 break;
5777 }
5778 SKIP_BLANKS;
5779 }
5780 if (check == CUR_PTR) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005781 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
5782 "in xmlParseAttributeListDecl\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005783 if (defaultValue != NULL)
5784 xmlFree(defaultValue);
5785 if (tree != NULL)
5786 xmlFreeEnumeration(tree);
5787 break;
5788 }
5789 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5790 (ctxt->sax->attributeDecl != NULL))
5791 ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
5792 type, def, defaultValue, tree);
Daniel Veillarde57ec792003-09-10 10:50:59 +00005793 else if (tree != NULL)
5794 xmlFreeEnumeration(tree);
5795
5796 if ((ctxt->sax2) && (defaultValue != NULL) &&
5797 (def != XML_ATTRIBUTE_IMPLIED) &&
5798 (def != XML_ATTRIBUTE_REQUIRED)) {
5799 xmlAddDefAttrs(ctxt, elemName, attrName, defaultValue);
5800 }
Daniel Veillardac4118d2008-01-11 05:27:32 +00005801 if (ctxt->sax2) {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00005802 xmlAddSpecialAttr(ctxt, elemName, attrName, type);
5803 }
Owen Taylor3473f882001-02-23 17:55:21 +00005804 if (defaultValue != NULL)
5805 xmlFree(defaultValue);
5806 GROW;
5807 }
5808 if (RAW == '>') {
5809 if (input != ctxt->input) {
Daniel Veillard49d44052008-08-27 19:57:06 +00005810 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
5811 "Attribute list declaration doesn't start and stop in the same entity\n",
5812 NULL, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005813 }
5814 NEXT;
5815 }
Owen Taylor3473f882001-02-23 17:55:21 +00005816 }
5817}
5818
5819/**
5820 * xmlParseElementMixedContentDecl:
5821 * @ctxt: an XML parser context
Daniel Veillarda9b66d02002-12-11 14:23:49 +00005822 * @inputchk: the input used for the current entity, needed for boundary checks
Owen Taylor3473f882001-02-23 17:55:21 +00005823 *
5824 * parse the declaration for a Mixed Element content
5825 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
5826 *
5827 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
5828 * '(' S? '#PCDATA' S? ')'
5829 *
5830 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
5831 *
5832 * [ VC: No Duplicate Types ]
5833 * The same name must not appear more than once in a single
5834 * mixed-content declaration.
5835 *
5836 * returns: the list of the xmlElementContentPtr describing the element choices
5837 */
5838xmlElementContentPtr
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005839xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
Owen Taylor3473f882001-02-23 17:55:21 +00005840 xmlElementContentPtr ret = NULL, cur = NULL, n;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005841 const xmlChar *elem = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005842
5843 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005844 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005845 SKIP(7);
5846 SKIP_BLANKS;
5847 SHRINK;
5848 if (RAW == ')') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005849 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00005850 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
5851"Element content declaration doesn't start and stop in the same entity\n",
Daniel Veillardae0765b2008-07-31 19:54:59 +00005852 NULL, NULL);
Daniel Veillard8dc16a62002-02-19 21:08:48 +00005853 }
Owen Taylor3473f882001-02-23 17:55:21 +00005854 NEXT;
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005855 ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
Daniel Veillard68b6e022008-03-31 09:26:00 +00005856 if (ret == NULL)
5857 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005858 if (RAW == '*') {
5859 ret->ocur = XML_ELEMENT_CONTENT_MULT;
5860 NEXT;
5861 }
5862 return(ret);
5863 }
5864 if ((RAW == '(') || (RAW == '|')) {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005865 ret = cur = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
Owen Taylor3473f882001-02-23 17:55:21 +00005866 if (ret == NULL) return(NULL);
5867 }
5868 while (RAW == '|') {
5869 NEXT;
5870 if (elem == NULL) {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005871 ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
Owen Taylor3473f882001-02-23 17:55:21 +00005872 if (ret == NULL) return(NULL);
5873 ret->c1 = cur;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005874 if (cur != NULL)
5875 cur->parent = ret;
Owen Taylor3473f882001-02-23 17:55:21 +00005876 cur = ret;
5877 } else {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005878 n = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
Owen Taylor3473f882001-02-23 17:55:21 +00005879 if (n == NULL) return(NULL);
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005880 n->c1 = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005881 if (n->c1 != NULL)
5882 n->c1->parent = n;
Owen Taylor3473f882001-02-23 17:55:21 +00005883 cur->c2 = n;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005884 if (n != NULL)
5885 n->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00005886 cur = n;
Owen Taylor3473f882001-02-23 17:55:21 +00005887 }
5888 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00005889 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005890 if (elem == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005891 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005892 "xmlParseElementMixedContentDecl : Name expected\n");
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005893 xmlFreeDocElementContent(ctxt->myDoc, cur);
Owen Taylor3473f882001-02-23 17:55:21 +00005894 return(NULL);
5895 }
5896 SKIP_BLANKS;
5897 GROW;
5898 }
5899 if ((RAW == ')') && (NXT(1) == '*')) {
5900 if (elem != NULL) {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005901 cur->c2 = xmlNewDocElementContent(ctxt->myDoc, elem,
Owen Taylor3473f882001-02-23 17:55:21 +00005902 XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005903 if (cur->c2 != NULL)
5904 cur->c2->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00005905 }
Daniel Veillardd44b9362009-09-07 12:15:08 +02005906 if (ret != NULL)
5907 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005908 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00005909 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
5910"Element content declaration doesn't start and stop in the same entity\n",
Daniel Veillardae0765b2008-07-31 19:54:59 +00005911 NULL, NULL);
Daniel Veillard8dc16a62002-02-19 21:08:48 +00005912 }
Owen Taylor3473f882001-02-23 17:55:21 +00005913 SKIP(2);
5914 } else {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005915 xmlFreeDocElementContent(ctxt->myDoc, ret);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005916 xmlFatalErr(ctxt, XML_ERR_MIXED_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005917 return(NULL);
5918 }
5919
5920 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005921 xmlFatalErr(ctxt, XML_ERR_PCDATA_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005922 }
5923 return(ret);
5924}
5925
5926/**
Daniel Veillard489f9672009-08-10 16:49:30 +02005927 * xmlParseElementChildrenContentDeclPriv:
Owen Taylor3473f882001-02-23 17:55:21 +00005928 * @ctxt: an XML parser context
Daniel Veillarda9b66d02002-12-11 14:23:49 +00005929 * @inputchk: the input used for the current entity, needed for boundary checks
Daniel Veillard489f9672009-08-10 16:49:30 +02005930 * @depth: the level of recursion
Owen Taylor3473f882001-02-23 17:55:21 +00005931 *
5932 * parse the declaration for an element-children content model
5933 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
5934 *
5935 *
5936 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
5937 *
5938 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
5939 *
5940 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
5941 *
5942 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
5943 *
5944 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
5945 * TODO Parameter-entity replacement text must be properly nested
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005946 * with parenthesized groups. That is to say, if either of the
Owen Taylor3473f882001-02-23 17:55:21 +00005947 * opening or closing parentheses in a choice, seq, or Mixed
5948 * construct is contained in the replacement text for a parameter
5949 * entity, both must be contained in the same replacement text. For
5950 * interoperability, if a parameter-entity reference appears in a
5951 * choice, seq, or Mixed construct, its replacement text should not
5952 * be empty, and neither the first nor last non-blank character of
5953 * the replacement text should be a connector (| or ,).
5954 *
Daniel Veillard5e2dace2001-07-18 19:30:27 +00005955 * Returns the tree of xmlElementContentPtr describing the element
Owen Taylor3473f882001-02-23 17:55:21 +00005956 * hierarchy.
5957 */
Daniel Veillard489f9672009-08-10 16:49:30 +02005958static xmlElementContentPtr
5959xmlParseElementChildrenContentDeclPriv(xmlParserCtxtPtr ctxt, int inputchk,
5960 int depth) {
Owen Taylor3473f882001-02-23 17:55:21 +00005961 xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005962 const xmlChar *elem;
Owen Taylor3473f882001-02-23 17:55:21 +00005963 xmlChar type = 0;
5964
Daniel Veillard489f9672009-08-10 16:49:30 +02005965 if (((depth > 128) && ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
5966 (depth > 2048)) {
5967 xmlFatalErrMsgInt(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED,
5968"xmlParseElementChildrenContentDecl : depth %d too deep, use XML_PARSE_HUGE\n",
5969 depth);
5970 return(NULL);
5971 }
Owen Taylor3473f882001-02-23 17:55:21 +00005972 SKIP_BLANKS;
5973 GROW;
5974 if (RAW == '(') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005975 int inputid = ctxt->input->id;
Daniel Veillard8dc16a62002-02-19 21:08:48 +00005976
Owen Taylor3473f882001-02-23 17:55:21 +00005977 /* Recurse on first child */
5978 NEXT;
5979 SKIP_BLANKS;
Daniel Veillard489f9672009-08-10 16:49:30 +02005980 cur = ret = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
5981 depth + 1);
Owen Taylor3473f882001-02-23 17:55:21 +00005982 SKIP_BLANKS;
5983 GROW;
5984 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00005985 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005986 if (elem == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005987 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005988 return(NULL);
5989 }
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005990 cur = ret = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00005991 if (cur == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005992 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00005993 return(NULL);
5994 }
Owen Taylor3473f882001-02-23 17:55:21 +00005995 GROW;
5996 if (RAW == '?') {
5997 cur->ocur = XML_ELEMENT_CONTENT_OPT;
5998 NEXT;
5999 } else if (RAW == '*') {
6000 cur->ocur = XML_ELEMENT_CONTENT_MULT;
6001 NEXT;
6002 } else if (RAW == '+') {
6003 cur->ocur = XML_ELEMENT_CONTENT_PLUS;
6004 NEXT;
6005 } else {
6006 cur->ocur = XML_ELEMENT_CONTENT_ONCE;
6007 }
Owen Taylor3473f882001-02-23 17:55:21 +00006008 GROW;
6009 }
6010 SKIP_BLANKS;
6011 SHRINK;
6012 while (RAW != ')') {
6013 /*
6014 * Each loop we parse one separator and one element.
6015 */
6016 if (RAW == ',') {
6017 if (type == 0) type = CUR;
6018
6019 /*
6020 * Detect "Name | Name , Name" error
6021 */
6022 else if (type != CUR) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006023 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00006024 "xmlParseElementChildrenContentDecl : '%c' expected\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006025 type);
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00006026 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006027 xmlFreeDocElementContent(ctxt->myDoc, last);
Owen Taylor3473f882001-02-23 17:55:21 +00006028 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006029 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00006030 return(NULL);
6031 }
6032 NEXT;
6033
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006034 op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_SEQ);
Owen Taylor3473f882001-02-23 17:55:21 +00006035 if (op == NULL) {
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00006036 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006037 xmlFreeDocElementContent(ctxt->myDoc, last);
6038 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00006039 return(NULL);
6040 }
6041 if (last == NULL) {
6042 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006043 if (ret != NULL)
6044 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00006045 ret = cur = op;
6046 } else {
6047 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006048 if (op != NULL)
6049 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00006050 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006051 if (last != NULL)
6052 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00006053 cur =op;
6054 last = NULL;
6055 }
6056 } else if (RAW == '|') {
6057 if (type == 0) type = CUR;
6058
6059 /*
6060 * Detect "Name , Name | Name" error
6061 */
6062 else if (type != CUR) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006063 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00006064 "xmlParseElementChildrenContentDecl : '%c' expected\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006065 type);
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00006066 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006067 xmlFreeDocElementContent(ctxt->myDoc, last);
Owen Taylor3473f882001-02-23 17:55:21 +00006068 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006069 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00006070 return(NULL);
6071 }
6072 NEXT;
6073
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006074 op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
Owen Taylor3473f882001-02-23 17:55:21 +00006075 if (op == NULL) {
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00006076 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006077 xmlFreeDocElementContent(ctxt->myDoc, last);
Owen Taylor3473f882001-02-23 17:55:21 +00006078 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006079 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00006080 return(NULL);
6081 }
6082 if (last == NULL) {
6083 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006084 if (ret != NULL)
6085 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00006086 ret = cur = op;
6087 } else {
6088 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006089 if (op != NULL)
6090 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00006091 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006092 if (last != NULL)
6093 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00006094 cur =op;
6095 last = NULL;
6096 }
6097 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006098 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED, NULL);
Daniel Veillardc707d0b2008-01-24 14:48:54 +00006099 if ((last != NULL) && (last != ret))
6100 xmlFreeDocElementContent(ctxt->myDoc, last);
Owen Taylor3473f882001-02-23 17:55:21 +00006101 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006102 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00006103 return(NULL);
6104 }
6105 GROW;
6106 SKIP_BLANKS;
6107 GROW;
6108 if (RAW == '(') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006109 int inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00006110 /* Recurse on second child */
6111 NEXT;
6112 SKIP_BLANKS;
Daniel Veillard489f9672009-08-10 16:49:30 +02006113 last = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
6114 depth + 1);
Owen Taylor3473f882001-02-23 17:55:21 +00006115 SKIP_BLANKS;
6116 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00006117 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006118 if (elem == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006119 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006120 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006121 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00006122 return(NULL);
6123 }
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006124 last = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillard68b6e022008-03-31 09:26:00 +00006125 if (last == NULL) {
6126 if (ret != NULL)
6127 xmlFreeDocElementContent(ctxt->myDoc, ret);
6128 return(NULL);
6129 }
Owen Taylor3473f882001-02-23 17:55:21 +00006130 if (RAW == '?') {
6131 last->ocur = XML_ELEMENT_CONTENT_OPT;
6132 NEXT;
6133 } else if (RAW == '*') {
6134 last->ocur = XML_ELEMENT_CONTENT_MULT;
6135 NEXT;
6136 } else if (RAW == '+') {
6137 last->ocur = XML_ELEMENT_CONTENT_PLUS;
6138 NEXT;
6139 } else {
6140 last->ocur = XML_ELEMENT_CONTENT_ONCE;
6141 }
6142 }
6143 SKIP_BLANKS;
6144 GROW;
6145 }
6146 if ((cur != NULL) && (last != NULL)) {
6147 cur->c2 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006148 if (last != NULL)
6149 last->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00006150 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006151 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00006152 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6153"Element content declaration doesn't start and stop in the same entity\n",
Daniel Veillardae0765b2008-07-31 19:54:59 +00006154 NULL, NULL);
Daniel Veillard8dc16a62002-02-19 21:08:48 +00006155 }
Owen Taylor3473f882001-02-23 17:55:21 +00006156 NEXT;
6157 if (RAW == '?') {
William M. Brackf8f2e8f2004-05-14 04:37:41 +00006158 if (ret != NULL) {
6159 if ((ret->ocur == XML_ELEMENT_CONTENT_PLUS) ||
6160 (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6161 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6162 else
6163 ret->ocur = XML_ELEMENT_CONTENT_OPT;
6164 }
Owen Taylor3473f882001-02-23 17:55:21 +00006165 NEXT;
6166 } else if (RAW == '*') {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00006167 if (ret != NULL) {
Daniel Veillarde470df72001-04-18 21:41:07 +00006168 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Daniel Veillardce2c2f02001-10-18 14:57:24 +00006169 cur = ret;
6170 /*
6171 * Some normalization:
6172 * (a | b* | c?)* == (a | b | c)*
6173 */
Daniel Veillard30e76072006-03-09 14:13:55 +00006174 while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00006175 if ((cur->c1 != NULL) &&
6176 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6177 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT)))
6178 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6179 if ((cur->c2 != NULL) &&
6180 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6181 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT)))
6182 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6183 cur = cur->c2;
6184 }
6185 }
Owen Taylor3473f882001-02-23 17:55:21 +00006186 NEXT;
6187 } else if (RAW == '+') {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00006188 if (ret != NULL) {
6189 int found = 0;
6190
William M. Brackf8f2e8f2004-05-14 04:37:41 +00006191 if ((ret->ocur == XML_ELEMENT_CONTENT_OPT) ||
6192 (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6193 ret->ocur = XML_ELEMENT_CONTENT_MULT;
William M. Brackeb8509c2004-05-14 03:48:02 +00006194 else
6195 ret->ocur = XML_ELEMENT_CONTENT_PLUS;
Daniel Veillardce2c2f02001-10-18 14:57:24 +00006196 /*
6197 * Some normalization:
6198 * (a | b*)+ == (a | b)*
6199 * (a | b?)+ == (a | b)*
6200 */
Daniel Veillard30e76072006-03-09 14:13:55 +00006201 while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00006202 if ((cur->c1 != NULL) &&
6203 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6204 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) {
6205 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6206 found = 1;
6207 }
6208 if ((cur->c2 != NULL) &&
6209 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6210 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) {
6211 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6212 found = 1;
6213 }
6214 cur = cur->c2;
6215 }
6216 if (found)
6217 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6218 }
Owen Taylor3473f882001-02-23 17:55:21 +00006219 NEXT;
6220 }
6221 return(ret);
6222}
6223
6224/**
Daniel Veillard489f9672009-08-10 16:49:30 +02006225 * xmlParseElementChildrenContentDecl:
6226 * @ctxt: an XML parser context
6227 * @inputchk: the input used for the current entity, needed for boundary checks
Daniel Veillard489f9672009-08-10 16:49:30 +02006228 *
6229 * parse the declaration for a Mixed Element content
6230 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6231 *
6232 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
6233 *
6234 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6235 *
6236 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6237 *
6238 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6239 *
6240 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6241 * TODO Parameter-entity replacement text must be properly nested
6242 * with parenthesized groups. That is to say, if either of the
6243 * opening or closing parentheses in a choice, seq, or Mixed
6244 * construct is contained in the replacement text for a parameter
6245 * entity, both must be contained in the same replacement text. For
6246 * interoperability, if a parameter-entity reference appears in a
6247 * choice, seq, or Mixed construct, its replacement text should not
6248 * be empty, and neither the first nor last non-blank character of
6249 * the replacement text should be a connector (| or ,).
6250 *
6251 * Returns the tree of xmlElementContentPtr describing the element
6252 * hierarchy.
6253 */
6254xmlElementContentPtr
6255xmlParseElementChildrenContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
6256 /* stub left for API/ABI compat */
6257 return(xmlParseElementChildrenContentDeclPriv(ctxt, inputchk, 1));
6258}
6259
6260/**
Owen Taylor3473f882001-02-23 17:55:21 +00006261 * xmlParseElementContentDecl:
6262 * @ctxt: an XML parser context
6263 * @name: the name of the element being defined.
6264 * @result: the Element Content pointer will be stored here if any
6265 *
6266 * parse the declaration for an Element content either Mixed or Children,
6267 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
6268 *
6269 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
6270 *
6271 * returns: the type of element content XML_ELEMENT_TYPE_xxx
6272 */
6273
6274int
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006275xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, const xmlChar *name,
Owen Taylor3473f882001-02-23 17:55:21 +00006276 xmlElementContentPtr *result) {
6277
6278 xmlElementContentPtr tree = NULL;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006279 int inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00006280 int res;
6281
6282 *result = NULL;
6283
6284 if (RAW != '(') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006285 xmlFatalErrMsgStr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
Daniel Veillard56a4cb82001-03-24 17:00:36 +00006286 "xmlParseElementContentDecl : %s '(' expected\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006287 return(-1);
6288 }
6289 NEXT;
6290 GROW;
6291 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00006292 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006293 tree = xmlParseElementMixedContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00006294 res = XML_ELEMENT_TYPE_MIXED;
6295 } else {
Daniel Veillard489f9672009-08-10 16:49:30 +02006296 tree = xmlParseElementChildrenContentDeclPriv(ctxt, inputid, 1);
Owen Taylor3473f882001-02-23 17:55:21 +00006297 res = XML_ELEMENT_TYPE_ELEMENT;
6298 }
Owen Taylor3473f882001-02-23 17:55:21 +00006299 SKIP_BLANKS;
6300 *result = tree;
6301 return(res);
6302}
6303
6304/**
6305 * xmlParseElementDecl:
6306 * @ctxt: an XML parser context
6307 *
6308 * parse an Element declaration.
6309 *
6310 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
6311 *
6312 * [ VC: Unique Element Type Declaration ]
6313 * No element type may be declared more than once
6314 *
6315 * Returns the type of the element, or -1 in case of error
6316 */
6317int
6318xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006319 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00006320 int ret = -1;
6321 xmlElementContentPtr content = NULL;
6322
Daniel Veillard4c778d82005-01-23 17:37:44 +00006323 /* GROW; done in the caller */
Daniel Veillarda07050d2003-10-19 14:46:32 +00006324 if (CMP9(CUR_PTR, '<', '!', 'E', 'L', 'E', 'M', 'E', 'N', 'T')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006325 xmlParserInputPtr input = ctxt->input;
6326
6327 SKIP(9);
William M. Brack76e95df2003-10-18 16:20:14 +00006328 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006329 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6330 "Space required after 'ELEMENT'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006331 }
6332 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00006333 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006334 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006335 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6336 "xmlParseElementDecl: no name for Element\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006337 return(-1);
6338 }
6339 while ((RAW == 0) && (ctxt->inputNr > 1))
6340 xmlPopInput(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00006341 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006342 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6343 "Space required after the element name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006344 }
6345 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00006346 if (CMP5(CUR_PTR, 'E', 'M', 'P', 'T', 'Y')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006347 SKIP(5);
6348 /*
6349 * Element must always be empty.
6350 */
6351 ret = XML_ELEMENT_TYPE_EMPTY;
6352 } else if ((RAW == 'A') && (NXT(1) == 'N') &&
6353 (NXT(2) == 'Y')) {
6354 SKIP(3);
6355 /*
6356 * Element is a generic container.
6357 */
6358 ret = XML_ELEMENT_TYPE_ANY;
6359 } else if (RAW == '(') {
6360 ret = xmlParseElementContentDecl(ctxt, name, &content);
6361 } else {
6362 /*
6363 * [ WFC: PEs in Internal Subset ] error handling.
6364 */
6365 if ((RAW == '%') && (ctxt->external == 0) &&
6366 (ctxt->inputNr == 1)) {
Daniel Veillardf403d292003-10-05 13:51:35 +00006367 xmlFatalErrMsg(ctxt, XML_ERR_PEREF_IN_INT_SUBSET,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006368 "PEReference: forbidden within markup decl in internal subset\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006369 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00006370 xmlFatalErrMsg(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
Owen Taylor3473f882001-02-23 17:55:21 +00006371 "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
6372 }
Owen Taylor3473f882001-02-23 17:55:21 +00006373 return(-1);
6374 }
6375
6376 SKIP_BLANKS;
6377 /*
6378 * Pop-up of finished entities.
6379 */
6380 while ((RAW == 0) && (ctxt->inputNr > 1))
6381 xmlPopInput(ctxt);
6382 SKIP_BLANKS;
6383
6384 if (RAW != '>') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006385 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006386 if (content != NULL) {
6387 xmlFreeDocElementContent(ctxt->myDoc, content);
6388 }
Owen Taylor3473f882001-02-23 17:55:21 +00006389 } else {
6390 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006391 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6392 "Element declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006393 }
6394
6395 NEXT;
6396 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006397 (ctxt->sax->elementDecl != NULL)) {
6398 if (content != NULL)
6399 content->parent = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00006400 ctxt->sax->elementDecl(ctxt->userData, name, ret,
6401 content);
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006402 if ((content != NULL) && (content->parent == NULL)) {
6403 /*
6404 * this is a trick: if xmlAddElementDecl is called,
6405 * instead of copying the full tree it is plugged directly
6406 * if called from the parser. Avoid duplicating the
6407 * interfaces or change the API/ABI
6408 */
6409 xmlFreeDocElementContent(ctxt->myDoc, content);
6410 }
6411 } else if (content != NULL) {
6412 xmlFreeDocElementContent(ctxt->myDoc, content);
6413 }
Owen Taylor3473f882001-02-23 17:55:21 +00006414 }
Owen Taylor3473f882001-02-23 17:55:21 +00006415 }
6416 return(ret);
6417}
6418
6419/**
Owen Taylor3473f882001-02-23 17:55:21 +00006420 * xmlParseConditionalSections
6421 * @ctxt: an XML parser context
6422 *
6423 * [61] conditionalSect ::= includeSect | ignoreSect
6424 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
6425 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
6426 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
6427 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
6428 */
6429
Daniel Veillard56a4cb82001-03-24 17:00:36 +00006430static void
Owen Taylor3473f882001-02-23 17:55:21 +00006431xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
Daniel Veillard49d44052008-08-27 19:57:06 +00006432 int id = ctxt->input->id;
6433
Owen Taylor3473f882001-02-23 17:55:21 +00006434 SKIP(3);
6435 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00006436 if (CMP7(CUR_PTR, 'I', 'N', 'C', 'L', 'U', 'D', 'E')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006437 SKIP(7);
6438 SKIP_BLANKS;
6439 if (RAW != '[') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006440 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006441 } else {
Daniel Veillard49d44052008-08-27 19:57:06 +00006442 if (ctxt->input->id != id) {
6443 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6444 "All markup of the conditional section is not in the same entity\n",
6445 NULL, NULL);
6446 }
Owen Taylor3473f882001-02-23 17:55:21 +00006447 NEXT;
6448 }
6449 if (xmlParserDebugEntities) {
6450 if ((ctxt->input != NULL) && (ctxt->input->filename))
6451 xmlGenericError(xmlGenericErrorContext,
6452 "%s(%d): ", ctxt->input->filename,
6453 ctxt->input->line);
6454 xmlGenericError(xmlGenericErrorContext,
6455 "Entering INCLUDE Conditional Section\n");
6456 }
6457
6458 while ((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') ||
6459 (NXT(2) != '>'))) {
6460 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00006461 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00006462
6463 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6464 xmlParseConditionalSections(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00006465 } else if (IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00006466 NEXT;
6467 } else if (RAW == '%') {
6468 xmlParsePEReference(ctxt);
6469 } else
6470 xmlParseMarkupDecl(ctxt);
6471
6472 /*
6473 * Pop-up of finished entities.
6474 */
6475 while ((RAW == 0) && (ctxt->inputNr > 1))
6476 xmlPopInput(ctxt);
6477
Daniel Veillardfdc91562002-07-01 21:52:03 +00006478 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006479 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006480 break;
6481 }
6482 }
6483 if (xmlParserDebugEntities) {
6484 if ((ctxt->input != NULL) && (ctxt->input->filename))
6485 xmlGenericError(xmlGenericErrorContext,
6486 "%s(%d): ", ctxt->input->filename,
6487 ctxt->input->line);
6488 xmlGenericError(xmlGenericErrorContext,
6489 "Leaving INCLUDE Conditional Section\n");
6490 }
6491
Daniel Veillarda07050d2003-10-19 14:46:32 +00006492 } else if (CMP6(CUR_PTR, 'I', 'G', 'N', 'O', 'R', 'E')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006493 int state;
William M. Brack78637da2003-07-31 14:47:38 +00006494 xmlParserInputState instate;
Owen Taylor3473f882001-02-23 17:55:21 +00006495 int depth = 0;
6496
6497 SKIP(6);
6498 SKIP_BLANKS;
6499 if (RAW != '[') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006500 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006501 } else {
Daniel Veillard49d44052008-08-27 19:57:06 +00006502 if (ctxt->input->id != id) {
6503 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6504 "All markup of the conditional section is not in the same entity\n",
6505 NULL, NULL);
6506 }
Owen Taylor3473f882001-02-23 17:55:21 +00006507 NEXT;
6508 }
6509 if (xmlParserDebugEntities) {
6510 if ((ctxt->input != NULL) && (ctxt->input->filename))
6511 xmlGenericError(xmlGenericErrorContext,
6512 "%s(%d): ", ctxt->input->filename,
6513 ctxt->input->line);
6514 xmlGenericError(xmlGenericErrorContext,
6515 "Entering IGNORE Conditional Section\n");
6516 }
6517
6518 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006519 * Parse up to the end of the conditional section
Owen Taylor3473f882001-02-23 17:55:21 +00006520 * But disable SAX event generating DTD building in the meantime
6521 */
6522 state = ctxt->disableSAX;
6523 instate = ctxt->instate;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006524 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006525 ctxt->instate = XML_PARSER_IGNORE;
6526
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00006527 while ((depth >= 0) && (RAW != 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00006528 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6529 depth++;
6530 SKIP(3);
6531 continue;
6532 }
6533 if ((RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
6534 if (--depth >= 0) SKIP(3);
6535 continue;
6536 }
6537 NEXT;
6538 continue;
6539 }
6540
6541 ctxt->disableSAX = state;
6542 ctxt->instate = instate;
6543
6544 if (xmlParserDebugEntities) {
6545 if ((ctxt->input != NULL) && (ctxt->input->filename))
6546 xmlGenericError(xmlGenericErrorContext,
6547 "%s(%d): ", ctxt->input->filename,
6548 ctxt->input->line);
6549 xmlGenericError(xmlGenericErrorContext,
6550 "Leaving IGNORE Conditional Section\n");
6551 }
6552
6553 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006554 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID_KEYWORD, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006555 }
6556
6557 if (RAW == 0)
6558 SHRINK;
6559
6560 if (RAW == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006561 xmlFatalErr(ctxt, XML_ERR_CONDSEC_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006562 } else {
Daniel Veillard49d44052008-08-27 19:57:06 +00006563 if (ctxt->input->id != id) {
6564 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6565 "All markup of the conditional section is not in the same entity\n",
6566 NULL, NULL);
6567 }
Owen Taylor3473f882001-02-23 17:55:21 +00006568 SKIP(3);
6569 }
6570}
6571
6572/**
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006573 * xmlParseMarkupDecl:
6574 * @ctxt: an XML parser context
6575 *
6576 * parse Markup declarations
6577 *
6578 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
6579 * NotationDecl | PI | Comment
6580 *
6581 * [ VC: Proper Declaration/PE Nesting ]
6582 * Parameter-entity replacement text must be properly nested with
6583 * markup declarations. That is to say, if either the first character
6584 * or the last character of a markup declaration (markupdecl above) is
6585 * contained in the replacement text for a parameter-entity reference,
6586 * both must be contained in the same replacement text.
6587 *
6588 * [ WFC: PEs in Internal Subset ]
6589 * In the internal DTD subset, parameter-entity references can occur
6590 * only where markup declarations can occur, not within markup declarations.
6591 * (This does not apply to references that occur in external parameter
6592 * entities or to the external subset.)
6593 */
6594void
6595xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
6596 GROW;
Daniel Veillard4c778d82005-01-23 17:37:44 +00006597 if (CUR == '<') {
6598 if (NXT(1) == '!') {
6599 switch (NXT(2)) {
6600 case 'E':
6601 if (NXT(3) == 'L')
6602 xmlParseElementDecl(ctxt);
6603 else if (NXT(3) == 'N')
6604 xmlParseEntityDecl(ctxt);
6605 break;
6606 case 'A':
6607 xmlParseAttributeListDecl(ctxt);
6608 break;
6609 case 'N':
6610 xmlParseNotationDecl(ctxt);
6611 break;
6612 case '-':
6613 xmlParseComment(ctxt);
6614 break;
6615 default:
6616 /* there is an error but it will be detected later */
6617 break;
6618 }
6619 } else if (NXT(1) == '?') {
6620 xmlParsePI(ctxt);
6621 }
6622 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006623 /*
6624 * This is only for internal subset. On external entities,
6625 * the replacement is done before parsing stage
6626 */
6627 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
6628 xmlParsePEReference(ctxt);
6629
6630 /*
6631 * Conditional sections are allowed from entities included
6632 * by PE References in the internal subset.
6633 */
6634 if ((ctxt->external == 0) && (ctxt->inputNr > 1)) {
6635 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6636 xmlParseConditionalSections(ctxt);
6637 }
6638 }
6639
6640 ctxt->instate = XML_PARSER_DTD;
6641}
6642
6643/**
6644 * xmlParseTextDecl:
6645 * @ctxt: an XML parser context
Daniel Veillard40ec29a2008-07-30 12:35:40 +00006646 *
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006647 * parse an XML declaration header for external entities
6648 *
6649 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006650 */
6651
6652void
6653xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
6654 xmlChar *version;
Daniel Veillardf5cb3cd2003-10-28 13:58:13 +00006655 const xmlChar *encoding;
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006656
6657 /*
6658 * We know that '<?xml' is here.
6659 */
Daniel Veillarda07050d2003-10-19 14:46:32 +00006660 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006661 SKIP(5);
6662 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006663 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_STARTED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006664 return;
6665 }
6666
William M. Brack76e95df2003-10-18 16:20:14 +00006667 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006668 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6669 "Space needed after '<?xml'\n");
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006670 }
6671 SKIP_BLANKS;
6672
6673 /*
6674 * We may have the VersionInfo here.
6675 */
6676 version = xmlParseVersionInfo(ctxt);
6677 if (version == NULL)
6678 version = xmlCharStrdup(XML_DEFAULT_VERSION);
Daniel Veillard401c2112002-01-07 16:54:10 +00006679 else {
William M. Brack76e95df2003-10-18 16:20:14 +00006680 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006681 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6682 "Space needed here\n");
Daniel Veillard401c2112002-01-07 16:54:10 +00006683 }
6684 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006685 ctxt->input->version = version;
6686
6687 /*
6688 * We must have the encoding declaration
6689 */
Daniel Veillardf5cb3cd2003-10-28 13:58:13 +00006690 encoding = xmlParseEncodingDecl(ctxt);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006691 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
6692 /*
6693 * The XML REC instructs us to stop parsing right here
6694 */
6695 return;
6696 }
Daniel Veillardf5cb3cd2003-10-28 13:58:13 +00006697 if ((encoding == NULL) && (ctxt->errNo == XML_ERR_OK)) {
6698 xmlFatalErrMsg(ctxt, XML_ERR_MISSING_ENCODING,
6699 "Missing encoding in text declaration\n");
6700 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006701
6702 SKIP_BLANKS;
6703 if ((RAW == '?') && (NXT(1) == '>')) {
6704 SKIP(2);
6705 } else if (RAW == '>') {
6706 /* Deprecated old WD ... */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006707 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006708 NEXT;
6709 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006710 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006711 MOVETO_ENDTAG(CUR_PTR);
6712 NEXT;
6713 }
6714}
6715
6716/**
Owen Taylor3473f882001-02-23 17:55:21 +00006717 * xmlParseExternalSubset:
6718 * @ctxt: an XML parser context
6719 * @ExternalID: the external identifier
6720 * @SystemID: the system identifier (or URL)
6721 *
6722 * parse Markup declarations from an external subset
6723 *
6724 * [30] extSubset ::= textDecl? extSubsetDecl
6725 *
6726 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
6727 */
6728void
6729xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
6730 const xmlChar *SystemID) {
Daniel Veillard309f81d2003-09-23 09:02:53 +00006731 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006732 GROW;
Daniel Veillard6ccc56d2008-04-03 12:59:06 +00006733
Nikolay Sivove6ad10a2010-11-01 11:35:14 +01006734 if ((ctxt->encoding == NULL) &&
Daniel Veillard6ccc56d2008-04-03 12:59:06 +00006735 (ctxt->input->end - ctxt->input->cur >= 4)) {
6736 xmlChar start[4];
6737 xmlCharEncoding enc;
6738
6739 start[0] = RAW;
6740 start[1] = NXT(1);
6741 start[2] = NXT(2);
6742 start[3] = NXT(3);
6743 enc = xmlDetectCharEncoding(start, 4);
6744 if (enc != XML_CHAR_ENCODING_NONE)
6745 xmlSwitchEncoding(ctxt, enc);
6746 }
6747
Daniel Veillarda07050d2003-10-19 14:46:32 +00006748 if (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006749 xmlParseTextDecl(ctxt);
6750 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
6751 /*
6752 * The XML REC instructs us to stop parsing right here
6753 */
6754 ctxt->instate = XML_PARSER_EOF;
6755 return;
6756 }
6757 }
6758 if (ctxt->myDoc == NULL) {
6759 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
Daniel Veillardae0765b2008-07-31 19:54:59 +00006760 if (ctxt->myDoc == NULL) {
6761 xmlErrMemory(ctxt, "New Doc failed");
6762 return;
6763 }
6764 ctxt->myDoc->properties = XML_DOC_INTERNAL;
Owen Taylor3473f882001-02-23 17:55:21 +00006765 }
6766 if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
6767 xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
6768
6769 ctxt->instate = XML_PARSER_DTD;
6770 ctxt->external = 1;
6771 while (((RAW == '<') && (NXT(1) == '?')) ||
6772 ((RAW == '<') && (NXT(1) == '!')) ||
William M. Brack76e95df2003-10-18 16:20:14 +00006773 (RAW == '%') || IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00006774 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00006775 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00006776
6777 GROW;
6778 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6779 xmlParseConditionalSections(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00006780 } else if (IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00006781 NEXT;
6782 } else if (RAW == '%') {
6783 xmlParsePEReference(ctxt);
6784 } else
6785 xmlParseMarkupDecl(ctxt);
6786
6787 /*
6788 * Pop-up of finished entities.
6789 */
6790 while ((RAW == 0) && (ctxt->inputNr > 1))
6791 xmlPopInput(ctxt);
6792
Daniel Veillardfdc91562002-07-01 21:52:03 +00006793 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006794 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006795 break;
6796 }
6797 }
6798
6799 if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006800 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006801 }
6802
6803}
6804
6805/**
6806 * xmlParseReference:
6807 * @ctxt: an XML parser context
Daniel Veillard0161e632008-08-28 15:36:32 +00006808 *
Owen Taylor3473f882001-02-23 17:55:21 +00006809 * parse and handle entity references in content, depending on the SAX
6810 * interface, this may end-up in a call to character() if this is a
6811 * CharRef, a predefined entity, if there is no reference() callback.
6812 * or if the parser was asked to switch to that mode.
6813 *
6814 * [67] Reference ::= EntityRef | CharRef
6815 */
6816void
6817xmlParseReference(xmlParserCtxtPtr ctxt) {
6818 xmlEntityPtr ent;
6819 xmlChar *val;
Daniel Veillard0161e632008-08-28 15:36:32 +00006820 int was_checked;
6821 xmlNodePtr list = NULL;
6822 xmlParserErrors ret = XML_ERR_OK;
Owen Taylor3473f882001-02-23 17:55:21 +00006823
Daniel Veillard0161e632008-08-28 15:36:32 +00006824
6825 if (RAW != '&')
6826 return;
6827
6828 /*
6829 * Simple case of a CharRef
6830 */
Owen Taylor3473f882001-02-23 17:55:21 +00006831 if (NXT(1) == '#') {
6832 int i = 0;
6833 xmlChar out[10];
6834 int hex = NXT(2);
Daniel Veillard56a4cb82001-03-24 17:00:36 +00006835 int value = xmlParseCharRef(ctxt);
Daniel Veillard0161e632008-08-28 15:36:32 +00006836
Daniel Veillarddc171602008-03-26 17:41:38 +00006837 if (value == 0)
6838 return;
Owen Taylor3473f882001-02-23 17:55:21 +00006839 if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
6840 /*
6841 * So we are using non-UTF-8 buffers
6842 * Check that the char fit on 8bits, if not
6843 * generate a CharRef.
6844 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00006845 if (value <= 0xFF) {
6846 out[0] = value;
Owen Taylor3473f882001-02-23 17:55:21 +00006847 out[1] = 0;
6848 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
6849 (!ctxt->disableSAX))
6850 ctxt->sax->characters(ctxt->userData, out, 1);
6851 } else {
6852 if ((hex == 'x') || (hex == 'X'))
Aleksey Sanin49cc9752002-06-14 17:07:10 +00006853 snprintf((char *)out, sizeof(out), "#x%X", value);
Owen Taylor3473f882001-02-23 17:55:21 +00006854 else
Aleksey Sanin49cc9752002-06-14 17:07:10 +00006855 snprintf((char *)out, sizeof(out), "#%d", value);
Owen Taylor3473f882001-02-23 17:55:21 +00006856 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
6857 (!ctxt->disableSAX))
6858 ctxt->sax->reference(ctxt->userData, out);
6859 }
6860 } else {
6861 /*
6862 * Just encode the value in UTF-8
6863 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00006864 COPY_BUF(0 ,out, i, value);
Owen Taylor3473f882001-02-23 17:55:21 +00006865 out[i] = 0;
6866 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
6867 (!ctxt->disableSAX))
6868 ctxt->sax->characters(ctxt->userData, out, i);
6869 }
Daniel Veillard0161e632008-08-28 15:36:32 +00006870 return;
6871 }
Daniel Veillarda37a6ad2006-10-10 20:05:45 +00006872
Daniel Veillard0161e632008-08-28 15:36:32 +00006873 /*
6874 * We are seeing an entity reference
6875 */
6876 ent = xmlParseEntityRef(ctxt);
6877 if (ent == NULL) return;
6878 if (!ctxt->wellFormed)
6879 return;
6880 was_checked = ent->checked;
6881
6882 /* special case of predefined entities */
6883 if ((ent->name == NULL) ||
6884 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
6885 val = ent->content;
6886 if (val == NULL) return;
6887 /*
6888 * inline the entity.
6889 */
6890 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
6891 (!ctxt->disableSAX))
6892 ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
6893 return;
6894 }
6895
6896 /*
6897 * The first reference to the entity trigger a parsing phase
6898 * where the ent->children is filled with the result from
6899 * the parsing.
6900 */
6901 if (ent->checked == 0) {
6902 unsigned long oldnbent = ctxt->nbentities;
6903
6904 /*
6905 * This is a bit hackish but this seems the best
6906 * way to make sure both SAX and DOM entity support
6907 * behaves okay.
6908 */
6909 void *user_data;
6910 if (ctxt->userData == ctxt)
6911 user_data = NULL;
6912 else
6913 user_data = ctxt->userData;
6914
6915 /*
6916 * Check that this entity is well formed
6917 * 4.3.2: An internal general parsed entity is well-formed
6918 * if its replacement text matches the production labeled
6919 * content.
6920 */
6921 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
6922 ctxt->depth++;
6923 ret = xmlParseBalancedChunkMemoryInternal(ctxt, ent->content,
6924 user_data, &list);
6925 ctxt->depth--;
6926
6927 } else if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
6928 ctxt->depth++;
6929 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt, ctxt->sax,
6930 user_data, ctxt->depth, ent->URI,
6931 ent->ExternalID, &list);
6932 ctxt->depth--;
6933 } else {
6934 ret = XML_ERR_ENTITY_PE_INTERNAL;
6935 xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
6936 "invalid entity type found\n", NULL);
6937 }
6938
6939 /*
6940 * Store the number of entities needing parsing for this entity
6941 * content and do checkings
6942 */
6943 ent->checked = ctxt->nbentities - oldnbent;
6944 if (ret == XML_ERR_ENTITY_LOOP) {
Daniel Veillard4bf899b2008-08-20 17:04:30 +00006945 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
Daniel Veillard0161e632008-08-28 15:36:32 +00006946 xmlFreeNodeList(list);
Daniel Veillard4bf899b2008-08-20 17:04:30 +00006947 return;
6948 }
Daniel Veillard0161e632008-08-28 15:36:32 +00006949 if (xmlParserEntityCheck(ctxt, 0, ent)) {
6950 xmlFreeNodeList(list);
6951 return;
6952 }
Owen Taylor3473f882001-02-23 17:55:21 +00006953
Daniel Veillard0161e632008-08-28 15:36:32 +00006954 if ((ret == XML_ERR_OK) && (list != NULL)) {
6955 if (((ent->etype == XML_INTERNAL_GENERAL_ENTITY) ||
6956 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY))&&
6957 (ent->children == NULL)) {
6958 ent->children = list;
6959 if (ctxt->replaceEntities) {
Owen Taylor3473f882001-02-23 17:55:21 +00006960 /*
Daniel Veillard0161e632008-08-28 15:36:32 +00006961 * Prune it directly in the generated document
6962 * except for single text nodes.
Owen Taylor3473f882001-02-23 17:55:21 +00006963 */
Daniel Veillard0161e632008-08-28 15:36:32 +00006964 if (((list->type == XML_TEXT_NODE) &&
6965 (list->next == NULL)) ||
6966 (ctxt->parseMode == XML_PARSE_READER)) {
6967 list->parent = (xmlNodePtr) ent;
6968 list = NULL;
6969 ent->owner = 1;
6970 } else {
6971 ent->owner = 0;
6972 while (list != NULL) {
6973 list->parent = (xmlNodePtr) ctxt->node;
6974 list->doc = ctxt->myDoc;
6975 if (list->next == NULL)
6976 ent->last = list;
6977 list = list->next;
Owen Taylor3473f882001-02-23 17:55:21 +00006978 }
Daniel Veillard0161e632008-08-28 15:36:32 +00006979 list = ent->children;
6980#ifdef LIBXML_LEGACY_ENABLED
6981 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
6982 xmlAddEntityReference(ent, list, NULL);
6983#endif /* LIBXML_LEGACY_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00006984 }
6985 } else {
Daniel Veillard0161e632008-08-28 15:36:32 +00006986 ent->owner = 1;
6987 while (list != NULL) {
6988 list->parent = (xmlNodePtr) ent;
6989 if (list->next == NULL)
6990 ent->last = list;
6991 list = list->next;
Owen Taylor3473f882001-02-23 17:55:21 +00006992 }
6993 }
Daniel Veillard0161e632008-08-28 15:36:32 +00006994 } else {
6995 xmlFreeNodeList(list);
6996 list = NULL;
Daniel Veillarda37a6ad2006-10-10 20:05:45 +00006997 }
Daniel Veillard0161e632008-08-28 15:36:32 +00006998 } else if ((ret != XML_ERR_OK) &&
6999 (ret != XML_WAR_UNDECLARED_ENTITY)) {
7000 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7001 "Entity '%s' failed to parse\n", ent->name);
7002 } else if (list != NULL) {
7003 xmlFreeNodeList(list);
7004 list = NULL;
7005 }
7006 if (ent->checked == 0)
7007 ent->checked = 1;
7008 } else if (ent->checked != 1) {
7009 ctxt->nbentities += ent->checked;
7010 }
Daniel Veillarda37a6ad2006-10-10 20:05:45 +00007011
Daniel Veillard0161e632008-08-28 15:36:32 +00007012 /*
7013 * Now that the entity content has been gathered
7014 * provide it to the application, this can take different forms based
7015 * on the parsing modes.
7016 */
7017 if (ent->children == NULL) {
7018 /*
7019 * Probably running in SAX mode and the callbacks don't
7020 * build the entity content. So unless we already went
7021 * though parsing for first checking go though the entity
7022 * content to generate callbacks associated to the entity
7023 */
7024 if (was_checked != 0) {
7025 void *user_data;
Owen Taylor3473f882001-02-23 17:55:21 +00007026 /*
Daniel Veillard0161e632008-08-28 15:36:32 +00007027 * This is a bit hackish but this seems the best
7028 * way to make sure both SAX and DOM entity support
7029 * behaves okay.
Owen Taylor3473f882001-02-23 17:55:21 +00007030 */
Daniel Veillard0161e632008-08-28 15:36:32 +00007031 if (ctxt->userData == ctxt)
7032 user_data = NULL;
7033 else
7034 user_data = ctxt->userData;
7035
7036 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
7037 ctxt->depth++;
7038 ret = xmlParseBalancedChunkMemoryInternal(ctxt,
7039 ent->content, user_data, NULL);
7040 ctxt->depth--;
7041 } else if (ent->etype ==
7042 XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
7043 ctxt->depth++;
7044 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
7045 ctxt->sax, user_data, ctxt->depth,
7046 ent->URI, ent->ExternalID, NULL);
7047 ctxt->depth--;
7048 } else {
7049 ret = XML_ERR_ENTITY_PE_INTERNAL;
7050 xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
7051 "invalid entity type found\n", NULL);
7052 }
7053 if (ret == XML_ERR_ENTITY_LOOP) {
7054 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
7055 return;
7056 }
7057 }
7058 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7059 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
7060 /*
7061 * Entity reference callback comes second, it's somewhat
7062 * superfluous but a compatibility to historical behaviour
7063 */
7064 ctxt->sax->reference(ctxt->userData, ent->name);
7065 }
7066 return;
7067 }
7068
7069 /*
7070 * If we didn't get any children for the entity being built
7071 */
7072 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7073 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
7074 /*
7075 * Create a node.
7076 */
7077 ctxt->sax->reference(ctxt->userData, ent->name);
7078 return;
7079 }
7080
7081 if ((ctxt->replaceEntities) || (ent->children == NULL)) {
7082 /*
7083 * There is a problem on the handling of _private for entities
7084 * (bug 155816): Should we copy the content of the field from
7085 * the entity (possibly overwriting some value set by the user
7086 * when a copy is created), should we leave it alone, or should
7087 * we try to take care of different situations? The problem
7088 * is exacerbated by the usage of this field by the xmlReader.
7089 * To fix this bug, we look at _private on the created node
7090 * and, if it's NULL, we copy in whatever was in the entity.
7091 * If it's not NULL we leave it alone. This is somewhat of a
7092 * hack - maybe we should have further tests to determine
7093 * what to do.
7094 */
7095 if ((ctxt->node != NULL) && (ent->children != NULL)) {
7096 /*
7097 * Seems we are generating the DOM content, do
7098 * a simple tree copy for all references except the first
7099 * In the first occurrence list contains the replacement.
7100 * progressive == 2 means we are operating on the Reader
7101 * and since nodes are discarded we must copy all the time.
7102 */
7103 if (((list == NULL) && (ent->owner == 0)) ||
7104 (ctxt->parseMode == XML_PARSE_READER)) {
7105 xmlNodePtr nw = NULL, cur, firstChild = NULL;
7106
7107 /*
7108 * when operating on a reader, the entities definitions
7109 * are always owning the entities subtree.
7110 if (ctxt->parseMode == XML_PARSE_READER)
7111 ent->owner = 1;
7112 */
7113
7114 cur = ent->children;
7115 while (cur != NULL) {
7116 nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
7117 if (nw != NULL) {
7118 if (nw->_private == NULL)
7119 nw->_private = cur->_private;
7120 if (firstChild == NULL){
7121 firstChild = nw;
7122 }
7123 nw = xmlAddChild(ctxt->node, nw);
7124 }
7125 if (cur == ent->last) {
7126 /*
7127 * needed to detect some strange empty
7128 * node cases in the reader tests
7129 */
7130 if ((ctxt->parseMode == XML_PARSE_READER) &&
7131 (nw != NULL) &&
7132 (nw->type == XML_ELEMENT_NODE) &&
7133 (nw->children == NULL))
7134 nw->extra = 1;
7135
7136 break;
7137 }
7138 cur = cur->next;
7139 }
7140#ifdef LIBXML_LEGACY_ENABLED
7141 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7142 xmlAddEntityReference(ent, firstChild, nw);
7143#endif /* LIBXML_LEGACY_ENABLED */
7144 } else if (list == NULL) {
7145 xmlNodePtr nw = NULL, cur, next, last,
7146 firstChild = NULL;
7147 /*
7148 * Copy the entity child list and make it the new
7149 * entity child list. The goal is to make sure any
7150 * ID or REF referenced will be the one from the
7151 * document content and not the entity copy.
7152 */
7153 cur = ent->children;
7154 ent->children = NULL;
7155 last = ent->last;
7156 ent->last = NULL;
7157 while (cur != NULL) {
7158 next = cur->next;
7159 cur->next = NULL;
7160 cur->parent = NULL;
7161 nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
7162 if (nw != NULL) {
7163 if (nw->_private == NULL)
7164 nw->_private = cur->_private;
7165 if (firstChild == NULL){
7166 firstChild = cur;
7167 }
7168 xmlAddChild((xmlNodePtr) ent, nw);
7169 xmlAddChild(ctxt->node, cur);
7170 }
7171 if (cur == last)
7172 break;
7173 cur = next;
7174 }
Daniel Veillardcba68392008-08-29 12:43:40 +00007175 if (ent->owner == 0)
7176 ent->owner = 1;
Daniel Veillard0161e632008-08-28 15:36:32 +00007177#ifdef LIBXML_LEGACY_ENABLED
7178 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7179 xmlAddEntityReference(ent, firstChild, nw);
7180#endif /* LIBXML_LEGACY_ENABLED */
7181 } else {
7182 const xmlChar *nbktext;
7183
7184 /*
7185 * the name change is to avoid coalescing of the
7186 * node with a possible previous text one which
7187 * would make ent->children a dangling pointer
7188 */
7189 nbktext = xmlDictLookup(ctxt->dict, BAD_CAST "nbktext",
7190 -1);
7191 if (ent->children->type == XML_TEXT_NODE)
7192 ent->children->name = nbktext;
7193 if ((ent->last != ent->children) &&
7194 (ent->last->type == XML_TEXT_NODE))
7195 ent->last->name = nbktext;
7196 xmlAddChildList(ctxt->node, ent->children);
7197 }
7198
7199 /*
7200 * This is to avoid a nasty side effect, see
7201 * characters() in SAX.c
7202 */
7203 ctxt->nodemem = 0;
7204 ctxt->nodelen = 0;
7205 return;
Owen Taylor3473f882001-02-23 17:55:21 +00007206 }
7207 }
7208}
7209
7210/**
7211 * xmlParseEntityRef:
7212 * @ctxt: an XML parser context
7213 *
7214 * parse ENTITY references declarations
7215 *
7216 * [68] EntityRef ::= '&' Name ';'
7217 *
7218 * [ WFC: Entity Declared ]
7219 * In a document without any DTD, a document with only an internal DTD
7220 * subset which contains no parameter entity references, or a document
7221 * with "standalone='yes'", the Name given in the entity reference
7222 * must match that in an entity declaration, except that well-formed
7223 * documents need not declare any of the following entities: amp, lt,
7224 * gt, apos, quot. The declaration of a parameter entity must precede
7225 * any reference to it. Similarly, the declaration of a general entity
7226 * must precede any reference to it which appears in a default value in an
7227 * attribute-list declaration. Note that if entities are declared in the
7228 * external subset or in external parameter entities, a non-validating
7229 * processor is not obligated to read and process their declarations;
7230 * for such documents, the rule that an entity must be declared is a
7231 * well-formedness constraint only if standalone='yes'.
7232 *
7233 * [ WFC: Parsed Entity ]
7234 * An entity reference must not contain the name of an unparsed entity
7235 *
7236 * Returns the xmlEntityPtr if found, or NULL otherwise.
7237 */
7238xmlEntityPtr
7239xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007240 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00007241 xmlEntityPtr ent = NULL;
7242
7243 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00007244
Daniel Veillard0161e632008-08-28 15:36:32 +00007245 if (RAW != '&')
7246 return(NULL);
7247 NEXT;
7248 name = xmlParseName(ctxt);
7249 if (name == NULL) {
7250 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7251 "xmlParseEntityRef: no name\n");
7252 return(NULL);
7253 }
7254 if (RAW != ';') {
7255 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7256 return(NULL);
7257 }
7258 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00007259
Daniel Veillard0161e632008-08-28 15:36:32 +00007260 /*
7261 * Predefined entites override any extra definition
7262 */
Rob Richardsb9ed0172009-01-05 17:28:50 +00007263 if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7264 ent = xmlGetPredefinedEntity(name);
7265 if (ent != NULL)
7266 return(ent);
7267 }
Owen Taylor3473f882001-02-23 17:55:21 +00007268
Daniel Veillard0161e632008-08-28 15:36:32 +00007269 /*
7270 * Increate the number of entity references parsed
7271 */
7272 ctxt->nbentities++;
Owen Taylor3473f882001-02-23 17:55:21 +00007273
Daniel Veillard0161e632008-08-28 15:36:32 +00007274 /*
7275 * Ask first SAX for entity resolution, otherwise try the
7276 * entities which may have stored in the parser context.
7277 */
7278 if (ctxt->sax != NULL) {
7279 if (ctxt->sax->getEntity != NULL)
7280 ent = ctxt->sax->getEntity(ctxt->userData, name);
Rob Richardsb9ed0172009-01-05 17:28:50 +00007281 if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7282 (ctxt->options & XML_PARSE_OLDSAX))
7283 ent = xmlGetPredefinedEntity(name);
Daniel Veillard0161e632008-08-28 15:36:32 +00007284 if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7285 (ctxt->userData==ctxt)) {
7286 ent = xmlSAX2GetEntity(ctxt, name);
Owen Taylor3473f882001-02-23 17:55:21 +00007287 }
7288 }
Daniel Veillard0161e632008-08-28 15:36:32 +00007289 /*
7290 * [ WFC: Entity Declared ]
7291 * In a document without any DTD, a document with only an
7292 * internal DTD subset which contains no parameter entity
7293 * references, or a document with "standalone='yes'", the
7294 * Name given in the entity reference must match that in an
7295 * entity declaration, except that well-formed documents
7296 * need not declare any of the following entities: amp, lt,
7297 * gt, apos, quot.
7298 * The declaration of a parameter entity must precede any
7299 * reference to it.
7300 * Similarly, the declaration of a general entity must
7301 * precede any reference to it which appears in a default
7302 * value in an attribute-list declaration. Note that if
7303 * entities are declared in the external subset or in
7304 * external parameter entities, a non-validating processor
7305 * is not obligated to read and process their declarations;
7306 * for such documents, the rule that an entity must be
7307 * declared is a well-formedness constraint only if
7308 * standalone='yes'.
7309 */
7310 if (ent == NULL) {
7311 if ((ctxt->standalone == 1) ||
7312 ((ctxt->hasExternalSubset == 0) &&
7313 (ctxt->hasPErefs == 0))) {
7314 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7315 "Entity '%s' not defined\n", name);
7316 } else {
7317 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7318 "Entity '%s' not defined\n", name);
7319 if ((ctxt->inSubset == 0) &&
7320 (ctxt->sax != NULL) &&
7321 (ctxt->sax->reference != NULL)) {
7322 ctxt->sax->reference(ctxt->userData, name);
7323 }
7324 }
7325 ctxt->valid = 0;
7326 }
7327
7328 /*
7329 * [ WFC: Parsed Entity ]
7330 * An entity reference must not contain the name of an
7331 * unparsed entity
7332 */
7333 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7334 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7335 "Entity reference to unparsed entity %s\n", name);
7336 }
7337
7338 /*
7339 * [ WFC: No External Entity References ]
7340 * Attribute values cannot contain direct or indirect
7341 * entity references to external entities.
7342 */
7343 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7344 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
7345 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7346 "Attribute references external entity '%s'\n", name);
7347 }
7348 /*
7349 * [ WFC: No < in Attribute Values ]
7350 * The replacement text of any entity referred to directly or
7351 * indirectly in an attribute value (other than "&lt;") must
7352 * not contain a <.
7353 */
7354 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7355 (ent != NULL) && (ent->content != NULL) &&
Rob Richardsb9ed0172009-01-05 17:28:50 +00007356 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
Daniel Veillard0161e632008-08-28 15:36:32 +00007357 (xmlStrchr(ent->content, '<'))) {
7358 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
7359 "'<' in entity '%s' is not allowed in attributes values\n", name);
7360 }
7361
7362 /*
7363 * Internal check, no parameter entities here ...
7364 */
7365 else {
7366 switch (ent->etype) {
7367 case XML_INTERNAL_PARAMETER_ENTITY:
7368 case XML_EXTERNAL_PARAMETER_ENTITY:
7369 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
7370 "Attempt to reference the parameter entity '%s'\n",
7371 name);
7372 break;
7373 default:
7374 break;
7375 }
7376 }
7377
7378 /*
7379 * [ WFC: No Recursion ]
7380 * A parsed entity must not contain a recursive reference
7381 * to itself, either directly or indirectly.
7382 * Done somewhere else
7383 */
Owen Taylor3473f882001-02-23 17:55:21 +00007384 return(ent);
7385}
7386
7387/**
7388 * xmlParseStringEntityRef:
7389 * @ctxt: an XML parser context
7390 * @str: a pointer to an index in the string
7391 *
7392 * parse ENTITY references declarations, but this version parses it from
7393 * a string value.
7394 *
7395 * [68] EntityRef ::= '&' Name ';'
7396 *
7397 * [ WFC: Entity Declared ]
7398 * In a document without any DTD, a document with only an internal DTD
7399 * subset which contains no parameter entity references, or a document
7400 * with "standalone='yes'", the Name given in the entity reference
7401 * must match that in an entity declaration, except that well-formed
7402 * documents need not declare any of the following entities: amp, lt,
7403 * gt, apos, quot. The declaration of a parameter entity must precede
7404 * any reference to it. Similarly, the declaration of a general entity
7405 * must precede any reference to it which appears in a default value in an
7406 * attribute-list declaration. Note that if entities are declared in the
7407 * external subset or in external parameter entities, a non-validating
7408 * processor is not obligated to read and process their declarations;
7409 * for such documents, the rule that an entity must be declared is a
7410 * well-formedness constraint only if standalone='yes'.
7411 *
7412 * [ WFC: Parsed Entity ]
7413 * An entity reference must not contain the name of an unparsed entity
7414 *
7415 * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
7416 * is updated to the current location in the string.
7417 */
Daniel Veillard8ed10722009-08-20 19:17:36 +02007418static xmlEntityPtr
Owen Taylor3473f882001-02-23 17:55:21 +00007419xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
7420 xmlChar *name;
7421 const xmlChar *ptr;
7422 xmlChar cur;
7423 xmlEntityPtr ent = NULL;
7424
7425 if ((str == NULL) || (*str == NULL))
7426 return(NULL);
7427 ptr = *str;
7428 cur = *ptr;
Daniel Veillard0161e632008-08-28 15:36:32 +00007429 if (cur != '&')
7430 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007431
Daniel Veillard0161e632008-08-28 15:36:32 +00007432 ptr++;
Daniel Veillard0161e632008-08-28 15:36:32 +00007433 name = xmlParseStringName(ctxt, &ptr);
7434 if (name == NULL) {
7435 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7436 "xmlParseStringEntityRef: no name\n");
7437 *str = ptr;
7438 return(NULL);
7439 }
7440 if (*ptr != ';') {
7441 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
Daniel Veillard7f4547c2008-10-03 07:58:23 +00007442 xmlFree(name);
Daniel Veillard0161e632008-08-28 15:36:32 +00007443 *str = ptr;
7444 return(NULL);
7445 }
7446 ptr++;
Owen Taylor3473f882001-02-23 17:55:21 +00007447
Owen Taylor3473f882001-02-23 17:55:21 +00007448
Daniel Veillard0161e632008-08-28 15:36:32 +00007449 /*
7450 * Predefined entites override any extra definition
7451 */
Rob Richardsb9ed0172009-01-05 17:28:50 +00007452 if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7453 ent = xmlGetPredefinedEntity(name);
7454 if (ent != NULL) {
7455 xmlFree(name);
7456 *str = ptr;
7457 return(ent);
7458 }
Daniel Veillard34a7fc32008-10-02 20:55:10 +00007459 }
Owen Taylor3473f882001-02-23 17:55:21 +00007460
Daniel Veillard0161e632008-08-28 15:36:32 +00007461 /*
7462 * Increate the number of entity references parsed
7463 */
7464 ctxt->nbentities++;
Owen Taylor3473f882001-02-23 17:55:21 +00007465
Daniel Veillard0161e632008-08-28 15:36:32 +00007466 /*
7467 * Ask first SAX for entity resolution, otherwise try the
7468 * entities which may have stored in the parser context.
7469 */
7470 if (ctxt->sax != NULL) {
7471 if (ctxt->sax->getEntity != NULL)
7472 ent = ctxt->sax->getEntity(ctxt->userData, name);
Rob Richardsb9ed0172009-01-05 17:28:50 +00007473 if ((ent == NULL) && (ctxt->options & XML_PARSE_OLDSAX))
7474 ent = xmlGetPredefinedEntity(name);
Daniel Veillard0161e632008-08-28 15:36:32 +00007475 if ((ent == NULL) && (ctxt->userData==ctxt)) {
7476 ent = xmlSAX2GetEntity(ctxt, name);
Owen Taylor3473f882001-02-23 17:55:21 +00007477 }
7478 }
Daniel Veillard0161e632008-08-28 15:36:32 +00007479
7480 /*
7481 * [ WFC: Entity Declared ]
7482 * In a document without any DTD, a document with only an
7483 * internal DTD subset which contains no parameter entity
7484 * references, or a document with "standalone='yes'", the
7485 * Name given in the entity reference must match that in an
7486 * entity declaration, except that well-formed documents
7487 * need not declare any of the following entities: amp, lt,
7488 * gt, apos, quot.
7489 * The declaration of a parameter entity must precede any
7490 * reference to it.
7491 * Similarly, the declaration of a general entity must
7492 * precede any reference to it which appears in a default
7493 * value in an attribute-list declaration. Note that if
7494 * entities are declared in the external subset or in
7495 * external parameter entities, a non-validating processor
7496 * is not obligated to read and process their declarations;
7497 * for such documents, the rule that an entity must be
7498 * declared is a well-formedness constraint only if
7499 * standalone='yes'.
7500 */
7501 if (ent == NULL) {
7502 if ((ctxt->standalone == 1) ||
7503 ((ctxt->hasExternalSubset == 0) &&
7504 (ctxt->hasPErefs == 0))) {
7505 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7506 "Entity '%s' not defined\n", name);
7507 } else {
7508 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7509 "Entity '%s' not defined\n",
7510 name);
7511 }
7512 /* TODO ? check regressions ctxt->valid = 0; */
7513 }
7514
7515 /*
7516 * [ WFC: Parsed Entity ]
7517 * An entity reference must not contain the name of an
7518 * unparsed entity
7519 */
7520 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7521 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7522 "Entity reference to unparsed entity %s\n", name);
7523 }
7524
7525 /*
7526 * [ WFC: No External Entity References ]
7527 * Attribute values cannot contain direct or indirect
7528 * entity references to external entities.
7529 */
7530 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7531 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
7532 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7533 "Attribute references external entity '%s'\n", name);
7534 }
7535 /*
7536 * [ WFC: No < in Attribute Values ]
7537 * The replacement text of any entity referred to directly or
7538 * indirectly in an attribute value (other than "&lt;") must
7539 * not contain a <.
7540 */
7541 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7542 (ent != NULL) && (ent->content != NULL) &&
Rob Richardsb9ed0172009-01-05 17:28:50 +00007543 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
Daniel Veillard0161e632008-08-28 15:36:32 +00007544 (xmlStrchr(ent->content, '<'))) {
7545 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
7546 "'<' in entity '%s' is not allowed in attributes values\n",
7547 name);
7548 }
7549
7550 /*
7551 * Internal check, no parameter entities here ...
7552 */
7553 else {
7554 switch (ent->etype) {
7555 case XML_INTERNAL_PARAMETER_ENTITY:
7556 case XML_EXTERNAL_PARAMETER_ENTITY:
7557 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
7558 "Attempt to reference the parameter entity '%s'\n",
7559 name);
7560 break;
7561 default:
7562 break;
7563 }
7564 }
7565
7566 /*
7567 * [ WFC: No Recursion ]
7568 * A parsed entity must not contain a recursive reference
7569 * to itself, either directly or indirectly.
7570 * Done somewhere else
7571 */
7572
7573 xmlFree(name);
Owen Taylor3473f882001-02-23 17:55:21 +00007574 *str = ptr;
7575 return(ent);
7576}
7577
7578/**
7579 * xmlParsePEReference:
7580 * @ctxt: an XML parser context
7581 *
7582 * parse PEReference declarations
7583 * The entity content is handled directly by pushing it's content as
7584 * a new input stream.
7585 *
7586 * [69] PEReference ::= '%' Name ';'
7587 *
7588 * [ WFC: No Recursion ]
7589 * A parsed entity must not contain a recursive
7590 * reference to itself, either directly or indirectly.
7591 *
7592 * [ WFC: Entity Declared ]
7593 * In a document without any DTD, a document with only an internal DTD
7594 * subset which contains no parameter entity references, or a document
7595 * with "standalone='yes'", ... ... The declaration of a parameter
7596 * entity must precede any reference to it...
7597 *
7598 * [ VC: Entity Declared ]
7599 * In a document with an external subset or external parameter entities
7600 * with "standalone='no'", ... ... The declaration of a parameter entity
7601 * must precede any reference to it...
7602 *
7603 * [ WFC: In DTD ]
7604 * Parameter-entity references may only appear in the DTD.
7605 * NOTE: misleading but this is handled.
7606 */
7607void
Daniel Veillard8f597c32003-10-06 08:19:27 +00007608xmlParsePEReference(xmlParserCtxtPtr ctxt)
7609{
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007610 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00007611 xmlEntityPtr entity = NULL;
7612 xmlParserInputPtr input;
7613
Daniel Veillard0161e632008-08-28 15:36:32 +00007614 if (RAW != '%')
7615 return;
7616 NEXT;
7617 name = xmlParseName(ctxt);
7618 if (name == NULL) {
7619 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7620 "xmlParsePEReference: no name\n");
7621 return;
Owen Taylor3473f882001-02-23 17:55:21 +00007622 }
Daniel Veillard0161e632008-08-28 15:36:32 +00007623 if (RAW != ';') {
7624 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7625 return;
7626 }
7627
7628 NEXT;
7629
7630 /*
7631 * Increate the number of entity references parsed
7632 */
7633 ctxt->nbentities++;
7634
7635 /*
7636 * Request the entity from SAX
7637 */
7638 if ((ctxt->sax != NULL) &&
7639 (ctxt->sax->getParameterEntity != NULL))
7640 entity = ctxt->sax->getParameterEntity(ctxt->userData,
7641 name);
7642 if (entity == NULL) {
7643 /*
7644 * [ WFC: Entity Declared ]
7645 * In a document without any DTD, a document with only an
7646 * internal DTD subset which contains no parameter entity
7647 * references, or a document with "standalone='yes'", ...
7648 * ... The declaration of a parameter entity must precede
7649 * any reference to it...
7650 */
7651 if ((ctxt->standalone == 1) ||
7652 ((ctxt->hasExternalSubset == 0) &&
7653 (ctxt->hasPErefs == 0))) {
7654 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7655 "PEReference: %%%s; not found\n",
7656 name);
7657 } else {
7658 /*
7659 * [ VC: Entity Declared ]
7660 * In a document with an external subset or external
7661 * parameter entities with "standalone='no'", ...
7662 * ... The declaration of a parameter entity must
7663 * precede any reference to it...
7664 */
7665 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7666 "PEReference: %%%s; not found\n",
7667 name, NULL);
7668 ctxt->valid = 0;
7669 }
7670 } else {
7671 /*
7672 * Internal checking in case the entity quest barfed
7673 */
7674 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
7675 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
7676 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7677 "Internal: %%%s; is not a parameter entity\n",
7678 name, NULL);
7679 } else if (ctxt->input->free != deallocblankswrapper) {
7680 input = xmlNewBlanksWrapperInputStream(ctxt, entity);
7681 if (xmlPushInput(ctxt, input) < 0)
7682 return;
7683 } else {
7684 /*
7685 * TODO !!!
7686 * handle the extra spaces added before and after
7687 * c.f. http://www.w3.org/TR/REC-xml#as-PE
7688 */
7689 input = xmlNewEntityInputStream(ctxt, entity);
7690 if (xmlPushInput(ctxt, input) < 0)
7691 return;
7692 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
7693 (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
7694 (IS_BLANK_CH(NXT(5)))) {
7695 xmlParseTextDecl(ctxt);
7696 if (ctxt->errNo ==
7697 XML_ERR_UNSUPPORTED_ENCODING) {
7698 /*
7699 * The XML REC instructs us to stop parsing
7700 * right here
7701 */
7702 ctxt->instate = XML_PARSER_EOF;
7703 return;
7704 }
7705 }
7706 }
7707 }
7708 ctxt->hasPErefs = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007709}
7710
7711/**
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00007712 * xmlLoadEntityContent:
7713 * @ctxt: an XML parser context
7714 * @entity: an unloaded system entity
7715 *
7716 * Load the original content of the given system entity from the
7717 * ExternalID/SystemID given. This is to be used for Included in Literal
7718 * http://www.w3.org/TR/REC-xml/#inliteral processing of entities references
7719 *
7720 * Returns 0 in case of success and -1 in case of failure
7721 */
7722static int
7723xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
7724 xmlParserInputPtr input;
7725 xmlBufferPtr buf;
7726 int l, c;
7727 int count = 0;
7728
7729 if ((ctxt == NULL) || (entity == NULL) ||
7730 ((entity->etype != XML_EXTERNAL_PARAMETER_ENTITY) &&
7731 (entity->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY)) ||
7732 (entity->content != NULL)) {
7733 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
7734 "xmlLoadEntityContent parameter error");
7735 return(-1);
7736 }
7737
7738 if (xmlParserDebugEntities)
7739 xmlGenericError(xmlGenericErrorContext,
7740 "Reading %s entity content input\n", entity->name);
7741
7742 buf = xmlBufferCreate();
7743 if (buf == NULL) {
7744 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
7745 "xmlLoadEntityContent parameter error");
7746 return(-1);
7747 }
7748
7749 input = xmlNewEntityInputStream(ctxt, entity);
7750 if (input == NULL) {
7751 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
7752 "xmlLoadEntityContent input error");
7753 xmlBufferFree(buf);
7754 return(-1);
7755 }
7756
7757 /*
7758 * Push the entity as the current input, read char by char
7759 * saving to the buffer until the end of the entity or an error
7760 */
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00007761 if (xmlPushInput(ctxt, input) < 0) {
7762 xmlBufferFree(buf);
7763 return(-1);
7764 }
7765
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00007766 GROW;
7767 c = CUR_CHAR(l);
7768 while ((ctxt->input == input) && (ctxt->input->cur < ctxt->input->end) &&
7769 (IS_CHAR(c))) {
7770 xmlBufferAdd(buf, ctxt->input->cur, l);
7771 if (count++ > 100) {
7772 count = 0;
7773 GROW;
7774 }
7775 NEXTL(l);
7776 c = CUR_CHAR(l);
7777 }
7778
7779 if ((ctxt->input == input) && (ctxt->input->cur >= ctxt->input->end)) {
7780 xmlPopInput(ctxt);
7781 } else if (!IS_CHAR(c)) {
7782 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
7783 "xmlLoadEntityContent: invalid char value %d\n",
7784 c);
7785 xmlBufferFree(buf);
7786 return(-1);
7787 }
7788 entity->content = buf->content;
7789 buf->content = NULL;
7790 xmlBufferFree(buf);
7791
7792 return(0);
7793}
7794
7795/**
Owen Taylor3473f882001-02-23 17:55:21 +00007796 * xmlParseStringPEReference:
7797 * @ctxt: an XML parser context
7798 * @str: a pointer to an index in the string
7799 *
7800 * parse PEReference declarations
7801 *
7802 * [69] PEReference ::= '%' Name ';'
7803 *
7804 * [ WFC: No Recursion ]
7805 * A parsed entity must not contain a recursive
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00007806 * reference to itself, either directly or indirectly.
Owen Taylor3473f882001-02-23 17:55:21 +00007807 *
7808 * [ WFC: Entity Declared ]
7809 * In a document without any DTD, a document with only an internal DTD
7810 * subset which contains no parameter entity references, or a document
7811 * with "standalone='yes'", ... ... The declaration of a parameter
7812 * entity must precede any reference to it...
7813 *
7814 * [ VC: Entity Declared ]
7815 * In a document with an external subset or external parameter entities
7816 * with "standalone='no'", ... ... The declaration of a parameter entity
7817 * must precede any reference to it...
7818 *
7819 * [ WFC: In DTD ]
7820 * Parameter-entity references may only appear in the DTD.
7821 * NOTE: misleading but this is handled.
7822 *
7823 * Returns the string of the entity content.
7824 * str is updated to the current value of the index
7825 */
Daniel Veillard8ed10722009-08-20 19:17:36 +02007826static xmlEntityPtr
Owen Taylor3473f882001-02-23 17:55:21 +00007827xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
7828 const xmlChar *ptr;
7829 xmlChar cur;
7830 xmlChar *name;
7831 xmlEntityPtr entity = NULL;
7832
7833 if ((str == NULL) || (*str == NULL)) return(NULL);
7834 ptr = *str;
7835 cur = *ptr;
Daniel Veillard0161e632008-08-28 15:36:32 +00007836 if (cur != '%')
7837 return(NULL);
7838 ptr++;
Daniel Veillard0161e632008-08-28 15:36:32 +00007839 name = xmlParseStringName(ctxt, &ptr);
7840 if (name == NULL) {
7841 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7842 "xmlParseStringPEReference: no name\n");
7843 *str = ptr;
7844 return(NULL);
7845 }
7846 cur = *ptr;
7847 if (cur != ';') {
7848 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7849 xmlFree(name);
7850 *str = ptr;
7851 return(NULL);
7852 }
7853 ptr++;
7854
7855 /*
7856 * Increate the number of entity references parsed
7857 */
7858 ctxt->nbentities++;
7859
7860 /*
7861 * Request the entity from SAX
7862 */
7863 if ((ctxt->sax != NULL) &&
7864 (ctxt->sax->getParameterEntity != NULL))
7865 entity = ctxt->sax->getParameterEntity(ctxt->userData,
7866 name);
7867 if (entity == NULL) {
7868 /*
7869 * [ WFC: Entity Declared ]
7870 * In a document without any DTD, a document with only an
7871 * internal DTD subset which contains no parameter entity
7872 * references, or a document with "standalone='yes'", ...
7873 * ... The declaration of a parameter entity must precede
7874 * any reference to it...
7875 */
7876 if ((ctxt->standalone == 1) ||
7877 ((ctxt->hasExternalSubset == 0) && (ctxt->hasPErefs == 0))) {
7878 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7879 "PEReference: %%%s; not found\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00007880 } else {
Daniel Veillard0161e632008-08-28 15:36:32 +00007881 /*
7882 * [ VC: Entity Declared ]
7883 * In a document with an external subset or external
7884 * parameter entities with "standalone='no'", ...
7885 * ... The declaration of a parameter entity must
7886 * precede any reference to it...
7887 */
7888 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7889 "PEReference: %%%s; not found\n",
7890 name, NULL);
7891 ctxt->valid = 0;
7892 }
7893 } else {
7894 /*
7895 * Internal checking in case the entity quest barfed
7896 */
7897 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
7898 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
7899 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7900 "%%%s; is not a parameter entity\n",
7901 name, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007902 }
7903 }
Daniel Veillard0161e632008-08-28 15:36:32 +00007904 ctxt->hasPErefs = 1;
7905 xmlFree(name);
Owen Taylor3473f882001-02-23 17:55:21 +00007906 *str = ptr;
7907 return(entity);
7908}
7909
7910/**
7911 * xmlParseDocTypeDecl:
7912 * @ctxt: an XML parser context
7913 *
7914 * parse a DOCTYPE declaration
7915 *
7916 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
7917 * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
7918 *
7919 * [ VC: Root Element Type ]
7920 * The Name in the document type declaration must match the element
7921 * type of the root element.
7922 */
7923
7924void
7925xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007926 const xmlChar *name = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00007927 xmlChar *ExternalID = NULL;
7928 xmlChar *URI = NULL;
7929
7930 /*
7931 * We know that '<!DOCTYPE' has been detected.
7932 */
7933 SKIP(9);
7934
7935 SKIP_BLANKS;
7936
7937 /*
7938 * Parse the DOCTYPE name.
7939 */
7940 name = xmlParseName(ctxt);
7941 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007942 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7943 "xmlParseDocTypeDecl : no DOCTYPE name !\n");
Owen Taylor3473f882001-02-23 17:55:21 +00007944 }
7945 ctxt->intSubName = name;
7946
7947 SKIP_BLANKS;
7948
7949 /*
7950 * Check for SystemID and ExternalID
7951 */
7952 URI = xmlParseExternalID(ctxt, &ExternalID, 1);
7953
7954 if ((URI != NULL) || (ExternalID != NULL)) {
7955 ctxt->hasExternalSubset = 1;
7956 }
7957 ctxt->extSubURI = URI;
7958 ctxt->extSubSystem = ExternalID;
7959
7960 SKIP_BLANKS;
7961
7962 /*
7963 * Create and update the internal subset.
7964 */
7965 if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
7966 (!ctxt->disableSAX))
7967 ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
7968
7969 /*
7970 * Is there any internal subset declarations ?
7971 * they are handled separately in xmlParseInternalSubset()
7972 */
7973 if (RAW == '[')
7974 return;
7975
7976 /*
7977 * We should be at the end of the DOCTYPE declaration.
7978 */
7979 if (RAW != '>') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007980 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007981 }
7982 NEXT;
7983}
7984
7985/**
Daniel Veillardcbaf3992001-12-31 16:16:02 +00007986 * xmlParseInternalSubset:
Owen Taylor3473f882001-02-23 17:55:21 +00007987 * @ctxt: an XML parser context
7988 *
7989 * parse the internal subset declaration
7990 *
7991 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
7992 */
7993
Daniel Veillard56a4cb82001-03-24 17:00:36 +00007994static void
Owen Taylor3473f882001-02-23 17:55:21 +00007995xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
7996 /*
7997 * Is there any DTD definition ?
7998 */
7999 if (RAW == '[') {
8000 ctxt->instate = XML_PARSER_DTD;
8001 NEXT;
8002 /*
8003 * Parse the succession of Markup declarations and
8004 * PEReferences.
8005 * Subsequence (markupdecl | PEReference | S)*
8006 */
8007 while (RAW != ']') {
8008 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00008009 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00008010
8011 SKIP_BLANKS;
8012 xmlParseMarkupDecl(ctxt);
8013 xmlParsePEReference(ctxt);
8014
8015 /*
8016 * Pop-up of finished entities.
8017 */
8018 while ((RAW == 0) && (ctxt->inputNr > 1))
8019 xmlPopInput(ctxt);
8020
8021 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008022 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
Owen Taylor3473f882001-02-23 17:55:21 +00008023 "xmlParseInternalSubset: error detected in Markup declaration\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008024 break;
8025 }
8026 }
8027 if (RAW == ']') {
8028 NEXT;
8029 SKIP_BLANKS;
8030 }
8031 }
8032
8033 /*
8034 * We should be at the end of the DOCTYPE declaration.
8035 */
8036 if (RAW != '>') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008037 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008038 }
8039 NEXT;
8040}
8041
Daniel Veillard81273902003-09-30 00:43:48 +00008042#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +00008043/**
8044 * xmlParseAttribute:
8045 * @ctxt: an XML parser context
8046 * @value: a xmlChar ** used to store the value of the attribute
8047 *
8048 * parse an attribute
8049 *
8050 * [41] Attribute ::= Name Eq AttValue
8051 *
8052 * [ WFC: No External Entity References ]
8053 * Attribute values cannot contain direct or indirect entity references
8054 * to external entities.
8055 *
8056 * [ WFC: No < in Attribute Values ]
8057 * The replacement text of any entity referred to directly or indirectly in
8058 * an attribute value (other than "&lt;") must not contain a <.
8059 *
8060 * [ VC: Attribute Value Type ]
8061 * The attribute must have been declared; the value must be of the type
8062 * declared for it.
8063 *
8064 * [25] Eq ::= S? '=' S?
8065 *
8066 * With namespace:
8067 *
8068 * [NS 11] Attribute ::= QName Eq AttValue
8069 *
8070 * Also the case QName == xmlns:??? is handled independently as a namespace
8071 * definition.
8072 *
8073 * Returns the attribute name, and the value in *value.
8074 */
8075
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008076const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00008077xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008078 const xmlChar *name;
8079 xmlChar *val;
Owen Taylor3473f882001-02-23 17:55:21 +00008080
8081 *value = NULL;
Daniel Veillard878eab02002-02-19 13:46:09 +00008082 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00008083 name = xmlParseName(ctxt);
8084 if (name == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00008085 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008086 "error parsing attribute name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008087 return(NULL);
8088 }
8089
8090 /*
8091 * read the value
8092 */
8093 SKIP_BLANKS;
8094 if (RAW == '=') {
8095 NEXT;
8096 SKIP_BLANKS;
8097 val = xmlParseAttValue(ctxt);
8098 ctxt->instate = XML_PARSER_CONTENT;
8099 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00008100 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
Owen Taylor3473f882001-02-23 17:55:21 +00008101 "Specification mandate value for attribute %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00008102 return(NULL);
8103 }
8104
8105 /*
8106 * Check that xml:lang conforms to the specification
8107 * No more registered as an error, just generate a warning now
8108 * since this was deprecated in XML second edition
8109 */
8110 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
8111 if (!xmlCheckLanguageID(val)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00008112 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
8113 "Malformed value for xml:lang : %s\n",
8114 val, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008115 }
8116 }
8117
8118 /*
8119 * Check that xml:space conforms to the specification
8120 */
8121 if (xmlStrEqual(name, BAD_CAST "xml:space")) {
8122 if (xmlStrEqual(val, BAD_CAST "default"))
8123 *(ctxt->space) = 0;
8124 else if (xmlStrEqual(val, BAD_CAST "preserve"))
8125 *(ctxt->space) = 1;
8126 else {
Daniel Veillardd8925572005-06-08 22:34:55 +00008127 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
Daniel Veillard642104e2003-03-26 16:32:05 +00008128"Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
Daniel Veillardd8925572005-06-08 22:34:55 +00008129 val, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008130 }
8131 }
8132
8133 *value = val;
8134 return(name);
8135}
8136
8137/**
8138 * xmlParseStartTag:
8139 * @ctxt: an XML parser context
8140 *
8141 * parse a start of tag either for rule element or
8142 * EmptyElement. In both case we don't parse the tag closing chars.
8143 *
8144 * [40] STag ::= '<' Name (S Attribute)* S? '>'
8145 *
8146 * [ WFC: Unique Att Spec ]
8147 * No attribute name may appear more than once in the same start-tag or
8148 * empty-element tag.
8149 *
8150 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
8151 *
8152 * [ WFC: Unique Att Spec ]
8153 * No attribute name may appear more than once in the same start-tag or
8154 * empty-element tag.
8155 *
8156 * With namespace:
8157 *
8158 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
8159 *
8160 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
8161 *
8162 * Returns the element name parsed
8163 */
8164
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008165const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00008166xmlParseStartTag(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008167 const xmlChar *name;
8168 const xmlChar *attname;
Owen Taylor3473f882001-02-23 17:55:21 +00008169 xmlChar *attvalue;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008170 const xmlChar **atts = ctxt->atts;
Owen Taylor3473f882001-02-23 17:55:21 +00008171 int nbatts = 0;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008172 int maxatts = ctxt->maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00008173 int i;
8174
8175 if (RAW != '<') return(NULL);
Daniel Veillard21a0f912001-02-25 19:54:14 +00008176 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00008177
8178 name = xmlParseName(ctxt);
8179 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008180 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00008181 "xmlParseStartTag: invalid element name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008182 return(NULL);
8183 }
8184
8185 /*
8186 * Now parse the attributes, it ends up with the ending
8187 *
8188 * (S Attribute)* S?
8189 */
8190 SKIP_BLANKS;
8191 GROW;
8192
Daniel Veillard21a0f912001-02-25 19:54:14 +00008193 while ((RAW != '>') &&
8194 ((RAW != '/') || (NXT(1) != '>')) &&
Daniel Veillard73b013f2003-09-30 12:36:01 +00008195 (IS_BYTE_CHAR(RAW))) {
Owen Taylor3473f882001-02-23 17:55:21 +00008196 const xmlChar *q = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00008197 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00008198
8199 attname = xmlParseAttribute(ctxt, &attvalue);
8200 if ((attname != NULL) && (attvalue != NULL)) {
8201 /*
8202 * [ WFC: Unique Att Spec ]
8203 * No attribute name may appear more than once in the same
8204 * start-tag or empty-element tag.
8205 */
8206 for (i = 0; i < nbatts;i += 2) {
8207 if (xmlStrEqual(atts[i], attname)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008208 xmlErrAttributeDup(ctxt, NULL, attname);
Owen Taylor3473f882001-02-23 17:55:21 +00008209 xmlFree(attvalue);
8210 goto failed;
8211 }
8212 }
Owen Taylor3473f882001-02-23 17:55:21 +00008213 /*
8214 * Add the pair to atts
8215 */
8216 if (atts == NULL) {
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008217 maxatts = 22; /* allow for 10 attrs by default */
8218 atts = (const xmlChar **)
8219 xmlMalloc(maxatts * sizeof(xmlChar *));
Owen Taylor3473f882001-02-23 17:55:21 +00008220 if (atts == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008221 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008222 if (attvalue != NULL)
8223 xmlFree(attvalue);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008224 goto failed;
Owen Taylor3473f882001-02-23 17:55:21 +00008225 }
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008226 ctxt->atts = atts;
8227 ctxt->maxatts = maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00008228 } else if (nbatts + 4 > maxatts) {
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008229 const xmlChar **n;
8230
Owen Taylor3473f882001-02-23 17:55:21 +00008231 maxatts *= 2;
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008232 n = (const xmlChar **) xmlRealloc((void *) atts,
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008233 maxatts * sizeof(const xmlChar *));
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008234 if (n == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008235 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008236 if (attvalue != NULL)
8237 xmlFree(attvalue);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008238 goto failed;
Owen Taylor3473f882001-02-23 17:55:21 +00008239 }
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008240 atts = n;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008241 ctxt->atts = atts;
8242 ctxt->maxatts = maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00008243 }
8244 atts[nbatts++] = attname;
8245 atts[nbatts++] = attvalue;
8246 atts[nbatts] = NULL;
8247 atts[nbatts + 1] = NULL;
8248 } else {
Owen Taylor3473f882001-02-23 17:55:21 +00008249 if (attvalue != NULL)
8250 xmlFree(attvalue);
8251 }
8252
8253failed:
8254
Daniel Veillard3772de32002-12-17 10:31:45 +00008255 GROW
Owen Taylor3473f882001-02-23 17:55:21 +00008256 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
8257 break;
William M. Brack76e95df2003-10-18 16:20:14 +00008258 if (!IS_BLANK_CH(RAW)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008259 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
8260 "attributes construct error\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008261 }
8262 SKIP_BLANKS;
Daniel Veillard02111c12003-02-24 19:14:52 +00008263 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
8264 (attname == NULL) && (attvalue == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008265 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
8266 "xmlParseStartTag: problem parsing attributes\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008267 break;
8268 }
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008269 SHRINK;
Owen Taylor3473f882001-02-23 17:55:21 +00008270 GROW;
8271 }
8272
8273 /*
8274 * SAX: Start of Element !
8275 */
8276 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008277 (!ctxt->disableSAX)) {
8278 if (nbatts > 0)
8279 ctxt->sax->startElement(ctxt->userData, name, atts);
8280 else
8281 ctxt->sax->startElement(ctxt->userData, name, NULL);
8282 }
Owen Taylor3473f882001-02-23 17:55:21 +00008283
8284 if (atts != NULL) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008285 /* Free only the content strings */
8286 for (i = 1;i < nbatts;i+=2)
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008287 if (atts[i] != NULL)
8288 xmlFree((xmlChar *) atts[i]);
Owen Taylor3473f882001-02-23 17:55:21 +00008289 }
8290 return(name);
8291}
8292
8293/**
Daniel Veillard0fb18932003-09-07 09:14:37 +00008294 * xmlParseEndTag1:
Owen Taylor3473f882001-02-23 17:55:21 +00008295 * @ctxt: an XML parser context
Daniel Veillard0fb18932003-09-07 09:14:37 +00008296 * @line: line of the start tag
8297 * @nsNr: number of namespaces on the start tag
Owen Taylor3473f882001-02-23 17:55:21 +00008298 *
8299 * parse an end of tag
8300 *
8301 * [42] ETag ::= '</' Name S? '>'
8302 *
8303 * With namespace
8304 *
8305 * [NS 9] ETag ::= '</' QName S? '>'
8306 */
8307
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00008308static void
Daniel Veillard0fb18932003-09-07 09:14:37 +00008309xmlParseEndTag1(xmlParserCtxtPtr ctxt, int line) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008310 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00008311
8312 GROW;
8313 if ((RAW != '<') || (NXT(1) != '/')) {
Daniel Veillardf403d292003-10-05 13:51:35 +00008314 xmlFatalErrMsg(ctxt, XML_ERR_LTSLASH_REQUIRED,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008315 "xmlParseEndTag: '</' not found\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008316 return;
8317 }
8318 SKIP(2);
8319
Daniel Veillard46de64e2002-05-29 08:21:33 +00008320 name = xmlParseNameAndCompare(ctxt,ctxt->name);
Owen Taylor3473f882001-02-23 17:55:21 +00008321
8322 /*
8323 * We should definitely be at the ending "S? '>'" part
8324 */
8325 GROW;
8326 SKIP_BLANKS;
Daniel Veillard73b013f2003-09-30 12:36:01 +00008327 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008328 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008329 } else
Daniel Veillard21a0f912001-02-25 19:54:14 +00008330 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00008331
8332 /*
8333 * [ WFC: Element Type Match ]
8334 * The Name in an element's end-tag must match the element type in the
8335 * start-tag.
8336 *
8337 */
Daniel Veillard46de64e2002-05-29 08:21:33 +00008338 if (name != (xmlChar*)1) {
Daniel Veillardf403d292003-10-05 13:51:35 +00008339 if (name == NULL) name = BAD_CAST "unparseable";
8340 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00008341 "Opening and ending tag mismatch: %s line %d and %s\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00008342 ctxt->name, line, name);
Owen Taylor3473f882001-02-23 17:55:21 +00008343 }
8344
8345 /*
8346 * SAX: End of Tag
8347 */
8348 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
8349 (!ctxt->disableSAX))
Daniel Veillard46de64e2002-05-29 08:21:33 +00008350 ctxt->sax->endElement(ctxt->userData, ctxt->name);
Owen Taylor3473f882001-02-23 17:55:21 +00008351
Daniel Veillarde57ec792003-09-10 10:50:59 +00008352 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00008353 spacePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00008354 return;
8355}
8356
8357/**
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00008358 * xmlParseEndTag:
8359 * @ctxt: an XML parser context
8360 *
8361 * parse an end of tag
8362 *
8363 * [42] ETag ::= '</' Name S? '>'
8364 *
8365 * With namespace
8366 *
8367 * [NS 9] ETag ::= '</' QName S? '>'
8368 */
8369
8370void
8371xmlParseEndTag(xmlParserCtxtPtr ctxt) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00008372 xmlParseEndTag1(ctxt, 0);
8373}
Daniel Veillard81273902003-09-30 00:43:48 +00008374#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillard0fb18932003-09-07 09:14:37 +00008375
8376/************************************************************************
8377 * *
8378 * SAX 2 specific operations *
8379 * *
8380 ************************************************************************/
8381
Daniel Veillard0fb18932003-09-07 09:14:37 +00008382/*
8383 * xmlGetNamespace:
8384 * @ctxt: an XML parser context
8385 * @prefix: the prefix to lookup
8386 *
8387 * Lookup the namespace name for the @prefix (which ca be NULL)
8388 * The prefix must come from the @ctxt->dict dictionnary
8389 *
8390 * Returns the namespace name or NULL if not bound
8391 */
8392static const xmlChar *
8393xmlGetNamespace(xmlParserCtxtPtr ctxt, const xmlChar *prefix) {
8394 int i;
8395
Daniel Veillarde57ec792003-09-10 10:50:59 +00008396 if (prefix == ctxt->str_xml) return(ctxt->str_xml_ns);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008397 for (i = ctxt->nsNr - 2;i >= 0;i-=2)
Daniel Veillarde57ec792003-09-10 10:50:59 +00008398 if (ctxt->nsTab[i] == prefix) {
8399 if ((prefix == NULL) && (*ctxt->nsTab[i + 1] == 0))
8400 return(NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008401 return(ctxt->nsTab[i + 1]);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008402 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008403 return(NULL);
8404}
8405
8406/**
Daniel Veillard0fb18932003-09-07 09:14:37 +00008407 * xmlParseQName:
8408 * @ctxt: an XML parser context
8409 * @prefix: pointer to store the prefix part
8410 *
8411 * parse an XML Namespace QName
8412 *
8413 * [6] QName ::= (Prefix ':')? LocalPart
8414 * [7] Prefix ::= NCName
8415 * [8] LocalPart ::= NCName
8416 *
8417 * Returns the Name parsed or NULL
8418 */
8419
8420static const xmlChar *
8421xmlParseQName(xmlParserCtxtPtr ctxt, const xmlChar **prefix) {
8422 const xmlChar *l, *p;
8423
8424 GROW;
8425
8426 l = xmlParseNCName(ctxt);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008427 if (l == NULL) {
8428 if (CUR == ':') {
8429 l = xmlParseName(ctxt);
8430 if (l != NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008431 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8432 "Failed to parse QName '%s'\n", l, NULL, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008433 *prefix = NULL;
8434 return(l);
8435 }
8436 }
8437 return(NULL);
8438 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008439 if (CUR == ':') {
8440 NEXT;
8441 p = l;
8442 l = xmlParseNCName(ctxt);
8443 if (l == NULL) {
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008444 xmlChar *tmp;
8445
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008446 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8447 "Failed to parse QName '%s:'\n", p, NULL, NULL);
Daniel Veillardae0765b2008-07-31 19:54:59 +00008448 l = xmlParseNmtoken(ctxt);
8449 if (l == NULL)
8450 tmp = xmlBuildQName(BAD_CAST "", p, NULL, 0);
8451 else {
8452 tmp = xmlBuildQName(l, p, NULL, 0);
8453 xmlFree((char *)l);
8454 }
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008455 p = xmlDictLookup(ctxt->dict, tmp, -1);
8456 if (tmp != NULL) xmlFree(tmp);
8457 *prefix = NULL;
8458 return(p);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008459 }
8460 if (CUR == ':') {
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008461 xmlChar *tmp;
8462
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008463 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8464 "Failed to parse QName '%s:%s:'\n", p, l, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008465 NEXT;
8466 tmp = (xmlChar *) xmlParseName(ctxt);
8467 if (tmp != NULL) {
8468 tmp = xmlBuildQName(tmp, l, NULL, 0);
8469 l = xmlDictLookup(ctxt->dict, tmp, -1);
8470 if (tmp != NULL) xmlFree(tmp);
8471 *prefix = p;
8472 return(l);
8473 }
8474 tmp = xmlBuildQName(BAD_CAST "", l, NULL, 0);
8475 l = xmlDictLookup(ctxt->dict, tmp, -1);
8476 if (tmp != NULL) xmlFree(tmp);
8477 *prefix = p;
8478 return(l);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008479 }
8480 *prefix = p;
8481 } else
8482 *prefix = NULL;
8483 return(l);
8484}
8485
8486/**
8487 * xmlParseQNameAndCompare:
8488 * @ctxt: an XML parser context
8489 * @name: the localname
8490 * @prefix: the prefix, if any.
8491 *
8492 * parse an XML name and compares for match
8493 * (specialized for endtag parsing)
8494 *
8495 * Returns NULL for an illegal name, (xmlChar*) 1 for success
8496 * and the name for mismatch
8497 */
8498
8499static const xmlChar *
8500xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *name,
8501 xmlChar const *prefix) {
Daniel Veillardd44b9362009-09-07 12:15:08 +02008502 const xmlChar *cmp;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008503 const xmlChar *in;
8504 const xmlChar *ret;
8505 const xmlChar *prefix2;
8506
8507 if (prefix == NULL) return(xmlParseNameAndCompare(ctxt, name));
8508
8509 GROW;
8510 in = ctxt->input->cur;
Daniel Veillardd44b9362009-09-07 12:15:08 +02008511
Daniel Veillard0fb18932003-09-07 09:14:37 +00008512 cmp = prefix;
8513 while (*in != 0 && *in == *cmp) {
8514 ++in;
8515 ++cmp;
8516 }
8517 if ((*cmp == 0) && (*in == ':')) {
8518 in++;
8519 cmp = name;
8520 while (*in != 0 && *in == *cmp) {
8521 ++in;
8522 ++cmp;
8523 }
William M. Brack76e95df2003-10-18 16:20:14 +00008524 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00008525 /* success */
8526 ctxt->input->cur = in;
8527 return((const xmlChar*) 1);
8528 }
8529 }
8530 /*
8531 * all strings coms from the dictionary, equality can be done directly
8532 */
8533 ret = xmlParseQName (ctxt, &prefix2);
8534 if ((ret == name) && (prefix == prefix2))
8535 return((const xmlChar*) 1);
8536 return ret;
8537}
8538
8539/**
8540 * xmlParseAttValueInternal:
8541 * @ctxt: an XML parser context
8542 * @len: attribute len result
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008543 * @alloc: whether the attribute was reallocated as a new string
8544 * @normalize: if 1 then further non-CDATA normalization must be done
Daniel Veillard0fb18932003-09-07 09:14:37 +00008545 *
8546 * parse a value for an attribute.
8547 * NOTE: if no normalization is needed, the routine will return pointers
8548 * directly from the data buffer.
8549 *
8550 * 3.3.3 Attribute-Value Normalization:
8551 * Before the value of an attribute is passed to the application or
8552 * checked for validity, the XML processor must normalize it as follows:
8553 * - a character reference is processed by appending the referenced
8554 * character to the attribute value
8555 * - an entity reference is processed by recursively processing the
8556 * replacement text of the entity
8557 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
8558 * appending #x20 to the normalized value, except that only a single
8559 * #x20 is appended for a "#xD#xA" sequence that is part of an external
8560 * parsed entity or the literal entity value of an internal parsed entity
8561 * - other characters are processed by appending them to the normalized value
8562 * If the declared value is not CDATA, then the XML processor must further
8563 * process the normalized attribute value by discarding any leading and
8564 * trailing space (#x20) characters, and by replacing sequences of space
8565 * (#x20) characters by a single space (#x20) character.
8566 * All attributes for which no declaration has been read should be treated
8567 * by a non-validating parser as if declared CDATA.
8568 *
8569 * Returns the AttValue parsed or NULL. The value has to be freed by the
8570 * caller if it was copied, this can be detected by val[*len] == 0.
8571 */
8572
8573static xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008574xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *len, int *alloc,
8575 int normalize)
Daniel Veillarde57ec792003-09-10 10:50:59 +00008576{
Daniel Veillard0fb18932003-09-07 09:14:37 +00008577 xmlChar limit = 0;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008578 const xmlChar *in = NULL, *start, *end, *last;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008579 xmlChar *ret = NULL;
8580
8581 GROW;
8582 in = (xmlChar *) CUR_PTR;
8583 if (*in != '"' && *in != '\'') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008584 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008585 return (NULL);
8586 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008587 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008588
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008589 /*
8590 * try to handle in this routine the most common case where no
8591 * allocation of a new string is required and where content is
8592 * pure ASCII.
8593 */
8594 limit = *in++;
8595 end = ctxt->input->end;
8596 start = in;
8597 if (in >= end) {
8598 const xmlChar *oldbase = ctxt->input->base;
8599 GROW;
8600 if (oldbase != ctxt->input->base) {
8601 long delta = ctxt->input->base - oldbase;
8602 start = start + delta;
8603 in = in + delta;
8604 }
8605 end = ctxt->input->end;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008606 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008607 if (normalize) {
8608 /*
8609 * Skip any leading spaces
8610 */
8611 while ((in < end) && (*in != limit) &&
8612 ((*in == 0x20) || (*in == 0x9) ||
8613 (*in == 0xA) || (*in == 0xD))) {
8614 in++;
8615 start = in;
8616 if (in >= end) {
8617 const xmlChar *oldbase = ctxt->input->base;
8618 GROW;
8619 if (oldbase != ctxt->input->base) {
8620 long delta = ctxt->input->base - oldbase;
8621 start = start + delta;
8622 in = in + delta;
8623 }
8624 end = ctxt->input->end;
8625 }
8626 }
8627 while ((in < end) && (*in != limit) && (*in >= 0x20) &&
8628 (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
8629 if ((*in++ == 0x20) && (*in == 0x20)) break;
8630 if (in >= end) {
8631 const xmlChar *oldbase = ctxt->input->base;
8632 GROW;
8633 if (oldbase != ctxt->input->base) {
8634 long delta = ctxt->input->base - oldbase;
8635 start = start + delta;
8636 in = in + delta;
8637 }
8638 end = ctxt->input->end;
8639 }
8640 }
8641 last = in;
8642 /*
8643 * skip the trailing blanks
8644 */
Daniel Veillardc6e20e42003-09-11 16:30:26 +00008645 while ((last[-1] == 0x20) && (last > start)) last--;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008646 while ((in < end) && (*in != limit) &&
8647 ((*in == 0x20) || (*in == 0x9) ||
8648 (*in == 0xA) || (*in == 0xD))) {
8649 in++;
8650 if (in >= end) {
8651 const xmlChar *oldbase = ctxt->input->base;
8652 GROW;
8653 if (oldbase != ctxt->input->base) {
8654 long delta = ctxt->input->base - oldbase;
8655 start = start + delta;
8656 in = in + delta;
8657 last = last + delta;
8658 }
8659 end = ctxt->input->end;
8660 }
8661 }
8662 if (*in != limit) goto need_complex;
8663 } else {
8664 while ((in < end) && (*in != limit) && (*in >= 0x20) &&
8665 (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
8666 in++;
8667 if (in >= end) {
8668 const xmlChar *oldbase = ctxt->input->base;
8669 GROW;
8670 if (oldbase != ctxt->input->base) {
8671 long delta = ctxt->input->base - oldbase;
8672 start = start + delta;
8673 in = in + delta;
8674 }
8675 end = ctxt->input->end;
8676 }
8677 }
8678 last = in;
8679 if (*in != limit) goto need_complex;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008680 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008681 in++;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008682 if (len != NULL) {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008683 *len = last - start;
8684 ret = (xmlChar *) start;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008685 } else {
Daniel Veillarde57ec792003-09-10 10:50:59 +00008686 if (alloc) *alloc = 1;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008687 ret = xmlStrndup(start, last - start);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008688 }
8689 CUR_PTR = in;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008690 if (alloc) *alloc = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008691 return ret;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008692need_complex:
8693 if (alloc) *alloc = 1;
8694 return xmlParseAttValueComplex(ctxt, len, normalize);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008695}
8696
8697/**
8698 * xmlParseAttribute2:
8699 * @ctxt: an XML parser context
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008700 * @pref: the element prefix
8701 * @elem: the element name
8702 * @prefix: a xmlChar ** used to store the value of the attribute prefix
Daniel Veillard0fb18932003-09-07 09:14:37 +00008703 * @value: a xmlChar ** used to store the value of the attribute
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008704 * @len: an int * to save the length of the attribute
8705 * @alloc: an int * to indicate if the attribute was allocated
Daniel Veillard0fb18932003-09-07 09:14:37 +00008706 *
8707 * parse an attribute in the new SAX2 framework.
8708 *
8709 * Returns the attribute name, and the value in *value, .
8710 */
8711
8712static const xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008713xmlParseAttribute2(xmlParserCtxtPtr ctxt,
Daniel Veillard97c9ce22008-03-25 16:52:41 +00008714 const xmlChar * pref, const xmlChar * elem,
8715 const xmlChar ** prefix, xmlChar ** value,
8716 int *len, int *alloc)
8717{
Daniel Veillard0fb18932003-09-07 09:14:37 +00008718 const xmlChar *name;
Daniel Veillardd8925572005-06-08 22:34:55 +00008719 xmlChar *val, *internal_val = NULL;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008720 int normalize = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008721
8722 *value = NULL;
8723 GROW;
8724 name = xmlParseQName(ctxt, prefix);
8725 if (name == NULL) {
Daniel Veillard97c9ce22008-03-25 16:52:41 +00008726 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8727 "error parsing attribute name\n");
8728 return (NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008729 }
8730
8731 /*
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008732 * get the type if needed
8733 */
8734 if (ctxt->attsSpecial != NULL) {
8735 int type;
8736
8737 type = (int) (long) xmlHashQLookup2(ctxt->attsSpecial,
Daniel Veillard97c9ce22008-03-25 16:52:41 +00008738 pref, elem, *prefix, name);
8739 if (type != 0)
8740 normalize = 1;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008741 }
8742
8743 /*
Daniel Veillard0fb18932003-09-07 09:14:37 +00008744 * read the value
8745 */
8746 SKIP_BLANKS;
8747 if (RAW == '=') {
8748 NEXT;
Daniel Veillard97c9ce22008-03-25 16:52:41 +00008749 SKIP_BLANKS;
8750 val = xmlParseAttValueInternal(ctxt, len, alloc, normalize);
8751 if (normalize) {
8752 /*
8753 * Sometimes a second normalisation pass for spaces is needed
8754 * but that only happens if charrefs or entities refernces
8755 * have been used in the attribute value, i.e. the attribute
8756 * value have been extracted in an allocated string already.
8757 */
8758 if (*alloc) {
8759 const xmlChar *val2;
8760
8761 val2 = xmlAttrNormalizeSpace2(ctxt, val, len);
Daniel Veillardae0765b2008-07-31 19:54:59 +00008762 if ((val2 != NULL) && (val2 != val)) {
Daniel Veillard97c9ce22008-03-25 16:52:41 +00008763 xmlFree(val);
Daniel Veillard6a31b832008-03-26 14:06:44 +00008764 val = (xmlChar *) val2;
Daniel Veillard97c9ce22008-03-25 16:52:41 +00008765 }
8766 }
8767 }
8768 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008769 } else {
Daniel Veillard97c9ce22008-03-25 16:52:41 +00008770 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
8771 "Specification mandate value for attribute %s\n",
8772 name);
8773 return (NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008774 }
8775
Daniel Veillard97c9ce22008-03-25 16:52:41 +00008776 if (*prefix == ctxt->str_xml) {
8777 /*
8778 * Check that xml:lang conforms to the specification
8779 * No more registered as an error, just generate a warning now
8780 * since this was deprecated in XML second edition
8781 */
8782 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "lang"))) {
8783 internal_val = xmlStrndup(val, *len);
8784 if (!xmlCheckLanguageID(internal_val)) {
8785 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
8786 "Malformed value for xml:lang : %s\n",
8787 internal_val, NULL);
8788 }
8789 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008790
Daniel Veillard97c9ce22008-03-25 16:52:41 +00008791 /*
8792 * Check that xml:space conforms to the specification
8793 */
8794 if (xmlStrEqual(name, BAD_CAST "space")) {
8795 internal_val = xmlStrndup(val, *len);
8796 if (xmlStrEqual(internal_val, BAD_CAST "default"))
8797 *(ctxt->space) = 0;
8798 else if (xmlStrEqual(internal_val, BAD_CAST "preserve"))
8799 *(ctxt->space) = 1;
8800 else {
8801 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
8802 "Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
8803 internal_val, NULL);
8804 }
8805 }
8806 if (internal_val) {
8807 xmlFree(internal_val);
8808 }
8809 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008810
8811 *value = val;
Daniel Veillard97c9ce22008-03-25 16:52:41 +00008812 return (name);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008813}
Daniel Veillard0fb18932003-09-07 09:14:37 +00008814/**
8815 * xmlParseStartTag2:
8816 * @ctxt: an XML parser context
8817 *
8818 * parse a start of tag either for rule element or
8819 * EmptyElement. In both case we don't parse the tag closing chars.
8820 * This routine is called when running SAX2 parsing
8821 *
8822 * [40] STag ::= '<' Name (S Attribute)* S? '>'
8823 *
8824 * [ WFC: Unique Att Spec ]
8825 * No attribute name may appear more than once in the same start-tag or
8826 * empty-element tag.
8827 *
8828 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
8829 *
8830 * [ WFC: Unique Att Spec ]
8831 * No attribute name may appear more than once in the same start-tag or
8832 * empty-element tag.
8833 *
8834 * With namespace:
8835 *
8836 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
8837 *
8838 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
8839 *
8840 * Returns the element name parsed
8841 */
8842
8843static const xmlChar *
8844xmlParseStartTag2(xmlParserCtxtPtr ctxt, const xmlChar **pref,
Daniel Veillardc82c57e2004-01-12 16:24:34 +00008845 const xmlChar **URI, int *tlen) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00008846 const xmlChar *localname;
8847 const xmlChar *prefix;
8848 const xmlChar *attname;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008849 const xmlChar *aprefix;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008850 const xmlChar *nsname;
8851 xmlChar *attvalue;
8852 const xmlChar **atts = ctxt->atts;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008853 int maxatts = ctxt->maxatts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008854 int nratts, nbatts, nbdef;
Daniel Veillarddcec6722006-10-15 20:32:53 +00008855 int i, j, nbNs, attval, oldline, oldcol;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008856 const xmlChar *base;
8857 unsigned long cur;
Daniel Veillard365cf672005-06-09 08:18:24 +00008858 int nsNr = ctxt->nsNr;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008859
8860 if (RAW != '<') return(NULL);
8861 NEXT1;
8862
8863 /*
8864 * NOTE: it is crucial with the SAX2 API to never call SHRINK beyond that
8865 * point since the attribute values may be stored as pointers to
8866 * the buffer and calling SHRINK would destroy them !
8867 * The Shrinking is only possible once the full set of attribute
8868 * callbacks have been done.
8869 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00008870reparse:
Daniel Veillard0fb18932003-09-07 09:14:37 +00008871 SHRINK;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008872 base = ctxt->input->base;
8873 cur = ctxt->input->cur - ctxt->input->base;
Daniel Veillarddcec6722006-10-15 20:32:53 +00008874 oldline = ctxt->input->line;
8875 oldcol = ctxt->input->col;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008876 nbatts = 0;
8877 nratts = 0;
8878 nbdef = 0;
8879 nbNs = 0;
8880 attval = 0;
Daniel Veillard365cf672005-06-09 08:18:24 +00008881 /* Forget any namespaces added during an earlier parse of this element. */
8882 ctxt->nsNr = nsNr;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008883
8884 localname = xmlParseQName(ctxt, &prefix);
8885 if (localname == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008886 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8887 "StartTag: invalid element name\n");
Daniel Veillard0fb18932003-09-07 09:14:37 +00008888 return(NULL);
8889 }
Daniel Veillardc82c57e2004-01-12 16:24:34 +00008890 *tlen = ctxt->input->cur - ctxt->input->base - cur;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008891
8892 /*
8893 * Now parse the attributes, it ends up with the ending
8894 *
8895 * (S Attribute)* S?
8896 */
8897 SKIP_BLANKS;
8898 GROW;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008899 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008900
8901 while ((RAW != '>') &&
8902 ((RAW != '/') || (NXT(1) != '>')) &&
Daniel Veillard73b013f2003-09-30 12:36:01 +00008903 (IS_BYTE_CHAR(RAW))) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00008904 const xmlChar *q = CUR_PTR;
8905 unsigned int cons = ctxt->input->consumed;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008906 int len = -1, alloc = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008907
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008908 attname = xmlParseAttribute2(ctxt, prefix, localname,
8909 &aprefix, &attvalue, &len, &alloc);
Daniel Veillarddcec6722006-10-15 20:32:53 +00008910 if (ctxt->input->base != base) {
8911 if ((attvalue != NULL) && (alloc != 0))
8912 xmlFree(attvalue);
8913 attvalue = NULL;
8914 goto base_changed;
8915 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008916 if ((attname != NULL) && (attvalue != NULL)) {
8917 if (len < 0) len = xmlStrlen(attvalue);
8918 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00008919 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
8920 xmlURIPtr uri;
8921
8922 if (*URL != 0) {
8923 uri = xmlParseURI((const char *) URL);
8924 if (uri == NULL) {
Daniel Veillard37334572008-07-31 08:20:02 +00008925 xmlNsErr(ctxt, XML_WAR_NS_URI,
8926 "xmlns: '%s' is not a valid URI\n",
8927 URL, NULL, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008928 } else {
Daniel Veillardda3fee42008-09-01 13:08:57 +00008929 if (uri->scheme == NULL) {
Daniel Veillard37334572008-07-31 08:20:02 +00008930 xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
8931 "xmlns: URI %s is not absolute\n",
8932 URL, NULL, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008933 }
8934 xmlFreeURI(uri);
8935 }
Daniel Veillard37334572008-07-31 08:20:02 +00008936 if (URL == ctxt->str_xml_ns) {
8937 if (attname != ctxt->str_xml) {
8938 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8939 "xml namespace URI cannot be the default namespace\n",
8940 NULL, NULL, NULL);
8941 }
8942 goto skip_default_ns;
8943 }
8944 if ((len == 29) &&
8945 (xmlStrEqual(URL,
8946 BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
8947 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8948 "reuse of the xmlns namespace name is forbidden\n",
8949 NULL, NULL, NULL);
8950 goto skip_default_ns;
8951 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00008952 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008953 /*
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008954 * check that it's not a defined namespace
Daniel Veillard0fb18932003-09-07 09:14:37 +00008955 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008956 for (j = 1;j <= nbNs;j++)
8957 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
8958 break;
8959 if (j <= nbNs)
8960 xmlErrAttributeDup(ctxt, NULL, attname);
8961 else
8962 if (nsPush(ctxt, NULL, URL) > 0) nbNs++;
Daniel Veillard37334572008-07-31 08:20:02 +00008963skip_default_ns:
Daniel Veillarde57ec792003-09-10 10:50:59 +00008964 if (alloc != 0) xmlFree(attvalue);
8965 SKIP_BLANKS;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008966 continue;
8967 }
8968 if (aprefix == ctxt->str_xmlns) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00008969 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
8970 xmlURIPtr uri;
8971
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008972 if (attname == ctxt->str_xml) {
8973 if (URL != ctxt->str_xml_ns) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008974 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8975 "xml namespace prefix mapped to wrong URI\n",
8976 NULL, NULL, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008977 }
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008978 /*
8979 * Do not keep a namespace definition node
8980 */
Daniel Veillard37334572008-07-31 08:20:02 +00008981 goto skip_ns;
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008982 }
Daniel Veillard37334572008-07-31 08:20:02 +00008983 if (URL == ctxt->str_xml_ns) {
8984 if (attname != ctxt->str_xml) {
8985 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8986 "xml namespace URI mapped to wrong prefix\n",
8987 NULL, NULL, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008988 }
Daniel Veillard37334572008-07-31 08:20:02 +00008989 goto skip_ns;
8990 }
8991 if (attname == ctxt->str_xmlns) {
8992 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8993 "redefinition of the xmlns prefix is forbidden\n",
8994 NULL, NULL, NULL);
8995 goto skip_ns;
8996 }
8997 if ((len == 29) &&
8998 (xmlStrEqual(URL,
8999 BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
9000 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9001 "reuse of the xmlns namespace name is forbidden\n",
9002 NULL, NULL, NULL);
9003 goto skip_ns;
9004 }
9005 if ((URL == NULL) || (URL[0] == 0)) {
9006 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9007 "xmlns:%s: Empty XML namespace is not allowed\n",
9008 attname, NULL, NULL);
9009 goto skip_ns;
9010 } else {
9011 uri = xmlParseURI((const char *) URL);
9012 if (uri == NULL) {
9013 xmlNsErr(ctxt, XML_WAR_NS_URI,
9014 "xmlns:%s: '%s' is not a valid URI\n",
9015 attname, URL, NULL);
9016 } else {
9017 if ((ctxt->pedantic) && (uri->scheme == NULL)) {
9018 xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
9019 "xmlns:%s: URI %s is not absolute\n",
9020 attname, URL, NULL);
9021 }
9022 xmlFreeURI(uri);
9023 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00009024 }
9025
Daniel Veillard0fb18932003-09-07 09:14:37 +00009026 /*
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009027 * check that it's not a defined namespace
Daniel Veillard0fb18932003-09-07 09:14:37 +00009028 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009029 for (j = 1;j <= nbNs;j++)
9030 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
9031 break;
9032 if (j <= nbNs)
9033 xmlErrAttributeDup(ctxt, aprefix, attname);
9034 else
9035 if (nsPush(ctxt, attname, URL) > 0) nbNs++;
Daniel Veillard37334572008-07-31 08:20:02 +00009036skip_ns:
Daniel Veillarde57ec792003-09-10 10:50:59 +00009037 if (alloc != 0) xmlFree(attvalue);
9038 SKIP_BLANKS;
Daniel Veillardd3999c72004-03-10 16:27:03 +00009039 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009040 continue;
9041 }
9042
9043 /*
9044 * Add the pair to atts
9045 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00009046 if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9047 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00009048 if (attvalue[len] == 0)
9049 xmlFree(attvalue);
9050 goto failed;
9051 }
9052 maxatts = ctxt->maxatts;
9053 atts = ctxt->atts;
9054 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00009055 ctxt->attallocs[nratts++] = alloc;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009056 atts[nbatts++] = attname;
9057 atts[nbatts++] = aprefix;
Daniel Veillarde57ec792003-09-10 10:50:59 +00009058 atts[nbatts++] = NULL; /* the URI will be fetched later */
Daniel Veillard0fb18932003-09-07 09:14:37 +00009059 atts[nbatts++] = attvalue;
9060 attvalue += len;
9061 atts[nbatts++] = attvalue;
9062 /*
9063 * tag if some deallocation is needed
9064 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00009065 if (alloc != 0) attval = 1;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009066 } else {
9067 if ((attvalue != NULL) && (attvalue[len] == 0))
9068 xmlFree(attvalue);
9069 }
9070
Daniel Veillard37334572008-07-31 08:20:02 +00009071failed:
Daniel Veillard0fb18932003-09-07 09:14:37 +00009072
9073 GROW
Daniel Veillarde57ec792003-09-10 10:50:59 +00009074 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009075 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
9076 break;
William M. Brack76e95df2003-10-18 16:20:14 +00009077 if (!IS_BLANK_CH(RAW)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009078 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
9079 "attributes construct error\n");
William M. Brack13dfa872004-09-18 04:52:08 +00009080 break;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009081 }
9082 SKIP_BLANKS;
9083 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
9084 (attname == NULL) && (attvalue == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009085 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
Daniel Veillard0fb18932003-09-07 09:14:37 +00009086 "xmlParseStartTag: problem parsing attributes\n");
Daniel Veillard0fb18932003-09-07 09:14:37 +00009087 break;
9088 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00009089 GROW;
Daniel Veillarde57ec792003-09-10 10:50:59 +00009090 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009091 }
9092
Daniel Veillard0fb18932003-09-07 09:14:37 +00009093 /*
Daniel Veillarde57ec792003-09-10 10:50:59 +00009094 * The attributes defaulting
9095 */
9096 if (ctxt->attsDefault != NULL) {
9097 xmlDefAttrsPtr defaults;
9098
9099 defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix);
9100 if (defaults != NULL) {
9101 for (i = 0;i < defaults->nbAttrs;i++) {
Daniel Veillardae0765b2008-07-31 19:54:59 +00009102 attname = defaults->values[5 * i];
9103 aprefix = defaults->values[5 * i + 1];
Daniel Veillarde57ec792003-09-10 10:50:59 +00009104
9105 /*
9106 * special work for namespaces defaulted defs
9107 */
9108 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
9109 /*
9110 * check that it's not a defined namespace
9111 */
9112 for (j = 1;j <= nbNs;j++)
9113 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
9114 break;
9115 if (j <= nbNs) continue;
9116
9117 nsname = xmlGetNamespace(ctxt, NULL);
Daniel Veillardae0765b2008-07-31 19:54:59 +00009118 if (nsname != defaults->values[5 * i + 2]) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00009119 if (nsPush(ctxt, NULL,
Daniel Veillardae0765b2008-07-31 19:54:59 +00009120 defaults->values[5 * i + 2]) > 0)
Daniel Veillarde57ec792003-09-10 10:50:59 +00009121 nbNs++;
9122 }
9123 } else if (aprefix == ctxt->str_xmlns) {
9124 /*
9125 * check that it's not a defined namespace
9126 */
9127 for (j = 1;j <= nbNs;j++)
9128 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
9129 break;
9130 if (j <= nbNs) continue;
9131
9132 nsname = xmlGetNamespace(ctxt, attname);
9133 if (nsname != defaults->values[2]) {
9134 if (nsPush(ctxt, attname,
Daniel Veillardae0765b2008-07-31 19:54:59 +00009135 defaults->values[5 * i + 2]) > 0)
Daniel Veillarde57ec792003-09-10 10:50:59 +00009136 nbNs++;
9137 }
9138 } else {
9139 /*
9140 * check that it's not a defined attribute
9141 */
9142 for (j = 0;j < nbatts;j+=5) {
9143 if ((attname == atts[j]) && (aprefix == atts[j+1]))
9144 break;
9145 }
9146 if (j < nbatts) continue;
9147
9148 if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9149 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
Daniel Veillard9ee35f32003-09-28 00:19:54 +00009150 return(NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00009151 }
9152 maxatts = ctxt->maxatts;
9153 atts = ctxt->atts;
9154 }
9155 atts[nbatts++] = attname;
9156 atts[nbatts++] = aprefix;
9157 if (aprefix == NULL)
9158 atts[nbatts++] = NULL;
9159 else
9160 atts[nbatts++] = xmlGetNamespace(ctxt, aprefix);
Daniel Veillardae0765b2008-07-31 19:54:59 +00009161 atts[nbatts++] = defaults->values[5 * i + 2];
9162 atts[nbatts++] = defaults->values[5 * i + 3];
9163 if ((ctxt->standalone == 1) &&
9164 (defaults->values[5 * i + 4] != NULL)) {
9165 xmlValidityError(ctxt, XML_DTD_STANDALONE_DEFAULTED,
9166 "standalone: attribute %s on %s defaulted from external subset\n",
9167 attname, localname);
9168 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00009169 nbdef++;
9170 }
9171 }
9172 }
9173 }
9174
Daniel Veillarde70c8772003-11-25 07:21:18 +00009175 /*
9176 * The attributes checkings
9177 */
9178 for (i = 0; i < nbatts;i += 5) {
Kasimier T. Buchcik455472f2005-04-29 10:04:43 +00009179 /*
9180 * The default namespace does not apply to attribute names.
9181 */
9182 if (atts[i + 1] != NULL) {
9183 nsname = xmlGetNamespace(ctxt, atts[i + 1]);
9184 if (nsname == NULL) {
9185 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9186 "Namespace prefix %s for %s on %s is not defined\n",
9187 atts[i + 1], atts[i], localname);
9188 }
9189 atts[i + 2] = nsname;
9190 } else
9191 nsname = NULL;
Daniel Veillarde70c8772003-11-25 07:21:18 +00009192 /*
9193 * [ WFC: Unique Att Spec ]
9194 * No attribute name may appear more than once in the same
9195 * start-tag or empty-element tag.
9196 * As extended by the Namespace in XML REC.
9197 */
9198 for (j = 0; j < i;j += 5) {
9199 if (atts[i] == atts[j]) {
9200 if (atts[i+1] == atts[j+1]) {
9201 xmlErrAttributeDup(ctxt, atts[i+1], atts[i]);
9202 break;
9203 }
9204 if ((nsname != NULL) && (atts[j + 2] == nsname)) {
9205 xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED,
9206 "Namespaced Attribute %s in '%s' redefined\n",
9207 atts[i], nsname, NULL);
9208 break;
9209 }
9210 }
9211 }
9212 }
9213
Daniel Veillarde57ec792003-09-10 10:50:59 +00009214 nsname = xmlGetNamespace(ctxt, prefix);
9215 if ((prefix != NULL) && (nsname == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009216 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9217 "Namespace prefix %s on %s is not defined\n",
9218 prefix, localname, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00009219 }
9220 *pref = prefix;
9221 *URI = nsname;
9222
9223 /*
9224 * SAX: Start of Element !
9225 */
9226 if ((ctxt->sax != NULL) && (ctxt->sax->startElementNs != NULL) &&
9227 (!ctxt->disableSAX)) {
9228 if (nbNs > 0)
9229 ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
9230 nsname, nbNs, &ctxt->nsTab[ctxt->nsNr - 2 * nbNs],
9231 nbatts / 5, nbdef, atts);
9232 else
9233 ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
9234 nsname, 0, NULL, nbatts / 5, nbdef, atts);
9235 }
9236
9237 /*
9238 * Free up attribute allocated strings if needed
9239 */
9240 if (attval != 0) {
9241 for (i = 3,j = 0; j < nratts;i += 5,j++)
9242 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
9243 xmlFree((xmlChar *) atts[i]);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009244 }
9245
9246 return(localname);
Daniel Veillarde57ec792003-09-10 10:50:59 +00009247
9248base_changed:
9249 /*
9250 * the attribute strings are valid iif the base didn't changed
9251 */
9252 if (attval != 0) {
9253 for (i = 3,j = 0; j < nratts;i += 5,j++)
9254 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
9255 xmlFree((xmlChar *) atts[i]);
9256 }
9257 ctxt->input->cur = ctxt->input->base + cur;
Daniel Veillarddcec6722006-10-15 20:32:53 +00009258 ctxt->input->line = oldline;
9259 ctxt->input->col = oldcol;
Daniel Veillarde57ec792003-09-10 10:50:59 +00009260 if (ctxt->wellFormed == 1) {
9261 goto reparse;
9262 }
9263 return(NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009264}
9265
9266/**
9267 * xmlParseEndTag2:
9268 * @ctxt: an XML parser context
9269 * @line: line of the start tag
9270 * @nsNr: number of namespaces on the start tag
9271 *
9272 * parse an end of tag
9273 *
9274 * [42] ETag ::= '</' Name S? '>'
9275 *
9276 * With namespace
9277 *
9278 * [NS 9] ETag ::= '</' QName S? '>'
9279 */
9280
9281static void
9282xmlParseEndTag2(xmlParserCtxtPtr ctxt, const xmlChar *prefix,
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009283 const xmlChar *URI, int line, int nsNr, int tlen) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00009284 const xmlChar *name;
9285
9286 GROW;
9287 if ((RAW != '<') || (NXT(1) != '/')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009288 xmlFatalErr(ctxt, XML_ERR_LTSLASH_REQUIRED, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009289 return;
9290 }
9291 SKIP(2);
9292
William M. Brack13dfa872004-09-18 04:52:08 +00009293 if ((tlen > 0) && (xmlStrncmp(ctxt->input->cur, ctxt->name, tlen) == 0)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009294 if (ctxt->input->cur[tlen] == '>') {
9295 ctxt->input->cur += tlen + 1;
9296 goto done;
9297 }
9298 ctxt->input->cur += tlen;
9299 name = (xmlChar*)1;
9300 } else {
9301 if (prefix == NULL)
9302 name = xmlParseNameAndCompare(ctxt, ctxt->name);
9303 else
9304 name = xmlParseQNameAndCompare(ctxt, ctxt->name, prefix);
9305 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00009306
9307 /*
9308 * We should definitely be at the ending "S? '>'" part
9309 */
9310 GROW;
9311 SKIP_BLANKS;
Daniel Veillard73b013f2003-09-30 12:36:01 +00009312 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009313 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009314 } else
9315 NEXT1;
9316
9317 /*
9318 * [ WFC: Element Type Match ]
9319 * The Name in an element's end-tag must match the element type in the
9320 * start-tag.
9321 *
9322 */
9323 if (name != (xmlChar*)1) {
Daniel Veillardf403d292003-10-05 13:51:35 +00009324 if (name == NULL) name = BAD_CAST "unparseable";
Daniel Veillard852505b2009-08-23 15:44:48 +02009325 if ((line == 0) && (ctxt->node != NULL))
9326 line = ctxt->node->line;
Daniel Veillardf403d292003-10-05 13:51:35 +00009327 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
Daniel Veillard0fb18932003-09-07 09:14:37 +00009328 "Opening and ending tag mismatch: %s line %d and %s\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00009329 ctxt->name, line, name);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009330 }
9331
9332 /*
9333 * SAX: End of Tag
9334 */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009335done:
Daniel Veillard0fb18932003-09-07 09:14:37 +00009336 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
9337 (!ctxt->disableSAX))
9338 ctxt->sax->endElementNs(ctxt->userData, ctxt->name, prefix, URI);
9339
Daniel Veillard0fb18932003-09-07 09:14:37 +00009340 spacePop(ctxt);
9341 if (nsNr != 0)
9342 nsPop(ctxt, nsNr);
9343 return;
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00009344}
9345
9346/**
Owen Taylor3473f882001-02-23 17:55:21 +00009347 * xmlParseCDSect:
9348 * @ctxt: an XML parser context
9349 *
9350 * Parse escaped pure raw content.
9351 *
9352 * [18] CDSect ::= CDStart CData CDEnd
9353 *
9354 * [19] CDStart ::= '<![CDATA['
9355 *
9356 * [20] Data ::= (Char* - (Char* ']]>' Char*))
9357 *
9358 * [21] CDEnd ::= ']]>'
9359 */
9360void
9361xmlParseCDSect(xmlParserCtxtPtr ctxt) {
9362 xmlChar *buf = NULL;
9363 int len = 0;
9364 int size = XML_PARSER_BUFFER_SIZE;
9365 int r, rl;
9366 int s, sl;
9367 int cur, l;
9368 int count = 0;
9369
Daniel Veillard8f597c32003-10-06 08:19:27 +00009370 /* Check 2.6.0 was NXT(0) not RAW */
Daniel Veillarda07050d2003-10-19 14:46:32 +00009371 if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009372 SKIP(9);
9373 } else
9374 return;
9375
9376 ctxt->instate = XML_PARSER_CDATA_SECTION;
9377 r = CUR_CHAR(rl);
William M. Brack871611b2003-10-18 04:53:14 +00009378 if (!IS_CHAR(r)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009379 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009380 ctxt->instate = XML_PARSER_CONTENT;
9381 return;
9382 }
9383 NEXTL(rl);
9384 s = CUR_CHAR(sl);
William M. Brack871611b2003-10-18 04:53:14 +00009385 if (!IS_CHAR(s)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009386 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009387 ctxt->instate = XML_PARSER_CONTENT;
9388 return;
9389 }
9390 NEXTL(sl);
9391 cur = CUR_CHAR(l);
Daniel Veillard3c908dc2003-04-19 00:07:51 +00009392 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00009393 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009394 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009395 return;
9396 }
William M. Brack871611b2003-10-18 04:53:14 +00009397 while (IS_CHAR(cur) &&
Owen Taylor3473f882001-02-23 17:55:21 +00009398 ((r != ']') || (s != ']') || (cur != '>'))) {
9399 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00009400 xmlChar *tmp;
9401
Owen Taylor3473f882001-02-23 17:55:21 +00009402 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00009403 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
9404 if (tmp == NULL) {
9405 xmlFree(buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009406 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009407 return;
9408 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00009409 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00009410 }
9411 COPY_BUF(rl,buf,len,r);
9412 r = s;
9413 rl = sl;
9414 s = cur;
9415 sl = l;
9416 count++;
9417 if (count > 50) {
9418 GROW;
9419 count = 0;
9420 }
9421 NEXTL(l);
9422 cur = CUR_CHAR(l);
9423 }
9424 buf[len] = 0;
9425 ctxt->instate = XML_PARSER_CONTENT;
9426 if (cur != '>') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00009427 xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00009428 "CData section not finished\n%.50s\n", buf);
Owen Taylor3473f882001-02-23 17:55:21 +00009429 xmlFree(buf);
9430 return;
9431 }
9432 NEXTL(l);
9433
9434 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00009435 * OK the buffer is to be consumed as cdata.
Owen Taylor3473f882001-02-23 17:55:21 +00009436 */
9437 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
9438 if (ctxt->sax->cdataBlock != NULL)
9439 ctxt->sax->cdataBlock(ctxt->userData, buf, len);
Daniel Veillard7583a592001-07-08 13:15:55 +00009440 else if (ctxt->sax->characters != NULL)
9441 ctxt->sax->characters(ctxt->userData, buf, len);
Owen Taylor3473f882001-02-23 17:55:21 +00009442 }
9443 xmlFree(buf);
9444}
9445
9446/**
9447 * xmlParseContent:
9448 * @ctxt: an XML parser context
9449 *
9450 * Parse a content:
9451 *
9452 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
9453 */
9454
9455void
9456xmlParseContent(xmlParserCtxtPtr ctxt) {
9457 GROW;
Daniel Veillardfdc91562002-07-01 21:52:03 +00009458 while ((RAW != 0) &&
Daniel Veillard4a9fe382006-09-19 12:44:35 +00009459 ((RAW != '<') || (NXT(1) != '/')) &&
9460 (ctxt->instate != XML_PARSER_EOF)) {
Owen Taylor3473f882001-02-23 17:55:21 +00009461 const xmlChar *test = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00009462 unsigned int cons = ctxt->input->consumed;
Daniel Veillard21a0f912001-02-25 19:54:14 +00009463 const xmlChar *cur = ctxt->input->cur;
Owen Taylor3473f882001-02-23 17:55:21 +00009464
9465 /*
Owen Taylor3473f882001-02-23 17:55:21 +00009466 * First case : a Processing Instruction.
9467 */
Daniel Veillardfdc91562002-07-01 21:52:03 +00009468 if ((*cur == '<') && (cur[1] == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009469 xmlParsePI(ctxt);
9470 }
9471
9472 /*
9473 * Second case : a CDSection
9474 */
Daniel Veillard8f597c32003-10-06 08:19:27 +00009475 /* 2.6.0 test was *cur not RAW */
Daniel Veillarda07050d2003-10-19 14:46:32 +00009476 else if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009477 xmlParseCDSect(ctxt);
9478 }
9479
9480 /*
9481 * Third case : a comment
9482 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00009483 else if ((*cur == '<') && (NXT(1) == '!') &&
Owen Taylor3473f882001-02-23 17:55:21 +00009484 (NXT(2) == '-') && (NXT(3) == '-')) {
9485 xmlParseComment(ctxt);
9486 ctxt->instate = XML_PARSER_CONTENT;
9487 }
9488
9489 /*
9490 * Fourth case : a sub-element.
9491 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00009492 else if (*cur == '<') {
Owen Taylor3473f882001-02-23 17:55:21 +00009493 xmlParseElement(ctxt);
9494 }
9495
9496 /*
9497 * Fifth case : a reference. If if has not been resolved,
9498 * parsing returns it's Name, create the node
9499 */
9500
Daniel Veillard21a0f912001-02-25 19:54:14 +00009501 else if (*cur == '&') {
Owen Taylor3473f882001-02-23 17:55:21 +00009502 xmlParseReference(ctxt);
9503 }
9504
9505 /*
9506 * Last case, text. Note that References are handled directly.
9507 */
9508 else {
9509 xmlParseCharData(ctxt, 0);
9510 }
9511
9512 GROW;
9513 /*
9514 * Pop-up of finished entities.
9515 */
Daniel Veillard561b7f82002-03-20 21:55:57 +00009516 while ((RAW == 0) && (ctxt->inputNr > 1))
Owen Taylor3473f882001-02-23 17:55:21 +00009517 xmlPopInput(ctxt);
9518 SHRINK;
9519
Daniel Veillardfdc91562002-07-01 21:52:03 +00009520 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009521 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
9522 "detected an error in element content\n");
Owen Taylor3473f882001-02-23 17:55:21 +00009523 ctxt->instate = XML_PARSER_EOF;
9524 break;
9525 }
9526 }
9527}
9528
9529/**
9530 * xmlParseElement:
9531 * @ctxt: an XML parser context
9532 *
9533 * parse an XML element, this is highly recursive
9534 *
9535 * [39] element ::= EmptyElemTag | STag content ETag
9536 *
9537 * [ WFC: Element Type Match ]
9538 * The Name in an element's end-tag must match the element type in the
9539 * start-tag.
9540 *
Owen Taylor3473f882001-02-23 17:55:21 +00009541 */
9542
9543void
9544xmlParseElement(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00009545 const xmlChar *name;
Daniel Veillard15495612009-09-05 15:04:41 +02009546 const xmlChar *prefix = NULL;
9547 const xmlChar *URI = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00009548 xmlParserNodeInfo node_info;
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009549 int line, tlen;
Owen Taylor3473f882001-02-23 17:55:21 +00009550 xmlNodePtr ret;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009551 int nsNr = ctxt->nsNr;
Owen Taylor3473f882001-02-23 17:55:21 +00009552
Daniel Veillard8915c152008-08-26 13:05:34 +00009553 if (((unsigned int) ctxt->nameNr > xmlParserMaxDepth) &&
9554 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9555 xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
9556 "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
9557 xmlParserMaxDepth);
Daniel Veillard4a9fe382006-09-19 12:44:35 +00009558 ctxt->instate = XML_PARSER_EOF;
9559 return;
9560 }
9561
Owen Taylor3473f882001-02-23 17:55:21 +00009562 /* Capture start position */
9563 if (ctxt->record_info) {
9564 node_info.begin_pos = ctxt->input->consumed +
9565 (CUR_PTR - ctxt->input->base);
9566 node_info.begin_line = ctxt->input->line;
9567 }
9568
9569 if (ctxt->spaceNr == 0)
9570 spacePush(ctxt, -1);
Daniel Veillard1114d002006-10-12 16:24:35 +00009571 else if (*ctxt->space == -2)
9572 spacePush(ctxt, -1);
Owen Taylor3473f882001-02-23 17:55:21 +00009573 else
9574 spacePush(ctxt, *ctxt->space);
9575
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00009576 line = ctxt->input->line;
Daniel Veillard81273902003-09-30 00:43:48 +00009577#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +00009578 if (ctxt->sax2)
Daniel Veillard81273902003-09-30 00:43:48 +00009579#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009580 name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
Daniel Veillard81273902003-09-30 00:43:48 +00009581#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +00009582 else
9583 name = xmlParseStartTag(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +00009584#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00009585 if (name == NULL) {
9586 spacePop(ctxt);
9587 return;
9588 }
9589 namePush(ctxt, name);
9590 ret = ctxt->node;
9591
Daniel Veillard4432df22003-09-28 18:58:27 +00009592#ifdef LIBXML_VALID_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +00009593 /*
9594 * [ VC: Root Element Type ]
9595 * The Name in the document type declaration must match the element
9596 * type of the root element.
9597 */
9598 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
9599 ctxt->node && (ctxt->node == ctxt->myDoc->children))
9600 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
Daniel Veillard4432df22003-09-28 18:58:27 +00009601#endif /* LIBXML_VALID_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00009602
9603 /*
9604 * Check for an Empty Element.
9605 */
9606 if ((RAW == '/') && (NXT(1) == '>')) {
9607 SKIP(2);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009608 if (ctxt->sax2) {
9609 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
9610 (!ctxt->disableSAX))
9611 ctxt->sax->endElementNs(ctxt->userData, name, prefix, URI);
Daniel Veillard81273902003-09-30 00:43:48 +00009612#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +00009613 } else {
9614 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
9615 (!ctxt->disableSAX))
9616 ctxt->sax->endElement(ctxt->userData, name);
Daniel Veillard81273902003-09-30 00:43:48 +00009617#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00009618 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00009619 namePop(ctxt);
9620 spacePop(ctxt);
9621 if (nsNr != ctxt->nsNr)
9622 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +00009623 if ( ret != NULL && ctxt->record_info ) {
9624 node_info.end_pos = ctxt->input->consumed +
9625 (CUR_PTR - ctxt->input->base);
9626 node_info.end_line = ctxt->input->line;
9627 node_info.node = ret;
9628 xmlParserAddNodeInfo(ctxt, &node_info);
9629 }
9630 return;
9631 }
9632 if (RAW == '>') {
Daniel Veillard21a0f912001-02-25 19:54:14 +00009633 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00009634 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00009635 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_GT_REQUIRED,
9636 "Couldn't find end of Start Tag %s line %d\n",
9637 name, line, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009638
9639 /*
9640 * end of parsing of this node.
9641 */
9642 nodePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009643 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00009644 spacePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009645 if (nsNr != ctxt->nsNr)
9646 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +00009647
9648 /*
9649 * Capture end position and add node
9650 */
9651 if ( ret != NULL && ctxt->record_info ) {
9652 node_info.end_pos = ctxt->input->consumed +
9653 (CUR_PTR - ctxt->input->base);
9654 node_info.end_line = ctxt->input->line;
9655 node_info.node = ret;
9656 xmlParserAddNodeInfo(ctxt, &node_info);
9657 }
9658 return;
9659 }
9660
9661 /*
9662 * Parse the content of the element:
9663 */
9664 xmlParseContent(ctxt);
Daniel Veillard73b013f2003-09-30 12:36:01 +00009665 if (!IS_BYTE_CHAR(RAW)) {
Daniel Veillardf403d292003-10-05 13:51:35 +00009666 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
Daniel Veillard2b0f8792003-10-10 19:36:36 +00009667 "Premature end of data in tag %s line %d\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00009668 name, line, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009669
9670 /*
9671 * end of parsing of this node.
9672 */
9673 nodePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009674 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00009675 spacePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009676 if (nsNr != ctxt->nsNr)
9677 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +00009678 return;
9679 }
9680
9681 /*
9682 * parse the end of tag: '</' should be here.
9683 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00009684 if (ctxt->sax2) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009685 xmlParseEndTag2(ctxt, prefix, URI, line, ctxt->nsNr - nsNr, tlen);
Daniel Veillarde57ec792003-09-10 10:50:59 +00009686 namePop(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +00009687 }
9688#ifdef LIBXML_SAX1_ENABLED
9689 else
Daniel Veillard0fb18932003-09-07 09:14:37 +00009690 xmlParseEndTag1(ctxt, line);
Daniel Veillard81273902003-09-30 00:43:48 +00009691#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00009692
9693 /*
9694 * Capture end position and add node
9695 */
9696 if ( ret != NULL && ctxt->record_info ) {
9697 node_info.end_pos = ctxt->input->consumed +
9698 (CUR_PTR - ctxt->input->base);
9699 node_info.end_line = ctxt->input->line;
9700 node_info.node = ret;
9701 xmlParserAddNodeInfo(ctxt, &node_info);
9702 }
9703}
9704
9705/**
9706 * xmlParseVersionNum:
9707 * @ctxt: an XML parser context
9708 *
9709 * parse the XML version value.
9710 *
Daniel Veillard34e3f642008-07-29 09:02:27 +00009711 * [26] VersionNum ::= '1.' [0-9]+
9712 *
9713 * In practice allow [0-9].[0-9]+ at that level
Owen Taylor3473f882001-02-23 17:55:21 +00009714 *
9715 * Returns the string giving the XML version number, or NULL
9716 */
9717xmlChar *
9718xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
9719 xmlChar *buf = NULL;
9720 int len = 0;
9721 int size = 10;
9722 xmlChar cur;
9723
Daniel Veillard3c908dc2003-04-19 00:07:51 +00009724 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00009725 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009726 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009727 return(NULL);
9728 }
9729 cur = CUR;
Daniel Veillard34e3f642008-07-29 09:02:27 +00009730 if (!((cur >= '0') && (cur <= '9'))) {
Daniel Veillard7e5c3f42008-07-29 16:12:31 +00009731 xmlFree(buf);
Daniel Veillard34e3f642008-07-29 09:02:27 +00009732 return(NULL);
9733 }
9734 buf[len++] = cur;
9735 NEXT;
9736 cur=CUR;
9737 if (cur != '.') {
Daniel Veillard7e5c3f42008-07-29 16:12:31 +00009738 xmlFree(buf);
Daniel Veillard34e3f642008-07-29 09:02:27 +00009739 return(NULL);
9740 }
9741 buf[len++] = cur;
9742 NEXT;
9743 cur=CUR;
9744 while ((cur >= '0') && (cur <= '9')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009745 if (len + 1 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00009746 xmlChar *tmp;
9747
Owen Taylor3473f882001-02-23 17:55:21 +00009748 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00009749 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
9750 if (tmp == NULL) {
Daniel Veillard68b6e022008-03-31 09:26:00 +00009751 xmlFree(buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009752 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009753 return(NULL);
9754 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00009755 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00009756 }
9757 buf[len++] = cur;
9758 NEXT;
9759 cur=CUR;
9760 }
9761 buf[len] = 0;
9762 return(buf);
9763}
9764
9765/**
9766 * xmlParseVersionInfo:
9767 * @ctxt: an XML parser context
Daniel Veillard68b6e022008-03-31 09:26:00 +00009768 *
Owen Taylor3473f882001-02-23 17:55:21 +00009769 * parse the XML version.
9770 *
9771 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
Daniel Veillard68b6e022008-03-31 09:26:00 +00009772 *
Owen Taylor3473f882001-02-23 17:55:21 +00009773 * [25] Eq ::= S? '=' S?
9774 *
9775 * Returns the version string, e.g. "1.0"
9776 */
9777
9778xmlChar *
9779xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
9780 xmlChar *version = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00009781
Daniel Veillarda07050d2003-10-19 14:46:32 +00009782 if (CMP7(CUR_PTR, 'v', 'e', 'r', 's', 'i', 'o', 'n')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009783 SKIP(7);
9784 SKIP_BLANKS;
9785 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009786 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009787 return(NULL);
9788 }
9789 NEXT;
9790 SKIP_BLANKS;
9791 if (RAW == '"') {
9792 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00009793 version = xmlParseVersionNum(ctxt);
9794 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009795 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009796 } else
9797 NEXT;
9798 } else if (RAW == '\''){
9799 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00009800 version = xmlParseVersionNum(ctxt);
9801 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009802 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009803 } else
9804 NEXT;
9805 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009806 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009807 }
9808 }
9809 return(version);
9810}
9811
9812/**
9813 * xmlParseEncName:
9814 * @ctxt: an XML parser context
9815 *
9816 * parse the XML encoding name
9817 *
9818 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
9819 *
9820 * Returns the encoding name value or NULL
9821 */
9822xmlChar *
9823xmlParseEncName(xmlParserCtxtPtr ctxt) {
9824 xmlChar *buf = NULL;
9825 int len = 0;
9826 int size = 10;
9827 xmlChar cur;
9828
9829 cur = CUR;
9830 if (((cur >= 'a') && (cur <= 'z')) ||
9831 ((cur >= 'A') && (cur <= 'Z'))) {
Daniel Veillard3c908dc2003-04-19 00:07:51 +00009832 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00009833 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009834 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009835 return(NULL);
9836 }
Daniel Veillard34e3f642008-07-29 09:02:27 +00009837
Owen Taylor3473f882001-02-23 17:55:21 +00009838 buf[len++] = cur;
9839 NEXT;
9840 cur = CUR;
9841 while (((cur >= 'a') && (cur <= 'z')) ||
9842 ((cur >= 'A') && (cur <= 'Z')) ||
9843 ((cur >= '0') && (cur <= '9')) ||
9844 (cur == '.') || (cur == '_') ||
9845 (cur == '-')) {
9846 if (len + 1 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00009847 xmlChar *tmp;
9848
Owen Taylor3473f882001-02-23 17:55:21 +00009849 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00009850 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
9851 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009852 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00009853 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00009854 return(NULL);
9855 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00009856 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00009857 }
9858 buf[len++] = cur;
9859 NEXT;
9860 cur = CUR;
9861 if (cur == 0) {
9862 SHRINK;
9863 GROW;
9864 cur = CUR;
9865 }
9866 }
9867 buf[len] = 0;
9868 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009869 xmlFatalErr(ctxt, XML_ERR_ENCODING_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009870 }
9871 return(buf);
9872}
9873
9874/**
9875 * xmlParseEncodingDecl:
9876 * @ctxt: an XML parser context
9877 *
9878 * parse the XML encoding declaration
9879 *
9880 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
9881 *
9882 * this setups the conversion filters.
9883 *
9884 * Returns the encoding value or NULL
9885 */
9886
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00009887const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00009888xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
9889 xmlChar *encoding = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00009890
9891 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00009892 if (CMP8(CUR_PTR, 'e', 'n', 'c', 'o', 'd', 'i', 'n', 'g')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009893 SKIP(8);
9894 SKIP_BLANKS;
9895 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009896 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009897 return(NULL);
9898 }
9899 NEXT;
9900 SKIP_BLANKS;
9901 if (RAW == '"') {
9902 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00009903 encoding = xmlParseEncName(ctxt);
9904 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009905 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009906 } else
9907 NEXT;
9908 } else if (RAW == '\''){
9909 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00009910 encoding = xmlParseEncName(ctxt);
9911 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009912 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009913 } else
9914 NEXT;
Daniel Veillard82ac6b02002-02-17 23:18:55 +00009915 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009916 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009917 }
Daniel Veillard6b621b82003-08-11 15:03:34 +00009918 /*
9919 * UTF-16 encoding stwich has already taken place at this stage,
9920 * more over the little-endian/big-endian selection is already done
9921 */
9922 if ((encoding != NULL) &&
9923 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-16")) ||
9924 (!xmlStrcasecmp(encoding, BAD_CAST "UTF16")))) {
Daniel Veillard37334572008-07-31 08:20:02 +00009925 /*
9926 * If no encoding was passed to the parser, that we are
9927 * using UTF-16 and no decoder is present i.e. the
9928 * document is apparently UTF-8 compatible, then raise an
9929 * encoding mismatch fatal error
9930 */
9931 if ((ctxt->encoding == NULL) &&
9932 (ctxt->input->buf != NULL) &&
9933 (ctxt->input->buf->encoder == NULL)) {
9934 xmlFatalErrMsg(ctxt, XML_ERR_INVALID_ENCODING,
9935 "Document labelled UTF-16 but has UTF-8 content\n");
9936 }
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00009937 if (ctxt->encoding != NULL)
9938 xmlFree((xmlChar *) ctxt->encoding);
9939 ctxt->encoding = encoding;
Daniel Veillardb19ba832003-08-14 00:33:46 +00009940 }
9941 /*
9942 * UTF-8 encoding is handled natively
9943 */
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00009944 else if ((encoding != NULL) &&
Daniel Veillardb19ba832003-08-14 00:33:46 +00009945 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-8")) ||
9946 (!xmlStrcasecmp(encoding, BAD_CAST "UTF8")))) {
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00009947 if (ctxt->encoding != NULL)
9948 xmlFree((xmlChar *) ctxt->encoding);
9949 ctxt->encoding = encoding;
Daniel Veillard6b621b82003-08-11 15:03:34 +00009950 }
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00009951 else if (encoding != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +00009952 xmlCharEncodingHandlerPtr handler;
9953
9954 if (ctxt->input->encoding != NULL)
9955 xmlFree((xmlChar *) ctxt->input->encoding);
9956 ctxt->input->encoding = encoding;
9957
Daniel Veillarda6874ca2003-07-29 16:47:24 +00009958 handler = xmlFindCharEncodingHandler((const char *) encoding);
9959 if (handler != NULL) {
9960 xmlSwitchToEncoding(ctxt, handler);
Owen Taylor3473f882001-02-23 17:55:21 +00009961 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00009962 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
Daniel Veillarda6874ca2003-07-29 16:47:24 +00009963 "Unsupported encoding %s\n", encoding);
9964 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009965 }
9966 }
9967 }
9968 return(encoding);
9969}
9970
9971/**
9972 * xmlParseSDDecl:
9973 * @ctxt: an XML parser context
9974 *
9975 * parse the XML standalone declaration
9976 *
9977 * [32] SDDecl ::= S 'standalone' Eq
9978 * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
9979 *
9980 * [ VC: Standalone Document Declaration ]
9981 * TODO The standalone document declaration must have the value "no"
9982 * if any external markup declarations contain declarations of:
9983 * - attributes with default values, if elements to which these
9984 * attributes apply appear in the document without specifications
9985 * of values for these attributes, or
9986 * - entities (other than amp, lt, gt, apos, quot), if references
9987 * to those entities appear in the document, or
9988 * - attributes with values subject to normalization, where the
9989 * attribute appears in the document with a value which will change
9990 * as a result of normalization, or
9991 * - element types with element content, if white space occurs directly
9992 * within any instance of those types.
9993 *
Daniel Veillard602f2bd2006-12-04 09:26:04 +00009994 * Returns:
9995 * 1 if standalone="yes"
9996 * 0 if standalone="no"
9997 * -2 if standalone attribute is missing or invalid
9998 * (A standalone value of -2 means that the XML declaration was found,
9999 * but no value was specified for the standalone attribute).
Owen Taylor3473f882001-02-23 17:55:21 +000010000 */
10001
10002int
10003xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard602f2bd2006-12-04 09:26:04 +000010004 int standalone = -2;
Owen Taylor3473f882001-02-23 17:55:21 +000010005
10006 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +000010007 if (CMP10(CUR_PTR, 's', 't', 'a', 'n', 'd', 'a', 'l', 'o', 'n', 'e')) {
Owen Taylor3473f882001-02-23 17:55:21 +000010008 SKIP(10);
10009 SKIP_BLANKS;
10010 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010011 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010012 return(standalone);
10013 }
10014 NEXT;
10015 SKIP_BLANKS;
10016 if (RAW == '\''){
10017 NEXT;
10018 if ((RAW == 'n') && (NXT(1) == 'o')) {
10019 standalone = 0;
10020 SKIP(2);
10021 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
10022 (NXT(2) == 's')) {
10023 standalone = 1;
10024 SKIP(3);
10025 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010026 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010027 }
10028 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010029 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010030 } else
10031 NEXT;
10032 } else if (RAW == '"'){
10033 NEXT;
10034 if ((RAW == 'n') && (NXT(1) == 'o')) {
10035 standalone = 0;
10036 SKIP(2);
10037 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
10038 (NXT(2) == 's')) {
10039 standalone = 1;
10040 SKIP(3);
10041 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010042 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010043 }
10044 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010045 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010046 } else
10047 NEXT;
10048 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010049 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010050 }
10051 }
10052 return(standalone);
10053}
10054
10055/**
10056 * xmlParseXMLDecl:
10057 * @ctxt: an XML parser context
10058 *
10059 * parse an XML declaration header
10060 *
10061 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
10062 */
10063
10064void
10065xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
10066 xmlChar *version;
10067
10068 /*
Daniel Veillardae487ba2005-11-17 07:25:52 +000010069 * This value for standalone indicates that the document has an
10070 * XML declaration but it does not have a standalone attribute.
10071 * It will be overwritten later if a standalone attribute is found.
10072 */
10073 ctxt->input->standalone = -2;
10074
10075 /*
Owen Taylor3473f882001-02-23 17:55:21 +000010076 * We know that '<?xml' is here.
10077 */
10078 SKIP(5);
10079
William M. Brack76e95df2003-10-18 16:20:14 +000010080 if (!IS_BLANK_CH(RAW)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +000010081 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
10082 "Blank needed after '<?xml'\n");
Owen Taylor3473f882001-02-23 17:55:21 +000010083 }
10084 SKIP_BLANKS;
10085
10086 /*
Daniel Veillard19840942001-11-29 16:11:38 +000010087 * We must have the VersionInfo here.
Owen Taylor3473f882001-02-23 17:55:21 +000010088 */
10089 version = xmlParseVersionInfo(ctxt);
Daniel Veillard19840942001-11-29 16:11:38 +000010090 if (version == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +000010091 xmlFatalErr(ctxt, XML_ERR_VERSION_MISSING, NULL);
Daniel Veillard19840942001-11-29 16:11:38 +000010092 } else {
10093 if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) {
10094 /*
Daniel Veillard34e3f642008-07-29 09:02:27 +000010095 * Changed here for XML-1.0 5th edition
Daniel Veillard19840942001-11-29 16:11:38 +000010096 */
Daniel Veillard34e3f642008-07-29 09:02:27 +000010097 if (ctxt->options & XML_PARSE_OLD10) {
10098 xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10099 "Unsupported version '%s'\n",
10100 version);
10101 } else {
10102 if ((version[0] == '1') && ((version[1] == '.'))) {
10103 xmlWarningMsg(ctxt, XML_WAR_UNKNOWN_VERSION,
10104 "Unsupported version '%s'\n",
10105 version, NULL);
10106 } else {
10107 xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10108 "Unsupported version '%s'\n",
10109 version);
10110 }
10111 }
Daniel Veillard19840942001-11-29 16:11:38 +000010112 }
10113 if (ctxt->version != NULL)
Daniel Veillardd3b08822001-12-05 12:03:33 +000010114 xmlFree((void *) ctxt->version);
Daniel Veillard19840942001-11-29 16:11:38 +000010115 ctxt->version = version;
Daniel Veillarda050d232001-09-05 15:51:05 +000010116 }
Owen Taylor3473f882001-02-23 17:55:21 +000010117
10118 /*
10119 * We may have the encoding declaration
10120 */
William M. Brack76e95df2003-10-18 16:20:14 +000010121 if (!IS_BLANK_CH(RAW)) {
Owen Taylor3473f882001-02-23 17:55:21 +000010122 if ((RAW == '?') && (NXT(1) == '>')) {
10123 SKIP(2);
10124 return;
10125 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +000010126 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
Owen Taylor3473f882001-02-23 17:55:21 +000010127 }
10128 xmlParseEncodingDecl(ctxt);
10129 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
10130 /*
10131 * The XML REC instructs us to stop parsing right here
10132 */
10133 return;
10134 }
10135
10136 /*
10137 * We may have the standalone status.
10138 */
William M. Brack76e95df2003-10-18 16:20:14 +000010139 if ((ctxt->input->encoding != NULL) && (!IS_BLANK_CH(RAW))) {
Owen Taylor3473f882001-02-23 17:55:21 +000010140 if ((RAW == '?') && (NXT(1) == '>')) {
10141 SKIP(2);
10142 return;
10143 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +000010144 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
Owen Taylor3473f882001-02-23 17:55:21 +000010145 }
Daniel Veillarda6c76a22009-08-26 14:37:00 +020010146
10147 /*
10148 * We can grow the input buffer freely at that point
10149 */
10150 GROW;
10151
Owen Taylor3473f882001-02-23 17:55:21 +000010152 SKIP_BLANKS;
10153 ctxt->input->standalone = xmlParseSDDecl(ctxt);
10154
10155 SKIP_BLANKS;
10156 if ((RAW == '?') && (NXT(1) == '>')) {
10157 SKIP(2);
10158 } else if (RAW == '>') {
10159 /* Deprecated old WD ... */
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010160 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010161 NEXT;
10162 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010163 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010164 MOVETO_ENDTAG(CUR_PTR);
10165 NEXT;
10166 }
10167}
10168
10169/**
10170 * xmlParseMisc:
10171 * @ctxt: an XML parser context
10172 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +000010173 * parse an XML Misc* optional field.
Owen Taylor3473f882001-02-23 17:55:21 +000010174 *
10175 * [27] Misc ::= Comment | PI | S
10176 */
10177
10178void
10179xmlParseMisc(xmlParserCtxtPtr ctxt) {
Daniel Veillard561b7f82002-03-20 21:55:57 +000010180 while (((RAW == '<') && (NXT(1) == '?')) ||
Daniel Veillarda07050d2003-10-19 14:46:32 +000010181 (CMP4(CUR_PTR, '<', '!', '-', '-')) ||
William M. Brack76e95df2003-10-18 16:20:14 +000010182 IS_BLANK_CH(CUR)) {
Daniel Veillard561b7f82002-03-20 21:55:57 +000010183 if ((RAW == '<') && (NXT(1) == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +000010184 xmlParsePI(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +000010185 } else if (IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +000010186 NEXT;
10187 } else
10188 xmlParseComment(ctxt);
10189 }
10190}
10191
10192/**
10193 * xmlParseDocument:
10194 * @ctxt: an XML parser context
10195 *
10196 * parse an XML document (and build a tree if using the standard SAX
10197 * interface).
10198 *
10199 * [1] document ::= prolog element Misc*
10200 *
10201 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
10202 *
10203 * Returns 0, -1 in case of error. the parser context is augmented
10204 * as a result of the parsing.
10205 */
10206
10207int
10208xmlParseDocument(xmlParserCtxtPtr ctxt) {
10209 xmlChar start[4];
10210 xmlCharEncoding enc;
10211
10212 xmlInitParser();
10213
Daniel Veillard36e5cd52004-11-02 14:52:23 +000010214 if ((ctxt == NULL) || (ctxt->input == NULL))
10215 return(-1);
10216
Owen Taylor3473f882001-02-23 17:55:21 +000010217 GROW;
10218
10219 /*
Daniel Veillard0fb18932003-09-07 09:14:37 +000010220 * SAX: detecting the level.
10221 */
Daniel Veillarde57ec792003-09-10 10:50:59 +000010222 xmlDetectSAX2(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +000010223
10224 /*
Owen Taylor3473f882001-02-23 17:55:21 +000010225 * SAX: beginning of the document processing.
10226 */
10227 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10228 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
10229
Nikolay Sivove6ad10a2010-11-01 11:35:14 +010010230 if ((ctxt->encoding == NULL) &&
Daniel Veillard4aede2e2003-10-17 12:43:59 +000010231 ((ctxt->input->end - ctxt->input->cur) >= 4)) {
Daniel Veillard4aafa792001-07-28 17:21:12 +000010232 /*
10233 * Get the 4 first bytes and decode the charset
10234 * if enc != XML_CHAR_ENCODING_NONE
10235 * plug some encoding conversion routines.
10236 */
10237 start[0] = RAW;
10238 start[1] = NXT(1);
10239 start[2] = NXT(2);
10240 start[3] = NXT(3);
Daniel Veillard4aede2e2003-10-17 12:43:59 +000010241 enc = xmlDetectCharEncoding(&start[0], 4);
Daniel Veillard4aafa792001-07-28 17:21:12 +000010242 if (enc != XML_CHAR_ENCODING_NONE) {
10243 xmlSwitchEncoding(ctxt, enc);
10244 }
Owen Taylor3473f882001-02-23 17:55:21 +000010245 }
10246
10247
10248 if (CUR == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010249 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010250 }
10251
10252 /*
10253 * Check for the XMLDecl in the Prolog.
Daniel Veillard7e385bd2009-08-26 11:38:49 +020010254 * do not GROW here to avoid the detected encoder to decode more
Daniel Veillard9d3d1412009-09-15 18:41:30 +020010255 * than just the first line, unless the amount of data is really
10256 * too small to hold "<?xml version="1.0" encoding="foo"
Owen Taylor3473f882001-02-23 17:55:21 +000010257 */
Daniel Veillard9d3d1412009-09-15 18:41:30 +020010258 if ((ctxt->input->end - ctxt->input->cur) < 35) {
10259 GROW;
10260 }
Daniel Veillarda07050d2003-10-19 14:46:32 +000010261 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +000010262
10263 /*
10264 * Note that we will switch encoding on the fly.
10265 */
10266 xmlParseXMLDecl(ctxt);
10267 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
10268 /*
10269 * The XML REC instructs us to stop parsing right here
10270 */
10271 return(-1);
10272 }
10273 ctxt->standalone = ctxt->input->standalone;
10274 SKIP_BLANKS;
10275 } else {
10276 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10277 }
10278 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
10279 ctxt->sax->startDocument(ctxt->userData);
10280
10281 /*
10282 * The Misc part of the Prolog
10283 */
10284 GROW;
10285 xmlParseMisc(ctxt);
10286
10287 /*
10288 * Then possibly doc type declaration(s) and more Misc
10289 * (doctypedecl Misc*)?
10290 */
10291 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +000010292 if (CMP9(CUR_PTR, '<', '!', 'D', 'O', 'C', 'T', 'Y', 'P', 'E')) {
Owen Taylor3473f882001-02-23 17:55:21 +000010293
10294 ctxt->inSubset = 1;
10295 xmlParseDocTypeDecl(ctxt);
10296 if (RAW == '[') {
10297 ctxt->instate = XML_PARSER_DTD;
10298 xmlParseInternalSubset(ctxt);
10299 }
10300
10301 /*
10302 * Create and update the external subset.
10303 */
10304 ctxt->inSubset = 2;
10305 if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
10306 (!ctxt->disableSAX))
10307 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
10308 ctxt->extSubSystem, ctxt->extSubURI);
10309 ctxt->inSubset = 0;
10310
Daniel Veillardac4118d2008-01-11 05:27:32 +000010311 xmlCleanSpecialAttr(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010312
10313 ctxt->instate = XML_PARSER_PROLOG;
10314 xmlParseMisc(ctxt);
10315 }
10316
10317 /*
10318 * Time to start parsing the tree itself
10319 */
10320 GROW;
10321 if (RAW != '<') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +000010322 xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
10323 "Start tag expected, '<' not found\n");
Owen Taylor3473f882001-02-23 17:55:21 +000010324 } else {
10325 ctxt->instate = XML_PARSER_CONTENT;
10326 xmlParseElement(ctxt);
10327 ctxt->instate = XML_PARSER_EPILOG;
10328
10329
10330 /*
10331 * The Misc part at the end
10332 */
10333 xmlParseMisc(ctxt);
10334
Daniel Veillard561b7f82002-03-20 21:55:57 +000010335 if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010336 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010337 }
10338 ctxt->instate = XML_PARSER_EOF;
10339 }
10340
10341 /*
10342 * SAX: end of the document processing.
10343 */
Daniel Veillard8d24cc12002-03-05 15:41:29 +000010344 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +000010345 ctxt->sax->endDocument(ctxt->userData);
10346
Daniel Veillard5997aca2002-03-18 18:36:20 +000010347 /*
10348 * Remove locally kept entity definitions if the tree was not built
10349 */
10350 if ((ctxt->myDoc != NULL) &&
10351 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
10352 xmlFreeDoc(ctxt->myDoc);
10353 ctxt->myDoc = NULL;
10354 }
10355
Daniel Veillardae0765b2008-07-31 19:54:59 +000010356 if ((ctxt->wellFormed) && (ctxt->myDoc != NULL)) {
10357 ctxt->myDoc->properties |= XML_DOC_WELLFORMED;
10358 if (ctxt->valid)
10359 ctxt->myDoc->properties |= XML_DOC_DTDVALID;
10360 if (ctxt->nsWellFormed)
10361 ctxt->myDoc->properties |= XML_DOC_NSVALID;
10362 if (ctxt->options & XML_PARSE_OLD10)
10363 ctxt->myDoc->properties |= XML_DOC_OLD10;
10364 }
Daniel Veillardc7612992002-02-17 22:47:37 +000010365 if (! ctxt->wellFormed) {
10366 ctxt->valid = 0;
10367 return(-1);
10368 }
Owen Taylor3473f882001-02-23 17:55:21 +000010369 return(0);
10370}
10371
10372/**
10373 * xmlParseExtParsedEnt:
10374 * @ctxt: an XML parser context
10375 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +000010376 * parse a general parsed entity
Owen Taylor3473f882001-02-23 17:55:21 +000010377 * An external general parsed entity is well-formed if it matches the
10378 * production labeled extParsedEnt.
10379 *
10380 * [78] extParsedEnt ::= TextDecl? content
10381 *
10382 * Returns 0, -1 in case of error. the parser context is augmented
10383 * as a result of the parsing.
10384 */
10385
10386int
10387xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
10388 xmlChar start[4];
10389 xmlCharEncoding enc;
10390
Daniel Veillard36e5cd52004-11-02 14:52:23 +000010391 if ((ctxt == NULL) || (ctxt->input == NULL))
10392 return(-1);
10393
Owen Taylor3473f882001-02-23 17:55:21 +000010394 xmlDefaultSAXHandlerInit();
10395
Daniel Veillard309f81d2003-09-23 09:02:53 +000010396 xmlDetectSAX2(ctxt);
10397
Owen Taylor3473f882001-02-23 17:55:21 +000010398 GROW;
10399
10400 /*
10401 * SAX: beginning of the document processing.
10402 */
10403 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10404 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
10405
10406 /*
10407 * Get the 4 first bytes and decode the charset
10408 * if enc != XML_CHAR_ENCODING_NONE
10409 * plug some encoding conversion routines.
10410 */
Daniel Veillard4aede2e2003-10-17 12:43:59 +000010411 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
10412 start[0] = RAW;
10413 start[1] = NXT(1);
10414 start[2] = NXT(2);
10415 start[3] = NXT(3);
10416 enc = xmlDetectCharEncoding(start, 4);
10417 if (enc != XML_CHAR_ENCODING_NONE) {
10418 xmlSwitchEncoding(ctxt, enc);
10419 }
Owen Taylor3473f882001-02-23 17:55:21 +000010420 }
10421
10422
10423 if (CUR == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010424 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010425 }
10426
10427 /*
10428 * Check for the XMLDecl in the Prolog.
10429 */
10430 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +000010431 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +000010432
10433 /*
10434 * Note that we will switch encoding on the fly.
10435 */
10436 xmlParseXMLDecl(ctxt);
10437 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
10438 /*
10439 * The XML REC instructs us to stop parsing right here
10440 */
10441 return(-1);
10442 }
10443 SKIP_BLANKS;
10444 } else {
10445 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10446 }
10447 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
10448 ctxt->sax->startDocument(ctxt->userData);
10449
10450 /*
10451 * Doing validity checking on chunk doesn't make sense
10452 */
10453 ctxt->instate = XML_PARSER_CONTENT;
10454 ctxt->validate = 0;
10455 ctxt->loadsubset = 0;
10456 ctxt->depth = 0;
10457
10458 xmlParseContent(ctxt);
10459
10460 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010461 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010462 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010463 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010464 }
10465
10466 /*
10467 * SAX: end of the document processing.
10468 */
Daniel Veillard8d24cc12002-03-05 15:41:29 +000010469 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +000010470 ctxt->sax->endDocument(ctxt->userData);
10471
10472 if (! ctxt->wellFormed) return(-1);
10473 return(0);
10474}
10475
Daniel Veillard73b013f2003-09-30 12:36:01 +000010476#ifdef LIBXML_PUSH_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000010477/************************************************************************
10478 * *
10479 * Progressive parsing interfaces *
10480 * *
10481 ************************************************************************/
10482
10483/**
10484 * xmlParseLookupSequence:
10485 * @ctxt: an XML parser context
10486 * @first: the first char to lookup
10487 * @next: the next char to lookup or zero
10488 * @third: the next char to lookup or zero
10489 *
10490 * Try to find if a sequence (first, next, third) or just (first next) or
10491 * (first) is available in the input stream.
10492 * This function has a side effect of (possibly) incrementing ctxt->checkIndex
10493 * to avoid rescanning sequences of bytes, it DOES change the state of the
10494 * parser, do not use liberally.
10495 *
10496 * Returns the index to the current parsing point if the full sequence
10497 * is available, -1 otherwise.
10498 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +000010499static int
Owen Taylor3473f882001-02-23 17:55:21 +000010500xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
10501 xmlChar next, xmlChar third) {
10502 int base, len;
10503 xmlParserInputPtr in;
10504 const xmlChar *buf;
10505
10506 in = ctxt->input;
10507 if (in == NULL) return(-1);
10508 base = in->cur - in->base;
10509 if (base < 0) return(-1);
10510 if (ctxt->checkIndex > base)
10511 base = ctxt->checkIndex;
10512 if (in->buf == NULL) {
10513 buf = in->base;
10514 len = in->length;
10515 } else {
10516 buf = in->buf->buffer->content;
10517 len = in->buf->buffer->use;
10518 }
10519 /* take into account the sequence length */
10520 if (third) len -= 2;
10521 else if (next) len --;
10522 for (;base < len;base++) {
10523 if (buf[base] == first) {
10524 if (third != 0) {
10525 if ((buf[base + 1] != next) ||
10526 (buf[base + 2] != third)) continue;
10527 } else if (next != 0) {
10528 if (buf[base + 1] != next) continue;
10529 }
10530 ctxt->checkIndex = 0;
10531#ifdef DEBUG_PUSH
10532 if (next == 0)
10533 xmlGenericError(xmlGenericErrorContext,
10534 "PP: lookup '%c' found at %d\n",
10535 first, base);
10536 else if (third == 0)
10537 xmlGenericError(xmlGenericErrorContext,
10538 "PP: lookup '%c%c' found at %d\n",
10539 first, next, base);
10540 else
10541 xmlGenericError(xmlGenericErrorContext,
10542 "PP: lookup '%c%c%c' found at %d\n",
10543 first, next, third, base);
10544#endif
10545 return(base - (in->cur - in->base));
10546 }
10547 }
10548 ctxt->checkIndex = base;
10549#ifdef DEBUG_PUSH
10550 if (next == 0)
10551 xmlGenericError(xmlGenericErrorContext,
10552 "PP: lookup '%c' failed\n", first);
10553 else if (third == 0)
10554 xmlGenericError(xmlGenericErrorContext,
10555 "PP: lookup '%c%c' failed\n", first, next);
10556 else
10557 xmlGenericError(xmlGenericErrorContext,
10558 "PP: lookup '%c%c%c' failed\n", first, next, third);
10559#endif
10560 return(-1);
10561}
10562
10563/**
Daniel Veillarda880b122003-04-21 21:36:41 +000010564 * xmlParseGetLasts:
10565 * @ctxt: an XML parser context
10566 * @lastlt: pointer to store the last '<' from the input
10567 * @lastgt: pointer to store the last '>' from the input
10568 *
10569 * Lookup the last < and > in the current chunk
10570 */
10571static void
10572xmlParseGetLasts(xmlParserCtxtPtr ctxt, const xmlChar **lastlt,
10573 const xmlChar **lastgt) {
10574 const xmlChar *tmp;
10575
10576 if ((ctxt == NULL) || (lastlt == NULL) || (lastgt == NULL)) {
10577 xmlGenericError(xmlGenericErrorContext,
10578 "Internal error: xmlParseGetLasts\n");
10579 return;
10580 }
Daniel Veillard0df3bc32004-06-08 12:03:41 +000010581 if ((ctxt->progressive != 0) && (ctxt->inputNr == 1)) {
Daniel Veillarda880b122003-04-21 21:36:41 +000010582 tmp = ctxt->input->end;
10583 tmp--;
Daniel Veillardeb70f932004-07-05 16:46:09 +000010584 while ((tmp >= ctxt->input->base) && (*tmp != '<')) tmp--;
Daniel Veillarda880b122003-04-21 21:36:41 +000010585 if (tmp < ctxt->input->base) {
10586 *lastlt = NULL;
10587 *lastgt = NULL;
Daniel Veillarda880b122003-04-21 21:36:41 +000010588 } else {
Daniel Veillardeb70f932004-07-05 16:46:09 +000010589 *lastlt = tmp;
10590 tmp++;
10591 while ((tmp < ctxt->input->end) && (*tmp != '>')) {
10592 if (*tmp == '\'') {
10593 tmp++;
10594 while ((tmp < ctxt->input->end) && (*tmp != '\'')) tmp++;
10595 if (tmp < ctxt->input->end) tmp++;
10596 } else if (*tmp == '"') {
10597 tmp++;
10598 while ((tmp < ctxt->input->end) && (*tmp != '"')) tmp++;
10599 if (tmp < ctxt->input->end) tmp++;
10600 } else
10601 tmp++;
10602 }
10603 if (tmp < ctxt->input->end)
10604 *lastgt = tmp;
10605 else {
10606 tmp = *lastlt;
10607 tmp--;
10608 while ((tmp >= ctxt->input->base) && (*tmp != '>')) tmp--;
10609 if (tmp >= ctxt->input->base)
10610 *lastgt = tmp;
10611 else
10612 *lastgt = NULL;
10613 }
Daniel Veillarda880b122003-04-21 21:36:41 +000010614 }
Daniel Veillarda880b122003-04-21 21:36:41 +000010615 } else {
10616 *lastlt = NULL;
10617 *lastgt = NULL;
10618 }
10619}
10620/**
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000010621 * xmlCheckCdataPush:
10622 * @cur: pointer to the bock of characters
10623 * @len: length of the block in bytes
10624 *
10625 * Check that the block of characters is okay as SCdata content [20]
10626 *
10627 * Returns the number of bytes to pass if okay, a negative index where an
10628 * UTF-8 error occured otherwise
10629 */
10630static int
10631xmlCheckCdataPush(const xmlChar *utf, int len) {
10632 int ix;
10633 unsigned char c;
10634 int codepoint;
10635
10636 if ((utf == NULL) || (len <= 0))
10637 return(0);
10638
10639 for (ix = 0; ix < len;) { /* string is 0-terminated */
10640 c = utf[ix];
10641 if ((c & 0x80) == 0x00) { /* 1-byte code, starts with 10 */
10642 if (c >= 0x20)
10643 ix++;
10644 else if ((c == 0xA) || (c == 0xD) || (c == 0x9))
10645 ix++;
10646 else
10647 return(-ix);
10648 } else if ((c & 0xe0) == 0xc0) {/* 2-byte code, starts with 110 */
10649 if (ix + 2 > len) return(ix);
10650 if ((utf[ix+1] & 0xc0 ) != 0x80)
10651 return(-ix);
Daniel Veillardeca59a22005-09-09 10:56:28 +000010652 codepoint = (utf[ix] & 0x1f) << 6;
10653 codepoint |= utf[ix+1] & 0x3f;
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000010654 if (!xmlIsCharQ(codepoint))
10655 return(-ix);
10656 ix += 2;
10657 } else if ((c & 0xf0) == 0xe0) {/* 3-byte code, starts with 1110 */
10658 if (ix + 3 > len) return(ix);
10659 if (((utf[ix+1] & 0xc0) != 0x80) ||
10660 ((utf[ix+2] & 0xc0) != 0x80))
10661 return(-ix);
Daniel Veillardeca59a22005-09-09 10:56:28 +000010662 codepoint = (utf[ix] & 0xf) << 12;
10663 codepoint |= (utf[ix+1] & 0x3f) << 6;
10664 codepoint |= utf[ix+2] & 0x3f;
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000010665 if (!xmlIsCharQ(codepoint))
10666 return(-ix);
10667 ix += 3;
10668 } else if ((c & 0xf8) == 0xf0) {/* 4-byte code, starts with 11110 */
10669 if (ix + 4 > len) return(ix);
10670 if (((utf[ix+1] & 0xc0) != 0x80) ||
10671 ((utf[ix+2] & 0xc0) != 0x80) ||
10672 ((utf[ix+3] & 0xc0) != 0x80))
10673 return(-ix);
Daniel Veillardeca59a22005-09-09 10:56:28 +000010674 codepoint = (utf[ix] & 0x7) << 18;
10675 codepoint |= (utf[ix+1] & 0x3f) << 12;
10676 codepoint |= (utf[ix+2] & 0x3f) << 6;
10677 codepoint |= utf[ix+3] & 0x3f;
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000010678 if (!xmlIsCharQ(codepoint))
10679 return(-ix);
10680 ix += 4;
10681 } else /* unknown encoding */
10682 return(-ix);
10683 }
10684 return(ix);
10685}
10686
10687/**
Owen Taylor3473f882001-02-23 17:55:21 +000010688 * xmlParseTryOrFinish:
10689 * @ctxt: an XML parser context
10690 * @terminate: last chunk indicator
10691 *
10692 * Try to progress on parsing
10693 *
10694 * Returns zero if no parsing was possible
10695 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +000010696static int
Owen Taylor3473f882001-02-23 17:55:21 +000010697xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
10698 int ret = 0;
Daniel Veillardc82c57e2004-01-12 16:24:34 +000010699 int avail, tlen;
Owen Taylor3473f882001-02-23 17:55:21 +000010700 xmlChar cur, next;
Daniel Veillarda880b122003-04-21 21:36:41 +000010701 const xmlChar *lastlt, *lastgt;
Owen Taylor3473f882001-02-23 17:55:21 +000010702
Daniel Veillard36e5cd52004-11-02 14:52:23 +000010703 if (ctxt->input == NULL)
10704 return(0);
10705
Owen Taylor3473f882001-02-23 17:55:21 +000010706#ifdef DEBUG_PUSH
10707 switch (ctxt->instate) {
10708 case XML_PARSER_EOF:
10709 xmlGenericError(xmlGenericErrorContext,
10710 "PP: try EOF\n"); break;
10711 case XML_PARSER_START:
10712 xmlGenericError(xmlGenericErrorContext,
10713 "PP: try START\n"); break;
10714 case XML_PARSER_MISC:
10715 xmlGenericError(xmlGenericErrorContext,
10716 "PP: try MISC\n");break;
10717 case XML_PARSER_COMMENT:
10718 xmlGenericError(xmlGenericErrorContext,
10719 "PP: try COMMENT\n");break;
10720 case XML_PARSER_PROLOG:
10721 xmlGenericError(xmlGenericErrorContext,
10722 "PP: try PROLOG\n");break;
10723 case XML_PARSER_START_TAG:
10724 xmlGenericError(xmlGenericErrorContext,
10725 "PP: try START_TAG\n");break;
10726 case XML_PARSER_CONTENT:
10727 xmlGenericError(xmlGenericErrorContext,
10728 "PP: try CONTENT\n");break;
10729 case XML_PARSER_CDATA_SECTION:
10730 xmlGenericError(xmlGenericErrorContext,
10731 "PP: try CDATA_SECTION\n");break;
10732 case XML_PARSER_END_TAG:
10733 xmlGenericError(xmlGenericErrorContext,
10734 "PP: try END_TAG\n");break;
10735 case XML_PARSER_ENTITY_DECL:
10736 xmlGenericError(xmlGenericErrorContext,
10737 "PP: try ENTITY_DECL\n");break;
10738 case XML_PARSER_ENTITY_VALUE:
10739 xmlGenericError(xmlGenericErrorContext,
10740 "PP: try ENTITY_VALUE\n");break;
10741 case XML_PARSER_ATTRIBUTE_VALUE:
10742 xmlGenericError(xmlGenericErrorContext,
10743 "PP: try ATTRIBUTE_VALUE\n");break;
10744 case XML_PARSER_DTD:
10745 xmlGenericError(xmlGenericErrorContext,
10746 "PP: try DTD\n");break;
10747 case XML_PARSER_EPILOG:
10748 xmlGenericError(xmlGenericErrorContext,
10749 "PP: try EPILOG\n");break;
10750 case XML_PARSER_PI:
10751 xmlGenericError(xmlGenericErrorContext,
10752 "PP: try PI\n");break;
10753 case XML_PARSER_IGNORE:
10754 xmlGenericError(xmlGenericErrorContext,
10755 "PP: try IGNORE\n");break;
10756 }
10757#endif
10758
Daniel Veillard198c1bf2003-10-20 17:07:41 +000010759 if ((ctxt->input != NULL) &&
10760 (ctxt->input->cur - ctxt->input->base > 4096)) {
Daniel Veillarda880b122003-04-21 21:36:41 +000010761 xmlSHRINK(ctxt);
10762 ctxt->checkIndex = 0;
10763 }
10764 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Aleksey Sanine48a3182002-05-09 18:20:01 +000010765
Daniel Veillarda880b122003-04-21 21:36:41 +000010766 while (1) {
Daniel Veillard14412512005-01-21 23:53:26 +000010767 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000010768 return(0);
10769
10770
Owen Taylor3473f882001-02-23 17:55:21 +000010771 /*
10772 * Pop-up of finished entities.
10773 */
10774 while ((RAW == 0) && (ctxt->inputNr > 1))
10775 xmlPopInput(ctxt);
10776
Daniel Veillard198c1bf2003-10-20 17:07:41 +000010777 if (ctxt->input == NULL) break;
Owen Taylor3473f882001-02-23 17:55:21 +000010778 if (ctxt->input->buf == NULL)
Daniel Veillarda880b122003-04-21 21:36:41 +000010779 avail = ctxt->input->length -
10780 (ctxt->input->cur - ctxt->input->base);
Daniel Veillard158a4d22002-02-20 22:17:58 +000010781 else {
10782 /*
10783 * If we are operating on converted input, try to flush
10784 * remainng chars to avoid them stalling in the non-converted
10785 * buffer.
10786 */
10787 if ((ctxt->input->buf->raw != NULL) &&
10788 (ctxt->input->buf->raw->use > 0)) {
10789 int base = ctxt->input->base -
10790 ctxt->input->buf->buffer->content;
10791 int current = ctxt->input->cur - ctxt->input->base;
10792
10793 xmlParserInputBufferPush(ctxt->input->buf, 0, "");
10794 ctxt->input->base = ctxt->input->buf->buffer->content + base;
10795 ctxt->input->cur = ctxt->input->base + current;
10796 ctxt->input->end =
10797 &ctxt->input->buf->buffer->content[
10798 ctxt->input->buf->buffer->use];
10799 }
10800 avail = ctxt->input->buf->buffer->use -
10801 (ctxt->input->cur - ctxt->input->base);
10802 }
Owen Taylor3473f882001-02-23 17:55:21 +000010803 if (avail < 1)
10804 goto done;
10805 switch (ctxt->instate) {
10806 case XML_PARSER_EOF:
10807 /*
10808 * Document parsing is done !
10809 */
10810 goto done;
10811 case XML_PARSER_START:
Daniel Veillard0e4cd172001-06-28 12:13:56 +000010812 if (ctxt->charset == XML_CHAR_ENCODING_NONE) {
10813 xmlChar start[4];
10814 xmlCharEncoding enc;
10815
10816 /*
10817 * Very first chars read from the document flow.
10818 */
10819 if (avail < 4)
10820 goto done;
10821
10822 /*
10823 * Get the 4 first bytes and decode the charset
10824 * if enc != XML_CHAR_ENCODING_NONE
William M. Brack3a1cd212005-02-11 14:35:54 +000010825 * plug some encoding conversion routines,
10826 * else xmlSwitchEncoding will set to (default)
10827 * UTF8.
Daniel Veillard0e4cd172001-06-28 12:13:56 +000010828 */
10829 start[0] = RAW;
10830 start[1] = NXT(1);
10831 start[2] = NXT(2);
10832 start[3] = NXT(3);
10833 enc = xmlDetectCharEncoding(start, 4);
William M. Brack3a1cd212005-02-11 14:35:54 +000010834 xmlSwitchEncoding(ctxt, enc);
Daniel Veillard0e4cd172001-06-28 12:13:56 +000010835 break;
10836 }
Owen Taylor3473f882001-02-23 17:55:21 +000010837
Daniel Veillard2b8c4a12003-10-02 22:28:19 +000010838 if (avail < 2)
10839 goto done;
Owen Taylor3473f882001-02-23 17:55:21 +000010840 cur = ctxt->input->cur[0];
10841 next = ctxt->input->cur[1];
10842 if (cur == 0) {
10843 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10844 ctxt->sax->setDocumentLocator(ctxt->userData,
10845 &xmlDefaultSAXLocator);
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010846 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010847 ctxt->instate = XML_PARSER_EOF;
10848#ifdef DEBUG_PUSH
10849 xmlGenericError(xmlGenericErrorContext,
10850 "PP: entering EOF\n");
10851#endif
10852 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10853 ctxt->sax->endDocument(ctxt->userData);
10854 goto done;
10855 }
10856 if ((cur == '<') && (next == '?')) {
10857 /* PI or XML decl */
10858 if (avail < 5) return(ret);
10859 if ((!terminate) &&
10860 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
10861 return(ret);
10862 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10863 ctxt->sax->setDocumentLocator(ctxt->userData,
10864 &xmlDefaultSAXLocator);
10865 if ((ctxt->input->cur[2] == 'x') &&
10866 (ctxt->input->cur[3] == 'm') &&
10867 (ctxt->input->cur[4] == 'l') &&
William M. Brack76e95df2003-10-18 16:20:14 +000010868 (IS_BLANK_CH(ctxt->input->cur[5]))) {
Owen Taylor3473f882001-02-23 17:55:21 +000010869 ret += 5;
10870#ifdef DEBUG_PUSH
10871 xmlGenericError(xmlGenericErrorContext,
10872 "PP: Parsing XML Decl\n");
10873#endif
10874 xmlParseXMLDecl(ctxt);
10875 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
10876 /*
10877 * The XML REC instructs us to stop parsing right
10878 * here
10879 */
10880 ctxt->instate = XML_PARSER_EOF;
10881 return(0);
10882 }
10883 ctxt->standalone = ctxt->input->standalone;
10884 if ((ctxt->encoding == NULL) &&
10885 (ctxt->input->encoding != NULL))
10886 ctxt->encoding = xmlStrdup(ctxt->input->encoding);
10887 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
10888 (!ctxt->disableSAX))
10889 ctxt->sax->startDocument(ctxt->userData);
10890 ctxt->instate = XML_PARSER_MISC;
10891#ifdef DEBUG_PUSH
10892 xmlGenericError(xmlGenericErrorContext,
10893 "PP: entering MISC\n");
10894#endif
10895 } else {
10896 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10897 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
10898 (!ctxt->disableSAX))
10899 ctxt->sax->startDocument(ctxt->userData);
10900 ctxt->instate = XML_PARSER_MISC;
10901#ifdef DEBUG_PUSH
10902 xmlGenericError(xmlGenericErrorContext,
10903 "PP: entering MISC\n");
10904#endif
10905 }
10906 } else {
10907 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10908 ctxt->sax->setDocumentLocator(ctxt->userData,
10909 &xmlDefaultSAXLocator);
10910 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
William M. Bracka3215c72004-07-31 16:24:01 +000010911 if (ctxt->version == NULL) {
10912 xmlErrMemory(ctxt, NULL);
10913 break;
10914 }
Owen Taylor3473f882001-02-23 17:55:21 +000010915 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
10916 (!ctxt->disableSAX))
10917 ctxt->sax->startDocument(ctxt->userData);
10918 ctxt->instate = XML_PARSER_MISC;
10919#ifdef DEBUG_PUSH
10920 xmlGenericError(xmlGenericErrorContext,
10921 "PP: entering MISC\n");
10922#endif
10923 }
10924 break;
Daniel Veillarda880b122003-04-21 21:36:41 +000010925 case XML_PARSER_START_TAG: {
Daniel Veillarde57ec792003-09-10 10:50:59 +000010926 const xmlChar *name;
Daniel Veillard15495612009-09-05 15:04:41 +020010927 const xmlChar *prefix = NULL;
10928 const xmlChar *URI = NULL;
Daniel Veillarde57ec792003-09-10 10:50:59 +000010929 int nsNr = ctxt->nsNr;
Daniel Veillarda880b122003-04-21 21:36:41 +000010930
10931 if ((avail < 2) && (ctxt->inputNr == 1))
10932 goto done;
10933 cur = ctxt->input->cur[0];
10934 if (cur != '<') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010935 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Daniel Veillarda880b122003-04-21 21:36:41 +000010936 ctxt->instate = XML_PARSER_EOF;
Daniel Veillarda880b122003-04-21 21:36:41 +000010937 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10938 ctxt->sax->endDocument(ctxt->userData);
10939 goto done;
10940 }
10941 if (!terminate) {
10942 if (ctxt->progressive) {
Daniel Veillardb3744002004-02-18 14:28:22 +000010943 /* > can be found unescaped in attribute values */
Daniel Veillardeb70f932004-07-05 16:46:09 +000010944 if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
Daniel Veillarda880b122003-04-21 21:36:41 +000010945 goto done;
10946 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
10947 goto done;
10948 }
10949 }
10950 if (ctxt->spaceNr == 0)
10951 spacePush(ctxt, -1);
Daniel Veillard1114d002006-10-12 16:24:35 +000010952 else if (*ctxt->space == -2)
10953 spacePush(ctxt, -1);
Daniel Veillarda880b122003-04-21 21:36:41 +000010954 else
10955 spacePush(ctxt, *ctxt->space);
Daniel Veillard81273902003-09-30 00:43:48 +000010956#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +000010957 if (ctxt->sax2)
Daniel Veillard81273902003-09-30 00:43:48 +000010958#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillardc82c57e2004-01-12 16:24:34 +000010959 name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
Daniel Veillard81273902003-09-30 00:43:48 +000010960#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +000010961 else
10962 name = xmlParseStartTag(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +000010963#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarda880b122003-04-21 21:36:41 +000010964 if (name == NULL) {
10965 spacePop(ctxt);
10966 ctxt->instate = XML_PARSER_EOF;
Daniel Veillarda880b122003-04-21 21:36:41 +000010967 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10968 ctxt->sax->endDocument(ctxt->userData);
10969 goto done;
10970 }
Daniel Veillard4432df22003-09-28 18:58:27 +000010971#ifdef LIBXML_VALID_ENABLED
Daniel Veillarda880b122003-04-21 21:36:41 +000010972 /*
10973 * [ VC: Root Element Type ]
10974 * The Name in the document type declaration must match
10975 * the element type of the root element.
10976 */
10977 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
10978 ctxt->node && (ctxt->node == ctxt->myDoc->children))
10979 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
Daniel Veillard4432df22003-09-28 18:58:27 +000010980#endif /* LIBXML_VALID_ENABLED */
Daniel Veillarda880b122003-04-21 21:36:41 +000010981
10982 /*
10983 * Check for an Empty Element.
10984 */
10985 if ((RAW == '/') && (NXT(1) == '>')) {
10986 SKIP(2);
Daniel Veillarde57ec792003-09-10 10:50:59 +000010987
10988 if (ctxt->sax2) {
10989 if ((ctxt->sax != NULL) &&
10990 (ctxt->sax->endElementNs != NULL) &&
10991 (!ctxt->disableSAX))
10992 ctxt->sax->endElementNs(ctxt->userData, name,
10993 prefix, URI);
Daniel Veillard48df9612005-01-04 21:50:05 +000010994 if (ctxt->nsNr - nsNr > 0)
10995 nsPop(ctxt, ctxt->nsNr - nsNr);
Daniel Veillard81273902003-09-30 00:43:48 +000010996#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +000010997 } else {
10998 if ((ctxt->sax != NULL) &&
10999 (ctxt->sax->endElement != NULL) &&
11000 (!ctxt->disableSAX))
11001 ctxt->sax->endElement(ctxt->userData, name);
Daniel Veillard81273902003-09-30 00:43:48 +000011002#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarda880b122003-04-21 21:36:41 +000011003 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000011004 spacePop(ctxt);
11005 if (ctxt->nameNr == 0) {
Daniel Veillarda880b122003-04-21 21:36:41 +000011006 ctxt->instate = XML_PARSER_EPILOG;
Daniel Veillarda880b122003-04-21 21:36:41 +000011007 } else {
11008 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillarda880b122003-04-21 21:36:41 +000011009 }
11010 break;
11011 }
11012 if (RAW == '>') {
11013 NEXT;
11014 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +000011015 xmlFatalErrMsgStr(ctxt, XML_ERR_GT_REQUIRED,
Daniel Veillarda880b122003-04-21 21:36:41 +000011016 "Couldn't find end of Start Tag %s\n",
11017 name);
Daniel Veillarda880b122003-04-21 21:36:41 +000011018 nodePop(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +000011019 spacePop(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +000011020 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000011021 if (ctxt->sax2)
11022 nameNsPush(ctxt, name, prefix, URI, ctxt->nsNr - nsNr);
Daniel Veillard81273902003-09-30 00:43:48 +000011023#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +000011024 else
11025 namePush(ctxt, name);
Daniel Veillard81273902003-09-30 00:43:48 +000011026#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +000011027
Daniel Veillarda880b122003-04-21 21:36:41 +000011028 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillarda880b122003-04-21 21:36:41 +000011029 break;
11030 }
11031 case XML_PARSER_CONTENT: {
11032 const xmlChar *test;
11033 unsigned int cons;
11034 if ((avail < 2) && (ctxt->inputNr == 1))
11035 goto done;
11036 cur = ctxt->input->cur[0];
11037 next = ctxt->input->cur[1];
11038
11039 test = CUR_PTR;
11040 cons = ctxt->input->consumed;
11041 if ((cur == '<') && (next == '/')) {
11042 ctxt->instate = XML_PARSER_END_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +000011043 break;
11044 } else if ((cur == '<') && (next == '?')) {
11045 if ((!terminate) &&
11046 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
11047 goto done;
Daniel Veillarda880b122003-04-21 21:36:41 +000011048 xmlParsePI(ctxt);
11049 } else if ((cur == '<') && (next != '!')) {
11050 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +000011051 break;
11052 } else if ((cur == '<') && (next == '!') &&
11053 (ctxt->input->cur[2] == '-') &&
11054 (ctxt->input->cur[3] == '-')) {
Daniel Veillard6974feb2006-02-05 02:43:36 +000011055 int term;
11056
11057 if (avail < 4)
11058 goto done;
11059 ctxt->input->cur += 4;
11060 term = xmlParseLookupSequence(ctxt, '-', '-', '>');
11061 ctxt->input->cur -= 4;
11062 if ((!terminate) && (term < 0))
Daniel Veillarda880b122003-04-21 21:36:41 +000011063 goto done;
Daniel Veillarda880b122003-04-21 21:36:41 +000011064 xmlParseComment(ctxt);
11065 ctxt->instate = XML_PARSER_CONTENT;
11066 } else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
11067 (ctxt->input->cur[2] == '[') &&
11068 (ctxt->input->cur[3] == 'C') &&
11069 (ctxt->input->cur[4] == 'D') &&
11070 (ctxt->input->cur[5] == 'A') &&
11071 (ctxt->input->cur[6] == 'T') &&
11072 (ctxt->input->cur[7] == 'A') &&
11073 (ctxt->input->cur[8] == '[')) {
11074 SKIP(9);
11075 ctxt->instate = XML_PARSER_CDATA_SECTION;
Daniel Veillarda880b122003-04-21 21:36:41 +000011076 break;
11077 } else if ((cur == '<') && (next == '!') &&
11078 (avail < 9)) {
11079 goto done;
11080 } else if (cur == '&') {
11081 if ((!terminate) &&
11082 (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0))
11083 goto done;
Daniel Veillarda880b122003-04-21 21:36:41 +000011084 xmlParseReference(ctxt);
11085 } else {
11086 /* TODO Avoid the extra copy, handle directly !!! */
11087 /*
11088 * Goal of the following test is:
11089 * - minimize calls to the SAX 'character' callback
11090 * when they are mergeable
11091 * - handle an problem for isBlank when we only parse
11092 * a sequence of blank chars and the next one is
11093 * not available to check against '<' presence.
11094 * - tries to homogenize the differences in SAX
11095 * callbacks between the push and pull versions
11096 * of the parser.
11097 */
11098 if ((ctxt->inputNr == 1) &&
11099 (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
11100 if (!terminate) {
11101 if (ctxt->progressive) {
11102 if ((lastlt == NULL) ||
11103 (ctxt->input->cur > lastlt))
11104 goto done;
11105 } else if (xmlParseLookupSequence(ctxt,
11106 '<', 0, 0) < 0) {
11107 goto done;
11108 }
11109 }
11110 }
11111 ctxt->checkIndex = 0;
Daniel Veillarda880b122003-04-21 21:36:41 +000011112 xmlParseCharData(ctxt, 0);
11113 }
11114 /*
11115 * Pop-up of finished entities.
11116 */
11117 while ((RAW == 0) && (ctxt->inputNr > 1))
11118 xmlPopInput(ctxt);
11119 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011120 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
11121 "detected an error in element content\n");
Daniel Veillarda880b122003-04-21 21:36:41 +000011122 ctxt->instate = XML_PARSER_EOF;
11123 break;
11124 }
11125 break;
11126 }
11127 case XML_PARSER_END_TAG:
11128 if (avail < 2)
11129 goto done;
11130 if (!terminate) {
11131 if (ctxt->progressive) {
Daniel Veillardeb70f932004-07-05 16:46:09 +000011132 /* > can be found unescaped in attribute values */
11133 if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
Daniel Veillarda880b122003-04-21 21:36:41 +000011134 goto done;
11135 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
11136 goto done;
11137 }
11138 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000011139 if (ctxt->sax2) {
11140 xmlParseEndTag2(ctxt,
11141 (void *) ctxt->pushTab[ctxt->nameNr * 3 - 3],
11142 (void *) ctxt->pushTab[ctxt->nameNr * 3 - 2], 0,
Daniel Veillardc82c57e2004-01-12 16:24:34 +000011143 (int) (long) ctxt->pushTab[ctxt->nameNr * 3 - 1], 0);
Daniel Veillarde57ec792003-09-10 10:50:59 +000011144 nameNsPop(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +000011145 }
11146#ifdef LIBXML_SAX1_ENABLED
11147 else
Daniel Veillarde57ec792003-09-10 10:50:59 +000011148 xmlParseEndTag1(ctxt, 0);
Daniel Veillard81273902003-09-30 00:43:48 +000011149#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +000011150 if (ctxt->nameNr == 0) {
Daniel Veillarda880b122003-04-21 21:36:41 +000011151 ctxt->instate = XML_PARSER_EPILOG;
Daniel Veillarda880b122003-04-21 21:36:41 +000011152 } else {
11153 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillarda880b122003-04-21 21:36:41 +000011154 }
11155 break;
11156 case XML_PARSER_CDATA_SECTION: {
11157 /*
11158 * The Push mode need to have the SAX callback for
11159 * cdataBlock merge back contiguous callbacks.
11160 */
11161 int base;
11162
11163 base = xmlParseLookupSequence(ctxt, ']', ']', '>');
11164 if (base < 0) {
11165 if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) {
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011166 int tmp;
11167
11168 tmp = xmlCheckCdataPush(ctxt->input->cur,
11169 XML_PARSER_BIG_BUFFER_SIZE);
11170 if (tmp < 0) {
11171 tmp = -tmp;
11172 ctxt->input->cur += tmp;
11173 goto encoding_error;
11174 }
Daniel Veillarda880b122003-04-21 21:36:41 +000011175 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
11176 if (ctxt->sax->cdataBlock != NULL)
Daniel Veillardd9d32ae2003-07-05 20:32:43 +000011177 ctxt->sax->cdataBlock(ctxt->userData,
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011178 ctxt->input->cur, tmp);
Daniel Veillardd9d32ae2003-07-05 20:32:43 +000011179 else if (ctxt->sax->characters != NULL)
11180 ctxt->sax->characters(ctxt->userData,
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011181 ctxt->input->cur, tmp);
Daniel Veillarda880b122003-04-21 21:36:41 +000011182 }
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011183 SKIPL(tmp);
Daniel Veillarda880b122003-04-21 21:36:41 +000011184 ctxt->checkIndex = 0;
11185 }
11186 goto done;
11187 } else {
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011188 int tmp;
11189
11190 tmp = xmlCheckCdataPush(ctxt->input->cur, base);
11191 if ((tmp < 0) || (tmp != base)) {
11192 tmp = -tmp;
11193 ctxt->input->cur += tmp;
11194 goto encoding_error;
11195 }
Daniel Veillardd0d2f092008-03-07 16:50:21 +000011196 if ((ctxt->sax != NULL) && (base == 0) &&
11197 (ctxt->sax->cdataBlock != NULL) &&
11198 (!ctxt->disableSAX)) {
11199 /*
11200 * Special case to provide identical behaviour
11201 * between pull and push parsers on enpty CDATA
11202 * sections
11203 */
11204 if ((ctxt->input->cur - ctxt->input->base >= 9) &&
11205 (!strncmp((const char *)&ctxt->input->cur[-9],
11206 "<![CDATA[", 9)))
11207 ctxt->sax->cdataBlock(ctxt->userData,
11208 BAD_CAST "", 0);
11209 } else if ((ctxt->sax != NULL) && (base > 0) &&
Daniel Veillarda880b122003-04-21 21:36:41 +000011210 (!ctxt->disableSAX)) {
11211 if (ctxt->sax->cdataBlock != NULL)
11212 ctxt->sax->cdataBlock(ctxt->userData,
11213 ctxt->input->cur, base);
Daniel Veillardd9d32ae2003-07-05 20:32:43 +000011214 else if (ctxt->sax->characters != NULL)
11215 ctxt->sax->characters(ctxt->userData,
11216 ctxt->input->cur, base);
Daniel Veillarda880b122003-04-21 21:36:41 +000011217 }
Daniel Veillard0b787f32004-03-26 17:29:53 +000011218 SKIPL(base + 3);
Daniel Veillarda880b122003-04-21 21:36:41 +000011219 ctxt->checkIndex = 0;
11220 ctxt->instate = XML_PARSER_CONTENT;
11221#ifdef DEBUG_PUSH
11222 xmlGenericError(xmlGenericErrorContext,
11223 "PP: entering CONTENT\n");
11224#endif
11225 }
11226 break;
11227 }
Owen Taylor3473f882001-02-23 17:55:21 +000011228 case XML_PARSER_MISC:
11229 SKIP_BLANKS;
11230 if (ctxt->input->buf == NULL)
Daniel Veillarda880b122003-04-21 21:36:41 +000011231 avail = ctxt->input->length -
11232 (ctxt->input->cur - ctxt->input->base);
Owen Taylor3473f882001-02-23 17:55:21 +000011233 else
Daniel Veillarda880b122003-04-21 21:36:41 +000011234 avail = ctxt->input->buf->buffer->use -
11235 (ctxt->input->cur - ctxt->input->base);
Owen Taylor3473f882001-02-23 17:55:21 +000011236 if (avail < 2)
11237 goto done;
11238 cur = ctxt->input->cur[0];
11239 next = ctxt->input->cur[1];
11240 if ((cur == '<') && (next == '?')) {
11241 if ((!terminate) &&
11242 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
11243 goto done;
11244#ifdef DEBUG_PUSH
11245 xmlGenericError(xmlGenericErrorContext,
11246 "PP: Parsing PI\n");
11247#endif
11248 xmlParsePI(ctxt);
Daniel Veillard40e4b212007-06-12 14:46:40 +000011249 ctxt->checkIndex = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000011250 } else if ((cur == '<') && (next == '!') &&
Daniel Veillarda880b122003-04-21 21:36:41 +000011251 (ctxt->input->cur[2] == '-') &&
11252 (ctxt->input->cur[3] == '-')) {
Owen Taylor3473f882001-02-23 17:55:21 +000011253 if ((!terminate) &&
11254 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
11255 goto done;
11256#ifdef DEBUG_PUSH
11257 xmlGenericError(xmlGenericErrorContext,
11258 "PP: Parsing Comment\n");
11259#endif
11260 xmlParseComment(ctxt);
11261 ctxt->instate = XML_PARSER_MISC;
Daniel Veillarddfac9462007-06-12 14:44:32 +000011262 ctxt->checkIndex = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000011263 } else if ((cur == '<') && (next == '!') &&
Daniel Veillarda880b122003-04-21 21:36:41 +000011264 (ctxt->input->cur[2] == 'D') &&
11265 (ctxt->input->cur[3] == 'O') &&
11266 (ctxt->input->cur[4] == 'C') &&
11267 (ctxt->input->cur[5] == 'T') &&
11268 (ctxt->input->cur[6] == 'Y') &&
11269 (ctxt->input->cur[7] == 'P') &&
Owen Taylor3473f882001-02-23 17:55:21 +000011270 (ctxt->input->cur[8] == 'E')) {
11271 if ((!terminate) &&
11272 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
11273 goto done;
11274#ifdef DEBUG_PUSH
11275 xmlGenericError(xmlGenericErrorContext,
11276 "PP: Parsing internal subset\n");
11277#endif
11278 ctxt->inSubset = 1;
11279 xmlParseDocTypeDecl(ctxt);
11280 if (RAW == '[') {
11281 ctxt->instate = XML_PARSER_DTD;
11282#ifdef DEBUG_PUSH
11283 xmlGenericError(xmlGenericErrorContext,
11284 "PP: entering DTD\n");
11285#endif
11286 } else {
11287 /*
11288 * Create and update the external subset.
11289 */
11290 ctxt->inSubset = 2;
11291 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
11292 (ctxt->sax->externalSubset != NULL))
11293 ctxt->sax->externalSubset(ctxt->userData,
11294 ctxt->intSubName, ctxt->extSubSystem,
11295 ctxt->extSubURI);
11296 ctxt->inSubset = 0;
Daniel Veillardac4118d2008-01-11 05:27:32 +000011297 xmlCleanSpecialAttr(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011298 ctxt->instate = XML_PARSER_PROLOG;
11299#ifdef DEBUG_PUSH
11300 xmlGenericError(xmlGenericErrorContext,
11301 "PP: entering PROLOG\n");
11302#endif
11303 }
11304 } else if ((cur == '<') && (next == '!') &&
11305 (avail < 9)) {
11306 goto done;
11307 } else {
11308 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +000011309 ctxt->progressive = 1;
11310 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Owen Taylor3473f882001-02-23 17:55:21 +000011311#ifdef DEBUG_PUSH
11312 xmlGenericError(xmlGenericErrorContext,
11313 "PP: entering START_TAG\n");
11314#endif
11315 }
11316 break;
Owen Taylor3473f882001-02-23 17:55:21 +000011317 case XML_PARSER_PROLOG:
11318 SKIP_BLANKS;
11319 if (ctxt->input->buf == NULL)
11320 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
11321 else
11322 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
11323 if (avail < 2)
11324 goto done;
11325 cur = ctxt->input->cur[0];
11326 next = ctxt->input->cur[1];
11327 if ((cur == '<') && (next == '?')) {
11328 if ((!terminate) &&
11329 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
11330 goto done;
11331#ifdef DEBUG_PUSH
11332 xmlGenericError(xmlGenericErrorContext,
11333 "PP: Parsing PI\n");
11334#endif
11335 xmlParsePI(ctxt);
11336 } else if ((cur == '<') && (next == '!') &&
11337 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
11338 if ((!terminate) &&
11339 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
11340 goto done;
11341#ifdef DEBUG_PUSH
11342 xmlGenericError(xmlGenericErrorContext,
11343 "PP: Parsing Comment\n");
11344#endif
11345 xmlParseComment(ctxt);
11346 ctxt->instate = XML_PARSER_PROLOG;
11347 } else if ((cur == '<') && (next == '!') &&
11348 (avail < 4)) {
11349 goto done;
11350 } else {
11351 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillard0df3bc32004-06-08 12:03:41 +000011352 if (ctxt->progressive == 0)
11353 ctxt->progressive = 1;
Daniel Veillarda880b122003-04-21 21:36:41 +000011354 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Owen Taylor3473f882001-02-23 17:55:21 +000011355#ifdef DEBUG_PUSH
11356 xmlGenericError(xmlGenericErrorContext,
11357 "PP: entering START_TAG\n");
11358#endif
11359 }
11360 break;
11361 case XML_PARSER_EPILOG:
11362 SKIP_BLANKS;
11363 if (ctxt->input->buf == NULL)
11364 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
11365 else
11366 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
11367 if (avail < 2)
11368 goto done;
11369 cur = ctxt->input->cur[0];
11370 next = ctxt->input->cur[1];
11371 if ((cur == '<') && (next == '?')) {
11372 if ((!terminate) &&
11373 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
11374 goto done;
11375#ifdef DEBUG_PUSH
11376 xmlGenericError(xmlGenericErrorContext,
11377 "PP: Parsing PI\n");
11378#endif
11379 xmlParsePI(ctxt);
11380 ctxt->instate = XML_PARSER_EPILOG;
11381 } else if ((cur == '<') && (next == '!') &&
11382 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
11383 if ((!terminate) &&
11384 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
11385 goto done;
11386#ifdef DEBUG_PUSH
11387 xmlGenericError(xmlGenericErrorContext,
11388 "PP: Parsing Comment\n");
11389#endif
11390 xmlParseComment(ctxt);
11391 ctxt->instate = XML_PARSER_EPILOG;
11392 } else if ((cur == '<') && (next == '!') &&
11393 (avail < 4)) {
11394 goto done;
11395 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011396 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011397 ctxt->instate = XML_PARSER_EOF;
11398#ifdef DEBUG_PUSH
11399 xmlGenericError(xmlGenericErrorContext,
11400 "PP: entering EOF\n");
11401#endif
Daniel Veillard8d24cc12002-03-05 15:41:29 +000011402 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +000011403 ctxt->sax->endDocument(ctxt->userData);
11404 goto done;
11405 }
11406 break;
Owen Taylor3473f882001-02-23 17:55:21 +000011407 case XML_PARSER_DTD: {
11408 /*
11409 * Sorry but progressive parsing of the internal subset
11410 * is not expected to be supported. We first check that
11411 * the full content of the internal subset is available and
11412 * the parsing is launched only at that point.
11413 * Internal subset ends up with "']' S? '>'" in an unescaped
11414 * section and not in a ']]>' sequence which are conditional
11415 * sections (whoever argued to keep that crap in XML deserve
11416 * a place in hell !).
11417 */
11418 int base, i;
11419 xmlChar *buf;
11420 xmlChar quote = 0;
11421
11422 base = ctxt->input->cur - ctxt->input->base;
11423 if (base < 0) return(0);
11424 if (ctxt->checkIndex > base)
11425 base = ctxt->checkIndex;
11426 buf = ctxt->input->buf->buffer->content;
11427 for (;(unsigned int) base < ctxt->input->buf->buffer->use;
11428 base++) {
11429 if (quote != 0) {
11430 if (buf[base] == quote)
11431 quote = 0;
11432 continue;
11433 }
Daniel Veillard036143b2004-02-12 11:57:52 +000011434 if ((quote == 0) && (buf[base] == '<')) {
11435 int found = 0;
11436 /* special handling of comments */
11437 if (((unsigned int) base + 4 <
11438 ctxt->input->buf->buffer->use) &&
11439 (buf[base + 1] == '!') &&
11440 (buf[base + 2] == '-') &&
11441 (buf[base + 3] == '-')) {
11442 for (;(unsigned int) base + 3 <
11443 ctxt->input->buf->buffer->use; base++) {
11444 if ((buf[base] == '-') &&
11445 (buf[base + 1] == '-') &&
11446 (buf[base + 2] == '>')) {
11447 found = 1;
11448 base += 2;
11449 break;
11450 }
11451 }
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000011452 if (!found) {
11453#if 0
11454 fprintf(stderr, "unfinished comment\n");
11455#endif
11456 break; /* for */
11457 }
Daniel Veillard036143b2004-02-12 11:57:52 +000011458 continue;
11459 }
11460 }
Owen Taylor3473f882001-02-23 17:55:21 +000011461 if (buf[base] == '"') {
11462 quote = '"';
11463 continue;
11464 }
11465 if (buf[base] == '\'') {
11466 quote = '\'';
11467 continue;
11468 }
11469 if (buf[base] == ']') {
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000011470#if 0
11471 fprintf(stderr, "%c%c%c%c: ", buf[base],
11472 buf[base + 1], buf[base + 2], buf[base + 3]);
11473#endif
Owen Taylor3473f882001-02-23 17:55:21 +000011474 if ((unsigned int) base +1 >=
11475 ctxt->input->buf->buffer->use)
11476 break;
11477 if (buf[base + 1] == ']') {
11478 /* conditional crap, skip both ']' ! */
11479 base++;
11480 continue;
11481 }
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000011482 for (i = 1;
Owen Taylor3473f882001-02-23 17:55:21 +000011483 (unsigned int) base + i < ctxt->input->buf->buffer->use;
11484 i++) {
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000011485 if (buf[base + i] == '>') {
11486#if 0
11487 fprintf(stderr, "found\n");
11488#endif
Owen Taylor3473f882001-02-23 17:55:21 +000011489 goto found_end_int_subset;
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000011490 }
11491 if (!IS_BLANK_CH(buf[base + i])) {
11492#if 0
11493 fprintf(stderr, "not found\n");
11494#endif
11495 goto not_end_of_int_subset;
11496 }
Owen Taylor3473f882001-02-23 17:55:21 +000011497 }
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000011498#if 0
11499 fprintf(stderr, "end of stream\n");
11500#endif
Owen Taylor3473f882001-02-23 17:55:21 +000011501 break;
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000011502
Owen Taylor3473f882001-02-23 17:55:21 +000011503 }
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000011504not_end_of_int_subset:
11505 continue; /* for */
Owen Taylor3473f882001-02-23 17:55:21 +000011506 }
11507 /*
11508 * We didn't found the end of the Internal subset
11509 */
Owen Taylor3473f882001-02-23 17:55:21 +000011510#ifdef DEBUG_PUSH
11511 if (next == 0)
11512 xmlGenericError(xmlGenericErrorContext,
11513 "PP: lookup of int subset end filed\n");
11514#endif
11515 goto done;
11516
11517found_end_int_subset:
11518 xmlParseInternalSubset(ctxt);
11519 ctxt->inSubset = 2;
11520 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
11521 (ctxt->sax->externalSubset != NULL))
11522 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
11523 ctxt->extSubSystem, ctxt->extSubURI);
11524 ctxt->inSubset = 0;
Daniel Veillardac4118d2008-01-11 05:27:32 +000011525 xmlCleanSpecialAttr(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011526 ctxt->instate = XML_PARSER_PROLOG;
11527 ctxt->checkIndex = 0;
11528#ifdef DEBUG_PUSH
11529 xmlGenericError(xmlGenericErrorContext,
11530 "PP: entering PROLOG\n");
11531#endif
11532 break;
11533 }
11534 case XML_PARSER_COMMENT:
11535 xmlGenericError(xmlGenericErrorContext,
11536 "PP: internal error, state == COMMENT\n");
11537 ctxt->instate = XML_PARSER_CONTENT;
11538#ifdef DEBUG_PUSH
11539 xmlGenericError(xmlGenericErrorContext,
11540 "PP: entering CONTENT\n");
11541#endif
11542 break;
Daniel Veillarda880b122003-04-21 21:36:41 +000011543 case XML_PARSER_IGNORE:
11544 xmlGenericError(xmlGenericErrorContext,
11545 "PP: internal error, state == IGNORE");
11546 ctxt->instate = XML_PARSER_DTD;
11547#ifdef DEBUG_PUSH
11548 xmlGenericError(xmlGenericErrorContext,
11549 "PP: entering DTD\n");
11550#endif
11551 break;
Owen Taylor3473f882001-02-23 17:55:21 +000011552 case XML_PARSER_PI:
11553 xmlGenericError(xmlGenericErrorContext,
11554 "PP: internal error, state == PI\n");
11555 ctxt->instate = XML_PARSER_CONTENT;
11556#ifdef DEBUG_PUSH
11557 xmlGenericError(xmlGenericErrorContext,
11558 "PP: entering CONTENT\n");
11559#endif
11560 break;
11561 case XML_PARSER_ENTITY_DECL:
11562 xmlGenericError(xmlGenericErrorContext,
11563 "PP: internal error, state == ENTITY_DECL\n");
11564 ctxt->instate = XML_PARSER_DTD;
11565#ifdef DEBUG_PUSH
11566 xmlGenericError(xmlGenericErrorContext,
11567 "PP: entering DTD\n");
11568#endif
11569 break;
11570 case XML_PARSER_ENTITY_VALUE:
11571 xmlGenericError(xmlGenericErrorContext,
11572 "PP: internal error, state == ENTITY_VALUE\n");
11573 ctxt->instate = XML_PARSER_CONTENT;
11574#ifdef DEBUG_PUSH
11575 xmlGenericError(xmlGenericErrorContext,
11576 "PP: entering DTD\n");
11577#endif
11578 break;
11579 case XML_PARSER_ATTRIBUTE_VALUE:
11580 xmlGenericError(xmlGenericErrorContext,
11581 "PP: internal error, state == ATTRIBUTE_VALUE\n");
11582 ctxt->instate = XML_PARSER_START_TAG;
11583#ifdef DEBUG_PUSH
11584 xmlGenericError(xmlGenericErrorContext,
11585 "PP: entering START_TAG\n");
11586#endif
11587 break;
11588 case XML_PARSER_SYSTEM_LITERAL:
11589 xmlGenericError(xmlGenericErrorContext,
11590 "PP: internal error, state == SYSTEM_LITERAL\n");
11591 ctxt->instate = XML_PARSER_START_TAG;
11592#ifdef DEBUG_PUSH
11593 xmlGenericError(xmlGenericErrorContext,
11594 "PP: entering START_TAG\n");
11595#endif
11596 break;
Daniel Veillard4a7ae502002-02-18 19:18:17 +000011597 case XML_PARSER_PUBLIC_LITERAL:
11598 xmlGenericError(xmlGenericErrorContext,
11599 "PP: internal error, state == PUBLIC_LITERAL\n");
11600 ctxt->instate = XML_PARSER_START_TAG;
11601#ifdef DEBUG_PUSH
11602 xmlGenericError(xmlGenericErrorContext,
11603 "PP: entering START_TAG\n");
11604#endif
11605 break;
Owen Taylor3473f882001-02-23 17:55:21 +000011606 }
11607 }
11608done:
11609#ifdef DEBUG_PUSH
11610 xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret);
11611#endif
11612 return(ret);
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011613encoding_error:
11614 {
11615 char buffer[150];
11616
11617 snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
11618 ctxt->input->cur[0], ctxt->input->cur[1],
11619 ctxt->input->cur[2], ctxt->input->cur[3]);
11620 __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
11621 "Input is not proper UTF-8, indicate encoding !\n%s",
11622 BAD_CAST buffer, NULL);
11623 }
11624 return(0);
Owen Taylor3473f882001-02-23 17:55:21 +000011625}
11626
11627/**
Owen Taylor3473f882001-02-23 17:55:21 +000011628 * xmlParseChunk:
11629 * @ctxt: an XML parser context
11630 * @chunk: an char array
11631 * @size: the size in byte of the chunk
11632 * @terminate: last chunk indicator
11633 *
11634 * Parse a Chunk of memory
11635 *
11636 * Returns zero if no error, the xmlParserErrors otherwise.
11637 */
11638int
11639xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
11640 int terminate) {
Daniel Veillarda617e242006-01-09 14:38:44 +000011641 int end_in_lf = 0;
Daniel Veillarda6c76a22009-08-26 14:37:00 +020011642 int remain = 0;
Daniel Veillarda617e242006-01-09 14:38:44 +000011643
Daniel Veillard36e5cd52004-11-02 14:52:23 +000011644 if (ctxt == NULL)
11645 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard14412512005-01-21 23:53:26 +000011646 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000011647 return(ctxt->errNo);
Daniel Veillard309f81d2003-09-23 09:02:53 +000011648 if (ctxt->instate == XML_PARSER_START)
11649 xmlDetectSAX2(ctxt);
Daniel Veillarda617e242006-01-09 14:38:44 +000011650 if ((size > 0) && (chunk != NULL) && (!terminate) &&
11651 (chunk[size - 1] == '\r')) {
11652 end_in_lf = 1;
11653 size--;
11654 }
Daniel Veillarda6c76a22009-08-26 14:37:00 +020011655
11656xmldecl_done:
11657
Owen Taylor3473f882001-02-23 17:55:21 +000011658 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
11659 (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) {
11660 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
11661 int cur = ctxt->input->cur - ctxt->input->base;
William M. Bracka3215c72004-07-31 16:24:01 +000011662 int res;
Daniel Veillarda6c76a22009-08-26 14:37:00 +020011663
11664 /*
11665 * Specific handling if we autodetected an encoding, we should not
11666 * push more than the first line ... which depend on the encoding
11667 * And only push the rest once the final encoding was detected
11668 */
11669 if ((ctxt->instate == XML_PARSER_START) && (ctxt->input != NULL) &&
11670 (ctxt->input->buf != NULL) && (ctxt->input->buf->encoder != NULL)) {
Nikolay Sivov73046832010-01-19 15:38:05 +010011671 unsigned int len = 45;
Daniel Veillarda6c76a22009-08-26 14:37:00 +020011672
11673 if ((xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
11674 BAD_CAST "UTF-16")) ||
11675 (xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
11676 BAD_CAST "UTF16")))
11677 len = 90;
11678 else if ((xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
11679 BAD_CAST "UCS-4")) ||
11680 (xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
11681 BAD_CAST "UCS4")))
11682 len = 180;
11683
11684 if (ctxt->input->buf->rawconsumed < len)
11685 len -= ctxt->input->buf->rawconsumed;
11686
Raul Hudeaba9716a2010-03-15 10:13:29 +010011687 /*
11688 * Change size for reading the initial declaration only
11689 * if size is greater than len. Otherwise, memmove in xmlBufferAdd
11690 * will blindly copy extra bytes from memory.
11691 */
Daniel Veillard60587d62010-11-04 15:16:27 +010011692 if ((unsigned int) size > len) {
Raul Hudeaba9716a2010-03-15 10:13:29 +010011693 remain = size - len;
11694 size = len;
11695 } else {
11696 remain = 0;
11697 }
Daniel Veillarda6c76a22009-08-26 14:37:00 +020011698 }
William M. Bracka3215c72004-07-31 16:24:01 +000011699 res =xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
11700 if (res < 0) {
11701 ctxt->errNo = XML_PARSER_EOF;
11702 ctxt->disableSAX = 1;
11703 return (XML_PARSER_EOF);
11704 }
Owen Taylor3473f882001-02-23 17:55:21 +000011705 ctxt->input->base = ctxt->input->buf->buffer->content + base;
11706 ctxt->input->cur = ctxt->input->base + cur;
Daniel Veillard48b2f892001-02-25 16:11:03 +000011707 ctxt->input->end =
11708 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000011709#ifdef DEBUG_PUSH
11710 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
11711#endif
11712
Owen Taylor3473f882001-02-23 17:55:21 +000011713 } else if (ctxt->instate != XML_PARSER_EOF) {
11714 if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
11715 xmlParserInputBufferPtr in = ctxt->input->buf;
11716 if ((in->encoder != NULL) && (in->buffer != NULL) &&
11717 (in->raw != NULL)) {
11718 int nbchars;
Daniel Veillarda6c76a22009-08-26 14:37:00 +020011719
Owen Taylor3473f882001-02-23 17:55:21 +000011720 nbchars = xmlCharEncInFunc(in->encoder, in->buffer, in->raw);
11721 if (nbchars < 0) {
Daniel Veillard24eb9782003-10-04 21:08:09 +000011722 /* TODO 2.6.0 */
Owen Taylor3473f882001-02-23 17:55:21 +000011723 xmlGenericError(xmlGenericErrorContext,
11724 "xmlParseChunk: encoder error\n");
11725 return(XML_ERR_INVALID_ENCODING);
11726 }
11727 }
11728 }
11729 }
Daniel Veillarda6c76a22009-08-26 14:37:00 +020011730 if (remain != 0)
11731 xmlParseTryOrFinish(ctxt, 0);
11732 else
11733 xmlParseTryOrFinish(ctxt, terminate);
11734 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
11735 return(ctxt->errNo);
11736
11737 if (remain != 0) {
11738 chunk += size;
11739 size = remain;
11740 remain = 0;
11741 goto xmldecl_done;
11742 }
Daniel Veillarda617e242006-01-09 14:38:44 +000011743 if ((end_in_lf == 1) && (ctxt->input != NULL) &&
11744 (ctxt->input->buf != NULL)) {
11745 xmlParserInputBufferPush(ctxt->input->buf, 1, "\r");
11746 }
Owen Taylor3473f882001-02-23 17:55:21 +000011747 if (terminate) {
11748 /*
11749 * Check for termination
11750 */
Daniel Veillard36e5cd52004-11-02 14:52:23 +000011751 int avail = 0;
11752
11753 if (ctxt->input != NULL) {
Daniel Veillard819d5cb2002-10-14 11:15:18 +000011754 if (ctxt->input->buf == NULL)
Daniel Veillard36e5cd52004-11-02 14:52:23 +000011755 avail = ctxt->input->length -
11756 (ctxt->input->cur - ctxt->input->base);
11757 else
11758 avail = ctxt->input->buf->buffer->use -
11759 (ctxt->input->cur - ctxt->input->base);
11760 }
Daniel Veillard819d5cb2002-10-14 11:15:18 +000011761
Owen Taylor3473f882001-02-23 17:55:21 +000011762 if ((ctxt->instate != XML_PARSER_EOF) &&
11763 (ctxt->instate != XML_PARSER_EPILOG)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011764 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011765 }
Daniel Veillard819d5cb2002-10-14 11:15:18 +000011766 if ((ctxt->instate == XML_PARSER_EPILOG) && (avail > 0)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011767 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Daniel Veillard819d5cb2002-10-14 11:15:18 +000011768 }
Owen Taylor3473f882001-02-23 17:55:21 +000011769 if (ctxt->instate != XML_PARSER_EOF) {
Daniel Veillard8d24cc12002-03-05 15:41:29 +000011770 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +000011771 ctxt->sax->endDocument(ctxt->userData);
11772 }
11773 ctxt->instate = XML_PARSER_EOF;
11774 }
11775 return((xmlParserErrors) ctxt->errNo);
11776}
11777
11778/************************************************************************
11779 * *
11780 * I/O front end functions to the parser *
11781 * *
11782 ************************************************************************/
11783
11784/**
Owen Taylor3473f882001-02-23 17:55:21 +000011785 * xmlCreatePushParserCtxt:
11786 * @sax: a SAX handler
11787 * @user_data: The user data returned on SAX callbacks
11788 * @chunk: a pointer to an array of chars
11789 * @size: number of chars in the array
11790 * @filename: an optional file name or URI
11791 *
Daniel Veillard176d99f2002-07-06 19:22:28 +000011792 * Create a parser context for using the XML parser in push mode.
11793 * If @buffer and @size are non-NULL, the data is used to detect
11794 * the encoding. The remaining characters will be parsed so they
11795 * don't need to be fed in again through xmlParseChunk.
Owen Taylor3473f882001-02-23 17:55:21 +000011796 * To allow content encoding detection, @size should be >= 4
11797 * The value of @filename is used for fetching external entities
11798 * and error/warning reports.
11799 *
11800 * Returns the new parser context or NULL
11801 */
Daniel Veillard176d99f2002-07-06 19:22:28 +000011802
Owen Taylor3473f882001-02-23 17:55:21 +000011803xmlParserCtxtPtr
11804xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
11805 const char *chunk, int size, const char *filename) {
11806 xmlParserCtxtPtr ctxt;
11807 xmlParserInputPtr inputStream;
11808 xmlParserInputBufferPtr buf;
11809 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
11810
11811 /*
11812 * plug some encoding conversion routines
11813 */
11814 if ((chunk != NULL) && (size >= 4))
11815 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
11816
11817 buf = xmlAllocParserInputBuffer(enc);
11818 if (buf == NULL) return(NULL);
11819
11820 ctxt = xmlNewParserCtxt();
11821 if (ctxt == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +000011822 xmlErrMemory(NULL, "creating parser: out of memory\n");
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000011823 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000011824 return(NULL);
11825 }
Daniel Veillard03a53c32004-10-26 16:06:51 +000011826 ctxt->dictNames = 1;
Daniel Veillarde57ec792003-09-10 10:50:59 +000011827 ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 * sizeof(xmlChar *));
11828 if (ctxt->pushTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011829 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +000011830 xmlFreeParserInputBuffer(buf);
11831 xmlFreeParserCtxt(ctxt);
11832 return(NULL);
11833 }
Owen Taylor3473f882001-02-23 17:55:21 +000011834 if (sax != NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +000011835#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard092643b2003-09-25 14:29:29 +000011836 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Daniel Veillard81273902003-09-30 00:43:48 +000011837#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000011838 xmlFree(ctxt->sax);
11839 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
11840 if (ctxt->sax == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011841 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000011842 xmlFreeParserInputBuffer(buf);
11843 xmlFreeParserCtxt(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011844 return(NULL);
11845 }
Daniel Veillard5ea30d72004-11-08 11:54:28 +000011846 memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
11847 if (sax->initialized == XML_SAX2_MAGIC)
11848 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
11849 else
11850 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
Owen Taylor3473f882001-02-23 17:55:21 +000011851 if (user_data != NULL)
11852 ctxt->userData = user_data;
11853 }
11854 if (filename == NULL) {
11855 ctxt->directory = NULL;
11856 } else {
11857 ctxt->directory = xmlParserGetDirectory(filename);
11858 }
11859
11860 inputStream = xmlNewInputStream(ctxt);
11861 if (inputStream == NULL) {
11862 xmlFreeParserCtxt(ctxt);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000011863 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000011864 return(NULL);
11865 }
11866
11867 if (filename == NULL)
11868 inputStream->filename = NULL;
William M. Bracka3215c72004-07-31 16:24:01 +000011869 else {
Daniel Veillardf4862f02002-09-10 11:13:43 +000011870 inputStream->filename = (char *)
Igor Zlatkovic5f9fada2003-02-19 14:51:00 +000011871 xmlCanonicPath((const xmlChar *) filename);
William M. Bracka3215c72004-07-31 16:24:01 +000011872 if (inputStream->filename == NULL) {
11873 xmlFreeParserCtxt(ctxt);
11874 xmlFreeParserInputBuffer(buf);
11875 return(NULL);
11876 }
11877 }
Owen Taylor3473f882001-02-23 17:55:21 +000011878 inputStream->buf = buf;
11879 inputStream->base = inputStream->buf->buffer->content;
11880 inputStream->cur = inputStream->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +000011881 inputStream->end =
11882 &inputStream->buf->buffer->content[inputStream->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000011883
11884 inputPush(ctxt, inputStream);
11885
William M. Brack3a1cd212005-02-11 14:35:54 +000011886 /*
11887 * If the caller didn't provide an initial 'chunk' for determining
11888 * the encoding, we set the context to XML_CHAR_ENCODING_NONE so
11889 * that it can be automatically determined later
11890 */
11891 if ((size == 0) || (chunk == NULL)) {
11892 ctxt->charset = XML_CHAR_ENCODING_NONE;
11893 } else if ((ctxt->input != NULL) && (ctxt->input->buf != NULL)) {
Daniel Veillardaa39a0f2002-01-06 12:47:22 +000011894 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
11895 int cur = ctxt->input->cur - ctxt->input->base;
11896
Owen Taylor3473f882001-02-23 17:55:21 +000011897 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
Daniel Veillardaa39a0f2002-01-06 12:47:22 +000011898
11899 ctxt->input->base = ctxt->input->buf->buffer->content + base;
11900 ctxt->input->cur = ctxt->input->base + cur;
11901 ctxt->input->end =
11902 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000011903#ifdef DEBUG_PUSH
11904 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
11905#endif
11906 }
11907
Daniel Veillard0e4cd172001-06-28 12:13:56 +000011908 if (enc != XML_CHAR_ENCODING_NONE) {
11909 xmlSwitchEncoding(ctxt, enc);
11910 }
11911
Owen Taylor3473f882001-02-23 17:55:21 +000011912 return(ctxt);
11913}
Daniel Veillard73b013f2003-09-30 12:36:01 +000011914#endif /* LIBXML_PUSH_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000011915
11916/**
Daniel Veillard39e5c892005-07-03 22:48:50 +000011917 * xmlStopParser:
11918 * @ctxt: an XML parser context
11919 *
11920 * Blocks further parser processing
11921 */
11922void
11923xmlStopParser(xmlParserCtxtPtr ctxt) {
11924 if (ctxt == NULL)
11925 return;
11926 ctxt->instate = XML_PARSER_EOF;
11927 ctxt->disableSAX = 1;
11928 if (ctxt->input != NULL) {
11929 ctxt->input->cur = BAD_CAST"";
11930 ctxt->input->base = ctxt->input->cur;
11931 }
11932}
11933
11934/**
Owen Taylor3473f882001-02-23 17:55:21 +000011935 * xmlCreateIOParserCtxt:
11936 * @sax: a SAX handler
11937 * @user_data: The user data returned on SAX callbacks
11938 * @ioread: an I/O read function
11939 * @ioclose: an I/O close function
11940 * @ioctx: an I/O handler
11941 * @enc: the charset encoding if known
11942 *
11943 * Create a parser context for using the XML parser with an existing
11944 * I/O stream
11945 *
11946 * Returns the new parser context or NULL
11947 */
11948xmlParserCtxtPtr
11949xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
11950 xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
11951 void *ioctx, xmlCharEncoding enc) {
11952 xmlParserCtxtPtr ctxt;
11953 xmlParserInputPtr inputStream;
11954 xmlParserInputBufferPtr buf;
Daniel Veillard42595322004-11-08 10:52:06 +000011955
11956 if (ioread == NULL) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011957
11958 buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
11959 if (buf == NULL) return(NULL);
11960
11961 ctxt = xmlNewParserCtxt();
11962 if (ctxt == NULL) {
Daniel Veillardf0af8ec2005-07-08 17:27:33 +000011963 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000011964 return(NULL);
11965 }
11966 if (sax != NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +000011967#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard092643b2003-09-25 14:29:29 +000011968 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Daniel Veillard81273902003-09-30 00:43:48 +000011969#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000011970 xmlFree(ctxt->sax);
11971 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
11972 if (ctxt->sax == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011973 xmlErrMemory(ctxt, NULL);
Daniel Veillardf0af8ec2005-07-08 17:27:33 +000011974 xmlFreeParserCtxt(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011975 return(NULL);
11976 }
Daniel Veillard5ea30d72004-11-08 11:54:28 +000011977 memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
11978 if (sax->initialized == XML_SAX2_MAGIC)
11979 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
11980 else
11981 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
Owen Taylor3473f882001-02-23 17:55:21 +000011982 if (user_data != NULL)
11983 ctxt->userData = user_data;
11984 }
11985
11986 inputStream = xmlNewIOInputStream(ctxt, buf, enc);
11987 if (inputStream == NULL) {
11988 xmlFreeParserCtxt(ctxt);
11989 return(NULL);
11990 }
11991 inputPush(ctxt, inputStream);
11992
11993 return(ctxt);
11994}
11995
Daniel Veillard4432df22003-09-28 18:58:27 +000011996#ifdef LIBXML_VALID_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000011997/************************************************************************
11998 * *
Daniel Veillardcbaf3992001-12-31 16:16:02 +000011999 * Front ends when parsing a DTD *
Owen Taylor3473f882001-02-23 17:55:21 +000012000 * *
12001 ************************************************************************/
12002
12003/**
12004 * xmlIOParseDTD:
12005 * @sax: the SAX handler block or NULL
12006 * @input: an Input Buffer
12007 * @enc: the charset encoding if known
12008 *
12009 * Load and parse a DTD
12010 *
12011 * Returns the resulting xmlDtdPtr or NULL in case of error.
Daniel Veillard402b3442006-10-13 10:28:21 +000012012 * @input will be freed by the function in any case.
Owen Taylor3473f882001-02-23 17:55:21 +000012013 */
12014
12015xmlDtdPtr
12016xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
12017 xmlCharEncoding enc) {
12018 xmlDtdPtr ret = NULL;
12019 xmlParserCtxtPtr ctxt;
12020 xmlParserInputPtr pinput = NULL;
Daniel Veillard87a764e2001-06-20 17:41:10 +000012021 xmlChar start[4];
Owen Taylor3473f882001-02-23 17:55:21 +000012022
12023 if (input == NULL)
12024 return(NULL);
12025
12026 ctxt = xmlNewParserCtxt();
12027 if (ctxt == NULL) {
Daniel Veillard402b3442006-10-13 10:28:21 +000012028 xmlFreeParserInputBuffer(input);
Owen Taylor3473f882001-02-23 17:55:21 +000012029 return(NULL);
12030 }
12031
12032 /*
12033 * Set-up the SAX context
12034 */
12035 if (sax != NULL) {
12036 if (ctxt->sax != NULL)
12037 xmlFree(ctxt->sax);
12038 ctxt->sax = sax;
Daniel Veillard500a1de2004-03-22 15:22:58 +000012039 ctxt->userData = ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000012040 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000012041 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000012042
12043 /*
12044 * generate a parser input from the I/O handler
12045 */
12046
Daniel Veillard43caefb2003-12-07 19:32:22 +000012047 pinput = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
Owen Taylor3473f882001-02-23 17:55:21 +000012048 if (pinput == NULL) {
12049 if (sax != NULL) ctxt->sax = NULL;
Daniel Veillard402b3442006-10-13 10:28:21 +000012050 xmlFreeParserInputBuffer(input);
Owen Taylor3473f882001-02-23 17:55:21 +000012051 xmlFreeParserCtxt(ctxt);
12052 return(NULL);
12053 }
12054
12055 /*
12056 * plug some encoding conversion routines here.
12057 */
Daniel Veillarda8f09ce2008-08-27 13:02:01 +000012058 if (xmlPushInput(ctxt, pinput) < 0) {
12059 if (sax != NULL) ctxt->sax = NULL;
12060 xmlFreeParserCtxt(ctxt);
12061 return(NULL);
12062 }
Daniel Veillard43caefb2003-12-07 19:32:22 +000012063 if (enc != XML_CHAR_ENCODING_NONE) {
12064 xmlSwitchEncoding(ctxt, enc);
12065 }
Owen Taylor3473f882001-02-23 17:55:21 +000012066
12067 pinput->filename = NULL;
12068 pinput->line = 1;
12069 pinput->col = 1;
12070 pinput->base = ctxt->input->cur;
12071 pinput->cur = ctxt->input->cur;
12072 pinput->free = NULL;
12073
12074 /*
12075 * let's parse that entity knowing it's an external subset.
12076 */
12077 ctxt->inSubset = 2;
12078 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
Daniel Veillardae0765b2008-07-31 19:54:59 +000012079 if (ctxt->myDoc == NULL) {
12080 xmlErrMemory(ctxt, "New Doc failed");
12081 return(NULL);
12082 }
12083 ctxt->myDoc->properties = XML_DOC_INTERNAL;
Owen Taylor3473f882001-02-23 17:55:21 +000012084 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
12085 BAD_CAST "none", BAD_CAST "none");
Daniel Veillard87a764e2001-06-20 17:41:10 +000012086
Daniel Veillard4aede2e2003-10-17 12:43:59 +000012087 if ((enc == XML_CHAR_ENCODING_NONE) &&
12088 ((ctxt->input->end - ctxt->input->cur) >= 4)) {
Daniel Veillard87a764e2001-06-20 17:41:10 +000012089 /*
12090 * Get the 4 first bytes and decode the charset
12091 * if enc != XML_CHAR_ENCODING_NONE
12092 * plug some encoding conversion routines.
12093 */
12094 start[0] = RAW;
12095 start[1] = NXT(1);
12096 start[2] = NXT(2);
12097 start[3] = NXT(3);
12098 enc = xmlDetectCharEncoding(start, 4);
12099 if (enc != XML_CHAR_ENCODING_NONE) {
12100 xmlSwitchEncoding(ctxt, enc);
12101 }
12102 }
12103
Owen Taylor3473f882001-02-23 17:55:21 +000012104 xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
12105
12106 if (ctxt->myDoc != NULL) {
12107 if (ctxt->wellFormed) {
12108 ret = ctxt->myDoc->extSubset;
12109 ctxt->myDoc->extSubset = NULL;
Daniel Veillard329456a2003-04-26 21:21:00 +000012110 if (ret != NULL) {
12111 xmlNodePtr tmp;
12112
12113 ret->doc = NULL;
12114 tmp = ret->children;
12115 while (tmp != NULL) {
12116 tmp->doc = NULL;
12117 tmp = tmp->next;
12118 }
12119 }
Owen Taylor3473f882001-02-23 17:55:21 +000012120 } else {
12121 ret = NULL;
12122 }
12123 xmlFreeDoc(ctxt->myDoc);
12124 ctxt->myDoc = NULL;
12125 }
12126 if (sax != NULL) ctxt->sax = NULL;
12127 xmlFreeParserCtxt(ctxt);
12128
12129 return(ret);
12130}
12131
12132/**
12133 * xmlSAXParseDTD:
12134 * @sax: the SAX handler block
12135 * @ExternalID: a NAME* containing the External ID of the DTD
12136 * @SystemID: a NAME* containing the URL to the DTD
12137 *
12138 * Load and parse an external subset.
12139 *
12140 * Returns the resulting xmlDtdPtr or NULL in case of error.
12141 */
12142
12143xmlDtdPtr
12144xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
12145 const xmlChar *SystemID) {
12146 xmlDtdPtr ret = NULL;
12147 xmlParserCtxtPtr ctxt;
12148 xmlParserInputPtr input = NULL;
12149 xmlCharEncoding enc;
Igor Zlatkovic07d59762004-08-24 19:12:51 +000012150 xmlChar* systemIdCanonic;
Owen Taylor3473f882001-02-23 17:55:21 +000012151
12152 if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
12153
12154 ctxt = xmlNewParserCtxt();
12155 if (ctxt == NULL) {
12156 return(NULL);
12157 }
12158
12159 /*
12160 * Set-up the SAX context
12161 */
12162 if (sax != NULL) {
12163 if (ctxt->sax != NULL)
12164 xmlFree(ctxt->sax);
12165 ctxt->sax = sax;
Daniel Veillardbf1e3d82003-08-14 23:57:26 +000012166 ctxt->userData = ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000012167 }
Igor Zlatkovic07d59762004-08-24 19:12:51 +000012168
12169 /*
12170 * Canonicalise the system ID
12171 */
12172 systemIdCanonic = xmlCanonicPath(SystemID);
Daniel Veillardc93a19f2004-10-04 11:53:20 +000012173 if ((SystemID != NULL) && (systemIdCanonic == NULL)) {
Igor Zlatkovic07d59762004-08-24 19:12:51 +000012174 xmlFreeParserCtxt(ctxt);
12175 return(NULL);
12176 }
Owen Taylor3473f882001-02-23 17:55:21 +000012177
12178 /*
12179 * Ask the Entity resolver to load the damn thing
12180 */
12181
12182 if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
Daniel Veillarda9557952006-10-12 12:53:15 +000012183 input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID,
12184 systemIdCanonic);
Owen Taylor3473f882001-02-23 17:55:21 +000012185 if (input == NULL) {
12186 if (sax != NULL) ctxt->sax = NULL;
12187 xmlFreeParserCtxt(ctxt);
Daniel Veillard34099b42004-11-04 17:34:35 +000012188 if (systemIdCanonic != NULL)
12189 xmlFree(systemIdCanonic);
Owen Taylor3473f882001-02-23 17:55:21 +000012190 return(NULL);
12191 }
12192
12193 /*
12194 * plug some encoding conversion routines here.
12195 */
Daniel Veillarda8f09ce2008-08-27 13:02:01 +000012196 if (xmlPushInput(ctxt, input) < 0) {
12197 if (sax != NULL) ctxt->sax = NULL;
12198 xmlFreeParserCtxt(ctxt);
12199 if (systemIdCanonic != NULL)
12200 xmlFree(systemIdCanonic);
12201 return(NULL);
12202 }
Daniel Veillard4aede2e2003-10-17 12:43:59 +000012203 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
12204 enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
12205 xmlSwitchEncoding(ctxt, enc);
12206 }
Owen Taylor3473f882001-02-23 17:55:21 +000012207
12208 if (input->filename == NULL)
Igor Zlatkovic07d59762004-08-24 19:12:51 +000012209 input->filename = (char *) systemIdCanonic;
12210 else
12211 xmlFree(systemIdCanonic);
Owen Taylor3473f882001-02-23 17:55:21 +000012212 input->line = 1;
12213 input->col = 1;
12214 input->base = ctxt->input->cur;
12215 input->cur = ctxt->input->cur;
12216 input->free = NULL;
12217
12218 /*
12219 * let's parse that entity knowing it's an external subset.
12220 */
12221 ctxt->inSubset = 2;
12222 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
Daniel Veillardae0765b2008-07-31 19:54:59 +000012223 if (ctxt->myDoc == NULL) {
12224 xmlErrMemory(ctxt, "New Doc failed");
12225 if (sax != NULL) ctxt->sax = NULL;
12226 xmlFreeParserCtxt(ctxt);
12227 return(NULL);
12228 }
12229 ctxt->myDoc->properties = XML_DOC_INTERNAL;
Owen Taylor3473f882001-02-23 17:55:21 +000012230 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
12231 ExternalID, SystemID);
12232 xmlParseExternalSubset(ctxt, ExternalID, SystemID);
12233
12234 if (ctxt->myDoc != NULL) {
12235 if (ctxt->wellFormed) {
12236 ret = ctxt->myDoc->extSubset;
12237 ctxt->myDoc->extSubset = NULL;
Daniel Veillardc5573462003-04-25 16:43:49 +000012238 if (ret != NULL) {
12239 xmlNodePtr tmp;
12240
12241 ret->doc = NULL;
12242 tmp = ret->children;
12243 while (tmp != NULL) {
12244 tmp->doc = NULL;
12245 tmp = tmp->next;
12246 }
12247 }
Owen Taylor3473f882001-02-23 17:55:21 +000012248 } else {
12249 ret = NULL;
12250 }
12251 xmlFreeDoc(ctxt->myDoc);
12252 ctxt->myDoc = NULL;
12253 }
12254 if (sax != NULL) ctxt->sax = NULL;
12255 xmlFreeParserCtxt(ctxt);
Daniel Veillard0161e632008-08-28 15:36:32 +000012256
Owen Taylor3473f882001-02-23 17:55:21 +000012257 return(ret);
12258}
12259
Daniel Veillard4432df22003-09-28 18:58:27 +000012260
Owen Taylor3473f882001-02-23 17:55:21 +000012261/**
12262 * xmlParseDTD:
12263 * @ExternalID: a NAME* containing the External ID of the DTD
12264 * @SystemID: a NAME* containing the URL to the DTD
12265 *
12266 * Load and parse an external subset.
Daniel Veillard0161e632008-08-28 15:36:32 +000012267 *
Owen Taylor3473f882001-02-23 17:55:21 +000012268 * Returns the resulting xmlDtdPtr or NULL in case of error.
12269 */
12270
12271xmlDtdPtr
12272xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
12273 return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
12274}
Daniel Veillard4432df22003-09-28 18:58:27 +000012275#endif /* LIBXML_VALID_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012276
12277/************************************************************************
12278 * *
12279 * Front ends when parsing an Entity *
12280 * *
12281 ************************************************************************/
12282
12283/**
Owen Taylor3473f882001-02-23 17:55:21 +000012284 * xmlParseCtxtExternalEntity:
12285 * @ctx: the existing parsing context
12286 * @URL: the URL for the entity to load
12287 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +000012288 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +000012289 *
12290 * Parse an external general entity within an existing parsing context
12291 * An external general parsed entity is well-formed if it matches the
12292 * production labeled extParsedEnt.
12293 *
12294 * [78] extParsedEnt ::= TextDecl? content
12295 *
12296 * Returns 0 if the entity is well formed, -1 in case of args problem and
12297 * the parser error code otherwise
12298 */
12299
12300int
12301xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
Daniel Veillardcda96922001-08-21 10:56:31 +000012302 const xmlChar *ID, xmlNodePtr *lst) {
Owen Taylor3473f882001-02-23 17:55:21 +000012303 xmlParserCtxtPtr ctxt;
12304 xmlDocPtr newDoc;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012305 xmlNodePtr newRoot;
Owen Taylor3473f882001-02-23 17:55:21 +000012306 xmlSAXHandlerPtr oldsax = NULL;
12307 int ret = 0;
Daniel Veillard87a764e2001-06-20 17:41:10 +000012308 xmlChar start[4];
12309 xmlCharEncoding enc;
Owen Taylor3473f882001-02-23 17:55:21 +000012310
Daniel Veillardce682bc2004-11-05 17:22:25 +000012311 if (ctx == NULL) return(-1);
12312
Daniel Veillard0161e632008-08-28 15:36:32 +000012313 if (((ctx->depth > 40) && ((ctx->options & XML_PARSE_HUGE) == 0)) ||
12314 (ctx->depth > 1024)) {
Owen Taylor3473f882001-02-23 17:55:21 +000012315 return(XML_ERR_ENTITY_LOOP);
12316 }
12317
Daniel Veillardcda96922001-08-21 10:56:31 +000012318 if (lst != NULL)
12319 *lst = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000012320 if ((URL == NULL) && (ID == NULL))
12321 return(-1);
12322 if (ctx->myDoc == NULL) /* @@ relax but check for dereferences */
12323 return(-1);
12324
Rob Richards798743a2009-06-19 13:54:25 -040012325 ctxt = xmlCreateEntityParserCtxtInternal(URL, ID, NULL, ctx);
Daniel Veillard2937b3a2006-10-10 08:52:34 +000012326 if (ctxt == NULL) {
12327 return(-1);
12328 }
Daniel Veillard40ec29a2008-07-30 12:35:40 +000012329
Owen Taylor3473f882001-02-23 17:55:21 +000012330 oldsax = ctxt->sax;
12331 ctxt->sax = ctx->sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000012332 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000012333 newDoc = xmlNewDoc(BAD_CAST "1.0");
12334 if (newDoc == NULL) {
12335 xmlFreeParserCtxt(ctxt);
12336 return(-1);
12337 }
Daniel Veillardae0765b2008-07-31 19:54:59 +000012338 newDoc->properties = XML_DOC_INTERNAL;
Daniel Veillard03a53c32004-10-26 16:06:51 +000012339 if (ctx->myDoc->dict) {
12340 newDoc->dict = ctx->myDoc->dict;
12341 xmlDictReference(newDoc->dict);
12342 }
Owen Taylor3473f882001-02-23 17:55:21 +000012343 if (ctx->myDoc != NULL) {
12344 newDoc->intSubset = ctx->myDoc->intSubset;
12345 newDoc->extSubset = ctx->myDoc->extSubset;
12346 }
12347 if (ctx->myDoc->URL != NULL) {
12348 newDoc->URL = xmlStrdup(ctx->myDoc->URL);
12349 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012350 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
12351 if (newRoot == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000012352 ctxt->sax = oldsax;
12353 xmlFreeParserCtxt(ctxt);
12354 newDoc->intSubset = NULL;
12355 newDoc->extSubset = NULL;
12356 xmlFreeDoc(newDoc);
12357 return(-1);
12358 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012359 xmlAddChild((xmlNodePtr) newDoc, newRoot);
Owen Taylor3473f882001-02-23 17:55:21 +000012360 nodePush(ctxt, newDoc->children);
12361 if (ctx->myDoc == NULL) {
12362 ctxt->myDoc = newDoc;
12363 } else {
12364 ctxt->myDoc = ctx->myDoc;
12365 newDoc->children->doc = ctx->myDoc;
12366 }
12367
Daniel Veillard40ec29a2008-07-30 12:35:40 +000012368 /*
Daniel Veillard87a764e2001-06-20 17:41:10 +000012369 * Get the 4 first bytes and decode the charset
12370 * if enc != XML_CHAR_ENCODING_NONE
12371 * plug some encoding conversion routines.
12372 */
12373 GROW
Daniel Veillard4aede2e2003-10-17 12:43:59 +000012374 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
12375 start[0] = RAW;
12376 start[1] = NXT(1);
12377 start[2] = NXT(2);
12378 start[3] = NXT(3);
12379 enc = xmlDetectCharEncoding(start, 4);
12380 if (enc != XML_CHAR_ENCODING_NONE) {
12381 xmlSwitchEncoding(ctxt, enc);
12382 }
Daniel Veillard87a764e2001-06-20 17:41:10 +000012383 }
12384
Owen Taylor3473f882001-02-23 17:55:21 +000012385 /*
12386 * Parse a possible text declaration first
12387 */
Daniel Veillarda07050d2003-10-19 14:46:32 +000012388 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +000012389 xmlParseTextDecl(ctxt);
Daniel Veillard40ec29a2008-07-30 12:35:40 +000012390 /*
12391 * An XML-1.0 document can't reference an entity not XML-1.0
12392 */
12393 if ((xmlStrEqual(ctx->version, BAD_CAST "1.0")) &&
12394 (!xmlStrEqual(ctxt->input->version, BAD_CAST "1.0"))) {
12395 xmlFatalErrMsg(ctxt, XML_ERR_VERSION_MISMATCH,
12396 "Version mismatch between document and entity\n");
12397 }
Owen Taylor3473f882001-02-23 17:55:21 +000012398 }
12399
12400 /*
12401 * Doing validity checking on chunk doesn't make sense
12402 */
12403 ctxt->instate = XML_PARSER_CONTENT;
12404 ctxt->validate = ctx->validate;
Daniel Veillard5f8d1a32003-03-23 21:02:00 +000012405 ctxt->valid = ctx->valid;
Owen Taylor3473f882001-02-23 17:55:21 +000012406 ctxt->loadsubset = ctx->loadsubset;
12407 ctxt->depth = ctx->depth + 1;
12408 ctxt->replaceEntities = ctx->replaceEntities;
12409 if (ctxt->validate) {
12410 ctxt->vctxt.error = ctx->vctxt.error;
12411 ctxt->vctxt.warning = ctx->vctxt.warning;
Owen Taylor3473f882001-02-23 17:55:21 +000012412 } else {
12413 ctxt->vctxt.error = NULL;
12414 ctxt->vctxt.warning = NULL;
12415 }
Daniel Veillarda9142e72001-06-19 11:07:54 +000012416 ctxt->vctxt.nodeTab = NULL;
12417 ctxt->vctxt.nodeNr = 0;
12418 ctxt->vctxt.nodeMax = 0;
12419 ctxt->vctxt.node = NULL;
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000012420 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
12421 ctxt->dict = ctx->dict;
Daniel Veillardfd343dc2003-10-31 10:55:22 +000012422 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
12423 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
12424 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000012425 ctxt->dictNames = ctx->dictNames;
12426 ctxt->attsDefault = ctx->attsDefault;
12427 ctxt->attsSpecial = ctx->attsSpecial;
William M. Brack503b6102004-08-19 02:17:27 +000012428 ctxt->linenumbers = ctx->linenumbers;
Owen Taylor3473f882001-02-23 17:55:21 +000012429
12430 xmlParseContent(ctxt);
Daniel Veillard40ec29a2008-07-30 12:35:40 +000012431
Daniel Veillard5f8d1a32003-03-23 21:02:00 +000012432 ctx->validate = ctxt->validate;
12433 ctx->valid = ctxt->valid;
Owen Taylor3473f882001-02-23 17:55:21 +000012434 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012435 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000012436 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012437 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000012438 }
12439 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012440 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000012441 }
12442
12443 if (!ctxt->wellFormed) {
12444 if (ctxt->errNo == 0)
12445 ret = 1;
12446 else
12447 ret = ctxt->errNo;
12448 } else {
Daniel Veillardcda96922001-08-21 10:56:31 +000012449 if (lst != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000012450 xmlNodePtr cur;
12451
12452 /*
12453 * Return the newly created nodeset after unlinking it from
12454 * they pseudo parent.
12455 */
12456 cur = newDoc->children->children;
Daniel Veillardcda96922001-08-21 10:56:31 +000012457 *lst = cur;
Owen Taylor3473f882001-02-23 17:55:21 +000012458 while (cur != NULL) {
12459 cur->parent = NULL;
12460 cur = cur->next;
12461 }
12462 newDoc->children->children = NULL;
12463 }
12464 ret = 0;
12465 }
12466 ctxt->sax = oldsax;
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000012467 ctxt->dict = NULL;
12468 ctxt->attsDefault = NULL;
12469 ctxt->attsSpecial = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000012470 xmlFreeParserCtxt(ctxt);
12471 newDoc->intSubset = NULL;
12472 newDoc->extSubset = NULL;
12473 xmlFreeDoc(newDoc);
Daniel Veillard40ec29a2008-07-30 12:35:40 +000012474
Owen Taylor3473f882001-02-23 17:55:21 +000012475 return(ret);
12476}
12477
12478/**
Daniel Veillard257d9102001-05-08 10:41:44 +000012479 * xmlParseExternalEntityPrivate:
Owen Taylor3473f882001-02-23 17:55:21 +000012480 * @doc: the document the chunk pertains to
Daniel Veillarda97a19b2001-05-20 13:19:52 +000012481 * @oldctxt: the previous parser context if available
Owen Taylor3473f882001-02-23 17:55:21 +000012482 * @sax: the SAX handler bloc (possibly NULL)
12483 * @user_data: The user data returned on SAX callbacks (possibly NULL)
12484 * @depth: Used for loop detection, use 0
12485 * @URL: the URL for the entity to load
12486 * @ID: the System ID for the entity to load
12487 * @list: the return value for the set of parsed nodes
12488 *
Daniel Veillard257d9102001-05-08 10:41:44 +000012489 * Private version of xmlParseExternalEntity()
Owen Taylor3473f882001-02-23 17:55:21 +000012490 *
12491 * Returns 0 if the entity is well formed, -1 in case of args problem and
12492 * the parser error code otherwise
12493 */
12494
Daniel Veillard7d515752003-09-26 19:12:37 +000012495static xmlParserErrors
Daniel Veillarda97a19b2001-05-20 13:19:52 +000012496xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
12497 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +000012498 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +000012499 const xmlChar *ID, xmlNodePtr *list) {
Owen Taylor3473f882001-02-23 17:55:21 +000012500 xmlParserCtxtPtr ctxt;
12501 xmlDocPtr newDoc;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012502 xmlNodePtr newRoot;
Owen Taylor3473f882001-02-23 17:55:21 +000012503 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillard7d515752003-09-26 19:12:37 +000012504 xmlParserErrors ret = XML_ERR_OK;
Daniel Veillard87a764e2001-06-20 17:41:10 +000012505 xmlChar start[4];
12506 xmlCharEncoding enc;
Owen Taylor3473f882001-02-23 17:55:21 +000012507
Daniel Veillard0161e632008-08-28 15:36:32 +000012508 if (((depth > 40) &&
12509 ((oldctxt == NULL) || (oldctxt->options & XML_PARSE_HUGE) == 0)) ||
12510 (depth > 1024)) {
Owen Taylor3473f882001-02-23 17:55:21 +000012511 return(XML_ERR_ENTITY_LOOP);
12512 }
12513
Owen Taylor3473f882001-02-23 17:55:21 +000012514 if (list != NULL)
12515 *list = NULL;
12516 if ((URL == NULL) && (ID == NULL))
Daniel Veillard7d515752003-09-26 19:12:37 +000012517 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard30e76072006-03-09 14:13:55 +000012518 if (doc == NULL)
Daniel Veillard7d515752003-09-26 19:12:37 +000012519 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000012520
12521
Rob Richards9c0aa472009-03-26 18:10:19 +000012522 ctxt = xmlCreateEntityParserCtxtInternal(URL, ID, NULL, oldctxt);
William M. Brackb670e2e2003-09-27 01:05:55 +000012523 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
Owen Taylor3473f882001-02-23 17:55:21 +000012524 ctxt->userData = ctxt;
Daniel Veillarda97a19b2001-05-20 13:19:52 +000012525 if (oldctxt != NULL) {
12526 ctxt->_private = oldctxt->_private;
12527 ctxt->loadsubset = oldctxt->loadsubset;
12528 ctxt->validate = oldctxt->validate;
12529 ctxt->external = oldctxt->external;
Daniel Veillard44e1dd02003-02-21 23:23:28 +000012530 ctxt->record_info = oldctxt->record_info;
12531 ctxt->node_seq.maximum = oldctxt->node_seq.maximum;
12532 ctxt->node_seq.length = oldctxt->node_seq.length;
12533 ctxt->node_seq.buffer = oldctxt->node_seq.buffer;
Daniel Veillarda97a19b2001-05-20 13:19:52 +000012534 } else {
12535 /*
12536 * Doing validity checking on chunk without context
12537 * doesn't make sense
12538 */
12539 ctxt->_private = NULL;
12540 ctxt->validate = 0;
12541 ctxt->external = 2;
12542 ctxt->loadsubset = 0;
12543 }
Owen Taylor3473f882001-02-23 17:55:21 +000012544 if (sax != NULL) {
12545 oldsax = ctxt->sax;
12546 ctxt->sax = sax;
12547 if (user_data != NULL)
12548 ctxt->userData = user_data;
12549 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000012550 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000012551 newDoc = xmlNewDoc(BAD_CAST "1.0");
12552 if (newDoc == NULL) {
Daniel Veillard44e1dd02003-02-21 23:23:28 +000012553 ctxt->node_seq.maximum = 0;
12554 ctxt->node_seq.length = 0;
12555 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000012556 xmlFreeParserCtxt(ctxt);
Daniel Veillard7d515752003-09-26 19:12:37 +000012557 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000012558 }
Daniel Veillardae0765b2008-07-31 19:54:59 +000012559 newDoc->properties = XML_DOC_INTERNAL;
Daniel Veillard30e76072006-03-09 14:13:55 +000012560 newDoc->intSubset = doc->intSubset;
12561 newDoc->extSubset = doc->extSubset;
12562 newDoc->dict = doc->dict;
Daniel Veillard03a53c32004-10-26 16:06:51 +000012563 xmlDictReference(newDoc->dict);
12564
Owen Taylor3473f882001-02-23 17:55:21 +000012565 if (doc->URL != NULL) {
12566 newDoc->URL = xmlStrdup(doc->URL);
12567 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012568 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
12569 if (newRoot == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000012570 if (sax != NULL)
12571 ctxt->sax = oldsax;
Daniel Veillard44e1dd02003-02-21 23:23:28 +000012572 ctxt->node_seq.maximum = 0;
12573 ctxt->node_seq.length = 0;
12574 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000012575 xmlFreeParserCtxt(ctxt);
12576 newDoc->intSubset = NULL;
12577 newDoc->extSubset = NULL;
12578 xmlFreeDoc(newDoc);
Daniel Veillard7d515752003-09-26 19:12:37 +000012579 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000012580 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012581 xmlAddChild((xmlNodePtr) newDoc, newRoot);
Owen Taylor3473f882001-02-23 17:55:21 +000012582 nodePush(ctxt, newDoc->children);
Daniel Veillard30e76072006-03-09 14:13:55 +000012583 ctxt->myDoc = doc;
12584 newRoot->doc = doc;
Owen Taylor3473f882001-02-23 17:55:21 +000012585
Daniel Veillard0161e632008-08-28 15:36:32 +000012586 /*
Daniel Veillard87a764e2001-06-20 17:41:10 +000012587 * Get the 4 first bytes and decode the charset
12588 * if enc != XML_CHAR_ENCODING_NONE
12589 * plug some encoding conversion routines.
12590 */
12591 GROW;
Daniel Veillard4aede2e2003-10-17 12:43:59 +000012592 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
12593 start[0] = RAW;
12594 start[1] = NXT(1);
12595 start[2] = NXT(2);
12596 start[3] = NXT(3);
12597 enc = xmlDetectCharEncoding(start, 4);
12598 if (enc != XML_CHAR_ENCODING_NONE) {
12599 xmlSwitchEncoding(ctxt, enc);
12600 }
Daniel Veillard87a764e2001-06-20 17:41:10 +000012601 }
12602
Owen Taylor3473f882001-02-23 17:55:21 +000012603 /*
12604 * Parse a possible text declaration first
12605 */
Daniel Veillarda07050d2003-10-19 14:46:32 +000012606 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +000012607 xmlParseTextDecl(ctxt);
12608 }
12609
Owen Taylor3473f882001-02-23 17:55:21 +000012610 ctxt->instate = XML_PARSER_CONTENT;
Owen Taylor3473f882001-02-23 17:55:21 +000012611 ctxt->depth = depth;
12612
12613 xmlParseContent(ctxt);
Daniel Veillard0161e632008-08-28 15:36:32 +000012614
Daniel Veillard561b7f82002-03-20 21:55:57 +000012615 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012616 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard561b7f82002-03-20 21:55:57 +000012617 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012618 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000012619 }
12620 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012621 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000012622 }
12623
12624 if (!ctxt->wellFormed) {
12625 if (ctxt->errNo == 0)
Daniel Veillard7d515752003-09-26 19:12:37 +000012626 ret = XML_ERR_INTERNAL_ERROR;
Owen Taylor3473f882001-02-23 17:55:21 +000012627 else
William M. Brack7b9154b2003-09-27 19:23:50 +000012628 ret = (xmlParserErrors)ctxt->errNo;
Owen Taylor3473f882001-02-23 17:55:21 +000012629 } else {
12630 if (list != NULL) {
12631 xmlNodePtr cur;
12632
12633 /*
12634 * Return the newly created nodeset after unlinking it from
12635 * they pseudo parent.
12636 */
12637 cur = newDoc->children->children;
12638 *list = cur;
12639 while (cur != NULL) {
12640 cur->parent = NULL;
12641 cur = cur->next;
12642 }
12643 newDoc->children->children = NULL;
12644 }
Daniel Veillard7d515752003-09-26 19:12:37 +000012645 ret = XML_ERR_OK;
Owen Taylor3473f882001-02-23 17:55:21 +000012646 }
Daniel Veillard0161e632008-08-28 15:36:32 +000012647
12648 /*
12649 * Record in the parent context the number of entities replacement
12650 * done when parsing that reference.
12651 */
Daniel Veillard76d36452009-09-07 11:19:33 +020012652 if (oldctxt != NULL)
12653 oldctxt->nbentities += ctxt->nbentities;
12654
Daniel Veillard0161e632008-08-28 15:36:32 +000012655 /*
12656 * Also record the size of the entity parsed
12657 */
12658 if (ctxt->input != NULL) {
12659 oldctxt->sizeentities += ctxt->input->consumed;
12660 oldctxt->sizeentities += (ctxt->input->cur - ctxt->input->base);
12661 }
12662 /*
12663 * And record the last error if any
12664 */
12665 if (ctxt->lastError.code != XML_ERR_OK)
12666 xmlCopyError(&ctxt->lastError, &oldctxt->lastError);
12667
Owen Taylor3473f882001-02-23 17:55:21 +000012668 if (sax != NULL)
12669 ctxt->sax = oldsax;
Daniel Veillard0046c0f2003-02-23 13:52:30 +000012670 oldctxt->node_seq.maximum = ctxt->node_seq.maximum;
12671 oldctxt->node_seq.length = ctxt->node_seq.length;
12672 oldctxt->node_seq.buffer = ctxt->node_seq.buffer;
Daniel Veillard44e1dd02003-02-21 23:23:28 +000012673 ctxt->node_seq.maximum = 0;
12674 ctxt->node_seq.length = 0;
12675 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000012676 xmlFreeParserCtxt(ctxt);
12677 newDoc->intSubset = NULL;
12678 newDoc->extSubset = NULL;
12679 xmlFreeDoc(newDoc);
Daniel Veillard0161e632008-08-28 15:36:32 +000012680
Owen Taylor3473f882001-02-23 17:55:21 +000012681 return(ret);
12682}
12683
Daniel Veillard81273902003-09-30 00:43:48 +000012684#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000012685/**
Daniel Veillard257d9102001-05-08 10:41:44 +000012686 * xmlParseExternalEntity:
12687 * @doc: the document the chunk pertains to
12688 * @sax: the SAX handler bloc (possibly NULL)
12689 * @user_data: The user data returned on SAX callbacks (possibly NULL)
12690 * @depth: Used for loop detection, use 0
12691 * @URL: the URL for the entity to load
12692 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +000012693 * @lst: the return value for the set of parsed nodes
Daniel Veillard257d9102001-05-08 10:41:44 +000012694 *
12695 * Parse an external general entity
12696 * An external general parsed entity is well-formed if it matches the
12697 * production labeled extParsedEnt.
12698 *
12699 * [78] extParsedEnt ::= TextDecl? content
12700 *
12701 * Returns 0 if the entity is well formed, -1 in case of args problem and
12702 * the parser error code otherwise
12703 */
12704
12705int
12706xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardcda96922001-08-21 10:56:31 +000012707 int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *lst) {
Daniel Veillarda97a19b2001-05-20 13:19:52 +000012708 return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL,
Daniel Veillardcda96922001-08-21 10:56:31 +000012709 ID, lst));
Daniel Veillard257d9102001-05-08 10:41:44 +000012710}
12711
12712/**
Daniel Veillarde020c3a2001-03-21 18:06:15 +000012713 * xmlParseBalancedChunkMemory:
Owen Taylor3473f882001-02-23 17:55:21 +000012714 * @doc: the document the chunk pertains to
12715 * @sax: the SAX handler bloc (possibly NULL)
12716 * @user_data: The user data returned on SAX callbacks (possibly NULL)
12717 * @depth: Used for loop detection, use 0
12718 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
Daniel Veillardcda96922001-08-21 10:56:31 +000012719 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +000012720 *
12721 * Parse a well-balanced chunk of an XML document
12722 * called by the parser
12723 * The allowed sequence for the Well Balanced Chunk is the one defined by
12724 * the content production in the XML grammar:
12725 *
12726 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
12727 *
12728 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
12729 * the parser error code otherwise
12730 */
12731
12732int
12733xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
Daniel Veillardcda96922001-08-21 10:56:31 +000012734 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) {
Daniel Veillard58e44c92002-08-02 22:19:49 +000012735 return xmlParseBalancedChunkMemoryRecover( doc, sax, user_data,
12736 depth, string, lst, 0 );
12737}
Daniel Veillard81273902003-09-30 00:43:48 +000012738#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillard58e44c92002-08-02 22:19:49 +000012739
12740/**
Daniel Veillard328f48c2002-11-15 15:24:34 +000012741 * xmlParseBalancedChunkMemoryInternal:
12742 * @oldctxt: the existing parsing context
12743 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
12744 * @user_data: the user data field for the parser context
12745 * @lst: the return value for the set of parsed nodes
12746 *
12747 *
12748 * Parse a well-balanced chunk of an XML document
12749 * called by the parser
12750 * The allowed sequence for the Well Balanced Chunk is the one defined by
12751 * the content production in the XML grammar:
12752 *
12753 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
12754 *
Daniel Veillard7d515752003-09-26 19:12:37 +000012755 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
12756 * error code otherwise
Daniel Veillard0161e632008-08-28 15:36:32 +000012757 *
Daniel Veillard328f48c2002-11-15 15:24:34 +000012758 * In case recover is set to 1, the nodelist will not be empty even if
Daniel Veillard0161e632008-08-28 15:36:32 +000012759 * the parsed chunk is not well balanced.
Daniel Veillard328f48c2002-11-15 15:24:34 +000012760 */
Daniel Veillard7d515752003-09-26 19:12:37 +000012761static xmlParserErrors
Daniel Veillard328f48c2002-11-15 15:24:34 +000012762xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
12763 const xmlChar *string, void *user_data, xmlNodePtr *lst) {
12764 xmlParserCtxtPtr ctxt;
Daniel Veillard68e9e742002-11-16 15:35:11 +000012765 xmlDocPtr newDoc = NULL;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012766 xmlNodePtr newRoot;
Daniel Veillard328f48c2002-11-15 15:24:34 +000012767 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000012768 xmlNodePtr content = NULL;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012769 xmlNodePtr last = NULL;
Daniel Veillard328f48c2002-11-15 15:24:34 +000012770 int size;
Daniel Veillard7d515752003-09-26 19:12:37 +000012771 xmlParserErrors ret = XML_ERR_OK;
Daniel Veillard74eaec12009-08-26 15:57:20 +020012772#ifdef SAX2
12773 int i;
12774#endif
Daniel Veillard328f48c2002-11-15 15:24:34 +000012775
Daniel Veillard0161e632008-08-28 15:36:32 +000012776 if (((oldctxt->depth > 40) && ((oldctxt->options & XML_PARSE_HUGE) == 0)) ||
12777 (oldctxt->depth > 1024)) {
Daniel Veillard328f48c2002-11-15 15:24:34 +000012778 return(XML_ERR_ENTITY_LOOP);
12779 }
12780
12781
12782 if (lst != NULL)
12783 *lst = NULL;
12784 if (string == NULL)
William M. Brack7b9154b2003-09-27 19:23:50 +000012785 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard328f48c2002-11-15 15:24:34 +000012786
12787 size = xmlStrlen(string);
12788
12789 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
William M. Brack7b9154b2003-09-27 19:23:50 +000012790 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
Daniel Veillard328f48c2002-11-15 15:24:34 +000012791 if (user_data != NULL)
12792 ctxt->userData = user_data;
12793 else
12794 ctxt->userData = ctxt;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000012795 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
12796 ctxt->dict = oldctxt->dict;
Daniel Veillardfd343dc2003-10-31 10:55:22 +000012797 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
12798 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
12799 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
Daniel Veillard328f48c2002-11-15 15:24:34 +000012800
Daniel Veillard74eaec12009-08-26 15:57:20 +020012801#ifdef SAX2
12802 /* propagate namespaces down the entity */
12803 for (i = 0;i < oldctxt->nsNr;i += 2) {
12804 nsPush(ctxt, oldctxt->nsTab[i], oldctxt->nsTab[i+1]);
12805 }
12806#endif
12807
Daniel Veillard328f48c2002-11-15 15:24:34 +000012808 oldsax = ctxt->sax;
12809 ctxt->sax = oldctxt->sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000012810 xmlDetectSAX2(ctxt);
Daniel Veillard87ab1c12003-12-21 13:01:56 +000012811 ctxt->replaceEntities = oldctxt->replaceEntities;
12812 ctxt->options = oldctxt->options;
Daniel Veillard0161e632008-08-28 15:36:32 +000012813
Daniel Veillarde1ca5032002-12-09 14:13:43 +000012814 ctxt->_private = oldctxt->_private;
Daniel Veillard328f48c2002-11-15 15:24:34 +000012815 if (oldctxt->myDoc == NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000012816 newDoc = xmlNewDoc(BAD_CAST "1.0");
12817 if (newDoc == NULL) {
12818 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000012819 ctxt->dict = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000012820 xmlFreeParserCtxt(ctxt);
William M. Brack7b9154b2003-09-27 19:23:50 +000012821 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard68e9e742002-11-16 15:35:11 +000012822 }
Daniel Veillardae0765b2008-07-31 19:54:59 +000012823 newDoc->properties = XML_DOC_INTERNAL;
Daniel Veillard03a53c32004-10-26 16:06:51 +000012824 newDoc->dict = ctxt->dict;
12825 xmlDictReference(newDoc->dict);
Daniel Veillard328f48c2002-11-15 15:24:34 +000012826 ctxt->myDoc = newDoc;
Daniel Veillard68e9e742002-11-16 15:35:11 +000012827 } else {
12828 ctxt->myDoc = oldctxt->myDoc;
12829 content = ctxt->myDoc->children;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012830 last = ctxt->myDoc->last;
Daniel Veillard328f48c2002-11-15 15:24:34 +000012831 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012832 newRoot = xmlNewDocNode(ctxt->myDoc, NULL, BAD_CAST "pseudoroot", NULL);
12833 if (newRoot == NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000012834 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000012835 ctxt->dict = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000012836 xmlFreeParserCtxt(ctxt);
Daniel Veillard03a53c32004-10-26 16:06:51 +000012837 if (newDoc != NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000012838 xmlFreeDoc(newDoc);
Daniel Veillard03a53c32004-10-26 16:06:51 +000012839 }
William M. Brack7b9154b2003-09-27 19:23:50 +000012840 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard68e9e742002-11-16 15:35:11 +000012841 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012842 ctxt->myDoc->children = NULL;
12843 ctxt->myDoc->last = NULL;
12844 xmlAddChild((xmlNodePtr) ctxt->myDoc, newRoot);
Daniel Veillard68e9e742002-11-16 15:35:11 +000012845 nodePush(ctxt, ctxt->myDoc->children);
Daniel Veillard328f48c2002-11-15 15:24:34 +000012846 ctxt->instate = XML_PARSER_CONTENT;
12847 ctxt->depth = oldctxt->depth + 1;
12848
Daniel Veillard328f48c2002-11-15 15:24:34 +000012849 ctxt->validate = 0;
12850 ctxt->loadsubset = oldctxt->loadsubset;
Daniel Veillardef8dd7b2003-03-23 12:02:56 +000012851 if ((oldctxt->validate) || (oldctxt->replaceEntities != 0)) {
12852 /*
12853 * ID/IDREF registration will be done in xmlValidateElement below
12854 */
12855 ctxt->loadsubset |= XML_SKIP_IDS;
12856 }
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000012857 ctxt->dictNames = oldctxt->dictNames;
Daniel Veillard95d2d5b2003-10-27 14:54:49 +000012858 ctxt->attsDefault = oldctxt->attsDefault;
12859 ctxt->attsSpecial = oldctxt->attsSpecial;
Daniel Veillard328f48c2002-11-15 15:24:34 +000012860
Daniel Veillard68e9e742002-11-16 15:35:11 +000012861 xmlParseContent(ctxt);
Daniel Veillard328f48c2002-11-15 15:24:34 +000012862 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012863 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000012864 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012865 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000012866 }
Daniel Veillard68e9e742002-11-16 15:35:11 +000012867 if (ctxt->node != ctxt->myDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012868 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000012869 }
12870
12871 if (!ctxt->wellFormed) {
12872 if (ctxt->errNo == 0)
Daniel Veillard7d515752003-09-26 19:12:37 +000012873 ret = XML_ERR_INTERNAL_ERROR;
Daniel Veillard328f48c2002-11-15 15:24:34 +000012874 else
William M. Brack7b9154b2003-09-27 19:23:50 +000012875 ret = (xmlParserErrors)ctxt->errNo;
Daniel Veillard328f48c2002-11-15 15:24:34 +000012876 } else {
William M. Brack7b9154b2003-09-27 19:23:50 +000012877 ret = XML_ERR_OK;
Daniel Veillard328f48c2002-11-15 15:24:34 +000012878 }
Daniel Veillard0161e632008-08-28 15:36:32 +000012879
William M. Brack7b9154b2003-09-27 19:23:50 +000012880 if ((lst != NULL) && (ret == XML_ERR_OK)) {
Daniel Veillard328f48c2002-11-15 15:24:34 +000012881 xmlNodePtr cur;
12882
12883 /*
12884 * Return the newly created nodeset after unlinking it from
12885 * they pseudo parent.
12886 */
Daniel Veillard68e9e742002-11-16 15:35:11 +000012887 cur = ctxt->myDoc->children->children;
Daniel Veillard328f48c2002-11-15 15:24:34 +000012888 *lst = cur;
12889 while (cur != NULL) {
Daniel Veillard4432df22003-09-28 18:58:27 +000012890#ifdef LIBXML_VALID_ENABLED
Daniel Veillard8874b942005-08-25 13:19:21 +000012891 if ((oldctxt->validate) && (oldctxt->wellFormed) &&
12892 (oldctxt->myDoc) && (oldctxt->myDoc->intSubset) &&
12893 (cur->type == XML_ELEMENT_NODE)) {
Daniel Veillard8d589042003-02-04 15:07:21 +000012894 oldctxt->valid &= xmlValidateElement(&oldctxt->vctxt,
12895 oldctxt->myDoc, cur);
12896 }
Daniel Veillard4432df22003-09-28 18:58:27 +000012897#endif /* LIBXML_VALID_ENABLED */
Daniel Veillard328f48c2002-11-15 15:24:34 +000012898 cur->parent = NULL;
12899 cur = cur->next;
12900 }
Daniel Veillard68e9e742002-11-16 15:35:11 +000012901 ctxt->myDoc->children->children = NULL;
12902 }
12903 if (ctxt->myDoc != NULL) {
12904 xmlFreeNode(ctxt->myDoc->children);
12905 ctxt->myDoc->children = content;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012906 ctxt->myDoc->last = last;
Daniel Veillard328f48c2002-11-15 15:24:34 +000012907 }
Daniel Veillard0161e632008-08-28 15:36:32 +000012908
12909 /*
12910 * Record in the parent context the number of entities replacement
12911 * done when parsing that reference.
12912 */
Daniel Veillardd44b9362009-09-07 12:15:08 +020012913 if (oldctxt != NULL)
12914 oldctxt->nbentities += ctxt->nbentities;
12915
Daniel Veillard0161e632008-08-28 15:36:32 +000012916 /*
12917 * Also record the last error if any
12918 */
12919 if (ctxt->lastError.code != XML_ERR_OK)
12920 xmlCopyError(&ctxt->lastError, &oldctxt->lastError);
12921
Daniel Veillard328f48c2002-11-15 15:24:34 +000012922 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000012923 ctxt->dict = NULL;
Daniel Veillard95d2d5b2003-10-27 14:54:49 +000012924 ctxt->attsDefault = NULL;
12925 ctxt->attsSpecial = NULL;
Daniel Veillard328f48c2002-11-15 15:24:34 +000012926 xmlFreeParserCtxt(ctxt);
Daniel Veillard03a53c32004-10-26 16:06:51 +000012927 if (newDoc != NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000012928 xmlFreeDoc(newDoc);
Daniel Veillard03a53c32004-10-26 16:06:51 +000012929 }
Daniel Veillard0161e632008-08-28 15:36:32 +000012930
Daniel Veillard328f48c2002-11-15 15:24:34 +000012931 return(ret);
12932}
12933
Daniel Veillard29b17482004-08-16 00:39:03 +000012934/**
12935 * xmlParseInNodeContext:
12936 * @node: the context node
12937 * @data: the input string
12938 * @datalen: the input string length in bytes
12939 * @options: a combination of xmlParserOption
12940 * @lst: the return value for the set of parsed nodes
12941 *
12942 * Parse a well-balanced chunk of an XML document
12943 * within the context (DTD, namespaces, etc ...) of the given node.
12944 *
12945 * The allowed sequence for the data is a Well Balanced Chunk defined by
12946 * the content production in the XML grammar:
12947 *
12948 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
12949 *
12950 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
12951 * error code otherwise
12952 */
12953xmlParserErrors
12954xmlParseInNodeContext(xmlNodePtr node, const char *data, int datalen,
12955 int options, xmlNodePtr *lst) {
12956#ifdef SAX2
12957 xmlParserCtxtPtr ctxt;
12958 xmlDocPtr doc = NULL;
12959 xmlNodePtr fake, cur;
12960 int nsnr = 0;
12961
12962 xmlParserErrors ret = XML_ERR_OK;
12963
12964 /*
12965 * check all input parameters, grab the document
12966 */
12967 if ((lst == NULL) || (node == NULL) || (data == NULL) || (datalen < 0))
12968 return(XML_ERR_INTERNAL_ERROR);
12969 switch (node->type) {
12970 case XML_ELEMENT_NODE:
12971 case XML_ATTRIBUTE_NODE:
12972 case XML_TEXT_NODE:
12973 case XML_CDATA_SECTION_NODE:
12974 case XML_ENTITY_REF_NODE:
12975 case XML_PI_NODE:
12976 case XML_COMMENT_NODE:
12977 case XML_DOCUMENT_NODE:
12978 case XML_HTML_DOCUMENT_NODE:
12979 break;
12980 default:
12981 return(XML_ERR_INTERNAL_ERROR);
12982
12983 }
12984 while ((node != NULL) && (node->type != XML_ELEMENT_NODE) &&
12985 (node->type != XML_DOCUMENT_NODE) &&
12986 (node->type != XML_HTML_DOCUMENT_NODE))
12987 node = node->parent;
12988 if (node == NULL)
12989 return(XML_ERR_INTERNAL_ERROR);
12990 if (node->type == XML_ELEMENT_NODE)
12991 doc = node->doc;
12992 else
12993 doc = (xmlDocPtr) node;
12994 if (doc == NULL)
12995 return(XML_ERR_INTERNAL_ERROR);
12996
12997 /*
12998 * allocate a context and set-up everything not related to the
12999 * node position in the tree
13000 */
13001 if (doc->type == XML_DOCUMENT_NODE)
13002 ctxt = xmlCreateMemoryParserCtxt((char *) data, datalen);
13003#ifdef LIBXML_HTML_ENABLED
Daniel Veillarde20fb5a2010-01-29 20:47:08 +010013004 else if (doc->type == XML_HTML_DOCUMENT_NODE) {
Daniel Veillard29b17482004-08-16 00:39:03 +000013005 ctxt = htmlCreateMemoryParserCtxt((char *) data, datalen);
Daniel Veillarde20fb5a2010-01-29 20:47:08 +010013006 /*
13007 * When parsing in context, it makes no sense to add implied
13008 * elements like html/body/etc...
13009 */
13010 options |= HTML_PARSE_NOIMPLIED;
13011 }
Daniel Veillard29b17482004-08-16 00:39:03 +000013012#endif
13013 else
13014 return(XML_ERR_INTERNAL_ERROR);
13015
13016 if (ctxt == NULL)
13017 return(XML_ERR_NO_MEMORY);
William M. Brackc3f81342004-10-03 01:22:44 +000013018
Daniel Veillard47cd14e2010-02-04 18:49:01 +010013019 /*
William M. Brackc3f81342004-10-03 01:22:44 +000013020 * Use input doc's dict if present, else assure XML_PARSE_NODICT is set.
13021 * We need a dictionary for xmlDetectSAX2, so if there's no doc dict
13022 * we must wait until the last moment to free the original one.
13023 */
Daniel Veillard29b17482004-08-16 00:39:03 +000013024 if (doc->dict != NULL) {
William M. Brackc3f81342004-10-03 01:22:44 +000013025 if (ctxt->dict != NULL)
Daniel Veillard29b17482004-08-16 00:39:03 +000013026 xmlDictFree(ctxt->dict);
13027 ctxt->dict = doc->dict;
William M. Brackc3f81342004-10-03 01:22:44 +000013028 } else
13029 options |= XML_PARSE_NODICT;
13030
Daniel Veillard47cd14e2010-02-04 18:49:01 +010013031 if (doc->encoding != NULL) {
13032 xmlCharEncodingHandlerPtr hdlr;
13033
13034 if (ctxt->encoding != NULL)
13035 xmlFree((xmlChar *) ctxt->encoding);
13036 ctxt->encoding = xmlStrdup((const xmlChar *) doc->encoding);
13037
13038 hdlr = xmlFindCharEncodingHandler(doc->encoding);
13039 if (hdlr != NULL) {
13040 xmlSwitchToEncoding(ctxt, hdlr);
13041 } else {
13042 return(XML_ERR_UNSUPPORTED_ENCODING);
13043 }
13044 }
13045
Daniel Veillard37334572008-07-31 08:20:02 +000013046 xmlCtxtUseOptionsInternal(ctxt, options, NULL);
Daniel Veillard29b17482004-08-16 00:39:03 +000013047 xmlDetectSAX2(ctxt);
13048 ctxt->myDoc = doc;
13049
Daniel Veillard47cd14e2010-02-04 18:49:01 +010013050 fake = xmlNewComment(NULL);
13051 if (fake == NULL) {
13052 xmlFreeParserCtxt(ctxt);
13053 return(XML_ERR_NO_MEMORY);
13054 }
13055 xmlAddChild(node, fake);
13056
Daniel Veillard29b17482004-08-16 00:39:03 +000013057 if (node->type == XML_ELEMENT_NODE) {
13058 nodePush(ctxt, node);
13059 /*
13060 * initialize the SAX2 namespaces stack
13061 */
13062 cur = node;
13063 while ((cur != NULL) && (cur->type == XML_ELEMENT_NODE)) {
13064 xmlNsPtr ns = cur->nsDef;
13065 const xmlChar *iprefix, *ihref;
13066
13067 while (ns != NULL) {
13068 if (ctxt->dict) {
13069 iprefix = xmlDictLookup(ctxt->dict, ns->prefix, -1);
13070 ihref = xmlDictLookup(ctxt->dict, ns->href, -1);
13071 } else {
13072 iprefix = ns->prefix;
13073 ihref = ns->href;
13074 }
13075
13076 if (xmlGetNamespace(ctxt, iprefix) == NULL) {
13077 nsPush(ctxt, iprefix, ihref);
13078 nsnr++;
13079 }
13080 ns = ns->next;
13081 }
13082 cur = cur->parent;
13083 }
13084 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillard0161e632008-08-28 15:36:32 +000013085 }
Daniel Veillard29b17482004-08-16 00:39:03 +000013086
13087 if ((ctxt->validate) || (ctxt->replaceEntities != 0)) {
13088 /*
13089 * ID/IDREF registration will be done in xmlValidateElement below
13090 */
13091 ctxt->loadsubset |= XML_SKIP_IDS;
13092 }
13093
Daniel Veillard499cc922006-01-18 17:22:35 +000013094#ifdef LIBXML_HTML_ENABLED
13095 if (doc->type == XML_HTML_DOCUMENT_NODE)
13096 __htmlParseContent(ctxt);
13097 else
13098#endif
13099 xmlParseContent(ctxt);
13100
Daniel Veillard29b17482004-08-16 00:39:03 +000013101 nsPop(ctxt, nsnr);
13102 if ((RAW == '<') && (NXT(1) == '/')) {
13103 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13104 } else if (RAW != 0) {
13105 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13106 }
13107 if ((ctxt->node != NULL) && (ctxt->node != node)) {
13108 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13109 ctxt->wellFormed = 0;
13110 }
13111
13112 if (!ctxt->wellFormed) {
13113 if (ctxt->errNo == 0)
13114 ret = XML_ERR_INTERNAL_ERROR;
13115 else
13116 ret = (xmlParserErrors)ctxt->errNo;
13117 } else {
13118 ret = XML_ERR_OK;
13119 }
Daniel Veillard0161e632008-08-28 15:36:32 +000013120
Daniel Veillard29b17482004-08-16 00:39:03 +000013121 /*
13122 * Return the newly created nodeset after unlinking it from
13123 * the pseudo sibling.
13124 */
Daniel Veillard0161e632008-08-28 15:36:32 +000013125
Daniel Veillard29b17482004-08-16 00:39:03 +000013126 cur = fake->next;
13127 fake->next = NULL;
13128 node->last = fake;
13129
13130 if (cur != NULL) {
13131 cur->prev = NULL;
13132 }
13133
13134 *lst = cur;
13135
13136 while (cur != NULL) {
13137 cur->parent = NULL;
13138 cur = cur->next;
13139 }
13140
13141 xmlUnlinkNode(fake);
13142 xmlFreeNode(fake);
13143
13144
13145 if (ret != XML_ERR_OK) {
13146 xmlFreeNodeList(*lst);
13147 *lst = NULL;
13148 }
William M. Brackc3f81342004-10-03 01:22:44 +000013149
William M. Brackb7b54de2004-10-06 16:38:01 +000013150 if (doc->dict != NULL)
13151 ctxt->dict = NULL;
Daniel Veillard29b17482004-08-16 00:39:03 +000013152 xmlFreeParserCtxt(ctxt);
Daniel Veillard0161e632008-08-28 15:36:32 +000013153
Daniel Veillard29b17482004-08-16 00:39:03 +000013154 return(ret);
13155#else /* !SAX2 */
13156 return(XML_ERR_INTERNAL_ERROR);
13157#endif
13158}
13159
Daniel Veillard81273902003-09-30 00:43:48 +000013160#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard328f48c2002-11-15 15:24:34 +000013161/**
Daniel Veillard58e44c92002-08-02 22:19:49 +000013162 * xmlParseBalancedChunkMemoryRecover:
13163 * @doc: the document the chunk pertains to
13164 * @sax: the SAX handler bloc (possibly NULL)
13165 * @user_data: The user data returned on SAX callbacks (possibly NULL)
13166 * @depth: Used for loop detection, use 0
13167 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
13168 * @lst: the return value for the set of parsed nodes
13169 * @recover: return nodes even if the data is broken (use 0)
13170 *
13171 *
13172 * Parse a well-balanced chunk of an XML document
13173 * called by the parser
13174 * The allowed sequence for the Well Balanced Chunk is the one defined by
13175 * the content production in the XML grammar:
13176 *
13177 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13178 *
13179 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
13180 * the parser error code otherwise
Daniel Veillard2135fc22008-04-04 16:10:51 +000013181 *
Daniel Veillard58e44c92002-08-02 22:19:49 +000013182 * In case recover is set to 1, the nodelist will not be empty even if
Daniel Veillard2135fc22008-04-04 16:10:51 +000013183 * the parsed chunk is not well balanced, assuming the parsing succeeded to
13184 * some extent.
Daniel Veillard58e44c92002-08-02 22:19:49 +000013185 */
13186int
13187xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax,
Daniel Veillard2135fc22008-04-04 16:10:51 +000013188 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst,
Daniel Veillard58e44c92002-08-02 22:19:49 +000013189 int recover) {
Owen Taylor3473f882001-02-23 17:55:21 +000013190 xmlParserCtxtPtr ctxt;
13191 xmlDocPtr newDoc;
13192 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000013193 xmlNodePtr content, newRoot;
Owen Taylor3473f882001-02-23 17:55:21 +000013194 int size;
13195 int ret = 0;
13196
Daniel Veillard0161e632008-08-28 15:36:32 +000013197 if (depth > 40) {
Owen Taylor3473f882001-02-23 17:55:21 +000013198 return(XML_ERR_ENTITY_LOOP);
13199 }
13200
13201
Daniel Veillardcda96922001-08-21 10:56:31 +000013202 if (lst != NULL)
13203 *lst = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000013204 if (string == NULL)
13205 return(-1);
13206
13207 size = xmlStrlen(string);
13208
13209 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
13210 if (ctxt == NULL) return(-1);
13211 ctxt->userData = ctxt;
13212 if (sax != NULL) {
13213 oldsax = ctxt->sax;
13214 ctxt->sax = sax;
13215 if (user_data != NULL)
13216 ctxt->userData = user_data;
13217 }
13218 newDoc = xmlNewDoc(BAD_CAST "1.0");
13219 if (newDoc == NULL) {
13220 xmlFreeParserCtxt(ctxt);
13221 return(-1);
13222 }
Daniel Veillardae0765b2008-07-31 19:54:59 +000013223 newDoc->properties = XML_DOC_INTERNAL;
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000013224 if ((doc != NULL) && (doc->dict != NULL)) {
13225 xmlDictFree(ctxt->dict);
13226 ctxt->dict = doc->dict;
13227 xmlDictReference(ctxt->dict);
13228 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
13229 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
13230 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
13231 ctxt->dictNames = 1;
13232 } else {
Daniel Veillard37334572008-07-31 08:20:02 +000013233 xmlCtxtUseOptionsInternal(ctxt, XML_PARSE_NODICT, NULL);
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000013234 }
Owen Taylor3473f882001-02-23 17:55:21 +000013235 if (doc != NULL) {
13236 newDoc->intSubset = doc->intSubset;
13237 newDoc->extSubset = doc->extSubset;
13238 }
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000013239 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
13240 if (newRoot == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000013241 if (sax != NULL)
13242 ctxt->sax = oldsax;
13243 xmlFreeParserCtxt(ctxt);
13244 newDoc->intSubset = NULL;
13245 newDoc->extSubset = NULL;
13246 xmlFreeDoc(newDoc);
13247 return(-1);
13248 }
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000013249 xmlAddChild((xmlNodePtr) newDoc, newRoot);
13250 nodePush(ctxt, newRoot);
Owen Taylor3473f882001-02-23 17:55:21 +000013251 if (doc == NULL) {
13252 ctxt->myDoc = newDoc;
13253 } else {
Daniel Veillard42766c02002-08-22 20:52:17 +000013254 ctxt->myDoc = newDoc;
Owen Taylor3473f882001-02-23 17:55:21 +000013255 newDoc->children->doc = doc;
Rob Richardsa02f1992006-09-16 14:04:26 +000013256 /* Ensure that doc has XML spec namespace */
13257 xmlSearchNsByHref(doc, (xmlNodePtr)doc, XML_XML_NAMESPACE);
13258 newDoc->oldNs = doc->oldNs;
Owen Taylor3473f882001-02-23 17:55:21 +000013259 }
13260 ctxt->instate = XML_PARSER_CONTENT;
13261 ctxt->depth = depth;
13262
13263 /*
13264 * Doing validity checking on chunk doesn't make sense
13265 */
13266 ctxt->validate = 0;
13267 ctxt->loadsubset = 0;
Daniel Veillarde57ec792003-09-10 10:50:59 +000013268 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000013269
Daniel Veillardb39bc392002-10-26 19:29:51 +000013270 if ( doc != NULL ){
13271 content = doc->children;
13272 doc->children = NULL;
13273 xmlParseContent(ctxt);
13274 doc->children = content;
13275 }
13276 else {
13277 xmlParseContent(ctxt);
13278 }
Owen Taylor3473f882001-02-23 17:55:21 +000013279 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000013280 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000013281 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000013282 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000013283 }
13284 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000013285 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000013286 }
13287
13288 if (!ctxt->wellFormed) {
13289 if (ctxt->errNo == 0)
13290 ret = 1;
13291 else
13292 ret = ctxt->errNo;
13293 } else {
Daniel Veillard58e44c92002-08-02 22:19:49 +000013294 ret = 0;
13295 }
Daniel Veillard0161e632008-08-28 15:36:32 +000013296
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000013297 if ((lst != NULL) && ((ret == 0) || (recover == 1))) {
13298 xmlNodePtr cur;
Owen Taylor3473f882001-02-23 17:55:21 +000013299
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000013300 /*
13301 * Return the newly created nodeset after unlinking it from
13302 * they pseudo parent.
13303 */
13304 cur = newDoc->children->children;
13305 *lst = cur;
13306 while (cur != NULL) {
13307 xmlSetTreeDoc(cur, doc);
13308 cur->parent = NULL;
13309 cur = cur->next;
Owen Taylor3473f882001-02-23 17:55:21 +000013310 }
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000013311 newDoc->children->children = NULL;
13312 }
Daniel Veillard0161e632008-08-28 15:36:32 +000013313
13314 if (sax != NULL)
Owen Taylor3473f882001-02-23 17:55:21 +000013315 ctxt->sax = oldsax;
13316 xmlFreeParserCtxt(ctxt);
13317 newDoc->intSubset = NULL;
13318 newDoc->extSubset = NULL;
Rob Richardsa02f1992006-09-16 14:04:26 +000013319 newDoc->oldNs = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000013320 xmlFreeDoc(newDoc);
Daniel Veillard0161e632008-08-28 15:36:32 +000013321
Owen Taylor3473f882001-02-23 17:55:21 +000013322 return(ret);
13323}
13324
13325/**
13326 * xmlSAXParseEntity:
13327 * @sax: the SAX handler block
13328 * @filename: the filename
13329 *
13330 * parse an XML external entity out of context and build a tree.
13331 * It use the given SAX function block to handle the parsing callback.
13332 * If sax is NULL, fallback to the default DOM tree building routines.
13333 *
13334 * [78] extParsedEnt ::= TextDecl? content
13335 *
13336 * This correspond to a "Well Balanced" chunk
13337 *
13338 * Returns the resulting document tree
13339 */
13340
13341xmlDocPtr
13342xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
13343 xmlDocPtr ret;
13344 xmlParserCtxtPtr ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000013345
13346 ctxt = xmlCreateFileParserCtxt(filename);
13347 if (ctxt == NULL) {
13348 return(NULL);
13349 }
13350 if (sax != NULL) {
13351 if (ctxt->sax != NULL)
13352 xmlFree(ctxt->sax);
13353 ctxt->sax = sax;
13354 ctxt->userData = NULL;
13355 }
13356
Owen Taylor3473f882001-02-23 17:55:21 +000013357 xmlParseExtParsedEnt(ctxt);
13358
13359 if (ctxt->wellFormed)
13360 ret = ctxt->myDoc;
13361 else {
13362 ret = NULL;
13363 xmlFreeDoc(ctxt->myDoc);
13364 ctxt->myDoc = NULL;
13365 }
13366 if (sax != NULL)
13367 ctxt->sax = NULL;
13368 xmlFreeParserCtxt(ctxt);
Daniel Veillard0161e632008-08-28 15:36:32 +000013369
Owen Taylor3473f882001-02-23 17:55:21 +000013370 return(ret);
13371}
13372
13373/**
13374 * xmlParseEntity:
13375 * @filename: the filename
13376 *
13377 * parse an XML external entity out of context and build a tree.
13378 *
13379 * [78] extParsedEnt ::= TextDecl? content
13380 *
13381 * This correspond to a "Well Balanced" chunk
13382 *
13383 * Returns the resulting document tree
13384 */
13385
13386xmlDocPtr
13387xmlParseEntity(const char *filename) {
13388 return(xmlSAXParseEntity(NULL, filename));
13389}
Daniel Veillard81273902003-09-30 00:43:48 +000013390#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000013391
13392/**
Rob Richards9c0aa472009-03-26 18:10:19 +000013393 * xmlCreateEntityParserCtxtInternal:
Owen Taylor3473f882001-02-23 17:55:21 +000013394 * @URL: the entity URL
13395 * @ID: the entity PUBLIC ID
Daniel Veillardcbaf3992001-12-31 16:16:02 +000013396 * @base: a possible base for the target URI
Rob Richards9c0aa472009-03-26 18:10:19 +000013397 * @pctx: parser context used to set options on new context
Owen Taylor3473f882001-02-23 17:55:21 +000013398 *
13399 * Create a parser context for an external entity
13400 * Automatic support for ZLIB/Compress compressed document is provided
13401 * by default if found at compile-time.
13402 *
13403 * Returns the new parser context or NULL
13404 */
Rob Richards9c0aa472009-03-26 18:10:19 +000013405static xmlParserCtxtPtr
13406xmlCreateEntityParserCtxtInternal(const xmlChar *URL, const xmlChar *ID,
13407 const xmlChar *base, xmlParserCtxtPtr pctx) {
Owen Taylor3473f882001-02-23 17:55:21 +000013408 xmlParserCtxtPtr ctxt;
13409 xmlParserInputPtr inputStream;
13410 char *directory = NULL;
13411 xmlChar *uri;
Daniel Veillard0161e632008-08-28 15:36:32 +000013412
Owen Taylor3473f882001-02-23 17:55:21 +000013413 ctxt = xmlNewParserCtxt();
13414 if (ctxt == NULL) {
13415 return(NULL);
13416 }
13417
Daniel Veillard48247b42009-07-10 16:12:46 +020013418 if (pctx != NULL) {
13419 ctxt->options = pctx->options;
13420 ctxt->_private = pctx->_private;
Rob Richards9c0aa472009-03-26 18:10:19 +000013421 }
13422
Owen Taylor3473f882001-02-23 17:55:21 +000013423 uri = xmlBuildURI(URL, base);
13424
13425 if (uri == NULL) {
13426 inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
13427 if (inputStream == NULL) {
13428 xmlFreeParserCtxt(ctxt);
13429 return(NULL);
13430 }
13431
13432 inputPush(ctxt, inputStream);
13433
13434 if ((ctxt->directory == NULL) && (directory == NULL))
13435 directory = xmlParserGetDirectory((char *)URL);
13436 if ((ctxt->directory == NULL) && (directory != NULL))
13437 ctxt->directory = directory;
13438 } else {
13439 inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt);
13440 if (inputStream == NULL) {
13441 xmlFree(uri);
13442 xmlFreeParserCtxt(ctxt);
13443 return(NULL);
13444 }
13445
13446 inputPush(ctxt, inputStream);
13447
13448 if ((ctxt->directory == NULL) && (directory == NULL))
13449 directory = xmlParserGetDirectory((char *)uri);
13450 if ((ctxt->directory == NULL) && (directory != NULL))
13451 ctxt->directory = directory;
13452 xmlFree(uri);
13453 }
Owen Taylor3473f882001-02-23 17:55:21 +000013454 return(ctxt);
13455}
13456
Rob Richards9c0aa472009-03-26 18:10:19 +000013457/**
13458 * xmlCreateEntityParserCtxt:
13459 * @URL: the entity URL
13460 * @ID: the entity PUBLIC ID
13461 * @base: a possible base for the target URI
13462 *
13463 * Create a parser context for an external entity
13464 * Automatic support for ZLIB/Compress compressed document is provided
13465 * by default if found at compile-time.
13466 *
13467 * Returns the new parser context or NULL
13468 */
13469xmlParserCtxtPtr
13470xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
13471 const xmlChar *base) {
13472 return xmlCreateEntityParserCtxtInternal(URL, ID, base, NULL);
13473
13474}
13475
Owen Taylor3473f882001-02-23 17:55:21 +000013476/************************************************************************
13477 * *
Daniel Veillard0161e632008-08-28 15:36:32 +000013478 * Front ends when parsing from a file *
Owen Taylor3473f882001-02-23 17:55:21 +000013479 * *
13480 ************************************************************************/
13481
13482/**
Daniel Veillard61b93382003-11-03 14:28:31 +000013483 * xmlCreateURLParserCtxt:
13484 * @filename: the filename or URL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013485 * @options: a combination of xmlParserOption
Owen Taylor3473f882001-02-23 17:55:21 +000013486 *
Daniel Veillard61b93382003-11-03 14:28:31 +000013487 * Create a parser context for a file or URL content.
Owen Taylor3473f882001-02-23 17:55:21 +000013488 * Automatic support for ZLIB/Compress compressed document is provided
Daniel Veillard61b93382003-11-03 14:28:31 +000013489 * by default if found at compile-time and for file accesses
Owen Taylor3473f882001-02-23 17:55:21 +000013490 *
13491 * Returns the new parser context or NULL
13492 */
13493xmlParserCtxtPtr
Daniel Veillard61b93382003-11-03 14:28:31 +000013494xmlCreateURLParserCtxt(const char *filename, int options)
Owen Taylor3473f882001-02-23 17:55:21 +000013495{
13496 xmlParserCtxtPtr ctxt;
13497 xmlParserInputPtr inputStream;
Owen Taylor3473f882001-02-23 17:55:21 +000013498 char *directory = NULL;
13499
Owen Taylor3473f882001-02-23 17:55:21 +000013500 ctxt = xmlNewParserCtxt();
13501 if (ctxt == NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +000013502 xmlErrMemory(NULL, "cannot allocate parser context");
Owen Taylor3473f882001-02-23 17:55:21 +000013503 return(NULL);
13504 }
13505
Daniel Veillarddf292f72005-01-16 19:00:15 +000013506 if (options)
Daniel Veillard37334572008-07-31 08:20:02 +000013507 xmlCtxtUseOptionsInternal(ctxt, options, NULL);
Daniel Veillarddf292f72005-01-16 19:00:15 +000013508 ctxt->linenumbers = 1;
Daniel Veillard37334572008-07-31 08:20:02 +000013509
Daniel Veillard4e9b1bc2003-06-09 10:30:33 +000013510 inputStream = xmlLoadExternalEntity(filename, NULL, ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000013511 if (inputStream == NULL) {
13512 xmlFreeParserCtxt(ctxt);
13513 return(NULL);
13514 }
13515
Owen Taylor3473f882001-02-23 17:55:21 +000013516 inputPush(ctxt, inputStream);
13517 if ((ctxt->directory == NULL) && (directory == NULL))
Igor Zlatkovic5f9fada2003-02-19 14:51:00 +000013518 directory = xmlParserGetDirectory(filename);
Owen Taylor3473f882001-02-23 17:55:21 +000013519 if ((ctxt->directory == NULL) && (directory != NULL))
13520 ctxt->directory = directory;
13521
13522 return(ctxt);
13523}
13524
Daniel Veillard61b93382003-11-03 14:28:31 +000013525/**
13526 * xmlCreateFileParserCtxt:
13527 * @filename: the filename
13528 *
13529 * Create a parser context for a file content.
13530 * Automatic support for ZLIB/Compress compressed document is provided
13531 * by default if found at compile-time.
13532 *
13533 * Returns the new parser context or NULL
13534 */
13535xmlParserCtxtPtr
13536xmlCreateFileParserCtxt(const char *filename)
13537{
13538 return(xmlCreateURLParserCtxt(filename, 0));
13539}
13540
Daniel Veillard81273902003-09-30 00:43:48 +000013541#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000013542/**
Daniel Veillarda293c322001-10-02 13:54:14 +000013543 * xmlSAXParseFileWithData:
Owen Taylor3473f882001-02-23 17:55:21 +000013544 * @sax: the SAX handler block
13545 * @filename: the filename
13546 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
13547 * documents
Daniel Veillarda293c322001-10-02 13:54:14 +000013548 * @data: the userdata
Owen Taylor3473f882001-02-23 17:55:21 +000013549 *
13550 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
13551 * compressed document is provided by default if found at compile-time.
13552 * It use the given SAX function block to handle the parsing callback.
13553 * If sax is NULL, fallback to the default DOM tree building routines.
13554 *
Daniel Veillarde19fc232002-04-22 16:01:24 +000013555 * User data (void *) is stored within the parser context in the
13556 * context's _private member, so it is available nearly everywhere in libxml
Daniel Veillarda293c322001-10-02 13:54:14 +000013557 *
Owen Taylor3473f882001-02-23 17:55:21 +000013558 * Returns the resulting document tree
13559 */
13560
13561xmlDocPtr
Daniel Veillarda293c322001-10-02 13:54:14 +000013562xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename,
13563 int recovery, void *data) {
Owen Taylor3473f882001-02-23 17:55:21 +000013564 xmlDocPtr ret;
13565 xmlParserCtxtPtr ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000013566
Daniel Veillard635ef722001-10-29 11:48:19 +000013567 xmlInitParser();
13568
Owen Taylor3473f882001-02-23 17:55:21 +000013569 ctxt = xmlCreateFileParserCtxt(filename);
13570 if (ctxt == NULL) {
13571 return(NULL);
13572 }
13573 if (sax != NULL) {
13574 if (ctxt->sax != NULL)
13575 xmlFree(ctxt->sax);
13576 ctxt->sax = sax;
Owen Taylor3473f882001-02-23 17:55:21 +000013577 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000013578 xmlDetectSAX2(ctxt);
Daniel Veillarda293c322001-10-02 13:54:14 +000013579 if (data!=NULL) {
Daniel Veillardc790bf42003-10-11 10:50:10 +000013580 ctxt->_private = data;
Daniel Veillarda293c322001-10-02 13:54:14 +000013581 }
Owen Taylor3473f882001-02-23 17:55:21 +000013582
Daniel Veillard37d2d162008-03-14 10:54:00 +000013583 if (ctxt->directory == NULL)
13584 ctxt->directory = xmlParserGetDirectory(filename);
Owen Taylor3473f882001-02-23 17:55:21 +000013585
Daniel Veillarddad3f682002-11-17 16:47:27 +000013586 ctxt->recovery = recovery;
13587
Owen Taylor3473f882001-02-23 17:55:21 +000013588 xmlParseDocument(ctxt);
13589
William M. Brackc07329e2003-09-08 01:57:30 +000013590 if ((ctxt->wellFormed) || recovery) {
13591 ret = ctxt->myDoc;
Daniel Veillardb65e12e2003-10-08 21:33:28 +000013592 if (ret != NULL) {
13593 if (ctxt->input->buf->compressed > 0)
13594 ret->compression = 9;
13595 else
13596 ret->compression = ctxt->input->buf->compressed;
13597 }
William M. Brackc07329e2003-09-08 01:57:30 +000013598 }
Owen Taylor3473f882001-02-23 17:55:21 +000013599 else {
13600 ret = NULL;
13601 xmlFreeDoc(ctxt->myDoc);
13602 ctxt->myDoc = NULL;
13603 }
13604 if (sax != NULL)
13605 ctxt->sax = NULL;
13606 xmlFreeParserCtxt(ctxt);
13607
13608 return(ret);
13609}
13610
13611/**
Daniel Veillarda293c322001-10-02 13:54:14 +000013612 * xmlSAXParseFile:
13613 * @sax: the SAX handler block
13614 * @filename: the filename
13615 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
13616 * documents
13617 *
13618 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
13619 * compressed document is provided by default if found at compile-time.
13620 * It use the given SAX function block to handle the parsing callback.
13621 * If sax is NULL, fallback to the default DOM tree building routines.
13622 *
13623 * Returns the resulting document tree
13624 */
13625
13626xmlDocPtr
13627xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
13628 int recovery) {
13629 return(xmlSAXParseFileWithData(sax,filename,recovery,NULL));
13630}
13631
13632/**
Owen Taylor3473f882001-02-23 17:55:21 +000013633 * xmlRecoverDoc:
13634 * @cur: a pointer to an array of xmlChar
13635 *
13636 * parse an XML in-memory document and build a tree.
Daniel Veillard2135fc22008-04-04 16:10:51 +000013637 * In the case the document is not Well Formed, a attempt to build a
13638 * tree is tried anyway
13639 *
13640 * Returns the resulting document tree or NULL in case of failure
Owen Taylor3473f882001-02-23 17:55:21 +000013641 */
13642
13643xmlDocPtr
Daniel Veillardf39eafa2009-08-20 19:15:08 +020013644xmlRecoverDoc(const xmlChar *cur) {
Owen Taylor3473f882001-02-23 17:55:21 +000013645 return(xmlSAXParseDoc(NULL, cur, 1));
13646}
13647
13648/**
13649 * xmlParseFile:
13650 * @filename: the filename
13651 *
13652 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
13653 * compressed document is provided by default if found at compile-time.
13654 *
Daniel Veillard5d96fff2001-08-31 14:55:30 +000013655 * Returns the resulting document tree if the file was wellformed,
13656 * NULL otherwise.
Owen Taylor3473f882001-02-23 17:55:21 +000013657 */
13658
13659xmlDocPtr
13660xmlParseFile(const char *filename) {
13661 return(xmlSAXParseFile(NULL, filename, 0));
13662}
13663
13664/**
13665 * xmlRecoverFile:
13666 * @filename: the filename
13667 *
13668 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
13669 * compressed document is provided by default if found at compile-time.
Daniel Veillard2135fc22008-04-04 16:10:51 +000013670 * In the case the document is not Well Formed, it attempts to build
13671 * a tree anyway
Owen Taylor3473f882001-02-23 17:55:21 +000013672 *
Daniel Veillard2135fc22008-04-04 16:10:51 +000013673 * Returns the resulting document tree or NULL in case of failure
Owen Taylor3473f882001-02-23 17:55:21 +000013674 */
13675
13676xmlDocPtr
13677xmlRecoverFile(const char *filename) {
13678 return(xmlSAXParseFile(NULL, filename, 1));
13679}
13680
13681
13682/**
13683 * xmlSetupParserForBuffer:
13684 * @ctxt: an XML parser context
13685 * @buffer: a xmlChar * buffer
13686 * @filename: a file name
13687 *
13688 * Setup the parser context to parse a new buffer; Clears any prior
13689 * contents from the parser context. The buffer parameter must not be
13690 * NULL, but the filename parameter can be
13691 */
13692void
13693xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
13694 const char* filename)
13695{
13696 xmlParserInputPtr input;
13697
Daniel Veillard36e5cd52004-11-02 14:52:23 +000013698 if ((ctxt == NULL) || (buffer == NULL))
13699 return;
13700
Owen Taylor3473f882001-02-23 17:55:21 +000013701 input = xmlNewInputStream(ctxt);
13702 if (input == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +000013703 xmlErrMemory(NULL, "parsing new buffer: out of memory\n");
Daniel Veillard36e5cd52004-11-02 14:52:23 +000013704 xmlClearParserCtxt(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000013705 return;
13706 }
13707
13708 xmlClearParserCtxt(ctxt);
13709 if (filename != NULL)
Daniel Veillardc3ca5ba2003-05-09 22:26:28 +000013710 input->filename = (char *) xmlCanonicPath((const xmlChar *)filename);
Owen Taylor3473f882001-02-23 17:55:21 +000013711 input->base = buffer;
13712 input->cur = buffer;
Daniel Veillard48b2f892001-02-25 16:11:03 +000013713 input->end = &buffer[xmlStrlen(buffer)];
Owen Taylor3473f882001-02-23 17:55:21 +000013714 inputPush(ctxt, input);
13715}
13716
13717/**
13718 * xmlSAXUserParseFile:
13719 * @sax: a SAX handler
13720 * @user_data: The user data returned on SAX callbacks
13721 * @filename: a file name
13722 *
13723 * parse an XML file and call the given SAX handler routines.
13724 * Automatic support for ZLIB/Compress compressed document is provided
13725 *
13726 * Returns 0 in case of success or a error number otherwise
13727 */
13728int
13729xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
13730 const char *filename) {
13731 int ret = 0;
13732 xmlParserCtxtPtr ctxt;
13733
13734 ctxt = xmlCreateFileParserCtxt(filename);
13735 if (ctxt == NULL) return -1;
Daniel Veillard092643b2003-09-25 14:29:29 +000013736 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Owen Taylor3473f882001-02-23 17:55:21 +000013737 xmlFree(ctxt->sax);
13738 ctxt->sax = sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000013739 xmlDetectSAX2(ctxt);
13740
Owen Taylor3473f882001-02-23 17:55:21 +000013741 if (user_data != NULL)
13742 ctxt->userData = user_data;
13743
13744 xmlParseDocument(ctxt);
13745
13746 if (ctxt->wellFormed)
13747 ret = 0;
13748 else {
13749 if (ctxt->errNo != 0)
13750 ret = ctxt->errNo;
13751 else
13752 ret = -1;
13753 }
13754 if (sax != NULL)
13755 ctxt->sax = NULL;
Daniel Veillard34099b42004-11-04 17:34:35 +000013756 if (ctxt->myDoc != NULL) {
13757 xmlFreeDoc(ctxt->myDoc);
13758 ctxt->myDoc = NULL;
13759 }
Owen Taylor3473f882001-02-23 17:55:21 +000013760 xmlFreeParserCtxt(ctxt);
13761
13762 return ret;
13763}
Daniel Veillard81273902003-09-30 00:43:48 +000013764#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000013765
13766/************************************************************************
13767 * *
13768 * Front ends when parsing from memory *
13769 * *
13770 ************************************************************************/
13771
13772/**
13773 * xmlCreateMemoryParserCtxt:
13774 * @buffer: a pointer to a char array
13775 * @size: the size of the array
13776 *
13777 * Create a parser context for an XML in-memory document.
13778 *
13779 * Returns the new parser context or NULL
13780 */
13781xmlParserCtxtPtr
Daniel Veillardfd7ddca2001-05-16 10:57:35 +000013782xmlCreateMemoryParserCtxt(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000013783 xmlParserCtxtPtr ctxt;
13784 xmlParserInputPtr input;
13785 xmlParserInputBufferPtr buf;
13786
13787 if (buffer == NULL)
13788 return(NULL);
13789 if (size <= 0)
13790 return(NULL);
13791
13792 ctxt = xmlNewParserCtxt();
13793 if (ctxt == NULL)
13794 return(NULL);
13795
Daniel Veillard53350552003-09-18 13:35:51 +000013796 /* TODO: xmlParserInputBufferCreateStatic, requires some serious changes */
Owen Taylor3473f882001-02-23 17:55:21 +000013797 buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
Daniel Veillarda7e05b42002-11-19 08:11:14 +000013798 if (buf == NULL) {
13799 xmlFreeParserCtxt(ctxt);
13800 return(NULL);
13801 }
Owen Taylor3473f882001-02-23 17:55:21 +000013802
13803 input = xmlNewInputStream(ctxt);
13804 if (input == NULL) {
Daniel Veillarda7e05b42002-11-19 08:11:14 +000013805 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000013806 xmlFreeParserCtxt(ctxt);
13807 return(NULL);
13808 }
13809
13810 input->filename = NULL;
13811 input->buf = buf;
13812 input->base = input->buf->buffer->content;
13813 input->cur = input->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +000013814 input->end = &input->buf->buffer->content[input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000013815
13816 inputPush(ctxt, input);
13817 return(ctxt);
13818}
13819
Daniel Veillard81273902003-09-30 00:43:48 +000013820#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000013821/**
Daniel Veillard8606bbb2002-11-12 12:36:52 +000013822 * xmlSAXParseMemoryWithData:
13823 * @sax: the SAX handler block
13824 * @buffer: an pointer to a char array
13825 * @size: the size of the array
13826 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
13827 * documents
13828 * @data: the userdata
13829 *
13830 * parse an XML in-memory block and use the given SAX function block
13831 * to handle the parsing callback. If sax is NULL, fallback to the default
13832 * DOM tree building routines.
13833 *
13834 * User data (void *) is stored within the parser context in the
13835 * context's _private member, so it is available nearly everywhere in libxml
13836 *
13837 * Returns the resulting document tree
13838 */
13839
13840xmlDocPtr
13841xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer,
13842 int size, int recovery, void *data) {
13843 xmlDocPtr ret;
13844 xmlParserCtxtPtr ctxt;
13845
Daniel Veillardab2a7632009-07-09 08:45:03 +020013846 xmlInitParser();
13847
Daniel Veillard8606bbb2002-11-12 12:36:52 +000013848 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
13849 if (ctxt == NULL) return(NULL);
13850 if (sax != NULL) {
13851 if (ctxt->sax != NULL)
13852 xmlFree(ctxt->sax);
13853 ctxt->sax = sax;
13854 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000013855 xmlDetectSAX2(ctxt);
Daniel Veillard8606bbb2002-11-12 12:36:52 +000013856 if (data!=NULL) {
13857 ctxt->_private=data;
13858 }
13859
Daniel Veillardadba5f12003-04-04 16:09:01 +000013860 ctxt->recovery = recovery;
13861
Daniel Veillard8606bbb2002-11-12 12:36:52 +000013862 xmlParseDocument(ctxt);
13863
13864 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
13865 else {
13866 ret = NULL;
13867 xmlFreeDoc(ctxt->myDoc);
13868 ctxt->myDoc = NULL;
13869 }
13870 if (sax != NULL)
13871 ctxt->sax = NULL;
13872 xmlFreeParserCtxt(ctxt);
Daniel Veillardab2a7632009-07-09 08:45:03 +020013873
Daniel Veillard8606bbb2002-11-12 12:36:52 +000013874 return(ret);
13875}
13876
13877/**
Owen Taylor3473f882001-02-23 17:55:21 +000013878 * xmlSAXParseMemory:
13879 * @sax: the SAX handler block
13880 * @buffer: an pointer to a char array
13881 * @size: the size of the array
13882 * @recovery: work in recovery mode, i.e. tries to read not Well Formed
13883 * documents
13884 *
13885 * parse an XML in-memory block and use the given SAX function block
13886 * to handle the parsing callback. If sax is NULL, fallback to the default
13887 * DOM tree building routines.
13888 *
13889 * Returns the resulting document tree
13890 */
13891xmlDocPtr
Daniel Veillard50822cb2001-07-26 20:05:51 +000013892xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer,
13893 int size, int recovery) {
Daniel Veillard8606bbb2002-11-12 12:36:52 +000013894 return xmlSAXParseMemoryWithData(sax, buffer, size, recovery, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000013895}
13896
13897/**
13898 * xmlParseMemory:
13899 * @buffer: an pointer to a char array
13900 * @size: the size of the array
13901 *
13902 * parse an XML in-memory block and build a tree.
13903 *
13904 * Returns the resulting document tree
13905 */
13906
Daniel Veillard50822cb2001-07-26 20:05:51 +000013907xmlDocPtr xmlParseMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000013908 return(xmlSAXParseMemory(NULL, buffer, size, 0));
13909}
13910
13911/**
13912 * xmlRecoverMemory:
13913 * @buffer: an pointer to a char array
13914 * @size: the size of the array
13915 *
13916 * parse an XML in-memory block and build a tree.
Daniel Veillard2135fc22008-04-04 16:10:51 +000013917 * In the case the document is not Well Formed, an attempt to
13918 * build a tree is tried anyway
13919 *
13920 * Returns the resulting document tree or NULL in case of error
Owen Taylor3473f882001-02-23 17:55:21 +000013921 */
13922
Daniel Veillard50822cb2001-07-26 20:05:51 +000013923xmlDocPtr xmlRecoverMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000013924 return(xmlSAXParseMemory(NULL, buffer, size, 1));
13925}
13926
13927/**
13928 * xmlSAXUserParseMemory:
13929 * @sax: a SAX handler
13930 * @user_data: The user data returned on SAX callbacks
13931 * @buffer: an in-memory XML document input
13932 * @size: the length of the XML document in bytes
13933 *
13934 * A better SAX parsing routine.
13935 * parse an XML in-memory buffer and call the given SAX handler routines.
Daniel Veillardab2a7632009-07-09 08:45:03 +020013936 *
Owen Taylor3473f882001-02-23 17:55:21 +000013937 * Returns 0 in case of success or a error number otherwise
13938 */
13939int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardfd7ddca2001-05-16 10:57:35 +000013940 const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000013941 int ret = 0;
13942 xmlParserCtxtPtr ctxt;
Daniel Veillardab2a7632009-07-09 08:45:03 +020013943
13944 xmlInitParser();
13945
Owen Taylor3473f882001-02-23 17:55:21 +000013946 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
13947 if (ctxt == NULL) return -1;
Daniel Veillard3dcd3192007-08-14 13:46:54 +000013948 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
13949 xmlFree(ctxt->sax);
Daniel Veillard9e923512002-08-14 08:48:52 +000013950 ctxt->sax = sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000013951 xmlDetectSAX2(ctxt);
Daniel Veillard3dcd3192007-08-14 13:46:54 +000013952
Daniel Veillard30211a02001-04-26 09:33:18 +000013953 if (user_data != NULL)
13954 ctxt->userData = user_data;
Daniel Veillardab2a7632009-07-09 08:45:03 +020013955
Owen Taylor3473f882001-02-23 17:55:21 +000013956 xmlParseDocument(ctxt);
13957
13958 if (ctxt->wellFormed)
13959 ret = 0;
13960 else {
13961 if (ctxt->errNo != 0)
13962 ret = ctxt->errNo;
13963 else
13964 ret = -1;
13965 }
Daniel Veillard3dcd3192007-08-14 13:46:54 +000013966 if (sax != NULL)
13967 ctxt->sax = NULL;
Daniel Veillard34099b42004-11-04 17:34:35 +000013968 if (ctxt->myDoc != NULL) {
13969 xmlFreeDoc(ctxt->myDoc);
13970 ctxt->myDoc = NULL;
13971 }
Owen Taylor3473f882001-02-23 17:55:21 +000013972 xmlFreeParserCtxt(ctxt);
13973
13974 return ret;
13975}
Daniel Veillard81273902003-09-30 00:43:48 +000013976#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000013977
13978/**
13979 * xmlCreateDocParserCtxt:
13980 * @cur: a pointer to an array of xmlChar
13981 *
13982 * Creates a parser context for an XML in-memory document.
13983 *
13984 * Returns the new parser context or NULL
13985 */
13986xmlParserCtxtPtr
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013987xmlCreateDocParserCtxt(const xmlChar *cur) {
Owen Taylor3473f882001-02-23 17:55:21 +000013988 int len;
13989
13990 if (cur == NULL)
13991 return(NULL);
13992 len = xmlStrlen(cur);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013993 return(xmlCreateMemoryParserCtxt((const char *)cur, len));
Owen Taylor3473f882001-02-23 17:55:21 +000013994}
13995
Daniel Veillard81273902003-09-30 00:43:48 +000013996#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000013997/**
13998 * xmlSAXParseDoc:
13999 * @sax: the SAX handler block
14000 * @cur: a pointer to an array of xmlChar
14001 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
14002 * documents
14003 *
14004 * parse an XML in-memory document and build a tree.
14005 * It use the given SAX function block to handle the parsing callback.
14006 * If sax is NULL, fallback to the default DOM tree building routines.
14007 *
14008 * Returns the resulting document tree
14009 */
14010
14011xmlDocPtr
Daniel Veillard7331e5c2005-03-31 14:59:00 +000014012xmlSAXParseDoc(xmlSAXHandlerPtr sax, const xmlChar *cur, int recovery) {
Owen Taylor3473f882001-02-23 17:55:21 +000014013 xmlDocPtr ret;
14014 xmlParserCtxtPtr ctxt;
Daniel Veillard38936062004-11-04 17:45:11 +000014015 xmlSAXHandlerPtr oldsax = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000014016
Daniel Veillard38936062004-11-04 17:45:11 +000014017 if (cur == NULL) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000014018
14019
14020 ctxt = xmlCreateDocParserCtxt(cur);
14021 if (ctxt == NULL) return(NULL);
14022 if (sax != NULL) {
Daniel Veillard38936062004-11-04 17:45:11 +000014023 oldsax = ctxt->sax;
Owen Taylor3473f882001-02-23 17:55:21 +000014024 ctxt->sax = sax;
14025 ctxt->userData = NULL;
14026 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000014027 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000014028
14029 xmlParseDocument(ctxt);
14030 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
14031 else {
14032 ret = NULL;
14033 xmlFreeDoc(ctxt->myDoc);
14034 ctxt->myDoc = NULL;
14035 }
Daniel Veillard34099b42004-11-04 17:34:35 +000014036 if (sax != NULL)
Daniel Veillard38936062004-11-04 17:45:11 +000014037 ctxt->sax = oldsax;
Owen Taylor3473f882001-02-23 17:55:21 +000014038 xmlFreeParserCtxt(ctxt);
14039
14040 return(ret);
14041}
14042
14043/**
14044 * xmlParseDoc:
14045 * @cur: a pointer to an array of xmlChar
14046 *
14047 * parse an XML in-memory document and build a tree.
14048 *
14049 * Returns the resulting document tree
14050 */
14051
14052xmlDocPtr
Daniel Veillard7331e5c2005-03-31 14:59:00 +000014053xmlParseDoc(const xmlChar *cur) {
Owen Taylor3473f882001-02-23 17:55:21 +000014054 return(xmlSAXParseDoc(NULL, cur, 0));
14055}
Daniel Veillard81273902003-09-30 00:43:48 +000014056#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000014057
Daniel Veillard81273902003-09-30 00:43:48 +000014058#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +000014059/************************************************************************
14060 * *
14061 * Specific function to keep track of entities references *
14062 * and used by the XSLT debugger *
14063 * *
14064 ************************************************************************/
14065
14066static xmlEntityReferenceFunc xmlEntityRefFunc = NULL;
14067
14068/**
14069 * xmlAddEntityReference:
14070 * @ent : A valid entity
14071 * @firstNode : A valid first node for children of entity
14072 * @lastNode : A valid last node of children entity
14073 *
14074 * Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY
14075 */
14076static void
14077xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
14078 xmlNodePtr lastNode)
14079{
14080 if (xmlEntityRefFunc != NULL) {
14081 (*xmlEntityRefFunc) (ent, firstNode, lastNode);
14082 }
14083}
14084
14085
14086/**
14087 * xmlSetEntityReferenceFunc:
Daniel Veillard01c13b52002-12-10 15:19:08 +000014088 * @func: A valid function
Daniel Veillard8107a222002-01-13 14:10:10 +000014089 *
14090 * Set the function to call call back when a xml reference has been made
14091 */
14092void
14093xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func)
14094{
14095 xmlEntityRefFunc = func;
14096}
Daniel Veillard81273902003-09-30 00:43:48 +000014097#endif /* LIBXML_LEGACY_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000014098
14099/************************************************************************
14100 * *
14101 * Miscellaneous *
14102 * *
14103 ************************************************************************/
14104
14105#ifdef LIBXML_XPATH_ENABLED
14106#include <libxml/xpath.h>
14107#endif
14108
Daniel Veillardffa3c742005-07-21 13:24:09 +000014109extern void XMLCDECL xmlGenericErrorDefaultFunc(void *ctx, const char *msg, ...);
Owen Taylor3473f882001-02-23 17:55:21 +000014110static int xmlParserInitialized = 0;
14111
14112/**
14113 * xmlInitParser:
14114 *
14115 * Initialization function for the XML parser.
14116 * This is not reentrant. Call once before processing in case of
14117 * use in multithreaded programs.
14118 */
14119
14120void
14121xmlInitParser(void) {
Daniel Veillard3c01b1d2001-10-17 15:58:35 +000014122 if (xmlParserInitialized != 0)
14123 return;
Owen Taylor3473f882001-02-23 17:55:21 +000014124
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000014125#ifdef LIBXML_THREAD_ENABLED
14126 __xmlGlobalInitMutexLock();
14127 if (xmlParserInitialized == 0) {
14128#endif
Daniel Veillard7dd70802009-06-04 11:08:39 +020014129 xmlInitThreads();
Mike Hommeye6f05092010-10-15 19:50:03 +020014130 xmlInitGlobals();
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000014131 if ((xmlGenericError == xmlGenericErrorDefaultFunc) ||
14132 (xmlGenericError == NULL))
14133 initGenericErrorDefaultFunc(NULL);
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000014134 xmlInitMemory();
14135 xmlInitCharEncodingHandlers();
14136 xmlDefaultSAXHandlerInit();
14137 xmlRegisterDefaultInputCallbacks();
Daniel Veillarda9cce9c2003-09-29 13:20:24 +000014138#ifdef LIBXML_OUTPUT_ENABLED
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000014139 xmlRegisterDefaultOutputCallbacks();
Daniel Veillarda9cce9c2003-09-29 13:20:24 +000014140#endif /* LIBXML_OUTPUT_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000014141#ifdef LIBXML_HTML_ENABLED
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000014142 htmlInitAutoClose();
14143 htmlDefaultSAXHandlerInit();
Owen Taylor3473f882001-02-23 17:55:21 +000014144#endif
14145#ifdef LIBXML_XPATH_ENABLED
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000014146 xmlXPathInit();
Owen Taylor3473f882001-02-23 17:55:21 +000014147#endif
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000014148 xmlParserInitialized = 1;
14149#ifdef LIBXML_THREAD_ENABLED
14150 }
14151 __xmlGlobalInitMutexUnlock();
14152#endif
Owen Taylor3473f882001-02-23 17:55:21 +000014153}
14154
14155/**
14156 * xmlCleanupParser:
14157 *
Daniel Veillard05b37c62008-03-31 08:27:07 +000014158 * This function name is somewhat misleading. It does not clean up
14159 * parser state, it cleans up memory allocated by the library itself.
14160 * It is a cleanup function for the XML library. It tries to reclaim all
14161 * related global memory allocated for the library processing.
14162 * It doesn't deallocate any document related memory. One should
14163 * call xmlCleanupParser() only when the process has finished using
14164 * the library and all XML/HTML documents built with it.
14165 * See also xmlInitParser() which has the opposite function of preparing
14166 * the library for operations.
Daniel Veillard01101202009-02-21 09:22:04 +000014167 *
14168 * WARNING: if your application is multithreaded or has plugin support
14169 * calling this may crash the application if another thread or
14170 * a plugin is still using libxml2. It's sometimes very hard to
14171 * guess if libxml2 is in use in the application, some libraries
14172 * or plugins may use it without notice. In case of doubt abstain
14173 * from calling this function or do it just before calling exit()
14174 * to avoid leak reports from valgrind !
Owen Taylor3473f882001-02-23 17:55:21 +000014175 */
14176
14177void
14178xmlCleanupParser(void) {
Daniel Veillard7fb801f2003-08-17 21:07:26 +000014179 if (!xmlParserInitialized)
14180 return;
14181
Owen Taylor3473f882001-02-23 17:55:21 +000014182 xmlCleanupCharEncodingHandlers();
Daniel Veillarde2940dd2001-08-22 00:06:49 +000014183#ifdef LIBXML_CATALOG_ENABLED
14184 xmlCatalogCleanup();
14185#endif
Daniel Veillard14412512005-01-21 23:53:26 +000014186 xmlDictCleanup();
Daniel Veillard04054be2003-10-15 10:48:54 +000014187 xmlCleanupInputCallbacks();
14188#ifdef LIBXML_OUTPUT_ENABLED
14189 xmlCleanupOutputCallbacks();
14190#endif
Daniel Veillard36e5cd52004-11-02 14:52:23 +000014191#ifdef LIBXML_SCHEMAS_ENABLED
14192 xmlSchemaCleanupTypes();
Daniel Veillarddd6d3002004-11-03 14:20:29 +000014193 xmlRelaxNGCleanupTypes();
Daniel Veillard36e5cd52004-11-02 14:52:23 +000014194#endif
Daniel Veillard781ac8b2003-05-15 22:11:36 +000014195 xmlCleanupGlobals();
Daniel Veillard2b8c4a12003-10-02 22:28:19 +000014196 xmlResetLastError();
Daniel Veillard74c0e592003-11-25 07:01:38 +000014197 xmlCleanupThreads(); /* must be last if called not from the main thread */
William M. Brack72ee48d2003-12-30 08:30:19 +000014198 xmlCleanupMemory();
Daniel Veillardd0463562001-10-13 09:15:48 +000014199 xmlParserInitialized = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000014200}
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014201
14202/************************************************************************
14203 * *
14204 * New set (2.6.0) of simpler and more flexible APIs *
14205 * *
14206 ************************************************************************/
14207
/**
 * DICT_FREE:
 * @str:  a string
 *
 * Free a string if it is not owned by the "dict" dictionary in the
 * current scope. A variable named dict must be visible at the point of
 * expansion.
 *
 * The body is wrapped in do { } while (0) so that the expansion is a
 * single statement which remains correct inside an unbraced if/else;
 * the trailing semicolon is supplied by the caller.
 */
#define DICT_FREE(str)                                                  \
    do {                                                                \
        if ((str) && ((!dict) ||                                        \
            (xmlDictOwns(dict, (const xmlChar *)(str)) == 0)))          \
            xmlFree((char *)(str));                                     \
    } while (0)
14219
14220/**
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014221 * xmlCtxtReset:
14222 * @ctxt: an XML parser context
14223 *
14224 * Reset a parser context
14225 */
14226void
14227xmlCtxtReset(xmlParserCtxtPtr ctxt)
14228{
14229 xmlParserInputPtr input;
Daniel Veillard36e5cd52004-11-02 14:52:23 +000014230 xmlDictPtr dict;
14231
14232 if (ctxt == NULL)
14233 return;
14234
14235 dict = ctxt->dict;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014236
14237 while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
14238 xmlFreeInputStream(input);
14239 }
14240 ctxt->inputNr = 0;
14241 ctxt->input = NULL;
14242
14243 ctxt->spaceNr = 0;
Daniel Veillard2e620862007-06-12 08:18:21 +000014244 if (ctxt->spaceTab != NULL) {
14245 ctxt->spaceTab[0] = -1;
14246 ctxt->space = &ctxt->spaceTab[0];
14247 } else {
14248 ctxt->space = NULL;
14249 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014250
14251
14252 ctxt->nodeNr = 0;
14253 ctxt->node = NULL;
14254
14255 ctxt->nameNr = 0;
14256 ctxt->name = NULL;
14257
Daniel Veillarde96a2a42003-09-24 21:23:56 +000014258 DICT_FREE(ctxt->version);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014259 ctxt->version = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000014260 DICT_FREE(ctxt->encoding);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014261 ctxt->encoding = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000014262 DICT_FREE(ctxt->directory);
14263 ctxt->directory = NULL;
14264 DICT_FREE(ctxt->extSubURI);
14265 ctxt->extSubURI = NULL;
14266 DICT_FREE(ctxt->extSubSystem);
14267 ctxt->extSubSystem = NULL;
14268 if (ctxt->myDoc != NULL)
14269 xmlFreeDoc(ctxt->myDoc);
14270 ctxt->myDoc = NULL;
14271
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014272 ctxt->standalone = -1;
14273 ctxt->hasExternalSubset = 0;
14274 ctxt->hasPErefs = 0;
14275 ctxt->html = 0;
14276 ctxt->external = 0;
14277 ctxt->instate = XML_PARSER_START;
14278 ctxt->token = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014279
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014280 ctxt->wellFormed = 1;
14281 ctxt->nsWellFormed = 1;
Daniel Veillardae289182004-01-21 16:00:43 +000014282 ctxt->disableSAX = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014283 ctxt->valid = 1;
Daniel Veillard766c4f92004-03-26 10:48:29 +000014284#if 0
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014285 ctxt->vctxt.userData = ctxt;
14286 ctxt->vctxt.error = xmlParserValidityError;
14287 ctxt->vctxt.warning = xmlParserValidityWarning;
Daniel Veillard766c4f92004-03-26 10:48:29 +000014288#endif
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014289 ctxt->record_info = 0;
14290 ctxt->nbChars = 0;
14291 ctxt->checkIndex = 0;
14292 ctxt->inSubset = 0;
14293 ctxt->errNo = XML_ERR_OK;
14294 ctxt->depth = 0;
14295 ctxt->charset = XML_CHAR_ENCODING_UTF8;
14296 ctxt->catalogs = NULL;
Daniel Veillard4bf899b2008-08-20 17:04:30 +000014297 ctxt->nbentities = 0;
Daniel Veillard0161e632008-08-28 15:36:32 +000014298 ctxt->sizeentities = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014299 xmlInitNodeInfoSeq(&ctxt->node_seq);
14300
14301 if (ctxt->attsDefault != NULL) {
14302 xmlHashFree(ctxt->attsDefault, (xmlHashDeallocator) xmlFree);
14303 ctxt->attsDefault = NULL;
14304 }
14305 if (ctxt->attsSpecial != NULL) {
14306 xmlHashFree(ctxt->attsSpecial, NULL);
14307 ctxt->attsSpecial = NULL;
14308 }
14309
Daniel Veillard4432df22003-09-28 18:58:27 +000014310#ifdef LIBXML_CATALOG_ENABLED
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014311 if (ctxt->catalogs != NULL)
14312 xmlCatalogFreeLocal(ctxt->catalogs);
Daniel Veillard4432df22003-09-28 18:58:27 +000014313#endif
Daniel Veillardcc199e02003-10-24 21:11:48 +000014314 if (ctxt->lastError.code != XML_ERR_OK)
14315 xmlResetError(&ctxt->lastError);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014316}
14317
14318/**
Daniel Veillard9ba8e382003-10-28 21:31:45 +000014319 * xmlCtxtResetPush:
14320 * @ctxt: an XML parser context
14321 * @chunk: a pointer to an array of chars
14322 * @size: number of chars in the array
14323 * @filename: an optional file name or URI
14324 * @encoding: the document encoding, or NULL
14325 *
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000014326 * Reset a push parser context
14327 *
14328 * Returns 0 in case of success and 1 in case of error
Daniel Veillard9ba8e382003-10-28 21:31:45 +000014329 */
14330int
14331xmlCtxtResetPush(xmlParserCtxtPtr ctxt, const char *chunk,
14332 int size, const char *filename, const char *encoding)
14333{
14334 xmlParserInputPtr inputStream;
14335 xmlParserInputBufferPtr buf;
14336 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
14337
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000014338 if (ctxt == NULL)
14339 return(1);
14340
Daniel Veillard9ba8e382003-10-28 21:31:45 +000014341 if ((encoding == NULL) && (chunk != NULL) && (size >= 4))
14342 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
14343
14344 buf = xmlAllocParserInputBuffer(enc);
14345 if (buf == NULL)
14346 return(1);
14347
14348 if (ctxt == NULL) {
14349 xmlFreeParserInputBuffer(buf);
14350 return(1);
14351 }
14352
14353 xmlCtxtReset(ctxt);
14354
14355 if (ctxt->pushTab == NULL) {
14356 ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 *
14357 sizeof(xmlChar *));
14358 if (ctxt->pushTab == NULL) {
14359 xmlErrMemory(ctxt, NULL);
14360 xmlFreeParserInputBuffer(buf);
14361 return(1);
14362 }
14363 }
14364
14365 if (filename == NULL) {
14366 ctxt->directory = NULL;
14367 } else {
14368 ctxt->directory = xmlParserGetDirectory(filename);
14369 }
14370
14371 inputStream = xmlNewInputStream(ctxt);
14372 if (inputStream == NULL) {
14373 xmlFreeParserInputBuffer(buf);
14374 return(1);
14375 }
14376
14377 if (filename == NULL)
14378 inputStream->filename = NULL;
14379 else
14380 inputStream->filename = (char *)
14381 xmlCanonicPath((const xmlChar *) filename);
14382 inputStream->buf = buf;
14383 inputStream->base = inputStream->buf->buffer->content;
14384 inputStream->cur = inputStream->buf->buffer->content;
14385 inputStream->end =
14386 &inputStream->buf->buffer->content[inputStream->buf->buffer->use];
14387
14388 inputPush(ctxt, inputStream);
14389
14390 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
14391 (ctxt->input->buf != NULL)) {
14392 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
14393 int cur = ctxt->input->cur - ctxt->input->base;
14394
14395 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
14396
14397 ctxt->input->base = ctxt->input->buf->buffer->content + base;
14398 ctxt->input->cur = ctxt->input->base + cur;
14399 ctxt->input->end =
14400 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->
14401 use];
14402#ifdef DEBUG_PUSH
14403 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
14404#endif
14405 }
14406
14407 if (encoding != NULL) {
14408 xmlCharEncodingHandlerPtr hdlr;
14409
Daniel Veillard37334572008-07-31 08:20:02 +000014410 if (ctxt->encoding != NULL)
14411 xmlFree((xmlChar *) ctxt->encoding);
14412 ctxt->encoding = xmlStrdup((const xmlChar *) encoding);
14413
Daniel Veillard9ba8e382003-10-28 21:31:45 +000014414 hdlr = xmlFindCharEncodingHandler(encoding);
14415 if (hdlr != NULL) {
14416 xmlSwitchToEncoding(ctxt, hdlr);
14417 } else {
14418 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
14419 "Unsupported encoding %s\n", BAD_CAST encoding);
14420 }
14421 } else if (enc != XML_CHAR_ENCODING_NONE) {
14422 xmlSwitchEncoding(ctxt, enc);
14423 }
14424
14425 return(0);
14426}
14427
Daniel Veillard37334572008-07-31 08:20:02 +000014428
Daniel Veillard9ba8e382003-10-28 21:31:45 +000014429/**
Daniel Veillard37334572008-07-31 08:20:02 +000014430 * xmlCtxtUseOptionsInternal:
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014431 * @ctxt: an XML parser context
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014432 * @options: a combination of xmlParserOption
Daniel Veillard37334572008-07-31 08:20:02 +000014433 * @encoding: the user provided encoding to use
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014434 *
14435 * Applies the options to the parser context
14436 *
14437 * Returns 0 in case of success, the set of unknown or unimplemented options
14438 * in case of error.
14439 */
Daniel Veillard37334572008-07-31 08:20:02 +000014440static int
14441xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options, const char *encoding)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014442{
Daniel Veillard36e5cd52004-11-02 14:52:23 +000014443 if (ctxt == NULL)
14444 return(-1);
Daniel Veillard37334572008-07-31 08:20:02 +000014445 if (encoding != NULL) {
14446 if (ctxt->encoding != NULL)
14447 xmlFree((xmlChar *) ctxt->encoding);
14448 ctxt->encoding = xmlStrdup((const xmlChar *) encoding);
14449 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014450 if (options & XML_PARSE_RECOVER) {
14451 ctxt->recovery = 1;
14452 options -= XML_PARSE_RECOVER;
Daniel Veillardae0765b2008-07-31 19:54:59 +000014453 ctxt->options |= XML_PARSE_RECOVER;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014454 } else
14455 ctxt->recovery = 0;
14456 if (options & XML_PARSE_DTDLOAD) {
14457 ctxt->loadsubset = XML_DETECT_IDS;
14458 options -= XML_PARSE_DTDLOAD;
Daniel Veillardae0765b2008-07-31 19:54:59 +000014459 ctxt->options |= XML_PARSE_DTDLOAD;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014460 } else
14461 ctxt->loadsubset = 0;
14462 if (options & XML_PARSE_DTDATTR) {
14463 ctxt->loadsubset |= XML_COMPLETE_ATTRS;
14464 options -= XML_PARSE_DTDATTR;
Daniel Veillardae0765b2008-07-31 19:54:59 +000014465 ctxt->options |= XML_PARSE_DTDATTR;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014466 }
14467 if (options & XML_PARSE_NOENT) {
14468 ctxt->replaceEntities = 1;
14469 /* ctxt->loadsubset |= XML_DETECT_IDS; */
14470 options -= XML_PARSE_NOENT;
Daniel Veillardae0765b2008-07-31 19:54:59 +000014471 ctxt->options |= XML_PARSE_NOENT;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014472 } else
14473 ctxt->replaceEntities = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014474 if (options & XML_PARSE_PEDANTIC) {
14475 ctxt->pedantic = 1;
14476 options -= XML_PARSE_PEDANTIC;
Daniel Veillardae0765b2008-07-31 19:54:59 +000014477 ctxt->options |= XML_PARSE_PEDANTIC;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014478 } else
14479 ctxt->pedantic = 0;
14480 if (options & XML_PARSE_NOBLANKS) {
14481 ctxt->keepBlanks = 0;
14482 ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace;
14483 options -= XML_PARSE_NOBLANKS;
Daniel Veillardae0765b2008-07-31 19:54:59 +000014484 ctxt->options |= XML_PARSE_NOBLANKS;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014485 } else
14486 ctxt->keepBlanks = 1;
14487 if (options & XML_PARSE_DTDVALID) {
14488 ctxt->validate = 1;
14489 if (options & XML_PARSE_NOWARNING)
14490 ctxt->vctxt.warning = NULL;
14491 if (options & XML_PARSE_NOERROR)
14492 ctxt->vctxt.error = NULL;
14493 options -= XML_PARSE_DTDVALID;
Daniel Veillardae0765b2008-07-31 19:54:59 +000014494 ctxt->options |= XML_PARSE_DTDVALID;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014495 } else
14496 ctxt->validate = 0;
Daniel Veillard971771e2005-07-09 17:32:57 +000014497 if (options & XML_PARSE_NOWARNING) {
14498 ctxt->sax->warning = NULL;
14499 options -= XML_PARSE_NOWARNING;
14500 }
14501 if (options & XML_PARSE_NOERROR) {
14502 ctxt->sax->error = NULL;
14503 ctxt->sax->fatalError = NULL;
14504 options -= XML_PARSE_NOERROR;
14505 }
Daniel Veillard81273902003-09-30 00:43:48 +000014506#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014507 if (options & XML_PARSE_SAX1) {
14508 ctxt->sax->startElement = xmlSAX2StartElement;
14509 ctxt->sax->endElement = xmlSAX2EndElement;
14510 ctxt->sax->startElementNs = NULL;
14511 ctxt->sax->endElementNs = NULL;
14512 ctxt->sax->initialized = 1;
14513 options -= XML_PARSE_SAX1;
Daniel Veillardae0765b2008-07-31 19:54:59 +000014514 ctxt->options |= XML_PARSE_SAX1;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014515 }
Daniel Veillard81273902003-09-30 00:43:48 +000014516#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde96a2a42003-09-24 21:23:56 +000014517 if (options & XML_PARSE_NODICT) {
14518 ctxt->dictNames = 0;
14519 options -= XML_PARSE_NODICT;
Daniel Veillardae0765b2008-07-31 19:54:59 +000014520 ctxt->options |= XML_PARSE_NODICT;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000014521 } else {
14522 ctxt->dictNames = 1;
14523 }
Daniel Veillarddca8cc72003-09-26 13:53:14 +000014524 if (options & XML_PARSE_NOCDATA) {
14525 ctxt->sax->cdataBlock = NULL;
14526 options -= XML_PARSE_NOCDATA;
Daniel Veillardae0765b2008-07-31 19:54:59 +000014527 ctxt->options |= XML_PARSE_NOCDATA;
Daniel Veillarddca8cc72003-09-26 13:53:14 +000014528 }
14529 if (options & XML_PARSE_NSCLEAN) {
14530 ctxt->options |= XML_PARSE_NSCLEAN;
14531 options -= XML_PARSE_NSCLEAN;
14532 }
Daniel Veillard61b93382003-11-03 14:28:31 +000014533 if (options & XML_PARSE_NONET) {
14534 ctxt->options |= XML_PARSE_NONET;
14535 options -= XML_PARSE_NONET;
14536 }
Daniel Veillard8874b942005-08-25 13:19:21 +000014537 if (options & XML_PARSE_COMPACT) {
14538 ctxt->options |= XML_PARSE_COMPACT;
14539 options -= XML_PARSE_COMPACT;
14540 }
Daniel Veillard7e5c3f42008-07-29 16:12:31 +000014541 if (options & XML_PARSE_OLD10) {
14542 ctxt->options |= XML_PARSE_OLD10;
14543 options -= XML_PARSE_OLD10;
14544 }
Daniel Veillard8915c152008-08-26 13:05:34 +000014545 if (options & XML_PARSE_NOBASEFIX) {
14546 ctxt->options |= XML_PARSE_NOBASEFIX;
14547 options -= XML_PARSE_NOBASEFIX;
14548 }
14549 if (options & XML_PARSE_HUGE) {
14550 ctxt->options |= XML_PARSE_HUGE;
14551 options -= XML_PARSE_HUGE;
14552 }
Rob Richardsb9ed0172009-01-05 17:28:50 +000014553 if (options & XML_PARSE_OLDSAX) {
14554 ctxt->options |= XML_PARSE_OLDSAX;
14555 options -= XML_PARSE_OLDSAX;
14556 }
Daniel Veillard7ec29972003-10-31 14:36:36 +000014557 ctxt->linenumbers = 1;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014558 return (options);
14559}
14560
14561/**
Daniel Veillard37334572008-07-31 08:20:02 +000014562 * xmlCtxtUseOptions:
14563 * @ctxt: an XML parser context
14564 * @options: a combination of xmlParserOption
14565 *
14566 * Applies the options to the parser context
14567 *
14568 * Returns 0 in case of success, the set of unknown or unimplemented options
14569 * in case of error.
14570 */
14571int
14572xmlCtxtUseOptions(xmlParserCtxtPtr ctxt, int options)
14573{
14574 return(xmlCtxtUseOptionsInternal(ctxt, options, NULL));
14575}
14576
14577/**
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014578 * xmlDoRead:
14579 * @ctxt: an XML parser context
Daniel Veillard60942de2003-09-25 21:05:58 +000014580 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014581 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014582 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014583 * @reuse: keep the context for reuse
14584 *
14585 * Common front-end for the xmlRead functions
Daniel Veillard37334572008-07-31 08:20:02 +000014586 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014587 * Returns the resulting document tree or NULL
14588 */
14589static xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000014590xmlDoRead(xmlParserCtxtPtr ctxt, const char *URL, const char *encoding,
14591 int options, int reuse)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014592{
14593 xmlDocPtr ret;
Daniel Veillard37334572008-07-31 08:20:02 +000014594
14595 xmlCtxtUseOptionsInternal(ctxt, options, encoding);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014596 if (encoding != NULL) {
14597 xmlCharEncodingHandlerPtr hdlr;
14598
14599 hdlr = xmlFindCharEncodingHandler(encoding);
14600 if (hdlr != NULL)
14601 xmlSwitchToEncoding(ctxt, hdlr);
14602 }
Daniel Veillard60942de2003-09-25 21:05:58 +000014603 if ((URL != NULL) && (ctxt->input != NULL) &&
14604 (ctxt->input->filename == NULL))
14605 ctxt->input->filename = (char *) xmlStrdup((const xmlChar *) URL);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014606 xmlParseDocument(ctxt);
14607 if ((ctxt->wellFormed) || ctxt->recovery)
14608 ret = ctxt->myDoc;
14609 else {
14610 ret = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000014611 if (ctxt->myDoc != NULL) {
Daniel Veillarde96a2a42003-09-24 21:23:56 +000014612 xmlFreeDoc(ctxt->myDoc);
14613 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014614 }
Daniel Veillarde96a2a42003-09-24 21:23:56 +000014615 ctxt->myDoc = NULL;
14616 if (!reuse) {
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014617 xmlFreeParserCtxt(ctxt);
Daniel Veillarde96a2a42003-09-24 21:23:56 +000014618 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014619
14620 return (ret);
14621}
14622
14623/**
14624 * xmlReadDoc:
14625 * @cur: a pointer to a zero terminated string
Daniel Veillard60942de2003-09-25 21:05:58 +000014626 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014627 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014628 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014629 *
14630 * parse an XML in-memory document and build a tree.
14631 *
14632 * Returns the resulting document tree
14633 */
14634xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000014635xmlReadDoc(const xmlChar * cur, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014636{
14637 xmlParserCtxtPtr ctxt;
14638
14639 if (cur == NULL)
14640 return (NULL);
14641
14642 ctxt = xmlCreateDocParserCtxt(cur);
14643 if (ctxt == NULL)
14644 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000014645 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014646}
14647
14648/**
14649 * xmlReadFile:
14650 * @filename: a file or URL
14651 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014652 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014653 *
14654 * parse an XML file from the filesystem or the network.
14655 *
14656 * Returns the resulting document tree
14657 */
14658xmlDocPtr
14659xmlReadFile(const char *filename, const char *encoding, int options)
14660{
14661 xmlParserCtxtPtr ctxt;
14662
Daniel Veillard61b93382003-11-03 14:28:31 +000014663 ctxt = xmlCreateURLParserCtxt(filename, options);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014664 if (ctxt == NULL)
14665 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000014666 return (xmlDoRead(ctxt, NULL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014667}
14668
14669/**
14670 * xmlReadMemory:
14671 * @buffer: a pointer to a char array
14672 * @size: the size of the array
Daniel Veillard60942de2003-09-25 21:05:58 +000014673 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014674 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014675 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014676 *
14677 * parse an XML in-memory document and build a tree.
14678 *
14679 * Returns the resulting document tree
14680 */
14681xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000014682xmlReadMemory(const char *buffer, int size, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014683{
14684 xmlParserCtxtPtr ctxt;
14685
14686 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
14687 if (ctxt == NULL)
14688 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000014689 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014690}
14691
14692/**
14693 * xmlReadFd:
14694 * @fd: an open file descriptor
Daniel Veillard60942de2003-09-25 21:05:58 +000014695 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014696 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014697 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014698 *
14699 * parse an XML from a file descriptor and build a tree.
Daniel Veillard4bc5f432003-12-22 18:13:12 +000014700 * NOTE that the file descriptor will not be closed when the
14701 * reader is closed or reset.
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014702 *
14703 * Returns the resulting document tree
14704 */
14705xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000014706xmlReadFd(int fd, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014707{
14708 xmlParserCtxtPtr ctxt;
14709 xmlParserInputBufferPtr input;
14710 xmlParserInputPtr stream;
14711
14712 if (fd < 0)
14713 return (NULL);
14714
14715 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
14716 if (input == NULL)
14717 return (NULL);
Daniel Veillard4bc5f432003-12-22 18:13:12 +000014718 input->closecallback = NULL;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014719 ctxt = xmlNewParserCtxt();
14720 if (ctxt == NULL) {
14721 xmlFreeParserInputBuffer(input);
14722 return (NULL);
14723 }
14724 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
14725 if (stream == NULL) {
14726 xmlFreeParserInputBuffer(input);
14727 xmlFreeParserCtxt(ctxt);
14728 return (NULL);
14729 }
14730 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000014731 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014732}
14733
14734/**
14735 * xmlReadIO:
14736 * @ioread: an I/O read function
14737 * @ioclose: an I/O close function
14738 * @ioctx: an I/O handler
Daniel Veillard60942de2003-09-25 21:05:58 +000014739 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014740 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014741 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014742 *
14743 * parse an XML document from I/O functions and source and build a tree.
14744 *
14745 * Returns the resulting document tree
14746 */
14747xmlDocPtr
14748xmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
Daniel Veillard60942de2003-09-25 21:05:58 +000014749 void *ioctx, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014750{
14751 xmlParserCtxtPtr ctxt;
14752 xmlParserInputBufferPtr input;
14753 xmlParserInputPtr stream;
14754
14755 if (ioread == NULL)
14756 return (NULL);
14757
14758 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
14759 XML_CHAR_ENCODING_NONE);
14760 if (input == NULL)
14761 return (NULL);
14762 ctxt = xmlNewParserCtxt();
14763 if (ctxt == NULL) {
14764 xmlFreeParserInputBuffer(input);
14765 return (NULL);
14766 }
14767 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
14768 if (stream == NULL) {
14769 xmlFreeParserInputBuffer(input);
14770 xmlFreeParserCtxt(ctxt);
14771 return (NULL);
14772 }
14773 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000014774 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014775}
14776
14777/**
14778 * xmlCtxtReadDoc:
14779 * @ctxt: an XML parser context
14780 * @cur: a pointer to a zero terminated string
Daniel Veillard60942de2003-09-25 21:05:58 +000014781 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014782 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014783 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014784 *
14785 * parse an XML in-memory document and build a tree.
14786 * This reuses the existing @ctxt parser context
14787 *
14788 * Returns the resulting document tree
14789 */
14790xmlDocPtr
14791xmlCtxtReadDoc(xmlParserCtxtPtr ctxt, const xmlChar * cur,
Daniel Veillard60942de2003-09-25 21:05:58 +000014792 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014793{
14794 xmlParserInputPtr stream;
14795
14796 if (cur == NULL)
14797 return (NULL);
14798 if (ctxt == NULL)
14799 return (NULL);
14800
14801 xmlCtxtReset(ctxt);
14802
14803 stream = xmlNewStringInputStream(ctxt, cur);
14804 if (stream == NULL) {
14805 return (NULL);
14806 }
14807 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000014808 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014809}
14810
14811/**
14812 * xmlCtxtReadFile:
14813 * @ctxt: an XML parser context
14814 * @filename: a file or URL
14815 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014816 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014817 *
14818 * parse an XML file from the filesystem or the network.
14819 * This reuses the existing @ctxt parser context
14820 *
14821 * Returns the resulting document tree
14822 */
14823xmlDocPtr
14824xmlCtxtReadFile(xmlParserCtxtPtr ctxt, const char *filename,
14825 const char *encoding, int options)
14826{
14827 xmlParserInputPtr stream;
14828
14829 if (filename == NULL)
14830 return (NULL);
14831 if (ctxt == NULL)
14832 return (NULL);
14833
14834 xmlCtxtReset(ctxt);
14835
Daniel Veillard29614c72004-11-26 10:47:26 +000014836 stream = xmlLoadExternalEntity(filename, NULL, ctxt);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014837 if (stream == NULL) {
14838 return (NULL);
14839 }
14840 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000014841 return (xmlDoRead(ctxt, NULL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014842}
14843
14844/**
14845 * xmlCtxtReadMemory:
14846 * @ctxt: an XML parser context
14847 * @buffer: a pointer to a char array
14848 * @size: the size of the array
Daniel Veillard60942de2003-09-25 21:05:58 +000014849 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014850 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014851 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014852 *
14853 * parse an XML in-memory document and build a tree.
14854 * This reuses the existing @ctxt parser context
14855 *
14856 * Returns the resulting document tree
14857 */
14858xmlDocPtr
14859xmlCtxtReadMemory(xmlParserCtxtPtr ctxt, const char *buffer, int size,
Daniel Veillard60942de2003-09-25 21:05:58 +000014860 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014861{
14862 xmlParserInputBufferPtr input;
14863 xmlParserInputPtr stream;
14864
14865 if (ctxt == NULL)
14866 return (NULL);
14867 if (buffer == NULL)
14868 return (NULL);
14869
14870 xmlCtxtReset(ctxt);
14871
14872 input = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
14873 if (input == NULL) {
14874 return(NULL);
14875 }
14876
14877 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
14878 if (stream == NULL) {
14879 xmlFreeParserInputBuffer(input);
14880 return(NULL);
14881 }
14882
14883 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000014884 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014885}
14886
14887/**
14888 * xmlCtxtReadFd:
14889 * @ctxt: an XML parser context
14890 * @fd: an open file descriptor
Daniel Veillard60942de2003-09-25 21:05:58 +000014891 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014892 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014893 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014894 *
14895 * parse an XML from a file descriptor and build a tree.
14896 * This reuses the existing @ctxt parser context
Daniel Veillard4bc5f432003-12-22 18:13:12 +000014897 * NOTE that the file descriptor will not be closed when the
14898 * reader is closed or reset.
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014899 *
14900 * Returns the resulting document tree
14901 */
14902xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000014903xmlCtxtReadFd(xmlParserCtxtPtr ctxt, int fd,
14904 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014905{
14906 xmlParserInputBufferPtr input;
14907 xmlParserInputPtr stream;
14908
14909 if (fd < 0)
14910 return (NULL);
14911 if (ctxt == NULL)
14912 return (NULL);
14913
14914 xmlCtxtReset(ctxt);
14915
14916
14917 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
14918 if (input == NULL)
14919 return (NULL);
Daniel Veillard4bc5f432003-12-22 18:13:12 +000014920 input->closecallback = NULL;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014921 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
14922 if (stream == NULL) {
14923 xmlFreeParserInputBuffer(input);
14924 return (NULL);
14925 }
14926 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000014927 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014928}
14929
14930/**
14931 * xmlCtxtReadIO:
14932 * @ctxt: an XML parser context
14933 * @ioread: an I/O read function
14934 * @ioclose: an I/O close function
14935 * @ioctx: an I/O handler
Daniel Veillard60942de2003-09-25 21:05:58 +000014936 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014937 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014938 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014939 *
14940 * parse an XML document from I/O functions and source and build a tree.
14941 * This reuses the existing @ctxt parser context
14942 *
14943 * Returns the resulting document tree
14944 */
14945xmlDocPtr
14946xmlCtxtReadIO(xmlParserCtxtPtr ctxt, xmlInputReadCallback ioread,
14947 xmlInputCloseCallback ioclose, void *ioctx,
Daniel Veillard60942de2003-09-25 21:05:58 +000014948 const char *URL,
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014949 const char *encoding, int options)
14950{
14951 xmlParserInputBufferPtr input;
14952 xmlParserInputPtr stream;
14953
14954 if (ioread == NULL)
14955 return (NULL);
14956 if (ctxt == NULL)
14957 return (NULL);
14958
14959 xmlCtxtReset(ctxt);
14960
14961 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
14962 XML_CHAR_ENCODING_NONE);
14963 if (input == NULL)
14964 return (NULL);
14965 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
14966 if (stream == NULL) {
14967 xmlFreeParserInputBuffer(input);
14968 return (NULL);
14969 }
14970 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000014971 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014972}
Daniel Veillard5d4644e2005-04-01 13:11:58 +000014973
14974#define bottom_parser
14975#include "elfgcchack.h"