blob: 96190109f8dc5bc2794aab809c7752919b82fc4b [file] [log] [blame]
/*
 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
 *            implemented on top of the SAX interfaces
 *
 * References:
 *   The XML specification:
 *     http://www.w3.org/TR/REC-xml
 *   Original 1.0 version:
 *     http://www.w3.org/TR/1998/REC-xml-19980210
 *   XML second edition working draft
 *     http://www.w3.org/TR/2000/WD-xml-2e-20000814
 *
 * Okay this is a big file, the parser core is around 7000 lines, then it
 * is followed by the progressive parser top routines, then the various
 * high level APIs to call the parser and a few miscellaneous functions.
 * A number of helper functions and deprecated ones have been moved to
 * parserInternals.c to reduce this file size.
 * As much as possible the functions are associated with their relative
 * production in the XML specification. A few productions defining the
 * different ranges of characters are actually implemented either in
 * parserInternals.h or parserInternals.c
 * The DOM tree build is realized from the default SAX callbacks in
 * the module SAX.c.
 * The routines doing the validation checks are in valid.c and called either
 * from the SAX callbacks or as standalone functions using a preparsed
 * document.
 *
 * See Copyright for the status of this software.
 *
 * daniel@veillard.com
 */
32
Daniel Veillard34ce8be2002-03-18 19:37:11 +000033#define IN_LIBXML
Bjorn Reese70a9da52001-04-21 16:57:29 +000034#include "libxml.h"
35
Daniel Veillard3c5ed912002-01-08 10:36:16 +000036#if defined(WIN32) && !defined (__CYGWIN__)
Owen Taylor3473f882001-02-23 17:55:21 +000037#define XML_DIR_SEP '\\'
38#else
Owen Taylor3473f882001-02-23 17:55:21 +000039#define XML_DIR_SEP '/'
40#endif
41
Owen Taylor3473f882001-02-23 17:55:21 +000042#include <stdlib.h>
43#include <string.h>
Aleksey Sanine7acf432003-10-02 20:05:27 +000044#include <stdarg.h>
Owen Taylor3473f882001-02-23 17:55:21 +000045#include <libxml/xmlmemory.h>
Daniel Veillardd0463562001-10-13 09:15:48 +000046#include <libxml/threads.h>
47#include <libxml/globals.h>
Owen Taylor3473f882001-02-23 17:55:21 +000048#include <libxml/tree.h>
49#include <libxml/parser.h>
50#include <libxml/parserInternals.h>
51#include <libxml/valid.h>
52#include <libxml/entities.h>
53#include <libxml/xmlerror.h>
54#include <libxml/encoding.h>
55#include <libxml/xmlIO.h>
56#include <libxml/uri.h>
Daniel Veillard5d90b6c2001-08-22 14:29:45 +000057#ifdef LIBXML_CATALOG_ENABLED
58#include <libxml/catalog.h>
59#endif
William M. Brack1d8c9b22004-12-25 10:14:57 +000060#ifdef LIBXML_SCHEMAS_ENABLED
61#include <libxml/xmlschemastypes.h>
62#include <libxml/relaxng.h>
63#endif
Owen Taylor3473f882001-02-23 17:55:21 +000064#ifdef HAVE_CTYPE_H
65#include <ctype.h>
66#endif
67#ifdef HAVE_STDLIB_H
68#include <stdlib.h>
69#endif
70#ifdef HAVE_SYS_STAT_H
71#include <sys/stat.h>
72#endif
73#ifdef HAVE_FCNTL_H
74#include <fcntl.h>
75#endif
76#ifdef HAVE_UNISTD_H
77#include <unistd.h>
78#endif
79#ifdef HAVE_ZLIB_H
80#include <zlib.h>
81#endif
Anders F Bjorklundeae52612011-09-18 16:59:13 +020082#ifdef HAVE_LZMA_H
83#include <lzma.h>
84#endif
Owen Taylor3473f882001-02-23 17:55:21 +000085
Daniel Veillard0161e632008-08-28 15:36:32 +000086static void
87xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info);
88
Rob Richards9c0aa472009-03-26 18:10:19 +000089static xmlParserCtxtPtr
90xmlCreateEntityParserCtxtInternal(const xmlChar *URL, const xmlChar *ID,
91 const xmlChar *base, xmlParserCtxtPtr pctx);
92
Daniel Veillard0161e632008-08-28 15:36:32 +000093/************************************************************************
94 * *
95 * Arbitrary limits set in the parser. See XML_PARSE_HUGE *
96 * *
97 ************************************************************************/
98
99#define XML_PARSER_BIG_ENTITY 1000
100#define XML_PARSER_LOT_ENTITY 5000
101
/*
 * XML_PARSER_NON_LINEAR is the threshold where the ratio of parsed entity
 * replacement over the size in bytes of the input indicates that you have
 * an exponential behaviour. A value of 10 corresponds to at least 3 entity
 * replacements per byte of input.
 */
108#define XML_PARSER_NON_LINEAR 10
109
110/*
111 * xmlParserEntityCheck
112 *
113 * Function to check non-linear entity expansion behaviour
114 * This is here to detect and stop exponential linear entity expansion
115 * This is not a limitation of the parser but a safety
116 * boundary feature. It can be disabled with the XML_PARSE_HUGE
117 * parser option.
118 */
119static int
120xmlParserEntityCheck(xmlParserCtxtPtr ctxt, unsigned long size,
121 xmlEntityPtr ent)
122{
Daniel Veillardcba68392008-08-29 12:43:40 +0000123 unsigned long consumed = 0;
Daniel Veillard0161e632008-08-28 15:36:32 +0000124
125 if ((ctxt == NULL) || (ctxt->options & XML_PARSE_HUGE))
126 return (0);
127 if (ctxt->lastError.code == XML_ERR_ENTITY_LOOP)
128 return (1);
129 if (size != 0) {
130 /*
131 * Do the check based on the replacement size of the entity
132 */
133 if (size < XML_PARSER_BIG_ENTITY)
134 return(0);
135
136 /*
137 * A limit on the amount of text data reasonably used
138 */
139 if (ctxt->input != NULL) {
140 consumed = ctxt->input->consumed +
141 (ctxt->input->cur - ctxt->input->base);
142 }
143 consumed += ctxt->sizeentities;
144
145 if ((size < XML_PARSER_NON_LINEAR * consumed) &&
146 (ctxt->nbentities * 3 < XML_PARSER_NON_LINEAR * consumed))
147 return (0);
148 } else if (ent != NULL) {
149 /*
150 * use the number of parsed entities in the replacement
151 */
152 size = ent->checked;
153
154 /*
155 * The amount of data parsed counting entities size only once
156 */
157 if (ctxt->input != NULL) {
158 consumed = ctxt->input->consumed +
159 (ctxt->input->cur - ctxt->input->base);
160 }
161 consumed += ctxt->sizeentities;
162
163 /*
164 * Check the density of entities for the amount of data
165 * knowing an entity reference will take at least 3 bytes
166 */
167 if (size * 3 < consumed * XML_PARSER_NON_LINEAR)
168 return (0);
169 } else {
170 /*
171 * strange we got no data for checking just return
172 */
173 return (0);
174 }
175
176 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
177 return (1);
178}
179
Daniel Veillard3b2e4e12003-02-03 08:52:58 +0000180/**
Daniel Veillard4aede2e2003-10-17 12:43:59 +0000181 * xmlParserMaxDepth:
Daniel Veillard3b2e4e12003-02-03 08:52:58 +0000182 *
Daniel Veillard8915c152008-08-26 13:05:34 +0000183 * arbitrary depth limit for the XML documents that we allow to
184 * process. This is not a limitation of the parser but a safety
185 * boundary feature. It can be disabled with the XML_PARSE_HUGE
186 * parser option.
Daniel Veillard3b2e4e12003-02-03 08:52:58 +0000187 */
Daniel Veillard8915c152008-08-26 13:05:34 +0000188unsigned int xmlParserMaxDepth = 256;
Owen Taylor3473f882001-02-23 17:55:21 +0000189
Daniel Veillard0fb18932003-09-07 09:14:37 +0000190
Daniel Veillard0161e632008-08-28 15:36:32 +0000191
192#define SAX2 1
Daniel Veillard21a0f912001-02-25 19:54:14 +0000193#define XML_PARSER_BIG_BUFFER_SIZE 300
Owen Taylor3473f882001-02-23 17:55:21 +0000194#define XML_PARSER_BUFFER_SIZE 100
Daniel Veillard5997aca2002-03-18 18:36:20 +0000195#define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"
196
/*
 * List of XML prefixed PI allowed by W3C specs.
 * The table is NULL-terminated and read-only: both the strings and the
 * pointers stored in the array are const.
 */

static const char * const xmlW3CPIs[] = {
    "xml-stylesheet",
    "xml-model",
    NULL
};
206
Daniel Veillarda07050d2003-10-19 14:46:32 +0000207
Owen Taylor3473f882001-02-23 17:55:21 +0000208/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
Daniel Veillard8ed10722009-08-20 19:17:36 +0200209static xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
210 const xmlChar **str);
Owen Taylor3473f882001-02-23 17:55:21 +0000211
Daniel Veillard7d515752003-09-26 19:12:37 +0000212static xmlParserErrors
Daniel Veillarda97a19b2001-05-20 13:19:52 +0000213xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
214 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +0000215 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +0000216 const xmlChar *ID, xmlNodePtr *list);
Owen Taylor3473f882001-02-23 17:55:21 +0000217
Daniel Veillard37334572008-07-31 08:20:02 +0000218static int
219xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options,
220 const char *encoding);
Daniel Veillard81273902003-09-30 00:43:48 +0000221#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +0000222static void
223xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
224 xmlNodePtr lastNode);
Daniel Veillard81273902003-09-30 00:43:48 +0000225#endif /* LIBXML_LEGACY_ENABLED */
Daniel Veillard8107a222002-01-13 14:10:10 +0000226
Daniel Veillard7d515752003-09-26 19:12:37 +0000227static xmlParserErrors
Daniel Veillard328f48c2002-11-15 15:24:34 +0000228xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
229 const xmlChar *string, void *user_data, xmlNodePtr *lst);
Daniel Veillarde57ec792003-09-10 10:50:59 +0000230
Daniel Veillard8bf64ae2008-03-24 20:45:21 +0000231static int
232xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity);
233
Daniel Veillarde57ec792003-09-10 10:50:59 +0000234/************************************************************************
235 * *
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000236 * Some factorized error routines *
237 * *
238 ************************************************************************/
239
240/**
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000241 * xmlErrAttributeDup:
242 * @ctxt: an XML parser context
243 * @prefix: the attribute prefix
244 * @localname: the attribute localname
245 *
246 * Handle a redefinition of attribute error
247 */
248static void
249xmlErrAttributeDup(xmlParserCtxtPtr ctxt, const xmlChar * prefix,
250 const xmlChar * localname)
251{
Daniel Veillard157fee02003-10-31 10:36:03 +0000252 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
253 (ctxt->instate == XML_PARSER_EOF))
254 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000255 if (ctxt != NULL)
256 ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
Daniel Veillard76d36452009-09-07 11:19:33 +0200257
Daniel Veillard2b8c4a12003-10-02 22:28:19 +0000258 if (prefix == NULL)
Daniel Veillard659e71e2003-10-10 14:10:40 +0000259 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
Daniel Veillard76d36452009-09-07 11:19:33 +0200260 XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000261 (const char *) localname, NULL, NULL, 0, 0,
262 "Attribute %s redefined\n", localname);
Daniel Veillard2b8c4a12003-10-02 22:28:19 +0000263 else
Daniel Veillard659e71e2003-10-10 14:10:40 +0000264 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
Daniel Veillard76d36452009-09-07 11:19:33 +0200265 XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000266 (const char *) prefix, (const char *) localname,
267 NULL, 0, 0, "Attribute %s:%s redefined\n", prefix,
268 localname);
Daniel Veillard30e76072006-03-09 14:13:55 +0000269 if (ctxt != NULL) {
270 ctxt->wellFormed = 0;
271 if (ctxt->recovery == 0)
272 ctxt->disableSAX = 1;
273 }
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000274}
275
276/**
277 * xmlFatalErr:
278 * @ctxt: an XML parser context
279 * @error: the error number
280 * @extra: extra information string
281 *
282 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
283 */
284static void
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000285xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info)
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000286{
287 const char *errmsg;
288
Daniel Veillard157fee02003-10-31 10:36:03 +0000289 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
290 (ctxt->instate == XML_PARSER_EOF))
291 return;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000292 switch (error) {
293 case XML_ERR_INVALID_HEX_CHARREF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000294 errmsg = "CharRef: invalid hexadecimal value\n";
295 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000296 case XML_ERR_INVALID_DEC_CHARREF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000297 errmsg = "CharRef: invalid decimal value\n";
298 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000299 case XML_ERR_INVALID_CHARREF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000300 errmsg = "CharRef: invalid value\n";
301 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000302 case XML_ERR_INTERNAL_ERROR:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000303 errmsg = "internal error";
304 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000305 case XML_ERR_PEREF_AT_EOF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000306 errmsg = "PEReference at end of document\n";
307 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000308 case XML_ERR_PEREF_IN_PROLOG:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000309 errmsg = "PEReference in prolog\n";
310 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000311 case XML_ERR_PEREF_IN_EPILOG:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000312 errmsg = "PEReference in epilog\n";
313 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000314 case XML_ERR_PEREF_NO_NAME:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000315 errmsg = "PEReference: no name\n";
316 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000317 case XML_ERR_PEREF_SEMICOL_MISSING:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000318 errmsg = "PEReference: expecting ';'\n";
319 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000320 case XML_ERR_ENTITY_LOOP:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000321 errmsg = "Detected an entity reference loop\n";
322 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000323 case XML_ERR_ENTITY_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000324 errmsg = "EntityValue: \" or ' expected\n";
325 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000326 case XML_ERR_ENTITY_PE_INTERNAL:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000327 errmsg = "PEReferences forbidden in internal subset\n";
328 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000329 case XML_ERR_ENTITY_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000330 errmsg = "EntityValue: \" or ' expected\n";
331 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000332 case XML_ERR_ATTRIBUTE_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000333 errmsg = "AttValue: \" or ' expected\n";
334 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000335 case XML_ERR_LT_IN_ATTRIBUTE:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000336 errmsg = "Unescaped '<' not allowed in attributes values\n";
337 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000338 case XML_ERR_LITERAL_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000339 errmsg = "SystemLiteral \" or ' expected\n";
340 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000341 case XML_ERR_LITERAL_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000342 errmsg = "Unfinished System or Public ID \" or ' expected\n";
343 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000344 case XML_ERR_MISPLACED_CDATA_END:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000345 errmsg = "Sequence ']]>' not allowed in content\n";
346 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000347 case XML_ERR_URI_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000348 errmsg = "SYSTEM or PUBLIC, the URI is missing\n";
349 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000350 case XML_ERR_PUBID_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000351 errmsg = "PUBLIC, the Public Identifier is missing\n";
352 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000353 case XML_ERR_HYPHEN_IN_COMMENT:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000354 errmsg = "Comment must not contain '--' (double-hyphen)\n";
355 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000356 case XML_ERR_PI_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000357 errmsg = "xmlParsePI : no target name\n";
358 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000359 case XML_ERR_RESERVED_XML_NAME:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000360 errmsg = "Invalid PI name\n";
361 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000362 case XML_ERR_NOTATION_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000363 errmsg = "NOTATION: Name expected here\n";
364 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000365 case XML_ERR_NOTATION_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000366 errmsg = "'>' required to close NOTATION declaration\n";
367 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000368 case XML_ERR_VALUE_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000369 errmsg = "Entity value required\n";
370 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000371 case XML_ERR_URI_FRAGMENT:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000372 errmsg = "Fragment not allowed";
373 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000374 case XML_ERR_ATTLIST_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000375 errmsg = "'(' required to start ATTLIST enumeration\n";
376 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000377 case XML_ERR_NMTOKEN_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000378 errmsg = "NmToken expected in ATTLIST enumeration\n";
379 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000380 case XML_ERR_ATTLIST_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000381 errmsg = "')' required to finish ATTLIST enumeration\n";
382 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000383 case XML_ERR_MIXED_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000384 errmsg = "MixedContentDecl : '|' or ')*' expected\n";
385 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000386 case XML_ERR_PCDATA_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000387 errmsg = "MixedContentDecl : '#PCDATA' expected\n";
388 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000389 case XML_ERR_ELEMCONTENT_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000390 errmsg = "ContentDecl : Name or '(' expected\n";
391 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000392 case XML_ERR_ELEMCONTENT_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000393 errmsg = "ContentDecl : ',' '|' or ')' expected\n";
394 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000395 case XML_ERR_PEREF_IN_INT_SUBSET:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000396 errmsg =
397 "PEReference: forbidden within markup decl in internal subset\n";
398 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000399 case XML_ERR_GT_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000400 errmsg = "expected '>'\n";
401 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000402 case XML_ERR_CONDSEC_INVALID:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000403 errmsg = "XML conditional section '[' expected\n";
404 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000405 case XML_ERR_EXT_SUBSET_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000406 errmsg = "Content error in the external subset\n";
407 break;
408 case XML_ERR_CONDSEC_INVALID_KEYWORD:
409 errmsg =
410 "conditional section INCLUDE or IGNORE keyword expected\n";
411 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000412 case XML_ERR_CONDSEC_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000413 errmsg = "XML conditional section not closed\n";
414 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000415 case XML_ERR_XMLDECL_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000416 errmsg = "Text declaration '<?xml' required\n";
417 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000418 case XML_ERR_XMLDECL_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000419 errmsg = "parsing XML declaration: '?>' expected\n";
420 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000421 case XML_ERR_EXT_ENTITY_STANDALONE:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000422 errmsg = "external parsed entities cannot be standalone\n";
423 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000424 case XML_ERR_ENTITYREF_SEMICOL_MISSING:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000425 errmsg = "EntityRef: expecting ';'\n";
426 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000427 case XML_ERR_DOCTYPE_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000428 errmsg = "DOCTYPE improperly terminated\n";
429 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000430 case XML_ERR_LTSLASH_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000431 errmsg = "EndTag: '</' not found\n";
432 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000433 case XML_ERR_EQUAL_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000434 errmsg = "expected '='\n";
435 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000436 case XML_ERR_STRING_NOT_CLOSED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000437 errmsg = "String not closed expecting \" or '\n";
438 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000439 case XML_ERR_STRING_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000440 errmsg = "String not started expecting ' or \"\n";
441 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000442 case XML_ERR_ENCODING_NAME:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000443 errmsg = "Invalid XML encoding name\n";
444 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000445 case XML_ERR_STANDALONE_VALUE:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000446 errmsg = "standalone accepts only 'yes' or 'no'\n";
447 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000448 case XML_ERR_DOCUMENT_EMPTY:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000449 errmsg = "Document is empty\n";
450 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000451 case XML_ERR_DOCUMENT_END:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000452 errmsg = "Extra content at the end of the document\n";
453 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000454 case XML_ERR_NOT_WELL_BALANCED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000455 errmsg = "chunk is not well balanced\n";
456 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000457 case XML_ERR_EXTRA_CONTENT:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000458 errmsg = "extra content at the end of well balanced chunk\n";
459 break;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000460 case XML_ERR_VERSION_MISSING:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000461 errmsg = "Malformed declaration expecting version\n";
462 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000463#if 0
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000464 case:
465 errmsg = "\n";
466 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000467#endif
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000468 default:
469 errmsg = "Unregistered error message\n";
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000470 }
Daniel Veillard30e76072006-03-09 14:13:55 +0000471 if (ctxt != NULL)
472 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000473 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000474 XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, errmsg,
475 info);
Daniel Veillard30e76072006-03-09 14:13:55 +0000476 if (ctxt != NULL) {
477 ctxt->wellFormed = 0;
478 if (ctxt->recovery == 0)
479 ctxt->disableSAX = 1;
480 }
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000481}
482
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000483/**
484 * xmlFatalErrMsg:
485 * @ctxt: an XML parser context
486 * @error: the error number
487 * @msg: the error message
488 *
489 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
490 */
491static void
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000492xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
493 const char *msg)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000494{
Daniel Veillard157fee02003-10-31 10:36:03 +0000495 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
496 (ctxt->instate == XML_PARSER_EOF))
497 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000498 if (ctxt != NULL)
499 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000500 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
Daniel Veillardbccae2d2009-06-04 11:22:45 +0200501 XML_ERR_FATAL, NULL, 0, NULL, NULL, NULL, 0, 0, "%s", msg);
Daniel Veillard30e76072006-03-09 14:13:55 +0000502 if (ctxt != NULL) {
503 ctxt->wellFormed = 0;
504 if (ctxt->recovery == 0)
505 ctxt->disableSAX = 1;
506 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000507}
508
509/**
Daniel Veillard24eb9782003-10-04 21:08:09 +0000510 * xmlWarningMsg:
511 * @ctxt: an XML parser context
512 * @error: the error number
513 * @msg: the error message
514 * @str1: extra data
515 * @str2: extra data
516 *
517 * Handle a warning.
518 */
519static void
520xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
521 const char *msg, const xmlChar *str1, const xmlChar *str2)
522{
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000523 xmlStructuredErrorFunc schannel = NULL;
Daniel Veillard34e3f642008-07-29 09:02:27 +0000524
Daniel Veillard157fee02003-10-31 10:36:03 +0000525 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
526 (ctxt->instate == XML_PARSER_EOF))
527 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000528 if ((ctxt != NULL) && (ctxt->sax != NULL) &&
529 (ctxt->sax->initialized == XML_SAX2_MAGIC))
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000530 schannel = ctxt->sax->serror;
Daniel Veillardd44b9362009-09-07 12:15:08 +0200531 if (ctxt != NULL) {
532 __xmlRaiseError(schannel,
Daniel Veillard659e71e2003-10-10 14:10:40 +0000533 (ctxt->sax) ? ctxt->sax->warning : NULL,
534 ctxt->userData,
Daniel Veillard24eb9782003-10-04 21:08:09 +0000535 ctxt, NULL, XML_FROM_PARSER, error,
536 XML_ERR_WARNING, NULL, 0,
537 (const char *) str1, (const char *) str2, NULL, 0, 0,
538 msg, (const char *) str1, (const char *) str2);
Daniel Veillardd44b9362009-09-07 12:15:08 +0200539 } else {
540 __xmlRaiseError(schannel, NULL, NULL,
541 ctxt, NULL, XML_FROM_PARSER, error,
542 XML_ERR_WARNING, NULL, 0,
543 (const char *) str1, (const char *) str2, NULL, 0, 0,
544 msg, (const char *) str1, (const char *) str2);
545 }
Daniel Veillard24eb9782003-10-04 21:08:09 +0000546}
547
548/**
549 * xmlValidityError:
550 * @ctxt: an XML parser context
551 * @error: the error number
552 * @msg: the error message
553 * @str1: extra data
554 *
Daniel Veillardf88d8cf2003-12-08 10:25:02 +0000555 * Handle a validity error.
Daniel Veillard24eb9782003-10-04 21:08:09 +0000556 */
557static void
558xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error,
Daniel Veillardae0765b2008-07-31 19:54:59 +0000559 const char *msg, const xmlChar *str1, const xmlChar *str2)
Daniel Veillard24eb9782003-10-04 21:08:09 +0000560{
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000561 xmlStructuredErrorFunc schannel = NULL;
Daniel Veillard157fee02003-10-31 10:36:03 +0000562
563 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
564 (ctxt->instate == XML_PARSER_EOF))
565 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000566 if (ctxt != NULL) {
567 ctxt->errNo = error;
568 if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC))
569 schannel = ctxt->sax->serror;
570 }
Daniel Veillard76d36452009-09-07 11:19:33 +0200571 if (ctxt != NULL) {
572 __xmlRaiseError(schannel,
Daniel Veillard659e71e2003-10-10 14:10:40 +0000573 ctxt->vctxt.error, ctxt->vctxt.userData,
Daniel Veillard24eb9782003-10-04 21:08:09 +0000574 ctxt, NULL, XML_FROM_DTD, error,
575 XML_ERR_ERROR, NULL, 0, (const char *) str1,
Daniel Veillardae0765b2008-07-31 19:54:59 +0000576 (const char *) str2, NULL, 0, 0,
577 msg, (const char *) str1, (const char *) str2);
Daniel Veillard30e76072006-03-09 14:13:55 +0000578 ctxt->valid = 0;
Daniel Veillard76d36452009-09-07 11:19:33 +0200579 } else {
580 __xmlRaiseError(schannel, NULL, NULL,
581 ctxt, NULL, XML_FROM_DTD, error,
582 XML_ERR_ERROR, NULL, 0, (const char *) str1,
583 (const char *) str2, NULL, 0, 0,
584 msg, (const char *) str1, (const char *) str2);
Daniel Veillard30e76072006-03-09 14:13:55 +0000585 }
Daniel Veillard24eb9782003-10-04 21:08:09 +0000586}
587
588/**
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000589 * xmlFatalErrMsgInt:
590 * @ctxt: an XML parser context
591 * @error: the error number
592 * @msg: the error message
593 * @val: an integer value
594 *
595 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
596 */
597static void
598xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000599 const char *msg, int val)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000600{
Daniel Veillard157fee02003-10-31 10:36:03 +0000601 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
602 (ctxt->instate == XML_PARSER_EOF))
603 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000604 if (ctxt != NULL)
605 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000606 __xmlRaiseError(NULL, NULL, NULL,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000607 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
608 NULL, 0, NULL, NULL, NULL, val, 0, msg, val);
Daniel Veillard30e76072006-03-09 14:13:55 +0000609 if (ctxt != NULL) {
610 ctxt->wellFormed = 0;
611 if (ctxt->recovery == 0)
612 ctxt->disableSAX = 1;
613 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000614}
615
616/**
Daniel Veillardf403d292003-10-05 13:51:35 +0000617 * xmlFatalErrMsgStrIntStr:
618 * @ctxt: an XML parser context
619 * @error: the error number
620 * @msg: the error message
621 * @str1: an string info
622 * @val: an integer value
623 * @str2: an string info
624 *
625 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
626 */
627static void
628xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
629 const char *msg, const xmlChar *str1, int val,
630 const xmlChar *str2)
631{
Daniel Veillard157fee02003-10-31 10:36:03 +0000632 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
633 (ctxt->instate == XML_PARSER_EOF))
634 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000635 if (ctxt != NULL)
636 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000637 __xmlRaiseError(NULL, NULL, NULL,
Daniel Veillardf403d292003-10-05 13:51:35 +0000638 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
639 NULL, 0, (const char *) str1, (const char *) str2,
640 NULL, val, 0, msg, str1, val, str2);
Daniel Veillard30e76072006-03-09 14:13:55 +0000641 if (ctxt != NULL) {
642 ctxt->wellFormed = 0;
643 if (ctxt->recovery == 0)
644 ctxt->disableSAX = 1;
645 }
Daniel Veillardf403d292003-10-05 13:51:35 +0000646}
647
648/**
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000649 * xmlFatalErrMsgStr:
650 * @ctxt: an XML parser context
651 * @error: the error number
652 * @msg: the error message
653 * @val: a string value
654 *
655 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
656 */
657static void
658xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000659 const char *msg, const xmlChar * val)
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000660{
Daniel Veillard157fee02003-10-31 10:36:03 +0000661 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
662 (ctxt->instate == XML_PARSER_EOF))
663 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000664 if (ctxt != NULL)
665 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000666 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000667 XML_FROM_PARSER, error, XML_ERR_FATAL,
668 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
669 val);
Daniel Veillard30e76072006-03-09 14:13:55 +0000670 if (ctxt != NULL) {
671 ctxt->wellFormed = 0;
672 if (ctxt->recovery == 0)
673 ctxt->disableSAX = 1;
674 }
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000675}
676
677/**
Daniel Veillardf403d292003-10-05 13:51:35 +0000678 * xmlErrMsgStr:
679 * @ctxt: an XML parser context
680 * @error: the error number
681 * @msg: the error message
682 * @val: a string value
683 *
684 * Handle a non fatal parser error
685 */
686static void
687xmlErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
688 const char *msg, const xmlChar * val)
689{
Daniel Veillard157fee02003-10-31 10:36:03 +0000690 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
691 (ctxt->instate == XML_PARSER_EOF))
692 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000693 if (ctxt != NULL)
694 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000695 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
Daniel Veillardf403d292003-10-05 13:51:35 +0000696 XML_FROM_PARSER, error, XML_ERR_ERROR,
697 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
698 val);
699}
700
701/**
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000702 * xmlNsErr:
703 * @ctxt: an XML parser context
704 * @error: the error number
705 * @msg: the message
706 * @info1: extra information string
707 * @info2: extra information string
708 *
709 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
710 */
711static void
712xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
713 const char *msg,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000714 const xmlChar * info1, const xmlChar * info2,
715 const xmlChar * info3)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000716{
Daniel Veillard157fee02003-10-31 10:36:03 +0000717 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
718 (ctxt->instate == XML_PARSER_EOF))
719 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000720 if (ctxt != NULL)
721 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000722 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000723 XML_ERR_ERROR, NULL, 0, (const char *) info1,
724 (const char *) info2, (const char *) info3, 0, 0, msg,
725 info1, info2, info3);
Daniel Veillard30e76072006-03-09 14:13:55 +0000726 if (ctxt != NULL)
727 ctxt->nsWellFormed = 0;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000728}
729
Daniel Veillard37334572008-07-31 08:20:02 +0000730/**
731 * xmlNsWarn
732 * @ctxt: an XML parser context
733 * @error: the error number
734 * @msg: the message
735 * @info1: extra information string
736 * @info2: extra information string
737 *
Daniel Veillard288bb622012-05-07 15:01:29 +0800738 * Handle a namespace warning error
Daniel Veillard37334572008-07-31 08:20:02 +0000739 */
740static void
741xmlNsWarn(xmlParserCtxtPtr ctxt, xmlParserErrors error,
742 const char *msg,
743 const xmlChar * info1, const xmlChar * info2,
744 const xmlChar * info3)
745{
746 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
747 (ctxt->instate == XML_PARSER_EOF))
748 return;
Daniel Veillard37334572008-07-31 08:20:02 +0000749 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
750 XML_ERR_WARNING, NULL, 0, (const char *) info1,
751 (const char *) info2, (const char *) info3, 0, 0, msg,
752 info1, info2, info3);
753}
754
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000755/************************************************************************
756 * *
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000757 * Library wide options *
758 * *
759 ************************************************************************/
760
761/**
762 * xmlHasFeature:
763 * @feature: the feature to be examined
764 *
765 * Examines if the library has been compiled with a given feature.
766 *
767 * Returns a non-zero value if the feature exist, otherwise zero.
768 * Returns zero (0) if the feature does not exist or an unknown
769 * unknown feature is requested, non-zero otherwise.
770 */
771int
772xmlHasFeature(xmlFeature feature)
773{
774 switch (feature) {
Daniel Veillard602434d2005-09-12 09:20:31 +0000775 case XML_WITH_THREAD:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000776#ifdef LIBXML_THREAD_ENABLED
777 return(1);
778#else
779 return(0);
780#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000781 case XML_WITH_TREE:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000782#ifdef LIBXML_TREE_ENABLED
783 return(1);
784#else
785 return(0);
786#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000787 case XML_WITH_OUTPUT:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000788#ifdef LIBXML_OUTPUT_ENABLED
789 return(1);
790#else
791 return(0);
792#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000793 case XML_WITH_PUSH:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000794#ifdef LIBXML_PUSH_ENABLED
795 return(1);
796#else
797 return(0);
798#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000799 case XML_WITH_READER:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000800#ifdef LIBXML_READER_ENABLED
801 return(1);
802#else
803 return(0);
804#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000805 case XML_WITH_PATTERN:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000806#ifdef LIBXML_PATTERN_ENABLED
807 return(1);
808#else
809 return(0);
810#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000811 case XML_WITH_WRITER:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000812#ifdef LIBXML_WRITER_ENABLED
813 return(1);
814#else
815 return(0);
816#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000817 case XML_WITH_SAX1:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000818#ifdef LIBXML_SAX1_ENABLED
819 return(1);
820#else
821 return(0);
822#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000823 case XML_WITH_FTP:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000824#ifdef LIBXML_FTP_ENABLED
825 return(1);
826#else
827 return(0);
828#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000829 case XML_WITH_HTTP:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000830#ifdef LIBXML_HTTP_ENABLED
831 return(1);
832#else
833 return(0);
834#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000835 case XML_WITH_VALID:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000836#ifdef LIBXML_VALID_ENABLED
837 return(1);
838#else
839 return(0);
840#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000841 case XML_WITH_HTML:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000842#ifdef LIBXML_HTML_ENABLED
843 return(1);
844#else
845 return(0);
846#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000847 case XML_WITH_LEGACY:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000848#ifdef LIBXML_LEGACY_ENABLED
849 return(1);
850#else
851 return(0);
852#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000853 case XML_WITH_C14N:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000854#ifdef LIBXML_C14N_ENABLED
855 return(1);
856#else
857 return(0);
858#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000859 case XML_WITH_CATALOG:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000860#ifdef LIBXML_CATALOG_ENABLED
861 return(1);
862#else
863 return(0);
864#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000865 case XML_WITH_XPATH:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000866#ifdef LIBXML_XPATH_ENABLED
867 return(1);
868#else
869 return(0);
870#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000871 case XML_WITH_XPTR:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000872#ifdef LIBXML_XPTR_ENABLED
873 return(1);
874#else
875 return(0);
876#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000877 case XML_WITH_XINCLUDE:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000878#ifdef LIBXML_XINCLUDE_ENABLED
879 return(1);
880#else
881 return(0);
882#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000883 case XML_WITH_ICONV:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000884#ifdef LIBXML_ICONV_ENABLED
885 return(1);
886#else
887 return(0);
888#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000889 case XML_WITH_ISO8859X:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000890#ifdef LIBXML_ISO8859X_ENABLED
891 return(1);
892#else
893 return(0);
894#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000895 case XML_WITH_UNICODE:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000896#ifdef LIBXML_UNICODE_ENABLED
897 return(1);
898#else
899 return(0);
900#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000901 case XML_WITH_REGEXP:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000902#ifdef LIBXML_REGEXP_ENABLED
903 return(1);
904#else
905 return(0);
906#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000907 case XML_WITH_AUTOMATA:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000908#ifdef LIBXML_AUTOMATA_ENABLED
909 return(1);
910#else
911 return(0);
912#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000913 case XML_WITH_EXPR:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000914#ifdef LIBXML_EXPR_ENABLED
915 return(1);
916#else
917 return(0);
918#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000919 case XML_WITH_SCHEMAS:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000920#ifdef LIBXML_SCHEMAS_ENABLED
921 return(1);
922#else
923 return(0);
924#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000925 case XML_WITH_SCHEMATRON:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000926#ifdef LIBXML_SCHEMATRON_ENABLED
927 return(1);
928#else
929 return(0);
930#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000931 case XML_WITH_MODULES:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000932#ifdef LIBXML_MODULES_ENABLED
933 return(1);
934#else
935 return(0);
936#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000937 case XML_WITH_DEBUG:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000938#ifdef LIBXML_DEBUG_ENABLED
939 return(1);
940#else
941 return(0);
942#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000943 case XML_WITH_DEBUG_MEM:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000944#ifdef DEBUG_MEMORY_LOCATION
945 return(1);
946#else
947 return(0);
948#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000949 case XML_WITH_DEBUG_RUN:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000950#ifdef LIBXML_DEBUG_RUNTIME
951 return(1);
952#else
953 return(0);
Daniel Veillard34e3f642008-07-29 09:02:27 +0000954#endif
Daniel Veillard75acfee2006-07-13 06:29:56 +0000955 case XML_WITH_ZLIB:
956#ifdef LIBXML_ZLIB_ENABLED
957 return(1);
958#else
959 return(0);
960#endif
Anders F Bjorklundeae52612011-09-18 16:59:13 +0200961 case XML_WITH_LZMA:
962#ifdef LIBXML_LZMA_ENABLED
963 return(1);
964#else
965 return(0);
966#endif
Giuseppe Iuculano48f7dcb2010-11-04 17:42:42 +0100967 case XML_WITH_ICU:
968#ifdef LIBXML_ICU_ENABLED
969 return(1);
970#else
971 return(0);
972#endif
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000973 default:
974 break;
975 }
976 return(0);
977}
978
979/************************************************************************
980 * *
Daniel Veillarde57ec792003-09-10 10:50:59 +0000981 * SAX2 defaulted attributes handling *
982 * *
983 ************************************************************************/
984
985/**
986 * xmlDetectSAX2:
987 * @ctxt: an XML parser context
988 *
989 * Do the SAX2 detection and specific intialization
990 */
991static void
992xmlDetectSAX2(xmlParserCtxtPtr ctxt) {
993 if (ctxt == NULL) return;
Daniel Veillard81273902003-09-30 00:43:48 +0000994#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +0000995 if ((ctxt->sax) && (ctxt->sax->initialized == XML_SAX2_MAGIC) &&
996 ((ctxt->sax->startElementNs != NULL) ||
997 (ctxt->sax->endElementNs != NULL))) ctxt->sax2 = 1;
Daniel Veillard81273902003-09-30 00:43:48 +0000998#else
999 ctxt->sax2 = 1;
1000#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +00001001
1002 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
1003 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
1004 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
William M. Brack9f797ab2004-07-28 07:40:12 +00001005 if ((ctxt->str_xml==NULL) || (ctxt->str_xmlns==NULL) ||
1006 (ctxt->str_xml_ns == NULL)) {
1007 xmlErrMemory(ctxt, NULL);
1008 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00001009}
1010
Daniel Veillarde57ec792003-09-10 10:50:59 +00001011typedef struct _xmlDefAttrs xmlDefAttrs;
1012typedef xmlDefAttrs *xmlDefAttrsPtr;
1013struct _xmlDefAttrs {
1014 int nbAttrs; /* number of defaulted attributes on that element */
1015 int maxAttrs; /* the size of the array */
Daniel Veillardae0765b2008-07-31 19:54:59 +00001016 const xmlChar *values[5]; /* array of localname/prefix/values/external */
Daniel Veillarde57ec792003-09-10 10:50:59 +00001017};
Daniel Veillarde57ec792003-09-10 10:50:59 +00001018
1019/**
Daniel Veillard97c9ce22008-03-25 16:52:41 +00001020 * xmlAttrNormalizeSpace:
1021 * @src: the source string
1022 * @dst: the target string
1023 *
1024 * Normalize the space in non CDATA attribute values:
1025 * If the attribute type is not CDATA, then the XML processor MUST further
1026 * process the normalized attribute value by discarding any leading and
1027 * trailing space (#x20) characters, and by replacing sequences of space
1028 * (#x20) characters by a single space (#x20) character.
1029 * Note that the size of dst need to be at least src, and if one doesn't need
1030 * to preserve dst (and it doesn't come from a dictionary or read-only) then
1031 * passing src as dst is just fine.
1032 *
1033 * Returns a pointer to the normalized value (dst) or NULL if no conversion
1034 * is needed.
1035 */
1036static xmlChar *
1037xmlAttrNormalizeSpace(const xmlChar *src, xmlChar *dst)
1038{
1039 if ((src == NULL) || (dst == NULL))
1040 return(NULL);
1041
1042 while (*src == 0x20) src++;
1043 while (*src != 0) {
1044 if (*src == 0x20) {
1045 while (*src == 0x20) src++;
1046 if (*src != 0)
1047 *dst++ = 0x20;
1048 } else {
1049 *dst++ = *src++;
1050 }
1051 }
1052 *dst = 0;
1053 if (dst == src)
1054 return(NULL);
1055 return(dst);
1056}
1057
1058/**
1059 * xmlAttrNormalizeSpace2:
1060 * @src: the source string
1061 *
1062 * Normalize the space in non CDATA attribute values, a slightly more complex
1063 * front end to avoid allocation problems when running on attribute values
1064 * coming from the input.
1065 *
1066 * Returns a pointer to the normalized value (dst) or NULL if no conversion
1067 * is needed.
1068 */
1069static const xmlChar *
Daniel Veillardae0765b2008-07-31 19:54:59 +00001070xmlAttrNormalizeSpace2(xmlParserCtxtPtr ctxt, xmlChar *src, int *len)
Daniel Veillard97c9ce22008-03-25 16:52:41 +00001071{
1072 int i;
1073 int remove_head = 0;
1074 int need_realloc = 0;
1075 const xmlChar *cur;
1076
1077 if ((ctxt == NULL) || (src == NULL) || (len == NULL))
1078 return(NULL);
1079 i = *len;
1080 if (i <= 0)
1081 return(NULL);
1082
1083 cur = src;
1084 while (*cur == 0x20) {
1085 cur++;
1086 remove_head++;
1087 }
1088 while (*cur != 0) {
1089 if (*cur == 0x20) {
1090 cur++;
1091 if ((*cur == 0x20) || (*cur == 0)) {
1092 need_realloc = 1;
1093 break;
1094 }
1095 } else
1096 cur++;
1097 }
1098 if (need_realloc) {
1099 xmlChar *ret;
1100
1101 ret = xmlStrndup(src + remove_head, i - remove_head + 1);
1102 if (ret == NULL) {
1103 xmlErrMemory(ctxt, NULL);
1104 return(NULL);
1105 }
1106 xmlAttrNormalizeSpace(ret, ret);
1107 *len = (int) strlen((const char *)ret);
1108 return(ret);
1109 } else if (remove_head) {
1110 *len -= remove_head;
Daniel Veillardae0765b2008-07-31 19:54:59 +00001111 memmove(src, src + remove_head, 1 + *len);
1112 return(src);
Daniel Veillard97c9ce22008-03-25 16:52:41 +00001113 }
1114 return(NULL);
1115}
1116
1117/**
Daniel Veillarde57ec792003-09-10 10:50:59 +00001118 * xmlAddDefAttrs:
1119 * @ctxt: an XML parser context
1120 * @fullname: the element fullname
1121 * @fullattr: the attribute fullname
1122 * @value: the attribute value
1123 *
1124 * Add a defaulted attribute for an element
1125 */
1126static void
1127xmlAddDefAttrs(xmlParserCtxtPtr ctxt,
1128 const xmlChar *fullname,
1129 const xmlChar *fullattr,
1130 const xmlChar *value) {
1131 xmlDefAttrsPtr defaults;
1132 int len;
1133 const xmlChar *name;
1134 const xmlChar *prefix;
1135
Daniel Veillard6a31b832008-03-26 14:06:44 +00001136 /*
1137 * Allows to detect attribute redefinitions
1138 */
1139 if (ctxt->attsSpecial != NULL) {
1140 if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1141 return;
1142 }
1143
Daniel Veillarde57ec792003-09-10 10:50:59 +00001144 if (ctxt->attsDefault == NULL) {
Daniel Veillard316a5c32005-01-23 22:56:39 +00001145 ctxt->attsDefault = xmlHashCreateDict(10, ctxt->dict);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001146 if (ctxt->attsDefault == NULL)
1147 goto mem_error;
1148 }
1149
1150 /*
Daniel Veillard079f6a72004-09-23 13:15:03 +00001151 * split the element name into prefix:localname , the string found
1152 * are within the DTD and then not associated to namespace names.
Daniel Veillarde57ec792003-09-10 10:50:59 +00001153 */
1154 name = xmlSplitQName3(fullname, &len);
1155 if (name == NULL) {
1156 name = xmlDictLookup(ctxt->dict, fullname, -1);
1157 prefix = NULL;
1158 } else {
1159 name = xmlDictLookup(ctxt->dict, name, -1);
1160 prefix = xmlDictLookup(ctxt->dict, fullname, len);
1161 }
1162
1163 /*
1164 * make sure there is some storage
1165 */
1166 defaults = xmlHashLookup2(ctxt->attsDefault, name, prefix);
1167 if (defaults == NULL) {
1168 defaults = (xmlDefAttrsPtr) xmlMalloc(sizeof(xmlDefAttrs) +
Daniel Veillardae0765b2008-07-31 19:54:59 +00001169 (4 * 5) * sizeof(const xmlChar *));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001170 if (defaults == NULL)
1171 goto mem_error;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001172 defaults->nbAttrs = 0;
Daniel Veillard079f6a72004-09-23 13:15:03 +00001173 defaults->maxAttrs = 4;
Daniel Veillard68b6e022008-03-31 09:26:00 +00001174 if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1175 defaults, NULL) < 0) {
1176 xmlFree(defaults);
1177 goto mem_error;
1178 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00001179 } else if (defaults->nbAttrs >= defaults->maxAttrs) {
Daniel Veillard079f6a72004-09-23 13:15:03 +00001180 xmlDefAttrsPtr temp;
1181
1182 temp = (xmlDefAttrsPtr) xmlRealloc(defaults, sizeof(xmlDefAttrs) +
Daniel Veillardae0765b2008-07-31 19:54:59 +00001183 (2 * defaults->maxAttrs * 5) * sizeof(const xmlChar *));
Daniel Veillard079f6a72004-09-23 13:15:03 +00001184 if (temp == NULL)
Daniel Veillarde57ec792003-09-10 10:50:59 +00001185 goto mem_error;
Daniel Veillard079f6a72004-09-23 13:15:03 +00001186 defaults = temp;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001187 defaults->maxAttrs *= 2;
Daniel Veillard68b6e022008-03-31 09:26:00 +00001188 if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1189 defaults, NULL) < 0) {
1190 xmlFree(defaults);
1191 goto mem_error;
1192 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00001193 }
1194
1195 /*
Daniel Veillard8874b942005-08-25 13:19:21 +00001196 * Split the element name into prefix:localname , the string found
Daniel Veillarde57ec792003-09-10 10:50:59 +00001197 * are within the DTD and hen not associated to namespace names.
1198 */
1199 name = xmlSplitQName3(fullattr, &len);
1200 if (name == NULL) {
1201 name = xmlDictLookup(ctxt->dict, fullattr, -1);
1202 prefix = NULL;
1203 } else {
1204 name = xmlDictLookup(ctxt->dict, name, -1);
1205 prefix = xmlDictLookup(ctxt->dict, fullattr, len);
1206 }
1207
Daniel Veillardae0765b2008-07-31 19:54:59 +00001208 defaults->values[5 * defaults->nbAttrs] = name;
1209 defaults->values[5 * defaults->nbAttrs + 1] = prefix;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001210 /* intern the string and precompute the end */
1211 len = xmlStrlen(value);
1212 value = xmlDictLookup(ctxt->dict, value, len);
Daniel Veillardae0765b2008-07-31 19:54:59 +00001213 defaults->values[5 * defaults->nbAttrs + 2] = value;
1214 defaults->values[5 * defaults->nbAttrs + 3] = value + len;
1215 if (ctxt->external)
1216 defaults->values[5 * defaults->nbAttrs + 4] = BAD_CAST "external";
1217 else
1218 defaults->values[5 * defaults->nbAttrs + 4] = NULL;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001219 defaults->nbAttrs++;
1220
1221 return;
1222
1223mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001224 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001225 return;
1226}
1227
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001228/**
1229 * xmlAddSpecialAttr:
1230 * @ctxt: an XML parser context
1231 * @fullname: the element fullname
1232 * @fullattr: the attribute fullname
1233 * @type: the attribute type
1234 *
Daniel Veillardac4118d2008-01-11 05:27:32 +00001235 * Register this attribute type
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001236 */
1237static void
1238xmlAddSpecialAttr(xmlParserCtxtPtr ctxt,
1239 const xmlChar *fullname,
1240 const xmlChar *fullattr,
1241 int type)
1242{
1243 if (ctxt->attsSpecial == NULL) {
Daniel Veillard316a5c32005-01-23 22:56:39 +00001244 ctxt->attsSpecial = xmlHashCreateDict(10, ctxt->dict);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001245 if (ctxt->attsSpecial == NULL)
1246 goto mem_error;
1247 }
1248
Daniel Veillardac4118d2008-01-11 05:27:32 +00001249 if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1250 return;
1251
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +00001252 xmlHashAddEntry2(ctxt->attsSpecial, fullname, fullattr,
1253 (void *) (long) type);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001254 return;
1255
1256mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001257 xmlErrMemory(ctxt, NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001258 return;
1259}
1260
Daniel Veillard4432df22003-09-28 18:58:27 +00001261/**
Daniel Veillardac4118d2008-01-11 05:27:32 +00001262 * xmlCleanSpecialAttrCallback:
1263 *
1264 * Removes CDATA attributes from the special attribute table
1265 */
1266static void
1267xmlCleanSpecialAttrCallback(void *payload, void *data,
1268 const xmlChar *fullname, const xmlChar *fullattr,
1269 const xmlChar *unused ATTRIBUTE_UNUSED) {
1270 xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) data;
1271
Daniel Veillardb3edafd2008-01-11 08:00:57 +00001272 if (((long) payload) == XML_ATTRIBUTE_CDATA) {
Daniel Veillardac4118d2008-01-11 05:27:32 +00001273 xmlHashRemoveEntry2(ctxt->attsSpecial, fullname, fullattr, NULL);
1274 }
1275}
1276
1277/**
1278 * xmlCleanSpecialAttr:
1279 * @ctxt: an XML parser context
1280 *
1281 * Trim the list of attributes defined to remove all those of type
1282 * CDATA as they are not special. This call should be done when finishing
1283 * to parse the DTD and before starting to parse the document root.
1284 */
1285static void
1286xmlCleanSpecialAttr(xmlParserCtxtPtr ctxt)
1287{
1288 if (ctxt->attsSpecial == NULL)
1289 return;
1290
1291 xmlHashScanFull(ctxt->attsSpecial, xmlCleanSpecialAttrCallback, ctxt);
1292
1293 if (xmlHashSize(ctxt->attsSpecial) == 0) {
1294 xmlHashFree(ctxt->attsSpecial, NULL);
1295 ctxt->attsSpecial = NULL;
1296 }
1297 return;
1298}
1299
1300/**
Daniel Veillard4432df22003-09-28 18:58:27 +00001301 * xmlCheckLanguageID:
1302 * @lang: pointer to the string value
1303 *
1304 * Checks that the value conforms to the LanguageID production:
1305 *
1306 * NOTE: this is somewhat deprecated, those productions were removed from
1307 * the XML Second edition.
1308 *
1309 * [33] LanguageID ::= Langcode ('-' Subcode)*
1310 * [34] Langcode ::= ISO639Code | IanaCode | UserCode
1311 * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
1312 * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
1313 * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
1314 * [38] Subcode ::= ([a-z] | [A-Z])+
1315 *
Daniel Veillard60587d62010-11-04 15:16:27 +01001316 * The current REC reference the sucessors of RFC 1766, currently 5646
1317 *
1318 * http://www.rfc-editor.org/rfc/rfc5646.txt
1319 * langtag = language
1320 * ["-" script]
1321 * ["-" region]
1322 * *("-" variant)
1323 * *("-" extension)
1324 * ["-" privateuse]
1325 * language = 2*3ALPHA ; shortest ISO 639 code
1326 * ["-" extlang] ; sometimes followed by
1327 * ; extended language subtags
1328 * / 4ALPHA ; or reserved for future use
1329 * / 5*8ALPHA ; or registered language subtag
1330 *
1331 * extlang = 3ALPHA ; selected ISO 639 codes
1332 * *2("-" 3ALPHA) ; permanently reserved
1333 *
1334 * script = 4ALPHA ; ISO 15924 code
1335 *
1336 * region = 2ALPHA ; ISO 3166-1 code
1337 * / 3DIGIT ; UN M.49 code
1338 *
1339 * variant = 5*8alphanum ; registered variants
1340 * / (DIGIT 3alphanum)
1341 *
1342 * extension = singleton 1*("-" (2*8alphanum))
1343 *
1344 * ; Single alphanumerics
1345 * ; "x" reserved for private use
1346 * singleton = DIGIT ; 0 - 9
1347 * / %x41-57 ; A - W
1348 * / %x59-5A ; Y - Z
1349 * / %x61-77 ; a - w
1350 * / %x79-7A ; y - z
1351 *
1352 * it sounds right to still allow Irregular i-xxx IANA and user codes too
1353 * The parser below doesn't try to cope with extension or privateuse
1354 * that could be added but that's not interoperable anyway
1355 *
Daniel Veillard4432df22003-09-28 18:58:27 +00001356 * Returns 1 if correct 0 otherwise
1357 **/
1358int
1359xmlCheckLanguageID(const xmlChar * lang)
1360{
Daniel Veillard60587d62010-11-04 15:16:27 +01001361 const xmlChar *cur = lang, *nxt;
Daniel Veillard4432df22003-09-28 18:58:27 +00001362
1363 if (cur == NULL)
1364 return (0);
1365 if (((cur[0] == 'i') && (cur[1] == '-')) ||
Daniel Veillard60587d62010-11-04 15:16:27 +01001366 ((cur[0] == 'I') && (cur[1] == '-')) ||
1367 ((cur[0] == 'x') && (cur[1] == '-')) ||
1368 ((cur[0] == 'X') && (cur[1] == '-'))) {
Daniel Veillard4432df22003-09-28 18:58:27 +00001369 /*
Daniel Veillard60587d62010-11-04 15:16:27 +01001370 * Still allow IANA code and user code which were coming
1371 * from the previous version of the XML-1.0 specification
1372 * it's deprecated but we should not fail
Daniel Veillard4432df22003-09-28 18:58:27 +00001373 */
1374 cur += 2;
Daniel Veillard60587d62010-11-04 15:16:27 +01001375 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
Daniel Veillard4432df22003-09-28 18:58:27 +00001376 ((cur[0] >= 'a') && (cur[0] <= 'z')))
1377 cur++;
Daniel Veillard60587d62010-11-04 15:16:27 +01001378 return(cur[0] == 0);
Daniel Veillard4432df22003-09-28 18:58:27 +00001379 }
Daniel Veillard60587d62010-11-04 15:16:27 +01001380 nxt = cur;
1381 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1382 ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1383 nxt++;
1384 if (nxt - cur >= 4) {
1385 /*
1386 * Reserved
1387 */
1388 if ((nxt - cur > 8) || (nxt[0] != 0))
1389 return(0);
1390 return(1);
1391 }
1392 if (nxt - cur < 2)
1393 return(0);
1394 /* we got an ISO 639 code */
1395 if (nxt[0] == 0)
1396 return(1);
1397 if (nxt[0] != '-')
1398 return(0);
1399
1400 nxt++;
1401 cur = nxt;
1402 /* now we can have extlang or script or region or variant */
1403 if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1404 goto region_m49;
1405
1406 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1407 ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1408 nxt++;
1409 if (nxt - cur == 4)
1410 goto script;
1411 if (nxt - cur == 2)
1412 goto region;
1413 if ((nxt - cur >= 5) && (nxt - cur <= 8))
1414 goto variant;
1415 if (nxt - cur != 3)
1416 return(0);
1417 /* we parsed an extlang */
1418 if (nxt[0] == 0)
1419 return(1);
1420 if (nxt[0] != '-')
1421 return(0);
1422
1423 nxt++;
1424 cur = nxt;
1425 /* now we can have script or region or variant */
1426 if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1427 goto region_m49;
1428
1429 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1430 ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1431 nxt++;
1432 if (nxt - cur == 2)
1433 goto region;
1434 if ((nxt - cur >= 5) && (nxt - cur <= 8))
1435 goto variant;
1436 if (nxt - cur != 4)
1437 return(0);
1438 /* we parsed a script */
1439script:
1440 if (nxt[0] == 0)
1441 return(1);
1442 if (nxt[0] != '-')
1443 return(0);
1444
1445 nxt++;
1446 cur = nxt;
1447 /* now we can have region or variant */
1448 if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1449 goto region_m49;
1450
1451 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1452 ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1453 nxt++;
1454
1455 if ((nxt - cur >= 5) && (nxt - cur <= 8))
1456 goto variant;
1457 if (nxt - cur != 2)
1458 return(0);
1459 /* we parsed a region */
1460region:
1461 if (nxt[0] == 0)
1462 return(1);
1463 if (nxt[0] != '-')
1464 return(0);
1465
1466 nxt++;
1467 cur = nxt;
1468 /* now we can just have a variant */
1469 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1470 ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1471 nxt++;
1472
1473 if ((nxt - cur < 5) || (nxt - cur > 8))
1474 return(0);
1475
1476 /* we parsed a variant */
1477variant:
1478 if (nxt[0] == 0)
1479 return(1);
1480 if (nxt[0] != '-')
1481 return(0);
1482 /* extensions and private use subtags not checked */
Daniel Veillard4432df22003-09-28 18:58:27 +00001483 return (1);
Daniel Veillard60587d62010-11-04 15:16:27 +01001484
1485region_m49:
1486 if (((nxt[1] >= '0') && (nxt[1] <= '9')) &&
1487 ((nxt[2] >= '0') && (nxt[2] <= '9'))) {
1488 nxt += 3;
1489 goto region;
1490 }
1491 return(0);
Daniel Veillard4432df22003-09-28 18:58:27 +00001492}
1493
Owen Taylor3473f882001-02-23 17:55:21 +00001494/************************************************************************
1495 * *
Daniel Veillard0161e632008-08-28 15:36:32 +00001496 * Parser stacks related functions and macros *
Owen Taylor3473f882001-02-23 17:55:21 +00001497 * *
1498 ************************************************************************/
1499
Daniel Veillard8ed10722009-08-20 19:17:36 +02001500static xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
1501 const xmlChar ** str);
Owen Taylor3473f882001-02-23 17:55:21 +00001502
Daniel Veillard0fb18932003-09-07 09:14:37 +00001503#ifdef SAX2
1504/**
1505 * nsPush:
1506 * @ctxt: an XML parser context
1507 * @prefix: the namespace prefix or NULL
1508 * @URL: the namespace name
1509 *
1510 * Pushes a new parser namespace on top of the ns stack
1511 *
William M. Brack7b9154b2003-09-27 19:23:50 +00001512 * Returns -1 in case of error, -2 if the namespace should be discarded
1513 * and the index in the stack otherwise.
Daniel Veillard0fb18932003-09-07 09:14:37 +00001514 */
1515static int
1516nsPush(xmlParserCtxtPtr ctxt, const xmlChar *prefix, const xmlChar *URL)
1517{
Daniel Veillarddca8cc72003-09-26 13:53:14 +00001518 if (ctxt->options & XML_PARSE_NSCLEAN) {
1519 int i;
1520 for (i = 0;i < ctxt->nsNr;i += 2) {
1521 if (ctxt->nsTab[i] == prefix) {
1522 /* in scope */
1523 if (ctxt->nsTab[i + 1] == URL)
1524 return(-2);
1525 /* out of scope keep it */
1526 break;
1527 }
1528 }
1529 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00001530 if ((ctxt->nsMax == 0) || (ctxt->nsTab == NULL)) {
1531 ctxt->nsMax = 10;
1532 ctxt->nsNr = 0;
1533 ctxt->nsTab = (const xmlChar **)
1534 xmlMalloc(ctxt->nsMax * sizeof(xmlChar *));
1535 if (ctxt->nsTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001536 xmlErrMemory(ctxt, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00001537 ctxt->nsMax = 0;
1538 return (-1);
1539 }
1540 } else if (ctxt->nsNr >= ctxt->nsMax) {
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00001541 const xmlChar ** tmp;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001542 ctxt->nsMax *= 2;
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00001543 tmp = (const xmlChar **) xmlRealloc((char *) ctxt->nsTab,
1544 ctxt->nsMax * sizeof(ctxt->nsTab[0]));
1545 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001546 xmlErrMemory(ctxt, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00001547 ctxt->nsMax /= 2;
1548 return (-1);
1549 }
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00001550 ctxt->nsTab = tmp;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001551 }
1552 ctxt->nsTab[ctxt->nsNr++] = prefix;
1553 ctxt->nsTab[ctxt->nsNr++] = URL;
1554 return (ctxt->nsNr);
1555}
1556/**
1557 * nsPop:
1558 * @ctxt: an XML parser context
1559 * @nr: the number to pop
1560 *
1561 * Pops the top @nr parser prefix/namespace from the ns stack
1562 *
1563 * Returns the number of namespaces removed
1564 */
1565static int
1566nsPop(xmlParserCtxtPtr ctxt, int nr)
1567{
1568 int i;
1569
1570 if (ctxt->nsTab == NULL) return(0);
1571 if (ctxt->nsNr < nr) {
1572 xmlGenericError(xmlGenericErrorContext, "Pbm popping %d NS\n", nr);
1573 nr = ctxt->nsNr;
1574 }
1575 if (ctxt->nsNr <= 0)
1576 return (0);
Daniel Veillard34e3f642008-07-29 09:02:27 +00001577
Daniel Veillard0fb18932003-09-07 09:14:37 +00001578 for (i = 0;i < nr;i++) {
1579 ctxt->nsNr--;
1580 ctxt->nsTab[ctxt->nsNr] = NULL;
1581 }
1582 return(nr);
1583}
1584#endif
1585
1586static int
1587xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt, int nr) {
1588 const xmlChar **atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001589 int *attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001590 int maxatts;
1591
1592 if (ctxt->atts == NULL) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00001593 maxatts = 55; /* allow for 10 attrs by default */
Daniel Veillard0fb18932003-09-07 09:14:37 +00001594 atts = (const xmlChar **)
1595 xmlMalloc(maxatts * sizeof(xmlChar *));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001596 if (atts == NULL) goto mem_error;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001597 ctxt->atts = atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001598 attallocs = (int *) xmlMalloc((maxatts / 5) * sizeof(int));
1599 if (attallocs == NULL) goto mem_error;
1600 ctxt->attallocs = attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001601 ctxt->maxatts = maxatts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001602 } else if (nr + 5 > ctxt->maxatts) {
1603 maxatts = (nr + 5) * 2;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001604 atts = (const xmlChar **) xmlRealloc((void *) ctxt->atts,
1605 maxatts * sizeof(const xmlChar *));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001606 if (atts == NULL) goto mem_error;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001607 ctxt->atts = atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001608 attallocs = (int *) xmlRealloc((void *) ctxt->attallocs,
1609 (maxatts / 5) * sizeof(int));
1610 if (attallocs == NULL) goto mem_error;
1611 ctxt->attallocs = attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001612 ctxt->maxatts = maxatts;
1613 }
1614 return(ctxt->maxatts);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001615mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001616 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001617 return(-1);
Daniel Veillard0fb18932003-09-07 09:14:37 +00001618}
1619
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001620/**
1621 * inputPush:
1622 * @ctxt: an XML parser context
Daniel Veillard9d06d302002-01-22 18:15:52 +00001623 * @value: the parser input
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001624 *
1625 * Pushes a new parser input on top of the input stack
Daniel Veillard9d06d302002-01-22 18:15:52 +00001626 *
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001627 * Returns -1 in case of error, the index in the stack otherwise
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001628 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001629int
Daniel Veillard1c732d22002-11-30 11:22:59 +00001630inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value)
1631{
Daniel Veillard36e5cd52004-11-02 14:52:23 +00001632 if ((ctxt == NULL) || (value == NULL))
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001633 return(-1);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001634 if (ctxt->inputNr >= ctxt->inputMax) {
1635 ctxt->inputMax *= 2;
1636 ctxt->inputTab =
1637 (xmlParserInputPtr *) xmlRealloc(ctxt->inputTab,
1638 ctxt->inputMax *
1639 sizeof(ctxt->inputTab[0]));
1640 if (ctxt->inputTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001641 xmlErrMemory(ctxt, NULL);
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001642 xmlFreeInputStream(value);
1643 ctxt->inputMax /= 2;
1644 value = NULL;
1645 return (-1);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001646 }
1647 }
1648 ctxt->inputTab[ctxt->inputNr] = value;
1649 ctxt->input = value;
1650 return (ctxt->inputNr++);
1651}
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001652/**
Daniel Veillard1c732d22002-11-30 11:22:59 +00001653 * inputPop:
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001654 * @ctxt: an XML parser context
1655 *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001656 * Pops the top parser input from the input stack
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001657 *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001658 * Returns the input just removed
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001659 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001660xmlParserInputPtr
Daniel Veillard1c732d22002-11-30 11:22:59 +00001661inputPop(xmlParserCtxtPtr ctxt)
1662{
1663 xmlParserInputPtr ret;
1664
Daniel Veillard36e5cd52004-11-02 14:52:23 +00001665 if (ctxt == NULL)
1666 return(NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001667 if (ctxt->inputNr <= 0)
Daniel Veillard24505b02005-07-28 23:49:35 +00001668 return (NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001669 ctxt->inputNr--;
1670 if (ctxt->inputNr > 0)
1671 ctxt->input = ctxt->inputTab[ctxt->inputNr - 1];
1672 else
1673 ctxt->input = NULL;
1674 ret = ctxt->inputTab[ctxt->inputNr];
Daniel Veillard24505b02005-07-28 23:49:35 +00001675 ctxt->inputTab[ctxt->inputNr] = NULL;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001676 return (ret);
1677}
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001678/**
1679 * nodePush:
1680 * @ctxt: an XML parser context
Daniel Veillard9d06d302002-01-22 18:15:52 +00001681 * @value: the element node
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001682 *
1683 * Pushes a new element node on top of the node stack
Daniel Veillard9d06d302002-01-22 18:15:52 +00001684 *
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001685 * Returns -1 in case of error, the index in the stack otherwise
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001686 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001687int
Daniel Veillard1c732d22002-11-30 11:22:59 +00001688nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value)
1689{
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001690 if (ctxt == NULL) return(0);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001691 if (ctxt->nodeNr >= ctxt->nodeMax) {
Daniel Veillardd0cf7f62004-11-09 16:17:02 +00001692 xmlNodePtr *tmp;
1693
1694 tmp = (xmlNodePtr *) xmlRealloc(ctxt->nodeTab,
1695 ctxt->nodeMax * 2 *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001696 sizeof(ctxt->nodeTab[0]));
Daniel Veillardd0cf7f62004-11-09 16:17:02 +00001697 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001698 xmlErrMemory(ctxt, NULL);
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001699 return (-1);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001700 }
Daniel Veillardd0cf7f62004-11-09 16:17:02 +00001701 ctxt->nodeTab = tmp;
1702 ctxt->nodeMax *= 2;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001703 }
Daniel Veillard8915c152008-08-26 13:05:34 +00001704 if ((((unsigned int) ctxt->nodeNr) > xmlParserMaxDepth) &&
1705 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00001706 xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
Daniel Veillard8915c152008-08-26 13:05:34 +00001707 "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
Daniel Veillard4aede2e2003-10-17 12:43:59 +00001708 xmlParserMaxDepth);
Daniel Veillard3b2e4e12003-02-03 08:52:58 +00001709 ctxt->instate = XML_PARSER_EOF;
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001710 return(-1);
Daniel Veillard3b2e4e12003-02-03 08:52:58 +00001711 }
Daniel Veillard1c732d22002-11-30 11:22:59 +00001712 ctxt->nodeTab[ctxt->nodeNr] = value;
1713 ctxt->node = value;
1714 return (ctxt->nodeNr++);
1715}
Daniel Veillard8915c152008-08-26 13:05:34 +00001716
Daniel Veillard1c732d22002-11-30 11:22:59 +00001717/**
1718 * nodePop:
1719 * @ctxt: an XML parser context
1720 *
1721 * Pops the top element node from the node stack
1722 *
1723 * Returns the node just removed
Owen Taylor3473f882001-02-23 17:55:21 +00001724 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001725xmlNodePtr
Daniel Veillard1c732d22002-11-30 11:22:59 +00001726nodePop(xmlParserCtxtPtr ctxt)
1727{
1728 xmlNodePtr ret;
1729
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001730 if (ctxt == NULL) return(NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001731 if (ctxt->nodeNr <= 0)
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001732 return (NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001733 ctxt->nodeNr--;
1734 if (ctxt->nodeNr > 0)
1735 ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1];
1736 else
1737 ctxt->node = NULL;
1738 ret = ctxt->nodeTab[ctxt->nodeNr];
Daniel Veillard24505b02005-07-28 23:49:35 +00001739 ctxt->nodeTab[ctxt->nodeNr] = NULL;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001740 return (ret);
1741}
Daniel Veillarda2351322004-06-27 12:08:10 +00001742
1743#ifdef LIBXML_PUSH_ENABLED
Daniel Veillard1c732d22002-11-30 11:22:59 +00001744/**
Daniel Veillarde57ec792003-09-10 10:50:59 +00001745 * nameNsPush:
1746 * @ctxt: an XML parser context
1747 * @value: the element name
1748 * @prefix: the element prefix
1749 * @URI: the element namespace name
1750 *
1751 * Pushes a new element name/prefix/URL on top of the name stack
1752 *
1753 * Returns -1 in case of error, the index in the stack otherwise
1754 */
1755static int
1756nameNsPush(xmlParserCtxtPtr ctxt, const xmlChar * value,
1757 const xmlChar *prefix, const xmlChar *URI, int nsNr)
1758{
1759 if (ctxt->nameNr >= ctxt->nameMax) {
1760 const xmlChar * *tmp;
1761 void **tmp2;
1762 ctxt->nameMax *= 2;
1763 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1764 ctxt->nameMax *
1765 sizeof(ctxt->nameTab[0]));
1766 if (tmp == NULL) {
1767 ctxt->nameMax /= 2;
1768 goto mem_error;
1769 }
1770 ctxt->nameTab = tmp;
1771 tmp2 = (void **) xmlRealloc((void * *)ctxt->pushTab,
1772 ctxt->nameMax * 3 *
1773 sizeof(ctxt->pushTab[0]));
1774 if (tmp2 == NULL) {
1775 ctxt->nameMax /= 2;
1776 goto mem_error;
1777 }
1778 ctxt->pushTab = tmp2;
1779 }
1780 ctxt->nameTab[ctxt->nameNr] = value;
1781 ctxt->name = value;
1782 ctxt->pushTab[ctxt->nameNr * 3] = (void *) prefix;
1783 ctxt->pushTab[ctxt->nameNr * 3 + 1] = (void *) URI;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +00001784 ctxt->pushTab[ctxt->nameNr * 3 + 2] = (void *) (long) nsNr;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001785 return (ctxt->nameNr++);
1786mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001787 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001788 return (-1);
1789}
1790/**
1791 * nameNsPop:
1792 * @ctxt: an XML parser context
1793 *
1794 * Pops the top element/prefix/URI name from the name stack
1795 *
1796 * Returns the name just removed
1797 */
1798static const xmlChar *
1799nameNsPop(xmlParserCtxtPtr ctxt)
1800{
1801 const xmlChar *ret;
1802
1803 if (ctxt->nameNr <= 0)
Daniel Veillard24505b02005-07-28 23:49:35 +00001804 return (NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001805 ctxt->nameNr--;
1806 if (ctxt->nameNr > 0)
1807 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1808 else
1809 ctxt->name = NULL;
1810 ret = ctxt->nameTab[ctxt->nameNr];
1811 ctxt->nameTab[ctxt->nameNr] = NULL;
1812 return (ret);
1813}
Daniel Veillarda2351322004-06-27 12:08:10 +00001814#endif /* LIBXML_PUSH_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +00001815
1816/**
Daniel Veillard1c732d22002-11-30 11:22:59 +00001817 * namePush:
1818 * @ctxt: an XML parser context
1819 * @value: the element name
1820 *
1821 * Pushes a new element name on top of the name stack
1822 *
Daniel Veillarde57ec792003-09-10 10:50:59 +00001823 * Returns -1 in case of error, the index in the stack otherwise
Daniel Veillard1c732d22002-11-30 11:22:59 +00001824 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001825int
Daniel Veillard2fdbd322003-08-18 12:15:38 +00001826namePush(xmlParserCtxtPtr ctxt, const xmlChar * value)
Daniel Veillard1c732d22002-11-30 11:22:59 +00001827{
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001828 if (ctxt == NULL) return (-1);
1829
Daniel Veillard1c732d22002-11-30 11:22:59 +00001830 if (ctxt->nameNr >= ctxt->nameMax) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00001831 const xmlChar * *tmp;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001832 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
Xia Xinfeng5825ebb2011-11-10 13:50:22 +08001833 ctxt->nameMax * 2 *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001834 sizeof(ctxt->nameTab[0]));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001835 if (tmp == NULL) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00001836 goto mem_error;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001837 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00001838 ctxt->nameTab = tmp;
Xia Xinfeng5825ebb2011-11-10 13:50:22 +08001839 ctxt->nameMax *= 2;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001840 }
1841 ctxt->nameTab[ctxt->nameNr] = value;
1842 ctxt->name = value;
1843 return (ctxt->nameNr++);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001844mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001845 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001846 return (-1);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001847}
1848/**
1849 * namePop:
1850 * @ctxt: an XML parser context
1851 *
1852 * Pops the top element name from the name stack
1853 *
1854 * Returns the name just removed
1855 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001856const xmlChar *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001857namePop(xmlParserCtxtPtr ctxt)
1858{
Daniel Veillard2fdbd322003-08-18 12:15:38 +00001859 const xmlChar *ret;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001860
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001861 if ((ctxt == NULL) || (ctxt->nameNr <= 0))
1862 return (NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001863 ctxt->nameNr--;
1864 if (ctxt->nameNr > 0)
1865 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1866 else
1867 ctxt->name = NULL;
1868 ret = ctxt->nameTab[ctxt->nameNr];
Daniel Veillard24505b02005-07-28 23:49:35 +00001869 ctxt->nameTab[ctxt->nameNr] = NULL;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001870 return (ret);
1871}
Owen Taylor3473f882001-02-23 17:55:21 +00001872
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001873static int spacePush(xmlParserCtxtPtr ctxt, int val) {
Owen Taylor3473f882001-02-23 17:55:21 +00001874 if (ctxt->spaceNr >= ctxt->spaceMax) {
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00001875 int *tmp;
1876
Owen Taylor3473f882001-02-23 17:55:21 +00001877 ctxt->spaceMax *= 2;
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00001878 tmp = (int *) xmlRealloc(ctxt->spaceTab,
1879 ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
1880 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001881 xmlErrMemory(ctxt, NULL);
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001882 ctxt->spaceMax /=2;
1883 return(-1);
Owen Taylor3473f882001-02-23 17:55:21 +00001884 }
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00001885 ctxt->spaceTab = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00001886 }
1887 ctxt->spaceTab[ctxt->spaceNr] = val;
1888 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr];
1889 return(ctxt->spaceNr++);
1890}
1891
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001892static int spacePop(xmlParserCtxtPtr ctxt) {
Owen Taylor3473f882001-02-23 17:55:21 +00001893 int ret;
1894 if (ctxt->spaceNr <= 0) return(0);
1895 ctxt->spaceNr--;
1896 if (ctxt->spaceNr > 0)
1897 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1];
1898 else
Daniel Veillarddbcbbd22006-06-18 19:55:20 +00001899 ctxt->space = &ctxt->spaceTab[0];
Owen Taylor3473f882001-02-23 17:55:21 +00001900 ret = ctxt->spaceTab[ctxt->spaceNr];
1901 ctxt->spaceTab[ctxt->spaceNr] = -1;
1902 return(ret);
1903}
1904
1905/*
1906 * Macros for accessing the content. Those should be used only by the parser,
1907 * and not exported.
1908 *
1909 * Dirty macros, i.e. one often need to make assumption on the context to
1910 * use them
1911 *
1912 * CUR_PTR return the current pointer to the xmlChar to be parsed.
1913 * To be used with extreme caution since operations consuming
1914 * characters may move the input buffer to a different location !
1915 * CUR returns the current xmlChar value, i.e. a 8 bit value if compiled
1916 * This should be used internally by the parser
1917 * only to compare to ASCII values otherwise it would break when
1918 * running with UTF-8 encoding.
1919 * RAW same as CUR but in the input buffer, bypass any token
1920 * extraction that may have been done
 *   NXT(n)  returns the n'th next xmlChar. Same as CUR, it should be used
 *           only to compare against ASCII based substrings.
1923 * SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
Daniel Veillard77a90a72003-03-22 00:04:05 +00001924 * strings without newlines within the parser.
1925 * NEXT1(l) Skip 1 xmlChar, and must also be used only to skip 1 non-newline ASCII
1926 * defined char within the parser.
Owen Taylor3473f882001-02-23 17:55:21 +00001927 * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
1928 *
1929 * NEXT Skip to the next character, this does the proper decoding
1930 * in UTF-8 mode. It also pop-up unfinished entities on the fly.
Daniel Veillard77a90a72003-03-22 00:04:05 +00001931 * NEXTL(l) Skip the current unicode character of l xmlChars long.
Owen Taylor3473f882001-02-23 17:55:21 +00001932 * CUR_CHAR(l) returns the current unicode character (int), set l
1933 * to the number of xmlChars used for the encoding [0-5].
1934 * CUR_SCHAR same but operate on a string instead of the context
1935 * COPY_BUF copy the current unicode char to the target buffer, increment
1936 * the index
1937 * GROW, SHRINK handling of input buffers
1938 */
1939
/*
 * Raw accessors on the current input buffer. They all expect a variable
 * named ctxt to be in scope at the point of use.
 *   CUR_PTR  the read cursor itself
 *   RAW/CUR  the xmlChar under the cursor
 *   NXT(val) the xmlChar val positions after the cursor
 */
#define CUR_PTR ctxt->input->cur
#define RAW (*CUR_PTR)
#define CUR (*CUR_PTR)
#define NXT(val) CUR_PTR[(val)]
1944
/*
 * CMPn(s, c1, ..., cn): true when the first n bytes at s, read as
 * unsigned char, equal c1..cn in order. Each macro builds on the
 * previous one, so comparison stops at the first mismatch.
 *
 * Every parameter is parenthesized so that an expression argument
 * (pointer arithmetic, a conditional, ...) is cast and compared as a
 * whole instead of having the cast bind to its first operand only.
 * Note that s is evaluated once per compared byte.
 */
#define CMP4( s, c1, c2, c3, c4 ) \
  ( ((unsigned char *) (s))[ 0 ] == (c1) && \
    ((unsigned char *) (s))[ 1 ] == (c2) && \
    ((unsigned char *) (s))[ 2 ] == (c3) && \
    ((unsigned char *) (s))[ 3 ] == (c4) )
#define CMP5( s, c1, c2, c3, c4, c5 ) \
  ( CMP4( s, c1, c2, c3, c4 ) && ((unsigned char *) (s))[ 4 ] == (c5) )
#define CMP6( s, c1, c2, c3, c4, c5, c6 ) \
  ( CMP5( s, c1, c2, c3, c4, c5 ) && ((unsigned char *) (s))[ 5 ] == (c6) )
#define CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) \
  ( CMP6( s, c1, c2, c3, c4, c5, c6 ) && ((unsigned char *) (s))[ 6 ] == (c7) )
#define CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) \
  ( CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) && ((unsigned char *) (s))[ 7 ] == (c8) )
#define CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) \
  ( CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) && \
    ((unsigned char *) (s))[ 8 ] == (c9) )
#define CMP10( s, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 ) \
  ( CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) && \
    ((unsigned char *) (s))[ 9 ] == (c10) )
1962
/*
 * SKIP(val): advance the cursor, the column and the character counter
 * by val, then call xmlParserHandlePEReference() if the cursor sits on
 * a '%', and pop the input if it is exhausted and cannot be grown.
 * val is evaluated three times.
 */
#define SKIP(val) do { \
    ctxt->nbChars += (val); \
    ctxt->input->cur += (val); \
    ctxt->input->col += (val); \
    if (*ctxt->input->cur == '%') \
        xmlParserHandlePEReference(ctxt); \
    if ((*ctxt->input->cur == 0) && \
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) \
        xmlPopInput(ctxt); \
  } while (0)
1970
/*
 * SKIPL(val): like SKIP but steps one xmlChar at a time so that line
 * and column stay correct when the skipped text contains newlines.
 * After stepping it calls xmlParserHandlePEReference() if the cursor
 * sits on a '%', and pops the input if it is exhausted and cannot be
 * grown. val is parenthesized so an expression argument is compared as
 * a whole; it is re-evaluated on every iteration.
 */
#define SKIPL(val) do { \
    int skipl; \
    for (skipl = 0; skipl < (val); skipl++) { \
        if (*(ctxt->input->cur) == '\n') { \
            ctxt->input->line++; ctxt->input->col = 1; \
        } else ctxt->input->col++; \
        ctxt->nbChars++; \
        ctxt->input->cur++; \
    } \
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt); \
    if ((*ctxt->input->cur == 0) && \
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) \
        xmlPopInput(ctxt); \
  } while (0)
1985
/*
 * SHRINK: outside progressive mode, call xmlSHRINK() once more than
 * 2 * INPUT_CHUNK bytes lie before the cursor while fewer than
 * 2 * INPUT_CHUNK remain after it. The expansion is a complete if
 * statement that already carries its own terminating ';'.
 */
#define SHRINK \
    if ((ctxt->progressive == 0) && \
        ((ctxt->input->cur - ctxt->input->base) > (2 * INPUT_CHUNK)) && \
        ((ctxt->input->end - ctxt->input->cur) < (2 * INPUT_CHUNK))) \
        xmlSHRINK (ctxt);
1990
1991static void xmlSHRINK (xmlParserCtxtPtr ctxt) {
1992 xmlParserInputShrink(ctxt->input);
1993 if ((*ctxt->input->cur == 0) &&
1994 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1995 xmlPopInput(ctxt);
Daniel Veillard48b2f892001-02-25 16:11:03 +00001996 }
Owen Taylor3473f882001-02-23 17:55:21 +00001997
/*
 * GROW: outside progressive mode, call xmlGROW() when fewer than
 * INPUT_CHUNK bytes remain after the cursor. The expansion is a
 * complete if statement that already carries its own terminating ';'.
 */
#define GROW \
    if ((ctxt->progressive == 0) && \
        ((ctxt->input->end - ctxt->input->cur) < INPUT_CHUNK)) \
        xmlGROW (ctxt);
2001
2002static void xmlGROW (xmlParserCtxtPtr ctxt) {
2003 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
Daniel Veillard59df7832010-02-02 10:24:01 +01002004 if ((ctxt->input->cur != NULL) && (*ctxt->input->cur == 0) &&
Daniel Veillard46de64e2002-05-29 08:21:33 +00002005 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
2006 xmlPopInput(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +00002007}
Owen Taylor3473f882001-02-23 17:55:21 +00002008
/* SKIP_BLANKS: shorthand for xmlSkipBlankChars() on the ctxt in scope */
#define SKIP_BLANKS (xmlSkipBlankChars(ctxt))

/* NEXT: shorthand for xmlNextChar() on the ctxt in scope */
#define NEXT (xmlNextChar(ctxt))
2012
/*
 * NEXT1: step over exactly one xmlChar, bumping the column and the
 * character counter, and request more input if the cursor lands on a
 * 0 byte. Line numbers are not updated. The expansion is a brace
 * block, not a do/while(0).
 */
#define NEXT1 { \
    ctxt->nbChars++; \
    ctxt->input->col++; \
    ctxt->input->cur++; \
    if (*ctxt->input->cur == 0) \
        xmlParserInputGrow(ctxt->input, INPUT_CHUNK); \
  }
2020
/*
 * NEXTL(l): step over the current character, which occupies l xmlChars.
 * A newline under the cursor bumps the line and resets the column to 1;
 * anything else bumps the column by one. Afterwards
 * xmlParserHandlePEReference() is called if the cursor sits on a '%'.
 */
#define NEXTL(l) do { \
    if (*(ctxt->input->cur) == '\n') { \
        ctxt->input->line++; \
        ctxt->input->col = 1; \
    } else { \
        ctxt->input->col++; \
    } \
    ctxt->input->cur += (l); \
    if (*ctxt->input->cur == '%') \
        xmlParserHandlePEReference(ctxt); \
  } while (0)
2028
/*
 * CUR_CHAR(l):     xmlCurrentChar() on the ctxt in scope; l must be an
 *                  lvalue, its address is passed to receive the length.
 * CUR_SCHAR(s, l): same through xmlStringCurrentChar() on the string s.
 */
#define CUR_CHAR(l) xmlCurrentChar(ctxt, &l)
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, s, &l)

/*
 * COPY_BUF(l,b,i,v): append character v to buffer b at index i and
 * advance i: a direct byte store when l is 1, otherwise through
 * xmlCopyCharMultiByte(). The expansion is a bare if/else without a
 * trailing ';'.
 */
#define COPY_BUF(l,b,i,v) \
    if (l == 1) \
        b[i++] = (xmlChar) v; \
    else \
        i += xmlCopyCharMultiByte(&b[i],v)
Owen Taylor3473f882001-02-23 17:55:21 +00002035
2036/**
2037 * xmlSkipBlankChars:
2038 * @ctxt: the XML parser context
2039 *
2040 * skip all blanks character found at that point in the input streams.
2041 * It pops up finished entities in the process if allowable at that point.
2042 *
2043 * Returns the number of space chars skipped
2044 */
2045
2046int
2047xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
Daniel Veillard02141ea2001-04-30 11:46:40 +00002048 int res = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002049
2050 /*
2051 * It's Okay to use CUR/NEXT here since all the blanks are on
2052 * the ASCII range.
2053 */
Daniel Veillard02141ea2001-04-30 11:46:40 +00002054 if ((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) {
2055 const xmlChar *cur;
Owen Taylor3473f882001-02-23 17:55:21 +00002056 /*
Daniel Veillard02141ea2001-04-30 11:46:40 +00002057 * if we are in the document content, go really fast
Owen Taylor3473f882001-02-23 17:55:21 +00002058 */
Daniel Veillard02141ea2001-04-30 11:46:40 +00002059 cur = ctxt->input->cur;
William M. Brack76e95df2003-10-18 16:20:14 +00002060 while (IS_BLANK_CH(*cur)) {
Daniel Veillard02141ea2001-04-30 11:46:40 +00002061 if (*cur == '\n') {
2062 ctxt->input->line++; ctxt->input->col = 1;
2063 }
2064 cur++;
2065 res++;
2066 if (*cur == 0) {
2067 ctxt->input->cur = cur;
2068 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2069 cur = ctxt->input->cur;
2070 }
2071 }
2072 ctxt->input->cur = cur;
2073 } else {
2074 int cur;
2075 do {
2076 cur = CUR;
Daniel Veillard7da92702005-01-23 20:15:53 +00002077 while (IS_BLANK_CH(cur)) { /* CHECKED tstblanks.xml */
Daniel Veillard02141ea2001-04-30 11:46:40 +00002078 NEXT;
2079 cur = CUR;
2080 res++;
2081 }
2082 while ((cur == 0) && (ctxt->inputNr > 1) &&
2083 (ctxt->instate != XML_PARSER_COMMENT)) {
2084 xmlPopInput(ctxt);
2085 cur = CUR;
2086 }
2087 /*
2088 * Need to handle support of entities branching here
2089 */
2090 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);
2091 } while (IS_BLANK(cur)); /* CHECKED tstblanks.xml */
2092 }
Owen Taylor3473f882001-02-23 17:55:21 +00002093 return(res);
2094}
2095
2096/************************************************************************
2097 * *
2098 * Commodity functions to handle entities *
2099 * *
2100 ************************************************************************/
2101
2102/**
2103 * xmlPopInput:
2104 * @ctxt: an XML parser context
2105 *
2106 * xmlPopInput: the current input pointed by ctxt->input came to an end
2107 * pop it and return the next char.
2108 *
2109 * Returns the current xmlChar in the parser context
2110 */
2111xmlChar
2112xmlPopInput(xmlParserCtxtPtr ctxt) {
Daniel Veillarda82b1822004-11-08 16:24:57 +00002113 if ((ctxt == NULL) || (ctxt->inputNr <= 1)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002114 if (xmlParserDebugEntities)
2115 xmlGenericError(xmlGenericErrorContext,
2116 "Popping input %d\n", ctxt->inputNr);
2117 xmlFreeInputStream(inputPop(ctxt));
Daniel Veillard561b7f82002-03-20 21:55:57 +00002118 if ((*ctxt->input->cur == 0) &&
Owen Taylor3473f882001-02-23 17:55:21 +00002119 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
2120 return(xmlPopInput(ctxt));
2121 return(CUR);
2122}
2123
2124/**
2125 * xmlPushInput:
2126 * @ctxt: an XML parser context
2127 * @input: an XML parser input fragment (entity, XML fragment ...).
2128 *
2129 * xmlPushInput: switch to a new input stream which is stacked on top
2130 * of the previous one(s).
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00002131 * Returns -1 in case of error or the index in the input stack
Owen Taylor3473f882001-02-23 17:55:21 +00002132 */
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00002133int
Owen Taylor3473f882001-02-23 17:55:21 +00002134xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00002135 int ret;
2136 if (input == NULL) return(-1);
Owen Taylor3473f882001-02-23 17:55:21 +00002137
2138 if (xmlParserDebugEntities) {
2139 if ((ctxt->input != NULL) && (ctxt->input->filename))
2140 xmlGenericError(xmlGenericErrorContext,
2141 "%s(%d): ", ctxt->input->filename,
2142 ctxt->input->line);
2143 xmlGenericError(xmlGenericErrorContext,
2144 "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
2145 }
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00002146 ret = inputPush(ctxt, input);
Owen Taylor3473f882001-02-23 17:55:21 +00002147 GROW;
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00002148 return(ret);
Owen Taylor3473f882001-02-23 17:55:21 +00002149}
2150
2151/**
2152 * xmlParseCharRef:
2153 * @ctxt: an XML parser context
2154 *
2155 * parse Reference declarations
2156 *
2157 * [66] CharRef ::= '&#' [0-9]+ ';' |
2158 * '&#x' [0-9a-fA-F]+ ';'
2159 *
2160 * [ WFC: Legal Character ]
2161 * Characters referred to using character references must match the
2162 * production for Char.
2163 *
2164 * Returns the value parsed (as an int), 0 in case of error
2165 */
2166int
2167xmlParseCharRef(xmlParserCtxtPtr ctxt) {
Daniel Veillard50582112001-03-26 22:52:16 +00002168 unsigned int val = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002169 int count = 0;
Daniel Veillard37fd3072004-06-03 11:22:31 +00002170 unsigned int outofrange = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002171
Owen Taylor3473f882001-02-23 17:55:21 +00002172 /*
2173 * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
2174 */
Daniel Veillard561b7f82002-03-20 21:55:57 +00002175 if ((RAW == '&') && (NXT(1) == '#') &&
Owen Taylor3473f882001-02-23 17:55:21 +00002176 (NXT(2) == 'x')) {
2177 SKIP(3);
2178 GROW;
2179 while (RAW != ';') { /* loop blocked by count */
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00002180 if (count++ > 20) {
2181 count = 0;
2182 GROW;
2183 }
2184 if ((RAW >= '0') && (RAW <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +00002185 val = val * 16 + (CUR - '0');
2186 else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
2187 val = val * 16 + (CUR - 'a') + 10;
2188 else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
2189 val = val * 16 + (CUR - 'A') + 10;
2190 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002191 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002192 val = 0;
2193 break;
2194 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00002195 if (val > 0x10FFFF)
2196 outofrange = val;
2197
Owen Taylor3473f882001-02-23 17:55:21 +00002198 NEXT;
2199 count++;
2200 }
2201 if (RAW == ';') {
2202 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
Daniel Veillard77a90a72003-03-22 00:04:05 +00002203 ctxt->input->col++;
Owen Taylor3473f882001-02-23 17:55:21 +00002204 ctxt->nbChars ++;
2205 ctxt->input->cur++;
2206 }
Daniel Veillard561b7f82002-03-20 21:55:57 +00002207 } else if ((RAW == '&') && (NXT(1) == '#')) {
Owen Taylor3473f882001-02-23 17:55:21 +00002208 SKIP(2);
2209 GROW;
2210 while (RAW != ';') { /* loop blocked by count */
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00002211 if (count++ > 20) {
2212 count = 0;
2213 GROW;
2214 }
2215 if ((RAW >= '0') && (RAW <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +00002216 val = val * 10 + (CUR - '0');
2217 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002218 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002219 val = 0;
2220 break;
2221 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00002222 if (val > 0x10FFFF)
2223 outofrange = val;
2224
Owen Taylor3473f882001-02-23 17:55:21 +00002225 NEXT;
2226 count++;
2227 }
2228 if (RAW == ';') {
2229 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
Daniel Veillard77a90a72003-03-22 00:04:05 +00002230 ctxt->input->col++;
Owen Taylor3473f882001-02-23 17:55:21 +00002231 ctxt->nbChars ++;
2232 ctxt->input->cur++;
2233 }
2234 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002235 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002236 }
2237
2238 /*
2239 * [ WFC: Legal Character ]
2240 * Characters referred to using character references must match the
2241 * production for Char.
2242 */
Daniel Veillard37fd3072004-06-03 11:22:31 +00002243 if ((IS_CHAR(val) && (outofrange == 0))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002244 return(val);
2245 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00002246 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2247 "xmlParseCharRef: invalid xmlChar value %d\n",
2248 val);
Owen Taylor3473f882001-02-23 17:55:21 +00002249 }
2250 return(0);
2251}
2252
2253/**
2254 * xmlParseStringCharRef:
2255 * @ctxt: an XML parser context
2256 * @str: a pointer to an index in the string
2257 *
2258 * parse Reference declarations, variant parsing from a string rather
2259 * than an an input flow.
2260 *
2261 * [66] CharRef ::= '&#' [0-9]+ ';' |
2262 * '&#x' [0-9a-fA-F]+ ';'
2263 *
2264 * [ WFC: Legal Character ]
2265 * Characters referred to using character references must match the
2266 * production for Char.
2267 *
2268 * Returns the value parsed (as an int), 0 in case of error, str will be
2269 * updated to the current value of the index
2270 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00002271static int
Owen Taylor3473f882001-02-23 17:55:21 +00002272xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
2273 const xmlChar *ptr;
2274 xmlChar cur;
Daniel Veillard37fd3072004-06-03 11:22:31 +00002275 unsigned int val = 0;
2276 unsigned int outofrange = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002277
2278 if ((str == NULL) || (*str == NULL)) return(0);
2279 ptr = *str;
2280 cur = *ptr;
2281 if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
2282 ptr += 3;
2283 cur = *ptr;
2284 while (cur != ';') { /* Non input consuming loop */
2285 if ((cur >= '0') && (cur <= '9'))
2286 val = val * 16 + (cur - '0');
2287 else if ((cur >= 'a') && (cur <= 'f'))
2288 val = val * 16 + (cur - 'a') + 10;
2289 else if ((cur >= 'A') && (cur <= 'F'))
2290 val = val * 16 + (cur - 'A') + 10;
2291 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002292 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002293 val = 0;
2294 break;
2295 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00002296 if (val > 0x10FFFF)
2297 outofrange = val;
2298
Owen Taylor3473f882001-02-23 17:55:21 +00002299 ptr++;
2300 cur = *ptr;
2301 }
2302 if (cur == ';')
2303 ptr++;
2304 } else if ((cur == '&') && (ptr[1] == '#')){
2305 ptr += 2;
2306 cur = *ptr;
2307 while (cur != ';') { /* Non input consuming loops */
2308 if ((cur >= '0') && (cur <= '9'))
2309 val = val * 10 + (cur - '0');
2310 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002311 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002312 val = 0;
2313 break;
2314 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00002315 if (val > 0x10FFFF)
2316 outofrange = val;
2317
Owen Taylor3473f882001-02-23 17:55:21 +00002318 ptr++;
2319 cur = *ptr;
2320 }
2321 if (cur == ';')
2322 ptr++;
2323 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002324 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002325 return(0);
2326 }
2327 *str = ptr;
2328
2329 /*
2330 * [ WFC: Legal Character ]
2331 * Characters referred to using character references must match the
2332 * production for Char.
2333 */
Daniel Veillard37fd3072004-06-03 11:22:31 +00002334 if ((IS_CHAR(val) && (outofrange == 0))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002335 return(val);
2336 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00002337 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2338 "xmlParseStringCharRef: invalid xmlChar value %d\n",
2339 val);
Owen Taylor3473f882001-02-23 17:55:21 +00002340 }
2341 return(0);
2342}
2343
2344/**
Daniel Veillardf5582f12002-06-11 10:08:16 +00002345 * xmlNewBlanksWrapperInputStream:
2346 * @ctxt: an XML parser context
2347 * @entity: an Entity pointer
2348 *
2349 * Create a new input stream for wrapping
2350 * blanks around a PEReference
2351 *
2352 * Returns the new input stream or NULL
2353 */
2354
2355static void deallocblankswrapper (xmlChar *str) {xmlFree(str);}
2356
Daniel Veillardf4862f02002-09-10 11:13:43 +00002357static xmlParserInputPtr
Daniel Veillardf5582f12002-06-11 10:08:16 +00002358xmlNewBlanksWrapperInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
2359 xmlParserInputPtr input;
2360 xmlChar *buffer;
2361 size_t length;
2362 if (entity == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002363 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
2364 "xmlNewBlanksWrapperInputStream entity\n");
Daniel Veillardf5582f12002-06-11 10:08:16 +00002365 return(NULL);
2366 }
2367 if (xmlParserDebugEntities)
2368 xmlGenericError(xmlGenericErrorContext,
2369 "new blanks wrapper for entity: %s\n", entity->name);
2370 input = xmlNewInputStream(ctxt);
2371 if (input == NULL) {
2372 return(NULL);
2373 }
2374 length = xmlStrlen(entity->name) + 5;
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002375 buffer = xmlMallocAtomic(length);
Daniel Veillardf5582f12002-06-11 10:08:16 +00002376 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002377 xmlErrMemory(ctxt, NULL);
Daniel Veillard68b6e022008-03-31 09:26:00 +00002378 xmlFree(input);
Daniel Veillardf5582f12002-06-11 10:08:16 +00002379 return(NULL);
2380 }
2381 buffer [0] = ' ';
2382 buffer [1] = '%';
2383 buffer [length-3] = ';';
2384 buffer [length-2] = ' ';
2385 buffer [length-1] = 0;
2386 memcpy(buffer + 2, entity->name, length - 5);
2387 input->free = deallocblankswrapper;
2388 input->base = buffer;
2389 input->cur = buffer;
2390 input->length = length;
2391 input->end = &buffer[length];
2392 return(input);
2393}
2394
2395/**
Owen Taylor3473f882001-02-23 17:55:21 +00002396 * xmlParserHandlePEReference:
2397 * @ctxt: the parser context
2398 *
2399 * [69] PEReference ::= '%' Name ';'
2400 *
2401 * [ WFC: No Recursion ]
2402 * A parsed entity must not contain a recursive
2403 * reference to itself, either directly or indirectly.
2404 *
2405 * [ WFC: Entity Declared ]
2406 * In a document without any DTD, a document with only an internal DTD
2407 * subset which contains no parameter entity references, or a document
2408 * with "standalone='yes'", ... ... The declaration of a parameter
2409 * entity must precede any reference to it...
2410 *
2411 * [ VC: Entity Declared ]
2412 * In a document with an external subset or external parameter entities
2413 * with "standalone='no'", ... ... The declaration of a parameter entity
2414 * must precede any reference to it...
2415 *
2416 * [ WFC: In DTD ]
2417 * Parameter-entity references may only appear in the DTD.
2418 * NOTE: misleading but this is handled.
2419 *
2420 * A PEReference may have been detected in the current input stream
2421 * the handling is done accordingly to
2422 * http://www.w3.org/TR/REC-xml#entproc
2423 * i.e.
2424 * - Included in literal in entity values
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002425 * - Included as Parameter Entity reference within DTDs
Owen Taylor3473f882001-02-23 17:55:21 +00002426 */
2427void
2428xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002429 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00002430 xmlEntityPtr entity = NULL;
2431 xmlParserInputPtr input;
2432
Owen Taylor3473f882001-02-23 17:55:21 +00002433 if (RAW != '%') return;
2434 switch(ctxt->instate) {
2435 case XML_PARSER_CDATA_SECTION:
2436 return;
2437 case XML_PARSER_COMMENT:
2438 return;
2439 case XML_PARSER_START_TAG:
2440 return;
2441 case XML_PARSER_END_TAG:
2442 return;
2443 case XML_PARSER_EOF:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002444 xmlFatalErr(ctxt, XML_ERR_PEREF_AT_EOF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002445 return;
2446 case XML_PARSER_PROLOG:
2447 case XML_PARSER_START:
2448 case XML_PARSER_MISC:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002449 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_PROLOG, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002450 return;
2451 case XML_PARSER_ENTITY_DECL:
2452 case XML_PARSER_CONTENT:
2453 case XML_PARSER_ATTRIBUTE_VALUE:
2454 case XML_PARSER_PI:
2455 case XML_PARSER_SYSTEM_LITERAL:
Daniel Veillard4a7ae502002-02-18 19:18:17 +00002456 case XML_PARSER_PUBLIC_LITERAL:
Owen Taylor3473f882001-02-23 17:55:21 +00002457 /* we just ignore it there */
2458 return;
2459 case XML_PARSER_EPILOG:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002460 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_EPILOG, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002461 return;
2462 case XML_PARSER_ENTITY_VALUE:
2463 /*
2464 * NOTE: in the case of entity values, we don't do the
2465 * substitution here since we need the literal
2466 * entity value to be able to save the internal
2467 * subset of the document.
2468 * This will be handled by xmlStringDecodeEntities
2469 */
2470 return;
2471 case XML_PARSER_DTD:
2472 /*
2473 * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
2474 * In the internal DTD subset, parameter-entity references
2475 * can occur only where markup declarations can occur, not
2476 * within markup declarations.
2477 * In that case this is handled in xmlParseMarkupDecl
2478 */
2479 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
2480 return;
William M. Brack76e95df2003-10-18 16:20:14 +00002481 if (IS_BLANK_CH(NXT(1)) || NXT(1) == 0)
Daniel Veillardf5582f12002-06-11 10:08:16 +00002482 return;
Owen Taylor3473f882001-02-23 17:55:21 +00002483 break;
2484 case XML_PARSER_IGNORE:
2485 return;
2486 }
2487
2488 NEXT;
2489 name = xmlParseName(ctxt);
2490 if (xmlParserDebugEntities)
2491 xmlGenericError(xmlGenericErrorContext,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002492 "PEReference: %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00002493 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002494 xmlFatalErr(ctxt, XML_ERR_PEREF_NO_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002495 } else {
2496 if (RAW == ';') {
2497 NEXT;
2498 if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL))
2499 entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
2500 if (entity == NULL) {
2501
2502 /*
2503 * [ WFC: Entity Declared ]
2504 * In a document without any DTD, a document with only an
2505 * internal DTD subset which contains no parameter entity
2506 * references, or a document with "standalone='yes'", ...
2507 * ... The declaration of a parameter entity must precede
2508 * any reference to it...
2509 */
2510 if ((ctxt->standalone == 1) ||
2511 ((ctxt->hasExternalSubset == 0) &&
2512 (ctxt->hasPErefs == 0))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00002513 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00002514 "PEReference: %%%s; not found\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00002515 } else {
2516 /*
2517 * [ VC: Entity Declared ]
2518 * In a document with an external subset or external
2519 * parameter entities with "standalone='no'", ...
2520 * ... The declaration of a parameter entity must precede
2521 * any reference to it...
2522 */
Daniel Veillard24eb9782003-10-04 21:08:09 +00002523 if ((ctxt->validate) && (ctxt->vctxt.error != NULL)) {
2524 xmlValidityError(ctxt, XML_WAR_UNDECLARED_ENTITY,
2525 "PEReference: %%%s; not found\n",
Daniel Veillardae0765b2008-07-31 19:54:59 +00002526 name, NULL);
Daniel Veillard24eb9782003-10-04 21:08:09 +00002527 } else
2528 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
2529 "PEReference: %%%s; not found\n",
2530 name, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002531 ctxt->valid = 0;
2532 }
Daniel Veillardf5582f12002-06-11 10:08:16 +00002533 } else if (ctxt->input->free != deallocblankswrapper) {
2534 input = xmlNewBlanksWrapperInputStream(ctxt, entity);
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00002535 if (xmlPushInput(ctxt, input) < 0)
2536 return;
Owen Taylor3473f882001-02-23 17:55:21 +00002537 } else {
2538 if ((entity->etype == XML_INTERNAL_PARAMETER_ENTITY) ||
2539 (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)) {
Daniel Veillard87a764e2001-06-20 17:41:10 +00002540 xmlChar start[4];
2541 xmlCharEncoding enc;
2542
Owen Taylor3473f882001-02-23 17:55:21 +00002543 /*
2544 * handle the extra spaces added before and after
2545 * c.f. http://www.w3.org/TR/REC-xml#as-PE
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002546 * this is done independently.
Owen Taylor3473f882001-02-23 17:55:21 +00002547 */
2548 input = xmlNewEntityInputStream(ctxt, entity);
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00002549 if (xmlPushInput(ctxt, input) < 0)
2550 return;
Daniel Veillard87a764e2001-06-20 17:41:10 +00002551
2552 /*
2553 * Get the 4 first bytes and decode the charset
2554 * if enc != XML_CHAR_ENCODING_NONE
2555 * plug some encoding conversion routines.
William M. Bracka0c48ad2004-04-16 15:58:29 +00002556 * Note that, since we may have some non-UTF8
2557 * encoding (like UTF16, bug 135229), the 'length'
2558 * is not known, but we can calculate based upon
2559 * the amount of data in the buffer.
Daniel Veillard87a764e2001-06-20 17:41:10 +00002560 */
2561 GROW
William M. Bracka0c48ad2004-04-16 15:58:29 +00002562 if ((ctxt->input->end - ctxt->input->cur)>=4) {
Daniel Veillarde059b892002-06-13 15:32:10 +00002563 start[0] = RAW;
2564 start[1] = NXT(1);
2565 start[2] = NXT(2);
2566 start[3] = NXT(3);
2567 enc = xmlDetectCharEncoding(start, 4);
2568 if (enc != XML_CHAR_ENCODING_NONE) {
2569 xmlSwitchEncoding(ctxt, enc);
2570 }
Daniel Veillard87a764e2001-06-20 17:41:10 +00002571 }
2572
Owen Taylor3473f882001-02-23 17:55:21 +00002573 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
Daniel Veillarda07050d2003-10-19 14:46:32 +00002574 (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l' )) &&
2575 (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002576 xmlParseTextDecl(ctxt);
2577 }
Owen Taylor3473f882001-02-23 17:55:21 +00002578 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00002579 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
2580 "PEReference: %s is not a parameter entity\n",
2581 name);
Owen Taylor3473f882001-02-23 17:55:21 +00002582 }
2583 }
2584 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002585 xmlFatalErr(ctxt, XML_ERR_PEREF_SEMICOL_MISSING, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002586 }
Owen Taylor3473f882001-02-23 17:55:21 +00002587 }
2588}
2589
/*
 * Macro used to grow the current buffer.
 *
 * @buffer: name of the xmlChar * variable to enlarge; the macro also
 *          reads and updates the variable named <buffer>_size
 * @n:      number of extra xmlChar added on top of the doubled size
 *
 * The size variable is doubled and increased by @n before xmlRealloc is
 * called. If the reallocation fails, the macro jumps to a "mem_error"
 * label which the enclosing function must provide; @buffer itself is not
 * modified in that case, so the old block is still referenced by it.
 */
#define growBuffer(buffer, n) {                                         \
    xmlChar *tmp;                                                       \
    buffer##_size *= 2;                                                 \
    buffer##_size += n;                                                 \
    tmp = (xmlChar *)                                                   \
                xmlRealloc(buffer, buffer##_size * sizeof(xmlChar));    \
    if (tmp == NULL) goto mem_error;                                    \
    buffer = tmp;                                                       \
}
2602
/**
 * xmlStringLenDecodeEntities:
 * @ctxt:  the parser context
 * @str:  the input string
 * @len: the string length
 * @what:  combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
 * @end:  an end marker xmlChar, 0 if none
 * @end2:  an end marker xmlChar, 0 if none
 * @end3:  an end marker xmlChar, 0 if none
 *
 * Takes an entity string content and processes it to do the adequate
 * substitutions. Scanning stops at the first of: @len bytes consumed,
 * a 0 character, or one of the three end markers.
 *
 * [67] Reference ::= EntityRef | CharRef
 *
 * [69] PEReference ::= '%' Name ';'
 *
 * Returns A newly allocated string with the substitution done. The caller
 *      must deallocate it ! NULL is returned if @ctxt or @str is NULL,
 *      if @len is negative, if the entity nesting depth limit is
 *      exceeded, on allocation failure or when an entity loop or internal
 *      error is detected.
 */
xmlChar *
xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
                      int what, xmlChar end, xmlChar  end2, xmlChar end3) {
    xmlChar *buffer = NULL;
    int buffer_size = 0;

    xmlChar *current = NULL;
    xmlChar *rep = NULL;
    const xmlChar *last;
    xmlEntityPtr ent;
    int c,l;
    int nbchars = 0;

    if ((ctxt == NULL) || (str == NULL) || (len < 0))
        return(NULL);
    last = str + len;

    /*
     * This function recurses through xmlStringDecodeEntities for every
     * nested entity: refuse to go deeper than 40 levels unless
     * XML_PARSE_HUGE is set, and never deeper than 1024 levels.
     */
    if (((ctxt->depth > 40) &&
         ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
        (ctxt->depth > 1024)) {
        xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
        return(NULL);
    }

    /*
     * allocate a translation buffer.
     */
    buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
    buffer = (xmlChar *) xmlMallocAtomic(buffer_size * sizeof(xmlChar));
    if (buffer == NULL) goto mem_error;

    /*
     * OK loop until we reach one of the ending char or a size limit.
     * we are operating on already parsed values.
     * NOTE(review): CUR_SCHAR is defined outside this block; it is used
     * here as yielding the current character of str and storing its byte
     * length in l -- confirm against the macro definition.
     */
    if (str < last)
        c = CUR_SCHAR(str, l);
    else
        c = 0;
    while ((c != 0) && (c != end) && /* non input consuming loop */
           (c != end2) && (c != end3)) {

        if (c == 0) break;
        if ((c == '&') && (str[1] == '#')) {
            /* character reference: str is advanced by the helper */
            int val = xmlParseStringCharRef(ctxt, &str);
            if (val != 0) {
                COPY_BUF(0,buffer,nbchars,val);
            }
            /* keep XML_PARSER_BUFFER_SIZE bytes of headroom */
            if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
                growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
            }
        } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
            if (xmlParserDebugEntities)
                xmlGenericError(xmlGenericErrorContext,
                        "String decoding Entity Reference: %.30s\n",
                        str);
            ent = xmlParseStringEntityRef(ctxt, &str);
            /* give up as soon as a loop or an internal error is recorded */
            if ((ctxt->lastError.code == XML_ERR_ENTITY_LOOP) ||
                (ctxt->lastError.code == XML_ERR_INTERNAL_ERROR))
                goto int_error;
            if (ent != NULL)
                ctxt->nbentities += ent->checked;
            if ((ent != NULL) &&
                (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
                /* predefined entity: only its first content byte is copied */
                if (ent->content != NULL) {
                    COPY_BUF(0,buffer,nbchars,ent->content[0]);
                    if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
                        growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
                    }
                } else {
                    xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
                            "predefined entity has no content\n");
                }
            } else if ((ent != NULL) && (ent->content != NULL)) {
                /* recursively expand the entity content, then append it */
                ctxt->depth++;
                rep = xmlStringDecodeEntities(ctxt, ent->content, what,
                                              0, 0, 0);
                ctxt->depth--;

                if (rep != NULL) {
                    current = rep;
                    while (*current != 0) { /* non input consuming loop */
                        buffer[nbchars++] = *current++;
                        if (nbchars >
                            buffer_size - XML_PARSER_BUFFER_SIZE) {
                            /* rep is released at int_error if the check fails */
                            if (xmlParserEntityCheck(ctxt, nbchars, ent))
                                goto int_error;
                            growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
                        }
                    }
                    xmlFree(rep);
                    rep = NULL;
                }
            } else if (ent != NULL) {
                /* entity without content: copy the reference back as "&name;" */
                int i = xmlStrlen(ent->name);
                const xmlChar *cur = ent->name;

                buffer[nbchars++] = '&';
                /* make sure the whole name fits before copying it */
                if (nbchars > buffer_size - i - XML_PARSER_BUFFER_SIZE) {
                    growBuffer(buffer, i + XML_PARSER_BUFFER_SIZE);
                }
                for (;i > 0;i--)
                    buffer[nbchars++] = *cur++;
                buffer[nbchars++] = ';';
            }
        } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
            if (xmlParserDebugEntities)
                xmlGenericError(xmlGenericErrorContext,
                            "String decoding PE Reference: %.30s\n", str);
            ent = xmlParseStringPEReference(ctxt, &str);
            if (ctxt->lastError.code == XML_ERR_ENTITY_LOOP)
                goto int_error;
            if (ent != NULL)
                ctxt->nbentities += ent->checked;
            if (ent != NULL) {
                /* the content may not be loaded yet; the result is not checked */
                if (ent->content == NULL) {
                    xmlLoadEntityContent(ctxt, ent);
                }
                ctxt->depth++;
                rep = xmlStringDecodeEntities(ctxt, ent->content, what,
                                              0, 0, 0);
                ctxt->depth--;
                if (rep != NULL) {
                    current = rep;
                    while (*current != 0) { /* non input consuming loop */
                        buffer[nbchars++] = *current++;
                        if (nbchars >
                            buffer_size - XML_PARSER_BUFFER_SIZE) {
                            if (xmlParserEntityCheck(ctxt, nbchars, ent))
                                goto int_error;
                            growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
                        }
                    }
                    xmlFree(rep);
                    rep = NULL;
                }
            }
        } else {
            /* ordinary character: copy it and skip its l bytes */
            COPY_BUF(l,buffer,nbchars,c);
            str += l;
            if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
              growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
            }
        }
        if (str < last)
            c = CUR_SCHAR(str, l);
        else
            c = 0;
    }
    buffer[nbchars] = 0;
    return(buffer);

    /* allocation failure: report it, then share the cleanup below */
mem_error:
    xmlErrMemory(ctxt, NULL);
    /* error already reported elsewhere: release what is still owned */
int_error:
    if (rep != NULL)
        xmlFree(rep);
    if (buffer != NULL)
        xmlFree(buffer);
    return(NULL);
}
2783
Daniel Veillarde57ec792003-09-10 10:50:59 +00002784/**
2785 * xmlStringDecodeEntities:
2786 * @ctxt: the parser context
2787 * @str: the input string
2788 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2789 * @end: an end marker xmlChar, 0 if none
2790 * @end2: an end marker xmlChar, 0 if none
2791 * @end3: an end marker xmlChar, 0 if none
2792 *
2793 * Takes a entity string content and process to do the adequate substitutions.
2794 *
2795 * [67] Reference ::= EntityRef | CharRef
2796 *
2797 * [69] PEReference ::= '%' Name ';'
2798 *
2799 * Returns A newly allocated string with the substitution done. The caller
2800 * must deallocate it !
2801 */
2802xmlChar *
2803xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
2804 xmlChar end, xmlChar end2, xmlChar end3) {
Daniel Veillarda82b1822004-11-08 16:24:57 +00002805 if ((ctxt == NULL) || (str == NULL)) return(NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00002806 return(xmlStringLenDecodeEntities(ctxt, str, xmlStrlen(str), what,
2807 end, end2, end3));
2808}
Owen Taylor3473f882001-02-23 17:55:21 +00002809
2810/************************************************************************
2811 * *
Owen Taylor3473f882001-02-23 17:55:21 +00002812 * Commodity functions, cleanup needed ? *
2813 * *
2814 ************************************************************************/
2815
2816/**
2817 * areBlanks:
2818 * @ctxt: an XML parser context
2819 * @str: a xmlChar *
2820 * @len: the size of @str
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002821 * @blank_chars: we know the chars are blanks
Owen Taylor3473f882001-02-23 17:55:21 +00002822 *
2823 * Is this a sequence of blank chars that one can ignore ?
2824 *
2825 * Returns 1 if ignorable 0 otherwise.
2826 */
2827
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002828static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2829 int blank_chars) {
Owen Taylor3473f882001-02-23 17:55:21 +00002830 int i, ret;
2831 xmlNodePtr lastChild;
2832
Daniel Veillard05c13a22001-09-09 08:38:09 +00002833 /*
2834 * Don't spend time trying to differentiate them, the same callback is
2835 * used !
2836 */
2837 if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters)
Daniel Veillard2f362242001-03-02 17:36:21 +00002838 return(0);
2839
Owen Taylor3473f882001-02-23 17:55:21 +00002840 /*
2841 * Check for xml:space value.
2842 */
Daniel Veillard1114d002006-10-12 16:24:35 +00002843 if ((ctxt->space == NULL) || (*(ctxt->space) == 1) ||
2844 (*(ctxt->space) == -2))
Owen Taylor3473f882001-02-23 17:55:21 +00002845 return(0);
2846
2847 /*
2848 * Check that the string is made of blanks
2849 */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002850 if (blank_chars == 0) {
2851 for (i = 0;i < len;i++)
2852 if (!(IS_BLANK_CH(str[i]))) return(0);
2853 }
Owen Taylor3473f882001-02-23 17:55:21 +00002854
2855 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002856 * Look if the element is mixed content in the DTD if available
Owen Taylor3473f882001-02-23 17:55:21 +00002857 */
Daniel Veillard6dd398f2001-07-25 22:41:03 +00002858 if (ctxt->node == NULL) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002859 if (ctxt->myDoc != NULL) {
2860 ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
2861 if (ret == 0) return(1);
2862 if (ret == 1) return(0);
2863 }
2864
2865 /*
2866 * Otherwise, heuristic :-\
2867 */
Daniel Veillardabac41e2005-07-06 15:17:38 +00002868 if ((RAW != '<') && (RAW != 0xD)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002869 if ((ctxt->node->children == NULL) &&
2870 (RAW == '<') && (NXT(1) == '/')) return(0);
2871
2872 lastChild = xmlGetLastChild(ctxt->node);
2873 if (lastChild == NULL) {
Daniel Veillard7db37732001-07-12 01:20:08 +00002874 if ((ctxt->node->type != XML_ELEMENT_NODE) &&
2875 (ctxt->node->content != NULL)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002876 } else if (xmlNodeIsText(lastChild))
2877 return(0);
2878 else if ((ctxt->node->children != NULL) &&
2879 (xmlNodeIsText(ctxt->node->children)))
2880 return(0);
2881 return(1);
2882}
2883
Owen Taylor3473f882001-02-23 17:55:21 +00002884/************************************************************************
2885 * *
2886 * Extra stuff for namespace support *
2887 * Relates to http://www.w3.org/TR/WD-xml-names *
2888 * *
2889 ************************************************************************/
2890
2891/**
2892 * xmlSplitQName:
2893 * @ctxt: an XML parser context
2894 * @name: an XML parser context
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00002895 * @prefix: a xmlChar **
Owen Taylor3473f882001-02-23 17:55:21 +00002896 *
2897 * parse an UTF8 encoded XML qualified name string
2898 *
2899 * [NS 5] QName ::= (Prefix ':')? LocalPart
2900 *
2901 * [NS 6] Prefix ::= NCName
2902 *
2903 * [NS 7] LocalPart ::= NCName
2904 *
2905 * Returns the local part, and prefix is updated
2906 * to get the Prefix if any.
2907 */
2908
2909xmlChar *
2910xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
2911 xmlChar buf[XML_MAX_NAMELEN + 5];
2912 xmlChar *buffer = NULL;
2913 int len = 0;
2914 int max = XML_MAX_NAMELEN;
2915 xmlChar *ret = NULL;
2916 const xmlChar *cur = name;
2917 int c;
2918
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00002919 if (prefix == NULL) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002920 *prefix = NULL;
2921
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00002922 if (cur == NULL) return(NULL);
2923
Owen Taylor3473f882001-02-23 17:55:21 +00002924#ifndef XML_XML_NAMESPACE
2925 /* xml: prefix is not really a namespace */
2926 if ((cur[0] == 'x') && (cur[1] == 'm') &&
2927 (cur[2] == 'l') && (cur[3] == ':'))
2928 return(xmlStrdup(name));
2929#endif
2930
Daniel Veillard597bc482003-07-24 16:08:28 +00002931 /* nasty but well=formed */
Owen Taylor3473f882001-02-23 17:55:21 +00002932 if (cur[0] == ':')
2933 return(xmlStrdup(name));
2934
2935 c = *cur++;
2936 while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
2937 buf[len++] = c;
2938 c = *cur++;
2939 }
2940 if (len >= max) {
2941 /*
2942 * Okay someone managed to make a huge name, so he's ready to pay
2943 * for the processing speed.
2944 */
2945 max = len * 2;
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00002946
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002947 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002948 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002949 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002950 return(NULL);
2951 }
2952 memcpy(buffer, buf, len);
2953 while ((c != 0) && (c != ':')) { /* tested bigname.xml */
2954 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00002955 xmlChar *tmp;
2956
Owen Taylor3473f882001-02-23 17:55:21 +00002957 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00002958 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00002959 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00002960 if (tmp == NULL) {
Daniel Veillard68b6e022008-03-31 09:26:00 +00002961 xmlFree(buffer);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002962 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002963 return(NULL);
2964 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00002965 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00002966 }
2967 buffer[len++] = c;
2968 c = *cur++;
2969 }
2970 buffer[len] = 0;
2971 }
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00002972
Daniel Veillard597bc482003-07-24 16:08:28 +00002973 if ((c == ':') && (*cur == 0)) {
Daniel Veillard02a49632006-10-13 12:42:31 +00002974 if (buffer != NULL)
2975 xmlFree(buffer);
2976 *prefix = NULL;
Daniel Veillard597bc482003-07-24 16:08:28 +00002977 return(xmlStrdup(name));
Daniel Veillard02a49632006-10-13 12:42:31 +00002978 }
Daniel Veillard597bc482003-07-24 16:08:28 +00002979
Owen Taylor3473f882001-02-23 17:55:21 +00002980 if (buffer == NULL)
2981 ret = xmlStrndup(buf, len);
2982 else {
2983 ret = buffer;
2984 buffer = NULL;
2985 max = XML_MAX_NAMELEN;
2986 }
2987
2988
2989 if (c == ':') {
Daniel Veillardbb284f42002-10-16 18:02:47 +00002990 c = *cur;
Owen Taylor3473f882001-02-23 17:55:21 +00002991 *prefix = ret;
Daniel Veillard597bc482003-07-24 16:08:28 +00002992 if (c == 0) {
Daniel Veillard8d73bcb2003-08-04 01:06:15 +00002993 return(xmlStrndup(BAD_CAST "", 0));
Daniel Veillard597bc482003-07-24 16:08:28 +00002994 }
Owen Taylor3473f882001-02-23 17:55:21 +00002995 len = 0;
2996
Daniel Veillardbb284f42002-10-16 18:02:47 +00002997 /*
2998 * Check that the first character is proper to start
2999 * a new name
3000 */
3001 if (!(((c >= 0x61) && (c <= 0x7A)) ||
3002 ((c >= 0x41) && (c <= 0x5A)) ||
3003 (c == '_') || (c == ':'))) {
3004 int l;
3005 int first = CUR_SCHAR(cur, l);
3006
3007 if (!IS_LETTER(first) && (first != '_')) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00003008 xmlFatalErrMsgStr(ctxt, XML_NS_ERR_QNAME,
Daniel Veillardbb284f42002-10-16 18:02:47 +00003009 "Name %s is not XML Namespace compliant\n",
Daniel Veillardbc92eca2003-09-15 09:48:06 +00003010 name);
Daniel Veillardbb284f42002-10-16 18:02:47 +00003011 }
3012 }
3013 cur++;
3014
Owen Taylor3473f882001-02-23 17:55:21 +00003015 while ((c != 0) && (len < max)) { /* tested bigname2.xml */
3016 buf[len++] = c;
3017 c = *cur++;
3018 }
3019 if (len >= max) {
3020 /*
3021 * Okay someone managed to make a huge name, so he's ready to pay
3022 * for the processing speed.
3023 */
3024 max = len * 2;
Daniel Veillard68b6e022008-03-31 09:26:00 +00003025
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003026 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003027 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003028 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003029 return(NULL);
3030 }
3031 memcpy(buffer, buf, len);
3032 while (c != 0) { /* tested bigname2.xml */
3033 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003034 xmlChar *tmp;
3035
Owen Taylor3473f882001-02-23 17:55:21 +00003036 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003037 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00003038 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00003039 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003040 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00003041 xmlFree(buffer);
Owen Taylor3473f882001-02-23 17:55:21 +00003042 return(NULL);
3043 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003044 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003045 }
3046 buffer[len++] = c;
3047 c = *cur++;
3048 }
3049 buffer[len] = 0;
3050 }
Daniel Veillard68b6e022008-03-31 09:26:00 +00003051
Owen Taylor3473f882001-02-23 17:55:21 +00003052 if (buffer == NULL)
3053 ret = xmlStrndup(buf, len);
3054 else {
3055 ret = buffer;
3056 }
3057 }
3058
3059 return(ret);
3060}
3061
3062/************************************************************************
3063 * *
3064 * The parser itself *
3065 * Relates to http://www.w3.org/TR/REC-xml *
3066 * *
3067 ************************************************************************/
3068
Daniel Veillard34e3f642008-07-29 09:02:27 +00003069/************************************************************************
3070 * *
3071 * Routines to parse Name, NCName and NmToken *
3072 * *
3073 ************************************************************************/
#ifdef DEBUG
/*
 * Call counters for the name parsing routines of this section.
 * They only exist in DEBUG builds; each routine bumps its own counter
 * on entry.
 */
static unsigned long nbParseName = 0;
static unsigned long nbParseNameComplex = 0;
static unsigned long nbParseNCName = 0;
static unsigned long nbParseNCNameComplex = 0;
static unsigned long nbParseNmToken = 0;
static unsigned long nbParseStringName = 0;
#endif
3082
/*
 * The two following functions are related to the change of accepted
 * characters for Name and NmToken in Revision 5 of XML-1.0.
 * They correspond to the modified production [4] and the new production [4a]
 * introduced in that revision. Also note that the macros used for the
 * productions Letter, Digit, CombiningChar and Extender are not needed
 * anymore.
 * We still keep compatibility with the pre-revision 5 parsing semantics if
 * the new XML_PARSE_OLD10 option is given to the parser.
 */
3093static int
3094xmlIsNameStartChar(xmlParserCtxtPtr ctxt, int c) {
3095 if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3096 /*
3097 * Use the new checks of production [4] [4a] amd [5] of the
3098 * Update 5 of XML-1.0
3099 */
3100 if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3101 (((c >= 'a') && (c <= 'z')) ||
3102 ((c >= 'A') && (c <= 'Z')) ||
3103 (c == '_') || (c == ':') ||
3104 ((c >= 0xC0) && (c <= 0xD6)) ||
3105 ((c >= 0xD8) && (c <= 0xF6)) ||
3106 ((c >= 0xF8) && (c <= 0x2FF)) ||
3107 ((c >= 0x370) && (c <= 0x37D)) ||
3108 ((c >= 0x37F) && (c <= 0x1FFF)) ||
3109 ((c >= 0x200C) && (c <= 0x200D)) ||
3110 ((c >= 0x2070) && (c <= 0x218F)) ||
3111 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3112 ((c >= 0x3001) && (c <= 0xD7FF)) ||
3113 ((c >= 0xF900) && (c <= 0xFDCF)) ||
3114 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3115 ((c >= 0x10000) && (c <= 0xEFFFF))))
3116 return(1);
3117 } else {
3118 if (IS_LETTER(c) || (c == '_') || (c == ':'))
3119 return(1);
3120 }
3121 return(0);
3122}
3123
3124static int
3125xmlIsNameChar(xmlParserCtxtPtr ctxt, int c) {
3126 if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3127 /*
3128 * Use the new checks of production [4] [4a] amd [5] of the
3129 * Update 5 of XML-1.0
3130 */
3131 if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3132 (((c >= 'a') && (c <= 'z')) ||
3133 ((c >= 'A') && (c <= 'Z')) ||
3134 ((c >= '0') && (c <= '9')) || /* !start */
3135 (c == '_') || (c == ':') ||
3136 (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3137 ((c >= 0xC0) && (c <= 0xD6)) ||
3138 ((c >= 0xD8) && (c <= 0xF6)) ||
3139 ((c >= 0xF8) && (c <= 0x2FF)) ||
3140 ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3141 ((c >= 0x370) && (c <= 0x37D)) ||
3142 ((c >= 0x37F) && (c <= 0x1FFF)) ||
3143 ((c >= 0x200C) && (c <= 0x200D)) ||
3144 ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3145 ((c >= 0x2070) && (c <= 0x218F)) ||
3146 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3147 ((c >= 0x3001) && (c <= 0xD7FF)) ||
3148 ((c >= 0xF900) && (c <= 0xFDCF)) ||
3149 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3150 ((c >= 0x10000) && (c <= 0xEFFFF))))
3151 return(1);
3152 } else {
3153 if ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3154 (c == '.') || (c == '-') ||
3155 (c == '_') || (c == ':') ||
3156 (IS_COMBINING(c)) ||
3157 (IS_EXTENDER(c)))
3158 return(1);
3159 }
3160 return(0);
3161}
3162
Daniel Veillarde57ec792003-09-10 10:50:59 +00003163static xmlChar * xmlParseAttValueInternal(xmlParserCtxtPtr ctxt,
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003164 int *len, int *alloc, int normalize);
Daniel Veillard0fb18932003-09-07 09:14:37 +00003165
Daniel Veillard34e3f642008-07-29 09:02:27 +00003166static const xmlChar *
3167xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
3168 int len = 0, l;
3169 int c;
3170 int count = 0;
3171
Daniel Veillardc6561462009-03-25 10:22:31 +00003172#ifdef DEBUG
Daniel Veillard34e3f642008-07-29 09:02:27 +00003173 nbParseNameComplex++;
Daniel Veillardc6561462009-03-25 10:22:31 +00003174#endif
Daniel Veillard34e3f642008-07-29 09:02:27 +00003175
3176 /*
3177 * Handler for more complex cases
3178 */
3179 GROW;
3180 c = CUR_CHAR(l);
3181 if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3182 /*
3183 * Use the new checks of production [4] [4a] amd [5] of the
3184 * Update 5 of XML-1.0
3185 */
3186 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3187 (!(((c >= 'a') && (c <= 'z')) ||
3188 ((c >= 'A') && (c <= 'Z')) ||
3189 (c == '_') || (c == ':') ||
3190 ((c >= 0xC0) && (c <= 0xD6)) ||
3191 ((c >= 0xD8) && (c <= 0xF6)) ||
3192 ((c >= 0xF8) && (c <= 0x2FF)) ||
3193 ((c >= 0x370) && (c <= 0x37D)) ||
3194 ((c >= 0x37F) && (c <= 0x1FFF)) ||
3195 ((c >= 0x200C) && (c <= 0x200D)) ||
3196 ((c >= 0x2070) && (c <= 0x218F)) ||
3197 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3198 ((c >= 0x3001) && (c <= 0xD7FF)) ||
3199 ((c >= 0xF900) && (c <= 0xFDCF)) ||
3200 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3201 ((c >= 0x10000) && (c <= 0xEFFFF))))) {
3202 return(NULL);
3203 }
3204 len += l;
3205 NEXTL(l);
3206 c = CUR_CHAR(l);
3207 while ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3208 (((c >= 'a') && (c <= 'z')) ||
3209 ((c >= 'A') && (c <= 'Z')) ||
3210 ((c >= '0') && (c <= '9')) || /* !start */
3211 (c == '_') || (c == ':') ||
3212 (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3213 ((c >= 0xC0) && (c <= 0xD6)) ||
3214 ((c >= 0xD8) && (c <= 0xF6)) ||
3215 ((c >= 0xF8) && (c <= 0x2FF)) ||
3216 ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3217 ((c >= 0x370) && (c <= 0x37D)) ||
3218 ((c >= 0x37F) && (c <= 0x1FFF)) ||
3219 ((c >= 0x200C) && (c <= 0x200D)) ||
3220 ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3221 ((c >= 0x2070) && (c <= 0x218F)) ||
3222 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3223 ((c >= 0x3001) && (c <= 0xD7FF)) ||
3224 ((c >= 0xF900) && (c <= 0xFDCF)) ||
3225 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3226 ((c >= 0x10000) && (c <= 0xEFFFF))
3227 )) {
3228 if (count++ > 100) {
3229 count = 0;
3230 GROW;
3231 }
3232 len += l;
3233 NEXTL(l);
3234 c = CUR_CHAR(l);
3235 }
3236 } else {
3237 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3238 (!IS_LETTER(c) && (c != '_') &&
3239 (c != ':'))) {
3240 return(NULL);
3241 }
3242 len += l;
3243 NEXTL(l);
3244 c = CUR_CHAR(l);
3245
3246 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3247 ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3248 (c == '.') || (c == '-') ||
3249 (c == '_') || (c == ':') ||
3250 (IS_COMBINING(c)) ||
3251 (IS_EXTENDER(c)))) {
3252 if (count++ > 100) {
3253 count = 0;
3254 GROW;
3255 }
3256 len += l;
3257 NEXTL(l);
3258 c = CUR_CHAR(l);
3259 }
3260 }
3261 if ((*ctxt->input->cur == '\n') && (ctxt->input->cur[-1] == '\r'))
3262 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - (len + 1), len));
3263 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
3264}
3265
Owen Taylor3473f882001-02-23 17:55:21 +00003266/**
3267 * xmlParseName:
3268 * @ctxt: an XML parser context
3269 *
3270 * parse an XML name.
3271 *
3272 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3273 * CombiningChar | Extender
3274 *
3275 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3276 *
Daniel Veillard807b4de2004-09-26 14:42:56 +00003277 * [6] Names ::= Name (#x20 Name)*
Owen Taylor3473f882001-02-23 17:55:21 +00003278 *
3279 * Returns the Name parsed or NULL
3280 */
3281
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003282const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00003283xmlParseName(xmlParserCtxtPtr ctxt) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00003284 const xmlChar *in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003285 const xmlChar *ret;
Owen Taylor3473f882001-02-23 17:55:21 +00003286 int count = 0;
3287
3288 GROW;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003289
Daniel Veillardc6561462009-03-25 10:22:31 +00003290#ifdef DEBUG
Daniel Veillard34e3f642008-07-29 09:02:27 +00003291 nbParseName++;
Daniel Veillardc6561462009-03-25 10:22:31 +00003292#endif
Daniel Veillard34e3f642008-07-29 09:02:27 +00003293
Daniel Veillard48b2f892001-02-25 16:11:03 +00003294 /*
3295 * Accelerator for simple ASCII names
3296 */
3297 in = ctxt->input->cur;
3298 if (((*in >= 0x61) && (*in <= 0x7A)) ||
3299 ((*in >= 0x41) && (*in <= 0x5A)) ||
3300 (*in == '_') || (*in == ':')) {
3301 in++;
3302 while (((*in >= 0x61) && (*in <= 0x7A)) ||
3303 ((*in >= 0x41) && (*in <= 0x5A)) ||
3304 ((*in >= 0x30) && (*in <= 0x39)) ||
Daniel Veillard76d66f42001-05-16 21:05:17 +00003305 (*in == '_') || (*in == '-') ||
3306 (*in == ':') || (*in == '.'))
Daniel Veillard48b2f892001-02-25 16:11:03 +00003307 in++;
Daniel Veillard76d66f42001-05-16 21:05:17 +00003308 if ((*in > 0) && (*in < 0x80)) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00003309 count = in - ctxt->input->cur;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003310 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
Daniel Veillard48b2f892001-02-25 16:11:03 +00003311 ctxt->input->cur = in;
Daniel Veillard77a90a72003-03-22 00:04:05 +00003312 ctxt->nbChars += count;
3313 ctxt->input->col += count;
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003314 if (ret == NULL)
3315 xmlErrMemory(ctxt, NULL);
Daniel Veillard48b2f892001-02-25 16:11:03 +00003316 return(ret);
3317 }
3318 }
Daniel Veillard34e3f642008-07-29 09:02:27 +00003319 /* accelerator for special cases */
Daniel Veillard2f362242001-03-02 17:36:21 +00003320 return(xmlParseNameComplex(ctxt));
Daniel Veillard21a0f912001-02-25 19:54:14 +00003321}
Daniel Veillard48b2f892001-02-25 16:11:03 +00003322
Daniel Veillard34e3f642008-07-29 09:02:27 +00003323static const xmlChar *
3324xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) {
3325 int len = 0, l;
3326 int c;
3327 int count = 0;
3328
Daniel Veillardc6561462009-03-25 10:22:31 +00003329#ifdef DEBUG
Daniel Veillard34e3f642008-07-29 09:02:27 +00003330 nbParseNCNameComplex++;
Daniel Veillardc6561462009-03-25 10:22:31 +00003331#endif
Daniel Veillard34e3f642008-07-29 09:02:27 +00003332
3333 /*
3334 * Handler for more complex cases
3335 */
3336 GROW;
3337 c = CUR_CHAR(l);
3338 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3339 (!xmlIsNameStartChar(ctxt, c) || (c == ':'))) {
3340 return(NULL);
3341 }
3342
3343 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3344 (xmlIsNameChar(ctxt, c) && (c != ':'))) {
3345 if (count++ > 100) {
3346 count = 0;
3347 GROW;
3348 }
3349 len += l;
3350 NEXTL(l);
3351 c = CUR_CHAR(l);
3352 }
3353 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
3354}
3355
3356/**
3357 * xmlParseNCName:
3358 * @ctxt: an XML parser context
3359 * @len: lenght of the string parsed
3360 *
3361 * parse an XML name.
3362 *
3363 * [4NS] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
3364 * CombiningChar | Extender
3365 *
3366 * [5NS] NCName ::= (Letter | '_') (NCNameChar)*
3367 *
3368 * Returns the Name parsed or NULL
3369 */
3370
3371static const xmlChar *
3372xmlParseNCName(xmlParserCtxtPtr ctxt) {
3373 const xmlChar *in;
3374 const xmlChar *ret;
3375 int count = 0;
3376
Daniel Veillardc6561462009-03-25 10:22:31 +00003377#ifdef DEBUG
Daniel Veillard34e3f642008-07-29 09:02:27 +00003378 nbParseNCName++;
Daniel Veillardc6561462009-03-25 10:22:31 +00003379#endif
Daniel Veillard34e3f642008-07-29 09:02:27 +00003380
3381 /*
3382 * Accelerator for simple ASCII names
3383 */
3384 in = ctxt->input->cur;
3385 if (((*in >= 0x61) && (*in <= 0x7A)) ||
3386 ((*in >= 0x41) && (*in <= 0x5A)) ||
3387 (*in == '_')) {
3388 in++;
3389 while (((*in >= 0x61) && (*in <= 0x7A)) ||
3390 ((*in >= 0x41) && (*in <= 0x5A)) ||
3391 ((*in >= 0x30) && (*in <= 0x39)) ||
3392 (*in == '_') || (*in == '-') ||
3393 (*in == '.'))
3394 in++;
3395 if ((*in > 0) && (*in < 0x80)) {
3396 count = in - ctxt->input->cur;
3397 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
3398 ctxt->input->cur = in;
3399 ctxt->nbChars += count;
3400 ctxt->input->col += count;
3401 if (ret == NULL) {
3402 xmlErrMemory(ctxt, NULL);
3403 }
3404 return(ret);
3405 }
3406 }
3407 return(xmlParseNCNameComplex(ctxt));
3408}
3409
Daniel Veillard46de64e2002-05-29 08:21:33 +00003410/**
3411 * xmlParseNameAndCompare:
3412 * @ctxt: an XML parser context
3413 *
3414 * parse an XML name and compares for match
3415 * (specialized for endtag parsing)
3416 *
Daniel Veillard46de64e2002-05-29 08:21:33 +00003417 * Returns NULL for an illegal name, (xmlChar*) 1 for success
3418 * and the name for mismatch
3419 */
3420
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003421static const xmlChar *
Daniel Veillard46de64e2002-05-29 08:21:33 +00003422xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003423 register const xmlChar *cmp = other;
3424 register const xmlChar *in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003425 const xmlChar *ret;
Daniel Veillard46de64e2002-05-29 08:21:33 +00003426
3427 GROW;
Daniel Veillard34e3f642008-07-29 09:02:27 +00003428
Daniel Veillard46de64e2002-05-29 08:21:33 +00003429 in = ctxt->input->cur;
3430 while (*in != 0 && *in == *cmp) {
Daniel Veillard34e3f642008-07-29 09:02:27 +00003431 ++in;
Daniel Veillard46de64e2002-05-29 08:21:33 +00003432 ++cmp;
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00003433 ctxt->input->col++;
Daniel Veillard46de64e2002-05-29 08:21:33 +00003434 }
William M. Brack76e95df2003-10-18 16:20:14 +00003435 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
Daniel Veillard34e3f642008-07-29 09:02:27 +00003436 /* success */
Daniel Veillard46de64e2002-05-29 08:21:33 +00003437 ctxt->input->cur = in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003438 return (const xmlChar*) 1;
Daniel Veillard46de64e2002-05-29 08:21:33 +00003439 }
3440 /* failure (or end of input buffer), check with full function */
3441 ret = xmlParseName (ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00003442 /* strings coming from the dictionnary direct compare possible */
3443 if (ret == other) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003444 return (const xmlChar*) 1;
Daniel Veillard46de64e2002-05-29 08:21:33 +00003445 }
3446 return ret;
3447}
3448
Owen Taylor3473f882001-02-23 17:55:21 +00003449/**
3450 * xmlParseStringName:
3451 * @ctxt: an XML parser context
3452 * @str: a pointer to the string pointer (IN/OUT)
3453 *
3454 * parse an XML name.
3455 *
3456 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3457 * CombiningChar | Extender
3458 *
3459 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3460 *
Daniel Veillard807b4de2004-09-26 14:42:56 +00003461 * [6] Names ::= Name (#x20 Name)*
Owen Taylor3473f882001-02-23 17:55:21 +00003462 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003463 * Returns the Name parsed or NULL. The @str pointer
Owen Taylor3473f882001-02-23 17:55:21 +00003464 * is updated to the current location in the string.
3465 */
3466
Daniel Veillard56a4cb82001-03-24 17:00:36 +00003467static xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00003468xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
3469 xmlChar buf[XML_MAX_NAMELEN + 5];
3470 const xmlChar *cur = *str;
3471 int len = 0, l;
3472 int c;
3473
Daniel Veillardc6561462009-03-25 10:22:31 +00003474#ifdef DEBUG
Daniel Veillard34e3f642008-07-29 09:02:27 +00003475 nbParseStringName++;
Daniel Veillardc6561462009-03-25 10:22:31 +00003476#endif
Daniel Veillard34e3f642008-07-29 09:02:27 +00003477
Owen Taylor3473f882001-02-23 17:55:21 +00003478 c = CUR_SCHAR(cur, l);
Daniel Veillard34e3f642008-07-29 09:02:27 +00003479 if (!xmlIsNameStartChar(ctxt, c)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003480 return(NULL);
3481 }
3482
Daniel Veillard34e3f642008-07-29 09:02:27 +00003483 COPY_BUF(l,buf,len,c);
3484 cur += l;
3485 c = CUR_SCHAR(cur, l);
3486 while (xmlIsNameChar(ctxt, c)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003487 COPY_BUF(l,buf,len,c);
3488 cur += l;
3489 c = CUR_SCHAR(cur, l);
3490 if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
3491 /*
3492 * Okay someone managed to make a huge name, so he's ready to pay
3493 * for the processing speed.
3494 */
3495 xmlChar *buffer;
3496 int max = len * 2;
Daniel Veillard34e3f642008-07-29 09:02:27 +00003497
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003498 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003499 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003500 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003501 return(NULL);
3502 }
3503 memcpy(buffer, buf, len);
Daniel Veillard34e3f642008-07-29 09:02:27 +00003504 while (xmlIsNameChar(ctxt, c)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003505 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003506 xmlChar *tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003507 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003508 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00003509 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00003510 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003511 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00003512 xmlFree(buffer);
Owen Taylor3473f882001-02-23 17:55:21 +00003513 return(NULL);
3514 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003515 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003516 }
3517 COPY_BUF(l,buffer,len,c);
3518 cur += l;
3519 c = CUR_SCHAR(cur, l);
3520 }
3521 buffer[len] = 0;
3522 *str = cur;
3523 return(buffer);
3524 }
3525 }
3526 *str = cur;
3527 return(xmlStrndup(buf, len));
3528}
3529
3530/**
3531 * xmlParseNmtoken:
3532 * @ctxt: an XML parser context
Daniel Veillard34e3f642008-07-29 09:02:27 +00003533 *
Owen Taylor3473f882001-02-23 17:55:21 +00003534 * parse an XML Nmtoken.
3535 *
3536 * [7] Nmtoken ::= (NameChar)+
3537 *
Daniel Veillard807b4de2004-09-26 14:42:56 +00003538 * [8] Nmtokens ::= Nmtoken (#x20 Nmtoken)*
Owen Taylor3473f882001-02-23 17:55:21 +00003539 *
3540 * Returns the Nmtoken parsed or NULL
3541 */
3542
3543xmlChar *
3544xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
3545 xmlChar buf[XML_MAX_NAMELEN + 5];
3546 int len = 0, l;
3547 int c;
3548 int count = 0;
3549
Daniel Veillardc6561462009-03-25 10:22:31 +00003550#ifdef DEBUG
Daniel Veillard34e3f642008-07-29 09:02:27 +00003551 nbParseNmToken++;
Daniel Veillardc6561462009-03-25 10:22:31 +00003552#endif
Daniel Veillard34e3f642008-07-29 09:02:27 +00003553
Owen Taylor3473f882001-02-23 17:55:21 +00003554 GROW;
3555 c = CUR_CHAR(l);
3556
Daniel Veillard34e3f642008-07-29 09:02:27 +00003557 while (xmlIsNameChar(ctxt, c)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003558 if (count++ > 100) {
3559 count = 0;
3560 GROW;
3561 }
3562 COPY_BUF(l,buf,len,c);
3563 NEXTL(l);
3564 c = CUR_CHAR(l);
3565 if (len >= XML_MAX_NAMELEN) {
3566 /*
3567 * Okay someone managed to make a huge token, so he's ready to pay
3568 * for the processing speed.
3569 */
3570 xmlChar *buffer;
3571 int max = len * 2;
Daniel Veillard34e3f642008-07-29 09:02:27 +00003572
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003573 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003574 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003575 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003576 return(NULL);
3577 }
3578 memcpy(buffer, buf, len);
Daniel Veillard34e3f642008-07-29 09:02:27 +00003579 while (xmlIsNameChar(ctxt, c)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003580 if (count++ > 100) {
3581 count = 0;
3582 GROW;
3583 }
3584 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003585 xmlChar *tmp;
3586
Owen Taylor3473f882001-02-23 17:55:21 +00003587 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003588 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00003589 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00003590 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003591 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00003592 xmlFree(buffer);
Owen Taylor3473f882001-02-23 17:55:21 +00003593 return(NULL);
3594 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003595 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003596 }
3597 COPY_BUF(l,buffer,len,c);
3598 NEXTL(l);
3599 c = CUR_CHAR(l);
3600 }
3601 buffer[len] = 0;
3602 return(buffer);
3603 }
3604 }
3605 if (len == 0)
3606 return(NULL);
3607 return(xmlStrndup(buf, len));
3608}
3609
3610/**
3611 * xmlParseEntityValue:
3612 * @ctxt: an XML parser context
3613 * @orig: if non-NULL store a copy of the original entity value
3614 *
3615 * parse a value for ENTITY declarations
3616 *
3617 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
3618 * "'" ([^%&'] | PEReference | Reference)* "'"
3619 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003620 * Returns the EntityValue parsed with reference substituted or NULL
Owen Taylor3473f882001-02-23 17:55:21 +00003621 */
3622
3623xmlChar *
3624xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
3625 xmlChar *buf = NULL;
3626 int len = 0;
3627 int size = XML_PARSER_BUFFER_SIZE;
3628 int c, l;
3629 xmlChar stop;
3630 xmlChar *ret = NULL;
3631 const xmlChar *cur = NULL;
3632 xmlParserInputPtr input;
3633
3634 if (RAW == '"') stop = '"';
3635 else if (RAW == '\'') stop = '\'';
3636 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003637 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003638 return(NULL);
3639 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003640 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003641 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003642 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003643 return(NULL);
3644 }
3645
3646 /*
3647 * The content of the entity definition is copied in a buffer.
3648 */
3649
3650 ctxt->instate = XML_PARSER_ENTITY_VALUE;
3651 input = ctxt->input;
3652 GROW;
3653 NEXT;
3654 c = CUR_CHAR(l);
3655 /*
3656 * NOTE: 4.4.5 Included in Literal
3657 * When a parameter entity reference appears in a literal entity
3658 * value, ... a single or double quote character in the replacement
3659 * text is always treated as a normal data character and will not
3660 * terminate the literal.
3661 * In practice it means we stop the loop only when back at parsing
3662 * the initial entity and the quote is found
3663 */
William M. Brack871611b2003-10-18 04:53:14 +00003664 while ((IS_CHAR(c)) && ((c != stop) || /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00003665 (ctxt->input != input))) {
3666 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003667 xmlChar *tmp;
3668
Owen Taylor3473f882001-02-23 17:55:21 +00003669 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003670 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3671 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003672 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00003673 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00003674 return(NULL);
3675 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003676 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003677 }
3678 COPY_BUF(l,buf,len,c);
3679 NEXTL(l);
3680 /*
3681 * Pop-up of finished entities.
3682 */
3683 while ((RAW == 0) && (ctxt->inputNr > 1)) /* non input consuming */
3684 xmlPopInput(ctxt);
3685
3686 GROW;
3687 c = CUR_CHAR(l);
3688 if (c == 0) {
3689 GROW;
3690 c = CUR_CHAR(l);
3691 }
3692 }
3693 buf[len] = 0;
3694
3695 /*
3696 * Raise problem w.r.t. '&' and '%' being used in non-entities
3697 * reference constructs. Note Charref will be handled in
3698 * xmlStringDecodeEntities()
3699 */
3700 cur = buf;
3701 while (*cur != 0) { /* non input consuming */
3702 if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
3703 xmlChar *name;
3704 xmlChar tmp = *cur;
3705
3706 cur++;
3707 name = xmlParseStringName(ctxt, &cur);
3708 if ((name == NULL) || (*cur != ';')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003709 xmlFatalErrMsgInt(ctxt, XML_ERR_ENTITY_CHAR_ERROR,
Owen Taylor3473f882001-02-23 17:55:21 +00003710 "EntityValue: '%c' forbidden except for entities references\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003711 tmp);
Owen Taylor3473f882001-02-23 17:55:21 +00003712 }
Daniel Veillard5151c062001-10-23 13:10:19 +00003713 if ((tmp == '%') && (ctxt->inSubset == 1) &&
3714 (ctxt->inputNr == 1)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003715 xmlFatalErr(ctxt, XML_ERR_ENTITY_PE_INTERNAL, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003716 }
3717 if (name != NULL)
3718 xmlFree(name);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00003719 if (*cur == 0)
3720 break;
Owen Taylor3473f882001-02-23 17:55:21 +00003721 }
3722 cur++;
3723 }
3724
3725 /*
3726 * Then PEReference entities are substituted.
3727 */
3728 if (c != stop) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003729 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003730 xmlFree(buf);
3731 } else {
3732 NEXT;
3733 /*
3734 * NOTE: 4.4.7 Bypassed
3735 * When a general entity reference appears in the EntityValue in
3736 * an entity declaration, it is bypassed and left as is.
3737 * so XML_SUBSTITUTE_REF is not set here.
3738 */
3739 ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF,
3740 0, 0, 0);
3741 if (orig != NULL)
3742 *orig = buf;
3743 else
3744 xmlFree(buf);
3745 }
3746
3747 return(ret);
3748}
3749
3750/**
Daniel Veillard01c13b52002-12-10 15:19:08 +00003751 * xmlParseAttValueComplex:
3752 * @ctxt: an XML parser context
Daniel Veillard0fb18932003-09-07 09:14:37 +00003753 * @len: the resulting attribute len
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003754 * @normalize: wether to apply the inner normalization
Daniel Veillard01c13b52002-12-10 15:19:08 +00003755 *
3756 * parse a value for an attribute, this is the fallback function
3757 * of xmlParseAttValue() when the attribute parsing requires handling
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003758 * of non-ASCII characters, or normalization compaction.
Daniel Veillard01c13b52002-12-10 15:19:08 +00003759 *
3760 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
3761 */
Daniel Veillard0fb18932003-09-07 09:14:37 +00003762static xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003763xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) {
Daniel Veillarde72c7562002-05-31 09:47:30 +00003764 xmlChar limit = 0;
3765 xmlChar *buf = NULL;
Daniel Veillard68b6e022008-03-31 09:26:00 +00003766 xmlChar *rep = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00003767 int len = 0;
3768 int buf_size = 0;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003769 int c, l, in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003770 xmlChar *current = NULL;
3771 xmlEntityPtr ent;
3772
Owen Taylor3473f882001-02-23 17:55:21 +00003773 if (NXT(0) == '"') {
3774 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3775 limit = '"';
3776 NEXT;
3777 } else if (NXT(0) == '\'') {
3778 limit = '\'';
3779 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3780 NEXT;
3781 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003782 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003783 return(NULL);
3784 }
Daniel Veillard68b6e022008-03-31 09:26:00 +00003785
Owen Taylor3473f882001-02-23 17:55:21 +00003786 /*
3787 * allocate a translation buffer.
3788 */
3789 buf_size = XML_PARSER_BUFFER_SIZE;
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003790 buf = (xmlChar *) xmlMallocAtomic(buf_size * sizeof(xmlChar));
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003791 if (buf == NULL) goto mem_error;
Owen Taylor3473f882001-02-23 17:55:21 +00003792
3793 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003794 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +00003795 */
3796 c = CUR_CHAR(l);
Daniel Veillardfdc91562002-07-01 21:52:03 +00003797 while ((NXT(0) != limit) && /* checked */
Daniel Veillardb9e5acc2007-06-12 13:43:00 +00003798 (IS_CHAR(c)) && (c != '<')) {
Owen Taylor3473f882001-02-23 17:55:21 +00003799 if (c == 0) break;
Daniel Veillardfdc91562002-07-01 21:52:03 +00003800 if (c == '&') {
Daniel Veillard62998c02003-09-15 12:56:36 +00003801 in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003802 if (NXT(1) == '#') {
3803 int val = xmlParseCharRef(ctxt);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003804
Owen Taylor3473f882001-02-23 17:55:21 +00003805 if (val == '&') {
Daniel Veillard319a7422001-09-11 09:27:09 +00003806 if (ctxt->replaceEntities) {
3807 if (len > buf_size - 10) {
Daniel Veillard0161e632008-08-28 15:36:32 +00003808 growBuffer(buf, 10);
Daniel Veillard319a7422001-09-11 09:27:09 +00003809 }
3810 buf[len++] = '&';
3811 } else {
3812 /*
3813 * The reparsing will be done in xmlStringGetNodeList()
3814 * called by the attribute() function in SAX.c
3815 */
Daniel Veillard319a7422001-09-11 09:27:09 +00003816 if (len > buf_size - 10) {
Daniel Veillard0161e632008-08-28 15:36:32 +00003817 growBuffer(buf, 10);
Daniel Veillard319a7422001-09-11 09:27:09 +00003818 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003819 buf[len++] = '&';
3820 buf[len++] = '#';
3821 buf[len++] = '3';
3822 buf[len++] = '8';
3823 buf[len++] = ';';
Owen Taylor3473f882001-02-23 17:55:21 +00003824 }
Daniel Veillarddc171602008-03-26 17:41:38 +00003825 } else if (val != 0) {
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00003826 if (len > buf_size - 10) {
Daniel Veillard0161e632008-08-28 15:36:32 +00003827 growBuffer(buf, 10);
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00003828 }
Owen Taylor3473f882001-02-23 17:55:21 +00003829 len += xmlCopyChar(0, &buf[len], val);
3830 }
3831 } else {
3832 ent = xmlParseEntityRef(ctxt);
Daniel Veillardcba68392008-08-29 12:43:40 +00003833 ctxt->nbentities++;
3834 if (ent != NULL)
3835 ctxt->nbentities += ent->owner;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003836 if ((ent != NULL) &&
3837 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
3838 if (len > buf_size - 10) {
Daniel Veillard0161e632008-08-28 15:36:32 +00003839 growBuffer(buf, 10);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003840 }
3841 if ((ctxt->replaceEntities == 0) &&
3842 (ent->content[0] == '&')) {
3843 buf[len++] = '&';
3844 buf[len++] = '#';
3845 buf[len++] = '3';
3846 buf[len++] = '8';
3847 buf[len++] = ';';
3848 } else {
3849 buf[len++] = ent->content[0];
3850 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003851 } else if ((ent != NULL) &&
3852 (ctxt->replaceEntities != 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003853 if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
3854 rep = xmlStringDecodeEntities(ctxt, ent->content,
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003855 XML_SUBSTITUTE_REF,
3856 0, 0, 0);
Owen Taylor3473f882001-02-23 17:55:21 +00003857 if (rep != NULL) {
3858 current = rep;
3859 while (*current != 0) { /* non input consuming */
Daniel Veillard283d5022009-08-25 17:18:39 +02003860 if ((*current == 0xD) || (*current == 0xA) ||
3861 (*current == 0x9)) {
3862 buf[len++] = 0x20;
3863 current++;
3864 } else
3865 buf[len++] = *current++;
Owen Taylor3473f882001-02-23 17:55:21 +00003866 if (len > buf_size - 10) {
Daniel Veillard0161e632008-08-28 15:36:32 +00003867 growBuffer(buf, 10);
Owen Taylor3473f882001-02-23 17:55:21 +00003868 }
3869 }
3870 xmlFree(rep);
Daniel Veillard68b6e022008-03-31 09:26:00 +00003871 rep = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00003872 }
3873 } else {
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00003874 if (len > buf_size - 10) {
Daniel Veillard0161e632008-08-28 15:36:32 +00003875 growBuffer(buf, 10);
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00003876 }
Owen Taylor3473f882001-02-23 17:55:21 +00003877 if (ent->content != NULL)
3878 buf[len++] = ent->content[0];
3879 }
3880 } else if (ent != NULL) {
3881 int i = xmlStrlen(ent->name);
3882 const xmlChar *cur = ent->name;
3883
3884 /*
3885 * This may look absurd but is needed to detect
3886 * entities problems
3887 */
3888 if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
3889 (ent->content != NULL)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003890 rep = xmlStringDecodeEntities(ctxt, ent->content,
Daniel Veillard68b6e022008-03-31 09:26:00 +00003891 XML_SUBSTITUTE_REF, 0, 0, 0);
3892 if (rep != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +00003893 xmlFree(rep);
Daniel Veillard68b6e022008-03-31 09:26:00 +00003894 rep = NULL;
3895 }
Owen Taylor3473f882001-02-23 17:55:21 +00003896 }
3897
3898 /*
3899 * Just output the reference
3900 */
3901 buf[len++] = '&';
Daniel Veillard0161e632008-08-28 15:36:32 +00003902 while (len > buf_size - i - 10) {
3903 growBuffer(buf, i + 10);
Owen Taylor3473f882001-02-23 17:55:21 +00003904 }
3905 for (;i > 0;i--)
3906 buf[len++] = *cur++;
3907 buf[len++] = ';';
3908 }
3909 }
3910 } else {
3911 if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003912 if ((len != 0) || (!normalize)) {
3913 if ((!normalize) || (!in_space)) {
3914 COPY_BUF(l,buf,len,0x20);
Daniel Veillard0161e632008-08-28 15:36:32 +00003915 while (len > buf_size - 10) {
3916 growBuffer(buf, 10);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003917 }
3918 }
3919 in_space = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003920 }
3921 } else {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003922 in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003923 COPY_BUF(l,buf,len,c);
3924 if (len > buf_size - 10) {
Daniel Veillard0161e632008-08-28 15:36:32 +00003925 growBuffer(buf, 10);
Owen Taylor3473f882001-02-23 17:55:21 +00003926 }
3927 }
3928 NEXTL(l);
3929 }
3930 GROW;
3931 c = CUR_CHAR(l);
3932 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003933 if ((in_space) && (normalize)) {
3934 while (buf[len - 1] == 0x20) len--;
3935 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00003936 buf[len] = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003937 if (RAW == '<') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003938 xmlFatalErr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003939 } else if (RAW != limit) {
Daniel Veillardb9e5acc2007-06-12 13:43:00 +00003940 if ((c != 0) && (!IS_CHAR(c))) {
3941 xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
3942 "invalid character in attribute value\n");
3943 } else {
3944 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
3945 "AttValue: ' expected\n");
3946 }
Owen Taylor3473f882001-02-23 17:55:21 +00003947 } else
3948 NEXT;
Daniel Veillard0fb18932003-09-07 09:14:37 +00003949 if (attlen != NULL) *attlen = len;
Owen Taylor3473f882001-02-23 17:55:21 +00003950 return(buf);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003951
3952mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003953 xmlErrMemory(ctxt, NULL);
Daniel Veillard68b6e022008-03-31 09:26:00 +00003954 if (buf != NULL)
3955 xmlFree(buf);
3956 if (rep != NULL)
3957 xmlFree(rep);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003958 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003959}
3960
3961/**
Daniel Veillard0fb18932003-09-07 09:14:37 +00003962 * xmlParseAttValue:
3963 * @ctxt: an XML parser context
3964 *
3965 * parse a value for an attribute
3966 * Note: the parser won't do substitution of entities here, this
3967 * will be handled later in xmlStringGetNodeList
3968 *
3969 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
3970 * "'" ([^<&'] | Reference)* "'"
3971 *
3972 * 3.3.3 Attribute-Value Normalization:
3973 * Before the value of an attribute is passed to the application or
3974 * checked for validity, the XML processor must normalize it as follows:
3975 * - a character reference is processed by appending the referenced
3976 * character to the attribute value
3977 * - an entity reference is processed by recursively processing the
3978 * replacement text of the entity
3979 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
3980 * appending #x20 to the normalized value, except that only a single
3981 * #x20 is appended for a "#xD#xA" sequence that is part of an external
3982 * parsed entity or the literal entity value of an internal parsed entity
3983 * - other characters are processed by appending them to the normalized value
3984 * If the declared value is not CDATA, then the XML processor must further
3985 * process the normalized attribute value by discarding any leading and
3986 * trailing space (#x20) characters, and by replacing sequences of space
3987 * (#x20) characters by a single space (#x20) character.
3988 * All attributes for which no declaration has been read should be treated
3989 * by a non-validating parser as if declared CDATA.
3990 *
3991 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
3992 */
3993
3994
3995xmlChar *
3996xmlParseAttValue(xmlParserCtxtPtr ctxt) {
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00003997 if ((ctxt == NULL) || (ctxt->input == NULL)) return(NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003998 return(xmlParseAttValueInternal(ctxt, NULL, NULL, 0));
Daniel Veillard0fb18932003-09-07 09:14:37 +00003999}
4000
4001/**
Owen Taylor3473f882001-02-23 17:55:21 +00004002 * xmlParseSystemLiteral:
4003 * @ctxt: an XML parser context
4004 *
4005 * parse an XML Literal
4006 *
4007 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
4008 *
4009 * Returns the SystemLiteral parsed or NULL
4010 */
4011
4012xmlChar *
4013xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
4014 xmlChar *buf = NULL;
4015 int len = 0;
4016 int size = XML_PARSER_BUFFER_SIZE;
4017 int cur, l;
4018 xmlChar stop;
4019 int state = ctxt->instate;
4020 int count = 0;
4021
4022 SHRINK;
4023 if (RAW == '"') {
4024 NEXT;
4025 stop = '"';
4026 } else if (RAW == '\'') {
4027 NEXT;
4028 stop = '\'';
4029 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004030 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004031 return(NULL);
4032 }
4033
Daniel Veillard3c908dc2003-04-19 00:07:51 +00004034 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00004035 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004036 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004037 return(NULL);
4038 }
4039 ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
4040 cur = CUR_CHAR(l);
William M. Brack871611b2003-10-18 04:53:14 +00004041 while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00004042 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00004043 xmlChar *tmp;
4044
Owen Taylor3473f882001-02-23 17:55:21 +00004045 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00004046 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4047 if (tmp == NULL) {
4048 xmlFree(buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004049 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004050 ctxt->instate = (xmlParserInputState) state;
4051 return(NULL);
4052 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00004053 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00004054 }
4055 count++;
4056 if (count > 50) {
4057 GROW;
4058 count = 0;
4059 }
4060 COPY_BUF(l,buf,len,cur);
4061 NEXTL(l);
4062 cur = CUR_CHAR(l);
4063 if (cur == 0) {
4064 GROW;
4065 SHRINK;
4066 cur = CUR_CHAR(l);
4067 }
4068 }
4069 buf[len] = 0;
4070 ctxt->instate = (xmlParserInputState) state;
William M. Brack871611b2003-10-18 04:53:14 +00004071 if (!IS_CHAR(cur)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004072 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004073 } else {
4074 NEXT;
4075 }
4076 return(buf);
4077}
4078
4079/**
4080 * xmlParsePubidLiteral:
4081 * @ctxt: an XML parser context
4082 *
4083 * parse an XML public literal
4084 *
4085 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
4086 *
4087 * Returns the PubidLiteral parsed or NULL.
4088 */
4089
4090xmlChar *
4091xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
4092 xmlChar *buf = NULL;
4093 int len = 0;
4094 int size = XML_PARSER_BUFFER_SIZE;
4095 xmlChar cur;
4096 xmlChar stop;
4097 int count = 0;
Daniel Veillard4a7ae502002-02-18 19:18:17 +00004098 xmlParserInputState oldstate = ctxt->instate;
Owen Taylor3473f882001-02-23 17:55:21 +00004099
4100 SHRINK;
4101 if (RAW == '"') {
4102 NEXT;
4103 stop = '"';
4104 } else if (RAW == '\'') {
4105 NEXT;
4106 stop = '\'';
4107 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004108 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004109 return(NULL);
4110 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00004111 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00004112 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004113 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004114 return(NULL);
4115 }
Daniel Veillard4a7ae502002-02-18 19:18:17 +00004116 ctxt->instate = XML_PARSER_PUBLIC_LITERAL;
Owen Taylor3473f882001-02-23 17:55:21 +00004117 cur = CUR;
William M. Brack76e95df2003-10-18 16:20:14 +00004118 while ((IS_PUBIDCHAR_CH(cur)) && (cur != stop)) { /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00004119 if (len + 1 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00004120 xmlChar *tmp;
4121
Owen Taylor3473f882001-02-23 17:55:21 +00004122 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00004123 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4124 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004125 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00004126 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00004127 return(NULL);
4128 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00004129 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00004130 }
4131 buf[len++] = cur;
4132 count++;
4133 if (count > 50) {
4134 GROW;
4135 count = 0;
4136 }
4137 NEXT;
4138 cur = CUR;
4139 if (cur == 0) {
4140 GROW;
4141 SHRINK;
4142 cur = CUR;
4143 }
4144 }
4145 buf[len] = 0;
4146 if (cur != stop) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004147 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004148 } else {
4149 NEXT;
4150 }
Daniel Veillard4a7ae502002-02-18 19:18:17 +00004151 ctxt->instate = oldstate;
Owen Taylor3473f882001-02-23 17:55:21 +00004152 return(buf);
4153}
4154
Daniel Veillard8ed10722009-08-20 19:17:36 +02004155static void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata);
Daniel Veillard0a119eb2005-07-20 13:46:00 +00004156
/*
 * Lookup table driving the inner scanning loop of xmlParseCharData():
 * a non-zero entry means the byte is skipped by that loop, a zero entry
 * makes the loop stop so the byte gets a closer look.
 *
 * Non-zero (set to their own value) are TAB (0x09) and the range
 * 0x20 - 0x7F, with the exception of '&' (0x26), '<' (0x3C) and
 * ']' (0x5D). Every other byte, including LF, CR and everything
 * at or above 0x80, is zero.
 */
static const unsigned char test_char_data[256] = {
    /* 0x00: only TAB; LF (0x0A) and CR (0x0D) are handled separately */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x10 */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x20: '&' (0x26) excluded */
    0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x00, 0x27,
    0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
    /* 0x30: '<' (0x3C) excluded */
    0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
    0x38, 0x39, 0x3A, 0x3B, 0x00, 0x3D, 0x3E, 0x3F,
    /* 0x40 */
    0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
    0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F,
    /* 0x50: ']' (0x5D) excluded */
    0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
    0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x00, 0x5E, 0x5F,
    /* 0x60 */
    0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
    0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
    /* 0x70 */
    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
    0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F,
    /* 0x80 - 0xFF: non-ascii, never skipped by the fast loop */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
};
4194
Owen Taylor3473f882001-02-23 17:55:21 +00004195/**
4196 * xmlParseCharData:
4197 * @ctxt: an XML parser context
4198 * @cdata: int indicating whether we are within a CDATA section
4199 *
4200 * parse a CharData section.
4201 * if we are within a CDATA section ']]>' marks an end of section.
4202 *
4203 * The right angle bracket (>) may be represented using the string "&gt;",
4204 * and must, for compatibility, be escaped using "&gt;" or a character
4205 * reference when it appears in the string "]]>" in content, when that
4206 * string is not marking the end of a CDATA section.
4207 *
4208 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
4209 */
4210
4211void
4212xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00004213 const xmlChar *in;
Daniel Veillard48b2f892001-02-25 16:11:03 +00004214 int nbchar = 0;
Daniel Veillard50582112001-03-26 22:52:16 +00004215 int line = ctxt->input->line;
4216 int col = ctxt->input->col;
Daniel Veillard0714c5b2005-01-23 00:01:01 +00004217 int ccol;
Daniel Veillard48b2f892001-02-25 16:11:03 +00004218
4219 SHRINK;
4220 GROW;
4221 /*
4222 * Accelerated common case where input don't need to be
4223 * modified before passing it to the handler.
4224 */
Daniel Veillardfdc91562002-07-01 21:52:03 +00004225 if (!cdata) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00004226 in = ctxt->input->cur;
4227 do {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004228get_more_space:
Daniel Veillard9e264ad2008-01-11 06:10:16 +00004229 while (*in == 0x20) { in++; ctxt->input->col++; }
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004230 if (*in == 0xA) {
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004231 do {
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00004232 ctxt->input->line++; ctxt->input->col = 1;
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004233 in++;
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004234 } while (*in == 0xA);
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004235 goto get_more_space;
4236 }
4237 if (*in == '<') {
4238 nbchar = in - ctxt->input->cur;
4239 if (nbchar > 0) {
4240 const xmlChar *tmp = ctxt->input->cur;
4241 ctxt->input->cur = in;
4242
Daniel Veillard34099b42004-11-04 17:34:35 +00004243 if ((ctxt->sax != NULL) &&
4244 (ctxt->sax->ignorableWhitespace !=
4245 ctxt->sax->characters)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004246 if (areBlanks(ctxt, tmp, nbchar, 1)) {
Daniel Veillard32acf0c2005-03-31 14:12:37 +00004247 if (ctxt->sax->ignorableWhitespace != NULL)
4248 ctxt->sax->ignorableWhitespace(ctxt->userData,
4249 tmp, nbchar);
Daniel Veillard1114d002006-10-12 16:24:35 +00004250 } else {
4251 if (ctxt->sax->characters != NULL)
4252 ctxt->sax->characters(ctxt->userData,
4253 tmp, nbchar);
4254 if (*ctxt->space == -1)
4255 *ctxt->space = -2;
4256 }
Daniel Veillard34099b42004-11-04 17:34:35 +00004257 } else if ((ctxt->sax != NULL) &&
4258 (ctxt->sax->characters != NULL)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004259 ctxt->sax->characters(ctxt->userData,
4260 tmp, nbchar);
4261 }
4262 }
4263 return;
4264 }
Daniel Veillard0714c5b2005-01-23 00:01:01 +00004265
Daniel Veillard3ed155f2001-04-29 19:56:59 +00004266get_more:
Daniel Veillard0714c5b2005-01-23 00:01:01 +00004267 ccol = ctxt->input->col;
Daniel Veillard0a119eb2005-07-20 13:46:00 +00004268 while (test_char_data[*in]) {
4269 in++;
4270 ccol++;
4271 }
Daniel Veillard0714c5b2005-01-23 00:01:01 +00004272 ctxt->input->col = ccol;
Daniel Veillard561b7f82002-03-20 21:55:57 +00004273 if (*in == 0xA) {
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004274 do {
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00004275 ctxt->input->line++; ctxt->input->col = 1;
Daniel Veillard3ed155f2001-04-29 19:56:59 +00004276 in++;
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004277 } while (*in == 0xA);
Daniel Veillard3ed155f2001-04-29 19:56:59 +00004278 goto get_more;
Daniel Veillard561b7f82002-03-20 21:55:57 +00004279 }
4280 if (*in == ']') {
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00004281 if ((in[1] == ']') && (in[2] == '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004282 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00004283 ctxt->input->cur = in;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00004284 return;
4285 }
4286 in++;
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00004287 ctxt->input->col++;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00004288 goto get_more;
4289 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00004290 nbchar = in - ctxt->input->cur;
Daniel Veillard80f32572001-03-07 19:45:40 +00004291 if (nbchar > 0) {
Daniel Veillard34099b42004-11-04 17:34:35 +00004292 if ((ctxt->sax != NULL) &&
4293 (ctxt->sax->ignorableWhitespace !=
Daniel Veillard40412cd2003-09-03 13:28:32 +00004294 ctxt->sax->characters) &&
William M. Brack76e95df2003-10-18 16:20:14 +00004295 (IS_BLANK_CH(*ctxt->input->cur))) {
Daniel Veillarda7374592001-05-10 14:17:55 +00004296 const xmlChar *tmp = ctxt->input->cur;
4297 ctxt->input->cur = in;
Daniel Veillard40412cd2003-09-03 13:28:32 +00004298
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004299 if (areBlanks(ctxt, tmp, nbchar, 0)) {
Daniel Veillard32acf0c2005-03-31 14:12:37 +00004300 if (ctxt->sax->ignorableWhitespace != NULL)
4301 ctxt->sax->ignorableWhitespace(ctxt->userData,
4302 tmp, nbchar);
Daniel Veillard1114d002006-10-12 16:24:35 +00004303 } else {
4304 if (ctxt->sax->characters != NULL)
4305 ctxt->sax->characters(ctxt->userData,
4306 tmp, nbchar);
4307 if (*ctxt->space == -1)
4308 *ctxt->space = -2;
4309 }
Daniel Veillard3ed27bd2001-06-17 17:58:17 +00004310 line = ctxt->input->line;
4311 col = ctxt->input->col;
Daniel Veillard34099b42004-11-04 17:34:35 +00004312 } else if (ctxt->sax != NULL) {
Daniel Veillard80f32572001-03-07 19:45:40 +00004313 if (ctxt->sax->characters != NULL)
4314 ctxt->sax->characters(ctxt->userData,
4315 ctxt->input->cur, nbchar);
Daniel Veillard3ed27bd2001-06-17 17:58:17 +00004316 line = ctxt->input->line;
4317 col = ctxt->input->col;
Daniel Veillard80f32572001-03-07 19:45:40 +00004318 }
Daniel Veillard1dc9feb2008-11-17 15:59:21 +00004319 /* something really bad happened in the SAX callback */
4320 if (ctxt->instate != XML_PARSER_CONTENT)
4321 return;
Daniel Veillard48b2f892001-02-25 16:11:03 +00004322 }
4323 ctxt->input->cur = in;
Daniel Veillard561b7f82002-03-20 21:55:57 +00004324 if (*in == 0xD) {
4325 in++;
4326 if (*in == 0xA) {
4327 ctxt->input->cur = in;
Daniel Veillard48b2f892001-02-25 16:11:03 +00004328 in++;
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00004329 ctxt->input->line++; ctxt->input->col = 1;
Daniel Veillard561b7f82002-03-20 21:55:57 +00004330 continue; /* while */
Daniel Veillard48b2f892001-02-25 16:11:03 +00004331 }
Daniel Veillard561b7f82002-03-20 21:55:57 +00004332 in--;
4333 }
4334 if (*in == '<') {
4335 return;
4336 }
4337 if (*in == '&') {
4338 return;
Daniel Veillard48b2f892001-02-25 16:11:03 +00004339 }
4340 SHRINK;
4341 GROW;
4342 in = ctxt->input->cur;
William M. Brackc07329e2003-09-08 01:57:30 +00004343 } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
Daniel Veillard48b2f892001-02-25 16:11:03 +00004344 nbchar = 0;
4345 }
Daniel Veillard50582112001-03-26 22:52:16 +00004346 ctxt->input->line = line;
4347 ctxt->input->col = col;
Daniel Veillard48b2f892001-02-25 16:11:03 +00004348 xmlParseCharDataComplex(ctxt, cdata);
4349}
4350
Daniel Veillard01c13b52002-12-10 15:19:08 +00004351/**
4352 * xmlParseCharDataComplex:
4353 * @ctxt: an XML parser context
4354 * @cdata: int indicating whether we are within a CDATA section
4355 *
4356 * parse a CharData section.this is the fallback function
4357 * of xmlParseCharData() when the parsing requires handling
4358 * of non-ASCII characters.
4359 */
Daniel Veillard8ed10722009-08-20 19:17:36 +02004360static void
Daniel Veillard48b2f892001-02-25 16:11:03 +00004361xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) {
Owen Taylor3473f882001-02-23 17:55:21 +00004362 xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
4363 int nbchar = 0;
4364 int cur, l;
4365 int count = 0;
4366
4367 SHRINK;
4368 GROW;
4369 cur = CUR_CHAR(l);
Daniel Veillardfdc91562002-07-01 21:52:03 +00004370 while ((cur != '<') && /* checked */
4371 (cur != '&') &&
William M. Brack871611b2003-10-18 04:53:14 +00004372 (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
Owen Taylor3473f882001-02-23 17:55:21 +00004373 if ((cur == ']') && (NXT(1) == ']') &&
4374 (NXT(2) == '>')) {
4375 if (cdata) break;
4376 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004377 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004378 }
4379 }
4380 COPY_BUF(l,buf,nbchar,cur);
4381 if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
Daniel Veillard092643b2003-09-25 14:29:29 +00004382 buf[nbchar] = 0;
4383
Owen Taylor3473f882001-02-23 17:55:21 +00004384 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00004385 * OK the segment is to be consumed as chars.
Owen Taylor3473f882001-02-23 17:55:21 +00004386 */
4387 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004388 if (areBlanks(ctxt, buf, nbchar, 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00004389 if (ctxt->sax->ignorableWhitespace != NULL)
4390 ctxt->sax->ignorableWhitespace(ctxt->userData,
4391 buf, nbchar);
4392 } else {
4393 if (ctxt->sax->characters != NULL)
4394 ctxt->sax->characters(ctxt->userData, buf, nbchar);
Daniel Veillard1114d002006-10-12 16:24:35 +00004395 if ((ctxt->sax->characters !=
4396 ctxt->sax->ignorableWhitespace) &&
4397 (*ctxt->space == -1))
4398 *ctxt->space = -2;
Owen Taylor3473f882001-02-23 17:55:21 +00004399 }
4400 }
4401 nbchar = 0;
Daniel Veillard1dc9feb2008-11-17 15:59:21 +00004402 /* something really bad happened in the SAX callback */
4403 if (ctxt->instate != XML_PARSER_CONTENT)
4404 return;
Owen Taylor3473f882001-02-23 17:55:21 +00004405 }
4406 count++;
4407 if (count > 50) {
4408 GROW;
4409 count = 0;
4410 }
4411 NEXTL(l);
4412 cur = CUR_CHAR(l);
4413 }
4414 if (nbchar != 0) {
Daniel Veillard092643b2003-09-25 14:29:29 +00004415 buf[nbchar] = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00004416 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00004417 * OK the segment is to be consumed as chars.
Owen Taylor3473f882001-02-23 17:55:21 +00004418 */
4419 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004420 if (areBlanks(ctxt, buf, nbchar, 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00004421 if (ctxt->sax->ignorableWhitespace != NULL)
4422 ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
4423 } else {
4424 if (ctxt->sax->characters != NULL)
4425 ctxt->sax->characters(ctxt->userData, buf, nbchar);
Daniel Veillard1114d002006-10-12 16:24:35 +00004426 if ((ctxt->sax->characters != ctxt->sax->ignorableWhitespace) &&
4427 (*ctxt->space == -1))
4428 *ctxt->space = -2;
Owen Taylor3473f882001-02-23 17:55:21 +00004429 }
4430 }
4431 }
Daniel Veillard3b1478b2006-04-24 08:50:10 +00004432 if ((cur != 0) && (!IS_CHAR(cur))) {
4433 /* Generate the error and skip the offending character */
4434 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4435 "PCDATA invalid Char value %d\n",
4436 cur);
4437 NEXTL(l);
4438 }
Owen Taylor3473f882001-02-23 17:55:21 +00004439}
4440
4441/**
4442 * xmlParseExternalID:
4443 * @ctxt: an XML parser context
4444 * @publicID: a xmlChar** receiving PubidLiteral
4445 * @strict: indicate whether we should restrict parsing to only
4446 * production [75], see NOTE below
4447 *
4448 * Parse an External ID or a Public ID
4449 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00004450 * NOTE: Productions [75] and [83] interact badly since [75] can generate
Owen Taylor3473f882001-02-23 17:55:21 +00004451 * 'PUBLIC' S PubidLiteral S SystemLiteral
4452 *
4453 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
4454 * | 'PUBLIC' S PubidLiteral S SystemLiteral
4455 *
4456 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
4457 *
4458 * Returns the function returns SystemLiteral and in the second
4459 * case publicID receives PubidLiteral, is strict is off
4460 * it is possible to return NULL and have publicID set.
4461 */
4462
4463xmlChar *
4464xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
4465 xmlChar *URI = NULL;
4466
4467 SHRINK;
Daniel Veillard146c9122001-03-22 15:22:27 +00004468
4469 *publicID = NULL;
Daniel Veillarda07050d2003-10-19 14:46:32 +00004470 if (CMP6(CUR_PTR, 'S', 'Y', 'S', 'T', 'E', 'M')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004471 SKIP(6);
William M. Brack76e95df2003-10-18 16:20:14 +00004472 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004473 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4474 "Space required after 'SYSTEM'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004475 }
4476 SKIP_BLANKS;
4477 URI = xmlParseSystemLiteral(ctxt);
4478 if (URI == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004479 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004480 }
Daniel Veillarda07050d2003-10-19 14:46:32 +00004481 } else if (CMP6(CUR_PTR, 'P', 'U', 'B', 'L', 'I', 'C')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004482 SKIP(6);
William M. Brack76e95df2003-10-18 16:20:14 +00004483 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004484 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004485 "Space required after 'PUBLIC'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004486 }
4487 SKIP_BLANKS;
4488 *publicID = xmlParsePubidLiteral(ctxt);
4489 if (*publicID == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004490 xmlFatalErr(ctxt, XML_ERR_PUBID_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004491 }
4492 if (strict) {
4493 /*
4494 * We don't handle [83] so "S SystemLiteral" is required.
4495 */
William M. Brack76e95df2003-10-18 16:20:14 +00004496 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004497 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004498 "Space required after the Public Identifier\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004499 }
4500 } else {
4501 /*
4502 * We handle [83] so we return immediately, if
4503 * "S SystemLiteral" is not detected. From a purely parsing
4504 * point of view that's a nice mess.
4505 */
4506 const xmlChar *ptr;
4507 GROW;
4508
4509 ptr = CUR_PTR;
William M. Brack76e95df2003-10-18 16:20:14 +00004510 if (!IS_BLANK_CH(*ptr)) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004511
William M. Brack76e95df2003-10-18 16:20:14 +00004512 while (IS_BLANK_CH(*ptr)) ptr++; /* TODO: dangerous, fix ! */
Owen Taylor3473f882001-02-23 17:55:21 +00004513 if ((*ptr != '\'') && (*ptr != '"')) return(NULL);
4514 }
4515 SKIP_BLANKS;
4516 URI = xmlParseSystemLiteral(ctxt);
4517 if (URI == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004518 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004519 }
4520 }
4521 return(URI);
4522}
4523
4524/**
Daniel Veillard4c778d82005-01-23 17:37:44 +00004525 * xmlParseCommentComplex:
Owen Taylor3473f882001-02-23 17:55:21 +00004526 * @ctxt: an XML parser context
Daniel Veillard4c778d82005-01-23 17:37:44 +00004527 * @buf: the already parsed part of the buffer
 4528 * @len: number of bytes filled in the buffer
4529 * @size: allocated size of the buffer
Owen Taylor3473f882001-02-23 17:55:21 +00004530 *
4531 * Skip an XML (SGML) comment <!-- .... -->
4532 * The spec says that "For compatibility, the string "--" (double-hyphen)
4533 * must not occur within comments. "
Daniel Veillard4c778d82005-01-23 17:37:44 +00004534 * This is the slow routine in case the accelerator for ascii didn't work
Owen Taylor3473f882001-02-23 17:55:21 +00004535 *
4536 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4537 */
Daniel Veillard4c778d82005-01-23 17:37:44 +00004538static void
4539xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar *buf, int len, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +00004540 int q, ql;
4541 int r, rl;
4542 int cur, l;
Owen Taylor3473f882001-02-23 17:55:21 +00004543 int count = 0;
Daniel Veillard051d52c2008-07-29 16:44:59 +00004544 int inputid;
4545
4546 inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00004547
Owen Taylor3473f882001-02-23 17:55:21 +00004548 if (buf == NULL) {
Daniel Veillard4c778d82005-01-23 17:37:44 +00004549 len = 0;
4550 size = XML_PARSER_BUFFER_SIZE;
4551 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4552 if (buf == NULL) {
4553 xmlErrMemory(ctxt, NULL);
4554 return;
4555 }
Owen Taylor3473f882001-02-23 17:55:21 +00004556 }
William M. Brackbf9a73d2007-02-09 00:07:07 +00004557 GROW; /* Assure there's enough input data */
Owen Taylor3473f882001-02-23 17:55:21 +00004558 q = CUR_CHAR(ql);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00004559 if (q == 0)
4560 goto not_terminated;
Daniel Veillardda629342007-08-01 07:49:06 +00004561 if (!IS_CHAR(q)) {
4562 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4563 "xmlParseComment: invalid xmlChar value %d\n",
4564 q);
4565 xmlFree (buf);
4566 return;
4567 }
Owen Taylor3473f882001-02-23 17:55:21 +00004568 NEXTL(ql);
4569 r = CUR_CHAR(rl);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00004570 if (r == 0)
4571 goto not_terminated;
Daniel Veillardda629342007-08-01 07:49:06 +00004572 if (!IS_CHAR(r)) {
4573 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4574 "xmlParseComment: invalid xmlChar value %d\n",
4575 q);
4576 xmlFree (buf);
4577 return;
4578 }
Owen Taylor3473f882001-02-23 17:55:21 +00004579 NEXTL(rl);
4580 cur = CUR_CHAR(l);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00004581 if (cur == 0)
4582 goto not_terminated;
William M. Brack871611b2003-10-18 04:53:14 +00004583 while (IS_CHAR(cur) && /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00004584 ((cur != '>') ||
4585 (r != '-') || (q != '-'))) {
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00004586 if ((r == '-') && (q == '-')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004587 xmlFatalErr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004588 }
4589 if (len + 5 >= size) {
William M. Bracka3215c72004-07-31 16:24:01 +00004590 xmlChar *new_buf;
Owen Taylor3473f882001-02-23 17:55:21 +00004591 size *= 2;
William M. Bracka3215c72004-07-31 16:24:01 +00004592 new_buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4593 if (new_buf == NULL) {
4594 xmlFree (buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004595 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004596 return;
4597 }
William M. Bracka3215c72004-07-31 16:24:01 +00004598 buf = new_buf;
Owen Taylor3473f882001-02-23 17:55:21 +00004599 }
4600 COPY_BUF(ql,buf,len,q);
4601 q = r;
4602 ql = rl;
4603 r = cur;
4604 rl = l;
4605
4606 count++;
4607 if (count > 50) {
4608 GROW;
4609 count = 0;
4610 }
4611 NEXTL(l);
4612 cur = CUR_CHAR(l);
4613 if (cur == 0) {
4614 SHRINK;
4615 GROW;
4616 cur = CUR_CHAR(l);
4617 }
4618 }
4619 buf[len] = 0;
Daniel Veillardda629342007-08-01 07:49:06 +00004620 if (cur == 0) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00004621 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00004622 "Comment not terminated \n<!--%.50s\n", buf);
Daniel Veillardda629342007-08-01 07:49:06 +00004623 } else if (!IS_CHAR(cur)) {
4624 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4625 "xmlParseComment: invalid xmlChar value %d\n",
4626 cur);
Owen Taylor3473f882001-02-23 17:55:21 +00004627 } else {
Daniel Veillard051d52c2008-07-29 16:44:59 +00004628 if (inputid != ctxt->input->id) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004629 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4630 "Comment doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004631 }
4632 NEXT;
4633 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
4634 (!ctxt->disableSAX))
4635 ctxt->sax->comment(ctxt->userData, buf);
Owen Taylor3473f882001-02-23 17:55:21 +00004636 }
Daniel Veillardda629342007-08-01 07:49:06 +00004637 xmlFree(buf);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00004638 return;
4639not_terminated:
4640 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4641 "Comment not terminated\n", NULL);
4642 xmlFree(buf);
Daniel Veillardda629342007-08-01 07:49:06 +00004643 return;
Owen Taylor3473f882001-02-23 17:55:21 +00004644}
Daniel Veillardda629342007-08-01 07:49:06 +00004645
Daniel Veillard4c778d82005-01-23 17:37:44 +00004646/**
4647 * xmlParseComment:
4648 * @ctxt: an XML parser context
4649 *
4650 * Skip an XML (SGML) comment <!-- .... -->
4651 * The spec says that "For compatibility, the string "--" (double-hyphen)
4652 * must not occur within comments. "
4653 *
4654 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4655 */
4656void
4657xmlParseComment(xmlParserCtxtPtr ctxt) {
4658 xmlChar *buf = NULL;
4659 int size = XML_PARSER_BUFFER_SIZE;
Daniel Veillard316a5c32005-01-23 22:56:39 +00004660 int len = 0;
Daniel Veillard4c778d82005-01-23 17:37:44 +00004661 xmlParserInputState state;
4662 const xmlChar *in;
4663 int nbchar = 0, ccol;
Daniel Veillard051d52c2008-07-29 16:44:59 +00004664 int inputid;
Daniel Veillard4c778d82005-01-23 17:37:44 +00004665
4666 /*
4667 * Check that there is a comment right here.
4668 */
4669 if ((RAW != '<') || (NXT(1) != '!') ||
4670 (NXT(2) != '-') || (NXT(3) != '-')) return;
Daniel Veillard4c778d82005-01-23 17:37:44 +00004671 state = ctxt->instate;
4672 ctxt->instate = XML_PARSER_COMMENT;
Daniel Veillard051d52c2008-07-29 16:44:59 +00004673 inputid = ctxt->input->id;
Daniel Veillard4c778d82005-01-23 17:37:44 +00004674 SKIP(4);
4675 SHRINK;
4676 GROW;
4677
4678 /*
4679 * Accelerated common case where input don't need to be
4680 * modified before passing it to the handler.
4681 */
4682 in = ctxt->input->cur;
4683 do {
4684 if (*in == 0xA) {
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004685 do {
Daniel Veillard4c778d82005-01-23 17:37:44 +00004686 ctxt->input->line++; ctxt->input->col = 1;
4687 in++;
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004688 } while (*in == 0xA);
Daniel Veillard4c778d82005-01-23 17:37:44 +00004689 }
4690get_more:
4691 ccol = ctxt->input->col;
4692 while (((*in > '-') && (*in <= 0x7F)) ||
4693 ((*in >= 0x20) && (*in < '-')) ||
4694 (*in == 0x09)) {
4695 in++;
4696 ccol++;
4697 }
4698 ctxt->input->col = ccol;
4699 if (*in == 0xA) {
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004700 do {
Daniel Veillard4c778d82005-01-23 17:37:44 +00004701 ctxt->input->line++; ctxt->input->col = 1;
4702 in++;
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004703 } while (*in == 0xA);
Daniel Veillard4c778d82005-01-23 17:37:44 +00004704 goto get_more;
4705 }
4706 nbchar = in - ctxt->input->cur;
4707 /*
4708 * save current set of data
4709 */
4710 if (nbchar > 0) {
4711 if ((ctxt->sax != NULL) &&
4712 (ctxt->sax->comment != NULL)) {
4713 if (buf == NULL) {
4714 if ((*in == '-') && (in[1] == '-'))
4715 size = nbchar + 1;
4716 else
4717 size = XML_PARSER_BUFFER_SIZE + nbchar;
4718 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4719 if (buf == NULL) {
4720 xmlErrMemory(ctxt, NULL);
4721 ctxt->instate = state;
4722 return;
4723 }
4724 len = 0;
4725 } else if (len + nbchar + 1 >= size) {
4726 xmlChar *new_buf;
4727 size += len + nbchar + XML_PARSER_BUFFER_SIZE;
4728 new_buf = (xmlChar *) xmlRealloc(buf,
4729 size * sizeof(xmlChar));
4730 if (new_buf == NULL) {
4731 xmlFree (buf);
4732 xmlErrMemory(ctxt, NULL);
4733 ctxt->instate = state;
4734 return;
4735 }
4736 buf = new_buf;
4737 }
4738 memcpy(&buf[len], ctxt->input->cur, nbchar);
4739 len += nbchar;
4740 buf[len] = 0;
4741 }
4742 }
4743 ctxt->input->cur = in;
Daniel Veillard9b528c72006-02-05 03:06:15 +00004744 if (*in == 0xA) {
4745 in++;
4746 ctxt->input->line++; ctxt->input->col = 1;
4747 }
Daniel Veillard4c778d82005-01-23 17:37:44 +00004748 if (*in == 0xD) {
4749 in++;
4750 if (*in == 0xA) {
4751 ctxt->input->cur = in;
4752 in++;
4753 ctxt->input->line++; ctxt->input->col = 1;
4754 continue; /* while */
4755 }
4756 in--;
4757 }
4758 SHRINK;
4759 GROW;
4760 in = ctxt->input->cur;
4761 if (*in == '-') {
4762 if (in[1] == '-') {
4763 if (in[2] == '>') {
Daniel Veillard051d52c2008-07-29 16:44:59 +00004764 if (ctxt->input->id != inputid) {
4765 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4766 "comment doesn't start and stop in the same entity\n");
4767 }
Daniel Veillard4c778d82005-01-23 17:37:44 +00004768 SKIP(3);
4769 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
4770 (!ctxt->disableSAX)) {
4771 if (buf != NULL)
4772 ctxt->sax->comment(ctxt->userData, buf);
4773 else
4774 ctxt->sax->comment(ctxt->userData, BAD_CAST "");
4775 }
4776 if (buf != NULL)
4777 xmlFree(buf);
4778 ctxt->instate = state;
4779 return;
4780 }
4781 if (buf != NULL)
4782 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4783 "Comment not terminated \n<!--%.50s\n",
4784 buf);
4785 else
4786 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4787 "Comment not terminated \n", NULL);
4788 in++;
4789 ctxt->input->col++;
4790 }
4791 in++;
4792 ctxt->input->col++;
4793 goto get_more;
4794 }
4795 } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
4796 xmlParseCommentComplex(ctxt, buf, len, size);
4797 ctxt->instate = state;
4798 return;
4799}
4800
Owen Taylor3473f882001-02-23 17:55:21 +00004801
4802/**
4803 * xmlParsePITarget:
4804 * @ctxt: an XML parser context
4805 *
4806 * parse the name of a PI
4807 *
4808 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
4809 *
4810 * Returns the PITarget name or NULL
4811 */
4812
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004813const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00004814xmlParsePITarget(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004815 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00004816
4817 name = xmlParseName(ctxt);
4818 if ((name != NULL) &&
4819 ((name[0] == 'x') || (name[0] == 'X')) &&
4820 ((name[1] == 'm') || (name[1] == 'M')) &&
4821 ((name[2] == 'l') || (name[2] == 'L'))) {
4822 int i;
4823 if ((name[0] == 'x') && (name[1] == 'm') &&
4824 (name[2] == 'l') && (name[3] == 0)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004825 xmlFatalErrMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
Owen Taylor3473f882001-02-23 17:55:21 +00004826 "XML declaration allowed only at the start of the document\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004827 return(name);
4828 } else if (name[3] == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004829 xmlFatalErr(ctxt, XML_ERR_RESERVED_XML_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004830 return(name);
4831 }
4832 for (i = 0;;i++) {
4833 if (xmlW3CPIs[i] == NULL) break;
4834 if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
4835 return(name);
4836 }
Daniel Veillard24eb9782003-10-04 21:08:09 +00004837 xmlWarningMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
4838 "xmlParsePITarget: invalid name prefix 'xml'\n",
4839 NULL, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004840 }
Daniel Veillard37334572008-07-31 08:20:02 +00004841 if ((name != NULL) && (xmlStrchr(name, ':') != NULL)) {
4842 xmlNsErr(ctxt, XML_NS_ERR_COLON,
4843 "colon are forbidden from PI names '%s'\n", name, NULL, NULL);
4844 }
Owen Taylor3473f882001-02-23 17:55:21 +00004845 return(name);
4846}
4847
#ifdef LIBXML_CATALOG_ENABLED
/**
 * xmlParseCatalogPI:
 * @ctxt:  an XML parser context
 * @catalog:  the PI value string
 *
 * parse an XML Catalog Processing Instruction.
 *
 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
 *
 * Occurs only if allowed by the user and if happening in the Misc
 * part of the document before any doctype information.
 * This will add the given catalog to the parsing context in order
 * to be used if there is a resolution need further down in the document.
 *
 * The value must be of the form  catalog = 'URL'  or  catalog = "URL"
 * with optional blanks around each token and nothing after the closing
 * quote.  A value where "catalog" is not followed by '=' is ignored
 * silently; any other deviation raises an XML_WAR_CATALOG_PI warning.
 */

static void
xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
    const xmlChar *p = catalog;
    const xmlChar *start;
    xmlChar *uri = NULL;
    xmlChar quote;

    while (IS_BLANK_CH(*p))
        p++;
    if (xmlStrncmp(p, BAD_CAST"catalog", 7) != 0)
        goto syntax_error;
    p += 7;

    while (IS_BLANK_CH(*p))
        p++;
    if (*p != '=')
        return;
    p++;

    while (IS_BLANK_CH(*p))
        p++;
    quote = *p;
    if ((quote != '\'') && (quote != '"'))
        goto syntax_error;
    p++;

    /* the URL runs up to the matching quote */
    start = p;
    while ((*p != 0) && (*p != quote))
        p++;
    if (*p == 0)
        goto syntax_error;
    uri = xmlStrndup(start, p - start);
    p++;

    /* only blanks may follow the closing quote */
    while (IS_BLANK_CH(*p))
        p++;
    if (*p != 0)
        goto syntax_error;

    if (uri != NULL) {
        ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, uri);
        xmlFree(uri);
    }
    return;

syntax_error:
    xmlWarningMsg(ctxt, XML_WAR_CATALOG_PI,
                  "Catalog PI syntax error: %s\n",
                  catalog, NULL);
    if (uri != NULL)
        xmlFree(uri);
}
#endif
4909
Owen Taylor3473f882001-02-23 17:55:21 +00004910/**
4911 * xmlParsePI:
4912 * @ctxt: an XML parser context
4913 *
4914 * parse an XML Processing Instruction.
4915 *
4916 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
4917 *
4918 * The processing is transfered to SAX once parsed.
4919 */
4920
4921void
4922xmlParsePI(xmlParserCtxtPtr ctxt) {
4923 xmlChar *buf = NULL;
4924 int len = 0;
4925 int size = XML_PARSER_BUFFER_SIZE;
4926 int cur, l;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004927 const xmlChar *target;
Owen Taylor3473f882001-02-23 17:55:21 +00004928 xmlParserInputState state;
4929 int count = 0;
4930
4931 if ((RAW == '<') && (NXT(1) == '?')) {
4932 xmlParserInputPtr input = ctxt->input;
4933 state = ctxt->instate;
4934 ctxt->instate = XML_PARSER_PI;
4935 /*
4936 * this is a Processing Instruction.
4937 */
4938 SKIP(2);
4939 SHRINK;
4940
4941 /*
4942 * Parse the target name and check for special support like
4943 * namespace.
4944 */
4945 target = xmlParsePITarget(ctxt);
4946 if (target != NULL) {
4947 if ((RAW == '?') && (NXT(1) == '>')) {
4948 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004949 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4950 "PI declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004951 }
4952 SKIP(2);
4953
4954 /*
4955 * SAX: PI detected.
4956 */
4957 if ((ctxt->sax) && (!ctxt->disableSAX) &&
4958 (ctxt->sax->processingInstruction != NULL))
4959 ctxt->sax->processingInstruction(ctxt->userData,
4960 target, NULL);
Chris Evans77404b82011-12-14 16:18:25 +08004961 if (ctxt->instate != XML_PARSER_EOF)
4962 ctxt->instate = state;
Owen Taylor3473f882001-02-23 17:55:21 +00004963 return;
4964 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00004965 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00004966 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004967 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004968 ctxt->instate = state;
4969 return;
4970 }
4971 cur = CUR;
4972 if (!IS_BLANK(cur)) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00004973 xmlFatalErrMsgStr(ctxt, XML_ERR_SPACE_REQUIRED,
4974 "ParsePI: PI %s space expected\n", target);
Owen Taylor3473f882001-02-23 17:55:21 +00004975 }
4976 SKIP_BLANKS;
4977 cur = CUR_CHAR(l);
William M. Brack871611b2003-10-18 04:53:14 +00004978 while (IS_CHAR(cur) && /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00004979 ((cur != '?') || (NXT(1) != '>'))) {
4980 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00004981 xmlChar *tmp;
4982
Owen Taylor3473f882001-02-23 17:55:21 +00004983 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00004984 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4985 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004986 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00004987 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00004988 ctxt->instate = state;
4989 return;
4990 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00004991 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00004992 }
4993 count++;
4994 if (count > 50) {
4995 GROW;
4996 count = 0;
4997 }
4998 COPY_BUF(l,buf,len,cur);
4999 NEXTL(l);
5000 cur = CUR_CHAR(l);
5001 if (cur == 0) {
5002 SHRINK;
5003 GROW;
5004 cur = CUR_CHAR(l);
5005 }
5006 }
5007 buf[len] = 0;
5008 if (cur != '?') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00005009 xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5010 "ParsePI: PI %s never end ...\n", target);
Owen Taylor3473f882001-02-23 17:55:21 +00005011 } else {
5012 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005013 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5014 "PI declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005015 }
5016 SKIP(2);
5017
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00005018#ifdef LIBXML_CATALOG_ENABLED
5019 if (((state == XML_PARSER_MISC) ||
5020 (state == XML_PARSER_START)) &&
5021 (xmlStrEqual(target, XML_CATALOG_PI))) {
5022 xmlCatalogAllow allow = xmlCatalogGetDefaults();
5023 if ((allow == XML_CATA_ALLOW_DOCUMENT) ||
5024 (allow == XML_CATA_ALLOW_ALL))
5025 xmlParseCatalogPI(ctxt, buf);
5026 }
5027#endif
5028
5029
Owen Taylor3473f882001-02-23 17:55:21 +00005030 /*
5031 * SAX: PI detected.
5032 */
5033 if ((ctxt->sax) && (!ctxt->disableSAX) &&
5034 (ctxt->sax->processingInstruction != NULL))
5035 ctxt->sax->processingInstruction(ctxt->userData,
5036 target, buf);
5037 }
5038 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00005039 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005040 xmlFatalErr(ctxt, XML_ERR_PI_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005041 }
Chris Evans77404b82011-12-14 16:18:25 +08005042 if (ctxt->instate != XML_PARSER_EOF)
5043 ctxt->instate = state;
Owen Taylor3473f882001-02-23 17:55:21 +00005044 }
5045}
5046
5047/**
5048 * xmlParseNotationDecl:
5049 * @ctxt: an XML parser context
5050 *
5051 * parse a notation declaration
5052 *
5053 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
5054 *
5055 * Hence there is actually 3 choices:
5056 * 'PUBLIC' S PubidLiteral
5057 * 'PUBLIC' S PubidLiteral S SystemLiteral
5058 * and 'SYSTEM' S SystemLiteral
5059 *
5060 * See the NOTE on xmlParseExternalID().
5061 */
5062
5063void
5064xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005065 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00005066 xmlChar *Pubid;
5067 xmlChar *Systemid;
5068
Daniel Veillarda07050d2003-10-19 14:46:32 +00005069 if (CMP10(CUR_PTR, '<', '!', 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005070 xmlParserInputPtr input = ctxt->input;
5071 SHRINK;
5072 SKIP(10);
William M. Brack76e95df2003-10-18 16:20:14 +00005073 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005074 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5075 "Space required after '<!NOTATION'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005076 return;
5077 }
5078 SKIP_BLANKS;
5079
Daniel Veillard76d66f42001-05-16 21:05:17 +00005080 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005081 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005082 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005083 return;
5084 }
William M. Brack76e95df2003-10-18 16:20:14 +00005085 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005086 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005087 "Space required after the NOTATION name'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005088 return;
5089 }
Daniel Veillard37334572008-07-31 08:20:02 +00005090 if (xmlStrchr(name, ':') != NULL) {
5091 xmlNsErr(ctxt, XML_NS_ERR_COLON,
5092 "colon are forbidden from notation names '%s'\n",
5093 name, NULL, NULL);
5094 }
Owen Taylor3473f882001-02-23 17:55:21 +00005095 SKIP_BLANKS;
5096
5097 /*
5098 * Parse the IDs.
5099 */
5100 Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
5101 SKIP_BLANKS;
5102
5103 if (RAW == '>') {
5104 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005105 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5106 "Notation declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005107 }
5108 NEXT;
5109 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5110 (ctxt->sax->notationDecl != NULL))
5111 ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
5112 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005113 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005114 }
Owen Taylor3473f882001-02-23 17:55:21 +00005115 if (Systemid != NULL) xmlFree(Systemid);
5116 if (Pubid != NULL) xmlFree(Pubid);
5117 }
5118}
5119
5120/**
5121 * xmlParseEntityDecl:
5122 * @ctxt: an XML parser context
5123 *
5124 * parse <!ENTITY declarations
5125 *
5126 * [70] EntityDecl ::= GEDecl | PEDecl
5127 *
5128 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
5129 *
5130 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
5131 *
5132 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
5133 *
5134 * [74] PEDef ::= EntityValue | ExternalID
5135 *
5136 * [76] NDataDecl ::= S 'NDATA' S Name
5137 *
5138 * [ VC: Notation Declared ]
5139 * The Name must match the declared name of a notation.
5140 */
5141
5142void
5143xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005144 const xmlChar *name = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005145 xmlChar *value = NULL;
5146 xmlChar *URI = NULL, *literal = NULL;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005147 const xmlChar *ndata = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005148 int isParameter = 0;
5149 xmlChar *orig = NULL;
Daniel Veillardf5582f12002-06-11 10:08:16 +00005150 int skipped;
Owen Taylor3473f882001-02-23 17:55:21 +00005151
Daniel Veillard4c778d82005-01-23 17:37:44 +00005152 /* GROW; done in the caller */
Daniel Veillarda07050d2003-10-19 14:46:32 +00005153 if (CMP8(CUR_PTR, '<', '!', 'E', 'N', 'T', 'I', 'T', 'Y')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005154 xmlParserInputPtr input = ctxt->input;
Owen Taylor3473f882001-02-23 17:55:21 +00005155 SHRINK;
5156 SKIP(8);
Daniel Veillardf5582f12002-06-11 10:08:16 +00005157 skipped = SKIP_BLANKS;
5158 if (skipped == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005159 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5160 "Space required after '<!ENTITY'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005161 }
Owen Taylor3473f882001-02-23 17:55:21 +00005162
5163 if (RAW == '%') {
5164 NEXT;
Daniel Veillardf5582f12002-06-11 10:08:16 +00005165 skipped = SKIP_BLANKS;
5166 if (skipped == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005167 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5168 "Space required after '%'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005169 }
Owen Taylor3473f882001-02-23 17:55:21 +00005170 isParameter = 1;
5171 }
5172
Daniel Veillard76d66f42001-05-16 21:05:17 +00005173 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005174 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005175 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5176 "xmlParseEntityDecl: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005177 return;
5178 }
Daniel Veillard37334572008-07-31 08:20:02 +00005179 if (xmlStrchr(name, ':') != NULL) {
5180 xmlNsErr(ctxt, XML_NS_ERR_COLON,
5181 "colon are forbidden from entities names '%s'\n",
5182 name, NULL, NULL);
5183 }
Daniel Veillardf5582f12002-06-11 10:08:16 +00005184 skipped = SKIP_BLANKS;
5185 if (skipped == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005186 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5187 "Space required after the entity name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005188 }
Owen Taylor3473f882001-02-23 17:55:21 +00005189
Daniel Veillardf5582f12002-06-11 10:08:16 +00005190 ctxt->instate = XML_PARSER_ENTITY_DECL;
Owen Taylor3473f882001-02-23 17:55:21 +00005191 /*
5192 * handle the various case of definitions...
5193 */
5194 if (isParameter) {
5195 if ((RAW == '"') || (RAW == '\'')) {
5196 value = xmlParseEntityValue(ctxt, &orig);
5197 if (value) {
5198 if ((ctxt->sax != NULL) &&
5199 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5200 ctxt->sax->entityDecl(ctxt->userData, name,
5201 XML_INTERNAL_PARAMETER_ENTITY,
5202 NULL, NULL, value);
5203 }
5204 } else {
5205 URI = xmlParseExternalID(ctxt, &literal, 1);
5206 if ((URI == NULL) && (literal == NULL)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005207 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005208 }
5209 if (URI) {
5210 xmlURIPtr uri;
5211
5212 uri = xmlParseURI((const char *) URI);
5213 if (uri == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00005214 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
5215 "Invalid URI: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00005216 /*
5217 * This really ought to be a well formedness error
5218 * but the XML Core WG decided otherwise c.f. issue
5219 * E26 of the XML erratas.
5220 */
Owen Taylor3473f882001-02-23 17:55:21 +00005221 } else {
5222 if (uri->fragment != NULL) {
Daniel Veillard4a7ae502002-02-18 19:18:17 +00005223 /*
5224 * Okay this is foolish to block those but not
5225 * invalid URIs.
5226 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005227 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005228 } else {
5229 if ((ctxt->sax != NULL) &&
5230 (!ctxt->disableSAX) &&
5231 (ctxt->sax->entityDecl != NULL))
5232 ctxt->sax->entityDecl(ctxt->userData, name,
5233 XML_EXTERNAL_PARAMETER_ENTITY,
5234 literal, URI, NULL);
5235 }
5236 xmlFreeURI(uri);
5237 }
5238 }
5239 }
5240 } else {
5241 if ((RAW == '"') || (RAW == '\'')) {
5242 value = xmlParseEntityValue(ctxt, &orig);
5243 if ((ctxt->sax != NULL) &&
5244 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5245 ctxt->sax->entityDecl(ctxt->userData, name,
5246 XML_INTERNAL_GENERAL_ENTITY,
5247 NULL, NULL, value);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005248 /*
5249 * For expat compatibility in SAX mode.
5250 */
5251 if ((ctxt->myDoc == NULL) ||
5252 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
5253 if (ctxt->myDoc == NULL) {
5254 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
Daniel Veillard68b6e022008-03-31 09:26:00 +00005255 if (ctxt->myDoc == NULL) {
5256 xmlErrMemory(ctxt, "New Doc failed");
5257 return;
5258 }
Daniel Veillardae0765b2008-07-31 19:54:59 +00005259 ctxt->myDoc->properties = XML_DOC_INTERNAL;
Daniel Veillard5997aca2002-03-18 18:36:20 +00005260 }
5261 if (ctxt->myDoc->intSubset == NULL)
5262 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5263 BAD_CAST "fake", NULL, NULL);
5264
Daniel Veillard1af9a412003-08-20 22:54:39 +00005265 xmlSAX2EntityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY,
5266 NULL, NULL, value);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005267 }
Owen Taylor3473f882001-02-23 17:55:21 +00005268 } else {
5269 URI = xmlParseExternalID(ctxt, &literal, 1);
5270 if ((URI == NULL) && (literal == NULL)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005271 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005272 }
5273 if (URI) {
5274 xmlURIPtr uri;
5275
5276 uri = xmlParseURI((const char *)URI);
5277 if (uri == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00005278 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
5279 "Invalid URI: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00005280 /*
5281 * This really ought to be a well formedness error
5282 * but the XML Core WG decided otherwise c.f. issue
5283 * E26 of the XML erratas.
5284 */
Owen Taylor3473f882001-02-23 17:55:21 +00005285 } else {
5286 if (uri->fragment != NULL) {
Daniel Veillard4a7ae502002-02-18 19:18:17 +00005287 /*
5288 * Okay this is foolish to block those but not
5289 * invalid URIs.
5290 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005291 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005292 }
5293 xmlFreeURI(uri);
5294 }
5295 }
William M. Brack76e95df2003-10-18 16:20:14 +00005296 if ((RAW != '>') && (!IS_BLANK_CH(CUR))) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005297 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5298 "Space required before 'NDATA'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005299 }
5300 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005301 if (CMP5(CUR_PTR, 'N', 'D', 'A', 'T', 'A')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005302 SKIP(5);
William M. Brack76e95df2003-10-18 16:20:14 +00005303 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005304 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5305 "Space required after 'NDATA'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005306 }
5307 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00005308 ndata = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005309 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5310 (ctxt->sax->unparsedEntityDecl != NULL))
5311 ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
5312 literal, URI, ndata);
5313 } else {
5314 if ((ctxt->sax != NULL) &&
5315 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5316 ctxt->sax->entityDecl(ctxt->userData, name,
5317 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5318 literal, URI, NULL);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005319 /*
5320 * For expat compatibility in SAX mode.
5321 * assuming the entity repalcement was asked for
5322 */
5323 if ((ctxt->replaceEntities != 0) &&
5324 ((ctxt->myDoc == NULL) ||
5325 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) {
5326 if (ctxt->myDoc == NULL) {
5327 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
Daniel Veillard68b6e022008-03-31 09:26:00 +00005328 if (ctxt->myDoc == NULL) {
5329 xmlErrMemory(ctxt, "New Doc failed");
5330 return;
5331 }
Daniel Veillardae0765b2008-07-31 19:54:59 +00005332 ctxt->myDoc->properties = XML_DOC_INTERNAL;
Daniel Veillard5997aca2002-03-18 18:36:20 +00005333 }
5334
5335 if (ctxt->myDoc->intSubset == NULL)
5336 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5337 BAD_CAST "fake", NULL, NULL);
Daniel Veillard1af9a412003-08-20 22:54:39 +00005338 xmlSAX2EntityDecl(ctxt, name,
5339 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5340 literal, URI, NULL);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005341 }
Owen Taylor3473f882001-02-23 17:55:21 +00005342 }
5343 }
5344 }
5345 SKIP_BLANKS;
5346 if (RAW != '>') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00005347 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00005348 "xmlParseEntityDecl: entity %s not terminated\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00005349 } else {
5350 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005351 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5352 "Entity declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005353 }
5354 NEXT;
5355 }
5356 if (orig != NULL) {
5357 /*
5358 * Ugly mechanism to save the raw entity value.
5359 */
5360 xmlEntityPtr cur = NULL;
5361
5362 if (isParameter) {
5363 if ((ctxt->sax != NULL) &&
5364 (ctxt->sax->getParameterEntity != NULL))
5365 cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
5366 } else {
5367 if ((ctxt->sax != NULL) &&
5368 (ctxt->sax->getEntity != NULL))
5369 cur = ctxt->sax->getEntity(ctxt->userData, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005370 if ((cur == NULL) && (ctxt->userData==ctxt)) {
Daniel Veillard1af9a412003-08-20 22:54:39 +00005371 cur = xmlSAX2GetEntity(ctxt, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005372 }
Owen Taylor3473f882001-02-23 17:55:21 +00005373 }
5374 if (cur != NULL) {
5375 if (cur->orig != NULL)
5376 xmlFree(orig);
5377 else
5378 cur->orig = orig;
5379 } else
5380 xmlFree(orig);
5381 }
Owen Taylor3473f882001-02-23 17:55:21 +00005382 if (value != NULL) xmlFree(value);
5383 if (URI != NULL) xmlFree(URI);
5384 if (literal != NULL) xmlFree(literal);
Owen Taylor3473f882001-02-23 17:55:21 +00005385 }
5386}
5387
5388/**
5389 * xmlParseDefaultDecl:
5390 * @ctxt: an XML parser context
5391 * @value: Receive a possible fixed default value for the attribute
5392 *
5393 * Parse an attribute default declaration
5394 *
5395 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
5396 *
5397 * [ VC: Required Attribute ]
5398 * if the default declaration is the keyword #REQUIRED, then the
5399 * attribute must be specified for all elements of the type in the
5400 * attribute-list declaration.
5401 *
5402 * [ VC: Attribute Default Legal ]
5403 * The declared default value must meet the lexical constraints of
5404 * the declared attribute type c.f. xmlValidateAttributeDecl()
5405 *
5406 * [ VC: Fixed Attribute Default ]
5407 * if an attribute has a default value declared with the #FIXED
5408 * keyword, instances of that attribute must match the default value.
5409 *
5410 * [ WFC: No < in Attribute Values ]
5411 * handled in xmlParseAttValue()
5412 *
5413 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
5414 * or XML_ATTRIBUTE_FIXED.
5415 */
5416
5417int
5418xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
5419 int val;
5420 xmlChar *ret;
5421
5422 *value = NULL;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005423 if (CMP9(CUR_PTR, '#', 'R', 'E', 'Q', 'U', 'I', 'R', 'E', 'D')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005424 SKIP(9);
5425 return(XML_ATTRIBUTE_REQUIRED);
5426 }
Daniel Veillarda07050d2003-10-19 14:46:32 +00005427 if (CMP8(CUR_PTR, '#', 'I', 'M', 'P', 'L', 'I', 'E', 'D')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005428 SKIP(8);
5429 return(XML_ATTRIBUTE_IMPLIED);
5430 }
5431 val = XML_ATTRIBUTE_NONE;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005432 if (CMP6(CUR_PTR, '#', 'F', 'I', 'X', 'E', 'D')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005433 SKIP(6);
5434 val = XML_ATTRIBUTE_FIXED;
William M. Brack76e95df2003-10-18 16:20:14 +00005435 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005436 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5437 "Space required after '#FIXED'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005438 }
5439 SKIP_BLANKS;
5440 }
5441 ret = xmlParseAttValue(ctxt);
5442 ctxt->instate = XML_PARSER_DTD;
5443 if (ret == NULL) {
William M. Brack7b9154b2003-09-27 19:23:50 +00005444 xmlFatalErrMsg(ctxt, (xmlParserErrors)ctxt->errNo,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005445 "Attribute default value declaration error\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005446 } else
5447 *value = ret;
5448 return(val);
5449}
5450
5451/**
5452 * xmlParseNotationType:
5453 * @ctxt: an XML parser context
5454 *
5455 * parse an Notation attribute type.
5456 *
5457 * Note: the leading 'NOTATION' S part has already being parsed...
5458 *
5459 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5460 *
5461 * [ VC: Notation Attributes ]
5462 * Values of this type must match one of the notation names included
5463 * in the declaration; all notation names in the declaration must be declared.
5464 *
5465 * Returns: the notation attribute tree built while parsing
5466 */
5467
5468xmlEnumerationPtr
5469xmlParseNotationType(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005470 const xmlChar *name;
Daniel Veillard49d44052008-08-27 19:57:06 +00005471 xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00005472
5473 if (RAW != '(') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005474 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005475 return(NULL);
5476 }
5477 SHRINK;
5478 do {
5479 NEXT;
5480 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00005481 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005482 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005483 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5484 "Name expected in NOTATION declaration\n");
Daniel Veillard489f9672009-08-10 16:49:30 +02005485 xmlFreeEnumeration(ret);
5486 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005487 }
Daniel Veillard49d44052008-08-27 19:57:06 +00005488 tmp = ret;
5489 while (tmp != NULL) {
5490 if (xmlStrEqual(name, tmp->name)) {
5491 xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5492 "standalone: attribute notation value token %s duplicated\n",
5493 name, NULL);
5494 if (!xmlDictOwns(ctxt->dict, name))
5495 xmlFree((xmlChar *) name);
5496 break;
5497 }
5498 tmp = tmp->next;
5499 }
5500 if (tmp == NULL) {
5501 cur = xmlCreateEnumeration(name);
Daniel Veillard489f9672009-08-10 16:49:30 +02005502 if (cur == NULL) {
5503 xmlFreeEnumeration(ret);
5504 return(NULL);
5505 }
Daniel Veillard49d44052008-08-27 19:57:06 +00005506 if (last == NULL) ret = last = cur;
5507 else {
5508 last->next = cur;
5509 last = cur;
5510 }
Owen Taylor3473f882001-02-23 17:55:21 +00005511 }
5512 SKIP_BLANKS;
5513 } while (RAW == '|');
5514 if (RAW != ')') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005515 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
Daniel Veillard489f9672009-08-10 16:49:30 +02005516 xmlFreeEnumeration(ret);
5517 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005518 }
5519 NEXT;
5520 return(ret);
5521}
5522
5523/**
5524 * xmlParseEnumerationType:
5525 * @ctxt: an XML parser context
5526 *
5527 * parse an Enumeration attribute type.
5528 *
5529 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
5530 *
5531 * [ VC: Enumeration ]
5532 * Values of this type must match one of the Nmtoken tokens in
5533 * the declaration
5534 *
5535 * Returns: the enumeration attribute tree built while parsing
5536 */
5537
5538xmlEnumerationPtr
5539xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
5540 xmlChar *name;
Daniel Veillard49d44052008-08-27 19:57:06 +00005541 xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00005542
5543 if (RAW != '(') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005544 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005545 return(NULL);
5546 }
5547 SHRINK;
5548 do {
5549 NEXT;
5550 SKIP_BLANKS;
5551 name = xmlParseNmtoken(ctxt);
5552 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005553 xmlFatalErr(ctxt, XML_ERR_NMTOKEN_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005554 return(ret);
5555 }
Daniel Veillard49d44052008-08-27 19:57:06 +00005556 tmp = ret;
5557 while (tmp != NULL) {
5558 if (xmlStrEqual(name, tmp->name)) {
5559 xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5560 "standalone: attribute enumeration value token %s duplicated\n",
5561 name, NULL);
5562 if (!xmlDictOwns(ctxt->dict, name))
5563 xmlFree(name);
5564 break;
5565 }
5566 tmp = tmp->next;
5567 }
5568 if (tmp == NULL) {
5569 cur = xmlCreateEnumeration(name);
5570 if (!xmlDictOwns(ctxt->dict, name))
5571 xmlFree(name);
Daniel Veillard489f9672009-08-10 16:49:30 +02005572 if (cur == NULL) {
5573 xmlFreeEnumeration(ret);
5574 return(NULL);
5575 }
Daniel Veillard49d44052008-08-27 19:57:06 +00005576 if (last == NULL) ret = last = cur;
5577 else {
5578 last->next = cur;
5579 last = cur;
5580 }
Owen Taylor3473f882001-02-23 17:55:21 +00005581 }
5582 SKIP_BLANKS;
5583 } while (RAW == '|');
5584 if (RAW != ')') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005585 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005586 return(ret);
5587 }
5588 NEXT;
5589 return(ret);
5590}
5591
5592/**
5593 * xmlParseEnumeratedType:
5594 * @ctxt: an XML parser context
5595 * @tree: the enumeration tree built while parsing
5596 *
5597 * parse an Enumerated attribute type.
5598 *
5599 * [57] EnumeratedType ::= NotationType | Enumeration
5600 *
5601 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5602 *
5603 *
5604 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
5605 */
5606
5607int
5608xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
Daniel Veillarda07050d2003-10-19 14:46:32 +00005609 if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005610 SKIP(8);
William M. Brack76e95df2003-10-18 16:20:14 +00005611 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005612 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5613 "Space required after 'NOTATION'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005614 return(0);
5615 }
5616 SKIP_BLANKS;
5617 *tree = xmlParseNotationType(ctxt);
5618 if (*tree == NULL) return(0);
5619 return(XML_ATTRIBUTE_NOTATION);
5620 }
5621 *tree = xmlParseEnumerationType(ctxt);
5622 if (*tree == NULL) return(0);
5623 return(XML_ATTRIBUTE_ENUMERATION);
5624}
5625
5626/**
5627 * xmlParseAttributeType:
5628 * @ctxt: an XML parser context
5629 * @tree: the enumeration tree built while parsing
5630 *
5631 * parse the Attribute list def for an element
5632 *
5633 * [54] AttType ::= StringType | TokenizedType | EnumeratedType
5634 *
5635 * [55] StringType ::= 'CDATA'
5636 *
5637 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
5638 * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
5639 *
5640 * Validity constraints for attribute values syntax are checked in
5641 * xmlValidateAttributeValue()
5642 *
5643 * [ VC: ID ]
5644 * Values of type ID must match the Name production. A name must not
5645 * appear more than once in an XML document as a value of this type;
5646 * i.e., ID values must uniquely identify the elements which bear them.
5647 *
5648 * [ VC: One ID per Element Type ]
5649 * No element type may have more than one ID attribute specified.
5650 *
5651 * [ VC: ID Attribute Default ]
5652 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
5653 *
5654 * [ VC: IDREF ]
5655 * Values of type IDREF must match the Name production, and values
5656 * of type IDREFS must match Names; each IDREF Name must match the value
5657 * of an ID attribute on some element in the XML document; i.e. IDREF
5658 * values must match the value of some ID attribute.
5659 *
5660 * [ VC: Entity Name ]
5661 * Values of type ENTITY must match the Name production, values
5662 * of type ENTITIES must match Names; each Entity Name must match the
5663 * name of an unparsed entity declared in the DTD.
5664 *
5665 * [ VC: Name Token ]
5666 * Values of type NMTOKEN must match the Nmtoken production; values
5667 * of type NMTOKENS must match Nmtokens.
5668 *
5669 * Returns the attribute type
5670 */
5671int
5672xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
5673 SHRINK;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005674 if (CMP5(CUR_PTR, 'C', 'D', 'A', 'T', 'A')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005675 SKIP(5);
5676 return(XML_ATTRIBUTE_CDATA);
Daniel Veillarda07050d2003-10-19 14:46:32 +00005677 } else if (CMP6(CUR_PTR, 'I', 'D', 'R', 'E', 'F', 'S')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005678 SKIP(6);
5679 return(XML_ATTRIBUTE_IDREFS);
Daniel Veillarda07050d2003-10-19 14:46:32 +00005680 } else if (CMP5(CUR_PTR, 'I', 'D', 'R', 'E', 'F')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005681 SKIP(5);
5682 return(XML_ATTRIBUTE_IDREF);
5683 } else if ((RAW == 'I') && (NXT(1) == 'D')) {
5684 SKIP(2);
5685 return(XML_ATTRIBUTE_ID);
Daniel Veillarda07050d2003-10-19 14:46:32 +00005686 } else if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005687 SKIP(6);
5688 return(XML_ATTRIBUTE_ENTITY);
Daniel Veillarda07050d2003-10-19 14:46:32 +00005689 } else if (CMP8(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'I', 'E', 'S')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005690 SKIP(8);
5691 return(XML_ATTRIBUTE_ENTITIES);
Daniel Veillarda07050d2003-10-19 14:46:32 +00005692 } else if (CMP8(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N', 'S')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005693 SKIP(8);
5694 return(XML_ATTRIBUTE_NMTOKENS);
Daniel Veillarda07050d2003-10-19 14:46:32 +00005695 } else if (CMP7(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005696 SKIP(7);
5697 return(XML_ATTRIBUTE_NMTOKEN);
5698 }
5699 return(xmlParseEnumeratedType(ctxt, tree));
5700}
5701
5702/**
5703 * xmlParseAttributeListDecl:
5704 * @ctxt: an XML parser context
5705 *
5706 * : parse the Attribute list def for an element
5707 *
5708 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
5709 *
5710 * [53] AttDef ::= S Name S AttType S DefaultDecl
5711 *
5712 */
5713void
5714xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005715 const xmlChar *elemName;
5716 const xmlChar *attrName;
Owen Taylor3473f882001-02-23 17:55:21 +00005717 xmlEnumerationPtr tree;
5718
Daniel Veillarda07050d2003-10-19 14:46:32 +00005719 if (CMP9(CUR_PTR, '<', '!', 'A', 'T', 'T', 'L', 'I', 'S', 'T')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005720 xmlParserInputPtr input = ctxt->input;
5721
5722 SKIP(9);
William M. Brack76e95df2003-10-18 16:20:14 +00005723 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005724 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005725 "Space required after '<!ATTLIST'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005726 }
5727 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00005728 elemName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005729 if (elemName == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005730 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5731 "ATTLIST: no name for Element\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005732 return;
5733 }
5734 SKIP_BLANKS;
5735 GROW;
5736 while (RAW != '>') {
5737 const xmlChar *check = CUR_PTR;
5738 int type;
5739 int def;
5740 xmlChar *defaultValue = NULL;
5741
5742 GROW;
5743 tree = NULL;
Daniel Veillard76d66f42001-05-16 21:05:17 +00005744 attrName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005745 if (attrName == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005746 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5747 "ATTLIST: no name for Attribute\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005748 break;
5749 }
5750 GROW;
William M. Brack76e95df2003-10-18 16:20:14 +00005751 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005752 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005753 "Space required after the attribute name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005754 break;
5755 }
5756 SKIP_BLANKS;
5757
5758 type = xmlParseAttributeType(ctxt, &tree);
5759 if (type <= 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00005760 break;
5761 }
5762
5763 GROW;
William M. Brack76e95df2003-10-18 16:20:14 +00005764 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005765 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5766 "Space required after the attribute type\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005767 if (tree != NULL)
5768 xmlFreeEnumeration(tree);
5769 break;
5770 }
5771 SKIP_BLANKS;
5772
5773 def = xmlParseDefaultDecl(ctxt, &defaultValue);
5774 if (def <= 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00005775 if (defaultValue != NULL)
5776 xmlFree(defaultValue);
5777 if (tree != NULL)
5778 xmlFreeEnumeration(tree);
5779 break;
5780 }
Daniel Veillard97c9ce22008-03-25 16:52:41 +00005781 if ((type != XML_ATTRIBUTE_CDATA) && (defaultValue != NULL))
5782 xmlAttrNormalizeSpace(defaultValue, defaultValue);
Owen Taylor3473f882001-02-23 17:55:21 +00005783
5784 GROW;
5785 if (RAW != '>') {
William M. Brack76e95df2003-10-18 16:20:14 +00005786 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005787 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005788 "Space required after the attribute default value\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005789 if (defaultValue != NULL)
5790 xmlFree(defaultValue);
5791 if (tree != NULL)
5792 xmlFreeEnumeration(tree);
5793 break;
5794 }
5795 SKIP_BLANKS;
5796 }
5797 if (check == CUR_PTR) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005798 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
5799 "in xmlParseAttributeListDecl\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005800 if (defaultValue != NULL)
5801 xmlFree(defaultValue);
5802 if (tree != NULL)
5803 xmlFreeEnumeration(tree);
5804 break;
5805 }
5806 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5807 (ctxt->sax->attributeDecl != NULL))
5808 ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
5809 type, def, defaultValue, tree);
Daniel Veillarde57ec792003-09-10 10:50:59 +00005810 else if (tree != NULL)
5811 xmlFreeEnumeration(tree);
5812
5813 if ((ctxt->sax2) && (defaultValue != NULL) &&
5814 (def != XML_ATTRIBUTE_IMPLIED) &&
5815 (def != XML_ATTRIBUTE_REQUIRED)) {
5816 xmlAddDefAttrs(ctxt, elemName, attrName, defaultValue);
5817 }
Daniel Veillardac4118d2008-01-11 05:27:32 +00005818 if (ctxt->sax2) {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00005819 xmlAddSpecialAttr(ctxt, elemName, attrName, type);
5820 }
Owen Taylor3473f882001-02-23 17:55:21 +00005821 if (defaultValue != NULL)
5822 xmlFree(defaultValue);
5823 GROW;
5824 }
5825 if (RAW == '>') {
5826 if (input != ctxt->input) {
Daniel Veillard49d44052008-08-27 19:57:06 +00005827 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
5828 "Attribute list declaration doesn't start and stop in the same entity\n",
5829 NULL, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005830 }
5831 NEXT;
5832 }
Owen Taylor3473f882001-02-23 17:55:21 +00005833 }
5834}
5835
5836/**
5837 * xmlParseElementMixedContentDecl:
5838 * @ctxt: an XML parser context
Daniel Veillarda9b66d02002-12-11 14:23:49 +00005839 * @inputchk: the input used for the current entity, needed for boundary checks
Owen Taylor3473f882001-02-23 17:55:21 +00005840 *
5841 * parse the declaration for a Mixed Element content
5842 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
5843 *
5844 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
5845 * '(' S? '#PCDATA' S? ')'
5846 *
5847 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
5848 *
5849 * [ VC: No Duplicate Types ]
5850 * The same name must not appear more than once in a single
5851 * mixed-content declaration.
5852 *
5853 * returns: the list of the xmlElementContentPtr describing the element choices
5854 */
5855xmlElementContentPtr
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005856xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
Owen Taylor3473f882001-02-23 17:55:21 +00005857 xmlElementContentPtr ret = NULL, cur = NULL, n;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005858 const xmlChar *elem = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005859
5860 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005861 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005862 SKIP(7);
5863 SKIP_BLANKS;
5864 SHRINK;
5865 if (RAW == ')') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005866 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00005867 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
5868"Element content declaration doesn't start and stop in the same entity\n",
Daniel Veillardae0765b2008-07-31 19:54:59 +00005869 NULL, NULL);
Daniel Veillard8dc16a62002-02-19 21:08:48 +00005870 }
Owen Taylor3473f882001-02-23 17:55:21 +00005871 NEXT;
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005872 ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
Daniel Veillard68b6e022008-03-31 09:26:00 +00005873 if (ret == NULL)
5874 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005875 if (RAW == '*') {
5876 ret->ocur = XML_ELEMENT_CONTENT_MULT;
5877 NEXT;
5878 }
5879 return(ret);
5880 }
5881 if ((RAW == '(') || (RAW == '|')) {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005882 ret = cur = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
Owen Taylor3473f882001-02-23 17:55:21 +00005883 if (ret == NULL) return(NULL);
5884 }
5885 while (RAW == '|') {
5886 NEXT;
5887 if (elem == NULL) {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005888 ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
Owen Taylor3473f882001-02-23 17:55:21 +00005889 if (ret == NULL) return(NULL);
5890 ret->c1 = cur;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005891 if (cur != NULL)
5892 cur->parent = ret;
Owen Taylor3473f882001-02-23 17:55:21 +00005893 cur = ret;
5894 } else {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005895 n = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
Owen Taylor3473f882001-02-23 17:55:21 +00005896 if (n == NULL) return(NULL);
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005897 n->c1 = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005898 if (n->c1 != NULL)
5899 n->c1->parent = n;
Owen Taylor3473f882001-02-23 17:55:21 +00005900 cur->c2 = n;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005901 if (n != NULL)
5902 n->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00005903 cur = n;
Owen Taylor3473f882001-02-23 17:55:21 +00005904 }
5905 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00005906 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005907 if (elem == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005908 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005909 "xmlParseElementMixedContentDecl : Name expected\n");
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005910 xmlFreeDocElementContent(ctxt->myDoc, cur);
Owen Taylor3473f882001-02-23 17:55:21 +00005911 return(NULL);
5912 }
5913 SKIP_BLANKS;
5914 GROW;
5915 }
5916 if ((RAW == ')') && (NXT(1) == '*')) {
5917 if (elem != NULL) {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005918 cur->c2 = xmlNewDocElementContent(ctxt->myDoc, elem,
Owen Taylor3473f882001-02-23 17:55:21 +00005919 XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005920 if (cur->c2 != NULL)
5921 cur->c2->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00005922 }
Daniel Veillardd44b9362009-09-07 12:15:08 +02005923 if (ret != NULL)
5924 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005925 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00005926 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
5927"Element content declaration doesn't start and stop in the same entity\n",
Daniel Veillardae0765b2008-07-31 19:54:59 +00005928 NULL, NULL);
Daniel Veillard8dc16a62002-02-19 21:08:48 +00005929 }
Owen Taylor3473f882001-02-23 17:55:21 +00005930 SKIP(2);
5931 } else {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005932 xmlFreeDocElementContent(ctxt->myDoc, ret);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005933 xmlFatalErr(ctxt, XML_ERR_MIXED_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005934 return(NULL);
5935 }
5936
5937 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005938 xmlFatalErr(ctxt, XML_ERR_PCDATA_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005939 }
5940 return(ret);
5941}
5942
5943/**
Daniel Veillard489f9672009-08-10 16:49:30 +02005944 * xmlParseElementChildrenContentDeclPriv:
Owen Taylor3473f882001-02-23 17:55:21 +00005945 * @ctxt: an XML parser context
Daniel Veillarda9b66d02002-12-11 14:23:49 +00005946 * @inputchk: the input used for the current entity, needed for boundary checks
Daniel Veillard489f9672009-08-10 16:49:30 +02005947 * @depth: the level of recursion
Owen Taylor3473f882001-02-23 17:55:21 +00005948 *
 * parse the declaration for an element children content model
5950 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
5951 *
5952 *
5953 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
5954 *
5955 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
5956 *
5957 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
5958 *
5959 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
5960 *
5961 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
5962 * TODO Parameter-entity replacement text must be properly nested
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005963 * with parenthesized groups. That is to say, if either of the
Owen Taylor3473f882001-02-23 17:55:21 +00005964 * opening or closing parentheses in a choice, seq, or Mixed
5965 * construct is contained in the replacement text for a parameter
5966 * entity, both must be contained in the same replacement text. For
5967 * interoperability, if a parameter-entity reference appears in a
5968 * choice, seq, or Mixed construct, its replacement text should not
5969 * be empty, and neither the first nor last non-blank character of
5970 * the replacement text should be a connector (| or ,).
5971 *
Daniel Veillard5e2dace2001-07-18 19:30:27 +00005972 * Returns the tree of xmlElementContentPtr describing the element
Owen Taylor3473f882001-02-23 17:55:21 +00005973 * hierarchy.
5974 */
Daniel Veillard489f9672009-08-10 16:49:30 +02005975static xmlElementContentPtr
5976xmlParseElementChildrenContentDeclPriv(xmlParserCtxtPtr ctxt, int inputchk,
5977 int depth) {
Owen Taylor3473f882001-02-23 17:55:21 +00005978 xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005979 const xmlChar *elem;
Owen Taylor3473f882001-02-23 17:55:21 +00005980 xmlChar type = 0;
5981
Daniel Veillard489f9672009-08-10 16:49:30 +02005982 if (((depth > 128) && ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
5983 (depth > 2048)) {
5984 xmlFatalErrMsgInt(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED,
5985"xmlParseElementChildrenContentDecl : depth %d too deep, use XML_PARSE_HUGE\n",
5986 depth);
5987 return(NULL);
5988 }
Owen Taylor3473f882001-02-23 17:55:21 +00005989 SKIP_BLANKS;
5990 GROW;
5991 if (RAW == '(') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005992 int inputid = ctxt->input->id;
Daniel Veillard8dc16a62002-02-19 21:08:48 +00005993
Owen Taylor3473f882001-02-23 17:55:21 +00005994 /* Recurse on first child */
5995 NEXT;
5996 SKIP_BLANKS;
Daniel Veillard489f9672009-08-10 16:49:30 +02005997 cur = ret = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
5998 depth + 1);
Owen Taylor3473f882001-02-23 17:55:21 +00005999 SKIP_BLANKS;
6000 GROW;
6001 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00006002 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006003 if (elem == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006004 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006005 return(NULL);
6006 }
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006007 cur = ret = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006008 if (cur == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006009 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006010 return(NULL);
6011 }
Owen Taylor3473f882001-02-23 17:55:21 +00006012 GROW;
6013 if (RAW == '?') {
6014 cur->ocur = XML_ELEMENT_CONTENT_OPT;
6015 NEXT;
6016 } else if (RAW == '*') {
6017 cur->ocur = XML_ELEMENT_CONTENT_MULT;
6018 NEXT;
6019 } else if (RAW == '+') {
6020 cur->ocur = XML_ELEMENT_CONTENT_PLUS;
6021 NEXT;
6022 } else {
6023 cur->ocur = XML_ELEMENT_CONTENT_ONCE;
6024 }
Owen Taylor3473f882001-02-23 17:55:21 +00006025 GROW;
6026 }
6027 SKIP_BLANKS;
6028 SHRINK;
6029 while (RAW != ')') {
6030 /*
6031 * Each loop we parse one separator and one element.
6032 */
6033 if (RAW == ',') {
6034 if (type == 0) type = CUR;
6035
6036 /*
6037 * Detect "Name | Name , Name" error
6038 */
6039 else if (type != CUR) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006040 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00006041 "xmlParseElementChildrenContentDecl : '%c' expected\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006042 type);
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00006043 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006044 xmlFreeDocElementContent(ctxt->myDoc, last);
Owen Taylor3473f882001-02-23 17:55:21 +00006045 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006046 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00006047 return(NULL);
6048 }
6049 NEXT;
6050
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006051 op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_SEQ);
Owen Taylor3473f882001-02-23 17:55:21 +00006052 if (op == NULL) {
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00006053 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006054 xmlFreeDocElementContent(ctxt->myDoc, last);
6055 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00006056 return(NULL);
6057 }
6058 if (last == NULL) {
6059 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006060 if (ret != NULL)
6061 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00006062 ret = cur = op;
6063 } else {
6064 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006065 if (op != NULL)
6066 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00006067 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006068 if (last != NULL)
6069 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00006070 cur =op;
6071 last = NULL;
6072 }
6073 } else if (RAW == '|') {
6074 if (type == 0) type = CUR;
6075
6076 /*
6077 * Detect "Name , Name | Name" error
6078 */
6079 else if (type != CUR) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006080 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00006081 "xmlParseElementChildrenContentDecl : '%c' expected\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006082 type);
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00006083 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006084 xmlFreeDocElementContent(ctxt->myDoc, last);
Owen Taylor3473f882001-02-23 17:55:21 +00006085 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006086 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00006087 return(NULL);
6088 }
6089 NEXT;
6090
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006091 op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
Owen Taylor3473f882001-02-23 17:55:21 +00006092 if (op == NULL) {
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00006093 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006094 xmlFreeDocElementContent(ctxt->myDoc, last);
Owen Taylor3473f882001-02-23 17:55:21 +00006095 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006096 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00006097 return(NULL);
6098 }
6099 if (last == NULL) {
6100 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006101 if (ret != NULL)
6102 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00006103 ret = cur = op;
6104 } else {
6105 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006106 if (op != NULL)
6107 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00006108 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006109 if (last != NULL)
6110 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00006111 cur =op;
6112 last = NULL;
6113 }
6114 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006115 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED, NULL);
Daniel Veillardc707d0b2008-01-24 14:48:54 +00006116 if ((last != NULL) && (last != ret))
6117 xmlFreeDocElementContent(ctxt->myDoc, last);
Owen Taylor3473f882001-02-23 17:55:21 +00006118 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006119 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00006120 return(NULL);
6121 }
6122 GROW;
6123 SKIP_BLANKS;
6124 GROW;
6125 if (RAW == '(') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006126 int inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00006127 /* Recurse on second child */
6128 NEXT;
6129 SKIP_BLANKS;
Daniel Veillard489f9672009-08-10 16:49:30 +02006130 last = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
6131 depth + 1);
Owen Taylor3473f882001-02-23 17:55:21 +00006132 SKIP_BLANKS;
6133 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00006134 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006135 if (elem == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006136 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006137 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006138 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00006139 return(NULL);
6140 }
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006141 last = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillard68b6e022008-03-31 09:26:00 +00006142 if (last == NULL) {
6143 if (ret != NULL)
6144 xmlFreeDocElementContent(ctxt->myDoc, ret);
6145 return(NULL);
6146 }
Owen Taylor3473f882001-02-23 17:55:21 +00006147 if (RAW == '?') {
6148 last->ocur = XML_ELEMENT_CONTENT_OPT;
6149 NEXT;
6150 } else if (RAW == '*') {
6151 last->ocur = XML_ELEMENT_CONTENT_MULT;
6152 NEXT;
6153 } else if (RAW == '+') {
6154 last->ocur = XML_ELEMENT_CONTENT_PLUS;
6155 NEXT;
6156 } else {
6157 last->ocur = XML_ELEMENT_CONTENT_ONCE;
6158 }
6159 }
6160 SKIP_BLANKS;
6161 GROW;
6162 }
6163 if ((cur != NULL) && (last != NULL)) {
6164 cur->c2 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006165 if (last != NULL)
6166 last->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00006167 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006168 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00006169 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6170"Element content declaration doesn't start and stop in the same entity\n",
Daniel Veillardae0765b2008-07-31 19:54:59 +00006171 NULL, NULL);
Daniel Veillard8dc16a62002-02-19 21:08:48 +00006172 }
Owen Taylor3473f882001-02-23 17:55:21 +00006173 NEXT;
6174 if (RAW == '?') {
William M. Brackf8f2e8f2004-05-14 04:37:41 +00006175 if (ret != NULL) {
6176 if ((ret->ocur == XML_ELEMENT_CONTENT_PLUS) ||
6177 (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6178 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6179 else
6180 ret->ocur = XML_ELEMENT_CONTENT_OPT;
6181 }
Owen Taylor3473f882001-02-23 17:55:21 +00006182 NEXT;
6183 } else if (RAW == '*') {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00006184 if (ret != NULL) {
Daniel Veillarde470df72001-04-18 21:41:07 +00006185 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Daniel Veillardce2c2f02001-10-18 14:57:24 +00006186 cur = ret;
6187 /*
6188 * Some normalization:
6189 * (a | b* | c?)* == (a | b | c)*
6190 */
Daniel Veillard30e76072006-03-09 14:13:55 +00006191 while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00006192 if ((cur->c1 != NULL) &&
6193 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6194 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT)))
6195 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6196 if ((cur->c2 != NULL) &&
6197 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6198 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT)))
6199 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6200 cur = cur->c2;
6201 }
6202 }
Owen Taylor3473f882001-02-23 17:55:21 +00006203 NEXT;
6204 } else if (RAW == '+') {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00006205 if (ret != NULL) {
6206 int found = 0;
6207
William M. Brackf8f2e8f2004-05-14 04:37:41 +00006208 if ((ret->ocur == XML_ELEMENT_CONTENT_OPT) ||
6209 (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6210 ret->ocur = XML_ELEMENT_CONTENT_MULT;
William M. Brackeb8509c2004-05-14 03:48:02 +00006211 else
6212 ret->ocur = XML_ELEMENT_CONTENT_PLUS;
Daniel Veillardce2c2f02001-10-18 14:57:24 +00006213 /*
6214 * Some normalization:
6215 * (a | b*)+ == (a | b)*
6216 * (a | b?)+ == (a | b)*
6217 */
Daniel Veillard30e76072006-03-09 14:13:55 +00006218 while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00006219 if ((cur->c1 != NULL) &&
6220 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6221 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) {
6222 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6223 found = 1;
6224 }
6225 if ((cur->c2 != NULL) &&
6226 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6227 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) {
6228 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6229 found = 1;
6230 }
6231 cur = cur->c2;
6232 }
6233 if (found)
6234 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6235 }
Owen Taylor3473f882001-02-23 17:55:21 +00006236 NEXT;
6237 }
6238 return(ret);
6239}
6240
6241/**
Daniel Veillard489f9672009-08-10 16:49:30 +02006242 * xmlParseElementChildrenContentDecl:
6243 * @ctxt: an XML parser context
6244 * @inputchk: the input used for the current entity, needed for boundary checks
Daniel Veillard489f9672009-08-10 16:49:30 +02006245 *
6246 * parse the declaration for a Mixed Element content
6247 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6248 *
6249 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
6250 *
6251 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6252 *
6253 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6254 *
6255 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6256 *
6257 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6258 * TODO Parameter-entity replacement text must be properly nested
6259 * with parenthesized groups. That is to say, if either of the
6260 * opening or closing parentheses in a choice, seq, or Mixed
6261 * construct is contained in the replacement text for a parameter
6262 * entity, both must be contained in the same replacement text. For
6263 * interoperability, if a parameter-entity reference appears in a
6264 * choice, seq, or Mixed construct, its replacement text should not
6265 * be empty, and neither the first nor last non-blank character of
6266 * the replacement text should be a connector (| or ,).
6267 *
6268 * Returns the tree of xmlElementContentPtr describing the element
6269 * hierarchy.
6270 */
6271xmlElementContentPtr
6272xmlParseElementChildrenContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
6273 /* stub left for API/ABI compat */
6274 return(xmlParseElementChildrenContentDeclPriv(ctxt, inputchk, 1));
6275}
6276
6277/**
Owen Taylor3473f882001-02-23 17:55:21 +00006278 * xmlParseElementContentDecl:
6279 * @ctxt: an XML parser context
6280 * @name: the name of the element being defined.
6281 * @result: the Element Content pointer will be stored here if any
6282 *
6283 * parse the declaration for an Element content either Mixed or Children,
6284 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
6285 *
6286 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
6287 *
6288 * returns: the type of element content XML_ELEMENT_TYPE_xxx
6289 */
6290
6291int
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006292xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, const xmlChar *name,
Owen Taylor3473f882001-02-23 17:55:21 +00006293 xmlElementContentPtr *result) {
6294
6295 xmlElementContentPtr tree = NULL;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006296 int inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00006297 int res;
6298
6299 *result = NULL;
6300
6301 if (RAW != '(') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006302 xmlFatalErrMsgStr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
Daniel Veillard56a4cb82001-03-24 17:00:36 +00006303 "xmlParseElementContentDecl : %s '(' expected\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006304 return(-1);
6305 }
6306 NEXT;
6307 GROW;
6308 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00006309 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006310 tree = xmlParseElementMixedContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00006311 res = XML_ELEMENT_TYPE_MIXED;
6312 } else {
Daniel Veillard489f9672009-08-10 16:49:30 +02006313 tree = xmlParseElementChildrenContentDeclPriv(ctxt, inputid, 1);
Owen Taylor3473f882001-02-23 17:55:21 +00006314 res = XML_ELEMENT_TYPE_ELEMENT;
6315 }
Owen Taylor3473f882001-02-23 17:55:21 +00006316 SKIP_BLANKS;
6317 *result = tree;
6318 return(res);
6319}
6320
6321/**
6322 * xmlParseElementDecl:
6323 * @ctxt: an XML parser context
6324 *
6325 * parse an Element declaration.
6326 *
6327 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
6328 *
6329 * [ VC: Unique Element Type Declaration ]
6330 * No element type may be declared more than once
6331 *
6332 * Returns the type of the element, or -1 in case of error
6333 */
6334int
6335xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006336 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00006337 int ret = -1;
6338 xmlElementContentPtr content = NULL;
6339
Daniel Veillard4c778d82005-01-23 17:37:44 +00006340 /* GROW; done in the caller */
Daniel Veillarda07050d2003-10-19 14:46:32 +00006341 if (CMP9(CUR_PTR, '<', '!', 'E', 'L', 'E', 'M', 'E', 'N', 'T')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006342 xmlParserInputPtr input = ctxt->input;
6343
6344 SKIP(9);
William M. Brack76e95df2003-10-18 16:20:14 +00006345 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006346 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6347 "Space required after 'ELEMENT'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006348 }
6349 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00006350 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006351 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006352 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6353 "xmlParseElementDecl: no name for Element\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006354 return(-1);
6355 }
6356 while ((RAW == 0) && (ctxt->inputNr > 1))
6357 xmlPopInput(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00006358 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006359 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6360 "Space required after the element name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006361 }
6362 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00006363 if (CMP5(CUR_PTR, 'E', 'M', 'P', 'T', 'Y')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006364 SKIP(5);
6365 /*
6366 * Element must always be empty.
6367 */
6368 ret = XML_ELEMENT_TYPE_EMPTY;
6369 } else if ((RAW == 'A') && (NXT(1) == 'N') &&
6370 (NXT(2) == 'Y')) {
6371 SKIP(3);
6372 /*
6373 * Element is a generic container.
6374 */
6375 ret = XML_ELEMENT_TYPE_ANY;
6376 } else if (RAW == '(') {
6377 ret = xmlParseElementContentDecl(ctxt, name, &content);
6378 } else {
6379 /*
6380 * [ WFC: PEs in Internal Subset ] error handling.
6381 */
6382 if ((RAW == '%') && (ctxt->external == 0) &&
6383 (ctxt->inputNr == 1)) {
Daniel Veillardf403d292003-10-05 13:51:35 +00006384 xmlFatalErrMsg(ctxt, XML_ERR_PEREF_IN_INT_SUBSET,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006385 "PEReference: forbidden within markup decl in internal subset\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006386 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00006387 xmlFatalErrMsg(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
Owen Taylor3473f882001-02-23 17:55:21 +00006388 "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
6389 }
Owen Taylor3473f882001-02-23 17:55:21 +00006390 return(-1);
6391 }
6392
6393 SKIP_BLANKS;
6394 /*
6395 * Pop-up of finished entities.
6396 */
6397 while ((RAW == 0) && (ctxt->inputNr > 1))
6398 xmlPopInput(ctxt);
6399 SKIP_BLANKS;
6400
6401 if (RAW != '>') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006402 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006403 if (content != NULL) {
6404 xmlFreeDocElementContent(ctxt->myDoc, content);
6405 }
Owen Taylor3473f882001-02-23 17:55:21 +00006406 } else {
6407 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006408 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6409 "Element declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006410 }
6411
6412 NEXT;
6413 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006414 (ctxt->sax->elementDecl != NULL)) {
6415 if (content != NULL)
6416 content->parent = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00006417 ctxt->sax->elementDecl(ctxt->userData, name, ret,
6418 content);
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006419 if ((content != NULL) && (content->parent == NULL)) {
6420 /*
6421 * this is a trick: if xmlAddElementDecl is called,
6422 * instead of copying the full tree it is plugged directly
6423 * if called from the parser. Avoid duplicating the
6424 * interfaces or change the API/ABI
6425 */
6426 xmlFreeDocElementContent(ctxt->myDoc, content);
6427 }
6428 } else if (content != NULL) {
6429 xmlFreeDocElementContent(ctxt->myDoc, content);
6430 }
Owen Taylor3473f882001-02-23 17:55:21 +00006431 }
Owen Taylor3473f882001-02-23 17:55:21 +00006432 }
6433 return(ret);
6434}
6435
6436/**
Owen Taylor3473f882001-02-23 17:55:21 +00006437 * xmlParseConditionalSections
6438 * @ctxt: an XML parser context
6439 *
6440 * [61] conditionalSect ::= includeSect | ignoreSect
6441 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
6442 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
6443 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
6444 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
6445 */
6446
Daniel Veillard56a4cb82001-03-24 17:00:36 +00006447static void
Owen Taylor3473f882001-02-23 17:55:21 +00006448xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
Daniel Veillard49d44052008-08-27 19:57:06 +00006449 int id = ctxt->input->id;
6450
Owen Taylor3473f882001-02-23 17:55:21 +00006451 SKIP(3);
6452 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00006453 if (CMP7(CUR_PTR, 'I', 'N', 'C', 'L', 'U', 'D', 'E')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006454 SKIP(7);
6455 SKIP_BLANKS;
6456 if (RAW != '[') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006457 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006458 } else {
Daniel Veillard49d44052008-08-27 19:57:06 +00006459 if (ctxt->input->id != id) {
6460 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6461 "All markup of the conditional section is not in the same entity\n",
6462 NULL, NULL);
6463 }
Owen Taylor3473f882001-02-23 17:55:21 +00006464 NEXT;
6465 }
6466 if (xmlParserDebugEntities) {
6467 if ((ctxt->input != NULL) && (ctxt->input->filename))
6468 xmlGenericError(xmlGenericErrorContext,
6469 "%s(%d): ", ctxt->input->filename,
6470 ctxt->input->line);
6471 xmlGenericError(xmlGenericErrorContext,
6472 "Entering INCLUDE Conditional Section\n");
6473 }
6474
6475 while ((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') ||
6476 (NXT(2) != '>'))) {
6477 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00006478 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00006479
6480 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6481 xmlParseConditionalSections(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00006482 } else if (IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00006483 NEXT;
6484 } else if (RAW == '%') {
6485 xmlParsePEReference(ctxt);
6486 } else
6487 xmlParseMarkupDecl(ctxt);
6488
6489 /*
6490 * Pop-up of finished entities.
6491 */
6492 while ((RAW == 0) && (ctxt->inputNr > 1))
6493 xmlPopInput(ctxt);
6494
Daniel Veillardfdc91562002-07-01 21:52:03 +00006495 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006496 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006497 break;
6498 }
6499 }
6500 if (xmlParserDebugEntities) {
6501 if ((ctxt->input != NULL) && (ctxt->input->filename))
6502 xmlGenericError(xmlGenericErrorContext,
6503 "%s(%d): ", ctxt->input->filename,
6504 ctxt->input->line);
6505 xmlGenericError(xmlGenericErrorContext,
6506 "Leaving INCLUDE Conditional Section\n");
6507 }
6508
Daniel Veillarda07050d2003-10-19 14:46:32 +00006509 } else if (CMP6(CUR_PTR, 'I', 'G', 'N', 'O', 'R', 'E')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006510 int state;
William M. Brack78637da2003-07-31 14:47:38 +00006511 xmlParserInputState instate;
Owen Taylor3473f882001-02-23 17:55:21 +00006512 int depth = 0;
6513
6514 SKIP(6);
6515 SKIP_BLANKS;
6516 if (RAW != '[') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006517 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006518 } else {
Daniel Veillard49d44052008-08-27 19:57:06 +00006519 if (ctxt->input->id != id) {
6520 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6521 "All markup of the conditional section is not in the same entity\n",
6522 NULL, NULL);
6523 }
Owen Taylor3473f882001-02-23 17:55:21 +00006524 NEXT;
6525 }
6526 if (xmlParserDebugEntities) {
6527 if ((ctxt->input != NULL) && (ctxt->input->filename))
6528 xmlGenericError(xmlGenericErrorContext,
6529 "%s(%d): ", ctxt->input->filename,
6530 ctxt->input->line);
6531 xmlGenericError(xmlGenericErrorContext,
6532 "Entering IGNORE Conditional Section\n");
6533 }
6534
6535 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006536 * Parse up to the end of the conditional section
Owen Taylor3473f882001-02-23 17:55:21 +00006537 * But disable SAX event generating DTD building in the meantime
6538 */
6539 state = ctxt->disableSAX;
6540 instate = ctxt->instate;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006541 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006542 ctxt->instate = XML_PARSER_IGNORE;
6543
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00006544 while ((depth >= 0) && (RAW != 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00006545 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6546 depth++;
6547 SKIP(3);
6548 continue;
6549 }
6550 if ((RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
6551 if (--depth >= 0) SKIP(3);
6552 continue;
6553 }
6554 NEXT;
6555 continue;
6556 }
6557
6558 ctxt->disableSAX = state;
6559 ctxt->instate = instate;
6560
6561 if (xmlParserDebugEntities) {
6562 if ((ctxt->input != NULL) && (ctxt->input->filename))
6563 xmlGenericError(xmlGenericErrorContext,
6564 "%s(%d): ", ctxt->input->filename,
6565 ctxt->input->line);
6566 xmlGenericError(xmlGenericErrorContext,
6567 "Leaving IGNORE Conditional Section\n");
6568 }
6569
6570 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006571 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID_KEYWORD, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006572 }
6573
6574 if (RAW == 0)
6575 SHRINK;
6576
6577 if (RAW == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006578 xmlFatalErr(ctxt, XML_ERR_CONDSEC_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006579 } else {
Daniel Veillard49d44052008-08-27 19:57:06 +00006580 if (ctxt->input->id != id) {
6581 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6582 "All markup of the conditional section is not in the same entity\n",
6583 NULL, NULL);
6584 }
Owen Taylor3473f882001-02-23 17:55:21 +00006585 SKIP(3);
6586 }
6587}
6588
6589/**
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006590 * xmlParseMarkupDecl:
6591 * @ctxt: an XML parser context
6592 *
6593 * parse Markup declarations
6594 *
6595 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
6596 * NotationDecl | PI | Comment
6597 *
6598 * [ VC: Proper Declaration/PE Nesting ]
6599 * Parameter-entity replacement text must be properly nested with
6600 * markup declarations. That is to say, if either the first character
6601 * or the last character of a markup declaration (markupdecl above) is
6602 * contained in the replacement text for a parameter-entity reference,
6603 * both must be contained in the same replacement text.
6604 *
6605 * [ WFC: PEs in Internal Subset ]
6606 * In the internal DTD subset, parameter-entity references can occur
6607 * only where markup declarations can occur, not within markup declarations.
6608 * (This does not apply to references that occur in external parameter
6609 * entities or to the external subset.)
6610 */
6611void
6612xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
6613 GROW;
Daniel Veillard4c778d82005-01-23 17:37:44 +00006614 if (CUR == '<') {
6615 if (NXT(1) == '!') {
6616 switch (NXT(2)) {
6617 case 'E':
6618 if (NXT(3) == 'L')
6619 xmlParseElementDecl(ctxt);
6620 else if (NXT(3) == 'N')
6621 xmlParseEntityDecl(ctxt);
6622 break;
6623 case 'A':
6624 xmlParseAttributeListDecl(ctxt);
6625 break;
6626 case 'N':
6627 xmlParseNotationDecl(ctxt);
6628 break;
6629 case '-':
6630 xmlParseComment(ctxt);
6631 break;
6632 default:
6633 /* there is an error but it will be detected later */
6634 break;
6635 }
6636 } else if (NXT(1) == '?') {
6637 xmlParsePI(ctxt);
6638 }
6639 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006640 /*
6641 * This is only for internal subset. On external entities,
6642 * the replacement is done before parsing stage
6643 */
6644 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
6645 xmlParsePEReference(ctxt);
6646
6647 /*
6648 * Conditional sections are allowed from entities included
6649 * by PE References in the internal subset.
6650 */
6651 if ((ctxt->external == 0) && (ctxt->inputNr > 1)) {
6652 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6653 xmlParseConditionalSections(ctxt);
6654 }
6655 }
6656
6657 ctxt->instate = XML_PARSER_DTD;
6658}
6659
6660/**
6661 * xmlParseTextDecl:
6662 * @ctxt: an XML parser context
Daniel Veillard40ec29a2008-07-30 12:35:40 +00006663 *
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006664 * parse an XML declaration header for external entities
6665 *
6666 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006667 */
6668
6669void
6670xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
6671 xmlChar *version;
Daniel Veillardf5cb3cd2003-10-28 13:58:13 +00006672 const xmlChar *encoding;
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006673
6674 /*
6675 * We know that '<?xml' is here.
6676 */
Daniel Veillarda07050d2003-10-19 14:46:32 +00006677 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006678 SKIP(5);
6679 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006680 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_STARTED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006681 return;
6682 }
6683
William M. Brack76e95df2003-10-18 16:20:14 +00006684 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006685 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6686 "Space needed after '<?xml'\n");
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006687 }
6688 SKIP_BLANKS;
6689
6690 /*
6691 * We may have the VersionInfo here.
6692 */
6693 version = xmlParseVersionInfo(ctxt);
6694 if (version == NULL)
6695 version = xmlCharStrdup(XML_DEFAULT_VERSION);
Daniel Veillard401c2112002-01-07 16:54:10 +00006696 else {
William M. Brack76e95df2003-10-18 16:20:14 +00006697 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006698 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6699 "Space needed here\n");
Daniel Veillard401c2112002-01-07 16:54:10 +00006700 }
6701 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006702 ctxt->input->version = version;
6703
6704 /*
6705 * We must have the encoding declaration
6706 */
Daniel Veillardf5cb3cd2003-10-28 13:58:13 +00006707 encoding = xmlParseEncodingDecl(ctxt);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006708 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
6709 /*
6710 * The XML REC instructs us to stop parsing right here
6711 */
6712 return;
6713 }
Daniel Veillardf5cb3cd2003-10-28 13:58:13 +00006714 if ((encoding == NULL) && (ctxt->errNo == XML_ERR_OK)) {
6715 xmlFatalErrMsg(ctxt, XML_ERR_MISSING_ENCODING,
6716 "Missing encoding in text declaration\n");
6717 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006718
6719 SKIP_BLANKS;
6720 if ((RAW == '?') && (NXT(1) == '>')) {
6721 SKIP(2);
6722 } else if (RAW == '>') {
6723 /* Deprecated old WD ... */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006724 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006725 NEXT;
6726 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006727 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006728 MOVETO_ENDTAG(CUR_PTR);
6729 NEXT;
6730 }
6731}
6732
6733/**
Owen Taylor3473f882001-02-23 17:55:21 +00006734 * xmlParseExternalSubset:
6735 * @ctxt: an XML parser context
6736 * @ExternalID: the external identifier
6737 * @SystemID: the system identifier (or URL)
6738 *
6739 * parse Markup declarations from an external subset
6740 *
6741 * [30] extSubset ::= textDecl? extSubsetDecl
6742 *
6743 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
6744 */
6745void
6746xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
6747 const xmlChar *SystemID) {
Daniel Veillard309f81d2003-09-23 09:02:53 +00006748 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006749 GROW;
Daniel Veillard6ccc56d2008-04-03 12:59:06 +00006750
Nikolay Sivove6ad10a2010-11-01 11:35:14 +01006751 if ((ctxt->encoding == NULL) &&
Daniel Veillard6ccc56d2008-04-03 12:59:06 +00006752 (ctxt->input->end - ctxt->input->cur >= 4)) {
6753 xmlChar start[4];
6754 xmlCharEncoding enc;
6755
6756 start[0] = RAW;
6757 start[1] = NXT(1);
6758 start[2] = NXT(2);
6759 start[3] = NXT(3);
6760 enc = xmlDetectCharEncoding(start, 4);
6761 if (enc != XML_CHAR_ENCODING_NONE)
6762 xmlSwitchEncoding(ctxt, enc);
6763 }
6764
Daniel Veillarda07050d2003-10-19 14:46:32 +00006765 if (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006766 xmlParseTextDecl(ctxt);
6767 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
6768 /*
6769 * The XML REC instructs us to stop parsing right here
6770 */
6771 ctxt->instate = XML_PARSER_EOF;
6772 return;
6773 }
6774 }
6775 if (ctxt->myDoc == NULL) {
6776 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
Daniel Veillardae0765b2008-07-31 19:54:59 +00006777 if (ctxt->myDoc == NULL) {
6778 xmlErrMemory(ctxt, "New Doc failed");
6779 return;
6780 }
6781 ctxt->myDoc->properties = XML_DOC_INTERNAL;
Owen Taylor3473f882001-02-23 17:55:21 +00006782 }
6783 if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
6784 xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
6785
6786 ctxt->instate = XML_PARSER_DTD;
6787 ctxt->external = 1;
6788 while (((RAW == '<') && (NXT(1) == '?')) ||
6789 ((RAW == '<') && (NXT(1) == '!')) ||
William M. Brack76e95df2003-10-18 16:20:14 +00006790 (RAW == '%') || IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00006791 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00006792 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00006793
6794 GROW;
6795 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6796 xmlParseConditionalSections(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00006797 } else if (IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00006798 NEXT;
6799 } else if (RAW == '%') {
6800 xmlParsePEReference(ctxt);
6801 } else
6802 xmlParseMarkupDecl(ctxt);
6803
6804 /*
6805 * Pop-up of finished entities.
6806 */
6807 while ((RAW == 0) && (ctxt->inputNr > 1))
6808 xmlPopInput(ctxt);
6809
Daniel Veillardfdc91562002-07-01 21:52:03 +00006810 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006811 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006812 break;
6813 }
6814 }
6815
6816 if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006817 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006818 }
6819
6820}
6821
6822/**
6823 * xmlParseReference:
6824 * @ctxt: an XML parser context
Daniel Veillard0161e632008-08-28 15:36:32 +00006825 *
Owen Taylor3473f882001-02-23 17:55:21 +00006826 * parse and handle entity references in content, depending on the SAX
6827 * interface, this may end-up in a call to character() if this is a
6828 * CharRef, a predefined entity, if there is no reference() callback.
6829 * or if the parser was asked to switch to that mode.
6830 *
6831 * [67] Reference ::= EntityRef | CharRef
6832 */
6833void
6834xmlParseReference(xmlParserCtxtPtr ctxt) {
6835 xmlEntityPtr ent;
6836 xmlChar *val;
Daniel Veillard0161e632008-08-28 15:36:32 +00006837 int was_checked;
6838 xmlNodePtr list = NULL;
6839 xmlParserErrors ret = XML_ERR_OK;
Owen Taylor3473f882001-02-23 17:55:21 +00006840
Daniel Veillard0161e632008-08-28 15:36:32 +00006841
6842 if (RAW != '&')
6843 return;
6844
6845 /*
6846 * Simple case of a CharRef
6847 */
Owen Taylor3473f882001-02-23 17:55:21 +00006848 if (NXT(1) == '#') {
6849 int i = 0;
6850 xmlChar out[10];
6851 int hex = NXT(2);
Daniel Veillard56a4cb82001-03-24 17:00:36 +00006852 int value = xmlParseCharRef(ctxt);
Daniel Veillard0161e632008-08-28 15:36:32 +00006853
Daniel Veillarddc171602008-03-26 17:41:38 +00006854 if (value == 0)
6855 return;
Owen Taylor3473f882001-02-23 17:55:21 +00006856 if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
6857 /*
6858 * So we are using non-UTF-8 buffers
6859 * Check that the char fit on 8bits, if not
6860 * generate a CharRef.
6861 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00006862 if (value <= 0xFF) {
6863 out[0] = value;
Owen Taylor3473f882001-02-23 17:55:21 +00006864 out[1] = 0;
6865 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
6866 (!ctxt->disableSAX))
6867 ctxt->sax->characters(ctxt->userData, out, 1);
6868 } else {
6869 if ((hex == 'x') || (hex == 'X'))
Aleksey Sanin49cc9752002-06-14 17:07:10 +00006870 snprintf((char *)out, sizeof(out), "#x%X", value);
Owen Taylor3473f882001-02-23 17:55:21 +00006871 else
Aleksey Sanin49cc9752002-06-14 17:07:10 +00006872 snprintf((char *)out, sizeof(out), "#%d", value);
Owen Taylor3473f882001-02-23 17:55:21 +00006873 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
6874 (!ctxt->disableSAX))
6875 ctxt->sax->reference(ctxt->userData, out);
6876 }
6877 } else {
6878 /*
6879 * Just encode the value in UTF-8
6880 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00006881 COPY_BUF(0 ,out, i, value);
Owen Taylor3473f882001-02-23 17:55:21 +00006882 out[i] = 0;
6883 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
6884 (!ctxt->disableSAX))
6885 ctxt->sax->characters(ctxt->userData, out, i);
6886 }
Daniel Veillard0161e632008-08-28 15:36:32 +00006887 return;
6888 }
Daniel Veillarda37a6ad2006-10-10 20:05:45 +00006889
Daniel Veillard0161e632008-08-28 15:36:32 +00006890 /*
6891 * We are seeing an entity reference
6892 */
6893 ent = xmlParseEntityRef(ctxt);
6894 if (ent == NULL) return;
6895 if (!ctxt->wellFormed)
6896 return;
6897 was_checked = ent->checked;
6898
6899 /* special case of predefined entities */
6900 if ((ent->name == NULL) ||
6901 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
6902 val = ent->content;
6903 if (val == NULL) return;
6904 /*
6905 * inline the entity.
6906 */
6907 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
6908 (!ctxt->disableSAX))
6909 ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
6910 return;
6911 }
6912
6913 /*
6914 * The first reference to the entity trigger a parsing phase
6915 * where the ent->children is filled with the result from
6916 * the parsing.
6917 */
6918 if (ent->checked == 0) {
6919 unsigned long oldnbent = ctxt->nbentities;
6920
6921 /*
6922 * This is a bit hackish but this seems the best
6923 * way to make sure both SAX and DOM entity support
6924 * behaves okay.
6925 */
6926 void *user_data;
6927 if (ctxt->userData == ctxt)
6928 user_data = NULL;
6929 else
6930 user_data = ctxt->userData;
6931
6932 /*
6933 * Check that this entity is well formed
6934 * 4.3.2: An internal general parsed entity is well-formed
6935 * if its replacement text matches the production labeled
6936 * content.
6937 */
6938 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
6939 ctxt->depth++;
6940 ret = xmlParseBalancedChunkMemoryInternal(ctxt, ent->content,
6941 user_data, &list);
6942 ctxt->depth--;
6943
6944 } else if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
6945 ctxt->depth++;
6946 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt, ctxt->sax,
6947 user_data, ctxt->depth, ent->URI,
6948 ent->ExternalID, &list);
6949 ctxt->depth--;
6950 } else {
6951 ret = XML_ERR_ENTITY_PE_INTERNAL;
6952 xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
6953 "invalid entity type found\n", NULL);
6954 }
6955
6956 /*
6957 * Store the number of entities needing parsing for this entity
6958 * content and do checkings
6959 */
6960 ent->checked = ctxt->nbentities - oldnbent;
6961 if (ret == XML_ERR_ENTITY_LOOP) {
Daniel Veillard4bf899b2008-08-20 17:04:30 +00006962 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
Daniel Veillard0161e632008-08-28 15:36:32 +00006963 xmlFreeNodeList(list);
Daniel Veillard4bf899b2008-08-20 17:04:30 +00006964 return;
6965 }
Daniel Veillard0161e632008-08-28 15:36:32 +00006966 if (xmlParserEntityCheck(ctxt, 0, ent)) {
6967 xmlFreeNodeList(list);
6968 return;
6969 }
Owen Taylor3473f882001-02-23 17:55:21 +00006970
Daniel Veillard0161e632008-08-28 15:36:32 +00006971 if ((ret == XML_ERR_OK) && (list != NULL)) {
6972 if (((ent->etype == XML_INTERNAL_GENERAL_ENTITY) ||
6973 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY))&&
6974 (ent->children == NULL)) {
6975 ent->children = list;
6976 if (ctxt->replaceEntities) {
Owen Taylor3473f882001-02-23 17:55:21 +00006977 /*
Daniel Veillard0161e632008-08-28 15:36:32 +00006978 * Prune it directly in the generated document
6979 * except for single text nodes.
Owen Taylor3473f882001-02-23 17:55:21 +00006980 */
Daniel Veillard0161e632008-08-28 15:36:32 +00006981 if (((list->type == XML_TEXT_NODE) &&
6982 (list->next == NULL)) ||
6983 (ctxt->parseMode == XML_PARSE_READER)) {
6984 list->parent = (xmlNodePtr) ent;
6985 list = NULL;
6986 ent->owner = 1;
6987 } else {
6988 ent->owner = 0;
6989 while (list != NULL) {
6990 list->parent = (xmlNodePtr) ctxt->node;
6991 list->doc = ctxt->myDoc;
6992 if (list->next == NULL)
6993 ent->last = list;
6994 list = list->next;
Owen Taylor3473f882001-02-23 17:55:21 +00006995 }
Daniel Veillard0161e632008-08-28 15:36:32 +00006996 list = ent->children;
6997#ifdef LIBXML_LEGACY_ENABLED
6998 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
6999 xmlAddEntityReference(ent, list, NULL);
7000#endif /* LIBXML_LEGACY_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00007001 }
7002 } else {
Daniel Veillard0161e632008-08-28 15:36:32 +00007003 ent->owner = 1;
7004 while (list != NULL) {
7005 list->parent = (xmlNodePtr) ent;
Rob Richardsc794eb52011-02-18 12:17:17 -05007006 xmlSetTreeDoc(list, ent->doc);
Daniel Veillard0161e632008-08-28 15:36:32 +00007007 if (list->next == NULL)
7008 ent->last = list;
7009 list = list->next;
Owen Taylor3473f882001-02-23 17:55:21 +00007010 }
7011 }
Daniel Veillard0161e632008-08-28 15:36:32 +00007012 } else {
7013 xmlFreeNodeList(list);
7014 list = NULL;
Daniel Veillarda37a6ad2006-10-10 20:05:45 +00007015 }
Daniel Veillard0161e632008-08-28 15:36:32 +00007016 } else if ((ret != XML_ERR_OK) &&
7017 (ret != XML_WAR_UNDECLARED_ENTITY)) {
7018 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7019 "Entity '%s' failed to parse\n", ent->name);
7020 } else if (list != NULL) {
7021 xmlFreeNodeList(list);
7022 list = NULL;
7023 }
7024 if (ent->checked == 0)
7025 ent->checked = 1;
7026 } else if (ent->checked != 1) {
7027 ctxt->nbentities += ent->checked;
7028 }
Daniel Veillarda37a6ad2006-10-10 20:05:45 +00007029
Daniel Veillard0161e632008-08-28 15:36:32 +00007030 /*
7031 * Now that the entity content has been gathered
7032 * provide it to the application, this can take different forms based
7033 * on the parsing modes.
7034 */
7035 if (ent->children == NULL) {
7036 /*
7037 * Probably running in SAX mode and the callbacks don't
7038 * build the entity content. So unless we already went
7039 * though parsing for first checking go though the entity
7040 * content to generate callbacks associated to the entity
7041 */
7042 if (was_checked != 0) {
7043 void *user_data;
Owen Taylor3473f882001-02-23 17:55:21 +00007044 /*
Daniel Veillard0161e632008-08-28 15:36:32 +00007045 * This is a bit hackish but this seems the best
7046 * way to make sure both SAX and DOM entity support
7047 * behaves okay.
Owen Taylor3473f882001-02-23 17:55:21 +00007048 */
Daniel Veillard0161e632008-08-28 15:36:32 +00007049 if (ctxt->userData == ctxt)
7050 user_data = NULL;
7051 else
7052 user_data = ctxt->userData;
7053
7054 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
7055 ctxt->depth++;
7056 ret = xmlParseBalancedChunkMemoryInternal(ctxt,
7057 ent->content, user_data, NULL);
7058 ctxt->depth--;
7059 } else if (ent->etype ==
7060 XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
7061 ctxt->depth++;
7062 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
7063 ctxt->sax, user_data, ctxt->depth,
7064 ent->URI, ent->ExternalID, NULL);
7065 ctxt->depth--;
7066 } else {
7067 ret = XML_ERR_ENTITY_PE_INTERNAL;
7068 xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
7069 "invalid entity type found\n", NULL);
7070 }
7071 if (ret == XML_ERR_ENTITY_LOOP) {
7072 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
7073 return;
7074 }
7075 }
7076 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7077 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
7078 /*
7079 * Entity reference callback comes second, it's somewhat
7080 * superfluous but a compatibility to historical behaviour
7081 */
7082 ctxt->sax->reference(ctxt->userData, ent->name);
7083 }
7084 return;
7085 }
7086
7087 /*
7088 * If we didn't get any children for the entity being built
7089 */
7090 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7091 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
7092 /*
7093 * Create a node.
7094 */
7095 ctxt->sax->reference(ctxt->userData, ent->name);
7096 return;
7097 }
7098
7099 if ((ctxt->replaceEntities) || (ent->children == NULL)) {
7100 /*
7101 * There is a problem on the handling of _private for entities
7102 * (bug 155816): Should we copy the content of the field from
7103 * the entity (possibly overwriting some value set by the user
7104 * when a copy is created), should we leave it alone, or should
7105 * we try to take care of different situations? The problem
7106 * is exacerbated by the usage of this field by the xmlReader.
7107 * To fix this bug, we look at _private on the created node
7108 * and, if it's NULL, we copy in whatever was in the entity.
7109 * If it's not NULL we leave it alone. This is somewhat of a
7110 * hack - maybe we should have further tests to determine
7111 * what to do.
7112 */
7113 if ((ctxt->node != NULL) && (ent->children != NULL)) {
7114 /*
7115 * Seems we are generating the DOM content, do
7116 * a simple tree copy for all references except the first
7117 * In the first occurrence list contains the replacement.
7118 * progressive == 2 means we are operating on the Reader
7119 * and since nodes are discarded we must copy all the time.
7120 */
7121 if (((list == NULL) && (ent->owner == 0)) ||
7122 (ctxt->parseMode == XML_PARSE_READER)) {
7123 xmlNodePtr nw = NULL, cur, firstChild = NULL;
7124
7125 /*
7126 * when operating on a reader, the entities definitions
7127 * are always owning the entities subtree.
7128 if (ctxt->parseMode == XML_PARSE_READER)
7129 ent->owner = 1;
7130 */
7131
7132 cur = ent->children;
7133 while (cur != NULL) {
7134 nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
7135 if (nw != NULL) {
7136 if (nw->_private == NULL)
7137 nw->_private = cur->_private;
7138 if (firstChild == NULL){
7139 firstChild = nw;
7140 }
7141 nw = xmlAddChild(ctxt->node, nw);
7142 }
7143 if (cur == ent->last) {
7144 /*
7145 * needed to detect some strange empty
7146 * node cases in the reader tests
7147 */
7148 if ((ctxt->parseMode == XML_PARSE_READER) &&
7149 (nw != NULL) &&
7150 (nw->type == XML_ELEMENT_NODE) &&
7151 (nw->children == NULL))
7152 nw->extra = 1;
7153
7154 break;
7155 }
7156 cur = cur->next;
7157 }
7158#ifdef LIBXML_LEGACY_ENABLED
7159 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7160 xmlAddEntityReference(ent, firstChild, nw);
7161#endif /* LIBXML_LEGACY_ENABLED */
7162 } else if (list == NULL) {
7163 xmlNodePtr nw = NULL, cur, next, last,
7164 firstChild = NULL;
7165 /*
7166 * Copy the entity child list and make it the new
7167 * entity child list. The goal is to make sure any
7168 * ID or REF referenced will be the one from the
7169 * document content and not the entity copy.
7170 */
7171 cur = ent->children;
7172 ent->children = NULL;
7173 last = ent->last;
7174 ent->last = NULL;
7175 while (cur != NULL) {
7176 next = cur->next;
7177 cur->next = NULL;
7178 cur->parent = NULL;
7179 nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
7180 if (nw != NULL) {
7181 if (nw->_private == NULL)
7182 nw->_private = cur->_private;
7183 if (firstChild == NULL){
7184 firstChild = cur;
7185 }
7186 xmlAddChild((xmlNodePtr) ent, nw);
7187 xmlAddChild(ctxt->node, cur);
7188 }
7189 if (cur == last)
7190 break;
7191 cur = next;
7192 }
Daniel Veillardcba68392008-08-29 12:43:40 +00007193 if (ent->owner == 0)
7194 ent->owner = 1;
Daniel Veillard0161e632008-08-28 15:36:32 +00007195#ifdef LIBXML_LEGACY_ENABLED
7196 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7197 xmlAddEntityReference(ent, firstChild, nw);
7198#endif /* LIBXML_LEGACY_ENABLED */
7199 } else {
7200 const xmlChar *nbktext;
7201
7202 /*
7203 * the name change is to avoid coalescing of the
7204 * node with a possible previous text one which
7205 * would make ent->children a dangling pointer
7206 */
7207 nbktext = xmlDictLookup(ctxt->dict, BAD_CAST "nbktext",
7208 -1);
7209 if (ent->children->type == XML_TEXT_NODE)
7210 ent->children->name = nbktext;
7211 if ((ent->last != ent->children) &&
7212 (ent->last->type == XML_TEXT_NODE))
7213 ent->last->name = nbktext;
7214 xmlAddChildList(ctxt->node, ent->children);
7215 }
7216
7217 /*
7218 * This is to avoid a nasty side effect, see
7219 * characters() in SAX.c
7220 */
7221 ctxt->nodemem = 0;
7222 ctxt->nodelen = 0;
7223 return;
Owen Taylor3473f882001-02-23 17:55:21 +00007224 }
7225 }
7226}
7227
7228/**
7229 * xmlParseEntityRef:
7230 * @ctxt: an XML parser context
7231 *
7232 * parse ENTITY references declarations
7233 *
7234 * [68] EntityRef ::= '&' Name ';'
7235 *
7236 * [ WFC: Entity Declared ]
7237 * In a document without any DTD, a document with only an internal DTD
7238 * subset which contains no parameter entity references, or a document
7239 * with "standalone='yes'", the Name given in the entity reference
7240 * must match that in an entity declaration, except that well-formed
7241 * documents need not declare any of the following entities: amp, lt,
7242 * gt, apos, quot. The declaration of a parameter entity must precede
7243 * any reference to it. Similarly, the declaration of a general entity
7244 * must precede any reference to it which appears in a default value in an
7245 * attribute-list declaration. Note that if entities are declared in the
7246 * external subset or in external parameter entities, a non-validating
7247 * processor is not obligated to read and process their declarations;
7248 * for such documents, the rule that an entity must be declared is a
7249 * well-formedness constraint only if standalone='yes'.
7250 *
7251 * [ WFC: Parsed Entity ]
7252 * An entity reference must not contain the name of an unparsed entity
7253 *
7254 * Returns the xmlEntityPtr if found, or NULL otherwise.
7255 */
7256xmlEntityPtr
7257xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007258 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00007259 xmlEntityPtr ent = NULL;
7260
7261 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00007262
Daniel Veillard0161e632008-08-28 15:36:32 +00007263 if (RAW != '&')
7264 return(NULL);
7265 NEXT;
7266 name = xmlParseName(ctxt);
7267 if (name == NULL) {
7268 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7269 "xmlParseEntityRef: no name\n");
7270 return(NULL);
7271 }
7272 if (RAW != ';') {
7273 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7274 return(NULL);
7275 }
7276 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00007277
Daniel Veillard0161e632008-08-28 15:36:32 +00007278 /*
7279 * Predefined entites override any extra definition
7280 */
Rob Richardsb9ed0172009-01-05 17:28:50 +00007281 if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7282 ent = xmlGetPredefinedEntity(name);
7283 if (ent != NULL)
7284 return(ent);
7285 }
Owen Taylor3473f882001-02-23 17:55:21 +00007286
Daniel Veillard0161e632008-08-28 15:36:32 +00007287 /*
7288 * Increate the number of entity references parsed
7289 */
7290 ctxt->nbentities++;
Owen Taylor3473f882001-02-23 17:55:21 +00007291
Daniel Veillard0161e632008-08-28 15:36:32 +00007292 /*
7293 * Ask first SAX for entity resolution, otherwise try the
7294 * entities which may have stored in the parser context.
7295 */
7296 if (ctxt->sax != NULL) {
7297 if (ctxt->sax->getEntity != NULL)
7298 ent = ctxt->sax->getEntity(ctxt->userData, name);
Rob Richardsb9ed0172009-01-05 17:28:50 +00007299 if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7300 (ctxt->options & XML_PARSE_OLDSAX))
7301 ent = xmlGetPredefinedEntity(name);
Daniel Veillard0161e632008-08-28 15:36:32 +00007302 if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7303 (ctxt->userData==ctxt)) {
7304 ent = xmlSAX2GetEntity(ctxt, name);
Owen Taylor3473f882001-02-23 17:55:21 +00007305 }
7306 }
Daniel Veillard0161e632008-08-28 15:36:32 +00007307 /*
7308 * [ WFC: Entity Declared ]
7309 * In a document without any DTD, a document with only an
7310 * internal DTD subset which contains no parameter entity
7311 * references, or a document with "standalone='yes'", the
7312 * Name given in the entity reference must match that in an
7313 * entity declaration, except that well-formed documents
7314 * need not declare any of the following entities: amp, lt,
7315 * gt, apos, quot.
7316 * The declaration of a parameter entity must precede any
7317 * reference to it.
7318 * Similarly, the declaration of a general entity must
7319 * precede any reference to it which appears in a default
7320 * value in an attribute-list declaration. Note that if
7321 * entities are declared in the external subset or in
7322 * external parameter entities, a non-validating processor
7323 * is not obligated to read and process their declarations;
7324 * for such documents, the rule that an entity must be
7325 * declared is a well-formedness constraint only if
7326 * standalone='yes'.
7327 */
7328 if (ent == NULL) {
7329 if ((ctxt->standalone == 1) ||
7330 ((ctxt->hasExternalSubset == 0) &&
7331 (ctxt->hasPErefs == 0))) {
7332 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7333 "Entity '%s' not defined\n", name);
7334 } else {
7335 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7336 "Entity '%s' not defined\n", name);
7337 if ((ctxt->inSubset == 0) &&
7338 (ctxt->sax != NULL) &&
7339 (ctxt->sax->reference != NULL)) {
7340 ctxt->sax->reference(ctxt->userData, name);
7341 }
7342 }
7343 ctxt->valid = 0;
7344 }
7345
7346 /*
7347 * [ WFC: Parsed Entity ]
7348 * An entity reference must not contain the name of an
7349 * unparsed entity
7350 */
7351 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7352 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7353 "Entity reference to unparsed entity %s\n", name);
7354 }
7355
7356 /*
7357 * [ WFC: No External Entity References ]
7358 * Attribute values cannot contain direct or indirect
7359 * entity references to external entities.
7360 */
7361 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7362 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
7363 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7364 "Attribute references external entity '%s'\n", name);
7365 }
7366 /*
7367 * [ WFC: No < in Attribute Values ]
7368 * The replacement text of any entity referred to directly or
7369 * indirectly in an attribute value (other than "&lt;") must
7370 * not contain a <.
7371 */
7372 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7373 (ent != NULL) && (ent->content != NULL) &&
Rob Richardsb9ed0172009-01-05 17:28:50 +00007374 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
Daniel Veillard0161e632008-08-28 15:36:32 +00007375 (xmlStrchr(ent->content, '<'))) {
7376 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
7377 "'<' in entity '%s' is not allowed in attributes values\n", name);
7378 }
7379
7380 /*
7381 * Internal check, no parameter entities here ...
7382 */
7383 else {
7384 switch (ent->etype) {
7385 case XML_INTERNAL_PARAMETER_ENTITY:
7386 case XML_EXTERNAL_PARAMETER_ENTITY:
7387 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
7388 "Attempt to reference the parameter entity '%s'\n",
7389 name);
7390 break;
7391 default:
7392 break;
7393 }
7394 }
7395
7396 /*
7397 * [ WFC: No Recursion ]
7398 * A parsed entity must not contain a recursive reference
7399 * to itself, either directly or indirectly.
7400 * Done somewhere else
7401 */
Owen Taylor3473f882001-02-23 17:55:21 +00007402 return(ent);
7403}
7404
7405/**
7406 * xmlParseStringEntityRef:
7407 * @ctxt: an XML parser context
7408 * @str: a pointer to an index in the string
7409 *
7410 * parse ENTITY references declarations, but this version parses it from
7411 * a string value.
7412 *
7413 * [68] EntityRef ::= '&' Name ';'
7414 *
7415 * [ WFC: Entity Declared ]
7416 * In a document without any DTD, a document with only an internal DTD
7417 * subset which contains no parameter entity references, or a document
7418 * with "standalone='yes'", the Name given in the entity reference
7419 * must match that in an entity declaration, except that well-formed
7420 * documents need not declare any of the following entities: amp, lt,
7421 * gt, apos, quot. The declaration of a parameter entity must precede
7422 * any reference to it. Similarly, the declaration of a general entity
7423 * must precede any reference to it which appears in a default value in an
7424 * attribute-list declaration. Note that if entities are declared in the
7425 * external subset or in external parameter entities, a non-validating
7426 * processor is not obligated to read and process their declarations;
7427 * for such documents, the rule that an entity must be declared is a
7428 * well-formedness constraint only if standalone='yes'.
7429 *
7430 * [ WFC: Parsed Entity ]
7431 * An entity reference must not contain the name of an unparsed entity
7432 *
7433 * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
7434 * is updated to the current location in the string.
7435 */
Daniel Veillard8ed10722009-08-20 19:17:36 +02007436static xmlEntityPtr
Owen Taylor3473f882001-02-23 17:55:21 +00007437xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
7438 xmlChar *name;
7439 const xmlChar *ptr;
7440 xmlChar cur;
7441 xmlEntityPtr ent = NULL;
7442
7443 if ((str == NULL) || (*str == NULL))
7444 return(NULL);
7445 ptr = *str;
7446 cur = *ptr;
Daniel Veillard0161e632008-08-28 15:36:32 +00007447 if (cur != '&')
7448 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007449
Daniel Veillard0161e632008-08-28 15:36:32 +00007450 ptr++;
Daniel Veillard0161e632008-08-28 15:36:32 +00007451 name = xmlParseStringName(ctxt, &ptr);
7452 if (name == NULL) {
7453 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7454 "xmlParseStringEntityRef: no name\n");
7455 *str = ptr;
7456 return(NULL);
7457 }
7458 if (*ptr != ';') {
7459 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
Daniel Veillard7f4547c2008-10-03 07:58:23 +00007460 xmlFree(name);
Daniel Veillard0161e632008-08-28 15:36:32 +00007461 *str = ptr;
7462 return(NULL);
7463 }
7464 ptr++;
Owen Taylor3473f882001-02-23 17:55:21 +00007465
Owen Taylor3473f882001-02-23 17:55:21 +00007466
Daniel Veillard0161e632008-08-28 15:36:32 +00007467 /*
7468 * Predefined entites override any extra definition
7469 */
Rob Richardsb9ed0172009-01-05 17:28:50 +00007470 if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7471 ent = xmlGetPredefinedEntity(name);
7472 if (ent != NULL) {
7473 xmlFree(name);
7474 *str = ptr;
7475 return(ent);
7476 }
Daniel Veillard34a7fc32008-10-02 20:55:10 +00007477 }
Owen Taylor3473f882001-02-23 17:55:21 +00007478
Daniel Veillard0161e632008-08-28 15:36:32 +00007479 /*
7480 * Increate the number of entity references parsed
7481 */
7482 ctxt->nbentities++;
Owen Taylor3473f882001-02-23 17:55:21 +00007483
Daniel Veillard0161e632008-08-28 15:36:32 +00007484 /*
7485 * Ask first SAX for entity resolution, otherwise try the
7486 * entities which may have stored in the parser context.
7487 */
7488 if (ctxt->sax != NULL) {
7489 if (ctxt->sax->getEntity != NULL)
7490 ent = ctxt->sax->getEntity(ctxt->userData, name);
Rob Richardsb9ed0172009-01-05 17:28:50 +00007491 if ((ent == NULL) && (ctxt->options & XML_PARSE_OLDSAX))
7492 ent = xmlGetPredefinedEntity(name);
Daniel Veillard0161e632008-08-28 15:36:32 +00007493 if ((ent == NULL) && (ctxt->userData==ctxt)) {
7494 ent = xmlSAX2GetEntity(ctxt, name);
Owen Taylor3473f882001-02-23 17:55:21 +00007495 }
7496 }
Daniel Veillard0161e632008-08-28 15:36:32 +00007497
7498 /*
7499 * [ WFC: Entity Declared ]
7500 * In a document without any DTD, a document with only an
7501 * internal DTD subset which contains no parameter entity
7502 * references, or a document with "standalone='yes'", the
7503 * Name given in the entity reference must match that in an
7504 * entity declaration, except that well-formed documents
7505 * need not declare any of the following entities: amp, lt,
7506 * gt, apos, quot.
7507 * The declaration of a parameter entity must precede any
7508 * reference to it.
7509 * Similarly, the declaration of a general entity must
7510 * precede any reference to it which appears in a default
7511 * value in an attribute-list declaration. Note that if
7512 * entities are declared in the external subset or in
7513 * external parameter entities, a non-validating processor
7514 * is not obligated to read and process their declarations;
7515 * for such documents, the rule that an entity must be
7516 * declared is a well-formedness constraint only if
7517 * standalone='yes'.
7518 */
7519 if (ent == NULL) {
7520 if ((ctxt->standalone == 1) ||
7521 ((ctxt->hasExternalSubset == 0) &&
7522 (ctxt->hasPErefs == 0))) {
7523 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7524 "Entity '%s' not defined\n", name);
7525 } else {
7526 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7527 "Entity '%s' not defined\n",
7528 name);
7529 }
7530 /* TODO ? check regressions ctxt->valid = 0; */
7531 }
7532
7533 /*
7534 * [ WFC: Parsed Entity ]
7535 * An entity reference must not contain the name of an
7536 * unparsed entity
7537 */
7538 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7539 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7540 "Entity reference to unparsed entity %s\n", name);
7541 }
7542
7543 /*
7544 * [ WFC: No External Entity References ]
7545 * Attribute values cannot contain direct or indirect
7546 * entity references to external entities.
7547 */
7548 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7549 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
7550 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7551 "Attribute references external entity '%s'\n", name);
7552 }
7553 /*
7554 * [ WFC: No < in Attribute Values ]
7555 * The replacement text of any entity referred to directly or
7556 * indirectly in an attribute value (other than "&lt;") must
7557 * not contain a <.
7558 */
7559 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7560 (ent != NULL) && (ent->content != NULL) &&
Rob Richardsb9ed0172009-01-05 17:28:50 +00007561 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
Daniel Veillard0161e632008-08-28 15:36:32 +00007562 (xmlStrchr(ent->content, '<'))) {
7563 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
7564 "'<' in entity '%s' is not allowed in attributes values\n",
7565 name);
7566 }
7567
7568 /*
7569 * Internal check, no parameter entities here ...
7570 */
7571 else {
7572 switch (ent->etype) {
7573 case XML_INTERNAL_PARAMETER_ENTITY:
7574 case XML_EXTERNAL_PARAMETER_ENTITY:
7575 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
7576 "Attempt to reference the parameter entity '%s'\n",
7577 name);
7578 break;
7579 default:
7580 break;
7581 }
7582 }
7583
7584 /*
7585 * [ WFC: No Recursion ]
7586 * A parsed entity must not contain a recursive reference
7587 * to itself, either directly or indirectly.
7588 * Done somewhere else
7589 */
7590
7591 xmlFree(name);
Owen Taylor3473f882001-02-23 17:55:21 +00007592 *str = ptr;
7593 return(ent);
7594}
7595
7596/**
7597 * xmlParsePEReference:
7598 * @ctxt: an XML parser context
7599 *
7600 * parse PEReference declarations
7601 * The entity content is handled directly by pushing it's content as
7602 * a new input stream.
7603 *
7604 * [69] PEReference ::= '%' Name ';'
7605 *
7606 * [ WFC: No Recursion ]
7607 * A parsed entity must not contain a recursive
7608 * reference to itself, either directly or indirectly.
7609 *
7610 * [ WFC: Entity Declared ]
7611 * In a document without any DTD, a document with only an internal DTD
7612 * subset which contains no parameter entity references, or a document
7613 * with "standalone='yes'", ... ... The declaration of a parameter
7614 * entity must precede any reference to it...
7615 *
7616 * [ VC: Entity Declared ]
7617 * In a document with an external subset or external parameter entities
7618 * with "standalone='no'", ... ... The declaration of a parameter entity
7619 * must precede any reference to it...
7620 *
7621 * [ WFC: In DTD ]
7622 * Parameter-entity references may only appear in the DTD.
7623 * NOTE: misleading but this is handled.
7624 */
7625void
Daniel Veillard8f597c32003-10-06 08:19:27 +00007626xmlParsePEReference(xmlParserCtxtPtr ctxt)
7627{
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007628 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00007629 xmlEntityPtr entity = NULL;
7630 xmlParserInputPtr input;
7631
Daniel Veillard0161e632008-08-28 15:36:32 +00007632 if (RAW != '%')
7633 return;
7634 NEXT;
7635 name = xmlParseName(ctxt);
7636 if (name == NULL) {
7637 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7638 "xmlParsePEReference: no name\n");
7639 return;
Owen Taylor3473f882001-02-23 17:55:21 +00007640 }
Daniel Veillard0161e632008-08-28 15:36:32 +00007641 if (RAW != ';') {
7642 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7643 return;
7644 }
7645
7646 NEXT;
7647
7648 /*
7649 * Increate the number of entity references parsed
7650 */
7651 ctxt->nbentities++;
7652
7653 /*
7654 * Request the entity from SAX
7655 */
7656 if ((ctxt->sax != NULL) &&
7657 (ctxt->sax->getParameterEntity != NULL))
7658 entity = ctxt->sax->getParameterEntity(ctxt->userData,
7659 name);
7660 if (entity == NULL) {
7661 /*
7662 * [ WFC: Entity Declared ]
7663 * In a document without any DTD, a document with only an
7664 * internal DTD subset which contains no parameter entity
7665 * references, or a document with "standalone='yes'", ...
7666 * ... The declaration of a parameter entity must precede
7667 * any reference to it...
7668 */
7669 if ((ctxt->standalone == 1) ||
7670 ((ctxt->hasExternalSubset == 0) &&
7671 (ctxt->hasPErefs == 0))) {
7672 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7673 "PEReference: %%%s; not found\n",
7674 name);
7675 } else {
7676 /*
7677 * [ VC: Entity Declared ]
7678 * In a document with an external subset or external
7679 * parameter entities with "standalone='no'", ...
7680 * ... The declaration of a parameter entity must
7681 * precede any reference to it...
7682 */
7683 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7684 "PEReference: %%%s; not found\n",
7685 name, NULL);
7686 ctxt->valid = 0;
7687 }
7688 } else {
7689 /*
7690 * Internal checking in case the entity quest barfed
7691 */
7692 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
7693 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
7694 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7695 "Internal: %%%s; is not a parameter entity\n",
7696 name, NULL);
7697 } else if (ctxt->input->free != deallocblankswrapper) {
7698 input = xmlNewBlanksWrapperInputStream(ctxt, entity);
7699 if (xmlPushInput(ctxt, input) < 0)
7700 return;
7701 } else {
7702 /*
7703 * TODO !!!
7704 * handle the extra spaces added before and after
7705 * c.f. http://www.w3.org/TR/REC-xml#as-PE
7706 */
7707 input = xmlNewEntityInputStream(ctxt, entity);
7708 if (xmlPushInput(ctxt, input) < 0)
7709 return;
7710 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
7711 (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
7712 (IS_BLANK_CH(NXT(5)))) {
7713 xmlParseTextDecl(ctxt);
7714 if (ctxt->errNo ==
7715 XML_ERR_UNSUPPORTED_ENCODING) {
7716 /*
7717 * The XML REC instructs us to stop parsing
7718 * right here
7719 */
7720 ctxt->instate = XML_PARSER_EOF;
7721 return;
7722 }
7723 }
7724 }
7725 }
7726 ctxt->hasPErefs = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007727}
7728
7729/**
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00007730 * xmlLoadEntityContent:
7731 * @ctxt: an XML parser context
7732 * @entity: an unloaded system entity
7733 *
7734 * Load the original content of the given system entity from the
7735 * ExternalID/SystemID given. This is to be used for Included in Literal
7736 * http://www.w3.org/TR/REC-xml/#inliteral processing of entities references
7737 *
7738 * Returns 0 in case of success and -1 in case of failure
7739 */
7740static int
7741xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
7742 xmlParserInputPtr input;
7743 xmlBufferPtr buf;
7744 int l, c;
7745 int count = 0;
7746
7747 if ((ctxt == NULL) || (entity == NULL) ||
7748 ((entity->etype != XML_EXTERNAL_PARAMETER_ENTITY) &&
7749 (entity->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY)) ||
7750 (entity->content != NULL)) {
7751 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
7752 "xmlLoadEntityContent parameter error");
7753 return(-1);
7754 }
7755
7756 if (xmlParserDebugEntities)
7757 xmlGenericError(xmlGenericErrorContext,
7758 "Reading %s entity content input\n", entity->name);
7759
7760 buf = xmlBufferCreate();
7761 if (buf == NULL) {
7762 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
7763 "xmlLoadEntityContent parameter error");
7764 return(-1);
7765 }
7766
7767 input = xmlNewEntityInputStream(ctxt, entity);
7768 if (input == NULL) {
7769 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
7770 "xmlLoadEntityContent input error");
7771 xmlBufferFree(buf);
7772 return(-1);
7773 }
7774
7775 /*
7776 * Push the entity as the current input, read char by char
7777 * saving to the buffer until the end of the entity or an error
7778 */
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00007779 if (xmlPushInput(ctxt, input) < 0) {
7780 xmlBufferFree(buf);
7781 return(-1);
7782 }
7783
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00007784 GROW;
7785 c = CUR_CHAR(l);
7786 while ((ctxt->input == input) && (ctxt->input->cur < ctxt->input->end) &&
7787 (IS_CHAR(c))) {
7788 xmlBufferAdd(buf, ctxt->input->cur, l);
7789 if (count++ > 100) {
7790 count = 0;
7791 GROW;
7792 }
7793 NEXTL(l);
7794 c = CUR_CHAR(l);
7795 }
7796
7797 if ((ctxt->input == input) && (ctxt->input->cur >= ctxt->input->end)) {
7798 xmlPopInput(ctxt);
7799 } else if (!IS_CHAR(c)) {
7800 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
7801 "xmlLoadEntityContent: invalid char value %d\n",
7802 c);
7803 xmlBufferFree(buf);
7804 return(-1);
7805 }
7806 entity->content = buf->content;
7807 buf->content = NULL;
7808 xmlBufferFree(buf);
7809
7810 return(0);
7811}
7812
7813/**
Owen Taylor3473f882001-02-23 17:55:21 +00007814 * xmlParseStringPEReference:
7815 * @ctxt: an XML parser context
7816 * @str: a pointer to an index in the string
7817 *
7818 * parse PEReference declarations
7819 *
7820 * [69] PEReference ::= '%' Name ';'
7821 *
7822 * [ WFC: No Recursion ]
7823 * A parsed entity must not contain a recursive
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00007824 * reference to itself, either directly or indirectly.
Owen Taylor3473f882001-02-23 17:55:21 +00007825 *
7826 * [ WFC: Entity Declared ]
7827 * In a document without any DTD, a document with only an internal DTD
7828 * subset which contains no parameter entity references, or a document
7829 * with "standalone='yes'", ... ... The declaration of a parameter
7830 * entity must precede any reference to it...
7831 *
7832 * [ VC: Entity Declared ]
7833 * In a document with an external subset or external parameter entities
7834 * with "standalone='no'", ... ... The declaration of a parameter entity
7835 * must precede any reference to it...
7836 *
7837 * [ WFC: In DTD ]
7838 * Parameter-entity references may only appear in the DTD.
7839 * NOTE: misleading but this is handled.
7840 *
7841 * Returns the string of the entity content.
7842 * str is updated to the current value of the index
7843 */
Daniel Veillard8ed10722009-08-20 19:17:36 +02007844static xmlEntityPtr
Owen Taylor3473f882001-02-23 17:55:21 +00007845xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
7846 const xmlChar *ptr;
7847 xmlChar cur;
7848 xmlChar *name;
7849 xmlEntityPtr entity = NULL;
7850
7851 if ((str == NULL) || (*str == NULL)) return(NULL);
7852 ptr = *str;
7853 cur = *ptr;
Daniel Veillard0161e632008-08-28 15:36:32 +00007854 if (cur != '%')
7855 return(NULL);
7856 ptr++;
Daniel Veillard0161e632008-08-28 15:36:32 +00007857 name = xmlParseStringName(ctxt, &ptr);
7858 if (name == NULL) {
7859 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7860 "xmlParseStringPEReference: no name\n");
7861 *str = ptr;
7862 return(NULL);
7863 }
7864 cur = *ptr;
7865 if (cur != ';') {
7866 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7867 xmlFree(name);
7868 *str = ptr;
7869 return(NULL);
7870 }
7871 ptr++;
7872
7873 /*
7874 * Increate the number of entity references parsed
7875 */
7876 ctxt->nbentities++;
7877
7878 /*
7879 * Request the entity from SAX
7880 */
7881 if ((ctxt->sax != NULL) &&
7882 (ctxt->sax->getParameterEntity != NULL))
7883 entity = ctxt->sax->getParameterEntity(ctxt->userData,
7884 name);
7885 if (entity == NULL) {
7886 /*
7887 * [ WFC: Entity Declared ]
7888 * In a document without any DTD, a document with only an
7889 * internal DTD subset which contains no parameter entity
7890 * references, or a document with "standalone='yes'", ...
7891 * ... The declaration of a parameter entity must precede
7892 * any reference to it...
7893 */
7894 if ((ctxt->standalone == 1) ||
7895 ((ctxt->hasExternalSubset == 0) && (ctxt->hasPErefs == 0))) {
7896 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7897 "PEReference: %%%s; not found\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00007898 } else {
Daniel Veillard0161e632008-08-28 15:36:32 +00007899 /*
7900 * [ VC: Entity Declared ]
7901 * In a document with an external subset or external
7902 * parameter entities with "standalone='no'", ...
7903 * ... The declaration of a parameter entity must
7904 * precede any reference to it...
7905 */
7906 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7907 "PEReference: %%%s; not found\n",
7908 name, NULL);
7909 ctxt->valid = 0;
7910 }
7911 } else {
7912 /*
7913 * Internal checking in case the entity quest barfed
7914 */
7915 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
7916 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
7917 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7918 "%%%s; is not a parameter entity\n",
7919 name, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007920 }
7921 }
Daniel Veillard0161e632008-08-28 15:36:32 +00007922 ctxt->hasPErefs = 1;
7923 xmlFree(name);
Owen Taylor3473f882001-02-23 17:55:21 +00007924 *str = ptr;
7925 return(entity);
7926}
7927
7928/**
7929 * xmlParseDocTypeDecl:
7930 * @ctxt: an XML parser context
7931 *
7932 * parse a DOCTYPE declaration
7933 *
7934 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
7935 * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
7936 *
7937 * [ VC: Root Element Type ]
7938 * The Name in the document type declaration must match the element
7939 * type of the root element.
7940 */
7941
7942void
7943xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007944 const xmlChar *name = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00007945 xmlChar *ExternalID = NULL;
7946 xmlChar *URI = NULL;
7947
7948 /*
7949 * We know that '<!DOCTYPE' has been detected.
7950 */
7951 SKIP(9);
7952
7953 SKIP_BLANKS;
7954
7955 /*
7956 * Parse the DOCTYPE name.
7957 */
7958 name = xmlParseName(ctxt);
7959 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007960 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7961 "xmlParseDocTypeDecl : no DOCTYPE name !\n");
Owen Taylor3473f882001-02-23 17:55:21 +00007962 }
7963 ctxt->intSubName = name;
7964
7965 SKIP_BLANKS;
7966
7967 /*
7968 * Check for SystemID and ExternalID
7969 */
7970 URI = xmlParseExternalID(ctxt, &ExternalID, 1);
7971
7972 if ((URI != NULL) || (ExternalID != NULL)) {
7973 ctxt->hasExternalSubset = 1;
7974 }
7975 ctxt->extSubURI = URI;
7976 ctxt->extSubSystem = ExternalID;
7977
7978 SKIP_BLANKS;
7979
7980 /*
7981 * Create and update the internal subset.
7982 */
7983 if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
7984 (!ctxt->disableSAX))
7985 ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
7986
7987 /*
7988 * Is there any internal subset declarations ?
7989 * they are handled separately in xmlParseInternalSubset()
7990 */
7991 if (RAW == '[')
7992 return;
7993
7994 /*
7995 * We should be at the end of the DOCTYPE declaration.
7996 */
7997 if (RAW != '>') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007998 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007999 }
8000 NEXT;
8001}
8002
8003/**
Daniel Veillardcbaf3992001-12-31 16:16:02 +00008004 * xmlParseInternalSubset:
Owen Taylor3473f882001-02-23 17:55:21 +00008005 * @ctxt: an XML parser context
8006 *
8007 * parse the internal subset declaration
8008 *
8009 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
8010 */
8011
Daniel Veillard56a4cb82001-03-24 17:00:36 +00008012static void
Owen Taylor3473f882001-02-23 17:55:21 +00008013xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
8014 /*
8015 * Is there any DTD definition ?
8016 */
8017 if (RAW == '[') {
8018 ctxt->instate = XML_PARSER_DTD;
8019 NEXT;
8020 /*
8021 * Parse the succession of Markup declarations and
8022 * PEReferences.
8023 * Subsequence (markupdecl | PEReference | S)*
8024 */
8025 while (RAW != ']') {
8026 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00008027 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00008028
8029 SKIP_BLANKS;
8030 xmlParseMarkupDecl(ctxt);
8031 xmlParsePEReference(ctxt);
8032
8033 /*
8034 * Pop-up of finished entities.
8035 */
8036 while ((RAW == 0) && (ctxt->inputNr > 1))
8037 xmlPopInput(ctxt);
8038
8039 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008040 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
Owen Taylor3473f882001-02-23 17:55:21 +00008041 "xmlParseInternalSubset: error detected in Markup declaration\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008042 break;
8043 }
8044 }
8045 if (RAW == ']') {
8046 NEXT;
8047 SKIP_BLANKS;
8048 }
8049 }
8050
8051 /*
8052 * We should be at the end of the DOCTYPE declaration.
8053 */
8054 if (RAW != '>') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008055 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008056 }
8057 NEXT;
8058}
8059
Daniel Veillard81273902003-09-30 00:43:48 +00008060#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +00008061/**
8062 * xmlParseAttribute:
8063 * @ctxt: an XML parser context
8064 * @value: a xmlChar ** used to store the value of the attribute
8065 *
8066 * parse an attribute
8067 *
8068 * [41] Attribute ::= Name Eq AttValue
8069 *
8070 * [ WFC: No External Entity References ]
8071 * Attribute values cannot contain direct or indirect entity references
8072 * to external entities.
8073 *
8074 * [ WFC: No < in Attribute Values ]
8075 * The replacement text of any entity referred to directly or indirectly in
8076 * an attribute value (other than "&lt;") must not contain a <.
8077 *
8078 * [ VC: Attribute Value Type ]
8079 * The attribute must have been declared; the value must be of the type
8080 * declared for it.
8081 *
8082 * [25] Eq ::= S? '=' S?
8083 *
8084 * With namespace:
8085 *
8086 * [NS 11] Attribute ::= QName Eq AttValue
8087 *
8088 * Also the case QName == xmlns:??? is handled independently as a namespace
8089 * definition.
8090 *
8091 * Returns the attribute name, and the value in *value.
8092 */
8093
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008094const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00008095xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008096 const xmlChar *name;
8097 xmlChar *val;
Owen Taylor3473f882001-02-23 17:55:21 +00008098
8099 *value = NULL;
Daniel Veillard878eab02002-02-19 13:46:09 +00008100 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00008101 name = xmlParseName(ctxt);
8102 if (name == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00008103 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008104 "error parsing attribute name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008105 return(NULL);
8106 }
8107
8108 /*
8109 * read the value
8110 */
8111 SKIP_BLANKS;
8112 if (RAW == '=') {
8113 NEXT;
8114 SKIP_BLANKS;
8115 val = xmlParseAttValue(ctxt);
8116 ctxt->instate = XML_PARSER_CONTENT;
8117 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00008118 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
Owen Taylor3473f882001-02-23 17:55:21 +00008119 "Specification mandate value for attribute %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00008120 return(NULL);
8121 }
8122
8123 /*
8124 * Check that xml:lang conforms to the specification
8125 * No more registered as an error, just generate a warning now
8126 * since this was deprecated in XML second edition
8127 */
8128 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
8129 if (!xmlCheckLanguageID(val)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00008130 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
8131 "Malformed value for xml:lang : %s\n",
8132 val, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008133 }
8134 }
8135
8136 /*
8137 * Check that xml:space conforms to the specification
8138 */
8139 if (xmlStrEqual(name, BAD_CAST "xml:space")) {
8140 if (xmlStrEqual(val, BAD_CAST "default"))
8141 *(ctxt->space) = 0;
8142 else if (xmlStrEqual(val, BAD_CAST "preserve"))
8143 *(ctxt->space) = 1;
8144 else {
Daniel Veillardd8925572005-06-08 22:34:55 +00008145 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
Daniel Veillard642104e2003-03-26 16:32:05 +00008146"Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
Daniel Veillardd8925572005-06-08 22:34:55 +00008147 val, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008148 }
8149 }
8150
8151 *value = val;
8152 return(name);
8153}
8154
8155/**
8156 * xmlParseStartTag:
8157 * @ctxt: an XML parser context
8158 *
8159 * parse a start of tag either for rule element or
8160 * EmptyElement. In both case we don't parse the tag closing chars.
8161 *
8162 * [40] STag ::= '<' Name (S Attribute)* S? '>'
8163 *
8164 * [ WFC: Unique Att Spec ]
8165 * No attribute name may appear more than once in the same start-tag or
8166 * empty-element tag.
8167 *
8168 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
8169 *
8170 * [ WFC: Unique Att Spec ]
8171 * No attribute name may appear more than once in the same start-tag or
8172 * empty-element tag.
8173 *
8174 * With namespace:
8175 *
8176 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
8177 *
8178 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
8179 *
8180 * Returns the element name parsed
8181 */
8182
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008183const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00008184xmlParseStartTag(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008185 const xmlChar *name;
8186 const xmlChar *attname;
Owen Taylor3473f882001-02-23 17:55:21 +00008187 xmlChar *attvalue;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008188 const xmlChar **atts = ctxt->atts;
Owen Taylor3473f882001-02-23 17:55:21 +00008189 int nbatts = 0;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008190 int maxatts = ctxt->maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00008191 int i;
8192
8193 if (RAW != '<') return(NULL);
Daniel Veillard21a0f912001-02-25 19:54:14 +00008194 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00008195
8196 name = xmlParseName(ctxt);
8197 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008198 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00008199 "xmlParseStartTag: invalid element name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008200 return(NULL);
8201 }
8202
8203 /*
8204 * Now parse the attributes, it ends up with the ending
8205 *
8206 * (S Attribute)* S?
8207 */
8208 SKIP_BLANKS;
8209 GROW;
8210
Daniel Veillard21a0f912001-02-25 19:54:14 +00008211 while ((RAW != '>') &&
8212 ((RAW != '/') || (NXT(1) != '>')) &&
Daniel Veillard73b013f2003-09-30 12:36:01 +00008213 (IS_BYTE_CHAR(RAW))) {
Owen Taylor3473f882001-02-23 17:55:21 +00008214 const xmlChar *q = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00008215 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00008216
8217 attname = xmlParseAttribute(ctxt, &attvalue);
8218 if ((attname != NULL) && (attvalue != NULL)) {
8219 /*
8220 * [ WFC: Unique Att Spec ]
8221 * No attribute name may appear more than once in the same
8222 * start-tag or empty-element tag.
8223 */
8224 for (i = 0; i < nbatts;i += 2) {
8225 if (xmlStrEqual(atts[i], attname)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008226 xmlErrAttributeDup(ctxt, NULL, attname);
Owen Taylor3473f882001-02-23 17:55:21 +00008227 xmlFree(attvalue);
8228 goto failed;
8229 }
8230 }
Owen Taylor3473f882001-02-23 17:55:21 +00008231 /*
8232 * Add the pair to atts
8233 */
8234 if (atts == NULL) {
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008235 maxatts = 22; /* allow for 10 attrs by default */
8236 atts = (const xmlChar **)
8237 xmlMalloc(maxatts * sizeof(xmlChar *));
Owen Taylor3473f882001-02-23 17:55:21 +00008238 if (atts == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008239 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008240 if (attvalue != NULL)
8241 xmlFree(attvalue);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008242 goto failed;
Owen Taylor3473f882001-02-23 17:55:21 +00008243 }
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008244 ctxt->atts = atts;
8245 ctxt->maxatts = maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00008246 } else if (nbatts + 4 > maxatts) {
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008247 const xmlChar **n;
8248
Owen Taylor3473f882001-02-23 17:55:21 +00008249 maxatts *= 2;
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008250 n = (const xmlChar **) xmlRealloc((void *) atts,
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008251 maxatts * sizeof(const xmlChar *));
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008252 if (n == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008253 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008254 if (attvalue != NULL)
8255 xmlFree(attvalue);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008256 goto failed;
Owen Taylor3473f882001-02-23 17:55:21 +00008257 }
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008258 atts = n;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008259 ctxt->atts = atts;
8260 ctxt->maxatts = maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00008261 }
8262 atts[nbatts++] = attname;
8263 atts[nbatts++] = attvalue;
8264 atts[nbatts] = NULL;
8265 atts[nbatts + 1] = NULL;
8266 } else {
Owen Taylor3473f882001-02-23 17:55:21 +00008267 if (attvalue != NULL)
8268 xmlFree(attvalue);
8269 }
8270
8271failed:
8272
Daniel Veillard3772de32002-12-17 10:31:45 +00008273 GROW
Owen Taylor3473f882001-02-23 17:55:21 +00008274 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
8275 break;
William M. Brack76e95df2003-10-18 16:20:14 +00008276 if (!IS_BLANK_CH(RAW)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008277 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
8278 "attributes construct error\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008279 }
8280 SKIP_BLANKS;
Daniel Veillard02111c12003-02-24 19:14:52 +00008281 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
8282 (attname == NULL) && (attvalue == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008283 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
8284 "xmlParseStartTag: problem parsing attributes\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008285 break;
8286 }
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008287 SHRINK;
Owen Taylor3473f882001-02-23 17:55:21 +00008288 GROW;
8289 }
8290
8291 /*
8292 * SAX: Start of Element !
8293 */
8294 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008295 (!ctxt->disableSAX)) {
8296 if (nbatts > 0)
8297 ctxt->sax->startElement(ctxt->userData, name, atts);
8298 else
8299 ctxt->sax->startElement(ctxt->userData, name, NULL);
8300 }
Owen Taylor3473f882001-02-23 17:55:21 +00008301
8302 if (atts != NULL) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008303 /* Free only the content strings */
8304 for (i = 1;i < nbatts;i+=2)
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008305 if (atts[i] != NULL)
8306 xmlFree((xmlChar *) atts[i]);
Owen Taylor3473f882001-02-23 17:55:21 +00008307 }
8308 return(name);
8309}
8310
8311/**
Daniel Veillard0fb18932003-09-07 09:14:37 +00008312 * xmlParseEndTag1:
Owen Taylor3473f882001-02-23 17:55:21 +00008313 * @ctxt: an XML parser context
Daniel Veillard0fb18932003-09-07 09:14:37 +00008314 * @line: line of the start tag
8315 * @nsNr: number of namespaces on the start tag
Owen Taylor3473f882001-02-23 17:55:21 +00008316 *
8317 * parse an end of tag
8318 *
8319 * [42] ETag ::= '</' Name S? '>'
8320 *
8321 * With namespace
8322 *
8323 * [NS 9] ETag ::= '</' QName S? '>'
8324 */
8325
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00008326static void
Daniel Veillard0fb18932003-09-07 09:14:37 +00008327xmlParseEndTag1(xmlParserCtxtPtr ctxt, int line) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008328 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00008329
8330 GROW;
8331 if ((RAW != '<') || (NXT(1) != '/')) {
Daniel Veillardf403d292003-10-05 13:51:35 +00008332 xmlFatalErrMsg(ctxt, XML_ERR_LTSLASH_REQUIRED,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008333 "xmlParseEndTag: '</' not found\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008334 return;
8335 }
8336 SKIP(2);
8337
Daniel Veillard46de64e2002-05-29 08:21:33 +00008338 name = xmlParseNameAndCompare(ctxt,ctxt->name);
Owen Taylor3473f882001-02-23 17:55:21 +00008339
8340 /*
8341 * We should definitely be at the ending "S? '>'" part
8342 */
8343 GROW;
8344 SKIP_BLANKS;
Daniel Veillard73b013f2003-09-30 12:36:01 +00008345 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008346 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008347 } else
Daniel Veillard21a0f912001-02-25 19:54:14 +00008348 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00008349
8350 /*
8351 * [ WFC: Element Type Match ]
8352 * The Name in an element's end-tag must match the element type in the
8353 * start-tag.
8354 *
8355 */
Daniel Veillard46de64e2002-05-29 08:21:33 +00008356 if (name != (xmlChar*)1) {
Daniel Veillardf403d292003-10-05 13:51:35 +00008357 if (name == NULL) name = BAD_CAST "unparseable";
8358 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00008359 "Opening and ending tag mismatch: %s line %d and %s\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00008360 ctxt->name, line, name);
Owen Taylor3473f882001-02-23 17:55:21 +00008361 }
8362
8363 /*
8364 * SAX: End of Tag
8365 */
8366 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
8367 (!ctxt->disableSAX))
Daniel Veillard46de64e2002-05-29 08:21:33 +00008368 ctxt->sax->endElement(ctxt->userData, ctxt->name);
Owen Taylor3473f882001-02-23 17:55:21 +00008369
Daniel Veillarde57ec792003-09-10 10:50:59 +00008370 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00008371 spacePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00008372 return;
8373}
8374
8375/**
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00008376 * xmlParseEndTag:
8377 * @ctxt: an XML parser context
8378 *
8379 * parse an end of tag
8380 *
8381 * [42] ETag ::= '</' Name S? '>'
8382 *
8383 * With namespace
8384 *
8385 * [NS 9] ETag ::= '</' QName S? '>'
8386 */
8387
8388void
8389xmlParseEndTag(xmlParserCtxtPtr ctxt) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00008390 xmlParseEndTag1(ctxt, 0);
8391}
Daniel Veillard81273902003-09-30 00:43:48 +00008392#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillard0fb18932003-09-07 09:14:37 +00008393
8394/************************************************************************
8395 * *
8396 * SAX 2 specific operations *
8397 * *
8398 ************************************************************************/
8399
Daniel Veillard0fb18932003-09-07 09:14:37 +00008400/*
8401 * xmlGetNamespace:
8402 * @ctxt: an XML parser context
8403 * @prefix: the prefix to lookup
8404 *
8405 * Lookup the namespace name for the @prefix (which ca be NULL)
8406 * The prefix must come from the @ctxt->dict dictionnary
8407 *
8408 * Returns the namespace name or NULL if not bound
8409 */
8410static const xmlChar *
8411xmlGetNamespace(xmlParserCtxtPtr ctxt, const xmlChar *prefix) {
8412 int i;
8413
Daniel Veillarde57ec792003-09-10 10:50:59 +00008414 if (prefix == ctxt->str_xml) return(ctxt->str_xml_ns);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008415 for (i = ctxt->nsNr - 2;i >= 0;i-=2)
Daniel Veillarde57ec792003-09-10 10:50:59 +00008416 if (ctxt->nsTab[i] == prefix) {
8417 if ((prefix == NULL) && (*ctxt->nsTab[i + 1] == 0))
8418 return(NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008419 return(ctxt->nsTab[i + 1]);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008420 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008421 return(NULL);
8422}
8423
8424/**
Daniel Veillard0fb18932003-09-07 09:14:37 +00008425 * xmlParseQName:
8426 * @ctxt: an XML parser context
8427 * @prefix: pointer to store the prefix part
8428 *
8429 * parse an XML Namespace QName
8430 *
8431 * [6] QName ::= (Prefix ':')? LocalPart
8432 * [7] Prefix ::= NCName
8433 * [8] LocalPart ::= NCName
8434 *
8435 * Returns the Name parsed or NULL
8436 */
8437
8438static const xmlChar *
8439xmlParseQName(xmlParserCtxtPtr ctxt, const xmlChar **prefix) {
8440 const xmlChar *l, *p;
8441
8442 GROW;
8443
8444 l = xmlParseNCName(ctxt);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008445 if (l == NULL) {
8446 if (CUR == ':') {
8447 l = xmlParseName(ctxt);
8448 if (l != NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008449 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8450 "Failed to parse QName '%s'\n", l, NULL, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008451 *prefix = NULL;
8452 return(l);
8453 }
8454 }
8455 return(NULL);
8456 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008457 if (CUR == ':') {
8458 NEXT;
8459 p = l;
8460 l = xmlParseNCName(ctxt);
8461 if (l == NULL) {
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008462 xmlChar *tmp;
8463
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008464 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8465 "Failed to parse QName '%s:'\n", p, NULL, NULL);
Daniel Veillardae0765b2008-07-31 19:54:59 +00008466 l = xmlParseNmtoken(ctxt);
8467 if (l == NULL)
8468 tmp = xmlBuildQName(BAD_CAST "", p, NULL, 0);
8469 else {
8470 tmp = xmlBuildQName(l, p, NULL, 0);
8471 xmlFree((char *)l);
8472 }
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008473 p = xmlDictLookup(ctxt->dict, tmp, -1);
8474 if (tmp != NULL) xmlFree(tmp);
8475 *prefix = NULL;
8476 return(p);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008477 }
8478 if (CUR == ':') {
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008479 xmlChar *tmp;
8480
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008481 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8482 "Failed to parse QName '%s:%s:'\n", p, l, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008483 NEXT;
8484 tmp = (xmlChar *) xmlParseName(ctxt);
8485 if (tmp != NULL) {
8486 tmp = xmlBuildQName(tmp, l, NULL, 0);
8487 l = xmlDictLookup(ctxt->dict, tmp, -1);
8488 if (tmp != NULL) xmlFree(tmp);
8489 *prefix = p;
8490 return(l);
8491 }
8492 tmp = xmlBuildQName(BAD_CAST "", l, NULL, 0);
8493 l = xmlDictLookup(ctxt->dict, tmp, -1);
8494 if (tmp != NULL) xmlFree(tmp);
8495 *prefix = p;
8496 return(l);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008497 }
8498 *prefix = p;
8499 } else
8500 *prefix = NULL;
8501 return(l);
8502}
8503
8504/**
8505 * xmlParseQNameAndCompare:
8506 * @ctxt: an XML parser context
8507 * @name: the localname
8508 * @prefix: the prefix, if any.
8509 *
8510 * parse an XML name and compares for match
8511 * (specialized for endtag parsing)
8512 *
8513 * Returns NULL for an illegal name, (xmlChar*) 1 for success
8514 * and the name for mismatch
8515 */
8516
8517static const xmlChar *
8518xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *name,
8519 xmlChar const *prefix) {
Daniel Veillardd44b9362009-09-07 12:15:08 +02008520 const xmlChar *cmp;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008521 const xmlChar *in;
8522 const xmlChar *ret;
8523 const xmlChar *prefix2;
8524
8525 if (prefix == NULL) return(xmlParseNameAndCompare(ctxt, name));
8526
8527 GROW;
8528 in = ctxt->input->cur;
Daniel Veillardd44b9362009-09-07 12:15:08 +02008529
Daniel Veillard0fb18932003-09-07 09:14:37 +00008530 cmp = prefix;
8531 while (*in != 0 && *in == *cmp) {
8532 ++in;
8533 ++cmp;
8534 }
8535 if ((*cmp == 0) && (*in == ':')) {
8536 in++;
8537 cmp = name;
8538 while (*in != 0 && *in == *cmp) {
8539 ++in;
8540 ++cmp;
8541 }
William M. Brack76e95df2003-10-18 16:20:14 +00008542 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00008543 /* success */
8544 ctxt->input->cur = in;
8545 return((const xmlChar*) 1);
8546 }
8547 }
8548 /*
8549 * all strings coms from the dictionary, equality can be done directly
8550 */
8551 ret = xmlParseQName (ctxt, &prefix2);
8552 if ((ret == name) && (prefix == prefix2))
8553 return((const xmlChar*) 1);
8554 return ret;
8555}
8556
8557/**
8558 * xmlParseAttValueInternal:
8559 * @ctxt: an XML parser context
8560 * @len: attribute len result
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008561 * @alloc: whether the attribute was reallocated as a new string
8562 * @normalize: if 1 then further non-CDATA normalization must be done
Daniel Veillard0fb18932003-09-07 09:14:37 +00008563 *
8564 * parse a value for an attribute.
8565 * NOTE: if no normalization is needed, the routine will return pointers
8566 * directly from the data buffer.
8567 *
8568 * 3.3.3 Attribute-Value Normalization:
8569 * Before the value of an attribute is passed to the application or
8570 * checked for validity, the XML processor must normalize it as follows:
8571 * - a character reference is processed by appending the referenced
8572 * character to the attribute value
8573 * - an entity reference is processed by recursively processing the
8574 * replacement text of the entity
8575 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
8576 * appending #x20 to the normalized value, except that only a single
8577 * #x20 is appended for a "#xD#xA" sequence that is part of an external
8578 * parsed entity or the literal entity value of an internal parsed entity
8579 * - other characters are processed by appending them to the normalized value
8580 * If the declared value is not CDATA, then the XML processor must further
8581 * process the normalized attribute value by discarding any leading and
8582 * trailing space (#x20) characters, and by replacing sequences of space
8583 * (#x20) characters by a single space (#x20) character.
8584 * All attributes for which no declaration has been read should be treated
8585 * by a non-validating parser as if declared CDATA.
8586 *
8587 * Returns the AttValue parsed or NULL. The value has to be freed by the
8588 * caller if it was copied, this can be detected by val[*len] == 0.
8589 */
8590
8591static xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008592xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *len, int *alloc,
8593 int normalize)
Daniel Veillarde57ec792003-09-10 10:50:59 +00008594{
Daniel Veillard0fb18932003-09-07 09:14:37 +00008595 xmlChar limit = 0;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008596 const xmlChar *in = NULL, *start, *end, *last;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008597 xmlChar *ret = NULL;
8598
8599 GROW;
8600 in = (xmlChar *) CUR_PTR;
8601 if (*in != '"' && *in != '\'') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008602 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008603 return (NULL);
8604 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008605 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008606
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008607 /*
8608 * try to handle in this routine the most common case where no
8609 * allocation of a new string is required and where content is
8610 * pure ASCII.
8611 */
8612 limit = *in++;
8613 end = ctxt->input->end;
8614 start = in;
8615 if (in >= end) {
8616 const xmlChar *oldbase = ctxt->input->base;
8617 GROW;
8618 if (oldbase != ctxt->input->base) {
8619 long delta = ctxt->input->base - oldbase;
8620 start = start + delta;
8621 in = in + delta;
8622 }
8623 end = ctxt->input->end;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008624 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008625 if (normalize) {
8626 /*
8627 * Skip any leading spaces
8628 */
8629 while ((in < end) && (*in != limit) &&
8630 ((*in == 0x20) || (*in == 0x9) ||
8631 (*in == 0xA) || (*in == 0xD))) {
8632 in++;
8633 start = in;
8634 if (in >= end) {
8635 const xmlChar *oldbase = ctxt->input->base;
8636 GROW;
8637 if (oldbase != ctxt->input->base) {
8638 long delta = ctxt->input->base - oldbase;
8639 start = start + delta;
8640 in = in + delta;
8641 }
8642 end = ctxt->input->end;
8643 }
8644 }
8645 while ((in < end) && (*in != limit) && (*in >= 0x20) &&
8646 (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
8647 if ((*in++ == 0x20) && (*in == 0x20)) break;
8648 if (in >= end) {
8649 const xmlChar *oldbase = ctxt->input->base;
8650 GROW;
8651 if (oldbase != ctxt->input->base) {
8652 long delta = ctxt->input->base - oldbase;
8653 start = start + delta;
8654 in = in + delta;
8655 }
8656 end = ctxt->input->end;
8657 }
8658 }
8659 last = in;
8660 /*
8661 * skip the trailing blanks
8662 */
Daniel Veillardc6e20e42003-09-11 16:30:26 +00008663 while ((last[-1] == 0x20) && (last > start)) last--;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008664 while ((in < end) && (*in != limit) &&
8665 ((*in == 0x20) || (*in == 0x9) ||
8666 (*in == 0xA) || (*in == 0xD))) {
8667 in++;
8668 if (in >= end) {
8669 const xmlChar *oldbase = ctxt->input->base;
8670 GROW;
8671 if (oldbase != ctxt->input->base) {
8672 long delta = ctxt->input->base - oldbase;
8673 start = start + delta;
8674 in = in + delta;
8675 last = last + delta;
8676 }
8677 end = ctxt->input->end;
8678 }
8679 }
8680 if (*in != limit) goto need_complex;
8681 } else {
8682 while ((in < end) && (*in != limit) && (*in >= 0x20) &&
8683 (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
8684 in++;
8685 if (in >= end) {
8686 const xmlChar *oldbase = ctxt->input->base;
8687 GROW;
8688 if (oldbase != ctxt->input->base) {
8689 long delta = ctxt->input->base - oldbase;
8690 start = start + delta;
8691 in = in + delta;
8692 }
8693 end = ctxt->input->end;
8694 }
8695 }
8696 last = in;
8697 if (*in != limit) goto need_complex;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008698 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008699 in++;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008700 if (len != NULL) {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008701 *len = last - start;
8702 ret = (xmlChar *) start;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008703 } else {
Daniel Veillarde57ec792003-09-10 10:50:59 +00008704 if (alloc) *alloc = 1;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008705 ret = xmlStrndup(start, last - start);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008706 }
8707 CUR_PTR = in;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008708 if (alloc) *alloc = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008709 return ret;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008710need_complex:
8711 if (alloc) *alloc = 1;
8712 return xmlParseAttValueComplex(ctxt, len, normalize);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008713}
8714
8715/**
8716 * xmlParseAttribute2:
8717 * @ctxt: an XML parser context
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008718 * @pref: the element prefix
8719 * @elem: the element name
8720 * @prefix: a xmlChar ** used to store the value of the attribute prefix
Daniel Veillard0fb18932003-09-07 09:14:37 +00008721 * @value: a xmlChar ** used to store the value of the attribute
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008722 * @len: an int * to save the length of the attribute
8723 * @alloc: an int * to indicate if the attribute was allocated
Daniel Veillard0fb18932003-09-07 09:14:37 +00008724 *
8725 * parse an attribute in the new SAX2 framework.
8726 *
8727 * Returns the attribute name, and the value in *value, .
8728 */
8729
8730static const xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008731xmlParseAttribute2(xmlParserCtxtPtr ctxt,
Daniel Veillard97c9ce22008-03-25 16:52:41 +00008732 const xmlChar * pref, const xmlChar * elem,
8733 const xmlChar ** prefix, xmlChar ** value,
8734 int *len, int *alloc)
8735{
Daniel Veillard0fb18932003-09-07 09:14:37 +00008736 const xmlChar *name;
Daniel Veillardd8925572005-06-08 22:34:55 +00008737 xmlChar *val, *internal_val = NULL;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008738 int normalize = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008739
8740 *value = NULL;
8741 GROW;
8742 name = xmlParseQName(ctxt, prefix);
8743 if (name == NULL) {
Daniel Veillard97c9ce22008-03-25 16:52:41 +00008744 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8745 "error parsing attribute name\n");
8746 return (NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008747 }
8748
8749 /*
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008750 * get the type if needed
8751 */
8752 if (ctxt->attsSpecial != NULL) {
8753 int type;
8754
8755 type = (int) (long) xmlHashQLookup2(ctxt->attsSpecial,
Daniel Veillard97c9ce22008-03-25 16:52:41 +00008756 pref, elem, *prefix, name);
8757 if (type != 0)
8758 normalize = 1;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008759 }
8760
8761 /*
Daniel Veillard0fb18932003-09-07 09:14:37 +00008762 * read the value
8763 */
8764 SKIP_BLANKS;
8765 if (RAW == '=') {
8766 NEXT;
Daniel Veillard97c9ce22008-03-25 16:52:41 +00008767 SKIP_BLANKS;
8768 val = xmlParseAttValueInternal(ctxt, len, alloc, normalize);
8769 if (normalize) {
8770 /*
8771 * Sometimes a second normalisation pass for spaces is needed
8772 * but that only happens if charrefs or entities refernces
8773 * have been used in the attribute value, i.e. the attribute
8774 * value have been extracted in an allocated string already.
8775 */
8776 if (*alloc) {
8777 const xmlChar *val2;
8778
8779 val2 = xmlAttrNormalizeSpace2(ctxt, val, len);
Daniel Veillardae0765b2008-07-31 19:54:59 +00008780 if ((val2 != NULL) && (val2 != val)) {
Daniel Veillard97c9ce22008-03-25 16:52:41 +00008781 xmlFree(val);
Daniel Veillard6a31b832008-03-26 14:06:44 +00008782 val = (xmlChar *) val2;
Daniel Veillard97c9ce22008-03-25 16:52:41 +00008783 }
8784 }
8785 }
8786 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008787 } else {
Daniel Veillard97c9ce22008-03-25 16:52:41 +00008788 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
8789 "Specification mandate value for attribute %s\n",
8790 name);
8791 return (NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008792 }
8793
Daniel Veillard97c9ce22008-03-25 16:52:41 +00008794 if (*prefix == ctxt->str_xml) {
8795 /*
8796 * Check that xml:lang conforms to the specification
8797 * No more registered as an error, just generate a warning now
8798 * since this was deprecated in XML second edition
8799 */
8800 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "lang"))) {
8801 internal_val = xmlStrndup(val, *len);
8802 if (!xmlCheckLanguageID(internal_val)) {
8803 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
8804 "Malformed value for xml:lang : %s\n",
8805 internal_val, NULL);
8806 }
8807 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008808
Daniel Veillard97c9ce22008-03-25 16:52:41 +00008809 /*
8810 * Check that xml:space conforms to the specification
8811 */
8812 if (xmlStrEqual(name, BAD_CAST "space")) {
8813 internal_val = xmlStrndup(val, *len);
8814 if (xmlStrEqual(internal_val, BAD_CAST "default"))
8815 *(ctxt->space) = 0;
8816 else if (xmlStrEqual(internal_val, BAD_CAST "preserve"))
8817 *(ctxt->space) = 1;
8818 else {
8819 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
8820 "Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
8821 internal_val, NULL);
8822 }
8823 }
8824 if (internal_val) {
8825 xmlFree(internal_val);
8826 }
8827 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008828
8829 *value = val;
Daniel Veillard97c9ce22008-03-25 16:52:41 +00008830 return (name);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008831}
Daniel Veillard0fb18932003-09-07 09:14:37 +00008832/**
8833 * xmlParseStartTag2:
8834 * @ctxt: an XML parser context
8835 *
8836 * parse a start of tag either for rule element or
8837 * EmptyElement. In both case we don't parse the tag closing chars.
8838 * This routine is called when running SAX2 parsing
8839 *
8840 * [40] STag ::= '<' Name (S Attribute)* S? '>'
8841 *
8842 * [ WFC: Unique Att Spec ]
8843 * No attribute name may appear more than once in the same start-tag or
8844 * empty-element tag.
8845 *
8846 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
8847 *
8848 * [ WFC: Unique Att Spec ]
8849 * No attribute name may appear more than once in the same start-tag or
8850 * empty-element tag.
8851 *
8852 * With namespace:
8853 *
8854 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
8855 *
8856 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
8857 *
8858 * Returns the element name parsed
8859 */
8860
8861static const xmlChar *
8862xmlParseStartTag2(xmlParserCtxtPtr ctxt, const xmlChar **pref,
Daniel Veillardc82c57e2004-01-12 16:24:34 +00008863 const xmlChar **URI, int *tlen) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00008864 const xmlChar *localname;
8865 const xmlChar *prefix;
8866 const xmlChar *attname;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008867 const xmlChar *aprefix;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008868 const xmlChar *nsname;
8869 xmlChar *attvalue;
8870 const xmlChar **atts = ctxt->atts;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008871 int maxatts = ctxt->maxatts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008872 int nratts, nbatts, nbdef;
Daniel Veillarddcec6722006-10-15 20:32:53 +00008873 int i, j, nbNs, attval, oldline, oldcol;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008874 const xmlChar *base;
8875 unsigned long cur;
Daniel Veillard365cf672005-06-09 08:18:24 +00008876 int nsNr = ctxt->nsNr;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008877
8878 if (RAW != '<') return(NULL);
8879 NEXT1;
8880
8881 /*
8882 * NOTE: it is crucial with the SAX2 API to never call SHRINK beyond that
8883 * point since the attribute values may be stored as pointers to
8884 * the buffer and calling SHRINK would destroy them !
8885 * The Shrinking is only possible once the full set of attribute
8886 * callbacks have been done.
8887 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00008888reparse:
Daniel Veillard0fb18932003-09-07 09:14:37 +00008889 SHRINK;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008890 base = ctxt->input->base;
8891 cur = ctxt->input->cur - ctxt->input->base;
Daniel Veillarddcec6722006-10-15 20:32:53 +00008892 oldline = ctxt->input->line;
8893 oldcol = ctxt->input->col;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008894 nbatts = 0;
8895 nratts = 0;
8896 nbdef = 0;
8897 nbNs = 0;
8898 attval = 0;
Daniel Veillard365cf672005-06-09 08:18:24 +00008899 /* Forget any namespaces added during an earlier parse of this element. */
8900 ctxt->nsNr = nsNr;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008901
8902 localname = xmlParseQName(ctxt, &prefix);
8903 if (localname == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008904 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8905 "StartTag: invalid element name\n");
Daniel Veillard0fb18932003-09-07 09:14:37 +00008906 return(NULL);
8907 }
Daniel Veillardc82c57e2004-01-12 16:24:34 +00008908 *tlen = ctxt->input->cur - ctxt->input->base - cur;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008909
8910 /*
8911 * Now parse the attributes, it ends up with the ending
8912 *
8913 * (S Attribute)* S?
8914 */
8915 SKIP_BLANKS;
8916 GROW;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008917 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008918
8919 while ((RAW != '>') &&
8920 ((RAW != '/') || (NXT(1) != '>')) &&
Daniel Veillard73b013f2003-09-30 12:36:01 +00008921 (IS_BYTE_CHAR(RAW))) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00008922 const xmlChar *q = CUR_PTR;
8923 unsigned int cons = ctxt->input->consumed;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008924 int len = -1, alloc = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008925
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008926 attname = xmlParseAttribute2(ctxt, prefix, localname,
8927 &aprefix, &attvalue, &len, &alloc);
Daniel Veillarddcec6722006-10-15 20:32:53 +00008928 if (ctxt->input->base != base) {
8929 if ((attvalue != NULL) && (alloc != 0))
8930 xmlFree(attvalue);
8931 attvalue = NULL;
8932 goto base_changed;
8933 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008934 if ((attname != NULL) && (attvalue != NULL)) {
8935 if (len < 0) len = xmlStrlen(attvalue);
8936 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00008937 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
8938 xmlURIPtr uri;
8939
8940 if (*URL != 0) {
8941 uri = xmlParseURI((const char *) URL);
8942 if (uri == NULL) {
Daniel Veillard37334572008-07-31 08:20:02 +00008943 xmlNsErr(ctxt, XML_WAR_NS_URI,
8944 "xmlns: '%s' is not a valid URI\n",
8945 URL, NULL, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008946 } else {
Daniel Veillardda3fee42008-09-01 13:08:57 +00008947 if (uri->scheme == NULL) {
Daniel Veillard37334572008-07-31 08:20:02 +00008948 xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
8949 "xmlns: URI %s is not absolute\n",
8950 URL, NULL, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008951 }
8952 xmlFreeURI(uri);
8953 }
Daniel Veillard37334572008-07-31 08:20:02 +00008954 if (URL == ctxt->str_xml_ns) {
8955 if (attname != ctxt->str_xml) {
8956 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8957 "xml namespace URI cannot be the default namespace\n",
8958 NULL, NULL, NULL);
8959 }
8960 goto skip_default_ns;
8961 }
8962 if ((len == 29) &&
8963 (xmlStrEqual(URL,
8964 BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
8965 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8966 "reuse of the xmlns namespace name is forbidden\n",
8967 NULL, NULL, NULL);
8968 goto skip_default_ns;
8969 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00008970 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008971 /*
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008972 * check that it's not a defined namespace
Daniel Veillard0fb18932003-09-07 09:14:37 +00008973 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008974 for (j = 1;j <= nbNs;j++)
8975 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
8976 break;
8977 if (j <= nbNs)
8978 xmlErrAttributeDup(ctxt, NULL, attname);
8979 else
8980 if (nsPush(ctxt, NULL, URL) > 0) nbNs++;
Daniel Veillard37334572008-07-31 08:20:02 +00008981skip_default_ns:
Daniel Veillarde57ec792003-09-10 10:50:59 +00008982 if (alloc != 0) xmlFree(attvalue);
8983 SKIP_BLANKS;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008984 continue;
8985 }
8986 if (aprefix == ctxt->str_xmlns) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00008987 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
8988 xmlURIPtr uri;
8989
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008990 if (attname == ctxt->str_xml) {
8991 if (URL != ctxt->str_xml_ns) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008992 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8993 "xml namespace prefix mapped to wrong URI\n",
8994 NULL, NULL, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008995 }
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008996 /*
8997 * Do not keep a namespace definition node
8998 */
Daniel Veillard37334572008-07-31 08:20:02 +00008999 goto skip_ns;
Daniel Veillard3b7840c2003-09-11 23:42:01 +00009000 }
Daniel Veillard37334572008-07-31 08:20:02 +00009001 if (URL == ctxt->str_xml_ns) {
9002 if (attname != ctxt->str_xml) {
9003 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9004 "xml namespace URI mapped to wrong prefix\n",
9005 NULL, NULL, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00009006 }
Daniel Veillard37334572008-07-31 08:20:02 +00009007 goto skip_ns;
9008 }
9009 if (attname == ctxt->str_xmlns) {
9010 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9011 "redefinition of the xmlns prefix is forbidden\n",
9012 NULL, NULL, NULL);
9013 goto skip_ns;
9014 }
9015 if ((len == 29) &&
9016 (xmlStrEqual(URL,
9017 BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
9018 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9019 "reuse of the xmlns namespace name is forbidden\n",
9020 NULL, NULL, NULL);
9021 goto skip_ns;
9022 }
9023 if ((URL == NULL) || (URL[0] == 0)) {
9024 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9025 "xmlns:%s: Empty XML namespace is not allowed\n",
9026 attname, NULL, NULL);
9027 goto skip_ns;
9028 } else {
9029 uri = xmlParseURI((const char *) URL);
9030 if (uri == NULL) {
9031 xmlNsErr(ctxt, XML_WAR_NS_URI,
9032 "xmlns:%s: '%s' is not a valid URI\n",
9033 attname, URL, NULL);
9034 } else {
9035 if ((ctxt->pedantic) && (uri->scheme == NULL)) {
9036 xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
9037 "xmlns:%s: URI %s is not absolute\n",
9038 attname, URL, NULL);
9039 }
9040 xmlFreeURI(uri);
9041 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00009042 }
9043
Daniel Veillard0fb18932003-09-07 09:14:37 +00009044 /*
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009045 * check that it's not a defined namespace
Daniel Veillard0fb18932003-09-07 09:14:37 +00009046 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009047 for (j = 1;j <= nbNs;j++)
9048 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
9049 break;
9050 if (j <= nbNs)
9051 xmlErrAttributeDup(ctxt, aprefix, attname);
9052 else
9053 if (nsPush(ctxt, attname, URL) > 0) nbNs++;
Daniel Veillard37334572008-07-31 08:20:02 +00009054skip_ns:
Daniel Veillarde57ec792003-09-10 10:50:59 +00009055 if (alloc != 0) xmlFree(attvalue);
9056 SKIP_BLANKS;
Daniel Veillardd3999c72004-03-10 16:27:03 +00009057 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009058 continue;
9059 }
9060
9061 /*
9062 * Add the pair to atts
9063 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00009064 if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9065 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00009066 if (attvalue[len] == 0)
9067 xmlFree(attvalue);
9068 goto failed;
9069 }
9070 maxatts = ctxt->maxatts;
9071 atts = ctxt->atts;
9072 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00009073 ctxt->attallocs[nratts++] = alloc;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009074 atts[nbatts++] = attname;
9075 atts[nbatts++] = aprefix;
Daniel Veillarde57ec792003-09-10 10:50:59 +00009076 atts[nbatts++] = NULL; /* the URI will be fetched later */
Daniel Veillard0fb18932003-09-07 09:14:37 +00009077 atts[nbatts++] = attvalue;
9078 attvalue += len;
9079 atts[nbatts++] = attvalue;
9080 /*
9081 * tag if some deallocation is needed
9082 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00009083 if (alloc != 0) attval = 1;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009084 } else {
9085 if ((attvalue != NULL) && (attvalue[len] == 0))
9086 xmlFree(attvalue);
9087 }
9088
Daniel Veillard37334572008-07-31 08:20:02 +00009089failed:
Daniel Veillard0fb18932003-09-07 09:14:37 +00009090
9091 GROW
Daniel Veillarde57ec792003-09-10 10:50:59 +00009092 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009093 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
9094 break;
William M. Brack76e95df2003-10-18 16:20:14 +00009095 if (!IS_BLANK_CH(RAW)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009096 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
9097 "attributes construct error\n");
William M. Brack13dfa872004-09-18 04:52:08 +00009098 break;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009099 }
9100 SKIP_BLANKS;
9101 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
9102 (attname == NULL) && (attvalue == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009103 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
Daniel Veillard0fb18932003-09-07 09:14:37 +00009104 "xmlParseStartTag: problem parsing attributes\n");
Daniel Veillard0fb18932003-09-07 09:14:37 +00009105 break;
9106 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00009107 GROW;
Daniel Veillarde57ec792003-09-10 10:50:59 +00009108 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009109 }
9110
Daniel Veillard0fb18932003-09-07 09:14:37 +00009111 /*
Daniel Veillarde57ec792003-09-10 10:50:59 +00009112 * The attributes defaulting
9113 */
9114 if (ctxt->attsDefault != NULL) {
9115 xmlDefAttrsPtr defaults;
9116
9117 defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix);
9118 if (defaults != NULL) {
9119 for (i = 0;i < defaults->nbAttrs;i++) {
Daniel Veillardae0765b2008-07-31 19:54:59 +00009120 attname = defaults->values[5 * i];
9121 aprefix = defaults->values[5 * i + 1];
Daniel Veillarde57ec792003-09-10 10:50:59 +00009122
9123 /*
9124 * special work for namespaces defaulted defs
9125 */
9126 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
9127 /*
9128 * check that it's not a defined namespace
9129 */
9130 for (j = 1;j <= nbNs;j++)
9131 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
9132 break;
9133 if (j <= nbNs) continue;
9134
9135 nsname = xmlGetNamespace(ctxt, NULL);
Daniel Veillardae0765b2008-07-31 19:54:59 +00009136 if (nsname != defaults->values[5 * i + 2]) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00009137 if (nsPush(ctxt, NULL,
Daniel Veillardae0765b2008-07-31 19:54:59 +00009138 defaults->values[5 * i + 2]) > 0)
Daniel Veillarde57ec792003-09-10 10:50:59 +00009139 nbNs++;
9140 }
9141 } else if (aprefix == ctxt->str_xmlns) {
9142 /*
9143 * check that it's not a defined namespace
9144 */
9145 for (j = 1;j <= nbNs;j++)
9146 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
9147 break;
9148 if (j <= nbNs) continue;
9149
9150 nsname = xmlGetNamespace(ctxt, attname);
9151 if (nsname != defaults->values[2]) {
9152 if (nsPush(ctxt, attname,
Daniel Veillardae0765b2008-07-31 19:54:59 +00009153 defaults->values[5 * i + 2]) > 0)
Daniel Veillarde57ec792003-09-10 10:50:59 +00009154 nbNs++;
9155 }
9156 } else {
9157 /*
9158 * check that it's not a defined attribute
9159 */
9160 for (j = 0;j < nbatts;j+=5) {
9161 if ((attname == atts[j]) && (aprefix == atts[j+1]))
9162 break;
9163 }
9164 if (j < nbatts) continue;
9165
9166 if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9167 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
Daniel Veillard9ee35f32003-09-28 00:19:54 +00009168 return(NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00009169 }
9170 maxatts = ctxt->maxatts;
9171 atts = ctxt->atts;
9172 }
9173 atts[nbatts++] = attname;
9174 atts[nbatts++] = aprefix;
9175 if (aprefix == NULL)
9176 atts[nbatts++] = NULL;
9177 else
9178 atts[nbatts++] = xmlGetNamespace(ctxt, aprefix);
Daniel Veillardae0765b2008-07-31 19:54:59 +00009179 atts[nbatts++] = defaults->values[5 * i + 2];
9180 atts[nbatts++] = defaults->values[5 * i + 3];
9181 if ((ctxt->standalone == 1) &&
9182 (defaults->values[5 * i + 4] != NULL)) {
9183 xmlValidityError(ctxt, XML_DTD_STANDALONE_DEFAULTED,
9184 "standalone: attribute %s on %s defaulted from external subset\n",
9185 attname, localname);
9186 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00009187 nbdef++;
9188 }
9189 }
9190 }
9191 }
9192
Daniel Veillarde70c8772003-11-25 07:21:18 +00009193 /*
9194 * The attributes checkings
9195 */
9196 for (i = 0; i < nbatts;i += 5) {
Kasimier T. Buchcik455472f2005-04-29 10:04:43 +00009197 /*
9198 * The default namespace does not apply to attribute names.
9199 */
9200 if (atts[i + 1] != NULL) {
9201 nsname = xmlGetNamespace(ctxt, atts[i + 1]);
9202 if (nsname == NULL) {
9203 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9204 "Namespace prefix %s for %s on %s is not defined\n",
9205 atts[i + 1], atts[i], localname);
9206 }
9207 atts[i + 2] = nsname;
9208 } else
9209 nsname = NULL;
Daniel Veillarde70c8772003-11-25 07:21:18 +00009210 /*
9211 * [ WFC: Unique Att Spec ]
9212 * No attribute name may appear more than once in the same
9213 * start-tag or empty-element tag.
9214 * As extended by the Namespace in XML REC.
9215 */
9216 for (j = 0; j < i;j += 5) {
9217 if (atts[i] == atts[j]) {
9218 if (atts[i+1] == atts[j+1]) {
9219 xmlErrAttributeDup(ctxt, atts[i+1], atts[i]);
9220 break;
9221 }
9222 if ((nsname != NULL) && (atts[j + 2] == nsname)) {
9223 xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED,
9224 "Namespaced Attribute %s in '%s' redefined\n",
9225 atts[i], nsname, NULL);
9226 break;
9227 }
9228 }
9229 }
9230 }
9231
Daniel Veillarde57ec792003-09-10 10:50:59 +00009232 nsname = xmlGetNamespace(ctxt, prefix);
9233 if ((prefix != NULL) && (nsname == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009234 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9235 "Namespace prefix %s on %s is not defined\n",
9236 prefix, localname, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00009237 }
9238 *pref = prefix;
9239 *URI = nsname;
9240
9241 /*
9242 * SAX: Start of Element !
9243 */
9244 if ((ctxt->sax != NULL) && (ctxt->sax->startElementNs != NULL) &&
9245 (!ctxt->disableSAX)) {
9246 if (nbNs > 0)
9247 ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
9248 nsname, nbNs, &ctxt->nsTab[ctxt->nsNr - 2 * nbNs],
9249 nbatts / 5, nbdef, atts);
9250 else
9251 ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
9252 nsname, 0, NULL, nbatts / 5, nbdef, atts);
9253 }
9254
9255 /*
9256 * Free up attribute allocated strings if needed
9257 */
9258 if (attval != 0) {
9259 for (i = 3,j = 0; j < nratts;i += 5,j++)
9260 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
9261 xmlFree((xmlChar *) atts[i]);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009262 }
9263
9264 return(localname);
Daniel Veillarde57ec792003-09-10 10:50:59 +00009265
9266base_changed:
9267 /*
9268 * the attribute strings are valid iif the base didn't changed
9269 */
9270 if (attval != 0) {
9271 for (i = 3,j = 0; j < nratts;i += 5,j++)
9272 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
9273 xmlFree((xmlChar *) atts[i]);
9274 }
9275 ctxt->input->cur = ctxt->input->base + cur;
Daniel Veillarddcec6722006-10-15 20:32:53 +00009276 ctxt->input->line = oldline;
9277 ctxt->input->col = oldcol;
Daniel Veillarde57ec792003-09-10 10:50:59 +00009278 if (ctxt->wellFormed == 1) {
9279 goto reparse;
9280 }
9281 return(NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009282}
9283
9284/**
9285 * xmlParseEndTag2:
9286 * @ctxt: an XML parser context
9287 * @line: line of the start tag
9288 * @nsNr: number of namespaces on the start tag
9289 *
9290 * parse an end of tag
9291 *
9292 * [42] ETag ::= '</' Name S? '>'
9293 *
9294 * With namespace
9295 *
9296 * [NS 9] ETag ::= '</' QName S? '>'
9297 */
9298
9299static void
9300xmlParseEndTag2(xmlParserCtxtPtr ctxt, const xmlChar *prefix,
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009301 const xmlChar *URI, int line, int nsNr, int tlen) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00009302 const xmlChar *name;
9303
9304 GROW;
9305 if ((RAW != '<') || (NXT(1) != '/')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009306 xmlFatalErr(ctxt, XML_ERR_LTSLASH_REQUIRED, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009307 return;
9308 }
9309 SKIP(2);
9310
William M. Brack13dfa872004-09-18 04:52:08 +00009311 if ((tlen > 0) && (xmlStrncmp(ctxt->input->cur, ctxt->name, tlen) == 0)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009312 if (ctxt->input->cur[tlen] == '>') {
9313 ctxt->input->cur += tlen + 1;
9314 goto done;
9315 }
9316 ctxt->input->cur += tlen;
9317 name = (xmlChar*)1;
9318 } else {
9319 if (prefix == NULL)
9320 name = xmlParseNameAndCompare(ctxt, ctxt->name);
9321 else
9322 name = xmlParseQNameAndCompare(ctxt, ctxt->name, prefix);
9323 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00009324
9325 /*
9326 * We should definitely be at the ending "S? '>'" part
9327 */
9328 GROW;
9329 SKIP_BLANKS;
Daniel Veillard73b013f2003-09-30 12:36:01 +00009330 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009331 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009332 } else
9333 NEXT1;
9334
9335 /*
9336 * [ WFC: Element Type Match ]
9337 * The Name in an element's end-tag must match the element type in the
9338 * start-tag.
9339 *
9340 */
9341 if (name != (xmlChar*)1) {
Daniel Veillardf403d292003-10-05 13:51:35 +00009342 if (name == NULL) name = BAD_CAST "unparseable";
Daniel Veillard852505b2009-08-23 15:44:48 +02009343 if ((line == 0) && (ctxt->node != NULL))
9344 line = ctxt->node->line;
Daniel Veillardf403d292003-10-05 13:51:35 +00009345 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
Daniel Veillard0fb18932003-09-07 09:14:37 +00009346 "Opening and ending tag mismatch: %s line %d and %s\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00009347 ctxt->name, line, name);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009348 }
9349
9350 /*
9351 * SAX: End of Tag
9352 */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009353done:
Daniel Veillard0fb18932003-09-07 09:14:37 +00009354 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
9355 (!ctxt->disableSAX))
9356 ctxt->sax->endElementNs(ctxt->userData, ctxt->name, prefix, URI);
9357
Daniel Veillard0fb18932003-09-07 09:14:37 +00009358 spacePop(ctxt);
9359 if (nsNr != 0)
9360 nsPop(ctxt, nsNr);
9361 return;
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00009362}
9363
9364/**
Owen Taylor3473f882001-02-23 17:55:21 +00009365 * xmlParseCDSect:
9366 * @ctxt: an XML parser context
9367 *
9368 * Parse escaped pure raw content.
9369 *
9370 * [18] CDSect ::= CDStart CData CDEnd
9371 *
9372 * [19] CDStart ::= '<![CDATA['
9373 *
9374 * [20] Data ::= (Char* - (Char* ']]>' Char*))
9375 *
9376 * [21] CDEnd ::= ']]>'
9377 */
9378void
9379xmlParseCDSect(xmlParserCtxtPtr ctxt) {
9380 xmlChar *buf = NULL;
9381 int len = 0;
9382 int size = XML_PARSER_BUFFER_SIZE;
9383 int r, rl;
9384 int s, sl;
9385 int cur, l;
9386 int count = 0;
9387
Daniel Veillard8f597c32003-10-06 08:19:27 +00009388 /* Check 2.6.0 was NXT(0) not RAW */
Daniel Veillarda07050d2003-10-19 14:46:32 +00009389 if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009390 SKIP(9);
9391 } else
9392 return;
9393
9394 ctxt->instate = XML_PARSER_CDATA_SECTION;
9395 r = CUR_CHAR(rl);
William M. Brack871611b2003-10-18 04:53:14 +00009396 if (!IS_CHAR(r)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009397 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009398 ctxt->instate = XML_PARSER_CONTENT;
9399 return;
9400 }
9401 NEXTL(rl);
9402 s = CUR_CHAR(sl);
William M. Brack871611b2003-10-18 04:53:14 +00009403 if (!IS_CHAR(s)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009404 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009405 ctxt->instate = XML_PARSER_CONTENT;
9406 return;
9407 }
9408 NEXTL(sl);
9409 cur = CUR_CHAR(l);
Daniel Veillard3c908dc2003-04-19 00:07:51 +00009410 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00009411 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009412 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009413 return;
9414 }
William M. Brack871611b2003-10-18 04:53:14 +00009415 while (IS_CHAR(cur) &&
Owen Taylor3473f882001-02-23 17:55:21 +00009416 ((r != ']') || (s != ']') || (cur != '>'))) {
9417 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00009418 xmlChar *tmp;
9419
Owen Taylor3473f882001-02-23 17:55:21 +00009420 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00009421 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
9422 if (tmp == NULL) {
9423 xmlFree(buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009424 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009425 return;
9426 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00009427 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00009428 }
9429 COPY_BUF(rl,buf,len,r);
9430 r = s;
9431 rl = sl;
9432 s = cur;
9433 sl = l;
9434 count++;
9435 if (count > 50) {
9436 GROW;
9437 count = 0;
9438 }
9439 NEXTL(l);
9440 cur = CUR_CHAR(l);
9441 }
9442 buf[len] = 0;
9443 ctxt->instate = XML_PARSER_CONTENT;
9444 if (cur != '>') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00009445 xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00009446 "CData section not finished\n%.50s\n", buf);
Owen Taylor3473f882001-02-23 17:55:21 +00009447 xmlFree(buf);
9448 return;
9449 }
9450 NEXTL(l);
9451
9452 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00009453 * OK the buffer is to be consumed as cdata.
Owen Taylor3473f882001-02-23 17:55:21 +00009454 */
9455 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
9456 if (ctxt->sax->cdataBlock != NULL)
9457 ctxt->sax->cdataBlock(ctxt->userData, buf, len);
Daniel Veillard7583a592001-07-08 13:15:55 +00009458 else if (ctxt->sax->characters != NULL)
9459 ctxt->sax->characters(ctxt->userData, buf, len);
Owen Taylor3473f882001-02-23 17:55:21 +00009460 }
9461 xmlFree(buf);
9462}
9463
9464/**
9465 * xmlParseContent:
9466 * @ctxt: an XML parser context
9467 *
9468 * Parse a content:
9469 *
9470 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
9471 */
9472
9473void
9474xmlParseContent(xmlParserCtxtPtr ctxt) {
9475 GROW;
Daniel Veillardfdc91562002-07-01 21:52:03 +00009476 while ((RAW != 0) &&
Daniel Veillard4a9fe382006-09-19 12:44:35 +00009477 ((RAW != '<') || (NXT(1) != '/')) &&
9478 (ctxt->instate != XML_PARSER_EOF)) {
Owen Taylor3473f882001-02-23 17:55:21 +00009479 const xmlChar *test = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00009480 unsigned int cons = ctxt->input->consumed;
Daniel Veillard21a0f912001-02-25 19:54:14 +00009481 const xmlChar *cur = ctxt->input->cur;
Owen Taylor3473f882001-02-23 17:55:21 +00009482
9483 /*
Owen Taylor3473f882001-02-23 17:55:21 +00009484 * First case : a Processing Instruction.
9485 */
Daniel Veillardfdc91562002-07-01 21:52:03 +00009486 if ((*cur == '<') && (cur[1] == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009487 xmlParsePI(ctxt);
9488 }
9489
9490 /*
9491 * Second case : a CDSection
9492 */
Daniel Veillard8f597c32003-10-06 08:19:27 +00009493 /* 2.6.0 test was *cur not RAW */
Daniel Veillarda07050d2003-10-19 14:46:32 +00009494 else if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009495 xmlParseCDSect(ctxt);
9496 }
9497
9498 /*
9499 * Third case : a comment
9500 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00009501 else if ((*cur == '<') && (NXT(1) == '!') &&
Owen Taylor3473f882001-02-23 17:55:21 +00009502 (NXT(2) == '-') && (NXT(3) == '-')) {
9503 xmlParseComment(ctxt);
9504 ctxt->instate = XML_PARSER_CONTENT;
9505 }
9506
9507 /*
9508 * Fourth case : a sub-element.
9509 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00009510 else if (*cur == '<') {
Owen Taylor3473f882001-02-23 17:55:21 +00009511 xmlParseElement(ctxt);
9512 }
9513
9514 /*
9515 * Fifth case : a reference. If if has not been resolved,
9516 * parsing returns it's Name, create the node
9517 */
9518
Daniel Veillard21a0f912001-02-25 19:54:14 +00009519 else if (*cur == '&') {
Owen Taylor3473f882001-02-23 17:55:21 +00009520 xmlParseReference(ctxt);
9521 }
9522
9523 /*
9524 * Last case, text. Note that References are handled directly.
9525 */
9526 else {
9527 xmlParseCharData(ctxt, 0);
9528 }
9529
9530 GROW;
9531 /*
9532 * Pop-up of finished entities.
9533 */
Daniel Veillard561b7f82002-03-20 21:55:57 +00009534 while ((RAW == 0) && (ctxt->inputNr > 1))
Owen Taylor3473f882001-02-23 17:55:21 +00009535 xmlPopInput(ctxt);
9536 SHRINK;
9537
Daniel Veillardfdc91562002-07-01 21:52:03 +00009538 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009539 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
9540 "detected an error in element content\n");
Owen Taylor3473f882001-02-23 17:55:21 +00009541 ctxt->instate = XML_PARSER_EOF;
9542 break;
9543 }
9544 }
9545}
9546
9547/**
9548 * xmlParseElement:
9549 * @ctxt: an XML parser context
9550 *
9551 * parse an XML element, this is highly recursive
9552 *
9553 * [39] element ::= EmptyElemTag | STag content ETag
9554 *
9555 * [ WFC: Element Type Match ]
9556 * The Name in an element's end-tag must match the element type in the
9557 * start-tag.
9558 *
Owen Taylor3473f882001-02-23 17:55:21 +00009559 */
9560
9561void
9562xmlParseElement(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00009563 const xmlChar *name;
Daniel Veillard15495612009-09-05 15:04:41 +02009564 const xmlChar *prefix = NULL;
9565 const xmlChar *URI = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00009566 xmlParserNodeInfo node_info;
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009567 int line, tlen;
Owen Taylor3473f882001-02-23 17:55:21 +00009568 xmlNodePtr ret;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009569 int nsNr = ctxt->nsNr;
Owen Taylor3473f882001-02-23 17:55:21 +00009570
Daniel Veillard8915c152008-08-26 13:05:34 +00009571 if (((unsigned int) ctxt->nameNr > xmlParserMaxDepth) &&
9572 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9573 xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
9574 "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
9575 xmlParserMaxDepth);
Daniel Veillard4a9fe382006-09-19 12:44:35 +00009576 ctxt->instate = XML_PARSER_EOF;
9577 return;
9578 }
9579
Owen Taylor3473f882001-02-23 17:55:21 +00009580 /* Capture start position */
9581 if (ctxt->record_info) {
9582 node_info.begin_pos = ctxt->input->consumed +
9583 (CUR_PTR - ctxt->input->base);
9584 node_info.begin_line = ctxt->input->line;
9585 }
9586
9587 if (ctxt->spaceNr == 0)
9588 spacePush(ctxt, -1);
Daniel Veillard1114d002006-10-12 16:24:35 +00009589 else if (*ctxt->space == -2)
9590 spacePush(ctxt, -1);
Owen Taylor3473f882001-02-23 17:55:21 +00009591 else
9592 spacePush(ctxt, *ctxt->space);
9593
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00009594 line = ctxt->input->line;
Daniel Veillard81273902003-09-30 00:43:48 +00009595#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +00009596 if (ctxt->sax2)
Daniel Veillard81273902003-09-30 00:43:48 +00009597#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009598 name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
Daniel Veillard81273902003-09-30 00:43:48 +00009599#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +00009600 else
9601 name = xmlParseStartTag(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +00009602#endif /* LIBXML_SAX1_ENABLED */
Chris Evans77404b82011-12-14 16:18:25 +08009603 if (ctxt->instate == XML_PARSER_EOF)
9604 return;
Owen Taylor3473f882001-02-23 17:55:21 +00009605 if (name == NULL) {
9606 spacePop(ctxt);
9607 return;
9608 }
9609 namePush(ctxt, name);
9610 ret = ctxt->node;
9611
Daniel Veillard4432df22003-09-28 18:58:27 +00009612#ifdef LIBXML_VALID_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +00009613 /*
9614 * [ VC: Root Element Type ]
9615 * The Name in the document type declaration must match the element
9616 * type of the root element.
9617 */
9618 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
9619 ctxt->node && (ctxt->node == ctxt->myDoc->children))
9620 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
Daniel Veillard4432df22003-09-28 18:58:27 +00009621#endif /* LIBXML_VALID_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00009622
9623 /*
9624 * Check for an Empty Element.
9625 */
9626 if ((RAW == '/') && (NXT(1) == '>')) {
9627 SKIP(2);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009628 if (ctxt->sax2) {
9629 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
9630 (!ctxt->disableSAX))
9631 ctxt->sax->endElementNs(ctxt->userData, name, prefix, URI);
Daniel Veillard81273902003-09-30 00:43:48 +00009632#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +00009633 } else {
9634 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
9635 (!ctxt->disableSAX))
9636 ctxt->sax->endElement(ctxt->userData, name);
Daniel Veillard81273902003-09-30 00:43:48 +00009637#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00009638 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00009639 namePop(ctxt);
9640 spacePop(ctxt);
9641 if (nsNr != ctxt->nsNr)
9642 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +00009643 if ( ret != NULL && ctxt->record_info ) {
9644 node_info.end_pos = ctxt->input->consumed +
9645 (CUR_PTR - ctxt->input->base);
9646 node_info.end_line = ctxt->input->line;
9647 node_info.node = ret;
9648 xmlParserAddNodeInfo(ctxt, &node_info);
9649 }
9650 return;
9651 }
9652 if (RAW == '>') {
Daniel Veillard21a0f912001-02-25 19:54:14 +00009653 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00009654 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00009655 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_GT_REQUIRED,
9656 "Couldn't find end of Start Tag %s line %d\n",
9657 name, line, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009658
9659 /*
9660 * end of parsing of this node.
9661 */
9662 nodePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009663 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00009664 spacePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009665 if (nsNr != ctxt->nsNr)
9666 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +00009667
9668 /*
9669 * Capture end position and add node
9670 */
9671 if ( ret != NULL && ctxt->record_info ) {
9672 node_info.end_pos = ctxt->input->consumed +
9673 (CUR_PTR - ctxt->input->base);
9674 node_info.end_line = ctxt->input->line;
9675 node_info.node = ret;
9676 xmlParserAddNodeInfo(ctxt, &node_info);
9677 }
9678 return;
9679 }
9680
9681 /*
9682 * Parse the content of the element:
9683 */
9684 xmlParseContent(ctxt);
Daniel Veillard73b013f2003-09-30 12:36:01 +00009685 if (!IS_BYTE_CHAR(RAW)) {
Daniel Veillardf403d292003-10-05 13:51:35 +00009686 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
Daniel Veillard2b0f8792003-10-10 19:36:36 +00009687 "Premature end of data in tag %s line %d\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00009688 name, line, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009689
9690 /*
9691 * end of parsing of this node.
9692 */
9693 nodePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009694 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00009695 spacePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009696 if (nsNr != ctxt->nsNr)
9697 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +00009698 return;
9699 }
9700
9701 /*
9702 * parse the end of tag: '</' should be here.
9703 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00009704 if (ctxt->sax2) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009705 xmlParseEndTag2(ctxt, prefix, URI, line, ctxt->nsNr - nsNr, tlen);
Daniel Veillarde57ec792003-09-10 10:50:59 +00009706 namePop(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +00009707 }
9708#ifdef LIBXML_SAX1_ENABLED
9709 else
Daniel Veillard0fb18932003-09-07 09:14:37 +00009710 xmlParseEndTag1(ctxt, line);
Daniel Veillard81273902003-09-30 00:43:48 +00009711#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00009712
9713 /*
9714 * Capture end position and add node
9715 */
9716 if ( ret != NULL && ctxt->record_info ) {
9717 node_info.end_pos = ctxt->input->consumed +
9718 (CUR_PTR - ctxt->input->base);
9719 node_info.end_line = ctxt->input->line;
9720 node_info.node = ret;
9721 xmlParserAddNodeInfo(ctxt, &node_info);
9722 }
9723}
9724
9725/**
9726 * xmlParseVersionNum:
9727 * @ctxt: an XML parser context
9728 *
9729 * parse the XML version value.
9730 *
Daniel Veillard34e3f642008-07-29 09:02:27 +00009731 * [26] VersionNum ::= '1.' [0-9]+
9732 *
9733 * In practice allow [0-9].[0-9]+ at that level
Owen Taylor3473f882001-02-23 17:55:21 +00009734 *
9735 * Returns the string giving the XML version number, or NULL
9736 */
9737xmlChar *
9738xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
9739 xmlChar *buf = NULL;
9740 int len = 0;
9741 int size = 10;
9742 xmlChar cur;
9743
Daniel Veillard3c908dc2003-04-19 00:07:51 +00009744 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00009745 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009746 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009747 return(NULL);
9748 }
9749 cur = CUR;
Daniel Veillard34e3f642008-07-29 09:02:27 +00009750 if (!((cur >= '0') && (cur <= '9'))) {
Daniel Veillard7e5c3f42008-07-29 16:12:31 +00009751 xmlFree(buf);
Daniel Veillard34e3f642008-07-29 09:02:27 +00009752 return(NULL);
9753 }
9754 buf[len++] = cur;
9755 NEXT;
9756 cur=CUR;
9757 if (cur != '.') {
Daniel Veillard7e5c3f42008-07-29 16:12:31 +00009758 xmlFree(buf);
Daniel Veillard34e3f642008-07-29 09:02:27 +00009759 return(NULL);
9760 }
9761 buf[len++] = cur;
9762 NEXT;
9763 cur=CUR;
9764 while ((cur >= '0') && (cur <= '9')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009765 if (len + 1 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00009766 xmlChar *tmp;
9767
Owen Taylor3473f882001-02-23 17:55:21 +00009768 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00009769 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
9770 if (tmp == NULL) {
Daniel Veillard68b6e022008-03-31 09:26:00 +00009771 xmlFree(buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009772 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009773 return(NULL);
9774 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00009775 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00009776 }
9777 buf[len++] = cur;
9778 NEXT;
9779 cur=CUR;
9780 }
9781 buf[len] = 0;
9782 return(buf);
9783}
9784
9785/**
9786 * xmlParseVersionInfo:
9787 * @ctxt: an XML parser context
Daniel Veillard68b6e022008-03-31 09:26:00 +00009788 *
Owen Taylor3473f882001-02-23 17:55:21 +00009789 * parse the XML version.
9790 *
9791 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
Daniel Veillard68b6e022008-03-31 09:26:00 +00009792 *
Owen Taylor3473f882001-02-23 17:55:21 +00009793 * [25] Eq ::= S? '=' S?
9794 *
9795 * Returns the version string, e.g. "1.0"
9796 */
9797
9798xmlChar *
9799xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
9800 xmlChar *version = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00009801
Daniel Veillarda07050d2003-10-19 14:46:32 +00009802 if (CMP7(CUR_PTR, 'v', 'e', 'r', 's', 'i', 'o', 'n')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009803 SKIP(7);
9804 SKIP_BLANKS;
9805 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009806 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009807 return(NULL);
9808 }
9809 NEXT;
9810 SKIP_BLANKS;
9811 if (RAW == '"') {
9812 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00009813 version = xmlParseVersionNum(ctxt);
9814 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009815 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009816 } else
9817 NEXT;
9818 } else if (RAW == '\''){
9819 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00009820 version = xmlParseVersionNum(ctxt);
9821 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009822 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009823 } else
9824 NEXT;
9825 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009826 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009827 }
9828 }
9829 return(version);
9830}
9831
9832/**
9833 * xmlParseEncName:
9834 * @ctxt: an XML parser context
9835 *
9836 * parse the XML encoding name
9837 *
9838 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
9839 *
9840 * Returns the encoding name value or NULL
9841 */
9842xmlChar *
9843xmlParseEncName(xmlParserCtxtPtr ctxt) {
9844 xmlChar *buf = NULL;
9845 int len = 0;
9846 int size = 10;
9847 xmlChar cur;
9848
9849 cur = CUR;
9850 if (((cur >= 'a') && (cur <= 'z')) ||
9851 ((cur >= 'A') && (cur <= 'Z'))) {
Daniel Veillard3c908dc2003-04-19 00:07:51 +00009852 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00009853 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009854 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009855 return(NULL);
9856 }
Daniel Veillard34e3f642008-07-29 09:02:27 +00009857
Owen Taylor3473f882001-02-23 17:55:21 +00009858 buf[len++] = cur;
9859 NEXT;
9860 cur = CUR;
9861 while (((cur >= 'a') && (cur <= 'z')) ||
9862 ((cur >= 'A') && (cur <= 'Z')) ||
9863 ((cur >= '0') && (cur <= '9')) ||
9864 (cur == '.') || (cur == '_') ||
9865 (cur == '-')) {
9866 if (len + 1 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00009867 xmlChar *tmp;
9868
Owen Taylor3473f882001-02-23 17:55:21 +00009869 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00009870 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
9871 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009872 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00009873 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00009874 return(NULL);
9875 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00009876 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00009877 }
9878 buf[len++] = cur;
9879 NEXT;
9880 cur = CUR;
9881 if (cur == 0) {
9882 SHRINK;
9883 GROW;
9884 cur = CUR;
9885 }
9886 }
9887 buf[len] = 0;
9888 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009889 xmlFatalErr(ctxt, XML_ERR_ENCODING_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009890 }
9891 return(buf);
9892}
9893
9894/**
9895 * xmlParseEncodingDecl:
9896 * @ctxt: an XML parser context
9897 *
9898 * parse the XML encoding declaration
9899 *
9900 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
9901 *
9902 * this setups the conversion filters.
9903 *
9904 * Returns the encoding value or NULL
9905 */
9906
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00009907const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00009908xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
9909 xmlChar *encoding = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00009910
9911 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00009912 if (CMP8(CUR_PTR, 'e', 'n', 'c', 'o', 'd', 'i', 'n', 'g')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009913 SKIP(8);
9914 SKIP_BLANKS;
9915 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009916 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009917 return(NULL);
9918 }
9919 NEXT;
9920 SKIP_BLANKS;
9921 if (RAW == '"') {
9922 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00009923 encoding = xmlParseEncName(ctxt);
9924 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009925 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009926 } else
9927 NEXT;
9928 } else if (RAW == '\''){
9929 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00009930 encoding = xmlParseEncName(ctxt);
9931 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009932 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009933 } else
9934 NEXT;
Daniel Veillard82ac6b02002-02-17 23:18:55 +00009935 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009936 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009937 }
Daniel Veillardc62efc82011-05-16 16:03:50 +08009938
9939 /*
9940 * Non standard parsing, allowing the user to ignore encoding
9941 */
9942 if (ctxt->options & XML_PARSE_IGNORE_ENC)
9943 return(encoding);
9944
Daniel Veillard6b621b82003-08-11 15:03:34 +00009945 /*
9946 * UTF-16 encoding stwich has already taken place at this stage,
9947 * more over the little-endian/big-endian selection is already done
9948 */
9949 if ((encoding != NULL) &&
9950 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-16")) ||
9951 (!xmlStrcasecmp(encoding, BAD_CAST "UTF16")))) {
Daniel Veillard37334572008-07-31 08:20:02 +00009952 /*
9953 * If no encoding was passed to the parser, that we are
9954 * using UTF-16 and no decoder is present i.e. the
9955 * document is apparently UTF-8 compatible, then raise an
9956 * encoding mismatch fatal error
9957 */
9958 if ((ctxt->encoding == NULL) &&
9959 (ctxt->input->buf != NULL) &&
9960 (ctxt->input->buf->encoder == NULL)) {
9961 xmlFatalErrMsg(ctxt, XML_ERR_INVALID_ENCODING,
9962 "Document labelled UTF-16 but has UTF-8 content\n");
9963 }
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00009964 if (ctxt->encoding != NULL)
9965 xmlFree((xmlChar *) ctxt->encoding);
9966 ctxt->encoding = encoding;
Daniel Veillardb19ba832003-08-14 00:33:46 +00009967 }
9968 /*
9969 * UTF-8 encoding is handled natively
9970 */
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00009971 else if ((encoding != NULL) &&
Daniel Veillardb19ba832003-08-14 00:33:46 +00009972 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-8")) ||
9973 (!xmlStrcasecmp(encoding, BAD_CAST "UTF8")))) {
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00009974 if (ctxt->encoding != NULL)
9975 xmlFree((xmlChar *) ctxt->encoding);
9976 ctxt->encoding = encoding;
Daniel Veillard6b621b82003-08-11 15:03:34 +00009977 }
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00009978 else if (encoding != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +00009979 xmlCharEncodingHandlerPtr handler;
9980
9981 if (ctxt->input->encoding != NULL)
9982 xmlFree((xmlChar *) ctxt->input->encoding);
9983 ctxt->input->encoding = encoding;
9984
Daniel Veillarda6874ca2003-07-29 16:47:24 +00009985 handler = xmlFindCharEncodingHandler((const char *) encoding);
9986 if (handler != NULL) {
9987 xmlSwitchToEncoding(ctxt, handler);
Owen Taylor3473f882001-02-23 17:55:21 +00009988 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00009989 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
Daniel Veillarda6874ca2003-07-29 16:47:24 +00009990 "Unsupported encoding %s\n", encoding);
9991 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009992 }
9993 }
9994 }
9995 return(encoding);
9996}
9997
9998/**
9999 * xmlParseSDDecl:
10000 * @ctxt: an XML parser context
10001 *
10002 * parse the XML standalone declaration
10003 *
10004 * [32] SDDecl ::= S 'standalone' Eq
10005 * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
10006 *
10007 * [ VC: Standalone Document Declaration ]
10008 * TODO The standalone document declaration must have the value "no"
10009 * if any external markup declarations contain declarations of:
10010 * - attributes with default values, if elements to which these
10011 * attributes apply appear in the document without specifications
10012 * of values for these attributes, or
10013 * - entities (other than amp, lt, gt, apos, quot), if references
10014 * to those entities appear in the document, or
10015 * - attributes with values subject to normalization, where the
10016 * attribute appears in the document with a value which will change
10017 * as a result of normalization, or
10018 * - element types with element content, if white space occurs directly
10019 * within any instance of those types.
10020 *
Daniel Veillard602f2bd2006-12-04 09:26:04 +000010021 * Returns:
10022 * 1 if standalone="yes"
10023 * 0 if standalone="no"
10024 * -2 if standalone attribute is missing or invalid
10025 * (A standalone value of -2 means that the XML declaration was found,
10026 * but no value was specified for the standalone attribute).
Owen Taylor3473f882001-02-23 17:55:21 +000010027 */
10028
10029int
10030xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard602f2bd2006-12-04 09:26:04 +000010031 int standalone = -2;
Owen Taylor3473f882001-02-23 17:55:21 +000010032
10033 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +000010034 if (CMP10(CUR_PTR, 's', 't', 'a', 'n', 'd', 'a', 'l', 'o', 'n', 'e')) {
Owen Taylor3473f882001-02-23 17:55:21 +000010035 SKIP(10);
10036 SKIP_BLANKS;
10037 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010038 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010039 return(standalone);
10040 }
10041 NEXT;
10042 SKIP_BLANKS;
10043 if (RAW == '\''){
10044 NEXT;
10045 if ((RAW == 'n') && (NXT(1) == 'o')) {
10046 standalone = 0;
10047 SKIP(2);
10048 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
10049 (NXT(2) == 's')) {
10050 standalone = 1;
10051 SKIP(3);
10052 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010053 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010054 }
10055 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010056 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010057 } else
10058 NEXT;
10059 } else if (RAW == '"'){
10060 NEXT;
10061 if ((RAW == 'n') && (NXT(1) == 'o')) {
10062 standalone = 0;
10063 SKIP(2);
10064 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
10065 (NXT(2) == 's')) {
10066 standalone = 1;
10067 SKIP(3);
10068 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010069 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010070 }
10071 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010072 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010073 } else
10074 NEXT;
10075 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010076 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010077 }
10078 }
10079 return(standalone);
10080}
10081
10082/**
10083 * xmlParseXMLDecl:
10084 * @ctxt: an XML parser context
10085 *
10086 * parse an XML declaration header
10087 *
10088 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
10089 */
10090
10091void
10092xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
10093 xmlChar *version;
10094
10095 /*
Daniel Veillardae487ba2005-11-17 07:25:52 +000010096 * This value for standalone indicates that the document has an
10097 * XML declaration but it does not have a standalone attribute.
10098 * It will be overwritten later if a standalone attribute is found.
10099 */
10100 ctxt->input->standalone = -2;
10101
10102 /*
Owen Taylor3473f882001-02-23 17:55:21 +000010103 * We know that '<?xml' is here.
10104 */
10105 SKIP(5);
10106
William M. Brack76e95df2003-10-18 16:20:14 +000010107 if (!IS_BLANK_CH(RAW)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +000010108 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
10109 "Blank needed after '<?xml'\n");
Owen Taylor3473f882001-02-23 17:55:21 +000010110 }
10111 SKIP_BLANKS;
10112
10113 /*
Daniel Veillard19840942001-11-29 16:11:38 +000010114 * We must have the VersionInfo here.
Owen Taylor3473f882001-02-23 17:55:21 +000010115 */
10116 version = xmlParseVersionInfo(ctxt);
Daniel Veillard19840942001-11-29 16:11:38 +000010117 if (version == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +000010118 xmlFatalErr(ctxt, XML_ERR_VERSION_MISSING, NULL);
Daniel Veillard19840942001-11-29 16:11:38 +000010119 } else {
10120 if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) {
10121 /*
Daniel Veillard34e3f642008-07-29 09:02:27 +000010122 * Changed here for XML-1.0 5th edition
Daniel Veillard19840942001-11-29 16:11:38 +000010123 */
Daniel Veillard34e3f642008-07-29 09:02:27 +000010124 if (ctxt->options & XML_PARSE_OLD10) {
10125 xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10126 "Unsupported version '%s'\n",
10127 version);
10128 } else {
10129 if ((version[0] == '1') && ((version[1] == '.'))) {
10130 xmlWarningMsg(ctxt, XML_WAR_UNKNOWN_VERSION,
10131 "Unsupported version '%s'\n",
10132 version, NULL);
10133 } else {
10134 xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10135 "Unsupported version '%s'\n",
10136 version);
10137 }
10138 }
Daniel Veillard19840942001-11-29 16:11:38 +000010139 }
10140 if (ctxt->version != NULL)
Daniel Veillardd3b08822001-12-05 12:03:33 +000010141 xmlFree((void *) ctxt->version);
Daniel Veillard19840942001-11-29 16:11:38 +000010142 ctxt->version = version;
Daniel Veillarda050d232001-09-05 15:51:05 +000010143 }
Owen Taylor3473f882001-02-23 17:55:21 +000010144
10145 /*
10146 * We may have the encoding declaration
10147 */
William M. Brack76e95df2003-10-18 16:20:14 +000010148 if (!IS_BLANK_CH(RAW)) {
Owen Taylor3473f882001-02-23 17:55:21 +000010149 if ((RAW == '?') && (NXT(1) == '>')) {
10150 SKIP(2);
10151 return;
10152 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +000010153 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
Owen Taylor3473f882001-02-23 17:55:21 +000010154 }
10155 xmlParseEncodingDecl(ctxt);
10156 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
10157 /*
10158 * The XML REC instructs us to stop parsing right here
10159 */
10160 return;
10161 }
10162
10163 /*
10164 * We may have the standalone status.
10165 */
William M. Brack76e95df2003-10-18 16:20:14 +000010166 if ((ctxt->input->encoding != NULL) && (!IS_BLANK_CH(RAW))) {
Owen Taylor3473f882001-02-23 17:55:21 +000010167 if ((RAW == '?') && (NXT(1) == '>')) {
10168 SKIP(2);
10169 return;
10170 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +000010171 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
Owen Taylor3473f882001-02-23 17:55:21 +000010172 }
Daniel Veillarda6c76a22009-08-26 14:37:00 +020010173
10174 /*
10175 * We can grow the input buffer freely at that point
10176 */
10177 GROW;
10178
Owen Taylor3473f882001-02-23 17:55:21 +000010179 SKIP_BLANKS;
10180 ctxt->input->standalone = xmlParseSDDecl(ctxt);
10181
10182 SKIP_BLANKS;
10183 if ((RAW == '?') && (NXT(1) == '>')) {
10184 SKIP(2);
10185 } else if (RAW == '>') {
10186 /* Deprecated old WD ... */
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010187 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010188 NEXT;
10189 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010190 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010191 MOVETO_ENDTAG(CUR_PTR);
10192 NEXT;
10193 }
10194}
10195
10196/**
10197 * xmlParseMisc:
10198 * @ctxt: an XML parser context
10199 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +000010200 * parse an XML Misc* optional field.
Owen Taylor3473f882001-02-23 17:55:21 +000010201 *
10202 * [27] Misc ::= Comment | PI | S
10203 */
10204
10205void
10206xmlParseMisc(xmlParserCtxtPtr ctxt) {
Daniel Veillard561b7f82002-03-20 21:55:57 +000010207 while (((RAW == '<') && (NXT(1) == '?')) ||
Daniel Veillarda07050d2003-10-19 14:46:32 +000010208 (CMP4(CUR_PTR, '<', '!', '-', '-')) ||
William M. Brack76e95df2003-10-18 16:20:14 +000010209 IS_BLANK_CH(CUR)) {
Daniel Veillard561b7f82002-03-20 21:55:57 +000010210 if ((RAW == '<') && (NXT(1) == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +000010211 xmlParsePI(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +000010212 } else if (IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +000010213 NEXT;
10214 } else
10215 xmlParseComment(ctxt);
10216 }
10217}
10218
10219/**
10220 * xmlParseDocument:
10221 * @ctxt: an XML parser context
10222 *
10223 * parse an XML document (and build a tree if using the standard SAX
10224 * interface).
10225 *
10226 * [1] document ::= prolog element Misc*
10227 *
10228 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
10229 *
10230 * Returns 0, -1 in case of error. the parser context is augmented
10231 * as a result of the parsing.
10232 */
10233
10234int
10235xmlParseDocument(xmlParserCtxtPtr ctxt) {
10236 xmlChar start[4];
10237 xmlCharEncoding enc;
10238
10239 xmlInitParser();
10240
Daniel Veillard36e5cd52004-11-02 14:52:23 +000010241 if ((ctxt == NULL) || (ctxt->input == NULL))
10242 return(-1);
10243
Owen Taylor3473f882001-02-23 17:55:21 +000010244 GROW;
10245
10246 /*
Daniel Veillard0fb18932003-09-07 09:14:37 +000010247 * SAX: detecting the level.
10248 */
Daniel Veillarde57ec792003-09-10 10:50:59 +000010249 xmlDetectSAX2(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +000010250
10251 /*
Owen Taylor3473f882001-02-23 17:55:21 +000010252 * SAX: beginning of the document processing.
10253 */
10254 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10255 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
10256
Nikolay Sivove6ad10a2010-11-01 11:35:14 +010010257 if ((ctxt->encoding == NULL) &&
Daniel Veillard4aede2e2003-10-17 12:43:59 +000010258 ((ctxt->input->end - ctxt->input->cur) >= 4)) {
Daniel Veillard4aafa792001-07-28 17:21:12 +000010259 /*
10260 * Get the 4 first bytes and decode the charset
10261 * if enc != XML_CHAR_ENCODING_NONE
10262 * plug some encoding conversion routines.
10263 */
10264 start[0] = RAW;
10265 start[1] = NXT(1);
10266 start[2] = NXT(2);
10267 start[3] = NXT(3);
Daniel Veillard4aede2e2003-10-17 12:43:59 +000010268 enc = xmlDetectCharEncoding(&start[0], 4);
Daniel Veillard4aafa792001-07-28 17:21:12 +000010269 if (enc != XML_CHAR_ENCODING_NONE) {
10270 xmlSwitchEncoding(ctxt, enc);
10271 }
Owen Taylor3473f882001-02-23 17:55:21 +000010272 }
10273
10274
10275 if (CUR == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010276 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010277 }
10278
10279 /*
10280 * Check for the XMLDecl in the Prolog.
Daniel Veillard7e385bd2009-08-26 11:38:49 +020010281 * do not GROW here to avoid the detected encoder to decode more
Daniel Veillard9d3d1412009-09-15 18:41:30 +020010282 * than just the first line, unless the amount of data is really
10283 * too small to hold "<?xml version="1.0" encoding="foo"
Owen Taylor3473f882001-02-23 17:55:21 +000010284 */
Daniel Veillard9d3d1412009-09-15 18:41:30 +020010285 if ((ctxt->input->end - ctxt->input->cur) < 35) {
10286 GROW;
10287 }
Daniel Veillarda07050d2003-10-19 14:46:32 +000010288 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +000010289
10290 /*
10291 * Note that we will switch encoding on the fly.
10292 */
10293 xmlParseXMLDecl(ctxt);
10294 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
10295 /*
10296 * The XML REC instructs us to stop parsing right here
10297 */
10298 return(-1);
10299 }
10300 ctxt->standalone = ctxt->input->standalone;
10301 SKIP_BLANKS;
10302 } else {
10303 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10304 }
10305 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
10306 ctxt->sax->startDocument(ctxt->userData);
10307
10308 /*
10309 * The Misc part of the Prolog
10310 */
10311 GROW;
10312 xmlParseMisc(ctxt);
10313
10314 /*
10315 * Then possibly doc type declaration(s) and more Misc
10316 * (doctypedecl Misc*)?
10317 */
10318 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +000010319 if (CMP9(CUR_PTR, '<', '!', 'D', 'O', 'C', 'T', 'Y', 'P', 'E')) {
Owen Taylor3473f882001-02-23 17:55:21 +000010320
10321 ctxt->inSubset = 1;
10322 xmlParseDocTypeDecl(ctxt);
10323 if (RAW == '[') {
10324 ctxt->instate = XML_PARSER_DTD;
10325 xmlParseInternalSubset(ctxt);
10326 }
10327
10328 /*
10329 * Create and update the external subset.
10330 */
10331 ctxt->inSubset = 2;
10332 if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
10333 (!ctxt->disableSAX))
10334 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
10335 ctxt->extSubSystem, ctxt->extSubURI);
10336 ctxt->inSubset = 0;
10337
Daniel Veillardac4118d2008-01-11 05:27:32 +000010338 xmlCleanSpecialAttr(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010339
10340 ctxt->instate = XML_PARSER_PROLOG;
10341 xmlParseMisc(ctxt);
10342 }
10343
10344 /*
10345 * Time to start parsing the tree itself
10346 */
10347 GROW;
10348 if (RAW != '<') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +000010349 xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
10350 "Start tag expected, '<' not found\n");
Owen Taylor3473f882001-02-23 17:55:21 +000010351 } else {
10352 ctxt->instate = XML_PARSER_CONTENT;
10353 xmlParseElement(ctxt);
10354 ctxt->instate = XML_PARSER_EPILOG;
10355
10356
10357 /*
10358 * The Misc part at the end
10359 */
10360 xmlParseMisc(ctxt);
10361
Daniel Veillard561b7f82002-03-20 21:55:57 +000010362 if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010363 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010364 }
10365 ctxt->instate = XML_PARSER_EOF;
10366 }
10367
10368 /*
10369 * SAX: end of the document processing.
10370 */
Daniel Veillard8d24cc12002-03-05 15:41:29 +000010371 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +000010372 ctxt->sax->endDocument(ctxt->userData);
10373
Daniel Veillard5997aca2002-03-18 18:36:20 +000010374 /*
10375 * Remove locally kept entity definitions if the tree was not built
10376 */
10377 if ((ctxt->myDoc != NULL) &&
10378 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
10379 xmlFreeDoc(ctxt->myDoc);
10380 ctxt->myDoc = NULL;
10381 }
10382
Daniel Veillardae0765b2008-07-31 19:54:59 +000010383 if ((ctxt->wellFormed) && (ctxt->myDoc != NULL)) {
10384 ctxt->myDoc->properties |= XML_DOC_WELLFORMED;
10385 if (ctxt->valid)
10386 ctxt->myDoc->properties |= XML_DOC_DTDVALID;
10387 if (ctxt->nsWellFormed)
10388 ctxt->myDoc->properties |= XML_DOC_NSVALID;
10389 if (ctxt->options & XML_PARSE_OLD10)
10390 ctxt->myDoc->properties |= XML_DOC_OLD10;
10391 }
Daniel Veillardc7612992002-02-17 22:47:37 +000010392 if (! ctxt->wellFormed) {
10393 ctxt->valid = 0;
10394 return(-1);
10395 }
Owen Taylor3473f882001-02-23 17:55:21 +000010396 return(0);
10397}
10398
10399/**
10400 * xmlParseExtParsedEnt:
10401 * @ctxt: an XML parser context
10402 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +000010403 * parse a general parsed entity
Owen Taylor3473f882001-02-23 17:55:21 +000010404 * An external general parsed entity is well-formed if it matches the
10405 * production labeled extParsedEnt.
10406 *
10407 * [78] extParsedEnt ::= TextDecl? content
10408 *
10409 * Returns 0, -1 in case of error. the parser context is augmented
10410 * as a result of the parsing.
10411 */
10412
10413int
10414xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
10415 xmlChar start[4];
10416 xmlCharEncoding enc;
10417
Daniel Veillard36e5cd52004-11-02 14:52:23 +000010418 if ((ctxt == NULL) || (ctxt->input == NULL))
10419 return(-1);
10420
Owen Taylor3473f882001-02-23 17:55:21 +000010421 xmlDefaultSAXHandlerInit();
10422
Daniel Veillard309f81d2003-09-23 09:02:53 +000010423 xmlDetectSAX2(ctxt);
10424
Owen Taylor3473f882001-02-23 17:55:21 +000010425 GROW;
10426
10427 /*
10428 * SAX: beginning of the document processing.
10429 */
10430 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10431 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
10432
10433 /*
10434 * Get the 4 first bytes and decode the charset
10435 * if enc != XML_CHAR_ENCODING_NONE
10436 * plug some encoding conversion routines.
10437 */
Daniel Veillard4aede2e2003-10-17 12:43:59 +000010438 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
10439 start[0] = RAW;
10440 start[1] = NXT(1);
10441 start[2] = NXT(2);
10442 start[3] = NXT(3);
10443 enc = xmlDetectCharEncoding(start, 4);
10444 if (enc != XML_CHAR_ENCODING_NONE) {
10445 xmlSwitchEncoding(ctxt, enc);
10446 }
Owen Taylor3473f882001-02-23 17:55:21 +000010447 }
10448
10449
10450 if (CUR == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010451 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010452 }
10453
10454 /*
10455 * Check for the XMLDecl in the Prolog.
10456 */
10457 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +000010458 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +000010459
10460 /*
10461 * Note that we will switch encoding on the fly.
10462 */
10463 xmlParseXMLDecl(ctxt);
10464 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
10465 /*
10466 * The XML REC instructs us to stop parsing right here
10467 */
10468 return(-1);
10469 }
10470 SKIP_BLANKS;
10471 } else {
10472 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10473 }
10474 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
10475 ctxt->sax->startDocument(ctxt->userData);
10476
10477 /*
10478 * Doing validity checking on chunk doesn't make sense
10479 */
10480 ctxt->instate = XML_PARSER_CONTENT;
10481 ctxt->validate = 0;
10482 ctxt->loadsubset = 0;
10483 ctxt->depth = 0;
10484
10485 xmlParseContent(ctxt);
10486
10487 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010488 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010489 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010490 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010491 }
10492
10493 /*
10494 * SAX: end of the document processing.
10495 */
Daniel Veillard8d24cc12002-03-05 15:41:29 +000010496 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +000010497 ctxt->sax->endDocument(ctxt->userData);
10498
10499 if (! ctxt->wellFormed) return(-1);
10500 return(0);
10501}
10502
Daniel Veillard73b013f2003-09-30 12:36:01 +000010503#ifdef LIBXML_PUSH_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000010504/************************************************************************
10505 * *
10506 * Progressive parsing interfaces *
10507 * *
10508 ************************************************************************/
10509
10510/**
10511 * xmlParseLookupSequence:
10512 * @ctxt: an XML parser context
10513 * @first: the first char to lookup
10514 * @next: the next char to lookup or zero
10515 * @third: the next char to lookup or zero
10516 *
10517 * Try to find if a sequence (first, next, third) or just (first next) or
10518 * (first) is available in the input stream.
10519 * This function has a side effect of (possibly) incrementing ctxt->checkIndex
10520 * to avoid rescanning sequences of bytes, it DOES change the state of the
10521 * parser, do not use liberally.
10522 *
10523 * Returns the index to the current parsing point if the full sequence
10524 * is available, -1 otherwise.
10525 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +000010526static int
Owen Taylor3473f882001-02-23 17:55:21 +000010527xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
10528 xmlChar next, xmlChar third) {
10529 int base, len;
10530 xmlParserInputPtr in;
10531 const xmlChar *buf;
10532
10533 in = ctxt->input;
10534 if (in == NULL) return(-1);
10535 base = in->cur - in->base;
10536 if (base < 0) return(-1);
10537 if (ctxt->checkIndex > base)
10538 base = ctxt->checkIndex;
10539 if (in->buf == NULL) {
10540 buf = in->base;
10541 len = in->length;
10542 } else {
10543 buf = in->buf->buffer->content;
10544 len = in->buf->buffer->use;
10545 }
10546 /* take into account the sequence length */
10547 if (third) len -= 2;
10548 else if (next) len --;
10549 for (;base < len;base++) {
10550 if (buf[base] == first) {
10551 if (third != 0) {
10552 if ((buf[base + 1] != next) ||
10553 (buf[base + 2] != third)) continue;
10554 } else if (next != 0) {
10555 if (buf[base + 1] != next) continue;
10556 }
10557 ctxt->checkIndex = 0;
10558#ifdef DEBUG_PUSH
10559 if (next == 0)
10560 xmlGenericError(xmlGenericErrorContext,
10561 "PP: lookup '%c' found at %d\n",
10562 first, base);
10563 else if (third == 0)
10564 xmlGenericError(xmlGenericErrorContext,
10565 "PP: lookup '%c%c' found at %d\n",
10566 first, next, base);
10567 else
10568 xmlGenericError(xmlGenericErrorContext,
10569 "PP: lookup '%c%c%c' found at %d\n",
10570 first, next, third, base);
10571#endif
10572 return(base - (in->cur - in->base));
10573 }
10574 }
10575 ctxt->checkIndex = base;
10576#ifdef DEBUG_PUSH
10577 if (next == 0)
10578 xmlGenericError(xmlGenericErrorContext,
10579 "PP: lookup '%c' failed\n", first);
10580 else if (third == 0)
10581 xmlGenericError(xmlGenericErrorContext,
10582 "PP: lookup '%c%c' failed\n", first, next);
10583 else
10584 xmlGenericError(xmlGenericErrorContext,
10585 "PP: lookup '%c%c%c' failed\n", first, next, third);
10586#endif
10587 return(-1);
10588}
10589
10590/**
Daniel Veillarda880b122003-04-21 21:36:41 +000010591 * xmlParseGetLasts:
10592 * @ctxt: an XML parser context
10593 * @lastlt: pointer to store the last '<' from the input
10594 * @lastgt: pointer to store the last '>' from the input
10595 *
10596 * Lookup the last < and > in the current chunk
10597 */
10598static void
10599xmlParseGetLasts(xmlParserCtxtPtr ctxt, const xmlChar **lastlt,
10600 const xmlChar **lastgt) {
10601 const xmlChar *tmp;
10602
10603 if ((ctxt == NULL) || (lastlt == NULL) || (lastgt == NULL)) {
10604 xmlGenericError(xmlGenericErrorContext,
10605 "Internal error: xmlParseGetLasts\n");
10606 return;
10607 }
Daniel Veillard0df3bc32004-06-08 12:03:41 +000010608 if ((ctxt->progressive != 0) && (ctxt->inputNr == 1)) {
Daniel Veillarda880b122003-04-21 21:36:41 +000010609 tmp = ctxt->input->end;
10610 tmp--;
Daniel Veillardeb70f932004-07-05 16:46:09 +000010611 while ((tmp >= ctxt->input->base) && (*tmp != '<')) tmp--;
Daniel Veillarda880b122003-04-21 21:36:41 +000010612 if (tmp < ctxt->input->base) {
10613 *lastlt = NULL;
10614 *lastgt = NULL;
Daniel Veillarda880b122003-04-21 21:36:41 +000010615 } else {
Daniel Veillardeb70f932004-07-05 16:46:09 +000010616 *lastlt = tmp;
10617 tmp++;
10618 while ((tmp < ctxt->input->end) && (*tmp != '>')) {
10619 if (*tmp == '\'') {
10620 tmp++;
10621 while ((tmp < ctxt->input->end) && (*tmp != '\'')) tmp++;
10622 if (tmp < ctxt->input->end) tmp++;
10623 } else if (*tmp == '"') {
10624 tmp++;
10625 while ((tmp < ctxt->input->end) && (*tmp != '"')) tmp++;
10626 if (tmp < ctxt->input->end) tmp++;
10627 } else
10628 tmp++;
10629 }
10630 if (tmp < ctxt->input->end)
10631 *lastgt = tmp;
10632 else {
10633 tmp = *lastlt;
10634 tmp--;
10635 while ((tmp >= ctxt->input->base) && (*tmp != '>')) tmp--;
10636 if (tmp >= ctxt->input->base)
10637 *lastgt = tmp;
10638 else
10639 *lastgt = NULL;
10640 }
Daniel Veillarda880b122003-04-21 21:36:41 +000010641 }
Daniel Veillarda880b122003-04-21 21:36:41 +000010642 } else {
10643 *lastlt = NULL;
10644 *lastgt = NULL;
10645 }
10646}
10647/**
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000010648 * xmlCheckCdataPush:
10649 * @cur: pointer to the bock of characters
10650 * @len: length of the block in bytes
10651 *
10652 * Check that the block of characters is okay as SCdata content [20]
10653 *
10654 * Returns the number of bytes to pass if okay, a negative index where an
10655 * UTF-8 error occured otherwise
10656 */
10657static int
10658xmlCheckCdataPush(const xmlChar *utf, int len) {
10659 int ix;
10660 unsigned char c;
10661 int codepoint;
10662
10663 if ((utf == NULL) || (len <= 0))
10664 return(0);
10665
10666 for (ix = 0; ix < len;) { /* string is 0-terminated */
10667 c = utf[ix];
10668 if ((c & 0x80) == 0x00) { /* 1-byte code, starts with 10 */
10669 if (c >= 0x20)
10670 ix++;
10671 else if ((c == 0xA) || (c == 0xD) || (c == 0x9))
10672 ix++;
10673 else
10674 return(-ix);
10675 } else if ((c & 0xe0) == 0xc0) {/* 2-byte code, starts with 110 */
10676 if (ix + 2 > len) return(ix);
10677 if ((utf[ix+1] & 0xc0 ) != 0x80)
10678 return(-ix);
Daniel Veillardeca59a22005-09-09 10:56:28 +000010679 codepoint = (utf[ix] & 0x1f) << 6;
10680 codepoint |= utf[ix+1] & 0x3f;
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000010681 if (!xmlIsCharQ(codepoint))
10682 return(-ix);
10683 ix += 2;
10684 } else if ((c & 0xf0) == 0xe0) {/* 3-byte code, starts with 1110 */
10685 if (ix + 3 > len) return(ix);
10686 if (((utf[ix+1] & 0xc0) != 0x80) ||
10687 ((utf[ix+2] & 0xc0) != 0x80))
10688 return(-ix);
Daniel Veillardeca59a22005-09-09 10:56:28 +000010689 codepoint = (utf[ix] & 0xf) << 12;
10690 codepoint |= (utf[ix+1] & 0x3f) << 6;
10691 codepoint |= utf[ix+2] & 0x3f;
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000010692 if (!xmlIsCharQ(codepoint))
10693 return(-ix);
10694 ix += 3;
10695 } else if ((c & 0xf8) == 0xf0) {/* 4-byte code, starts with 11110 */
10696 if (ix + 4 > len) return(ix);
10697 if (((utf[ix+1] & 0xc0) != 0x80) ||
10698 ((utf[ix+2] & 0xc0) != 0x80) ||
10699 ((utf[ix+3] & 0xc0) != 0x80))
10700 return(-ix);
Daniel Veillardeca59a22005-09-09 10:56:28 +000010701 codepoint = (utf[ix] & 0x7) << 18;
10702 codepoint |= (utf[ix+1] & 0x3f) << 12;
10703 codepoint |= (utf[ix+2] & 0x3f) << 6;
10704 codepoint |= utf[ix+3] & 0x3f;
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000010705 if (!xmlIsCharQ(codepoint))
10706 return(-ix);
10707 ix += 4;
10708 } else /* unknown encoding */
10709 return(-ix);
10710 }
10711 return(ix);
10712}
10713
10714/**
Owen Taylor3473f882001-02-23 17:55:21 +000010715 * xmlParseTryOrFinish:
10716 * @ctxt: an XML parser context
10717 * @terminate: last chunk indicator
10718 *
10719 * Try to progress on parsing
10720 *
10721 * Returns zero if no parsing was possible
10722 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +000010723static int
Owen Taylor3473f882001-02-23 17:55:21 +000010724xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
10725 int ret = 0;
Daniel Veillardc82c57e2004-01-12 16:24:34 +000010726 int avail, tlen;
Owen Taylor3473f882001-02-23 17:55:21 +000010727 xmlChar cur, next;
Daniel Veillarda880b122003-04-21 21:36:41 +000010728 const xmlChar *lastlt, *lastgt;
Owen Taylor3473f882001-02-23 17:55:21 +000010729
Daniel Veillard36e5cd52004-11-02 14:52:23 +000010730 if (ctxt->input == NULL)
10731 return(0);
10732
Owen Taylor3473f882001-02-23 17:55:21 +000010733#ifdef DEBUG_PUSH
10734 switch (ctxt->instate) {
10735 case XML_PARSER_EOF:
10736 xmlGenericError(xmlGenericErrorContext,
10737 "PP: try EOF\n"); break;
10738 case XML_PARSER_START:
10739 xmlGenericError(xmlGenericErrorContext,
10740 "PP: try START\n"); break;
10741 case XML_PARSER_MISC:
10742 xmlGenericError(xmlGenericErrorContext,
10743 "PP: try MISC\n");break;
10744 case XML_PARSER_COMMENT:
10745 xmlGenericError(xmlGenericErrorContext,
10746 "PP: try COMMENT\n");break;
10747 case XML_PARSER_PROLOG:
10748 xmlGenericError(xmlGenericErrorContext,
10749 "PP: try PROLOG\n");break;
10750 case XML_PARSER_START_TAG:
10751 xmlGenericError(xmlGenericErrorContext,
10752 "PP: try START_TAG\n");break;
10753 case XML_PARSER_CONTENT:
10754 xmlGenericError(xmlGenericErrorContext,
10755 "PP: try CONTENT\n");break;
10756 case XML_PARSER_CDATA_SECTION:
10757 xmlGenericError(xmlGenericErrorContext,
10758 "PP: try CDATA_SECTION\n");break;
10759 case XML_PARSER_END_TAG:
10760 xmlGenericError(xmlGenericErrorContext,
10761 "PP: try END_TAG\n");break;
10762 case XML_PARSER_ENTITY_DECL:
10763 xmlGenericError(xmlGenericErrorContext,
10764 "PP: try ENTITY_DECL\n");break;
10765 case XML_PARSER_ENTITY_VALUE:
10766 xmlGenericError(xmlGenericErrorContext,
10767 "PP: try ENTITY_VALUE\n");break;
10768 case XML_PARSER_ATTRIBUTE_VALUE:
10769 xmlGenericError(xmlGenericErrorContext,
10770 "PP: try ATTRIBUTE_VALUE\n");break;
10771 case XML_PARSER_DTD:
10772 xmlGenericError(xmlGenericErrorContext,
10773 "PP: try DTD\n");break;
10774 case XML_PARSER_EPILOG:
10775 xmlGenericError(xmlGenericErrorContext,
10776 "PP: try EPILOG\n");break;
10777 case XML_PARSER_PI:
10778 xmlGenericError(xmlGenericErrorContext,
10779 "PP: try PI\n");break;
10780 case XML_PARSER_IGNORE:
10781 xmlGenericError(xmlGenericErrorContext,
10782 "PP: try IGNORE\n");break;
10783 }
10784#endif
10785
Daniel Veillard198c1bf2003-10-20 17:07:41 +000010786 if ((ctxt->input != NULL) &&
10787 (ctxt->input->cur - ctxt->input->base > 4096)) {
Daniel Veillarda880b122003-04-21 21:36:41 +000010788 xmlSHRINK(ctxt);
10789 ctxt->checkIndex = 0;
10790 }
10791 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Aleksey Sanine48a3182002-05-09 18:20:01 +000010792
Daniel Veillarda880b122003-04-21 21:36:41 +000010793 while (1) {
Daniel Veillard14412512005-01-21 23:53:26 +000010794 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000010795 return(0);
10796
10797
Owen Taylor3473f882001-02-23 17:55:21 +000010798 /*
10799 * Pop-up of finished entities.
10800 */
10801 while ((RAW == 0) && (ctxt->inputNr > 1))
10802 xmlPopInput(ctxt);
10803
Daniel Veillard198c1bf2003-10-20 17:07:41 +000010804 if (ctxt->input == NULL) break;
Owen Taylor3473f882001-02-23 17:55:21 +000010805 if (ctxt->input->buf == NULL)
Daniel Veillarda880b122003-04-21 21:36:41 +000010806 avail = ctxt->input->length -
10807 (ctxt->input->cur - ctxt->input->base);
Daniel Veillard158a4d22002-02-20 22:17:58 +000010808 else {
10809 /*
10810 * If we are operating on converted input, try to flush
10811 * remainng chars to avoid them stalling in the non-converted
10812 * buffer.
10813 */
10814 if ((ctxt->input->buf->raw != NULL) &&
10815 (ctxt->input->buf->raw->use > 0)) {
10816 int base = ctxt->input->base -
10817 ctxt->input->buf->buffer->content;
10818 int current = ctxt->input->cur - ctxt->input->base;
10819
10820 xmlParserInputBufferPush(ctxt->input->buf, 0, "");
10821 ctxt->input->base = ctxt->input->buf->buffer->content + base;
10822 ctxt->input->cur = ctxt->input->base + current;
10823 ctxt->input->end =
10824 &ctxt->input->buf->buffer->content[
10825 ctxt->input->buf->buffer->use];
10826 }
10827 avail = ctxt->input->buf->buffer->use -
10828 (ctxt->input->cur - ctxt->input->base);
10829 }
Owen Taylor3473f882001-02-23 17:55:21 +000010830 if (avail < 1)
10831 goto done;
10832 switch (ctxt->instate) {
10833 case XML_PARSER_EOF:
10834 /*
10835 * Document parsing is done !
10836 */
10837 goto done;
10838 case XML_PARSER_START:
Daniel Veillard0e4cd172001-06-28 12:13:56 +000010839 if (ctxt->charset == XML_CHAR_ENCODING_NONE) {
10840 xmlChar start[4];
10841 xmlCharEncoding enc;
10842
10843 /*
10844 * Very first chars read from the document flow.
10845 */
10846 if (avail < 4)
10847 goto done;
10848
10849 /*
10850 * Get the 4 first bytes and decode the charset
10851 * if enc != XML_CHAR_ENCODING_NONE
William M. Brack3a1cd212005-02-11 14:35:54 +000010852 * plug some encoding conversion routines,
10853 * else xmlSwitchEncoding will set to (default)
10854 * UTF8.
Daniel Veillard0e4cd172001-06-28 12:13:56 +000010855 */
10856 start[0] = RAW;
10857 start[1] = NXT(1);
10858 start[2] = NXT(2);
10859 start[3] = NXT(3);
10860 enc = xmlDetectCharEncoding(start, 4);
William M. Brack3a1cd212005-02-11 14:35:54 +000010861 xmlSwitchEncoding(ctxt, enc);
Daniel Veillard0e4cd172001-06-28 12:13:56 +000010862 break;
10863 }
Owen Taylor3473f882001-02-23 17:55:21 +000010864
Daniel Veillard2b8c4a12003-10-02 22:28:19 +000010865 if (avail < 2)
10866 goto done;
Owen Taylor3473f882001-02-23 17:55:21 +000010867 cur = ctxt->input->cur[0];
10868 next = ctxt->input->cur[1];
10869 if (cur == 0) {
10870 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10871 ctxt->sax->setDocumentLocator(ctxt->userData,
10872 &xmlDefaultSAXLocator);
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010873 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010874 ctxt->instate = XML_PARSER_EOF;
10875#ifdef DEBUG_PUSH
10876 xmlGenericError(xmlGenericErrorContext,
10877 "PP: entering EOF\n");
10878#endif
10879 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10880 ctxt->sax->endDocument(ctxt->userData);
10881 goto done;
10882 }
10883 if ((cur == '<') && (next == '?')) {
10884 /* PI or XML decl */
10885 if (avail < 5) return(ret);
10886 if ((!terminate) &&
10887 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
10888 return(ret);
10889 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10890 ctxt->sax->setDocumentLocator(ctxt->userData,
10891 &xmlDefaultSAXLocator);
10892 if ((ctxt->input->cur[2] == 'x') &&
10893 (ctxt->input->cur[3] == 'm') &&
10894 (ctxt->input->cur[4] == 'l') &&
William M. Brack76e95df2003-10-18 16:20:14 +000010895 (IS_BLANK_CH(ctxt->input->cur[5]))) {
Owen Taylor3473f882001-02-23 17:55:21 +000010896 ret += 5;
10897#ifdef DEBUG_PUSH
10898 xmlGenericError(xmlGenericErrorContext,
10899 "PP: Parsing XML Decl\n");
10900#endif
10901 xmlParseXMLDecl(ctxt);
10902 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
10903 /*
10904 * The XML REC instructs us to stop parsing right
10905 * here
10906 */
10907 ctxt->instate = XML_PARSER_EOF;
10908 return(0);
10909 }
10910 ctxt->standalone = ctxt->input->standalone;
10911 if ((ctxt->encoding == NULL) &&
10912 (ctxt->input->encoding != NULL))
10913 ctxt->encoding = xmlStrdup(ctxt->input->encoding);
10914 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
10915 (!ctxt->disableSAX))
10916 ctxt->sax->startDocument(ctxt->userData);
10917 ctxt->instate = XML_PARSER_MISC;
10918#ifdef DEBUG_PUSH
10919 xmlGenericError(xmlGenericErrorContext,
10920 "PP: entering MISC\n");
10921#endif
10922 } else {
10923 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10924 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
10925 (!ctxt->disableSAX))
10926 ctxt->sax->startDocument(ctxt->userData);
10927 ctxt->instate = XML_PARSER_MISC;
10928#ifdef DEBUG_PUSH
10929 xmlGenericError(xmlGenericErrorContext,
10930 "PP: entering MISC\n");
10931#endif
10932 }
10933 } else {
10934 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10935 ctxt->sax->setDocumentLocator(ctxt->userData,
10936 &xmlDefaultSAXLocator);
10937 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
William M. Bracka3215c72004-07-31 16:24:01 +000010938 if (ctxt->version == NULL) {
10939 xmlErrMemory(ctxt, NULL);
10940 break;
10941 }
Owen Taylor3473f882001-02-23 17:55:21 +000010942 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
10943 (!ctxt->disableSAX))
10944 ctxt->sax->startDocument(ctxt->userData);
10945 ctxt->instate = XML_PARSER_MISC;
10946#ifdef DEBUG_PUSH
10947 xmlGenericError(xmlGenericErrorContext,
10948 "PP: entering MISC\n");
10949#endif
10950 }
10951 break;
Daniel Veillarda880b122003-04-21 21:36:41 +000010952 case XML_PARSER_START_TAG: {
Daniel Veillarde57ec792003-09-10 10:50:59 +000010953 const xmlChar *name;
Daniel Veillard15495612009-09-05 15:04:41 +020010954 const xmlChar *prefix = NULL;
10955 const xmlChar *URI = NULL;
Daniel Veillarde57ec792003-09-10 10:50:59 +000010956 int nsNr = ctxt->nsNr;
Daniel Veillarda880b122003-04-21 21:36:41 +000010957
10958 if ((avail < 2) && (ctxt->inputNr == 1))
10959 goto done;
10960 cur = ctxt->input->cur[0];
10961 if (cur != '<') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010962 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Daniel Veillarda880b122003-04-21 21:36:41 +000010963 ctxt->instate = XML_PARSER_EOF;
Daniel Veillarda880b122003-04-21 21:36:41 +000010964 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10965 ctxt->sax->endDocument(ctxt->userData);
10966 goto done;
10967 }
10968 if (!terminate) {
10969 if (ctxt->progressive) {
Daniel Veillardb3744002004-02-18 14:28:22 +000010970 /* > can be found unescaped in attribute values */
Daniel Veillardeb70f932004-07-05 16:46:09 +000010971 if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
Daniel Veillarda880b122003-04-21 21:36:41 +000010972 goto done;
10973 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
10974 goto done;
10975 }
10976 }
10977 if (ctxt->spaceNr == 0)
10978 spacePush(ctxt, -1);
Daniel Veillard1114d002006-10-12 16:24:35 +000010979 else if (*ctxt->space == -2)
10980 spacePush(ctxt, -1);
Daniel Veillarda880b122003-04-21 21:36:41 +000010981 else
10982 spacePush(ctxt, *ctxt->space);
Daniel Veillard81273902003-09-30 00:43:48 +000010983#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +000010984 if (ctxt->sax2)
Daniel Veillard81273902003-09-30 00:43:48 +000010985#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillardc82c57e2004-01-12 16:24:34 +000010986 name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
Daniel Veillard81273902003-09-30 00:43:48 +000010987#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +000010988 else
10989 name = xmlParseStartTag(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +000010990#endif /* LIBXML_SAX1_ENABLED */
Chris Evans77404b82011-12-14 16:18:25 +080010991 if (ctxt->instate == XML_PARSER_EOF)
10992 goto done;
Daniel Veillarda880b122003-04-21 21:36:41 +000010993 if (name == NULL) {
10994 spacePop(ctxt);
10995 ctxt->instate = XML_PARSER_EOF;
Daniel Veillarda880b122003-04-21 21:36:41 +000010996 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10997 ctxt->sax->endDocument(ctxt->userData);
10998 goto done;
10999 }
Daniel Veillard4432df22003-09-28 18:58:27 +000011000#ifdef LIBXML_VALID_ENABLED
Daniel Veillarda880b122003-04-21 21:36:41 +000011001 /*
11002 * [ VC: Root Element Type ]
11003 * The Name in the document type declaration must match
11004 * the element type of the root element.
11005 */
11006 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
11007 ctxt->node && (ctxt->node == ctxt->myDoc->children))
11008 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
Daniel Veillard4432df22003-09-28 18:58:27 +000011009#endif /* LIBXML_VALID_ENABLED */
Daniel Veillarda880b122003-04-21 21:36:41 +000011010
11011 /*
11012 * Check for an Empty Element.
11013 */
11014 if ((RAW == '/') && (NXT(1) == '>')) {
11015 SKIP(2);
Daniel Veillarde57ec792003-09-10 10:50:59 +000011016
11017 if (ctxt->sax2) {
11018 if ((ctxt->sax != NULL) &&
11019 (ctxt->sax->endElementNs != NULL) &&
11020 (!ctxt->disableSAX))
11021 ctxt->sax->endElementNs(ctxt->userData, name,
11022 prefix, URI);
Daniel Veillard48df9612005-01-04 21:50:05 +000011023 if (ctxt->nsNr - nsNr > 0)
11024 nsPop(ctxt, ctxt->nsNr - nsNr);
Daniel Veillard81273902003-09-30 00:43:48 +000011025#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +000011026 } else {
11027 if ((ctxt->sax != NULL) &&
11028 (ctxt->sax->endElement != NULL) &&
11029 (!ctxt->disableSAX))
11030 ctxt->sax->endElement(ctxt->userData, name);
Daniel Veillard81273902003-09-30 00:43:48 +000011031#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarda880b122003-04-21 21:36:41 +000011032 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000011033 spacePop(ctxt);
11034 if (ctxt->nameNr == 0) {
Daniel Veillarda880b122003-04-21 21:36:41 +000011035 ctxt->instate = XML_PARSER_EPILOG;
Daniel Veillarda880b122003-04-21 21:36:41 +000011036 } else {
11037 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillarda880b122003-04-21 21:36:41 +000011038 }
11039 break;
11040 }
11041 if (RAW == '>') {
11042 NEXT;
11043 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +000011044 xmlFatalErrMsgStr(ctxt, XML_ERR_GT_REQUIRED,
Daniel Veillarda880b122003-04-21 21:36:41 +000011045 "Couldn't find end of Start Tag %s\n",
11046 name);
Daniel Veillarda880b122003-04-21 21:36:41 +000011047 nodePop(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +000011048 spacePop(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +000011049 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000011050 if (ctxt->sax2)
11051 nameNsPush(ctxt, name, prefix, URI, ctxt->nsNr - nsNr);
Daniel Veillard81273902003-09-30 00:43:48 +000011052#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +000011053 else
11054 namePush(ctxt, name);
Daniel Veillard81273902003-09-30 00:43:48 +000011055#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +000011056
Daniel Veillarda880b122003-04-21 21:36:41 +000011057 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillarda880b122003-04-21 21:36:41 +000011058 break;
11059 }
11060 case XML_PARSER_CONTENT: {
11061 const xmlChar *test;
11062 unsigned int cons;
11063 if ((avail < 2) && (ctxt->inputNr == 1))
11064 goto done;
11065 cur = ctxt->input->cur[0];
11066 next = ctxt->input->cur[1];
11067
11068 test = CUR_PTR;
11069 cons = ctxt->input->consumed;
11070 if ((cur == '<') && (next == '/')) {
11071 ctxt->instate = XML_PARSER_END_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +000011072 break;
11073 } else if ((cur == '<') && (next == '?')) {
11074 if ((!terminate) &&
11075 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
11076 goto done;
Daniel Veillarda880b122003-04-21 21:36:41 +000011077 xmlParsePI(ctxt);
11078 } else if ((cur == '<') && (next != '!')) {
11079 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +000011080 break;
11081 } else if ((cur == '<') && (next == '!') &&
11082 (ctxt->input->cur[2] == '-') &&
11083 (ctxt->input->cur[3] == '-')) {
Daniel Veillard6974feb2006-02-05 02:43:36 +000011084 int term;
11085
11086 if (avail < 4)
11087 goto done;
11088 ctxt->input->cur += 4;
11089 term = xmlParseLookupSequence(ctxt, '-', '-', '>');
11090 ctxt->input->cur -= 4;
11091 if ((!terminate) && (term < 0))
Daniel Veillarda880b122003-04-21 21:36:41 +000011092 goto done;
Daniel Veillarda880b122003-04-21 21:36:41 +000011093 xmlParseComment(ctxt);
11094 ctxt->instate = XML_PARSER_CONTENT;
11095 } else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
11096 (ctxt->input->cur[2] == '[') &&
11097 (ctxt->input->cur[3] == 'C') &&
11098 (ctxt->input->cur[4] == 'D') &&
11099 (ctxt->input->cur[5] == 'A') &&
11100 (ctxt->input->cur[6] == 'T') &&
11101 (ctxt->input->cur[7] == 'A') &&
11102 (ctxt->input->cur[8] == '[')) {
11103 SKIP(9);
11104 ctxt->instate = XML_PARSER_CDATA_SECTION;
Daniel Veillarda880b122003-04-21 21:36:41 +000011105 break;
11106 } else if ((cur == '<') && (next == '!') &&
11107 (avail < 9)) {
11108 goto done;
11109 } else if (cur == '&') {
11110 if ((!terminate) &&
11111 (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0))
11112 goto done;
Daniel Veillarda880b122003-04-21 21:36:41 +000011113 xmlParseReference(ctxt);
11114 } else {
11115 /* TODO Avoid the extra copy, handle directly !!! */
11116 /*
11117 * Goal of the following test is:
11118 * - minimize calls to the SAX 'character' callback
11119 * when they are mergeable
11120 * - handle an problem for isBlank when we only parse
11121 * a sequence of blank chars and the next one is
11122 * not available to check against '<' presence.
11123 * - tries to homogenize the differences in SAX
11124 * callbacks between the push and pull versions
11125 * of the parser.
11126 */
11127 if ((ctxt->inputNr == 1) &&
11128 (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
11129 if (!terminate) {
11130 if (ctxt->progressive) {
11131 if ((lastlt == NULL) ||
11132 (ctxt->input->cur > lastlt))
11133 goto done;
11134 } else if (xmlParseLookupSequence(ctxt,
11135 '<', 0, 0) < 0) {
11136 goto done;
11137 }
11138 }
11139 }
11140 ctxt->checkIndex = 0;
Daniel Veillarda880b122003-04-21 21:36:41 +000011141 xmlParseCharData(ctxt, 0);
11142 }
11143 /*
11144 * Pop-up of finished entities.
11145 */
11146 while ((RAW == 0) && (ctxt->inputNr > 1))
11147 xmlPopInput(ctxt);
11148 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011149 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
11150 "detected an error in element content\n");
Daniel Veillarda880b122003-04-21 21:36:41 +000011151 ctxt->instate = XML_PARSER_EOF;
11152 break;
11153 }
11154 break;
11155 }
11156 case XML_PARSER_END_TAG:
11157 if (avail < 2)
11158 goto done;
11159 if (!terminate) {
11160 if (ctxt->progressive) {
Daniel Veillardeb70f932004-07-05 16:46:09 +000011161 /* > can be found unescaped in attribute values */
11162 if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
Daniel Veillarda880b122003-04-21 21:36:41 +000011163 goto done;
11164 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
11165 goto done;
11166 }
11167 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000011168 if (ctxt->sax2) {
11169 xmlParseEndTag2(ctxt,
11170 (void *) ctxt->pushTab[ctxt->nameNr * 3 - 3],
11171 (void *) ctxt->pushTab[ctxt->nameNr * 3 - 2], 0,
Daniel Veillardc82c57e2004-01-12 16:24:34 +000011172 (int) (long) ctxt->pushTab[ctxt->nameNr * 3 - 1], 0);
Daniel Veillarde57ec792003-09-10 10:50:59 +000011173 nameNsPop(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +000011174 }
11175#ifdef LIBXML_SAX1_ENABLED
11176 else
Daniel Veillarde57ec792003-09-10 10:50:59 +000011177 xmlParseEndTag1(ctxt, 0);
Daniel Veillard81273902003-09-30 00:43:48 +000011178#endif /* LIBXML_SAX1_ENABLED */
Chris Evans77404b82011-12-14 16:18:25 +080011179 if (ctxt->instate == XML_PARSER_EOF) {
11180 /* Nothing */
11181 } else if (ctxt->nameNr == 0) {
Daniel Veillarda880b122003-04-21 21:36:41 +000011182 ctxt->instate = XML_PARSER_EPILOG;
Daniel Veillarda880b122003-04-21 21:36:41 +000011183 } else {
11184 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillarda880b122003-04-21 21:36:41 +000011185 }
11186 break;
11187 case XML_PARSER_CDATA_SECTION: {
11188 /*
11189 * The Push mode need to have the SAX callback for
11190 * cdataBlock merge back contiguous callbacks.
11191 */
11192 int base;
11193
11194 base = xmlParseLookupSequence(ctxt, ']', ']', '>');
11195 if (base < 0) {
11196 if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) {
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011197 int tmp;
11198
11199 tmp = xmlCheckCdataPush(ctxt->input->cur,
11200 XML_PARSER_BIG_BUFFER_SIZE);
11201 if (tmp < 0) {
11202 tmp = -tmp;
11203 ctxt->input->cur += tmp;
11204 goto encoding_error;
11205 }
Daniel Veillarda880b122003-04-21 21:36:41 +000011206 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
11207 if (ctxt->sax->cdataBlock != NULL)
Daniel Veillardd9d32ae2003-07-05 20:32:43 +000011208 ctxt->sax->cdataBlock(ctxt->userData,
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011209 ctxt->input->cur, tmp);
Daniel Veillardd9d32ae2003-07-05 20:32:43 +000011210 else if (ctxt->sax->characters != NULL)
11211 ctxt->sax->characters(ctxt->userData,
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011212 ctxt->input->cur, tmp);
Daniel Veillarda880b122003-04-21 21:36:41 +000011213 }
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011214 SKIPL(tmp);
Daniel Veillarda880b122003-04-21 21:36:41 +000011215 ctxt->checkIndex = 0;
11216 }
11217 goto done;
11218 } else {
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011219 int tmp;
11220
11221 tmp = xmlCheckCdataPush(ctxt->input->cur, base);
11222 if ((tmp < 0) || (tmp != base)) {
11223 tmp = -tmp;
11224 ctxt->input->cur += tmp;
11225 goto encoding_error;
11226 }
Daniel Veillardd0d2f092008-03-07 16:50:21 +000011227 if ((ctxt->sax != NULL) && (base == 0) &&
11228 (ctxt->sax->cdataBlock != NULL) &&
11229 (!ctxt->disableSAX)) {
11230 /*
11231 * Special case to provide identical behaviour
11232 * between pull and push parsers on enpty CDATA
11233 * sections
11234 */
11235 if ((ctxt->input->cur - ctxt->input->base >= 9) &&
11236 (!strncmp((const char *)&ctxt->input->cur[-9],
11237 "<![CDATA[", 9)))
11238 ctxt->sax->cdataBlock(ctxt->userData,
11239 BAD_CAST "", 0);
11240 } else if ((ctxt->sax != NULL) && (base > 0) &&
Daniel Veillarda880b122003-04-21 21:36:41 +000011241 (!ctxt->disableSAX)) {
11242 if (ctxt->sax->cdataBlock != NULL)
11243 ctxt->sax->cdataBlock(ctxt->userData,
11244 ctxt->input->cur, base);
Daniel Veillardd9d32ae2003-07-05 20:32:43 +000011245 else if (ctxt->sax->characters != NULL)
11246 ctxt->sax->characters(ctxt->userData,
11247 ctxt->input->cur, base);
Daniel Veillarda880b122003-04-21 21:36:41 +000011248 }
Daniel Veillard0b787f32004-03-26 17:29:53 +000011249 SKIPL(base + 3);
Daniel Veillarda880b122003-04-21 21:36:41 +000011250 ctxt->checkIndex = 0;
11251 ctxt->instate = XML_PARSER_CONTENT;
11252#ifdef DEBUG_PUSH
11253 xmlGenericError(xmlGenericErrorContext,
11254 "PP: entering CONTENT\n");
11255#endif
11256 }
11257 break;
11258 }
Owen Taylor3473f882001-02-23 17:55:21 +000011259 case XML_PARSER_MISC:
11260 SKIP_BLANKS;
11261 if (ctxt->input->buf == NULL)
Daniel Veillarda880b122003-04-21 21:36:41 +000011262 avail = ctxt->input->length -
11263 (ctxt->input->cur - ctxt->input->base);
Owen Taylor3473f882001-02-23 17:55:21 +000011264 else
Daniel Veillarda880b122003-04-21 21:36:41 +000011265 avail = ctxt->input->buf->buffer->use -
11266 (ctxt->input->cur - ctxt->input->base);
Owen Taylor3473f882001-02-23 17:55:21 +000011267 if (avail < 2)
11268 goto done;
11269 cur = ctxt->input->cur[0];
11270 next = ctxt->input->cur[1];
11271 if ((cur == '<') && (next == '?')) {
11272 if ((!terminate) &&
11273 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
11274 goto done;
11275#ifdef DEBUG_PUSH
11276 xmlGenericError(xmlGenericErrorContext,
11277 "PP: Parsing PI\n");
11278#endif
11279 xmlParsePI(ctxt);
Daniel Veillard40e4b212007-06-12 14:46:40 +000011280 ctxt->checkIndex = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000011281 } else if ((cur == '<') && (next == '!') &&
Daniel Veillarda880b122003-04-21 21:36:41 +000011282 (ctxt->input->cur[2] == '-') &&
11283 (ctxt->input->cur[3] == '-')) {
Owen Taylor3473f882001-02-23 17:55:21 +000011284 if ((!terminate) &&
11285 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
11286 goto done;
11287#ifdef DEBUG_PUSH
11288 xmlGenericError(xmlGenericErrorContext,
11289 "PP: Parsing Comment\n");
11290#endif
11291 xmlParseComment(ctxt);
11292 ctxt->instate = XML_PARSER_MISC;
Daniel Veillarddfac9462007-06-12 14:44:32 +000011293 ctxt->checkIndex = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000011294 } else if ((cur == '<') && (next == '!') &&
Daniel Veillarda880b122003-04-21 21:36:41 +000011295 (ctxt->input->cur[2] == 'D') &&
11296 (ctxt->input->cur[3] == 'O') &&
11297 (ctxt->input->cur[4] == 'C') &&
11298 (ctxt->input->cur[5] == 'T') &&
11299 (ctxt->input->cur[6] == 'Y') &&
11300 (ctxt->input->cur[7] == 'P') &&
Owen Taylor3473f882001-02-23 17:55:21 +000011301 (ctxt->input->cur[8] == 'E')) {
11302 if ((!terminate) &&
11303 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
11304 goto done;
11305#ifdef DEBUG_PUSH
11306 xmlGenericError(xmlGenericErrorContext,
11307 "PP: Parsing internal subset\n");
11308#endif
11309 ctxt->inSubset = 1;
11310 xmlParseDocTypeDecl(ctxt);
11311 if (RAW == '[') {
11312 ctxt->instate = XML_PARSER_DTD;
11313#ifdef DEBUG_PUSH
11314 xmlGenericError(xmlGenericErrorContext,
11315 "PP: entering DTD\n");
11316#endif
11317 } else {
11318 /*
11319 * Create and update the external subset.
11320 */
11321 ctxt->inSubset = 2;
11322 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
11323 (ctxt->sax->externalSubset != NULL))
11324 ctxt->sax->externalSubset(ctxt->userData,
11325 ctxt->intSubName, ctxt->extSubSystem,
11326 ctxt->extSubURI);
11327 ctxt->inSubset = 0;
Daniel Veillardac4118d2008-01-11 05:27:32 +000011328 xmlCleanSpecialAttr(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011329 ctxt->instate = XML_PARSER_PROLOG;
11330#ifdef DEBUG_PUSH
11331 xmlGenericError(xmlGenericErrorContext,
11332 "PP: entering PROLOG\n");
11333#endif
11334 }
11335 } else if ((cur == '<') && (next == '!') &&
11336 (avail < 9)) {
11337 goto done;
11338 } else {
11339 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +000011340 ctxt->progressive = 1;
11341 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Owen Taylor3473f882001-02-23 17:55:21 +000011342#ifdef DEBUG_PUSH
11343 xmlGenericError(xmlGenericErrorContext,
11344 "PP: entering START_TAG\n");
11345#endif
11346 }
11347 break;
Owen Taylor3473f882001-02-23 17:55:21 +000011348 case XML_PARSER_PROLOG:
11349 SKIP_BLANKS;
11350 if (ctxt->input->buf == NULL)
11351 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
11352 else
11353 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
11354 if (avail < 2)
11355 goto done;
11356 cur = ctxt->input->cur[0];
11357 next = ctxt->input->cur[1];
11358 if ((cur == '<') && (next == '?')) {
11359 if ((!terminate) &&
11360 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
11361 goto done;
11362#ifdef DEBUG_PUSH
11363 xmlGenericError(xmlGenericErrorContext,
11364 "PP: Parsing PI\n");
11365#endif
11366 xmlParsePI(ctxt);
11367 } else if ((cur == '<') && (next == '!') &&
11368 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
11369 if ((!terminate) &&
11370 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
11371 goto done;
11372#ifdef DEBUG_PUSH
11373 xmlGenericError(xmlGenericErrorContext,
11374 "PP: Parsing Comment\n");
11375#endif
11376 xmlParseComment(ctxt);
11377 ctxt->instate = XML_PARSER_PROLOG;
11378 } else if ((cur == '<') && (next == '!') &&
11379 (avail < 4)) {
11380 goto done;
11381 } else {
11382 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillard0df3bc32004-06-08 12:03:41 +000011383 if (ctxt->progressive == 0)
11384 ctxt->progressive = 1;
Daniel Veillarda880b122003-04-21 21:36:41 +000011385 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Owen Taylor3473f882001-02-23 17:55:21 +000011386#ifdef DEBUG_PUSH
11387 xmlGenericError(xmlGenericErrorContext,
11388 "PP: entering START_TAG\n");
11389#endif
11390 }
11391 break;
11392 case XML_PARSER_EPILOG:
11393 SKIP_BLANKS;
11394 if (ctxt->input->buf == NULL)
11395 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
11396 else
11397 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
11398 if (avail < 2)
11399 goto done;
11400 cur = ctxt->input->cur[0];
11401 next = ctxt->input->cur[1];
11402 if ((cur == '<') && (next == '?')) {
11403 if ((!terminate) &&
11404 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
11405 goto done;
11406#ifdef DEBUG_PUSH
11407 xmlGenericError(xmlGenericErrorContext,
11408 "PP: Parsing PI\n");
11409#endif
11410 xmlParsePI(ctxt);
11411 ctxt->instate = XML_PARSER_EPILOG;
11412 } else if ((cur == '<') && (next == '!') &&
11413 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
11414 if ((!terminate) &&
11415 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
11416 goto done;
11417#ifdef DEBUG_PUSH
11418 xmlGenericError(xmlGenericErrorContext,
11419 "PP: Parsing Comment\n");
11420#endif
11421 xmlParseComment(ctxt);
11422 ctxt->instate = XML_PARSER_EPILOG;
11423 } else if ((cur == '<') && (next == '!') &&
11424 (avail < 4)) {
11425 goto done;
11426 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011427 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011428 ctxt->instate = XML_PARSER_EOF;
11429#ifdef DEBUG_PUSH
11430 xmlGenericError(xmlGenericErrorContext,
11431 "PP: entering EOF\n");
11432#endif
Daniel Veillard8d24cc12002-03-05 15:41:29 +000011433 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +000011434 ctxt->sax->endDocument(ctxt->userData);
11435 goto done;
11436 }
11437 break;
Owen Taylor3473f882001-02-23 17:55:21 +000011438 case XML_PARSER_DTD: {
11439 /*
11440 * Sorry but progressive parsing of the internal subset
11441 * is not expected to be supported. We first check that
11442 * the full content of the internal subset is available and
11443 * the parsing is launched only at that point.
11444 * Internal subset ends up with "']' S? '>'" in an unescaped
11445 * section and not in a ']]>' sequence which are conditional
11446 * sections (whoever argued to keep that crap in XML deserve
11447 * a place in hell !).
11448 */
11449 int base, i;
11450 xmlChar *buf;
11451 xmlChar quote = 0;
11452
11453 base = ctxt->input->cur - ctxt->input->base;
11454 if (base < 0) return(0);
11455 if (ctxt->checkIndex > base)
11456 base = ctxt->checkIndex;
11457 buf = ctxt->input->buf->buffer->content;
11458 for (;(unsigned int) base < ctxt->input->buf->buffer->use;
11459 base++) {
11460 if (quote != 0) {
11461 if (buf[base] == quote)
11462 quote = 0;
11463 continue;
11464 }
Daniel Veillard036143b2004-02-12 11:57:52 +000011465 if ((quote == 0) && (buf[base] == '<')) {
11466 int found = 0;
11467 /* special handling of comments */
11468 if (((unsigned int) base + 4 <
11469 ctxt->input->buf->buffer->use) &&
11470 (buf[base + 1] == '!') &&
11471 (buf[base + 2] == '-') &&
11472 (buf[base + 3] == '-')) {
11473 for (;(unsigned int) base + 3 <
11474 ctxt->input->buf->buffer->use; base++) {
11475 if ((buf[base] == '-') &&
11476 (buf[base + 1] == '-') &&
11477 (buf[base + 2] == '>')) {
11478 found = 1;
11479 base += 2;
11480 break;
11481 }
11482 }
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000011483 if (!found) {
11484#if 0
11485 fprintf(stderr, "unfinished comment\n");
11486#endif
11487 break; /* for */
11488 }
Daniel Veillard036143b2004-02-12 11:57:52 +000011489 continue;
11490 }
11491 }
Owen Taylor3473f882001-02-23 17:55:21 +000011492 if (buf[base] == '"') {
11493 quote = '"';
11494 continue;
11495 }
11496 if (buf[base] == '\'') {
11497 quote = '\'';
11498 continue;
11499 }
11500 if (buf[base] == ']') {
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000011501#if 0
11502 fprintf(stderr, "%c%c%c%c: ", buf[base],
11503 buf[base + 1], buf[base + 2], buf[base + 3]);
11504#endif
Owen Taylor3473f882001-02-23 17:55:21 +000011505 if ((unsigned int) base +1 >=
11506 ctxt->input->buf->buffer->use)
11507 break;
11508 if (buf[base + 1] == ']') {
11509 /* conditional crap, skip both ']' ! */
11510 base++;
11511 continue;
11512 }
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000011513 for (i = 1;
Owen Taylor3473f882001-02-23 17:55:21 +000011514 (unsigned int) base + i < ctxt->input->buf->buffer->use;
11515 i++) {
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000011516 if (buf[base + i] == '>') {
11517#if 0
11518 fprintf(stderr, "found\n");
11519#endif
Owen Taylor3473f882001-02-23 17:55:21 +000011520 goto found_end_int_subset;
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000011521 }
11522 if (!IS_BLANK_CH(buf[base + i])) {
11523#if 0
11524 fprintf(stderr, "not found\n");
11525#endif
11526 goto not_end_of_int_subset;
11527 }
Owen Taylor3473f882001-02-23 17:55:21 +000011528 }
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000011529#if 0
11530 fprintf(stderr, "end of stream\n");
11531#endif
Owen Taylor3473f882001-02-23 17:55:21 +000011532 break;
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000011533
Owen Taylor3473f882001-02-23 17:55:21 +000011534 }
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000011535not_end_of_int_subset:
11536 continue; /* for */
Owen Taylor3473f882001-02-23 17:55:21 +000011537 }
11538 /*
11539 * We didn't found the end of the Internal subset
11540 */
Owen Taylor3473f882001-02-23 17:55:21 +000011541#ifdef DEBUG_PUSH
11542 if (next == 0)
11543 xmlGenericError(xmlGenericErrorContext,
11544 "PP: lookup of int subset end filed\n");
11545#endif
11546 goto done;
11547
11548found_end_int_subset:
11549 xmlParseInternalSubset(ctxt);
11550 ctxt->inSubset = 2;
11551 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
11552 (ctxt->sax->externalSubset != NULL))
11553 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
11554 ctxt->extSubSystem, ctxt->extSubURI);
11555 ctxt->inSubset = 0;
Daniel Veillardac4118d2008-01-11 05:27:32 +000011556 xmlCleanSpecialAttr(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011557 ctxt->instate = XML_PARSER_PROLOG;
11558 ctxt->checkIndex = 0;
11559#ifdef DEBUG_PUSH
11560 xmlGenericError(xmlGenericErrorContext,
11561 "PP: entering PROLOG\n");
11562#endif
11563 break;
11564 }
11565 case XML_PARSER_COMMENT:
11566 xmlGenericError(xmlGenericErrorContext,
11567 "PP: internal error, state == COMMENT\n");
11568 ctxt->instate = XML_PARSER_CONTENT;
11569#ifdef DEBUG_PUSH
11570 xmlGenericError(xmlGenericErrorContext,
11571 "PP: entering CONTENT\n");
11572#endif
11573 break;
Daniel Veillarda880b122003-04-21 21:36:41 +000011574 case XML_PARSER_IGNORE:
11575 xmlGenericError(xmlGenericErrorContext,
11576 "PP: internal error, state == IGNORE");
11577 ctxt->instate = XML_PARSER_DTD;
11578#ifdef DEBUG_PUSH
11579 xmlGenericError(xmlGenericErrorContext,
11580 "PP: entering DTD\n");
11581#endif
11582 break;
Owen Taylor3473f882001-02-23 17:55:21 +000011583 case XML_PARSER_PI:
11584 xmlGenericError(xmlGenericErrorContext,
11585 "PP: internal error, state == PI\n");
11586 ctxt->instate = XML_PARSER_CONTENT;
11587#ifdef DEBUG_PUSH
11588 xmlGenericError(xmlGenericErrorContext,
11589 "PP: entering CONTENT\n");
11590#endif
11591 break;
11592 case XML_PARSER_ENTITY_DECL:
11593 xmlGenericError(xmlGenericErrorContext,
11594 "PP: internal error, state == ENTITY_DECL\n");
11595 ctxt->instate = XML_PARSER_DTD;
11596#ifdef DEBUG_PUSH
11597 xmlGenericError(xmlGenericErrorContext,
11598 "PP: entering DTD\n");
11599#endif
11600 break;
11601 case XML_PARSER_ENTITY_VALUE:
11602 xmlGenericError(xmlGenericErrorContext,
11603 "PP: internal error, state == ENTITY_VALUE\n");
11604 ctxt->instate = XML_PARSER_CONTENT;
11605#ifdef DEBUG_PUSH
11606 xmlGenericError(xmlGenericErrorContext,
11607 "PP: entering DTD\n");
11608#endif
11609 break;
11610 case XML_PARSER_ATTRIBUTE_VALUE:
11611 xmlGenericError(xmlGenericErrorContext,
11612 "PP: internal error, state == ATTRIBUTE_VALUE\n");
11613 ctxt->instate = XML_PARSER_START_TAG;
11614#ifdef DEBUG_PUSH
11615 xmlGenericError(xmlGenericErrorContext,
11616 "PP: entering START_TAG\n");
11617#endif
11618 break;
11619 case XML_PARSER_SYSTEM_LITERAL:
11620 xmlGenericError(xmlGenericErrorContext,
11621 "PP: internal error, state == SYSTEM_LITERAL\n");
11622 ctxt->instate = XML_PARSER_START_TAG;
11623#ifdef DEBUG_PUSH
11624 xmlGenericError(xmlGenericErrorContext,
11625 "PP: entering START_TAG\n");
11626#endif
11627 break;
Daniel Veillard4a7ae502002-02-18 19:18:17 +000011628 case XML_PARSER_PUBLIC_LITERAL:
11629 xmlGenericError(xmlGenericErrorContext,
11630 "PP: internal error, state == PUBLIC_LITERAL\n");
11631 ctxt->instate = XML_PARSER_START_TAG;
11632#ifdef DEBUG_PUSH
11633 xmlGenericError(xmlGenericErrorContext,
11634 "PP: entering START_TAG\n");
11635#endif
11636 break;
Owen Taylor3473f882001-02-23 17:55:21 +000011637 }
11638 }
11639done:
11640#ifdef DEBUG_PUSH
11641 xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret);
11642#endif
11643 return(ret);
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011644encoding_error:
11645 {
11646 char buffer[150];
11647
11648 snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
11649 ctxt->input->cur[0], ctxt->input->cur[1],
11650 ctxt->input->cur[2], ctxt->input->cur[3]);
11651 __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
11652 "Input is not proper UTF-8, indicate encoding !\n%s",
11653 BAD_CAST buffer, NULL);
11654 }
11655 return(0);
Owen Taylor3473f882001-02-23 17:55:21 +000011656}
11657
11658/**
Owen Taylor3473f882001-02-23 17:55:21 +000011659 * xmlParseChunk:
11660 * @ctxt: an XML parser context
11661 * @chunk: an char array
11662 * @size: the size in byte of the chunk
11663 * @terminate: last chunk indicator
11664 *
11665 * Parse a Chunk of memory
11666 *
11667 * Returns zero if no error, the xmlParserErrors otherwise.
11668 */
11669int
11670xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
11671 int terminate) {
Daniel Veillarda617e242006-01-09 14:38:44 +000011672 int end_in_lf = 0;
Daniel Veillarda6c76a22009-08-26 14:37:00 +020011673 int remain = 0;
Daniel Veillarda617e242006-01-09 14:38:44 +000011674
Daniel Veillard36e5cd52004-11-02 14:52:23 +000011675 if (ctxt == NULL)
11676 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard14412512005-01-21 23:53:26 +000011677 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000011678 return(ctxt->errNo);
Daniel Veillard309f81d2003-09-23 09:02:53 +000011679 if (ctxt->instate == XML_PARSER_START)
11680 xmlDetectSAX2(ctxt);
Daniel Veillarda617e242006-01-09 14:38:44 +000011681 if ((size > 0) && (chunk != NULL) && (!terminate) &&
11682 (chunk[size - 1] == '\r')) {
11683 end_in_lf = 1;
11684 size--;
11685 }
Daniel Veillarda6c76a22009-08-26 14:37:00 +020011686
11687xmldecl_done:
11688
Owen Taylor3473f882001-02-23 17:55:21 +000011689 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
11690 (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) {
11691 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
11692 int cur = ctxt->input->cur - ctxt->input->base;
William M. Bracka3215c72004-07-31 16:24:01 +000011693 int res;
Daniel Veillarda6c76a22009-08-26 14:37:00 +020011694
11695 /*
11696 * Specific handling if we autodetected an encoding, we should not
11697 * push more than the first line ... which depend on the encoding
11698 * And only push the rest once the final encoding was detected
11699 */
11700 if ((ctxt->instate == XML_PARSER_START) && (ctxt->input != NULL) &&
11701 (ctxt->input->buf != NULL) && (ctxt->input->buf->encoder != NULL)) {
Nikolay Sivov73046832010-01-19 15:38:05 +010011702 unsigned int len = 45;
Daniel Veillarda6c76a22009-08-26 14:37:00 +020011703
11704 if ((xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
11705 BAD_CAST "UTF-16")) ||
11706 (xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
11707 BAD_CAST "UTF16")))
11708 len = 90;
11709 else if ((xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
11710 BAD_CAST "UCS-4")) ||
11711 (xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
11712 BAD_CAST "UCS4")))
11713 len = 180;
11714
11715 if (ctxt->input->buf->rawconsumed < len)
11716 len -= ctxt->input->buf->rawconsumed;
11717
Raul Hudeaba9716a2010-03-15 10:13:29 +010011718 /*
11719 * Change size for reading the initial declaration only
11720 * if size is greater than len. Otherwise, memmove in xmlBufferAdd
11721 * will blindly copy extra bytes from memory.
11722 */
Daniel Veillard60587d62010-11-04 15:16:27 +010011723 if ((unsigned int) size > len) {
Raul Hudeaba9716a2010-03-15 10:13:29 +010011724 remain = size - len;
11725 size = len;
11726 } else {
11727 remain = 0;
11728 }
Daniel Veillarda6c76a22009-08-26 14:37:00 +020011729 }
William M. Bracka3215c72004-07-31 16:24:01 +000011730 res =xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
11731 if (res < 0) {
11732 ctxt->errNo = XML_PARSER_EOF;
11733 ctxt->disableSAX = 1;
11734 return (XML_PARSER_EOF);
11735 }
Owen Taylor3473f882001-02-23 17:55:21 +000011736 ctxt->input->base = ctxt->input->buf->buffer->content + base;
11737 ctxt->input->cur = ctxt->input->base + cur;
Daniel Veillard48b2f892001-02-25 16:11:03 +000011738 ctxt->input->end =
11739 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000011740#ifdef DEBUG_PUSH
11741 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
11742#endif
11743
Owen Taylor3473f882001-02-23 17:55:21 +000011744 } else if (ctxt->instate != XML_PARSER_EOF) {
11745 if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
11746 xmlParserInputBufferPtr in = ctxt->input->buf;
11747 if ((in->encoder != NULL) && (in->buffer != NULL) &&
11748 (in->raw != NULL)) {
11749 int nbchars;
Daniel Veillarda6c76a22009-08-26 14:37:00 +020011750
Owen Taylor3473f882001-02-23 17:55:21 +000011751 nbchars = xmlCharEncInFunc(in->encoder, in->buffer, in->raw);
11752 if (nbchars < 0) {
Daniel Veillard24eb9782003-10-04 21:08:09 +000011753 /* TODO 2.6.0 */
Owen Taylor3473f882001-02-23 17:55:21 +000011754 xmlGenericError(xmlGenericErrorContext,
11755 "xmlParseChunk: encoder error\n");
11756 return(XML_ERR_INVALID_ENCODING);
11757 }
11758 }
11759 }
11760 }
Daniel Veillarda6c76a22009-08-26 14:37:00 +020011761 if (remain != 0)
11762 xmlParseTryOrFinish(ctxt, 0);
11763 else
11764 xmlParseTryOrFinish(ctxt, terminate);
11765 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
11766 return(ctxt->errNo);
11767
11768 if (remain != 0) {
11769 chunk += size;
11770 size = remain;
11771 remain = 0;
11772 goto xmldecl_done;
11773 }
Daniel Veillarda617e242006-01-09 14:38:44 +000011774 if ((end_in_lf == 1) && (ctxt->input != NULL) &&
11775 (ctxt->input->buf != NULL)) {
11776 xmlParserInputBufferPush(ctxt->input->buf, 1, "\r");
11777 }
Owen Taylor3473f882001-02-23 17:55:21 +000011778 if (terminate) {
11779 /*
11780 * Check for termination
11781 */
Daniel Veillard36e5cd52004-11-02 14:52:23 +000011782 int avail = 0;
11783
11784 if (ctxt->input != NULL) {
Daniel Veillard819d5cb2002-10-14 11:15:18 +000011785 if (ctxt->input->buf == NULL)
Daniel Veillard36e5cd52004-11-02 14:52:23 +000011786 avail = ctxt->input->length -
11787 (ctxt->input->cur - ctxt->input->base);
11788 else
11789 avail = ctxt->input->buf->buffer->use -
11790 (ctxt->input->cur - ctxt->input->base);
11791 }
Daniel Veillard819d5cb2002-10-14 11:15:18 +000011792
Owen Taylor3473f882001-02-23 17:55:21 +000011793 if ((ctxt->instate != XML_PARSER_EOF) &&
11794 (ctxt->instate != XML_PARSER_EPILOG)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011795 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011796 }
Daniel Veillard819d5cb2002-10-14 11:15:18 +000011797 if ((ctxt->instate == XML_PARSER_EPILOG) && (avail > 0)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011798 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Daniel Veillard819d5cb2002-10-14 11:15:18 +000011799 }
Owen Taylor3473f882001-02-23 17:55:21 +000011800 if (ctxt->instate != XML_PARSER_EOF) {
Daniel Veillard8d24cc12002-03-05 15:41:29 +000011801 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +000011802 ctxt->sax->endDocument(ctxt->userData);
11803 }
11804 ctxt->instate = XML_PARSER_EOF;
11805 }
11806 return((xmlParserErrors) ctxt->errNo);
11807}
11808
11809/************************************************************************
11810 * *
11811 * I/O front end functions to the parser *
11812 * *
11813 ************************************************************************/
11814
11815/**
Owen Taylor3473f882001-02-23 17:55:21 +000011816 * xmlCreatePushParserCtxt:
11817 * @sax: a SAX handler
11818 * @user_data: The user data returned on SAX callbacks
11819 * @chunk: a pointer to an array of chars
11820 * @size: number of chars in the array
11821 * @filename: an optional file name or URI
11822 *
Daniel Veillard176d99f2002-07-06 19:22:28 +000011823 * Create a parser context for using the XML parser in push mode.
11824 * If @buffer and @size are non-NULL, the data is used to detect
11825 * the encoding. The remaining characters will be parsed so they
11826 * don't need to be fed in again through xmlParseChunk.
Owen Taylor3473f882001-02-23 17:55:21 +000011827 * To allow content encoding detection, @size should be >= 4
11828 * The value of @filename is used for fetching external entities
11829 * and error/warning reports.
11830 *
11831 * Returns the new parser context or NULL
11832 */
Daniel Veillard176d99f2002-07-06 19:22:28 +000011833
Owen Taylor3473f882001-02-23 17:55:21 +000011834xmlParserCtxtPtr
11835xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
11836 const char *chunk, int size, const char *filename) {
11837 xmlParserCtxtPtr ctxt;
11838 xmlParserInputPtr inputStream;
11839 xmlParserInputBufferPtr buf;
11840 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
11841
11842 /*
11843 * plug some encoding conversion routines
11844 */
11845 if ((chunk != NULL) && (size >= 4))
11846 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
11847
11848 buf = xmlAllocParserInputBuffer(enc);
11849 if (buf == NULL) return(NULL);
11850
11851 ctxt = xmlNewParserCtxt();
11852 if (ctxt == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +000011853 xmlErrMemory(NULL, "creating parser: out of memory\n");
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000011854 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000011855 return(NULL);
11856 }
Daniel Veillard03a53c32004-10-26 16:06:51 +000011857 ctxt->dictNames = 1;
Daniel Veillarde57ec792003-09-10 10:50:59 +000011858 ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 * sizeof(xmlChar *));
11859 if (ctxt->pushTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011860 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +000011861 xmlFreeParserInputBuffer(buf);
11862 xmlFreeParserCtxt(ctxt);
11863 return(NULL);
11864 }
Owen Taylor3473f882001-02-23 17:55:21 +000011865 if (sax != NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +000011866#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard092643b2003-09-25 14:29:29 +000011867 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Daniel Veillard81273902003-09-30 00:43:48 +000011868#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000011869 xmlFree(ctxt->sax);
11870 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
11871 if (ctxt->sax == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011872 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000011873 xmlFreeParserInputBuffer(buf);
11874 xmlFreeParserCtxt(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011875 return(NULL);
11876 }
Daniel Veillard5ea30d72004-11-08 11:54:28 +000011877 memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
11878 if (sax->initialized == XML_SAX2_MAGIC)
11879 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
11880 else
11881 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
Owen Taylor3473f882001-02-23 17:55:21 +000011882 if (user_data != NULL)
11883 ctxt->userData = user_data;
11884 }
11885 if (filename == NULL) {
11886 ctxt->directory = NULL;
11887 } else {
11888 ctxt->directory = xmlParserGetDirectory(filename);
11889 }
11890
11891 inputStream = xmlNewInputStream(ctxt);
11892 if (inputStream == NULL) {
11893 xmlFreeParserCtxt(ctxt);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000011894 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000011895 return(NULL);
11896 }
11897
11898 if (filename == NULL)
11899 inputStream->filename = NULL;
William M. Bracka3215c72004-07-31 16:24:01 +000011900 else {
Daniel Veillardf4862f02002-09-10 11:13:43 +000011901 inputStream->filename = (char *)
Igor Zlatkovic5f9fada2003-02-19 14:51:00 +000011902 xmlCanonicPath((const xmlChar *) filename);
William M. Bracka3215c72004-07-31 16:24:01 +000011903 if (inputStream->filename == NULL) {
11904 xmlFreeParserCtxt(ctxt);
11905 xmlFreeParserInputBuffer(buf);
11906 return(NULL);
11907 }
11908 }
Owen Taylor3473f882001-02-23 17:55:21 +000011909 inputStream->buf = buf;
11910 inputStream->base = inputStream->buf->buffer->content;
11911 inputStream->cur = inputStream->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +000011912 inputStream->end =
11913 &inputStream->buf->buffer->content[inputStream->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000011914
11915 inputPush(ctxt, inputStream);
11916
William M. Brack3a1cd212005-02-11 14:35:54 +000011917 /*
11918 * If the caller didn't provide an initial 'chunk' for determining
11919 * the encoding, we set the context to XML_CHAR_ENCODING_NONE so
11920 * that it can be automatically determined later
11921 */
11922 if ((size == 0) || (chunk == NULL)) {
11923 ctxt->charset = XML_CHAR_ENCODING_NONE;
11924 } else if ((ctxt->input != NULL) && (ctxt->input->buf != NULL)) {
Daniel Veillardaa39a0f2002-01-06 12:47:22 +000011925 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
11926 int cur = ctxt->input->cur - ctxt->input->base;
11927
Owen Taylor3473f882001-02-23 17:55:21 +000011928 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
Daniel Veillardaa39a0f2002-01-06 12:47:22 +000011929
11930 ctxt->input->base = ctxt->input->buf->buffer->content + base;
11931 ctxt->input->cur = ctxt->input->base + cur;
11932 ctxt->input->end =
11933 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000011934#ifdef DEBUG_PUSH
11935 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
11936#endif
11937 }
11938
Daniel Veillard0e4cd172001-06-28 12:13:56 +000011939 if (enc != XML_CHAR_ENCODING_NONE) {
11940 xmlSwitchEncoding(ctxt, enc);
11941 }
11942
Owen Taylor3473f882001-02-23 17:55:21 +000011943 return(ctxt);
11944}
Daniel Veillard73b013f2003-09-30 12:36:01 +000011945#endif /* LIBXML_PUSH_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000011946
11947/**
Daniel Veillard39e5c892005-07-03 22:48:50 +000011948 * xmlStopParser:
11949 * @ctxt: an XML parser context
11950 *
11951 * Blocks further parser processing
11952 */
11953void
11954xmlStopParser(xmlParserCtxtPtr ctxt) {
11955 if (ctxt == NULL)
11956 return;
11957 ctxt->instate = XML_PARSER_EOF;
11958 ctxt->disableSAX = 1;
11959 if (ctxt->input != NULL) {
11960 ctxt->input->cur = BAD_CAST"";
11961 ctxt->input->base = ctxt->input->cur;
11962 }
11963}
11964
11965/**
Owen Taylor3473f882001-02-23 17:55:21 +000011966 * xmlCreateIOParserCtxt:
11967 * @sax: a SAX handler
11968 * @user_data: The user data returned on SAX callbacks
11969 * @ioread: an I/O read function
11970 * @ioclose: an I/O close function
11971 * @ioctx: an I/O handler
11972 * @enc: the charset encoding if known
11973 *
11974 * Create a parser context for using the XML parser with an existing
11975 * I/O stream
11976 *
11977 * Returns the new parser context or NULL
11978 */
11979xmlParserCtxtPtr
11980xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
11981 xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
11982 void *ioctx, xmlCharEncoding enc) {
11983 xmlParserCtxtPtr ctxt;
11984 xmlParserInputPtr inputStream;
11985 xmlParserInputBufferPtr buf;
Daniel Veillard42595322004-11-08 10:52:06 +000011986
11987 if (ioread == NULL) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011988
11989 buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
11990 if (buf == NULL) return(NULL);
11991
11992 ctxt = xmlNewParserCtxt();
11993 if (ctxt == NULL) {
Daniel Veillardf0af8ec2005-07-08 17:27:33 +000011994 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000011995 return(NULL);
11996 }
11997 if (sax != NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +000011998#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard092643b2003-09-25 14:29:29 +000011999 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Daniel Veillard81273902003-09-30 00:43:48 +000012000#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012001 xmlFree(ctxt->sax);
12002 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
12003 if (ctxt->sax == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012004 xmlErrMemory(ctxt, NULL);
Daniel Veillardf0af8ec2005-07-08 17:27:33 +000012005 xmlFreeParserCtxt(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000012006 return(NULL);
12007 }
Daniel Veillard5ea30d72004-11-08 11:54:28 +000012008 memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
12009 if (sax->initialized == XML_SAX2_MAGIC)
12010 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
12011 else
12012 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
Owen Taylor3473f882001-02-23 17:55:21 +000012013 if (user_data != NULL)
12014 ctxt->userData = user_data;
12015 }
12016
12017 inputStream = xmlNewIOInputStream(ctxt, buf, enc);
12018 if (inputStream == NULL) {
12019 xmlFreeParserCtxt(ctxt);
12020 return(NULL);
12021 }
12022 inputPush(ctxt, inputStream);
12023
12024 return(ctxt);
12025}
12026
Daniel Veillard4432df22003-09-28 18:58:27 +000012027#ifdef LIBXML_VALID_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000012028/************************************************************************
12029 * *
Daniel Veillardcbaf3992001-12-31 16:16:02 +000012030 * Front ends when parsing a DTD *
Owen Taylor3473f882001-02-23 17:55:21 +000012031 * *
12032 ************************************************************************/
12033
12034/**
12035 * xmlIOParseDTD:
12036 * @sax: the SAX handler block or NULL
12037 * @input: an Input Buffer
12038 * @enc: the charset encoding if known
12039 *
12040 * Load and parse a DTD
12041 *
12042 * Returns the resulting xmlDtdPtr or NULL in case of error.
Daniel Veillard402b3442006-10-13 10:28:21 +000012043 * @input will be freed by the function in any case.
Owen Taylor3473f882001-02-23 17:55:21 +000012044 */
12045
12046xmlDtdPtr
12047xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
12048 xmlCharEncoding enc) {
12049 xmlDtdPtr ret = NULL;
12050 xmlParserCtxtPtr ctxt;
12051 xmlParserInputPtr pinput = NULL;
Daniel Veillard87a764e2001-06-20 17:41:10 +000012052 xmlChar start[4];
Owen Taylor3473f882001-02-23 17:55:21 +000012053
12054 if (input == NULL)
12055 return(NULL);
12056
12057 ctxt = xmlNewParserCtxt();
12058 if (ctxt == NULL) {
Daniel Veillard402b3442006-10-13 10:28:21 +000012059 xmlFreeParserInputBuffer(input);
Owen Taylor3473f882001-02-23 17:55:21 +000012060 return(NULL);
12061 }
12062
12063 /*
12064 * Set-up the SAX context
12065 */
12066 if (sax != NULL) {
12067 if (ctxt->sax != NULL)
12068 xmlFree(ctxt->sax);
12069 ctxt->sax = sax;
Daniel Veillard500a1de2004-03-22 15:22:58 +000012070 ctxt->userData = ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000012071 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000012072 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000012073
12074 /*
12075 * generate a parser input from the I/O handler
12076 */
12077
Daniel Veillard43caefb2003-12-07 19:32:22 +000012078 pinput = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
Owen Taylor3473f882001-02-23 17:55:21 +000012079 if (pinput == NULL) {
12080 if (sax != NULL) ctxt->sax = NULL;
Daniel Veillard402b3442006-10-13 10:28:21 +000012081 xmlFreeParserInputBuffer(input);
Owen Taylor3473f882001-02-23 17:55:21 +000012082 xmlFreeParserCtxt(ctxt);
12083 return(NULL);
12084 }
12085
12086 /*
12087 * plug some encoding conversion routines here.
12088 */
Daniel Veillarda8f09ce2008-08-27 13:02:01 +000012089 if (xmlPushInput(ctxt, pinput) < 0) {
12090 if (sax != NULL) ctxt->sax = NULL;
12091 xmlFreeParserCtxt(ctxt);
12092 return(NULL);
12093 }
Daniel Veillard43caefb2003-12-07 19:32:22 +000012094 if (enc != XML_CHAR_ENCODING_NONE) {
12095 xmlSwitchEncoding(ctxt, enc);
12096 }
Owen Taylor3473f882001-02-23 17:55:21 +000012097
12098 pinput->filename = NULL;
12099 pinput->line = 1;
12100 pinput->col = 1;
12101 pinput->base = ctxt->input->cur;
12102 pinput->cur = ctxt->input->cur;
12103 pinput->free = NULL;
12104
12105 /*
12106 * let's parse that entity knowing it's an external subset.
12107 */
12108 ctxt->inSubset = 2;
12109 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
Daniel Veillardae0765b2008-07-31 19:54:59 +000012110 if (ctxt->myDoc == NULL) {
12111 xmlErrMemory(ctxt, "New Doc failed");
12112 return(NULL);
12113 }
12114 ctxt->myDoc->properties = XML_DOC_INTERNAL;
Owen Taylor3473f882001-02-23 17:55:21 +000012115 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
12116 BAD_CAST "none", BAD_CAST "none");
Daniel Veillard87a764e2001-06-20 17:41:10 +000012117
Daniel Veillard4aede2e2003-10-17 12:43:59 +000012118 if ((enc == XML_CHAR_ENCODING_NONE) &&
12119 ((ctxt->input->end - ctxt->input->cur) >= 4)) {
Daniel Veillard87a764e2001-06-20 17:41:10 +000012120 /*
12121 * Get the 4 first bytes and decode the charset
12122 * if enc != XML_CHAR_ENCODING_NONE
12123 * plug some encoding conversion routines.
12124 */
12125 start[0] = RAW;
12126 start[1] = NXT(1);
12127 start[2] = NXT(2);
12128 start[3] = NXT(3);
12129 enc = xmlDetectCharEncoding(start, 4);
12130 if (enc != XML_CHAR_ENCODING_NONE) {
12131 xmlSwitchEncoding(ctxt, enc);
12132 }
12133 }
12134
Owen Taylor3473f882001-02-23 17:55:21 +000012135 xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
12136
12137 if (ctxt->myDoc != NULL) {
12138 if (ctxt->wellFormed) {
12139 ret = ctxt->myDoc->extSubset;
12140 ctxt->myDoc->extSubset = NULL;
Daniel Veillard329456a2003-04-26 21:21:00 +000012141 if (ret != NULL) {
12142 xmlNodePtr tmp;
12143
12144 ret->doc = NULL;
12145 tmp = ret->children;
12146 while (tmp != NULL) {
12147 tmp->doc = NULL;
12148 tmp = tmp->next;
12149 }
12150 }
Owen Taylor3473f882001-02-23 17:55:21 +000012151 } else {
12152 ret = NULL;
12153 }
12154 xmlFreeDoc(ctxt->myDoc);
12155 ctxt->myDoc = NULL;
12156 }
12157 if (sax != NULL) ctxt->sax = NULL;
12158 xmlFreeParserCtxt(ctxt);
12159
12160 return(ret);
12161}
12162
12163/**
12164 * xmlSAXParseDTD:
12165 * @sax: the SAX handler block
12166 * @ExternalID: a NAME* containing the External ID of the DTD
12167 * @SystemID: a NAME* containing the URL to the DTD
12168 *
12169 * Load and parse an external subset.
12170 *
12171 * Returns the resulting xmlDtdPtr or NULL in case of error.
12172 */
12173
12174xmlDtdPtr
12175xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
12176 const xmlChar *SystemID) {
12177 xmlDtdPtr ret = NULL;
12178 xmlParserCtxtPtr ctxt;
12179 xmlParserInputPtr input = NULL;
12180 xmlCharEncoding enc;
Igor Zlatkovic07d59762004-08-24 19:12:51 +000012181 xmlChar* systemIdCanonic;
Owen Taylor3473f882001-02-23 17:55:21 +000012182
12183 if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
12184
12185 ctxt = xmlNewParserCtxt();
12186 if (ctxt == NULL) {
12187 return(NULL);
12188 }
12189
12190 /*
12191 * Set-up the SAX context
12192 */
12193 if (sax != NULL) {
12194 if (ctxt->sax != NULL)
12195 xmlFree(ctxt->sax);
12196 ctxt->sax = sax;
Daniel Veillardbf1e3d82003-08-14 23:57:26 +000012197 ctxt->userData = ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000012198 }
Igor Zlatkovic07d59762004-08-24 19:12:51 +000012199
12200 /*
12201 * Canonicalise the system ID
12202 */
12203 systemIdCanonic = xmlCanonicPath(SystemID);
Daniel Veillardc93a19f2004-10-04 11:53:20 +000012204 if ((SystemID != NULL) && (systemIdCanonic == NULL)) {
Igor Zlatkovic07d59762004-08-24 19:12:51 +000012205 xmlFreeParserCtxt(ctxt);
12206 return(NULL);
12207 }
Owen Taylor3473f882001-02-23 17:55:21 +000012208
12209 /*
12210 * Ask the Entity resolver to load the damn thing
12211 */
12212
12213 if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
Daniel Veillarda9557952006-10-12 12:53:15 +000012214 input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID,
12215 systemIdCanonic);
Owen Taylor3473f882001-02-23 17:55:21 +000012216 if (input == NULL) {
12217 if (sax != NULL) ctxt->sax = NULL;
12218 xmlFreeParserCtxt(ctxt);
Daniel Veillard34099b42004-11-04 17:34:35 +000012219 if (systemIdCanonic != NULL)
12220 xmlFree(systemIdCanonic);
Owen Taylor3473f882001-02-23 17:55:21 +000012221 return(NULL);
12222 }
12223
12224 /*
12225 * plug some encoding conversion routines here.
12226 */
Daniel Veillarda8f09ce2008-08-27 13:02:01 +000012227 if (xmlPushInput(ctxt, input) < 0) {
12228 if (sax != NULL) ctxt->sax = NULL;
12229 xmlFreeParserCtxt(ctxt);
12230 if (systemIdCanonic != NULL)
12231 xmlFree(systemIdCanonic);
12232 return(NULL);
12233 }
Daniel Veillard4aede2e2003-10-17 12:43:59 +000012234 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
12235 enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
12236 xmlSwitchEncoding(ctxt, enc);
12237 }
Owen Taylor3473f882001-02-23 17:55:21 +000012238
12239 if (input->filename == NULL)
Igor Zlatkovic07d59762004-08-24 19:12:51 +000012240 input->filename = (char *) systemIdCanonic;
12241 else
12242 xmlFree(systemIdCanonic);
Owen Taylor3473f882001-02-23 17:55:21 +000012243 input->line = 1;
12244 input->col = 1;
12245 input->base = ctxt->input->cur;
12246 input->cur = ctxt->input->cur;
12247 input->free = NULL;
12248
12249 /*
12250 * let's parse that entity knowing it's an external subset.
12251 */
12252 ctxt->inSubset = 2;
12253 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
Daniel Veillardae0765b2008-07-31 19:54:59 +000012254 if (ctxt->myDoc == NULL) {
12255 xmlErrMemory(ctxt, "New Doc failed");
12256 if (sax != NULL) ctxt->sax = NULL;
12257 xmlFreeParserCtxt(ctxt);
12258 return(NULL);
12259 }
12260 ctxt->myDoc->properties = XML_DOC_INTERNAL;
Owen Taylor3473f882001-02-23 17:55:21 +000012261 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
12262 ExternalID, SystemID);
12263 xmlParseExternalSubset(ctxt, ExternalID, SystemID);
12264
12265 if (ctxt->myDoc != NULL) {
12266 if (ctxt->wellFormed) {
12267 ret = ctxt->myDoc->extSubset;
12268 ctxt->myDoc->extSubset = NULL;
Daniel Veillardc5573462003-04-25 16:43:49 +000012269 if (ret != NULL) {
12270 xmlNodePtr tmp;
12271
12272 ret->doc = NULL;
12273 tmp = ret->children;
12274 while (tmp != NULL) {
12275 tmp->doc = NULL;
12276 tmp = tmp->next;
12277 }
12278 }
Owen Taylor3473f882001-02-23 17:55:21 +000012279 } else {
12280 ret = NULL;
12281 }
12282 xmlFreeDoc(ctxt->myDoc);
12283 ctxt->myDoc = NULL;
12284 }
12285 if (sax != NULL) ctxt->sax = NULL;
12286 xmlFreeParserCtxt(ctxt);
Daniel Veillard0161e632008-08-28 15:36:32 +000012287
Owen Taylor3473f882001-02-23 17:55:21 +000012288 return(ret);
12289}
12290
Daniel Veillard4432df22003-09-28 18:58:27 +000012291
Owen Taylor3473f882001-02-23 17:55:21 +000012292/**
12293 * xmlParseDTD:
12294 * @ExternalID: a NAME* containing the External ID of the DTD
12295 * @SystemID: a NAME* containing the URL to the DTD
12296 *
12297 * Load and parse an external subset.
Daniel Veillard0161e632008-08-28 15:36:32 +000012298 *
Owen Taylor3473f882001-02-23 17:55:21 +000012299 * Returns the resulting xmlDtdPtr or NULL in case of error.
12300 */
12301
12302xmlDtdPtr
12303xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
12304 return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
12305}
Daniel Veillard4432df22003-09-28 18:58:27 +000012306#endif /* LIBXML_VALID_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012307
12308/************************************************************************
12309 * *
12310 * Front ends when parsing an Entity *
12311 * *
12312 ************************************************************************/
12313
12314/**
Owen Taylor3473f882001-02-23 17:55:21 +000012315 * xmlParseCtxtExternalEntity:
12316 * @ctx: the existing parsing context
12317 * @URL: the URL for the entity to load
12318 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +000012319 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +000012320 *
12321 * Parse an external general entity within an existing parsing context
12322 * An external general parsed entity is well-formed if it matches the
12323 * production labeled extParsedEnt.
12324 *
12325 * [78] extParsedEnt ::= TextDecl? content
12326 *
12327 * Returns 0 if the entity is well formed, -1 in case of args problem and
12328 * the parser error code otherwise
12329 */
12330
12331int
12332xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
Daniel Veillardcda96922001-08-21 10:56:31 +000012333 const xmlChar *ID, xmlNodePtr *lst) {
Owen Taylor3473f882001-02-23 17:55:21 +000012334 xmlParserCtxtPtr ctxt;
12335 xmlDocPtr newDoc;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012336 xmlNodePtr newRoot;
Owen Taylor3473f882001-02-23 17:55:21 +000012337 xmlSAXHandlerPtr oldsax = NULL;
12338 int ret = 0;
Daniel Veillard87a764e2001-06-20 17:41:10 +000012339 xmlChar start[4];
12340 xmlCharEncoding enc;
Owen Taylor3473f882001-02-23 17:55:21 +000012341
Daniel Veillardce682bc2004-11-05 17:22:25 +000012342 if (ctx == NULL) return(-1);
12343
Daniel Veillard0161e632008-08-28 15:36:32 +000012344 if (((ctx->depth > 40) && ((ctx->options & XML_PARSE_HUGE) == 0)) ||
12345 (ctx->depth > 1024)) {
Owen Taylor3473f882001-02-23 17:55:21 +000012346 return(XML_ERR_ENTITY_LOOP);
12347 }
12348
Daniel Veillardcda96922001-08-21 10:56:31 +000012349 if (lst != NULL)
12350 *lst = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000012351 if ((URL == NULL) && (ID == NULL))
12352 return(-1);
12353 if (ctx->myDoc == NULL) /* @@ relax but check for dereferences */
12354 return(-1);
12355
Rob Richards798743a2009-06-19 13:54:25 -040012356 ctxt = xmlCreateEntityParserCtxtInternal(URL, ID, NULL, ctx);
Daniel Veillard2937b3a2006-10-10 08:52:34 +000012357 if (ctxt == NULL) {
12358 return(-1);
12359 }
Daniel Veillard40ec29a2008-07-30 12:35:40 +000012360
Owen Taylor3473f882001-02-23 17:55:21 +000012361 oldsax = ctxt->sax;
12362 ctxt->sax = ctx->sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000012363 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000012364 newDoc = xmlNewDoc(BAD_CAST "1.0");
12365 if (newDoc == NULL) {
12366 xmlFreeParserCtxt(ctxt);
12367 return(-1);
12368 }
Daniel Veillardae0765b2008-07-31 19:54:59 +000012369 newDoc->properties = XML_DOC_INTERNAL;
Daniel Veillard03a53c32004-10-26 16:06:51 +000012370 if (ctx->myDoc->dict) {
12371 newDoc->dict = ctx->myDoc->dict;
12372 xmlDictReference(newDoc->dict);
12373 }
Owen Taylor3473f882001-02-23 17:55:21 +000012374 if (ctx->myDoc != NULL) {
12375 newDoc->intSubset = ctx->myDoc->intSubset;
12376 newDoc->extSubset = ctx->myDoc->extSubset;
12377 }
12378 if (ctx->myDoc->URL != NULL) {
12379 newDoc->URL = xmlStrdup(ctx->myDoc->URL);
12380 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012381 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
12382 if (newRoot == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000012383 ctxt->sax = oldsax;
12384 xmlFreeParserCtxt(ctxt);
12385 newDoc->intSubset = NULL;
12386 newDoc->extSubset = NULL;
12387 xmlFreeDoc(newDoc);
12388 return(-1);
12389 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012390 xmlAddChild((xmlNodePtr) newDoc, newRoot);
Owen Taylor3473f882001-02-23 17:55:21 +000012391 nodePush(ctxt, newDoc->children);
12392 if (ctx->myDoc == NULL) {
12393 ctxt->myDoc = newDoc;
12394 } else {
12395 ctxt->myDoc = ctx->myDoc;
12396 newDoc->children->doc = ctx->myDoc;
12397 }
12398
Daniel Veillard40ec29a2008-07-30 12:35:40 +000012399 /*
Daniel Veillard87a764e2001-06-20 17:41:10 +000012400 * Get the 4 first bytes and decode the charset
12401 * if enc != XML_CHAR_ENCODING_NONE
12402 * plug some encoding conversion routines.
12403 */
12404 GROW
Daniel Veillard4aede2e2003-10-17 12:43:59 +000012405 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
12406 start[0] = RAW;
12407 start[1] = NXT(1);
12408 start[2] = NXT(2);
12409 start[3] = NXT(3);
12410 enc = xmlDetectCharEncoding(start, 4);
12411 if (enc != XML_CHAR_ENCODING_NONE) {
12412 xmlSwitchEncoding(ctxt, enc);
12413 }
Daniel Veillard87a764e2001-06-20 17:41:10 +000012414 }
12415
Owen Taylor3473f882001-02-23 17:55:21 +000012416 /*
12417 * Parse a possible text declaration first
12418 */
Daniel Veillarda07050d2003-10-19 14:46:32 +000012419 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +000012420 xmlParseTextDecl(ctxt);
Daniel Veillard40ec29a2008-07-30 12:35:40 +000012421 /*
12422 * An XML-1.0 document can't reference an entity not XML-1.0
12423 */
12424 if ((xmlStrEqual(ctx->version, BAD_CAST "1.0")) &&
12425 (!xmlStrEqual(ctxt->input->version, BAD_CAST "1.0"))) {
12426 xmlFatalErrMsg(ctxt, XML_ERR_VERSION_MISMATCH,
12427 "Version mismatch between document and entity\n");
12428 }
Owen Taylor3473f882001-02-23 17:55:21 +000012429 }
12430
12431 /*
Daniel Veillard4aa68ab2012-04-02 17:50:54 +080012432 * If the user provided its own SAX callbacks then reuse the
12433 * useData callback field, otherwise the expected setup in a
12434 * DOM builder is to have userData == ctxt
12435 */
12436 if (ctx->userData == ctx)
12437 ctxt->userData = ctxt;
12438 else
12439 ctxt->userData = ctx->userData;
12440
12441 /*
Owen Taylor3473f882001-02-23 17:55:21 +000012442 * Doing validity checking on chunk doesn't make sense
12443 */
12444 ctxt->instate = XML_PARSER_CONTENT;
12445 ctxt->validate = ctx->validate;
Daniel Veillard5f8d1a32003-03-23 21:02:00 +000012446 ctxt->valid = ctx->valid;
Owen Taylor3473f882001-02-23 17:55:21 +000012447 ctxt->loadsubset = ctx->loadsubset;
12448 ctxt->depth = ctx->depth + 1;
12449 ctxt->replaceEntities = ctx->replaceEntities;
12450 if (ctxt->validate) {
12451 ctxt->vctxt.error = ctx->vctxt.error;
12452 ctxt->vctxt.warning = ctx->vctxt.warning;
Owen Taylor3473f882001-02-23 17:55:21 +000012453 } else {
12454 ctxt->vctxt.error = NULL;
12455 ctxt->vctxt.warning = NULL;
12456 }
Daniel Veillarda9142e72001-06-19 11:07:54 +000012457 ctxt->vctxt.nodeTab = NULL;
12458 ctxt->vctxt.nodeNr = 0;
12459 ctxt->vctxt.nodeMax = 0;
12460 ctxt->vctxt.node = NULL;
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000012461 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
12462 ctxt->dict = ctx->dict;
Daniel Veillardfd343dc2003-10-31 10:55:22 +000012463 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
12464 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
12465 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000012466 ctxt->dictNames = ctx->dictNames;
12467 ctxt->attsDefault = ctx->attsDefault;
12468 ctxt->attsSpecial = ctx->attsSpecial;
William M. Brack503b6102004-08-19 02:17:27 +000012469 ctxt->linenumbers = ctx->linenumbers;
Owen Taylor3473f882001-02-23 17:55:21 +000012470
12471 xmlParseContent(ctxt);
Daniel Veillard40ec29a2008-07-30 12:35:40 +000012472
Daniel Veillard5f8d1a32003-03-23 21:02:00 +000012473 ctx->validate = ctxt->validate;
12474 ctx->valid = ctxt->valid;
Owen Taylor3473f882001-02-23 17:55:21 +000012475 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012476 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000012477 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012478 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000012479 }
12480 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012481 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000012482 }
12483
12484 if (!ctxt->wellFormed) {
12485 if (ctxt->errNo == 0)
12486 ret = 1;
12487 else
12488 ret = ctxt->errNo;
12489 } else {
Daniel Veillardcda96922001-08-21 10:56:31 +000012490 if (lst != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000012491 xmlNodePtr cur;
12492
12493 /*
12494 * Return the newly created nodeset after unlinking it from
12495 * they pseudo parent.
12496 */
12497 cur = newDoc->children->children;
Daniel Veillardcda96922001-08-21 10:56:31 +000012498 *lst = cur;
Owen Taylor3473f882001-02-23 17:55:21 +000012499 while (cur != NULL) {
12500 cur->parent = NULL;
12501 cur = cur->next;
12502 }
12503 newDoc->children->children = NULL;
12504 }
12505 ret = 0;
12506 }
12507 ctxt->sax = oldsax;
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000012508 ctxt->dict = NULL;
12509 ctxt->attsDefault = NULL;
12510 ctxt->attsSpecial = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000012511 xmlFreeParserCtxt(ctxt);
12512 newDoc->intSubset = NULL;
12513 newDoc->extSubset = NULL;
12514 xmlFreeDoc(newDoc);
Daniel Veillard40ec29a2008-07-30 12:35:40 +000012515
Owen Taylor3473f882001-02-23 17:55:21 +000012516 return(ret);
12517}
12518
12519/**
Daniel Veillard257d9102001-05-08 10:41:44 +000012520 * xmlParseExternalEntityPrivate:
Owen Taylor3473f882001-02-23 17:55:21 +000012521 * @doc: the document the chunk pertains to
Daniel Veillarda97a19b2001-05-20 13:19:52 +000012522 * @oldctxt: the previous parser context if available
Owen Taylor3473f882001-02-23 17:55:21 +000012523 * @sax: the SAX handler bloc (possibly NULL)
12524 * @user_data: The user data returned on SAX callbacks (possibly NULL)
12525 * @depth: Used for loop detection, use 0
12526 * @URL: the URL for the entity to load
12527 * @ID: the System ID for the entity to load
12528 * @list: the return value for the set of parsed nodes
12529 *
Daniel Veillard257d9102001-05-08 10:41:44 +000012530 * Private version of xmlParseExternalEntity()
Owen Taylor3473f882001-02-23 17:55:21 +000012531 *
12532 * Returns 0 if the entity is well formed, -1 in case of args problem and
12533 * the parser error code otherwise
12534 */
12535
Daniel Veillard7d515752003-09-26 19:12:37 +000012536static xmlParserErrors
Daniel Veillarda97a19b2001-05-20 13:19:52 +000012537xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
12538 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +000012539 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +000012540 const xmlChar *ID, xmlNodePtr *list) {
Owen Taylor3473f882001-02-23 17:55:21 +000012541 xmlParserCtxtPtr ctxt;
12542 xmlDocPtr newDoc;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012543 xmlNodePtr newRoot;
Owen Taylor3473f882001-02-23 17:55:21 +000012544 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillard7d515752003-09-26 19:12:37 +000012545 xmlParserErrors ret = XML_ERR_OK;
Daniel Veillard87a764e2001-06-20 17:41:10 +000012546 xmlChar start[4];
12547 xmlCharEncoding enc;
Owen Taylor3473f882001-02-23 17:55:21 +000012548
Daniel Veillard0161e632008-08-28 15:36:32 +000012549 if (((depth > 40) &&
12550 ((oldctxt == NULL) || (oldctxt->options & XML_PARSE_HUGE) == 0)) ||
12551 (depth > 1024)) {
Owen Taylor3473f882001-02-23 17:55:21 +000012552 return(XML_ERR_ENTITY_LOOP);
12553 }
12554
Owen Taylor3473f882001-02-23 17:55:21 +000012555 if (list != NULL)
12556 *list = NULL;
12557 if ((URL == NULL) && (ID == NULL))
Daniel Veillard7d515752003-09-26 19:12:37 +000012558 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard30e76072006-03-09 14:13:55 +000012559 if (doc == NULL)
Daniel Veillard7d515752003-09-26 19:12:37 +000012560 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000012561
12562
Rob Richards9c0aa472009-03-26 18:10:19 +000012563 ctxt = xmlCreateEntityParserCtxtInternal(URL, ID, NULL, oldctxt);
William M. Brackb670e2e2003-09-27 01:05:55 +000012564 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
Owen Taylor3473f882001-02-23 17:55:21 +000012565 ctxt->userData = ctxt;
Daniel Veillarda97a19b2001-05-20 13:19:52 +000012566 if (oldctxt != NULL) {
12567 ctxt->_private = oldctxt->_private;
12568 ctxt->loadsubset = oldctxt->loadsubset;
12569 ctxt->validate = oldctxt->validate;
12570 ctxt->external = oldctxt->external;
Daniel Veillard44e1dd02003-02-21 23:23:28 +000012571 ctxt->record_info = oldctxt->record_info;
12572 ctxt->node_seq.maximum = oldctxt->node_seq.maximum;
12573 ctxt->node_seq.length = oldctxt->node_seq.length;
12574 ctxt->node_seq.buffer = oldctxt->node_seq.buffer;
Daniel Veillarda97a19b2001-05-20 13:19:52 +000012575 } else {
12576 /*
12577 * Doing validity checking on chunk without context
12578 * doesn't make sense
12579 */
12580 ctxt->_private = NULL;
12581 ctxt->validate = 0;
12582 ctxt->external = 2;
12583 ctxt->loadsubset = 0;
12584 }
Owen Taylor3473f882001-02-23 17:55:21 +000012585 if (sax != NULL) {
12586 oldsax = ctxt->sax;
12587 ctxt->sax = sax;
12588 if (user_data != NULL)
12589 ctxt->userData = user_data;
12590 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000012591 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000012592 newDoc = xmlNewDoc(BAD_CAST "1.0");
12593 if (newDoc == NULL) {
Daniel Veillard44e1dd02003-02-21 23:23:28 +000012594 ctxt->node_seq.maximum = 0;
12595 ctxt->node_seq.length = 0;
12596 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000012597 xmlFreeParserCtxt(ctxt);
Daniel Veillard7d515752003-09-26 19:12:37 +000012598 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000012599 }
Daniel Veillardae0765b2008-07-31 19:54:59 +000012600 newDoc->properties = XML_DOC_INTERNAL;
Daniel Veillard30e76072006-03-09 14:13:55 +000012601 newDoc->intSubset = doc->intSubset;
12602 newDoc->extSubset = doc->extSubset;
12603 newDoc->dict = doc->dict;
Daniel Veillard03a53c32004-10-26 16:06:51 +000012604 xmlDictReference(newDoc->dict);
12605
Owen Taylor3473f882001-02-23 17:55:21 +000012606 if (doc->URL != NULL) {
12607 newDoc->URL = xmlStrdup(doc->URL);
12608 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012609 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
12610 if (newRoot == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000012611 if (sax != NULL)
12612 ctxt->sax = oldsax;
Daniel Veillard44e1dd02003-02-21 23:23:28 +000012613 ctxt->node_seq.maximum = 0;
12614 ctxt->node_seq.length = 0;
12615 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000012616 xmlFreeParserCtxt(ctxt);
12617 newDoc->intSubset = NULL;
12618 newDoc->extSubset = NULL;
12619 xmlFreeDoc(newDoc);
Daniel Veillard7d515752003-09-26 19:12:37 +000012620 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000012621 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012622 xmlAddChild((xmlNodePtr) newDoc, newRoot);
Owen Taylor3473f882001-02-23 17:55:21 +000012623 nodePush(ctxt, newDoc->children);
Daniel Veillard30e76072006-03-09 14:13:55 +000012624 ctxt->myDoc = doc;
12625 newRoot->doc = doc;
Owen Taylor3473f882001-02-23 17:55:21 +000012626
Daniel Veillard0161e632008-08-28 15:36:32 +000012627 /*
Daniel Veillard87a764e2001-06-20 17:41:10 +000012628 * Get the 4 first bytes and decode the charset
12629 * if enc != XML_CHAR_ENCODING_NONE
12630 * plug some encoding conversion routines.
12631 */
12632 GROW;
Daniel Veillard4aede2e2003-10-17 12:43:59 +000012633 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
12634 start[0] = RAW;
12635 start[1] = NXT(1);
12636 start[2] = NXT(2);
12637 start[3] = NXT(3);
12638 enc = xmlDetectCharEncoding(start, 4);
12639 if (enc != XML_CHAR_ENCODING_NONE) {
12640 xmlSwitchEncoding(ctxt, enc);
12641 }
Daniel Veillard87a764e2001-06-20 17:41:10 +000012642 }
12643
Owen Taylor3473f882001-02-23 17:55:21 +000012644 /*
12645 * Parse a possible text declaration first
12646 */
Daniel Veillarda07050d2003-10-19 14:46:32 +000012647 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +000012648 xmlParseTextDecl(ctxt);
12649 }
12650
Owen Taylor3473f882001-02-23 17:55:21 +000012651 ctxt->instate = XML_PARSER_CONTENT;
Owen Taylor3473f882001-02-23 17:55:21 +000012652 ctxt->depth = depth;
12653
12654 xmlParseContent(ctxt);
Daniel Veillard0161e632008-08-28 15:36:32 +000012655
Daniel Veillard561b7f82002-03-20 21:55:57 +000012656 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012657 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard561b7f82002-03-20 21:55:57 +000012658 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012659 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000012660 }
12661 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012662 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000012663 }
12664
12665 if (!ctxt->wellFormed) {
12666 if (ctxt->errNo == 0)
Daniel Veillard7d515752003-09-26 19:12:37 +000012667 ret = XML_ERR_INTERNAL_ERROR;
Owen Taylor3473f882001-02-23 17:55:21 +000012668 else
William M. Brack7b9154b2003-09-27 19:23:50 +000012669 ret = (xmlParserErrors)ctxt->errNo;
Owen Taylor3473f882001-02-23 17:55:21 +000012670 } else {
12671 if (list != NULL) {
12672 xmlNodePtr cur;
12673
12674 /*
12675 * Return the newly created nodeset after unlinking it from
12676 * they pseudo parent.
12677 */
12678 cur = newDoc->children->children;
12679 *list = cur;
12680 while (cur != NULL) {
12681 cur->parent = NULL;
12682 cur = cur->next;
12683 }
12684 newDoc->children->children = NULL;
12685 }
Daniel Veillard7d515752003-09-26 19:12:37 +000012686 ret = XML_ERR_OK;
Owen Taylor3473f882001-02-23 17:55:21 +000012687 }
Daniel Veillard0161e632008-08-28 15:36:32 +000012688
12689 /*
12690 * Record in the parent context the number of entities replacement
12691 * done when parsing that reference.
12692 */
Daniel Veillard76d36452009-09-07 11:19:33 +020012693 if (oldctxt != NULL)
12694 oldctxt->nbentities += ctxt->nbentities;
12695
Daniel Veillard0161e632008-08-28 15:36:32 +000012696 /*
12697 * Also record the size of the entity parsed
12698 */
12699 if (ctxt->input != NULL) {
12700 oldctxt->sizeentities += ctxt->input->consumed;
12701 oldctxt->sizeentities += (ctxt->input->cur - ctxt->input->base);
12702 }
12703 /*
12704 * And record the last error if any
12705 */
12706 if (ctxt->lastError.code != XML_ERR_OK)
12707 xmlCopyError(&ctxt->lastError, &oldctxt->lastError);
12708
Owen Taylor3473f882001-02-23 17:55:21 +000012709 if (sax != NULL)
12710 ctxt->sax = oldsax;
Daniel Veillard0046c0f2003-02-23 13:52:30 +000012711 oldctxt->node_seq.maximum = ctxt->node_seq.maximum;
12712 oldctxt->node_seq.length = ctxt->node_seq.length;
12713 oldctxt->node_seq.buffer = ctxt->node_seq.buffer;
Daniel Veillard44e1dd02003-02-21 23:23:28 +000012714 ctxt->node_seq.maximum = 0;
12715 ctxt->node_seq.length = 0;
12716 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000012717 xmlFreeParserCtxt(ctxt);
12718 newDoc->intSubset = NULL;
12719 newDoc->extSubset = NULL;
12720 xmlFreeDoc(newDoc);
Daniel Veillard0161e632008-08-28 15:36:32 +000012721
Owen Taylor3473f882001-02-23 17:55:21 +000012722 return(ret);
12723}
12724
Daniel Veillard81273902003-09-30 00:43:48 +000012725#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000012726/**
Daniel Veillard257d9102001-05-08 10:41:44 +000012727 * xmlParseExternalEntity:
12728 * @doc: the document the chunk pertains to
12729 * @sax: the SAX handler bloc (possibly NULL)
12730 * @user_data: The user data returned on SAX callbacks (possibly NULL)
12731 * @depth: Used for loop detection, use 0
12732 * @URL: the URL for the entity to load
12733 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +000012734 * @lst: the return value for the set of parsed nodes
Daniel Veillard257d9102001-05-08 10:41:44 +000012735 *
12736 * Parse an external general entity
12737 * An external general parsed entity is well-formed if it matches the
12738 * production labeled extParsedEnt.
12739 *
12740 * [78] extParsedEnt ::= TextDecl? content
12741 *
12742 * Returns 0 if the entity is well formed, -1 in case of args problem and
12743 * the parser error code otherwise
12744 */
12745
12746int
12747xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardcda96922001-08-21 10:56:31 +000012748 int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *lst) {
Daniel Veillarda97a19b2001-05-20 13:19:52 +000012749 return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL,
Daniel Veillardcda96922001-08-21 10:56:31 +000012750 ID, lst));
Daniel Veillard257d9102001-05-08 10:41:44 +000012751}
12752
12753/**
Daniel Veillarde020c3a2001-03-21 18:06:15 +000012754 * xmlParseBalancedChunkMemory:
Owen Taylor3473f882001-02-23 17:55:21 +000012755 * @doc: the document the chunk pertains to
12756 * @sax: the SAX handler bloc (possibly NULL)
12757 * @user_data: The user data returned on SAX callbacks (possibly NULL)
12758 * @depth: Used for loop detection, use 0
12759 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
Daniel Veillardcda96922001-08-21 10:56:31 +000012760 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +000012761 *
12762 * Parse a well-balanced chunk of an XML document
12763 * called by the parser
12764 * The allowed sequence for the Well Balanced Chunk is the one defined by
12765 * the content production in the XML grammar:
12766 *
12767 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
12768 *
12769 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
12770 * the parser error code otherwise
12771 */
12772
12773int
12774xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
Daniel Veillardcda96922001-08-21 10:56:31 +000012775 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) {
Daniel Veillard58e44c92002-08-02 22:19:49 +000012776 return xmlParseBalancedChunkMemoryRecover( doc, sax, user_data,
12777 depth, string, lst, 0 );
12778}
Daniel Veillard81273902003-09-30 00:43:48 +000012779#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillard58e44c92002-08-02 22:19:49 +000012780
12781/**
Daniel Veillard328f48c2002-11-15 15:24:34 +000012782 * xmlParseBalancedChunkMemoryInternal:
12783 * @oldctxt: the existing parsing context
12784 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
12785 * @user_data: the user data field for the parser context
12786 * @lst: the return value for the set of parsed nodes
12787 *
12788 *
12789 * Parse a well-balanced chunk of an XML document
12790 * called by the parser
12791 * The allowed sequence for the Well Balanced Chunk is the one defined by
12792 * the content production in the XML grammar:
12793 *
12794 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
12795 *
Daniel Veillard7d515752003-09-26 19:12:37 +000012796 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
12797 * error code otherwise
Daniel Veillard0161e632008-08-28 15:36:32 +000012798 *
Daniel Veillard328f48c2002-11-15 15:24:34 +000012799 * In case recover is set to 1, the nodelist will not be empty even if
Daniel Veillard0161e632008-08-28 15:36:32 +000012800 * the parsed chunk is not well balanced.
Daniel Veillard328f48c2002-11-15 15:24:34 +000012801 */
Daniel Veillard7d515752003-09-26 19:12:37 +000012802static xmlParserErrors
Daniel Veillard328f48c2002-11-15 15:24:34 +000012803xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
12804 const xmlChar *string, void *user_data, xmlNodePtr *lst) {
12805 xmlParserCtxtPtr ctxt;
Daniel Veillard68e9e742002-11-16 15:35:11 +000012806 xmlDocPtr newDoc = NULL;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012807 xmlNodePtr newRoot;
Daniel Veillard328f48c2002-11-15 15:24:34 +000012808 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000012809 xmlNodePtr content = NULL;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012810 xmlNodePtr last = NULL;
Daniel Veillard328f48c2002-11-15 15:24:34 +000012811 int size;
Daniel Veillard7d515752003-09-26 19:12:37 +000012812 xmlParserErrors ret = XML_ERR_OK;
Daniel Veillard74eaec12009-08-26 15:57:20 +020012813#ifdef SAX2
12814 int i;
12815#endif
Daniel Veillard328f48c2002-11-15 15:24:34 +000012816
Daniel Veillard0161e632008-08-28 15:36:32 +000012817 if (((oldctxt->depth > 40) && ((oldctxt->options & XML_PARSE_HUGE) == 0)) ||
12818 (oldctxt->depth > 1024)) {
Daniel Veillard328f48c2002-11-15 15:24:34 +000012819 return(XML_ERR_ENTITY_LOOP);
12820 }
12821
12822
12823 if (lst != NULL)
12824 *lst = NULL;
12825 if (string == NULL)
William M. Brack7b9154b2003-09-27 19:23:50 +000012826 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard328f48c2002-11-15 15:24:34 +000012827
12828 size = xmlStrlen(string);
12829
12830 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
William M. Brack7b9154b2003-09-27 19:23:50 +000012831 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
Daniel Veillard328f48c2002-11-15 15:24:34 +000012832 if (user_data != NULL)
12833 ctxt->userData = user_data;
12834 else
12835 ctxt->userData = ctxt;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000012836 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
12837 ctxt->dict = oldctxt->dict;
Daniel Veillardfd343dc2003-10-31 10:55:22 +000012838 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
12839 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
12840 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
Daniel Veillard328f48c2002-11-15 15:24:34 +000012841
Daniel Veillard74eaec12009-08-26 15:57:20 +020012842#ifdef SAX2
12843 /* propagate namespaces down the entity */
12844 for (i = 0;i < oldctxt->nsNr;i += 2) {
12845 nsPush(ctxt, oldctxt->nsTab[i], oldctxt->nsTab[i+1]);
12846 }
12847#endif
12848
Daniel Veillard328f48c2002-11-15 15:24:34 +000012849 oldsax = ctxt->sax;
12850 ctxt->sax = oldctxt->sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000012851 xmlDetectSAX2(ctxt);
Daniel Veillard87ab1c12003-12-21 13:01:56 +000012852 ctxt->replaceEntities = oldctxt->replaceEntities;
12853 ctxt->options = oldctxt->options;
Daniel Veillard0161e632008-08-28 15:36:32 +000012854
Daniel Veillarde1ca5032002-12-09 14:13:43 +000012855 ctxt->_private = oldctxt->_private;
Daniel Veillard328f48c2002-11-15 15:24:34 +000012856 if (oldctxt->myDoc == NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000012857 newDoc = xmlNewDoc(BAD_CAST "1.0");
12858 if (newDoc == NULL) {
12859 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000012860 ctxt->dict = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000012861 xmlFreeParserCtxt(ctxt);
William M. Brack7b9154b2003-09-27 19:23:50 +000012862 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard68e9e742002-11-16 15:35:11 +000012863 }
Daniel Veillardae0765b2008-07-31 19:54:59 +000012864 newDoc->properties = XML_DOC_INTERNAL;
Daniel Veillard03a53c32004-10-26 16:06:51 +000012865 newDoc->dict = ctxt->dict;
12866 xmlDictReference(newDoc->dict);
Daniel Veillard328f48c2002-11-15 15:24:34 +000012867 ctxt->myDoc = newDoc;
Daniel Veillard68e9e742002-11-16 15:35:11 +000012868 } else {
12869 ctxt->myDoc = oldctxt->myDoc;
12870 content = ctxt->myDoc->children;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012871 last = ctxt->myDoc->last;
Daniel Veillard328f48c2002-11-15 15:24:34 +000012872 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012873 newRoot = xmlNewDocNode(ctxt->myDoc, NULL, BAD_CAST "pseudoroot", NULL);
12874 if (newRoot == NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000012875 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000012876 ctxt->dict = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000012877 xmlFreeParserCtxt(ctxt);
Daniel Veillard03a53c32004-10-26 16:06:51 +000012878 if (newDoc != NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000012879 xmlFreeDoc(newDoc);
Daniel Veillard03a53c32004-10-26 16:06:51 +000012880 }
William M. Brack7b9154b2003-09-27 19:23:50 +000012881 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard68e9e742002-11-16 15:35:11 +000012882 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012883 ctxt->myDoc->children = NULL;
12884 ctxt->myDoc->last = NULL;
12885 xmlAddChild((xmlNodePtr) ctxt->myDoc, newRoot);
Daniel Veillard68e9e742002-11-16 15:35:11 +000012886 nodePush(ctxt, ctxt->myDoc->children);
Daniel Veillard328f48c2002-11-15 15:24:34 +000012887 ctxt->instate = XML_PARSER_CONTENT;
12888 ctxt->depth = oldctxt->depth + 1;
12889
Daniel Veillard328f48c2002-11-15 15:24:34 +000012890 ctxt->validate = 0;
12891 ctxt->loadsubset = oldctxt->loadsubset;
Daniel Veillardef8dd7b2003-03-23 12:02:56 +000012892 if ((oldctxt->validate) || (oldctxt->replaceEntities != 0)) {
12893 /*
12894 * ID/IDREF registration will be done in xmlValidateElement below
12895 */
12896 ctxt->loadsubset |= XML_SKIP_IDS;
12897 }
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000012898 ctxt->dictNames = oldctxt->dictNames;
Daniel Veillard95d2d5b2003-10-27 14:54:49 +000012899 ctxt->attsDefault = oldctxt->attsDefault;
12900 ctxt->attsSpecial = oldctxt->attsSpecial;
Daniel Veillard328f48c2002-11-15 15:24:34 +000012901
Daniel Veillard68e9e742002-11-16 15:35:11 +000012902 xmlParseContent(ctxt);
Daniel Veillard328f48c2002-11-15 15:24:34 +000012903 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012904 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000012905 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012906 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000012907 }
Daniel Veillard68e9e742002-11-16 15:35:11 +000012908 if (ctxt->node != ctxt->myDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012909 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000012910 }
12911
12912 if (!ctxt->wellFormed) {
12913 if (ctxt->errNo == 0)
Daniel Veillard7d515752003-09-26 19:12:37 +000012914 ret = XML_ERR_INTERNAL_ERROR;
Daniel Veillard328f48c2002-11-15 15:24:34 +000012915 else
William M. Brack7b9154b2003-09-27 19:23:50 +000012916 ret = (xmlParserErrors)ctxt->errNo;
Daniel Veillard328f48c2002-11-15 15:24:34 +000012917 } else {
William M. Brack7b9154b2003-09-27 19:23:50 +000012918 ret = XML_ERR_OK;
Daniel Veillard328f48c2002-11-15 15:24:34 +000012919 }
Daniel Veillard0161e632008-08-28 15:36:32 +000012920
William M. Brack7b9154b2003-09-27 19:23:50 +000012921 if ((lst != NULL) && (ret == XML_ERR_OK)) {
Daniel Veillard328f48c2002-11-15 15:24:34 +000012922 xmlNodePtr cur;
12923
12924 /*
12925 * Return the newly created nodeset after unlinking it from
12926 * they pseudo parent.
12927 */
Daniel Veillard68e9e742002-11-16 15:35:11 +000012928 cur = ctxt->myDoc->children->children;
Daniel Veillard328f48c2002-11-15 15:24:34 +000012929 *lst = cur;
12930 while (cur != NULL) {
Daniel Veillard4432df22003-09-28 18:58:27 +000012931#ifdef LIBXML_VALID_ENABLED
Daniel Veillard8874b942005-08-25 13:19:21 +000012932 if ((oldctxt->validate) && (oldctxt->wellFormed) &&
12933 (oldctxt->myDoc) && (oldctxt->myDoc->intSubset) &&
12934 (cur->type == XML_ELEMENT_NODE)) {
Daniel Veillard8d589042003-02-04 15:07:21 +000012935 oldctxt->valid &= xmlValidateElement(&oldctxt->vctxt,
12936 oldctxt->myDoc, cur);
12937 }
Daniel Veillard4432df22003-09-28 18:58:27 +000012938#endif /* LIBXML_VALID_ENABLED */
Daniel Veillard328f48c2002-11-15 15:24:34 +000012939 cur->parent = NULL;
12940 cur = cur->next;
12941 }
Daniel Veillard68e9e742002-11-16 15:35:11 +000012942 ctxt->myDoc->children->children = NULL;
12943 }
12944 if (ctxt->myDoc != NULL) {
12945 xmlFreeNode(ctxt->myDoc->children);
12946 ctxt->myDoc->children = content;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012947 ctxt->myDoc->last = last;
Daniel Veillard328f48c2002-11-15 15:24:34 +000012948 }
Daniel Veillard0161e632008-08-28 15:36:32 +000012949
12950 /*
12951 * Record in the parent context the number of entities replacement
12952 * done when parsing that reference.
12953 */
Daniel Veillardd44b9362009-09-07 12:15:08 +020012954 if (oldctxt != NULL)
12955 oldctxt->nbentities += ctxt->nbentities;
12956
Daniel Veillard0161e632008-08-28 15:36:32 +000012957 /*
12958 * Also record the last error if any
12959 */
12960 if (ctxt->lastError.code != XML_ERR_OK)
12961 xmlCopyError(&ctxt->lastError, &oldctxt->lastError);
12962
Daniel Veillard328f48c2002-11-15 15:24:34 +000012963 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000012964 ctxt->dict = NULL;
Daniel Veillard95d2d5b2003-10-27 14:54:49 +000012965 ctxt->attsDefault = NULL;
12966 ctxt->attsSpecial = NULL;
Daniel Veillard328f48c2002-11-15 15:24:34 +000012967 xmlFreeParserCtxt(ctxt);
Daniel Veillard03a53c32004-10-26 16:06:51 +000012968 if (newDoc != NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000012969 xmlFreeDoc(newDoc);
Daniel Veillard03a53c32004-10-26 16:06:51 +000012970 }
Daniel Veillard0161e632008-08-28 15:36:32 +000012971
Daniel Veillard328f48c2002-11-15 15:24:34 +000012972 return(ret);
12973}
12974
Daniel Veillard29b17482004-08-16 00:39:03 +000012975/**
12976 * xmlParseInNodeContext:
12977 * @node: the context node
12978 * @data: the input string
12979 * @datalen: the input string length in bytes
12980 * @options: a combination of xmlParserOption
12981 * @lst: the return value for the set of parsed nodes
12982 *
12983 * Parse a well-balanced chunk of an XML document
12984 * within the context (DTD, namespaces, etc ...) of the given node.
12985 *
12986 * The allowed sequence for the data is a Well Balanced Chunk defined by
12987 * the content production in the XML grammar:
12988 *
12989 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
12990 *
12991 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
12992 * error code otherwise
12993 */
12994xmlParserErrors
12995xmlParseInNodeContext(xmlNodePtr node, const char *data, int datalen,
12996 int options, xmlNodePtr *lst) {
12997#ifdef SAX2
12998 xmlParserCtxtPtr ctxt;
12999 xmlDocPtr doc = NULL;
13000 xmlNodePtr fake, cur;
13001 int nsnr = 0;
13002
13003 xmlParserErrors ret = XML_ERR_OK;
13004
13005 /*
13006 * check all input parameters, grab the document
13007 */
13008 if ((lst == NULL) || (node == NULL) || (data == NULL) || (datalen < 0))
13009 return(XML_ERR_INTERNAL_ERROR);
13010 switch (node->type) {
13011 case XML_ELEMENT_NODE:
13012 case XML_ATTRIBUTE_NODE:
13013 case XML_TEXT_NODE:
13014 case XML_CDATA_SECTION_NODE:
13015 case XML_ENTITY_REF_NODE:
13016 case XML_PI_NODE:
13017 case XML_COMMENT_NODE:
13018 case XML_DOCUMENT_NODE:
13019 case XML_HTML_DOCUMENT_NODE:
13020 break;
13021 default:
13022 return(XML_ERR_INTERNAL_ERROR);
13023
13024 }
13025 while ((node != NULL) && (node->type != XML_ELEMENT_NODE) &&
13026 (node->type != XML_DOCUMENT_NODE) &&
13027 (node->type != XML_HTML_DOCUMENT_NODE))
13028 node = node->parent;
13029 if (node == NULL)
13030 return(XML_ERR_INTERNAL_ERROR);
13031 if (node->type == XML_ELEMENT_NODE)
13032 doc = node->doc;
13033 else
13034 doc = (xmlDocPtr) node;
13035 if (doc == NULL)
13036 return(XML_ERR_INTERNAL_ERROR);
13037
13038 /*
13039 * allocate a context and set-up everything not related to the
13040 * node position in the tree
13041 */
13042 if (doc->type == XML_DOCUMENT_NODE)
13043 ctxt = xmlCreateMemoryParserCtxt((char *) data, datalen);
13044#ifdef LIBXML_HTML_ENABLED
Daniel Veillarde20fb5a2010-01-29 20:47:08 +010013045 else if (doc->type == XML_HTML_DOCUMENT_NODE) {
Daniel Veillard29b17482004-08-16 00:39:03 +000013046 ctxt = htmlCreateMemoryParserCtxt((char *) data, datalen);
Daniel Veillarde20fb5a2010-01-29 20:47:08 +010013047 /*
13048 * When parsing in context, it makes no sense to add implied
13049 * elements like html/body/etc...
13050 */
13051 options |= HTML_PARSE_NOIMPLIED;
13052 }
Daniel Veillard29b17482004-08-16 00:39:03 +000013053#endif
13054 else
13055 return(XML_ERR_INTERNAL_ERROR);
13056
13057 if (ctxt == NULL)
13058 return(XML_ERR_NO_MEMORY);
William M. Brackc3f81342004-10-03 01:22:44 +000013059
Daniel Veillard47cd14e2010-02-04 18:49:01 +010013060 /*
William M. Brackc3f81342004-10-03 01:22:44 +000013061 * Use input doc's dict if present, else assure XML_PARSE_NODICT is set.
13062 * We need a dictionary for xmlDetectSAX2, so if there's no doc dict
13063 * we must wait until the last moment to free the original one.
13064 */
Daniel Veillard29b17482004-08-16 00:39:03 +000013065 if (doc->dict != NULL) {
William M. Brackc3f81342004-10-03 01:22:44 +000013066 if (ctxt->dict != NULL)
Daniel Veillard29b17482004-08-16 00:39:03 +000013067 xmlDictFree(ctxt->dict);
13068 ctxt->dict = doc->dict;
William M. Brackc3f81342004-10-03 01:22:44 +000013069 } else
13070 options |= XML_PARSE_NODICT;
13071
Daniel Veillard47cd14e2010-02-04 18:49:01 +010013072 if (doc->encoding != NULL) {
13073 xmlCharEncodingHandlerPtr hdlr;
13074
13075 if (ctxt->encoding != NULL)
13076 xmlFree((xmlChar *) ctxt->encoding);
13077 ctxt->encoding = xmlStrdup((const xmlChar *) doc->encoding);
13078
13079 hdlr = xmlFindCharEncodingHandler(doc->encoding);
13080 if (hdlr != NULL) {
13081 xmlSwitchToEncoding(ctxt, hdlr);
13082 } else {
13083 return(XML_ERR_UNSUPPORTED_ENCODING);
13084 }
13085 }
13086
Daniel Veillard37334572008-07-31 08:20:02 +000013087 xmlCtxtUseOptionsInternal(ctxt, options, NULL);
Daniel Veillard29b17482004-08-16 00:39:03 +000013088 xmlDetectSAX2(ctxt);
13089 ctxt->myDoc = doc;
13090
Daniel Veillard47cd14e2010-02-04 18:49:01 +010013091 fake = xmlNewComment(NULL);
13092 if (fake == NULL) {
13093 xmlFreeParserCtxt(ctxt);
13094 return(XML_ERR_NO_MEMORY);
13095 }
13096 xmlAddChild(node, fake);
13097
Daniel Veillard29b17482004-08-16 00:39:03 +000013098 if (node->type == XML_ELEMENT_NODE) {
13099 nodePush(ctxt, node);
13100 /*
13101 * initialize the SAX2 namespaces stack
13102 */
13103 cur = node;
13104 while ((cur != NULL) && (cur->type == XML_ELEMENT_NODE)) {
13105 xmlNsPtr ns = cur->nsDef;
13106 const xmlChar *iprefix, *ihref;
13107
13108 while (ns != NULL) {
13109 if (ctxt->dict) {
13110 iprefix = xmlDictLookup(ctxt->dict, ns->prefix, -1);
13111 ihref = xmlDictLookup(ctxt->dict, ns->href, -1);
13112 } else {
13113 iprefix = ns->prefix;
13114 ihref = ns->href;
13115 }
13116
13117 if (xmlGetNamespace(ctxt, iprefix) == NULL) {
13118 nsPush(ctxt, iprefix, ihref);
13119 nsnr++;
13120 }
13121 ns = ns->next;
13122 }
13123 cur = cur->parent;
13124 }
13125 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillard0161e632008-08-28 15:36:32 +000013126 }
Daniel Veillard29b17482004-08-16 00:39:03 +000013127
13128 if ((ctxt->validate) || (ctxt->replaceEntities != 0)) {
13129 /*
13130 * ID/IDREF registration will be done in xmlValidateElement below
13131 */
13132 ctxt->loadsubset |= XML_SKIP_IDS;
13133 }
13134
Daniel Veillard499cc922006-01-18 17:22:35 +000013135#ifdef LIBXML_HTML_ENABLED
13136 if (doc->type == XML_HTML_DOCUMENT_NODE)
13137 __htmlParseContent(ctxt);
13138 else
13139#endif
13140 xmlParseContent(ctxt);
13141
Daniel Veillard29b17482004-08-16 00:39:03 +000013142 nsPop(ctxt, nsnr);
13143 if ((RAW == '<') && (NXT(1) == '/')) {
13144 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13145 } else if (RAW != 0) {
13146 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13147 }
13148 if ((ctxt->node != NULL) && (ctxt->node != node)) {
13149 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13150 ctxt->wellFormed = 0;
13151 }
13152
13153 if (!ctxt->wellFormed) {
13154 if (ctxt->errNo == 0)
13155 ret = XML_ERR_INTERNAL_ERROR;
13156 else
13157 ret = (xmlParserErrors)ctxt->errNo;
13158 } else {
13159 ret = XML_ERR_OK;
13160 }
Daniel Veillard0161e632008-08-28 15:36:32 +000013161
Daniel Veillard29b17482004-08-16 00:39:03 +000013162 /*
13163 * Return the newly created nodeset after unlinking it from
13164 * the pseudo sibling.
13165 */
Daniel Veillard0161e632008-08-28 15:36:32 +000013166
Daniel Veillard29b17482004-08-16 00:39:03 +000013167 cur = fake->next;
13168 fake->next = NULL;
13169 node->last = fake;
13170
13171 if (cur != NULL) {
13172 cur->prev = NULL;
13173 }
13174
13175 *lst = cur;
13176
13177 while (cur != NULL) {
13178 cur->parent = NULL;
13179 cur = cur->next;
13180 }
13181
13182 xmlUnlinkNode(fake);
13183 xmlFreeNode(fake);
13184
13185
13186 if (ret != XML_ERR_OK) {
13187 xmlFreeNodeList(*lst);
13188 *lst = NULL;
13189 }
William M. Brackc3f81342004-10-03 01:22:44 +000013190
William M. Brackb7b54de2004-10-06 16:38:01 +000013191 if (doc->dict != NULL)
13192 ctxt->dict = NULL;
Daniel Veillard29b17482004-08-16 00:39:03 +000013193 xmlFreeParserCtxt(ctxt);
Daniel Veillard0161e632008-08-28 15:36:32 +000013194
Daniel Veillard29b17482004-08-16 00:39:03 +000013195 return(ret);
13196#else /* !SAX2 */
13197 return(XML_ERR_INTERNAL_ERROR);
13198#endif
13199}
13200
Daniel Veillard81273902003-09-30 00:43:48 +000013201#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard328f48c2002-11-15 15:24:34 +000013202/**
Daniel Veillard58e44c92002-08-02 22:19:49 +000013203 * xmlParseBalancedChunkMemoryRecover:
13204 * @doc: the document the chunk pertains to
13205 * @sax: the SAX handler bloc (possibly NULL)
13206 * @user_data: The user data returned on SAX callbacks (possibly NULL)
13207 * @depth: Used for loop detection, use 0
13208 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
13209 * @lst: the return value for the set of parsed nodes
13210 * @recover: return nodes even if the data is broken (use 0)
13211 *
13212 *
13213 * Parse a well-balanced chunk of an XML document
13214 * called by the parser
13215 * The allowed sequence for the Well Balanced Chunk is the one defined by
13216 * the content production in the XML grammar:
13217 *
13218 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13219 *
13220 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
13221 * the parser error code otherwise
Daniel Veillard2135fc22008-04-04 16:10:51 +000013222 *
Daniel Veillard58e44c92002-08-02 22:19:49 +000013223 * In case recover is set to 1, the nodelist will not be empty even if
Daniel Veillard2135fc22008-04-04 16:10:51 +000013224 * the parsed chunk is not well balanced, assuming the parsing succeeded to
13225 * some extent.
Daniel Veillard58e44c92002-08-02 22:19:49 +000013226 */
13227int
13228xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax,
Daniel Veillard2135fc22008-04-04 16:10:51 +000013229 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst,
Daniel Veillard58e44c92002-08-02 22:19:49 +000013230 int recover) {
Owen Taylor3473f882001-02-23 17:55:21 +000013231 xmlParserCtxtPtr ctxt;
13232 xmlDocPtr newDoc;
13233 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000013234 xmlNodePtr content, newRoot;
Owen Taylor3473f882001-02-23 17:55:21 +000013235 int size;
13236 int ret = 0;
13237
Daniel Veillard0161e632008-08-28 15:36:32 +000013238 if (depth > 40) {
Owen Taylor3473f882001-02-23 17:55:21 +000013239 return(XML_ERR_ENTITY_LOOP);
13240 }
13241
13242
Daniel Veillardcda96922001-08-21 10:56:31 +000013243 if (lst != NULL)
13244 *lst = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000013245 if (string == NULL)
13246 return(-1);
13247
13248 size = xmlStrlen(string);
13249
13250 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
13251 if (ctxt == NULL) return(-1);
13252 ctxt->userData = ctxt;
13253 if (sax != NULL) {
13254 oldsax = ctxt->sax;
13255 ctxt->sax = sax;
13256 if (user_data != NULL)
13257 ctxt->userData = user_data;
13258 }
13259 newDoc = xmlNewDoc(BAD_CAST "1.0");
13260 if (newDoc == NULL) {
13261 xmlFreeParserCtxt(ctxt);
13262 return(-1);
13263 }
Daniel Veillardae0765b2008-07-31 19:54:59 +000013264 newDoc->properties = XML_DOC_INTERNAL;
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000013265 if ((doc != NULL) && (doc->dict != NULL)) {
13266 xmlDictFree(ctxt->dict);
13267 ctxt->dict = doc->dict;
13268 xmlDictReference(ctxt->dict);
13269 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
13270 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
13271 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
13272 ctxt->dictNames = 1;
13273 } else {
Daniel Veillard37334572008-07-31 08:20:02 +000013274 xmlCtxtUseOptionsInternal(ctxt, XML_PARSE_NODICT, NULL);
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000013275 }
Owen Taylor3473f882001-02-23 17:55:21 +000013276 if (doc != NULL) {
13277 newDoc->intSubset = doc->intSubset;
13278 newDoc->extSubset = doc->extSubset;
13279 }
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000013280 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
13281 if (newRoot == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000013282 if (sax != NULL)
13283 ctxt->sax = oldsax;
13284 xmlFreeParserCtxt(ctxt);
13285 newDoc->intSubset = NULL;
13286 newDoc->extSubset = NULL;
13287 xmlFreeDoc(newDoc);
13288 return(-1);
13289 }
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000013290 xmlAddChild((xmlNodePtr) newDoc, newRoot);
13291 nodePush(ctxt, newRoot);
Owen Taylor3473f882001-02-23 17:55:21 +000013292 if (doc == NULL) {
13293 ctxt->myDoc = newDoc;
13294 } else {
Daniel Veillard42766c02002-08-22 20:52:17 +000013295 ctxt->myDoc = newDoc;
Owen Taylor3473f882001-02-23 17:55:21 +000013296 newDoc->children->doc = doc;
Rob Richardsa02f1992006-09-16 14:04:26 +000013297 /* Ensure that doc has XML spec namespace */
13298 xmlSearchNsByHref(doc, (xmlNodePtr)doc, XML_XML_NAMESPACE);
13299 newDoc->oldNs = doc->oldNs;
Owen Taylor3473f882001-02-23 17:55:21 +000013300 }
13301 ctxt->instate = XML_PARSER_CONTENT;
13302 ctxt->depth = depth;
13303
13304 /*
13305 * Doing validity checking on chunk doesn't make sense
13306 */
13307 ctxt->validate = 0;
13308 ctxt->loadsubset = 0;
Daniel Veillarde57ec792003-09-10 10:50:59 +000013309 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000013310
Daniel Veillardb39bc392002-10-26 19:29:51 +000013311 if ( doc != NULL ){
13312 content = doc->children;
13313 doc->children = NULL;
13314 xmlParseContent(ctxt);
13315 doc->children = content;
13316 }
13317 else {
13318 xmlParseContent(ctxt);
13319 }
Owen Taylor3473f882001-02-23 17:55:21 +000013320 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000013321 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000013322 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000013323 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000013324 }
13325 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000013326 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000013327 }
13328
13329 if (!ctxt->wellFormed) {
13330 if (ctxt->errNo == 0)
13331 ret = 1;
13332 else
13333 ret = ctxt->errNo;
13334 } else {
Daniel Veillard58e44c92002-08-02 22:19:49 +000013335 ret = 0;
13336 }
Daniel Veillard0161e632008-08-28 15:36:32 +000013337
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000013338 if ((lst != NULL) && ((ret == 0) || (recover == 1))) {
13339 xmlNodePtr cur;
Owen Taylor3473f882001-02-23 17:55:21 +000013340
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000013341 /*
13342 * Return the newly created nodeset after unlinking it from
13343 * they pseudo parent.
13344 */
13345 cur = newDoc->children->children;
13346 *lst = cur;
13347 while (cur != NULL) {
13348 xmlSetTreeDoc(cur, doc);
13349 cur->parent = NULL;
13350 cur = cur->next;
Owen Taylor3473f882001-02-23 17:55:21 +000013351 }
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000013352 newDoc->children->children = NULL;
13353 }
Daniel Veillard0161e632008-08-28 15:36:32 +000013354
13355 if (sax != NULL)
Owen Taylor3473f882001-02-23 17:55:21 +000013356 ctxt->sax = oldsax;
13357 xmlFreeParserCtxt(ctxt);
13358 newDoc->intSubset = NULL;
13359 newDoc->extSubset = NULL;
Rob Richardsa02f1992006-09-16 14:04:26 +000013360 newDoc->oldNs = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000013361 xmlFreeDoc(newDoc);
Daniel Veillard0161e632008-08-28 15:36:32 +000013362
Owen Taylor3473f882001-02-23 17:55:21 +000013363 return(ret);
13364}
13365
13366/**
13367 * xmlSAXParseEntity:
13368 * @sax: the SAX handler block
13369 * @filename: the filename
13370 *
13371 * parse an XML external entity out of context and build a tree.
13372 * It use the given SAX function block to handle the parsing callback.
13373 * If sax is NULL, fallback to the default DOM tree building routines.
13374 *
13375 * [78] extParsedEnt ::= TextDecl? content
13376 *
13377 * This correspond to a "Well Balanced" chunk
13378 *
13379 * Returns the resulting document tree
13380 */
13381
13382xmlDocPtr
13383xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
13384 xmlDocPtr ret;
13385 xmlParserCtxtPtr ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000013386
13387 ctxt = xmlCreateFileParserCtxt(filename);
13388 if (ctxt == NULL) {
13389 return(NULL);
13390 }
13391 if (sax != NULL) {
13392 if (ctxt->sax != NULL)
13393 xmlFree(ctxt->sax);
13394 ctxt->sax = sax;
13395 ctxt->userData = NULL;
13396 }
13397
Owen Taylor3473f882001-02-23 17:55:21 +000013398 xmlParseExtParsedEnt(ctxt);
13399
13400 if (ctxt->wellFormed)
13401 ret = ctxt->myDoc;
13402 else {
13403 ret = NULL;
13404 xmlFreeDoc(ctxt->myDoc);
13405 ctxt->myDoc = NULL;
13406 }
13407 if (sax != NULL)
13408 ctxt->sax = NULL;
13409 xmlFreeParserCtxt(ctxt);
Daniel Veillard0161e632008-08-28 15:36:32 +000013410
Owen Taylor3473f882001-02-23 17:55:21 +000013411 return(ret);
13412}
13413
13414/**
13415 * xmlParseEntity:
13416 * @filename: the filename
13417 *
13418 * parse an XML external entity out of context and build a tree.
13419 *
13420 * [78] extParsedEnt ::= TextDecl? content
13421 *
13422 * This correspond to a "Well Balanced" chunk
13423 *
13424 * Returns the resulting document tree
13425 */
13426
13427xmlDocPtr
13428xmlParseEntity(const char *filename) {
13429 return(xmlSAXParseEntity(NULL, filename));
13430}
Daniel Veillard81273902003-09-30 00:43:48 +000013431#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000013432
13433/**
Rob Richards9c0aa472009-03-26 18:10:19 +000013434 * xmlCreateEntityParserCtxtInternal:
Owen Taylor3473f882001-02-23 17:55:21 +000013435 * @URL: the entity URL
13436 * @ID: the entity PUBLIC ID
Daniel Veillardcbaf3992001-12-31 16:16:02 +000013437 * @base: a possible base for the target URI
Rob Richards9c0aa472009-03-26 18:10:19 +000013438 * @pctx: parser context used to set options on new context
Owen Taylor3473f882001-02-23 17:55:21 +000013439 *
13440 * Create a parser context for an external entity
13441 * Automatic support for ZLIB/Compress compressed document is provided
13442 * by default if found at compile-time.
13443 *
13444 * Returns the new parser context or NULL
13445 */
Rob Richards9c0aa472009-03-26 18:10:19 +000013446static xmlParserCtxtPtr
13447xmlCreateEntityParserCtxtInternal(const xmlChar *URL, const xmlChar *ID,
13448 const xmlChar *base, xmlParserCtxtPtr pctx) {
Owen Taylor3473f882001-02-23 17:55:21 +000013449 xmlParserCtxtPtr ctxt;
13450 xmlParserInputPtr inputStream;
13451 char *directory = NULL;
13452 xmlChar *uri;
Daniel Veillard0161e632008-08-28 15:36:32 +000013453
Owen Taylor3473f882001-02-23 17:55:21 +000013454 ctxt = xmlNewParserCtxt();
13455 if (ctxt == NULL) {
13456 return(NULL);
13457 }
13458
Daniel Veillard48247b42009-07-10 16:12:46 +020013459 if (pctx != NULL) {
13460 ctxt->options = pctx->options;
13461 ctxt->_private = pctx->_private;
Rob Richards9c0aa472009-03-26 18:10:19 +000013462 }
13463
Owen Taylor3473f882001-02-23 17:55:21 +000013464 uri = xmlBuildURI(URL, base);
13465
13466 if (uri == NULL) {
13467 inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
13468 if (inputStream == NULL) {
13469 xmlFreeParserCtxt(ctxt);
13470 return(NULL);
13471 }
13472
13473 inputPush(ctxt, inputStream);
13474
13475 if ((ctxt->directory == NULL) && (directory == NULL))
13476 directory = xmlParserGetDirectory((char *)URL);
13477 if ((ctxt->directory == NULL) && (directory != NULL))
13478 ctxt->directory = directory;
13479 } else {
13480 inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt);
13481 if (inputStream == NULL) {
13482 xmlFree(uri);
13483 xmlFreeParserCtxt(ctxt);
13484 return(NULL);
13485 }
13486
13487 inputPush(ctxt, inputStream);
13488
13489 if ((ctxt->directory == NULL) && (directory == NULL))
13490 directory = xmlParserGetDirectory((char *)uri);
13491 if ((ctxt->directory == NULL) && (directory != NULL))
13492 ctxt->directory = directory;
13493 xmlFree(uri);
13494 }
Owen Taylor3473f882001-02-23 17:55:21 +000013495 return(ctxt);
13496}
13497
Rob Richards9c0aa472009-03-26 18:10:19 +000013498/**
13499 * xmlCreateEntityParserCtxt:
13500 * @URL: the entity URL
13501 * @ID: the entity PUBLIC ID
13502 * @base: a possible base for the target URI
13503 *
13504 * Create a parser context for an external entity
13505 * Automatic support for ZLIB/Compress compressed document is provided
13506 * by default if found at compile-time.
13507 *
13508 * Returns the new parser context or NULL
13509 */
13510xmlParserCtxtPtr
13511xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
13512 const xmlChar *base) {
13513 return xmlCreateEntityParserCtxtInternal(URL, ID, base, NULL);
13514
13515}
13516
Owen Taylor3473f882001-02-23 17:55:21 +000013517/************************************************************************
13518 * *
Daniel Veillard0161e632008-08-28 15:36:32 +000013519 * Front ends when parsing from a file *
Owen Taylor3473f882001-02-23 17:55:21 +000013520 * *
13521 ************************************************************************/
13522
13523/**
Daniel Veillard61b93382003-11-03 14:28:31 +000013524 * xmlCreateURLParserCtxt:
13525 * @filename: the filename or URL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013526 * @options: a combination of xmlParserOption
Owen Taylor3473f882001-02-23 17:55:21 +000013527 *
Daniel Veillard61b93382003-11-03 14:28:31 +000013528 * Create a parser context for a file or URL content.
Owen Taylor3473f882001-02-23 17:55:21 +000013529 * Automatic support for ZLIB/Compress compressed document is provided
Daniel Veillard61b93382003-11-03 14:28:31 +000013530 * by default if found at compile-time and for file accesses
Owen Taylor3473f882001-02-23 17:55:21 +000013531 *
13532 * Returns the new parser context or NULL
13533 */
13534xmlParserCtxtPtr
Daniel Veillard61b93382003-11-03 14:28:31 +000013535xmlCreateURLParserCtxt(const char *filename, int options)
Owen Taylor3473f882001-02-23 17:55:21 +000013536{
13537 xmlParserCtxtPtr ctxt;
13538 xmlParserInputPtr inputStream;
Owen Taylor3473f882001-02-23 17:55:21 +000013539 char *directory = NULL;
13540
Owen Taylor3473f882001-02-23 17:55:21 +000013541 ctxt = xmlNewParserCtxt();
13542 if (ctxt == NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +000013543 xmlErrMemory(NULL, "cannot allocate parser context");
Owen Taylor3473f882001-02-23 17:55:21 +000013544 return(NULL);
13545 }
13546
Daniel Veillarddf292f72005-01-16 19:00:15 +000013547 if (options)
Daniel Veillard37334572008-07-31 08:20:02 +000013548 xmlCtxtUseOptionsInternal(ctxt, options, NULL);
Daniel Veillarddf292f72005-01-16 19:00:15 +000013549 ctxt->linenumbers = 1;
Daniel Veillard37334572008-07-31 08:20:02 +000013550
Daniel Veillard4e9b1bc2003-06-09 10:30:33 +000013551 inputStream = xmlLoadExternalEntity(filename, NULL, ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000013552 if (inputStream == NULL) {
13553 xmlFreeParserCtxt(ctxt);
13554 return(NULL);
13555 }
13556
Owen Taylor3473f882001-02-23 17:55:21 +000013557 inputPush(ctxt, inputStream);
13558 if ((ctxt->directory == NULL) && (directory == NULL))
Igor Zlatkovic5f9fada2003-02-19 14:51:00 +000013559 directory = xmlParserGetDirectory(filename);
Owen Taylor3473f882001-02-23 17:55:21 +000013560 if ((ctxt->directory == NULL) && (directory != NULL))
13561 ctxt->directory = directory;
13562
13563 return(ctxt);
13564}
13565
Daniel Veillard61b93382003-11-03 14:28:31 +000013566/**
13567 * xmlCreateFileParserCtxt:
13568 * @filename: the filename
13569 *
13570 * Create a parser context for a file content.
13571 * Automatic support for ZLIB/Compress compressed document is provided
13572 * by default if found at compile-time.
13573 *
13574 * Returns the new parser context or NULL
13575 */
13576xmlParserCtxtPtr
13577xmlCreateFileParserCtxt(const char *filename)
13578{
13579 return(xmlCreateURLParserCtxt(filename, 0));
13580}
13581
Daniel Veillard81273902003-09-30 00:43:48 +000013582#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000013583/**
Daniel Veillarda293c322001-10-02 13:54:14 +000013584 * xmlSAXParseFileWithData:
Owen Taylor3473f882001-02-23 17:55:21 +000013585 * @sax: the SAX handler block
13586 * @filename: the filename
13587 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
13588 * documents
Daniel Veillarda293c322001-10-02 13:54:14 +000013589 * @data: the userdata
Owen Taylor3473f882001-02-23 17:55:21 +000013590 *
13591 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
13592 * compressed document is provided by default if found at compile-time.
13593 * It use the given SAX function block to handle the parsing callback.
13594 * If sax is NULL, fallback to the default DOM tree building routines.
13595 *
Daniel Veillarde19fc232002-04-22 16:01:24 +000013596 * User data (void *) is stored within the parser context in the
13597 * context's _private member, so it is available nearly everywhere in libxml
Daniel Veillarda293c322001-10-02 13:54:14 +000013598 *
Owen Taylor3473f882001-02-23 17:55:21 +000013599 * Returns the resulting document tree
13600 */
13601
13602xmlDocPtr
Daniel Veillarda293c322001-10-02 13:54:14 +000013603xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename,
13604 int recovery, void *data) {
Owen Taylor3473f882001-02-23 17:55:21 +000013605 xmlDocPtr ret;
13606 xmlParserCtxtPtr ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000013607
Daniel Veillard635ef722001-10-29 11:48:19 +000013608 xmlInitParser();
13609
Owen Taylor3473f882001-02-23 17:55:21 +000013610 ctxt = xmlCreateFileParserCtxt(filename);
13611 if (ctxt == NULL) {
13612 return(NULL);
13613 }
13614 if (sax != NULL) {
13615 if (ctxt->sax != NULL)
13616 xmlFree(ctxt->sax);
13617 ctxt->sax = sax;
Owen Taylor3473f882001-02-23 17:55:21 +000013618 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000013619 xmlDetectSAX2(ctxt);
Daniel Veillarda293c322001-10-02 13:54:14 +000013620 if (data!=NULL) {
Daniel Veillardc790bf42003-10-11 10:50:10 +000013621 ctxt->_private = data;
Daniel Veillarda293c322001-10-02 13:54:14 +000013622 }
Owen Taylor3473f882001-02-23 17:55:21 +000013623
Daniel Veillard37d2d162008-03-14 10:54:00 +000013624 if (ctxt->directory == NULL)
13625 ctxt->directory = xmlParserGetDirectory(filename);
Owen Taylor3473f882001-02-23 17:55:21 +000013626
Daniel Veillarddad3f682002-11-17 16:47:27 +000013627 ctxt->recovery = recovery;
13628
Owen Taylor3473f882001-02-23 17:55:21 +000013629 xmlParseDocument(ctxt);
13630
William M. Brackc07329e2003-09-08 01:57:30 +000013631 if ((ctxt->wellFormed) || recovery) {
13632 ret = ctxt->myDoc;
Daniel Veillardb65e12e2003-10-08 21:33:28 +000013633 if (ret != NULL) {
13634 if (ctxt->input->buf->compressed > 0)
13635 ret->compression = 9;
13636 else
13637 ret->compression = ctxt->input->buf->compressed;
13638 }
William M. Brackc07329e2003-09-08 01:57:30 +000013639 }
Owen Taylor3473f882001-02-23 17:55:21 +000013640 else {
13641 ret = NULL;
13642 xmlFreeDoc(ctxt->myDoc);
13643 ctxt->myDoc = NULL;
13644 }
13645 if (sax != NULL)
13646 ctxt->sax = NULL;
13647 xmlFreeParserCtxt(ctxt);
13648
13649 return(ret);
13650}
13651
13652/**
Daniel Veillarda293c322001-10-02 13:54:14 +000013653 * xmlSAXParseFile:
13654 * @sax: the SAX handler block
13655 * @filename: the filename
13656 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
13657 * documents
13658 *
13659 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
13660 * compressed document is provided by default if found at compile-time.
13661 * It use the given SAX function block to handle the parsing callback.
13662 * If sax is NULL, fallback to the default DOM tree building routines.
13663 *
13664 * Returns the resulting document tree
13665 */
13666
13667xmlDocPtr
13668xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
13669 int recovery) {
13670 return(xmlSAXParseFileWithData(sax,filename,recovery,NULL));
13671}
13672
13673/**
Owen Taylor3473f882001-02-23 17:55:21 +000013674 * xmlRecoverDoc:
13675 * @cur: a pointer to an array of xmlChar
13676 *
13677 * parse an XML in-memory document and build a tree.
Daniel Veillard2135fc22008-04-04 16:10:51 +000013678 * In the case the document is not Well Formed, a attempt to build a
13679 * tree is tried anyway
13680 *
13681 * Returns the resulting document tree or NULL in case of failure
Owen Taylor3473f882001-02-23 17:55:21 +000013682 */
13683
13684xmlDocPtr
Daniel Veillardf39eafa2009-08-20 19:15:08 +020013685xmlRecoverDoc(const xmlChar *cur) {
Owen Taylor3473f882001-02-23 17:55:21 +000013686 return(xmlSAXParseDoc(NULL, cur, 1));
13687}
13688
13689/**
13690 * xmlParseFile:
13691 * @filename: the filename
13692 *
13693 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
13694 * compressed document is provided by default if found at compile-time.
13695 *
Daniel Veillard5d96fff2001-08-31 14:55:30 +000013696 * Returns the resulting document tree if the file was wellformed,
13697 * NULL otherwise.
Owen Taylor3473f882001-02-23 17:55:21 +000013698 */
13699
13700xmlDocPtr
13701xmlParseFile(const char *filename) {
13702 return(xmlSAXParseFile(NULL, filename, 0));
13703}
13704
13705/**
13706 * xmlRecoverFile:
13707 * @filename: the filename
13708 *
13709 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
13710 * compressed document is provided by default if found at compile-time.
Daniel Veillard2135fc22008-04-04 16:10:51 +000013711 * In the case the document is not Well Formed, it attempts to build
13712 * a tree anyway
Owen Taylor3473f882001-02-23 17:55:21 +000013713 *
Daniel Veillard2135fc22008-04-04 16:10:51 +000013714 * Returns the resulting document tree or NULL in case of failure
Owen Taylor3473f882001-02-23 17:55:21 +000013715 */
13716
13717xmlDocPtr
13718xmlRecoverFile(const char *filename) {
13719 return(xmlSAXParseFile(NULL, filename, 1));
13720}
13721
13722
13723/**
13724 * xmlSetupParserForBuffer:
13725 * @ctxt: an XML parser context
13726 * @buffer: a xmlChar * buffer
13727 * @filename: a file name
13728 *
13729 * Setup the parser context to parse a new buffer; Clears any prior
13730 * contents from the parser context. The buffer parameter must not be
13731 * NULL, but the filename parameter can be
13732 */
13733void
13734xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
13735 const char* filename)
13736{
13737 xmlParserInputPtr input;
13738
Daniel Veillard36e5cd52004-11-02 14:52:23 +000013739 if ((ctxt == NULL) || (buffer == NULL))
13740 return;
13741
Owen Taylor3473f882001-02-23 17:55:21 +000013742 input = xmlNewInputStream(ctxt);
13743 if (input == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +000013744 xmlErrMemory(NULL, "parsing new buffer: out of memory\n");
Daniel Veillard36e5cd52004-11-02 14:52:23 +000013745 xmlClearParserCtxt(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000013746 return;
13747 }
13748
13749 xmlClearParserCtxt(ctxt);
13750 if (filename != NULL)
Daniel Veillardc3ca5ba2003-05-09 22:26:28 +000013751 input->filename = (char *) xmlCanonicPath((const xmlChar *)filename);
Owen Taylor3473f882001-02-23 17:55:21 +000013752 input->base = buffer;
13753 input->cur = buffer;
Daniel Veillard48b2f892001-02-25 16:11:03 +000013754 input->end = &buffer[xmlStrlen(buffer)];
Owen Taylor3473f882001-02-23 17:55:21 +000013755 inputPush(ctxt, input);
13756}
13757
13758/**
13759 * xmlSAXUserParseFile:
13760 * @sax: a SAX handler
13761 * @user_data: The user data returned on SAX callbacks
13762 * @filename: a file name
13763 *
13764 * parse an XML file and call the given SAX handler routines.
13765 * Automatic support for ZLIB/Compress compressed document is provided
13766 *
13767 * Returns 0 in case of success or a error number otherwise
13768 */
13769int
13770xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
13771 const char *filename) {
13772 int ret = 0;
13773 xmlParserCtxtPtr ctxt;
13774
13775 ctxt = xmlCreateFileParserCtxt(filename);
13776 if (ctxt == NULL) return -1;
Daniel Veillard092643b2003-09-25 14:29:29 +000013777 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Owen Taylor3473f882001-02-23 17:55:21 +000013778 xmlFree(ctxt->sax);
13779 ctxt->sax = sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000013780 xmlDetectSAX2(ctxt);
13781
Owen Taylor3473f882001-02-23 17:55:21 +000013782 if (user_data != NULL)
13783 ctxt->userData = user_data;
13784
13785 xmlParseDocument(ctxt);
13786
13787 if (ctxt->wellFormed)
13788 ret = 0;
13789 else {
13790 if (ctxt->errNo != 0)
13791 ret = ctxt->errNo;
13792 else
13793 ret = -1;
13794 }
13795 if (sax != NULL)
13796 ctxt->sax = NULL;
Daniel Veillard34099b42004-11-04 17:34:35 +000013797 if (ctxt->myDoc != NULL) {
13798 xmlFreeDoc(ctxt->myDoc);
13799 ctxt->myDoc = NULL;
13800 }
Owen Taylor3473f882001-02-23 17:55:21 +000013801 xmlFreeParserCtxt(ctxt);
13802
13803 return ret;
13804}
Daniel Veillard81273902003-09-30 00:43:48 +000013805#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000013806
13807/************************************************************************
13808 * *
13809 * Front ends when parsing from memory *
13810 * *
13811 ************************************************************************/
13812
13813/**
13814 * xmlCreateMemoryParserCtxt:
13815 * @buffer: a pointer to a char array
13816 * @size: the size of the array
13817 *
13818 * Create a parser context for an XML in-memory document.
13819 *
13820 * Returns the new parser context or NULL
13821 */
13822xmlParserCtxtPtr
Daniel Veillardfd7ddca2001-05-16 10:57:35 +000013823xmlCreateMemoryParserCtxt(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000013824 xmlParserCtxtPtr ctxt;
13825 xmlParserInputPtr input;
13826 xmlParserInputBufferPtr buf;
13827
13828 if (buffer == NULL)
13829 return(NULL);
13830 if (size <= 0)
13831 return(NULL);
13832
13833 ctxt = xmlNewParserCtxt();
13834 if (ctxt == NULL)
13835 return(NULL);
13836
Daniel Veillard53350552003-09-18 13:35:51 +000013837 /* TODO: xmlParserInputBufferCreateStatic, requires some serious changes */
Owen Taylor3473f882001-02-23 17:55:21 +000013838 buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
Daniel Veillarda7e05b42002-11-19 08:11:14 +000013839 if (buf == NULL) {
13840 xmlFreeParserCtxt(ctxt);
13841 return(NULL);
13842 }
Owen Taylor3473f882001-02-23 17:55:21 +000013843
13844 input = xmlNewInputStream(ctxt);
13845 if (input == NULL) {
Daniel Veillarda7e05b42002-11-19 08:11:14 +000013846 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000013847 xmlFreeParserCtxt(ctxt);
13848 return(NULL);
13849 }
13850
13851 input->filename = NULL;
13852 input->buf = buf;
13853 input->base = input->buf->buffer->content;
13854 input->cur = input->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +000013855 input->end = &input->buf->buffer->content[input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000013856
13857 inputPush(ctxt, input);
13858 return(ctxt);
13859}
13860
Daniel Veillard81273902003-09-30 00:43:48 +000013861#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000013862/**
Daniel Veillard8606bbb2002-11-12 12:36:52 +000013863 * xmlSAXParseMemoryWithData:
13864 * @sax: the SAX handler block
13865 * @buffer: an pointer to a char array
13866 * @size: the size of the array
13867 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
13868 * documents
13869 * @data: the userdata
13870 *
13871 * parse an XML in-memory block and use the given SAX function block
13872 * to handle the parsing callback. If sax is NULL, fallback to the default
13873 * DOM tree building routines.
13874 *
13875 * User data (void *) is stored within the parser context in the
13876 * context's _private member, so it is available nearly everywhere in libxml
13877 *
13878 * Returns the resulting document tree
13879 */
13880
13881xmlDocPtr
13882xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer,
13883 int size, int recovery, void *data) {
13884 xmlDocPtr ret;
13885 xmlParserCtxtPtr ctxt;
13886
Daniel Veillardab2a7632009-07-09 08:45:03 +020013887 xmlInitParser();
13888
Daniel Veillard8606bbb2002-11-12 12:36:52 +000013889 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
13890 if (ctxt == NULL) return(NULL);
13891 if (sax != NULL) {
13892 if (ctxt->sax != NULL)
13893 xmlFree(ctxt->sax);
13894 ctxt->sax = sax;
13895 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000013896 xmlDetectSAX2(ctxt);
Daniel Veillard8606bbb2002-11-12 12:36:52 +000013897 if (data!=NULL) {
13898 ctxt->_private=data;
13899 }
13900
Daniel Veillardadba5f12003-04-04 16:09:01 +000013901 ctxt->recovery = recovery;
13902
Daniel Veillard8606bbb2002-11-12 12:36:52 +000013903 xmlParseDocument(ctxt);
13904
13905 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
13906 else {
13907 ret = NULL;
13908 xmlFreeDoc(ctxt->myDoc);
13909 ctxt->myDoc = NULL;
13910 }
13911 if (sax != NULL)
13912 ctxt->sax = NULL;
13913 xmlFreeParserCtxt(ctxt);
Daniel Veillardab2a7632009-07-09 08:45:03 +020013914
Daniel Veillard8606bbb2002-11-12 12:36:52 +000013915 return(ret);
13916}
13917
13918/**
Owen Taylor3473f882001-02-23 17:55:21 +000013919 * xmlSAXParseMemory:
13920 * @sax: the SAX handler block
13921 * @buffer: an pointer to a char array
13922 * @size: the size of the array
13923 * @recovery: work in recovery mode, i.e. tries to read not Well Formed
13924 * documents
13925 *
13926 * parse an XML in-memory block and use the given SAX function block
13927 * to handle the parsing callback. If sax is NULL, fallback to the default
13928 * DOM tree building routines.
13929 *
13930 * Returns the resulting document tree
13931 */
13932xmlDocPtr
Daniel Veillard50822cb2001-07-26 20:05:51 +000013933xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer,
13934 int size, int recovery) {
Daniel Veillard8606bbb2002-11-12 12:36:52 +000013935 return xmlSAXParseMemoryWithData(sax, buffer, size, recovery, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000013936}
13937
13938/**
13939 * xmlParseMemory:
13940 * @buffer: an pointer to a char array
13941 * @size: the size of the array
13942 *
13943 * parse an XML in-memory block and build a tree.
13944 *
13945 * Returns the resulting document tree
13946 */
13947
Daniel Veillard50822cb2001-07-26 20:05:51 +000013948xmlDocPtr xmlParseMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000013949 return(xmlSAXParseMemory(NULL, buffer, size, 0));
13950}
13951
13952/**
13953 * xmlRecoverMemory:
13954 * @buffer: an pointer to a char array
13955 * @size: the size of the array
13956 *
13957 * parse an XML in-memory block and build a tree.
Daniel Veillard2135fc22008-04-04 16:10:51 +000013958 * In the case the document is not Well Formed, an attempt to
13959 * build a tree is tried anyway
13960 *
13961 * Returns the resulting document tree or NULL in case of error
Owen Taylor3473f882001-02-23 17:55:21 +000013962 */
13963
Daniel Veillard50822cb2001-07-26 20:05:51 +000013964xmlDocPtr xmlRecoverMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000013965 return(xmlSAXParseMemory(NULL, buffer, size, 1));
13966}
13967
13968/**
13969 * xmlSAXUserParseMemory:
13970 * @sax: a SAX handler
13971 * @user_data: The user data returned on SAX callbacks
13972 * @buffer: an in-memory XML document input
13973 * @size: the length of the XML document in bytes
13974 *
13975 * A better SAX parsing routine.
13976 * parse an XML in-memory buffer and call the given SAX handler routines.
Daniel Veillardab2a7632009-07-09 08:45:03 +020013977 *
Owen Taylor3473f882001-02-23 17:55:21 +000013978 * Returns 0 in case of success or a error number otherwise
13979 */
13980int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardfd7ddca2001-05-16 10:57:35 +000013981 const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000013982 int ret = 0;
13983 xmlParserCtxtPtr ctxt;
Daniel Veillardab2a7632009-07-09 08:45:03 +020013984
13985 xmlInitParser();
13986
Owen Taylor3473f882001-02-23 17:55:21 +000013987 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
13988 if (ctxt == NULL) return -1;
Daniel Veillard3dcd3192007-08-14 13:46:54 +000013989 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
13990 xmlFree(ctxt->sax);
Daniel Veillard9e923512002-08-14 08:48:52 +000013991 ctxt->sax = sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000013992 xmlDetectSAX2(ctxt);
Daniel Veillard3dcd3192007-08-14 13:46:54 +000013993
Daniel Veillard30211a02001-04-26 09:33:18 +000013994 if (user_data != NULL)
13995 ctxt->userData = user_data;
Daniel Veillardab2a7632009-07-09 08:45:03 +020013996
Owen Taylor3473f882001-02-23 17:55:21 +000013997 xmlParseDocument(ctxt);
13998
13999 if (ctxt->wellFormed)
14000 ret = 0;
14001 else {
14002 if (ctxt->errNo != 0)
14003 ret = ctxt->errNo;
14004 else
14005 ret = -1;
14006 }
Daniel Veillard3dcd3192007-08-14 13:46:54 +000014007 if (sax != NULL)
14008 ctxt->sax = NULL;
Daniel Veillard34099b42004-11-04 17:34:35 +000014009 if (ctxt->myDoc != NULL) {
14010 xmlFreeDoc(ctxt->myDoc);
14011 ctxt->myDoc = NULL;
14012 }
Owen Taylor3473f882001-02-23 17:55:21 +000014013 xmlFreeParserCtxt(ctxt);
14014
14015 return ret;
14016}
Daniel Veillard81273902003-09-30 00:43:48 +000014017#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000014018
14019/**
14020 * xmlCreateDocParserCtxt:
14021 * @cur: a pointer to an array of xmlChar
14022 *
14023 * Creates a parser context for an XML in-memory document.
14024 *
14025 * Returns the new parser context or NULL
14026 */
14027xmlParserCtxtPtr
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014028xmlCreateDocParserCtxt(const xmlChar *cur) {
Owen Taylor3473f882001-02-23 17:55:21 +000014029 int len;
14030
14031 if (cur == NULL)
14032 return(NULL);
14033 len = xmlStrlen(cur);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014034 return(xmlCreateMemoryParserCtxt((const char *)cur, len));
Owen Taylor3473f882001-02-23 17:55:21 +000014035}
14036
Daniel Veillard81273902003-09-30 00:43:48 +000014037#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000014038/**
14039 * xmlSAXParseDoc:
14040 * @sax: the SAX handler block
14041 * @cur: a pointer to an array of xmlChar
14042 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
14043 * documents
14044 *
14045 * parse an XML in-memory document and build a tree.
14046 * It use the given SAX function block to handle the parsing callback.
14047 * If sax is NULL, fallback to the default DOM tree building routines.
14048 *
14049 * Returns the resulting document tree
14050 */
14051
14052xmlDocPtr
Daniel Veillard7331e5c2005-03-31 14:59:00 +000014053xmlSAXParseDoc(xmlSAXHandlerPtr sax, const xmlChar *cur, int recovery) {
Owen Taylor3473f882001-02-23 17:55:21 +000014054 xmlDocPtr ret;
14055 xmlParserCtxtPtr ctxt;
Daniel Veillard38936062004-11-04 17:45:11 +000014056 xmlSAXHandlerPtr oldsax = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000014057
Daniel Veillard38936062004-11-04 17:45:11 +000014058 if (cur == NULL) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000014059
14060
14061 ctxt = xmlCreateDocParserCtxt(cur);
14062 if (ctxt == NULL) return(NULL);
14063 if (sax != NULL) {
Daniel Veillard38936062004-11-04 17:45:11 +000014064 oldsax = ctxt->sax;
Owen Taylor3473f882001-02-23 17:55:21 +000014065 ctxt->sax = sax;
14066 ctxt->userData = NULL;
14067 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000014068 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000014069
14070 xmlParseDocument(ctxt);
14071 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
14072 else {
14073 ret = NULL;
14074 xmlFreeDoc(ctxt->myDoc);
14075 ctxt->myDoc = NULL;
14076 }
Daniel Veillard34099b42004-11-04 17:34:35 +000014077 if (sax != NULL)
Daniel Veillard38936062004-11-04 17:45:11 +000014078 ctxt->sax = oldsax;
Owen Taylor3473f882001-02-23 17:55:21 +000014079 xmlFreeParserCtxt(ctxt);
14080
14081 return(ret);
14082}
14083
14084/**
14085 * xmlParseDoc:
14086 * @cur: a pointer to an array of xmlChar
14087 *
14088 * parse an XML in-memory document and build a tree.
14089 *
14090 * Returns the resulting document tree
14091 */
14092
14093xmlDocPtr
Daniel Veillard7331e5c2005-03-31 14:59:00 +000014094xmlParseDoc(const xmlChar *cur) {
Owen Taylor3473f882001-02-23 17:55:21 +000014095 return(xmlSAXParseDoc(NULL, cur, 0));
14096}
Daniel Veillard81273902003-09-30 00:43:48 +000014097#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000014098
Daniel Veillard81273902003-09-30 00:43:48 +000014099#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +000014100/************************************************************************
14101 * *
14102 * Specific function to keep track of entities references *
14103 * and used by the XSLT debugger *
14104 * *
14105 ************************************************************************/
14106
14107static xmlEntityReferenceFunc xmlEntityRefFunc = NULL;
14108
14109/**
14110 * xmlAddEntityReference:
14111 * @ent : A valid entity
14112 * @firstNode : A valid first node for children of entity
14113 * @lastNode : A valid last node of children entity
14114 *
14115 * Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY
14116 */
14117static void
14118xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
14119 xmlNodePtr lastNode)
14120{
14121 if (xmlEntityRefFunc != NULL) {
14122 (*xmlEntityRefFunc) (ent, firstNode, lastNode);
14123 }
14124}
14125
14126
14127/**
14128 * xmlSetEntityReferenceFunc:
Daniel Veillard01c13b52002-12-10 15:19:08 +000014129 * @func: A valid function
Daniel Veillard8107a222002-01-13 14:10:10 +000014130 *
14131 * Set the function to call call back when a xml reference has been made
14132 */
14133void
14134xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func)
14135{
14136 xmlEntityRefFunc = func;
14137}
Daniel Veillard81273902003-09-30 00:43:48 +000014138#endif /* LIBXML_LEGACY_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000014139
14140/************************************************************************
14141 * *
14142 * Miscellaneous *
14143 * *
14144 ************************************************************************/
14145
14146#ifdef LIBXML_XPATH_ENABLED
14147#include <libxml/xpath.h>
14148#endif
14149
Daniel Veillardffa3c742005-07-21 13:24:09 +000014150extern void XMLCDECL xmlGenericErrorDefaultFunc(void *ctx, const char *msg, ...);
Owen Taylor3473f882001-02-23 17:55:21 +000014151static int xmlParserInitialized = 0;
14152
14153/**
14154 * xmlInitParser:
14155 *
14156 * Initialization function for the XML parser.
14157 * This is not reentrant. Call once before processing in case of
14158 * use in multithreaded programs.
14159 */
14160
14161void
14162xmlInitParser(void) {
Daniel Veillard3c01b1d2001-10-17 15:58:35 +000014163 if (xmlParserInitialized != 0)
14164 return;
Owen Taylor3473f882001-02-23 17:55:21 +000014165
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000014166#ifdef LIBXML_THREAD_ENABLED
14167 __xmlGlobalInitMutexLock();
14168 if (xmlParserInitialized == 0) {
14169#endif
Daniel Veillard7dd70802009-06-04 11:08:39 +020014170 xmlInitThreads();
Mike Hommeye6f05092010-10-15 19:50:03 +020014171 xmlInitGlobals();
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000014172 if ((xmlGenericError == xmlGenericErrorDefaultFunc) ||
14173 (xmlGenericError == NULL))
14174 initGenericErrorDefaultFunc(NULL);
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000014175 xmlInitMemory();
14176 xmlInitCharEncodingHandlers();
14177 xmlDefaultSAXHandlerInit();
14178 xmlRegisterDefaultInputCallbacks();
Daniel Veillarda9cce9c2003-09-29 13:20:24 +000014179#ifdef LIBXML_OUTPUT_ENABLED
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000014180 xmlRegisterDefaultOutputCallbacks();
Daniel Veillarda9cce9c2003-09-29 13:20:24 +000014181#endif /* LIBXML_OUTPUT_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000014182#ifdef LIBXML_HTML_ENABLED
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000014183 htmlInitAutoClose();
14184 htmlDefaultSAXHandlerInit();
Owen Taylor3473f882001-02-23 17:55:21 +000014185#endif
14186#ifdef LIBXML_XPATH_ENABLED
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000014187 xmlXPathInit();
Owen Taylor3473f882001-02-23 17:55:21 +000014188#endif
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000014189 xmlParserInitialized = 1;
14190#ifdef LIBXML_THREAD_ENABLED
14191 }
14192 __xmlGlobalInitMutexUnlock();
14193#endif
Owen Taylor3473f882001-02-23 17:55:21 +000014194}
14195
14196/**
14197 * xmlCleanupParser:
14198 *
Daniel Veillard05b37c62008-03-31 08:27:07 +000014199 * This function name is somewhat misleading. It does not clean up
14200 * parser state, it cleans up memory allocated by the library itself.
14201 * It is a cleanup function for the XML library. It tries to reclaim all
14202 * related global memory allocated for the library processing.
14203 * It doesn't deallocate any document related memory. One should
14204 * call xmlCleanupParser() only when the process has finished using
14205 * the library and all XML/HTML documents built with it.
14206 * See also xmlInitParser() which has the opposite function of preparing
14207 * the library for operations.
Daniel Veillard01101202009-02-21 09:22:04 +000014208 *
14209 * WARNING: if your application is multithreaded or has plugin support
14210 * calling this may crash the application if another thread or
14211 * a plugin is still using libxml2. It's sometimes very hard to
14212 * guess if libxml2 is in use in the application, some libraries
14213 * or plugins may use it without notice. In case of doubt abstain
14214 * from calling this function or do it just before calling exit()
14215 * to avoid leak reports from valgrind !
Owen Taylor3473f882001-02-23 17:55:21 +000014216 */
14217
14218void
14219xmlCleanupParser(void) {
Daniel Veillard7fb801f2003-08-17 21:07:26 +000014220 if (!xmlParserInitialized)
14221 return;
14222
Owen Taylor3473f882001-02-23 17:55:21 +000014223 xmlCleanupCharEncodingHandlers();
Daniel Veillarde2940dd2001-08-22 00:06:49 +000014224#ifdef LIBXML_CATALOG_ENABLED
14225 xmlCatalogCleanup();
14226#endif
Daniel Veillard14412512005-01-21 23:53:26 +000014227 xmlDictCleanup();
Daniel Veillard04054be2003-10-15 10:48:54 +000014228 xmlCleanupInputCallbacks();
14229#ifdef LIBXML_OUTPUT_ENABLED
14230 xmlCleanupOutputCallbacks();
14231#endif
Daniel Veillard36e5cd52004-11-02 14:52:23 +000014232#ifdef LIBXML_SCHEMAS_ENABLED
14233 xmlSchemaCleanupTypes();
Daniel Veillarddd6d3002004-11-03 14:20:29 +000014234 xmlRelaxNGCleanupTypes();
Daniel Veillard36e5cd52004-11-02 14:52:23 +000014235#endif
Daniel Veillard781ac8b2003-05-15 22:11:36 +000014236 xmlCleanupGlobals();
Daniel Veillard2b8c4a12003-10-02 22:28:19 +000014237 xmlResetLastError();
Daniel Veillard74c0e592003-11-25 07:01:38 +000014238 xmlCleanupThreads(); /* must be last if called not from the main thread */
William M. Brack72ee48d2003-12-30 08:30:19 +000014239 xmlCleanupMemory();
Daniel Veillardd0463562001-10-13 09:15:48 +000014240 xmlParserInitialized = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000014241}
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014242
14243/************************************************************************
14244 * *
14245 * New set (2.6.0) of simpler and more flexible APIs *
14246 * *
14247 ************************************************************************/
14248
/**
 * DICT_FREE:
 * @str:  a string
 *
 * Free a string if it is not owned by the "dict" dictionary in the
 * current scope. A NULL @str is ignored. The macro relies on a variable
 * named "dict" being visible at the point of use; when "dict" is NULL
 * the string is always freed.
 *
 * The body is wrapped in do { } while (0) so that the macro behaves as a
 * single statement (safe inside an unbraced if/else); the caller supplies
 * the terminating semicolon.
 */
#define DICT_FREE(str)						\
    do {							\
        if ((str) && ((!dict) ||				\
            (xmlDictOwns(dict, (const xmlChar *)(str)) == 0)))	\
            xmlFree((char *)(str));				\
    } while (0)
14260
14261/**
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014262 * xmlCtxtReset:
14263 * @ctxt: an XML parser context
14264 *
14265 * Reset a parser context
14266 */
14267void
14268xmlCtxtReset(xmlParserCtxtPtr ctxt)
14269{
14270 xmlParserInputPtr input;
Daniel Veillard36e5cd52004-11-02 14:52:23 +000014271 xmlDictPtr dict;
14272
14273 if (ctxt == NULL)
14274 return;
14275
14276 dict = ctxt->dict;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014277
14278 while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
14279 xmlFreeInputStream(input);
14280 }
14281 ctxt->inputNr = 0;
14282 ctxt->input = NULL;
14283
14284 ctxt->spaceNr = 0;
Daniel Veillard2e620862007-06-12 08:18:21 +000014285 if (ctxt->spaceTab != NULL) {
14286 ctxt->spaceTab[0] = -1;
14287 ctxt->space = &ctxt->spaceTab[0];
14288 } else {
14289 ctxt->space = NULL;
14290 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014291
14292
14293 ctxt->nodeNr = 0;
14294 ctxt->node = NULL;
14295
14296 ctxt->nameNr = 0;
14297 ctxt->name = NULL;
14298
Daniel Veillarde96a2a42003-09-24 21:23:56 +000014299 DICT_FREE(ctxt->version);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014300 ctxt->version = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000014301 DICT_FREE(ctxt->encoding);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014302 ctxt->encoding = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000014303 DICT_FREE(ctxt->directory);
14304 ctxt->directory = NULL;
14305 DICT_FREE(ctxt->extSubURI);
14306 ctxt->extSubURI = NULL;
14307 DICT_FREE(ctxt->extSubSystem);
14308 ctxt->extSubSystem = NULL;
14309 if (ctxt->myDoc != NULL)
14310 xmlFreeDoc(ctxt->myDoc);
14311 ctxt->myDoc = NULL;
14312
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014313 ctxt->standalone = -1;
14314 ctxt->hasExternalSubset = 0;
14315 ctxt->hasPErefs = 0;
14316 ctxt->html = 0;
14317 ctxt->external = 0;
14318 ctxt->instate = XML_PARSER_START;
14319 ctxt->token = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014320
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014321 ctxt->wellFormed = 1;
14322 ctxt->nsWellFormed = 1;
Daniel Veillardae289182004-01-21 16:00:43 +000014323 ctxt->disableSAX = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014324 ctxt->valid = 1;
Daniel Veillard766c4f92004-03-26 10:48:29 +000014325#if 0
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014326 ctxt->vctxt.userData = ctxt;
14327 ctxt->vctxt.error = xmlParserValidityError;
14328 ctxt->vctxt.warning = xmlParserValidityWarning;
Daniel Veillard766c4f92004-03-26 10:48:29 +000014329#endif
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014330 ctxt->record_info = 0;
14331 ctxt->nbChars = 0;
14332 ctxt->checkIndex = 0;
14333 ctxt->inSubset = 0;
14334 ctxt->errNo = XML_ERR_OK;
14335 ctxt->depth = 0;
14336 ctxt->charset = XML_CHAR_ENCODING_UTF8;
14337 ctxt->catalogs = NULL;
Daniel Veillard4bf899b2008-08-20 17:04:30 +000014338 ctxt->nbentities = 0;
Daniel Veillard0161e632008-08-28 15:36:32 +000014339 ctxt->sizeentities = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014340 xmlInitNodeInfoSeq(&ctxt->node_seq);
14341
14342 if (ctxt->attsDefault != NULL) {
14343 xmlHashFree(ctxt->attsDefault, (xmlHashDeallocator) xmlFree);
14344 ctxt->attsDefault = NULL;
14345 }
14346 if (ctxt->attsSpecial != NULL) {
14347 xmlHashFree(ctxt->attsSpecial, NULL);
14348 ctxt->attsSpecial = NULL;
14349 }
14350
Daniel Veillard4432df22003-09-28 18:58:27 +000014351#ifdef LIBXML_CATALOG_ENABLED
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014352 if (ctxt->catalogs != NULL)
14353 xmlCatalogFreeLocal(ctxt->catalogs);
Daniel Veillard4432df22003-09-28 18:58:27 +000014354#endif
Daniel Veillardcc199e02003-10-24 21:11:48 +000014355 if (ctxt->lastError.code != XML_ERR_OK)
14356 xmlResetError(&ctxt->lastError);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014357}
14358
14359/**
Daniel Veillard9ba8e382003-10-28 21:31:45 +000014360 * xmlCtxtResetPush:
14361 * @ctxt: an XML parser context
14362 * @chunk: a pointer to an array of chars
14363 * @size: number of chars in the array
14364 * @filename: an optional file name or URI
14365 * @encoding: the document encoding, or NULL
14366 *
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000014367 * Reset a push parser context
14368 *
14369 * Returns 0 in case of success and 1 in case of error
Daniel Veillard9ba8e382003-10-28 21:31:45 +000014370 */
14371int
14372xmlCtxtResetPush(xmlParserCtxtPtr ctxt, const char *chunk,
14373 int size, const char *filename, const char *encoding)
14374{
14375 xmlParserInputPtr inputStream;
14376 xmlParserInputBufferPtr buf;
14377 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
14378
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000014379 if (ctxt == NULL)
14380 return(1);
14381
Daniel Veillard9ba8e382003-10-28 21:31:45 +000014382 if ((encoding == NULL) && (chunk != NULL) && (size >= 4))
14383 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
14384
14385 buf = xmlAllocParserInputBuffer(enc);
14386 if (buf == NULL)
14387 return(1);
14388
14389 if (ctxt == NULL) {
14390 xmlFreeParserInputBuffer(buf);
14391 return(1);
14392 }
14393
14394 xmlCtxtReset(ctxt);
14395
14396 if (ctxt->pushTab == NULL) {
14397 ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 *
14398 sizeof(xmlChar *));
14399 if (ctxt->pushTab == NULL) {
14400 xmlErrMemory(ctxt, NULL);
14401 xmlFreeParserInputBuffer(buf);
14402 return(1);
14403 }
14404 }
14405
14406 if (filename == NULL) {
14407 ctxt->directory = NULL;
14408 } else {
14409 ctxt->directory = xmlParserGetDirectory(filename);
14410 }
14411
14412 inputStream = xmlNewInputStream(ctxt);
14413 if (inputStream == NULL) {
14414 xmlFreeParserInputBuffer(buf);
14415 return(1);
14416 }
14417
14418 if (filename == NULL)
14419 inputStream->filename = NULL;
14420 else
14421 inputStream->filename = (char *)
14422 xmlCanonicPath((const xmlChar *) filename);
14423 inputStream->buf = buf;
14424 inputStream->base = inputStream->buf->buffer->content;
14425 inputStream->cur = inputStream->buf->buffer->content;
14426 inputStream->end =
14427 &inputStream->buf->buffer->content[inputStream->buf->buffer->use];
14428
14429 inputPush(ctxt, inputStream);
14430
14431 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
14432 (ctxt->input->buf != NULL)) {
14433 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
14434 int cur = ctxt->input->cur - ctxt->input->base;
14435
14436 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
14437
14438 ctxt->input->base = ctxt->input->buf->buffer->content + base;
14439 ctxt->input->cur = ctxt->input->base + cur;
14440 ctxt->input->end =
14441 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->
14442 use];
14443#ifdef DEBUG_PUSH
14444 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
14445#endif
14446 }
14447
14448 if (encoding != NULL) {
14449 xmlCharEncodingHandlerPtr hdlr;
14450
Daniel Veillard37334572008-07-31 08:20:02 +000014451 if (ctxt->encoding != NULL)
14452 xmlFree((xmlChar *) ctxt->encoding);
14453 ctxt->encoding = xmlStrdup((const xmlChar *) encoding);
14454
Daniel Veillard9ba8e382003-10-28 21:31:45 +000014455 hdlr = xmlFindCharEncodingHandler(encoding);
14456 if (hdlr != NULL) {
14457 xmlSwitchToEncoding(ctxt, hdlr);
14458 } else {
14459 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
14460 "Unsupported encoding %s\n", BAD_CAST encoding);
14461 }
14462 } else if (enc != XML_CHAR_ENCODING_NONE) {
14463 xmlSwitchEncoding(ctxt, enc);
14464 }
14465
14466 return(0);
14467}
14468
Daniel Veillard37334572008-07-31 08:20:02 +000014469
Daniel Veillard9ba8e382003-10-28 21:31:45 +000014470/**
Daniel Veillard37334572008-07-31 08:20:02 +000014471 * xmlCtxtUseOptionsInternal:
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014472 * @ctxt: an XML parser context
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014473 * @options: a combination of xmlParserOption
Daniel Veillard37334572008-07-31 08:20:02 +000014474 * @encoding: the user provided encoding to use
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014475 *
14476 * Applies the options to the parser context
14477 *
14478 * Returns 0 in case of success, the set of unknown or unimplemented options
14479 * in case of error.
14480 */
Daniel Veillard37334572008-07-31 08:20:02 +000014481static int
14482xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options, const char *encoding)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014483{
Daniel Veillard36e5cd52004-11-02 14:52:23 +000014484 if (ctxt == NULL)
14485 return(-1);
Daniel Veillard37334572008-07-31 08:20:02 +000014486 if (encoding != NULL) {
14487 if (ctxt->encoding != NULL)
14488 xmlFree((xmlChar *) ctxt->encoding);
14489 ctxt->encoding = xmlStrdup((const xmlChar *) encoding);
14490 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014491 if (options & XML_PARSE_RECOVER) {
14492 ctxt->recovery = 1;
14493 options -= XML_PARSE_RECOVER;
Daniel Veillardae0765b2008-07-31 19:54:59 +000014494 ctxt->options |= XML_PARSE_RECOVER;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014495 } else
14496 ctxt->recovery = 0;
14497 if (options & XML_PARSE_DTDLOAD) {
14498 ctxt->loadsubset = XML_DETECT_IDS;
14499 options -= XML_PARSE_DTDLOAD;
Daniel Veillardae0765b2008-07-31 19:54:59 +000014500 ctxt->options |= XML_PARSE_DTDLOAD;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014501 } else
14502 ctxt->loadsubset = 0;
14503 if (options & XML_PARSE_DTDATTR) {
14504 ctxt->loadsubset |= XML_COMPLETE_ATTRS;
14505 options -= XML_PARSE_DTDATTR;
Daniel Veillardae0765b2008-07-31 19:54:59 +000014506 ctxt->options |= XML_PARSE_DTDATTR;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014507 }
14508 if (options & XML_PARSE_NOENT) {
14509 ctxt->replaceEntities = 1;
14510 /* ctxt->loadsubset |= XML_DETECT_IDS; */
14511 options -= XML_PARSE_NOENT;
Daniel Veillardae0765b2008-07-31 19:54:59 +000014512 ctxt->options |= XML_PARSE_NOENT;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014513 } else
14514 ctxt->replaceEntities = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014515 if (options & XML_PARSE_PEDANTIC) {
14516 ctxt->pedantic = 1;
14517 options -= XML_PARSE_PEDANTIC;
Daniel Veillardae0765b2008-07-31 19:54:59 +000014518 ctxt->options |= XML_PARSE_PEDANTIC;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014519 } else
14520 ctxt->pedantic = 0;
14521 if (options & XML_PARSE_NOBLANKS) {
14522 ctxt->keepBlanks = 0;
14523 ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace;
14524 options -= XML_PARSE_NOBLANKS;
Daniel Veillardae0765b2008-07-31 19:54:59 +000014525 ctxt->options |= XML_PARSE_NOBLANKS;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014526 } else
14527 ctxt->keepBlanks = 1;
14528 if (options & XML_PARSE_DTDVALID) {
14529 ctxt->validate = 1;
14530 if (options & XML_PARSE_NOWARNING)
14531 ctxt->vctxt.warning = NULL;
14532 if (options & XML_PARSE_NOERROR)
14533 ctxt->vctxt.error = NULL;
14534 options -= XML_PARSE_DTDVALID;
Daniel Veillardae0765b2008-07-31 19:54:59 +000014535 ctxt->options |= XML_PARSE_DTDVALID;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014536 } else
14537 ctxt->validate = 0;
Daniel Veillard971771e2005-07-09 17:32:57 +000014538 if (options & XML_PARSE_NOWARNING) {
14539 ctxt->sax->warning = NULL;
14540 options -= XML_PARSE_NOWARNING;
14541 }
14542 if (options & XML_PARSE_NOERROR) {
14543 ctxt->sax->error = NULL;
14544 ctxt->sax->fatalError = NULL;
14545 options -= XML_PARSE_NOERROR;
14546 }
Daniel Veillard81273902003-09-30 00:43:48 +000014547#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014548 if (options & XML_PARSE_SAX1) {
14549 ctxt->sax->startElement = xmlSAX2StartElement;
14550 ctxt->sax->endElement = xmlSAX2EndElement;
14551 ctxt->sax->startElementNs = NULL;
14552 ctxt->sax->endElementNs = NULL;
14553 ctxt->sax->initialized = 1;
14554 options -= XML_PARSE_SAX1;
Daniel Veillardae0765b2008-07-31 19:54:59 +000014555 ctxt->options |= XML_PARSE_SAX1;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014556 }
Daniel Veillard81273902003-09-30 00:43:48 +000014557#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde96a2a42003-09-24 21:23:56 +000014558 if (options & XML_PARSE_NODICT) {
14559 ctxt->dictNames = 0;
14560 options -= XML_PARSE_NODICT;
Daniel Veillardae0765b2008-07-31 19:54:59 +000014561 ctxt->options |= XML_PARSE_NODICT;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000014562 } else {
14563 ctxt->dictNames = 1;
14564 }
Daniel Veillarddca8cc72003-09-26 13:53:14 +000014565 if (options & XML_PARSE_NOCDATA) {
14566 ctxt->sax->cdataBlock = NULL;
14567 options -= XML_PARSE_NOCDATA;
Daniel Veillardae0765b2008-07-31 19:54:59 +000014568 ctxt->options |= XML_PARSE_NOCDATA;
Daniel Veillarddca8cc72003-09-26 13:53:14 +000014569 }
14570 if (options & XML_PARSE_NSCLEAN) {
14571 ctxt->options |= XML_PARSE_NSCLEAN;
14572 options -= XML_PARSE_NSCLEAN;
14573 }
Daniel Veillard61b93382003-11-03 14:28:31 +000014574 if (options & XML_PARSE_NONET) {
14575 ctxt->options |= XML_PARSE_NONET;
14576 options -= XML_PARSE_NONET;
14577 }
Daniel Veillard8874b942005-08-25 13:19:21 +000014578 if (options & XML_PARSE_COMPACT) {
14579 ctxt->options |= XML_PARSE_COMPACT;
14580 options -= XML_PARSE_COMPACT;
14581 }
Daniel Veillard7e5c3f42008-07-29 16:12:31 +000014582 if (options & XML_PARSE_OLD10) {
14583 ctxt->options |= XML_PARSE_OLD10;
14584 options -= XML_PARSE_OLD10;
14585 }
Daniel Veillard8915c152008-08-26 13:05:34 +000014586 if (options & XML_PARSE_NOBASEFIX) {
14587 ctxt->options |= XML_PARSE_NOBASEFIX;
14588 options -= XML_PARSE_NOBASEFIX;
14589 }
14590 if (options & XML_PARSE_HUGE) {
14591 ctxt->options |= XML_PARSE_HUGE;
14592 options -= XML_PARSE_HUGE;
14593 }
Rob Richardsb9ed0172009-01-05 17:28:50 +000014594 if (options & XML_PARSE_OLDSAX) {
14595 ctxt->options |= XML_PARSE_OLDSAX;
14596 options -= XML_PARSE_OLDSAX;
14597 }
Daniel Veillardc62efc82011-05-16 16:03:50 +080014598 if (options & XML_PARSE_IGNORE_ENC) {
14599 ctxt->options |= XML_PARSE_IGNORE_ENC;
14600 options -= XML_PARSE_IGNORE_ENC;
14601 }
Daniel Veillard7ec29972003-10-31 14:36:36 +000014602 ctxt->linenumbers = 1;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014603 return (options);
14604}
14605
14606/**
Daniel Veillard37334572008-07-31 08:20:02 +000014607 * xmlCtxtUseOptions:
14608 * @ctxt: an XML parser context
14609 * @options: a combination of xmlParserOption
14610 *
14611 * Applies the options to the parser context
14612 *
14613 * Returns 0 in case of success, the set of unknown or unimplemented options
14614 * in case of error.
14615 */
14616int
14617xmlCtxtUseOptions(xmlParserCtxtPtr ctxt, int options)
14618{
14619 return(xmlCtxtUseOptionsInternal(ctxt, options, NULL));
14620}
14621
14622/**
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014623 * xmlDoRead:
14624 * @ctxt: an XML parser context
Daniel Veillard60942de2003-09-25 21:05:58 +000014625 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014626 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014627 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014628 * @reuse: keep the context for reuse
14629 *
14630 * Common front-end for the xmlRead functions
Daniel Veillard37334572008-07-31 08:20:02 +000014631 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014632 * Returns the resulting document tree or NULL
14633 */
14634static xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000014635xmlDoRead(xmlParserCtxtPtr ctxt, const char *URL, const char *encoding,
14636 int options, int reuse)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014637{
14638 xmlDocPtr ret;
Daniel Veillard37334572008-07-31 08:20:02 +000014639
14640 xmlCtxtUseOptionsInternal(ctxt, options, encoding);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014641 if (encoding != NULL) {
14642 xmlCharEncodingHandlerPtr hdlr;
14643
14644 hdlr = xmlFindCharEncodingHandler(encoding);
14645 if (hdlr != NULL)
14646 xmlSwitchToEncoding(ctxt, hdlr);
14647 }
Daniel Veillard60942de2003-09-25 21:05:58 +000014648 if ((URL != NULL) && (ctxt->input != NULL) &&
14649 (ctxt->input->filename == NULL))
14650 ctxt->input->filename = (char *) xmlStrdup((const xmlChar *) URL);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014651 xmlParseDocument(ctxt);
14652 if ((ctxt->wellFormed) || ctxt->recovery)
14653 ret = ctxt->myDoc;
14654 else {
14655 ret = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000014656 if (ctxt->myDoc != NULL) {
Daniel Veillarde96a2a42003-09-24 21:23:56 +000014657 xmlFreeDoc(ctxt->myDoc);
14658 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014659 }
Daniel Veillarde96a2a42003-09-24 21:23:56 +000014660 ctxt->myDoc = NULL;
14661 if (!reuse) {
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014662 xmlFreeParserCtxt(ctxt);
Daniel Veillarde96a2a42003-09-24 21:23:56 +000014663 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014664
14665 return (ret);
14666}
14667
14668/**
14669 * xmlReadDoc:
14670 * @cur: a pointer to a zero terminated string
Daniel Veillard60942de2003-09-25 21:05:58 +000014671 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014672 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014673 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014674 *
14675 * parse an XML in-memory document and build a tree.
14676 *
14677 * Returns the resulting document tree
14678 */
14679xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000014680xmlReadDoc(const xmlChar * cur, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014681{
14682 xmlParserCtxtPtr ctxt;
14683
14684 if (cur == NULL)
14685 return (NULL);
14686
14687 ctxt = xmlCreateDocParserCtxt(cur);
14688 if (ctxt == NULL)
14689 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000014690 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014691}
14692
14693/**
14694 * xmlReadFile:
14695 * @filename: a file or URL
14696 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014697 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014698 *
14699 * parse an XML file from the filesystem or the network.
14700 *
14701 * Returns the resulting document tree
14702 */
14703xmlDocPtr
14704xmlReadFile(const char *filename, const char *encoding, int options)
14705{
14706 xmlParserCtxtPtr ctxt;
14707
Daniel Veillard61b93382003-11-03 14:28:31 +000014708 ctxt = xmlCreateURLParserCtxt(filename, options);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014709 if (ctxt == NULL)
14710 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000014711 return (xmlDoRead(ctxt, NULL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014712}
14713
14714/**
14715 * xmlReadMemory:
14716 * @buffer: a pointer to a char array
14717 * @size: the size of the array
Daniel Veillard60942de2003-09-25 21:05:58 +000014718 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014719 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014720 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014721 *
14722 * parse an XML in-memory document and build a tree.
14723 *
14724 * Returns the resulting document tree
14725 */
14726xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000014727xmlReadMemory(const char *buffer, int size, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014728{
14729 xmlParserCtxtPtr ctxt;
14730
14731 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
14732 if (ctxt == NULL)
14733 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000014734 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014735}
14736
14737/**
14738 * xmlReadFd:
14739 * @fd: an open file descriptor
Daniel Veillard60942de2003-09-25 21:05:58 +000014740 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014741 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014742 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014743 *
14744 * parse an XML from a file descriptor and build a tree.
Daniel Veillard4bc5f432003-12-22 18:13:12 +000014745 * NOTE that the file descriptor will not be closed when the
14746 * reader is closed or reset.
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014747 *
14748 * Returns the resulting document tree
14749 */
14750xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000014751xmlReadFd(int fd, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014752{
14753 xmlParserCtxtPtr ctxt;
14754 xmlParserInputBufferPtr input;
14755 xmlParserInputPtr stream;
14756
14757 if (fd < 0)
14758 return (NULL);
14759
14760 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
14761 if (input == NULL)
14762 return (NULL);
Daniel Veillard4bc5f432003-12-22 18:13:12 +000014763 input->closecallback = NULL;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014764 ctxt = xmlNewParserCtxt();
14765 if (ctxt == NULL) {
14766 xmlFreeParserInputBuffer(input);
14767 return (NULL);
14768 }
14769 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
14770 if (stream == NULL) {
14771 xmlFreeParserInputBuffer(input);
14772 xmlFreeParserCtxt(ctxt);
14773 return (NULL);
14774 }
14775 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000014776 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014777}
14778
14779/**
14780 * xmlReadIO:
14781 * @ioread: an I/O read function
14782 * @ioclose: an I/O close function
14783 * @ioctx: an I/O handler
Daniel Veillard60942de2003-09-25 21:05:58 +000014784 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014785 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014786 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014787 *
14788 * parse an XML document from I/O functions and source and build a tree.
14789 *
14790 * Returns the resulting document tree
14791 */
14792xmlDocPtr
14793xmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
Daniel Veillard60942de2003-09-25 21:05:58 +000014794 void *ioctx, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014795{
14796 xmlParserCtxtPtr ctxt;
14797 xmlParserInputBufferPtr input;
14798 xmlParserInputPtr stream;
14799
14800 if (ioread == NULL)
14801 return (NULL);
14802
14803 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
14804 XML_CHAR_ENCODING_NONE);
14805 if (input == NULL)
14806 return (NULL);
14807 ctxt = xmlNewParserCtxt();
14808 if (ctxt == NULL) {
14809 xmlFreeParserInputBuffer(input);
14810 return (NULL);
14811 }
14812 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
14813 if (stream == NULL) {
14814 xmlFreeParserInputBuffer(input);
14815 xmlFreeParserCtxt(ctxt);
14816 return (NULL);
14817 }
14818 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000014819 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014820}
14821
14822/**
14823 * xmlCtxtReadDoc:
14824 * @ctxt: an XML parser context
14825 * @cur: a pointer to a zero terminated string
Daniel Veillard60942de2003-09-25 21:05:58 +000014826 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014827 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014828 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014829 *
14830 * parse an XML in-memory document and build a tree.
14831 * This reuses the existing @ctxt parser context
14832 *
14833 * Returns the resulting document tree
14834 */
14835xmlDocPtr
14836xmlCtxtReadDoc(xmlParserCtxtPtr ctxt, const xmlChar * cur,
Daniel Veillard60942de2003-09-25 21:05:58 +000014837 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014838{
14839 xmlParserInputPtr stream;
14840
14841 if (cur == NULL)
14842 return (NULL);
14843 if (ctxt == NULL)
14844 return (NULL);
14845
14846 xmlCtxtReset(ctxt);
14847
14848 stream = xmlNewStringInputStream(ctxt, cur);
14849 if (stream == NULL) {
14850 return (NULL);
14851 }
14852 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000014853 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014854}
14855
14856/**
14857 * xmlCtxtReadFile:
14858 * @ctxt: an XML parser context
14859 * @filename: a file or URL
14860 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014861 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014862 *
14863 * parse an XML file from the filesystem or the network.
14864 * This reuses the existing @ctxt parser context
14865 *
14866 * Returns the resulting document tree
14867 */
14868xmlDocPtr
14869xmlCtxtReadFile(xmlParserCtxtPtr ctxt, const char *filename,
14870 const char *encoding, int options)
14871{
14872 xmlParserInputPtr stream;
14873
14874 if (filename == NULL)
14875 return (NULL);
14876 if (ctxt == NULL)
14877 return (NULL);
14878
14879 xmlCtxtReset(ctxt);
14880
Daniel Veillard29614c72004-11-26 10:47:26 +000014881 stream = xmlLoadExternalEntity(filename, NULL, ctxt);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014882 if (stream == NULL) {
14883 return (NULL);
14884 }
14885 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000014886 return (xmlDoRead(ctxt, NULL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014887}
14888
14889/**
14890 * xmlCtxtReadMemory:
14891 * @ctxt: an XML parser context
14892 * @buffer: a pointer to a char array
14893 * @size: the size of the array
Daniel Veillard60942de2003-09-25 21:05:58 +000014894 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014895 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014896 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014897 *
14898 * parse an XML in-memory document and build a tree.
14899 * This reuses the existing @ctxt parser context
14900 *
14901 * Returns the resulting document tree
14902 */
14903xmlDocPtr
14904xmlCtxtReadMemory(xmlParserCtxtPtr ctxt, const char *buffer, int size,
Daniel Veillard60942de2003-09-25 21:05:58 +000014905 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014906{
14907 xmlParserInputBufferPtr input;
14908 xmlParserInputPtr stream;
14909
14910 if (ctxt == NULL)
14911 return (NULL);
14912 if (buffer == NULL)
14913 return (NULL);
14914
14915 xmlCtxtReset(ctxt);
14916
14917 input = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
14918 if (input == NULL) {
14919 return(NULL);
14920 }
14921
14922 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
14923 if (stream == NULL) {
14924 xmlFreeParserInputBuffer(input);
14925 return(NULL);
14926 }
14927
14928 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000014929 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014930}
14931
14932/**
14933 * xmlCtxtReadFd:
14934 * @ctxt: an XML parser context
14935 * @fd: an open file descriptor
Daniel Veillard60942de2003-09-25 21:05:58 +000014936 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014937 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014938 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014939 *
14940 * parse an XML from a file descriptor and build a tree.
14941 * This reuses the existing @ctxt parser context
Daniel Veillard4bc5f432003-12-22 18:13:12 +000014942 * NOTE that the file descriptor will not be closed when the
14943 * reader is closed or reset.
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014944 *
14945 * Returns the resulting document tree
14946 */
14947xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000014948xmlCtxtReadFd(xmlParserCtxtPtr ctxt, int fd,
14949 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014950{
14951 xmlParserInputBufferPtr input;
14952 xmlParserInputPtr stream;
14953
14954 if (fd < 0)
14955 return (NULL);
14956 if (ctxt == NULL)
14957 return (NULL);
14958
14959 xmlCtxtReset(ctxt);
14960
14961
14962 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
14963 if (input == NULL)
14964 return (NULL);
Daniel Veillard4bc5f432003-12-22 18:13:12 +000014965 input->closecallback = NULL;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014966 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
14967 if (stream == NULL) {
14968 xmlFreeParserInputBuffer(input);
14969 return (NULL);
14970 }
14971 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000014972 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014973}
14974
14975/**
14976 * xmlCtxtReadIO:
14977 * @ctxt: an XML parser context
14978 * @ioread: an I/O read function
14979 * @ioclose: an I/O close function
14980 * @ioctx: an I/O handler
Daniel Veillard60942de2003-09-25 21:05:58 +000014981 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014982 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014983 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014984 *
14985 * parse an XML document from I/O functions and source and build a tree.
14986 * This reuses the existing @ctxt parser context
14987 *
14988 * Returns the resulting document tree
14989 */
14990xmlDocPtr
14991xmlCtxtReadIO(xmlParserCtxtPtr ctxt, xmlInputReadCallback ioread,
14992 xmlInputCloseCallback ioclose, void *ioctx,
Daniel Veillard60942de2003-09-25 21:05:58 +000014993 const char *URL,
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014994 const char *encoding, int options)
14995{
14996 xmlParserInputBufferPtr input;
14997 xmlParserInputPtr stream;
14998
14999 if (ioread == NULL)
15000 return (NULL);
15001 if (ctxt == NULL)
15002 return (NULL);
15003
15004 xmlCtxtReset(ctxt);
15005
15006 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
15007 XML_CHAR_ENCODING_NONE);
15008 if (input == NULL)
15009 return (NULL);
15010 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15011 if (stream == NULL) {
15012 xmlFreeParserInputBuffer(input);
15013 return (NULL);
15014 }
15015 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000015016 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015017}
Daniel Veillard5d4644e2005-04-01 13:11:58 +000015018
15019#define bottom_parser
15020#include "elfgcchack.h"