blob: b39aa59c61a58ee1bd08619eedab2f7bf98bb7a8 [file] [log] [blame]
/*
 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
 *            implemented on top of the SAX interfaces
 *
 * References:
 *   The XML specification:
 *     http://www.w3.org/TR/REC-xml
 *   Original 1.0 version:
 *     http://www.w3.org/TR/1998/REC-xml-19980210
 *   XML second edition working draft
 *     http://www.w3.org/TR/2000/WD-xml-2e-20000814
 *
 * Okay this is a big file, the parser core is around 7000 lines, then it
 * is followed by the progressive parser top routines, then the various
 * high level APIs to call the parser and a few miscellaneous functions.
 * A number of helper functions and deprecated ones have been moved to
 * parserInternals.c to reduce this file size.
 * As much as possible the functions are associated with their relative
 * production in the XML specification. A few productions defining the
 * different ranges of character are actually implemented either in
 * parserInternals.h or parserInternals.c
 * The DOM tree build is realized from the default SAX callbacks in
 * the module SAX.c.
 * The routines doing the validation checks are in valid.c and called either
 * from the SAX callbacks or as standalone functions using a preparsed
 * document.
 *
 * See Copyright for the status of this software.
 *
 * daniel@veillard.com
 */
32
Daniel Veillard34ce8be2002-03-18 19:37:11 +000033#define IN_LIBXML
Bjorn Reese70a9da52001-04-21 16:57:29 +000034#include "libxml.h"
35
Daniel Veillard3c5ed912002-01-08 10:36:16 +000036#if defined(WIN32) && !defined (__CYGWIN__)
Owen Taylor3473f882001-02-23 17:55:21 +000037#define XML_DIR_SEP '\\'
38#else
Owen Taylor3473f882001-02-23 17:55:21 +000039#define XML_DIR_SEP '/'
40#endif
41
Owen Taylor3473f882001-02-23 17:55:21 +000042#include <stdlib.h>
43#include <string.h>
Aleksey Sanine7acf432003-10-02 20:05:27 +000044#include <stdarg.h>
Owen Taylor3473f882001-02-23 17:55:21 +000045#include <libxml/xmlmemory.h>
Daniel Veillardd0463562001-10-13 09:15:48 +000046#include <libxml/threads.h>
47#include <libxml/globals.h>
Owen Taylor3473f882001-02-23 17:55:21 +000048#include <libxml/tree.h>
49#include <libxml/parser.h>
50#include <libxml/parserInternals.h>
51#include <libxml/valid.h>
52#include <libxml/entities.h>
53#include <libxml/xmlerror.h>
54#include <libxml/encoding.h>
55#include <libxml/xmlIO.h>
56#include <libxml/uri.h>
Daniel Veillard5d90b6c2001-08-22 14:29:45 +000057#ifdef LIBXML_CATALOG_ENABLED
58#include <libxml/catalog.h>
59#endif
William M. Brack1d8c9b22004-12-25 10:14:57 +000060#ifdef LIBXML_SCHEMAS_ENABLED
61#include <libxml/xmlschemastypes.h>
62#include <libxml/relaxng.h>
63#endif
Owen Taylor3473f882001-02-23 17:55:21 +000064#ifdef HAVE_CTYPE_H
65#include <ctype.h>
66#endif
67#ifdef HAVE_STDLIB_H
68#include <stdlib.h>
69#endif
70#ifdef HAVE_SYS_STAT_H
71#include <sys/stat.h>
72#endif
73#ifdef HAVE_FCNTL_H
74#include <fcntl.h>
75#endif
76#ifdef HAVE_UNISTD_H
77#include <unistd.h>
78#endif
79#ifdef HAVE_ZLIB_H
80#include <zlib.h>
81#endif
Anders F Bjorklundeae52612011-09-18 16:59:13 +020082#ifdef HAVE_LZMA_H
83#include <lzma.h>
84#endif
Owen Taylor3473f882001-02-23 17:55:21 +000085
Daniel Veillard0161e632008-08-28 15:36:32 +000086static void
87xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info);
88
Rob Richards9c0aa472009-03-26 18:10:19 +000089static xmlParserCtxtPtr
90xmlCreateEntityParserCtxtInternal(const xmlChar *URL, const xmlChar *ID,
91 const xmlChar *base, xmlParserCtxtPtr pctx);
92
Daniel Veillard0161e632008-08-28 15:36:32 +000093/************************************************************************
94 * *
95 * Arbitrary limits set in the parser. See XML_PARSE_HUGE *
96 * *
97 ************************************************************************/
98
99#define XML_PARSER_BIG_ENTITY 1000
100#define XML_PARSER_LOT_ENTITY 5000
101
/*
 * XML_PARSER_NON_LINEAR is the threshold where the ratio of parsed entity
 * replacement over the size in bytes of the input indicates that you have
 * an exponential behaviour. A value of 10 corresponds to at least 3 entity
 * replacements per byte of input.
 */
108#define XML_PARSER_NON_LINEAR 10
109
110/*
111 * xmlParserEntityCheck
112 *
113 * Function to check non-linear entity expansion behaviour
114 * This is here to detect and stop exponential linear entity expansion
115 * This is not a limitation of the parser but a safety
116 * boundary feature. It can be disabled with the XML_PARSE_HUGE
117 * parser option.
118 */
119static int
120xmlParserEntityCheck(xmlParserCtxtPtr ctxt, unsigned long size,
121 xmlEntityPtr ent)
122{
Daniel Veillardcba68392008-08-29 12:43:40 +0000123 unsigned long consumed = 0;
Daniel Veillard0161e632008-08-28 15:36:32 +0000124
125 if ((ctxt == NULL) || (ctxt->options & XML_PARSE_HUGE))
126 return (0);
127 if (ctxt->lastError.code == XML_ERR_ENTITY_LOOP)
128 return (1);
129 if (size != 0) {
130 /*
131 * Do the check based on the replacement size of the entity
132 */
133 if (size < XML_PARSER_BIG_ENTITY)
134 return(0);
135
136 /*
137 * A limit on the amount of text data reasonably used
138 */
139 if (ctxt->input != NULL) {
140 consumed = ctxt->input->consumed +
141 (ctxt->input->cur - ctxt->input->base);
142 }
143 consumed += ctxt->sizeentities;
144
145 if ((size < XML_PARSER_NON_LINEAR * consumed) &&
146 (ctxt->nbentities * 3 < XML_PARSER_NON_LINEAR * consumed))
147 return (0);
148 } else if (ent != NULL) {
149 /*
150 * use the number of parsed entities in the replacement
151 */
152 size = ent->checked;
153
154 /*
155 * The amount of data parsed counting entities size only once
156 */
157 if (ctxt->input != NULL) {
158 consumed = ctxt->input->consumed +
159 (ctxt->input->cur - ctxt->input->base);
160 }
161 consumed += ctxt->sizeentities;
162
163 /*
164 * Check the density of entities for the amount of data
165 * knowing an entity reference will take at least 3 bytes
166 */
167 if (size * 3 < consumed * XML_PARSER_NON_LINEAR)
168 return (0);
169 } else {
170 /*
171 * strange we got no data for checking just return
172 */
173 return (0);
174 }
175
176 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
177 return (1);
178}
179
Daniel Veillard3b2e4e12003-02-03 08:52:58 +0000180/**
Daniel Veillard4aede2e2003-10-17 12:43:59 +0000181 * xmlParserMaxDepth:
Daniel Veillard3b2e4e12003-02-03 08:52:58 +0000182 *
Daniel Veillard8915c152008-08-26 13:05:34 +0000183 * arbitrary depth limit for the XML documents that we allow to
184 * process. This is not a limitation of the parser but a safety
185 * boundary feature. It can be disabled with the XML_PARSE_HUGE
186 * parser option.
Daniel Veillard3b2e4e12003-02-03 08:52:58 +0000187 */
Daniel Veillard8915c152008-08-26 13:05:34 +0000188unsigned int xmlParserMaxDepth = 256;
Owen Taylor3473f882001-02-23 17:55:21 +0000189
Daniel Veillard0fb18932003-09-07 09:14:37 +0000190
Daniel Veillard0161e632008-08-28 15:36:32 +0000191
192#define SAX2 1
Daniel Veillard21a0f912001-02-25 19:54:14 +0000193#define XML_PARSER_BIG_BUFFER_SIZE 300
Owen Taylor3473f882001-02-23 17:55:21 +0000194#define XML_PARSER_BUFFER_SIZE 100
Daniel Veillard5997aca2002-03-18 18:36:20 +0000195#define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"
196
Owen Taylor3473f882001-02-23 17:55:21 +0000197/*
Owen Taylor3473f882001-02-23 17:55:21 +0000198 * List of XML prefixed PI allowed by W3C specs
199 */
200
Daniel Veillardb44025c2001-10-11 22:55:55 +0000201static const char *xmlW3CPIs[] = {
Owen Taylor3473f882001-02-23 17:55:21 +0000202 "xml-stylesheet",
Daniel Veillard4c4653e2011-06-05 11:29:29 +0800203 "xml-model",
Owen Taylor3473f882001-02-23 17:55:21 +0000204 NULL
205};
206
Daniel Veillarda07050d2003-10-19 14:46:32 +0000207
Owen Taylor3473f882001-02-23 17:55:21 +0000208/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
Daniel Veillard8ed10722009-08-20 19:17:36 +0200209static xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
210 const xmlChar **str);
Owen Taylor3473f882001-02-23 17:55:21 +0000211
Daniel Veillard7d515752003-09-26 19:12:37 +0000212static xmlParserErrors
Daniel Veillarda97a19b2001-05-20 13:19:52 +0000213xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
214 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +0000215 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +0000216 const xmlChar *ID, xmlNodePtr *list);
Owen Taylor3473f882001-02-23 17:55:21 +0000217
Daniel Veillard37334572008-07-31 08:20:02 +0000218static int
219xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options,
220 const char *encoding);
Daniel Veillard81273902003-09-30 00:43:48 +0000221#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +0000222static void
223xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
224 xmlNodePtr lastNode);
Daniel Veillard81273902003-09-30 00:43:48 +0000225#endif /* LIBXML_LEGACY_ENABLED */
Daniel Veillard8107a222002-01-13 14:10:10 +0000226
Daniel Veillard7d515752003-09-26 19:12:37 +0000227static xmlParserErrors
Daniel Veillard328f48c2002-11-15 15:24:34 +0000228xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
229 const xmlChar *string, void *user_data, xmlNodePtr *lst);
Daniel Veillarde57ec792003-09-10 10:50:59 +0000230
Daniel Veillard8bf64ae2008-03-24 20:45:21 +0000231static int
232xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity);
233
Daniel Veillarde57ec792003-09-10 10:50:59 +0000234/************************************************************************
235 * *
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000236 * Some factorized error routines *
237 * *
238 ************************************************************************/
239
240/**
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000241 * xmlErrAttributeDup:
242 * @ctxt: an XML parser context
243 * @prefix: the attribute prefix
244 * @localname: the attribute localname
245 *
246 * Handle a redefinition of attribute error
247 */
248static void
249xmlErrAttributeDup(xmlParserCtxtPtr ctxt, const xmlChar * prefix,
250 const xmlChar * localname)
251{
Daniel Veillard157fee02003-10-31 10:36:03 +0000252 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
253 (ctxt->instate == XML_PARSER_EOF))
254 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000255 if (ctxt != NULL)
256 ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
Daniel Veillard76d36452009-09-07 11:19:33 +0200257
Daniel Veillard2b8c4a12003-10-02 22:28:19 +0000258 if (prefix == NULL)
Daniel Veillard659e71e2003-10-10 14:10:40 +0000259 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
Daniel Veillard76d36452009-09-07 11:19:33 +0200260 XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000261 (const char *) localname, NULL, NULL, 0, 0,
262 "Attribute %s redefined\n", localname);
Daniel Veillard2b8c4a12003-10-02 22:28:19 +0000263 else
Daniel Veillard659e71e2003-10-10 14:10:40 +0000264 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
Daniel Veillard76d36452009-09-07 11:19:33 +0200265 XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000266 (const char *) prefix, (const char *) localname,
267 NULL, 0, 0, "Attribute %s:%s redefined\n", prefix,
268 localname);
Daniel Veillard30e76072006-03-09 14:13:55 +0000269 if (ctxt != NULL) {
270 ctxt->wellFormed = 0;
271 if (ctxt->recovery == 0)
272 ctxt->disableSAX = 1;
273 }
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000274}
275
276/**
277 * xmlFatalErr:
278 * @ctxt: an XML parser context
279 * @error: the error number
280 * @extra: extra information string
281 *
282 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
283 */
284static void
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000285xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info)
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000286{
287 const char *errmsg;
288
Daniel Veillard157fee02003-10-31 10:36:03 +0000289 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
290 (ctxt->instate == XML_PARSER_EOF))
291 return;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000292 switch (error) {
293 case XML_ERR_INVALID_HEX_CHARREF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000294 errmsg = "CharRef: invalid hexadecimal value\n";
295 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000296 case XML_ERR_INVALID_DEC_CHARREF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000297 errmsg = "CharRef: invalid decimal value\n";
298 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000299 case XML_ERR_INVALID_CHARREF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000300 errmsg = "CharRef: invalid value\n";
301 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000302 case XML_ERR_INTERNAL_ERROR:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000303 errmsg = "internal error";
304 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000305 case XML_ERR_PEREF_AT_EOF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000306 errmsg = "PEReference at end of document\n";
307 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000308 case XML_ERR_PEREF_IN_PROLOG:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000309 errmsg = "PEReference in prolog\n";
310 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000311 case XML_ERR_PEREF_IN_EPILOG:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000312 errmsg = "PEReference in epilog\n";
313 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000314 case XML_ERR_PEREF_NO_NAME:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000315 errmsg = "PEReference: no name\n";
316 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000317 case XML_ERR_PEREF_SEMICOL_MISSING:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000318 errmsg = "PEReference: expecting ';'\n";
319 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000320 case XML_ERR_ENTITY_LOOP:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000321 errmsg = "Detected an entity reference loop\n";
322 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000323 case XML_ERR_ENTITY_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000324 errmsg = "EntityValue: \" or ' expected\n";
325 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000326 case XML_ERR_ENTITY_PE_INTERNAL:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000327 errmsg = "PEReferences forbidden in internal subset\n";
328 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000329 case XML_ERR_ENTITY_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000330 errmsg = "EntityValue: \" or ' expected\n";
331 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000332 case XML_ERR_ATTRIBUTE_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000333 errmsg = "AttValue: \" or ' expected\n";
334 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000335 case XML_ERR_LT_IN_ATTRIBUTE:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000336 errmsg = "Unescaped '<' not allowed in attributes values\n";
337 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000338 case XML_ERR_LITERAL_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000339 errmsg = "SystemLiteral \" or ' expected\n";
340 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000341 case XML_ERR_LITERAL_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000342 errmsg = "Unfinished System or Public ID \" or ' expected\n";
343 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000344 case XML_ERR_MISPLACED_CDATA_END:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000345 errmsg = "Sequence ']]>' not allowed in content\n";
346 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000347 case XML_ERR_URI_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000348 errmsg = "SYSTEM or PUBLIC, the URI is missing\n";
349 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000350 case XML_ERR_PUBID_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000351 errmsg = "PUBLIC, the Public Identifier is missing\n";
352 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000353 case XML_ERR_HYPHEN_IN_COMMENT:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000354 errmsg = "Comment must not contain '--' (double-hyphen)\n";
355 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000356 case XML_ERR_PI_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000357 errmsg = "xmlParsePI : no target name\n";
358 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000359 case XML_ERR_RESERVED_XML_NAME:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000360 errmsg = "Invalid PI name\n";
361 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000362 case XML_ERR_NOTATION_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000363 errmsg = "NOTATION: Name expected here\n";
364 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000365 case XML_ERR_NOTATION_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000366 errmsg = "'>' required to close NOTATION declaration\n";
367 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000368 case XML_ERR_VALUE_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000369 errmsg = "Entity value required\n";
370 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000371 case XML_ERR_URI_FRAGMENT:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000372 errmsg = "Fragment not allowed";
373 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000374 case XML_ERR_ATTLIST_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000375 errmsg = "'(' required to start ATTLIST enumeration\n";
376 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000377 case XML_ERR_NMTOKEN_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000378 errmsg = "NmToken expected in ATTLIST enumeration\n";
379 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000380 case XML_ERR_ATTLIST_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000381 errmsg = "')' required to finish ATTLIST enumeration\n";
382 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000383 case XML_ERR_MIXED_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000384 errmsg = "MixedContentDecl : '|' or ')*' expected\n";
385 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000386 case XML_ERR_PCDATA_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000387 errmsg = "MixedContentDecl : '#PCDATA' expected\n";
388 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000389 case XML_ERR_ELEMCONTENT_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000390 errmsg = "ContentDecl : Name or '(' expected\n";
391 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000392 case XML_ERR_ELEMCONTENT_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000393 errmsg = "ContentDecl : ',' '|' or ')' expected\n";
394 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000395 case XML_ERR_PEREF_IN_INT_SUBSET:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000396 errmsg =
397 "PEReference: forbidden within markup decl in internal subset\n";
398 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000399 case XML_ERR_GT_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000400 errmsg = "expected '>'\n";
401 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000402 case XML_ERR_CONDSEC_INVALID:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000403 errmsg = "XML conditional section '[' expected\n";
404 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000405 case XML_ERR_EXT_SUBSET_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000406 errmsg = "Content error in the external subset\n";
407 break;
408 case XML_ERR_CONDSEC_INVALID_KEYWORD:
409 errmsg =
410 "conditional section INCLUDE or IGNORE keyword expected\n";
411 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000412 case XML_ERR_CONDSEC_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000413 errmsg = "XML conditional section not closed\n";
414 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000415 case XML_ERR_XMLDECL_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000416 errmsg = "Text declaration '<?xml' required\n";
417 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000418 case XML_ERR_XMLDECL_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000419 errmsg = "parsing XML declaration: '?>' expected\n";
420 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000421 case XML_ERR_EXT_ENTITY_STANDALONE:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000422 errmsg = "external parsed entities cannot be standalone\n";
423 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000424 case XML_ERR_ENTITYREF_SEMICOL_MISSING:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000425 errmsg = "EntityRef: expecting ';'\n";
426 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000427 case XML_ERR_DOCTYPE_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000428 errmsg = "DOCTYPE improperly terminated\n";
429 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000430 case XML_ERR_LTSLASH_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000431 errmsg = "EndTag: '</' not found\n";
432 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000433 case XML_ERR_EQUAL_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000434 errmsg = "expected '='\n";
435 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000436 case XML_ERR_STRING_NOT_CLOSED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000437 errmsg = "String not closed expecting \" or '\n";
438 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000439 case XML_ERR_STRING_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000440 errmsg = "String not started expecting ' or \"\n";
441 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000442 case XML_ERR_ENCODING_NAME:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000443 errmsg = "Invalid XML encoding name\n";
444 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000445 case XML_ERR_STANDALONE_VALUE:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000446 errmsg = "standalone accepts only 'yes' or 'no'\n";
447 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000448 case XML_ERR_DOCUMENT_EMPTY:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000449 errmsg = "Document is empty\n";
450 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000451 case XML_ERR_DOCUMENT_END:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000452 errmsg = "Extra content at the end of the document\n";
453 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000454 case XML_ERR_NOT_WELL_BALANCED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000455 errmsg = "chunk is not well balanced\n";
456 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000457 case XML_ERR_EXTRA_CONTENT:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000458 errmsg = "extra content at the end of well balanced chunk\n";
459 break;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000460 case XML_ERR_VERSION_MISSING:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000461 errmsg = "Malformed declaration expecting version\n";
462 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000463#if 0
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000464 case:
465 errmsg = "\n";
466 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000467#endif
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000468 default:
469 errmsg = "Unregistered error message\n";
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000470 }
Daniel Veillard30e76072006-03-09 14:13:55 +0000471 if (ctxt != NULL)
472 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000473 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000474 XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, errmsg,
475 info);
Daniel Veillard30e76072006-03-09 14:13:55 +0000476 if (ctxt != NULL) {
477 ctxt->wellFormed = 0;
478 if (ctxt->recovery == 0)
479 ctxt->disableSAX = 1;
480 }
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000481}
482
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000483/**
484 * xmlFatalErrMsg:
485 * @ctxt: an XML parser context
486 * @error: the error number
487 * @msg: the error message
488 *
489 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
490 */
491static void
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000492xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
493 const char *msg)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000494{
Daniel Veillard157fee02003-10-31 10:36:03 +0000495 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
496 (ctxt->instate == XML_PARSER_EOF))
497 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000498 if (ctxt != NULL)
499 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000500 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
Daniel Veillardbccae2d2009-06-04 11:22:45 +0200501 XML_ERR_FATAL, NULL, 0, NULL, NULL, NULL, 0, 0, "%s", msg);
Daniel Veillard30e76072006-03-09 14:13:55 +0000502 if (ctxt != NULL) {
503 ctxt->wellFormed = 0;
504 if (ctxt->recovery == 0)
505 ctxt->disableSAX = 1;
506 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000507}
508
509/**
Daniel Veillard24eb9782003-10-04 21:08:09 +0000510 * xmlWarningMsg:
511 * @ctxt: an XML parser context
512 * @error: the error number
513 * @msg: the error message
514 * @str1: extra data
515 * @str2: extra data
516 *
517 * Handle a warning.
518 */
519static void
520xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
521 const char *msg, const xmlChar *str1, const xmlChar *str2)
522{
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000523 xmlStructuredErrorFunc schannel = NULL;
Daniel Veillard34e3f642008-07-29 09:02:27 +0000524
Daniel Veillard157fee02003-10-31 10:36:03 +0000525 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
526 (ctxt->instate == XML_PARSER_EOF))
527 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000528 if ((ctxt != NULL) && (ctxt->sax != NULL) &&
529 (ctxt->sax->initialized == XML_SAX2_MAGIC))
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000530 schannel = ctxt->sax->serror;
Daniel Veillardd44b9362009-09-07 12:15:08 +0200531 if (ctxt != NULL) {
532 __xmlRaiseError(schannel,
Daniel Veillard659e71e2003-10-10 14:10:40 +0000533 (ctxt->sax) ? ctxt->sax->warning : NULL,
534 ctxt->userData,
Daniel Veillard24eb9782003-10-04 21:08:09 +0000535 ctxt, NULL, XML_FROM_PARSER, error,
536 XML_ERR_WARNING, NULL, 0,
537 (const char *) str1, (const char *) str2, NULL, 0, 0,
538 msg, (const char *) str1, (const char *) str2);
Daniel Veillardd44b9362009-09-07 12:15:08 +0200539 } else {
540 __xmlRaiseError(schannel, NULL, NULL,
541 ctxt, NULL, XML_FROM_PARSER, error,
542 XML_ERR_WARNING, NULL, 0,
543 (const char *) str1, (const char *) str2, NULL, 0, 0,
544 msg, (const char *) str1, (const char *) str2);
545 }
Daniel Veillard24eb9782003-10-04 21:08:09 +0000546}
547
548/**
549 * xmlValidityError:
550 * @ctxt: an XML parser context
551 * @error: the error number
552 * @msg: the error message
553 * @str1: extra data
554 *
Daniel Veillardf88d8cf2003-12-08 10:25:02 +0000555 * Handle a validity error.
Daniel Veillard24eb9782003-10-04 21:08:09 +0000556 */
557static void
558xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error,
Daniel Veillardae0765b2008-07-31 19:54:59 +0000559 const char *msg, const xmlChar *str1, const xmlChar *str2)
Daniel Veillard24eb9782003-10-04 21:08:09 +0000560{
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000561 xmlStructuredErrorFunc schannel = NULL;
Daniel Veillard157fee02003-10-31 10:36:03 +0000562
563 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
564 (ctxt->instate == XML_PARSER_EOF))
565 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000566 if (ctxt != NULL) {
567 ctxt->errNo = error;
568 if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC))
569 schannel = ctxt->sax->serror;
570 }
Daniel Veillard76d36452009-09-07 11:19:33 +0200571 if (ctxt != NULL) {
572 __xmlRaiseError(schannel,
Daniel Veillard659e71e2003-10-10 14:10:40 +0000573 ctxt->vctxt.error, ctxt->vctxt.userData,
Daniel Veillard24eb9782003-10-04 21:08:09 +0000574 ctxt, NULL, XML_FROM_DTD, error,
575 XML_ERR_ERROR, NULL, 0, (const char *) str1,
Daniel Veillardae0765b2008-07-31 19:54:59 +0000576 (const char *) str2, NULL, 0, 0,
577 msg, (const char *) str1, (const char *) str2);
Daniel Veillard30e76072006-03-09 14:13:55 +0000578 ctxt->valid = 0;
Daniel Veillard76d36452009-09-07 11:19:33 +0200579 } else {
580 __xmlRaiseError(schannel, NULL, NULL,
581 ctxt, NULL, XML_FROM_DTD, error,
582 XML_ERR_ERROR, NULL, 0, (const char *) str1,
583 (const char *) str2, NULL, 0, 0,
584 msg, (const char *) str1, (const char *) str2);
Daniel Veillard30e76072006-03-09 14:13:55 +0000585 }
Daniel Veillard24eb9782003-10-04 21:08:09 +0000586}
587
588/**
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000589 * xmlFatalErrMsgInt:
590 * @ctxt: an XML parser context
591 * @error: the error number
592 * @msg: the error message
593 * @val: an integer value
594 *
595 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
596 */
597static void
598xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000599 const char *msg, int val)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000600{
Daniel Veillard157fee02003-10-31 10:36:03 +0000601 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
602 (ctxt->instate == XML_PARSER_EOF))
603 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000604 if (ctxt != NULL)
605 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000606 __xmlRaiseError(NULL, NULL, NULL,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000607 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
608 NULL, 0, NULL, NULL, NULL, val, 0, msg, val);
Daniel Veillard30e76072006-03-09 14:13:55 +0000609 if (ctxt != NULL) {
610 ctxt->wellFormed = 0;
611 if (ctxt->recovery == 0)
612 ctxt->disableSAX = 1;
613 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000614}
615
616/**
Daniel Veillardf403d292003-10-05 13:51:35 +0000617 * xmlFatalErrMsgStrIntStr:
618 * @ctxt: an XML parser context
619 * @error: the error number
620 * @msg: the error message
621 * @str1: an string info
622 * @val: an integer value
623 * @str2: an string info
624 *
625 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
626 */
627static void
628xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
629 const char *msg, const xmlChar *str1, int val,
630 const xmlChar *str2)
631{
Daniel Veillard157fee02003-10-31 10:36:03 +0000632 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
633 (ctxt->instate == XML_PARSER_EOF))
634 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000635 if (ctxt != NULL)
636 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000637 __xmlRaiseError(NULL, NULL, NULL,
Daniel Veillardf403d292003-10-05 13:51:35 +0000638 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
639 NULL, 0, (const char *) str1, (const char *) str2,
640 NULL, val, 0, msg, str1, val, str2);
Daniel Veillard30e76072006-03-09 14:13:55 +0000641 if (ctxt != NULL) {
642 ctxt->wellFormed = 0;
643 if (ctxt->recovery == 0)
644 ctxt->disableSAX = 1;
645 }
Daniel Veillardf403d292003-10-05 13:51:35 +0000646}
647
648/**
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000649 * xmlFatalErrMsgStr:
650 * @ctxt: an XML parser context
651 * @error: the error number
652 * @msg: the error message
653 * @val: a string value
654 *
655 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
656 */
657static void
658xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000659 const char *msg, const xmlChar * val)
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000660{
Daniel Veillard157fee02003-10-31 10:36:03 +0000661 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
662 (ctxt->instate == XML_PARSER_EOF))
663 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000664 if (ctxt != NULL)
665 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000666 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000667 XML_FROM_PARSER, error, XML_ERR_FATAL,
668 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
669 val);
Daniel Veillard30e76072006-03-09 14:13:55 +0000670 if (ctxt != NULL) {
671 ctxt->wellFormed = 0;
672 if (ctxt->recovery == 0)
673 ctxt->disableSAX = 1;
674 }
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000675}
676
677/**
Daniel Veillardf403d292003-10-05 13:51:35 +0000678 * xmlErrMsgStr:
679 * @ctxt: an XML parser context
680 * @error: the error number
681 * @msg: the error message
682 * @val: a string value
683 *
684 * Handle a non fatal parser error
685 */
686static void
687xmlErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
688 const char *msg, const xmlChar * val)
689{
Daniel Veillard157fee02003-10-31 10:36:03 +0000690 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
691 (ctxt->instate == XML_PARSER_EOF))
692 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000693 if (ctxt != NULL)
694 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000695 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
Daniel Veillardf403d292003-10-05 13:51:35 +0000696 XML_FROM_PARSER, error, XML_ERR_ERROR,
697 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
698 val);
699}
700
701/**
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000702 * xmlNsErr:
703 * @ctxt: an XML parser context
704 * @error: the error number
705 * @msg: the message
706 * @info1: extra information string
707 * @info2: extra information string
708 *
709 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
710 */
711static void
712xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
713 const char *msg,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000714 const xmlChar * info1, const xmlChar * info2,
715 const xmlChar * info3)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000716{
Daniel Veillard157fee02003-10-31 10:36:03 +0000717 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
718 (ctxt->instate == XML_PARSER_EOF))
719 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000720 if (ctxt != NULL)
721 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000722 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000723 XML_ERR_ERROR, NULL, 0, (const char *) info1,
724 (const char *) info2, (const char *) info3, 0, 0, msg,
725 info1, info2, info3);
Daniel Veillard30e76072006-03-09 14:13:55 +0000726 if (ctxt != NULL)
727 ctxt->nsWellFormed = 0;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000728}
729
Daniel Veillard37334572008-07-31 08:20:02 +0000730/**
731 * xmlNsWarn
732 * @ctxt: an XML parser context
733 * @error: the error number
734 * @msg: the message
735 * @info1: extra information string
736 * @info2: extra information string
737 *
Daniel Veillard288bb622012-05-07 15:01:29 +0800738 * Handle a namespace warning error
Daniel Veillard37334572008-07-31 08:20:02 +0000739 */
740static void
741xmlNsWarn(xmlParserCtxtPtr ctxt, xmlParserErrors error,
742 const char *msg,
743 const xmlChar * info1, const xmlChar * info2,
744 const xmlChar * info3)
745{
746 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
747 (ctxt->instate == XML_PARSER_EOF))
748 return;
Daniel Veillard37334572008-07-31 08:20:02 +0000749 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
750 XML_ERR_WARNING, NULL, 0, (const char *) info1,
751 (const char *) info2, (const char *) info3, 0, 0, msg,
752 info1, info2, info3);
753}
754
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000755/************************************************************************
756 * *
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000757 * Library wide options *
758 * *
759 ************************************************************************/
760
761/**
762 * xmlHasFeature:
763 * @feature: the feature to be examined
764 *
765 * Examines if the library has been compiled with a given feature.
766 *
767 * Returns a non-zero value if the feature exist, otherwise zero.
768 * Returns zero (0) if the feature does not exist or an unknown
769 * unknown feature is requested, non-zero otherwise.
770 */
771int
772xmlHasFeature(xmlFeature feature)
773{
774 switch (feature) {
Daniel Veillard602434d2005-09-12 09:20:31 +0000775 case XML_WITH_THREAD:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000776#ifdef LIBXML_THREAD_ENABLED
777 return(1);
778#else
779 return(0);
780#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000781 case XML_WITH_TREE:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000782#ifdef LIBXML_TREE_ENABLED
783 return(1);
784#else
785 return(0);
786#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000787 case XML_WITH_OUTPUT:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000788#ifdef LIBXML_OUTPUT_ENABLED
789 return(1);
790#else
791 return(0);
792#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000793 case XML_WITH_PUSH:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000794#ifdef LIBXML_PUSH_ENABLED
795 return(1);
796#else
797 return(0);
798#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000799 case XML_WITH_READER:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000800#ifdef LIBXML_READER_ENABLED
801 return(1);
802#else
803 return(0);
804#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000805 case XML_WITH_PATTERN:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000806#ifdef LIBXML_PATTERN_ENABLED
807 return(1);
808#else
809 return(0);
810#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000811 case XML_WITH_WRITER:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000812#ifdef LIBXML_WRITER_ENABLED
813 return(1);
814#else
815 return(0);
816#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000817 case XML_WITH_SAX1:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000818#ifdef LIBXML_SAX1_ENABLED
819 return(1);
820#else
821 return(0);
822#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000823 case XML_WITH_FTP:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000824#ifdef LIBXML_FTP_ENABLED
825 return(1);
826#else
827 return(0);
828#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000829 case XML_WITH_HTTP:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000830#ifdef LIBXML_HTTP_ENABLED
831 return(1);
832#else
833 return(0);
834#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000835 case XML_WITH_VALID:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000836#ifdef LIBXML_VALID_ENABLED
837 return(1);
838#else
839 return(0);
840#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000841 case XML_WITH_HTML:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000842#ifdef LIBXML_HTML_ENABLED
843 return(1);
844#else
845 return(0);
846#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000847 case XML_WITH_LEGACY:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000848#ifdef LIBXML_LEGACY_ENABLED
849 return(1);
850#else
851 return(0);
852#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000853 case XML_WITH_C14N:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000854#ifdef LIBXML_C14N_ENABLED
855 return(1);
856#else
857 return(0);
858#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000859 case XML_WITH_CATALOG:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000860#ifdef LIBXML_CATALOG_ENABLED
861 return(1);
862#else
863 return(0);
864#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000865 case XML_WITH_XPATH:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000866#ifdef LIBXML_XPATH_ENABLED
867 return(1);
868#else
869 return(0);
870#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000871 case XML_WITH_XPTR:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000872#ifdef LIBXML_XPTR_ENABLED
873 return(1);
874#else
875 return(0);
876#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000877 case XML_WITH_XINCLUDE:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000878#ifdef LIBXML_XINCLUDE_ENABLED
879 return(1);
880#else
881 return(0);
882#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000883 case XML_WITH_ICONV:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000884#ifdef LIBXML_ICONV_ENABLED
885 return(1);
886#else
887 return(0);
888#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000889 case XML_WITH_ISO8859X:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000890#ifdef LIBXML_ISO8859X_ENABLED
891 return(1);
892#else
893 return(0);
894#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000895 case XML_WITH_UNICODE:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000896#ifdef LIBXML_UNICODE_ENABLED
897 return(1);
898#else
899 return(0);
900#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000901 case XML_WITH_REGEXP:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000902#ifdef LIBXML_REGEXP_ENABLED
903 return(1);
904#else
905 return(0);
906#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000907 case XML_WITH_AUTOMATA:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000908#ifdef LIBXML_AUTOMATA_ENABLED
909 return(1);
910#else
911 return(0);
912#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000913 case XML_WITH_EXPR:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000914#ifdef LIBXML_EXPR_ENABLED
915 return(1);
916#else
917 return(0);
918#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000919 case XML_WITH_SCHEMAS:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000920#ifdef LIBXML_SCHEMAS_ENABLED
921 return(1);
922#else
923 return(0);
924#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000925 case XML_WITH_SCHEMATRON:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000926#ifdef LIBXML_SCHEMATRON_ENABLED
927 return(1);
928#else
929 return(0);
930#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000931 case XML_WITH_MODULES:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000932#ifdef LIBXML_MODULES_ENABLED
933 return(1);
934#else
935 return(0);
936#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000937 case XML_WITH_DEBUG:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000938#ifdef LIBXML_DEBUG_ENABLED
939 return(1);
940#else
941 return(0);
942#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000943 case XML_WITH_DEBUG_MEM:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000944#ifdef DEBUG_MEMORY_LOCATION
945 return(1);
946#else
947 return(0);
948#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000949 case XML_WITH_DEBUG_RUN:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000950#ifdef LIBXML_DEBUG_RUNTIME
951 return(1);
952#else
953 return(0);
Daniel Veillard34e3f642008-07-29 09:02:27 +0000954#endif
Daniel Veillard75acfee2006-07-13 06:29:56 +0000955 case XML_WITH_ZLIB:
956#ifdef LIBXML_ZLIB_ENABLED
957 return(1);
958#else
959 return(0);
960#endif
Anders F Bjorklundeae52612011-09-18 16:59:13 +0200961 case XML_WITH_LZMA:
962#ifdef LIBXML_LZMA_ENABLED
963 return(1);
964#else
965 return(0);
966#endif
Giuseppe Iuculano48f7dcb2010-11-04 17:42:42 +0100967 case XML_WITH_ICU:
968#ifdef LIBXML_ICU_ENABLED
969 return(1);
970#else
971 return(0);
972#endif
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000973 default:
974 break;
975 }
976 return(0);
977}
978
979/************************************************************************
980 * *
Daniel Veillarde57ec792003-09-10 10:50:59 +0000981 * SAX2 defaulted attributes handling *
982 * *
983 ************************************************************************/
984
985/**
986 * xmlDetectSAX2:
987 * @ctxt: an XML parser context
988 *
989 * Do the SAX2 detection and specific intialization
990 */
991static void
992xmlDetectSAX2(xmlParserCtxtPtr ctxt) {
993 if (ctxt == NULL) return;
Daniel Veillard81273902003-09-30 00:43:48 +0000994#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +0000995 if ((ctxt->sax) && (ctxt->sax->initialized == XML_SAX2_MAGIC) &&
996 ((ctxt->sax->startElementNs != NULL) ||
997 (ctxt->sax->endElementNs != NULL))) ctxt->sax2 = 1;
Daniel Veillard81273902003-09-30 00:43:48 +0000998#else
999 ctxt->sax2 = 1;
1000#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +00001001
1002 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
1003 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
1004 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
William M. Brack9f797ab2004-07-28 07:40:12 +00001005 if ((ctxt->str_xml==NULL) || (ctxt->str_xmlns==NULL) ||
1006 (ctxt->str_xml_ns == NULL)) {
1007 xmlErrMemory(ctxt, NULL);
1008 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00001009}
1010
Daniel Veillarde57ec792003-09-10 10:50:59 +00001011typedef struct _xmlDefAttrs xmlDefAttrs;
1012typedef xmlDefAttrs *xmlDefAttrsPtr;
1013struct _xmlDefAttrs {
1014 int nbAttrs; /* number of defaulted attributes on that element */
1015 int maxAttrs; /* the size of the array */
Daniel Veillardae0765b2008-07-31 19:54:59 +00001016 const xmlChar *values[5]; /* array of localname/prefix/values/external */
Daniel Veillarde57ec792003-09-10 10:50:59 +00001017};
Daniel Veillarde57ec792003-09-10 10:50:59 +00001018
1019/**
Daniel Veillard97c9ce22008-03-25 16:52:41 +00001020 * xmlAttrNormalizeSpace:
1021 * @src: the source string
1022 * @dst: the target string
1023 *
1024 * Normalize the space in non CDATA attribute values:
1025 * If the attribute type is not CDATA, then the XML processor MUST further
1026 * process the normalized attribute value by discarding any leading and
1027 * trailing space (#x20) characters, and by replacing sequences of space
1028 * (#x20) characters by a single space (#x20) character.
1029 * Note that the size of dst need to be at least src, and if one doesn't need
1030 * to preserve dst (and it doesn't come from a dictionary or read-only) then
1031 * passing src as dst is just fine.
1032 *
1033 * Returns a pointer to the normalized value (dst) or NULL if no conversion
1034 * is needed.
1035 */
1036static xmlChar *
1037xmlAttrNormalizeSpace(const xmlChar *src, xmlChar *dst)
1038{
1039 if ((src == NULL) || (dst == NULL))
1040 return(NULL);
1041
1042 while (*src == 0x20) src++;
1043 while (*src != 0) {
1044 if (*src == 0x20) {
1045 while (*src == 0x20) src++;
1046 if (*src != 0)
1047 *dst++ = 0x20;
1048 } else {
1049 *dst++ = *src++;
1050 }
1051 }
1052 *dst = 0;
1053 if (dst == src)
1054 return(NULL);
1055 return(dst);
1056}
1057
1058/**
1059 * xmlAttrNormalizeSpace2:
1060 * @src: the source string
1061 *
1062 * Normalize the space in non CDATA attribute values, a slightly more complex
1063 * front end to avoid allocation problems when running on attribute values
1064 * coming from the input.
1065 *
1066 * Returns a pointer to the normalized value (dst) or NULL if no conversion
1067 * is needed.
1068 */
1069static const xmlChar *
Daniel Veillardae0765b2008-07-31 19:54:59 +00001070xmlAttrNormalizeSpace2(xmlParserCtxtPtr ctxt, xmlChar *src, int *len)
Daniel Veillard97c9ce22008-03-25 16:52:41 +00001071{
1072 int i;
1073 int remove_head = 0;
1074 int need_realloc = 0;
1075 const xmlChar *cur;
1076
1077 if ((ctxt == NULL) || (src == NULL) || (len == NULL))
1078 return(NULL);
1079 i = *len;
1080 if (i <= 0)
1081 return(NULL);
1082
1083 cur = src;
1084 while (*cur == 0x20) {
1085 cur++;
1086 remove_head++;
1087 }
1088 while (*cur != 0) {
1089 if (*cur == 0x20) {
1090 cur++;
1091 if ((*cur == 0x20) || (*cur == 0)) {
1092 need_realloc = 1;
1093 break;
1094 }
1095 } else
1096 cur++;
1097 }
1098 if (need_realloc) {
1099 xmlChar *ret;
1100
1101 ret = xmlStrndup(src + remove_head, i - remove_head + 1);
1102 if (ret == NULL) {
1103 xmlErrMemory(ctxt, NULL);
1104 return(NULL);
1105 }
1106 xmlAttrNormalizeSpace(ret, ret);
1107 *len = (int) strlen((const char *)ret);
1108 return(ret);
1109 } else if (remove_head) {
1110 *len -= remove_head;
Daniel Veillardae0765b2008-07-31 19:54:59 +00001111 memmove(src, src + remove_head, 1 + *len);
1112 return(src);
Daniel Veillard97c9ce22008-03-25 16:52:41 +00001113 }
1114 return(NULL);
1115}
1116
1117/**
Daniel Veillarde57ec792003-09-10 10:50:59 +00001118 * xmlAddDefAttrs:
1119 * @ctxt: an XML parser context
1120 * @fullname: the element fullname
1121 * @fullattr: the attribute fullname
1122 * @value: the attribute value
1123 *
1124 * Add a defaulted attribute for an element
1125 */
1126static void
1127xmlAddDefAttrs(xmlParserCtxtPtr ctxt,
1128 const xmlChar *fullname,
1129 const xmlChar *fullattr,
1130 const xmlChar *value) {
1131 xmlDefAttrsPtr defaults;
1132 int len;
1133 const xmlChar *name;
1134 const xmlChar *prefix;
1135
Daniel Veillard6a31b832008-03-26 14:06:44 +00001136 /*
1137 * Allows to detect attribute redefinitions
1138 */
1139 if (ctxt->attsSpecial != NULL) {
1140 if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1141 return;
1142 }
1143
Daniel Veillarde57ec792003-09-10 10:50:59 +00001144 if (ctxt->attsDefault == NULL) {
Daniel Veillard316a5c32005-01-23 22:56:39 +00001145 ctxt->attsDefault = xmlHashCreateDict(10, ctxt->dict);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001146 if (ctxt->attsDefault == NULL)
1147 goto mem_error;
1148 }
1149
1150 /*
Daniel Veillard079f6a72004-09-23 13:15:03 +00001151 * split the element name into prefix:localname , the string found
1152 * are within the DTD and then not associated to namespace names.
Daniel Veillarde57ec792003-09-10 10:50:59 +00001153 */
1154 name = xmlSplitQName3(fullname, &len);
1155 if (name == NULL) {
1156 name = xmlDictLookup(ctxt->dict, fullname, -1);
1157 prefix = NULL;
1158 } else {
1159 name = xmlDictLookup(ctxt->dict, name, -1);
1160 prefix = xmlDictLookup(ctxt->dict, fullname, len);
1161 }
1162
1163 /*
1164 * make sure there is some storage
1165 */
1166 defaults = xmlHashLookup2(ctxt->attsDefault, name, prefix);
1167 if (defaults == NULL) {
1168 defaults = (xmlDefAttrsPtr) xmlMalloc(sizeof(xmlDefAttrs) +
Daniel Veillardae0765b2008-07-31 19:54:59 +00001169 (4 * 5) * sizeof(const xmlChar *));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001170 if (defaults == NULL)
1171 goto mem_error;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001172 defaults->nbAttrs = 0;
Daniel Veillard079f6a72004-09-23 13:15:03 +00001173 defaults->maxAttrs = 4;
Daniel Veillard68b6e022008-03-31 09:26:00 +00001174 if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1175 defaults, NULL) < 0) {
1176 xmlFree(defaults);
1177 goto mem_error;
1178 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00001179 } else if (defaults->nbAttrs >= defaults->maxAttrs) {
Daniel Veillard079f6a72004-09-23 13:15:03 +00001180 xmlDefAttrsPtr temp;
1181
1182 temp = (xmlDefAttrsPtr) xmlRealloc(defaults, sizeof(xmlDefAttrs) +
Daniel Veillardae0765b2008-07-31 19:54:59 +00001183 (2 * defaults->maxAttrs * 5) * sizeof(const xmlChar *));
Daniel Veillard079f6a72004-09-23 13:15:03 +00001184 if (temp == NULL)
Daniel Veillarde57ec792003-09-10 10:50:59 +00001185 goto mem_error;
Daniel Veillard079f6a72004-09-23 13:15:03 +00001186 defaults = temp;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001187 defaults->maxAttrs *= 2;
Daniel Veillard68b6e022008-03-31 09:26:00 +00001188 if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1189 defaults, NULL) < 0) {
1190 xmlFree(defaults);
1191 goto mem_error;
1192 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00001193 }
1194
1195 /*
Daniel Veillard8874b942005-08-25 13:19:21 +00001196 * Split the element name into prefix:localname , the string found
Daniel Veillarde57ec792003-09-10 10:50:59 +00001197 * are within the DTD and hen not associated to namespace names.
1198 */
1199 name = xmlSplitQName3(fullattr, &len);
1200 if (name == NULL) {
1201 name = xmlDictLookup(ctxt->dict, fullattr, -1);
1202 prefix = NULL;
1203 } else {
1204 name = xmlDictLookup(ctxt->dict, name, -1);
1205 prefix = xmlDictLookup(ctxt->dict, fullattr, len);
1206 }
1207
Daniel Veillardae0765b2008-07-31 19:54:59 +00001208 defaults->values[5 * defaults->nbAttrs] = name;
1209 defaults->values[5 * defaults->nbAttrs + 1] = prefix;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001210 /* intern the string and precompute the end */
1211 len = xmlStrlen(value);
1212 value = xmlDictLookup(ctxt->dict, value, len);
Daniel Veillardae0765b2008-07-31 19:54:59 +00001213 defaults->values[5 * defaults->nbAttrs + 2] = value;
1214 defaults->values[5 * defaults->nbAttrs + 3] = value + len;
1215 if (ctxt->external)
1216 defaults->values[5 * defaults->nbAttrs + 4] = BAD_CAST "external";
1217 else
1218 defaults->values[5 * defaults->nbAttrs + 4] = NULL;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001219 defaults->nbAttrs++;
1220
1221 return;
1222
1223mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001224 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001225 return;
1226}
1227
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001228/**
1229 * xmlAddSpecialAttr:
1230 * @ctxt: an XML parser context
1231 * @fullname: the element fullname
1232 * @fullattr: the attribute fullname
1233 * @type: the attribute type
1234 *
Daniel Veillardac4118d2008-01-11 05:27:32 +00001235 * Register this attribute type
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001236 */
1237static void
1238xmlAddSpecialAttr(xmlParserCtxtPtr ctxt,
1239 const xmlChar *fullname,
1240 const xmlChar *fullattr,
1241 int type)
1242{
1243 if (ctxt->attsSpecial == NULL) {
Daniel Veillard316a5c32005-01-23 22:56:39 +00001244 ctxt->attsSpecial = xmlHashCreateDict(10, ctxt->dict);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001245 if (ctxt->attsSpecial == NULL)
1246 goto mem_error;
1247 }
1248
Daniel Veillardac4118d2008-01-11 05:27:32 +00001249 if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1250 return;
1251
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +00001252 xmlHashAddEntry2(ctxt->attsSpecial, fullname, fullattr,
1253 (void *) (long) type);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001254 return;
1255
1256mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001257 xmlErrMemory(ctxt, NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001258 return;
1259}
1260
Daniel Veillard4432df22003-09-28 18:58:27 +00001261/**
Daniel Veillardac4118d2008-01-11 05:27:32 +00001262 * xmlCleanSpecialAttrCallback:
1263 *
1264 * Removes CDATA attributes from the special attribute table
1265 */
1266static void
1267xmlCleanSpecialAttrCallback(void *payload, void *data,
1268 const xmlChar *fullname, const xmlChar *fullattr,
1269 const xmlChar *unused ATTRIBUTE_UNUSED) {
1270 xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) data;
1271
Daniel Veillardb3edafd2008-01-11 08:00:57 +00001272 if (((long) payload) == XML_ATTRIBUTE_CDATA) {
Daniel Veillardac4118d2008-01-11 05:27:32 +00001273 xmlHashRemoveEntry2(ctxt->attsSpecial, fullname, fullattr, NULL);
1274 }
1275}
1276
1277/**
1278 * xmlCleanSpecialAttr:
1279 * @ctxt: an XML parser context
1280 *
1281 * Trim the list of attributes defined to remove all those of type
1282 * CDATA as they are not special. This call should be done when finishing
1283 * to parse the DTD and before starting to parse the document root.
1284 */
1285static void
1286xmlCleanSpecialAttr(xmlParserCtxtPtr ctxt)
1287{
1288 if (ctxt->attsSpecial == NULL)
1289 return;
1290
1291 xmlHashScanFull(ctxt->attsSpecial, xmlCleanSpecialAttrCallback, ctxt);
1292
1293 if (xmlHashSize(ctxt->attsSpecial) == 0) {
1294 xmlHashFree(ctxt->attsSpecial, NULL);
1295 ctxt->attsSpecial = NULL;
1296 }
1297 return;
1298}
1299
1300/**
Daniel Veillard4432df22003-09-28 18:58:27 +00001301 * xmlCheckLanguageID:
1302 * @lang: pointer to the string value
1303 *
1304 * Checks that the value conforms to the LanguageID production:
1305 *
1306 * NOTE: this is somewhat deprecated, those productions were removed from
1307 * the XML Second edition.
1308 *
1309 * [33] LanguageID ::= Langcode ('-' Subcode)*
1310 * [34] Langcode ::= ISO639Code | IanaCode | UserCode
1311 * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
1312 * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
1313 * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
1314 * [38] Subcode ::= ([a-z] | [A-Z])+
1315 *
Daniel Veillard60587d62010-11-04 15:16:27 +01001316 * The current REC reference the sucessors of RFC 1766, currently 5646
1317 *
1318 * http://www.rfc-editor.org/rfc/rfc5646.txt
1319 * langtag = language
1320 * ["-" script]
1321 * ["-" region]
1322 * *("-" variant)
1323 * *("-" extension)
1324 * ["-" privateuse]
1325 * language = 2*3ALPHA ; shortest ISO 639 code
1326 * ["-" extlang] ; sometimes followed by
1327 * ; extended language subtags
1328 * / 4ALPHA ; or reserved for future use
1329 * / 5*8ALPHA ; or registered language subtag
1330 *
1331 * extlang = 3ALPHA ; selected ISO 639 codes
1332 * *2("-" 3ALPHA) ; permanently reserved
1333 *
1334 * script = 4ALPHA ; ISO 15924 code
1335 *
1336 * region = 2ALPHA ; ISO 3166-1 code
1337 * / 3DIGIT ; UN M.49 code
1338 *
1339 * variant = 5*8alphanum ; registered variants
1340 * / (DIGIT 3alphanum)
1341 *
1342 * extension = singleton 1*("-" (2*8alphanum))
1343 *
1344 * ; Single alphanumerics
1345 * ; "x" reserved for private use
1346 * singleton = DIGIT ; 0 - 9
1347 * / %x41-57 ; A - W
1348 * / %x59-5A ; Y - Z
1349 * / %x61-77 ; a - w
1350 * / %x79-7A ; y - z
1351 *
1352 * it sounds right to still allow Irregular i-xxx IANA and user codes too
1353 * The parser below doesn't try to cope with extension or privateuse
1354 * that could be added but that's not interoperable anyway
1355 *
Daniel Veillard4432df22003-09-28 18:58:27 +00001356 * Returns 1 if correct 0 otherwise
1357 **/
1358int
1359xmlCheckLanguageID(const xmlChar * lang)
1360{
Daniel Veillard60587d62010-11-04 15:16:27 +01001361 const xmlChar *cur = lang, *nxt;
Daniel Veillard4432df22003-09-28 18:58:27 +00001362
1363 if (cur == NULL)
1364 return (0);
1365 if (((cur[0] == 'i') && (cur[1] == '-')) ||
Daniel Veillard60587d62010-11-04 15:16:27 +01001366 ((cur[0] == 'I') && (cur[1] == '-')) ||
1367 ((cur[0] == 'x') && (cur[1] == '-')) ||
1368 ((cur[0] == 'X') && (cur[1] == '-'))) {
Daniel Veillard4432df22003-09-28 18:58:27 +00001369 /*
Daniel Veillard60587d62010-11-04 15:16:27 +01001370 * Still allow IANA code and user code which were coming
1371 * from the previous version of the XML-1.0 specification
1372 * it's deprecated but we should not fail
Daniel Veillard4432df22003-09-28 18:58:27 +00001373 */
1374 cur += 2;
Daniel Veillard60587d62010-11-04 15:16:27 +01001375 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
Daniel Veillard4432df22003-09-28 18:58:27 +00001376 ((cur[0] >= 'a') && (cur[0] <= 'z')))
1377 cur++;
Daniel Veillard60587d62010-11-04 15:16:27 +01001378 return(cur[0] == 0);
Daniel Veillard4432df22003-09-28 18:58:27 +00001379 }
Daniel Veillard60587d62010-11-04 15:16:27 +01001380 nxt = cur;
1381 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1382 ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1383 nxt++;
1384 if (nxt - cur >= 4) {
1385 /*
1386 * Reserved
1387 */
1388 if ((nxt - cur > 8) || (nxt[0] != 0))
1389 return(0);
1390 return(1);
1391 }
1392 if (nxt - cur < 2)
1393 return(0);
1394 /* we got an ISO 639 code */
1395 if (nxt[0] == 0)
1396 return(1);
1397 if (nxt[0] != '-')
1398 return(0);
1399
1400 nxt++;
1401 cur = nxt;
1402 /* now we can have extlang or script or region or variant */
1403 if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1404 goto region_m49;
1405
1406 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1407 ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1408 nxt++;
1409 if (nxt - cur == 4)
1410 goto script;
1411 if (nxt - cur == 2)
1412 goto region;
1413 if ((nxt - cur >= 5) && (nxt - cur <= 8))
1414 goto variant;
1415 if (nxt - cur != 3)
1416 return(0);
1417 /* we parsed an extlang */
1418 if (nxt[0] == 0)
1419 return(1);
1420 if (nxt[0] != '-')
1421 return(0);
1422
1423 nxt++;
1424 cur = nxt;
1425 /* now we can have script or region or variant */
1426 if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1427 goto region_m49;
1428
1429 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1430 ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1431 nxt++;
1432 if (nxt - cur == 2)
1433 goto region;
1434 if ((nxt - cur >= 5) && (nxt - cur <= 8))
1435 goto variant;
1436 if (nxt - cur != 4)
1437 return(0);
1438 /* we parsed a script */
1439script:
1440 if (nxt[0] == 0)
1441 return(1);
1442 if (nxt[0] != '-')
1443 return(0);
1444
1445 nxt++;
1446 cur = nxt;
1447 /* now we can have region or variant */
1448 if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1449 goto region_m49;
1450
1451 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1452 ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1453 nxt++;
1454
1455 if ((nxt - cur >= 5) && (nxt - cur <= 8))
1456 goto variant;
1457 if (nxt - cur != 2)
1458 return(0);
1459 /* we parsed a region */
1460region:
1461 if (nxt[0] == 0)
1462 return(1);
1463 if (nxt[0] != '-')
1464 return(0);
1465
1466 nxt++;
1467 cur = nxt;
1468 /* now we can just have a variant */
1469 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1470 ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1471 nxt++;
1472
1473 if ((nxt - cur < 5) || (nxt - cur > 8))
1474 return(0);
1475
1476 /* we parsed a variant */
1477variant:
1478 if (nxt[0] == 0)
1479 return(1);
1480 if (nxt[0] != '-')
1481 return(0);
1482 /* extensions and private use subtags not checked */
Daniel Veillard4432df22003-09-28 18:58:27 +00001483 return (1);
Daniel Veillard60587d62010-11-04 15:16:27 +01001484
1485region_m49:
1486 if (((nxt[1] >= '0') && (nxt[1] <= '9')) &&
1487 ((nxt[2] >= '0') && (nxt[2] <= '9'))) {
1488 nxt += 3;
1489 goto region;
1490 }
1491 return(0);
Daniel Veillard4432df22003-09-28 18:58:27 +00001492}
1493
Owen Taylor3473f882001-02-23 17:55:21 +00001494/************************************************************************
1495 * *
Daniel Veillard0161e632008-08-28 15:36:32 +00001496 * Parser stacks related functions and macros *
Owen Taylor3473f882001-02-23 17:55:21 +00001497 * *
1498 ************************************************************************/
1499
Daniel Veillard8ed10722009-08-20 19:17:36 +02001500static xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
1501 const xmlChar ** str);
Owen Taylor3473f882001-02-23 17:55:21 +00001502
Daniel Veillard0fb18932003-09-07 09:14:37 +00001503#ifdef SAX2
1504/**
1505 * nsPush:
1506 * @ctxt: an XML parser context
1507 * @prefix: the namespace prefix or NULL
1508 * @URL: the namespace name
1509 *
1510 * Pushes a new parser namespace on top of the ns stack
1511 *
William M. Brack7b9154b2003-09-27 19:23:50 +00001512 * Returns -1 in case of error, -2 if the namespace should be discarded
1513 * and the index in the stack otherwise.
Daniel Veillard0fb18932003-09-07 09:14:37 +00001514 */
1515static int
1516nsPush(xmlParserCtxtPtr ctxt, const xmlChar *prefix, const xmlChar *URL)
1517{
Daniel Veillarddca8cc72003-09-26 13:53:14 +00001518 if (ctxt->options & XML_PARSE_NSCLEAN) {
1519 int i;
1520 for (i = 0;i < ctxt->nsNr;i += 2) {
1521 if (ctxt->nsTab[i] == prefix) {
1522 /* in scope */
1523 if (ctxt->nsTab[i + 1] == URL)
1524 return(-2);
1525 /* out of scope keep it */
1526 break;
1527 }
1528 }
1529 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00001530 if ((ctxt->nsMax == 0) || (ctxt->nsTab == NULL)) {
1531 ctxt->nsMax = 10;
1532 ctxt->nsNr = 0;
1533 ctxt->nsTab = (const xmlChar **)
1534 xmlMalloc(ctxt->nsMax * sizeof(xmlChar *));
1535 if (ctxt->nsTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001536 xmlErrMemory(ctxt, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00001537 ctxt->nsMax = 0;
1538 return (-1);
1539 }
1540 } else if (ctxt->nsNr >= ctxt->nsMax) {
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00001541 const xmlChar ** tmp;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001542 ctxt->nsMax *= 2;
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00001543 tmp = (const xmlChar **) xmlRealloc((char *) ctxt->nsTab,
1544 ctxt->nsMax * sizeof(ctxt->nsTab[0]));
1545 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001546 xmlErrMemory(ctxt, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00001547 ctxt->nsMax /= 2;
1548 return (-1);
1549 }
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00001550 ctxt->nsTab = tmp;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001551 }
1552 ctxt->nsTab[ctxt->nsNr++] = prefix;
1553 ctxt->nsTab[ctxt->nsNr++] = URL;
1554 return (ctxt->nsNr);
1555}
1556/**
1557 * nsPop:
1558 * @ctxt: an XML parser context
1559 * @nr: the number to pop
1560 *
1561 * Pops the top @nr parser prefix/namespace from the ns stack
1562 *
1563 * Returns the number of namespaces removed
1564 */
1565static int
1566nsPop(xmlParserCtxtPtr ctxt, int nr)
1567{
1568 int i;
1569
1570 if (ctxt->nsTab == NULL) return(0);
1571 if (ctxt->nsNr < nr) {
1572 xmlGenericError(xmlGenericErrorContext, "Pbm popping %d NS\n", nr);
1573 nr = ctxt->nsNr;
1574 }
1575 if (ctxt->nsNr <= 0)
1576 return (0);
Daniel Veillard34e3f642008-07-29 09:02:27 +00001577
Daniel Veillard0fb18932003-09-07 09:14:37 +00001578 for (i = 0;i < nr;i++) {
1579 ctxt->nsNr--;
1580 ctxt->nsTab[ctxt->nsNr] = NULL;
1581 }
1582 return(nr);
1583}
1584#endif
1585
1586static int
1587xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt, int nr) {
1588 const xmlChar **atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001589 int *attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001590 int maxatts;
1591
1592 if (ctxt->atts == NULL) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00001593 maxatts = 55; /* allow for 10 attrs by default */
Daniel Veillard0fb18932003-09-07 09:14:37 +00001594 atts = (const xmlChar **)
1595 xmlMalloc(maxatts * sizeof(xmlChar *));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001596 if (atts == NULL) goto mem_error;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001597 ctxt->atts = atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001598 attallocs = (int *) xmlMalloc((maxatts / 5) * sizeof(int));
1599 if (attallocs == NULL) goto mem_error;
1600 ctxt->attallocs = attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001601 ctxt->maxatts = maxatts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001602 } else if (nr + 5 > ctxt->maxatts) {
1603 maxatts = (nr + 5) * 2;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001604 atts = (const xmlChar **) xmlRealloc((void *) ctxt->atts,
1605 maxatts * sizeof(const xmlChar *));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001606 if (atts == NULL) goto mem_error;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001607 ctxt->atts = atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001608 attallocs = (int *) xmlRealloc((void *) ctxt->attallocs,
1609 (maxatts / 5) * sizeof(int));
1610 if (attallocs == NULL) goto mem_error;
1611 ctxt->attallocs = attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001612 ctxt->maxatts = maxatts;
1613 }
1614 return(ctxt->maxatts);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001615mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001616 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001617 return(-1);
Daniel Veillard0fb18932003-09-07 09:14:37 +00001618}
1619
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001620/**
1621 * inputPush:
1622 * @ctxt: an XML parser context
Daniel Veillard9d06d302002-01-22 18:15:52 +00001623 * @value: the parser input
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001624 *
1625 * Pushes a new parser input on top of the input stack
Daniel Veillard9d06d302002-01-22 18:15:52 +00001626 *
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001627 * Returns -1 in case of error, the index in the stack otherwise
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001628 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001629int
Daniel Veillard1c732d22002-11-30 11:22:59 +00001630inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value)
1631{
Daniel Veillard36e5cd52004-11-02 14:52:23 +00001632 if ((ctxt == NULL) || (value == NULL))
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001633 return(-1);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001634 if (ctxt->inputNr >= ctxt->inputMax) {
1635 ctxt->inputMax *= 2;
1636 ctxt->inputTab =
1637 (xmlParserInputPtr *) xmlRealloc(ctxt->inputTab,
1638 ctxt->inputMax *
1639 sizeof(ctxt->inputTab[0]));
1640 if (ctxt->inputTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001641 xmlErrMemory(ctxt, NULL);
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001642 xmlFreeInputStream(value);
1643 ctxt->inputMax /= 2;
1644 value = NULL;
1645 return (-1);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001646 }
1647 }
1648 ctxt->inputTab[ctxt->inputNr] = value;
1649 ctxt->input = value;
1650 return (ctxt->inputNr++);
1651}
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001652/**
Daniel Veillard1c732d22002-11-30 11:22:59 +00001653 * inputPop:
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001654 * @ctxt: an XML parser context
1655 *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001656 * Pops the top parser input from the input stack
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001657 *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001658 * Returns the input just removed
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001659 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001660xmlParserInputPtr
Daniel Veillard1c732d22002-11-30 11:22:59 +00001661inputPop(xmlParserCtxtPtr ctxt)
1662{
1663 xmlParserInputPtr ret;
1664
Daniel Veillard36e5cd52004-11-02 14:52:23 +00001665 if (ctxt == NULL)
1666 return(NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001667 if (ctxt->inputNr <= 0)
Daniel Veillard24505b02005-07-28 23:49:35 +00001668 return (NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001669 ctxt->inputNr--;
1670 if (ctxt->inputNr > 0)
1671 ctxt->input = ctxt->inputTab[ctxt->inputNr - 1];
1672 else
1673 ctxt->input = NULL;
1674 ret = ctxt->inputTab[ctxt->inputNr];
Daniel Veillard24505b02005-07-28 23:49:35 +00001675 ctxt->inputTab[ctxt->inputNr] = NULL;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001676 return (ret);
1677}
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001678/**
1679 * nodePush:
1680 * @ctxt: an XML parser context
Daniel Veillard9d06d302002-01-22 18:15:52 +00001681 * @value: the element node
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001682 *
1683 * Pushes a new element node on top of the node stack
Daniel Veillard9d06d302002-01-22 18:15:52 +00001684 *
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001685 * Returns -1 in case of error, the index in the stack otherwise
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001686 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001687int
Daniel Veillard1c732d22002-11-30 11:22:59 +00001688nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value)
1689{
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001690 if (ctxt == NULL) return(0);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001691 if (ctxt->nodeNr >= ctxt->nodeMax) {
Daniel Veillardd0cf7f62004-11-09 16:17:02 +00001692 xmlNodePtr *tmp;
1693
1694 tmp = (xmlNodePtr *) xmlRealloc(ctxt->nodeTab,
1695 ctxt->nodeMax * 2 *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001696 sizeof(ctxt->nodeTab[0]));
Daniel Veillardd0cf7f62004-11-09 16:17:02 +00001697 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001698 xmlErrMemory(ctxt, NULL);
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001699 return (-1);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001700 }
Daniel Veillardd0cf7f62004-11-09 16:17:02 +00001701 ctxt->nodeTab = tmp;
1702 ctxt->nodeMax *= 2;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001703 }
Daniel Veillard8915c152008-08-26 13:05:34 +00001704 if ((((unsigned int) ctxt->nodeNr) > xmlParserMaxDepth) &&
1705 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00001706 xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
Daniel Veillard8915c152008-08-26 13:05:34 +00001707 "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
Daniel Veillard4aede2e2003-10-17 12:43:59 +00001708 xmlParserMaxDepth);
Daniel Veillard3b2e4e12003-02-03 08:52:58 +00001709 ctxt->instate = XML_PARSER_EOF;
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001710 return(-1);
Daniel Veillard3b2e4e12003-02-03 08:52:58 +00001711 }
Daniel Veillard1c732d22002-11-30 11:22:59 +00001712 ctxt->nodeTab[ctxt->nodeNr] = value;
1713 ctxt->node = value;
1714 return (ctxt->nodeNr++);
1715}
Daniel Veillard8915c152008-08-26 13:05:34 +00001716
Daniel Veillard1c732d22002-11-30 11:22:59 +00001717/**
1718 * nodePop:
1719 * @ctxt: an XML parser context
1720 *
1721 * Pops the top element node from the node stack
1722 *
1723 * Returns the node just removed
Owen Taylor3473f882001-02-23 17:55:21 +00001724 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001725xmlNodePtr
Daniel Veillard1c732d22002-11-30 11:22:59 +00001726nodePop(xmlParserCtxtPtr ctxt)
1727{
1728 xmlNodePtr ret;
1729
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001730 if (ctxt == NULL) return(NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001731 if (ctxt->nodeNr <= 0)
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001732 return (NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001733 ctxt->nodeNr--;
1734 if (ctxt->nodeNr > 0)
1735 ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1];
1736 else
1737 ctxt->node = NULL;
1738 ret = ctxt->nodeTab[ctxt->nodeNr];
Daniel Veillard24505b02005-07-28 23:49:35 +00001739 ctxt->nodeTab[ctxt->nodeNr] = NULL;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001740 return (ret);
1741}
Daniel Veillarda2351322004-06-27 12:08:10 +00001742
1743#ifdef LIBXML_PUSH_ENABLED
Daniel Veillard1c732d22002-11-30 11:22:59 +00001744/**
Daniel Veillarde57ec792003-09-10 10:50:59 +00001745 * nameNsPush:
1746 * @ctxt: an XML parser context
1747 * @value: the element name
1748 * @prefix: the element prefix
1749 * @URI: the element namespace name
1750 *
1751 * Pushes a new element name/prefix/URL on top of the name stack
1752 *
1753 * Returns -1 in case of error, the index in the stack otherwise
1754 */
1755static int
1756nameNsPush(xmlParserCtxtPtr ctxt, const xmlChar * value,
1757 const xmlChar *prefix, const xmlChar *URI, int nsNr)
1758{
1759 if (ctxt->nameNr >= ctxt->nameMax) {
1760 const xmlChar * *tmp;
1761 void **tmp2;
1762 ctxt->nameMax *= 2;
1763 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1764 ctxt->nameMax *
1765 sizeof(ctxt->nameTab[0]));
1766 if (tmp == NULL) {
1767 ctxt->nameMax /= 2;
1768 goto mem_error;
1769 }
1770 ctxt->nameTab = tmp;
1771 tmp2 = (void **) xmlRealloc((void * *)ctxt->pushTab,
1772 ctxt->nameMax * 3 *
1773 sizeof(ctxt->pushTab[0]));
1774 if (tmp2 == NULL) {
1775 ctxt->nameMax /= 2;
1776 goto mem_error;
1777 }
1778 ctxt->pushTab = tmp2;
1779 }
1780 ctxt->nameTab[ctxt->nameNr] = value;
1781 ctxt->name = value;
1782 ctxt->pushTab[ctxt->nameNr * 3] = (void *) prefix;
1783 ctxt->pushTab[ctxt->nameNr * 3 + 1] = (void *) URI;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +00001784 ctxt->pushTab[ctxt->nameNr * 3 + 2] = (void *) (long) nsNr;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001785 return (ctxt->nameNr++);
1786mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001787 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001788 return (-1);
1789}
1790/**
1791 * nameNsPop:
1792 * @ctxt: an XML parser context
1793 *
1794 * Pops the top element/prefix/URI name from the name stack
1795 *
1796 * Returns the name just removed
1797 */
1798static const xmlChar *
1799nameNsPop(xmlParserCtxtPtr ctxt)
1800{
1801 const xmlChar *ret;
1802
1803 if (ctxt->nameNr <= 0)
Daniel Veillard24505b02005-07-28 23:49:35 +00001804 return (NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001805 ctxt->nameNr--;
1806 if (ctxt->nameNr > 0)
1807 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1808 else
1809 ctxt->name = NULL;
1810 ret = ctxt->nameTab[ctxt->nameNr];
1811 ctxt->nameTab[ctxt->nameNr] = NULL;
1812 return (ret);
1813}
Daniel Veillarda2351322004-06-27 12:08:10 +00001814#endif /* LIBXML_PUSH_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +00001815
1816/**
Daniel Veillard1c732d22002-11-30 11:22:59 +00001817 * namePush:
1818 * @ctxt: an XML parser context
1819 * @value: the element name
1820 *
1821 * Pushes a new element name on top of the name stack
1822 *
Daniel Veillarde57ec792003-09-10 10:50:59 +00001823 * Returns -1 in case of error, the index in the stack otherwise
Daniel Veillard1c732d22002-11-30 11:22:59 +00001824 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001825int
Daniel Veillard2fdbd322003-08-18 12:15:38 +00001826namePush(xmlParserCtxtPtr ctxt, const xmlChar * value)
Daniel Veillard1c732d22002-11-30 11:22:59 +00001827{
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001828 if (ctxt == NULL) return (-1);
1829
Daniel Veillard1c732d22002-11-30 11:22:59 +00001830 if (ctxt->nameNr >= ctxt->nameMax) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00001831 const xmlChar * *tmp;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001832 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
Xia Xinfeng5825ebb2011-11-10 13:50:22 +08001833 ctxt->nameMax * 2 *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001834 sizeof(ctxt->nameTab[0]));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001835 if (tmp == NULL) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00001836 goto mem_error;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001837 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00001838 ctxt->nameTab = tmp;
Xia Xinfeng5825ebb2011-11-10 13:50:22 +08001839 ctxt->nameMax *= 2;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001840 }
1841 ctxt->nameTab[ctxt->nameNr] = value;
1842 ctxt->name = value;
1843 return (ctxt->nameNr++);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001844mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001845 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001846 return (-1);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001847}
1848/**
1849 * namePop:
1850 * @ctxt: an XML parser context
1851 *
1852 * Pops the top element name from the name stack
1853 *
1854 * Returns the name just removed
1855 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001856const xmlChar *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001857namePop(xmlParserCtxtPtr ctxt)
1858{
Daniel Veillard2fdbd322003-08-18 12:15:38 +00001859 const xmlChar *ret;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001860
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001861 if ((ctxt == NULL) || (ctxt->nameNr <= 0))
1862 return (NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001863 ctxt->nameNr--;
1864 if (ctxt->nameNr > 0)
1865 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1866 else
1867 ctxt->name = NULL;
1868 ret = ctxt->nameTab[ctxt->nameNr];
Daniel Veillard24505b02005-07-28 23:49:35 +00001869 ctxt->nameTab[ctxt->nameNr] = NULL;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001870 return (ret);
1871}
Owen Taylor3473f882001-02-23 17:55:21 +00001872
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001873static int spacePush(xmlParserCtxtPtr ctxt, int val) {
Owen Taylor3473f882001-02-23 17:55:21 +00001874 if (ctxt->spaceNr >= ctxt->spaceMax) {
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00001875 int *tmp;
1876
Owen Taylor3473f882001-02-23 17:55:21 +00001877 ctxt->spaceMax *= 2;
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00001878 tmp = (int *) xmlRealloc(ctxt->spaceTab,
1879 ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
1880 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001881 xmlErrMemory(ctxt, NULL);
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001882 ctxt->spaceMax /=2;
1883 return(-1);
Owen Taylor3473f882001-02-23 17:55:21 +00001884 }
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00001885 ctxt->spaceTab = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00001886 }
1887 ctxt->spaceTab[ctxt->spaceNr] = val;
1888 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr];
1889 return(ctxt->spaceNr++);
1890}
1891
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001892static int spacePop(xmlParserCtxtPtr ctxt) {
Owen Taylor3473f882001-02-23 17:55:21 +00001893 int ret;
1894 if (ctxt->spaceNr <= 0) return(0);
1895 ctxt->spaceNr--;
1896 if (ctxt->spaceNr > 0)
1897 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1];
1898 else
Daniel Veillarddbcbbd22006-06-18 19:55:20 +00001899 ctxt->space = &ctxt->spaceTab[0];
Owen Taylor3473f882001-02-23 17:55:21 +00001900 ret = ctxt->spaceTab[ctxt->spaceNr];
1901 ctxt->spaceTab[ctxt->spaceNr] = -1;
1902 return(ret);
1903}
1904
1905/*
1906 * Macros for accessing the content. Those should be used only by the parser,
1907 * and not exported.
1908 *
1909 * Dirty macros, i.e. one often need to make assumption on the context to
1910 * use them
1911 *
1912 * CUR_PTR return the current pointer to the xmlChar to be parsed.
1913 * To be used with extreme caution since operations consuming
1914 * characters may move the input buffer to a different location !
 *   CUR     returns the current xmlChar value, i.e. an 8 bit value.
 *           This should be used internally by the parser
 *           only to compare to ASCII values, otherwise it would break when
 *           running with UTF-8 encoding.
1919 * RAW same as CUR but in the input buffer, bypass any token
1920 * extraction that may have been done
 *   NXT(n)  returns the n'th next xmlChar. Same as CUR, it should be used only
 *           to compare on ASCII based substring.
1923 * SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
Daniel Veillard77a90a72003-03-22 00:04:05 +00001924 * strings without newlines within the parser.
 *   NEXT1   Skip 1 xmlChar, and must also be used only to skip 1 non-newline ASCII
 *           defined char within the parser.
Owen Taylor3473f882001-02-23 17:55:21 +00001927 * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
1928 *
1929 * NEXT Skip to the next character, this does the proper decoding
1930 * in UTF-8 mode. It also pop-up unfinished entities on the fly.
Daniel Veillard77a90a72003-03-22 00:04:05 +00001931 * NEXTL(l) Skip the current unicode character of l xmlChars long.
Owen Taylor3473f882001-02-23 17:55:21 +00001932 * CUR_CHAR(l) returns the current unicode character (int), set l
1933 * to the number of xmlChars used for the encoding [0-5].
1934 * CUR_SCHAR same but operate on a string instead of the context
1935 * COPY_BUF copy the current unicode char to the target buffer, increment
1936 * the index
1937 * GROW, SHRINK handling of input buffers
1938 */
1939
/*
 * Raw accessors on the current input; all of them expect a variable
 * named ctxt to be in scope at the point of use.
 * RAW and CUR both read the byte under the cursor, NXT(val) the byte
 * val positions further, CUR_PTR is the cursor itself.
 */
#define RAW (ctxt->input->cur[0])
#define CUR (ctxt->input->cur[0])
#define NXT(val) (ctxt->input->cur[(val)])
#define CUR_PTR ctxt->input->cur
1944
/*
 * CMPn(s, c1, ..., cn): non-zero when the first n bytes at s are equal
 * to c1 ... cn, the bytes being read as unsigned char. The comparisons
 * are chained with && so evaluation stops at the first mismatch.
 *
 * Every parameter is parenthesized so that the cast applies to the whole
 * of s even when an expression is passed; s is expanded once per byte
 * compared and must therefore be free of side effects.
 */
#define CMP4( s, c1, c2, c3, c4 ) \
  ( ((const unsigned char *) (s))[ 0 ] == (c1) && \
    ((const unsigned char *) (s))[ 1 ] == (c2) && \
    ((const unsigned char *) (s))[ 2 ] == (c3) && \
    ((const unsigned char *) (s))[ 3 ] == (c4) )
#define CMP5( s, c1, c2, c3, c4, c5 ) \
  ( CMP4( s, c1, c2, c3, c4 ) && \
    ((const unsigned char *) (s))[ 4 ] == (c5) )
#define CMP6( s, c1, c2, c3, c4, c5, c6 ) \
  ( CMP5( s, c1, c2, c3, c4, c5 ) && \
    ((const unsigned char *) (s))[ 5 ] == (c6) )
#define CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) \
  ( CMP6( s, c1, c2, c3, c4, c5, c6 ) && \
    ((const unsigned char *) (s))[ 6 ] == (c7) )
#define CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) \
  ( CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) && \
    ((const unsigned char *) (s))[ 7 ] == (c8) )
#define CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) \
  ( CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) && \
    ((const unsigned char *) (s))[ 8 ] == (c9) )
#define CMP10( s, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 ) \
  ( CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) && \
    ((const unsigned char *) (s))[ 9 ] == (c10) )
1962
/*
 * SKIP(val): advance the cursor, the column and ctxt->nbChars by val
 * (line counting is not touched), then hand a '%' found at the new
 * position to xmlParserHandlePEReference() and pop the input if it is
 * exhausted and cannot be refilled. val is expanded three times.
 */
#define SKIP(val) do {							\
    ctxt->nbChars += (val);						\
    ctxt->input->cur += (val);						\
    ctxt->input->col += (val);						\
    if (*ctxt->input->cur == '%')					\
        xmlParserHandlePEReference(ctxt);				\
    if ((*ctxt->input->cur == 0) &&					\
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))		\
        xmlPopInput(ctxt);						\
  } while (0)
1970
/*
 * SKIPL(val): same as SKIP but walks the val bytes one by one so that
 * newlines update the line/column counters. A '%' found at the final
 * position is handed to xmlParserHandlePEReference() and an exhausted
 * input which cannot be refilled is popped.
 *
 * val is parenthesized so that an expression argument is compared as a
 * whole in the loop condition; it is re-evaluated on every iteration.
 */
#define SKIPL(val) do {							\
    int skipl;								\
    for(skipl=0; skipl<(val); skipl++) {				\
	if (*(ctxt->input->cur) == '\n') {				\
	ctxt->input->line++; ctxt->input->col = 1;			\
	} else ctxt->input->col++;					\
	ctxt->nbChars++;						\
	ctxt->input->cur++;						\
    }									\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
    if ((*ctxt->input->cur == 0) &&					\
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))		\
	    xmlPopInput(ctxt);						\
  } while (0)
1985
Daniel Veillarda880b122003-04-21 21:36:41 +00001986#define SHRINK if ((ctxt->progressive == 0) && \
Daniel Veillard6155d8a2003-08-19 15:01:28 +00001987 (ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \
1988 (ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \
Daniel Veillard46de64e2002-05-29 08:21:33 +00001989 xmlSHRINK (ctxt);
1990
1991static void xmlSHRINK (xmlParserCtxtPtr ctxt) {
1992 xmlParserInputShrink(ctxt->input);
1993 if ((*ctxt->input->cur == 0) &&
1994 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1995 xmlPopInput(ctxt);
Daniel Veillard48b2f892001-02-25 16:11:03 +00001996 }
Owen Taylor3473f882001-02-23 17:55:21 +00001997
Daniel Veillarda880b122003-04-21 21:36:41 +00001998#define GROW if ((ctxt->progressive == 0) && \
1999 (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK)) \
Daniel Veillard46de64e2002-05-29 08:21:33 +00002000 xmlGROW (ctxt);
2001
2002static void xmlGROW (xmlParserCtxtPtr ctxt) {
2003 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
Daniel Veillard59df7832010-02-02 10:24:01 +01002004 if ((ctxt->input->cur != NULL) && (*ctxt->input->cur == 0) &&
Daniel Veillard46de64e2002-05-29 08:21:33 +00002005 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
2006 xmlPopInput(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +00002007}
Owen Taylor3473f882001-02-23 17:55:21 +00002008
/* Skip blanks / move to the next character through the parser routines. */
#define SKIP_BLANKS xmlSkipBlankChars(ctxt)

#define NEXT xmlNextChar(ctxt)

/*
 * NEXT1: step over exactly one byte, updating column and ctxt->nbChars
 * but not the line counter, and ask for more input when the new
 * position holds a zero byte. Expands to a brace block.
 */
#define NEXT1 {								\
	ctxt->input->col++;						\
	ctxt->input->cur++;						\
	ctxt->nbChars++;						\
	if (*ctxt->input->cur == 0)					\
	    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);		\
    }

/*
 * NEXTL(l): step over the current character, which is l bytes long.
 * The line/column counters are updated from the first byte only and a
 * '%' at the new position is handed to xmlParserHandlePEReference().
 */
#define NEXTL(l) do {							\
    if (*(ctxt->input->cur) == '\n') {					\
	ctxt->input->line++; ctxt->input->col = 1;			\
    } else ctxt->input->col++;						\
    ctxt->input->cur += (l);						\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
  } while (0)

/* Decode the current character; l receives its length in bytes. */
#define CUR_CHAR(l) xmlCurrentChar(ctxt, &(l))
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, (s), &(l))

/*
 * COPY_BUF(l,b,i,v): store character v at b[i] and advance i, using a
 * plain byte store when l is 1 and xmlCopyCharMultiByte() otherwise.
 * Parameters are parenthesized so that expression arguments keep their
 * meaning inside the cast and the index; the macro is still a bare
 * if/else without a trailing semicolon, as callers expect.
 */
#define COPY_BUF(l,b,i,v)						\
    if ((l) == 1) (b)[(i)++] = (xmlChar) (v);				\
    else (i) += xmlCopyCharMultiByte(&(b)[(i)],(v))
Owen Taylor3473f882001-02-23 17:55:21 +00002035
2036/**
2037 * xmlSkipBlankChars:
2038 * @ctxt: the XML parser context
2039 *
2040 * skip all blanks character found at that point in the input streams.
2041 * It pops up finished entities in the process if allowable at that point.
2042 *
2043 * Returns the number of space chars skipped
2044 */
2045
2046int
2047xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
Daniel Veillard02141ea2001-04-30 11:46:40 +00002048 int res = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002049
2050 /*
2051 * It's Okay to use CUR/NEXT here since all the blanks are on
2052 * the ASCII range.
2053 */
Daniel Veillard02141ea2001-04-30 11:46:40 +00002054 if ((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) {
2055 const xmlChar *cur;
Owen Taylor3473f882001-02-23 17:55:21 +00002056 /*
Daniel Veillard02141ea2001-04-30 11:46:40 +00002057 * if we are in the document content, go really fast
Owen Taylor3473f882001-02-23 17:55:21 +00002058 */
Daniel Veillard02141ea2001-04-30 11:46:40 +00002059 cur = ctxt->input->cur;
William M. Brack76e95df2003-10-18 16:20:14 +00002060 while (IS_BLANK_CH(*cur)) {
Daniel Veillard02141ea2001-04-30 11:46:40 +00002061 if (*cur == '\n') {
2062 ctxt->input->line++; ctxt->input->col = 1;
2063 }
2064 cur++;
2065 res++;
2066 if (*cur == 0) {
2067 ctxt->input->cur = cur;
2068 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2069 cur = ctxt->input->cur;
2070 }
2071 }
2072 ctxt->input->cur = cur;
2073 } else {
2074 int cur;
2075 do {
2076 cur = CUR;
Daniel Veillard7da92702005-01-23 20:15:53 +00002077 while (IS_BLANK_CH(cur)) { /* CHECKED tstblanks.xml */
Daniel Veillard02141ea2001-04-30 11:46:40 +00002078 NEXT;
2079 cur = CUR;
2080 res++;
2081 }
2082 while ((cur == 0) && (ctxt->inputNr > 1) &&
2083 (ctxt->instate != XML_PARSER_COMMENT)) {
2084 xmlPopInput(ctxt);
2085 cur = CUR;
2086 }
2087 /*
2088 * Need to handle support of entities branching here
2089 */
2090 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);
2091 } while (IS_BLANK(cur)); /* CHECKED tstblanks.xml */
2092 }
Owen Taylor3473f882001-02-23 17:55:21 +00002093 return(res);
2094}
2095
2096/************************************************************************
2097 * *
2098 * Commodity functions to handle entities *
2099 * *
2100 ************************************************************************/
2101
2102/**
2103 * xmlPopInput:
2104 * @ctxt: an XML parser context
2105 *
2106 * xmlPopInput: the current input pointed by ctxt->input came to an end
2107 * pop it and return the next char.
2108 *
2109 * Returns the current xmlChar in the parser context
2110 */
2111xmlChar
2112xmlPopInput(xmlParserCtxtPtr ctxt) {
Daniel Veillarda82b1822004-11-08 16:24:57 +00002113 if ((ctxt == NULL) || (ctxt->inputNr <= 1)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002114 if (xmlParserDebugEntities)
2115 xmlGenericError(xmlGenericErrorContext,
2116 "Popping input %d\n", ctxt->inputNr);
2117 xmlFreeInputStream(inputPop(ctxt));
Daniel Veillard561b7f82002-03-20 21:55:57 +00002118 if ((*ctxt->input->cur == 0) &&
Owen Taylor3473f882001-02-23 17:55:21 +00002119 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
2120 return(xmlPopInput(ctxt));
2121 return(CUR);
2122}
2123
2124/**
2125 * xmlPushInput:
2126 * @ctxt: an XML parser context
2127 * @input: an XML parser input fragment (entity, XML fragment ...).
2128 *
2129 * xmlPushInput: switch to a new input stream which is stacked on top
2130 * of the previous one(s).
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00002131 * Returns -1 in case of error or the index in the input stack
Owen Taylor3473f882001-02-23 17:55:21 +00002132 */
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00002133int
Owen Taylor3473f882001-02-23 17:55:21 +00002134xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00002135 int ret;
2136 if (input == NULL) return(-1);
Owen Taylor3473f882001-02-23 17:55:21 +00002137
2138 if (xmlParserDebugEntities) {
2139 if ((ctxt->input != NULL) && (ctxt->input->filename))
2140 xmlGenericError(xmlGenericErrorContext,
2141 "%s(%d): ", ctxt->input->filename,
2142 ctxt->input->line);
2143 xmlGenericError(xmlGenericErrorContext,
2144 "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
2145 }
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00002146 ret = inputPush(ctxt, input);
Owen Taylor3473f882001-02-23 17:55:21 +00002147 GROW;
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00002148 return(ret);
Owen Taylor3473f882001-02-23 17:55:21 +00002149}
2150
2151/**
2152 * xmlParseCharRef:
2153 * @ctxt: an XML parser context
2154 *
2155 * parse Reference declarations
2156 *
2157 * [66] CharRef ::= '&#' [0-9]+ ';' |
2158 * '&#x' [0-9a-fA-F]+ ';'
2159 *
2160 * [ WFC: Legal Character ]
2161 * Characters referred to using character references must match the
2162 * production for Char.
2163 *
2164 * Returns the value parsed (as an int), 0 in case of error
2165 */
2166int
2167xmlParseCharRef(xmlParserCtxtPtr ctxt) {
Daniel Veillard50582112001-03-26 22:52:16 +00002168 unsigned int val = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002169 int count = 0;
Daniel Veillard37fd3072004-06-03 11:22:31 +00002170 unsigned int outofrange = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002171
Owen Taylor3473f882001-02-23 17:55:21 +00002172 /*
2173 * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
2174 */
Daniel Veillard561b7f82002-03-20 21:55:57 +00002175 if ((RAW == '&') && (NXT(1) == '#') &&
Owen Taylor3473f882001-02-23 17:55:21 +00002176 (NXT(2) == 'x')) {
2177 SKIP(3);
2178 GROW;
2179 while (RAW != ';') { /* loop blocked by count */
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00002180 if (count++ > 20) {
2181 count = 0;
2182 GROW;
2183 }
2184 if ((RAW >= '0') && (RAW <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +00002185 val = val * 16 + (CUR - '0');
2186 else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
2187 val = val * 16 + (CUR - 'a') + 10;
2188 else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
2189 val = val * 16 + (CUR - 'A') + 10;
2190 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002191 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002192 val = 0;
2193 break;
2194 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00002195 if (val > 0x10FFFF)
2196 outofrange = val;
2197
Owen Taylor3473f882001-02-23 17:55:21 +00002198 NEXT;
2199 count++;
2200 }
2201 if (RAW == ';') {
2202 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
Daniel Veillard77a90a72003-03-22 00:04:05 +00002203 ctxt->input->col++;
Owen Taylor3473f882001-02-23 17:55:21 +00002204 ctxt->nbChars ++;
2205 ctxt->input->cur++;
2206 }
Daniel Veillard561b7f82002-03-20 21:55:57 +00002207 } else if ((RAW == '&') && (NXT(1) == '#')) {
Owen Taylor3473f882001-02-23 17:55:21 +00002208 SKIP(2);
2209 GROW;
2210 while (RAW != ';') { /* loop blocked by count */
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00002211 if (count++ > 20) {
2212 count = 0;
2213 GROW;
2214 }
2215 if ((RAW >= '0') && (RAW <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +00002216 val = val * 10 + (CUR - '0');
2217 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002218 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002219 val = 0;
2220 break;
2221 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00002222 if (val > 0x10FFFF)
2223 outofrange = val;
2224
Owen Taylor3473f882001-02-23 17:55:21 +00002225 NEXT;
2226 count++;
2227 }
2228 if (RAW == ';') {
2229 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
Daniel Veillard77a90a72003-03-22 00:04:05 +00002230 ctxt->input->col++;
Owen Taylor3473f882001-02-23 17:55:21 +00002231 ctxt->nbChars ++;
2232 ctxt->input->cur++;
2233 }
2234 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002235 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002236 }
2237
2238 /*
2239 * [ WFC: Legal Character ]
2240 * Characters referred to using character references must match the
2241 * production for Char.
2242 */
Daniel Veillard37fd3072004-06-03 11:22:31 +00002243 if ((IS_CHAR(val) && (outofrange == 0))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002244 return(val);
2245 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00002246 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2247 "xmlParseCharRef: invalid xmlChar value %d\n",
2248 val);
Owen Taylor3473f882001-02-23 17:55:21 +00002249 }
2250 return(0);
2251}
2252
2253/**
2254 * xmlParseStringCharRef:
2255 * @ctxt: an XML parser context
2256 * @str: a pointer to an index in the string
2257 *
2258 * parse Reference declarations, variant parsing from a string rather
 * than an input flow.
2260 *
2261 * [66] CharRef ::= '&#' [0-9]+ ';' |
2262 * '&#x' [0-9a-fA-F]+ ';'
2263 *
2264 * [ WFC: Legal Character ]
2265 * Characters referred to using character references must match the
2266 * production for Char.
2267 *
2268 * Returns the value parsed (as an int), 0 in case of error, str will be
2269 * updated to the current value of the index
2270 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00002271static int
Owen Taylor3473f882001-02-23 17:55:21 +00002272xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
2273 const xmlChar *ptr;
2274 xmlChar cur;
Daniel Veillard37fd3072004-06-03 11:22:31 +00002275 unsigned int val = 0;
2276 unsigned int outofrange = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002277
2278 if ((str == NULL) || (*str == NULL)) return(0);
2279 ptr = *str;
2280 cur = *ptr;
2281 if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
2282 ptr += 3;
2283 cur = *ptr;
2284 while (cur != ';') { /* Non input consuming loop */
2285 if ((cur >= '0') && (cur <= '9'))
2286 val = val * 16 + (cur - '0');
2287 else if ((cur >= 'a') && (cur <= 'f'))
2288 val = val * 16 + (cur - 'a') + 10;
2289 else if ((cur >= 'A') && (cur <= 'F'))
2290 val = val * 16 + (cur - 'A') + 10;
2291 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002292 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002293 val = 0;
2294 break;
2295 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00002296 if (val > 0x10FFFF)
2297 outofrange = val;
2298
Owen Taylor3473f882001-02-23 17:55:21 +00002299 ptr++;
2300 cur = *ptr;
2301 }
2302 if (cur == ';')
2303 ptr++;
2304 } else if ((cur == '&') && (ptr[1] == '#')){
2305 ptr += 2;
2306 cur = *ptr;
2307 while (cur != ';') { /* Non input consuming loops */
2308 if ((cur >= '0') && (cur <= '9'))
2309 val = val * 10 + (cur - '0');
2310 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002311 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002312 val = 0;
2313 break;
2314 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00002315 if (val > 0x10FFFF)
2316 outofrange = val;
2317
Owen Taylor3473f882001-02-23 17:55:21 +00002318 ptr++;
2319 cur = *ptr;
2320 }
2321 if (cur == ';')
2322 ptr++;
2323 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002324 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002325 return(0);
2326 }
2327 *str = ptr;
2328
2329 /*
2330 * [ WFC: Legal Character ]
2331 * Characters referred to using character references must match the
2332 * production for Char.
2333 */
Daniel Veillard37fd3072004-06-03 11:22:31 +00002334 if ((IS_CHAR(val) && (outofrange == 0))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002335 return(val);
2336 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00002337 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2338 "xmlParseStringCharRef: invalid xmlChar value %d\n",
2339 val);
Owen Taylor3473f882001-02-23 17:55:21 +00002340 }
2341 return(0);
2342}
2343
2344/**
Daniel Veillardf5582f12002-06-11 10:08:16 +00002345 * xmlNewBlanksWrapperInputStream:
2346 * @ctxt: an XML parser context
2347 * @entity: an Entity pointer
2348 *
2349 * Create a new input stream for wrapping
2350 * blanks around a PEReference
2351 *
2352 * Returns the new input stream or NULL
2353 */
2354
2355static void deallocblankswrapper (xmlChar *str) {xmlFree(str);}
2356
Daniel Veillardf4862f02002-09-10 11:13:43 +00002357static xmlParserInputPtr
Daniel Veillardf5582f12002-06-11 10:08:16 +00002358xmlNewBlanksWrapperInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
2359 xmlParserInputPtr input;
2360 xmlChar *buffer;
2361 size_t length;
2362 if (entity == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002363 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
2364 "xmlNewBlanksWrapperInputStream entity\n");
Daniel Veillardf5582f12002-06-11 10:08:16 +00002365 return(NULL);
2366 }
2367 if (xmlParserDebugEntities)
2368 xmlGenericError(xmlGenericErrorContext,
2369 "new blanks wrapper for entity: %s\n", entity->name);
2370 input = xmlNewInputStream(ctxt);
2371 if (input == NULL) {
2372 return(NULL);
2373 }
2374 length = xmlStrlen(entity->name) + 5;
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002375 buffer = xmlMallocAtomic(length);
Daniel Veillardf5582f12002-06-11 10:08:16 +00002376 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002377 xmlErrMemory(ctxt, NULL);
Daniel Veillard68b6e022008-03-31 09:26:00 +00002378 xmlFree(input);
Daniel Veillardf5582f12002-06-11 10:08:16 +00002379 return(NULL);
2380 }
2381 buffer [0] = ' ';
2382 buffer [1] = '%';
2383 buffer [length-3] = ';';
2384 buffer [length-2] = ' ';
2385 buffer [length-1] = 0;
2386 memcpy(buffer + 2, entity->name, length - 5);
2387 input->free = deallocblankswrapper;
2388 input->base = buffer;
2389 input->cur = buffer;
2390 input->length = length;
2391 input->end = &buffer[length];
2392 return(input);
2393}
2394
2395/**
Owen Taylor3473f882001-02-23 17:55:21 +00002396 * xmlParserHandlePEReference:
2397 * @ctxt: the parser context
2398 *
2399 * [69] PEReference ::= '%' Name ';'
2400 *
2401 * [ WFC: No Recursion ]
2402 * A parsed entity must not contain a recursive
2403 * reference to itself, either directly or indirectly.
2404 *
2405 * [ WFC: Entity Declared ]
2406 * In a document without any DTD, a document with only an internal DTD
2407 * subset which contains no parameter entity references, or a document
2408 * with "standalone='yes'", ... ... The declaration of a parameter
2409 * entity must precede any reference to it...
2410 *
2411 * [ VC: Entity Declared ]
2412 * In a document with an external subset or external parameter entities
2413 * with "standalone='no'", ... ... The declaration of a parameter entity
2414 * must precede any reference to it...
2415 *
2416 * [ WFC: In DTD ]
2417 * Parameter-entity references may only appear in the DTD.
2418 * NOTE: misleading but this is handled.
2419 *
2420 * A PEReference may have been detected in the current input stream
2421 * the handling is done accordingly to
2422 * http://www.w3.org/TR/REC-xml#entproc
2423 * i.e.
2424 * - Included in literal in entity values
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002425 * - Included as Parameter Entity reference within DTDs
Owen Taylor3473f882001-02-23 17:55:21 +00002426 */
2427void
2428xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002429 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00002430 xmlEntityPtr entity = NULL;
2431 xmlParserInputPtr input;
2432
Owen Taylor3473f882001-02-23 17:55:21 +00002433 if (RAW != '%') return;
2434 switch(ctxt->instate) {
2435 case XML_PARSER_CDATA_SECTION:
2436 return;
2437 case XML_PARSER_COMMENT:
2438 return;
2439 case XML_PARSER_START_TAG:
2440 return;
2441 case XML_PARSER_END_TAG:
2442 return;
2443 case XML_PARSER_EOF:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002444 xmlFatalErr(ctxt, XML_ERR_PEREF_AT_EOF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002445 return;
2446 case XML_PARSER_PROLOG:
2447 case XML_PARSER_START:
2448 case XML_PARSER_MISC:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002449 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_PROLOG, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002450 return;
2451 case XML_PARSER_ENTITY_DECL:
2452 case XML_PARSER_CONTENT:
2453 case XML_PARSER_ATTRIBUTE_VALUE:
2454 case XML_PARSER_PI:
2455 case XML_PARSER_SYSTEM_LITERAL:
Daniel Veillard4a7ae502002-02-18 19:18:17 +00002456 case XML_PARSER_PUBLIC_LITERAL:
Owen Taylor3473f882001-02-23 17:55:21 +00002457 /* we just ignore it there */
2458 return;
2459 case XML_PARSER_EPILOG:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002460 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_EPILOG, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002461 return;
2462 case XML_PARSER_ENTITY_VALUE:
2463 /*
2464 * NOTE: in the case of entity values, we don't do the
2465 * substitution here since we need the literal
2466 * entity value to be able to save the internal
2467 * subset of the document.
2468 * This will be handled by xmlStringDecodeEntities
2469 */
2470 return;
2471 case XML_PARSER_DTD:
2472 /*
2473 * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
2474 * In the internal DTD subset, parameter-entity references
2475 * can occur only where markup declarations can occur, not
2476 * within markup declarations.
2477 * In that case this is handled in xmlParseMarkupDecl
2478 */
2479 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
2480 return;
William M. Brack76e95df2003-10-18 16:20:14 +00002481 if (IS_BLANK_CH(NXT(1)) || NXT(1) == 0)
Daniel Veillardf5582f12002-06-11 10:08:16 +00002482 return;
Owen Taylor3473f882001-02-23 17:55:21 +00002483 break;
2484 case XML_PARSER_IGNORE:
2485 return;
2486 }
2487
2488 NEXT;
2489 name = xmlParseName(ctxt);
2490 if (xmlParserDebugEntities)
2491 xmlGenericError(xmlGenericErrorContext,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002492 "PEReference: %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00002493 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002494 xmlFatalErr(ctxt, XML_ERR_PEREF_NO_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002495 } else {
2496 if (RAW == ';') {
2497 NEXT;
2498 if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL))
2499 entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
2500 if (entity == NULL) {
2501
2502 /*
2503 * [ WFC: Entity Declared ]
2504 * In a document without any DTD, a document with only an
2505 * internal DTD subset which contains no parameter entity
2506 * references, or a document with "standalone='yes'", ...
2507 * ... The declaration of a parameter entity must precede
2508 * any reference to it...
2509 */
2510 if ((ctxt->standalone == 1) ||
2511 ((ctxt->hasExternalSubset == 0) &&
2512 (ctxt->hasPErefs == 0))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00002513 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00002514 "PEReference: %%%s; not found\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00002515 } else {
2516 /*
2517 * [ VC: Entity Declared ]
2518 * In a document with an external subset or external
2519 * parameter entities with "standalone='no'", ...
2520 * ... The declaration of a parameter entity must precede
2521 * any reference to it...
2522 */
Daniel Veillard24eb9782003-10-04 21:08:09 +00002523 if ((ctxt->validate) && (ctxt->vctxt.error != NULL)) {
2524 xmlValidityError(ctxt, XML_WAR_UNDECLARED_ENTITY,
2525 "PEReference: %%%s; not found\n",
Daniel Veillardae0765b2008-07-31 19:54:59 +00002526 name, NULL);
Daniel Veillard24eb9782003-10-04 21:08:09 +00002527 } else
2528 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
2529 "PEReference: %%%s; not found\n",
2530 name, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002531 ctxt->valid = 0;
2532 }
Daniel Veillardf5582f12002-06-11 10:08:16 +00002533 } else if (ctxt->input->free != deallocblankswrapper) {
2534 input = xmlNewBlanksWrapperInputStream(ctxt, entity);
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00002535 if (xmlPushInput(ctxt, input) < 0)
2536 return;
Owen Taylor3473f882001-02-23 17:55:21 +00002537 } else {
2538 if ((entity->etype == XML_INTERNAL_PARAMETER_ENTITY) ||
2539 (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)) {
Daniel Veillard87a764e2001-06-20 17:41:10 +00002540 xmlChar start[4];
2541 xmlCharEncoding enc;
2542
Owen Taylor3473f882001-02-23 17:55:21 +00002543 /*
2544 * handle the extra spaces added before and after
2545 * c.f. http://www.w3.org/TR/REC-xml#as-PE
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002546 * this is done independently.
Owen Taylor3473f882001-02-23 17:55:21 +00002547 */
2548 input = xmlNewEntityInputStream(ctxt, entity);
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00002549 if (xmlPushInput(ctxt, input) < 0)
2550 return;
Daniel Veillard87a764e2001-06-20 17:41:10 +00002551
2552 /*
2553 * Get the 4 first bytes and decode the charset
2554 * if enc != XML_CHAR_ENCODING_NONE
2555 * plug some encoding conversion routines.
William M. Bracka0c48ad2004-04-16 15:58:29 +00002556 * Note that, since we may have some non-UTF8
2557 * encoding (like UTF16, bug 135229), the 'length'
2558 * is not known, but we can calculate based upon
2559 * the amount of data in the buffer.
Daniel Veillard87a764e2001-06-20 17:41:10 +00002560 */
2561 GROW
William M. Bracka0c48ad2004-04-16 15:58:29 +00002562 if ((ctxt->input->end - ctxt->input->cur)>=4) {
Daniel Veillarde059b892002-06-13 15:32:10 +00002563 start[0] = RAW;
2564 start[1] = NXT(1);
2565 start[2] = NXT(2);
2566 start[3] = NXT(3);
2567 enc = xmlDetectCharEncoding(start, 4);
2568 if (enc != XML_CHAR_ENCODING_NONE) {
2569 xmlSwitchEncoding(ctxt, enc);
2570 }
Daniel Veillard87a764e2001-06-20 17:41:10 +00002571 }
2572
Owen Taylor3473f882001-02-23 17:55:21 +00002573 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
Daniel Veillarda07050d2003-10-19 14:46:32 +00002574 (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l' )) &&
2575 (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002576 xmlParseTextDecl(ctxt);
2577 }
Owen Taylor3473f882001-02-23 17:55:21 +00002578 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00002579 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
2580 "PEReference: %s is not a parameter entity\n",
2581 name);
Owen Taylor3473f882001-02-23 17:55:21 +00002582 }
2583 }
2584 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002585 xmlFatalErr(ctxt, XML_ERR_PEREF_SEMICOL_MISSING, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002586 }
Owen Taylor3473f882001-02-23 17:55:21 +00002587 }
2588}
2589
/*
 * growBuffer:
 * @buffer:  name of the xmlChar * variable holding the buffer
 * @n:  extra room added on top of the doubling
 *
 * Macro used to grow the current buffer. The capacity is read from and
 * written back to the companion variable named <buffer>_size: it is
 * doubled, then increased by @n, before the reallocation is attempted.
 * If the reallocation fails, control jumps to the mem_error label of the
 * enclosing function; @buffer still holds the old block at that point
 * but <buffer>_size has already been enlarged.
 */
#define growBuffer(buffer, n) {                                         \
    xmlChar *tmp;                                                       \
                                                                        \
    buffer##_size *= 2;                                                 \
    buffer##_size += (n);                                               \
    tmp = (xmlChar *) xmlRealloc(buffer,                                \
                                 buffer##_size * sizeof(xmlChar));      \
    if (tmp == NULL) {                                                  \
        goto mem_error;                                                 \
    }                                                                   \
    buffer = tmp;                                                       \
}
2602
2603/**
Daniel Veillard7a02cfe2003-09-25 12:18:34 +00002604 * xmlStringLenDecodeEntities:
Owen Taylor3473f882001-02-23 17:55:21 +00002605 * @ctxt: the parser context
2606 * @str: the input string
Daniel Veillarde57ec792003-09-10 10:50:59 +00002607 * @len: the string length
Owen Taylor3473f882001-02-23 17:55:21 +00002608 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2609 * @end: an end marker xmlChar, 0 if none
2610 * @end2: an end marker xmlChar, 0 if none
2611 * @end3: an end marker xmlChar, 0 if none
2612 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002613 * Takes a entity string content and process to do the adequate substitutions.
Owen Taylor3473f882001-02-23 17:55:21 +00002614 *
2615 * [67] Reference ::= EntityRef | CharRef
2616 *
2617 * [69] PEReference ::= '%' Name ';'
2618 *
2619 * Returns A newly allocated string with the substitution done. The caller
2620 * must deallocate it !
2621 */
2622xmlChar *
Daniel Veillarde57ec792003-09-10 10:50:59 +00002623xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2624 int what, xmlChar end, xmlChar end2, xmlChar end3) {
Owen Taylor3473f882001-02-23 17:55:21 +00002625 xmlChar *buffer = NULL;
2626 int buffer_size = 0;
2627
2628 xmlChar *current = NULL;
Daniel Veillard68b6e022008-03-31 09:26:00 +00002629 xmlChar *rep = NULL;
Daniel Veillarde57ec792003-09-10 10:50:59 +00002630 const xmlChar *last;
Owen Taylor3473f882001-02-23 17:55:21 +00002631 xmlEntityPtr ent;
2632 int c,l;
2633 int nbchars = 0;
2634
Daniel Veillarda82b1822004-11-08 16:24:57 +00002635 if ((ctxt == NULL) || (str == NULL) || (len < 0))
Owen Taylor3473f882001-02-23 17:55:21 +00002636 return(NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00002637 last = str + len;
Owen Taylor3473f882001-02-23 17:55:21 +00002638
Daniel Veillard0161e632008-08-28 15:36:32 +00002639 if (((ctxt->depth > 40) &&
2640 ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
2641 (ctxt->depth > 1024)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002642 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002643 return(NULL);
2644 }
2645
2646 /*
2647 * allocate a translation buffer.
2648 */
2649 buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002650 buffer = (xmlChar *) xmlMallocAtomic(buffer_size * sizeof(xmlChar));
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002651 if (buffer == NULL) goto mem_error;
Owen Taylor3473f882001-02-23 17:55:21 +00002652
2653 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002654 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +00002655 * we are operating on already parsed values.
2656 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00002657 if (str < last)
2658 c = CUR_SCHAR(str, l);
2659 else
2660 c = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002661 while ((c != 0) && (c != end) && /* non input consuming loop */
2662 (c != end2) && (c != end3)) {
2663
2664 if (c == 0) break;
2665 if ((c == '&') && (str[1] == '#')) {
2666 int val = xmlParseStringCharRef(ctxt, &str);
2667 if (val != 0) {
2668 COPY_BUF(0,buffer,nbchars,val);
2669 }
Daniel Veillardbedc9772005-09-28 21:42:15 +00002670 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
Daniel Veillard0161e632008-08-28 15:36:32 +00002671 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
Daniel Veillardbedc9772005-09-28 21:42:15 +00002672 }
Owen Taylor3473f882001-02-23 17:55:21 +00002673 } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
2674 if (xmlParserDebugEntities)
2675 xmlGenericError(xmlGenericErrorContext,
2676 "String decoding Entity Reference: %.30s\n",
2677 str);
2678 ent = xmlParseStringEntityRef(ctxt, &str);
Daniel Veillard8915c152008-08-26 13:05:34 +00002679 if ((ctxt->lastError.code == XML_ERR_ENTITY_LOOP) ||
2680 (ctxt->lastError.code == XML_ERR_INTERNAL_ERROR))
Daniel Veillard4bf899b2008-08-20 17:04:30 +00002681 goto int_error;
Daniel Veillard4bf899b2008-08-20 17:04:30 +00002682 if (ent != NULL)
Daniel Veillardf4f4e482008-08-25 08:57:48 +00002683 ctxt->nbentities += ent->checked;
Owen Taylor3473f882001-02-23 17:55:21 +00002684 if ((ent != NULL) &&
2685 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
2686 if (ent->content != NULL) {
2687 COPY_BUF(0,buffer,nbchars,ent->content[0]);
Daniel Veillardbedc9772005-09-28 21:42:15 +00002688 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
Daniel Veillard0161e632008-08-28 15:36:32 +00002689 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
Daniel Veillardbedc9772005-09-28 21:42:15 +00002690 }
Owen Taylor3473f882001-02-23 17:55:21 +00002691 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00002692 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
2693 "predefined entity has no content\n");
Owen Taylor3473f882001-02-23 17:55:21 +00002694 }
2695 } else if ((ent != NULL) && (ent->content != NULL)) {
Owen Taylor3473f882001-02-23 17:55:21 +00002696 ctxt->depth++;
2697 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2698 0, 0, 0);
2699 ctxt->depth--;
Daniel Veillard0161e632008-08-28 15:36:32 +00002700
Owen Taylor3473f882001-02-23 17:55:21 +00002701 if (rep != NULL) {
2702 current = rep;
2703 while (*current != 0) { /* non input consuming loop */
2704 buffer[nbchars++] = *current++;
2705 if (nbchars >
2706 buffer_size - XML_PARSER_BUFFER_SIZE) {
Daniel Veillard0161e632008-08-28 15:36:32 +00002707 if (xmlParserEntityCheck(ctxt, nbchars, ent))
2708 goto int_error;
2709 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
Owen Taylor3473f882001-02-23 17:55:21 +00002710 }
2711 }
2712 xmlFree(rep);
Daniel Veillard68b6e022008-03-31 09:26:00 +00002713 rep = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00002714 }
2715 } else if (ent != NULL) {
2716 int i = xmlStrlen(ent->name);
2717 const xmlChar *cur = ent->name;
2718
2719 buffer[nbchars++] = '&';
2720 if (nbchars > buffer_size - i - XML_PARSER_BUFFER_SIZE) {
Daniel Veillard5bd3c062011-12-16 18:53:35 +08002721 growBuffer(buffer, i + XML_PARSER_BUFFER_SIZE);
Owen Taylor3473f882001-02-23 17:55:21 +00002722 }
2723 for (;i > 0;i--)
2724 buffer[nbchars++] = *cur++;
2725 buffer[nbchars++] = ';';
2726 }
2727 } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
2728 if (xmlParserDebugEntities)
2729 xmlGenericError(xmlGenericErrorContext,
2730 "String decoding PE Reference: %.30s\n", str);
2731 ent = xmlParseStringPEReference(ctxt, &str);
Daniel Veillard4bf899b2008-08-20 17:04:30 +00002732 if (ctxt->lastError.code == XML_ERR_ENTITY_LOOP)
2733 goto int_error;
Daniel Veillard4bf899b2008-08-20 17:04:30 +00002734 if (ent != NULL)
Daniel Veillardf4f4e482008-08-25 08:57:48 +00002735 ctxt->nbentities += ent->checked;
Owen Taylor3473f882001-02-23 17:55:21 +00002736 if (ent != NULL) {
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00002737 if (ent->content == NULL) {
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00002738 xmlLoadEntityContent(ctxt, ent);
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00002739 }
Owen Taylor3473f882001-02-23 17:55:21 +00002740 ctxt->depth++;
2741 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2742 0, 0, 0);
2743 ctxt->depth--;
2744 if (rep != NULL) {
2745 current = rep;
2746 while (*current != 0) { /* non input consuming loop */
2747 buffer[nbchars++] = *current++;
2748 if (nbchars >
2749 buffer_size - XML_PARSER_BUFFER_SIZE) {
Daniel Veillard0161e632008-08-28 15:36:32 +00002750 if (xmlParserEntityCheck(ctxt, nbchars, ent))
2751 goto int_error;
2752 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
Owen Taylor3473f882001-02-23 17:55:21 +00002753 }
2754 }
2755 xmlFree(rep);
Daniel Veillard68b6e022008-03-31 09:26:00 +00002756 rep = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00002757 }
2758 }
2759 } else {
2760 COPY_BUF(l,buffer,nbchars,c);
2761 str += l;
2762 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
Daniel Veillard0161e632008-08-28 15:36:32 +00002763 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
Owen Taylor3473f882001-02-23 17:55:21 +00002764 }
2765 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00002766 if (str < last)
2767 c = CUR_SCHAR(str, l);
2768 else
2769 c = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002770 }
Daniel Veillard13cee4e2009-09-05 14:52:55 +02002771 buffer[nbchars] = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002772 return(buffer);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002773
2774mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002775 xmlErrMemory(ctxt, NULL);
Daniel Veillard4bf899b2008-08-20 17:04:30 +00002776int_error:
Daniel Veillard68b6e022008-03-31 09:26:00 +00002777 if (rep != NULL)
2778 xmlFree(rep);
2779 if (buffer != NULL)
2780 xmlFree(buffer);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002781 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002782}
2783
Daniel Veillarde57ec792003-09-10 10:50:59 +00002784/**
2785 * xmlStringDecodeEntities:
2786 * @ctxt: the parser context
2787 * @str: the input string
2788 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2789 * @end: an end marker xmlChar, 0 if none
2790 * @end2: an end marker xmlChar, 0 if none
2791 * @end3: an end marker xmlChar, 0 if none
2792 *
2793 * Takes a entity string content and process to do the adequate substitutions.
2794 *
2795 * [67] Reference ::= EntityRef | CharRef
2796 *
2797 * [69] PEReference ::= '%' Name ';'
2798 *
2799 * Returns A newly allocated string with the substitution done. The caller
2800 * must deallocate it !
2801 */
2802xmlChar *
2803xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
2804 xmlChar end, xmlChar end2, xmlChar end3) {
Daniel Veillarda82b1822004-11-08 16:24:57 +00002805 if ((ctxt == NULL) || (str == NULL)) return(NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00002806 return(xmlStringLenDecodeEntities(ctxt, str, xmlStrlen(str), what,
2807 end, end2, end3));
2808}
Owen Taylor3473f882001-02-23 17:55:21 +00002809
2810/************************************************************************
2811 * *
Owen Taylor3473f882001-02-23 17:55:21 +00002812 * Commodity functions, cleanup needed ? *
2813 * *
2814 ************************************************************************/
2815
2816/**
2817 * areBlanks:
2818 * @ctxt: an XML parser context
2819 * @str: a xmlChar *
2820 * @len: the size of @str
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002821 * @blank_chars: we know the chars are blanks
Owen Taylor3473f882001-02-23 17:55:21 +00002822 *
2823 * Is this a sequence of blank chars that one can ignore ?
2824 *
2825 * Returns 1 if ignorable 0 otherwise.
2826 */
2827
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002828static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2829 int blank_chars) {
Owen Taylor3473f882001-02-23 17:55:21 +00002830 int i, ret;
2831 xmlNodePtr lastChild;
2832
Daniel Veillard05c13a22001-09-09 08:38:09 +00002833 /*
2834 * Don't spend time trying to differentiate them, the same callback is
2835 * used !
2836 */
2837 if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters)
Daniel Veillard2f362242001-03-02 17:36:21 +00002838 return(0);
2839
Owen Taylor3473f882001-02-23 17:55:21 +00002840 /*
2841 * Check for xml:space value.
2842 */
Daniel Veillard1114d002006-10-12 16:24:35 +00002843 if ((ctxt->space == NULL) || (*(ctxt->space) == 1) ||
2844 (*(ctxt->space) == -2))
Owen Taylor3473f882001-02-23 17:55:21 +00002845 return(0);
2846
2847 /*
2848 * Check that the string is made of blanks
2849 */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002850 if (blank_chars == 0) {
2851 for (i = 0;i < len;i++)
2852 if (!(IS_BLANK_CH(str[i]))) return(0);
2853 }
Owen Taylor3473f882001-02-23 17:55:21 +00002854
2855 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002856 * Look if the element is mixed content in the DTD if available
Owen Taylor3473f882001-02-23 17:55:21 +00002857 */
Daniel Veillard6dd398f2001-07-25 22:41:03 +00002858 if (ctxt->node == NULL) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002859 if (ctxt->myDoc != NULL) {
2860 ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
2861 if (ret == 0) return(1);
2862 if (ret == 1) return(0);
2863 }
2864
2865 /*
2866 * Otherwise, heuristic :-\
2867 */
Daniel Veillardabac41e2005-07-06 15:17:38 +00002868 if ((RAW != '<') && (RAW != 0xD)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002869 if ((ctxt->node->children == NULL) &&
2870 (RAW == '<') && (NXT(1) == '/')) return(0);
2871
2872 lastChild = xmlGetLastChild(ctxt->node);
2873 if (lastChild == NULL) {
Daniel Veillard7db37732001-07-12 01:20:08 +00002874 if ((ctxt->node->type != XML_ELEMENT_NODE) &&
2875 (ctxt->node->content != NULL)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002876 } else if (xmlNodeIsText(lastChild))
2877 return(0);
2878 else if ((ctxt->node->children != NULL) &&
2879 (xmlNodeIsText(ctxt->node->children)))
2880 return(0);
2881 return(1);
2882}
2883
Owen Taylor3473f882001-02-23 17:55:21 +00002884/************************************************************************
2885 * *
2886 * Extra stuff for namespace support *
2887 * Relates to http://www.w3.org/TR/WD-xml-names *
2888 * *
2889 ************************************************************************/
2890
2891/**
2892 * xmlSplitQName:
2893 * @ctxt: an XML parser context
2894 * @name: an XML parser context
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00002895 * @prefix: a xmlChar **
Owen Taylor3473f882001-02-23 17:55:21 +00002896 *
2897 * parse an UTF8 encoded XML qualified name string
2898 *
2899 * [NS 5] QName ::= (Prefix ':')? LocalPart
2900 *
2901 * [NS 6] Prefix ::= NCName
2902 *
2903 * [NS 7] LocalPart ::= NCName
2904 *
2905 * Returns the local part, and prefix is updated
2906 * to get the Prefix if any.
2907 */
2908
2909xmlChar *
2910xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
2911 xmlChar buf[XML_MAX_NAMELEN + 5];
2912 xmlChar *buffer = NULL;
2913 int len = 0;
2914 int max = XML_MAX_NAMELEN;
2915 xmlChar *ret = NULL;
2916 const xmlChar *cur = name;
2917 int c;
2918
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00002919 if (prefix == NULL) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002920 *prefix = NULL;
2921
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00002922 if (cur == NULL) return(NULL);
2923
Owen Taylor3473f882001-02-23 17:55:21 +00002924#ifndef XML_XML_NAMESPACE
2925 /* xml: prefix is not really a namespace */
2926 if ((cur[0] == 'x') && (cur[1] == 'm') &&
2927 (cur[2] == 'l') && (cur[3] == ':'))
2928 return(xmlStrdup(name));
2929#endif
2930
Daniel Veillard597bc482003-07-24 16:08:28 +00002931 /* nasty but well=formed */
Owen Taylor3473f882001-02-23 17:55:21 +00002932 if (cur[0] == ':')
2933 return(xmlStrdup(name));
2934
2935 c = *cur++;
2936 while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
2937 buf[len++] = c;
2938 c = *cur++;
2939 }
2940 if (len >= max) {
2941 /*
2942 * Okay someone managed to make a huge name, so he's ready to pay
2943 * for the processing speed.
2944 */
2945 max = len * 2;
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00002946
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002947 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002948 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002949 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002950 return(NULL);
2951 }
2952 memcpy(buffer, buf, len);
2953 while ((c != 0) && (c != ':')) { /* tested bigname.xml */
2954 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00002955 xmlChar *tmp;
2956
Owen Taylor3473f882001-02-23 17:55:21 +00002957 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00002958 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00002959 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00002960 if (tmp == NULL) {
Daniel Veillard68b6e022008-03-31 09:26:00 +00002961 xmlFree(buffer);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002962 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002963 return(NULL);
2964 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00002965 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00002966 }
2967 buffer[len++] = c;
2968 c = *cur++;
2969 }
2970 buffer[len] = 0;
2971 }
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00002972
Daniel Veillard597bc482003-07-24 16:08:28 +00002973 if ((c == ':') && (*cur == 0)) {
Daniel Veillard02a49632006-10-13 12:42:31 +00002974 if (buffer != NULL)
2975 xmlFree(buffer);
2976 *prefix = NULL;
Daniel Veillard597bc482003-07-24 16:08:28 +00002977 return(xmlStrdup(name));
Daniel Veillard02a49632006-10-13 12:42:31 +00002978 }
Daniel Veillard597bc482003-07-24 16:08:28 +00002979
Owen Taylor3473f882001-02-23 17:55:21 +00002980 if (buffer == NULL)
2981 ret = xmlStrndup(buf, len);
2982 else {
2983 ret = buffer;
2984 buffer = NULL;
2985 max = XML_MAX_NAMELEN;
2986 }
2987
2988
2989 if (c == ':') {
Daniel Veillardbb284f42002-10-16 18:02:47 +00002990 c = *cur;
Owen Taylor3473f882001-02-23 17:55:21 +00002991 *prefix = ret;
Daniel Veillard597bc482003-07-24 16:08:28 +00002992 if (c == 0) {
Daniel Veillard8d73bcb2003-08-04 01:06:15 +00002993 return(xmlStrndup(BAD_CAST "", 0));
Daniel Veillard597bc482003-07-24 16:08:28 +00002994 }
Owen Taylor3473f882001-02-23 17:55:21 +00002995 len = 0;
2996
Daniel Veillardbb284f42002-10-16 18:02:47 +00002997 /*
2998 * Check that the first character is proper to start
2999 * a new name
3000 */
3001 if (!(((c >= 0x61) && (c <= 0x7A)) ||
3002 ((c >= 0x41) && (c <= 0x5A)) ||
3003 (c == '_') || (c == ':'))) {
3004 int l;
3005 int first = CUR_SCHAR(cur, l);
3006
3007 if (!IS_LETTER(first) && (first != '_')) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00003008 xmlFatalErrMsgStr(ctxt, XML_NS_ERR_QNAME,
Daniel Veillardbb284f42002-10-16 18:02:47 +00003009 "Name %s is not XML Namespace compliant\n",
Daniel Veillardbc92eca2003-09-15 09:48:06 +00003010 name);
Daniel Veillardbb284f42002-10-16 18:02:47 +00003011 }
3012 }
3013 cur++;
3014
Owen Taylor3473f882001-02-23 17:55:21 +00003015 while ((c != 0) && (len < max)) { /* tested bigname2.xml */
3016 buf[len++] = c;
3017 c = *cur++;
3018 }
3019 if (len >= max) {
3020 /*
3021 * Okay someone managed to make a huge name, so he's ready to pay
3022 * for the processing speed.
3023 */
3024 max = len * 2;
Daniel Veillard68b6e022008-03-31 09:26:00 +00003025
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003026 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003027 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003028 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003029 return(NULL);
3030 }
3031 memcpy(buffer, buf, len);
3032 while (c != 0) { /* tested bigname2.xml */
3033 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003034 xmlChar *tmp;
3035
Owen Taylor3473f882001-02-23 17:55:21 +00003036 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003037 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00003038 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00003039 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003040 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00003041 xmlFree(buffer);
Owen Taylor3473f882001-02-23 17:55:21 +00003042 return(NULL);
3043 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003044 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003045 }
3046 buffer[len++] = c;
3047 c = *cur++;
3048 }
3049 buffer[len] = 0;
3050 }
Daniel Veillard68b6e022008-03-31 09:26:00 +00003051
Owen Taylor3473f882001-02-23 17:55:21 +00003052 if (buffer == NULL)
3053 ret = xmlStrndup(buf, len);
3054 else {
3055 ret = buffer;
3056 }
3057 }
3058
3059 return(ret);
3060}
3061
3062/************************************************************************
3063 * *
3064 * The parser itself *
3065 * Relates to http://www.w3.org/TR/REC-xml *
3066 * *
3067 ************************************************************************/
3068
Daniel Veillard34e3f642008-07-29 09:02:27 +00003069/************************************************************************
3070 * *
3071 * Routines to parse Name, NCName and NmToken *
3072 * *
3073 ************************************************************************/
#if defined(DEBUG)
/*
 * Call counters for the name parsing routines, only compiled in DEBUG
 * builds. Each one is incremented on entry to the routine it is named
 * after.
 */
static unsigned long nbParseName = 0;		/* xmlParseName */
static unsigned long nbParseNameComplex = 0;	/* xmlParseNameComplex */
static unsigned long nbParseNCName = 0;		/* xmlParseNCName */
static unsigned long nbParseNCNameComplex = 0;	/* xmlParseNCNameComplex */
static unsigned long nbParseStringName = 0;	/* xmlParseStringName */
static unsigned long nbParseNmToken = 0;	/* xmlParseNmtoken */
#endif /* DEBUG */
3082
/*
 * The two following functions implement the change of accepted
 * characters for Name and Nmtoken made by the fifth edition of XML 1.0.
 * They correspond to the modified production [4] and the new production
 * [4a] of that edition. Note that the macros used for the productions
 * Letter, Digit, CombiningChar and Extender are not needed by the new
 * rules; they are only used on the compatibility path.
 * Compatibility with the pre-fifth-edition parsing semantics is kept
 * when the XML_PARSE_OLD10 option is given to the parser.
 */
3093static int
3094xmlIsNameStartChar(xmlParserCtxtPtr ctxt, int c) {
3095 if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3096 /*
3097 * Use the new checks of production [4] [4a] amd [5] of the
3098 * Update 5 of XML-1.0
3099 */
3100 if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3101 (((c >= 'a') && (c <= 'z')) ||
3102 ((c >= 'A') && (c <= 'Z')) ||
3103 (c == '_') || (c == ':') ||
3104 ((c >= 0xC0) && (c <= 0xD6)) ||
3105 ((c >= 0xD8) && (c <= 0xF6)) ||
3106 ((c >= 0xF8) && (c <= 0x2FF)) ||
3107 ((c >= 0x370) && (c <= 0x37D)) ||
3108 ((c >= 0x37F) && (c <= 0x1FFF)) ||
3109 ((c >= 0x200C) && (c <= 0x200D)) ||
3110 ((c >= 0x2070) && (c <= 0x218F)) ||
3111 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3112 ((c >= 0x3001) && (c <= 0xD7FF)) ||
3113 ((c >= 0xF900) && (c <= 0xFDCF)) ||
3114 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3115 ((c >= 0x10000) && (c <= 0xEFFFF))))
3116 return(1);
3117 } else {
3118 if (IS_LETTER(c) || (c == '_') || (c == ':'))
3119 return(1);
3120 }
3121 return(0);
3122}
3123
3124static int
3125xmlIsNameChar(xmlParserCtxtPtr ctxt, int c) {
3126 if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3127 /*
3128 * Use the new checks of production [4] [4a] amd [5] of the
3129 * Update 5 of XML-1.0
3130 */
3131 if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3132 (((c >= 'a') && (c <= 'z')) ||
3133 ((c >= 'A') && (c <= 'Z')) ||
3134 ((c >= '0') && (c <= '9')) || /* !start */
3135 (c == '_') || (c == ':') ||
3136 (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3137 ((c >= 0xC0) && (c <= 0xD6)) ||
3138 ((c >= 0xD8) && (c <= 0xF6)) ||
3139 ((c >= 0xF8) && (c <= 0x2FF)) ||
3140 ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3141 ((c >= 0x370) && (c <= 0x37D)) ||
3142 ((c >= 0x37F) && (c <= 0x1FFF)) ||
3143 ((c >= 0x200C) && (c <= 0x200D)) ||
3144 ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3145 ((c >= 0x2070) && (c <= 0x218F)) ||
3146 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3147 ((c >= 0x3001) && (c <= 0xD7FF)) ||
3148 ((c >= 0xF900) && (c <= 0xFDCF)) ||
3149 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3150 ((c >= 0x10000) && (c <= 0xEFFFF))))
3151 return(1);
3152 } else {
3153 if ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3154 (c == '.') || (c == '-') ||
3155 (c == '_') || (c == ':') ||
3156 (IS_COMBINING(c)) ||
3157 (IS_EXTENDER(c)))
3158 return(1);
3159 }
3160 return(0);
3161}
3162
Daniel Veillarde57ec792003-09-10 10:50:59 +00003163static xmlChar * xmlParseAttValueInternal(xmlParserCtxtPtr ctxt,
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003164 int *len, int *alloc, int normalize);
Daniel Veillard0fb18932003-09-07 09:14:37 +00003165
Daniel Veillard34e3f642008-07-29 09:02:27 +00003166static const xmlChar *
3167xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
3168 int len = 0, l;
3169 int c;
3170 int count = 0;
3171
Daniel Veillardc6561462009-03-25 10:22:31 +00003172#ifdef DEBUG
Daniel Veillard34e3f642008-07-29 09:02:27 +00003173 nbParseNameComplex++;
Daniel Veillardc6561462009-03-25 10:22:31 +00003174#endif
Daniel Veillard34e3f642008-07-29 09:02:27 +00003175
3176 /*
3177 * Handler for more complex cases
3178 */
3179 GROW;
3180 c = CUR_CHAR(l);
3181 if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3182 /*
3183 * Use the new checks of production [4] [4a] amd [5] of the
3184 * Update 5 of XML-1.0
3185 */
3186 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3187 (!(((c >= 'a') && (c <= 'z')) ||
3188 ((c >= 'A') && (c <= 'Z')) ||
3189 (c == '_') || (c == ':') ||
3190 ((c >= 0xC0) && (c <= 0xD6)) ||
3191 ((c >= 0xD8) && (c <= 0xF6)) ||
3192 ((c >= 0xF8) && (c <= 0x2FF)) ||
3193 ((c >= 0x370) && (c <= 0x37D)) ||
3194 ((c >= 0x37F) && (c <= 0x1FFF)) ||
3195 ((c >= 0x200C) && (c <= 0x200D)) ||
3196 ((c >= 0x2070) && (c <= 0x218F)) ||
3197 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3198 ((c >= 0x3001) && (c <= 0xD7FF)) ||
3199 ((c >= 0xF900) && (c <= 0xFDCF)) ||
3200 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3201 ((c >= 0x10000) && (c <= 0xEFFFF))))) {
3202 return(NULL);
3203 }
3204 len += l;
3205 NEXTL(l);
3206 c = CUR_CHAR(l);
3207 while ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3208 (((c >= 'a') && (c <= 'z')) ||
3209 ((c >= 'A') && (c <= 'Z')) ||
3210 ((c >= '0') && (c <= '9')) || /* !start */
3211 (c == '_') || (c == ':') ||
3212 (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3213 ((c >= 0xC0) && (c <= 0xD6)) ||
3214 ((c >= 0xD8) && (c <= 0xF6)) ||
3215 ((c >= 0xF8) && (c <= 0x2FF)) ||
3216 ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3217 ((c >= 0x370) && (c <= 0x37D)) ||
3218 ((c >= 0x37F) && (c <= 0x1FFF)) ||
3219 ((c >= 0x200C) && (c <= 0x200D)) ||
3220 ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3221 ((c >= 0x2070) && (c <= 0x218F)) ||
3222 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3223 ((c >= 0x3001) && (c <= 0xD7FF)) ||
3224 ((c >= 0xF900) && (c <= 0xFDCF)) ||
3225 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3226 ((c >= 0x10000) && (c <= 0xEFFFF))
3227 )) {
3228 if (count++ > 100) {
3229 count = 0;
3230 GROW;
3231 }
3232 len += l;
3233 NEXTL(l);
3234 c = CUR_CHAR(l);
3235 }
3236 } else {
3237 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3238 (!IS_LETTER(c) && (c != '_') &&
3239 (c != ':'))) {
3240 return(NULL);
3241 }
3242 len += l;
3243 NEXTL(l);
3244 c = CUR_CHAR(l);
3245
3246 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3247 ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3248 (c == '.') || (c == '-') ||
3249 (c == '_') || (c == ':') ||
3250 (IS_COMBINING(c)) ||
3251 (IS_EXTENDER(c)))) {
3252 if (count++ > 100) {
3253 count = 0;
3254 GROW;
3255 }
3256 len += l;
3257 NEXTL(l);
3258 c = CUR_CHAR(l);
3259 }
3260 }
3261 if ((*ctxt->input->cur == '\n') && (ctxt->input->cur[-1] == '\r'))
3262 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - (len + 1), len));
3263 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
3264}
3265
Owen Taylor3473f882001-02-23 17:55:21 +00003266/**
3267 * xmlParseName:
3268 * @ctxt: an XML parser context
3269 *
3270 * parse an XML name.
3271 *
3272 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3273 * CombiningChar | Extender
3274 *
3275 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3276 *
Daniel Veillard807b4de2004-09-26 14:42:56 +00003277 * [6] Names ::= Name (#x20 Name)*
Owen Taylor3473f882001-02-23 17:55:21 +00003278 *
3279 * Returns the Name parsed or NULL
3280 */
3281
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003282const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00003283xmlParseName(xmlParserCtxtPtr ctxt) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00003284 const xmlChar *in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003285 const xmlChar *ret;
Owen Taylor3473f882001-02-23 17:55:21 +00003286 int count = 0;
3287
3288 GROW;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003289
Daniel Veillardc6561462009-03-25 10:22:31 +00003290#ifdef DEBUG
Daniel Veillard34e3f642008-07-29 09:02:27 +00003291 nbParseName++;
Daniel Veillardc6561462009-03-25 10:22:31 +00003292#endif
Daniel Veillard34e3f642008-07-29 09:02:27 +00003293
Daniel Veillard48b2f892001-02-25 16:11:03 +00003294 /*
3295 * Accelerator for simple ASCII names
3296 */
3297 in = ctxt->input->cur;
3298 if (((*in >= 0x61) && (*in <= 0x7A)) ||
3299 ((*in >= 0x41) && (*in <= 0x5A)) ||
3300 (*in == '_') || (*in == ':')) {
3301 in++;
3302 while (((*in >= 0x61) && (*in <= 0x7A)) ||
3303 ((*in >= 0x41) && (*in <= 0x5A)) ||
3304 ((*in >= 0x30) && (*in <= 0x39)) ||
Daniel Veillard76d66f42001-05-16 21:05:17 +00003305 (*in == '_') || (*in == '-') ||
3306 (*in == ':') || (*in == '.'))
Daniel Veillard48b2f892001-02-25 16:11:03 +00003307 in++;
Daniel Veillard76d66f42001-05-16 21:05:17 +00003308 if ((*in > 0) && (*in < 0x80)) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00003309 count = in - ctxt->input->cur;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003310 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
Daniel Veillard48b2f892001-02-25 16:11:03 +00003311 ctxt->input->cur = in;
Daniel Veillard77a90a72003-03-22 00:04:05 +00003312 ctxt->nbChars += count;
3313 ctxt->input->col += count;
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003314 if (ret == NULL)
3315 xmlErrMemory(ctxt, NULL);
Daniel Veillard48b2f892001-02-25 16:11:03 +00003316 return(ret);
3317 }
3318 }
Daniel Veillard34e3f642008-07-29 09:02:27 +00003319 /* accelerator for special cases */
Daniel Veillard2f362242001-03-02 17:36:21 +00003320 return(xmlParseNameComplex(ctxt));
Daniel Veillard21a0f912001-02-25 19:54:14 +00003321}
Daniel Veillard48b2f892001-02-25 16:11:03 +00003322
Daniel Veillard34e3f642008-07-29 09:02:27 +00003323static const xmlChar *
3324xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) {
3325 int len = 0, l;
3326 int c;
3327 int count = 0;
3328
Daniel Veillardc6561462009-03-25 10:22:31 +00003329#ifdef DEBUG
Daniel Veillard34e3f642008-07-29 09:02:27 +00003330 nbParseNCNameComplex++;
Daniel Veillardc6561462009-03-25 10:22:31 +00003331#endif
Daniel Veillard34e3f642008-07-29 09:02:27 +00003332
3333 /*
3334 * Handler for more complex cases
3335 */
3336 GROW;
3337 c = CUR_CHAR(l);
3338 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3339 (!xmlIsNameStartChar(ctxt, c) || (c == ':'))) {
3340 return(NULL);
3341 }
3342
3343 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3344 (xmlIsNameChar(ctxt, c) && (c != ':'))) {
3345 if (count++ > 100) {
3346 count = 0;
3347 GROW;
3348 }
3349 len += l;
3350 NEXTL(l);
3351 c = CUR_CHAR(l);
3352 }
3353 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
3354}
3355
3356/**
3357 * xmlParseNCName:
3358 * @ctxt: an XML parser context
3359 * @len: lenght of the string parsed
3360 *
3361 * parse an XML name.
3362 *
3363 * [4NS] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
3364 * CombiningChar | Extender
3365 *
3366 * [5NS] NCName ::= (Letter | '_') (NCNameChar)*
3367 *
3368 * Returns the Name parsed or NULL
3369 */
3370
3371static const xmlChar *
3372xmlParseNCName(xmlParserCtxtPtr ctxt) {
3373 const xmlChar *in;
3374 const xmlChar *ret;
3375 int count = 0;
3376
Daniel Veillardc6561462009-03-25 10:22:31 +00003377#ifdef DEBUG
Daniel Veillard34e3f642008-07-29 09:02:27 +00003378 nbParseNCName++;
Daniel Veillardc6561462009-03-25 10:22:31 +00003379#endif
Daniel Veillard34e3f642008-07-29 09:02:27 +00003380
3381 /*
3382 * Accelerator for simple ASCII names
3383 */
3384 in = ctxt->input->cur;
3385 if (((*in >= 0x61) && (*in <= 0x7A)) ||
3386 ((*in >= 0x41) && (*in <= 0x5A)) ||
3387 (*in == '_')) {
3388 in++;
3389 while (((*in >= 0x61) && (*in <= 0x7A)) ||
3390 ((*in >= 0x41) && (*in <= 0x5A)) ||
3391 ((*in >= 0x30) && (*in <= 0x39)) ||
3392 (*in == '_') || (*in == '-') ||
3393 (*in == '.'))
3394 in++;
3395 if ((*in > 0) && (*in < 0x80)) {
3396 count = in - ctxt->input->cur;
3397 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
3398 ctxt->input->cur = in;
3399 ctxt->nbChars += count;
3400 ctxt->input->col += count;
3401 if (ret == NULL) {
3402 xmlErrMemory(ctxt, NULL);
3403 }
3404 return(ret);
3405 }
3406 }
3407 return(xmlParseNCNameComplex(ctxt));
3408}
3409
Daniel Veillard46de64e2002-05-29 08:21:33 +00003410/**
3411 * xmlParseNameAndCompare:
3412 * @ctxt: an XML parser context
3413 *
3414 * parse an XML name and compares for match
3415 * (specialized for endtag parsing)
3416 *
Daniel Veillard46de64e2002-05-29 08:21:33 +00003417 * Returns NULL for an illegal name, (xmlChar*) 1 for success
3418 * and the name for mismatch
3419 */
3420
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003421static const xmlChar *
Daniel Veillard46de64e2002-05-29 08:21:33 +00003422xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003423 register const xmlChar *cmp = other;
3424 register const xmlChar *in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003425 const xmlChar *ret;
Daniel Veillard46de64e2002-05-29 08:21:33 +00003426
3427 GROW;
Daniel Veillard34e3f642008-07-29 09:02:27 +00003428
Daniel Veillard46de64e2002-05-29 08:21:33 +00003429 in = ctxt->input->cur;
3430 while (*in != 0 && *in == *cmp) {
Daniel Veillard34e3f642008-07-29 09:02:27 +00003431 ++in;
Daniel Veillard46de64e2002-05-29 08:21:33 +00003432 ++cmp;
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00003433 ctxt->input->col++;
Daniel Veillard46de64e2002-05-29 08:21:33 +00003434 }
William M. Brack76e95df2003-10-18 16:20:14 +00003435 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
Daniel Veillard34e3f642008-07-29 09:02:27 +00003436 /* success */
Daniel Veillard46de64e2002-05-29 08:21:33 +00003437 ctxt->input->cur = in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003438 return (const xmlChar*) 1;
Daniel Veillard46de64e2002-05-29 08:21:33 +00003439 }
3440 /* failure (or end of input buffer), check with full function */
3441 ret = xmlParseName (ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00003442 /* strings coming from the dictionnary direct compare possible */
3443 if (ret == other) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003444 return (const xmlChar*) 1;
Daniel Veillard46de64e2002-05-29 08:21:33 +00003445 }
3446 return ret;
3447}
3448
Owen Taylor3473f882001-02-23 17:55:21 +00003449/**
3450 * xmlParseStringName:
3451 * @ctxt: an XML parser context
3452 * @str: a pointer to the string pointer (IN/OUT)
3453 *
3454 * parse an XML name.
3455 *
3456 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3457 * CombiningChar | Extender
3458 *
3459 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3460 *
Daniel Veillard807b4de2004-09-26 14:42:56 +00003461 * [6] Names ::= Name (#x20 Name)*
Owen Taylor3473f882001-02-23 17:55:21 +00003462 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003463 * Returns the Name parsed or NULL. The @str pointer
Owen Taylor3473f882001-02-23 17:55:21 +00003464 * is updated to the current location in the string.
3465 */
3466
Daniel Veillard56a4cb82001-03-24 17:00:36 +00003467static xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00003468xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
3469 xmlChar buf[XML_MAX_NAMELEN + 5];
3470 const xmlChar *cur = *str;
3471 int len = 0, l;
3472 int c;
3473
Daniel Veillardc6561462009-03-25 10:22:31 +00003474#ifdef DEBUG
Daniel Veillard34e3f642008-07-29 09:02:27 +00003475 nbParseStringName++;
Daniel Veillardc6561462009-03-25 10:22:31 +00003476#endif
Daniel Veillard34e3f642008-07-29 09:02:27 +00003477
Owen Taylor3473f882001-02-23 17:55:21 +00003478 c = CUR_SCHAR(cur, l);
Daniel Veillard34e3f642008-07-29 09:02:27 +00003479 if (!xmlIsNameStartChar(ctxt, c)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003480 return(NULL);
3481 }
3482
Daniel Veillard34e3f642008-07-29 09:02:27 +00003483 COPY_BUF(l,buf,len,c);
3484 cur += l;
3485 c = CUR_SCHAR(cur, l);
3486 while (xmlIsNameChar(ctxt, c)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003487 COPY_BUF(l,buf,len,c);
3488 cur += l;
3489 c = CUR_SCHAR(cur, l);
3490 if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
3491 /*
3492 * Okay someone managed to make a huge name, so he's ready to pay
3493 * for the processing speed.
3494 */
3495 xmlChar *buffer;
3496 int max = len * 2;
Daniel Veillard34e3f642008-07-29 09:02:27 +00003497
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003498 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003499 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003500 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003501 return(NULL);
3502 }
3503 memcpy(buffer, buf, len);
Daniel Veillard34e3f642008-07-29 09:02:27 +00003504 while (xmlIsNameChar(ctxt, c)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003505 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003506 xmlChar *tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003507 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003508 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00003509 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00003510 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003511 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00003512 xmlFree(buffer);
Owen Taylor3473f882001-02-23 17:55:21 +00003513 return(NULL);
3514 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003515 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003516 }
3517 COPY_BUF(l,buffer,len,c);
3518 cur += l;
3519 c = CUR_SCHAR(cur, l);
3520 }
3521 buffer[len] = 0;
3522 *str = cur;
3523 return(buffer);
3524 }
3525 }
3526 *str = cur;
3527 return(xmlStrndup(buf, len));
3528}
3529
3530/**
3531 * xmlParseNmtoken:
3532 * @ctxt: an XML parser context
Daniel Veillard34e3f642008-07-29 09:02:27 +00003533 *
Owen Taylor3473f882001-02-23 17:55:21 +00003534 * parse an XML Nmtoken.
3535 *
3536 * [7] Nmtoken ::= (NameChar)+
3537 *
Daniel Veillard807b4de2004-09-26 14:42:56 +00003538 * [8] Nmtokens ::= Nmtoken (#x20 Nmtoken)*
Owen Taylor3473f882001-02-23 17:55:21 +00003539 *
3540 * Returns the Nmtoken parsed or NULL
3541 */
3542
3543xmlChar *
3544xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
3545 xmlChar buf[XML_MAX_NAMELEN + 5];
3546 int len = 0, l;
3547 int c;
3548 int count = 0;
3549
Daniel Veillardc6561462009-03-25 10:22:31 +00003550#ifdef DEBUG
Daniel Veillard34e3f642008-07-29 09:02:27 +00003551 nbParseNmToken++;
Daniel Veillardc6561462009-03-25 10:22:31 +00003552#endif
Daniel Veillard34e3f642008-07-29 09:02:27 +00003553
Owen Taylor3473f882001-02-23 17:55:21 +00003554 GROW;
3555 c = CUR_CHAR(l);
3556
Daniel Veillard34e3f642008-07-29 09:02:27 +00003557 while (xmlIsNameChar(ctxt, c)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003558 if (count++ > 100) {
3559 count = 0;
3560 GROW;
3561 }
3562 COPY_BUF(l,buf,len,c);
3563 NEXTL(l);
3564 c = CUR_CHAR(l);
3565 if (len >= XML_MAX_NAMELEN) {
3566 /*
3567 * Okay someone managed to make a huge token, so he's ready to pay
3568 * for the processing speed.
3569 */
3570 xmlChar *buffer;
3571 int max = len * 2;
Daniel Veillard34e3f642008-07-29 09:02:27 +00003572
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003573 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003574 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003575 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003576 return(NULL);
3577 }
3578 memcpy(buffer, buf, len);
Daniel Veillard34e3f642008-07-29 09:02:27 +00003579 while (xmlIsNameChar(ctxt, c)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003580 if (count++ > 100) {
3581 count = 0;
3582 GROW;
3583 }
3584 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003585 xmlChar *tmp;
3586
Owen Taylor3473f882001-02-23 17:55:21 +00003587 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003588 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00003589 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00003590 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003591 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00003592 xmlFree(buffer);
Owen Taylor3473f882001-02-23 17:55:21 +00003593 return(NULL);
3594 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003595 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003596 }
3597 COPY_BUF(l,buffer,len,c);
3598 NEXTL(l);
3599 c = CUR_CHAR(l);
3600 }
3601 buffer[len] = 0;
3602 return(buffer);
3603 }
3604 }
3605 if (len == 0)
3606 return(NULL);
3607 return(xmlStrndup(buf, len));
3608}
3609
3610/**
3611 * xmlParseEntityValue:
3612 * @ctxt: an XML parser context
3613 * @orig: if non-NULL store a copy of the original entity value
3614 *
3615 * parse a value for ENTITY declarations
3616 *
3617 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
3618 * "'" ([^%&'] | PEReference | Reference)* "'"
3619 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003620 * Returns the EntityValue parsed with reference substituted or NULL
Owen Taylor3473f882001-02-23 17:55:21 +00003621 */
3622
3623xmlChar *
3624xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
3625 xmlChar *buf = NULL;
3626 int len = 0;
3627 int size = XML_PARSER_BUFFER_SIZE;
3628 int c, l;
3629 xmlChar stop;
3630 xmlChar *ret = NULL;
3631 const xmlChar *cur = NULL;
3632 xmlParserInputPtr input;
3633
3634 if (RAW == '"') stop = '"';
3635 else if (RAW == '\'') stop = '\'';
3636 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003637 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003638 return(NULL);
3639 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003640 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003641 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003642 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003643 return(NULL);
3644 }
3645
3646 /*
3647 * The content of the entity definition is copied in a buffer.
3648 */
3649
3650 ctxt->instate = XML_PARSER_ENTITY_VALUE;
3651 input = ctxt->input;
3652 GROW;
3653 NEXT;
3654 c = CUR_CHAR(l);
3655 /*
3656 * NOTE: 4.4.5 Included in Literal
3657 * When a parameter entity reference appears in a literal entity
3658 * value, ... a single or double quote character in the replacement
3659 * text is always treated as a normal data character and will not
3660 * terminate the literal.
3661 * In practice it means we stop the loop only when back at parsing
3662 * the initial entity and the quote is found
3663 */
William M. Brack871611b2003-10-18 04:53:14 +00003664 while ((IS_CHAR(c)) && ((c != stop) || /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00003665 (ctxt->input != input))) {
3666 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003667 xmlChar *tmp;
3668
Owen Taylor3473f882001-02-23 17:55:21 +00003669 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003670 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3671 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003672 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00003673 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00003674 return(NULL);
3675 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003676 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003677 }
3678 COPY_BUF(l,buf,len,c);
3679 NEXTL(l);
3680 /*
3681 * Pop-up of finished entities.
3682 */
3683 while ((RAW == 0) && (ctxt->inputNr > 1)) /* non input consuming */
3684 xmlPopInput(ctxt);
3685
3686 GROW;
3687 c = CUR_CHAR(l);
3688 if (c == 0) {
3689 GROW;
3690 c = CUR_CHAR(l);
3691 }
3692 }
3693 buf[len] = 0;
3694
3695 /*
3696 * Raise problem w.r.t. '&' and '%' being used in non-entities
3697 * reference constructs. Note Charref will be handled in
3698 * xmlStringDecodeEntities()
3699 */
3700 cur = buf;
3701 while (*cur != 0) { /* non input consuming */
3702 if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
3703 xmlChar *name;
3704 xmlChar tmp = *cur;
3705
3706 cur++;
3707 name = xmlParseStringName(ctxt, &cur);
3708 if ((name == NULL) || (*cur != ';')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003709 xmlFatalErrMsgInt(ctxt, XML_ERR_ENTITY_CHAR_ERROR,
Owen Taylor3473f882001-02-23 17:55:21 +00003710 "EntityValue: '%c' forbidden except for entities references\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003711 tmp);
Owen Taylor3473f882001-02-23 17:55:21 +00003712 }
Daniel Veillard5151c062001-10-23 13:10:19 +00003713 if ((tmp == '%') && (ctxt->inSubset == 1) &&
3714 (ctxt->inputNr == 1)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003715 xmlFatalErr(ctxt, XML_ERR_ENTITY_PE_INTERNAL, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003716 }
3717 if (name != NULL)
3718 xmlFree(name);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00003719 if (*cur == 0)
3720 break;
Owen Taylor3473f882001-02-23 17:55:21 +00003721 }
3722 cur++;
3723 }
3724
3725 /*
3726 * Then PEReference entities are substituted.
3727 */
3728 if (c != stop) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003729 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003730 xmlFree(buf);
3731 } else {
3732 NEXT;
3733 /*
3734 * NOTE: 4.4.7 Bypassed
3735 * When a general entity reference appears in the EntityValue in
3736 * an entity declaration, it is bypassed and left as is.
3737 * so XML_SUBSTITUTE_REF is not set here.
3738 */
3739 ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF,
3740 0, 0, 0);
3741 if (orig != NULL)
3742 *orig = buf;
3743 else
3744 xmlFree(buf);
3745 }
3746
3747 return(ret);
3748}
3749
3750/**
Daniel Veillard01c13b52002-12-10 15:19:08 +00003751 * xmlParseAttValueComplex:
3752 * @ctxt: an XML parser context
Daniel Veillard0fb18932003-09-07 09:14:37 +00003753 * @len: the resulting attribute len
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003754 * @normalize: wether to apply the inner normalization
Daniel Veillard01c13b52002-12-10 15:19:08 +00003755 *
3756 * parse a value for an attribute, this is the fallback function
3757 * of xmlParseAttValue() when the attribute parsing requires handling
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003758 * of non-ASCII characters, or normalization compaction.
Daniel Veillard01c13b52002-12-10 15:19:08 +00003759 *
3760 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
3761 */
Daniel Veillard0fb18932003-09-07 09:14:37 +00003762static xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003763xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) {
Daniel Veillarde72c7562002-05-31 09:47:30 +00003764 xmlChar limit = 0;
3765 xmlChar *buf = NULL;
Daniel Veillard68b6e022008-03-31 09:26:00 +00003766 xmlChar *rep = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00003767 int len = 0;
3768 int buf_size = 0;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003769 int c, l, in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003770 xmlChar *current = NULL;
3771 xmlEntityPtr ent;
3772
Owen Taylor3473f882001-02-23 17:55:21 +00003773 if (NXT(0) == '"') {
3774 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3775 limit = '"';
3776 NEXT;
3777 } else if (NXT(0) == '\'') {
3778 limit = '\'';
3779 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3780 NEXT;
3781 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003782 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003783 return(NULL);
3784 }
Daniel Veillard68b6e022008-03-31 09:26:00 +00003785
Owen Taylor3473f882001-02-23 17:55:21 +00003786 /*
3787 * allocate a translation buffer.
3788 */
3789 buf_size = XML_PARSER_BUFFER_SIZE;
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003790 buf = (xmlChar *) xmlMallocAtomic(buf_size * sizeof(xmlChar));
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003791 if (buf == NULL) goto mem_error;
Owen Taylor3473f882001-02-23 17:55:21 +00003792
3793 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003794 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +00003795 */
3796 c = CUR_CHAR(l);
Daniel Veillardfdc91562002-07-01 21:52:03 +00003797 while ((NXT(0) != limit) && /* checked */
Daniel Veillardb9e5acc2007-06-12 13:43:00 +00003798 (IS_CHAR(c)) && (c != '<')) {
Owen Taylor3473f882001-02-23 17:55:21 +00003799 if (c == 0) break;
Daniel Veillardfdc91562002-07-01 21:52:03 +00003800 if (c == '&') {
Daniel Veillard62998c02003-09-15 12:56:36 +00003801 in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003802 if (NXT(1) == '#') {
3803 int val = xmlParseCharRef(ctxt);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003804
Owen Taylor3473f882001-02-23 17:55:21 +00003805 if (val == '&') {
Daniel Veillard319a7422001-09-11 09:27:09 +00003806 if (ctxt->replaceEntities) {
3807 if (len > buf_size - 10) {
Daniel Veillard0161e632008-08-28 15:36:32 +00003808 growBuffer(buf, 10);
Daniel Veillard319a7422001-09-11 09:27:09 +00003809 }
3810 buf[len++] = '&';
3811 } else {
3812 /*
3813 * The reparsing will be done in xmlStringGetNodeList()
3814 * called by the attribute() function in SAX.c
3815 */
Daniel Veillard319a7422001-09-11 09:27:09 +00003816 if (len > buf_size - 10) {
Daniel Veillard0161e632008-08-28 15:36:32 +00003817 growBuffer(buf, 10);
Daniel Veillard319a7422001-09-11 09:27:09 +00003818 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003819 buf[len++] = '&';
3820 buf[len++] = '#';
3821 buf[len++] = '3';
3822 buf[len++] = '8';
3823 buf[len++] = ';';
Owen Taylor3473f882001-02-23 17:55:21 +00003824 }
Daniel Veillarddc171602008-03-26 17:41:38 +00003825 } else if (val != 0) {
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00003826 if (len > buf_size - 10) {
Daniel Veillard0161e632008-08-28 15:36:32 +00003827 growBuffer(buf, 10);
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00003828 }
Owen Taylor3473f882001-02-23 17:55:21 +00003829 len += xmlCopyChar(0, &buf[len], val);
3830 }
3831 } else {
3832 ent = xmlParseEntityRef(ctxt);
Daniel Veillardcba68392008-08-29 12:43:40 +00003833 ctxt->nbentities++;
3834 if (ent != NULL)
3835 ctxt->nbentities += ent->owner;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003836 if ((ent != NULL) &&
3837 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
3838 if (len > buf_size - 10) {
Daniel Veillard0161e632008-08-28 15:36:32 +00003839 growBuffer(buf, 10);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003840 }
3841 if ((ctxt->replaceEntities == 0) &&
3842 (ent->content[0] == '&')) {
3843 buf[len++] = '&';
3844 buf[len++] = '#';
3845 buf[len++] = '3';
3846 buf[len++] = '8';
3847 buf[len++] = ';';
3848 } else {
3849 buf[len++] = ent->content[0];
3850 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003851 } else if ((ent != NULL) &&
3852 (ctxt->replaceEntities != 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003853 if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
3854 rep = xmlStringDecodeEntities(ctxt, ent->content,
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003855 XML_SUBSTITUTE_REF,
3856 0, 0, 0);
Owen Taylor3473f882001-02-23 17:55:21 +00003857 if (rep != NULL) {
3858 current = rep;
3859 while (*current != 0) { /* non input consuming */
Daniel Veillard283d5022009-08-25 17:18:39 +02003860 if ((*current == 0xD) || (*current == 0xA) ||
3861 (*current == 0x9)) {
3862 buf[len++] = 0x20;
3863 current++;
3864 } else
3865 buf[len++] = *current++;
Owen Taylor3473f882001-02-23 17:55:21 +00003866 if (len > buf_size - 10) {
Daniel Veillard0161e632008-08-28 15:36:32 +00003867 growBuffer(buf, 10);
Owen Taylor3473f882001-02-23 17:55:21 +00003868 }
3869 }
3870 xmlFree(rep);
Daniel Veillard68b6e022008-03-31 09:26:00 +00003871 rep = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00003872 }
3873 } else {
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00003874 if (len > buf_size - 10) {
Daniel Veillard0161e632008-08-28 15:36:32 +00003875 growBuffer(buf, 10);
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00003876 }
Owen Taylor3473f882001-02-23 17:55:21 +00003877 if (ent->content != NULL)
3878 buf[len++] = ent->content[0];
3879 }
3880 } else if (ent != NULL) {
3881 int i = xmlStrlen(ent->name);
3882 const xmlChar *cur = ent->name;
3883
3884 /*
3885 * This may look absurd but is needed to detect
3886 * entities problems
3887 */
3888 if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
3889 (ent->content != NULL)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003890 rep = xmlStringDecodeEntities(ctxt, ent->content,
Daniel Veillard68b6e022008-03-31 09:26:00 +00003891 XML_SUBSTITUTE_REF, 0, 0, 0);
3892 if (rep != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +00003893 xmlFree(rep);
Daniel Veillard68b6e022008-03-31 09:26:00 +00003894 rep = NULL;
3895 }
Owen Taylor3473f882001-02-23 17:55:21 +00003896 }
3897
3898 /*
3899 * Just output the reference
3900 */
3901 buf[len++] = '&';
Daniel Veillard0161e632008-08-28 15:36:32 +00003902 while (len > buf_size - i - 10) {
3903 growBuffer(buf, i + 10);
Owen Taylor3473f882001-02-23 17:55:21 +00003904 }
3905 for (;i > 0;i--)
3906 buf[len++] = *cur++;
3907 buf[len++] = ';';
3908 }
3909 }
3910 } else {
3911 if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003912 if ((len != 0) || (!normalize)) {
3913 if ((!normalize) || (!in_space)) {
3914 COPY_BUF(l,buf,len,0x20);
Daniel Veillard0161e632008-08-28 15:36:32 +00003915 while (len > buf_size - 10) {
3916 growBuffer(buf, 10);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003917 }
3918 }
3919 in_space = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003920 }
3921 } else {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003922 in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003923 COPY_BUF(l,buf,len,c);
3924 if (len > buf_size - 10) {
Daniel Veillard0161e632008-08-28 15:36:32 +00003925 growBuffer(buf, 10);
Owen Taylor3473f882001-02-23 17:55:21 +00003926 }
3927 }
3928 NEXTL(l);
3929 }
3930 GROW;
3931 c = CUR_CHAR(l);
3932 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003933 if ((in_space) && (normalize)) {
3934 while (buf[len - 1] == 0x20) len--;
3935 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00003936 buf[len] = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003937 if (RAW == '<') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003938 xmlFatalErr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003939 } else if (RAW != limit) {
Daniel Veillardb9e5acc2007-06-12 13:43:00 +00003940 if ((c != 0) && (!IS_CHAR(c))) {
3941 xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
3942 "invalid character in attribute value\n");
3943 } else {
3944 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
3945 "AttValue: ' expected\n");
3946 }
Owen Taylor3473f882001-02-23 17:55:21 +00003947 } else
3948 NEXT;
Daniel Veillard0fb18932003-09-07 09:14:37 +00003949 if (attlen != NULL) *attlen = len;
Owen Taylor3473f882001-02-23 17:55:21 +00003950 return(buf);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003951
3952mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003953 xmlErrMemory(ctxt, NULL);
Daniel Veillard68b6e022008-03-31 09:26:00 +00003954 if (buf != NULL)
3955 xmlFree(buf);
3956 if (rep != NULL)
3957 xmlFree(rep);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003958 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003959}
3960
3961/**
Daniel Veillard0fb18932003-09-07 09:14:37 +00003962 * xmlParseAttValue:
3963 * @ctxt: an XML parser context
3964 *
3965 * parse a value for an attribute
3966 * Note: the parser won't do substitution of entities here, this
3967 * will be handled later in xmlStringGetNodeList
3968 *
3969 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
3970 * "'" ([^<&'] | Reference)* "'"
3971 *
3972 * 3.3.3 Attribute-Value Normalization:
3973 * Before the value of an attribute is passed to the application or
3974 * checked for validity, the XML processor must normalize it as follows:
3975 * - a character reference is processed by appending the referenced
3976 * character to the attribute value
3977 * - an entity reference is processed by recursively processing the
3978 * replacement text of the entity
3979 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
3980 * appending #x20 to the normalized value, except that only a single
3981 * #x20 is appended for a "#xD#xA" sequence that is part of an external
3982 * parsed entity or the literal entity value of an internal parsed entity
3983 * - other characters are processed by appending them to the normalized value
3984 * If the declared value is not CDATA, then the XML processor must further
3985 * process the normalized attribute value by discarding any leading and
3986 * trailing space (#x20) characters, and by replacing sequences of space
3987 * (#x20) characters by a single space (#x20) character.
3988 * All attributes for which no declaration has been read should be treated
3989 * by a non-validating parser as if declared CDATA.
3990 *
3991 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
3992 */
3993
3994
3995xmlChar *
3996xmlParseAttValue(xmlParserCtxtPtr ctxt) {
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00003997 if ((ctxt == NULL) || (ctxt->input == NULL)) return(NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003998 return(xmlParseAttValueInternal(ctxt, NULL, NULL, 0));
Daniel Veillard0fb18932003-09-07 09:14:37 +00003999}
4000
4001/**
Owen Taylor3473f882001-02-23 17:55:21 +00004002 * xmlParseSystemLiteral:
4003 * @ctxt: an XML parser context
4004 *
4005 * parse an XML Literal
4006 *
4007 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
4008 *
4009 * Returns the SystemLiteral parsed or NULL
4010 */
4011
4012xmlChar *
4013xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
4014 xmlChar *buf = NULL;
4015 int len = 0;
4016 int size = XML_PARSER_BUFFER_SIZE;
4017 int cur, l;
4018 xmlChar stop;
4019 int state = ctxt->instate;
4020 int count = 0;
4021
4022 SHRINK;
4023 if (RAW == '"') {
4024 NEXT;
4025 stop = '"';
4026 } else if (RAW == '\'') {
4027 NEXT;
4028 stop = '\'';
4029 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004030 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004031 return(NULL);
4032 }
4033
Daniel Veillard3c908dc2003-04-19 00:07:51 +00004034 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00004035 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004036 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004037 return(NULL);
4038 }
4039 ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
4040 cur = CUR_CHAR(l);
William M. Brack871611b2003-10-18 04:53:14 +00004041 while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00004042 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00004043 xmlChar *tmp;
4044
Owen Taylor3473f882001-02-23 17:55:21 +00004045 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00004046 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4047 if (tmp == NULL) {
4048 xmlFree(buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004049 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004050 ctxt->instate = (xmlParserInputState) state;
4051 return(NULL);
4052 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00004053 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00004054 }
4055 count++;
4056 if (count > 50) {
4057 GROW;
4058 count = 0;
4059 }
4060 COPY_BUF(l,buf,len,cur);
4061 NEXTL(l);
4062 cur = CUR_CHAR(l);
4063 if (cur == 0) {
4064 GROW;
4065 SHRINK;
4066 cur = CUR_CHAR(l);
4067 }
4068 }
4069 buf[len] = 0;
4070 ctxt->instate = (xmlParserInputState) state;
William M. Brack871611b2003-10-18 04:53:14 +00004071 if (!IS_CHAR(cur)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004072 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004073 } else {
4074 NEXT;
4075 }
4076 return(buf);
4077}
4078
4079/**
4080 * xmlParsePubidLiteral:
4081 * @ctxt: an XML parser context
4082 *
4083 * parse an XML public literal
4084 *
4085 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
4086 *
4087 * Returns the PubidLiteral parsed or NULL.
4088 */
4089
4090xmlChar *
4091xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
4092 xmlChar *buf = NULL;
4093 int len = 0;
4094 int size = XML_PARSER_BUFFER_SIZE;
4095 xmlChar cur;
4096 xmlChar stop;
4097 int count = 0;
Daniel Veillard4a7ae502002-02-18 19:18:17 +00004098 xmlParserInputState oldstate = ctxt->instate;
Owen Taylor3473f882001-02-23 17:55:21 +00004099
4100 SHRINK;
4101 if (RAW == '"') {
4102 NEXT;
4103 stop = '"';
4104 } else if (RAW == '\'') {
4105 NEXT;
4106 stop = '\'';
4107 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004108 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004109 return(NULL);
4110 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00004111 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00004112 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004113 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004114 return(NULL);
4115 }
Daniel Veillard4a7ae502002-02-18 19:18:17 +00004116 ctxt->instate = XML_PARSER_PUBLIC_LITERAL;
Owen Taylor3473f882001-02-23 17:55:21 +00004117 cur = CUR;
William M. Brack76e95df2003-10-18 16:20:14 +00004118 while ((IS_PUBIDCHAR_CH(cur)) && (cur != stop)) { /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00004119 if (len + 1 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00004120 xmlChar *tmp;
4121
Owen Taylor3473f882001-02-23 17:55:21 +00004122 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00004123 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4124 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004125 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00004126 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00004127 return(NULL);
4128 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00004129 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00004130 }
4131 buf[len++] = cur;
4132 count++;
4133 if (count > 50) {
4134 GROW;
4135 count = 0;
4136 }
4137 NEXT;
4138 cur = CUR;
4139 if (cur == 0) {
4140 GROW;
4141 SHRINK;
4142 cur = CUR;
4143 }
4144 }
4145 buf[len] = 0;
4146 if (cur != stop) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004147 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004148 } else {
4149 NEXT;
4150 }
Daniel Veillard4a7ae502002-02-18 19:18:17 +00004151 ctxt->instate = oldstate;
Owen Taylor3473f882001-02-23 17:55:21 +00004152 return(buf);
4153}
4154
Daniel Veillard8ed10722009-08-20 19:17:36 +02004155static void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata);
Daniel Veillard0a119eb2005-07-20 13:46:00 +00004156
/*
 * Lookup table used for the test in the inner loop of the char data
 * parsing (xmlParseCharData), indexed by input byte.
 *
 * A non-zero entry (the byte value itself) marks a byte the inner loop
 * may step over directly; a zero entry stops the loop so the byte gets
 * individual treatment. The zero entries are:
 *   - 0x00-0x08 and 0x0A-0x1F, which includes LF (0x0A) and CR (0x0D);
 *     TAB (0x09) is the only byte below 0x20 that is accepted
 *   - '&' (0x26), '<' (0x3C) and ']' (0x5D)
 *   - every non-ASCII byte (0x80-0xFF)
 */
static const unsigned char test_char_data[256] = {
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* TAB kept; LF, CR stop */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x00, 0x27, /* & */
    0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
    0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
    0x38, 0x39, 0x3A, 0x3B, 0x00, 0x3D, 0x3E, 0x3F, /* < */
    0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
    0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F,
    0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
    0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x00, 0x5E, 0x5F, /* ] */
    0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
    0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
    0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* non-ascii */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
};
4194
Owen Taylor3473f882001-02-23 17:55:21 +00004195/**
4196 * xmlParseCharData:
4197 * @ctxt: an XML parser context
4198 * @cdata: int indicating whether we are within a CDATA section
4199 *
4200 * parse a CharData section.
4201 * if we are within a CDATA section ']]>' marks an end of section.
4202 *
4203 * The right angle bracket (>) may be represented using the string "&gt;",
4204 * and must, for compatibility, be escaped using "&gt;" or a character
4205 * reference when it appears in the string "]]>" in content, when that
4206 * string is not marking the end of a CDATA section.
4207 *
4208 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
4209 */
4210
4211void
4212xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00004213 const xmlChar *in;
Daniel Veillard48b2f892001-02-25 16:11:03 +00004214 int nbchar = 0;
Daniel Veillard50582112001-03-26 22:52:16 +00004215 int line = ctxt->input->line;
4216 int col = ctxt->input->col;
Daniel Veillard0714c5b2005-01-23 00:01:01 +00004217 int ccol;
Daniel Veillard48b2f892001-02-25 16:11:03 +00004218
4219 SHRINK;
4220 GROW;
4221 /*
4222 * Accelerated common case where input don't need to be
4223 * modified before passing it to the handler.
4224 */
Daniel Veillardfdc91562002-07-01 21:52:03 +00004225 if (!cdata) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00004226 in = ctxt->input->cur;
4227 do {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004228get_more_space:
Daniel Veillard9e264ad2008-01-11 06:10:16 +00004229 while (*in == 0x20) { in++; ctxt->input->col++; }
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004230 if (*in == 0xA) {
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004231 do {
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00004232 ctxt->input->line++; ctxt->input->col = 1;
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004233 in++;
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004234 } while (*in == 0xA);
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004235 goto get_more_space;
4236 }
4237 if (*in == '<') {
4238 nbchar = in - ctxt->input->cur;
4239 if (nbchar > 0) {
4240 const xmlChar *tmp = ctxt->input->cur;
4241 ctxt->input->cur = in;
4242
Daniel Veillard34099b42004-11-04 17:34:35 +00004243 if ((ctxt->sax != NULL) &&
4244 (ctxt->sax->ignorableWhitespace !=
4245 ctxt->sax->characters)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004246 if (areBlanks(ctxt, tmp, nbchar, 1)) {
Daniel Veillard32acf0c2005-03-31 14:12:37 +00004247 if (ctxt->sax->ignorableWhitespace != NULL)
4248 ctxt->sax->ignorableWhitespace(ctxt->userData,
4249 tmp, nbchar);
Daniel Veillard1114d002006-10-12 16:24:35 +00004250 } else {
4251 if (ctxt->sax->characters != NULL)
4252 ctxt->sax->characters(ctxt->userData,
4253 tmp, nbchar);
4254 if (*ctxt->space == -1)
4255 *ctxt->space = -2;
4256 }
Daniel Veillard34099b42004-11-04 17:34:35 +00004257 } else if ((ctxt->sax != NULL) &&
4258 (ctxt->sax->characters != NULL)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004259 ctxt->sax->characters(ctxt->userData,
4260 tmp, nbchar);
4261 }
4262 }
4263 return;
4264 }
Daniel Veillard0714c5b2005-01-23 00:01:01 +00004265
Daniel Veillard3ed155f2001-04-29 19:56:59 +00004266get_more:
Daniel Veillard0714c5b2005-01-23 00:01:01 +00004267 ccol = ctxt->input->col;
Daniel Veillard0a119eb2005-07-20 13:46:00 +00004268 while (test_char_data[*in]) {
4269 in++;
4270 ccol++;
4271 }
Daniel Veillard0714c5b2005-01-23 00:01:01 +00004272 ctxt->input->col = ccol;
Daniel Veillard561b7f82002-03-20 21:55:57 +00004273 if (*in == 0xA) {
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004274 do {
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00004275 ctxt->input->line++; ctxt->input->col = 1;
Daniel Veillard3ed155f2001-04-29 19:56:59 +00004276 in++;
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004277 } while (*in == 0xA);
Daniel Veillard3ed155f2001-04-29 19:56:59 +00004278 goto get_more;
Daniel Veillard561b7f82002-03-20 21:55:57 +00004279 }
4280 if (*in == ']') {
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00004281 if ((in[1] == ']') && (in[2] == '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004282 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00004283 ctxt->input->cur = in;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00004284 return;
4285 }
4286 in++;
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00004287 ctxt->input->col++;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00004288 goto get_more;
4289 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00004290 nbchar = in - ctxt->input->cur;
Daniel Veillard80f32572001-03-07 19:45:40 +00004291 if (nbchar > 0) {
Daniel Veillard34099b42004-11-04 17:34:35 +00004292 if ((ctxt->sax != NULL) &&
4293 (ctxt->sax->ignorableWhitespace !=
Daniel Veillard40412cd2003-09-03 13:28:32 +00004294 ctxt->sax->characters) &&
William M. Brack76e95df2003-10-18 16:20:14 +00004295 (IS_BLANK_CH(*ctxt->input->cur))) {
Daniel Veillarda7374592001-05-10 14:17:55 +00004296 const xmlChar *tmp = ctxt->input->cur;
4297 ctxt->input->cur = in;
Daniel Veillard40412cd2003-09-03 13:28:32 +00004298
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004299 if (areBlanks(ctxt, tmp, nbchar, 0)) {
Daniel Veillard32acf0c2005-03-31 14:12:37 +00004300 if (ctxt->sax->ignorableWhitespace != NULL)
4301 ctxt->sax->ignorableWhitespace(ctxt->userData,
4302 tmp, nbchar);
Daniel Veillard1114d002006-10-12 16:24:35 +00004303 } else {
4304 if (ctxt->sax->characters != NULL)
4305 ctxt->sax->characters(ctxt->userData,
4306 tmp, nbchar);
4307 if (*ctxt->space == -1)
4308 *ctxt->space = -2;
4309 }
Daniel Veillard3ed27bd2001-06-17 17:58:17 +00004310 line = ctxt->input->line;
4311 col = ctxt->input->col;
Daniel Veillard34099b42004-11-04 17:34:35 +00004312 } else if (ctxt->sax != NULL) {
Daniel Veillard80f32572001-03-07 19:45:40 +00004313 if (ctxt->sax->characters != NULL)
4314 ctxt->sax->characters(ctxt->userData,
4315 ctxt->input->cur, nbchar);
Daniel Veillard3ed27bd2001-06-17 17:58:17 +00004316 line = ctxt->input->line;
4317 col = ctxt->input->col;
Daniel Veillard80f32572001-03-07 19:45:40 +00004318 }
Daniel Veillard1dc9feb2008-11-17 15:59:21 +00004319 /* something really bad happened in the SAX callback */
4320 if (ctxt->instate != XML_PARSER_CONTENT)
4321 return;
Daniel Veillard48b2f892001-02-25 16:11:03 +00004322 }
4323 ctxt->input->cur = in;
Daniel Veillard561b7f82002-03-20 21:55:57 +00004324 if (*in == 0xD) {
4325 in++;
4326 if (*in == 0xA) {
4327 ctxt->input->cur = in;
Daniel Veillard48b2f892001-02-25 16:11:03 +00004328 in++;
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00004329 ctxt->input->line++; ctxt->input->col = 1;
Daniel Veillard561b7f82002-03-20 21:55:57 +00004330 continue; /* while */
Daniel Veillard48b2f892001-02-25 16:11:03 +00004331 }
Daniel Veillard561b7f82002-03-20 21:55:57 +00004332 in--;
4333 }
4334 if (*in == '<') {
4335 return;
4336 }
4337 if (*in == '&') {
4338 return;
Daniel Veillard48b2f892001-02-25 16:11:03 +00004339 }
4340 SHRINK;
4341 GROW;
4342 in = ctxt->input->cur;
William M. Brackc07329e2003-09-08 01:57:30 +00004343 } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
Daniel Veillard48b2f892001-02-25 16:11:03 +00004344 nbchar = 0;
4345 }
Daniel Veillard50582112001-03-26 22:52:16 +00004346 ctxt->input->line = line;
4347 ctxt->input->col = col;
Daniel Veillard48b2f892001-02-25 16:11:03 +00004348 xmlParseCharDataComplex(ctxt, cdata);
4349}
4350
Daniel Veillard01c13b52002-12-10 15:19:08 +00004351/**
4352 * xmlParseCharDataComplex:
4353 * @ctxt: an XML parser context
4354 * @cdata: int indicating whether we are within a CDATA section
4355 *
4356 * parse a CharData section.this is the fallback function
4357 * of xmlParseCharData() when the parsing requires handling
4358 * of non-ASCII characters.
4359 */
Daniel Veillard8ed10722009-08-20 19:17:36 +02004360static void
Daniel Veillard48b2f892001-02-25 16:11:03 +00004361xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) {
Owen Taylor3473f882001-02-23 17:55:21 +00004362 xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
4363 int nbchar = 0;
4364 int cur, l;
4365 int count = 0;
4366
4367 SHRINK;
4368 GROW;
4369 cur = CUR_CHAR(l);
Daniel Veillardfdc91562002-07-01 21:52:03 +00004370 while ((cur != '<') && /* checked */
4371 (cur != '&') &&
William M. Brack871611b2003-10-18 04:53:14 +00004372 (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
Owen Taylor3473f882001-02-23 17:55:21 +00004373 if ((cur == ']') && (NXT(1) == ']') &&
4374 (NXT(2) == '>')) {
4375 if (cdata) break;
4376 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004377 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004378 }
4379 }
4380 COPY_BUF(l,buf,nbchar,cur);
4381 if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
Daniel Veillard092643b2003-09-25 14:29:29 +00004382 buf[nbchar] = 0;
4383
Owen Taylor3473f882001-02-23 17:55:21 +00004384 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00004385 * OK the segment is to be consumed as chars.
Owen Taylor3473f882001-02-23 17:55:21 +00004386 */
4387 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004388 if (areBlanks(ctxt, buf, nbchar, 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00004389 if (ctxt->sax->ignorableWhitespace != NULL)
4390 ctxt->sax->ignorableWhitespace(ctxt->userData,
4391 buf, nbchar);
4392 } else {
4393 if (ctxt->sax->characters != NULL)
4394 ctxt->sax->characters(ctxt->userData, buf, nbchar);
Daniel Veillard1114d002006-10-12 16:24:35 +00004395 if ((ctxt->sax->characters !=
4396 ctxt->sax->ignorableWhitespace) &&
4397 (*ctxt->space == -1))
4398 *ctxt->space = -2;
Owen Taylor3473f882001-02-23 17:55:21 +00004399 }
4400 }
4401 nbchar = 0;
Daniel Veillard1dc9feb2008-11-17 15:59:21 +00004402 /* something really bad happened in the SAX callback */
4403 if (ctxt->instate != XML_PARSER_CONTENT)
4404 return;
Owen Taylor3473f882001-02-23 17:55:21 +00004405 }
4406 count++;
4407 if (count > 50) {
4408 GROW;
4409 count = 0;
4410 }
4411 NEXTL(l);
4412 cur = CUR_CHAR(l);
4413 }
4414 if (nbchar != 0) {
Daniel Veillard092643b2003-09-25 14:29:29 +00004415 buf[nbchar] = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00004416 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00004417 * OK the segment is to be consumed as chars.
Owen Taylor3473f882001-02-23 17:55:21 +00004418 */
4419 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004420 if (areBlanks(ctxt, buf, nbchar, 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00004421 if (ctxt->sax->ignorableWhitespace != NULL)
4422 ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
4423 } else {
4424 if (ctxt->sax->characters != NULL)
4425 ctxt->sax->characters(ctxt->userData, buf, nbchar);
Daniel Veillard1114d002006-10-12 16:24:35 +00004426 if ((ctxt->sax->characters != ctxt->sax->ignorableWhitespace) &&
4427 (*ctxt->space == -1))
4428 *ctxt->space = -2;
Owen Taylor3473f882001-02-23 17:55:21 +00004429 }
4430 }
4431 }
Daniel Veillard3b1478b2006-04-24 08:50:10 +00004432 if ((cur != 0) && (!IS_CHAR(cur))) {
4433 /* Generate the error and skip the offending character */
4434 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4435 "PCDATA invalid Char value %d\n",
4436 cur);
4437 NEXTL(l);
4438 }
Owen Taylor3473f882001-02-23 17:55:21 +00004439}
4440
4441/**
4442 * xmlParseExternalID:
4443 * @ctxt: an XML parser context
4444 * @publicID: a xmlChar** receiving PubidLiteral
4445 * @strict: indicate whether we should restrict parsing to only
4446 * production [75], see NOTE below
4447 *
4448 * Parse an External ID or a Public ID
4449 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00004450 * NOTE: Productions [75] and [83] interact badly since [75] can generate
Owen Taylor3473f882001-02-23 17:55:21 +00004451 * 'PUBLIC' S PubidLiteral S SystemLiteral
4452 *
4453 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
4454 * | 'PUBLIC' S PubidLiteral S SystemLiteral
4455 *
4456 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
4457 *
4458 * Returns the function returns SystemLiteral and in the second
4459 * case publicID receives PubidLiteral, is strict is off
4460 * it is possible to return NULL and have publicID set.
4461 */
4462
4463xmlChar *
4464xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
4465 xmlChar *URI = NULL;
4466
4467 SHRINK;
Daniel Veillard146c9122001-03-22 15:22:27 +00004468
4469 *publicID = NULL;
Daniel Veillarda07050d2003-10-19 14:46:32 +00004470 if (CMP6(CUR_PTR, 'S', 'Y', 'S', 'T', 'E', 'M')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004471 SKIP(6);
William M. Brack76e95df2003-10-18 16:20:14 +00004472 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004473 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4474 "Space required after 'SYSTEM'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004475 }
4476 SKIP_BLANKS;
4477 URI = xmlParseSystemLiteral(ctxt);
4478 if (URI == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004479 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004480 }
Daniel Veillarda07050d2003-10-19 14:46:32 +00004481 } else if (CMP6(CUR_PTR, 'P', 'U', 'B', 'L', 'I', 'C')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004482 SKIP(6);
William M. Brack76e95df2003-10-18 16:20:14 +00004483 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004484 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004485 "Space required after 'PUBLIC'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004486 }
4487 SKIP_BLANKS;
4488 *publicID = xmlParsePubidLiteral(ctxt);
4489 if (*publicID == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004490 xmlFatalErr(ctxt, XML_ERR_PUBID_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004491 }
4492 if (strict) {
4493 /*
4494 * We don't handle [83] so "S SystemLiteral" is required.
4495 */
William M. Brack76e95df2003-10-18 16:20:14 +00004496 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004497 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004498 "Space required after the Public Identifier\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004499 }
4500 } else {
4501 /*
4502 * We handle [83] so we return immediately, if
4503 * "S SystemLiteral" is not detected. From a purely parsing
4504 * point of view that's a nice mess.
4505 */
4506 const xmlChar *ptr;
4507 GROW;
4508
4509 ptr = CUR_PTR;
William M. Brack76e95df2003-10-18 16:20:14 +00004510 if (!IS_BLANK_CH(*ptr)) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004511
William M. Brack76e95df2003-10-18 16:20:14 +00004512 while (IS_BLANK_CH(*ptr)) ptr++; /* TODO: dangerous, fix ! */
Owen Taylor3473f882001-02-23 17:55:21 +00004513 if ((*ptr != '\'') && (*ptr != '"')) return(NULL);
4514 }
4515 SKIP_BLANKS;
4516 URI = xmlParseSystemLiteral(ctxt);
4517 if (URI == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004518 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004519 }
4520 }
4521 return(URI);
4522}
4523
4524/**
Daniel Veillard4c778d82005-01-23 17:37:44 +00004525 * xmlParseCommentComplex:
Owen Taylor3473f882001-02-23 17:55:21 +00004526 * @ctxt: an XML parser context
Daniel Veillard4c778d82005-01-23 17:37:44 +00004527 * @buf: the already parsed part of the buffer
 * @len: number of bytes filled in the buffer
4529 * @size: allocated size of the buffer
Owen Taylor3473f882001-02-23 17:55:21 +00004530 *
4531 * Skip an XML (SGML) comment <!-- .... -->
4532 * The spec says that "For compatibility, the string "--" (double-hyphen)
4533 * must not occur within comments. "
Daniel Veillard4c778d82005-01-23 17:37:44 +00004534 * This is the slow routine in case the accelerator for ascii didn't work
Owen Taylor3473f882001-02-23 17:55:21 +00004535 *
4536 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4537 */
Daniel Veillard4c778d82005-01-23 17:37:44 +00004538static void
4539xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar *buf, int len, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +00004540 int q, ql;
4541 int r, rl;
4542 int cur, l;
Owen Taylor3473f882001-02-23 17:55:21 +00004543 int count = 0;
Daniel Veillard051d52c2008-07-29 16:44:59 +00004544 int inputid;
4545
4546 inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00004547
Owen Taylor3473f882001-02-23 17:55:21 +00004548 if (buf == NULL) {
Daniel Veillard4c778d82005-01-23 17:37:44 +00004549 len = 0;
4550 size = XML_PARSER_BUFFER_SIZE;
4551 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4552 if (buf == NULL) {
4553 xmlErrMemory(ctxt, NULL);
4554 return;
4555 }
Owen Taylor3473f882001-02-23 17:55:21 +00004556 }
William M. Brackbf9a73d2007-02-09 00:07:07 +00004557 GROW; /* Assure there's enough input data */
Owen Taylor3473f882001-02-23 17:55:21 +00004558 q = CUR_CHAR(ql);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00004559 if (q == 0)
4560 goto not_terminated;
Daniel Veillardda629342007-08-01 07:49:06 +00004561 if (!IS_CHAR(q)) {
4562 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4563 "xmlParseComment: invalid xmlChar value %d\n",
4564 q);
4565 xmlFree (buf);
4566 return;
4567 }
Owen Taylor3473f882001-02-23 17:55:21 +00004568 NEXTL(ql);
4569 r = CUR_CHAR(rl);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00004570 if (r == 0)
4571 goto not_terminated;
Daniel Veillardda629342007-08-01 07:49:06 +00004572 if (!IS_CHAR(r)) {
4573 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4574 "xmlParseComment: invalid xmlChar value %d\n",
4575 q);
4576 xmlFree (buf);
4577 return;
4578 }
Owen Taylor3473f882001-02-23 17:55:21 +00004579 NEXTL(rl);
4580 cur = CUR_CHAR(l);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00004581 if (cur == 0)
4582 goto not_terminated;
William M. Brack871611b2003-10-18 04:53:14 +00004583 while (IS_CHAR(cur) && /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00004584 ((cur != '>') ||
4585 (r != '-') || (q != '-'))) {
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00004586 if ((r == '-') && (q == '-')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004587 xmlFatalErr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004588 }
4589 if (len + 5 >= size) {
William M. Bracka3215c72004-07-31 16:24:01 +00004590 xmlChar *new_buf;
Owen Taylor3473f882001-02-23 17:55:21 +00004591 size *= 2;
William M. Bracka3215c72004-07-31 16:24:01 +00004592 new_buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4593 if (new_buf == NULL) {
4594 xmlFree (buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004595 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004596 return;
4597 }
William M. Bracka3215c72004-07-31 16:24:01 +00004598 buf = new_buf;
Owen Taylor3473f882001-02-23 17:55:21 +00004599 }
4600 COPY_BUF(ql,buf,len,q);
4601 q = r;
4602 ql = rl;
4603 r = cur;
4604 rl = l;
4605
4606 count++;
4607 if (count > 50) {
4608 GROW;
4609 count = 0;
4610 }
4611 NEXTL(l);
4612 cur = CUR_CHAR(l);
4613 if (cur == 0) {
4614 SHRINK;
4615 GROW;
4616 cur = CUR_CHAR(l);
4617 }
4618 }
4619 buf[len] = 0;
Daniel Veillardda629342007-08-01 07:49:06 +00004620 if (cur == 0) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00004621 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00004622 "Comment not terminated \n<!--%.50s\n", buf);
Daniel Veillardda629342007-08-01 07:49:06 +00004623 } else if (!IS_CHAR(cur)) {
4624 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4625 "xmlParseComment: invalid xmlChar value %d\n",
4626 cur);
Owen Taylor3473f882001-02-23 17:55:21 +00004627 } else {
Daniel Veillard051d52c2008-07-29 16:44:59 +00004628 if (inputid != ctxt->input->id) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004629 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4630 "Comment doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004631 }
4632 NEXT;
4633 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
4634 (!ctxt->disableSAX))
4635 ctxt->sax->comment(ctxt->userData, buf);
Owen Taylor3473f882001-02-23 17:55:21 +00004636 }
Daniel Veillardda629342007-08-01 07:49:06 +00004637 xmlFree(buf);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00004638 return;
4639not_terminated:
4640 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4641 "Comment not terminated\n", NULL);
4642 xmlFree(buf);
Daniel Veillardda629342007-08-01 07:49:06 +00004643 return;
Owen Taylor3473f882001-02-23 17:55:21 +00004644}
Daniel Veillardda629342007-08-01 07:49:06 +00004645
Daniel Veillard4c778d82005-01-23 17:37:44 +00004646/**
4647 * xmlParseComment:
4648 * @ctxt: an XML parser context
4649 *
4650 * Skip an XML (SGML) comment <!-- .... -->
4651 * The spec says that "For compatibility, the string "--" (double-hyphen)
4652 * must not occur within comments. "
4653 *
4654 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4655 */
4656void
4657xmlParseComment(xmlParserCtxtPtr ctxt) {
4658 xmlChar *buf = NULL;
4659 int size = XML_PARSER_BUFFER_SIZE;
Daniel Veillard316a5c32005-01-23 22:56:39 +00004660 int len = 0;
Daniel Veillard4c778d82005-01-23 17:37:44 +00004661 xmlParserInputState state;
4662 const xmlChar *in;
4663 int nbchar = 0, ccol;
Daniel Veillard051d52c2008-07-29 16:44:59 +00004664 int inputid;
Daniel Veillard4c778d82005-01-23 17:37:44 +00004665
4666 /*
4667 * Check that there is a comment right here.
4668 */
4669 if ((RAW != '<') || (NXT(1) != '!') ||
4670 (NXT(2) != '-') || (NXT(3) != '-')) return;
Daniel Veillard4c778d82005-01-23 17:37:44 +00004671 state = ctxt->instate;
4672 ctxt->instate = XML_PARSER_COMMENT;
Daniel Veillard051d52c2008-07-29 16:44:59 +00004673 inputid = ctxt->input->id;
Daniel Veillard4c778d82005-01-23 17:37:44 +00004674 SKIP(4);
4675 SHRINK;
4676 GROW;
4677
4678 /*
4679 * Accelerated common case where input don't need to be
4680 * modified before passing it to the handler.
4681 */
4682 in = ctxt->input->cur;
4683 do {
4684 if (*in == 0xA) {
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004685 do {
Daniel Veillard4c778d82005-01-23 17:37:44 +00004686 ctxt->input->line++; ctxt->input->col = 1;
4687 in++;
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004688 } while (*in == 0xA);
Daniel Veillard4c778d82005-01-23 17:37:44 +00004689 }
4690get_more:
4691 ccol = ctxt->input->col;
4692 while (((*in > '-') && (*in <= 0x7F)) ||
4693 ((*in >= 0x20) && (*in < '-')) ||
4694 (*in == 0x09)) {
4695 in++;
4696 ccol++;
4697 }
4698 ctxt->input->col = ccol;
4699 if (*in == 0xA) {
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004700 do {
Daniel Veillard4c778d82005-01-23 17:37:44 +00004701 ctxt->input->line++; ctxt->input->col = 1;
4702 in++;
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004703 } while (*in == 0xA);
Daniel Veillard4c778d82005-01-23 17:37:44 +00004704 goto get_more;
4705 }
4706 nbchar = in - ctxt->input->cur;
4707 /*
4708 * save current set of data
4709 */
4710 if (nbchar > 0) {
4711 if ((ctxt->sax != NULL) &&
4712 (ctxt->sax->comment != NULL)) {
4713 if (buf == NULL) {
4714 if ((*in == '-') && (in[1] == '-'))
4715 size = nbchar + 1;
4716 else
4717 size = XML_PARSER_BUFFER_SIZE + nbchar;
4718 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4719 if (buf == NULL) {
4720 xmlErrMemory(ctxt, NULL);
4721 ctxt->instate = state;
4722 return;
4723 }
4724 len = 0;
4725 } else if (len + nbchar + 1 >= size) {
4726 xmlChar *new_buf;
4727 size += len + nbchar + XML_PARSER_BUFFER_SIZE;
4728 new_buf = (xmlChar *) xmlRealloc(buf,
4729 size * sizeof(xmlChar));
4730 if (new_buf == NULL) {
4731 xmlFree (buf);
4732 xmlErrMemory(ctxt, NULL);
4733 ctxt->instate = state;
4734 return;
4735 }
4736 buf = new_buf;
4737 }
4738 memcpy(&buf[len], ctxt->input->cur, nbchar);
4739 len += nbchar;
4740 buf[len] = 0;
4741 }
4742 }
4743 ctxt->input->cur = in;
Daniel Veillard9b528c72006-02-05 03:06:15 +00004744 if (*in == 0xA) {
4745 in++;
4746 ctxt->input->line++; ctxt->input->col = 1;
4747 }
Daniel Veillard4c778d82005-01-23 17:37:44 +00004748 if (*in == 0xD) {
4749 in++;
4750 if (*in == 0xA) {
4751 ctxt->input->cur = in;
4752 in++;
4753 ctxt->input->line++; ctxt->input->col = 1;
4754 continue; /* while */
4755 }
4756 in--;
4757 }
4758 SHRINK;
4759 GROW;
4760 in = ctxt->input->cur;
4761 if (*in == '-') {
4762 if (in[1] == '-') {
4763 if (in[2] == '>') {
Daniel Veillard051d52c2008-07-29 16:44:59 +00004764 if (ctxt->input->id != inputid) {
4765 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4766 "comment doesn't start and stop in the same entity\n");
4767 }
Daniel Veillard4c778d82005-01-23 17:37:44 +00004768 SKIP(3);
4769 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
4770 (!ctxt->disableSAX)) {
4771 if (buf != NULL)
4772 ctxt->sax->comment(ctxt->userData, buf);
4773 else
4774 ctxt->sax->comment(ctxt->userData, BAD_CAST "");
4775 }
4776 if (buf != NULL)
4777 xmlFree(buf);
4778 ctxt->instate = state;
4779 return;
4780 }
Bryan Henderson8658d272012-05-08 16:39:05 +08004781 if (buf != NULL) {
4782 xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
4783 "Double hyphen within comment: "
4784 "<!--%.50s\n",
Daniel Veillard4c778d82005-01-23 17:37:44 +00004785 buf);
Bryan Henderson8658d272012-05-08 16:39:05 +08004786 } else
4787 xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
4788 "Double hyphen within comment\n", NULL);
Daniel Veillard4c778d82005-01-23 17:37:44 +00004789 in++;
4790 ctxt->input->col++;
4791 }
4792 in++;
4793 ctxt->input->col++;
4794 goto get_more;
4795 }
4796 } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
4797 xmlParseCommentComplex(ctxt, buf, len, size);
4798 ctxt->instate = state;
4799 return;
4800}
4801
Owen Taylor3473f882001-02-23 17:55:21 +00004802
4803/**
4804 * xmlParsePITarget:
4805 * @ctxt: an XML parser context
4806 *
4807 * parse the name of a PI
4808 *
4809 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
4810 *
4811 * Returns the PITarget name or NULL
4812 */
4813
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004814const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00004815xmlParsePITarget(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004816 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00004817
4818 name = xmlParseName(ctxt);
4819 if ((name != NULL) &&
4820 ((name[0] == 'x') || (name[0] == 'X')) &&
4821 ((name[1] == 'm') || (name[1] == 'M')) &&
4822 ((name[2] == 'l') || (name[2] == 'L'))) {
4823 int i;
4824 if ((name[0] == 'x') && (name[1] == 'm') &&
4825 (name[2] == 'l') && (name[3] == 0)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004826 xmlFatalErrMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
Owen Taylor3473f882001-02-23 17:55:21 +00004827 "XML declaration allowed only at the start of the document\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004828 return(name);
4829 } else if (name[3] == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004830 xmlFatalErr(ctxt, XML_ERR_RESERVED_XML_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004831 return(name);
4832 }
4833 for (i = 0;;i++) {
4834 if (xmlW3CPIs[i] == NULL) break;
4835 if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
4836 return(name);
4837 }
Daniel Veillard24eb9782003-10-04 21:08:09 +00004838 xmlWarningMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
4839 "xmlParsePITarget: invalid name prefix 'xml'\n",
4840 NULL, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004841 }
Daniel Veillard37334572008-07-31 08:20:02 +00004842 if ((name != NULL) && (xmlStrchr(name, ':') != NULL)) {
4843 xmlNsErr(ctxt, XML_NS_ERR_COLON,
4844 "colon are forbidden from PI names '%s'\n", name, NULL, NULL);
4845 }
Owen Taylor3473f882001-02-23 17:55:21 +00004846 return(name);
4847}
4848
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00004849#ifdef LIBXML_CATALOG_ENABLED
4850/**
4851 * xmlParseCatalogPI:
4852 * @ctxt: an XML parser context
4853 * @catalog: the PI value string
4854 *
4855 * parse an XML Catalog Processing Instruction.
4856 *
4857 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
4858 *
4859 * Occurs only if allowed by the user and if happening in the Misc
4860 * part of the document before any doctype informations
4861 * This will add the given catalog to the parsing context in order
4862 * to be used if there is a resolution need further down in the document
4863 */
4864
4865static void
4866xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
4867 xmlChar *URL = NULL;
4868 const xmlChar *tmp, *base;
4869 xmlChar marker;
4870
4871 tmp = catalog;
William M. Brack76e95df2003-10-18 16:20:14 +00004872 while (IS_BLANK_CH(*tmp)) tmp++;
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00004873 if (xmlStrncmp(tmp, BAD_CAST"catalog", 7))
4874 goto error;
4875 tmp += 7;
William M. Brack76e95df2003-10-18 16:20:14 +00004876 while (IS_BLANK_CH(*tmp)) tmp++;
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00004877 if (*tmp != '=') {
4878 return;
4879 }
4880 tmp++;
William M. Brack76e95df2003-10-18 16:20:14 +00004881 while (IS_BLANK_CH(*tmp)) tmp++;
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00004882 marker = *tmp;
4883 if ((marker != '\'') && (marker != '"'))
4884 goto error;
4885 tmp++;
4886 base = tmp;
4887 while ((*tmp != 0) && (*tmp != marker)) tmp++;
4888 if (*tmp == 0)
4889 goto error;
4890 URL = xmlStrndup(base, tmp - base);
4891 tmp++;
William M. Brack76e95df2003-10-18 16:20:14 +00004892 while (IS_BLANK_CH(*tmp)) tmp++;
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00004893 if (*tmp != 0)
4894 goto error;
4895
4896 if (URL != NULL) {
4897 ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, URL);
4898 xmlFree(URL);
4899 }
4900 return;
4901
4902error:
Daniel Veillard24eb9782003-10-04 21:08:09 +00004903 xmlWarningMsg(ctxt, XML_WAR_CATALOG_PI,
4904 "Catalog PI syntax error: %s\n",
4905 catalog, NULL);
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00004906 if (URL != NULL)
4907 xmlFree(URL);
4908}
4909#endif
4910
Owen Taylor3473f882001-02-23 17:55:21 +00004911/**
4912 * xmlParsePI:
4913 * @ctxt: an XML parser context
4914 *
4915 * parse an XML Processing Instruction.
4916 *
4917 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
4918 *
4919 * The processing is transfered to SAX once parsed.
4920 */
4921
4922void
4923xmlParsePI(xmlParserCtxtPtr ctxt) {
4924 xmlChar *buf = NULL;
4925 int len = 0;
4926 int size = XML_PARSER_BUFFER_SIZE;
4927 int cur, l;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004928 const xmlChar *target;
Owen Taylor3473f882001-02-23 17:55:21 +00004929 xmlParserInputState state;
4930 int count = 0;
4931
4932 if ((RAW == '<') && (NXT(1) == '?')) {
4933 xmlParserInputPtr input = ctxt->input;
4934 state = ctxt->instate;
4935 ctxt->instate = XML_PARSER_PI;
4936 /*
4937 * this is a Processing Instruction.
4938 */
4939 SKIP(2);
4940 SHRINK;
4941
4942 /*
4943 * Parse the target name and check for special support like
4944 * namespace.
4945 */
4946 target = xmlParsePITarget(ctxt);
4947 if (target != NULL) {
4948 if ((RAW == '?') && (NXT(1) == '>')) {
4949 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004950 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4951 "PI declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004952 }
4953 SKIP(2);
4954
4955 /*
4956 * SAX: PI detected.
4957 */
4958 if ((ctxt->sax) && (!ctxt->disableSAX) &&
4959 (ctxt->sax->processingInstruction != NULL))
4960 ctxt->sax->processingInstruction(ctxt->userData,
4961 target, NULL);
Chris Evans77404b82011-12-14 16:18:25 +08004962 if (ctxt->instate != XML_PARSER_EOF)
4963 ctxt->instate = state;
Owen Taylor3473f882001-02-23 17:55:21 +00004964 return;
4965 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00004966 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00004967 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004968 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004969 ctxt->instate = state;
4970 return;
4971 }
4972 cur = CUR;
4973 if (!IS_BLANK(cur)) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00004974 xmlFatalErrMsgStr(ctxt, XML_ERR_SPACE_REQUIRED,
4975 "ParsePI: PI %s space expected\n", target);
Owen Taylor3473f882001-02-23 17:55:21 +00004976 }
4977 SKIP_BLANKS;
4978 cur = CUR_CHAR(l);
William M. Brack871611b2003-10-18 04:53:14 +00004979 while (IS_CHAR(cur) && /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00004980 ((cur != '?') || (NXT(1) != '>'))) {
4981 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00004982 xmlChar *tmp;
4983
Owen Taylor3473f882001-02-23 17:55:21 +00004984 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00004985 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4986 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004987 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00004988 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00004989 ctxt->instate = state;
4990 return;
4991 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00004992 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00004993 }
4994 count++;
4995 if (count > 50) {
4996 GROW;
4997 count = 0;
4998 }
4999 COPY_BUF(l,buf,len,cur);
5000 NEXTL(l);
5001 cur = CUR_CHAR(l);
5002 if (cur == 0) {
5003 SHRINK;
5004 GROW;
5005 cur = CUR_CHAR(l);
5006 }
5007 }
5008 buf[len] = 0;
5009 if (cur != '?') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00005010 xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5011 "ParsePI: PI %s never end ...\n", target);
Owen Taylor3473f882001-02-23 17:55:21 +00005012 } else {
5013 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005014 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5015 "PI declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005016 }
5017 SKIP(2);
5018
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00005019#ifdef LIBXML_CATALOG_ENABLED
5020 if (((state == XML_PARSER_MISC) ||
5021 (state == XML_PARSER_START)) &&
5022 (xmlStrEqual(target, XML_CATALOG_PI))) {
5023 xmlCatalogAllow allow = xmlCatalogGetDefaults();
5024 if ((allow == XML_CATA_ALLOW_DOCUMENT) ||
5025 (allow == XML_CATA_ALLOW_ALL))
5026 xmlParseCatalogPI(ctxt, buf);
5027 }
5028#endif
5029
5030
Owen Taylor3473f882001-02-23 17:55:21 +00005031 /*
5032 * SAX: PI detected.
5033 */
5034 if ((ctxt->sax) && (!ctxt->disableSAX) &&
5035 (ctxt->sax->processingInstruction != NULL))
5036 ctxt->sax->processingInstruction(ctxt->userData,
5037 target, buf);
5038 }
5039 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00005040 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005041 xmlFatalErr(ctxt, XML_ERR_PI_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005042 }
Chris Evans77404b82011-12-14 16:18:25 +08005043 if (ctxt->instate != XML_PARSER_EOF)
5044 ctxt->instate = state;
Owen Taylor3473f882001-02-23 17:55:21 +00005045 }
5046}
5047
5048/**
5049 * xmlParseNotationDecl:
5050 * @ctxt: an XML parser context
5051 *
5052 * parse a notation declaration
5053 *
5054 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
5055 *
5056 * Hence there is actually 3 choices:
5057 * 'PUBLIC' S PubidLiteral
5058 * 'PUBLIC' S PubidLiteral S SystemLiteral
5059 * and 'SYSTEM' S SystemLiteral
5060 *
5061 * See the NOTE on xmlParseExternalID().
5062 */
5063
5064void
5065xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005066 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00005067 xmlChar *Pubid;
5068 xmlChar *Systemid;
5069
Daniel Veillarda07050d2003-10-19 14:46:32 +00005070 if (CMP10(CUR_PTR, '<', '!', 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005071 xmlParserInputPtr input = ctxt->input;
5072 SHRINK;
5073 SKIP(10);
William M. Brack76e95df2003-10-18 16:20:14 +00005074 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005075 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5076 "Space required after '<!NOTATION'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005077 return;
5078 }
5079 SKIP_BLANKS;
5080
Daniel Veillard76d66f42001-05-16 21:05:17 +00005081 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005082 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005083 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005084 return;
5085 }
William M. Brack76e95df2003-10-18 16:20:14 +00005086 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005087 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005088 "Space required after the NOTATION name'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005089 return;
5090 }
Daniel Veillard37334572008-07-31 08:20:02 +00005091 if (xmlStrchr(name, ':') != NULL) {
5092 xmlNsErr(ctxt, XML_NS_ERR_COLON,
5093 "colon are forbidden from notation names '%s'\n",
5094 name, NULL, NULL);
5095 }
Owen Taylor3473f882001-02-23 17:55:21 +00005096 SKIP_BLANKS;
5097
5098 /*
5099 * Parse the IDs.
5100 */
5101 Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
5102 SKIP_BLANKS;
5103
5104 if (RAW == '>') {
5105 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005106 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5107 "Notation declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005108 }
5109 NEXT;
5110 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5111 (ctxt->sax->notationDecl != NULL))
5112 ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
5113 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005114 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005115 }
Owen Taylor3473f882001-02-23 17:55:21 +00005116 if (Systemid != NULL) xmlFree(Systemid);
5117 if (Pubid != NULL) xmlFree(Pubid);
5118 }
5119}
5120
5121/**
5122 * xmlParseEntityDecl:
5123 * @ctxt: an XML parser context
5124 *
5125 * parse <!ENTITY declarations
5126 *
5127 * [70] EntityDecl ::= GEDecl | PEDecl
5128 *
5129 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
5130 *
5131 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
5132 *
5133 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
5134 *
5135 * [74] PEDef ::= EntityValue | ExternalID
5136 *
5137 * [76] NDataDecl ::= S 'NDATA' S Name
5138 *
5139 * [ VC: Notation Declared ]
5140 * The Name must match the declared name of a notation.
5141 */
5142
5143void
5144xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005145 const xmlChar *name = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005146 xmlChar *value = NULL;
5147 xmlChar *URI = NULL, *literal = NULL;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005148 const xmlChar *ndata = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005149 int isParameter = 0;
5150 xmlChar *orig = NULL;
Daniel Veillardf5582f12002-06-11 10:08:16 +00005151 int skipped;
Owen Taylor3473f882001-02-23 17:55:21 +00005152
Daniel Veillard4c778d82005-01-23 17:37:44 +00005153 /* GROW; done in the caller */
Daniel Veillarda07050d2003-10-19 14:46:32 +00005154 if (CMP8(CUR_PTR, '<', '!', 'E', 'N', 'T', 'I', 'T', 'Y')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005155 xmlParserInputPtr input = ctxt->input;
Owen Taylor3473f882001-02-23 17:55:21 +00005156 SHRINK;
5157 SKIP(8);
Daniel Veillardf5582f12002-06-11 10:08:16 +00005158 skipped = SKIP_BLANKS;
5159 if (skipped == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005160 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5161 "Space required after '<!ENTITY'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005162 }
Owen Taylor3473f882001-02-23 17:55:21 +00005163
5164 if (RAW == '%') {
5165 NEXT;
Daniel Veillardf5582f12002-06-11 10:08:16 +00005166 skipped = SKIP_BLANKS;
5167 if (skipped == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005168 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5169 "Space required after '%'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005170 }
Owen Taylor3473f882001-02-23 17:55:21 +00005171 isParameter = 1;
5172 }
5173
Daniel Veillard76d66f42001-05-16 21:05:17 +00005174 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005175 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005176 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5177 "xmlParseEntityDecl: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005178 return;
5179 }
Daniel Veillard37334572008-07-31 08:20:02 +00005180 if (xmlStrchr(name, ':') != NULL) {
5181 xmlNsErr(ctxt, XML_NS_ERR_COLON,
5182 "colon are forbidden from entities names '%s'\n",
5183 name, NULL, NULL);
5184 }
Daniel Veillardf5582f12002-06-11 10:08:16 +00005185 skipped = SKIP_BLANKS;
5186 if (skipped == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005187 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5188 "Space required after the entity name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005189 }
Owen Taylor3473f882001-02-23 17:55:21 +00005190
Daniel Veillardf5582f12002-06-11 10:08:16 +00005191 ctxt->instate = XML_PARSER_ENTITY_DECL;
Owen Taylor3473f882001-02-23 17:55:21 +00005192 /*
5193 * handle the various case of definitions...
5194 */
5195 if (isParameter) {
5196 if ((RAW == '"') || (RAW == '\'')) {
5197 value = xmlParseEntityValue(ctxt, &orig);
5198 if (value) {
5199 if ((ctxt->sax != NULL) &&
5200 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5201 ctxt->sax->entityDecl(ctxt->userData, name,
5202 XML_INTERNAL_PARAMETER_ENTITY,
5203 NULL, NULL, value);
5204 }
5205 } else {
5206 URI = xmlParseExternalID(ctxt, &literal, 1);
5207 if ((URI == NULL) && (literal == NULL)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005208 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005209 }
5210 if (URI) {
5211 xmlURIPtr uri;
5212
5213 uri = xmlParseURI((const char *) URI);
5214 if (uri == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00005215 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
5216 "Invalid URI: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00005217 /*
5218 * This really ought to be a well formedness error
5219 * but the XML Core WG decided otherwise c.f. issue
5220 * E26 of the XML erratas.
5221 */
Owen Taylor3473f882001-02-23 17:55:21 +00005222 } else {
5223 if (uri->fragment != NULL) {
Daniel Veillard4a7ae502002-02-18 19:18:17 +00005224 /*
5225 * Okay this is foolish to block those but not
5226 * invalid URIs.
5227 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005228 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005229 } else {
5230 if ((ctxt->sax != NULL) &&
5231 (!ctxt->disableSAX) &&
5232 (ctxt->sax->entityDecl != NULL))
5233 ctxt->sax->entityDecl(ctxt->userData, name,
5234 XML_EXTERNAL_PARAMETER_ENTITY,
5235 literal, URI, NULL);
5236 }
5237 xmlFreeURI(uri);
5238 }
5239 }
5240 }
5241 } else {
5242 if ((RAW == '"') || (RAW == '\'')) {
5243 value = xmlParseEntityValue(ctxt, &orig);
5244 if ((ctxt->sax != NULL) &&
5245 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5246 ctxt->sax->entityDecl(ctxt->userData, name,
5247 XML_INTERNAL_GENERAL_ENTITY,
5248 NULL, NULL, value);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005249 /*
5250 * For expat compatibility in SAX mode.
5251 */
5252 if ((ctxt->myDoc == NULL) ||
5253 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
5254 if (ctxt->myDoc == NULL) {
5255 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
Daniel Veillard68b6e022008-03-31 09:26:00 +00005256 if (ctxt->myDoc == NULL) {
5257 xmlErrMemory(ctxt, "New Doc failed");
5258 return;
5259 }
Daniel Veillardae0765b2008-07-31 19:54:59 +00005260 ctxt->myDoc->properties = XML_DOC_INTERNAL;
Daniel Veillard5997aca2002-03-18 18:36:20 +00005261 }
5262 if (ctxt->myDoc->intSubset == NULL)
5263 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5264 BAD_CAST "fake", NULL, NULL);
5265
Daniel Veillard1af9a412003-08-20 22:54:39 +00005266 xmlSAX2EntityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY,
5267 NULL, NULL, value);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005268 }
Owen Taylor3473f882001-02-23 17:55:21 +00005269 } else {
5270 URI = xmlParseExternalID(ctxt, &literal, 1);
5271 if ((URI == NULL) && (literal == NULL)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005272 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005273 }
5274 if (URI) {
5275 xmlURIPtr uri;
5276
5277 uri = xmlParseURI((const char *)URI);
5278 if (uri == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00005279 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
5280 "Invalid URI: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00005281 /*
5282 * This really ought to be a well formedness error
5283 * but the XML Core WG decided otherwise c.f. issue
5284 * E26 of the XML erratas.
5285 */
Owen Taylor3473f882001-02-23 17:55:21 +00005286 } else {
5287 if (uri->fragment != NULL) {
Daniel Veillard4a7ae502002-02-18 19:18:17 +00005288 /*
5289 * Okay this is foolish to block those but not
5290 * invalid URIs.
5291 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005292 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005293 }
5294 xmlFreeURI(uri);
5295 }
5296 }
William M. Brack76e95df2003-10-18 16:20:14 +00005297 if ((RAW != '>') && (!IS_BLANK_CH(CUR))) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005298 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5299 "Space required before 'NDATA'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005300 }
5301 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005302 if (CMP5(CUR_PTR, 'N', 'D', 'A', 'T', 'A')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005303 SKIP(5);
William M. Brack76e95df2003-10-18 16:20:14 +00005304 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005305 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5306 "Space required after 'NDATA'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005307 }
5308 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00005309 ndata = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005310 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5311 (ctxt->sax->unparsedEntityDecl != NULL))
5312 ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
5313 literal, URI, ndata);
5314 } else {
5315 if ((ctxt->sax != NULL) &&
5316 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5317 ctxt->sax->entityDecl(ctxt->userData, name,
5318 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5319 literal, URI, NULL);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005320 /*
5321 * For expat compatibility in SAX mode.
5322 * assuming the entity repalcement was asked for
5323 */
5324 if ((ctxt->replaceEntities != 0) &&
5325 ((ctxt->myDoc == NULL) ||
5326 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) {
5327 if (ctxt->myDoc == NULL) {
5328 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
Daniel Veillard68b6e022008-03-31 09:26:00 +00005329 if (ctxt->myDoc == NULL) {
5330 xmlErrMemory(ctxt, "New Doc failed");
5331 return;
5332 }
Daniel Veillardae0765b2008-07-31 19:54:59 +00005333 ctxt->myDoc->properties = XML_DOC_INTERNAL;
Daniel Veillard5997aca2002-03-18 18:36:20 +00005334 }
5335
5336 if (ctxt->myDoc->intSubset == NULL)
5337 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5338 BAD_CAST "fake", NULL, NULL);
Daniel Veillard1af9a412003-08-20 22:54:39 +00005339 xmlSAX2EntityDecl(ctxt, name,
5340 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5341 literal, URI, NULL);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005342 }
Owen Taylor3473f882001-02-23 17:55:21 +00005343 }
5344 }
5345 }
5346 SKIP_BLANKS;
5347 if (RAW != '>') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00005348 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00005349 "xmlParseEntityDecl: entity %s not terminated\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00005350 } else {
5351 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005352 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5353 "Entity declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005354 }
5355 NEXT;
5356 }
5357 if (orig != NULL) {
5358 /*
5359 * Ugly mechanism to save the raw entity value.
5360 */
5361 xmlEntityPtr cur = NULL;
5362
5363 if (isParameter) {
5364 if ((ctxt->sax != NULL) &&
5365 (ctxt->sax->getParameterEntity != NULL))
5366 cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
5367 } else {
5368 if ((ctxt->sax != NULL) &&
5369 (ctxt->sax->getEntity != NULL))
5370 cur = ctxt->sax->getEntity(ctxt->userData, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005371 if ((cur == NULL) && (ctxt->userData==ctxt)) {
Daniel Veillard1af9a412003-08-20 22:54:39 +00005372 cur = xmlSAX2GetEntity(ctxt, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005373 }
Owen Taylor3473f882001-02-23 17:55:21 +00005374 }
5375 if (cur != NULL) {
5376 if (cur->orig != NULL)
5377 xmlFree(orig);
5378 else
5379 cur->orig = orig;
5380 } else
5381 xmlFree(orig);
5382 }
Owen Taylor3473f882001-02-23 17:55:21 +00005383 if (value != NULL) xmlFree(value);
5384 if (URI != NULL) xmlFree(URI);
5385 if (literal != NULL) xmlFree(literal);
Owen Taylor3473f882001-02-23 17:55:21 +00005386 }
5387}
5388
5389/**
5390 * xmlParseDefaultDecl:
5391 * @ctxt: an XML parser context
5392 * @value: Receive a possible fixed default value for the attribute
5393 *
5394 * Parse an attribute default declaration
5395 *
5396 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
5397 *
5398 * [ VC: Required Attribute ]
5399 * if the default declaration is the keyword #REQUIRED, then the
5400 * attribute must be specified for all elements of the type in the
5401 * attribute-list declaration.
5402 *
5403 * [ VC: Attribute Default Legal ]
5404 * The declared default value must meet the lexical constraints of
5405 * the declared attribute type c.f. xmlValidateAttributeDecl()
5406 *
5407 * [ VC: Fixed Attribute Default ]
5408 * if an attribute has a default value declared with the #FIXED
5409 * keyword, instances of that attribute must match the default value.
5410 *
5411 * [ WFC: No < in Attribute Values ]
5412 * handled in xmlParseAttValue()
5413 *
5414 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
5415 * or XML_ATTRIBUTE_FIXED.
5416 */
5417
5418int
5419xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
5420 int val;
5421 xmlChar *ret;
5422
5423 *value = NULL;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005424 if (CMP9(CUR_PTR, '#', 'R', 'E', 'Q', 'U', 'I', 'R', 'E', 'D')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005425 SKIP(9);
5426 return(XML_ATTRIBUTE_REQUIRED);
5427 }
Daniel Veillarda07050d2003-10-19 14:46:32 +00005428 if (CMP8(CUR_PTR, '#', 'I', 'M', 'P', 'L', 'I', 'E', 'D')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005429 SKIP(8);
5430 return(XML_ATTRIBUTE_IMPLIED);
5431 }
5432 val = XML_ATTRIBUTE_NONE;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005433 if (CMP6(CUR_PTR, '#', 'F', 'I', 'X', 'E', 'D')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005434 SKIP(6);
5435 val = XML_ATTRIBUTE_FIXED;
William M. Brack76e95df2003-10-18 16:20:14 +00005436 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005437 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5438 "Space required after '#FIXED'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005439 }
5440 SKIP_BLANKS;
5441 }
5442 ret = xmlParseAttValue(ctxt);
5443 ctxt->instate = XML_PARSER_DTD;
5444 if (ret == NULL) {
William M. Brack7b9154b2003-09-27 19:23:50 +00005445 xmlFatalErrMsg(ctxt, (xmlParserErrors)ctxt->errNo,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005446 "Attribute default value declaration error\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005447 } else
5448 *value = ret;
5449 return(val);
5450}
5451
5452/**
5453 * xmlParseNotationType:
5454 * @ctxt: an XML parser context
5455 *
5456 * parse an Notation attribute type.
5457 *
5458 * Note: the leading 'NOTATION' S part has already being parsed...
5459 *
5460 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5461 *
5462 * [ VC: Notation Attributes ]
5463 * Values of this type must match one of the notation names included
5464 * in the declaration; all notation names in the declaration must be declared.
5465 *
5466 * Returns: the notation attribute tree built while parsing
5467 */
5468
5469xmlEnumerationPtr
5470xmlParseNotationType(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005471 const xmlChar *name;
Daniel Veillard49d44052008-08-27 19:57:06 +00005472 xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00005473
5474 if (RAW != '(') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005475 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005476 return(NULL);
5477 }
5478 SHRINK;
5479 do {
5480 NEXT;
5481 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00005482 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005483 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005484 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5485 "Name expected in NOTATION declaration\n");
Daniel Veillard489f9672009-08-10 16:49:30 +02005486 xmlFreeEnumeration(ret);
5487 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005488 }
Daniel Veillard49d44052008-08-27 19:57:06 +00005489 tmp = ret;
5490 while (tmp != NULL) {
5491 if (xmlStrEqual(name, tmp->name)) {
5492 xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5493 "standalone: attribute notation value token %s duplicated\n",
5494 name, NULL);
5495 if (!xmlDictOwns(ctxt->dict, name))
5496 xmlFree((xmlChar *) name);
5497 break;
5498 }
5499 tmp = tmp->next;
5500 }
5501 if (tmp == NULL) {
5502 cur = xmlCreateEnumeration(name);
Daniel Veillard489f9672009-08-10 16:49:30 +02005503 if (cur == NULL) {
5504 xmlFreeEnumeration(ret);
5505 return(NULL);
5506 }
Daniel Veillard49d44052008-08-27 19:57:06 +00005507 if (last == NULL) ret = last = cur;
5508 else {
5509 last->next = cur;
5510 last = cur;
5511 }
Owen Taylor3473f882001-02-23 17:55:21 +00005512 }
5513 SKIP_BLANKS;
5514 } while (RAW == '|');
5515 if (RAW != ')') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005516 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
Daniel Veillard489f9672009-08-10 16:49:30 +02005517 xmlFreeEnumeration(ret);
5518 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005519 }
5520 NEXT;
5521 return(ret);
5522}
5523
5524/**
5525 * xmlParseEnumerationType:
5526 * @ctxt: an XML parser context
5527 *
5528 * parse an Enumeration attribute type.
5529 *
5530 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
5531 *
5532 * [ VC: Enumeration ]
5533 * Values of this type must match one of the Nmtoken tokens in
5534 * the declaration
5535 *
5536 * Returns: the enumeration attribute tree built while parsing
5537 */
5538
5539xmlEnumerationPtr
5540xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
5541 xmlChar *name;
Daniel Veillard49d44052008-08-27 19:57:06 +00005542 xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00005543
5544 if (RAW != '(') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005545 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005546 return(NULL);
5547 }
5548 SHRINK;
5549 do {
5550 NEXT;
5551 SKIP_BLANKS;
5552 name = xmlParseNmtoken(ctxt);
5553 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005554 xmlFatalErr(ctxt, XML_ERR_NMTOKEN_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005555 return(ret);
5556 }
Daniel Veillard49d44052008-08-27 19:57:06 +00005557 tmp = ret;
5558 while (tmp != NULL) {
5559 if (xmlStrEqual(name, tmp->name)) {
5560 xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5561 "standalone: attribute enumeration value token %s duplicated\n",
5562 name, NULL);
5563 if (!xmlDictOwns(ctxt->dict, name))
5564 xmlFree(name);
5565 break;
5566 }
5567 tmp = tmp->next;
5568 }
5569 if (tmp == NULL) {
5570 cur = xmlCreateEnumeration(name);
5571 if (!xmlDictOwns(ctxt->dict, name))
5572 xmlFree(name);
Daniel Veillard489f9672009-08-10 16:49:30 +02005573 if (cur == NULL) {
5574 xmlFreeEnumeration(ret);
5575 return(NULL);
5576 }
Daniel Veillard49d44052008-08-27 19:57:06 +00005577 if (last == NULL) ret = last = cur;
5578 else {
5579 last->next = cur;
5580 last = cur;
5581 }
Owen Taylor3473f882001-02-23 17:55:21 +00005582 }
5583 SKIP_BLANKS;
5584 } while (RAW == '|');
5585 if (RAW != ')') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005586 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005587 return(ret);
5588 }
5589 NEXT;
5590 return(ret);
5591}
5592
5593/**
5594 * xmlParseEnumeratedType:
5595 * @ctxt: an XML parser context
5596 * @tree: the enumeration tree built while parsing
5597 *
5598 * parse an Enumerated attribute type.
5599 *
5600 * [57] EnumeratedType ::= NotationType | Enumeration
5601 *
5602 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5603 *
5604 *
5605 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
5606 */
5607
5608int
5609xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
Daniel Veillarda07050d2003-10-19 14:46:32 +00005610 if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005611 SKIP(8);
William M. Brack76e95df2003-10-18 16:20:14 +00005612 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005613 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5614 "Space required after 'NOTATION'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005615 return(0);
5616 }
5617 SKIP_BLANKS;
5618 *tree = xmlParseNotationType(ctxt);
5619 if (*tree == NULL) return(0);
5620 return(XML_ATTRIBUTE_NOTATION);
5621 }
5622 *tree = xmlParseEnumerationType(ctxt);
5623 if (*tree == NULL) return(0);
5624 return(XML_ATTRIBUTE_ENUMERATION);
5625}
5626
5627/**
5628 * xmlParseAttributeType:
5629 * @ctxt: an XML parser context
5630 * @tree: the enumeration tree built while parsing
5631 *
5632 * parse the Attribute list def for an element
5633 *
5634 * [54] AttType ::= StringType | TokenizedType | EnumeratedType
5635 *
5636 * [55] StringType ::= 'CDATA'
5637 *
5638 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
5639 * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
5640 *
5641 * Validity constraints for attribute values syntax are checked in
5642 * xmlValidateAttributeValue()
5643 *
5644 * [ VC: ID ]
5645 * Values of type ID must match the Name production. A name must not
5646 * appear more than once in an XML document as a value of this type;
5647 * i.e., ID values must uniquely identify the elements which bear them.
5648 *
5649 * [ VC: One ID per Element Type ]
5650 * No element type may have more than one ID attribute specified.
5651 *
5652 * [ VC: ID Attribute Default ]
5653 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
5654 *
5655 * [ VC: IDREF ]
5656 * Values of type IDREF must match the Name production, and values
5657 * of type IDREFS must match Names; each IDREF Name must match the value
5658 * of an ID attribute on some element in the XML document; i.e. IDREF
5659 * values must match the value of some ID attribute.
5660 *
5661 * [ VC: Entity Name ]
5662 * Values of type ENTITY must match the Name production, values
5663 * of type ENTITIES must match Names; each Entity Name must match the
5664 * name of an unparsed entity declared in the DTD.
5665 *
5666 * [ VC: Name Token ]
5667 * Values of type NMTOKEN must match the Nmtoken production; values
5668 * of type NMTOKENS must match Nmtokens.
5669 *
5670 * Returns the attribute type
5671 */
5672int
5673xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
5674 SHRINK;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005675 if (CMP5(CUR_PTR, 'C', 'D', 'A', 'T', 'A')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005676 SKIP(5);
5677 return(XML_ATTRIBUTE_CDATA);
Daniel Veillarda07050d2003-10-19 14:46:32 +00005678 } else if (CMP6(CUR_PTR, 'I', 'D', 'R', 'E', 'F', 'S')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005679 SKIP(6);
5680 return(XML_ATTRIBUTE_IDREFS);
Daniel Veillarda07050d2003-10-19 14:46:32 +00005681 } else if (CMP5(CUR_PTR, 'I', 'D', 'R', 'E', 'F')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005682 SKIP(5);
5683 return(XML_ATTRIBUTE_IDREF);
5684 } else if ((RAW == 'I') && (NXT(1) == 'D')) {
5685 SKIP(2);
5686 return(XML_ATTRIBUTE_ID);
Daniel Veillarda07050d2003-10-19 14:46:32 +00005687 } else if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005688 SKIP(6);
5689 return(XML_ATTRIBUTE_ENTITY);
Daniel Veillarda07050d2003-10-19 14:46:32 +00005690 } else if (CMP8(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'I', 'E', 'S')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005691 SKIP(8);
5692 return(XML_ATTRIBUTE_ENTITIES);
Daniel Veillarda07050d2003-10-19 14:46:32 +00005693 } else if (CMP8(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N', 'S')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005694 SKIP(8);
5695 return(XML_ATTRIBUTE_NMTOKENS);
Daniel Veillarda07050d2003-10-19 14:46:32 +00005696 } else if (CMP7(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005697 SKIP(7);
5698 return(XML_ATTRIBUTE_NMTOKEN);
5699 }
5700 return(xmlParseEnumeratedType(ctxt, tree));
5701}
5702
5703/**
5704 * xmlParseAttributeListDecl:
5705 * @ctxt: an XML parser context
5706 *
5707 * : parse the Attribute list def for an element
5708 *
5709 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
5710 *
5711 * [53] AttDef ::= S Name S AttType S DefaultDecl
5712 *
5713 */
5714void
5715xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005716 const xmlChar *elemName;
5717 const xmlChar *attrName;
Owen Taylor3473f882001-02-23 17:55:21 +00005718 xmlEnumerationPtr tree;
5719
Daniel Veillarda07050d2003-10-19 14:46:32 +00005720 if (CMP9(CUR_PTR, '<', '!', 'A', 'T', 'T', 'L', 'I', 'S', 'T')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005721 xmlParserInputPtr input = ctxt->input;
5722
5723 SKIP(9);
William M. Brack76e95df2003-10-18 16:20:14 +00005724 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005725 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005726 "Space required after '<!ATTLIST'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005727 }
5728 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00005729 elemName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005730 if (elemName == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005731 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5732 "ATTLIST: no name for Element\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005733 return;
5734 }
5735 SKIP_BLANKS;
5736 GROW;
5737 while (RAW != '>') {
5738 const xmlChar *check = CUR_PTR;
5739 int type;
5740 int def;
5741 xmlChar *defaultValue = NULL;
5742
5743 GROW;
5744 tree = NULL;
Daniel Veillard76d66f42001-05-16 21:05:17 +00005745 attrName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005746 if (attrName == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005747 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5748 "ATTLIST: no name for Attribute\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005749 break;
5750 }
5751 GROW;
William M. Brack76e95df2003-10-18 16:20:14 +00005752 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005753 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005754 "Space required after the attribute name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005755 break;
5756 }
5757 SKIP_BLANKS;
5758
5759 type = xmlParseAttributeType(ctxt, &tree);
5760 if (type <= 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00005761 break;
5762 }
5763
5764 GROW;
William M. Brack76e95df2003-10-18 16:20:14 +00005765 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005766 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5767 "Space required after the attribute type\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005768 if (tree != NULL)
5769 xmlFreeEnumeration(tree);
5770 break;
5771 }
5772 SKIP_BLANKS;
5773
5774 def = xmlParseDefaultDecl(ctxt, &defaultValue);
5775 if (def <= 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00005776 if (defaultValue != NULL)
5777 xmlFree(defaultValue);
5778 if (tree != NULL)
5779 xmlFreeEnumeration(tree);
5780 break;
5781 }
Daniel Veillard97c9ce22008-03-25 16:52:41 +00005782 if ((type != XML_ATTRIBUTE_CDATA) && (defaultValue != NULL))
5783 xmlAttrNormalizeSpace(defaultValue, defaultValue);
Owen Taylor3473f882001-02-23 17:55:21 +00005784
5785 GROW;
5786 if (RAW != '>') {
William M. Brack76e95df2003-10-18 16:20:14 +00005787 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005788 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005789 "Space required after the attribute default value\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005790 if (defaultValue != NULL)
5791 xmlFree(defaultValue);
5792 if (tree != NULL)
5793 xmlFreeEnumeration(tree);
5794 break;
5795 }
5796 SKIP_BLANKS;
5797 }
5798 if (check == CUR_PTR) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005799 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
5800 "in xmlParseAttributeListDecl\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005801 if (defaultValue != NULL)
5802 xmlFree(defaultValue);
5803 if (tree != NULL)
5804 xmlFreeEnumeration(tree);
5805 break;
5806 }
5807 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5808 (ctxt->sax->attributeDecl != NULL))
5809 ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
5810 type, def, defaultValue, tree);
Daniel Veillarde57ec792003-09-10 10:50:59 +00005811 else if (tree != NULL)
5812 xmlFreeEnumeration(tree);
5813
5814 if ((ctxt->sax2) && (defaultValue != NULL) &&
5815 (def != XML_ATTRIBUTE_IMPLIED) &&
5816 (def != XML_ATTRIBUTE_REQUIRED)) {
5817 xmlAddDefAttrs(ctxt, elemName, attrName, defaultValue);
5818 }
Daniel Veillardac4118d2008-01-11 05:27:32 +00005819 if (ctxt->sax2) {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00005820 xmlAddSpecialAttr(ctxt, elemName, attrName, type);
5821 }
Owen Taylor3473f882001-02-23 17:55:21 +00005822 if (defaultValue != NULL)
5823 xmlFree(defaultValue);
5824 GROW;
5825 }
5826 if (RAW == '>') {
5827 if (input != ctxt->input) {
Daniel Veillard49d44052008-08-27 19:57:06 +00005828 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
5829 "Attribute list declaration doesn't start and stop in the same entity\n",
5830 NULL, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005831 }
5832 NEXT;
5833 }
Owen Taylor3473f882001-02-23 17:55:21 +00005834 }
5835}
5836
5837/**
5838 * xmlParseElementMixedContentDecl:
5839 * @ctxt: an XML parser context
Daniel Veillarda9b66d02002-12-11 14:23:49 +00005840 * @inputchk: the input used for the current entity, needed for boundary checks
Owen Taylor3473f882001-02-23 17:55:21 +00005841 *
5842 * parse the declaration for a Mixed Element content
5843 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
5844 *
5845 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
5846 * '(' S? '#PCDATA' S? ')'
5847 *
5848 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
5849 *
5850 * [ VC: No Duplicate Types ]
5851 * The same name must not appear more than once in a single
5852 * mixed-content declaration.
5853 *
5854 * returns: the list of the xmlElementContentPtr describing the element choices
5855 */
5856xmlElementContentPtr
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005857xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
Owen Taylor3473f882001-02-23 17:55:21 +00005858 xmlElementContentPtr ret = NULL, cur = NULL, n;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005859 const xmlChar *elem = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005860
5861 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005862 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005863 SKIP(7);
5864 SKIP_BLANKS;
5865 SHRINK;
5866 if (RAW == ')') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005867 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00005868 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
5869"Element content declaration doesn't start and stop in the same entity\n",
Daniel Veillardae0765b2008-07-31 19:54:59 +00005870 NULL, NULL);
Daniel Veillard8dc16a62002-02-19 21:08:48 +00005871 }
Owen Taylor3473f882001-02-23 17:55:21 +00005872 NEXT;
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005873 ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
Daniel Veillard68b6e022008-03-31 09:26:00 +00005874 if (ret == NULL)
5875 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005876 if (RAW == '*') {
5877 ret->ocur = XML_ELEMENT_CONTENT_MULT;
5878 NEXT;
5879 }
5880 return(ret);
5881 }
5882 if ((RAW == '(') || (RAW == '|')) {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005883 ret = cur = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
Owen Taylor3473f882001-02-23 17:55:21 +00005884 if (ret == NULL) return(NULL);
5885 }
5886 while (RAW == '|') {
5887 NEXT;
5888 if (elem == NULL) {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005889 ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
Owen Taylor3473f882001-02-23 17:55:21 +00005890 if (ret == NULL) return(NULL);
5891 ret->c1 = cur;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005892 if (cur != NULL)
5893 cur->parent = ret;
Owen Taylor3473f882001-02-23 17:55:21 +00005894 cur = ret;
5895 } else {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005896 n = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
Owen Taylor3473f882001-02-23 17:55:21 +00005897 if (n == NULL) return(NULL);
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005898 n->c1 = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005899 if (n->c1 != NULL)
5900 n->c1->parent = n;
Owen Taylor3473f882001-02-23 17:55:21 +00005901 cur->c2 = n;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005902 if (n != NULL)
5903 n->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00005904 cur = n;
Owen Taylor3473f882001-02-23 17:55:21 +00005905 }
5906 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00005907 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005908 if (elem == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005909 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005910 "xmlParseElementMixedContentDecl : Name expected\n");
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005911 xmlFreeDocElementContent(ctxt->myDoc, cur);
Owen Taylor3473f882001-02-23 17:55:21 +00005912 return(NULL);
5913 }
5914 SKIP_BLANKS;
5915 GROW;
5916 }
5917 if ((RAW == ')') && (NXT(1) == '*')) {
5918 if (elem != NULL) {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005919 cur->c2 = xmlNewDocElementContent(ctxt->myDoc, elem,
Owen Taylor3473f882001-02-23 17:55:21 +00005920 XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005921 if (cur->c2 != NULL)
5922 cur->c2->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00005923 }
Daniel Veillardd44b9362009-09-07 12:15:08 +02005924 if (ret != NULL)
5925 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005926 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00005927 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
5928"Element content declaration doesn't start and stop in the same entity\n",
Daniel Veillardae0765b2008-07-31 19:54:59 +00005929 NULL, NULL);
Daniel Veillard8dc16a62002-02-19 21:08:48 +00005930 }
Owen Taylor3473f882001-02-23 17:55:21 +00005931 SKIP(2);
5932 } else {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005933 xmlFreeDocElementContent(ctxt->myDoc, ret);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005934 xmlFatalErr(ctxt, XML_ERR_MIXED_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005935 return(NULL);
5936 }
5937
5938 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005939 xmlFatalErr(ctxt, XML_ERR_PCDATA_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005940 }
5941 return(ret);
5942}
5943
5944/**
Daniel Veillard489f9672009-08-10 16:49:30 +02005945 * xmlParseElementChildrenContentDeclPriv:
Owen Taylor3473f882001-02-23 17:55:21 +00005946 * @ctxt: an XML parser context
Daniel Veillarda9b66d02002-12-11 14:23:49 +00005947 * @inputchk: the input used for the current entity, needed for boundary checks
Daniel Veillard489f9672009-08-10 16:49:30 +02005948 * @depth: the level of recursion
Owen Taylor3473f882001-02-23 17:55:21 +00005949 *
5950 * parse the declaration for a Mixed Element content
5951 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
5952 *
5953 *
5954 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
5955 *
5956 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
5957 *
5958 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
5959 *
5960 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
5961 *
5962 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
5963 * TODO Parameter-entity replacement text must be properly nested
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005964 * with parenthesized groups. That is to say, if either of the
Owen Taylor3473f882001-02-23 17:55:21 +00005965 * opening or closing parentheses in a choice, seq, or Mixed
5966 * construct is contained in the replacement text for a parameter
5967 * entity, both must be contained in the same replacement text. For
5968 * interoperability, if a parameter-entity reference appears in a
5969 * choice, seq, or Mixed construct, its replacement text should not
5970 * be empty, and neither the first nor last non-blank character of
5971 * the replacement text should be a connector (| or ,).
5972 *
Daniel Veillard5e2dace2001-07-18 19:30:27 +00005973 * Returns the tree of xmlElementContentPtr describing the element
Owen Taylor3473f882001-02-23 17:55:21 +00005974 * hierarchy.
5975 */
Daniel Veillard489f9672009-08-10 16:49:30 +02005976static xmlElementContentPtr
5977xmlParseElementChildrenContentDeclPriv(xmlParserCtxtPtr ctxt, int inputchk,
5978 int depth) {
Owen Taylor3473f882001-02-23 17:55:21 +00005979 xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005980 const xmlChar *elem;
Owen Taylor3473f882001-02-23 17:55:21 +00005981 xmlChar type = 0;
5982
Daniel Veillard489f9672009-08-10 16:49:30 +02005983 if (((depth > 128) && ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
5984 (depth > 2048)) {
5985 xmlFatalErrMsgInt(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED,
5986"xmlParseElementChildrenContentDecl : depth %d too deep, use XML_PARSE_HUGE\n",
5987 depth);
5988 return(NULL);
5989 }
Owen Taylor3473f882001-02-23 17:55:21 +00005990 SKIP_BLANKS;
5991 GROW;
5992 if (RAW == '(') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005993 int inputid = ctxt->input->id;
Daniel Veillard8dc16a62002-02-19 21:08:48 +00005994
Owen Taylor3473f882001-02-23 17:55:21 +00005995 /* Recurse on first child */
5996 NEXT;
5997 SKIP_BLANKS;
Daniel Veillard489f9672009-08-10 16:49:30 +02005998 cur = ret = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
5999 depth + 1);
Owen Taylor3473f882001-02-23 17:55:21 +00006000 SKIP_BLANKS;
6001 GROW;
6002 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00006003 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006004 if (elem == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006005 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006006 return(NULL);
6007 }
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006008 cur = ret = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006009 if (cur == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006010 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006011 return(NULL);
6012 }
Owen Taylor3473f882001-02-23 17:55:21 +00006013 GROW;
6014 if (RAW == '?') {
6015 cur->ocur = XML_ELEMENT_CONTENT_OPT;
6016 NEXT;
6017 } else if (RAW == '*') {
6018 cur->ocur = XML_ELEMENT_CONTENT_MULT;
6019 NEXT;
6020 } else if (RAW == '+') {
6021 cur->ocur = XML_ELEMENT_CONTENT_PLUS;
6022 NEXT;
6023 } else {
6024 cur->ocur = XML_ELEMENT_CONTENT_ONCE;
6025 }
Owen Taylor3473f882001-02-23 17:55:21 +00006026 GROW;
6027 }
6028 SKIP_BLANKS;
6029 SHRINK;
6030 while (RAW != ')') {
6031 /*
6032 * Each loop we parse one separator and one element.
6033 */
6034 if (RAW == ',') {
6035 if (type == 0) type = CUR;
6036
6037 /*
6038 * Detect "Name | Name , Name" error
6039 */
6040 else if (type != CUR) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006041 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00006042 "xmlParseElementChildrenContentDecl : '%c' expected\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006043 type);
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00006044 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006045 xmlFreeDocElementContent(ctxt->myDoc, last);
Owen Taylor3473f882001-02-23 17:55:21 +00006046 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006047 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00006048 return(NULL);
6049 }
6050 NEXT;
6051
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006052 op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_SEQ);
Owen Taylor3473f882001-02-23 17:55:21 +00006053 if (op == NULL) {
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00006054 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006055 xmlFreeDocElementContent(ctxt->myDoc, last);
6056 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00006057 return(NULL);
6058 }
6059 if (last == NULL) {
6060 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006061 if (ret != NULL)
6062 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00006063 ret = cur = op;
6064 } else {
6065 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006066 if (op != NULL)
6067 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00006068 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006069 if (last != NULL)
6070 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00006071 cur =op;
6072 last = NULL;
6073 }
6074 } else if (RAW == '|') {
6075 if (type == 0) type = CUR;
6076
6077 /*
6078 * Detect "Name , Name | Name" error
6079 */
6080 else if (type != CUR) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006081 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00006082 "xmlParseElementChildrenContentDecl : '%c' expected\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006083 type);
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00006084 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006085 xmlFreeDocElementContent(ctxt->myDoc, last);
Owen Taylor3473f882001-02-23 17:55:21 +00006086 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006087 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00006088 return(NULL);
6089 }
6090 NEXT;
6091
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006092 op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
Owen Taylor3473f882001-02-23 17:55:21 +00006093 if (op == NULL) {
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00006094 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006095 xmlFreeDocElementContent(ctxt->myDoc, last);
Owen Taylor3473f882001-02-23 17:55:21 +00006096 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006097 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00006098 return(NULL);
6099 }
6100 if (last == NULL) {
6101 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006102 if (ret != NULL)
6103 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00006104 ret = cur = op;
6105 } else {
6106 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006107 if (op != NULL)
6108 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00006109 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006110 if (last != NULL)
6111 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00006112 cur =op;
6113 last = NULL;
6114 }
6115 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006116 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED, NULL);
Daniel Veillardc707d0b2008-01-24 14:48:54 +00006117 if ((last != NULL) && (last != ret))
6118 xmlFreeDocElementContent(ctxt->myDoc, last);
Owen Taylor3473f882001-02-23 17:55:21 +00006119 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006120 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00006121 return(NULL);
6122 }
6123 GROW;
6124 SKIP_BLANKS;
6125 GROW;
6126 if (RAW == '(') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006127 int inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00006128 /* Recurse on second child */
6129 NEXT;
6130 SKIP_BLANKS;
Daniel Veillard489f9672009-08-10 16:49:30 +02006131 last = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
6132 depth + 1);
Owen Taylor3473f882001-02-23 17:55:21 +00006133 SKIP_BLANKS;
6134 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00006135 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006136 if (elem == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006137 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006138 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006139 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00006140 return(NULL);
6141 }
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006142 last = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillard68b6e022008-03-31 09:26:00 +00006143 if (last == NULL) {
6144 if (ret != NULL)
6145 xmlFreeDocElementContent(ctxt->myDoc, ret);
6146 return(NULL);
6147 }
Owen Taylor3473f882001-02-23 17:55:21 +00006148 if (RAW == '?') {
6149 last->ocur = XML_ELEMENT_CONTENT_OPT;
6150 NEXT;
6151 } else if (RAW == '*') {
6152 last->ocur = XML_ELEMENT_CONTENT_MULT;
6153 NEXT;
6154 } else if (RAW == '+') {
6155 last->ocur = XML_ELEMENT_CONTENT_PLUS;
6156 NEXT;
6157 } else {
6158 last->ocur = XML_ELEMENT_CONTENT_ONCE;
6159 }
6160 }
6161 SKIP_BLANKS;
6162 GROW;
6163 }
6164 if ((cur != NULL) && (last != NULL)) {
6165 cur->c2 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006166 if (last != NULL)
6167 last->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00006168 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006169 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00006170 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6171"Element content declaration doesn't start and stop in the same entity\n",
Daniel Veillardae0765b2008-07-31 19:54:59 +00006172 NULL, NULL);
Daniel Veillard8dc16a62002-02-19 21:08:48 +00006173 }
Owen Taylor3473f882001-02-23 17:55:21 +00006174 NEXT;
6175 if (RAW == '?') {
William M. Brackf8f2e8f2004-05-14 04:37:41 +00006176 if (ret != NULL) {
6177 if ((ret->ocur == XML_ELEMENT_CONTENT_PLUS) ||
6178 (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6179 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6180 else
6181 ret->ocur = XML_ELEMENT_CONTENT_OPT;
6182 }
Owen Taylor3473f882001-02-23 17:55:21 +00006183 NEXT;
6184 } else if (RAW == '*') {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00006185 if (ret != NULL) {
Daniel Veillarde470df72001-04-18 21:41:07 +00006186 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Daniel Veillardce2c2f02001-10-18 14:57:24 +00006187 cur = ret;
6188 /*
6189 * Some normalization:
6190 * (a | b* | c?)* == (a | b | c)*
6191 */
Daniel Veillard30e76072006-03-09 14:13:55 +00006192 while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00006193 if ((cur->c1 != NULL) &&
6194 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6195 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT)))
6196 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6197 if ((cur->c2 != NULL) &&
6198 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6199 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT)))
6200 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6201 cur = cur->c2;
6202 }
6203 }
Owen Taylor3473f882001-02-23 17:55:21 +00006204 NEXT;
6205 } else if (RAW == '+') {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00006206 if (ret != NULL) {
6207 int found = 0;
6208
William M. Brackf8f2e8f2004-05-14 04:37:41 +00006209 if ((ret->ocur == XML_ELEMENT_CONTENT_OPT) ||
6210 (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6211 ret->ocur = XML_ELEMENT_CONTENT_MULT;
William M. Brackeb8509c2004-05-14 03:48:02 +00006212 else
6213 ret->ocur = XML_ELEMENT_CONTENT_PLUS;
Daniel Veillardce2c2f02001-10-18 14:57:24 +00006214 /*
6215 * Some normalization:
6216 * (a | b*)+ == (a | b)*
6217 * (a | b?)+ == (a | b)*
6218 */
Daniel Veillard30e76072006-03-09 14:13:55 +00006219 while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00006220 if ((cur->c1 != NULL) &&
6221 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6222 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) {
6223 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6224 found = 1;
6225 }
6226 if ((cur->c2 != NULL) &&
6227 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6228 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) {
6229 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6230 found = 1;
6231 }
6232 cur = cur->c2;
6233 }
6234 if (found)
6235 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6236 }
Owen Taylor3473f882001-02-23 17:55:21 +00006237 NEXT;
6238 }
6239 return(ret);
6240}
6241
6242/**
Daniel Veillard489f9672009-08-10 16:49:30 +02006243 * xmlParseElementChildrenContentDecl:
6244 * @ctxt: an XML parser context
6245 * @inputchk: the input used for the current entity, needed for boundary checks
Daniel Veillard489f9672009-08-10 16:49:30 +02006246 *
6247 * parse the declaration for a Mixed Element content
6248 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6249 *
6250 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
6251 *
6252 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6253 *
6254 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6255 *
6256 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6257 *
6258 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6259 * TODO Parameter-entity replacement text must be properly nested
6260 * with parenthesized groups. That is to say, if either of the
6261 * opening or closing parentheses in a choice, seq, or Mixed
6262 * construct is contained in the replacement text for a parameter
6263 * entity, both must be contained in the same replacement text. For
6264 * interoperability, if a parameter-entity reference appears in a
6265 * choice, seq, or Mixed construct, its replacement text should not
6266 * be empty, and neither the first nor last non-blank character of
6267 * the replacement text should be a connector (| or ,).
6268 *
6269 * Returns the tree of xmlElementContentPtr describing the element
6270 * hierarchy.
6271 */
6272xmlElementContentPtr
6273xmlParseElementChildrenContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
6274 /* stub left for API/ABI compat */
6275 return(xmlParseElementChildrenContentDeclPriv(ctxt, inputchk, 1));
6276}
6277
6278/**
Owen Taylor3473f882001-02-23 17:55:21 +00006279 * xmlParseElementContentDecl:
6280 * @ctxt: an XML parser context
6281 * @name: the name of the element being defined.
6282 * @result: the Element Content pointer will be stored here if any
6283 *
6284 * parse the declaration for an Element content either Mixed or Children,
6285 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
6286 *
6287 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
6288 *
6289 * returns: the type of element content XML_ELEMENT_TYPE_xxx
6290 */
6291
6292int
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006293xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, const xmlChar *name,
Owen Taylor3473f882001-02-23 17:55:21 +00006294 xmlElementContentPtr *result) {
6295
6296 xmlElementContentPtr tree = NULL;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006297 int inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00006298 int res;
6299
6300 *result = NULL;
6301
6302 if (RAW != '(') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006303 xmlFatalErrMsgStr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
Daniel Veillard56a4cb82001-03-24 17:00:36 +00006304 "xmlParseElementContentDecl : %s '(' expected\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006305 return(-1);
6306 }
6307 NEXT;
6308 GROW;
6309 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00006310 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006311 tree = xmlParseElementMixedContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00006312 res = XML_ELEMENT_TYPE_MIXED;
6313 } else {
Daniel Veillard489f9672009-08-10 16:49:30 +02006314 tree = xmlParseElementChildrenContentDeclPriv(ctxt, inputid, 1);
Owen Taylor3473f882001-02-23 17:55:21 +00006315 res = XML_ELEMENT_TYPE_ELEMENT;
6316 }
Owen Taylor3473f882001-02-23 17:55:21 +00006317 SKIP_BLANKS;
6318 *result = tree;
6319 return(res);
6320}
6321
6322/**
6323 * xmlParseElementDecl:
6324 * @ctxt: an XML parser context
6325 *
6326 * parse an Element declaration.
6327 *
6328 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
6329 *
6330 * [ VC: Unique Element Type Declaration ]
6331 * No element type may be declared more than once
6332 *
6333 * Returns the type of the element, or -1 in case of error
6334 */
6335int
6336xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006337 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00006338 int ret = -1;
6339 xmlElementContentPtr content = NULL;
6340
Daniel Veillard4c778d82005-01-23 17:37:44 +00006341 /* GROW; done in the caller */
Daniel Veillarda07050d2003-10-19 14:46:32 +00006342 if (CMP9(CUR_PTR, '<', '!', 'E', 'L', 'E', 'M', 'E', 'N', 'T')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006343 xmlParserInputPtr input = ctxt->input;
6344
6345 SKIP(9);
William M. Brack76e95df2003-10-18 16:20:14 +00006346 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006347 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6348 "Space required after 'ELEMENT'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006349 }
6350 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00006351 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006352 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006353 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6354 "xmlParseElementDecl: no name for Element\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006355 return(-1);
6356 }
6357 while ((RAW == 0) && (ctxt->inputNr > 1))
6358 xmlPopInput(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00006359 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006360 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6361 "Space required after the element name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006362 }
6363 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00006364 if (CMP5(CUR_PTR, 'E', 'M', 'P', 'T', 'Y')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006365 SKIP(5);
6366 /*
6367 * Element must always be empty.
6368 */
6369 ret = XML_ELEMENT_TYPE_EMPTY;
6370 } else if ((RAW == 'A') && (NXT(1) == 'N') &&
6371 (NXT(2) == 'Y')) {
6372 SKIP(3);
6373 /*
6374 * Element is a generic container.
6375 */
6376 ret = XML_ELEMENT_TYPE_ANY;
6377 } else if (RAW == '(') {
6378 ret = xmlParseElementContentDecl(ctxt, name, &content);
6379 } else {
6380 /*
6381 * [ WFC: PEs in Internal Subset ] error handling.
6382 */
6383 if ((RAW == '%') && (ctxt->external == 0) &&
6384 (ctxt->inputNr == 1)) {
Daniel Veillardf403d292003-10-05 13:51:35 +00006385 xmlFatalErrMsg(ctxt, XML_ERR_PEREF_IN_INT_SUBSET,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006386 "PEReference: forbidden within markup decl in internal subset\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006387 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00006388 xmlFatalErrMsg(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
Owen Taylor3473f882001-02-23 17:55:21 +00006389 "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
6390 }
Owen Taylor3473f882001-02-23 17:55:21 +00006391 return(-1);
6392 }
6393
6394 SKIP_BLANKS;
6395 /*
6396 * Pop-up of finished entities.
6397 */
6398 while ((RAW == 0) && (ctxt->inputNr > 1))
6399 xmlPopInput(ctxt);
6400 SKIP_BLANKS;
6401
6402 if (RAW != '>') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006403 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006404 if (content != NULL) {
6405 xmlFreeDocElementContent(ctxt->myDoc, content);
6406 }
Owen Taylor3473f882001-02-23 17:55:21 +00006407 } else {
6408 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006409 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6410 "Element declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006411 }
6412
6413 NEXT;
6414 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006415 (ctxt->sax->elementDecl != NULL)) {
6416 if (content != NULL)
6417 content->parent = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00006418 ctxt->sax->elementDecl(ctxt->userData, name, ret,
6419 content);
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006420 if ((content != NULL) && (content->parent == NULL)) {
6421 /*
6422 * this is a trick: if xmlAddElementDecl is called,
6423 * instead of copying the full tree it is plugged directly
6424 * if called from the parser. Avoid duplicating the
6425 * interfaces or change the API/ABI
6426 */
6427 xmlFreeDocElementContent(ctxt->myDoc, content);
6428 }
6429 } else if (content != NULL) {
6430 xmlFreeDocElementContent(ctxt->myDoc, content);
6431 }
Owen Taylor3473f882001-02-23 17:55:21 +00006432 }
Owen Taylor3473f882001-02-23 17:55:21 +00006433 }
6434 return(ret);
6435}
6436
6437/**
Owen Taylor3473f882001-02-23 17:55:21 +00006438 * xmlParseConditionalSections
6439 * @ctxt: an XML parser context
6440 *
6441 * [61] conditionalSect ::= includeSect | ignoreSect
6442 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
6443 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
6444 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
6445 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
6446 */
6447
Daniel Veillard56a4cb82001-03-24 17:00:36 +00006448static void
Owen Taylor3473f882001-02-23 17:55:21 +00006449xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
Daniel Veillard49d44052008-08-27 19:57:06 +00006450 int id = ctxt->input->id;
6451
Owen Taylor3473f882001-02-23 17:55:21 +00006452 SKIP(3);
6453 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00006454 if (CMP7(CUR_PTR, 'I', 'N', 'C', 'L', 'U', 'D', 'E')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006455 SKIP(7);
6456 SKIP_BLANKS;
6457 if (RAW != '[') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006458 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006459 } else {
Daniel Veillard49d44052008-08-27 19:57:06 +00006460 if (ctxt->input->id != id) {
6461 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6462 "All markup of the conditional section is not in the same entity\n",
6463 NULL, NULL);
6464 }
Owen Taylor3473f882001-02-23 17:55:21 +00006465 NEXT;
6466 }
6467 if (xmlParserDebugEntities) {
6468 if ((ctxt->input != NULL) && (ctxt->input->filename))
6469 xmlGenericError(xmlGenericErrorContext,
6470 "%s(%d): ", ctxt->input->filename,
6471 ctxt->input->line);
6472 xmlGenericError(xmlGenericErrorContext,
6473 "Entering INCLUDE Conditional Section\n");
6474 }
6475
6476 while ((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') ||
6477 (NXT(2) != '>'))) {
6478 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00006479 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00006480
6481 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6482 xmlParseConditionalSections(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00006483 } else if (IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00006484 NEXT;
6485 } else if (RAW == '%') {
6486 xmlParsePEReference(ctxt);
6487 } else
6488 xmlParseMarkupDecl(ctxt);
6489
6490 /*
6491 * Pop-up of finished entities.
6492 */
6493 while ((RAW == 0) && (ctxt->inputNr > 1))
6494 xmlPopInput(ctxt);
6495
Daniel Veillardfdc91562002-07-01 21:52:03 +00006496 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006497 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006498 break;
6499 }
6500 }
6501 if (xmlParserDebugEntities) {
6502 if ((ctxt->input != NULL) && (ctxt->input->filename))
6503 xmlGenericError(xmlGenericErrorContext,
6504 "%s(%d): ", ctxt->input->filename,
6505 ctxt->input->line);
6506 xmlGenericError(xmlGenericErrorContext,
6507 "Leaving INCLUDE Conditional Section\n");
6508 }
6509
Daniel Veillarda07050d2003-10-19 14:46:32 +00006510 } else if (CMP6(CUR_PTR, 'I', 'G', 'N', 'O', 'R', 'E')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006511 int state;
William M. Brack78637da2003-07-31 14:47:38 +00006512 xmlParserInputState instate;
Owen Taylor3473f882001-02-23 17:55:21 +00006513 int depth = 0;
6514
6515 SKIP(6);
6516 SKIP_BLANKS;
6517 if (RAW != '[') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006518 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006519 } else {
Daniel Veillard49d44052008-08-27 19:57:06 +00006520 if (ctxt->input->id != id) {
6521 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6522 "All markup of the conditional section is not in the same entity\n",
6523 NULL, NULL);
6524 }
Owen Taylor3473f882001-02-23 17:55:21 +00006525 NEXT;
6526 }
6527 if (xmlParserDebugEntities) {
6528 if ((ctxt->input != NULL) && (ctxt->input->filename))
6529 xmlGenericError(xmlGenericErrorContext,
6530 "%s(%d): ", ctxt->input->filename,
6531 ctxt->input->line);
6532 xmlGenericError(xmlGenericErrorContext,
6533 "Entering IGNORE Conditional Section\n");
6534 }
6535
6536 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006537 * Parse up to the end of the conditional section
Owen Taylor3473f882001-02-23 17:55:21 +00006538 * But disable SAX event generating DTD building in the meantime
6539 */
6540 state = ctxt->disableSAX;
6541 instate = ctxt->instate;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006542 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006543 ctxt->instate = XML_PARSER_IGNORE;
6544
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00006545 while ((depth >= 0) && (RAW != 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00006546 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6547 depth++;
6548 SKIP(3);
6549 continue;
6550 }
6551 if ((RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
6552 if (--depth >= 0) SKIP(3);
6553 continue;
6554 }
6555 NEXT;
6556 continue;
6557 }
6558
6559 ctxt->disableSAX = state;
6560 ctxt->instate = instate;
6561
6562 if (xmlParserDebugEntities) {
6563 if ((ctxt->input != NULL) && (ctxt->input->filename))
6564 xmlGenericError(xmlGenericErrorContext,
6565 "%s(%d): ", ctxt->input->filename,
6566 ctxt->input->line);
6567 xmlGenericError(xmlGenericErrorContext,
6568 "Leaving IGNORE Conditional Section\n");
6569 }
6570
6571 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006572 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID_KEYWORD, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006573 }
6574
6575 if (RAW == 0)
6576 SHRINK;
6577
6578 if (RAW == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006579 xmlFatalErr(ctxt, XML_ERR_CONDSEC_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006580 } else {
Daniel Veillard49d44052008-08-27 19:57:06 +00006581 if (ctxt->input->id != id) {
6582 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6583 "All markup of the conditional section is not in the same entity\n",
6584 NULL, NULL);
6585 }
Owen Taylor3473f882001-02-23 17:55:21 +00006586 SKIP(3);
6587 }
6588}
6589
6590/**
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006591 * xmlParseMarkupDecl:
6592 * @ctxt: an XML parser context
6593 *
6594 * parse Markup declarations
6595 *
6596 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
6597 * NotationDecl | PI | Comment
6598 *
6599 * [ VC: Proper Declaration/PE Nesting ]
6600 * Parameter-entity replacement text must be properly nested with
6601 * markup declarations. That is to say, if either the first character
6602 * or the last character of a markup declaration (markupdecl above) is
6603 * contained in the replacement text for a parameter-entity reference,
6604 * both must be contained in the same replacement text.
6605 *
6606 * [ WFC: PEs in Internal Subset ]
6607 * In the internal DTD subset, parameter-entity references can occur
6608 * only where markup declarations can occur, not within markup declarations.
6609 * (This does not apply to references that occur in external parameter
6610 * entities or to the external subset.)
6611 */
6612void
6613xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
6614 GROW;
Daniel Veillard4c778d82005-01-23 17:37:44 +00006615 if (CUR == '<') {
6616 if (NXT(1) == '!') {
6617 switch (NXT(2)) {
6618 case 'E':
6619 if (NXT(3) == 'L')
6620 xmlParseElementDecl(ctxt);
6621 else if (NXT(3) == 'N')
6622 xmlParseEntityDecl(ctxt);
6623 break;
6624 case 'A':
6625 xmlParseAttributeListDecl(ctxt);
6626 break;
6627 case 'N':
6628 xmlParseNotationDecl(ctxt);
6629 break;
6630 case '-':
6631 xmlParseComment(ctxt);
6632 break;
6633 default:
6634 /* there is an error but it will be detected later */
6635 break;
6636 }
6637 } else if (NXT(1) == '?') {
6638 xmlParsePI(ctxt);
6639 }
6640 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006641 /*
6642 * This is only for internal subset. On external entities,
6643 * the replacement is done before parsing stage
6644 */
6645 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
6646 xmlParsePEReference(ctxt);
6647
6648 /*
6649 * Conditional sections are allowed from entities included
6650 * by PE References in the internal subset.
6651 */
6652 if ((ctxt->external == 0) && (ctxt->inputNr > 1)) {
6653 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6654 xmlParseConditionalSections(ctxt);
6655 }
6656 }
6657
6658 ctxt->instate = XML_PARSER_DTD;
6659}
6660
6661/**
6662 * xmlParseTextDecl:
6663 * @ctxt: an XML parser context
Daniel Veillard40ec29a2008-07-30 12:35:40 +00006664 *
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006665 * parse an XML declaration header for external entities
6666 *
6667 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006668 */
6669
6670void
6671xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
6672 xmlChar *version;
Daniel Veillardf5cb3cd2003-10-28 13:58:13 +00006673 const xmlChar *encoding;
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006674
6675 /*
6676 * We know that '<?xml' is here.
6677 */
Daniel Veillarda07050d2003-10-19 14:46:32 +00006678 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006679 SKIP(5);
6680 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006681 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_STARTED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006682 return;
6683 }
6684
William M. Brack76e95df2003-10-18 16:20:14 +00006685 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006686 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6687 "Space needed after '<?xml'\n");
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006688 }
6689 SKIP_BLANKS;
6690
6691 /*
6692 * We may have the VersionInfo here.
6693 */
6694 version = xmlParseVersionInfo(ctxt);
6695 if (version == NULL)
6696 version = xmlCharStrdup(XML_DEFAULT_VERSION);
Daniel Veillard401c2112002-01-07 16:54:10 +00006697 else {
William M. Brack76e95df2003-10-18 16:20:14 +00006698 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006699 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6700 "Space needed here\n");
Daniel Veillard401c2112002-01-07 16:54:10 +00006701 }
6702 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006703 ctxt->input->version = version;
6704
6705 /*
6706 * We must have the encoding declaration
6707 */
Daniel Veillardf5cb3cd2003-10-28 13:58:13 +00006708 encoding = xmlParseEncodingDecl(ctxt);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006709 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
6710 /*
6711 * The XML REC instructs us to stop parsing right here
6712 */
6713 return;
6714 }
Daniel Veillardf5cb3cd2003-10-28 13:58:13 +00006715 if ((encoding == NULL) && (ctxt->errNo == XML_ERR_OK)) {
6716 xmlFatalErrMsg(ctxt, XML_ERR_MISSING_ENCODING,
6717 "Missing encoding in text declaration\n");
6718 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006719
6720 SKIP_BLANKS;
6721 if ((RAW == '?') && (NXT(1) == '>')) {
6722 SKIP(2);
6723 } else if (RAW == '>') {
6724 /* Deprecated old WD ... */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006725 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006726 NEXT;
6727 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006728 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006729 MOVETO_ENDTAG(CUR_PTR);
6730 NEXT;
6731 }
6732}
6733
6734/**
Owen Taylor3473f882001-02-23 17:55:21 +00006735 * xmlParseExternalSubset:
6736 * @ctxt: an XML parser context
6737 * @ExternalID: the external identifier
6738 * @SystemID: the system identifier (or URL)
6739 *
6740 * parse Markup declarations from an external subset
6741 *
6742 * [30] extSubset ::= textDecl? extSubsetDecl
6743 *
6744 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
6745 */
6746void
6747xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
6748 const xmlChar *SystemID) {
Daniel Veillard309f81d2003-09-23 09:02:53 +00006749 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006750 GROW;
Daniel Veillard6ccc56d2008-04-03 12:59:06 +00006751
Nikolay Sivove6ad10a2010-11-01 11:35:14 +01006752 if ((ctxt->encoding == NULL) &&
Daniel Veillard6ccc56d2008-04-03 12:59:06 +00006753 (ctxt->input->end - ctxt->input->cur >= 4)) {
6754 xmlChar start[4];
6755 xmlCharEncoding enc;
6756
6757 start[0] = RAW;
6758 start[1] = NXT(1);
6759 start[2] = NXT(2);
6760 start[3] = NXT(3);
6761 enc = xmlDetectCharEncoding(start, 4);
6762 if (enc != XML_CHAR_ENCODING_NONE)
6763 xmlSwitchEncoding(ctxt, enc);
6764 }
6765
Daniel Veillarda07050d2003-10-19 14:46:32 +00006766 if (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006767 xmlParseTextDecl(ctxt);
6768 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
6769 /*
6770 * The XML REC instructs us to stop parsing right here
6771 */
6772 ctxt->instate = XML_PARSER_EOF;
6773 return;
6774 }
6775 }
6776 if (ctxt->myDoc == NULL) {
6777 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
Daniel Veillardae0765b2008-07-31 19:54:59 +00006778 if (ctxt->myDoc == NULL) {
6779 xmlErrMemory(ctxt, "New Doc failed");
6780 return;
6781 }
6782 ctxt->myDoc->properties = XML_DOC_INTERNAL;
Owen Taylor3473f882001-02-23 17:55:21 +00006783 }
6784 if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
6785 xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
6786
6787 ctxt->instate = XML_PARSER_DTD;
6788 ctxt->external = 1;
6789 while (((RAW == '<') && (NXT(1) == '?')) ||
6790 ((RAW == '<') && (NXT(1) == '!')) ||
William M. Brack76e95df2003-10-18 16:20:14 +00006791 (RAW == '%') || IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00006792 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00006793 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00006794
6795 GROW;
6796 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6797 xmlParseConditionalSections(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00006798 } else if (IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00006799 NEXT;
6800 } else if (RAW == '%') {
6801 xmlParsePEReference(ctxt);
6802 } else
6803 xmlParseMarkupDecl(ctxt);
6804
6805 /*
6806 * Pop-up of finished entities.
6807 */
6808 while ((RAW == 0) && (ctxt->inputNr > 1))
6809 xmlPopInput(ctxt);
6810
Daniel Veillardfdc91562002-07-01 21:52:03 +00006811 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006812 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006813 break;
6814 }
6815 }
6816
6817 if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006818 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006819 }
6820
6821}
6822
6823/**
6824 * xmlParseReference:
6825 * @ctxt: an XML parser context
Daniel Veillard0161e632008-08-28 15:36:32 +00006826 *
Owen Taylor3473f882001-02-23 17:55:21 +00006827 * parse and handle entity references in content, depending on the SAX
6828 * interface, this may end-up in a call to character() if this is a
6829 * CharRef, a predefined entity, if there is no reference() callback.
6830 * or if the parser was asked to switch to that mode.
6831 *
6832 * [67] Reference ::= EntityRef | CharRef
6833 */
6834void
6835xmlParseReference(xmlParserCtxtPtr ctxt) {
6836 xmlEntityPtr ent;
6837 xmlChar *val;
Daniel Veillard0161e632008-08-28 15:36:32 +00006838 int was_checked;
6839 xmlNodePtr list = NULL;
6840 xmlParserErrors ret = XML_ERR_OK;
Owen Taylor3473f882001-02-23 17:55:21 +00006841
Daniel Veillard0161e632008-08-28 15:36:32 +00006842
6843 if (RAW != '&')
6844 return;
6845
6846 /*
6847 * Simple case of a CharRef
6848 */
Owen Taylor3473f882001-02-23 17:55:21 +00006849 if (NXT(1) == '#') {
6850 int i = 0;
6851 xmlChar out[10];
6852 int hex = NXT(2);
Daniel Veillard56a4cb82001-03-24 17:00:36 +00006853 int value = xmlParseCharRef(ctxt);
Daniel Veillard0161e632008-08-28 15:36:32 +00006854
Daniel Veillarddc171602008-03-26 17:41:38 +00006855 if (value == 0)
6856 return;
Owen Taylor3473f882001-02-23 17:55:21 +00006857 if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
6858 /*
6859 * So we are using non-UTF-8 buffers
6860 * Check that the char fit on 8bits, if not
6861 * generate a CharRef.
6862 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00006863 if (value <= 0xFF) {
6864 out[0] = value;
Owen Taylor3473f882001-02-23 17:55:21 +00006865 out[1] = 0;
6866 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
6867 (!ctxt->disableSAX))
6868 ctxt->sax->characters(ctxt->userData, out, 1);
6869 } else {
6870 if ((hex == 'x') || (hex == 'X'))
Aleksey Sanin49cc9752002-06-14 17:07:10 +00006871 snprintf((char *)out, sizeof(out), "#x%X", value);
Owen Taylor3473f882001-02-23 17:55:21 +00006872 else
Aleksey Sanin49cc9752002-06-14 17:07:10 +00006873 snprintf((char *)out, sizeof(out), "#%d", value);
Owen Taylor3473f882001-02-23 17:55:21 +00006874 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
6875 (!ctxt->disableSAX))
6876 ctxt->sax->reference(ctxt->userData, out);
6877 }
6878 } else {
6879 /*
6880 * Just encode the value in UTF-8
6881 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00006882 COPY_BUF(0 ,out, i, value);
Owen Taylor3473f882001-02-23 17:55:21 +00006883 out[i] = 0;
6884 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
6885 (!ctxt->disableSAX))
6886 ctxt->sax->characters(ctxt->userData, out, i);
6887 }
Daniel Veillard0161e632008-08-28 15:36:32 +00006888 return;
6889 }
Daniel Veillarda37a6ad2006-10-10 20:05:45 +00006890
Daniel Veillard0161e632008-08-28 15:36:32 +00006891 /*
6892 * We are seeing an entity reference
6893 */
6894 ent = xmlParseEntityRef(ctxt);
6895 if (ent == NULL) return;
6896 if (!ctxt->wellFormed)
6897 return;
6898 was_checked = ent->checked;
6899
6900 /* special case of predefined entities */
6901 if ((ent->name == NULL) ||
6902 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
6903 val = ent->content;
6904 if (val == NULL) return;
6905 /*
6906 * inline the entity.
6907 */
6908 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
6909 (!ctxt->disableSAX))
6910 ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
6911 return;
6912 }
6913
6914 /*
6915 * The first reference to the entity trigger a parsing phase
6916 * where the ent->children is filled with the result from
6917 * the parsing.
6918 */
6919 if (ent->checked == 0) {
6920 unsigned long oldnbent = ctxt->nbentities;
6921
6922 /*
6923 * This is a bit hackish but this seems the best
6924 * way to make sure both SAX and DOM entity support
6925 * behaves okay.
6926 */
6927 void *user_data;
6928 if (ctxt->userData == ctxt)
6929 user_data = NULL;
6930 else
6931 user_data = ctxt->userData;
6932
6933 /*
6934 * Check that this entity is well formed
6935 * 4.3.2: An internal general parsed entity is well-formed
6936 * if its replacement text matches the production labeled
6937 * content.
6938 */
6939 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
6940 ctxt->depth++;
6941 ret = xmlParseBalancedChunkMemoryInternal(ctxt, ent->content,
6942 user_data, &list);
6943 ctxt->depth--;
6944
6945 } else if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
6946 ctxt->depth++;
6947 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt, ctxt->sax,
6948 user_data, ctxt->depth, ent->URI,
6949 ent->ExternalID, &list);
6950 ctxt->depth--;
6951 } else {
6952 ret = XML_ERR_ENTITY_PE_INTERNAL;
6953 xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
6954 "invalid entity type found\n", NULL);
6955 }
6956
6957 /*
6958 * Store the number of entities needing parsing for this entity
6959 * content and do checkings
6960 */
6961 ent->checked = ctxt->nbentities - oldnbent;
6962 if (ret == XML_ERR_ENTITY_LOOP) {
Daniel Veillard4bf899b2008-08-20 17:04:30 +00006963 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
Daniel Veillard0161e632008-08-28 15:36:32 +00006964 xmlFreeNodeList(list);
Daniel Veillard4bf899b2008-08-20 17:04:30 +00006965 return;
6966 }
Daniel Veillard0161e632008-08-28 15:36:32 +00006967 if (xmlParserEntityCheck(ctxt, 0, ent)) {
6968 xmlFreeNodeList(list);
6969 return;
6970 }
Owen Taylor3473f882001-02-23 17:55:21 +00006971
Daniel Veillard0161e632008-08-28 15:36:32 +00006972 if ((ret == XML_ERR_OK) && (list != NULL)) {
6973 if (((ent->etype == XML_INTERNAL_GENERAL_ENTITY) ||
6974 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY))&&
6975 (ent->children == NULL)) {
6976 ent->children = list;
6977 if (ctxt->replaceEntities) {
Owen Taylor3473f882001-02-23 17:55:21 +00006978 /*
Daniel Veillard0161e632008-08-28 15:36:32 +00006979 * Prune it directly in the generated document
6980 * except for single text nodes.
Owen Taylor3473f882001-02-23 17:55:21 +00006981 */
Daniel Veillard0161e632008-08-28 15:36:32 +00006982 if (((list->type == XML_TEXT_NODE) &&
6983 (list->next == NULL)) ||
6984 (ctxt->parseMode == XML_PARSE_READER)) {
6985 list->parent = (xmlNodePtr) ent;
6986 list = NULL;
6987 ent->owner = 1;
6988 } else {
6989 ent->owner = 0;
6990 while (list != NULL) {
6991 list->parent = (xmlNodePtr) ctxt->node;
6992 list->doc = ctxt->myDoc;
6993 if (list->next == NULL)
6994 ent->last = list;
6995 list = list->next;
Owen Taylor3473f882001-02-23 17:55:21 +00006996 }
Daniel Veillard0161e632008-08-28 15:36:32 +00006997 list = ent->children;
6998#ifdef LIBXML_LEGACY_ENABLED
6999 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7000 xmlAddEntityReference(ent, list, NULL);
7001#endif /* LIBXML_LEGACY_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00007002 }
7003 } else {
Daniel Veillard0161e632008-08-28 15:36:32 +00007004 ent->owner = 1;
7005 while (list != NULL) {
7006 list->parent = (xmlNodePtr) ent;
Rob Richardsc794eb52011-02-18 12:17:17 -05007007 xmlSetTreeDoc(list, ent->doc);
Daniel Veillard0161e632008-08-28 15:36:32 +00007008 if (list->next == NULL)
7009 ent->last = list;
7010 list = list->next;
Owen Taylor3473f882001-02-23 17:55:21 +00007011 }
7012 }
Daniel Veillard0161e632008-08-28 15:36:32 +00007013 } else {
7014 xmlFreeNodeList(list);
7015 list = NULL;
Daniel Veillarda37a6ad2006-10-10 20:05:45 +00007016 }
Daniel Veillard0161e632008-08-28 15:36:32 +00007017 } else if ((ret != XML_ERR_OK) &&
7018 (ret != XML_WAR_UNDECLARED_ENTITY)) {
7019 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7020 "Entity '%s' failed to parse\n", ent->name);
7021 } else if (list != NULL) {
7022 xmlFreeNodeList(list);
7023 list = NULL;
7024 }
7025 if (ent->checked == 0)
7026 ent->checked = 1;
7027 } else if (ent->checked != 1) {
7028 ctxt->nbentities += ent->checked;
7029 }
Daniel Veillarda37a6ad2006-10-10 20:05:45 +00007030
Daniel Veillard0161e632008-08-28 15:36:32 +00007031 /*
7032 * Now that the entity content has been gathered
7033 * provide it to the application, this can take different forms based
7034 * on the parsing modes.
7035 */
7036 if (ent->children == NULL) {
7037 /*
7038 * Probably running in SAX mode and the callbacks don't
7039 * build the entity content. So unless we already went
7040 * through parsing for first checking go through the entity
7041 * content to generate callbacks associated to the entity
7042 */
7043 if (was_checked != 0) {
7044 void *user_data;
Owen Taylor3473f882001-02-23 17:55:21 +00007045 /*
Daniel Veillard0161e632008-08-28 15:36:32 +00007046 * This is a bit hackish but this seems the best
7047 * way to make sure both SAX and DOM entity support
7048 * behaves okay.
Owen Taylor3473f882001-02-23 17:55:21 +00007049 */
Daniel Veillard0161e632008-08-28 15:36:32 +00007050 if (ctxt->userData == ctxt)
7051 user_data = NULL;
7052 else
7053 user_data = ctxt->userData;
7054
7055 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
7056 ctxt->depth++;
7057 ret = xmlParseBalancedChunkMemoryInternal(ctxt,
7058 ent->content, user_data, NULL);
7059 ctxt->depth--;
7060 } else if (ent->etype ==
7061 XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
7062 ctxt->depth++;
7063 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
7064 ctxt->sax, user_data, ctxt->depth,
7065 ent->URI, ent->ExternalID, NULL);
7066 ctxt->depth--;
7067 } else {
7068 ret = XML_ERR_ENTITY_PE_INTERNAL;
7069 xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
7070 "invalid entity type found\n", NULL);
7071 }
7072 if (ret == XML_ERR_ENTITY_LOOP) {
7073 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
7074 return;
7075 }
7076 }
7077 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7078 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
7079 /*
7080 * Entity reference callback comes second, it's somewhat
7081 * superfluous but a compatibility to historical behaviour
7082 */
7083 ctxt->sax->reference(ctxt->userData, ent->name);
7084 }
7085 return;
7086 }
7087
7088 /*
7089 * If we didn't get any children for the entity being built
7090 */
7091 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7092 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
7093 /*
7094 * Create a node.
7095 */
7096 ctxt->sax->reference(ctxt->userData, ent->name);
7097 return;
7098 }
7099
7100 if ((ctxt->replaceEntities) || (ent->children == NULL)) {
7101 /*
7102 * There is a problem on the handling of _private for entities
7103 * (bug 155816): Should we copy the content of the field from
7104 * the entity (possibly overwriting some value set by the user
7105 * when a copy is created), should we leave it alone, or should
7106 * we try to take care of different situations? The problem
7107 * is exacerbated by the usage of this field by the xmlReader.
7108 * To fix this bug, we look at _private on the created node
7109 * and, if it's NULL, we copy in whatever was in the entity.
7110 * If it's not NULL we leave it alone. This is somewhat of a
7111 * hack - maybe we should have further tests to determine
7112 * what to do.
7113 */
7114 if ((ctxt->node != NULL) && (ent->children != NULL)) {
7115 /*
7116 * Seems we are generating the DOM content, do
7117 * a simple tree copy for all references except the first
7118 * In the first occurrence list contains the replacement.
7119 * progressive == 2 means we are operating on the Reader
7120 * and since nodes are discarded we must copy all the time.
7121 */
7122 if (((list == NULL) && (ent->owner == 0)) ||
7123 (ctxt->parseMode == XML_PARSE_READER)) {
7124 xmlNodePtr nw = NULL, cur, firstChild = NULL;
7125
7126 /*
7127 * when operating on a reader, the entities definitions
7128 * are always owning the entities subtree.
7129 if (ctxt->parseMode == XML_PARSE_READER)
7130 ent->owner = 1;
7131 */
7132
7133 cur = ent->children;
7134 while (cur != NULL) {
7135 nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
7136 if (nw != NULL) {
7137 if (nw->_private == NULL)
7138 nw->_private = cur->_private;
7139 if (firstChild == NULL){
7140 firstChild = nw;
7141 }
7142 nw = xmlAddChild(ctxt->node, nw);
7143 }
7144 if (cur == ent->last) {
7145 /*
7146 * needed to detect some strange empty
7147 * node cases in the reader tests
7148 */
7149 if ((ctxt->parseMode == XML_PARSE_READER) &&
7150 (nw != NULL) &&
7151 (nw->type == XML_ELEMENT_NODE) &&
7152 (nw->children == NULL))
7153 nw->extra = 1;
7154
7155 break;
7156 }
7157 cur = cur->next;
7158 }
7159#ifdef LIBXML_LEGACY_ENABLED
7160 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7161 xmlAddEntityReference(ent, firstChild, nw);
7162#endif /* LIBXML_LEGACY_ENABLED */
7163 } else if (list == NULL) {
7164 xmlNodePtr nw = NULL, cur, next, last,
7165 firstChild = NULL;
7166 /*
7167 * Copy the entity child list and make it the new
7168 * entity child list. The goal is to make sure any
7169 * ID or REF referenced will be the one from the
7170 * document content and not the entity copy.
7171 */
7172 cur = ent->children;
7173 ent->children = NULL;
7174 last = ent->last;
7175 ent->last = NULL;
7176 while (cur != NULL) {
7177 next = cur->next;
7178 cur->next = NULL;
7179 cur->parent = NULL;
7180 nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
7181 if (nw != NULL) {
7182 if (nw->_private == NULL)
7183 nw->_private = cur->_private;
7184 if (firstChild == NULL){
7185 firstChild = cur;
7186 }
7187 xmlAddChild((xmlNodePtr) ent, nw);
7188 xmlAddChild(ctxt->node, cur);
7189 }
7190 if (cur == last)
7191 break;
7192 cur = next;
7193 }
Daniel Veillardcba68392008-08-29 12:43:40 +00007194 if (ent->owner == 0)
7195 ent->owner = 1;
Daniel Veillard0161e632008-08-28 15:36:32 +00007196#ifdef LIBXML_LEGACY_ENABLED
7197 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7198 xmlAddEntityReference(ent, firstChild, nw);
7199#endif /* LIBXML_LEGACY_ENABLED */
7200 } else {
7201 const xmlChar *nbktext;
7202
7203 /*
7204 * the name change is to avoid coalescing of the
7205 * node with a possible previous text one which
7206 * would make ent->children a dangling pointer
7207 */
7208 nbktext = xmlDictLookup(ctxt->dict, BAD_CAST "nbktext",
7209 -1);
7210 if (ent->children->type == XML_TEXT_NODE)
7211 ent->children->name = nbktext;
7212 if ((ent->last != ent->children) &&
7213 (ent->last->type == XML_TEXT_NODE))
7214 ent->last->name = nbktext;
7215 xmlAddChildList(ctxt->node, ent->children);
7216 }
7217
7218 /*
7219 * This is to avoid a nasty side effect, see
7220 * characters() in SAX.c
7221 */
7222 ctxt->nodemem = 0;
7223 ctxt->nodelen = 0;
7224 return;
Owen Taylor3473f882001-02-23 17:55:21 +00007225 }
7226 }
7227}
7228
7229/**
7230 * xmlParseEntityRef:
7231 * @ctxt: an XML parser context
7232 *
7233 * parse ENTITY references declarations
7234 *
7235 * [68] EntityRef ::= '&' Name ';'
7236 *
7237 * [ WFC: Entity Declared ]
7238 * In a document without any DTD, a document with only an internal DTD
7239 * subset which contains no parameter entity references, or a document
7240 * with "standalone='yes'", the Name given in the entity reference
7241 * must match that in an entity declaration, except that well-formed
7242 * documents need not declare any of the following entities: amp, lt,
7243 * gt, apos, quot. The declaration of a parameter entity must precede
7244 * any reference to it. Similarly, the declaration of a general entity
7245 * must precede any reference to it which appears in a default value in an
7246 * attribute-list declaration. Note that if entities are declared in the
7247 * external subset or in external parameter entities, a non-validating
7248 * processor is not obligated to read and process their declarations;
7249 * for such documents, the rule that an entity must be declared is a
7250 * well-formedness constraint only if standalone='yes'.
7251 *
7252 * [ WFC: Parsed Entity ]
7253 * An entity reference must not contain the name of an unparsed entity
7254 *
7255 * Returns the xmlEntityPtr if found, or NULL otherwise.
7256 */
7257xmlEntityPtr
7258xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007259 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00007260 xmlEntityPtr ent = NULL;
7261
7262 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00007263
Daniel Veillard0161e632008-08-28 15:36:32 +00007264 if (RAW != '&')
7265 return(NULL);
7266 NEXT;
7267 name = xmlParseName(ctxt);
7268 if (name == NULL) {
7269 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7270 "xmlParseEntityRef: no name\n");
7271 return(NULL);
7272 }
7273 if (RAW != ';') {
7274 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7275 return(NULL);
7276 }
7277 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00007278
Daniel Veillard0161e632008-08-28 15:36:32 +00007279 /*
7280 * Predefined entites override any extra definition
7281 */
Rob Richardsb9ed0172009-01-05 17:28:50 +00007282 if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7283 ent = xmlGetPredefinedEntity(name);
7284 if (ent != NULL)
7285 return(ent);
7286 }
Owen Taylor3473f882001-02-23 17:55:21 +00007287
Daniel Veillard0161e632008-08-28 15:36:32 +00007288 /*
7289 * Increate the number of entity references parsed
7290 */
7291 ctxt->nbentities++;
Owen Taylor3473f882001-02-23 17:55:21 +00007292
Daniel Veillard0161e632008-08-28 15:36:32 +00007293 /*
7294 * Ask first SAX for entity resolution, otherwise try the
7295 * entities which may have stored in the parser context.
7296 */
7297 if (ctxt->sax != NULL) {
7298 if (ctxt->sax->getEntity != NULL)
7299 ent = ctxt->sax->getEntity(ctxt->userData, name);
Rob Richardsb9ed0172009-01-05 17:28:50 +00007300 if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7301 (ctxt->options & XML_PARSE_OLDSAX))
7302 ent = xmlGetPredefinedEntity(name);
Daniel Veillard0161e632008-08-28 15:36:32 +00007303 if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7304 (ctxt->userData==ctxt)) {
7305 ent = xmlSAX2GetEntity(ctxt, name);
Owen Taylor3473f882001-02-23 17:55:21 +00007306 }
7307 }
Daniel Veillard0161e632008-08-28 15:36:32 +00007308 /*
7309 * [ WFC: Entity Declared ]
7310 * In a document without any DTD, a document with only an
7311 * internal DTD subset which contains no parameter entity
7312 * references, or a document with "standalone='yes'", the
7313 * Name given in the entity reference must match that in an
7314 * entity declaration, except that well-formed documents
7315 * need not declare any of the following entities: amp, lt,
7316 * gt, apos, quot.
7317 * The declaration of a parameter entity must precede any
7318 * reference to it.
7319 * Similarly, the declaration of a general entity must
7320 * precede any reference to it which appears in a default
7321 * value in an attribute-list declaration. Note that if
7322 * entities are declared in the external subset or in
7323 * external parameter entities, a non-validating processor
7324 * is not obligated to read and process their declarations;
7325 * for such documents, the rule that an entity must be
7326 * declared is a well-formedness constraint only if
7327 * standalone='yes'.
7328 */
7329 if (ent == NULL) {
7330 if ((ctxt->standalone == 1) ||
7331 ((ctxt->hasExternalSubset == 0) &&
7332 (ctxt->hasPErefs == 0))) {
7333 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7334 "Entity '%s' not defined\n", name);
7335 } else {
7336 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7337 "Entity '%s' not defined\n", name);
7338 if ((ctxt->inSubset == 0) &&
7339 (ctxt->sax != NULL) &&
7340 (ctxt->sax->reference != NULL)) {
7341 ctxt->sax->reference(ctxt->userData, name);
7342 }
7343 }
7344 ctxt->valid = 0;
7345 }
7346
7347 /*
7348 * [ WFC: Parsed Entity ]
7349 * An entity reference must not contain the name of an
7350 * unparsed entity
7351 */
7352 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7353 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7354 "Entity reference to unparsed entity %s\n", name);
7355 }
7356
7357 /*
7358 * [ WFC: No External Entity References ]
7359 * Attribute values cannot contain direct or indirect
7360 * entity references to external entities.
7361 */
7362 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7363 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
7364 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7365 "Attribute references external entity '%s'\n", name);
7366 }
7367 /*
7368 * [ WFC: No < in Attribute Values ]
7369 * The replacement text of any entity referred to directly or
7370 * indirectly in an attribute value (other than "&lt;") must
7371 * not contain a <.
7372 */
7373 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7374 (ent != NULL) && (ent->content != NULL) &&
Rob Richardsb9ed0172009-01-05 17:28:50 +00007375 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
Daniel Veillard0161e632008-08-28 15:36:32 +00007376 (xmlStrchr(ent->content, '<'))) {
7377 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
7378 "'<' in entity '%s' is not allowed in attributes values\n", name);
7379 }
7380
7381 /*
7382 * Internal check, no parameter entities here ...
7383 */
7384 else {
7385 switch (ent->etype) {
7386 case XML_INTERNAL_PARAMETER_ENTITY:
7387 case XML_EXTERNAL_PARAMETER_ENTITY:
7388 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
7389 "Attempt to reference the parameter entity '%s'\n",
7390 name);
7391 break;
7392 default:
7393 break;
7394 }
7395 }
7396
7397 /*
7398 * [ WFC: No Recursion ]
7399 * A parsed entity must not contain a recursive reference
7400 * to itself, either directly or indirectly.
7401 * Done somewhere else
7402 */
Owen Taylor3473f882001-02-23 17:55:21 +00007403 return(ent);
7404}
7405
7406/**
7407 * xmlParseStringEntityRef:
7408 * @ctxt: an XML parser context
7409 * @str: a pointer to an index in the string
7410 *
7411 * parse ENTITY references declarations, but this version parses it from
7412 * a string value.
7413 *
7414 * [68] EntityRef ::= '&' Name ';'
7415 *
7416 * [ WFC: Entity Declared ]
7417 * In a document without any DTD, a document with only an internal DTD
7418 * subset which contains no parameter entity references, or a document
7419 * with "standalone='yes'", the Name given in the entity reference
7420 * must match that in an entity declaration, except that well-formed
7421 * documents need not declare any of the following entities: amp, lt,
7422 * gt, apos, quot. The declaration of a parameter entity must precede
7423 * any reference to it. Similarly, the declaration of a general entity
7424 * must precede any reference to it which appears in a default value in an
7425 * attribute-list declaration. Note that if entities are declared in the
7426 * external subset or in external parameter entities, a non-validating
7427 * processor is not obligated to read and process their declarations;
7428 * for such documents, the rule that an entity must be declared is a
7429 * well-formedness constraint only if standalone='yes'.
7430 *
7431 * [ WFC: Parsed Entity ]
7432 * An entity reference must not contain the name of an unparsed entity
7433 *
7434 * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
7435 * is updated to the current location in the string.
7436 */
Daniel Veillard8ed10722009-08-20 19:17:36 +02007437static xmlEntityPtr
Owen Taylor3473f882001-02-23 17:55:21 +00007438xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
7439 xmlChar *name;
7440 const xmlChar *ptr;
7441 xmlChar cur;
7442 xmlEntityPtr ent = NULL;
7443
7444 if ((str == NULL) || (*str == NULL))
7445 return(NULL);
7446 ptr = *str;
7447 cur = *ptr;
Daniel Veillard0161e632008-08-28 15:36:32 +00007448 if (cur != '&')
7449 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007450
Daniel Veillard0161e632008-08-28 15:36:32 +00007451 ptr++;
Daniel Veillard0161e632008-08-28 15:36:32 +00007452 name = xmlParseStringName(ctxt, &ptr);
7453 if (name == NULL) {
7454 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7455 "xmlParseStringEntityRef: no name\n");
7456 *str = ptr;
7457 return(NULL);
7458 }
7459 if (*ptr != ';') {
7460 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
Daniel Veillard7f4547c2008-10-03 07:58:23 +00007461 xmlFree(name);
Daniel Veillard0161e632008-08-28 15:36:32 +00007462 *str = ptr;
7463 return(NULL);
7464 }
7465 ptr++;
Owen Taylor3473f882001-02-23 17:55:21 +00007466
Owen Taylor3473f882001-02-23 17:55:21 +00007467
Daniel Veillard0161e632008-08-28 15:36:32 +00007468 /*
7469 * Predefined entites override any extra definition
7470 */
Rob Richardsb9ed0172009-01-05 17:28:50 +00007471 if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7472 ent = xmlGetPredefinedEntity(name);
7473 if (ent != NULL) {
7474 xmlFree(name);
7475 *str = ptr;
7476 return(ent);
7477 }
Daniel Veillard34a7fc32008-10-02 20:55:10 +00007478 }
Owen Taylor3473f882001-02-23 17:55:21 +00007479
Daniel Veillard0161e632008-08-28 15:36:32 +00007480 /*
7481 * Increate the number of entity references parsed
7482 */
7483 ctxt->nbentities++;
Owen Taylor3473f882001-02-23 17:55:21 +00007484
Daniel Veillard0161e632008-08-28 15:36:32 +00007485 /*
7486 * Ask first SAX for entity resolution, otherwise try the
7487 * entities which may have stored in the parser context.
7488 */
7489 if (ctxt->sax != NULL) {
7490 if (ctxt->sax->getEntity != NULL)
7491 ent = ctxt->sax->getEntity(ctxt->userData, name);
Rob Richardsb9ed0172009-01-05 17:28:50 +00007492 if ((ent == NULL) && (ctxt->options & XML_PARSE_OLDSAX))
7493 ent = xmlGetPredefinedEntity(name);
Daniel Veillard0161e632008-08-28 15:36:32 +00007494 if ((ent == NULL) && (ctxt->userData==ctxt)) {
7495 ent = xmlSAX2GetEntity(ctxt, name);
Owen Taylor3473f882001-02-23 17:55:21 +00007496 }
7497 }
Daniel Veillard0161e632008-08-28 15:36:32 +00007498
7499 /*
7500 * [ WFC: Entity Declared ]
7501 * In a document without any DTD, a document with only an
7502 * internal DTD subset which contains no parameter entity
7503 * references, or a document with "standalone='yes'", the
7504 * Name given in the entity reference must match that in an
7505 * entity declaration, except that well-formed documents
7506 * need not declare any of the following entities: amp, lt,
7507 * gt, apos, quot.
7508 * The declaration of a parameter entity must precede any
7509 * reference to it.
7510 * Similarly, the declaration of a general entity must
7511 * precede any reference to it which appears in a default
7512 * value in an attribute-list declaration. Note that if
7513 * entities are declared in the external subset or in
7514 * external parameter entities, a non-validating processor
7515 * is not obligated to read and process their declarations;
7516 * for such documents, the rule that an entity must be
7517 * declared is a well-formedness constraint only if
7518 * standalone='yes'.
7519 */
7520 if (ent == NULL) {
7521 if ((ctxt->standalone == 1) ||
7522 ((ctxt->hasExternalSubset == 0) &&
7523 (ctxt->hasPErefs == 0))) {
7524 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7525 "Entity '%s' not defined\n", name);
7526 } else {
7527 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7528 "Entity '%s' not defined\n",
7529 name);
7530 }
7531 /* TODO ? check regressions ctxt->valid = 0; */
7532 }
7533
7534 /*
7535 * [ WFC: Parsed Entity ]
7536 * An entity reference must not contain the name of an
7537 * unparsed entity
7538 */
7539 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7540 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7541 "Entity reference to unparsed entity %s\n", name);
7542 }
7543
7544 /*
7545 * [ WFC: No External Entity References ]
7546 * Attribute values cannot contain direct or indirect
7547 * entity references to external entities.
7548 */
7549 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7550 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
7551 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7552 "Attribute references external entity '%s'\n", name);
7553 }
7554 /*
7555 * [ WFC: No < in Attribute Values ]
7556 * The replacement text of any entity referred to directly or
7557 * indirectly in an attribute value (other than "&lt;") must
7558 * not contain a <.
7559 */
7560 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7561 (ent != NULL) && (ent->content != NULL) &&
Rob Richardsb9ed0172009-01-05 17:28:50 +00007562 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
Daniel Veillard0161e632008-08-28 15:36:32 +00007563 (xmlStrchr(ent->content, '<'))) {
7564 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
7565 "'<' in entity '%s' is not allowed in attributes values\n",
7566 name);
7567 }
7568
7569 /*
7570 * Internal check, no parameter entities here ...
7571 */
7572 else {
7573 switch (ent->etype) {
7574 case XML_INTERNAL_PARAMETER_ENTITY:
7575 case XML_EXTERNAL_PARAMETER_ENTITY:
7576 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
7577 "Attempt to reference the parameter entity '%s'\n",
7578 name);
7579 break;
7580 default:
7581 break;
7582 }
7583 }
7584
7585 /*
7586 * [ WFC: No Recursion ]
7587 * A parsed entity must not contain a recursive reference
7588 * to itself, either directly or indirectly.
7589 * Done somewhere else
7590 */
7591
7592 xmlFree(name);
Owen Taylor3473f882001-02-23 17:55:21 +00007593 *str = ptr;
7594 return(ent);
7595}
7596
7597/**
7598 * xmlParsePEReference:
7599 * @ctxt: an XML parser context
7600 *
7601 * parse PEReference declarations
7602 * The entity content is handled directly by pushing it's content as
7603 * a new input stream.
7604 *
7605 * [69] PEReference ::= '%' Name ';'
7606 *
7607 * [ WFC: No Recursion ]
7608 * A parsed entity must not contain a recursive
7609 * reference to itself, either directly or indirectly.
7610 *
7611 * [ WFC: Entity Declared ]
7612 * In a document without any DTD, a document with only an internal DTD
7613 * subset which contains no parameter entity references, or a document
7614 * with "standalone='yes'", ... ... The declaration of a parameter
7615 * entity must precede any reference to it...
7616 *
7617 * [ VC: Entity Declared ]
7618 * In a document with an external subset or external parameter entities
7619 * with "standalone='no'", ... ... The declaration of a parameter entity
7620 * must precede any reference to it...
7621 *
7622 * [ WFC: In DTD ]
7623 * Parameter-entity references may only appear in the DTD.
7624 * NOTE: misleading but this is handled.
7625 */
7626void
Daniel Veillard8f597c32003-10-06 08:19:27 +00007627xmlParsePEReference(xmlParserCtxtPtr ctxt)
7628{
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007629 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00007630 xmlEntityPtr entity = NULL;
7631 xmlParserInputPtr input;
7632
Daniel Veillard0161e632008-08-28 15:36:32 +00007633 if (RAW != '%')
7634 return;
7635 NEXT;
7636 name = xmlParseName(ctxt);
7637 if (name == NULL) {
7638 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7639 "xmlParsePEReference: no name\n");
7640 return;
Owen Taylor3473f882001-02-23 17:55:21 +00007641 }
Daniel Veillard0161e632008-08-28 15:36:32 +00007642 if (RAW != ';') {
7643 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7644 return;
7645 }
7646
7647 NEXT;
7648
7649 /*
7650 * Increate the number of entity references parsed
7651 */
7652 ctxt->nbentities++;
7653
7654 /*
7655 * Request the entity from SAX
7656 */
7657 if ((ctxt->sax != NULL) &&
7658 (ctxt->sax->getParameterEntity != NULL))
7659 entity = ctxt->sax->getParameterEntity(ctxt->userData,
7660 name);
7661 if (entity == NULL) {
7662 /*
7663 * [ WFC: Entity Declared ]
7664 * In a document without any DTD, a document with only an
7665 * internal DTD subset which contains no parameter entity
7666 * references, or a document with "standalone='yes'", ...
7667 * ... The declaration of a parameter entity must precede
7668 * any reference to it...
7669 */
7670 if ((ctxt->standalone == 1) ||
7671 ((ctxt->hasExternalSubset == 0) &&
7672 (ctxt->hasPErefs == 0))) {
7673 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7674 "PEReference: %%%s; not found\n",
7675 name);
7676 } else {
7677 /*
7678 * [ VC: Entity Declared ]
7679 * In a document with an external subset or external
7680 * parameter entities with "standalone='no'", ...
7681 * ... The declaration of a parameter entity must
7682 * precede any reference to it...
7683 */
7684 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7685 "PEReference: %%%s; not found\n",
7686 name, NULL);
7687 ctxt->valid = 0;
7688 }
7689 } else {
7690 /*
7691 * Internal checking in case the entity quest barfed
7692 */
7693 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
7694 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
7695 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7696 "Internal: %%%s; is not a parameter entity\n",
7697 name, NULL);
7698 } else if (ctxt->input->free != deallocblankswrapper) {
7699 input = xmlNewBlanksWrapperInputStream(ctxt, entity);
7700 if (xmlPushInput(ctxt, input) < 0)
7701 return;
7702 } else {
7703 /*
7704 * TODO !!!
7705 * handle the extra spaces added before and after
7706 * c.f. http://www.w3.org/TR/REC-xml#as-PE
7707 */
7708 input = xmlNewEntityInputStream(ctxt, entity);
7709 if (xmlPushInput(ctxt, input) < 0)
7710 return;
7711 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
7712 (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
7713 (IS_BLANK_CH(NXT(5)))) {
7714 xmlParseTextDecl(ctxt);
7715 if (ctxt->errNo ==
7716 XML_ERR_UNSUPPORTED_ENCODING) {
7717 /*
7718 * The XML REC instructs us to stop parsing
7719 * right here
7720 */
7721 ctxt->instate = XML_PARSER_EOF;
7722 return;
7723 }
7724 }
7725 }
7726 }
7727 ctxt->hasPErefs = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007728}
7729
7730/**
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00007731 * xmlLoadEntityContent:
7732 * @ctxt: an XML parser context
7733 * @entity: an unloaded system entity
7734 *
7735 * Load the original content of the given system entity from the
7736 * ExternalID/SystemID given. This is to be used for Included in Literal
7737 * http://www.w3.org/TR/REC-xml/#inliteral processing of entities references
7738 *
7739 * Returns 0 in case of success and -1 in case of failure
7740 */
7741static int
7742xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
7743 xmlParserInputPtr input;
7744 xmlBufferPtr buf;
7745 int l, c;
7746 int count = 0;
7747
7748 if ((ctxt == NULL) || (entity == NULL) ||
7749 ((entity->etype != XML_EXTERNAL_PARAMETER_ENTITY) &&
7750 (entity->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY)) ||
7751 (entity->content != NULL)) {
7752 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
7753 "xmlLoadEntityContent parameter error");
7754 return(-1);
7755 }
7756
7757 if (xmlParserDebugEntities)
7758 xmlGenericError(xmlGenericErrorContext,
7759 "Reading %s entity content input\n", entity->name);
7760
7761 buf = xmlBufferCreate();
7762 if (buf == NULL) {
7763 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
7764 "xmlLoadEntityContent parameter error");
7765 return(-1);
7766 }
7767
7768 input = xmlNewEntityInputStream(ctxt, entity);
7769 if (input == NULL) {
7770 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
7771 "xmlLoadEntityContent input error");
7772 xmlBufferFree(buf);
7773 return(-1);
7774 }
7775
7776 /*
7777 * Push the entity as the current input, read char by char
7778 * saving to the buffer until the end of the entity or an error
7779 */
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00007780 if (xmlPushInput(ctxt, input) < 0) {
7781 xmlBufferFree(buf);
7782 return(-1);
7783 }
7784
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00007785 GROW;
7786 c = CUR_CHAR(l);
7787 while ((ctxt->input == input) && (ctxt->input->cur < ctxt->input->end) &&
7788 (IS_CHAR(c))) {
7789 xmlBufferAdd(buf, ctxt->input->cur, l);
7790 if (count++ > 100) {
7791 count = 0;
7792 GROW;
7793 }
7794 NEXTL(l);
7795 c = CUR_CHAR(l);
7796 }
7797
7798 if ((ctxt->input == input) && (ctxt->input->cur >= ctxt->input->end)) {
7799 xmlPopInput(ctxt);
7800 } else if (!IS_CHAR(c)) {
7801 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
7802 "xmlLoadEntityContent: invalid char value %d\n",
7803 c);
7804 xmlBufferFree(buf);
7805 return(-1);
7806 }
7807 entity->content = buf->content;
7808 buf->content = NULL;
7809 xmlBufferFree(buf);
7810
7811 return(0);
7812}
7813
7814/**
Owen Taylor3473f882001-02-23 17:55:21 +00007815 * xmlParseStringPEReference:
7816 * @ctxt: an XML parser context
7817 * @str: a pointer to an index in the string
7818 *
7819 * parse PEReference declarations
7820 *
7821 * [69] PEReference ::= '%' Name ';'
7822 *
7823 * [ WFC: No Recursion ]
7824 * A parsed entity must not contain a recursive
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00007825 * reference to itself, either directly or indirectly.
Owen Taylor3473f882001-02-23 17:55:21 +00007826 *
7827 * [ WFC: Entity Declared ]
7828 * In a document without any DTD, a document with only an internal DTD
7829 * subset which contains no parameter entity references, or a document
7830 * with "standalone='yes'", ... ... The declaration of a parameter
7831 * entity must precede any reference to it...
7832 *
7833 * [ VC: Entity Declared ]
7834 * In a document with an external subset or external parameter entities
7835 * with "standalone='no'", ... ... The declaration of a parameter entity
7836 * must precede any reference to it...
7837 *
7838 * [ WFC: In DTD ]
7839 * Parameter-entity references may only appear in the DTD.
7840 * NOTE: misleading but this is handled.
7841 *
7842 * Returns the string of the entity content.
7843 * str is updated to the current value of the index
7844 */
Daniel Veillard8ed10722009-08-20 19:17:36 +02007845static xmlEntityPtr
Owen Taylor3473f882001-02-23 17:55:21 +00007846xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
7847 const xmlChar *ptr;
7848 xmlChar cur;
7849 xmlChar *name;
7850 xmlEntityPtr entity = NULL;
7851
7852 if ((str == NULL) || (*str == NULL)) return(NULL);
7853 ptr = *str;
7854 cur = *ptr;
Daniel Veillard0161e632008-08-28 15:36:32 +00007855 if (cur != '%')
7856 return(NULL);
7857 ptr++;
Daniel Veillard0161e632008-08-28 15:36:32 +00007858 name = xmlParseStringName(ctxt, &ptr);
7859 if (name == NULL) {
7860 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7861 "xmlParseStringPEReference: no name\n");
7862 *str = ptr;
7863 return(NULL);
7864 }
7865 cur = *ptr;
7866 if (cur != ';') {
7867 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7868 xmlFree(name);
7869 *str = ptr;
7870 return(NULL);
7871 }
7872 ptr++;
7873
7874 /*
7875 * Increate the number of entity references parsed
7876 */
7877 ctxt->nbentities++;
7878
7879 /*
7880 * Request the entity from SAX
7881 */
7882 if ((ctxt->sax != NULL) &&
7883 (ctxt->sax->getParameterEntity != NULL))
7884 entity = ctxt->sax->getParameterEntity(ctxt->userData,
7885 name);
7886 if (entity == NULL) {
7887 /*
7888 * [ WFC: Entity Declared ]
7889 * In a document without any DTD, a document with only an
7890 * internal DTD subset which contains no parameter entity
7891 * references, or a document with "standalone='yes'", ...
7892 * ... The declaration of a parameter entity must precede
7893 * any reference to it...
7894 */
7895 if ((ctxt->standalone == 1) ||
7896 ((ctxt->hasExternalSubset == 0) && (ctxt->hasPErefs == 0))) {
7897 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7898 "PEReference: %%%s; not found\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00007899 } else {
Daniel Veillard0161e632008-08-28 15:36:32 +00007900 /*
7901 * [ VC: Entity Declared ]
7902 * In a document with an external subset or external
7903 * parameter entities with "standalone='no'", ...
7904 * ... The declaration of a parameter entity must
7905 * precede any reference to it...
7906 */
7907 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7908 "PEReference: %%%s; not found\n",
7909 name, NULL);
7910 ctxt->valid = 0;
7911 }
7912 } else {
7913 /*
7914 * Internal checking in case the entity quest barfed
7915 */
7916 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
7917 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
7918 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7919 "%%%s; is not a parameter entity\n",
7920 name, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007921 }
7922 }
Daniel Veillard0161e632008-08-28 15:36:32 +00007923 ctxt->hasPErefs = 1;
7924 xmlFree(name);
Owen Taylor3473f882001-02-23 17:55:21 +00007925 *str = ptr;
7926 return(entity);
7927}
7928
7929/**
7930 * xmlParseDocTypeDecl:
7931 * @ctxt: an XML parser context
7932 *
7933 * parse a DOCTYPE declaration
7934 *
7935 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
7936 * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
7937 *
7938 * [ VC: Root Element Type ]
7939 * The Name in the document type declaration must match the element
7940 * type of the root element.
7941 */
7942
7943void
7944xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007945 const xmlChar *name = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00007946 xmlChar *ExternalID = NULL;
7947 xmlChar *URI = NULL;
7948
7949 /*
7950 * We know that '<!DOCTYPE' has been detected.
7951 */
7952 SKIP(9);
7953
7954 SKIP_BLANKS;
7955
7956 /*
7957 * Parse the DOCTYPE name.
7958 */
7959 name = xmlParseName(ctxt);
7960 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007961 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7962 "xmlParseDocTypeDecl : no DOCTYPE name !\n");
Owen Taylor3473f882001-02-23 17:55:21 +00007963 }
7964 ctxt->intSubName = name;
7965
7966 SKIP_BLANKS;
7967
7968 /*
7969 * Check for SystemID and ExternalID
7970 */
7971 URI = xmlParseExternalID(ctxt, &ExternalID, 1);
7972
7973 if ((URI != NULL) || (ExternalID != NULL)) {
7974 ctxt->hasExternalSubset = 1;
7975 }
7976 ctxt->extSubURI = URI;
7977 ctxt->extSubSystem = ExternalID;
7978
7979 SKIP_BLANKS;
7980
7981 /*
7982 * Create and update the internal subset.
7983 */
7984 if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
7985 (!ctxt->disableSAX))
7986 ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
7987
7988 /*
7989 * Is there any internal subset declarations ?
7990 * they are handled separately in xmlParseInternalSubset()
7991 */
7992 if (RAW == '[')
7993 return;
7994
7995 /*
7996 * We should be at the end of the DOCTYPE declaration.
7997 */
7998 if (RAW != '>') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007999 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008000 }
8001 NEXT;
8002}
8003
8004/**
Daniel Veillardcbaf3992001-12-31 16:16:02 +00008005 * xmlParseInternalSubset:
Owen Taylor3473f882001-02-23 17:55:21 +00008006 * @ctxt: an XML parser context
8007 *
8008 * parse the internal subset declaration
8009 *
8010 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
8011 */
8012
Daniel Veillard56a4cb82001-03-24 17:00:36 +00008013static void
Owen Taylor3473f882001-02-23 17:55:21 +00008014xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
8015 /*
8016 * Is there any DTD definition ?
8017 */
8018 if (RAW == '[') {
8019 ctxt->instate = XML_PARSER_DTD;
8020 NEXT;
8021 /*
8022 * Parse the succession of Markup declarations and
8023 * PEReferences.
8024 * Subsequence (markupdecl | PEReference | S)*
8025 */
8026 while (RAW != ']') {
8027 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00008028 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00008029
8030 SKIP_BLANKS;
8031 xmlParseMarkupDecl(ctxt);
8032 xmlParsePEReference(ctxt);
8033
8034 /*
8035 * Pop-up of finished entities.
8036 */
8037 while ((RAW == 0) && (ctxt->inputNr > 1))
8038 xmlPopInput(ctxt);
8039
8040 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008041 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
Owen Taylor3473f882001-02-23 17:55:21 +00008042 "xmlParseInternalSubset: error detected in Markup declaration\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008043 break;
8044 }
8045 }
8046 if (RAW == ']') {
8047 NEXT;
8048 SKIP_BLANKS;
8049 }
8050 }
8051
8052 /*
8053 * We should be at the end of the DOCTYPE declaration.
8054 */
8055 if (RAW != '>') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008056 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008057 }
8058 NEXT;
8059}
8060
Daniel Veillard81273902003-09-30 00:43:48 +00008061#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +00008062/**
8063 * xmlParseAttribute:
8064 * @ctxt: an XML parser context
8065 * @value: a xmlChar ** used to store the value of the attribute
8066 *
8067 * parse an attribute
8068 *
8069 * [41] Attribute ::= Name Eq AttValue
8070 *
8071 * [ WFC: No External Entity References ]
8072 * Attribute values cannot contain direct or indirect entity references
8073 * to external entities.
8074 *
8075 * [ WFC: No < in Attribute Values ]
8076 * The replacement text of any entity referred to directly or indirectly in
8077 * an attribute value (other than "&lt;") must not contain a <.
8078 *
8079 * [ VC: Attribute Value Type ]
8080 * The attribute must have been declared; the value must be of the type
8081 * declared for it.
8082 *
8083 * [25] Eq ::= S? '=' S?
8084 *
8085 * With namespace:
8086 *
8087 * [NS 11] Attribute ::= QName Eq AttValue
8088 *
8089 * Also the case QName == xmlns:??? is handled independently as a namespace
8090 * definition.
8091 *
8092 * Returns the attribute name, and the value in *value.
8093 */
8094
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008095const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00008096xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008097 const xmlChar *name;
8098 xmlChar *val;
Owen Taylor3473f882001-02-23 17:55:21 +00008099
8100 *value = NULL;
Daniel Veillard878eab02002-02-19 13:46:09 +00008101 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00008102 name = xmlParseName(ctxt);
8103 if (name == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00008104 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008105 "error parsing attribute name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008106 return(NULL);
8107 }
8108
8109 /*
8110 * read the value
8111 */
8112 SKIP_BLANKS;
8113 if (RAW == '=') {
8114 NEXT;
8115 SKIP_BLANKS;
8116 val = xmlParseAttValue(ctxt);
8117 ctxt->instate = XML_PARSER_CONTENT;
8118 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00008119 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
Owen Taylor3473f882001-02-23 17:55:21 +00008120 "Specification mandate value for attribute %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00008121 return(NULL);
8122 }
8123
8124 /*
8125 * Check that xml:lang conforms to the specification
8126 * No more registered as an error, just generate a warning now
8127 * since this was deprecated in XML second edition
8128 */
8129 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
8130 if (!xmlCheckLanguageID(val)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00008131 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
8132 "Malformed value for xml:lang : %s\n",
8133 val, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008134 }
8135 }
8136
8137 /*
8138 * Check that xml:space conforms to the specification
8139 */
8140 if (xmlStrEqual(name, BAD_CAST "xml:space")) {
8141 if (xmlStrEqual(val, BAD_CAST "default"))
8142 *(ctxt->space) = 0;
8143 else if (xmlStrEqual(val, BAD_CAST "preserve"))
8144 *(ctxt->space) = 1;
8145 else {
Daniel Veillardd8925572005-06-08 22:34:55 +00008146 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
Daniel Veillard642104e2003-03-26 16:32:05 +00008147"Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
Daniel Veillardd8925572005-06-08 22:34:55 +00008148 val, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008149 }
8150 }
8151
8152 *value = val;
8153 return(name);
8154}
8155
8156/**
8157 * xmlParseStartTag:
8158 * @ctxt: an XML parser context
8159 *
8160 * parse a start of tag either for rule element or
8161 * EmptyElement. In both case we don't parse the tag closing chars.
8162 *
8163 * [40] STag ::= '<' Name (S Attribute)* S? '>'
8164 *
8165 * [ WFC: Unique Att Spec ]
8166 * No attribute name may appear more than once in the same start-tag or
8167 * empty-element tag.
8168 *
8169 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
8170 *
8171 * [ WFC: Unique Att Spec ]
8172 * No attribute name may appear more than once in the same start-tag or
8173 * empty-element tag.
8174 *
8175 * With namespace:
8176 *
8177 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
8178 *
8179 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
8180 *
8181 * Returns the element name parsed
8182 */
8183
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008184const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00008185xmlParseStartTag(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008186 const xmlChar *name;
8187 const xmlChar *attname;
Owen Taylor3473f882001-02-23 17:55:21 +00008188 xmlChar *attvalue;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008189 const xmlChar **atts = ctxt->atts;
Owen Taylor3473f882001-02-23 17:55:21 +00008190 int nbatts = 0;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008191 int maxatts = ctxt->maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00008192 int i;
8193
8194 if (RAW != '<') return(NULL);
Daniel Veillard21a0f912001-02-25 19:54:14 +00008195 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00008196
8197 name = xmlParseName(ctxt);
8198 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008199 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00008200 "xmlParseStartTag: invalid element name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008201 return(NULL);
8202 }
8203
8204 /*
8205 * Now parse the attributes, it ends up with the ending
8206 *
8207 * (S Attribute)* S?
8208 */
8209 SKIP_BLANKS;
8210 GROW;
8211
Daniel Veillard21a0f912001-02-25 19:54:14 +00008212 while ((RAW != '>') &&
8213 ((RAW != '/') || (NXT(1) != '>')) &&
Daniel Veillard73b013f2003-09-30 12:36:01 +00008214 (IS_BYTE_CHAR(RAW))) {
Owen Taylor3473f882001-02-23 17:55:21 +00008215 const xmlChar *q = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00008216 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00008217
8218 attname = xmlParseAttribute(ctxt, &attvalue);
8219 if ((attname != NULL) && (attvalue != NULL)) {
8220 /*
8221 * [ WFC: Unique Att Spec ]
8222 * No attribute name may appear more than once in the same
8223 * start-tag or empty-element tag.
8224 */
8225 for (i = 0; i < nbatts;i += 2) {
8226 if (xmlStrEqual(atts[i], attname)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008227 xmlErrAttributeDup(ctxt, NULL, attname);
Owen Taylor3473f882001-02-23 17:55:21 +00008228 xmlFree(attvalue);
8229 goto failed;
8230 }
8231 }
Owen Taylor3473f882001-02-23 17:55:21 +00008232 /*
8233 * Add the pair to atts
8234 */
8235 if (atts == NULL) {
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008236 maxatts = 22; /* allow for 10 attrs by default */
8237 atts = (const xmlChar **)
8238 xmlMalloc(maxatts * sizeof(xmlChar *));
Owen Taylor3473f882001-02-23 17:55:21 +00008239 if (atts == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008240 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008241 if (attvalue != NULL)
8242 xmlFree(attvalue);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008243 goto failed;
Owen Taylor3473f882001-02-23 17:55:21 +00008244 }
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008245 ctxt->atts = atts;
8246 ctxt->maxatts = maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00008247 } else if (nbatts + 4 > maxatts) {
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008248 const xmlChar **n;
8249
Owen Taylor3473f882001-02-23 17:55:21 +00008250 maxatts *= 2;
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008251 n = (const xmlChar **) xmlRealloc((void *) atts,
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008252 maxatts * sizeof(const xmlChar *));
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008253 if (n == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008254 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008255 if (attvalue != NULL)
8256 xmlFree(attvalue);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008257 goto failed;
Owen Taylor3473f882001-02-23 17:55:21 +00008258 }
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008259 atts = n;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008260 ctxt->atts = atts;
8261 ctxt->maxatts = maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00008262 }
8263 atts[nbatts++] = attname;
8264 atts[nbatts++] = attvalue;
8265 atts[nbatts] = NULL;
8266 atts[nbatts + 1] = NULL;
8267 } else {
Owen Taylor3473f882001-02-23 17:55:21 +00008268 if (attvalue != NULL)
8269 xmlFree(attvalue);
8270 }
8271
8272failed:
8273
Daniel Veillard3772de32002-12-17 10:31:45 +00008274 GROW
Owen Taylor3473f882001-02-23 17:55:21 +00008275 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
8276 break;
William M. Brack76e95df2003-10-18 16:20:14 +00008277 if (!IS_BLANK_CH(RAW)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008278 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
8279 "attributes construct error\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008280 }
8281 SKIP_BLANKS;
Daniel Veillard02111c12003-02-24 19:14:52 +00008282 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
8283 (attname == NULL) && (attvalue == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008284 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
8285 "xmlParseStartTag: problem parsing attributes\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008286 break;
8287 }
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008288 SHRINK;
Owen Taylor3473f882001-02-23 17:55:21 +00008289 GROW;
8290 }
8291
8292 /*
8293 * SAX: Start of Element !
8294 */
8295 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008296 (!ctxt->disableSAX)) {
8297 if (nbatts > 0)
8298 ctxt->sax->startElement(ctxt->userData, name, atts);
8299 else
8300 ctxt->sax->startElement(ctxt->userData, name, NULL);
8301 }
Owen Taylor3473f882001-02-23 17:55:21 +00008302
8303 if (atts != NULL) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008304 /* Free only the content strings */
8305 for (i = 1;i < nbatts;i+=2)
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008306 if (atts[i] != NULL)
8307 xmlFree((xmlChar *) atts[i]);
Owen Taylor3473f882001-02-23 17:55:21 +00008308 }
8309 return(name);
8310}
8311
8312/**
Daniel Veillard0fb18932003-09-07 09:14:37 +00008313 * xmlParseEndTag1:
Owen Taylor3473f882001-02-23 17:55:21 +00008314 * @ctxt: an XML parser context
Daniel Veillard0fb18932003-09-07 09:14:37 +00008315 * @line: line of the start tag
8316 * @nsNr: number of namespaces on the start tag
Owen Taylor3473f882001-02-23 17:55:21 +00008317 *
8318 * parse an end of tag
8319 *
8320 * [42] ETag ::= '</' Name S? '>'
8321 *
8322 * With namespace
8323 *
8324 * [NS 9] ETag ::= '</' QName S? '>'
8325 */
8326
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00008327static void
Daniel Veillard0fb18932003-09-07 09:14:37 +00008328xmlParseEndTag1(xmlParserCtxtPtr ctxt, int line) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008329 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00008330
8331 GROW;
8332 if ((RAW != '<') || (NXT(1) != '/')) {
Daniel Veillardf403d292003-10-05 13:51:35 +00008333 xmlFatalErrMsg(ctxt, XML_ERR_LTSLASH_REQUIRED,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008334 "xmlParseEndTag: '</' not found\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008335 return;
8336 }
8337 SKIP(2);
8338
Daniel Veillard46de64e2002-05-29 08:21:33 +00008339 name = xmlParseNameAndCompare(ctxt,ctxt->name);
Owen Taylor3473f882001-02-23 17:55:21 +00008340
8341 /*
8342 * We should definitely be at the ending "S? '>'" part
8343 */
8344 GROW;
8345 SKIP_BLANKS;
Daniel Veillard73b013f2003-09-30 12:36:01 +00008346 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008347 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008348 } else
Daniel Veillard21a0f912001-02-25 19:54:14 +00008349 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00008350
8351 /*
8352 * [ WFC: Element Type Match ]
8353 * The Name in an element's end-tag must match the element type in the
8354 * start-tag.
8355 *
8356 */
Daniel Veillard46de64e2002-05-29 08:21:33 +00008357 if (name != (xmlChar*)1) {
Daniel Veillardf403d292003-10-05 13:51:35 +00008358 if (name == NULL) name = BAD_CAST "unparseable";
8359 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00008360 "Opening and ending tag mismatch: %s line %d and %s\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00008361 ctxt->name, line, name);
Owen Taylor3473f882001-02-23 17:55:21 +00008362 }
8363
8364 /*
8365 * SAX: End of Tag
8366 */
8367 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
8368 (!ctxt->disableSAX))
Daniel Veillard46de64e2002-05-29 08:21:33 +00008369 ctxt->sax->endElement(ctxt->userData, ctxt->name);
Owen Taylor3473f882001-02-23 17:55:21 +00008370
Daniel Veillarde57ec792003-09-10 10:50:59 +00008371 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00008372 spacePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00008373 return;
8374}
8375
8376/**
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00008377 * xmlParseEndTag:
8378 * @ctxt: an XML parser context
8379 *
8380 * parse an end of tag
8381 *
8382 * [42] ETag ::= '</' Name S? '>'
8383 *
8384 * With namespace
8385 *
8386 * [NS 9] ETag ::= '</' QName S? '>'
8387 */
8388
8389void
8390xmlParseEndTag(xmlParserCtxtPtr ctxt) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00008391 xmlParseEndTag1(ctxt, 0);
8392}
Daniel Veillard81273902003-09-30 00:43:48 +00008393#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillard0fb18932003-09-07 09:14:37 +00008394
8395/************************************************************************
8396 * *
8397 * SAX 2 specific operations *
8398 * *
8399 ************************************************************************/
8400
Daniel Veillard0fb18932003-09-07 09:14:37 +00008401/*
8402 * xmlGetNamespace:
8403 * @ctxt: an XML parser context
8404 * @prefix: the prefix to lookup
8405 *
8406 * Lookup the namespace name for the @prefix (which ca be NULL)
8407 * The prefix must come from the @ctxt->dict dictionnary
8408 *
8409 * Returns the namespace name or NULL if not bound
8410 */
8411static const xmlChar *
8412xmlGetNamespace(xmlParserCtxtPtr ctxt, const xmlChar *prefix) {
8413 int i;
8414
Daniel Veillarde57ec792003-09-10 10:50:59 +00008415 if (prefix == ctxt->str_xml) return(ctxt->str_xml_ns);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008416 for (i = ctxt->nsNr - 2;i >= 0;i-=2)
Daniel Veillarde57ec792003-09-10 10:50:59 +00008417 if (ctxt->nsTab[i] == prefix) {
8418 if ((prefix == NULL) && (*ctxt->nsTab[i + 1] == 0))
8419 return(NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008420 return(ctxt->nsTab[i + 1]);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008421 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008422 return(NULL);
8423}
8424
8425/**
Daniel Veillard0fb18932003-09-07 09:14:37 +00008426 * xmlParseQName:
8427 * @ctxt: an XML parser context
8428 * @prefix: pointer to store the prefix part
8429 *
8430 * parse an XML Namespace QName
8431 *
8432 * [6] QName ::= (Prefix ':')? LocalPart
8433 * [7] Prefix ::= NCName
8434 * [8] LocalPart ::= NCName
8435 *
8436 * Returns the Name parsed or NULL
8437 */
8438
8439static const xmlChar *
8440xmlParseQName(xmlParserCtxtPtr ctxt, const xmlChar **prefix) {
8441 const xmlChar *l, *p;
8442
8443 GROW;
8444
8445 l = xmlParseNCName(ctxt);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008446 if (l == NULL) {
8447 if (CUR == ':') {
8448 l = xmlParseName(ctxt);
8449 if (l != NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008450 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8451 "Failed to parse QName '%s'\n", l, NULL, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008452 *prefix = NULL;
8453 return(l);
8454 }
8455 }
8456 return(NULL);
8457 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008458 if (CUR == ':') {
8459 NEXT;
8460 p = l;
8461 l = xmlParseNCName(ctxt);
8462 if (l == NULL) {
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008463 xmlChar *tmp;
8464
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008465 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8466 "Failed to parse QName '%s:'\n", p, NULL, NULL);
Daniel Veillardae0765b2008-07-31 19:54:59 +00008467 l = xmlParseNmtoken(ctxt);
8468 if (l == NULL)
8469 tmp = xmlBuildQName(BAD_CAST "", p, NULL, 0);
8470 else {
8471 tmp = xmlBuildQName(l, p, NULL, 0);
8472 xmlFree((char *)l);
8473 }
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008474 p = xmlDictLookup(ctxt->dict, tmp, -1);
8475 if (tmp != NULL) xmlFree(tmp);
8476 *prefix = NULL;
8477 return(p);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008478 }
8479 if (CUR == ':') {
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008480 xmlChar *tmp;
8481
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008482 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8483 "Failed to parse QName '%s:%s:'\n", p, l, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008484 NEXT;
8485 tmp = (xmlChar *) xmlParseName(ctxt);
8486 if (tmp != NULL) {
8487 tmp = xmlBuildQName(tmp, l, NULL, 0);
8488 l = xmlDictLookup(ctxt->dict, tmp, -1);
8489 if (tmp != NULL) xmlFree(tmp);
8490 *prefix = p;
8491 return(l);
8492 }
8493 tmp = xmlBuildQName(BAD_CAST "", l, NULL, 0);
8494 l = xmlDictLookup(ctxt->dict, tmp, -1);
8495 if (tmp != NULL) xmlFree(tmp);
8496 *prefix = p;
8497 return(l);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008498 }
8499 *prefix = p;
8500 } else
8501 *prefix = NULL;
8502 return(l);
8503}
8504
8505/**
8506 * xmlParseQNameAndCompare:
8507 * @ctxt: an XML parser context
8508 * @name: the localname
8509 * @prefix: the prefix, if any.
8510 *
8511 * parse an XML name and compares for match
8512 * (specialized for endtag parsing)
8513 *
8514 * Returns NULL for an illegal name, (xmlChar*) 1 for success
8515 * and the name for mismatch
8516 */
8517
8518static const xmlChar *
8519xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *name,
8520 xmlChar const *prefix) {
Daniel Veillardd44b9362009-09-07 12:15:08 +02008521 const xmlChar *cmp;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008522 const xmlChar *in;
8523 const xmlChar *ret;
8524 const xmlChar *prefix2;
8525
8526 if (prefix == NULL) return(xmlParseNameAndCompare(ctxt, name));
8527
8528 GROW;
8529 in = ctxt->input->cur;
Daniel Veillardd44b9362009-09-07 12:15:08 +02008530
Daniel Veillard0fb18932003-09-07 09:14:37 +00008531 cmp = prefix;
8532 while (*in != 0 && *in == *cmp) {
8533 ++in;
8534 ++cmp;
8535 }
8536 if ((*cmp == 0) && (*in == ':')) {
8537 in++;
8538 cmp = name;
8539 while (*in != 0 && *in == *cmp) {
8540 ++in;
8541 ++cmp;
8542 }
William M. Brack76e95df2003-10-18 16:20:14 +00008543 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00008544 /* success */
8545 ctxt->input->cur = in;
8546 return((const xmlChar*) 1);
8547 }
8548 }
8549 /*
8550 * all strings coms from the dictionary, equality can be done directly
8551 */
8552 ret = xmlParseQName (ctxt, &prefix2);
8553 if ((ret == name) && (prefix == prefix2))
8554 return((const xmlChar*) 1);
8555 return ret;
8556}
8557
8558/**
8559 * xmlParseAttValueInternal:
8560 * @ctxt: an XML parser context
8561 * @len: attribute len result
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008562 * @alloc: whether the attribute was reallocated as a new string
8563 * @normalize: if 1 then further non-CDATA normalization must be done
Daniel Veillard0fb18932003-09-07 09:14:37 +00008564 *
8565 * parse a value for an attribute.
8566 * NOTE: if no normalization is needed, the routine will return pointers
8567 * directly from the data buffer.
8568 *
8569 * 3.3.3 Attribute-Value Normalization:
8570 * Before the value of an attribute is passed to the application or
8571 * checked for validity, the XML processor must normalize it as follows:
8572 * - a character reference is processed by appending the referenced
8573 * character to the attribute value
8574 * - an entity reference is processed by recursively processing the
8575 * replacement text of the entity
8576 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
8577 * appending #x20 to the normalized value, except that only a single
8578 * #x20 is appended for a "#xD#xA" sequence that is part of an external
8579 * parsed entity or the literal entity value of an internal parsed entity
8580 * - other characters are processed by appending them to the normalized value
8581 * If the declared value is not CDATA, then the XML processor must further
8582 * process the normalized attribute value by discarding any leading and
8583 * trailing space (#x20) characters, and by replacing sequences of space
8584 * (#x20) characters by a single space (#x20) character.
8585 * All attributes for which no declaration has been read should be treated
8586 * by a non-validating parser as if declared CDATA.
8587 *
8588 * Returns the AttValue parsed or NULL. The value has to be freed by the
8589 * caller if it was copied, this can be detected by val[*len] == 0.
8590 */
8591
8592static xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008593xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *len, int *alloc,
8594 int normalize)
Daniel Veillarde57ec792003-09-10 10:50:59 +00008595{
Daniel Veillard0fb18932003-09-07 09:14:37 +00008596 xmlChar limit = 0;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008597 const xmlChar *in = NULL, *start, *end, *last;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008598 xmlChar *ret = NULL;
8599
8600 GROW;
8601 in = (xmlChar *) CUR_PTR;
8602 if (*in != '"' && *in != '\'') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008603 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008604 return (NULL);
8605 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008606 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008607
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008608 /*
8609 * try to handle in this routine the most common case where no
8610 * allocation of a new string is required and where content is
8611 * pure ASCII.
8612 */
8613 limit = *in++;
8614 end = ctxt->input->end;
8615 start = in;
8616 if (in >= end) {
8617 const xmlChar *oldbase = ctxt->input->base;
8618 GROW;
8619 if (oldbase != ctxt->input->base) {
8620 long delta = ctxt->input->base - oldbase;
8621 start = start + delta;
8622 in = in + delta;
8623 }
8624 end = ctxt->input->end;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008625 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008626 if (normalize) {
8627 /*
8628 * Skip any leading spaces
8629 */
8630 while ((in < end) && (*in != limit) &&
8631 ((*in == 0x20) || (*in == 0x9) ||
8632 (*in == 0xA) || (*in == 0xD))) {
8633 in++;
8634 start = in;
8635 if (in >= end) {
8636 const xmlChar *oldbase = ctxt->input->base;
8637 GROW;
8638 if (oldbase != ctxt->input->base) {
8639 long delta = ctxt->input->base - oldbase;
8640 start = start + delta;
8641 in = in + delta;
8642 }
8643 end = ctxt->input->end;
8644 }
8645 }
8646 while ((in < end) && (*in != limit) && (*in >= 0x20) &&
8647 (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
8648 if ((*in++ == 0x20) && (*in == 0x20)) break;
8649 if (in >= end) {
8650 const xmlChar *oldbase = ctxt->input->base;
8651 GROW;
8652 if (oldbase != ctxt->input->base) {
8653 long delta = ctxt->input->base - oldbase;
8654 start = start + delta;
8655 in = in + delta;
8656 }
8657 end = ctxt->input->end;
8658 }
8659 }
8660 last = in;
8661 /*
8662 * skip the trailing blanks
8663 */
Daniel Veillardc6e20e42003-09-11 16:30:26 +00008664 while ((last[-1] == 0x20) && (last > start)) last--;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008665 while ((in < end) && (*in != limit) &&
8666 ((*in == 0x20) || (*in == 0x9) ||
8667 (*in == 0xA) || (*in == 0xD))) {
8668 in++;
8669 if (in >= end) {
8670 const xmlChar *oldbase = ctxt->input->base;
8671 GROW;
8672 if (oldbase != ctxt->input->base) {
8673 long delta = ctxt->input->base - oldbase;
8674 start = start + delta;
8675 in = in + delta;
8676 last = last + delta;
8677 }
8678 end = ctxt->input->end;
8679 }
8680 }
8681 if (*in != limit) goto need_complex;
8682 } else {
8683 while ((in < end) && (*in != limit) && (*in >= 0x20) &&
8684 (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
8685 in++;
8686 if (in >= end) {
8687 const xmlChar *oldbase = ctxt->input->base;
8688 GROW;
8689 if (oldbase != ctxt->input->base) {
8690 long delta = ctxt->input->base - oldbase;
8691 start = start + delta;
8692 in = in + delta;
8693 }
8694 end = ctxt->input->end;
8695 }
8696 }
8697 last = in;
8698 if (*in != limit) goto need_complex;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008699 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008700 in++;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008701 if (len != NULL) {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008702 *len = last - start;
8703 ret = (xmlChar *) start;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008704 } else {
Daniel Veillarde57ec792003-09-10 10:50:59 +00008705 if (alloc) *alloc = 1;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008706 ret = xmlStrndup(start, last - start);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008707 }
8708 CUR_PTR = in;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008709 if (alloc) *alloc = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008710 return ret;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008711need_complex:
8712 if (alloc) *alloc = 1;
8713 return xmlParseAttValueComplex(ctxt, len, normalize);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008714}
8715
8716/**
8717 * xmlParseAttribute2:
8718 * @ctxt: an XML parser context
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008719 * @pref: the element prefix
8720 * @elem: the element name
8721 * @prefix: a xmlChar ** used to store the value of the attribute prefix
Daniel Veillard0fb18932003-09-07 09:14:37 +00008722 * @value: a xmlChar ** used to store the value of the attribute
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008723 * @len: an int * to save the length of the attribute
8724 * @alloc: an int * to indicate if the attribute was allocated
Daniel Veillard0fb18932003-09-07 09:14:37 +00008725 *
8726 * parse an attribute in the new SAX2 framework.
8727 *
8728 * Returns the attribute name, and the value in *value, .
8729 */
8730
8731static const xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008732xmlParseAttribute2(xmlParserCtxtPtr ctxt,
Daniel Veillard97c9ce22008-03-25 16:52:41 +00008733 const xmlChar * pref, const xmlChar * elem,
8734 const xmlChar ** prefix, xmlChar ** value,
8735 int *len, int *alloc)
8736{
Daniel Veillard0fb18932003-09-07 09:14:37 +00008737 const xmlChar *name;
Daniel Veillardd8925572005-06-08 22:34:55 +00008738 xmlChar *val, *internal_val = NULL;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008739 int normalize = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008740
8741 *value = NULL;
8742 GROW;
8743 name = xmlParseQName(ctxt, prefix);
8744 if (name == NULL) {
Daniel Veillard97c9ce22008-03-25 16:52:41 +00008745 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8746 "error parsing attribute name\n");
8747 return (NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008748 }
8749
8750 /*
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008751 * get the type if needed
8752 */
8753 if (ctxt->attsSpecial != NULL) {
8754 int type;
8755
8756 type = (int) (long) xmlHashQLookup2(ctxt->attsSpecial,
Daniel Veillard97c9ce22008-03-25 16:52:41 +00008757 pref, elem, *prefix, name);
8758 if (type != 0)
8759 normalize = 1;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008760 }
8761
8762 /*
Daniel Veillard0fb18932003-09-07 09:14:37 +00008763 * read the value
8764 */
8765 SKIP_BLANKS;
8766 if (RAW == '=') {
8767 NEXT;
Daniel Veillard97c9ce22008-03-25 16:52:41 +00008768 SKIP_BLANKS;
8769 val = xmlParseAttValueInternal(ctxt, len, alloc, normalize);
8770 if (normalize) {
8771 /*
8772 * Sometimes a second normalisation pass for spaces is needed
8773 * but that only happens if charrefs or entities refernces
8774 * have been used in the attribute value, i.e. the attribute
8775 * value have been extracted in an allocated string already.
8776 */
8777 if (*alloc) {
8778 const xmlChar *val2;
8779
8780 val2 = xmlAttrNormalizeSpace2(ctxt, val, len);
Daniel Veillardae0765b2008-07-31 19:54:59 +00008781 if ((val2 != NULL) && (val2 != val)) {
Daniel Veillard97c9ce22008-03-25 16:52:41 +00008782 xmlFree(val);
Daniel Veillard6a31b832008-03-26 14:06:44 +00008783 val = (xmlChar *) val2;
Daniel Veillard97c9ce22008-03-25 16:52:41 +00008784 }
8785 }
8786 }
8787 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008788 } else {
Daniel Veillard97c9ce22008-03-25 16:52:41 +00008789 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
8790 "Specification mandate value for attribute %s\n",
8791 name);
8792 return (NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008793 }
8794
Daniel Veillard97c9ce22008-03-25 16:52:41 +00008795 if (*prefix == ctxt->str_xml) {
8796 /*
8797 * Check that xml:lang conforms to the specification
8798 * No more registered as an error, just generate a warning now
8799 * since this was deprecated in XML second edition
8800 */
8801 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "lang"))) {
8802 internal_val = xmlStrndup(val, *len);
8803 if (!xmlCheckLanguageID(internal_val)) {
8804 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
8805 "Malformed value for xml:lang : %s\n",
8806 internal_val, NULL);
8807 }
8808 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008809
Daniel Veillard97c9ce22008-03-25 16:52:41 +00008810 /*
8811 * Check that xml:space conforms to the specification
8812 */
8813 if (xmlStrEqual(name, BAD_CAST "space")) {
8814 internal_val = xmlStrndup(val, *len);
8815 if (xmlStrEqual(internal_val, BAD_CAST "default"))
8816 *(ctxt->space) = 0;
8817 else if (xmlStrEqual(internal_val, BAD_CAST "preserve"))
8818 *(ctxt->space) = 1;
8819 else {
8820 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
8821 "Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
8822 internal_val, NULL);
8823 }
8824 }
8825 if (internal_val) {
8826 xmlFree(internal_val);
8827 }
8828 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008829
8830 *value = val;
Daniel Veillard97c9ce22008-03-25 16:52:41 +00008831 return (name);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008832}
Daniel Veillard0fb18932003-09-07 09:14:37 +00008833/**
8834 * xmlParseStartTag2:
8835 * @ctxt: an XML parser context
8836 *
8837 * parse a start of tag either for rule element or
8838 * EmptyElement. In both case we don't parse the tag closing chars.
8839 * This routine is called when running SAX2 parsing
8840 *
8841 * [40] STag ::= '<' Name (S Attribute)* S? '>'
8842 *
8843 * [ WFC: Unique Att Spec ]
8844 * No attribute name may appear more than once in the same start-tag or
8845 * empty-element tag.
8846 *
8847 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
8848 *
8849 * [ WFC: Unique Att Spec ]
8850 * No attribute name may appear more than once in the same start-tag or
8851 * empty-element tag.
8852 *
8853 * With namespace:
8854 *
8855 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
8856 *
8857 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
8858 *
8859 * Returns the element name parsed
8860 */
8861
8862static const xmlChar *
8863xmlParseStartTag2(xmlParserCtxtPtr ctxt, const xmlChar **pref,
Daniel Veillardc82c57e2004-01-12 16:24:34 +00008864 const xmlChar **URI, int *tlen) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00008865 const xmlChar *localname;
8866 const xmlChar *prefix;
8867 const xmlChar *attname;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008868 const xmlChar *aprefix;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008869 const xmlChar *nsname;
8870 xmlChar *attvalue;
8871 const xmlChar **atts = ctxt->atts;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008872 int maxatts = ctxt->maxatts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008873 int nratts, nbatts, nbdef;
Daniel Veillarddcec6722006-10-15 20:32:53 +00008874 int i, j, nbNs, attval, oldline, oldcol;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008875 const xmlChar *base;
8876 unsigned long cur;
Daniel Veillard365cf672005-06-09 08:18:24 +00008877 int nsNr = ctxt->nsNr;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008878
8879 if (RAW != '<') return(NULL);
8880 NEXT1;
8881
8882 /*
8883 * NOTE: it is crucial with the SAX2 API to never call SHRINK beyond that
8884 * point since the attribute values may be stored as pointers to
8885 * the buffer and calling SHRINK would destroy them !
8886 * The Shrinking is only possible once the full set of attribute
8887 * callbacks have been done.
8888 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00008889reparse:
Daniel Veillard0fb18932003-09-07 09:14:37 +00008890 SHRINK;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008891 base = ctxt->input->base;
8892 cur = ctxt->input->cur - ctxt->input->base;
Daniel Veillarddcec6722006-10-15 20:32:53 +00008893 oldline = ctxt->input->line;
8894 oldcol = ctxt->input->col;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008895 nbatts = 0;
8896 nratts = 0;
8897 nbdef = 0;
8898 nbNs = 0;
8899 attval = 0;
Daniel Veillard365cf672005-06-09 08:18:24 +00008900 /* Forget any namespaces added during an earlier parse of this element. */
8901 ctxt->nsNr = nsNr;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008902
8903 localname = xmlParseQName(ctxt, &prefix);
8904 if (localname == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008905 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8906 "StartTag: invalid element name\n");
Daniel Veillard0fb18932003-09-07 09:14:37 +00008907 return(NULL);
8908 }
Daniel Veillardc82c57e2004-01-12 16:24:34 +00008909 *tlen = ctxt->input->cur - ctxt->input->base - cur;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008910
8911 /*
8912 * Now parse the attributes, it ends up with the ending
8913 *
8914 * (S Attribute)* S?
8915 */
8916 SKIP_BLANKS;
8917 GROW;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008918 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008919
8920 while ((RAW != '>') &&
8921 ((RAW != '/') || (NXT(1) != '>')) &&
Daniel Veillard73b013f2003-09-30 12:36:01 +00008922 (IS_BYTE_CHAR(RAW))) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00008923 const xmlChar *q = CUR_PTR;
8924 unsigned int cons = ctxt->input->consumed;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008925 int len = -1, alloc = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008926
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008927 attname = xmlParseAttribute2(ctxt, prefix, localname,
8928 &aprefix, &attvalue, &len, &alloc);
Daniel Veillarddcec6722006-10-15 20:32:53 +00008929 if (ctxt->input->base != base) {
8930 if ((attvalue != NULL) && (alloc != 0))
8931 xmlFree(attvalue);
8932 attvalue = NULL;
8933 goto base_changed;
8934 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008935 if ((attname != NULL) && (attvalue != NULL)) {
8936 if (len < 0) len = xmlStrlen(attvalue);
8937 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00008938 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
8939 xmlURIPtr uri;
8940
8941 if (*URL != 0) {
8942 uri = xmlParseURI((const char *) URL);
8943 if (uri == NULL) {
Daniel Veillard37334572008-07-31 08:20:02 +00008944 xmlNsErr(ctxt, XML_WAR_NS_URI,
8945 "xmlns: '%s' is not a valid URI\n",
8946 URL, NULL, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008947 } else {
Daniel Veillardda3fee42008-09-01 13:08:57 +00008948 if (uri->scheme == NULL) {
Daniel Veillard37334572008-07-31 08:20:02 +00008949 xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
8950 "xmlns: URI %s is not absolute\n",
8951 URL, NULL, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008952 }
8953 xmlFreeURI(uri);
8954 }
Daniel Veillard37334572008-07-31 08:20:02 +00008955 if (URL == ctxt->str_xml_ns) {
8956 if (attname != ctxt->str_xml) {
8957 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8958 "xml namespace URI cannot be the default namespace\n",
8959 NULL, NULL, NULL);
8960 }
8961 goto skip_default_ns;
8962 }
8963 if ((len == 29) &&
8964 (xmlStrEqual(URL,
8965 BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
8966 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8967 "reuse of the xmlns namespace name is forbidden\n",
8968 NULL, NULL, NULL);
8969 goto skip_default_ns;
8970 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00008971 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008972 /*
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008973 * check that it's not a defined namespace
Daniel Veillard0fb18932003-09-07 09:14:37 +00008974 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008975 for (j = 1;j <= nbNs;j++)
8976 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
8977 break;
8978 if (j <= nbNs)
8979 xmlErrAttributeDup(ctxt, NULL, attname);
8980 else
8981 if (nsPush(ctxt, NULL, URL) > 0) nbNs++;
Daniel Veillard37334572008-07-31 08:20:02 +00008982skip_default_ns:
Daniel Veillarde57ec792003-09-10 10:50:59 +00008983 if (alloc != 0) xmlFree(attvalue);
8984 SKIP_BLANKS;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008985 continue;
8986 }
8987 if (aprefix == ctxt->str_xmlns) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00008988 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
8989 xmlURIPtr uri;
8990
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008991 if (attname == ctxt->str_xml) {
8992 if (URL != ctxt->str_xml_ns) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008993 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8994 "xml namespace prefix mapped to wrong URI\n",
8995 NULL, NULL, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008996 }
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008997 /*
8998 * Do not keep a namespace definition node
8999 */
Daniel Veillard37334572008-07-31 08:20:02 +00009000 goto skip_ns;
Daniel Veillard3b7840c2003-09-11 23:42:01 +00009001 }
Daniel Veillard37334572008-07-31 08:20:02 +00009002 if (URL == ctxt->str_xml_ns) {
9003 if (attname != ctxt->str_xml) {
9004 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9005 "xml namespace URI mapped to wrong prefix\n",
9006 NULL, NULL, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00009007 }
Daniel Veillard37334572008-07-31 08:20:02 +00009008 goto skip_ns;
9009 }
9010 if (attname == ctxt->str_xmlns) {
9011 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9012 "redefinition of the xmlns prefix is forbidden\n",
9013 NULL, NULL, NULL);
9014 goto skip_ns;
9015 }
9016 if ((len == 29) &&
9017 (xmlStrEqual(URL,
9018 BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
9019 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9020 "reuse of the xmlns namespace name is forbidden\n",
9021 NULL, NULL, NULL);
9022 goto skip_ns;
9023 }
9024 if ((URL == NULL) || (URL[0] == 0)) {
9025 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9026 "xmlns:%s: Empty XML namespace is not allowed\n",
9027 attname, NULL, NULL);
9028 goto skip_ns;
9029 } else {
9030 uri = xmlParseURI((const char *) URL);
9031 if (uri == NULL) {
9032 xmlNsErr(ctxt, XML_WAR_NS_URI,
9033 "xmlns:%s: '%s' is not a valid URI\n",
9034 attname, URL, NULL);
9035 } else {
9036 if ((ctxt->pedantic) && (uri->scheme == NULL)) {
9037 xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
9038 "xmlns:%s: URI %s is not absolute\n",
9039 attname, URL, NULL);
9040 }
9041 xmlFreeURI(uri);
9042 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00009043 }
9044
Daniel Veillard0fb18932003-09-07 09:14:37 +00009045 /*
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009046 * check that it's not a defined namespace
Daniel Veillard0fb18932003-09-07 09:14:37 +00009047 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009048 for (j = 1;j <= nbNs;j++)
9049 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
9050 break;
9051 if (j <= nbNs)
9052 xmlErrAttributeDup(ctxt, aprefix, attname);
9053 else
9054 if (nsPush(ctxt, attname, URL) > 0) nbNs++;
Daniel Veillard37334572008-07-31 08:20:02 +00009055skip_ns:
Daniel Veillarde57ec792003-09-10 10:50:59 +00009056 if (alloc != 0) xmlFree(attvalue);
9057 SKIP_BLANKS;
Daniel Veillardd3999c72004-03-10 16:27:03 +00009058 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009059 continue;
9060 }
9061
9062 /*
9063 * Add the pair to atts
9064 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00009065 if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9066 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00009067 if (attvalue[len] == 0)
9068 xmlFree(attvalue);
9069 goto failed;
9070 }
9071 maxatts = ctxt->maxatts;
9072 atts = ctxt->atts;
9073 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00009074 ctxt->attallocs[nratts++] = alloc;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009075 atts[nbatts++] = attname;
9076 atts[nbatts++] = aprefix;
Daniel Veillarde57ec792003-09-10 10:50:59 +00009077 atts[nbatts++] = NULL; /* the URI will be fetched later */
Daniel Veillard0fb18932003-09-07 09:14:37 +00009078 atts[nbatts++] = attvalue;
9079 attvalue += len;
9080 atts[nbatts++] = attvalue;
9081 /*
9082 * tag if some deallocation is needed
9083 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00009084 if (alloc != 0) attval = 1;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009085 } else {
9086 if ((attvalue != NULL) && (attvalue[len] == 0))
9087 xmlFree(attvalue);
9088 }
9089
Daniel Veillard37334572008-07-31 08:20:02 +00009090failed:
Daniel Veillard0fb18932003-09-07 09:14:37 +00009091
9092 GROW
Daniel Veillarde57ec792003-09-10 10:50:59 +00009093 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009094 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
9095 break;
William M. Brack76e95df2003-10-18 16:20:14 +00009096 if (!IS_BLANK_CH(RAW)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009097 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
9098 "attributes construct error\n");
William M. Brack13dfa872004-09-18 04:52:08 +00009099 break;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009100 }
9101 SKIP_BLANKS;
9102 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
9103 (attname == NULL) && (attvalue == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009104 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
Daniel Veillard0fb18932003-09-07 09:14:37 +00009105 "xmlParseStartTag: problem parsing attributes\n");
Daniel Veillard0fb18932003-09-07 09:14:37 +00009106 break;
9107 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00009108 GROW;
Daniel Veillarde57ec792003-09-10 10:50:59 +00009109 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009110 }
9111
Daniel Veillard0fb18932003-09-07 09:14:37 +00009112 /*
Daniel Veillarde57ec792003-09-10 10:50:59 +00009113 * The attributes defaulting
9114 */
9115 if (ctxt->attsDefault != NULL) {
9116 xmlDefAttrsPtr defaults;
9117
9118 defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix);
9119 if (defaults != NULL) {
9120 for (i = 0;i < defaults->nbAttrs;i++) {
Daniel Veillardae0765b2008-07-31 19:54:59 +00009121 attname = defaults->values[5 * i];
9122 aprefix = defaults->values[5 * i + 1];
Daniel Veillarde57ec792003-09-10 10:50:59 +00009123
9124 /*
9125 * special work for namespaces defaulted defs
9126 */
9127 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
9128 /*
9129 * check that it's not a defined namespace
9130 */
9131 for (j = 1;j <= nbNs;j++)
9132 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
9133 break;
9134 if (j <= nbNs) continue;
9135
9136 nsname = xmlGetNamespace(ctxt, NULL);
Daniel Veillardae0765b2008-07-31 19:54:59 +00009137 if (nsname != defaults->values[5 * i + 2]) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00009138 if (nsPush(ctxt, NULL,
Daniel Veillardae0765b2008-07-31 19:54:59 +00009139 defaults->values[5 * i + 2]) > 0)
Daniel Veillarde57ec792003-09-10 10:50:59 +00009140 nbNs++;
9141 }
9142 } else if (aprefix == ctxt->str_xmlns) {
9143 /*
9144 * check that it's not a defined namespace
9145 */
9146 for (j = 1;j <= nbNs;j++)
9147 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
9148 break;
9149 if (j <= nbNs) continue;
9150
9151 nsname = xmlGetNamespace(ctxt, attname);
9152 if (nsname != defaults->values[2]) {
9153 if (nsPush(ctxt, attname,
Daniel Veillardae0765b2008-07-31 19:54:59 +00009154 defaults->values[5 * i + 2]) > 0)
Daniel Veillarde57ec792003-09-10 10:50:59 +00009155 nbNs++;
9156 }
9157 } else {
9158 /*
9159 * check that it's not a defined attribute
9160 */
9161 for (j = 0;j < nbatts;j+=5) {
9162 if ((attname == atts[j]) && (aprefix == atts[j+1]))
9163 break;
9164 }
9165 if (j < nbatts) continue;
9166
9167 if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9168 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
Daniel Veillard9ee35f32003-09-28 00:19:54 +00009169 return(NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00009170 }
9171 maxatts = ctxt->maxatts;
9172 atts = ctxt->atts;
9173 }
9174 atts[nbatts++] = attname;
9175 atts[nbatts++] = aprefix;
9176 if (aprefix == NULL)
9177 atts[nbatts++] = NULL;
9178 else
9179 atts[nbatts++] = xmlGetNamespace(ctxt, aprefix);
Daniel Veillardae0765b2008-07-31 19:54:59 +00009180 atts[nbatts++] = defaults->values[5 * i + 2];
9181 atts[nbatts++] = defaults->values[5 * i + 3];
9182 if ((ctxt->standalone == 1) &&
9183 (defaults->values[5 * i + 4] != NULL)) {
9184 xmlValidityError(ctxt, XML_DTD_STANDALONE_DEFAULTED,
9185 "standalone: attribute %s on %s defaulted from external subset\n",
9186 attname, localname);
9187 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00009188 nbdef++;
9189 }
9190 }
9191 }
9192 }
9193
Daniel Veillarde70c8772003-11-25 07:21:18 +00009194 /*
9195 * The attributes checkings
9196 */
9197 for (i = 0; i < nbatts;i += 5) {
Kasimier T. Buchcik455472f2005-04-29 10:04:43 +00009198 /*
9199 * The default namespace does not apply to attribute names.
9200 */
9201 if (atts[i + 1] != NULL) {
9202 nsname = xmlGetNamespace(ctxt, atts[i + 1]);
9203 if (nsname == NULL) {
9204 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9205 "Namespace prefix %s for %s on %s is not defined\n",
9206 atts[i + 1], atts[i], localname);
9207 }
9208 atts[i + 2] = nsname;
9209 } else
9210 nsname = NULL;
Daniel Veillarde70c8772003-11-25 07:21:18 +00009211 /*
9212 * [ WFC: Unique Att Spec ]
9213 * No attribute name may appear more than once in the same
9214 * start-tag or empty-element tag.
9215 * As extended by the Namespace in XML REC.
9216 */
9217 for (j = 0; j < i;j += 5) {
9218 if (atts[i] == atts[j]) {
9219 if (atts[i+1] == atts[j+1]) {
9220 xmlErrAttributeDup(ctxt, atts[i+1], atts[i]);
9221 break;
9222 }
9223 if ((nsname != NULL) && (atts[j + 2] == nsname)) {
9224 xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED,
9225 "Namespaced Attribute %s in '%s' redefined\n",
9226 atts[i], nsname, NULL);
9227 break;
9228 }
9229 }
9230 }
9231 }
9232
Daniel Veillarde57ec792003-09-10 10:50:59 +00009233 nsname = xmlGetNamespace(ctxt, prefix);
9234 if ((prefix != NULL) && (nsname == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009235 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9236 "Namespace prefix %s on %s is not defined\n",
9237 prefix, localname, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00009238 }
9239 *pref = prefix;
9240 *URI = nsname;
9241
9242 /*
9243 * SAX: Start of Element !
9244 */
9245 if ((ctxt->sax != NULL) && (ctxt->sax->startElementNs != NULL) &&
9246 (!ctxt->disableSAX)) {
9247 if (nbNs > 0)
9248 ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
9249 nsname, nbNs, &ctxt->nsTab[ctxt->nsNr - 2 * nbNs],
9250 nbatts / 5, nbdef, atts);
9251 else
9252 ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
9253 nsname, 0, NULL, nbatts / 5, nbdef, atts);
9254 }
9255
9256 /*
9257 * Free up attribute allocated strings if needed
9258 */
9259 if (attval != 0) {
9260 for (i = 3,j = 0; j < nratts;i += 5,j++)
9261 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
9262 xmlFree((xmlChar *) atts[i]);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009263 }
9264
9265 return(localname);
Daniel Veillarde57ec792003-09-10 10:50:59 +00009266
9267base_changed:
9268 /*
9269 * the attribute strings are valid iif the base didn't changed
9270 */
9271 if (attval != 0) {
9272 for (i = 3,j = 0; j < nratts;i += 5,j++)
9273 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
9274 xmlFree((xmlChar *) atts[i]);
9275 }
9276 ctxt->input->cur = ctxt->input->base + cur;
Daniel Veillarddcec6722006-10-15 20:32:53 +00009277 ctxt->input->line = oldline;
9278 ctxt->input->col = oldcol;
Daniel Veillarde57ec792003-09-10 10:50:59 +00009279 if (ctxt->wellFormed == 1) {
9280 goto reparse;
9281 }
9282 return(NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009283}
9284
9285/**
9286 * xmlParseEndTag2:
9287 * @ctxt: an XML parser context
9288 * @line: line of the start tag
9289 * @nsNr: number of namespaces on the start tag
9290 *
9291 * parse an end of tag
9292 *
9293 * [42] ETag ::= '</' Name S? '>'
9294 *
9295 * With namespace
9296 *
9297 * [NS 9] ETag ::= '</' QName S? '>'
9298 */
9299
9300static void
9301xmlParseEndTag2(xmlParserCtxtPtr ctxt, const xmlChar *prefix,
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009302 const xmlChar *URI, int line, int nsNr, int tlen) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00009303 const xmlChar *name;
9304
9305 GROW;
9306 if ((RAW != '<') || (NXT(1) != '/')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009307 xmlFatalErr(ctxt, XML_ERR_LTSLASH_REQUIRED, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009308 return;
9309 }
9310 SKIP(2);
9311
William M. Brack13dfa872004-09-18 04:52:08 +00009312 if ((tlen > 0) && (xmlStrncmp(ctxt->input->cur, ctxt->name, tlen) == 0)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009313 if (ctxt->input->cur[tlen] == '>') {
9314 ctxt->input->cur += tlen + 1;
9315 goto done;
9316 }
9317 ctxt->input->cur += tlen;
9318 name = (xmlChar*)1;
9319 } else {
9320 if (prefix == NULL)
9321 name = xmlParseNameAndCompare(ctxt, ctxt->name);
9322 else
9323 name = xmlParseQNameAndCompare(ctxt, ctxt->name, prefix);
9324 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00009325
9326 /*
9327 * We should definitely be at the ending "S? '>'" part
9328 */
9329 GROW;
9330 SKIP_BLANKS;
Daniel Veillard73b013f2003-09-30 12:36:01 +00009331 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009332 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009333 } else
9334 NEXT1;
9335
9336 /*
9337 * [ WFC: Element Type Match ]
9338 * The Name in an element's end-tag must match the element type in the
9339 * start-tag.
9340 *
9341 */
9342 if (name != (xmlChar*)1) {
Daniel Veillardf403d292003-10-05 13:51:35 +00009343 if (name == NULL) name = BAD_CAST "unparseable";
Daniel Veillard852505b2009-08-23 15:44:48 +02009344 if ((line == 0) && (ctxt->node != NULL))
9345 line = ctxt->node->line;
Daniel Veillardf403d292003-10-05 13:51:35 +00009346 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
Daniel Veillard0fb18932003-09-07 09:14:37 +00009347 "Opening and ending tag mismatch: %s line %d and %s\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00009348 ctxt->name, line, name);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009349 }
9350
9351 /*
9352 * SAX: End of Tag
9353 */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009354done:
Daniel Veillard0fb18932003-09-07 09:14:37 +00009355 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
9356 (!ctxt->disableSAX))
9357 ctxt->sax->endElementNs(ctxt->userData, ctxt->name, prefix, URI);
9358
Daniel Veillard0fb18932003-09-07 09:14:37 +00009359 spacePop(ctxt);
9360 if (nsNr != 0)
9361 nsPop(ctxt, nsNr);
9362 return;
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00009363}
9364
9365/**
Owen Taylor3473f882001-02-23 17:55:21 +00009366 * xmlParseCDSect:
9367 * @ctxt: an XML parser context
9368 *
9369 * Parse escaped pure raw content.
9370 *
9371 * [18] CDSect ::= CDStart CData CDEnd
9372 *
9373 * [19] CDStart ::= '<![CDATA['
9374 *
9375 * [20] Data ::= (Char* - (Char* ']]>' Char*))
9376 *
9377 * [21] CDEnd ::= ']]>'
9378 */
9379void
9380xmlParseCDSect(xmlParserCtxtPtr ctxt) {
9381 xmlChar *buf = NULL;
9382 int len = 0;
9383 int size = XML_PARSER_BUFFER_SIZE;
9384 int r, rl;
9385 int s, sl;
9386 int cur, l;
9387 int count = 0;
9388
Daniel Veillard8f597c32003-10-06 08:19:27 +00009389 /* Check 2.6.0 was NXT(0) not RAW */
Daniel Veillarda07050d2003-10-19 14:46:32 +00009390 if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009391 SKIP(9);
9392 } else
9393 return;
9394
9395 ctxt->instate = XML_PARSER_CDATA_SECTION;
9396 r = CUR_CHAR(rl);
William M. Brack871611b2003-10-18 04:53:14 +00009397 if (!IS_CHAR(r)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009398 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009399 ctxt->instate = XML_PARSER_CONTENT;
9400 return;
9401 }
9402 NEXTL(rl);
9403 s = CUR_CHAR(sl);
William M. Brack871611b2003-10-18 04:53:14 +00009404 if (!IS_CHAR(s)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009405 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009406 ctxt->instate = XML_PARSER_CONTENT;
9407 return;
9408 }
9409 NEXTL(sl);
9410 cur = CUR_CHAR(l);
Daniel Veillard3c908dc2003-04-19 00:07:51 +00009411 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00009412 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009413 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009414 return;
9415 }
William M. Brack871611b2003-10-18 04:53:14 +00009416 while (IS_CHAR(cur) &&
Owen Taylor3473f882001-02-23 17:55:21 +00009417 ((r != ']') || (s != ']') || (cur != '>'))) {
9418 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00009419 xmlChar *tmp;
9420
Owen Taylor3473f882001-02-23 17:55:21 +00009421 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00009422 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
9423 if (tmp == NULL) {
9424 xmlFree(buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009425 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009426 return;
9427 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00009428 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00009429 }
9430 COPY_BUF(rl,buf,len,r);
9431 r = s;
9432 rl = sl;
9433 s = cur;
9434 sl = l;
9435 count++;
9436 if (count > 50) {
9437 GROW;
9438 count = 0;
9439 }
9440 NEXTL(l);
9441 cur = CUR_CHAR(l);
9442 }
9443 buf[len] = 0;
9444 ctxt->instate = XML_PARSER_CONTENT;
9445 if (cur != '>') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00009446 xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00009447 "CData section not finished\n%.50s\n", buf);
Owen Taylor3473f882001-02-23 17:55:21 +00009448 xmlFree(buf);
9449 return;
9450 }
9451 NEXTL(l);
9452
9453 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00009454 * OK the buffer is to be consumed as cdata.
Owen Taylor3473f882001-02-23 17:55:21 +00009455 */
9456 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
9457 if (ctxt->sax->cdataBlock != NULL)
9458 ctxt->sax->cdataBlock(ctxt->userData, buf, len);
Daniel Veillard7583a592001-07-08 13:15:55 +00009459 else if (ctxt->sax->characters != NULL)
9460 ctxt->sax->characters(ctxt->userData, buf, len);
Owen Taylor3473f882001-02-23 17:55:21 +00009461 }
9462 xmlFree(buf);
9463}
9464
9465/**
9466 * xmlParseContent:
9467 * @ctxt: an XML parser context
9468 *
9469 * Parse a content:
9470 *
9471 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
9472 */
9473
9474void
9475xmlParseContent(xmlParserCtxtPtr ctxt) {
9476 GROW;
Daniel Veillardfdc91562002-07-01 21:52:03 +00009477 while ((RAW != 0) &&
Daniel Veillard4a9fe382006-09-19 12:44:35 +00009478 ((RAW != '<') || (NXT(1) != '/')) &&
9479 (ctxt->instate != XML_PARSER_EOF)) {
Owen Taylor3473f882001-02-23 17:55:21 +00009480 const xmlChar *test = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00009481 unsigned int cons = ctxt->input->consumed;
Daniel Veillard21a0f912001-02-25 19:54:14 +00009482 const xmlChar *cur = ctxt->input->cur;
Owen Taylor3473f882001-02-23 17:55:21 +00009483
9484 /*
Owen Taylor3473f882001-02-23 17:55:21 +00009485 * First case : a Processing Instruction.
9486 */
Daniel Veillardfdc91562002-07-01 21:52:03 +00009487 if ((*cur == '<') && (cur[1] == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009488 xmlParsePI(ctxt);
9489 }
9490
9491 /*
9492 * Second case : a CDSection
9493 */
Daniel Veillard8f597c32003-10-06 08:19:27 +00009494 /* 2.6.0 test was *cur not RAW */
Daniel Veillarda07050d2003-10-19 14:46:32 +00009495 else if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009496 xmlParseCDSect(ctxt);
9497 }
9498
9499 /*
9500 * Third case : a comment
9501 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00009502 else if ((*cur == '<') && (NXT(1) == '!') &&
Owen Taylor3473f882001-02-23 17:55:21 +00009503 (NXT(2) == '-') && (NXT(3) == '-')) {
9504 xmlParseComment(ctxt);
9505 ctxt->instate = XML_PARSER_CONTENT;
9506 }
9507
9508 /*
9509 * Fourth case : a sub-element.
9510 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00009511 else if (*cur == '<') {
Owen Taylor3473f882001-02-23 17:55:21 +00009512 xmlParseElement(ctxt);
9513 }
9514
9515 /*
9516 * Fifth case : a reference. If if has not been resolved,
9517 * parsing returns it's Name, create the node
9518 */
9519
Daniel Veillard21a0f912001-02-25 19:54:14 +00009520 else if (*cur == '&') {
Owen Taylor3473f882001-02-23 17:55:21 +00009521 xmlParseReference(ctxt);
9522 }
9523
9524 /*
9525 * Last case, text. Note that References are handled directly.
9526 */
9527 else {
9528 xmlParseCharData(ctxt, 0);
9529 }
9530
9531 GROW;
9532 /*
9533 * Pop-up of finished entities.
9534 */
Daniel Veillard561b7f82002-03-20 21:55:57 +00009535 while ((RAW == 0) && (ctxt->inputNr > 1))
Owen Taylor3473f882001-02-23 17:55:21 +00009536 xmlPopInput(ctxt);
9537 SHRINK;
9538
Daniel Veillardfdc91562002-07-01 21:52:03 +00009539 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009540 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
9541 "detected an error in element content\n");
Owen Taylor3473f882001-02-23 17:55:21 +00009542 ctxt->instate = XML_PARSER_EOF;
9543 break;
9544 }
9545 }
9546}
9547
9548/**
9549 * xmlParseElement:
9550 * @ctxt: an XML parser context
9551 *
9552 * parse an XML element, this is highly recursive
9553 *
9554 * [39] element ::= EmptyElemTag | STag content ETag
9555 *
9556 * [ WFC: Element Type Match ]
9557 * The Name in an element's end-tag must match the element type in the
9558 * start-tag.
9559 *
Owen Taylor3473f882001-02-23 17:55:21 +00009560 */
9561
9562void
9563xmlParseElement(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00009564 const xmlChar *name;
Daniel Veillard15495612009-09-05 15:04:41 +02009565 const xmlChar *prefix = NULL;
9566 const xmlChar *URI = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00009567 xmlParserNodeInfo node_info;
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009568 int line, tlen;
Owen Taylor3473f882001-02-23 17:55:21 +00009569 xmlNodePtr ret;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009570 int nsNr = ctxt->nsNr;
Owen Taylor3473f882001-02-23 17:55:21 +00009571
Daniel Veillard8915c152008-08-26 13:05:34 +00009572 if (((unsigned int) ctxt->nameNr > xmlParserMaxDepth) &&
9573 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9574 xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
9575 "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
9576 xmlParserMaxDepth);
Daniel Veillard4a9fe382006-09-19 12:44:35 +00009577 ctxt->instate = XML_PARSER_EOF;
9578 return;
9579 }
9580
Owen Taylor3473f882001-02-23 17:55:21 +00009581 /* Capture start position */
9582 if (ctxt->record_info) {
9583 node_info.begin_pos = ctxt->input->consumed +
9584 (CUR_PTR - ctxt->input->base);
9585 node_info.begin_line = ctxt->input->line;
9586 }
9587
9588 if (ctxt->spaceNr == 0)
9589 spacePush(ctxt, -1);
Daniel Veillard1114d002006-10-12 16:24:35 +00009590 else if (*ctxt->space == -2)
9591 spacePush(ctxt, -1);
Owen Taylor3473f882001-02-23 17:55:21 +00009592 else
9593 spacePush(ctxt, *ctxt->space);
9594
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00009595 line = ctxt->input->line;
Daniel Veillard81273902003-09-30 00:43:48 +00009596#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +00009597 if (ctxt->sax2)
Daniel Veillard81273902003-09-30 00:43:48 +00009598#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009599 name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
Daniel Veillard81273902003-09-30 00:43:48 +00009600#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +00009601 else
9602 name = xmlParseStartTag(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +00009603#endif /* LIBXML_SAX1_ENABLED */
Chris Evans77404b82011-12-14 16:18:25 +08009604 if (ctxt->instate == XML_PARSER_EOF)
9605 return;
Owen Taylor3473f882001-02-23 17:55:21 +00009606 if (name == NULL) {
9607 spacePop(ctxt);
9608 return;
9609 }
9610 namePush(ctxt, name);
9611 ret = ctxt->node;
9612
Daniel Veillard4432df22003-09-28 18:58:27 +00009613#ifdef LIBXML_VALID_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +00009614 /*
9615 * [ VC: Root Element Type ]
9616 * The Name in the document type declaration must match the element
9617 * type of the root element.
9618 */
9619 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
9620 ctxt->node && (ctxt->node == ctxt->myDoc->children))
9621 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
Daniel Veillard4432df22003-09-28 18:58:27 +00009622#endif /* LIBXML_VALID_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00009623
9624 /*
9625 * Check for an Empty Element.
9626 */
9627 if ((RAW == '/') && (NXT(1) == '>')) {
9628 SKIP(2);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009629 if (ctxt->sax2) {
9630 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
9631 (!ctxt->disableSAX))
9632 ctxt->sax->endElementNs(ctxt->userData, name, prefix, URI);
Daniel Veillard81273902003-09-30 00:43:48 +00009633#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +00009634 } else {
9635 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
9636 (!ctxt->disableSAX))
9637 ctxt->sax->endElement(ctxt->userData, name);
Daniel Veillard81273902003-09-30 00:43:48 +00009638#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00009639 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00009640 namePop(ctxt);
9641 spacePop(ctxt);
9642 if (nsNr != ctxt->nsNr)
9643 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +00009644 if ( ret != NULL && ctxt->record_info ) {
9645 node_info.end_pos = ctxt->input->consumed +
9646 (CUR_PTR - ctxt->input->base);
9647 node_info.end_line = ctxt->input->line;
9648 node_info.node = ret;
9649 xmlParserAddNodeInfo(ctxt, &node_info);
9650 }
9651 return;
9652 }
9653 if (RAW == '>') {
Daniel Veillard21a0f912001-02-25 19:54:14 +00009654 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00009655 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00009656 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_GT_REQUIRED,
9657 "Couldn't find end of Start Tag %s line %d\n",
9658 name, line, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009659
9660 /*
9661 * end of parsing of this node.
9662 */
9663 nodePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009664 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00009665 spacePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009666 if (nsNr != ctxt->nsNr)
9667 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +00009668
9669 /*
9670 * Capture end position and add node
9671 */
9672 if ( ret != NULL && ctxt->record_info ) {
9673 node_info.end_pos = ctxt->input->consumed +
9674 (CUR_PTR - ctxt->input->base);
9675 node_info.end_line = ctxt->input->line;
9676 node_info.node = ret;
9677 xmlParserAddNodeInfo(ctxt, &node_info);
9678 }
9679 return;
9680 }
9681
9682 /*
9683 * Parse the content of the element:
9684 */
9685 xmlParseContent(ctxt);
Daniel Veillard73b013f2003-09-30 12:36:01 +00009686 if (!IS_BYTE_CHAR(RAW)) {
Daniel Veillardf403d292003-10-05 13:51:35 +00009687 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
Daniel Veillard2b0f8792003-10-10 19:36:36 +00009688 "Premature end of data in tag %s line %d\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00009689 name, line, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009690
9691 /*
9692 * end of parsing of this node.
9693 */
9694 nodePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009695 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00009696 spacePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009697 if (nsNr != ctxt->nsNr)
9698 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +00009699 return;
9700 }
9701
9702 /*
9703 * parse the end of tag: '</' should be here.
9704 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00009705 if (ctxt->sax2) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009706 xmlParseEndTag2(ctxt, prefix, URI, line, ctxt->nsNr - nsNr, tlen);
Daniel Veillarde57ec792003-09-10 10:50:59 +00009707 namePop(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +00009708 }
9709#ifdef LIBXML_SAX1_ENABLED
9710 else
Daniel Veillard0fb18932003-09-07 09:14:37 +00009711 xmlParseEndTag1(ctxt, line);
Daniel Veillard81273902003-09-30 00:43:48 +00009712#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00009713
9714 /*
9715 * Capture end position and add node
9716 */
9717 if ( ret != NULL && ctxt->record_info ) {
9718 node_info.end_pos = ctxt->input->consumed +
9719 (CUR_PTR - ctxt->input->base);
9720 node_info.end_line = ctxt->input->line;
9721 node_info.node = ret;
9722 xmlParserAddNodeInfo(ctxt, &node_info);
9723 }
9724}
9725
9726/**
9727 * xmlParseVersionNum:
9728 * @ctxt: an XML parser context
9729 *
9730 * parse the XML version value.
9731 *
Daniel Veillard34e3f642008-07-29 09:02:27 +00009732 * [26] VersionNum ::= '1.' [0-9]+
9733 *
9734 * In practice allow [0-9].[0-9]+ at that level
Owen Taylor3473f882001-02-23 17:55:21 +00009735 *
9736 * Returns the string giving the XML version number, or NULL
9737 */
9738xmlChar *
9739xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
9740 xmlChar *buf = NULL;
9741 int len = 0;
9742 int size = 10;
9743 xmlChar cur;
9744
Daniel Veillard3c908dc2003-04-19 00:07:51 +00009745 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00009746 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009747 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009748 return(NULL);
9749 }
9750 cur = CUR;
Daniel Veillard34e3f642008-07-29 09:02:27 +00009751 if (!((cur >= '0') && (cur <= '9'))) {
Daniel Veillard7e5c3f42008-07-29 16:12:31 +00009752 xmlFree(buf);
Daniel Veillard34e3f642008-07-29 09:02:27 +00009753 return(NULL);
9754 }
9755 buf[len++] = cur;
9756 NEXT;
9757 cur=CUR;
9758 if (cur != '.') {
Daniel Veillard7e5c3f42008-07-29 16:12:31 +00009759 xmlFree(buf);
Daniel Veillard34e3f642008-07-29 09:02:27 +00009760 return(NULL);
9761 }
9762 buf[len++] = cur;
9763 NEXT;
9764 cur=CUR;
9765 while ((cur >= '0') && (cur <= '9')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009766 if (len + 1 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00009767 xmlChar *tmp;
9768
Owen Taylor3473f882001-02-23 17:55:21 +00009769 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00009770 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
9771 if (tmp == NULL) {
Daniel Veillard68b6e022008-03-31 09:26:00 +00009772 xmlFree(buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009773 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009774 return(NULL);
9775 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00009776 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00009777 }
9778 buf[len++] = cur;
9779 NEXT;
9780 cur=CUR;
9781 }
9782 buf[len] = 0;
9783 return(buf);
9784}
9785
9786/**
9787 * xmlParseVersionInfo:
9788 * @ctxt: an XML parser context
Daniel Veillard68b6e022008-03-31 09:26:00 +00009789 *
Owen Taylor3473f882001-02-23 17:55:21 +00009790 * parse the XML version.
9791 *
9792 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
Daniel Veillard68b6e022008-03-31 09:26:00 +00009793 *
Owen Taylor3473f882001-02-23 17:55:21 +00009794 * [25] Eq ::= S? '=' S?
9795 *
9796 * Returns the version string, e.g. "1.0"
9797 */
9798
9799xmlChar *
9800xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
9801 xmlChar *version = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00009802
Daniel Veillarda07050d2003-10-19 14:46:32 +00009803 if (CMP7(CUR_PTR, 'v', 'e', 'r', 's', 'i', 'o', 'n')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009804 SKIP(7);
9805 SKIP_BLANKS;
9806 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009807 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009808 return(NULL);
9809 }
9810 NEXT;
9811 SKIP_BLANKS;
9812 if (RAW == '"') {
9813 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00009814 version = xmlParseVersionNum(ctxt);
9815 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009816 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009817 } else
9818 NEXT;
9819 } else if (RAW == '\''){
9820 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00009821 version = xmlParseVersionNum(ctxt);
9822 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009823 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009824 } else
9825 NEXT;
9826 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009827 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009828 }
9829 }
9830 return(version);
9831}
9832
9833/**
9834 * xmlParseEncName:
9835 * @ctxt: an XML parser context
9836 *
9837 * parse the XML encoding name
9838 *
9839 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
9840 *
9841 * Returns the encoding name value or NULL
9842 */
9843xmlChar *
9844xmlParseEncName(xmlParserCtxtPtr ctxt) {
9845 xmlChar *buf = NULL;
9846 int len = 0;
9847 int size = 10;
9848 xmlChar cur;
9849
9850 cur = CUR;
9851 if (((cur >= 'a') && (cur <= 'z')) ||
9852 ((cur >= 'A') && (cur <= 'Z'))) {
Daniel Veillard3c908dc2003-04-19 00:07:51 +00009853 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00009854 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009855 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009856 return(NULL);
9857 }
Daniel Veillard34e3f642008-07-29 09:02:27 +00009858
Owen Taylor3473f882001-02-23 17:55:21 +00009859 buf[len++] = cur;
9860 NEXT;
9861 cur = CUR;
9862 while (((cur >= 'a') && (cur <= 'z')) ||
9863 ((cur >= 'A') && (cur <= 'Z')) ||
9864 ((cur >= '0') && (cur <= '9')) ||
9865 (cur == '.') || (cur == '_') ||
9866 (cur == '-')) {
9867 if (len + 1 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00009868 xmlChar *tmp;
9869
Owen Taylor3473f882001-02-23 17:55:21 +00009870 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00009871 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
9872 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009873 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00009874 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00009875 return(NULL);
9876 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00009877 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00009878 }
9879 buf[len++] = cur;
9880 NEXT;
9881 cur = CUR;
9882 if (cur == 0) {
9883 SHRINK;
9884 GROW;
9885 cur = CUR;
9886 }
9887 }
9888 buf[len] = 0;
9889 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009890 xmlFatalErr(ctxt, XML_ERR_ENCODING_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009891 }
9892 return(buf);
9893}
9894
9895/**
9896 * xmlParseEncodingDecl:
9897 * @ctxt: an XML parser context
9898 *
9899 * parse the XML encoding declaration
9900 *
9901 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
9902 *
9903 * this setups the conversion filters.
9904 *
9905 * Returns the encoding value or NULL
9906 */
9907
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00009908const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00009909xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
9910 xmlChar *encoding = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00009911
9912 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00009913 if (CMP8(CUR_PTR, 'e', 'n', 'c', 'o', 'd', 'i', 'n', 'g')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009914 SKIP(8);
9915 SKIP_BLANKS;
9916 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009917 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009918 return(NULL);
9919 }
9920 NEXT;
9921 SKIP_BLANKS;
9922 if (RAW == '"') {
9923 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00009924 encoding = xmlParseEncName(ctxt);
9925 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009926 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009927 } else
9928 NEXT;
9929 } else if (RAW == '\''){
9930 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00009931 encoding = xmlParseEncName(ctxt);
9932 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009933 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009934 } else
9935 NEXT;
Daniel Veillard82ac6b02002-02-17 23:18:55 +00009936 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009937 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009938 }
Daniel Veillardc62efc82011-05-16 16:03:50 +08009939
9940 /*
9941 * Non standard parsing, allowing the user to ignore encoding
9942 */
9943 if (ctxt->options & XML_PARSE_IGNORE_ENC)
9944 return(encoding);
9945
Daniel Veillard6b621b82003-08-11 15:03:34 +00009946 /*
9947 * UTF-16 encoding stwich has already taken place at this stage,
9948 * more over the little-endian/big-endian selection is already done
9949 */
9950 if ((encoding != NULL) &&
9951 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-16")) ||
9952 (!xmlStrcasecmp(encoding, BAD_CAST "UTF16")))) {
Daniel Veillard37334572008-07-31 08:20:02 +00009953 /*
9954 * If no encoding was passed to the parser, that we are
9955 * using UTF-16 and no decoder is present i.e. the
9956 * document is apparently UTF-8 compatible, then raise an
9957 * encoding mismatch fatal error
9958 */
9959 if ((ctxt->encoding == NULL) &&
9960 (ctxt->input->buf != NULL) &&
9961 (ctxt->input->buf->encoder == NULL)) {
9962 xmlFatalErrMsg(ctxt, XML_ERR_INVALID_ENCODING,
9963 "Document labelled UTF-16 but has UTF-8 content\n");
9964 }
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00009965 if (ctxt->encoding != NULL)
9966 xmlFree((xmlChar *) ctxt->encoding);
9967 ctxt->encoding = encoding;
Daniel Veillardb19ba832003-08-14 00:33:46 +00009968 }
9969 /*
9970 * UTF-8 encoding is handled natively
9971 */
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00009972 else if ((encoding != NULL) &&
Daniel Veillardb19ba832003-08-14 00:33:46 +00009973 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-8")) ||
9974 (!xmlStrcasecmp(encoding, BAD_CAST "UTF8")))) {
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00009975 if (ctxt->encoding != NULL)
9976 xmlFree((xmlChar *) ctxt->encoding);
9977 ctxt->encoding = encoding;
Daniel Veillard6b621b82003-08-11 15:03:34 +00009978 }
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00009979 else if (encoding != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +00009980 xmlCharEncodingHandlerPtr handler;
9981
9982 if (ctxt->input->encoding != NULL)
9983 xmlFree((xmlChar *) ctxt->input->encoding);
9984 ctxt->input->encoding = encoding;
9985
Daniel Veillarda6874ca2003-07-29 16:47:24 +00009986 handler = xmlFindCharEncodingHandler((const char *) encoding);
9987 if (handler != NULL) {
9988 xmlSwitchToEncoding(ctxt, handler);
Owen Taylor3473f882001-02-23 17:55:21 +00009989 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00009990 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
Daniel Veillarda6874ca2003-07-29 16:47:24 +00009991 "Unsupported encoding %s\n", encoding);
9992 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009993 }
9994 }
9995 }
9996 return(encoding);
9997}
9998
9999/**
10000 * xmlParseSDDecl:
10001 * @ctxt: an XML parser context
10002 *
10003 * parse the XML standalone declaration
10004 *
10005 * [32] SDDecl ::= S 'standalone' Eq
10006 * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
10007 *
10008 * [ VC: Standalone Document Declaration ]
10009 * TODO The standalone document declaration must have the value "no"
10010 * if any external markup declarations contain declarations of:
10011 * - attributes with default values, if elements to which these
10012 * attributes apply appear in the document without specifications
10013 * of values for these attributes, or
10014 * - entities (other than amp, lt, gt, apos, quot), if references
10015 * to those entities appear in the document, or
10016 * - attributes with values subject to normalization, where the
10017 * attribute appears in the document with a value which will change
10018 * as a result of normalization, or
10019 * - element types with element content, if white space occurs directly
10020 * within any instance of those types.
10021 *
Daniel Veillard602f2bd2006-12-04 09:26:04 +000010022 * Returns:
10023 * 1 if standalone="yes"
10024 * 0 if standalone="no"
10025 * -2 if standalone attribute is missing or invalid
10026 * (A standalone value of -2 means that the XML declaration was found,
10027 * but no value was specified for the standalone attribute).
Owen Taylor3473f882001-02-23 17:55:21 +000010028 */
10029
10030int
10031xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard602f2bd2006-12-04 09:26:04 +000010032 int standalone = -2;
Owen Taylor3473f882001-02-23 17:55:21 +000010033
10034 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +000010035 if (CMP10(CUR_PTR, 's', 't', 'a', 'n', 'd', 'a', 'l', 'o', 'n', 'e')) {
Owen Taylor3473f882001-02-23 17:55:21 +000010036 SKIP(10);
10037 SKIP_BLANKS;
10038 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010039 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010040 return(standalone);
10041 }
10042 NEXT;
10043 SKIP_BLANKS;
10044 if (RAW == '\''){
10045 NEXT;
10046 if ((RAW == 'n') && (NXT(1) == 'o')) {
10047 standalone = 0;
10048 SKIP(2);
10049 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
10050 (NXT(2) == 's')) {
10051 standalone = 1;
10052 SKIP(3);
10053 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010054 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010055 }
10056 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010057 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010058 } else
10059 NEXT;
10060 } else if (RAW == '"'){
10061 NEXT;
10062 if ((RAW == 'n') && (NXT(1) == 'o')) {
10063 standalone = 0;
10064 SKIP(2);
10065 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
10066 (NXT(2) == 's')) {
10067 standalone = 1;
10068 SKIP(3);
10069 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010070 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010071 }
10072 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010073 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010074 } else
10075 NEXT;
10076 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010077 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010078 }
10079 }
10080 return(standalone);
10081}
10082
10083/**
10084 * xmlParseXMLDecl:
10085 * @ctxt: an XML parser context
10086 *
10087 * parse an XML declaration header
10088 *
10089 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
10090 */
10091
10092void
10093xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
10094 xmlChar *version;
10095
10096 /*
Daniel Veillardae487ba2005-11-17 07:25:52 +000010097 * This value for standalone indicates that the document has an
10098 * XML declaration but it does not have a standalone attribute.
10099 * It will be overwritten later if a standalone attribute is found.
10100 */
10101 ctxt->input->standalone = -2;
10102
10103 /*
Owen Taylor3473f882001-02-23 17:55:21 +000010104 * We know that '<?xml' is here.
10105 */
10106 SKIP(5);
10107
William M. Brack76e95df2003-10-18 16:20:14 +000010108 if (!IS_BLANK_CH(RAW)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +000010109 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
10110 "Blank needed after '<?xml'\n");
Owen Taylor3473f882001-02-23 17:55:21 +000010111 }
10112 SKIP_BLANKS;
10113
10114 /*
Daniel Veillard19840942001-11-29 16:11:38 +000010115 * We must have the VersionInfo here.
Owen Taylor3473f882001-02-23 17:55:21 +000010116 */
10117 version = xmlParseVersionInfo(ctxt);
Daniel Veillard19840942001-11-29 16:11:38 +000010118 if (version == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +000010119 xmlFatalErr(ctxt, XML_ERR_VERSION_MISSING, NULL);
Daniel Veillard19840942001-11-29 16:11:38 +000010120 } else {
10121 if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) {
10122 /*
Daniel Veillard34e3f642008-07-29 09:02:27 +000010123 * Changed here for XML-1.0 5th edition
Daniel Veillard19840942001-11-29 16:11:38 +000010124 */
Daniel Veillard34e3f642008-07-29 09:02:27 +000010125 if (ctxt->options & XML_PARSE_OLD10) {
10126 xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10127 "Unsupported version '%s'\n",
10128 version);
10129 } else {
10130 if ((version[0] == '1') && ((version[1] == '.'))) {
10131 xmlWarningMsg(ctxt, XML_WAR_UNKNOWN_VERSION,
10132 "Unsupported version '%s'\n",
10133 version, NULL);
10134 } else {
10135 xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10136 "Unsupported version '%s'\n",
10137 version);
10138 }
10139 }
Daniel Veillard19840942001-11-29 16:11:38 +000010140 }
10141 if (ctxt->version != NULL)
Daniel Veillardd3b08822001-12-05 12:03:33 +000010142 xmlFree((void *) ctxt->version);
Daniel Veillard19840942001-11-29 16:11:38 +000010143 ctxt->version = version;
Daniel Veillarda050d232001-09-05 15:51:05 +000010144 }
Owen Taylor3473f882001-02-23 17:55:21 +000010145
10146 /*
10147 * We may have the encoding declaration
10148 */
William M. Brack76e95df2003-10-18 16:20:14 +000010149 if (!IS_BLANK_CH(RAW)) {
Owen Taylor3473f882001-02-23 17:55:21 +000010150 if ((RAW == '?') && (NXT(1) == '>')) {
10151 SKIP(2);
10152 return;
10153 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +000010154 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
Owen Taylor3473f882001-02-23 17:55:21 +000010155 }
10156 xmlParseEncodingDecl(ctxt);
10157 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
10158 /*
10159 * The XML REC instructs us to stop parsing right here
10160 */
10161 return;
10162 }
10163
10164 /*
10165 * We may have the standalone status.
10166 */
William M. Brack76e95df2003-10-18 16:20:14 +000010167 if ((ctxt->input->encoding != NULL) && (!IS_BLANK_CH(RAW))) {
Owen Taylor3473f882001-02-23 17:55:21 +000010168 if ((RAW == '?') && (NXT(1) == '>')) {
10169 SKIP(2);
10170 return;
10171 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +000010172 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
Owen Taylor3473f882001-02-23 17:55:21 +000010173 }
Daniel Veillarda6c76a22009-08-26 14:37:00 +020010174
10175 /*
10176 * We can grow the input buffer freely at that point
10177 */
10178 GROW;
10179
Owen Taylor3473f882001-02-23 17:55:21 +000010180 SKIP_BLANKS;
10181 ctxt->input->standalone = xmlParseSDDecl(ctxt);
10182
10183 SKIP_BLANKS;
10184 if ((RAW == '?') && (NXT(1) == '>')) {
10185 SKIP(2);
10186 } else if (RAW == '>') {
10187 /* Deprecated old WD ... */
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010188 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010189 NEXT;
10190 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010191 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010192 MOVETO_ENDTAG(CUR_PTR);
10193 NEXT;
10194 }
10195}
10196
10197/**
10198 * xmlParseMisc:
10199 * @ctxt: an XML parser context
10200 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +000010201 * parse an XML Misc* optional field.
Owen Taylor3473f882001-02-23 17:55:21 +000010202 *
10203 * [27] Misc ::= Comment | PI | S
10204 */
10205
10206void
10207xmlParseMisc(xmlParserCtxtPtr ctxt) {
Daniel Veillard561b7f82002-03-20 21:55:57 +000010208 while (((RAW == '<') && (NXT(1) == '?')) ||
Daniel Veillarda07050d2003-10-19 14:46:32 +000010209 (CMP4(CUR_PTR, '<', '!', '-', '-')) ||
William M. Brack76e95df2003-10-18 16:20:14 +000010210 IS_BLANK_CH(CUR)) {
Daniel Veillard561b7f82002-03-20 21:55:57 +000010211 if ((RAW == '<') && (NXT(1) == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +000010212 xmlParsePI(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +000010213 } else if (IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +000010214 NEXT;
10215 } else
10216 xmlParseComment(ctxt);
10217 }
10218}
10219
10220/**
10221 * xmlParseDocument:
10222 * @ctxt: an XML parser context
10223 *
10224 * parse an XML document (and build a tree if using the standard SAX
10225 * interface).
10226 *
10227 * [1] document ::= prolog element Misc*
10228 *
10229 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
10230 *
10231 * Returns 0, -1 in case of error. the parser context is augmented
10232 * as a result of the parsing.
10233 */
10234
10235int
10236xmlParseDocument(xmlParserCtxtPtr ctxt) {
10237 xmlChar start[4];
10238 xmlCharEncoding enc;
10239
10240 xmlInitParser();
10241
Daniel Veillard36e5cd52004-11-02 14:52:23 +000010242 if ((ctxt == NULL) || (ctxt->input == NULL))
10243 return(-1);
10244
Owen Taylor3473f882001-02-23 17:55:21 +000010245 GROW;
10246
10247 /*
Daniel Veillard0fb18932003-09-07 09:14:37 +000010248 * SAX: detecting the level.
10249 */
Daniel Veillarde57ec792003-09-10 10:50:59 +000010250 xmlDetectSAX2(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +000010251
10252 /*
Owen Taylor3473f882001-02-23 17:55:21 +000010253 * SAX: beginning of the document processing.
10254 */
10255 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10256 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
10257
Nikolay Sivove6ad10a2010-11-01 11:35:14 +010010258 if ((ctxt->encoding == NULL) &&
Daniel Veillard4aede2e2003-10-17 12:43:59 +000010259 ((ctxt->input->end - ctxt->input->cur) >= 4)) {
Daniel Veillard4aafa792001-07-28 17:21:12 +000010260 /*
10261 * Get the 4 first bytes and decode the charset
10262 * if enc != XML_CHAR_ENCODING_NONE
10263 * plug some encoding conversion routines.
10264 */
10265 start[0] = RAW;
10266 start[1] = NXT(1);
10267 start[2] = NXT(2);
10268 start[3] = NXT(3);
Daniel Veillard4aede2e2003-10-17 12:43:59 +000010269 enc = xmlDetectCharEncoding(&start[0], 4);
Daniel Veillard4aafa792001-07-28 17:21:12 +000010270 if (enc != XML_CHAR_ENCODING_NONE) {
10271 xmlSwitchEncoding(ctxt, enc);
10272 }
Owen Taylor3473f882001-02-23 17:55:21 +000010273 }
10274
10275
10276 if (CUR == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010277 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010278 }
10279
10280 /*
10281 * Check for the XMLDecl in the Prolog.
Daniel Veillard7e385bd2009-08-26 11:38:49 +020010282 * do not GROW here to avoid the detected encoder to decode more
Daniel Veillard9d3d1412009-09-15 18:41:30 +020010283 * than just the first line, unless the amount of data is really
10284 * too small to hold "<?xml version="1.0" encoding="foo"
Owen Taylor3473f882001-02-23 17:55:21 +000010285 */
Daniel Veillard9d3d1412009-09-15 18:41:30 +020010286 if ((ctxt->input->end - ctxt->input->cur) < 35) {
10287 GROW;
10288 }
Daniel Veillarda07050d2003-10-19 14:46:32 +000010289 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +000010290
10291 /*
10292 * Note that we will switch encoding on the fly.
10293 */
10294 xmlParseXMLDecl(ctxt);
10295 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
10296 /*
10297 * The XML REC instructs us to stop parsing right here
10298 */
10299 return(-1);
10300 }
10301 ctxt->standalone = ctxt->input->standalone;
10302 SKIP_BLANKS;
10303 } else {
10304 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10305 }
10306 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
10307 ctxt->sax->startDocument(ctxt->userData);
10308
10309 /*
10310 * The Misc part of the Prolog
10311 */
10312 GROW;
10313 xmlParseMisc(ctxt);
10314
10315 /*
10316 * Then possibly doc type declaration(s) and more Misc
10317 * (doctypedecl Misc*)?
10318 */
10319 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +000010320 if (CMP9(CUR_PTR, '<', '!', 'D', 'O', 'C', 'T', 'Y', 'P', 'E')) {
Owen Taylor3473f882001-02-23 17:55:21 +000010321
10322 ctxt->inSubset = 1;
10323 xmlParseDocTypeDecl(ctxt);
10324 if (RAW == '[') {
10325 ctxt->instate = XML_PARSER_DTD;
10326 xmlParseInternalSubset(ctxt);
10327 }
10328
10329 /*
10330 * Create and update the external subset.
10331 */
10332 ctxt->inSubset = 2;
10333 if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
10334 (!ctxt->disableSAX))
10335 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
10336 ctxt->extSubSystem, ctxt->extSubURI);
10337 ctxt->inSubset = 0;
10338
Daniel Veillardac4118d2008-01-11 05:27:32 +000010339 xmlCleanSpecialAttr(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010340
10341 ctxt->instate = XML_PARSER_PROLOG;
10342 xmlParseMisc(ctxt);
10343 }
10344
10345 /*
10346 * Time to start parsing the tree itself
10347 */
10348 GROW;
10349 if (RAW != '<') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +000010350 xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
10351 "Start tag expected, '<' not found\n");
Owen Taylor3473f882001-02-23 17:55:21 +000010352 } else {
10353 ctxt->instate = XML_PARSER_CONTENT;
10354 xmlParseElement(ctxt);
10355 ctxt->instate = XML_PARSER_EPILOG;
10356
10357
10358 /*
10359 * The Misc part at the end
10360 */
10361 xmlParseMisc(ctxt);
10362
Daniel Veillard561b7f82002-03-20 21:55:57 +000010363 if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010364 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010365 }
10366 ctxt->instate = XML_PARSER_EOF;
10367 }
10368
10369 /*
10370 * SAX: end of the document processing.
10371 */
Daniel Veillard8d24cc12002-03-05 15:41:29 +000010372 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +000010373 ctxt->sax->endDocument(ctxt->userData);
10374
Daniel Veillard5997aca2002-03-18 18:36:20 +000010375 /*
10376 * Remove locally kept entity definitions if the tree was not built
10377 */
10378 if ((ctxt->myDoc != NULL) &&
10379 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
10380 xmlFreeDoc(ctxt->myDoc);
10381 ctxt->myDoc = NULL;
10382 }
10383
Daniel Veillardae0765b2008-07-31 19:54:59 +000010384 if ((ctxt->wellFormed) && (ctxt->myDoc != NULL)) {
10385 ctxt->myDoc->properties |= XML_DOC_WELLFORMED;
10386 if (ctxt->valid)
10387 ctxt->myDoc->properties |= XML_DOC_DTDVALID;
10388 if (ctxt->nsWellFormed)
10389 ctxt->myDoc->properties |= XML_DOC_NSVALID;
10390 if (ctxt->options & XML_PARSE_OLD10)
10391 ctxt->myDoc->properties |= XML_DOC_OLD10;
10392 }
Daniel Veillardc7612992002-02-17 22:47:37 +000010393 if (! ctxt->wellFormed) {
10394 ctxt->valid = 0;
10395 return(-1);
10396 }
Owen Taylor3473f882001-02-23 17:55:21 +000010397 return(0);
10398}
10399
10400/**
10401 * xmlParseExtParsedEnt:
10402 * @ctxt: an XML parser context
10403 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +000010404 * parse a general parsed entity
Owen Taylor3473f882001-02-23 17:55:21 +000010405 * An external general parsed entity is well-formed if it matches the
10406 * production labeled extParsedEnt.
10407 *
10408 * [78] extParsedEnt ::= TextDecl? content
10409 *
10410 * Returns 0, -1 in case of error. the parser context is augmented
10411 * as a result of the parsing.
10412 */
10413
10414int
10415xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
10416 xmlChar start[4];
10417 xmlCharEncoding enc;
10418
Daniel Veillard36e5cd52004-11-02 14:52:23 +000010419 if ((ctxt == NULL) || (ctxt->input == NULL))
10420 return(-1);
10421
Owen Taylor3473f882001-02-23 17:55:21 +000010422 xmlDefaultSAXHandlerInit();
10423
Daniel Veillard309f81d2003-09-23 09:02:53 +000010424 xmlDetectSAX2(ctxt);
10425
Owen Taylor3473f882001-02-23 17:55:21 +000010426 GROW;
10427
10428 /*
10429 * SAX: beginning of the document processing.
10430 */
10431 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10432 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
10433
10434 /*
10435 * Get the 4 first bytes and decode the charset
10436 * if enc != XML_CHAR_ENCODING_NONE
10437 * plug some encoding conversion routines.
10438 */
Daniel Veillard4aede2e2003-10-17 12:43:59 +000010439 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
10440 start[0] = RAW;
10441 start[1] = NXT(1);
10442 start[2] = NXT(2);
10443 start[3] = NXT(3);
10444 enc = xmlDetectCharEncoding(start, 4);
10445 if (enc != XML_CHAR_ENCODING_NONE) {
10446 xmlSwitchEncoding(ctxt, enc);
10447 }
Owen Taylor3473f882001-02-23 17:55:21 +000010448 }
10449
10450
10451 if (CUR == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010452 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010453 }
10454
10455 /*
10456 * Check for the XMLDecl in the Prolog.
10457 */
10458 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +000010459 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +000010460
10461 /*
10462 * Note that we will switch encoding on the fly.
10463 */
10464 xmlParseXMLDecl(ctxt);
10465 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
10466 /*
10467 * The XML REC instructs us to stop parsing right here
10468 */
10469 return(-1);
10470 }
10471 SKIP_BLANKS;
10472 } else {
10473 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10474 }
10475 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
10476 ctxt->sax->startDocument(ctxt->userData);
10477
10478 /*
10479 * Doing validity checking on chunk doesn't make sense
10480 */
10481 ctxt->instate = XML_PARSER_CONTENT;
10482 ctxt->validate = 0;
10483 ctxt->loadsubset = 0;
10484 ctxt->depth = 0;
10485
10486 xmlParseContent(ctxt);
10487
10488 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010489 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010490 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010491 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010492 }
10493
10494 /*
10495 * SAX: end of the document processing.
10496 */
Daniel Veillard8d24cc12002-03-05 15:41:29 +000010497 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +000010498 ctxt->sax->endDocument(ctxt->userData);
10499
10500 if (! ctxt->wellFormed) return(-1);
10501 return(0);
10502}
10503
Daniel Veillard73b013f2003-09-30 12:36:01 +000010504#ifdef LIBXML_PUSH_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000010505/************************************************************************
10506 * *
10507 * Progressive parsing interfaces *
10508 * *
10509 ************************************************************************/
10510
10511/**
10512 * xmlParseLookupSequence:
10513 * @ctxt: an XML parser context
10514 * @first: the first char to lookup
10515 * @next: the next char to lookup or zero
10516 * @third: the next char to lookup or zero
10517 *
10518 * Try to find if a sequence (first, next, third) or just (first next) or
10519 * (first) is available in the input stream.
10520 * This function has a side effect of (possibly) incrementing ctxt->checkIndex
10521 * to avoid rescanning sequences of bytes, it DOES change the state of the
10522 * parser, do not use liberally.
10523 *
10524 * Returns the index to the current parsing point if the full sequence
10525 * is available, -1 otherwise.
10526 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +000010527static int
Owen Taylor3473f882001-02-23 17:55:21 +000010528xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
10529 xmlChar next, xmlChar third) {
10530 int base, len;
10531 xmlParserInputPtr in;
10532 const xmlChar *buf;
10533
10534 in = ctxt->input;
10535 if (in == NULL) return(-1);
10536 base = in->cur - in->base;
10537 if (base < 0) return(-1);
10538 if (ctxt->checkIndex > base)
10539 base = ctxt->checkIndex;
10540 if (in->buf == NULL) {
10541 buf = in->base;
10542 len = in->length;
10543 } else {
10544 buf = in->buf->buffer->content;
10545 len = in->buf->buffer->use;
10546 }
10547 /* take into account the sequence length */
10548 if (third) len -= 2;
10549 else if (next) len --;
10550 for (;base < len;base++) {
10551 if (buf[base] == first) {
10552 if (third != 0) {
10553 if ((buf[base + 1] != next) ||
10554 (buf[base + 2] != third)) continue;
10555 } else if (next != 0) {
10556 if (buf[base + 1] != next) continue;
10557 }
10558 ctxt->checkIndex = 0;
10559#ifdef DEBUG_PUSH
10560 if (next == 0)
10561 xmlGenericError(xmlGenericErrorContext,
10562 "PP: lookup '%c' found at %d\n",
10563 first, base);
10564 else if (third == 0)
10565 xmlGenericError(xmlGenericErrorContext,
10566 "PP: lookup '%c%c' found at %d\n",
10567 first, next, base);
10568 else
10569 xmlGenericError(xmlGenericErrorContext,
10570 "PP: lookup '%c%c%c' found at %d\n",
10571 first, next, third, base);
10572#endif
10573 return(base - (in->cur - in->base));
10574 }
10575 }
10576 ctxt->checkIndex = base;
10577#ifdef DEBUG_PUSH
10578 if (next == 0)
10579 xmlGenericError(xmlGenericErrorContext,
10580 "PP: lookup '%c' failed\n", first);
10581 else if (third == 0)
10582 xmlGenericError(xmlGenericErrorContext,
10583 "PP: lookup '%c%c' failed\n", first, next);
10584 else
10585 xmlGenericError(xmlGenericErrorContext,
10586 "PP: lookup '%c%c%c' failed\n", first, next, third);
10587#endif
10588 return(-1);
10589}
10590
10591/**
Daniel Veillarda880b122003-04-21 21:36:41 +000010592 * xmlParseGetLasts:
10593 * @ctxt: an XML parser context
10594 * @lastlt: pointer to store the last '<' from the input
10595 * @lastgt: pointer to store the last '>' from the input
10596 *
10597 * Lookup the last < and > in the current chunk
10598 */
10599static void
10600xmlParseGetLasts(xmlParserCtxtPtr ctxt, const xmlChar **lastlt,
10601 const xmlChar **lastgt) {
10602 const xmlChar *tmp;
10603
10604 if ((ctxt == NULL) || (lastlt == NULL) || (lastgt == NULL)) {
10605 xmlGenericError(xmlGenericErrorContext,
10606 "Internal error: xmlParseGetLasts\n");
10607 return;
10608 }
Daniel Veillard0df3bc32004-06-08 12:03:41 +000010609 if ((ctxt->progressive != 0) && (ctxt->inputNr == 1)) {
Daniel Veillarda880b122003-04-21 21:36:41 +000010610 tmp = ctxt->input->end;
10611 tmp--;
Daniel Veillardeb70f932004-07-05 16:46:09 +000010612 while ((tmp >= ctxt->input->base) && (*tmp != '<')) tmp--;
Daniel Veillarda880b122003-04-21 21:36:41 +000010613 if (tmp < ctxt->input->base) {
10614 *lastlt = NULL;
10615 *lastgt = NULL;
Daniel Veillarda880b122003-04-21 21:36:41 +000010616 } else {
Daniel Veillardeb70f932004-07-05 16:46:09 +000010617 *lastlt = tmp;
10618 tmp++;
10619 while ((tmp < ctxt->input->end) && (*tmp != '>')) {
10620 if (*tmp == '\'') {
10621 tmp++;
10622 while ((tmp < ctxt->input->end) && (*tmp != '\'')) tmp++;
10623 if (tmp < ctxt->input->end) tmp++;
10624 } else if (*tmp == '"') {
10625 tmp++;
10626 while ((tmp < ctxt->input->end) && (*tmp != '"')) tmp++;
10627 if (tmp < ctxt->input->end) tmp++;
10628 } else
10629 tmp++;
10630 }
10631 if (tmp < ctxt->input->end)
10632 *lastgt = tmp;
10633 else {
10634 tmp = *lastlt;
10635 tmp--;
10636 while ((tmp >= ctxt->input->base) && (*tmp != '>')) tmp--;
10637 if (tmp >= ctxt->input->base)
10638 *lastgt = tmp;
10639 else
10640 *lastgt = NULL;
10641 }
Daniel Veillarda880b122003-04-21 21:36:41 +000010642 }
Daniel Veillarda880b122003-04-21 21:36:41 +000010643 } else {
10644 *lastlt = NULL;
10645 *lastgt = NULL;
10646 }
10647}
10648/**
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000010649 * xmlCheckCdataPush:
10650 * @cur: pointer to the bock of characters
10651 * @len: length of the block in bytes
10652 *
10653 * Check that the block of characters is okay as SCdata content [20]
10654 *
10655 * Returns the number of bytes to pass if okay, a negative index where an
10656 * UTF-8 error occured otherwise
10657 */
10658static int
10659xmlCheckCdataPush(const xmlChar *utf, int len) {
10660 int ix;
10661 unsigned char c;
10662 int codepoint;
10663
10664 if ((utf == NULL) || (len <= 0))
10665 return(0);
10666
10667 for (ix = 0; ix < len;) { /* string is 0-terminated */
10668 c = utf[ix];
10669 if ((c & 0x80) == 0x00) { /* 1-byte code, starts with 10 */
10670 if (c >= 0x20)
10671 ix++;
10672 else if ((c == 0xA) || (c == 0xD) || (c == 0x9))
10673 ix++;
10674 else
10675 return(-ix);
10676 } else if ((c & 0xe0) == 0xc0) {/* 2-byte code, starts with 110 */
10677 if (ix + 2 > len) return(ix);
10678 if ((utf[ix+1] & 0xc0 ) != 0x80)
10679 return(-ix);
Daniel Veillardeca59a22005-09-09 10:56:28 +000010680 codepoint = (utf[ix] & 0x1f) << 6;
10681 codepoint |= utf[ix+1] & 0x3f;
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000010682 if (!xmlIsCharQ(codepoint))
10683 return(-ix);
10684 ix += 2;
10685 } else if ((c & 0xf0) == 0xe0) {/* 3-byte code, starts with 1110 */
10686 if (ix + 3 > len) return(ix);
10687 if (((utf[ix+1] & 0xc0) != 0x80) ||
10688 ((utf[ix+2] & 0xc0) != 0x80))
10689 return(-ix);
Daniel Veillardeca59a22005-09-09 10:56:28 +000010690 codepoint = (utf[ix] & 0xf) << 12;
10691 codepoint |= (utf[ix+1] & 0x3f) << 6;
10692 codepoint |= utf[ix+2] & 0x3f;
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000010693 if (!xmlIsCharQ(codepoint))
10694 return(-ix);
10695 ix += 3;
10696 } else if ((c & 0xf8) == 0xf0) {/* 4-byte code, starts with 11110 */
10697 if (ix + 4 > len) return(ix);
10698 if (((utf[ix+1] & 0xc0) != 0x80) ||
10699 ((utf[ix+2] & 0xc0) != 0x80) ||
10700 ((utf[ix+3] & 0xc0) != 0x80))
10701 return(-ix);
Daniel Veillardeca59a22005-09-09 10:56:28 +000010702 codepoint = (utf[ix] & 0x7) << 18;
10703 codepoint |= (utf[ix+1] & 0x3f) << 12;
10704 codepoint |= (utf[ix+2] & 0x3f) << 6;
10705 codepoint |= utf[ix+3] & 0x3f;
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000010706 if (!xmlIsCharQ(codepoint))
10707 return(-ix);
10708 ix += 4;
10709 } else /* unknown encoding */
10710 return(-ix);
10711 }
10712 return(ix);
10713}
10714
10715/**
Owen Taylor3473f882001-02-23 17:55:21 +000010716 * xmlParseTryOrFinish:
10717 * @ctxt: an XML parser context
10718 * @terminate: last chunk indicator
10719 *
10720 * Try to progress on parsing
10721 *
10722 * Returns zero if no parsing was possible
10723 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +000010724static int
Owen Taylor3473f882001-02-23 17:55:21 +000010725xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
10726 int ret = 0;
Daniel Veillardc82c57e2004-01-12 16:24:34 +000010727 int avail, tlen;
Owen Taylor3473f882001-02-23 17:55:21 +000010728 xmlChar cur, next;
Daniel Veillarda880b122003-04-21 21:36:41 +000010729 const xmlChar *lastlt, *lastgt;
Owen Taylor3473f882001-02-23 17:55:21 +000010730
Daniel Veillard36e5cd52004-11-02 14:52:23 +000010731 if (ctxt->input == NULL)
10732 return(0);
10733
Owen Taylor3473f882001-02-23 17:55:21 +000010734#ifdef DEBUG_PUSH
10735 switch (ctxt->instate) {
10736 case XML_PARSER_EOF:
10737 xmlGenericError(xmlGenericErrorContext,
10738 "PP: try EOF\n"); break;
10739 case XML_PARSER_START:
10740 xmlGenericError(xmlGenericErrorContext,
10741 "PP: try START\n"); break;
10742 case XML_PARSER_MISC:
10743 xmlGenericError(xmlGenericErrorContext,
10744 "PP: try MISC\n");break;
10745 case XML_PARSER_COMMENT:
10746 xmlGenericError(xmlGenericErrorContext,
10747 "PP: try COMMENT\n");break;
10748 case XML_PARSER_PROLOG:
10749 xmlGenericError(xmlGenericErrorContext,
10750 "PP: try PROLOG\n");break;
10751 case XML_PARSER_START_TAG:
10752 xmlGenericError(xmlGenericErrorContext,
10753 "PP: try START_TAG\n");break;
10754 case XML_PARSER_CONTENT:
10755 xmlGenericError(xmlGenericErrorContext,
10756 "PP: try CONTENT\n");break;
10757 case XML_PARSER_CDATA_SECTION:
10758 xmlGenericError(xmlGenericErrorContext,
10759 "PP: try CDATA_SECTION\n");break;
10760 case XML_PARSER_END_TAG:
10761 xmlGenericError(xmlGenericErrorContext,
10762 "PP: try END_TAG\n");break;
10763 case XML_PARSER_ENTITY_DECL:
10764 xmlGenericError(xmlGenericErrorContext,
10765 "PP: try ENTITY_DECL\n");break;
10766 case XML_PARSER_ENTITY_VALUE:
10767 xmlGenericError(xmlGenericErrorContext,
10768 "PP: try ENTITY_VALUE\n");break;
10769 case XML_PARSER_ATTRIBUTE_VALUE:
10770 xmlGenericError(xmlGenericErrorContext,
10771 "PP: try ATTRIBUTE_VALUE\n");break;
10772 case XML_PARSER_DTD:
10773 xmlGenericError(xmlGenericErrorContext,
10774 "PP: try DTD\n");break;
10775 case XML_PARSER_EPILOG:
10776 xmlGenericError(xmlGenericErrorContext,
10777 "PP: try EPILOG\n");break;
10778 case XML_PARSER_PI:
10779 xmlGenericError(xmlGenericErrorContext,
10780 "PP: try PI\n");break;
10781 case XML_PARSER_IGNORE:
10782 xmlGenericError(xmlGenericErrorContext,
10783 "PP: try IGNORE\n");break;
10784 }
10785#endif
10786
Daniel Veillard198c1bf2003-10-20 17:07:41 +000010787 if ((ctxt->input != NULL) &&
10788 (ctxt->input->cur - ctxt->input->base > 4096)) {
Daniel Veillarda880b122003-04-21 21:36:41 +000010789 xmlSHRINK(ctxt);
10790 ctxt->checkIndex = 0;
10791 }
10792 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Aleksey Sanine48a3182002-05-09 18:20:01 +000010793
Daniel Veillarda880b122003-04-21 21:36:41 +000010794 while (1) {
Daniel Veillard14412512005-01-21 23:53:26 +000010795 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000010796 return(0);
10797
10798
Owen Taylor3473f882001-02-23 17:55:21 +000010799 /*
10800 * Pop-up of finished entities.
10801 */
10802 while ((RAW == 0) && (ctxt->inputNr > 1))
10803 xmlPopInput(ctxt);
10804
Daniel Veillard198c1bf2003-10-20 17:07:41 +000010805 if (ctxt->input == NULL) break;
Owen Taylor3473f882001-02-23 17:55:21 +000010806 if (ctxt->input->buf == NULL)
Daniel Veillarda880b122003-04-21 21:36:41 +000010807 avail = ctxt->input->length -
10808 (ctxt->input->cur - ctxt->input->base);
Daniel Veillard158a4d22002-02-20 22:17:58 +000010809 else {
10810 /*
10811 * If we are operating on converted input, try to flush
10812 * remaining chars to avoid them stalling in the non-converted
10813 * buffer.
10814 */
10815 if ((ctxt->input->buf->raw != NULL) &&
10816 (ctxt->input->buf->raw->use > 0)) {
10817 int base = ctxt->input->base -
10818 ctxt->input->buf->buffer->content;
10819 int current = ctxt->input->cur - ctxt->input->base;
10820
10821 xmlParserInputBufferPush(ctxt->input->buf, 0, "");
10822 ctxt->input->base = ctxt->input->buf->buffer->content + base;
10823 ctxt->input->cur = ctxt->input->base + current;
10824 ctxt->input->end =
10825 &ctxt->input->buf->buffer->content[
10826 ctxt->input->buf->buffer->use];
10827 }
10828 avail = ctxt->input->buf->buffer->use -
10829 (ctxt->input->cur - ctxt->input->base);
10830 }
Owen Taylor3473f882001-02-23 17:55:21 +000010831 if (avail < 1)
10832 goto done;
10833 switch (ctxt->instate) {
10834 case XML_PARSER_EOF:
10835 /*
10836 * Document parsing is done !
10837 */
10838 goto done;
10839 case XML_PARSER_START:
Daniel Veillard0e4cd172001-06-28 12:13:56 +000010840 if (ctxt->charset == XML_CHAR_ENCODING_NONE) {
10841 xmlChar start[4];
10842 xmlCharEncoding enc;
10843
10844 /*
10845 * Very first chars read from the document flow.
10846 */
10847 if (avail < 4)
10848 goto done;
10849
10850 /*
10851 * Get the 4 first bytes and decode the charset
10852 * if enc != XML_CHAR_ENCODING_NONE
William M. Brack3a1cd212005-02-11 14:35:54 +000010853 * plug some encoding conversion routines,
10854 * else xmlSwitchEncoding will set to (default)
10855 * UTF8.
Daniel Veillard0e4cd172001-06-28 12:13:56 +000010856 */
10857 start[0] = RAW;
10858 start[1] = NXT(1);
10859 start[2] = NXT(2);
10860 start[3] = NXT(3);
10861 enc = xmlDetectCharEncoding(start, 4);
William M. Brack3a1cd212005-02-11 14:35:54 +000010862 xmlSwitchEncoding(ctxt, enc);
Daniel Veillard0e4cd172001-06-28 12:13:56 +000010863 break;
10864 }
Owen Taylor3473f882001-02-23 17:55:21 +000010865
Daniel Veillard2b8c4a12003-10-02 22:28:19 +000010866 if (avail < 2)
10867 goto done;
Owen Taylor3473f882001-02-23 17:55:21 +000010868 cur = ctxt->input->cur[0];
10869 next = ctxt->input->cur[1];
10870 if (cur == 0) {
10871 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10872 ctxt->sax->setDocumentLocator(ctxt->userData,
10873 &xmlDefaultSAXLocator);
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010874 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010875 ctxt->instate = XML_PARSER_EOF;
10876#ifdef DEBUG_PUSH
10877 xmlGenericError(xmlGenericErrorContext,
10878 "PP: entering EOF\n");
10879#endif
10880 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10881 ctxt->sax->endDocument(ctxt->userData);
10882 goto done;
10883 }
10884 if ((cur == '<') && (next == '?')) {
10885 /* PI or XML decl */
10886 if (avail < 5) return(ret);
10887 if ((!terminate) &&
10888 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
10889 return(ret);
10890 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10891 ctxt->sax->setDocumentLocator(ctxt->userData,
10892 &xmlDefaultSAXLocator);
10893 if ((ctxt->input->cur[2] == 'x') &&
10894 (ctxt->input->cur[3] == 'm') &&
10895 (ctxt->input->cur[4] == 'l') &&
William M. Brack76e95df2003-10-18 16:20:14 +000010896 (IS_BLANK_CH(ctxt->input->cur[5]))) {
Owen Taylor3473f882001-02-23 17:55:21 +000010897 ret += 5;
10898#ifdef DEBUG_PUSH
10899 xmlGenericError(xmlGenericErrorContext,
10900 "PP: Parsing XML Decl\n");
10901#endif
10902 xmlParseXMLDecl(ctxt);
10903 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
10904 /*
10905 * The XML REC instructs us to stop parsing right
10906 * here
10907 */
10908 ctxt->instate = XML_PARSER_EOF;
10909 return(0);
10910 }
10911 ctxt->standalone = ctxt->input->standalone;
10912 if ((ctxt->encoding == NULL) &&
10913 (ctxt->input->encoding != NULL))
10914 ctxt->encoding = xmlStrdup(ctxt->input->encoding);
10915 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
10916 (!ctxt->disableSAX))
10917 ctxt->sax->startDocument(ctxt->userData);
10918 ctxt->instate = XML_PARSER_MISC;
10919#ifdef DEBUG_PUSH
10920 xmlGenericError(xmlGenericErrorContext,
10921 "PP: entering MISC\n");
10922#endif
10923 } else {
10924 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10925 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
10926 (!ctxt->disableSAX))
10927 ctxt->sax->startDocument(ctxt->userData);
10928 ctxt->instate = XML_PARSER_MISC;
10929#ifdef DEBUG_PUSH
10930 xmlGenericError(xmlGenericErrorContext,
10931 "PP: entering MISC\n");
10932#endif
10933 }
10934 } else {
10935 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10936 ctxt->sax->setDocumentLocator(ctxt->userData,
10937 &xmlDefaultSAXLocator);
10938 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
William M. Bracka3215c72004-07-31 16:24:01 +000010939 if (ctxt->version == NULL) {
10940 xmlErrMemory(ctxt, NULL);
10941 break;
10942 }
Owen Taylor3473f882001-02-23 17:55:21 +000010943 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
10944 (!ctxt->disableSAX))
10945 ctxt->sax->startDocument(ctxt->userData);
10946 ctxt->instate = XML_PARSER_MISC;
10947#ifdef DEBUG_PUSH
10948 xmlGenericError(xmlGenericErrorContext,
10949 "PP: entering MISC\n");
10950#endif
10951 }
10952 break;
Daniel Veillarda880b122003-04-21 21:36:41 +000010953 case XML_PARSER_START_TAG: {
Daniel Veillarde57ec792003-09-10 10:50:59 +000010954 const xmlChar *name;
Daniel Veillard15495612009-09-05 15:04:41 +020010955 const xmlChar *prefix = NULL;
10956 const xmlChar *URI = NULL;
Daniel Veillarde57ec792003-09-10 10:50:59 +000010957 int nsNr = ctxt->nsNr;
Daniel Veillarda880b122003-04-21 21:36:41 +000010958
10959 if ((avail < 2) && (ctxt->inputNr == 1))
10960 goto done;
10961 cur = ctxt->input->cur[0];
10962 if (cur != '<') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010963 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Daniel Veillarda880b122003-04-21 21:36:41 +000010964 ctxt->instate = XML_PARSER_EOF;
Daniel Veillarda880b122003-04-21 21:36:41 +000010965 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10966 ctxt->sax->endDocument(ctxt->userData);
10967 goto done;
10968 }
10969 if (!terminate) {
10970 if (ctxt->progressive) {
Daniel Veillardb3744002004-02-18 14:28:22 +000010971 /* > can be found unescaped in attribute values */
Daniel Veillardeb70f932004-07-05 16:46:09 +000010972 if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
Daniel Veillarda880b122003-04-21 21:36:41 +000010973 goto done;
10974 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
10975 goto done;
10976 }
10977 }
10978 if (ctxt->spaceNr == 0)
10979 spacePush(ctxt, -1);
Daniel Veillard1114d002006-10-12 16:24:35 +000010980 else if (*ctxt->space == -2)
10981 spacePush(ctxt, -1);
Daniel Veillarda880b122003-04-21 21:36:41 +000010982 else
10983 spacePush(ctxt, *ctxt->space);
Daniel Veillard81273902003-09-30 00:43:48 +000010984#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +000010985 if (ctxt->sax2)
Daniel Veillard81273902003-09-30 00:43:48 +000010986#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillardc82c57e2004-01-12 16:24:34 +000010987 name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
Daniel Veillard81273902003-09-30 00:43:48 +000010988#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +000010989 else
10990 name = xmlParseStartTag(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +000010991#endif /* LIBXML_SAX1_ENABLED */
Chris Evans77404b82011-12-14 16:18:25 +080010992 if (ctxt->instate == XML_PARSER_EOF)
10993 goto done;
Daniel Veillarda880b122003-04-21 21:36:41 +000010994 if (name == NULL) {
10995 spacePop(ctxt);
10996 ctxt->instate = XML_PARSER_EOF;
Daniel Veillarda880b122003-04-21 21:36:41 +000010997 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10998 ctxt->sax->endDocument(ctxt->userData);
10999 goto done;
11000 }
Daniel Veillard4432df22003-09-28 18:58:27 +000011001#ifdef LIBXML_VALID_ENABLED
Daniel Veillarda880b122003-04-21 21:36:41 +000011002 /*
11003 * [ VC: Root Element Type ]
11004 * The Name in the document type declaration must match
11005 * the element type of the root element.
11006 */
11007 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
11008 ctxt->node && (ctxt->node == ctxt->myDoc->children))
11009 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
Daniel Veillard4432df22003-09-28 18:58:27 +000011010#endif /* LIBXML_VALID_ENABLED */
Daniel Veillarda880b122003-04-21 21:36:41 +000011011
11012 /*
11013 * Check for an Empty Element.
11014 */
11015 if ((RAW == '/') && (NXT(1) == '>')) {
11016 SKIP(2);
Daniel Veillarde57ec792003-09-10 10:50:59 +000011017
11018 if (ctxt->sax2) {
11019 if ((ctxt->sax != NULL) &&
11020 (ctxt->sax->endElementNs != NULL) &&
11021 (!ctxt->disableSAX))
11022 ctxt->sax->endElementNs(ctxt->userData, name,
11023 prefix, URI);
Daniel Veillard48df9612005-01-04 21:50:05 +000011024 if (ctxt->nsNr - nsNr > 0)
11025 nsPop(ctxt, ctxt->nsNr - nsNr);
Daniel Veillard81273902003-09-30 00:43:48 +000011026#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +000011027 } else {
11028 if ((ctxt->sax != NULL) &&
11029 (ctxt->sax->endElement != NULL) &&
11030 (!ctxt->disableSAX))
11031 ctxt->sax->endElement(ctxt->userData, name);
Daniel Veillard81273902003-09-30 00:43:48 +000011032#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarda880b122003-04-21 21:36:41 +000011033 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000011034 spacePop(ctxt);
11035 if (ctxt->nameNr == 0) {
Daniel Veillarda880b122003-04-21 21:36:41 +000011036 ctxt->instate = XML_PARSER_EPILOG;
Daniel Veillarda880b122003-04-21 21:36:41 +000011037 } else {
11038 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillarda880b122003-04-21 21:36:41 +000011039 }
11040 break;
11041 }
11042 if (RAW == '>') {
11043 NEXT;
11044 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +000011045 xmlFatalErrMsgStr(ctxt, XML_ERR_GT_REQUIRED,
Daniel Veillarda880b122003-04-21 21:36:41 +000011046 "Couldn't find end of Start Tag %s\n",
11047 name);
Daniel Veillarda880b122003-04-21 21:36:41 +000011048 nodePop(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +000011049 spacePop(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +000011050 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000011051 if (ctxt->sax2)
11052 nameNsPush(ctxt, name, prefix, URI, ctxt->nsNr - nsNr);
Daniel Veillard81273902003-09-30 00:43:48 +000011053#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +000011054 else
11055 namePush(ctxt, name);
Daniel Veillard81273902003-09-30 00:43:48 +000011056#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +000011057
Daniel Veillarda880b122003-04-21 21:36:41 +000011058 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillarda880b122003-04-21 21:36:41 +000011059 break;
11060 }
11061 case XML_PARSER_CONTENT: {
11062 const xmlChar *test;
11063 unsigned int cons;
11064 if ((avail < 2) && (ctxt->inputNr == 1))
11065 goto done;
11066 cur = ctxt->input->cur[0];
11067 next = ctxt->input->cur[1];
11068
11069 test = CUR_PTR;
11070 cons = ctxt->input->consumed;
11071 if ((cur == '<') && (next == '/')) {
11072 ctxt->instate = XML_PARSER_END_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +000011073 break;
11074 } else if ((cur == '<') && (next == '?')) {
11075 if ((!terminate) &&
11076 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
11077 goto done;
Daniel Veillarda880b122003-04-21 21:36:41 +000011078 xmlParsePI(ctxt);
11079 } else if ((cur == '<') && (next != '!')) {
11080 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +000011081 break;
11082 } else if ((cur == '<') && (next == '!') &&
11083 (ctxt->input->cur[2] == '-') &&
11084 (ctxt->input->cur[3] == '-')) {
Daniel Veillard6974feb2006-02-05 02:43:36 +000011085 int term;
11086
11087 if (avail < 4)
11088 goto done;
11089 ctxt->input->cur += 4;
11090 term = xmlParseLookupSequence(ctxt, '-', '-', '>');
11091 ctxt->input->cur -= 4;
11092 if ((!terminate) && (term < 0))
Daniel Veillarda880b122003-04-21 21:36:41 +000011093 goto done;
Daniel Veillarda880b122003-04-21 21:36:41 +000011094 xmlParseComment(ctxt);
11095 ctxt->instate = XML_PARSER_CONTENT;
11096 } else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
11097 (ctxt->input->cur[2] == '[') &&
11098 (ctxt->input->cur[3] == 'C') &&
11099 (ctxt->input->cur[4] == 'D') &&
11100 (ctxt->input->cur[5] == 'A') &&
11101 (ctxt->input->cur[6] == 'T') &&
11102 (ctxt->input->cur[7] == 'A') &&
11103 (ctxt->input->cur[8] == '[')) {
11104 SKIP(9);
11105 ctxt->instate = XML_PARSER_CDATA_SECTION;
Daniel Veillarda880b122003-04-21 21:36:41 +000011106 break;
11107 } else if ((cur == '<') && (next == '!') &&
11108 (avail < 9)) {
11109 goto done;
11110 } else if (cur == '&') {
11111 if ((!terminate) &&
11112 (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0))
11113 goto done;
Daniel Veillarda880b122003-04-21 21:36:41 +000011114 xmlParseReference(ctxt);
11115 } else {
11116 /* TODO Avoid the extra copy, handle directly !!! */
11117 /*
11118 * Goal of the following test is:
11119 * - minimize calls to the SAX 'character' callback
11120 * when they are mergeable
11121 * - handle a problem for isBlank when we only parse
11122 * a sequence of blank chars and the next one is
11123 * not available to check against '<' presence.
11124 * - tries to homogenize the differences in SAX
11125 * callbacks between the push and pull versions
11126 * of the parser.
11127 */
11128 if ((ctxt->inputNr == 1) &&
11129 (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
11130 if (!terminate) {
11131 if (ctxt->progressive) {
11132 if ((lastlt == NULL) ||
11133 (ctxt->input->cur > lastlt))
11134 goto done;
11135 } else if (xmlParseLookupSequence(ctxt,
11136 '<', 0, 0) < 0) {
11137 goto done;
11138 }
11139 }
11140 }
11141 ctxt->checkIndex = 0;
Daniel Veillarda880b122003-04-21 21:36:41 +000011142 xmlParseCharData(ctxt, 0);
11143 }
11144 /*
11145 * Pop-up of finished entities.
11146 */
11147 while ((RAW == 0) && (ctxt->inputNr > 1))
11148 xmlPopInput(ctxt);
11149 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011150 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
11151 "detected an error in element content\n");
Daniel Veillarda880b122003-04-21 21:36:41 +000011152 ctxt->instate = XML_PARSER_EOF;
11153 break;
11154 }
11155 break;
11156 }
11157 case XML_PARSER_END_TAG:
11158 if (avail < 2)
11159 goto done;
11160 if (!terminate) {
11161 if (ctxt->progressive) {
Daniel Veillardeb70f932004-07-05 16:46:09 +000011162 /* > can be found unescaped in attribute values */
11163 if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
Daniel Veillarda880b122003-04-21 21:36:41 +000011164 goto done;
11165 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
11166 goto done;
11167 }
11168 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000011169 if (ctxt->sax2) {
11170 xmlParseEndTag2(ctxt,
11171 (void *) ctxt->pushTab[ctxt->nameNr * 3 - 3],
11172 (void *) ctxt->pushTab[ctxt->nameNr * 3 - 2], 0,
Daniel Veillardc82c57e2004-01-12 16:24:34 +000011173 (int) (long) ctxt->pushTab[ctxt->nameNr * 3 - 1], 0);
Daniel Veillarde57ec792003-09-10 10:50:59 +000011174 nameNsPop(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +000011175 }
11176#ifdef LIBXML_SAX1_ENABLED
11177 else
Daniel Veillarde57ec792003-09-10 10:50:59 +000011178 xmlParseEndTag1(ctxt, 0);
Daniel Veillard81273902003-09-30 00:43:48 +000011179#endif /* LIBXML_SAX1_ENABLED */
Chris Evans77404b82011-12-14 16:18:25 +080011180 if (ctxt->instate == XML_PARSER_EOF) {
11181 /* Nothing */
11182 } else if (ctxt->nameNr == 0) {
Daniel Veillarda880b122003-04-21 21:36:41 +000011183 ctxt->instate = XML_PARSER_EPILOG;
Daniel Veillarda880b122003-04-21 21:36:41 +000011184 } else {
11185 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillarda880b122003-04-21 21:36:41 +000011186 }
11187 break;
11188 case XML_PARSER_CDATA_SECTION: {
11189 /*
11190 * The Push mode need to have the SAX callback for
11191 * cdataBlock merge back contiguous callbacks.
11192 */
11193 int base;
11194
11195 base = xmlParseLookupSequence(ctxt, ']', ']', '>');
11196 if (base < 0) {
11197 if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) {
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011198 int tmp;
11199
11200 tmp = xmlCheckCdataPush(ctxt->input->cur,
11201 XML_PARSER_BIG_BUFFER_SIZE);
11202 if (tmp < 0) {
11203 tmp = -tmp;
11204 ctxt->input->cur += tmp;
11205 goto encoding_error;
11206 }
Daniel Veillarda880b122003-04-21 21:36:41 +000011207 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
11208 if (ctxt->sax->cdataBlock != NULL)
Daniel Veillardd9d32ae2003-07-05 20:32:43 +000011209 ctxt->sax->cdataBlock(ctxt->userData,
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011210 ctxt->input->cur, tmp);
Daniel Veillardd9d32ae2003-07-05 20:32:43 +000011211 else if (ctxt->sax->characters != NULL)
11212 ctxt->sax->characters(ctxt->userData,
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011213 ctxt->input->cur, tmp);
Daniel Veillarda880b122003-04-21 21:36:41 +000011214 }
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011215 SKIPL(tmp);
Daniel Veillarda880b122003-04-21 21:36:41 +000011216 ctxt->checkIndex = 0;
11217 }
11218 goto done;
11219 } else {
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011220 int tmp;
11221
11222 tmp = xmlCheckCdataPush(ctxt->input->cur, base);
11223 if ((tmp < 0) || (tmp != base)) {
11224 tmp = -tmp;
11225 ctxt->input->cur += tmp;
11226 goto encoding_error;
11227 }
Daniel Veillardd0d2f092008-03-07 16:50:21 +000011228 if ((ctxt->sax != NULL) && (base == 0) &&
11229 (ctxt->sax->cdataBlock != NULL) &&
11230 (!ctxt->disableSAX)) {
11231 /*
11232 * Special case to provide identical behaviour
11233 * between pull and push parsers on enpty CDATA
11234 * sections
11235 */
11236 if ((ctxt->input->cur - ctxt->input->base >= 9) &&
11237 (!strncmp((const char *)&ctxt->input->cur[-9],
11238 "<![CDATA[", 9)))
11239 ctxt->sax->cdataBlock(ctxt->userData,
11240 BAD_CAST "", 0);
11241 } else if ((ctxt->sax != NULL) && (base > 0) &&
Daniel Veillarda880b122003-04-21 21:36:41 +000011242 (!ctxt->disableSAX)) {
11243 if (ctxt->sax->cdataBlock != NULL)
11244 ctxt->sax->cdataBlock(ctxt->userData,
11245 ctxt->input->cur, base);
Daniel Veillardd9d32ae2003-07-05 20:32:43 +000011246 else if (ctxt->sax->characters != NULL)
11247 ctxt->sax->characters(ctxt->userData,
11248 ctxt->input->cur, base);
Daniel Veillarda880b122003-04-21 21:36:41 +000011249 }
Daniel Veillard0b787f32004-03-26 17:29:53 +000011250 SKIPL(base + 3);
Daniel Veillarda880b122003-04-21 21:36:41 +000011251 ctxt->checkIndex = 0;
11252 ctxt->instate = XML_PARSER_CONTENT;
11253#ifdef DEBUG_PUSH
11254 xmlGenericError(xmlGenericErrorContext,
11255 "PP: entering CONTENT\n");
11256#endif
11257 }
11258 break;
11259 }
Owen Taylor3473f882001-02-23 17:55:21 +000011260 case XML_PARSER_MISC:
11261 SKIP_BLANKS;
11262 if (ctxt->input->buf == NULL)
Daniel Veillarda880b122003-04-21 21:36:41 +000011263 avail = ctxt->input->length -
11264 (ctxt->input->cur - ctxt->input->base);
Owen Taylor3473f882001-02-23 17:55:21 +000011265 else
Daniel Veillarda880b122003-04-21 21:36:41 +000011266 avail = ctxt->input->buf->buffer->use -
11267 (ctxt->input->cur - ctxt->input->base);
Owen Taylor3473f882001-02-23 17:55:21 +000011268 if (avail < 2)
11269 goto done;
11270 cur = ctxt->input->cur[0];
11271 next = ctxt->input->cur[1];
11272 if ((cur == '<') && (next == '?')) {
11273 if ((!terminate) &&
11274 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
11275 goto done;
11276#ifdef DEBUG_PUSH
11277 xmlGenericError(xmlGenericErrorContext,
11278 "PP: Parsing PI\n");
11279#endif
11280 xmlParsePI(ctxt);
Daniel Veillard40e4b212007-06-12 14:46:40 +000011281 ctxt->checkIndex = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000011282 } else if ((cur == '<') && (next == '!') &&
Daniel Veillarda880b122003-04-21 21:36:41 +000011283 (ctxt->input->cur[2] == '-') &&
11284 (ctxt->input->cur[3] == '-')) {
Owen Taylor3473f882001-02-23 17:55:21 +000011285 if ((!terminate) &&
11286 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
11287 goto done;
11288#ifdef DEBUG_PUSH
11289 xmlGenericError(xmlGenericErrorContext,
11290 "PP: Parsing Comment\n");
11291#endif
11292 xmlParseComment(ctxt);
11293 ctxt->instate = XML_PARSER_MISC;
Daniel Veillarddfac9462007-06-12 14:44:32 +000011294 ctxt->checkIndex = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000011295 } else if ((cur == '<') && (next == '!') &&
Daniel Veillarda880b122003-04-21 21:36:41 +000011296 (ctxt->input->cur[2] == 'D') &&
11297 (ctxt->input->cur[3] == 'O') &&
11298 (ctxt->input->cur[4] == 'C') &&
11299 (ctxt->input->cur[5] == 'T') &&
11300 (ctxt->input->cur[6] == 'Y') &&
11301 (ctxt->input->cur[7] == 'P') &&
Owen Taylor3473f882001-02-23 17:55:21 +000011302 (ctxt->input->cur[8] == 'E')) {
11303 if ((!terminate) &&
11304 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
11305 goto done;
11306#ifdef DEBUG_PUSH
11307 xmlGenericError(xmlGenericErrorContext,
11308 "PP: Parsing internal subset\n");
11309#endif
11310 ctxt->inSubset = 1;
11311 xmlParseDocTypeDecl(ctxt);
11312 if (RAW == '[') {
11313 ctxt->instate = XML_PARSER_DTD;
11314#ifdef DEBUG_PUSH
11315 xmlGenericError(xmlGenericErrorContext,
11316 "PP: entering DTD\n");
11317#endif
11318 } else {
11319 /*
11320 * Create and update the external subset.
11321 */
11322 ctxt->inSubset = 2;
11323 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
11324 (ctxt->sax->externalSubset != NULL))
11325 ctxt->sax->externalSubset(ctxt->userData,
11326 ctxt->intSubName, ctxt->extSubSystem,
11327 ctxt->extSubURI);
11328 ctxt->inSubset = 0;
Daniel Veillardac4118d2008-01-11 05:27:32 +000011329 xmlCleanSpecialAttr(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011330 ctxt->instate = XML_PARSER_PROLOG;
11331#ifdef DEBUG_PUSH
11332 xmlGenericError(xmlGenericErrorContext,
11333 "PP: entering PROLOG\n");
11334#endif
11335 }
11336 } else if ((cur == '<') && (next == '!') &&
11337 (avail < 9)) {
11338 goto done;
11339 } else {
11340 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +000011341 ctxt->progressive = 1;
11342 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Owen Taylor3473f882001-02-23 17:55:21 +000011343#ifdef DEBUG_PUSH
11344 xmlGenericError(xmlGenericErrorContext,
11345 "PP: entering START_TAG\n");
11346#endif
11347 }
11348 break;
Owen Taylor3473f882001-02-23 17:55:21 +000011349 case XML_PARSER_PROLOG:
11350 SKIP_BLANKS;
11351 if (ctxt->input->buf == NULL)
11352 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
11353 else
11354 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
11355 if (avail < 2)
11356 goto done;
11357 cur = ctxt->input->cur[0];
11358 next = ctxt->input->cur[1];
11359 if ((cur == '<') && (next == '?')) {
11360 if ((!terminate) &&
11361 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
11362 goto done;
11363#ifdef DEBUG_PUSH
11364 xmlGenericError(xmlGenericErrorContext,
11365 "PP: Parsing PI\n");
11366#endif
11367 xmlParsePI(ctxt);
11368 } else if ((cur == '<') && (next == '!') &&
11369 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
11370 if ((!terminate) &&
11371 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
11372 goto done;
11373#ifdef DEBUG_PUSH
11374 xmlGenericError(xmlGenericErrorContext,
11375 "PP: Parsing Comment\n");
11376#endif
11377 xmlParseComment(ctxt);
11378 ctxt->instate = XML_PARSER_PROLOG;
11379 } else if ((cur == '<') && (next == '!') &&
11380 (avail < 4)) {
11381 goto done;
11382 } else {
11383 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillard0df3bc32004-06-08 12:03:41 +000011384 if (ctxt->progressive == 0)
11385 ctxt->progressive = 1;
Daniel Veillarda880b122003-04-21 21:36:41 +000011386 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Owen Taylor3473f882001-02-23 17:55:21 +000011387#ifdef DEBUG_PUSH
11388 xmlGenericError(xmlGenericErrorContext,
11389 "PP: entering START_TAG\n");
11390#endif
11391 }
11392 break;
11393 case XML_PARSER_EPILOG:
11394 SKIP_BLANKS;
11395 if (ctxt->input->buf == NULL)
11396 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
11397 else
11398 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
11399 if (avail < 2)
11400 goto done;
11401 cur = ctxt->input->cur[0];
11402 next = ctxt->input->cur[1];
11403 if ((cur == '<') && (next == '?')) {
11404 if ((!terminate) &&
11405 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
11406 goto done;
11407#ifdef DEBUG_PUSH
11408 xmlGenericError(xmlGenericErrorContext,
11409 "PP: Parsing PI\n");
11410#endif
11411 xmlParsePI(ctxt);
11412 ctxt->instate = XML_PARSER_EPILOG;
11413 } else if ((cur == '<') && (next == '!') &&
11414 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
11415 if ((!terminate) &&
11416 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
11417 goto done;
11418#ifdef DEBUG_PUSH
11419 xmlGenericError(xmlGenericErrorContext,
11420 "PP: Parsing Comment\n");
11421#endif
11422 xmlParseComment(ctxt);
11423 ctxt->instate = XML_PARSER_EPILOG;
11424 } else if ((cur == '<') && (next == '!') &&
11425 (avail < 4)) {
11426 goto done;
11427 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011428 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011429 ctxt->instate = XML_PARSER_EOF;
11430#ifdef DEBUG_PUSH
11431 xmlGenericError(xmlGenericErrorContext,
11432 "PP: entering EOF\n");
11433#endif
Daniel Veillard8d24cc12002-03-05 15:41:29 +000011434 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +000011435 ctxt->sax->endDocument(ctxt->userData);
11436 goto done;
11437 }
11438 break;
Owen Taylor3473f882001-02-23 17:55:21 +000011439 case XML_PARSER_DTD: {
11440 /*
11441 * Sorry but progressive parsing of the internal subset
11442 * is not expected to be supported. We first check that
11443 * the full content of the internal subset is available and
11444 * the parsing is launched only at that point.
11445 * Internal subset ends up with "']' S? '>'" in an unescaped
11446 * section and not in a ']]>' sequence which are conditional
11447 * sections (whoever argued to keep that crap in XML deserve
11448 * a place in hell !).
11449 */
11450 int base, i;
11451 xmlChar *buf;
11452 xmlChar quote = 0;
11453
11454 base = ctxt->input->cur - ctxt->input->base;
11455 if (base < 0) return(0);
11456 if (ctxt->checkIndex > base)
11457 base = ctxt->checkIndex;
11458 buf = ctxt->input->buf->buffer->content;
11459 for (;(unsigned int) base < ctxt->input->buf->buffer->use;
11460 base++) {
11461 if (quote != 0) {
11462 if (buf[base] == quote)
11463 quote = 0;
11464 continue;
11465 }
Daniel Veillard036143b2004-02-12 11:57:52 +000011466 if ((quote == 0) && (buf[base] == '<')) {
11467 int found = 0;
11468 /* special handling of comments */
11469 if (((unsigned int) base + 4 <
11470 ctxt->input->buf->buffer->use) &&
11471 (buf[base + 1] == '!') &&
11472 (buf[base + 2] == '-') &&
11473 (buf[base + 3] == '-')) {
11474 for (;(unsigned int) base + 3 <
11475 ctxt->input->buf->buffer->use; base++) {
11476 if ((buf[base] == '-') &&
11477 (buf[base + 1] == '-') &&
11478 (buf[base + 2] == '>')) {
11479 found = 1;
11480 base += 2;
11481 break;
11482 }
11483 }
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000011484 if (!found) {
11485#if 0
11486 fprintf(stderr, "unfinished comment\n");
11487#endif
11488 break; /* for */
11489 }
Daniel Veillard036143b2004-02-12 11:57:52 +000011490 continue;
11491 }
11492 }
Owen Taylor3473f882001-02-23 17:55:21 +000011493 if (buf[base] == '"') {
11494 quote = '"';
11495 continue;
11496 }
11497 if (buf[base] == '\'') {
11498 quote = '\'';
11499 continue;
11500 }
11501 if (buf[base] == ']') {
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000011502#if 0
11503 fprintf(stderr, "%c%c%c%c: ", buf[base],
11504 buf[base + 1], buf[base + 2], buf[base + 3]);
11505#endif
Owen Taylor3473f882001-02-23 17:55:21 +000011506 if ((unsigned int) base +1 >=
11507 ctxt->input->buf->buffer->use)
11508 break;
11509 if (buf[base + 1] == ']') {
11510 /* conditional crap, skip both ']' ! */
11511 base++;
11512 continue;
11513 }
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000011514 for (i = 1;
Owen Taylor3473f882001-02-23 17:55:21 +000011515 (unsigned int) base + i < ctxt->input->buf->buffer->use;
11516 i++) {
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000011517 if (buf[base + i] == '>') {
11518#if 0
11519 fprintf(stderr, "found\n");
11520#endif
Owen Taylor3473f882001-02-23 17:55:21 +000011521 goto found_end_int_subset;
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000011522 }
11523 if (!IS_BLANK_CH(buf[base + i])) {
11524#if 0
11525 fprintf(stderr, "not found\n");
11526#endif
11527 goto not_end_of_int_subset;
11528 }
Owen Taylor3473f882001-02-23 17:55:21 +000011529 }
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000011530#if 0
11531 fprintf(stderr, "end of stream\n");
11532#endif
Owen Taylor3473f882001-02-23 17:55:21 +000011533 break;
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000011534
Owen Taylor3473f882001-02-23 17:55:21 +000011535 }
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000011536not_end_of_int_subset:
11537 continue; /* for */
Owen Taylor3473f882001-02-23 17:55:21 +000011538 }
11539 /*
11540 * We didn't found the end of the Internal subset
11541 */
Owen Taylor3473f882001-02-23 17:55:21 +000011542#ifdef DEBUG_PUSH
11543 if (next == 0)
11544 xmlGenericError(xmlGenericErrorContext,
11545 "PP: lookup of int subset end filed\n");
11546#endif
11547 goto done;
11548
11549found_end_int_subset:
11550 xmlParseInternalSubset(ctxt);
11551 ctxt->inSubset = 2;
11552 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
11553 (ctxt->sax->externalSubset != NULL))
11554 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
11555 ctxt->extSubSystem, ctxt->extSubURI);
11556 ctxt->inSubset = 0;
Daniel Veillardac4118d2008-01-11 05:27:32 +000011557 xmlCleanSpecialAttr(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011558 ctxt->instate = XML_PARSER_PROLOG;
11559 ctxt->checkIndex = 0;
11560#ifdef DEBUG_PUSH
11561 xmlGenericError(xmlGenericErrorContext,
11562 "PP: entering PROLOG\n");
11563#endif
11564 break;
11565 }
11566 case XML_PARSER_COMMENT:
11567 xmlGenericError(xmlGenericErrorContext,
11568 "PP: internal error, state == COMMENT\n");
11569 ctxt->instate = XML_PARSER_CONTENT;
11570#ifdef DEBUG_PUSH
11571 xmlGenericError(xmlGenericErrorContext,
11572 "PP: entering CONTENT\n");
11573#endif
11574 break;
Daniel Veillarda880b122003-04-21 21:36:41 +000011575 case XML_PARSER_IGNORE:
11576 xmlGenericError(xmlGenericErrorContext,
11577 "PP: internal error, state == IGNORE");
11578 ctxt->instate = XML_PARSER_DTD;
11579#ifdef DEBUG_PUSH
11580 xmlGenericError(xmlGenericErrorContext,
11581 "PP: entering DTD\n");
11582#endif
11583 break;
Owen Taylor3473f882001-02-23 17:55:21 +000011584 case XML_PARSER_PI:
11585 xmlGenericError(xmlGenericErrorContext,
11586 "PP: internal error, state == PI\n");
11587 ctxt->instate = XML_PARSER_CONTENT;
11588#ifdef DEBUG_PUSH
11589 xmlGenericError(xmlGenericErrorContext,
11590 "PP: entering CONTENT\n");
11591#endif
11592 break;
11593 case XML_PARSER_ENTITY_DECL:
11594 xmlGenericError(xmlGenericErrorContext,
11595 "PP: internal error, state == ENTITY_DECL\n");
11596 ctxt->instate = XML_PARSER_DTD;
11597#ifdef DEBUG_PUSH
11598 xmlGenericError(xmlGenericErrorContext,
11599 "PP: entering DTD\n");
11600#endif
11601 break;
11602 case XML_PARSER_ENTITY_VALUE:
11603 xmlGenericError(xmlGenericErrorContext,
11604 "PP: internal error, state == ENTITY_VALUE\n");
11605 ctxt->instate = XML_PARSER_CONTENT;
11606#ifdef DEBUG_PUSH
11607 xmlGenericError(xmlGenericErrorContext,
11608 "PP: entering DTD\n");
11609#endif
11610 break;
11611 case XML_PARSER_ATTRIBUTE_VALUE:
11612 xmlGenericError(xmlGenericErrorContext,
11613 "PP: internal error, state == ATTRIBUTE_VALUE\n");
11614 ctxt->instate = XML_PARSER_START_TAG;
11615#ifdef DEBUG_PUSH
11616 xmlGenericError(xmlGenericErrorContext,
11617 "PP: entering START_TAG\n");
11618#endif
11619 break;
11620 case XML_PARSER_SYSTEM_LITERAL:
11621 xmlGenericError(xmlGenericErrorContext,
11622 "PP: internal error, state == SYSTEM_LITERAL\n");
11623 ctxt->instate = XML_PARSER_START_TAG;
11624#ifdef DEBUG_PUSH
11625 xmlGenericError(xmlGenericErrorContext,
11626 "PP: entering START_TAG\n");
11627#endif
11628 break;
Daniel Veillard4a7ae502002-02-18 19:18:17 +000011629 case XML_PARSER_PUBLIC_LITERAL:
11630 xmlGenericError(xmlGenericErrorContext,
11631 "PP: internal error, state == PUBLIC_LITERAL\n");
11632 ctxt->instate = XML_PARSER_START_TAG;
11633#ifdef DEBUG_PUSH
11634 xmlGenericError(xmlGenericErrorContext,
11635 "PP: entering START_TAG\n");
11636#endif
11637 break;
Owen Taylor3473f882001-02-23 17:55:21 +000011638 }
11639 }
11640done:
11641#ifdef DEBUG_PUSH
11642 xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret);
11643#endif
11644 return(ret);
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011645encoding_error:
11646 {
11647 char buffer[150];
11648
11649 snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
11650 ctxt->input->cur[0], ctxt->input->cur[1],
11651 ctxt->input->cur[2], ctxt->input->cur[3]);
11652 __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
11653 "Input is not proper UTF-8, indicate encoding !\n%s",
11654 BAD_CAST buffer, NULL);
11655 }
11656 return(0);
Owen Taylor3473f882001-02-23 17:55:21 +000011657}
11658
11659/**
Owen Taylor3473f882001-02-23 17:55:21 +000011660 * xmlParseChunk:
11661 * @ctxt: an XML parser context
11662 * @chunk: an char array
11663 * @size: the size in byte of the chunk
11664 * @terminate: last chunk indicator
11665 *
11666 * Parse a Chunk of memory
11667 *
11668 * Returns zero if no error, the xmlParserErrors otherwise.
11669 */
11670int
11671xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
11672 int terminate) {
Daniel Veillarda617e242006-01-09 14:38:44 +000011673 int end_in_lf = 0;
Daniel Veillarda6c76a22009-08-26 14:37:00 +020011674 int remain = 0;
Daniel Veillarda617e242006-01-09 14:38:44 +000011675
Daniel Veillard36e5cd52004-11-02 14:52:23 +000011676 if (ctxt == NULL)
11677 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard14412512005-01-21 23:53:26 +000011678 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000011679 return(ctxt->errNo);
Daniel Veillard309f81d2003-09-23 09:02:53 +000011680 if (ctxt->instate == XML_PARSER_START)
11681 xmlDetectSAX2(ctxt);
Daniel Veillarda617e242006-01-09 14:38:44 +000011682 if ((size > 0) && (chunk != NULL) && (!terminate) &&
11683 (chunk[size - 1] == '\r')) {
11684 end_in_lf = 1;
11685 size--;
11686 }
Daniel Veillarda6c76a22009-08-26 14:37:00 +020011687
11688xmldecl_done:
11689
Owen Taylor3473f882001-02-23 17:55:21 +000011690 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
11691 (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) {
11692 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
11693 int cur = ctxt->input->cur - ctxt->input->base;
William M. Bracka3215c72004-07-31 16:24:01 +000011694 int res;
Daniel Veillarda6c76a22009-08-26 14:37:00 +020011695
11696 /*
11697 * Specific handling if we autodetected an encoding, we should not
11698 * push more than the first line ... which depend on the encoding
11699 * And only push the rest once the final encoding was detected
11700 */
11701 if ((ctxt->instate == XML_PARSER_START) && (ctxt->input != NULL) &&
11702 (ctxt->input->buf != NULL) && (ctxt->input->buf->encoder != NULL)) {
Nikolay Sivov73046832010-01-19 15:38:05 +010011703 unsigned int len = 45;
Daniel Veillarda6c76a22009-08-26 14:37:00 +020011704
11705 if ((xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
11706 BAD_CAST "UTF-16")) ||
11707 (xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
11708 BAD_CAST "UTF16")))
11709 len = 90;
11710 else if ((xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
11711 BAD_CAST "UCS-4")) ||
11712 (xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
11713 BAD_CAST "UCS4")))
11714 len = 180;
11715
11716 if (ctxt->input->buf->rawconsumed < len)
11717 len -= ctxt->input->buf->rawconsumed;
11718
Raul Hudeaba9716a2010-03-15 10:13:29 +010011719 /*
11720 * Change size for reading the initial declaration only
11721 * if size is greater than len. Otherwise, memmove in xmlBufferAdd
11722 * will blindly copy extra bytes from memory.
11723 */
Daniel Veillard60587d62010-11-04 15:16:27 +010011724 if ((unsigned int) size > len) {
Raul Hudeaba9716a2010-03-15 10:13:29 +010011725 remain = size - len;
11726 size = len;
11727 } else {
11728 remain = 0;
11729 }
Daniel Veillarda6c76a22009-08-26 14:37:00 +020011730 }
William M. Bracka3215c72004-07-31 16:24:01 +000011731 res =xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
11732 if (res < 0) {
11733 ctxt->errNo = XML_PARSER_EOF;
11734 ctxt->disableSAX = 1;
11735 return (XML_PARSER_EOF);
11736 }
Owen Taylor3473f882001-02-23 17:55:21 +000011737 ctxt->input->base = ctxt->input->buf->buffer->content + base;
11738 ctxt->input->cur = ctxt->input->base + cur;
Daniel Veillard48b2f892001-02-25 16:11:03 +000011739 ctxt->input->end =
11740 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000011741#ifdef DEBUG_PUSH
11742 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
11743#endif
11744
Owen Taylor3473f882001-02-23 17:55:21 +000011745 } else if (ctxt->instate != XML_PARSER_EOF) {
11746 if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
11747 xmlParserInputBufferPtr in = ctxt->input->buf;
11748 if ((in->encoder != NULL) && (in->buffer != NULL) &&
11749 (in->raw != NULL)) {
11750 int nbchars;
Daniel Veillarda6c76a22009-08-26 14:37:00 +020011751
Owen Taylor3473f882001-02-23 17:55:21 +000011752 nbchars = xmlCharEncInFunc(in->encoder, in->buffer, in->raw);
11753 if (nbchars < 0) {
Daniel Veillard24eb9782003-10-04 21:08:09 +000011754 /* TODO 2.6.0 */
Owen Taylor3473f882001-02-23 17:55:21 +000011755 xmlGenericError(xmlGenericErrorContext,
11756 "xmlParseChunk: encoder error\n");
11757 return(XML_ERR_INVALID_ENCODING);
11758 }
11759 }
11760 }
11761 }
Daniel Veillarda6c76a22009-08-26 14:37:00 +020011762 if (remain != 0)
11763 xmlParseTryOrFinish(ctxt, 0);
11764 else
11765 xmlParseTryOrFinish(ctxt, terminate);
11766 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
11767 return(ctxt->errNo);
11768
11769 if (remain != 0) {
11770 chunk += size;
11771 size = remain;
11772 remain = 0;
11773 goto xmldecl_done;
11774 }
Daniel Veillarda617e242006-01-09 14:38:44 +000011775 if ((end_in_lf == 1) && (ctxt->input != NULL) &&
11776 (ctxt->input->buf != NULL)) {
11777 xmlParserInputBufferPush(ctxt->input->buf, 1, "\r");
11778 }
Owen Taylor3473f882001-02-23 17:55:21 +000011779 if (terminate) {
11780 /*
11781 * Check for termination
11782 */
Daniel Veillard36e5cd52004-11-02 14:52:23 +000011783 int avail = 0;
11784
11785 if (ctxt->input != NULL) {
Daniel Veillard819d5cb2002-10-14 11:15:18 +000011786 if (ctxt->input->buf == NULL)
Daniel Veillard36e5cd52004-11-02 14:52:23 +000011787 avail = ctxt->input->length -
11788 (ctxt->input->cur - ctxt->input->base);
11789 else
11790 avail = ctxt->input->buf->buffer->use -
11791 (ctxt->input->cur - ctxt->input->base);
11792 }
Daniel Veillard819d5cb2002-10-14 11:15:18 +000011793
Owen Taylor3473f882001-02-23 17:55:21 +000011794 if ((ctxt->instate != XML_PARSER_EOF) &&
11795 (ctxt->instate != XML_PARSER_EPILOG)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011796 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011797 }
Daniel Veillard819d5cb2002-10-14 11:15:18 +000011798 if ((ctxt->instate == XML_PARSER_EPILOG) && (avail > 0)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011799 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Daniel Veillard819d5cb2002-10-14 11:15:18 +000011800 }
Owen Taylor3473f882001-02-23 17:55:21 +000011801 if (ctxt->instate != XML_PARSER_EOF) {
Daniel Veillard8d24cc12002-03-05 15:41:29 +000011802 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +000011803 ctxt->sax->endDocument(ctxt->userData);
11804 }
11805 ctxt->instate = XML_PARSER_EOF;
11806 }
11807 return((xmlParserErrors) ctxt->errNo);
11808}
11809
11810/************************************************************************
11811 * *
11812 * I/O front end functions to the parser *
11813 * *
11814 ************************************************************************/
11815
11816/**
Owen Taylor3473f882001-02-23 17:55:21 +000011817 * xmlCreatePushParserCtxt:
11818 * @sax: a SAX handler
11819 * @user_data: The user data returned on SAX callbacks
11820 * @chunk: a pointer to an array of chars
11821 * @size: number of chars in the array
11822 * @filename: an optional file name or URI
11823 *
Daniel Veillard176d99f2002-07-06 19:22:28 +000011824 * Create a parser context for using the XML parser in push mode.
11825 * If @buffer and @size are non-NULL, the data is used to detect
11826 * the encoding. The remaining characters will be parsed so they
11827 * don't need to be fed in again through xmlParseChunk.
Owen Taylor3473f882001-02-23 17:55:21 +000011828 * To allow content encoding detection, @size should be >= 4
11829 * The value of @filename is used for fetching external entities
11830 * and error/warning reports.
11831 *
11832 * Returns the new parser context or NULL
11833 */
Daniel Veillard176d99f2002-07-06 19:22:28 +000011834
Owen Taylor3473f882001-02-23 17:55:21 +000011835xmlParserCtxtPtr
11836xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
11837 const char *chunk, int size, const char *filename) {
11838 xmlParserCtxtPtr ctxt;
11839 xmlParserInputPtr inputStream;
11840 xmlParserInputBufferPtr buf;
11841 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
11842
11843 /*
11844 * plug some encoding conversion routines
11845 */
11846 if ((chunk != NULL) && (size >= 4))
11847 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
11848
11849 buf = xmlAllocParserInputBuffer(enc);
11850 if (buf == NULL) return(NULL);
11851
11852 ctxt = xmlNewParserCtxt();
11853 if (ctxt == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +000011854 xmlErrMemory(NULL, "creating parser: out of memory\n");
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000011855 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000011856 return(NULL);
11857 }
Daniel Veillard03a53c32004-10-26 16:06:51 +000011858 ctxt->dictNames = 1;
Daniel Veillarde57ec792003-09-10 10:50:59 +000011859 ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 * sizeof(xmlChar *));
11860 if (ctxt->pushTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011861 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +000011862 xmlFreeParserInputBuffer(buf);
11863 xmlFreeParserCtxt(ctxt);
11864 return(NULL);
11865 }
Owen Taylor3473f882001-02-23 17:55:21 +000011866 if (sax != NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +000011867#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard092643b2003-09-25 14:29:29 +000011868 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Daniel Veillard81273902003-09-30 00:43:48 +000011869#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000011870 xmlFree(ctxt->sax);
11871 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
11872 if (ctxt->sax == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011873 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000011874 xmlFreeParserInputBuffer(buf);
11875 xmlFreeParserCtxt(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011876 return(NULL);
11877 }
Daniel Veillard5ea30d72004-11-08 11:54:28 +000011878 memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
11879 if (sax->initialized == XML_SAX2_MAGIC)
11880 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
11881 else
11882 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
Owen Taylor3473f882001-02-23 17:55:21 +000011883 if (user_data != NULL)
11884 ctxt->userData = user_data;
11885 }
11886 if (filename == NULL) {
11887 ctxt->directory = NULL;
11888 } else {
11889 ctxt->directory = xmlParserGetDirectory(filename);
11890 }
11891
11892 inputStream = xmlNewInputStream(ctxt);
11893 if (inputStream == NULL) {
11894 xmlFreeParserCtxt(ctxt);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000011895 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000011896 return(NULL);
11897 }
11898
11899 if (filename == NULL)
11900 inputStream->filename = NULL;
William M. Bracka3215c72004-07-31 16:24:01 +000011901 else {
Daniel Veillardf4862f02002-09-10 11:13:43 +000011902 inputStream->filename = (char *)
Igor Zlatkovic5f9fada2003-02-19 14:51:00 +000011903 xmlCanonicPath((const xmlChar *) filename);
William M. Bracka3215c72004-07-31 16:24:01 +000011904 if (inputStream->filename == NULL) {
11905 xmlFreeParserCtxt(ctxt);
11906 xmlFreeParserInputBuffer(buf);
11907 return(NULL);
11908 }
11909 }
Owen Taylor3473f882001-02-23 17:55:21 +000011910 inputStream->buf = buf;
11911 inputStream->base = inputStream->buf->buffer->content;
11912 inputStream->cur = inputStream->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +000011913 inputStream->end =
11914 &inputStream->buf->buffer->content[inputStream->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000011915
11916 inputPush(ctxt, inputStream);
11917
William M. Brack3a1cd212005-02-11 14:35:54 +000011918 /*
11919 * If the caller didn't provide an initial 'chunk' for determining
11920 * the encoding, we set the context to XML_CHAR_ENCODING_NONE so
11921 * that it can be automatically determined later
11922 */
11923 if ((size == 0) || (chunk == NULL)) {
11924 ctxt->charset = XML_CHAR_ENCODING_NONE;
11925 } else if ((ctxt->input != NULL) && (ctxt->input->buf != NULL)) {
Daniel Veillardaa39a0f2002-01-06 12:47:22 +000011926 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
11927 int cur = ctxt->input->cur - ctxt->input->base;
11928
Owen Taylor3473f882001-02-23 17:55:21 +000011929 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
Daniel Veillardaa39a0f2002-01-06 12:47:22 +000011930
11931 ctxt->input->base = ctxt->input->buf->buffer->content + base;
11932 ctxt->input->cur = ctxt->input->base + cur;
11933 ctxt->input->end =
11934 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000011935#ifdef DEBUG_PUSH
11936 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
11937#endif
11938 }
11939
Daniel Veillard0e4cd172001-06-28 12:13:56 +000011940 if (enc != XML_CHAR_ENCODING_NONE) {
11941 xmlSwitchEncoding(ctxt, enc);
11942 }
11943
Owen Taylor3473f882001-02-23 17:55:21 +000011944 return(ctxt);
11945}
Daniel Veillard73b013f2003-09-30 12:36:01 +000011946#endif /* LIBXML_PUSH_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000011947
11948/**
Daniel Veillard39e5c892005-07-03 22:48:50 +000011949 * xmlStopParser:
11950 * @ctxt: an XML parser context
11951 *
11952 * Blocks further parser processing
11953 */
11954void
11955xmlStopParser(xmlParserCtxtPtr ctxt) {
11956 if (ctxt == NULL)
11957 return;
11958 ctxt->instate = XML_PARSER_EOF;
11959 ctxt->disableSAX = 1;
11960 if (ctxt->input != NULL) {
11961 ctxt->input->cur = BAD_CAST"";
11962 ctxt->input->base = ctxt->input->cur;
11963 }
11964}
11965
11966/**
Owen Taylor3473f882001-02-23 17:55:21 +000011967 * xmlCreateIOParserCtxt:
11968 * @sax: a SAX handler
11969 * @user_data: The user data returned on SAX callbacks
11970 * @ioread: an I/O read function
11971 * @ioclose: an I/O close function
11972 * @ioctx: an I/O handler
11973 * @enc: the charset encoding if known
11974 *
11975 * Create a parser context for using the XML parser with an existing
11976 * I/O stream
11977 *
11978 * Returns the new parser context or NULL
11979 */
11980xmlParserCtxtPtr
11981xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
11982 xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
11983 void *ioctx, xmlCharEncoding enc) {
11984 xmlParserCtxtPtr ctxt;
11985 xmlParserInputPtr inputStream;
11986 xmlParserInputBufferPtr buf;
Lin Yi-Li24464be2012-05-10 16:14:55 +080011987
Daniel Veillard42595322004-11-08 10:52:06 +000011988 if (ioread == NULL) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011989
11990 buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
Lin Yi-Li24464be2012-05-10 16:14:55 +080011991 if (buf == NULL) {
11992 if (ioclose != NULL)
11993 ioclose(ioctx);
11994 return (NULL);
11995 }
Owen Taylor3473f882001-02-23 17:55:21 +000011996
11997 ctxt = xmlNewParserCtxt();
11998 if (ctxt == NULL) {
Daniel Veillardf0af8ec2005-07-08 17:27:33 +000011999 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000012000 return(NULL);
12001 }
12002 if (sax != NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +000012003#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard092643b2003-09-25 14:29:29 +000012004 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Daniel Veillard81273902003-09-30 00:43:48 +000012005#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012006 xmlFree(ctxt->sax);
12007 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
12008 if (ctxt->sax == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012009 xmlErrMemory(ctxt, NULL);
Daniel Veillardf0af8ec2005-07-08 17:27:33 +000012010 xmlFreeParserCtxt(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000012011 return(NULL);
12012 }
Daniel Veillard5ea30d72004-11-08 11:54:28 +000012013 memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
12014 if (sax->initialized == XML_SAX2_MAGIC)
12015 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
12016 else
12017 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
Owen Taylor3473f882001-02-23 17:55:21 +000012018 if (user_data != NULL)
12019 ctxt->userData = user_data;
Lin Yi-Li24464be2012-05-10 16:14:55 +080012020 }
Owen Taylor3473f882001-02-23 17:55:21 +000012021
12022 inputStream = xmlNewIOInputStream(ctxt, buf, enc);
12023 if (inputStream == NULL) {
12024 xmlFreeParserCtxt(ctxt);
12025 return(NULL);
12026 }
12027 inputPush(ctxt, inputStream);
12028
12029 return(ctxt);
12030}
12031
Daniel Veillard4432df22003-09-28 18:58:27 +000012032#ifdef LIBXML_VALID_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000012033/************************************************************************
12034 * *
Daniel Veillardcbaf3992001-12-31 16:16:02 +000012035 * Front ends when parsing a DTD *
Owen Taylor3473f882001-02-23 17:55:21 +000012036 * *
12037 ************************************************************************/
12038
12039/**
12040 * xmlIOParseDTD:
12041 * @sax: the SAX handler block or NULL
12042 * @input: an Input Buffer
12043 * @enc: the charset encoding if known
12044 *
12045 * Load and parse a DTD
12046 *
12047 * Returns the resulting xmlDtdPtr or NULL in case of error.
Daniel Veillard402b3442006-10-13 10:28:21 +000012048 * @input will be freed by the function in any case.
Owen Taylor3473f882001-02-23 17:55:21 +000012049 */
12050
12051xmlDtdPtr
12052xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
12053 xmlCharEncoding enc) {
12054 xmlDtdPtr ret = NULL;
12055 xmlParserCtxtPtr ctxt;
12056 xmlParserInputPtr pinput = NULL;
Daniel Veillard87a764e2001-06-20 17:41:10 +000012057 xmlChar start[4];
Owen Taylor3473f882001-02-23 17:55:21 +000012058
12059 if (input == NULL)
12060 return(NULL);
12061
12062 ctxt = xmlNewParserCtxt();
12063 if (ctxt == NULL) {
Daniel Veillard402b3442006-10-13 10:28:21 +000012064 xmlFreeParserInputBuffer(input);
Owen Taylor3473f882001-02-23 17:55:21 +000012065 return(NULL);
12066 }
12067
12068 /*
12069 * Set-up the SAX context
12070 */
12071 if (sax != NULL) {
12072 if (ctxt->sax != NULL)
12073 xmlFree(ctxt->sax);
12074 ctxt->sax = sax;
Daniel Veillard500a1de2004-03-22 15:22:58 +000012075 ctxt->userData = ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000012076 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000012077 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000012078
12079 /*
12080 * generate a parser input from the I/O handler
12081 */
12082
Daniel Veillard43caefb2003-12-07 19:32:22 +000012083 pinput = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
Owen Taylor3473f882001-02-23 17:55:21 +000012084 if (pinput == NULL) {
12085 if (sax != NULL) ctxt->sax = NULL;
Daniel Veillard402b3442006-10-13 10:28:21 +000012086 xmlFreeParserInputBuffer(input);
Owen Taylor3473f882001-02-23 17:55:21 +000012087 xmlFreeParserCtxt(ctxt);
12088 return(NULL);
12089 }
12090
12091 /*
12092 * plug some encoding conversion routines here.
12093 */
Daniel Veillarda8f09ce2008-08-27 13:02:01 +000012094 if (xmlPushInput(ctxt, pinput) < 0) {
12095 if (sax != NULL) ctxt->sax = NULL;
12096 xmlFreeParserCtxt(ctxt);
12097 return(NULL);
12098 }
Daniel Veillard43caefb2003-12-07 19:32:22 +000012099 if (enc != XML_CHAR_ENCODING_NONE) {
12100 xmlSwitchEncoding(ctxt, enc);
12101 }
Owen Taylor3473f882001-02-23 17:55:21 +000012102
12103 pinput->filename = NULL;
12104 pinput->line = 1;
12105 pinput->col = 1;
12106 pinput->base = ctxt->input->cur;
12107 pinput->cur = ctxt->input->cur;
12108 pinput->free = NULL;
12109
12110 /*
12111 * let's parse that entity knowing it's an external subset.
12112 */
12113 ctxt->inSubset = 2;
12114 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
Daniel Veillardae0765b2008-07-31 19:54:59 +000012115 if (ctxt->myDoc == NULL) {
12116 xmlErrMemory(ctxt, "New Doc failed");
12117 return(NULL);
12118 }
12119 ctxt->myDoc->properties = XML_DOC_INTERNAL;
Owen Taylor3473f882001-02-23 17:55:21 +000012120 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
12121 BAD_CAST "none", BAD_CAST "none");
Daniel Veillard87a764e2001-06-20 17:41:10 +000012122
Daniel Veillard4aede2e2003-10-17 12:43:59 +000012123 if ((enc == XML_CHAR_ENCODING_NONE) &&
12124 ((ctxt->input->end - ctxt->input->cur) >= 4)) {
Daniel Veillard87a764e2001-06-20 17:41:10 +000012125 /*
12126 * Get the 4 first bytes and decode the charset
12127 * if enc != XML_CHAR_ENCODING_NONE
12128 * plug some encoding conversion routines.
12129 */
12130 start[0] = RAW;
12131 start[1] = NXT(1);
12132 start[2] = NXT(2);
12133 start[3] = NXT(3);
12134 enc = xmlDetectCharEncoding(start, 4);
12135 if (enc != XML_CHAR_ENCODING_NONE) {
12136 xmlSwitchEncoding(ctxt, enc);
12137 }
12138 }
12139
Owen Taylor3473f882001-02-23 17:55:21 +000012140 xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
12141
12142 if (ctxt->myDoc != NULL) {
12143 if (ctxt->wellFormed) {
12144 ret = ctxt->myDoc->extSubset;
12145 ctxt->myDoc->extSubset = NULL;
Daniel Veillard329456a2003-04-26 21:21:00 +000012146 if (ret != NULL) {
12147 xmlNodePtr tmp;
12148
12149 ret->doc = NULL;
12150 tmp = ret->children;
12151 while (tmp != NULL) {
12152 tmp->doc = NULL;
12153 tmp = tmp->next;
12154 }
12155 }
Owen Taylor3473f882001-02-23 17:55:21 +000012156 } else {
12157 ret = NULL;
12158 }
12159 xmlFreeDoc(ctxt->myDoc);
12160 ctxt->myDoc = NULL;
12161 }
12162 if (sax != NULL) ctxt->sax = NULL;
12163 xmlFreeParserCtxt(ctxt);
12164
12165 return(ret);
12166}
12167
12168/**
12169 * xmlSAXParseDTD:
12170 * @sax: the SAX handler block
12171 * @ExternalID: a NAME* containing the External ID of the DTD
12172 * @SystemID: a NAME* containing the URL to the DTD
12173 *
12174 * Load and parse an external subset.
12175 *
12176 * Returns the resulting xmlDtdPtr or NULL in case of error.
12177 */
12178
12179xmlDtdPtr
12180xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
12181 const xmlChar *SystemID) {
12182 xmlDtdPtr ret = NULL;
12183 xmlParserCtxtPtr ctxt;
12184 xmlParserInputPtr input = NULL;
12185 xmlCharEncoding enc;
Igor Zlatkovic07d59762004-08-24 19:12:51 +000012186 xmlChar* systemIdCanonic;
Owen Taylor3473f882001-02-23 17:55:21 +000012187
12188 if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
12189
12190 ctxt = xmlNewParserCtxt();
12191 if (ctxt == NULL) {
12192 return(NULL);
12193 }
12194
12195 /*
12196 * Set-up the SAX context
12197 */
12198 if (sax != NULL) {
12199 if (ctxt->sax != NULL)
12200 xmlFree(ctxt->sax);
12201 ctxt->sax = sax;
Daniel Veillardbf1e3d82003-08-14 23:57:26 +000012202 ctxt->userData = ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000012203 }
Igor Zlatkovic07d59762004-08-24 19:12:51 +000012204
12205 /*
12206 * Canonicalise the system ID
12207 */
12208 systemIdCanonic = xmlCanonicPath(SystemID);
Daniel Veillardc93a19f2004-10-04 11:53:20 +000012209 if ((SystemID != NULL) && (systemIdCanonic == NULL)) {
Igor Zlatkovic07d59762004-08-24 19:12:51 +000012210 xmlFreeParserCtxt(ctxt);
12211 return(NULL);
12212 }
Owen Taylor3473f882001-02-23 17:55:21 +000012213
12214 /*
12215 * Ask the Entity resolver to load the damn thing
12216 */
12217
12218 if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
Daniel Veillarda9557952006-10-12 12:53:15 +000012219 input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID,
12220 systemIdCanonic);
Owen Taylor3473f882001-02-23 17:55:21 +000012221 if (input == NULL) {
12222 if (sax != NULL) ctxt->sax = NULL;
12223 xmlFreeParserCtxt(ctxt);
Daniel Veillard34099b42004-11-04 17:34:35 +000012224 if (systemIdCanonic != NULL)
12225 xmlFree(systemIdCanonic);
Owen Taylor3473f882001-02-23 17:55:21 +000012226 return(NULL);
12227 }
12228
12229 /*
12230 * plug some encoding conversion routines here.
12231 */
Daniel Veillarda8f09ce2008-08-27 13:02:01 +000012232 if (xmlPushInput(ctxt, input) < 0) {
12233 if (sax != NULL) ctxt->sax = NULL;
12234 xmlFreeParserCtxt(ctxt);
12235 if (systemIdCanonic != NULL)
12236 xmlFree(systemIdCanonic);
12237 return(NULL);
12238 }
Daniel Veillard4aede2e2003-10-17 12:43:59 +000012239 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
12240 enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
12241 xmlSwitchEncoding(ctxt, enc);
12242 }
Owen Taylor3473f882001-02-23 17:55:21 +000012243
12244 if (input->filename == NULL)
Igor Zlatkovic07d59762004-08-24 19:12:51 +000012245 input->filename = (char *) systemIdCanonic;
12246 else
12247 xmlFree(systemIdCanonic);
Owen Taylor3473f882001-02-23 17:55:21 +000012248 input->line = 1;
12249 input->col = 1;
12250 input->base = ctxt->input->cur;
12251 input->cur = ctxt->input->cur;
12252 input->free = NULL;
12253
12254 /*
12255 * let's parse that entity knowing it's an external subset.
12256 */
12257 ctxt->inSubset = 2;
12258 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
Daniel Veillardae0765b2008-07-31 19:54:59 +000012259 if (ctxt->myDoc == NULL) {
12260 xmlErrMemory(ctxt, "New Doc failed");
12261 if (sax != NULL) ctxt->sax = NULL;
12262 xmlFreeParserCtxt(ctxt);
12263 return(NULL);
12264 }
12265 ctxt->myDoc->properties = XML_DOC_INTERNAL;
Owen Taylor3473f882001-02-23 17:55:21 +000012266 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
12267 ExternalID, SystemID);
12268 xmlParseExternalSubset(ctxt, ExternalID, SystemID);
12269
12270 if (ctxt->myDoc != NULL) {
12271 if (ctxt->wellFormed) {
12272 ret = ctxt->myDoc->extSubset;
12273 ctxt->myDoc->extSubset = NULL;
Daniel Veillardc5573462003-04-25 16:43:49 +000012274 if (ret != NULL) {
12275 xmlNodePtr tmp;
12276
12277 ret->doc = NULL;
12278 tmp = ret->children;
12279 while (tmp != NULL) {
12280 tmp->doc = NULL;
12281 tmp = tmp->next;
12282 }
12283 }
Owen Taylor3473f882001-02-23 17:55:21 +000012284 } else {
12285 ret = NULL;
12286 }
12287 xmlFreeDoc(ctxt->myDoc);
12288 ctxt->myDoc = NULL;
12289 }
12290 if (sax != NULL) ctxt->sax = NULL;
12291 xmlFreeParserCtxt(ctxt);
Daniel Veillard0161e632008-08-28 15:36:32 +000012292
Owen Taylor3473f882001-02-23 17:55:21 +000012293 return(ret);
12294}
12295
Daniel Veillard4432df22003-09-28 18:58:27 +000012296
Owen Taylor3473f882001-02-23 17:55:21 +000012297/**
12298 * xmlParseDTD:
12299 * @ExternalID: a NAME* containing the External ID of the DTD
12300 * @SystemID: a NAME* containing the URL to the DTD
12301 *
12302 * Load and parse an external subset.
Daniel Veillard0161e632008-08-28 15:36:32 +000012303 *
Owen Taylor3473f882001-02-23 17:55:21 +000012304 * Returns the resulting xmlDtdPtr or NULL in case of error.
12305 */
12306
12307xmlDtdPtr
12308xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
12309 return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
12310}
Daniel Veillard4432df22003-09-28 18:58:27 +000012311#endif /* LIBXML_VALID_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012312
12313/************************************************************************
12314 * *
12315 * Front ends when parsing an Entity *
12316 * *
12317 ************************************************************************/
12318
12319/**
Owen Taylor3473f882001-02-23 17:55:21 +000012320 * xmlParseCtxtExternalEntity:
12321 * @ctx: the existing parsing context
12322 * @URL: the URL for the entity to load
12323 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +000012324 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +000012325 *
12326 * Parse an external general entity within an existing parsing context
12327 * An external general parsed entity is well-formed if it matches the
12328 * production labeled extParsedEnt.
12329 *
12330 * [78] extParsedEnt ::= TextDecl? content
12331 *
12332 * Returns 0 if the entity is well formed, -1 in case of args problem and
12333 * the parser error code otherwise
12334 */
12335
12336int
12337xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
Daniel Veillardcda96922001-08-21 10:56:31 +000012338 const xmlChar *ID, xmlNodePtr *lst) {
Owen Taylor3473f882001-02-23 17:55:21 +000012339 xmlParserCtxtPtr ctxt;
12340 xmlDocPtr newDoc;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012341 xmlNodePtr newRoot;
Owen Taylor3473f882001-02-23 17:55:21 +000012342 xmlSAXHandlerPtr oldsax = NULL;
12343 int ret = 0;
Daniel Veillard87a764e2001-06-20 17:41:10 +000012344 xmlChar start[4];
12345 xmlCharEncoding enc;
Owen Taylor3473f882001-02-23 17:55:21 +000012346
Daniel Veillardce682bc2004-11-05 17:22:25 +000012347 if (ctx == NULL) return(-1);
12348
Daniel Veillard0161e632008-08-28 15:36:32 +000012349 if (((ctx->depth > 40) && ((ctx->options & XML_PARSE_HUGE) == 0)) ||
12350 (ctx->depth > 1024)) {
Owen Taylor3473f882001-02-23 17:55:21 +000012351 return(XML_ERR_ENTITY_LOOP);
12352 }
12353
Daniel Veillardcda96922001-08-21 10:56:31 +000012354 if (lst != NULL)
12355 *lst = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000012356 if ((URL == NULL) && (ID == NULL))
12357 return(-1);
12358 if (ctx->myDoc == NULL) /* @@ relax but check for dereferences */
12359 return(-1);
12360
Rob Richards798743a2009-06-19 13:54:25 -040012361 ctxt = xmlCreateEntityParserCtxtInternal(URL, ID, NULL, ctx);
Daniel Veillard2937b3a2006-10-10 08:52:34 +000012362 if (ctxt == NULL) {
12363 return(-1);
12364 }
Daniel Veillard40ec29a2008-07-30 12:35:40 +000012365
Owen Taylor3473f882001-02-23 17:55:21 +000012366 oldsax = ctxt->sax;
12367 ctxt->sax = ctx->sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000012368 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000012369 newDoc = xmlNewDoc(BAD_CAST "1.0");
12370 if (newDoc == NULL) {
12371 xmlFreeParserCtxt(ctxt);
12372 return(-1);
12373 }
Daniel Veillardae0765b2008-07-31 19:54:59 +000012374 newDoc->properties = XML_DOC_INTERNAL;
Daniel Veillard03a53c32004-10-26 16:06:51 +000012375 if (ctx->myDoc->dict) {
12376 newDoc->dict = ctx->myDoc->dict;
12377 xmlDictReference(newDoc->dict);
12378 }
Owen Taylor3473f882001-02-23 17:55:21 +000012379 if (ctx->myDoc != NULL) {
12380 newDoc->intSubset = ctx->myDoc->intSubset;
12381 newDoc->extSubset = ctx->myDoc->extSubset;
12382 }
12383 if (ctx->myDoc->URL != NULL) {
12384 newDoc->URL = xmlStrdup(ctx->myDoc->URL);
12385 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012386 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
12387 if (newRoot == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000012388 ctxt->sax = oldsax;
12389 xmlFreeParserCtxt(ctxt);
12390 newDoc->intSubset = NULL;
12391 newDoc->extSubset = NULL;
12392 xmlFreeDoc(newDoc);
12393 return(-1);
12394 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012395 xmlAddChild((xmlNodePtr) newDoc, newRoot);
Owen Taylor3473f882001-02-23 17:55:21 +000012396 nodePush(ctxt, newDoc->children);
12397 if (ctx->myDoc == NULL) {
12398 ctxt->myDoc = newDoc;
12399 } else {
12400 ctxt->myDoc = ctx->myDoc;
12401 newDoc->children->doc = ctx->myDoc;
12402 }
12403
Daniel Veillard40ec29a2008-07-30 12:35:40 +000012404 /*
Daniel Veillard87a764e2001-06-20 17:41:10 +000012405 * Get the 4 first bytes and decode the charset
12406 * if enc != XML_CHAR_ENCODING_NONE
12407 * plug some encoding conversion routines.
12408 */
12409 GROW
Daniel Veillard4aede2e2003-10-17 12:43:59 +000012410 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
12411 start[0] = RAW;
12412 start[1] = NXT(1);
12413 start[2] = NXT(2);
12414 start[3] = NXT(3);
12415 enc = xmlDetectCharEncoding(start, 4);
12416 if (enc != XML_CHAR_ENCODING_NONE) {
12417 xmlSwitchEncoding(ctxt, enc);
12418 }
Daniel Veillard87a764e2001-06-20 17:41:10 +000012419 }
12420
Owen Taylor3473f882001-02-23 17:55:21 +000012421 /*
12422 * Parse a possible text declaration first
12423 */
Daniel Veillarda07050d2003-10-19 14:46:32 +000012424 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +000012425 xmlParseTextDecl(ctxt);
Daniel Veillard40ec29a2008-07-30 12:35:40 +000012426 /*
12427 * An XML-1.0 document can't reference an entity not XML-1.0
12428 */
12429 if ((xmlStrEqual(ctx->version, BAD_CAST "1.0")) &&
12430 (!xmlStrEqual(ctxt->input->version, BAD_CAST "1.0"))) {
12431 xmlFatalErrMsg(ctxt, XML_ERR_VERSION_MISMATCH,
12432 "Version mismatch between document and entity\n");
12433 }
Owen Taylor3473f882001-02-23 17:55:21 +000012434 }
12435
12436 /*
Daniel Veillard4aa68ab2012-04-02 17:50:54 +080012437 * If the user provided its own SAX callbacks then reuse the
12438 * useData callback field, otherwise the expected setup in a
12439 * DOM builder is to have userData == ctxt
12440 */
12441 if (ctx->userData == ctx)
12442 ctxt->userData = ctxt;
12443 else
12444 ctxt->userData = ctx->userData;
12445
12446 /*
Owen Taylor3473f882001-02-23 17:55:21 +000012447 * Doing validity checking on chunk doesn't make sense
12448 */
12449 ctxt->instate = XML_PARSER_CONTENT;
12450 ctxt->validate = ctx->validate;
Daniel Veillard5f8d1a32003-03-23 21:02:00 +000012451 ctxt->valid = ctx->valid;
Owen Taylor3473f882001-02-23 17:55:21 +000012452 ctxt->loadsubset = ctx->loadsubset;
12453 ctxt->depth = ctx->depth + 1;
12454 ctxt->replaceEntities = ctx->replaceEntities;
12455 if (ctxt->validate) {
12456 ctxt->vctxt.error = ctx->vctxt.error;
12457 ctxt->vctxt.warning = ctx->vctxt.warning;
Owen Taylor3473f882001-02-23 17:55:21 +000012458 } else {
12459 ctxt->vctxt.error = NULL;
12460 ctxt->vctxt.warning = NULL;
12461 }
Daniel Veillarda9142e72001-06-19 11:07:54 +000012462 ctxt->vctxt.nodeTab = NULL;
12463 ctxt->vctxt.nodeNr = 0;
12464 ctxt->vctxt.nodeMax = 0;
12465 ctxt->vctxt.node = NULL;
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000012466 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
12467 ctxt->dict = ctx->dict;
Daniel Veillardfd343dc2003-10-31 10:55:22 +000012468 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
12469 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
12470 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000012471 ctxt->dictNames = ctx->dictNames;
12472 ctxt->attsDefault = ctx->attsDefault;
12473 ctxt->attsSpecial = ctx->attsSpecial;
William M. Brack503b6102004-08-19 02:17:27 +000012474 ctxt->linenumbers = ctx->linenumbers;
Owen Taylor3473f882001-02-23 17:55:21 +000012475
12476 xmlParseContent(ctxt);
Daniel Veillard40ec29a2008-07-30 12:35:40 +000012477
Daniel Veillard5f8d1a32003-03-23 21:02:00 +000012478 ctx->validate = ctxt->validate;
12479 ctx->valid = ctxt->valid;
Owen Taylor3473f882001-02-23 17:55:21 +000012480 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012481 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000012482 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012483 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000012484 }
12485 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012486 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000012487 }
12488
12489 if (!ctxt->wellFormed) {
12490 if (ctxt->errNo == 0)
12491 ret = 1;
12492 else
12493 ret = ctxt->errNo;
12494 } else {
Daniel Veillardcda96922001-08-21 10:56:31 +000012495 if (lst != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000012496 xmlNodePtr cur;
12497
12498 /*
12499 * Return the newly created nodeset after unlinking it from
12500 * they pseudo parent.
12501 */
12502 cur = newDoc->children->children;
Daniel Veillardcda96922001-08-21 10:56:31 +000012503 *lst = cur;
Owen Taylor3473f882001-02-23 17:55:21 +000012504 while (cur != NULL) {
12505 cur->parent = NULL;
12506 cur = cur->next;
12507 }
12508 newDoc->children->children = NULL;
12509 }
12510 ret = 0;
12511 }
12512 ctxt->sax = oldsax;
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000012513 ctxt->dict = NULL;
12514 ctxt->attsDefault = NULL;
12515 ctxt->attsSpecial = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000012516 xmlFreeParserCtxt(ctxt);
12517 newDoc->intSubset = NULL;
12518 newDoc->extSubset = NULL;
12519 xmlFreeDoc(newDoc);
Daniel Veillard40ec29a2008-07-30 12:35:40 +000012520
Owen Taylor3473f882001-02-23 17:55:21 +000012521 return(ret);
12522}
12523
12524/**
Daniel Veillard257d9102001-05-08 10:41:44 +000012525 * xmlParseExternalEntityPrivate:
Owen Taylor3473f882001-02-23 17:55:21 +000012526 * @doc: the document the chunk pertains to
Daniel Veillarda97a19b2001-05-20 13:19:52 +000012527 * @oldctxt: the previous parser context if available
Owen Taylor3473f882001-02-23 17:55:21 +000012528 * @sax: the SAX handler bloc (possibly NULL)
12529 * @user_data: The user data returned on SAX callbacks (possibly NULL)
12530 * @depth: Used for loop detection, use 0
12531 * @URL: the URL for the entity to load
12532 * @ID: the System ID for the entity to load
12533 * @list: the return value for the set of parsed nodes
12534 *
Daniel Veillard257d9102001-05-08 10:41:44 +000012535 * Private version of xmlParseExternalEntity()
Owen Taylor3473f882001-02-23 17:55:21 +000012536 *
12537 * Returns 0 if the entity is well formed, -1 in case of args problem and
12538 * the parser error code otherwise
12539 */
12540
Daniel Veillard7d515752003-09-26 19:12:37 +000012541static xmlParserErrors
Daniel Veillarda97a19b2001-05-20 13:19:52 +000012542xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
12543 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +000012544 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +000012545 const xmlChar *ID, xmlNodePtr *list) {
Owen Taylor3473f882001-02-23 17:55:21 +000012546 xmlParserCtxtPtr ctxt;
12547 xmlDocPtr newDoc;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012548 xmlNodePtr newRoot;
Owen Taylor3473f882001-02-23 17:55:21 +000012549 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillard7d515752003-09-26 19:12:37 +000012550 xmlParserErrors ret = XML_ERR_OK;
Daniel Veillard87a764e2001-06-20 17:41:10 +000012551 xmlChar start[4];
12552 xmlCharEncoding enc;
Owen Taylor3473f882001-02-23 17:55:21 +000012553
Daniel Veillard0161e632008-08-28 15:36:32 +000012554 if (((depth > 40) &&
12555 ((oldctxt == NULL) || (oldctxt->options & XML_PARSE_HUGE) == 0)) ||
12556 (depth > 1024)) {
Owen Taylor3473f882001-02-23 17:55:21 +000012557 return(XML_ERR_ENTITY_LOOP);
12558 }
12559
Owen Taylor3473f882001-02-23 17:55:21 +000012560 if (list != NULL)
12561 *list = NULL;
12562 if ((URL == NULL) && (ID == NULL))
Daniel Veillard7d515752003-09-26 19:12:37 +000012563 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard30e76072006-03-09 14:13:55 +000012564 if (doc == NULL)
Daniel Veillard7d515752003-09-26 19:12:37 +000012565 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000012566
12567
Rob Richards9c0aa472009-03-26 18:10:19 +000012568 ctxt = xmlCreateEntityParserCtxtInternal(URL, ID, NULL, oldctxt);
William M. Brackb670e2e2003-09-27 01:05:55 +000012569 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
Owen Taylor3473f882001-02-23 17:55:21 +000012570 ctxt->userData = ctxt;
Daniel Veillarda97a19b2001-05-20 13:19:52 +000012571 if (oldctxt != NULL) {
12572 ctxt->_private = oldctxt->_private;
12573 ctxt->loadsubset = oldctxt->loadsubset;
12574 ctxt->validate = oldctxt->validate;
12575 ctxt->external = oldctxt->external;
Daniel Veillard44e1dd02003-02-21 23:23:28 +000012576 ctxt->record_info = oldctxt->record_info;
12577 ctxt->node_seq.maximum = oldctxt->node_seq.maximum;
12578 ctxt->node_seq.length = oldctxt->node_seq.length;
12579 ctxt->node_seq.buffer = oldctxt->node_seq.buffer;
Daniel Veillarda97a19b2001-05-20 13:19:52 +000012580 } else {
12581 /*
12582 * Doing validity checking on chunk without context
12583 * doesn't make sense
12584 */
12585 ctxt->_private = NULL;
12586 ctxt->validate = 0;
12587 ctxt->external = 2;
12588 ctxt->loadsubset = 0;
12589 }
Owen Taylor3473f882001-02-23 17:55:21 +000012590 if (sax != NULL) {
12591 oldsax = ctxt->sax;
12592 ctxt->sax = sax;
12593 if (user_data != NULL)
12594 ctxt->userData = user_data;
12595 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000012596 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000012597 newDoc = xmlNewDoc(BAD_CAST "1.0");
12598 if (newDoc == NULL) {
Daniel Veillard44e1dd02003-02-21 23:23:28 +000012599 ctxt->node_seq.maximum = 0;
12600 ctxt->node_seq.length = 0;
12601 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000012602 xmlFreeParserCtxt(ctxt);
Daniel Veillard7d515752003-09-26 19:12:37 +000012603 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000012604 }
Daniel Veillardae0765b2008-07-31 19:54:59 +000012605 newDoc->properties = XML_DOC_INTERNAL;
Daniel Veillard30e76072006-03-09 14:13:55 +000012606 newDoc->intSubset = doc->intSubset;
12607 newDoc->extSubset = doc->extSubset;
12608 newDoc->dict = doc->dict;
Daniel Veillard03a53c32004-10-26 16:06:51 +000012609 xmlDictReference(newDoc->dict);
12610
Owen Taylor3473f882001-02-23 17:55:21 +000012611 if (doc->URL != NULL) {
12612 newDoc->URL = xmlStrdup(doc->URL);
12613 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012614 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
12615 if (newRoot == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000012616 if (sax != NULL)
12617 ctxt->sax = oldsax;
Daniel Veillard44e1dd02003-02-21 23:23:28 +000012618 ctxt->node_seq.maximum = 0;
12619 ctxt->node_seq.length = 0;
12620 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000012621 xmlFreeParserCtxt(ctxt);
12622 newDoc->intSubset = NULL;
12623 newDoc->extSubset = NULL;
12624 xmlFreeDoc(newDoc);
Daniel Veillard7d515752003-09-26 19:12:37 +000012625 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000012626 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012627 xmlAddChild((xmlNodePtr) newDoc, newRoot);
Owen Taylor3473f882001-02-23 17:55:21 +000012628 nodePush(ctxt, newDoc->children);
Daniel Veillard30e76072006-03-09 14:13:55 +000012629 ctxt->myDoc = doc;
12630 newRoot->doc = doc;
Owen Taylor3473f882001-02-23 17:55:21 +000012631
Daniel Veillard0161e632008-08-28 15:36:32 +000012632 /*
Daniel Veillard87a764e2001-06-20 17:41:10 +000012633 * Get the 4 first bytes and decode the charset
12634 * if enc != XML_CHAR_ENCODING_NONE
12635 * plug some encoding conversion routines.
12636 */
12637 GROW;
Daniel Veillard4aede2e2003-10-17 12:43:59 +000012638 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
12639 start[0] = RAW;
12640 start[1] = NXT(1);
12641 start[2] = NXT(2);
12642 start[3] = NXT(3);
12643 enc = xmlDetectCharEncoding(start, 4);
12644 if (enc != XML_CHAR_ENCODING_NONE) {
12645 xmlSwitchEncoding(ctxt, enc);
12646 }
Daniel Veillard87a764e2001-06-20 17:41:10 +000012647 }
12648
Owen Taylor3473f882001-02-23 17:55:21 +000012649 /*
12650 * Parse a possible text declaration first
12651 */
Daniel Veillarda07050d2003-10-19 14:46:32 +000012652 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +000012653 xmlParseTextDecl(ctxt);
12654 }
12655
Owen Taylor3473f882001-02-23 17:55:21 +000012656 ctxt->instate = XML_PARSER_CONTENT;
Owen Taylor3473f882001-02-23 17:55:21 +000012657 ctxt->depth = depth;
12658
12659 xmlParseContent(ctxt);
Daniel Veillard0161e632008-08-28 15:36:32 +000012660
Daniel Veillard561b7f82002-03-20 21:55:57 +000012661 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012662 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard561b7f82002-03-20 21:55:57 +000012663 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012664 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000012665 }
12666 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012667 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000012668 }
12669
12670 if (!ctxt->wellFormed) {
12671 if (ctxt->errNo == 0)
Daniel Veillard7d515752003-09-26 19:12:37 +000012672 ret = XML_ERR_INTERNAL_ERROR;
Owen Taylor3473f882001-02-23 17:55:21 +000012673 else
William M. Brack7b9154b2003-09-27 19:23:50 +000012674 ret = (xmlParserErrors)ctxt->errNo;
Owen Taylor3473f882001-02-23 17:55:21 +000012675 } else {
12676 if (list != NULL) {
12677 xmlNodePtr cur;
12678
12679 /*
12680 * Return the newly created nodeset after unlinking it from
12681 * they pseudo parent.
12682 */
12683 cur = newDoc->children->children;
12684 *list = cur;
12685 while (cur != NULL) {
12686 cur->parent = NULL;
12687 cur = cur->next;
12688 }
12689 newDoc->children->children = NULL;
12690 }
Daniel Veillard7d515752003-09-26 19:12:37 +000012691 ret = XML_ERR_OK;
Owen Taylor3473f882001-02-23 17:55:21 +000012692 }
Daniel Veillard0161e632008-08-28 15:36:32 +000012693
12694 /*
12695 * Record in the parent context the number of entities replacement
12696 * done when parsing that reference.
12697 */
Daniel Veillard76d36452009-09-07 11:19:33 +020012698 if (oldctxt != NULL)
12699 oldctxt->nbentities += ctxt->nbentities;
12700
Daniel Veillard0161e632008-08-28 15:36:32 +000012701 /*
12702 * Also record the size of the entity parsed
12703 */
12704 if (ctxt->input != NULL) {
12705 oldctxt->sizeentities += ctxt->input->consumed;
12706 oldctxt->sizeentities += (ctxt->input->cur - ctxt->input->base);
12707 }
12708 /*
12709 * And record the last error if any
12710 */
12711 if (ctxt->lastError.code != XML_ERR_OK)
12712 xmlCopyError(&ctxt->lastError, &oldctxt->lastError);
12713
Owen Taylor3473f882001-02-23 17:55:21 +000012714 if (sax != NULL)
12715 ctxt->sax = oldsax;
Daniel Veillard0046c0f2003-02-23 13:52:30 +000012716 oldctxt->node_seq.maximum = ctxt->node_seq.maximum;
12717 oldctxt->node_seq.length = ctxt->node_seq.length;
12718 oldctxt->node_seq.buffer = ctxt->node_seq.buffer;
Daniel Veillard44e1dd02003-02-21 23:23:28 +000012719 ctxt->node_seq.maximum = 0;
12720 ctxt->node_seq.length = 0;
12721 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000012722 xmlFreeParserCtxt(ctxt);
12723 newDoc->intSubset = NULL;
12724 newDoc->extSubset = NULL;
12725 xmlFreeDoc(newDoc);
Daniel Veillard0161e632008-08-28 15:36:32 +000012726
Owen Taylor3473f882001-02-23 17:55:21 +000012727 return(ret);
12728}
12729
Daniel Veillard81273902003-09-30 00:43:48 +000012730#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000012731/**
Daniel Veillard257d9102001-05-08 10:41:44 +000012732 * xmlParseExternalEntity:
12733 * @doc: the document the chunk pertains to
12734 * @sax: the SAX handler bloc (possibly NULL)
12735 * @user_data: The user data returned on SAX callbacks (possibly NULL)
12736 * @depth: Used for loop detection, use 0
12737 * @URL: the URL for the entity to load
12738 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +000012739 * @lst: the return value for the set of parsed nodes
Daniel Veillard257d9102001-05-08 10:41:44 +000012740 *
12741 * Parse an external general entity
12742 * An external general parsed entity is well-formed if it matches the
12743 * production labeled extParsedEnt.
12744 *
12745 * [78] extParsedEnt ::= TextDecl? content
12746 *
12747 * Returns 0 if the entity is well formed, -1 in case of args problem and
12748 * the parser error code otherwise
12749 */
12750
12751int
12752xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardcda96922001-08-21 10:56:31 +000012753 int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *lst) {
Daniel Veillarda97a19b2001-05-20 13:19:52 +000012754 return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL,
Daniel Veillardcda96922001-08-21 10:56:31 +000012755 ID, lst));
Daniel Veillard257d9102001-05-08 10:41:44 +000012756}
12757
12758/**
Daniel Veillarde020c3a2001-03-21 18:06:15 +000012759 * xmlParseBalancedChunkMemory:
Owen Taylor3473f882001-02-23 17:55:21 +000012760 * @doc: the document the chunk pertains to
12761 * @sax: the SAX handler bloc (possibly NULL)
12762 * @user_data: The user data returned on SAX callbacks (possibly NULL)
12763 * @depth: Used for loop detection, use 0
12764 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
Daniel Veillardcda96922001-08-21 10:56:31 +000012765 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +000012766 *
12767 * Parse a well-balanced chunk of an XML document
12768 * called by the parser
12769 * The allowed sequence for the Well Balanced Chunk is the one defined by
12770 * the content production in the XML grammar:
12771 *
12772 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
12773 *
12774 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
12775 * the parser error code otherwise
12776 */
12777
12778int
12779xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
Daniel Veillardcda96922001-08-21 10:56:31 +000012780 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) {
Daniel Veillard58e44c92002-08-02 22:19:49 +000012781 return xmlParseBalancedChunkMemoryRecover( doc, sax, user_data,
12782 depth, string, lst, 0 );
12783}
Daniel Veillard81273902003-09-30 00:43:48 +000012784#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillard58e44c92002-08-02 22:19:49 +000012785
12786/**
Daniel Veillard328f48c2002-11-15 15:24:34 +000012787 * xmlParseBalancedChunkMemoryInternal:
12788 * @oldctxt: the existing parsing context
12789 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
12790 * @user_data: the user data field for the parser context
12791 * @lst: the return value for the set of parsed nodes
12792 *
12793 *
12794 * Parse a well-balanced chunk of an XML document
12795 * called by the parser
12796 * The allowed sequence for the Well Balanced Chunk is the one defined by
12797 * the content production in the XML grammar:
12798 *
12799 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
12800 *
Daniel Veillard7d515752003-09-26 19:12:37 +000012801 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
12802 * error code otherwise
Daniel Veillard0161e632008-08-28 15:36:32 +000012803 *
Daniel Veillard328f48c2002-11-15 15:24:34 +000012804 * In case recover is set to 1, the nodelist will not be empty even if
Daniel Veillard0161e632008-08-28 15:36:32 +000012805 * the parsed chunk is not well balanced.
Daniel Veillard328f48c2002-11-15 15:24:34 +000012806 */
Daniel Veillard7d515752003-09-26 19:12:37 +000012807static xmlParserErrors
Daniel Veillard328f48c2002-11-15 15:24:34 +000012808xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
12809 const xmlChar *string, void *user_data, xmlNodePtr *lst) {
12810 xmlParserCtxtPtr ctxt;
Daniel Veillard68e9e742002-11-16 15:35:11 +000012811 xmlDocPtr newDoc = NULL;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012812 xmlNodePtr newRoot;
Daniel Veillard328f48c2002-11-15 15:24:34 +000012813 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000012814 xmlNodePtr content = NULL;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012815 xmlNodePtr last = NULL;
Daniel Veillard328f48c2002-11-15 15:24:34 +000012816 int size;
Daniel Veillard7d515752003-09-26 19:12:37 +000012817 xmlParserErrors ret = XML_ERR_OK;
Daniel Veillard74eaec12009-08-26 15:57:20 +020012818#ifdef SAX2
12819 int i;
12820#endif
Daniel Veillard328f48c2002-11-15 15:24:34 +000012821
Daniel Veillard0161e632008-08-28 15:36:32 +000012822 if (((oldctxt->depth > 40) && ((oldctxt->options & XML_PARSE_HUGE) == 0)) ||
12823 (oldctxt->depth > 1024)) {
Daniel Veillard328f48c2002-11-15 15:24:34 +000012824 return(XML_ERR_ENTITY_LOOP);
12825 }
12826
12827
12828 if (lst != NULL)
12829 *lst = NULL;
12830 if (string == NULL)
William M. Brack7b9154b2003-09-27 19:23:50 +000012831 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard328f48c2002-11-15 15:24:34 +000012832
12833 size = xmlStrlen(string);
12834
12835 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
William M. Brack7b9154b2003-09-27 19:23:50 +000012836 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
Daniel Veillard328f48c2002-11-15 15:24:34 +000012837 if (user_data != NULL)
12838 ctxt->userData = user_data;
12839 else
12840 ctxt->userData = ctxt;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000012841 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
12842 ctxt->dict = oldctxt->dict;
Daniel Veillardfd343dc2003-10-31 10:55:22 +000012843 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
12844 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
12845 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
Daniel Veillard328f48c2002-11-15 15:24:34 +000012846
Daniel Veillard74eaec12009-08-26 15:57:20 +020012847#ifdef SAX2
12848 /* propagate namespaces down the entity */
12849 for (i = 0;i < oldctxt->nsNr;i += 2) {
12850 nsPush(ctxt, oldctxt->nsTab[i], oldctxt->nsTab[i+1]);
12851 }
12852#endif
12853
Daniel Veillard328f48c2002-11-15 15:24:34 +000012854 oldsax = ctxt->sax;
12855 ctxt->sax = oldctxt->sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000012856 xmlDetectSAX2(ctxt);
Daniel Veillard87ab1c12003-12-21 13:01:56 +000012857 ctxt->replaceEntities = oldctxt->replaceEntities;
12858 ctxt->options = oldctxt->options;
Daniel Veillard0161e632008-08-28 15:36:32 +000012859
Daniel Veillarde1ca5032002-12-09 14:13:43 +000012860 ctxt->_private = oldctxt->_private;
Daniel Veillard328f48c2002-11-15 15:24:34 +000012861 if (oldctxt->myDoc == NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000012862 newDoc = xmlNewDoc(BAD_CAST "1.0");
12863 if (newDoc == NULL) {
12864 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000012865 ctxt->dict = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000012866 xmlFreeParserCtxt(ctxt);
William M. Brack7b9154b2003-09-27 19:23:50 +000012867 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard68e9e742002-11-16 15:35:11 +000012868 }
Daniel Veillardae0765b2008-07-31 19:54:59 +000012869 newDoc->properties = XML_DOC_INTERNAL;
Daniel Veillard03a53c32004-10-26 16:06:51 +000012870 newDoc->dict = ctxt->dict;
12871 xmlDictReference(newDoc->dict);
Daniel Veillard328f48c2002-11-15 15:24:34 +000012872 ctxt->myDoc = newDoc;
Daniel Veillard68e9e742002-11-16 15:35:11 +000012873 } else {
12874 ctxt->myDoc = oldctxt->myDoc;
12875 content = ctxt->myDoc->children;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012876 last = ctxt->myDoc->last;
Daniel Veillard328f48c2002-11-15 15:24:34 +000012877 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012878 newRoot = xmlNewDocNode(ctxt->myDoc, NULL, BAD_CAST "pseudoroot", NULL);
12879 if (newRoot == NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000012880 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000012881 ctxt->dict = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000012882 xmlFreeParserCtxt(ctxt);
Daniel Veillard03a53c32004-10-26 16:06:51 +000012883 if (newDoc != NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000012884 xmlFreeDoc(newDoc);
Daniel Veillard03a53c32004-10-26 16:06:51 +000012885 }
William M. Brack7b9154b2003-09-27 19:23:50 +000012886 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard68e9e742002-11-16 15:35:11 +000012887 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012888 ctxt->myDoc->children = NULL;
12889 ctxt->myDoc->last = NULL;
12890 xmlAddChild((xmlNodePtr) ctxt->myDoc, newRoot);
Daniel Veillard68e9e742002-11-16 15:35:11 +000012891 nodePush(ctxt, ctxt->myDoc->children);
Daniel Veillard328f48c2002-11-15 15:24:34 +000012892 ctxt->instate = XML_PARSER_CONTENT;
12893 ctxt->depth = oldctxt->depth + 1;
12894
Daniel Veillard328f48c2002-11-15 15:24:34 +000012895 ctxt->validate = 0;
12896 ctxt->loadsubset = oldctxt->loadsubset;
Daniel Veillardef8dd7b2003-03-23 12:02:56 +000012897 if ((oldctxt->validate) || (oldctxt->replaceEntities != 0)) {
12898 /*
12899 * ID/IDREF registration will be done in xmlValidateElement below
12900 */
12901 ctxt->loadsubset |= XML_SKIP_IDS;
12902 }
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000012903 ctxt->dictNames = oldctxt->dictNames;
Daniel Veillard95d2d5b2003-10-27 14:54:49 +000012904 ctxt->attsDefault = oldctxt->attsDefault;
12905 ctxt->attsSpecial = oldctxt->attsSpecial;
Daniel Veillard328f48c2002-11-15 15:24:34 +000012906
Daniel Veillard68e9e742002-11-16 15:35:11 +000012907 xmlParseContent(ctxt);
Daniel Veillard328f48c2002-11-15 15:24:34 +000012908 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012909 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000012910 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012911 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000012912 }
Daniel Veillard68e9e742002-11-16 15:35:11 +000012913 if (ctxt->node != ctxt->myDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012914 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000012915 }
12916
12917 if (!ctxt->wellFormed) {
12918 if (ctxt->errNo == 0)
Daniel Veillard7d515752003-09-26 19:12:37 +000012919 ret = XML_ERR_INTERNAL_ERROR;
Daniel Veillard328f48c2002-11-15 15:24:34 +000012920 else
William M. Brack7b9154b2003-09-27 19:23:50 +000012921 ret = (xmlParserErrors)ctxt->errNo;
Daniel Veillard328f48c2002-11-15 15:24:34 +000012922 } else {
William M. Brack7b9154b2003-09-27 19:23:50 +000012923 ret = XML_ERR_OK;
Daniel Veillard328f48c2002-11-15 15:24:34 +000012924 }
Daniel Veillard0161e632008-08-28 15:36:32 +000012925
William M. Brack7b9154b2003-09-27 19:23:50 +000012926 if ((lst != NULL) && (ret == XML_ERR_OK)) {
Daniel Veillard328f48c2002-11-15 15:24:34 +000012927 xmlNodePtr cur;
12928
12929 /*
12930 * Return the newly created nodeset after unlinking it from
12931 * they pseudo parent.
12932 */
Daniel Veillard68e9e742002-11-16 15:35:11 +000012933 cur = ctxt->myDoc->children->children;
Daniel Veillard328f48c2002-11-15 15:24:34 +000012934 *lst = cur;
12935 while (cur != NULL) {
Daniel Veillard4432df22003-09-28 18:58:27 +000012936#ifdef LIBXML_VALID_ENABLED
Daniel Veillard8874b942005-08-25 13:19:21 +000012937 if ((oldctxt->validate) && (oldctxt->wellFormed) &&
12938 (oldctxt->myDoc) && (oldctxt->myDoc->intSubset) &&
12939 (cur->type == XML_ELEMENT_NODE)) {
Daniel Veillard8d589042003-02-04 15:07:21 +000012940 oldctxt->valid &= xmlValidateElement(&oldctxt->vctxt,
12941 oldctxt->myDoc, cur);
12942 }
Daniel Veillard4432df22003-09-28 18:58:27 +000012943#endif /* LIBXML_VALID_ENABLED */
Daniel Veillard328f48c2002-11-15 15:24:34 +000012944 cur->parent = NULL;
12945 cur = cur->next;
12946 }
Daniel Veillard68e9e742002-11-16 15:35:11 +000012947 ctxt->myDoc->children->children = NULL;
12948 }
12949 if (ctxt->myDoc != NULL) {
12950 xmlFreeNode(ctxt->myDoc->children);
12951 ctxt->myDoc->children = content;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012952 ctxt->myDoc->last = last;
Daniel Veillard328f48c2002-11-15 15:24:34 +000012953 }
Daniel Veillard0161e632008-08-28 15:36:32 +000012954
12955 /*
12956 * Record in the parent context the number of entities replacement
12957 * done when parsing that reference.
12958 */
Daniel Veillardd44b9362009-09-07 12:15:08 +020012959 if (oldctxt != NULL)
12960 oldctxt->nbentities += ctxt->nbentities;
12961
Daniel Veillard0161e632008-08-28 15:36:32 +000012962 /*
12963 * Also record the last error if any
12964 */
12965 if (ctxt->lastError.code != XML_ERR_OK)
12966 xmlCopyError(&ctxt->lastError, &oldctxt->lastError);
12967
Daniel Veillard328f48c2002-11-15 15:24:34 +000012968 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000012969 ctxt->dict = NULL;
Daniel Veillard95d2d5b2003-10-27 14:54:49 +000012970 ctxt->attsDefault = NULL;
12971 ctxt->attsSpecial = NULL;
Daniel Veillard328f48c2002-11-15 15:24:34 +000012972 xmlFreeParserCtxt(ctxt);
Daniel Veillard03a53c32004-10-26 16:06:51 +000012973 if (newDoc != NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000012974 xmlFreeDoc(newDoc);
Daniel Veillard03a53c32004-10-26 16:06:51 +000012975 }
Daniel Veillard0161e632008-08-28 15:36:32 +000012976
Daniel Veillard328f48c2002-11-15 15:24:34 +000012977 return(ret);
12978}
12979
Daniel Veillard29b17482004-08-16 00:39:03 +000012980/**
12981 * xmlParseInNodeContext:
12982 * @node: the context node
12983 * @data: the input string
12984 * @datalen: the input string length in bytes
12985 * @options: a combination of xmlParserOption
12986 * @lst: the return value for the set of parsed nodes
12987 *
12988 * Parse a well-balanced chunk of an XML document
12989 * within the context (DTD, namespaces, etc ...) of the given node.
12990 *
12991 * The allowed sequence for the data is a Well Balanced Chunk defined by
12992 * the content production in the XML grammar:
12993 *
12994 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
12995 *
12996 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
12997 * error code otherwise
12998 */
12999xmlParserErrors
13000xmlParseInNodeContext(xmlNodePtr node, const char *data, int datalen,
13001 int options, xmlNodePtr *lst) {
13002#ifdef SAX2
13003 xmlParserCtxtPtr ctxt;
13004 xmlDocPtr doc = NULL;
13005 xmlNodePtr fake, cur;
13006 int nsnr = 0;
13007
13008 xmlParserErrors ret = XML_ERR_OK;
13009
13010 /*
13011 * check all input parameters, grab the document
13012 */
13013 if ((lst == NULL) || (node == NULL) || (data == NULL) || (datalen < 0))
13014 return(XML_ERR_INTERNAL_ERROR);
13015 switch (node->type) {
13016 case XML_ELEMENT_NODE:
13017 case XML_ATTRIBUTE_NODE:
13018 case XML_TEXT_NODE:
13019 case XML_CDATA_SECTION_NODE:
13020 case XML_ENTITY_REF_NODE:
13021 case XML_PI_NODE:
13022 case XML_COMMENT_NODE:
13023 case XML_DOCUMENT_NODE:
13024 case XML_HTML_DOCUMENT_NODE:
13025 break;
13026 default:
13027 return(XML_ERR_INTERNAL_ERROR);
13028
13029 }
13030 while ((node != NULL) && (node->type != XML_ELEMENT_NODE) &&
13031 (node->type != XML_DOCUMENT_NODE) &&
13032 (node->type != XML_HTML_DOCUMENT_NODE))
13033 node = node->parent;
13034 if (node == NULL)
13035 return(XML_ERR_INTERNAL_ERROR);
13036 if (node->type == XML_ELEMENT_NODE)
13037 doc = node->doc;
13038 else
13039 doc = (xmlDocPtr) node;
13040 if (doc == NULL)
13041 return(XML_ERR_INTERNAL_ERROR);
13042
13043 /*
13044 * allocate a context and set-up everything not related to the
13045 * node position in the tree
13046 */
13047 if (doc->type == XML_DOCUMENT_NODE)
13048 ctxt = xmlCreateMemoryParserCtxt((char *) data, datalen);
13049#ifdef LIBXML_HTML_ENABLED
Daniel Veillarde20fb5a2010-01-29 20:47:08 +010013050 else if (doc->type == XML_HTML_DOCUMENT_NODE) {
Daniel Veillard29b17482004-08-16 00:39:03 +000013051 ctxt = htmlCreateMemoryParserCtxt((char *) data, datalen);
Daniel Veillarde20fb5a2010-01-29 20:47:08 +010013052 /*
13053 * When parsing in context, it makes no sense to add implied
13054 * elements like html/body/etc...
13055 */
13056 options |= HTML_PARSE_NOIMPLIED;
13057 }
Daniel Veillard29b17482004-08-16 00:39:03 +000013058#endif
13059 else
13060 return(XML_ERR_INTERNAL_ERROR);
13061
13062 if (ctxt == NULL)
13063 return(XML_ERR_NO_MEMORY);
William M. Brackc3f81342004-10-03 01:22:44 +000013064
Daniel Veillard47cd14e2010-02-04 18:49:01 +010013065 /*
William M. Brackc3f81342004-10-03 01:22:44 +000013066 * Use input doc's dict if present, else assure XML_PARSE_NODICT is set.
13067 * We need a dictionary for xmlDetectSAX2, so if there's no doc dict
13068 * we must wait until the last moment to free the original one.
13069 */
Daniel Veillard29b17482004-08-16 00:39:03 +000013070 if (doc->dict != NULL) {
William M. Brackc3f81342004-10-03 01:22:44 +000013071 if (ctxt->dict != NULL)
Daniel Veillard29b17482004-08-16 00:39:03 +000013072 xmlDictFree(ctxt->dict);
13073 ctxt->dict = doc->dict;
William M. Brackc3f81342004-10-03 01:22:44 +000013074 } else
13075 options |= XML_PARSE_NODICT;
13076
Daniel Veillard47cd14e2010-02-04 18:49:01 +010013077 if (doc->encoding != NULL) {
13078 xmlCharEncodingHandlerPtr hdlr;
13079
13080 if (ctxt->encoding != NULL)
13081 xmlFree((xmlChar *) ctxt->encoding);
13082 ctxt->encoding = xmlStrdup((const xmlChar *) doc->encoding);
13083
13084 hdlr = xmlFindCharEncodingHandler(doc->encoding);
13085 if (hdlr != NULL) {
13086 xmlSwitchToEncoding(ctxt, hdlr);
13087 } else {
13088 return(XML_ERR_UNSUPPORTED_ENCODING);
13089 }
13090 }
13091
Daniel Veillard37334572008-07-31 08:20:02 +000013092 xmlCtxtUseOptionsInternal(ctxt, options, NULL);
Daniel Veillard29b17482004-08-16 00:39:03 +000013093 xmlDetectSAX2(ctxt);
13094 ctxt->myDoc = doc;
13095
Daniel Veillard47cd14e2010-02-04 18:49:01 +010013096 fake = xmlNewComment(NULL);
13097 if (fake == NULL) {
13098 xmlFreeParserCtxt(ctxt);
13099 return(XML_ERR_NO_MEMORY);
13100 }
13101 xmlAddChild(node, fake);
13102
Daniel Veillard29b17482004-08-16 00:39:03 +000013103 if (node->type == XML_ELEMENT_NODE) {
13104 nodePush(ctxt, node);
13105 /*
13106 * initialize the SAX2 namespaces stack
13107 */
13108 cur = node;
13109 while ((cur != NULL) && (cur->type == XML_ELEMENT_NODE)) {
13110 xmlNsPtr ns = cur->nsDef;
13111 const xmlChar *iprefix, *ihref;
13112
13113 while (ns != NULL) {
13114 if (ctxt->dict) {
13115 iprefix = xmlDictLookup(ctxt->dict, ns->prefix, -1);
13116 ihref = xmlDictLookup(ctxt->dict, ns->href, -1);
13117 } else {
13118 iprefix = ns->prefix;
13119 ihref = ns->href;
13120 }
13121
13122 if (xmlGetNamespace(ctxt, iprefix) == NULL) {
13123 nsPush(ctxt, iprefix, ihref);
13124 nsnr++;
13125 }
13126 ns = ns->next;
13127 }
13128 cur = cur->parent;
13129 }
13130 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillard0161e632008-08-28 15:36:32 +000013131 }
Daniel Veillard29b17482004-08-16 00:39:03 +000013132
13133 if ((ctxt->validate) || (ctxt->replaceEntities != 0)) {
13134 /*
13135 * ID/IDREF registration will be done in xmlValidateElement below
13136 */
13137 ctxt->loadsubset |= XML_SKIP_IDS;
13138 }
13139
Daniel Veillard499cc922006-01-18 17:22:35 +000013140#ifdef LIBXML_HTML_ENABLED
13141 if (doc->type == XML_HTML_DOCUMENT_NODE)
13142 __htmlParseContent(ctxt);
13143 else
13144#endif
13145 xmlParseContent(ctxt);
13146
Daniel Veillard29b17482004-08-16 00:39:03 +000013147 nsPop(ctxt, nsnr);
13148 if ((RAW == '<') && (NXT(1) == '/')) {
13149 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13150 } else if (RAW != 0) {
13151 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13152 }
13153 if ((ctxt->node != NULL) && (ctxt->node != node)) {
13154 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13155 ctxt->wellFormed = 0;
13156 }
13157
13158 if (!ctxt->wellFormed) {
13159 if (ctxt->errNo == 0)
13160 ret = XML_ERR_INTERNAL_ERROR;
13161 else
13162 ret = (xmlParserErrors)ctxt->errNo;
13163 } else {
13164 ret = XML_ERR_OK;
13165 }
Daniel Veillard0161e632008-08-28 15:36:32 +000013166
Daniel Veillard29b17482004-08-16 00:39:03 +000013167 /*
13168 * Return the newly created nodeset after unlinking it from
13169 * the pseudo sibling.
13170 */
Daniel Veillard0161e632008-08-28 15:36:32 +000013171
Daniel Veillard29b17482004-08-16 00:39:03 +000013172 cur = fake->next;
13173 fake->next = NULL;
13174 node->last = fake;
13175
13176 if (cur != NULL) {
13177 cur->prev = NULL;
13178 }
13179
13180 *lst = cur;
13181
13182 while (cur != NULL) {
13183 cur->parent = NULL;
13184 cur = cur->next;
13185 }
13186
13187 xmlUnlinkNode(fake);
13188 xmlFreeNode(fake);
13189
13190
13191 if (ret != XML_ERR_OK) {
13192 xmlFreeNodeList(*lst);
13193 *lst = NULL;
13194 }
William M. Brackc3f81342004-10-03 01:22:44 +000013195
William M. Brackb7b54de2004-10-06 16:38:01 +000013196 if (doc->dict != NULL)
13197 ctxt->dict = NULL;
Daniel Veillard29b17482004-08-16 00:39:03 +000013198 xmlFreeParserCtxt(ctxt);
Daniel Veillard0161e632008-08-28 15:36:32 +000013199
Daniel Veillard29b17482004-08-16 00:39:03 +000013200 return(ret);
13201#else /* !SAX2 */
13202 return(XML_ERR_INTERNAL_ERROR);
13203#endif
13204}
13205
Daniel Veillard81273902003-09-30 00:43:48 +000013206#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard328f48c2002-11-15 15:24:34 +000013207/**
Daniel Veillard58e44c92002-08-02 22:19:49 +000013208 * xmlParseBalancedChunkMemoryRecover:
13209 * @doc: the document the chunk pertains to
13210 * @sax: the SAX handler bloc (possibly NULL)
13211 * @user_data: The user data returned on SAX callbacks (possibly NULL)
13212 * @depth: Used for loop detection, use 0
13213 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
13214 * @lst: the return value for the set of parsed nodes
13215 * @recover: return nodes even if the data is broken (use 0)
13216 *
13217 *
13218 * Parse a well-balanced chunk of an XML document
13219 * called by the parser
13220 * The allowed sequence for the Well Balanced Chunk is the one defined by
13221 * the content production in the XML grammar:
13222 *
13223 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13224 *
13225 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
13226 * the parser error code otherwise
Daniel Veillard2135fc22008-04-04 16:10:51 +000013227 *
Daniel Veillard58e44c92002-08-02 22:19:49 +000013228 * In case recover is set to 1, the nodelist will not be empty even if
Daniel Veillard2135fc22008-04-04 16:10:51 +000013229 * the parsed chunk is not well balanced, assuming the parsing succeeded to
13230 * some extent.
Daniel Veillard58e44c92002-08-02 22:19:49 +000013231 */
13232int
13233xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax,
Daniel Veillard2135fc22008-04-04 16:10:51 +000013234 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst,
Daniel Veillard58e44c92002-08-02 22:19:49 +000013235 int recover) {
Owen Taylor3473f882001-02-23 17:55:21 +000013236 xmlParserCtxtPtr ctxt;
13237 xmlDocPtr newDoc;
13238 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000013239 xmlNodePtr content, newRoot;
Owen Taylor3473f882001-02-23 17:55:21 +000013240 int size;
13241 int ret = 0;
13242
Daniel Veillard0161e632008-08-28 15:36:32 +000013243 if (depth > 40) {
Owen Taylor3473f882001-02-23 17:55:21 +000013244 return(XML_ERR_ENTITY_LOOP);
13245 }
13246
13247
Daniel Veillardcda96922001-08-21 10:56:31 +000013248 if (lst != NULL)
13249 *lst = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000013250 if (string == NULL)
13251 return(-1);
13252
13253 size = xmlStrlen(string);
13254
13255 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
13256 if (ctxt == NULL) return(-1);
13257 ctxt->userData = ctxt;
13258 if (sax != NULL) {
13259 oldsax = ctxt->sax;
13260 ctxt->sax = sax;
13261 if (user_data != NULL)
13262 ctxt->userData = user_data;
13263 }
13264 newDoc = xmlNewDoc(BAD_CAST "1.0");
13265 if (newDoc == NULL) {
13266 xmlFreeParserCtxt(ctxt);
13267 return(-1);
13268 }
Daniel Veillardae0765b2008-07-31 19:54:59 +000013269 newDoc->properties = XML_DOC_INTERNAL;
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000013270 if ((doc != NULL) && (doc->dict != NULL)) {
13271 xmlDictFree(ctxt->dict);
13272 ctxt->dict = doc->dict;
13273 xmlDictReference(ctxt->dict);
13274 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
13275 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
13276 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
13277 ctxt->dictNames = 1;
13278 } else {
Daniel Veillard37334572008-07-31 08:20:02 +000013279 xmlCtxtUseOptionsInternal(ctxt, XML_PARSE_NODICT, NULL);
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000013280 }
Owen Taylor3473f882001-02-23 17:55:21 +000013281 if (doc != NULL) {
13282 newDoc->intSubset = doc->intSubset;
13283 newDoc->extSubset = doc->extSubset;
13284 }
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000013285 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
13286 if (newRoot == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000013287 if (sax != NULL)
13288 ctxt->sax = oldsax;
13289 xmlFreeParserCtxt(ctxt);
13290 newDoc->intSubset = NULL;
13291 newDoc->extSubset = NULL;
13292 xmlFreeDoc(newDoc);
13293 return(-1);
13294 }
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000013295 xmlAddChild((xmlNodePtr) newDoc, newRoot);
13296 nodePush(ctxt, newRoot);
Owen Taylor3473f882001-02-23 17:55:21 +000013297 if (doc == NULL) {
13298 ctxt->myDoc = newDoc;
13299 } else {
Daniel Veillard42766c02002-08-22 20:52:17 +000013300 ctxt->myDoc = newDoc;
Owen Taylor3473f882001-02-23 17:55:21 +000013301 newDoc->children->doc = doc;
Rob Richardsa02f1992006-09-16 14:04:26 +000013302 /* Ensure that doc has XML spec namespace */
13303 xmlSearchNsByHref(doc, (xmlNodePtr)doc, XML_XML_NAMESPACE);
13304 newDoc->oldNs = doc->oldNs;
Owen Taylor3473f882001-02-23 17:55:21 +000013305 }
13306 ctxt->instate = XML_PARSER_CONTENT;
13307 ctxt->depth = depth;
13308
13309 /*
13310 * Doing validity checking on chunk doesn't make sense
13311 */
13312 ctxt->validate = 0;
13313 ctxt->loadsubset = 0;
Daniel Veillarde57ec792003-09-10 10:50:59 +000013314 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000013315
Daniel Veillardb39bc392002-10-26 19:29:51 +000013316 if ( doc != NULL ){
13317 content = doc->children;
13318 doc->children = NULL;
13319 xmlParseContent(ctxt);
13320 doc->children = content;
13321 }
13322 else {
13323 xmlParseContent(ctxt);
13324 }
Owen Taylor3473f882001-02-23 17:55:21 +000013325 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000013326 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000013327 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000013328 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000013329 }
13330 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000013331 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000013332 }
13333
13334 if (!ctxt->wellFormed) {
13335 if (ctxt->errNo == 0)
13336 ret = 1;
13337 else
13338 ret = ctxt->errNo;
13339 } else {
Daniel Veillard58e44c92002-08-02 22:19:49 +000013340 ret = 0;
13341 }
Daniel Veillard0161e632008-08-28 15:36:32 +000013342
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000013343 if ((lst != NULL) && ((ret == 0) || (recover == 1))) {
13344 xmlNodePtr cur;
Owen Taylor3473f882001-02-23 17:55:21 +000013345
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000013346 /*
13347 * Return the newly created nodeset after unlinking it from
13348 * they pseudo parent.
13349 */
13350 cur = newDoc->children->children;
13351 *lst = cur;
13352 while (cur != NULL) {
13353 xmlSetTreeDoc(cur, doc);
13354 cur->parent = NULL;
13355 cur = cur->next;
Owen Taylor3473f882001-02-23 17:55:21 +000013356 }
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000013357 newDoc->children->children = NULL;
13358 }
Daniel Veillard0161e632008-08-28 15:36:32 +000013359
13360 if (sax != NULL)
Owen Taylor3473f882001-02-23 17:55:21 +000013361 ctxt->sax = oldsax;
13362 xmlFreeParserCtxt(ctxt);
13363 newDoc->intSubset = NULL;
13364 newDoc->extSubset = NULL;
Rob Richardsa02f1992006-09-16 14:04:26 +000013365 newDoc->oldNs = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000013366 xmlFreeDoc(newDoc);
Daniel Veillard0161e632008-08-28 15:36:32 +000013367
Owen Taylor3473f882001-02-23 17:55:21 +000013368 return(ret);
13369}
13370
13371/**
13372 * xmlSAXParseEntity:
13373 * @sax: the SAX handler block
13374 * @filename: the filename
13375 *
13376 * parse an XML external entity out of context and build a tree.
13377 * It use the given SAX function block to handle the parsing callback.
13378 * If sax is NULL, fallback to the default DOM tree building routines.
13379 *
13380 * [78] extParsedEnt ::= TextDecl? content
13381 *
13382 * This correspond to a "Well Balanced" chunk
13383 *
13384 * Returns the resulting document tree
13385 */
13386
13387xmlDocPtr
13388xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
13389 xmlDocPtr ret;
13390 xmlParserCtxtPtr ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000013391
13392 ctxt = xmlCreateFileParserCtxt(filename);
13393 if (ctxt == NULL) {
13394 return(NULL);
13395 }
13396 if (sax != NULL) {
13397 if (ctxt->sax != NULL)
13398 xmlFree(ctxt->sax);
13399 ctxt->sax = sax;
13400 ctxt->userData = NULL;
13401 }
13402
Owen Taylor3473f882001-02-23 17:55:21 +000013403 xmlParseExtParsedEnt(ctxt);
13404
13405 if (ctxt->wellFormed)
13406 ret = ctxt->myDoc;
13407 else {
13408 ret = NULL;
13409 xmlFreeDoc(ctxt->myDoc);
13410 ctxt->myDoc = NULL;
13411 }
13412 if (sax != NULL)
13413 ctxt->sax = NULL;
13414 xmlFreeParserCtxt(ctxt);
Daniel Veillard0161e632008-08-28 15:36:32 +000013415
Owen Taylor3473f882001-02-23 17:55:21 +000013416 return(ret);
13417}
13418
13419/**
13420 * xmlParseEntity:
13421 * @filename: the filename
13422 *
13423 * parse an XML external entity out of context and build a tree.
13424 *
13425 * [78] extParsedEnt ::= TextDecl? content
13426 *
13427 * This correspond to a "Well Balanced" chunk
13428 *
13429 * Returns the resulting document tree
13430 */
13431
13432xmlDocPtr
13433xmlParseEntity(const char *filename) {
13434 return(xmlSAXParseEntity(NULL, filename));
13435}
Daniel Veillard81273902003-09-30 00:43:48 +000013436#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000013437
13438/**
Rob Richards9c0aa472009-03-26 18:10:19 +000013439 * xmlCreateEntityParserCtxtInternal:
Owen Taylor3473f882001-02-23 17:55:21 +000013440 * @URL: the entity URL
13441 * @ID: the entity PUBLIC ID
Daniel Veillardcbaf3992001-12-31 16:16:02 +000013442 * @base: a possible base for the target URI
Rob Richards9c0aa472009-03-26 18:10:19 +000013443 * @pctx: parser context used to set options on new context
Owen Taylor3473f882001-02-23 17:55:21 +000013444 *
13445 * Create a parser context for an external entity
13446 * Automatic support for ZLIB/Compress compressed document is provided
13447 * by default if found at compile-time.
13448 *
13449 * Returns the new parser context or NULL
13450 */
Rob Richards9c0aa472009-03-26 18:10:19 +000013451static xmlParserCtxtPtr
13452xmlCreateEntityParserCtxtInternal(const xmlChar *URL, const xmlChar *ID,
13453 const xmlChar *base, xmlParserCtxtPtr pctx) {
Owen Taylor3473f882001-02-23 17:55:21 +000013454 xmlParserCtxtPtr ctxt;
13455 xmlParserInputPtr inputStream;
13456 char *directory = NULL;
13457 xmlChar *uri;
Daniel Veillard0161e632008-08-28 15:36:32 +000013458
Owen Taylor3473f882001-02-23 17:55:21 +000013459 ctxt = xmlNewParserCtxt();
13460 if (ctxt == NULL) {
13461 return(NULL);
13462 }
13463
Daniel Veillard48247b42009-07-10 16:12:46 +020013464 if (pctx != NULL) {
13465 ctxt->options = pctx->options;
13466 ctxt->_private = pctx->_private;
Rob Richards9c0aa472009-03-26 18:10:19 +000013467 }
13468
Owen Taylor3473f882001-02-23 17:55:21 +000013469 uri = xmlBuildURI(URL, base);
13470
13471 if (uri == NULL) {
13472 inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
13473 if (inputStream == NULL) {
13474 xmlFreeParserCtxt(ctxt);
13475 return(NULL);
13476 }
13477
13478 inputPush(ctxt, inputStream);
13479
13480 if ((ctxt->directory == NULL) && (directory == NULL))
13481 directory = xmlParserGetDirectory((char *)URL);
13482 if ((ctxt->directory == NULL) && (directory != NULL))
13483 ctxt->directory = directory;
13484 } else {
13485 inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt);
13486 if (inputStream == NULL) {
13487 xmlFree(uri);
13488 xmlFreeParserCtxt(ctxt);
13489 return(NULL);
13490 }
13491
13492 inputPush(ctxt, inputStream);
13493
13494 if ((ctxt->directory == NULL) && (directory == NULL))
13495 directory = xmlParserGetDirectory((char *)uri);
13496 if ((ctxt->directory == NULL) && (directory != NULL))
13497 ctxt->directory = directory;
13498 xmlFree(uri);
13499 }
Owen Taylor3473f882001-02-23 17:55:21 +000013500 return(ctxt);
13501}
13502
Rob Richards9c0aa472009-03-26 18:10:19 +000013503/**
13504 * xmlCreateEntityParserCtxt:
13505 * @URL: the entity URL
13506 * @ID: the entity PUBLIC ID
13507 * @base: a possible base for the target URI
13508 *
13509 * Create a parser context for an external entity
13510 * Automatic support for ZLIB/Compress compressed document is provided
13511 * by default if found at compile-time.
13512 *
13513 * Returns the new parser context or NULL
13514 */
13515xmlParserCtxtPtr
13516xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
13517 const xmlChar *base) {
13518 return xmlCreateEntityParserCtxtInternal(URL, ID, base, NULL);
13519
13520}
13521
/************************************************************************
 *                                                                      *
 *              Front ends when parsing from a file                     *
 *                                                                      *
 ************************************************************************/
13527
13528/**
Daniel Veillard61b93382003-11-03 14:28:31 +000013529 * xmlCreateURLParserCtxt:
13530 * @filename: the filename or URL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013531 * @options: a combination of xmlParserOption
Owen Taylor3473f882001-02-23 17:55:21 +000013532 *
Daniel Veillard61b93382003-11-03 14:28:31 +000013533 * Create a parser context for a file or URL content.
Owen Taylor3473f882001-02-23 17:55:21 +000013534 * Automatic support for ZLIB/Compress compressed document is provided
Daniel Veillard61b93382003-11-03 14:28:31 +000013535 * by default if found at compile-time and for file accesses
Owen Taylor3473f882001-02-23 17:55:21 +000013536 *
13537 * Returns the new parser context or NULL
13538 */
13539xmlParserCtxtPtr
Daniel Veillard61b93382003-11-03 14:28:31 +000013540xmlCreateURLParserCtxt(const char *filename, int options)
Owen Taylor3473f882001-02-23 17:55:21 +000013541{
13542 xmlParserCtxtPtr ctxt;
13543 xmlParserInputPtr inputStream;
Owen Taylor3473f882001-02-23 17:55:21 +000013544 char *directory = NULL;
13545
Owen Taylor3473f882001-02-23 17:55:21 +000013546 ctxt = xmlNewParserCtxt();
13547 if (ctxt == NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +000013548 xmlErrMemory(NULL, "cannot allocate parser context");
Owen Taylor3473f882001-02-23 17:55:21 +000013549 return(NULL);
13550 }
13551
Daniel Veillarddf292f72005-01-16 19:00:15 +000013552 if (options)
Daniel Veillard37334572008-07-31 08:20:02 +000013553 xmlCtxtUseOptionsInternal(ctxt, options, NULL);
Daniel Veillarddf292f72005-01-16 19:00:15 +000013554 ctxt->linenumbers = 1;
Daniel Veillard37334572008-07-31 08:20:02 +000013555
Daniel Veillard4e9b1bc2003-06-09 10:30:33 +000013556 inputStream = xmlLoadExternalEntity(filename, NULL, ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000013557 if (inputStream == NULL) {
13558 xmlFreeParserCtxt(ctxt);
13559 return(NULL);
13560 }
13561
Owen Taylor3473f882001-02-23 17:55:21 +000013562 inputPush(ctxt, inputStream);
13563 if ((ctxt->directory == NULL) && (directory == NULL))
Igor Zlatkovic5f9fada2003-02-19 14:51:00 +000013564 directory = xmlParserGetDirectory(filename);
Owen Taylor3473f882001-02-23 17:55:21 +000013565 if ((ctxt->directory == NULL) && (directory != NULL))
13566 ctxt->directory = directory;
13567
13568 return(ctxt);
13569}
13570
Daniel Veillard61b93382003-11-03 14:28:31 +000013571/**
13572 * xmlCreateFileParserCtxt:
13573 * @filename: the filename
13574 *
13575 * Create a parser context for a file content.
13576 * Automatic support for ZLIB/Compress compressed document is provided
13577 * by default if found at compile-time.
13578 *
13579 * Returns the new parser context or NULL
13580 */
13581xmlParserCtxtPtr
13582xmlCreateFileParserCtxt(const char *filename)
13583{
13584 return(xmlCreateURLParserCtxt(filename, 0));
13585}
13586
Daniel Veillard81273902003-09-30 00:43:48 +000013587#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000013588/**
Daniel Veillarda293c322001-10-02 13:54:14 +000013589 * xmlSAXParseFileWithData:
Owen Taylor3473f882001-02-23 17:55:21 +000013590 * @sax: the SAX handler block
13591 * @filename: the filename
13592 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
13593 * documents
Daniel Veillarda293c322001-10-02 13:54:14 +000013594 * @data: the userdata
Owen Taylor3473f882001-02-23 17:55:21 +000013595 *
13596 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
13597 * compressed document is provided by default if found at compile-time.
13598 * It use the given SAX function block to handle the parsing callback.
13599 * If sax is NULL, fallback to the default DOM tree building routines.
13600 *
Daniel Veillarde19fc232002-04-22 16:01:24 +000013601 * User data (void *) is stored within the parser context in the
13602 * context's _private member, so it is available nearly everywhere in libxml
Daniel Veillarda293c322001-10-02 13:54:14 +000013603 *
Owen Taylor3473f882001-02-23 17:55:21 +000013604 * Returns the resulting document tree
13605 */
13606
13607xmlDocPtr
Daniel Veillarda293c322001-10-02 13:54:14 +000013608xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename,
13609 int recovery, void *data) {
Owen Taylor3473f882001-02-23 17:55:21 +000013610 xmlDocPtr ret;
13611 xmlParserCtxtPtr ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000013612
Daniel Veillard635ef722001-10-29 11:48:19 +000013613 xmlInitParser();
13614
Owen Taylor3473f882001-02-23 17:55:21 +000013615 ctxt = xmlCreateFileParserCtxt(filename);
13616 if (ctxt == NULL) {
13617 return(NULL);
13618 }
13619 if (sax != NULL) {
13620 if (ctxt->sax != NULL)
13621 xmlFree(ctxt->sax);
13622 ctxt->sax = sax;
Owen Taylor3473f882001-02-23 17:55:21 +000013623 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000013624 xmlDetectSAX2(ctxt);
Daniel Veillarda293c322001-10-02 13:54:14 +000013625 if (data!=NULL) {
Daniel Veillardc790bf42003-10-11 10:50:10 +000013626 ctxt->_private = data;
Daniel Veillarda293c322001-10-02 13:54:14 +000013627 }
Owen Taylor3473f882001-02-23 17:55:21 +000013628
Daniel Veillard37d2d162008-03-14 10:54:00 +000013629 if (ctxt->directory == NULL)
13630 ctxt->directory = xmlParserGetDirectory(filename);
Owen Taylor3473f882001-02-23 17:55:21 +000013631
Daniel Veillarddad3f682002-11-17 16:47:27 +000013632 ctxt->recovery = recovery;
13633
Owen Taylor3473f882001-02-23 17:55:21 +000013634 xmlParseDocument(ctxt);
13635
William M. Brackc07329e2003-09-08 01:57:30 +000013636 if ((ctxt->wellFormed) || recovery) {
13637 ret = ctxt->myDoc;
Daniel Veillardb65e12e2003-10-08 21:33:28 +000013638 if (ret != NULL) {
13639 if (ctxt->input->buf->compressed > 0)
13640 ret->compression = 9;
13641 else
13642 ret->compression = ctxt->input->buf->compressed;
13643 }
William M. Brackc07329e2003-09-08 01:57:30 +000013644 }
Owen Taylor3473f882001-02-23 17:55:21 +000013645 else {
13646 ret = NULL;
13647 xmlFreeDoc(ctxt->myDoc);
13648 ctxt->myDoc = NULL;
13649 }
13650 if (sax != NULL)
13651 ctxt->sax = NULL;
13652 xmlFreeParserCtxt(ctxt);
13653
13654 return(ret);
13655}
13656
13657/**
Daniel Veillarda293c322001-10-02 13:54:14 +000013658 * xmlSAXParseFile:
13659 * @sax: the SAX handler block
13660 * @filename: the filename
13661 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
13662 * documents
13663 *
13664 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
13665 * compressed document is provided by default if found at compile-time.
13666 * It use the given SAX function block to handle the parsing callback.
13667 * If sax is NULL, fallback to the default DOM tree building routines.
13668 *
13669 * Returns the resulting document tree
13670 */
13671
13672xmlDocPtr
13673xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
13674 int recovery) {
13675 return(xmlSAXParseFileWithData(sax,filename,recovery,NULL));
13676}
13677
13678/**
Owen Taylor3473f882001-02-23 17:55:21 +000013679 * xmlRecoverDoc:
13680 * @cur: a pointer to an array of xmlChar
13681 *
13682 * parse an XML in-memory document and build a tree.
Daniel Veillard2135fc22008-04-04 16:10:51 +000013683 * In the case the document is not Well Formed, a attempt to build a
13684 * tree is tried anyway
13685 *
13686 * Returns the resulting document tree or NULL in case of failure
Owen Taylor3473f882001-02-23 17:55:21 +000013687 */
13688
13689xmlDocPtr
Daniel Veillardf39eafa2009-08-20 19:15:08 +020013690xmlRecoverDoc(const xmlChar *cur) {
Owen Taylor3473f882001-02-23 17:55:21 +000013691 return(xmlSAXParseDoc(NULL, cur, 1));
13692}
13693
13694/**
13695 * xmlParseFile:
13696 * @filename: the filename
13697 *
13698 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
13699 * compressed document is provided by default if found at compile-time.
13700 *
Daniel Veillard5d96fff2001-08-31 14:55:30 +000013701 * Returns the resulting document tree if the file was wellformed,
13702 * NULL otherwise.
Owen Taylor3473f882001-02-23 17:55:21 +000013703 */
13704
13705xmlDocPtr
13706xmlParseFile(const char *filename) {
13707 return(xmlSAXParseFile(NULL, filename, 0));
13708}
13709
13710/**
13711 * xmlRecoverFile:
13712 * @filename: the filename
13713 *
13714 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
13715 * compressed document is provided by default if found at compile-time.
Daniel Veillard2135fc22008-04-04 16:10:51 +000013716 * In the case the document is not Well Formed, it attempts to build
13717 * a tree anyway
Owen Taylor3473f882001-02-23 17:55:21 +000013718 *
Daniel Veillard2135fc22008-04-04 16:10:51 +000013719 * Returns the resulting document tree or NULL in case of failure
Owen Taylor3473f882001-02-23 17:55:21 +000013720 */
13721
13722xmlDocPtr
13723xmlRecoverFile(const char *filename) {
13724 return(xmlSAXParseFile(NULL, filename, 1));
13725}
13726
13727
13728/**
13729 * xmlSetupParserForBuffer:
13730 * @ctxt: an XML parser context
13731 * @buffer: a xmlChar * buffer
13732 * @filename: a file name
13733 *
13734 * Setup the parser context to parse a new buffer; Clears any prior
13735 * contents from the parser context. The buffer parameter must not be
13736 * NULL, but the filename parameter can be
13737 */
13738void
13739xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
13740 const char* filename)
13741{
13742 xmlParserInputPtr input;
13743
Daniel Veillard36e5cd52004-11-02 14:52:23 +000013744 if ((ctxt == NULL) || (buffer == NULL))
13745 return;
13746
Owen Taylor3473f882001-02-23 17:55:21 +000013747 input = xmlNewInputStream(ctxt);
13748 if (input == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +000013749 xmlErrMemory(NULL, "parsing new buffer: out of memory\n");
Daniel Veillard36e5cd52004-11-02 14:52:23 +000013750 xmlClearParserCtxt(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000013751 return;
13752 }
13753
13754 xmlClearParserCtxt(ctxt);
13755 if (filename != NULL)
Daniel Veillardc3ca5ba2003-05-09 22:26:28 +000013756 input->filename = (char *) xmlCanonicPath((const xmlChar *)filename);
Owen Taylor3473f882001-02-23 17:55:21 +000013757 input->base = buffer;
13758 input->cur = buffer;
Daniel Veillard48b2f892001-02-25 16:11:03 +000013759 input->end = &buffer[xmlStrlen(buffer)];
Owen Taylor3473f882001-02-23 17:55:21 +000013760 inputPush(ctxt, input);
13761}
13762
13763/**
13764 * xmlSAXUserParseFile:
13765 * @sax: a SAX handler
13766 * @user_data: The user data returned on SAX callbacks
13767 * @filename: a file name
13768 *
13769 * parse an XML file and call the given SAX handler routines.
13770 * Automatic support for ZLIB/Compress compressed document is provided
13771 *
13772 * Returns 0 in case of success or a error number otherwise
13773 */
13774int
13775xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
13776 const char *filename) {
13777 int ret = 0;
13778 xmlParserCtxtPtr ctxt;
13779
13780 ctxt = xmlCreateFileParserCtxt(filename);
13781 if (ctxt == NULL) return -1;
Daniel Veillard092643b2003-09-25 14:29:29 +000013782 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Owen Taylor3473f882001-02-23 17:55:21 +000013783 xmlFree(ctxt->sax);
13784 ctxt->sax = sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000013785 xmlDetectSAX2(ctxt);
13786
Owen Taylor3473f882001-02-23 17:55:21 +000013787 if (user_data != NULL)
13788 ctxt->userData = user_data;
13789
13790 xmlParseDocument(ctxt);
13791
13792 if (ctxt->wellFormed)
13793 ret = 0;
13794 else {
13795 if (ctxt->errNo != 0)
13796 ret = ctxt->errNo;
13797 else
13798 ret = -1;
13799 }
13800 if (sax != NULL)
13801 ctxt->sax = NULL;
Daniel Veillard34099b42004-11-04 17:34:35 +000013802 if (ctxt->myDoc != NULL) {
13803 xmlFreeDoc(ctxt->myDoc);
13804 ctxt->myDoc = NULL;
13805 }
Owen Taylor3473f882001-02-23 17:55:21 +000013806 xmlFreeParserCtxt(ctxt);
13807
13808 return ret;
13809}
Daniel Veillard81273902003-09-30 00:43:48 +000013810#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000013811
/************************************************************************
 *                                                                      *
 *              Front ends when parsing from memory                     *
 *                                                                      *
 ************************************************************************/
13817
13818/**
13819 * xmlCreateMemoryParserCtxt:
13820 * @buffer: a pointer to a char array
13821 * @size: the size of the array
13822 *
13823 * Create a parser context for an XML in-memory document.
13824 *
13825 * Returns the new parser context or NULL
13826 */
13827xmlParserCtxtPtr
Daniel Veillardfd7ddca2001-05-16 10:57:35 +000013828xmlCreateMemoryParserCtxt(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000013829 xmlParserCtxtPtr ctxt;
13830 xmlParserInputPtr input;
13831 xmlParserInputBufferPtr buf;
13832
13833 if (buffer == NULL)
13834 return(NULL);
13835 if (size <= 0)
13836 return(NULL);
13837
13838 ctxt = xmlNewParserCtxt();
13839 if (ctxt == NULL)
13840 return(NULL);
13841
Daniel Veillard53350552003-09-18 13:35:51 +000013842 /* TODO: xmlParserInputBufferCreateStatic, requires some serious changes */
Owen Taylor3473f882001-02-23 17:55:21 +000013843 buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
Daniel Veillarda7e05b42002-11-19 08:11:14 +000013844 if (buf == NULL) {
13845 xmlFreeParserCtxt(ctxt);
13846 return(NULL);
13847 }
Owen Taylor3473f882001-02-23 17:55:21 +000013848
13849 input = xmlNewInputStream(ctxt);
13850 if (input == NULL) {
Daniel Veillarda7e05b42002-11-19 08:11:14 +000013851 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000013852 xmlFreeParserCtxt(ctxt);
13853 return(NULL);
13854 }
13855
13856 input->filename = NULL;
13857 input->buf = buf;
13858 input->base = input->buf->buffer->content;
13859 input->cur = input->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +000013860 input->end = &input->buf->buffer->content[input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000013861
13862 inputPush(ctxt, input);
13863 return(ctxt);
13864}
13865
Daniel Veillard81273902003-09-30 00:43:48 +000013866#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000013867/**
Daniel Veillard8606bbb2002-11-12 12:36:52 +000013868 * xmlSAXParseMemoryWithData:
13869 * @sax: the SAX handler block
13870 * @buffer: an pointer to a char array
13871 * @size: the size of the array
13872 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
13873 * documents
13874 * @data: the userdata
13875 *
13876 * parse an XML in-memory block and use the given SAX function block
13877 * to handle the parsing callback. If sax is NULL, fallback to the default
13878 * DOM tree building routines.
13879 *
13880 * User data (void *) is stored within the parser context in the
13881 * context's _private member, so it is available nearly everywhere in libxml
13882 *
13883 * Returns the resulting document tree
13884 */
13885
13886xmlDocPtr
13887xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer,
13888 int size, int recovery, void *data) {
13889 xmlDocPtr ret;
13890 xmlParserCtxtPtr ctxt;
13891
Daniel Veillardab2a7632009-07-09 08:45:03 +020013892 xmlInitParser();
13893
Daniel Veillard8606bbb2002-11-12 12:36:52 +000013894 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
13895 if (ctxt == NULL) return(NULL);
13896 if (sax != NULL) {
13897 if (ctxt->sax != NULL)
13898 xmlFree(ctxt->sax);
13899 ctxt->sax = sax;
13900 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000013901 xmlDetectSAX2(ctxt);
Daniel Veillard8606bbb2002-11-12 12:36:52 +000013902 if (data!=NULL) {
13903 ctxt->_private=data;
13904 }
13905
Daniel Veillardadba5f12003-04-04 16:09:01 +000013906 ctxt->recovery = recovery;
13907
Daniel Veillard8606bbb2002-11-12 12:36:52 +000013908 xmlParseDocument(ctxt);
13909
13910 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
13911 else {
13912 ret = NULL;
13913 xmlFreeDoc(ctxt->myDoc);
13914 ctxt->myDoc = NULL;
13915 }
13916 if (sax != NULL)
13917 ctxt->sax = NULL;
13918 xmlFreeParserCtxt(ctxt);
Daniel Veillardab2a7632009-07-09 08:45:03 +020013919
Daniel Veillard8606bbb2002-11-12 12:36:52 +000013920 return(ret);
13921}
13922
13923/**
Owen Taylor3473f882001-02-23 17:55:21 +000013924 * xmlSAXParseMemory:
13925 * @sax: the SAX handler block
13926 * @buffer: an pointer to a char array
13927 * @size: the size of the array
13928 * @recovery: work in recovery mode, i.e. tries to read not Well Formed
13929 * documents
13930 *
13931 * parse an XML in-memory block and use the given SAX function block
13932 * to handle the parsing callback. If sax is NULL, fallback to the default
13933 * DOM tree building routines.
13934 *
13935 * Returns the resulting document tree
13936 */
13937xmlDocPtr
Daniel Veillard50822cb2001-07-26 20:05:51 +000013938xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer,
13939 int size, int recovery) {
Daniel Veillard8606bbb2002-11-12 12:36:52 +000013940 return xmlSAXParseMemoryWithData(sax, buffer, size, recovery, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000013941}
13942
13943/**
13944 * xmlParseMemory:
13945 * @buffer: an pointer to a char array
13946 * @size: the size of the array
13947 *
13948 * parse an XML in-memory block and build a tree.
13949 *
13950 * Returns the resulting document tree
13951 */
13952
Daniel Veillard50822cb2001-07-26 20:05:51 +000013953xmlDocPtr xmlParseMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000013954 return(xmlSAXParseMemory(NULL, buffer, size, 0));
13955}
13956
13957/**
13958 * xmlRecoverMemory:
13959 * @buffer: an pointer to a char array
13960 * @size: the size of the array
13961 *
13962 * parse an XML in-memory block and build a tree.
Daniel Veillard2135fc22008-04-04 16:10:51 +000013963 * In the case the document is not Well Formed, an attempt to
13964 * build a tree is tried anyway
13965 *
13966 * Returns the resulting document tree or NULL in case of error
Owen Taylor3473f882001-02-23 17:55:21 +000013967 */
13968
Daniel Veillard50822cb2001-07-26 20:05:51 +000013969xmlDocPtr xmlRecoverMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000013970 return(xmlSAXParseMemory(NULL, buffer, size, 1));
13971}
13972
13973/**
13974 * xmlSAXUserParseMemory:
13975 * @sax: a SAX handler
13976 * @user_data: The user data returned on SAX callbacks
13977 * @buffer: an in-memory XML document input
13978 * @size: the length of the XML document in bytes
13979 *
13980 * A better SAX parsing routine.
13981 * parse an XML in-memory buffer and call the given SAX handler routines.
Daniel Veillardab2a7632009-07-09 08:45:03 +020013982 *
Owen Taylor3473f882001-02-23 17:55:21 +000013983 * Returns 0 in case of success or a error number otherwise
13984 */
13985int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardfd7ddca2001-05-16 10:57:35 +000013986 const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000013987 int ret = 0;
13988 xmlParserCtxtPtr ctxt;
Daniel Veillardab2a7632009-07-09 08:45:03 +020013989
13990 xmlInitParser();
13991
Owen Taylor3473f882001-02-23 17:55:21 +000013992 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
13993 if (ctxt == NULL) return -1;
Daniel Veillard3dcd3192007-08-14 13:46:54 +000013994 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
13995 xmlFree(ctxt->sax);
Daniel Veillard9e923512002-08-14 08:48:52 +000013996 ctxt->sax = sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000013997 xmlDetectSAX2(ctxt);
Daniel Veillard3dcd3192007-08-14 13:46:54 +000013998
Daniel Veillard30211a02001-04-26 09:33:18 +000013999 if (user_data != NULL)
14000 ctxt->userData = user_data;
Daniel Veillardab2a7632009-07-09 08:45:03 +020014001
Owen Taylor3473f882001-02-23 17:55:21 +000014002 xmlParseDocument(ctxt);
14003
14004 if (ctxt->wellFormed)
14005 ret = 0;
14006 else {
14007 if (ctxt->errNo != 0)
14008 ret = ctxt->errNo;
14009 else
14010 ret = -1;
14011 }
Daniel Veillard3dcd3192007-08-14 13:46:54 +000014012 if (sax != NULL)
14013 ctxt->sax = NULL;
Daniel Veillard34099b42004-11-04 17:34:35 +000014014 if (ctxt->myDoc != NULL) {
14015 xmlFreeDoc(ctxt->myDoc);
14016 ctxt->myDoc = NULL;
14017 }
Owen Taylor3473f882001-02-23 17:55:21 +000014018 xmlFreeParserCtxt(ctxt);
14019
14020 return ret;
14021}
Daniel Veillard81273902003-09-30 00:43:48 +000014022#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000014023
14024/**
14025 * xmlCreateDocParserCtxt:
14026 * @cur: a pointer to an array of xmlChar
14027 *
14028 * Creates a parser context for an XML in-memory document.
14029 *
14030 * Returns the new parser context or NULL
14031 */
14032xmlParserCtxtPtr
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014033xmlCreateDocParserCtxt(const xmlChar *cur) {
Owen Taylor3473f882001-02-23 17:55:21 +000014034 int len;
14035
14036 if (cur == NULL)
14037 return(NULL);
14038 len = xmlStrlen(cur);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014039 return(xmlCreateMemoryParserCtxt((const char *)cur, len));
Owen Taylor3473f882001-02-23 17:55:21 +000014040}
14041
Daniel Veillard81273902003-09-30 00:43:48 +000014042#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000014043/**
14044 * xmlSAXParseDoc:
14045 * @sax: the SAX handler block
14046 * @cur: a pointer to an array of xmlChar
14047 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
14048 * documents
14049 *
14050 * parse an XML in-memory document and build a tree.
14051 * It use the given SAX function block to handle the parsing callback.
14052 * If sax is NULL, fallback to the default DOM tree building routines.
14053 *
14054 * Returns the resulting document tree
14055 */
14056
14057xmlDocPtr
Daniel Veillard7331e5c2005-03-31 14:59:00 +000014058xmlSAXParseDoc(xmlSAXHandlerPtr sax, const xmlChar *cur, int recovery) {
Owen Taylor3473f882001-02-23 17:55:21 +000014059 xmlDocPtr ret;
14060 xmlParserCtxtPtr ctxt;
Daniel Veillard38936062004-11-04 17:45:11 +000014061 xmlSAXHandlerPtr oldsax = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000014062
Daniel Veillard38936062004-11-04 17:45:11 +000014063 if (cur == NULL) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000014064
14065
14066 ctxt = xmlCreateDocParserCtxt(cur);
14067 if (ctxt == NULL) return(NULL);
14068 if (sax != NULL) {
Daniel Veillard38936062004-11-04 17:45:11 +000014069 oldsax = ctxt->sax;
Owen Taylor3473f882001-02-23 17:55:21 +000014070 ctxt->sax = sax;
14071 ctxt->userData = NULL;
14072 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000014073 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000014074
14075 xmlParseDocument(ctxt);
14076 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
14077 else {
14078 ret = NULL;
14079 xmlFreeDoc(ctxt->myDoc);
14080 ctxt->myDoc = NULL;
14081 }
Daniel Veillard34099b42004-11-04 17:34:35 +000014082 if (sax != NULL)
Daniel Veillard38936062004-11-04 17:45:11 +000014083 ctxt->sax = oldsax;
Owen Taylor3473f882001-02-23 17:55:21 +000014084 xmlFreeParserCtxt(ctxt);
14085
14086 return(ret);
14087}
14088
14089/**
14090 * xmlParseDoc:
14091 * @cur: a pointer to an array of xmlChar
14092 *
14093 * parse an XML in-memory document and build a tree.
14094 *
14095 * Returns the resulting document tree
14096 */
14097
14098xmlDocPtr
Daniel Veillard7331e5c2005-03-31 14:59:00 +000014099xmlParseDoc(const xmlChar *cur) {
Owen Taylor3473f882001-02-23 17:55:21 +000014100 return(xmlSAXParseDoc(NULL, cur, 0));
14101}
Daniel Veillard81273902003-09-30 00:43:48 +000014102#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000014103
Daniel Veillard81273902003-09-30 00:43:48 +000014104#ifdef LIBXML_LEGACY_ENABLED
/************************************************************************
 *                                                                      *
 *      Specific function to keep track of entities references         *
 *      and used by the XSLT debugger                                   *
 *                                                                      *
 ************************************************************************/
14111
14112static xmlEntityReferenceFunc xmlEntityRefFunc = NULL;
14113
14114/**
14115 * xmlAddEntityReference:
14116 * @ent : A valid entity
14117 * @firstNode : A valid first node for children of entity
14118 * @lastNode : A valid last node of children entity
14119 *
14120 * Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY
14121 */
14122static void
14123xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
14124 xmlNodePtr lastNode)
14125{
14126 if (xmlEntityRefFunc != NULL) {
14127 (*xmlEntityRefFunc) (ent, firstNode, lastNode);
14128 }
14129}
14130
14131
14132/**
14133 * xmlSetEntityReferenceFunc:
Daniel Veillard01c13b52002-12-10 15:19:08 +000014134 * @func: A valid function
Daniel Veillard8107a222002-01-13 14:10:10 +000014135 *
14136 * Set the function to call call back when a xml reference has been made
14137 */
14138void
14139xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func)
14140{
14141 xmlEntityRefFunc = func;
14142}
Daniel Veillard81273902003-09-30 00:43:48 +000014143#endif /* LIBXML_LEGACY_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000014144
/************************************************************************
 *                                                                      *
 *                              Miscellaneous                           *
 *                                                                      *
 ************************************************************************/
14150
14151#ifdef LIBXML_XPATH_ENABLED
14152#include <libxml/xpath.h>
14153#endif
14154
Daniel Veillardffa3c742005-07-21 13:24:09 +000014155extern void XMLCDECL xmlGenericErrorDefaultFunc(void *ctx, const char *msg, ...);
Owen Taylor3473f882001-02-23 17:55:21 +000014156static int xmlParserInitialized = 0;
14157
14158/**
14159 * xmlInitParser:
14160 *
14161 * Initialization function for the XML parser.
14162 * This is not reentrant. Call once before processing in case of
14163 * use in multithreaded programs.
14164 */
14165
14166void
14167xmlInitParser(void) {
Daniel Veillard3c01b1d2001-10-17 15:58:35 +000014168 if (xmlParserInitialized != 0)
14169 return;
Owen Taylor3473f882001-02-23 17:55:21 +000014170
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000014171#ifdef LIBXML_THREAD_ENABLED
14172 __xmlGlobalInitMutexLock();
14173 if (xmlParserInitialized == 0) {
14174#endif
Daniel Veillard7dd70802009-06-04 11:08:39 +020014175 xmlInitThreads();
Mike Hommeye6f05092010-10-15 19:50:03 +020014176 xmlInitGlobals();
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000014177 if ((xmlGenericError == xmlGenericErrorDefaultFunc) ||
14178 (xmlGenericError == NULL))
14179 initGenericErrorDefaultFunc(NULL);
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000014180 xmlInitMemory();
14181 xmlInitCharEncodingHandlers();
14182 xmlDefaultSAXHandlerInit();
14183 xmlRegisterDefaultInputCallbacks();
Daniel Veillarda9cce9c2003-09-29 13:20:24 +000014184#ifdef LIBXML_OUTPUT_ENABLED
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000014185 xmlRegisterDefaultOutputCallbacks();
Daniel Veillarda9cce9c2003-09-29 13:20:24 +000014186#endif /* LIBXML_OUTPUT_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000014187#ifdef LIBXML_HTML_ENABLED
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000014188 htmlInitAutoClose();
14189 htmlDefaultSAXHandlerInit();
Owen Taylor3473f882001-02-23 17:55:21 +000014190#endif
14191#ifdef LIBXML_XPATH_ENABLED
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000014192 xmlXPathInit();
Owen Taylor3473f882001-02-23 17:55:21 +000014193#endif
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000014194 xmlParserInitialized = 1;
14195#ifdef LIBXML_THREAD_ENABLED
14196 }
14197 __xmlGlobalInitMutexUnlock();
14198#endif
Owen Taylor3473f882001-02-23 17:55:21 +000014199}
14200
14201/**
14202 * xmlCleanupParser:
14203 *
Daniel Veillard05b37c62008-03-31 08:27:07 +000014204 * This function name is somewhat misleading. It does not clean up
14205 * parser state, it cleans up memory allocated by the library itself.
14206 * It is a cleanup function for the XML library. It tries to reclaim all
14207 * related global memory allocated for the library processing.
14208 * It doesn't deallocate any document related memory. One should
14209 * call xmlCleanupParser() only when the process has finished using
14210 * the library and all XML/HTML documents built with it.
14211 * See also xmlInitParser() which has the opposite function of preparing
14212 * the library for operations.
Daniel Veillard01101202009-02-21 09:22:04 +000014213 *
14214 * WARNING: if your application is multithreaded or has plugin support
14215 * calling this may crash the application if another thread or
14216 * a plugin is still using libxml2. It's sometimes very hard to
14217 * guess if libxml2 is in use in the application, some libraries
14218 * or plugins may use it without notice. In case of doubt abstain
14219 * from calling this function or do it just before calling exit()
14220 * to avoid leak reports from valgrind !
Owen Taylor3473f882001-02-23 17:55:21 +000014221 */
14222
14223void
14224xmlCleanupParser(void) {
Daniel Veillard7fb801f2003-08-17 21:07:26 +000014225 if (!xmlParserInitialized)
14226 return;
14227
Owen Taylor3473f882001-02-23 17:55:21 +000014228 xmlCleanupCharEncodingHandlers();
Daniel Veillarde2940dd2001-08-22 00:06:49 +000014229#ifdef LIBXML_CATALOG_ENABLED
14230 xmlCatalogCleanup();
14231#endif
Daniel Veillard14412512005-01-21 23:53:26 +000014232 xmlDictCleanup();
Daniel Veillard04054be2003-10-15 10:48:54 +000014233 xmlCleanupInputCallbacks();
14234#ifdef LIBXML_OUTPUT_ENABLED
14235 xmlCleanupOutputCallbacks();
14236#endif
Daniel Veillard36e5cd52004-11-02 14:52:23 +000014237#ifdef LIBXML_SCHEMAS_ENABLED
14238 xmlSchemaCleanupTypes();
Daniel Veillarddd6d3002004-11-03 14:20:29 +000014239 xmlRelaxNGCleanupTypes();
Daniel Veillard36e5cd52004-11-02 14:52:23 +000014240#endif
Daniel Veillard781ac8b2003-05-15 22:11:36 +000014241 xmlCleanupGlobals();
Daniel Veillard2b8c4a12003-10-02 22:28:19 +000014242 xmlResetLastError();
Daniel Veillard74c0e592003-11-25 07:01:38 +000014243 xmlCleanupThreads(); /* must be last if called not from the main thread */
William M. Brack72ee48d2003-12-30 08:30:19 +000014244 xmlCleanupMemory();
Daniel Veillardd0463562001-10-13 09:15:48 +000014245 xmlParserInitialized = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000014246}
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014247
14248/************************************************************************
14249 * *
14250 * New set (2.6.0) of simpler and more flexible APIs *
14251 * *
14252 ************************************************************************/
14253
/**
 * DICT_FREE:
 * @str:  a string
 *
 * Free a string if it is not owned by the "dict" dictionary in the
 * current scope. A variable named "dict" must be visible where the macro
 * is expanded; a NULL "dict" means the string is never dictionary owned.
 * A NULL @str is ignored.
 *
 * The body is wrapped in do { } while (0) so that the macro expands to
 * exactly one statement and stays safe inside an unbraced if/else. The
 * caller supplies the terminating semicolon. @str is evaluated more than
 * once, so it must not have side effects.
 */
#define DICT_FREE(str)                                                  \
    do {                                                                \
        if ((str) && ((!dict) ||                                        \
            (xmlDictOwns(dict, (const xmlChar *)(str)) == 0)))          \
            xmlFree((char *)(str));                                     \
    } while (0)
14265
14266/**
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014267 * xmlCtxtReset:
14268 * @ctxt: an XML parser context
14269 *
14270 * Reset a parser context
14271 */
14272void
14273xmlCtxtReset(xmlParserCtxtPtr ctxt)
14274{
14275 xmlParserInputPtr input;
Daniel Veillard36e5cd52004-11-02 14:52:23 +000014276 xmlDictPtr dict;
14277
14278 if (ctxt == NULL)
14279 return;
14280
14281 dict = ctxt->dict;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014282
14283 while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
14284 xmlFreeInputStream(input);
14285 }
14286 ctxt->inputNr = 0;
14287 ctxt->input = NULL;
14288
14289 ctxt->spaceNr = 0;
Daniel Veillard2e620862007-06-12 08:18:21 +000014290 if (ctxt->spaceTab != NULL) {
14291 ctxt->spaceTab[0] = -1;
14292 ctxt->space = &ctxt->spaceTab[0];
14293 } else {
14294 ctxt->space = NULL;
14295 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014296
14297
14298 ctxt->nodeNr = 0;
14299 ctxt->node = NULL;
14300
14301 ctxt->nameNr = 0;
14302 ctxt->name = NULL;
14303
Daniel Veillarde96a2a42003-09-24 21:23:56 +000014304 DICT_FREE(ctxt->version);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014305 ctxt->version = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000014306 DICT_FREE(ctxt->encoding);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014307 ctxt->encoding = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000014308 DICT_FREE(ctxt->directory);
14309 ctxt->directory = NULL;
14310 DICT_FREE(ctxt->extSubURI);
14311 ctxt->extSubURI = NULL;
14312 DICT_FREE(ctxt->extSubSystem);
14313 ctxt->extSubSystem = NULL;
14314 if (ctxt->myDoc != NULL)
14315 xmlFreeDoc(ctxt->myDoc);
14316 ctxt->myDoc = NULL;
14317
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014318 ctxt->standalone = -1;
14319 ctxt->hasExternalSubset = 0;
14320 ctxt->hasPErefs = 0;
14321 ctxt->html = 0;
14322 ctxt->external = 0;
14323 ctxt->instate = XML_PARSER_START;
14324 ctxt->token = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014325
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014326 ctxt->wellFormed = 1;
14327 ctxt->nsWellFormed = 1;
Daniel Veillardae289182004-01-21 16:00:43 +000014328 ctxt->disableSAX = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014329 ctxt->valid = 1;
Daniel Veillard766c4f92004-03-26 10:48:29 +000014330#if 0
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014331 ctxt->vctxt.userData = ctxt;
14332 ctxt->vctxt.error = xmlParserValidityError;
14333 ctxt->vctxt.warning = xmlParserValidityWarning;
Daniel Veillard766c4f92004-03-26 10:48:29 +000014334#endif
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014335 ctxt->record_info = 0;
14336 ctxt->nbChars = 0;
14337 ctxt->checkIndex = 0;
14338 ctxt->inSubset = 0;
14339 ctxt->errNo = XML_ERR_OK;
14340 ctxt->depth = 0;
14341 ctxt->charset = XML_CHAR_ENCODING_UTF8;
14342 ctxt->catalogs = NULL;
Daniel Veillard4bf899b2008-08-20 17:04:30 +000014343 ctxt->nbentities = 0;
Daniel Veillard0161e632008-08-28 15:36:32 +000014344 ctxt->sizeentities = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014345 xmlInitNodeInfoSeq(&ctxt->node_seq);
14346
14347 if (ctxt->attsDefault != NULL) {
14348 xmlHashFree(ctxt->attsDefault, (xmlHashDeallocator) xmlFree);
14349 ctxt->attsDefault = NULL;
14350 }
14351 if (ctxt->attsSpecial != NULL) {
14352 xmlHashFree(ctxt->attsSpecial, NULL);
14353 ctxt->attsSpecial = NULL;
14354 }
14355
Daniel Veillard4432df22003-09-28 18:58:27 +000014356#ifdef LIBXML_CATALOG_ENABLED
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014357 if (ctxt->catalogs != NULL)
14358 xmlCatalogFreeLocal(ctxt->catalogs);
Daniel Veillard4432df22003-09-28 18:58:27 +000014359#endif
Daniel Veillardcc199e02003-10-24 21:11:48 +000014360 if (ctxt->lastError.code != XML_ERR_OK)
14361 xmlResetError(&ctxt->lastError);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014362}
14363
14364/**
Daniel Veillard9ba8e382003-10-28 21:31:45 +000014365 * xmlCtxtResetPush:
14366 * @ctxt: an XML parser context
14367 * @chunk: a pointer to an array of chars
14368 * @size: number of chars in the array
14369 * @filename: an optional file name or URI
14370 * @encoding: the document encoding, or NULL
14371 *
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000014372 * Reset a push parser context
14373 *
14374 * Returns 0 in case of success and 1 in case of error
Daniel Veillard9ba8e382003-10-28 21:31:45 +000014375 */
14376int
14377xmlCtxtResetPush(xmlParserCtxtPtr ctxt, const char *chunk,
14378 int size, const char *filename, const char *encoding)
14379{
14380 xmlParserInputPtr inputStream;
14381 xmlParserInputBufferPtr buf;
14382 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
14383
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000014384 if (ctxt == NULL)
14385 return(1);
14386
Daniel Veillard9ba8e382003-10-28 21:31:45 +000014387 if ((encoding == NULL) && (chunk != NULL) && (size >= 4))
14388 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
14389
14390 buf = xmlAllocParserInputBuffer(enc);
14391 if (buf == NULL)
14392 return(1);
14393
14394 if (ctxt == NULL) {
14395 xmlFreeParserInputBuffer(buf);
14396 return(1);
14397 }
14398
14399 xmlCtxtReset(ctxt);
14400
14401 if (ctxt->pushTab == NULL) {
14402 ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 *
14403 sizeof(xmlChar *));
14404 if (ctxt->pushTab == NULL) {
14405 xmlErrMemory(ctxt, NULL);
14406 xmlFreeParserInputBuffer(buf);
14407 return(1);
14408 }
14409 }
14410
14411 if (filename == NULL) {
14412 ctxt->directory = NULL;
14413 } else {
14414 ctxt->directory = xmlParserGetDirectory(filename);
14415 }
14416
14417 inputStream = xmlNewInputStream(ctxt);
14418 if (inputStream == NULL) {
14419 xmlFreeParserInputBuffer(buf);
14420 return(1);
14421 }
14422
14423 if (filename == NULL)
14424 inputStream->filename = NULL;
14425 else
14426 inputStream->filename = (char *)
14427 xmlCanonicPath((const xmlChar *) filename);
14428 inputStream->buf = buf;
14429 inputStream->base = inputStream->buf->buffer->content;
14430 inputStream->cur = inputStream->buf->buffer->content;
14431 inputStream->end =
14432 &inputStream->buf->buffer->content[inputStream->buf->buffer->use];
14433
14434 inputPush(ctxt, inputStream);
14435
14436 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
14437 (ctxt->input->buf != NULL)) {
14438 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
14439 int cur = ctxt->input->cur - ctxt->input->base;
14440
14441 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
14442
14443 ctxt->input->base = ctxt->input->buf->buffer->content + base;
14444 ctxt->input->cur = ctxt->input->base + cur;
14445 ctxt->input->end =
14446 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->
14447 use];
14448#ifdef DEBUG_PUSH
14449 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
14450#endif
14451 }
14452
14453 if (encoding != NULL) {
14454 xmlCharEncodingHandlerPtr hdlr;
14455
Daniel Veillard37334572008-07-31 08:20:02 +000014456 if (ctxt->encoding != NULL)
14457 xmlFree((xmlChar *) ctxt->encoding);
14458 ctxt->encoding = xmlStrdup((const xmlChar *) encoding);
14459
Daniel Veillard9ba8e382003-10-28 21:31:45 +000014460 hdlr = xmlFindCharEncodingHandler(encoding);
14461 if (hdlr != NULL) {
14462 xmlSwitchToEncoding(ctxt, hdlr);
14463 } else {
14464 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
14465 "Unsupported encoding %s\n", BAD_CAST encoding);
14466 }
14467 } else if (enc != XML_CHAR_ENCODING_NONE) {
14468 xmlSwitchEncoding(ctxt, enc);
14469 }
14470
14471 return(0);
14472}
14473
Daniel Veillard37334572008-07-31 08:20:02 +000014474
Daniel Veillard9ba8e382003-10-28 21:31:45 +000014475/**
Daniel Veillard37334572008-07-31 08:20:02 +000014476 * xmlCtxtUseOptionsInternal:
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014477 * @ctxt: an XML parser context
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014478 * @options: a combination of xmlParserOption
Daniel Veillard37334572008-07-31 08:20:02 +000014479 * @encoding: the user provided encoding to use
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014480 *
14481 * Applies the options to the parser context
14482 *
14483 * Returns 0 in case of success, the set of unknown or unimplemented options
14484 * in case of error.
14485 */
Daniel Veillard37334572008-07-31 08:20:02 +000014486static int
14487xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options, const char *encoding)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014488{
Daniel Veillard36e5cd52004-11-02 14:52:23 +000014489 if (ctxt == NULL)
14490 return(-1);
Daniel Veillard37334572008-07-31 08:20:02 +000014491 if (encoding != NULL) {
14492 if (ctxt->encoding != NULL)
14493 xmlFree((xmlChar *) ctxt->encoding);
14494 ctxt->encoding = xmlStrdup((const xmlChar *) encoding);
14495 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014496 if (options & XML_PARSE_RECOVER) {
14497 ctxt->recovery = 1;
14498 options -= XML_PARSE_RECOVER;
Daniel Veillardae0765b2008-07-31 19:54:59 +000014499 ctxt->options |= XML_PARSE_RECOVER;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014500 } else
14501 ctxt->recovery = 0;
14502 if (options & XML_PARSE_DTDLOAD) {
14503 ctxt->loadsubset = XML_DETECT_IDS;
14504 options -= XML_PARSE_DTDLOAD;
Daniel Veillardae0765b2008-07-31 19:54:59 +000014505 ctxt->options |= XML_PARSE_DTDLOAD;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014506 } else
14507 ctxt->loadsubset = 0;
14508 if (options & XML_PARSE_DTDATTR) {
14509 ctxt->loadsubset |= XML_COMPLETE_ATTRS;
14510 options -= XML_PARSE_DTDATTR;
Daniel Veillardae0765b2008-07-31 19:54:59 +000014511 ctxt->options |= XML_PARSE_DTDATTR;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014512 }
14513 if (options & XML_PARSE_NOENT) {
14514 ctxt->replaceEntities = 1;
14515 /* ctxt->loadsubset |= XML_DETECT_IDS; */
14516 options -= XML_PARSE_NOENT;
Daniel Veillardae0765b2008-07-31 19:54:59 +000014517 ctxt->options |= XML_PARSE_NOENT;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014518 } else
14519 ctxt->replaceEntities = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014520 if (options & XML_PARSE_PEDANTIC) {
14521 ctxt->pedantic = 1;
14522 options -= XML_PARSE_PEDANTIC;
Daniel Veillardae0765b2008-07-31 19:54:59 +000014523 ctxt->options |= XML_PARSE_PEDANTIC;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014524 } else
14525 ctxt->pedantic = 0;
14526 if (options & XML_PARSE_NOBLANKS) {
14527 ctxt->keepBlanks = 0;
14528 ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace;
14529 options -= XML_PARSE_NOBLANKS;
Daniel Veillardae0765b2008-07-31 19:54:59 +000014530 ctxt->options |= XML_PARSE_NOBLANKS;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014531 } else
14532 ctxt->keepBlanks = 1;
14533 if (options & XML_PARSE_DTDVALID) {
14534 ctxt->validate = 1;
14535 if (options & XML_PARSE_NOWARNING)
14536 ctxt->vctxt.warning = NULL;
14537 if (options & XML_PARSE_NOERROR)
14538 ctxt->vctxt.error = NULL;
14539 options -= XML_PARSE_DTDVALID;
Daniel Veillardae0765b2008-07-31 19:54:59 +000014540 ctxt->options |= XML_PARSE_DTDVALID;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014541 } else
14542 ctxt->validate = 0;
Daniel Veillard971771e2005-07-09 17:32:57 +000014543 if (options & XML_PARSE_NOWARNING) {
14544 ctxt->sax->warning = NULL;
14545 options -= XML_PARSE_NOWARNING;
14546 }
14547 if (options & XML_PARSE_NOERROR) {
14548 ctxt->sax->error = NULL;
14549 ctxt->sax->fatalError = NULL;
14550 options -= XML_PARSE_NOERROR;
14551 }
Daniel Veillard81273902003-09-30 00:43:48 +000014552#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014553 if (options & XML_PARSE_SAX1) {
14554 ctxt->sax->startElement = xmlSAX2StartElement;
14555 ctxt->sax->endElement = xmlSAX2EndElement;
14556 ctxt->sax->startElementNs = NULL;
14557 ctxt->sax->endElementNs = NULL;
14558 ctxt->sax->initialized = 1;
14559 options -= XML_PARSE_SAX1;
Daniel Veillardae0765b2008-07-31 19:54:59 +000014560 ctxt->options |= XML_PARSE_SAX1;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014561 }
Daniel Veillard81273902003-09-30 00:43:48 +000014562#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde96a2a42003-09-24 21:23:56 +000014563 if (options & XML_PARSE_NODICT) {
14564 ctxt->dictNames = 0;
14565 options -= XML_PARSE_NODICT;
Daniel Veillardae0765b2008-07-31 19:54:59 +000014566 ctxt->options |= XML_PARSE_NODICT;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000014567 } else {
14568 ctxt->dictNames = 1;
14569 }
Daniel Veillarddca8cc72003-09-26 13:53:14 +000014570 if (options & XML_PARSE_NOCDATA) {
14571 ctxt->sax->cdataBlock = NULL;
14572 options -= XML_PARSE_NOCDATA;
Daniel Veillardae0765b2008-07-31 19:54:59 +000014573 ctxt->options |= XML_PARSE_NOCDATA;
Daniel Veillarddca8cc72003-09-26 13:53:14 +000014574 }
14575 if (options & XML_PARSE_NSCLEAN) {
14576 ctxt->options |= XML_PARSE_NSCLEAN;
14577 options -= XML_PARSE_NSCLEAN;
14578 }
Daniel Veillard61b93382003-11-03 14:28:31 +000014579 if (options & XML_PARSE_NONET) {
14580 ctxt->options |= XML_PARSE_NONET;
14581 options -= XML_PARSE_NONET;
14582 }
Daniel Veillard8874b942005-08-25 13:19:21 +000014583 if (options & XML_PARSE_COMPACT) {
14584 ctxt->options |= XML_PARSE_COMPACT;
14585 options -= XML_PARSE_COMPACT;
14586 }
Daniel Veillard7e5c3f42008-07-29 16:12:31 +000014587 if (options & XML_PARSE_OLD10) {
14588 ctxt->options |= XML_PARSE_OLD10;
14589 options -= XML_PARSE_OLD10;
14590 }
Daniel Veillard8915c152008-08-26 13:05:34 +000014591 if (options & XML_PARSE_NOBASEFIX) {
14592 ctxt->options |= XML_PARSE_NOBASEFIX;
14593 options -= XML_PARSE_NOBASEFIX;
14594 }
14595 if (options & XML_PARSE_HUGE) {
14596 ctxt->options |= XML_PARSE_HUGE;
14597 options -= XML_PARSE_HUGE;
14598 }
Rob Richardsb9ed0172009-01-05 17:28:50 +000014599 if (options & XML_PARSE_OLDSAX) {
14600 ctxt->options |= XML_PARSE_OLDSAX;
14601 options -= XML_PARSE_OLDSAX;
14602 }
Daniel Veillardc62efc82011-05-16 16:03:50 +080014603 if (options & XML_PARSE_IGNORE_ENC) {
14604 ctxt->options |= XML_PARSE_IGNORE_ENC;
14605 options -= XML_PARSE_IGNORE_ENC;
14606 }
Daniel Veillard7ec29972003-10-31 14:36:36 +000014607 ctxt->linenumbers = 1;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014608 return (options);
14609}
14610
14611/**
Daniel Veillard37334572008-07-31 08:20:02 +000014612 * xmlCtxtUseOptions:
14613 * @ctxt: an XML parser context
14614 * @options: a combination of xmlParserOption
14615 *
14616 * Applies the options to the parser context
14617 *
14618 * Returns 0 in case of success, the set of unknown or unimplemented options
14619 * in case of error.
14620 */
14621int
14622xmlCtxtUseOptions(xmlParserCtxtPtr ctxt, int options)
14623{
14624 return(xmlCtxtUseOptionsInternal(ctxt, options, NULL));
14625}
14626
14627/**
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014628 * xmlDoRead:
14629 * @ctxt: an XML parser context
Daniel Veillard60942de2003-09-25 21:05:58 +000014630 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014631 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014632 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014633 * @reuse: keep the context for reuse
14634 *
14635 * Common front-end for the xmlRead functions
Daniel Veillard37334572008-07-31 08:20:02 +000014636 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014637 * Returns the resulting document tree or NULL
14638 */
14639static xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000014640xmlDoRead(xmlParserCtxtPtr ctxt, const char *URL, const char *encoding,
14641 int options, int reuse)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014642{
14643 xmlDocPtr ret;
Daniel Veillard37334572008-07-31 08:20:02 +000014644
14645 xmlCtxtUseOptionsInternal(ctxt, options, encoding);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014646 if (encoding != NULL) {
14647 xmlCharEncodingHandlerPtr hdlr;
14648
14649 hdlr = xmlFindCharEncodingHandler(encoding);
14650 if (hdlr != NULL)
14651 xmlSwitchToEncoding(ctxt, hdlr);
14652 }
Daniel Veillard60942de2003-09-25 21:05:58 +000014653 if ((URL != NULL) && (ctxt->input != NULL) &&
14654 (ctxt->input->filename == NULL))
14655 ctxt->input->filename = (char *) xmlStrdup((const xmlChar *) URL);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014656 xmlParseDocument(ctxt);
14657 if ((ctxt->wellFormed) || ctxt->recovery)
14658 ret = ctxt->myDoc;
14659 else {
14660 ret = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000014661 if (ctxt->myDoc != NULL) {
Daniel Veillarde96a2a42003-09-24 21:23:56 +000014662 xmlFreeDoc(ctxt->myDoc);
14663 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014664 }
Daniel Veillarde96a2a42003-09-24 21:23:56 +000014665 ctxt->myDoc = NULL;
14666 if (!reuse) {
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014667 xmlFreeParserCtxt(ctxt);
Daniel Veillarde96a2a42003-09-24 21:23:56 +000014668 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014669
14670 return (ret);
14671}
14672
14673/**
14674 * xmlReadDoc:
14675 * @cur: a pointer to a zero terminated string
Daniel Veillard60942de2003-09-25 21:05:58 +000014676 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014677 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014678 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014679 *
14680 * parse an XML in-memory document and build a tree.
14681 *
14682 * Returns the resulting document tree
14683 */
14684xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000014685xmlReadDoc(const xmlChar * cur, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014686{
14687 xmlParserCtxtPtr ctxt;
14688
14689 if (cur == NULL)
14690 return (NULL);
14691
14692 ctxt = xmlCreateDocParserCtxt(cur);
14693 if (ctxt == NULL)
14694 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000014695 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014696}
14697
14698/**
14699 * xmlReadFile:
14700 * @filename: a file or URL
14701 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014702 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014703 *
14704 * parse an XML file from the filesystem or the network.
14705 *
14706 * Returns the resulting document tree
14707 */
14708xmlDocPtr
14709xmlReadFile(const char *filename, const char *encoding, int options)
14710{
14711 xmlParserCtxtPtr ctxt;
14712
Daniel Veillard61b93382003-11-03 14:28:31 +000014713 ctxt = xmlCreateURLParserCtxt(filename, options);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014714 if (ctxt == NULL)
14715 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000014716 return (xmlDoRead(ctxt, NULL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014717}
14718
14719/**
14720 * xmlReadMemory:
14721 * @buffer: a pointer to a char array
14722 * @size: the size of the array
Daniel Veillard60942de2003-09-25 21:05:58 +000014723 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014724 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014725 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014726 *
14727 * parse an XML in-memory document and build a tree.
14728 *
14729 * Returns the resulting document tree
14730 */
14731xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000014732xmlReadMemory(const char *buffer, int size, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014733{
14734 xmlParserCtxtPtr ctxt;
14735
14736 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
14737 if (ctxt == NULL)
14738 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000014739 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014740}
14741
14742/**
14743 * xmlReadFd:
14744 * @fd: an open file descriptor
Daniel Veillard60942de2003-09-25 21:05:58 +000014745 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014746 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014747 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014748 *
14749 * parse an XML from a file descriptor and build a tree.
Daniel Veillard4bc5f432003-12-22 18:13:12 +000014750 * NOTE that the file descriptor will not be closed when the
14751 * reader is closed or reset.
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014752 *
14753 * Returns the resulting document tree
14754 */
14755xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000014756xmlReadFd(int fd, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014757{
14758 xmlParserCtxtPtr ctxt;
14759 xmlParserInputBufferPtr input;
14760 xmlParserInputPtr stream;
14761
14762 if (fd < 0)
14763 return (NULL);
14764
14765 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
14766 if (input == NULL)
14767 return (NULL);
Daniel Veillard4bc5f432003-12-22 18:13:12 +000014768 input->closecallback = NULL;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014769 ctxt = xmlNewParserCtxt();
14770 if (ctxt == NULL) {
14771 xmlFreeParserInputBuffer(input);
14772 return (NULL);
14773 }
14774 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
14775 if (stream == NULL) {
14776 xmlFreeParserInputBuffer(input);
14777 xmlFreeParserCtxt(ctxt);
14778 return (NULL);
14779 }
14780 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000014781 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014782}
14783
14784/**
14785 * xmlReadIO:
14786 * @ioread: an I/O read function
14787 * @ioclose: an I/O close function
14788 * @ioctx: an I/O handler
Daniel Veillard60942de2003-09-25 21:05:58 +000014789 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014790 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014791 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014792 *
14793 * parse an XML document from I/O functions and source and build a tree.
Lin Yi-Li24464be2012-05-10 16:14:55 +080014794 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014795 * Returns the resulting document tree
14796 */
14797xmlDocPtr
14798xmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
Daniel Veillard60942de2003-09-25 21:05:58 +000014799 void *ioctx, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014800{
14801 xmlParserCtxtPtr ctxt;
14802 xmlParserInputBufferPtr input;
14803 xmlParserInputPtr stream;
14804
14805 if (ioread == NULL)
14806 return (NULL);
14807
14808 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
14809 XML_CHAR_ENCODING_NONE);
Lin Yi-Li24464be2012-05-10 16:14:55 +080014810 if (input == NULL) {
14811 if (ioclose != NULL)
14812 ioclose(ioctx);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014813 return (NULL);
Lin Yi-Li24464be2012-05-10 16:14:55 +080014814 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014815 ctxt = xmlNewParserCtxt();
14816 if (ctxt == NULL) {
14817 xmlFreeParserInputBuffer(input);
14818 return (NULL);
14819 }
14820 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
14821 if (stream == NULL) {
14822 xmlFreeParserInputBuffer(input);
14823 xmlFreeParserCtxt(ctxt);
14824 return (NULL);
14825 }
14826 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000014827 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014828}
14829
14830/**
14831 * xmlCtxtReadDoc:
14832 * @ctxt: an XML parser context
14833 * @cur: a pointer to a zero terminated string
Daniel Veillard60942de2003-09-25 21:05:58 +000014834 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014835 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014836 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014837 *
14838 * parse an XML in-memory document and build a tree.
14839 * This reuses the existing @ctxt parser context
Lin Yi-Li24464be2012-05-10 16:14:55 +080014840 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014841 * Returns the resulting document tree
14842 */
14843xmlDocPtr
14844xmlCtxtReadDoc(xmlParserCtxtPtr ctxt, const xmlChar * cur,
Daniel Veillard60942de2003-09-25 21:05:58 +000014845 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014846{
14847 xmlParserInputPtr stream;
14848
14849 if (cur == NULL)
14850 return (NULL);
14851 if (ctxt == NULL)
14852 return (NULL);
14853
14854 xmlCtxtReset(ctxt);
14855
14856 stream = xmlNewStringInputStream(ctxt, cur);
14857 if (stream == NULL) {
14858 return (NULL);
14859 }
14860 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000014861 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014862}
14863
14864/**
14865 * xmlCtxtReadFile:
14866 * @ctxt: an XML parser context
14867 * @filename: a file or URL
14868 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014869 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014870 *
14871 * parse an XML file from the filesystem or the network.
14872 * This reuses the existing @ctxt parser context
14873 *
14874 * Returns the resulting document tree
14875 */
14876xmlDocPtr
14877xmlCtxtReadFile(xmlParserCtxtPtr ctxt, const char *filename,
14878 const char *encoding, int options)
14879{
14880 xmlParserInputPtr stream;
14881
14882 if (filename == NULL)
14883 return (NULL);
14884 if (ctxt == NULL)
14885 return (NULL);
14886
14887 xmlCtxtReset(ctxt);
14888
Daniel Veillard29614c72004-11-26 10:47:26 +000014889 stream = xmlLoadExternalEntity(filename, NULL, ctxt);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014890 if (stream == NULL) {
14891 return (NULL);
14892 }
14893 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000014894 return (xmlDoRead(ctxt, NULL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014895}
14896
14897/**
14898 * xmlCtxtReadMemory:
14899 * @ctxt: an XML parser context
14900 * @buffer: a pointer to a char array
14901 * @size: the size of the array
Daniel Veillard60942de2003-09-25 21:05:58 +000014902 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014903 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014904 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014905 *
14906 * parse an XML in-memory document and build a tree.
14907 * This reuses the existing @ctxt parser context
14908 *
14909 * Returns the resulting document tree
14910 */
14911xmlDocPtr
14912xmlCtxtReadMemory(xmlParserCtxtPtr ctxt, const char *buffer, int size,
Daniel Veillard60942de2003-09-25 21:05:58 +000014913 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014914{
14915 xmlParserInputBufferPtr input;
14916 xmlParserInputPtr stream;
14917
14918 if (ctxt == NULL)
14919 return (NULL);
14920 if (buffer == NULL)
14921 return (NULL);
14922
14923 xmlCtxtReset(ctxt);
14924
14925 input = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
14926 if (input == NULL) {
14927 return(NULL);
14928 }
14929
14930 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
14931 if (stream == NULL) {
14932 xmlFreeParserInputBuffer(input);
14933 return(NULL);
14934 }
14935
14936 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000014937 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014938}
14939
14940/**
14941 * xmlCtxtReadFd:
14942 * @ctxt: an XML parser context
14943 * @fd: an open file descriptor
Daniel Veillard60942de2003-09-25 21:05:58 +000014944 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014945 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014946 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014947 *
14948 * parse an XML from a file descriptor and build a tree.
14949 * This reuses the existing @ctxt parser context
Daniel Veillard4bc5f432003-12-22 18:13:12 +000014950 * NOTE that the file descriptor will not be closed when the
14951 * reader is closed or reset.
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014952 *
14953 * Returns the resulting document tree
14954 */
14955xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000014956xmlCtxtReadFd(xmlParserCtxtPtr ctxt, int fd,
14957 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014958{
14959 xmlParserInputBufferPtr input;
14960 xmlParserInputPtr stream;
14961
14962 if (fd < 0)
14963 return (NULL);
14964 if (ctxt == NULL)
14965 return (NULL);
14966
14967 xmlCtxtReset(ctxt);
14968
14969
14970 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
14971 if (input == NULL)
14972 return (NULL);
Daniel Veillard4bc5f432003-12-22 18:13:12 +000014973 input->closecallback = NULL;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014974 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
14975 if (stream == NULL) {
14976 xmlFreeParserInputBuffer(input);
14977 return (NULL);
14978 }
14979 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000014980 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014981}
14982
14983/**
14984 * xmlCtxtReadIO:
14985 * @ctxt: an XML parser context
14986 * @ioread: an I/O read function
14987 * @ioclose: an I/O close function
14988 * @ioctx: an I/O handler
Daniel Veillard60942de2003-09-25 21:05:58 +000014989 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014990 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014991 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014992 *
14993 * parse an XML document from I/O functions and source and build a tree.
14994 * This reuses the existing @ctxt parser context
Lin Yi-Li24464be2012-05-10 16:14:55 +080014995 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014996 * Returns the resulting document tree
14997 */
14998xmlDocPtr
14999xmlCtxtReadIO(xmlParserCtxtPtr ctxt, xmlInputReadCallback ioread,
15000 xmlInputCloseCallback ioclose, void *ioctx,
Daniel Veillard60942de2003-09-25 21:05:58 +000015001 const char *URL,
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015002 const char *encoding, int options)
15003{
15004 xmlParserInputBufferPtr input;
15005 xmlParserInputPtr stream;
15006
15007 if (ioread == NULL)
15008 return (NULL);
15009 if (ctxt == NULL)
15010 return (NULL);
15011
15012 xmlCtxtReset(ctxt);
15013
15014 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
15015 XML_CHAR_ENCODING_NONE);
Lin Yi-Li24464be2012-05-10 16:14:55 +080015016 if (input == NULL) {
15017 if (ioclose != NULL)
15018 ioclose(ioctx);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015019 return (NULL);
Lin Yi-Li24464be2012-05-10 16:14:55 +080015020 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015021 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15022 if (stream == NULL) {
15023 xmlFreeParserInputBuffer(input);
15024 return (NULL);
15025 }
15026 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000015027 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015028}
Daniel Veillard5d4644e2005-04-01 13:11:58 +000015029
15030#define bottom_parser
15031#include "elfgcchack.h"